From cd26dd19329a2e3f56ce530d4a062a438c20ea06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=EA=B9=80=EC=9D=98=EC=A7=84?= <uijin@goyoai.com>
Date: Mon, 27 Oct 2025 17:20:14 +0900
Subject: [PATCH] Fix thread-unsafe model cache in Triton token2wav

---
 runtime/triton_trtllm/model_repo/token2wav/1/model.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/runtime/triton_trtllm/model_repo/token2wav/1/model.py b/runtime/triton_trtllm/model_repo/token2wav/1/model.py
index 1e38052..53d5a54 100644
--- a/runtime/triton_trtllm/model_repo/token2wav/1/model.py
+++ b/runtime/triton_trtllm/model_repo/token2wav/1/model.py
@@ -28,6 +28,7 @@ import json
 import os
 
 import logging
+from uuid import uuid4
 
 import torch
 from torch.utils.dlpack import to_dlpack
@@ -235,17 +236,17 @@ class TritonPythonModel:
                     stream = True
                 else:
                     stream = False
-                request_id = request.request_id()
+                uuid = uuid4().hex
                 audio_hat = self.token2wav_model.model.token2wav(token=target_speech_tokens,
                                                                  prompt_token=prompt_speech_tokens,
                                                                  prompt_feat=prompt_speech_feat,
                                                                  embedding=prompt_spk_embedding,
                                                                  token_offset=token_offset,
-                                                                 uuid=request_id,
+                                                                 uuid=uuid,
                                                                  stream=stream,
                                                                  finalize=finalize)
                 if finalize:
-                    self.token2wav_model.model.hift_cache_dict.pop(request_id)
+                    self.token2wav_model.model.hift_cache_dict.pop(uuid)
 
             else:
                 tts_mel, _ = self.token2wav_model.model.flow.inference(