fix triton token2wav model cache thread unsafety

Author: 김의진
Date:   2025-10-27 17:20:14 +09:00
parent 6e01309e01
commit cd26dd1932


@@ -28,6 +28,7 @@ import json
 import os
 import logging
+from uuid import uuid4
 import torch
 from torch.utils.dlpack import to_dlpack
@@ -235,17 +236,17 @@ class TritonPythonModel:
                 stream = True
             else:
                 stream = False
-            request_id = request.request_id()
+            uuid = uuid4().hex
             audio_hat = self.token2wav_model.model.token2wav(token=target_speech_tokens,
                                                              prompt_token=prompt_speech_tokens,
                                                              prompt_feat=prompt_speech_feat,
                                                              embedding=prompt_spk_embedding,
                                                              token_offset=token_offset,
-                                                             uuid=request_id,
+                                                             uuid=uuid,
                                                              stream=stream,
                                                              finalize=finalize)
             if finalize:
-                self.token2wav_model.model.hift_cache_dict.pop(request_id)
+                self.token2wav_model.model.hift_cache_dict.pop(uuid)
             else:
                 tts_mel, _ = self.token2wav_model.model.flow.inference(
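A minimal sketch of the failure mode this commit addresses, standalone Python rather than the Triton runtime: the bare dict and handle_request below are hypothetical stand-ins for self.token2wav_model.model.hift_cache_dict and the execute path. When two in-flight requests report the same request_id, a cache keyed by that id makes them share one entry, so one request's finalize pops the other's HiFT streaming state; a fresh uuid4().hex per invocation gives each request its own slot.

    from threading import Thread
    from uuid import uuid4

    # Stand-in for the dict shared across concurrent requests on the model.
    hift_cache_dict = {}

    def handle_request(request_id: str) -> None:
        # Before the fix: key = request_id, so duplicate ids collide.
        # After the fix: a per-invocation uuid4().hex key is always unique.
        key = uuid4().hex
        hift_cache_dict[key] = f"streaming cache state for {request_id}"
        # ... token2wav(..., uuid=key, ...) would read/update this entry ...
        hift_cache_dict.pop(key)  # finalize drops only this request's entry

    # Two concurrent requests that report the same request_id no longer clash.
    threads = [Thread(target=handle_request, args=("dup-id",)) for _ in range(2)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    assert not hift_cache_dict  # each request cleaned up its own key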