diff --git a/runtime/triton_trtllm/README.md b/runtime/triton_trtllm/README.md
index 62871fb..8d4892c 100644
--- a/runtime/triton_trtllm/README.md
+++ b/runtime/triton_trtllm/README.md
@@ -84,6 +84,8 @@ The following results were obtained by decoding on a single L20 GPU with 26 prom
 | Streaming, use_spk2info_cache=True | 2 | 323.04 | 316.83 | 0.0905 |
 | Streaming, use_spk2info_cache=True | 4 | 977.68 | 903.68| 0.0733 |
 
+> If your service only needs a single, fixed speaker, you can set `use_spk2info_cache=True` in `run.sh` (the default is `False`). To add more speakers, follow the [spk2info instructions in async_cosyvoice](https://github.com/qi-hua/async_cosyvoice?tab=readme-ov-file#9-spk2info-%E8%AF%B4%E6%98%8E).
+
 **Offline TTS (Full Sentence Latency)**
 | Mode | Note | Concurrency | Avg Latency (ms) | P50 Latency (ms) | RTF |
 |---|---|---|---|---|---|
diff --git a/runtime/triton_trtllm/run.sh b/runtime/triton_trtllm/run.sh
index 259d913..a60f4a3 100644
--- a/runtime/triton_trtllm/run.sh
+++ b/runtime/triton_trtllm/run.sh
@@ -15,7 +15,7 @@ trt_engines_dir=./trt_engines_${trt_dtype}
 
 model_repo=./model_repo_cosyvoice2
 
-use_spk2info_cache=True
+use_spk2info_cache=False
 
 if [ $stage -le -1 ] && [ $stop_stage -ge -1 ]; then
     echo "Cloning CosyVoice"