This commit is contained in:
lyuxiang.lx
2025-06-01 08:46:47 +00:00
parent 1c7976779b
commit c939c80480

View File

@@ -441,6 +441,8 @@ class Qwen2LM(TransformerLM):
# in stream mode, yield token one by one
yield top_ids
out_tokens.append(top_ids)
if len(out_tokens) == max_len:
break
time.sleep(0.001)
with self.lock:
self.vllm_output_queue.pop(uuid)