update export_codec_vllm

Author: 雾聪
Date: 2025-02-26 20:25:14 +08:00
Parent: f280558bcb
Commit: 54e9384fb1
2 changed files with 7 additions and 1 deletion


@@ -353,12 +353,14 @@ class Qwen2LM(TransformerLM):
                 if str(request_output.request_id) != str(request_id):
                     continue
                 if not request_output.finished:
-                    print(f"Partial request output: {request_output}")
+                    # print(f"Partial request output: {request_output}")
                     out_token = list(request_output.outputs[0].token_ids)[-1]
                     yield out_token
                     out_token_ids.append(out_token)
                 else:
                     break
+            if not vllm_codec_engine.has_unfinished_requests():
+                break
 
     @torch.inference_mode()
     def inference_bistream(
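
For context, here is a minimal sketch of the polling loop this hunk modifies, assuming the synchronous vllm.LLMEngine API (engine.step(), engine.has_unfinished_requests()). The function stream_tokens and its signature are hypothetical; vllm_codec_engine, request_id, and out_token_ids follow the names in the diff.

# Sketch only: stream_tokens is a hypothetical wrapper around the loop
# shown in the diff, not the repository's actual method.
from vllm import LLMEngine


def stream_tokens(vllm_codec_engine: LLMEngine, request_id: str):
    """Yield token ids for one request as the engine produces them.

    Assumes the request was already submitted to the engine, e.g. via
    vllm_codec_engine.add_request(request_id, ...).
    """
    out_token_ids = []
    while True:
        # step() advances every in-flight request and returns its
        # cumulative outputs so far.
        for request_output in vllm_codec_engine.step():
            if str(request_output.request_id) != str(request_id):
                continue
            if not request_output.finished:
                # token_ids is cumulative, so the last entry is the
                # newly generated token.
                out_token = list(request_output.outputs[0].token_ids)[-1]
                yield out_token
                out_token_ids.append(out_token)
            else:
                break
        # The guard added by this commit: stop polling once the engine
        # has no unfinished requests, rather than calling step() forever.
        if not vllm_codec_engine.has_unfinished_requests():
            break

The else: break alone only exits the inner for loop, so the added has_unfinished_requests() check is what lets the outer loop terminate once the engine drains instead of spinning on step() after the request has finished.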