Merge branch 'main' into dev/lyuxiang.lx
@@ -36,7 +36,7 @@ Stage `0` converts raw JSONL files into the parquet format expected by veRL:
```bash
bash run.sh 0 0
```
Create two JSONL files—`train.jsonl` and `test.jsonl`.
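For illustration only, a minimal sketch of how such files could be prepared; the field name `text` is an assumption for this example, not taken from the recipe, so check the stage-0 script for the exact schema it expects:

```python
# A minimal sketch of writing train.jsonl / test.jsonl.
# NOTE: the "text" field name is an assumption for illustration.
import json

train_samples = ["今天天气真不错。", "大家好，欢迎体验语音合成。"]
test_samples = ["这是一条测试文本。"]

for path, rows in [("train.jsonl", train_samples), ("test.jsonl", test_samples)]:
    with open(path, "w", encoding="utf-8") as f:
        for text in rows:
            # one JSON object per line, i.e. the JSONL convention
            f.write(json.dumps({"text": text}, ensure_ascii=False) + "\n")
```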
The script will then generate two Parquet files:

```
@@ -111,7 +111,7 @@ bash run.sh 5 5

The script converts the Hugging Face checkpoint back into the format expected by the CosyVoice repository.

> [!TIP]
> However, we observed a slight accuracy drop when using the RL-trained model after conversion, compared with the Hugging Face format.

## Results

@@ -53,7 +53,7 @@ except RuntimeError:
    pass


-TEMPLATE = "{% for message in messages %}{%- if message['role'] == 'user' %}{{- '<|im_start|>' + message['role'] + '\n' + 'Convert the text to speech: ' + message['content'] + '<|im_end|>\n'}}{%- elif message['role'] == 'assistant' %}{{- '<|im_start|>' + message['role'] + '\n' + '<|SPEECH_GENERATION_START|>' + message['content']}}{%- endif %}{%- endfor %}"
+TEMPLATE = "{% for message in messages %}{%- if message['role'] == 'user' %}{{- '<|im_start|>' + message['role'] + '\n' + 'Convert the text to speech: ' + message['content'] + '<|im_end|>\n'}}{%- elif message['role'] == 'assistant' %}{{- '<|im_start|>' + message['role'] + '\n' + '<|SPEECH_GENERATION_START|>' + message['content']}}{%- endif %}{%- endfor %}" # noqa: E501


def audio_decode_cosyvoice2(
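As a quick illustration of what the `TEMPLATE` chat template above produces, here is a minimal sketch that renders it with plain `jinja2`; the example messages are made up, and the assistant content stands in for CosyVoice2 speech tokens:

```python
# A minimal sketch: render the chat template with plain jinja2 to preview
# the prompt string it produces. The messages below are made up.
from jinja2 import Template

TEMPLATE = (
    "{% for message in messages %}"
    "{%- if message['role'] == 'user' %}"
    "{{- '<|im_start|>' + message['role'] + '\n' + 'Convert the text to speech: ' + message['content'] + '<|im_end|>\n'}}"
    "{%- elif message['role'] == 'assistant' %}"
    "{{- '<|im_start|>' + message['role'] + '\n' + '<|SPEECH_GENERATION_START|>' + message['content']}}"
    "{%- endif %}{%- endfor %}"
)

messages = [
    {"role": "user", "content": "今天天气真不错。"},
    {"role": "assistant", "content": "<|s_1|><|s_2|><|s_3|>"},  # placeholder speech tokens
]
print(Template(TEMPLATE).render(messages=messages))
# Expected output:
# <|im_start|>user
# Convert the text to speech: 今天天气真不错。<|im_end|>
# <|im_start|>assistant
# <|SPEECH_GENERATION_START|><|s_1|><|s_2|><|s_3|>
```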
@@ -1,5 +1,3 @@
#!/usr/bin/env python3

# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
@@ -33,7 +33,7 @@ fi
if [ $stage -le -1 ] && [ $stop_stage -ge -1 ]; then
  log "stage -1: download official CosyVoice2-0.5B LLM model and convert to huggingface compatible checkpoint"
  modelscope download --model iic/CosyVoice2-0.5B --local_dir $model_scope_model_path
  python3 pretrained_to_huggingface.py \
    --pretrained-cosyvoice2-path $model_scope_model_path \
    --save-path $sft_model_path
@@ -61,7 +61,7 @@ fi
if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
  log "stage 1: start token2wav asr server for reward function"
  python3 token2wav_asr_server.py --number-of-devices 8
fi

exp_name=official_llm_aishell3_grpo
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
@@ -125,7 +125,7 @@ if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
    --backend fsdp \
    --local_dir $llm_path/actor \
    --target_dir $llm_path/merged_hf_model || exit 1
fi

if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
  log "stage 4: Test the model"
@@ -1,5 +1,3 @@
#!/usr/bin/env python3
#
# Copyright (c) 2023 by manyeyes
# Copyright (c) 2023 Xiaomi Corporation
@@ -195,7 +193,7 @@ def write_error_stats(
            hyp = list("".join(hyp))
            results[i] = (cut_id, ref, hyp)

-    for cut_id, ref, hyp in results:
+    for _cut_id, ref, hyp in results:
        ali = kaldialign.align(ref, hyp, ERR, sclite_mode=sclite_mode)
        for ref_word, hyp_word in ali:
            if ref_word == ERR:
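For readers unfamiliar with `kaldialign`, a minimal sketch of how `align` pairs reference and hypothesis tokens; the epsilon symbol `"*"` is chosen here purely for illustration, while the function above uses its own `ERR` placeholder:

```python
# A minimal sketch of kaldialign.align, which the error-stats code above relies on.
import kaldialign

EPS = "*"
ref = list("今天天气不错")
hyp = list("今天天汽错")  # one substitution ("气" -> "汽") and one deletion ("不")
ali = kaldialign.align(ref, hyp, EPS)  # list of (ref_token, hyp_token) pairs, EPS marks gaps

subs = sum(1 for r, h in ali if r != EPS and h != EPS and r != h)
ins = sum(1 for r, h in ali if r == EPS)   # hyp has a token the ref does not
dels = sum(1 for r, h in ali if h == EPS)  # ref has a token the hyp does not
print(subs, ins, dels)  # 1 0 1
```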
@@ -295,7 +295,7 @@ def main():
        metrics_port=8002,
    )

-    device_ids = [i for i in range(args.number_of_devices)]
+    device_ids = list(range(args.number_of_devices))
    device_ids = device_ids * args.number_of_instances_per_device

    with Triton(config=triton_config) as triton:
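To make the device fan-out above concrete, a small sketch with made-up values (2 GPUs, 3 server instances per GPU):

```python
# A small sketch of the device_ids fan-out above, with made-up values:
# repeating the device list yields one entry per server instance.
number_of_devices = 2
number_of_instances_per_device = 3

device_ids = list(range(number_of_devices)) * number_of_instances_per_device
print(device_ids)  # [0, 1, 0, 1, 0, 1]
```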