add instruct

This commit is contained in:
lyuxiang.lx
2025-12-11 09:43:25 +00:00
parent 3298d6f3e3
commit ebef63066f
5 changed files with 36 additions and 3 deletions

View File

@@ -40,6 +40,11 @@ def main():
with open('{}/spk2utt'.format(args.des_dir), 'w') as f:
for k, v in spk2utt.items():
f.write('{} {}\n'.format(k, ' '.join(v)))
if args.instruct is True:
with open('{}/instruct'.format(args.des_dir), 'w') as f:
for k, v in utt2text.items():
# NOTE: in CosyVoice3, the instruct prompt is added into the sequence
f.write('{} You are a helpful assistant.<|endofprompt|>\n'.format(k, v))
return
@@ -49,7 +54,9 @@ if __name__ == "__main__":
type=str)
parser.add_argument('--des_dir',
type=str)
parser.add_argument('--ref_model',
type=str)
parser.add_argument('--instruct',
action='store_true',
default=False,
help='create instruct file or not')
args = parser.parse_args()
main()

View File

@@ -20,7 +20,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
echo "Data preparation, prepare wav.scp/text/utt2spk/spk2utt"
for x in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do
mkdir -p data/$x
python local/prepare_data.py --src_dir $data_dir/LibriTTS/$x --des_dir data/$x
python local/prepare_data.py --src_dir $data_dir/LibriTTS/$x --des_dir data/$x --instruct
done
fi
@@ -46,6 +46,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
mkdir -p data/$x/parquet
tools/make_parquet_list.py --num_utts_per_parquet 1000 \
--num_processes 10 \
--instruct \
--src_dir data/$x \
--des_dir data/$x/parquet
done