remove academic and change to iic/CosyVoice_ttsfrd

This commit is contained in:
lyuxiang.lx
2024-07-07 12:19:34 +08:00
parent 834053940d
commit 71238461f0
7 changed files with 22 additions and 13 deletions

3
.gitmodules vendored
View File

@@ -1,6 +1,3 @@
[submodule "third_party/AcademiCodec"]
path = third_party/AcademiCodec
url = https://github.com/yangdongchao/AcademiCodec.git
[submodule "third_party/Matcha-TTS"] [submodule "third_party/Matcha-TTS"]
path = third_party/Matcha-TTS path = third_party/Matcha-TTS
url = https://github.com/shivammehta25/Matcha-TTS.git url = https://github.com/shivammehta25/Matcha-TTS.git

View File

@@ -33,7 +33,7 @@ sudo yum install sox sox-devel
**Model download** **Model download**
We strongly recommend that you download our pretrained `CosyVoice-300M` `CosyVoice-300M-SFT` `CosyVoice-300M-Instruct` model and `speech_kantts_ttsfrd` resource. We strongly recommend that you download our pretrained `CosyVoice-300M` `CosyVoice-300M-SFT` `CosyVoice-300M-Instruct` model and `CosyVoice-ttsfrd` resource.
If you are an expert in this field, and you are only interested in training your own CosyVoice model from scratch, you can skip this step. If you are an expert in this field, and you are only interested in training your own CosyVoice model from scratch, you can skip this step.
@@ -43,7 +43,7 @@ from modelscope import snapshot_download
snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M') snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT') snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct') snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
snapshot_download('speech_tts/speech_kantts_ttsfrd', local_dir='pretrained_models/speech_kantts_ttsfrd') snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')
``` ```
``` sh ``` sh
@@ -52,12 +52,12 @@ mkdir -p pretrained_models
git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M
git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT
git clone https://www.modelscope.cn/iic/CosyVoice-300M-Instruct.git pretrained_models/CosyVoice-300M-Instruct git clone https://www.modelscope.cn/iic/CosyVoice-300M-Instruct.git pretrained_models/CosyVoice-300M-Instruct
git clone https://www.modelscope.cn/speech_tts/speech_kantts_ttsfrd.git pretrained_models/speech_kantts_ttsfrd git clone https://www.modelscope.cn/iic/CosyVoice-ttsfrd.git pretrained_models/CosyVoice-ttsfrd
``` ```
Unzip `ttsfrd` resource and install `ttsfrd` package Unzip `ttsfrd` resource and install `ttsfrd` package
``` sh ``` sh
cd pretrained_models/speech_kantts_ttsfrd/ cd pretrained_models/CosyVoice-ttsfrd/
unzip resource.zip -d . unzip resource.zip -d .
pip install ttsfrd-0.3.6-cp38-cp38-linux_x86_64.whl pip install ttsfrd-0.3.6-cp38-cp38-linux_x86_64.whl
``` ```

View File

@@ -50,7 +50,7 @@ class CosyVoiceFrontEnd:
self.inflect_parser = inflect.engine() self.inflect_parser = inflect.engine()
self.frd = ttsfrd.TtsFrontendEngine() self.frd = ttsfrd.TtsFrontendEngine()
ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
assert self.frd.initialize('{}/../../pretrained_models/speech_kantts_ttsfrd/resource'.format(ROOT_DIR)) is True, 'failed to initialize ttsfrd resource' assert self.frd.initialize('{}/../../pretrained_models/CosyVoice-ttsfrd/resource'.format(ROOT_DIR)) is True, 'failed to initialize ttsfrd resource'
self.frd.set_lang_type('pinyin') self.frd.set_lang_type('pinyin')
self.frd.enable_pinyin_mix(True) self.frd.enable_pinyin_mix(True)
self.frd.set_breakmodel_index(1) self.frd.set_breakmodel_index(1)

View File

@@ -27,8 +27,8 @@ from torch.nn.utils import weight_norm
from torch.distributions.uniform import Uniform from torch.distributions.uniform import Uniform
from cosyvoice.transformer.activation import Snake from cosyvoice.transformer.activation import Snake
from academicodec.utils import get_padding from cosyvoice.utils.common import get_padding
from academicodec.utils import init_weights from cosyvoice.utils.common import init_weights
"""hifigan based generator implementation. """hifigan based generator implementation.

View File

@@ -91,3 +91,13 @@ def th_accuracy(pad_outputs: torch.Tensor, pad_targets: torch.Tensor,
pad_pred.masked_select(mask) == pad_targets.masked_select(mask)) pad_pred.masked_select(mask) == pad_targets.masked_select(mask))
denominator = torch.sum(mask) denominator = torch.sum(mask)
return (numerator / denominator).detach() return (numerator / denominator).detach()
def get_padding(kernel_size, dilation=1):
    """Return the padding amount for a dilated convolution kernel.

    Computes ``(kernel_size * dilation - dilation) / 2`` and truncates the
    result toward zero with ``int()``.

    Args:
        kernel_size: Size of the convolution kernel.
        dilation: Dilation factor applied to the kernel (default 1).

    Returns:
        The padding as an ``int``.

    Note:
        For an odd ``kernel_size`` this equals ``dilation * (kernel_size - 1) // 2``,
        i.e. half of the dilated kernel's extent beyond its centre tap.
        Presumably used as symmetric "same" padding at stride 1 -- confirm
        against the call sites.
    """
    # True division followed by int() is kept deliberately: it truncates
    # toward zero, which differs from floor division for negative values.
    dilated_extent = kernel_size * dilation - dilation
    return int(dilated_extent / 2)
def init_weights(m, mean=0.0, std=0.01):
    """Re-initialise the weights of convolution-like modules in place.

    If the class name of ``m`` contains the substring ``"Conv"``, its
    ``weight`` is overwritten with samples drawn via
    ``m.weight.data.normal_(mean, std)``. Any other object is left untouched.

    Args:
        m: The object to inspect; must expose ``weight.data.normal_`` when
            its class name contains ``"Conv"``.
        mean: Mean passed to ``normal_`` (default 0.0).
        std: Standard deviation passed to ``normal_`` (default 0.01).

    Returns:
        None. The matching module is modified in place.
    """
    classname = m.__class__.__name__
    # Substring match on the class name, so any class whose name contains
    # "Conv" is affected, not only one specific convolution type.
    if "Conv" in classname:
        m.weight.data.normal_(mean, std)

View File

@@ -5,8 +5,11 @@ WORKDIR /opt/CosyVoice
RUN sed -i s@/archive.ubuntu.com/@/mirrors.aliyun.com/@g /etc/apt/sources.list RUN sed -i s@/archive.ubuntu.com/@/mirrors.aliyun.com/@g /etc/apt/sources.list
RUN apt-get update -y RUN apt-get update -y
RUN apt-get -y install python3-dev cmake python3-pip git RUN apt-get -y install python3-dev cmake python3-pip git unzip
RUN git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git RUN git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git
RUN cd CosyVoice && pip3 install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com RUN cd CosyVoice && pip3 install --default-timeout=3600 -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com
RUN apt install git-lfs && git lfs install
RUN cd CosyVoice && git clone https://www.modelscope.cn/iic/CosyVoice-ttsfrd.git pretrained_models/CosyVoice-ttsfrd
RUN cd CosyVoice/pretrained_models/CosyVoice-ttsfrd && unzip resource.zip -d . && pip3 install ttsfrd-0.3.6-cp38-cp38-linux_x86_64.whl
RUN cd CosyVoice/runtime/python && python3 -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. cosyvoice.proto RUN cd CosyVoice/runtime/python && python3 -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. cosyvoice.proto
CMD ["/bin/bash", "-c", "cd /opt/CosyVoice/CosyVoice/runtime/python && . ./path/sh && python3 server.py --port 50000 --max_conc 4 --model_dir speech_tts/CosyVoice-300M && sleep infinity"] CMD ["/bin/bash", "-c", "cd /opt/CosyVoice/CosyVoice/runtime/python && . ./path/sh && python3 server.py --port 50000 --max_conc 4 --model_dir speech_tts/CosyVoice-300M && sleep infinity"]