From fff6f9f1e0cb44c14d9685f6d5ee2e1b88fd09e7 Mon Sep 17 00:00:00 2001
From: iflamed
Date: Mon, 8 Jul 2024 18:51:06 +0800
Subject: [PATCH] add download models script and fastapi server to serve tts

---
Reviewer notes (text between the "---" line and the first "diff --git" line
is commentary and is not part of the change):
- NOTE(review): line breaks, blank context lines and indentation were rebuilt
  from the hunk line counts because the text this was recovered from had them
  collapsed; check the README.md context against the target tree before
  applying.
- NOTE(review): the author's e-mail address is missing from the "From:"
  header; restore it before feeding this to `git am`.
- NOTE(review): the HTML tags of the landing page in main.py were missing and
  were rebuilt as a minimal HTML5 page; confirm against the original commit.
- The blob ids on the "index" lines are those of the original commit and do
  not describe the revised file contents below.
- main.py: tts() is now a plain `def` (runs in FastAPI's thread pool instead
  of blocking the event loop), serializes model calls with a lock, times
  inference with time.perf_counter(), and rejects unknown roles with HTTP 400.
- download.py: downloads only run when executed as a script.
- requirements.txt: file now ends with a newline.

 README.md        | 20 ++++++++++++--------
 download.py      | 13 +++++++++++++
 main.py          | 65 +++++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |  4 +++-
 4 files changed, 93 insertions(+), 9 deletions(-)
 create mode 100644 download.py
 create mode 100644 main.py

diff --git a/README.md b/README.md
index d341d97..0d9ca78 100644
--- a/README.md
+++ b/README.md
@@ -37,17 +37,13 @@ We strongly recommend that you download our pretrained `CosyVoice-300M` `CosyVoi
 
 If you are expert in this field, and you are only interested in training your own CosyVoice model from scratch, you can skip this step.
 
-``` python
-# SDK模型下载
-from modelscope import snapshot_download
-snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
-snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
-snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
-snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')
+Download the models with the Python script:
+``` shell
+python download.py
 ```
 
+Or download the models with git (install `git lfs` first):
 ``` sh
-# git模型下载,请确保已安装git lfs
 mkdir -p pretrained_models
 git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M
 git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT
@@ -120,6 +116,14 @@ python3 webui.py --port 50000 --model_dir pretrained_models/CosyVoice-300M
 For advanced user, we have provided train and inference scripts in `examples/libritts/cosyvoice/run.sh`.
 You can get familiar with CosyVoice following this recipie.
 
+**Serve with FastAPI**
+```sh
+# For development
+fastapi dev --port 3003
+# For production
+fastapi run --port 3003
+```
+
 **Build for deployment**
 
 Optionally, if you want to use grpc for service deployment,
diff --git a/download.py b/download.py
new file mode 100644
index 0000000..5890ac1
--- /dev/null
+++ b/download.py
@@ -0,0 +1,13 @@
+"""Download the pretrained CosyVoice models with the ModelScope SDK."""
+from modelscope import snapshot_download
+
+MODELS = (
+    'CosyVoice-300M',
+    'CosyVoice-300M-SFT',
+    'CosyVoice-300M-Instruct',
+    'CosyVoice-ttsfrd',
+)
+
+if __name__ == "__main__":
+    for name in MODELS:
+        snapshot_download(f'iic/{name}', local_dir=f'pretrained_models/{name}')
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..d212dd3
--- /dev/null
+++ b/main.py
@@ -0,0 +1,65 @@
+"""FastAPI server that exposes CosyVoice SFT speech synthesis over HTTP."""
+import io
+import threading
+import time
+
+import torchaudio
+from fastapi import FastAPI, HTTPException, Response
+from fastapi.responses import HTMLResponse
+
+from cosyvoice.cli.cosyvoice import CosyVoice
+
+# Sample rate handed to torchaudio.save for the synthesized waveform.
+SAMPLE_RATE = 22050
+
+cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT')
+# sft usage
+print(cosyvoice.list_avaliable_spks())
+app = FastAPI()
+# Keeps model calls one-at-a-time now that tts() runs in worker threads.
+_infer_lock = threading.Lock()
+
+
+@app.get("/api/voice/tts")
+def tts(query: str, role: str):
+    """Synthesize ``query`` with the voice ``role`` and return WAV audio.
+
+    Declared with plain ``def`` so FastAPI runs the blocking model call in
+    its thread pool instead of on the event loop.
+    """
+    if role not in cosyvoice.list_avaliable_spks():
+        raise HTTPException(status_code=400, detail=f"unknown role: {role}")
+    with _infer_lock:
+        # perf_counter gives elapsed wall-clock time; process_time only
+        # counts CPU time of this process.
+        start = time.perf_counter()
+        output = cosyvoice.inference_sft(query, role)
+        end = time.perf_counter()
+    print("infer time:", end - start, "seconds")
+    buffer = io.BytesIO()
+    torchaudio.save(buffer, output['tts_speech'], SAMPLE_RATE, format="wav")
+    return Response(content=buffer.getvalue(), media_type="audio/wav")
+
+
+@app.get("/api/voice/roles")
+async def roles():
+    """List the speaker ids accepted as ``role`` by the TTS endpoint."""
+    return {"roles": cosyvoice.list_avaliable_spks()}
+
+
+@app.get("/", response_class=HTMLResponse)
+async def root():
+    """Serve a small landing page that points at the API documentation."""
+    return """
+    <!DOCTYPE html>
+    <html lang="en">
+        <head>
+            <meta charset="utf-8">
+            <title>Api information</title>
+        </head>
+        <body>
+            Get the supported tones from the Roles API first, then enter the tones and textual content in the TTS API for synthesis.
+            <a href="/docs">Documents of API</a>
+        </body>
+    </html>
+    """
diff --git a/requirements.txt b/requirements.txt
index 39e1374..8129558 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -25,4 +25,6 @@ soundfile==0.12.1
 tensorboard==2.14.0
 torch==2.0.1
 torchaudio==2.0.2
-wget==3.2
\ No newline at end of file
+wget==3.2
+fastapi==0.111.0
+fastapi-cli==0.0.4