add download models script and fastapi server to serve tts

This commit is contained in:
iflamed
2024-07-08 18:51:06 +08:00
parent 4e43a9d98b
commit fff6f9f1e0
4 changed files with 61 additions and 9 deletions

View File

@@ -37,17 +37,13 @@ We strongly recommend that you download our pretrained `CosyVoice-300M` `CosyVoi
If you are an expert in this field and are only interested in training your own CosyVoice model from scratch, you can skip this step. If you are an expert in this field and are only interested in training your own CosyVoice model from scratch, you can skip this step.
``` python Download models with python script.
# SDK模型下载 ``` shell
from modelscope import snapshot_download python download.py
snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')
``` ```
Download models with git, you should install `git lfs` first.
``` sh ``` sh
# git模型下载请确保已安装git lfs
mkdir -p pretrained_models mkdir -p pretrained_models
git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M
git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT
@@ -120,6 +116,14 @@ python3 webui.py --port 50000 --model_dir pretrained_models/CosyVoice-300M
For advanced users, we have provided training and inference scripts in `examples/libritts/cosyvoice/run.sh`. For advanced users, we have provided training and inference scripts in `examples/libritts/cosyvoice/run.sh`.
You can get familiar with CosyVoice by following this recipe. You can get familiar with CosyVoice by following this recipe.
**Serve with FastAPI**
```sh
# For development
fastapi dev --port 3003
# For production
fastapi run --port 3003
```
**Build for deployment** **Build for deployment**
Optionally, if you want to use grpc for service deployment, Optionally, if you want to use grpc for service deployment,

6
download.py Normal file
View File

@@ -0,0 +1,6 @@
# Download the pretrained CosyVoice models with the ModelScope SDK.
from modelscope import snapshot_download

# Model repositories in the `iic` namespace, fetched in this order. Each one
# is stored in a directory of the same name under `pretrained_models/`.
MODEL_NAMES = (
    'CosyVoice-300M',
    'CosyVoice-300M-SFT',
    'CosyVoice-300M-Instruct',
    'CosyVoice-ttsfrd',
)

for model_name in MODEL_NAMES:
    snapshot_download(f'iic/{model_name}', local_dir=f'pretrained_models/{model_name}')

40
main.py Normal file
View File

@@ -0,0 +1,40 @@
import io,time
from fastapi import FastAPI, Response
from fastapi.responses import HTMLResponse
from cosyvoice.cli.cosyvoice import CosyVoice
import torchaudio
# Web application object; the route decorators in this module attach to it.
app = FastAPI()

# Build the SFT model once when the module is imported.
cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT')

# sft usage: print the speaker ids reported by the loaded model.
print(cosyvoice.list_avaliable_spks())
@app.get("/api/voice/tts")
async def tts(query: str, role: str):
    """Synthesize ``query`` with the speaker ``role`` and return WAV audio.

    Args:
        query: Text to synthesize.
        role: Speaker id; must be one of ``cosyvoice.list_avaliable_spks()``.

    Returns:
        A ``Response`` with media type ``audio/wav`` containing the encoded
        speech.

    Raises:
        HTTPException: With status 400 when ``role`` is not an available
            speaker id.
    """
    # Imported locally so this handler stays self-contained with respect to
    # the file-level import list.
    from fastapi import HTTPException

    # Reject unknown speakers with an explicit client error.
    # NOTE(review): without this check an unknown id presumably fails inside
    # inference_sft and surfaces as a 500 - confirm against CosyVoice.
    if role not in cosyvoice.list_avaliable_spks():
        raise HTTPException(status_code=400, detail=f"unknown role: {role!r}")

    # perf_counter() measures elapsed time; process_time() only counts CPU
    # time of this process, which is not the latency a caller observes.
    # NOTE(review): inference runs synchronously inside this async handler,
    # so other requests presumably wait until it finishes - confirm whether
    # that is intended.
    start = time.perf_counter()
    output = cosyvoice.inference_sft(query, role)
    elapsed = time.perf_counter() - start
    print("infer time:", elapsed, "seconds")

    buffer = io.BytesIO()
    # NOTE(review): 22050 is passed as the sample rate; presumably the
    # model's output rate - confirm.
    torchaudio.save(buffer, output['tts_speech'], 22050, format="wav")
    # getvalue() returns the full contents regardless of stream position,
    # so no seek/read pair is needed.
    return Response(content=buffer.getvalue(), media_type="audio/wav")
@app.get("/api/voice/roles")
async def roles():
    """Return the model's available speaker ids under the ``roles`` key."""
    available = cosyvoice.list_avaliable_spks()
    return dict(roles=available)
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve a static HTML landing page that links to ``./docs``."""
    # The markup is returned verbatim; HTMLResponse sets the content type.
    landing_page = """
<!DOCTYPE html>
<html lang=zh-cn>
<head>
<meta charset=utf-8>
<title>Api information</title>
</head>
<body>
Get the supported tones from the Roles API first, then enter the tones and textual content in the TTS API for synthesis. <a href='./docs'>Documents of API</a>
</body>
</html>
"""
    return landing_page

View File

@@ -25,4 +25,6 @@ soundfile==0.12.1
tensorboard==2.14.0 tensorboard==2.14.0
torch==2.0.1 torch==2.0.1
torchaudio==2.0.2 torchaudio==2.0.2
wget==3.2 wget==3.2
fastapi==0.111.0
fastapi-cli==0.0.4