mirror of
https://github.com/FunAudioLLM/CosyVoice.git
synced 2026-02-05 18:09:24 +08:00
add download models script and fastapi server to serve tts
This commit is contained in:
20
README.md
20
README.md
@@ -37,17 +37,13 @@ We strongly recommend that you download our pretrained `CosyVoice-300M` `CosyVoi
|
|||||||
|
|
||||||
If you are an expert in this field, and you are only interested in training your own CosyVoice model from scratch, you can skip this step.
|
If you are an expert in this field, and you are only interested in training your own CosyVoice model from scratch, you can skip this step.
|
||||||
|
|
||||||
``` python
|
Download models with python script.
|
||||||
# SDK模型下载
|
``` shell
|
||||||
from modelscope import snapshot_download
|
python download.py
|
||||||
snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
|
|
||||||
snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
|
|
||||||
snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
|
|
||||||
snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Download models with git, you should install `git lfs` first.
|
||||||
``` sh
|
``` sh
|
||||||
# git模型下载,请确保已安装git lfs
|
|
||||||
mkdir -p pretrained_models
|
mkdir -p pretrained_models
|
||||||
git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M
|
git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M
|
||||||
git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT
|
git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT
|
||||||
@@ -120,6 +116,14 @@ python3 webui.py --port 50000 --model_dir pretrained_models/CosyVoice-300M
|
|||||||
For advanced user, we have provided train and inference scripts in `examples/libritts/cosyvoice/run.sh`.
|
For advanced user, we have provided train and inference scripts in `examples/libritts/cosyvoice/run.sh`.
|
||||||
You can get familiar with CosyVoice by following this recipe.
|
You can get familiar with CosyVoice by following this recipe.
|
||||||
|
|
||||||
|
**Serve with FastAPI**
|
||||||
|
```sh
|
||||||
|
# For development
|
||||||
|
fastapi dev --port 3003
|
||||||
|
# For production
|
||||||
|
fastapi run --port 3003
|
||||||
|
```
|
||||||
|
|
||||||
**Build for deployment**
|
**Build for deployment**
|
||||||
|
|
||||||
Optionally, if you want to use grpc for service deployment,
|
Optionally, if you want to use grpc for service deployment,
|
||||||
|
|||||||
6
download.py
Normal file
6
download.py
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
# Download the pretrained CosyVoice models from ModelScope (SDK download)
# into local directories under pretrained_models/.
from modelscope import snapshot_download

# ModelScope model ids; each is mirrored into a local directory named after
# the last path component of the id.
_MODEL_IDS = (
    'iic/CosyVoice-300M',
    'iic/CosyVoice-300M-SFT',
    'iic/CosyVoice-300M-Instruct',
    'iic/CosyVoice-ttsfrd',
)

for _model_id in _MODEL_IDS:
    # e.g. 'iic/CosyVoice-300M' -> 'pretrained_models/CosyVoice-300M'
    snapshot_download(_model_id,
                      local_dir='pretrained_models/' + _model_id.split('/')[-1])
||||||
40
main.py
Normal file
40
main.py
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
# --- Module-level setup: load the TTS model once and create the app ---------
import io,time
from fastapi import FastAPI, Response
from fastapi.responses import HTMLResponse
from cosyvoice.cli.cosyvoice import CosyVoice
import torchaudio

# Load the SFT (speaker fine-tuned) CosyVoice model at import time; every
# request handler below shares this single instance.
cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT')
# sft usage
# Log the available speaker roles at startup. NOTE: "avaliable" is the
# upstream CosyVoice API's own (misspelled) method name.
print(cosyvoice.list_avaliable_spks())
app = FastAPI()
|
||||||
|
|
||||||
|
@app.get("/api/voice/tts")
|
||||||
|
@app.get("/api/voice/tts")
async def tts(query: str, role: str):
    """Synthesize *query* with speaker *role* and return it as a WAV response.

    Args:
        query: Text to synthesize.
        role: Speaker name; expected to be one of
            ``cosyvoice.list_avaliable_spks()``.

    Returns:
        fastapi.Response with the rendered speech, media type ``audio/wav``.
    """
    # perf_counter() measures elapsed wall-clock time; the previous
    # process_time() counts CPU time only and undercounts GPU/blocking
    # inference work.
    start = time.perf_counter()
    output = cosyvoice.inference_sft(query, role)
    end = time.perf_counter()
    print("infer time:", end - start, "seconds")

    buffer = io.BytesIO()
    # NOTE(review): 22050 Hz is assumed to match the model's output sample
    # rate — confirm against the CosyVoice config.
    torchaudio.save(buffer, output['tts_speech'], 22050, format="wav")
    # getvalue() returns the whole buffer; no need for seek(0) + read(-1).
    return Response(content=buffer.getvalue(), media_type="audio/wav")
|
||||||
|
|
||||||
|
@app.get("/api/voice/roles")
|
||||||
|
async def roles():
|
||||||
|
return {"roles": cosyvoice.list_avaliable_spks()}
|
||||||
|
|
||||||
|
@app.get("/", response_class=HTMLResponse)
|
||||||
|
async def root():
|
||||||
|
return """
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang=zh-cn>
|
||||||
|
<head>
|
||||||
|
<meta charset=utf-8>
|
||||||
|
<title>Api information</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
Get the supported tones from the Roles API first, then enter the tones and textual content in the TTS API for synthesis. <a href='./docs'>Documents of API</a>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
@@ -26,3 +26,5 @@ tensorboard==2.14.0
|
|||||||
torch==2.0.1
|
torch==2.0.1
|
||||||
torchaudio==2.0.2
|
torchaudio==2.0.2
|
||||||
wget==3.2
|
wget==3.2
|
||||||
|
fastapi==0.111.0
|
||||||
|
fastapi-cli==0.0.4
|
||||||
Reference in New Issue
Block a user