From fff6f9f1e0cb44c14d9685f6d5ee2e1b88fd09e7 Mon Sep 17 00:00:00 2001
From: iflamed <iflamed@gmail.com>
Date: Mon, 8 Jul 2024 18:51:06 +0800
Subject: [PATCH] add download models script and fastapi server to serve tts

---
 README.md        | 20 ++++++++++++--------
 download.py      |  6 ++++++
 main.py          | 40 ++++++++++++++++++++++++++++++++++++++++
 requirements.txt |  4 +++-
 4 files changed, 61 insertions(+), 9 deletions(-)
 create mode 100644 download.py
 create mode 100644 main.py
diff --git a/README.md b/README.md
index d341d97..0d9ca78 100644
--- a/README.md
+++ b/README.md
@@ -37,17 +37,13 @@ We strongly recommend that you download our pretrained `CosyVoice-300M` `CosyVoi
 
 If you are expert in this field, and you are only interested in training your own CosyVoice model from scratch, you can skip this step.
 
-``` python
-# SDK模型下载
-from modelscope import snapshot_download
-snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
-snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
-snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
-snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')
+Download the models with the provided Python script:
+``` shell
+python download.py
 ```
 
+Alternatively, download the models with git (install `git lfs` first):
 ``` sh
-# git模型下载，请确保已安装git lfs
 mkdir -p pretrained_models
 git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M
 git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT
@@ -120,6 +116,14 @@ python3 webui.py --port 50000 --model_dir pretrained_models/CosyVoice-300M
 For advanced user, we have provided train and inference scripts in `examples/libritts/cosyvoice/run.sh`.
 You can get familiar with CosyVoice following this recipie.
 
+**Serve with FastAPI**
+```sh
+# For development
+fastapi dev --port 3003
+# For production
+fastapi run --port 3003
+```
+
 **Build for deployment**
 
 Optionally, if you want to use grpc for service deployment,
diff --git a/download.py b/download.py
new file mode 100644
index 0000000..5890ac1
--- /dev/null
+++ b/download.py
@@ -0,0 +1,6 @@
+# Download the pretrained CosyVoice models with the ModelScope SDK.
+from modelscope import snapshot_download
+
+MODEL_NAMES = ('CosyVoice-300M', 'CosyVoice-300M-SFT', 'CosyVoice-300M-Instruct', 'CosyVoice-ttsfrd')
+for model_name in MODEL_NAMES:  # each repo iic/<name> is stored under pretrained_models/<name>
+    snapshot_download(f'iic/{model_name}', local_dir=f'pretrained_models/{model_name}')
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..d212dd3
--- /dev/null
+++ b/main.py
@@ -0,0 +1,40 @@
+import io,time
+from fastapi import FastAPI, Response
+from fastapi.responses import HTMLResponse
+from cosyvoice.cli.cosyvoice import CosyVoice
+import torchaudio
+
+cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT')  # created once, at import time
+# Print the speaker ids reported by the SFT model (the same list the roles endpoint returns).
+print(cosyvoice.list_avaliable_spks())
+app = FastAPI()
+
+@app.get("/api/voice/tts")
+async def tts(query: str, role: str):
+    """Synthesize speech for `query` using speaker `role` and return it as WAV audio."""
+    # NOTE(review): inference_sft is a synchronous call, so it blocks the event loop while it runs.
+    start = time.perf_counter()  # elapsed wall-clock time; process_time() would only count CPU time
+    output = cosyvoice.inference_sft(query, role)
+    print("infer time:", time.perf_counter() - start, "seconds")
+    buffer = io.BytesIO()
+    torchaudio.save(buffer, output['tts_speech'], 22050, format="wav")  # 22050 is the sample rate
+    return Response(content=buffer.getvalue(), media_type="audio/wav")
+
+@app.get("/api/voice/roles")
+async def roles():  # JSON object with the speaker ids reported by the loaded model
+    return dict(roles=cosyvoice.list_avaliable_spks())
+
+@app.get("/", response_class=HTMLResponse)
+async def root():  # Static landing page: describes the API workflow and links to ./docs
+    return """
+    <!DOCTYPE html>
+    <html lang=en>
+        <head>
+            <meta charset=utf-8>
+            <title>Api information</title>
+        </head>
+        <body>
+            Get the supported tones from the Roles API first, then enter the tones and textual content in the TTS API for synthesis. <a href='./docs'>Documents of API</a>
+        </body>
+    </html>
+    """
diff --git a/requirements.txt b/requirements.txt
index 39e1374..8129558 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -25,4 +25,6 @@ soundfile==0.12.1
 tensorboard==2.14.0
 torch==2.0.1
 torchaudio==2.0.2
-wget==3.2
\ No newline at end of file
+wget==3.2
+fastapi==0.111.0
+fastapi-cli==0.0.4
\ No newline at end of file