add download models script and fastapi server to serve tts

2026-02-05 01:49:25 +08:00 · 2024-07-08 18:51:06 +08:00
parent 4e43a9d98b
commit fff6f9f1e0
4 changed files with 61 additions and 9 deletions
--- a/main.py
+++ b/main.py
@@ -0,0 +1,40 @@
+import io,time
+from fastapi import FastAPI, Response
+from fastapi.responses import HTMLResponse
+from cosyvoice.cli.cosyvoice import CosyVoice
+import torchaudio
+
+cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT')
+# sft usage
+print(cosyvoice.list_avaliable_spks())
+app = FastAPI()
+
+@app.get("/api/voice/tts")
+async def tts(query: str, role: str):
+    start = time.process_time()
+    output = cosyvoice.inference_sft(query, role)
+    end = time.process_time()
+    print("infer time:", end-start, "seconds")
+    buffer = io.BytesIO()
+    torchaudio.save(buffer, output['tts_speech'], 22050, format="wav")
+    buffer.seek(0)
+    return Response(content=buffer.read(-1), media_type="audio/wav")
+
+@app.get("/api/voice/roles")
+async def roles():
+    return {"roles": cosyvoice.list_avaliable_spks()}
+
+@app.get("/", response_class=HTMLResponse)
+async def root():
+    return """
+    <!DOCTYPE html>
+    <html lang=zh-cn>
+        <head>
+            <meta charset=utf-8>
+            <title>Api information</title>
+        </head>
+        <body>
+            Get the supported tones from the Roles API first, then enter the tones and textual content in the TTS API for synthesis. <a href='./docs'>Documents of API</a>
+        </body>
+    </html>
+    """