This commit is contained in:
lyuxiang.lx
2024-09-05 16:15:34 +08:00
parent eeebc45313
commit 90433f5373
35 changed files with 189 additions and 122 deletions

View File

@@ -96,7 +96,8 @@ if __name__ == "__main__":
default='../../../zero_shot_prompt.wav')
parser.add_argument('--instruct_text',
type=str,
default='Theo \'Crimson\', is a fiery, passionate rebel leader. Fights with fervor for justice, but struggles with impulsiveness.')
default='Theo \'Crimson\', is a fiery, passionate rebel leader. \
Fights with fervor for justice, but struggles with impulsiveness.')
parser.add_argument('--tts_wav',
type=str,
default='demo.wav')

View File

@@ -13,9 +13,6 @@
# limitations under the License.
import os
import sys
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append('{}/../../..'.format(ROOT_DIR))
sys.path.append('{}/../../../third_party/Matcha-TTS'.format(ROOT_DIR))
from concurrent import futures
import argparse
import cosyvoice_pb2
@@ -25,11 +22,15 @@ logging.getLogger('matplotlib').setLevel(logging.WARNING)
import grpc
import torch
import numpy as np
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append('{}/../../..'.format(ROOT_DIR))
sys.path.append('{}/../../../third_party/Matcha-TTS'.format(ROOT_DIR))
from cosyvoice.cli.cosyvoice import CosyVoice
logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s %(levelname)s %(message)s')
class CosyVoiceServiceImpl(cosyvoice_pb2_grpc.CosyVoiceServicer):
def __init__(self, args):
self.cosyvoice = CosyVoice(args.model_dir)
@@ -43,7 +44,9 @@ class CosyVoiceServiceImpl(cosyvoice_pb2_grpc.CosyVoiceServicer):
logging.info('get zero_shot inference request')
prompt_speech_16k = torch.from_numpy(np.array(np.frombuffer(request.zero_shot_request.prompt_audio, dtype=np.int16))).unsqueeze(dim=0)
prompt_speech_16k = prompt_speech_16k.float() / (2**15)
model_output = self.cosyvoice.inference_zero_shot(request.zero_shot_request.tts_text, request.zero_shot_request.prompt_text, prompt_speech_16k)
model_output = self.cosyvoice.inference_zero_shot(request.zero_shot_request.tts_text,
request.zero_shot_request.prompt_text,
prompt_speech_16k)
elif request.HasField('cross_lingual_request'):
logging.info('get cross_lingual inference request')
prompt_speech_16k = torch.from_numpy(np.array(np.frombuffer(request.cross_lingual_request.prompt_audio, dtype=np.int16))).unsqueeze(dim=0)
@@ -51,7 +54,9 @@ class CosyVoiceServiceImpl(cosyvoice_pb2_grpc.CosyVoiceServicer):
model_output = self.cosyvoice.inference_cross_lingual(request.cross_lingual_request.tts_text, prompt_speech_16k)
else:
logging.info('get instruct inference request')
model_output = self.cosyvoice.inference_instruct(request.instruct_request.tts_text, request.instruct_request.spk_id, request.instruct_request.instruct_text)
model_output = self.cosyvoice.inference_instruct(request.instruct_request.tts_text,
request.instruct_request.spk_id,
request.instruct_request.instruct_text)
logging.info('send inference response')
for i in model_output:
@@ -59,6 +64,7 @@ class CosyVoiceServiceImpl(cosyvoice_pb2_grpc.CosyVoiceServicer):
response.tts_audio = (i['tts_speech'].numpy() * (2 ** 15)).astype(np.int16).tobytes()
yield response
def main():
grpcServer = grpc.server(futures.ThreadPoolExecutor(max_workers=args.max_conc), maximum_concurrent_rpcs=args.max_conc)
cosyvoice_pb2_grpc.add_CosyVoiceServicer_to_server(CosyVoiceServiceImpl(args), grpcServer)