Mirror of https://github.com/FunAudioLLM/CosyVoice.git, synced 2026-02-05 09:59:23 +08:00

Commit: update
cosyvoice/bin/export_jit.py

@@ -23,7 +23,7 @@ import torch
 ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
 sys.path.append('{}/../..'.format(ROOT_DIR))
 sys.path.append('{}/../../third_party/Matcha-TTS'.format(ROOT_DIR))
-from cosyvoice.cli.cosyvoice import CosyVoice
+from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2


 def get_args():
@@ -37,6 +37,15 @@ def get_args():
     return args


+def get_optimized_script(model, preserved_attrs=[]):
+    script = torch.jit.script(model)
+    if preserved_attrs != []:
+        script = torch.jit.freeze(script, preserved_attrs=preserved_attrs)
+    else:
+        script = torch.jit.freeze(script)
+    script = torch.jit.optimize_for_inference(script)
+    return script
+
+
 def main():
     args = get_args()
     logging.basicConfig(level=logging.DEBUG,
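The new helper factors the repeated script → freeze → optimize_for_inference chain into one function. The preserved_attrs argument matters because torch.jit.freeze() strips every method except forward() unless it is named explicitly, and the LLM module needs its streaming entry point kept alive. A minimal sketch of that behavior, using a hypothetical Toy module (not part of this commit):

    import torch


    class Toy(torch.nn.Module):
        """Stand-in for a module that exposes a second entry point."""

        def __init__(self):
            super().__init__()
            self.proj = torch.nn.Linear(4, 4)

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return self.proj(x)

        @torch.jit.export
        def forward_chunk(self, x: torch.Tensor) -> torch.Tensor:
            # analogous to the method kept via preserved_attrs=['forward_chunk']
            return self.proj(x)


    script = torch.jit.script(Toy().eval())  # freeze() requires eval mode
    frozen = torch.jit.freeze(script, preserved_attrs=['forward_chunk'])
    frozen = torch.jit.optimize_for_inference(frozen)
    print(frozen.forward_chunk(torch.randn(2, 4)).shape)  # method survives freezing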
@@ -46,28 +55,35 @@ def main():
     torch._C._jit_set_profiling_mode(False)
     torch._C._jit_set_profiling_executor(False)

-    cosyvoice = CosyVoice(args.model_dir, load_jit=False, load_onnx=False)
+    try:
+        model = CosyVoice(args.model_dir)
+    except:
+        try:
+            model = CosyVoice2(args.model_dir)
+        except:
+            raise TypeError('no valid model_type!')

-    # 1. export llm text_encoder
-    llm_text_encoder = cosyvoice.model.llm.text_encoder.half()
-    script = torch.jit.script(llm_text_encoder)
-    script = torch.jit.freeze(script)
-    script = torch.jit.optimize_for_inference(script)
-    script.save('{}/llm.text_encoder.fp16.zip'.format(args.model_dir))
-
-    # 2. export llm llm
-    llm_llm = cosyvoice.model.llm.llm.half()
-    script = torch.jit.script(llm_llm)
-    script = torch.jit.freeze(script, preserved_attrs=['forward_chunk'])
-    script = torch.jit.optimize_for_inference(script)
-    script.save('{}/llm.llm.fp16.zip'.format(args.model_dir))
+    if not isinstance(model, CosyVoice2):
+        # 1. export llm text_encoder
+        llm_text_encoder = model.model.llm.text_encoder
+        script = get_optimized_script(llm_text_encoder)
+        script.save('{}/llm.text_encoder.fp32.zip'.format(args.model_dir))
+        script = get_optimized_script(llm_text_encoder.half())
+        script.save('{}/llm.text_encoder.fp16.zip'.format(args.model_dir))
+
+        # 2. export llm llm
+        llm_llm = model.model.llm.llm
+        script = get_optimized_script(llm_llm, ['forward_chunk'])
+        script.save('{}/llm.llm.fp32.zip'.format(args.model_dir))
+        script = get_optimized_script(llm_llm.half(), ['forward_chunk'])
+        script.save('{}/llm.llm.fp16.zip'.format(args.model_dir))

     # 3. export flow encoder
-    flow_encoder = cosyvoice.model.flow.encoder
-    script = torch.jit.script(flow_encoder)
-    script = torch.jit.freeze(script)
-    script = torch.jit.optimize_for_inference(script)
+    flow_encoder = model.model.flow.encoder
+    script = get_optimized_script(flow_encoder)
+    script.save('{}/flow.encoder.fp32.zip'.format(args.model_dir))
+    script = get_optimized_script(flow_encoder.half())
     script.save('{}/flow.encoder.fp16.zip'.format(args.model_dir))


 if __name__ == '__main__':
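Note the ordering inside each block: the fp32 archive is scripted and saved before .half() is called, since Module.half() casts the weights in place. For reference, a minimal sketch of loading one of the exported archives back (the model directory is a placeholder):

    import torch

    # Load an exported TorchScript archive; '<MODEL_DIR>' mirrors args.model_dir.
    flow_encoder = torch.jit.load('<MODEL_DIR>/flow.encoder.fp16.zip', map_location='cuda')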
cosyvoice/bin/export_onnx.py

@@ -27,7 +27,7 @@ from tqdm import tqdm
 ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
 sys.path.append('{}/../..'.format(ROOT_DIR))
 sys.path.append('{}/../../third_party/Matcha-TTS'.format(ROOT_DIR))
-from cosyvoice.cli.cosyvoice import CosyVoice
+from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2


 def get_dummy_input(batch_size, seq_len, out_channels, device):
@@ -56,14 +56,20 @@ def main():
     logging.basicConfig(level=logging.DEBUG,
                         format='%(asctime)s %(levelname)s %(message)s')

-    cosyvoice = CosyVoice(args.model_dir, load_jit=False, load_onnx=False)
+    try:
+        model = CosyVoice(args.model_dir)
+    except:
+        try:
+            model = CosyVoice2(args.model_dir)
+        except:
+            raise TypeError('no valid model_type!')

     # 1. export flow decoder estimator
-    estimator = cosyvoice.model.flow.decoder.estimator
+    estimator = model.model.flow.decoder.estimator

-    device = cosyvoice.model.device
-    batch_size, seq_len = 1, 256
-    out_channels = cosyvoice.model.flow.decoder.estimator.out_channels
+    device = model.model.device
+    batch_size, seq_len = 2, 256
+    out_channels = model.model.flow.decoder.estimator.out_channels
     x, mask, mu, t, spks, cond = get_dummy_input(batch_size, seq_len, out_channels, device)
     torch.onnx.export(
         estimator,
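batch_size moves from 1 to 2, matching the fixed 2×80×T shapes passed to trtexec at the end of this commit (presumably the conditional/unconditional pair used for classifier-free guidance in the flow decoder). For reference, a sketch of what get_dummy_input plausibly returns; the exact body is not shown in this diff, so the shapes below are inferred from the trtexec --minShapes/--maxShapes flags:

    import torch

    def get_dummy_input(batch_size, seq_len, out_channels, device):
        # x/mu/cond: (batch, out_channels, seq_len); mask: (batch, 1, seq_len)
        x = torch.rand((batch_size, out_channels, seq_len), device=device)
        mask = torch.ones((batch_size, 1, seq_len), device=device)
        mu = torch.rand((batch_size, out_channels, seq_len), device=device)
        t = torch.rand((batch_size,), device=device)
        spks = torch.rand((batch_size, out_channels), device=device)
        cond = torch.rand((batch_size, out_channels, seq_len), device=device)
        return x, mask, mu, t, spks, cond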
@@ -75,13 +81,11 @@ def main():
         input_names=['x', 'mask', 'mu', 't', 'spks', 'cond'],
         output_names=['estimator_out'],
         dynamic_axes={
-            'x': {0: 'batch_size', 2: 'seq_len'},
-            'mask': {0: 'batch_size', 2: 'seq_len'},
-            'mu': {0: 'batch_size', 2: 'seq_len'},
-            'cond': {0: 'batch_size', 2: 'seq_len'},
-            't': {0: 'batch_size'},
-            'spks': {0: 'batch_size'},
-            'estimator_out': {0: 'batch_size', 2: 'seq_len'},
+            'x': {2: 'seq_len'},
+            'mask': {2: 'seq_len'},
+            'mu': {2: 'seq_len'},
+            'cond': {2: 'seq_len'},
+            'estimator_out': {2: 'seq_len'},
         }
     )
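With the batch dimension pinned at export time, only the time axis stays symbolic, so TensorRT later has to profile just one dynamic dimension. A toy sketch of the same dynamic_axes pattern (ToyEstimator is hypothetical):

    import torch

    class ToyEstimator(torch.nn.Module):
        def forward(self, x: torch.Tensor) -> torch.Tensor:
            # stand-in for the estimator: (2, 80, seq_len) in and out
            return x * 2.0

    torch.onnx.export(
        ToyEstimator(),
        (torch.randn(2, 80, 256),),
        'toy_estimator.onnx',
        input_names=['x'],
        output_names=['estimator_out'],
        dynamic_axes={'x': {2: 'seq_len'}, 'estimator_out': {2: 'seq_len'}},
    )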
@@ -94,7 +98,7 @@ def main():
                                               sess_options=option, providers=providers)

     for _ in tqdm(range(10)):
-        x, mask, mu, t, spks, cond = get_dummy_input(random.randint(1, 6), random.randint(16, 512), out_channels, device)
+        x, mask, mu, t, spks, cond = get_dummy_input(batch_size, random.randint(16, 512), out_channels, device)
         output_pytorch = estimator(x, mask, mu, t, spks, cond)
         ort_inputs = {
             'x': x.cpu().numpy(),
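The verification loop now randomizes only seq_len, consistent with the static batch axis. A condensed, self-contained sketch of that parity check (provider choice and tolerance are assumptions):

    import numpy as np
    import onnxruntime
    import torch


    def check_onnx_matches_pytorch(estimator, onnx_path, inputs, atol=1e-4):
        """Run the same inputs through PyTorch and onnxruntime, compare outputs."""
        names = ['x', 'mask', 'mu', 't', 'spks', 'cond']
        sess = onnxruntime.InferenceSession(onnx_path, providers=['CPUExecutionProvider'])
        with torch.no_grad():
            out_torch = estimator(*inputs).cpu().numpy()
        out_onnx = sess.run(None, {n: t.cpu().numpy() for n, t in zip(names, inputs)})[0]
        return np.allclose(out_torch, out_onnx, atol=atol)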
cosyvoice/bin/export_trt.sh

@@ -6,4 +6,5 @@ TRT_DIR=<YOUR_TRT_DIR>
 MODEL_DIR=<COSYVOICE2_MODEL_DIR>

 export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$TRT_DIR/lib:/usr/local/cuda/lib64
 $TRT_DIR/bin/trtexec --onnx=$MODEL_DIR/flow.decoder.estimator.fp32.onnx --saveEngine=$MODEL_DIR/flow.decoder.estimator.fp32.mygpu.plan --minShapes=x:2x80x4,mask:2x1x4,mu:2x80x4,cond:2x80x4 --optShapes=x:2x80x193,mask:2x1x193,mu:2x80x193,cond:2x80x193 --maxShapes=x:2x80x6800,mask:2x1x6800,mu:2x80x6800,cond:2x80x6800 --inputIOFormats=fp32:chw,fp32:chw,fp32:chw,fp32:chw,fp32:chw,fp32:chw --outputIOFormats=fp32:chw
+$TRT_DIR/bin/trtexec --onnx=$MODEL_DIR/flow.decoder.estimator.fp32.onnx --saveEngine=$MODEL_DIR/flow.decoder.estimator.fp16.mygpu.plan --fp16 --minShapes=x:2x80x4,mask:2x1x4,mu:2x80x4,cond:2x80x4 --optShapes=x:2x80x193,mask:2x1x193,mu:2x80x193,cond:2x80x193 --maxShapes=x:2x80x6800,mask:2x1x6800,mu:2x80x6800,cond:2x80x6800 --inputIOFormats=fp16:chw,fp16:chw,fp16:chw,fp16:chw,fp16:chw,fp16:chw --outputIOFormats=fp16:chw
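The --minShapes/--optShapes/--maxShapes triple declares one optimization profile: batch fixed at 2, mel channels at 80, and a time axis ranging from 4 to 6800 frames with 193 as the tuning point. For reference, a minimal sketch of deserializing the resulting engine with the TensorRT Python API (an illustration, not the loader the repo actually uses):

    import tensorrt as trt

    # Deserialize the .plan built by trtexec above; the path is a placeholder.
    logger = trt.Logger(trt.Logger.WARNING)
    with open('<MODEL_DIR>/flow.decoder.estimator.fp16.mygpu.plan', 'rb') as f:
        engine = trt.Runtime(logger).deserialize_cuda_engine(f.read())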