support online onnx to trt conversion

2026-02-05 18:09:24 +08:00 · 2025-01-07 17:20:06 +08:00
parent 5d12ced727
commit b6a1116d15
4 changed files with 146 additions and 26 deletions
--- a/cosyvoice/trt/estimator_trt.py
+++ b/cosyvoice/trt/estimator_trt.py
@@ -0,0 +1,141 @@
+import os
+import torch
+import tensorrt as trt
+import logging
+import threading
+
+
+def _profile_shapes(frames):
+    """Return the shape of every engine input for ``frames`` time steps.
+
+    The entries follow the order of ``EstimatorTRT.input_names``:
+    x, mask, mu, t, spks, cond.
+    """
+    return [
+        (2, 80, frames),
+        (2, 1, frames),
+        (2, 80, frames),
+        (2,),
+        (2, 80),
+        (2, 80, frames),
+    ]
+
+
+# Minimum / optimum / maximum input shapes of the TensorRT optimization
+# profile; only the last (time) dimension differs between the three.
+_min_shape = _profile_shapes(4)
+
+_opt_shape = _profile_shapes(193)
+
+_max_shape = _profile_shapes(6800)
+
+
+class EstimatorTRT:
+    """Run the estimator network through a TensorRT engine.
+
+    On construction the serialized engine ``<path_prefix>.fp16.plan`` (or
+    ``<path_prefix>.fp32.plan``) is loaded when it exists.  Otherwise the
+    engine is built from ``<path_prefix>.fp32.onnx`` and, unless the
+    environment variable ``NOT_SAVE_TRT`` is set to ``"1"``, written to that
+    plan path so later runs can skip the conversion.
+
+    A single execution context is shared by all callers, so ``forward``
+    serializes calls with an internal lock.
+    """
+
+    def __init__(self, path_prefix: str, device: torch.device, fp16: bool = True):
+        """Load or build the engine and create its execution context.
+
+        Args:
+            path_prefix: Model path without the ``.fp32.onnx`` /
+                ``.fp16.plan`` / ``.fp32.plan`` suffix.
+            device: CUDA device that is made current while the engine is
+                created and executed.
+            fp16: Use the half-precision engine when true, otherwise fp32.
+
+        Raises:
+            RuntimeError: If the engine cannot be built or deserialized, or
+                if no execution context can be created.
+        """
+        self.lock = threading.Lock()
+        self.device = device
+        self.fp16 = fp16
+        self.input_names = ["x", "mask", "mu", "t", "spks", "cond"]
+        self.output_name = "estimator_out"
+
+        onnx_path = path_prefix + ".fp32.onnx"
+        precision = ".fp16" if fp16 else ".fp32"
+        trt_path = path_prefix + precision + ".plan"
+
+        # Saving is on by default; NOT_SAVE_TRT=1 disables it.
+        save_trt = os.environ.get("NOT_SAVE_TRT", "0") != "1"
+
+        with torch.cuda.device(device):
+            self.logger = trt.Logger(trt.Logger.INFO)
+            self.trt_runtime = trt.Runtime(self.logger)
+
+            if os.path.exists(trt_path):
+                self.engine = self._load_trt(trt_path)
+            else:
+                self.engine = self._convert_onnx_to_trt(onnx_path, trt_path, save_trt)
+
+            self.context = self.engine.create_execution_context()
+            if self.context is None:
+                raise RuntimeError("Failed to create TensorRT execution context")
+
+    def _convert_onnx_to_trt(
+        self, onnx_path: str, trt_path: str, save_trt: bool = True
+    ):
+        """Build a TensorRT engine from an ONNX file.
+
+        Args:
+            onnx_path: ONNX model to parse.
+            trt_path: Destination of the serialized engine.
+            save_trt: Write the serialized engine to ``trt_path`` when true.
+
+        Returns:
+            The deserialized TensorRT engine.
+
+        Raises:
+            RuntimeError: If parsing, building or deserializing fails.
+            OSError: If the ONNX file cannot be read or the plan cannot be
+                written.
+        """
+        logging.info("Converting onnx to trt...")
+
+        network_flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+        builder = trt.Builder(self.logger)
+        network = builder.create_network(network_flags)
+        parser = trt.OnnxParser(network, self.logger)
+        config = builder.create_builder_config()
+
+        config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 33)  # 8 GiB
+        if self.fp16:
+            config.set_flag(trt.BuilderFlag.FP16)
+
+        # load onnx model
+        with open(onnx_path, "rb") as f:
+            parsed = parser.parse(f.read())
+        if not parsed:
+            # Raise instead of exiting: this is library code and must not
+            # terminate the host process.
+            errors = [str(parser.get_error(i)) for i in range(parser.num_errors)]
+            for message in errors:
+                logging.error(message)
+            raise RuntimeError(
+                "Failed to parse onnx model {}: {}".format(onnx_path, "; ".join(errors))
+            )
+
+        # set input shapes (the shape tables are ordered like input_names)
+        profile = builder.create_optimization_profile()
+        for name, lo, opt, hi in zip(
+            self.input_names, _min_shape, _opt_shape, _max_shape
+        ):
+            profile.set_shape(name, lo, opt, hi)
+
+        # set input and output data type
+        tensor_dtype = trt.DataType.HALF if self.fp16 else trt.DataType.FLOAT
+        for i in range(network.num_inputs):
+            network.get_input(i).dtype = tensor_dtype
+        for i in range(network.num_outputs):
+            network.get_output(i).dtype = tensor_dtype
+
+        config.add_optimization_profile(profile)
+        engine_bytes = builder.build_serialized_network(network, config)
+        if engine_bytes is None:
+            # Check before touching the disk so a failed build never leaves
+            # an empty plan file that later runs would try to load.
+            raise RuntimeError(
+                "Failed to build trt engine from {}".format(onnx_path)
+            )
+
+        # save trt engine
+        if save_trt:
+            # Write to a temporary file and rename it, so an interrupted
+            # write cannot leave a truncated plan at trt_path.
+            tmp_path = "{}.{}.tmp".format(trt_path, os.getpid())
+            try:
+                with open(tmp_path, "wb") as f:
+                    f.write(engine_bytes)
+                os.replace(tmp_path, trt_path)
+            except OSError:
+                if os.path.exists(tmp_path):
+                    os.remove(tmp_path)
+                raise
+            logging.info("trt engine saved to %s", trt_path)
+
+        engine = self.trt_runtime.deserialize_cuda_engine(engine_bytes)
+        if engine is None:
+            raise RuntimeError("Failed to deserialize the freshly built trt engine")
+        return engine
+
+    def _load_trt(self, trt_path: str):
+        """Deserialize a TensorRT engine from a plan file.
+
+        Args:
+            trt_path: Path of the serialized engine.
+
+        Returns:
+            The deserialized TensorRT engine.
+
+        Raises:
+            RuntimeError: If TensorRT cannot deserialize the file.
+        """
+        logging.info("Found trt engine, loading...")
+
+        with open(trt_path, "rb") as f:
+            engine_bytes = f.read()
+        engine = self.trt_runtime.deserialize_cuda_engine(engine_bytes)
+        if engine is None:
+            raise RuntimeError(
+                "Failed to deserialize trt engine {}; delete the file to "
+                "rebuild it from onnx".format(trt_path)
+            )
+        return engine
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        mask: torch.Tensor,
+        mu: torch.Tensor,
+        t: torch.Tensor,
+        spks: torch.Tensor,
+        cond: torch.Tensor,
+    ):
+        """Run the engine and overwrite ``x`` with its output.
+
+        The input shapes are fixed to ``(2, 80, T)`` for ``x``, ``mu`` and
+        ``cond``, ``(2, 1, T)`` for ``mask``, ``(2,)`` for ``t`` and
+        ``(2, 80)`` for ``spks``, where ``T = x.size(2)``.
+        NOTE(review): raw device pointers are handed to TensorRT, so the
+        tensors are assumed to live on ``self.device`` and to have the
+        engine's dtype (half when ``fp16`` else float) -- confirm at callers.
+
+        Returns:
+            ``x`` itself, holding the engine output.
+
+        Raises:
+            ValueError: If TensorRT rejects an input shape.
+            RuntimeError: If engine execution fails.
+        """
+        with self.lock, torch.cuda.device(self.device):
+            frames = x.size(2)
+            shapes = {
+                "x": (2, 80, frames),
+                "mask": (2, 1, frames),
+                "mu": (2, 80, frames),
+                "t": (2,),
+                "spks": (2, 80),
+                "cond": (2, 80, frames),
+            }
+            for name, shape in shapes.items():
+                if not self.context.set_input_shape(name, shape):
+                    raise ValueError(
+                        "TensorRT rejected shape {} for input '{}'".format(shape, name)
+                    )
+
+            # Hold the contiguous tensors in a local list: contiguous() may
+            # allocate a copy, and that copy must stay alive until the engine
+            # has finished reading from its pointer.
+            buffers = [
+                tensor.contiguous() for tensor in (x, mask, mu, t, spks, cond)
+            ]
+            # The output binding aliases the first input buffer, so the
+            # result replaces the contents of x.
+            out = buffers[0]
+            bindings = [buf.data_ptr() for buf in buffers]
+            bindings.append(out.data_ptr())
+
+            # run trt engine
+            if not self.context.execute_v2(bindings):
+                raise RuntimeError("TensorRT engine execution failed")
+
+            # x was not contiguous: the engine wrote into the copy, so bring
+            # the result back into the caller's tensor.
+            if out.data_ptr() != x.data_ptr():
+                x.copy_(out)
+            return x
+
+    def __call__(
+        self,
+        x: torch.Tensor,
+        mask: torch.Tensor,
+        mu: torch.Tensor,
+        t: torch.Tensor,
+        spks: torch.Tensor,
+        cond: torch.Tensor,
+    ):
+        """Alias for :meth:`forward` so the object can be called directly."""
+        return self.forward(x, mask, mu, t, spks, cond)