add flow decoder cache

2026-02-04 09:29:25 +08:00 · 2025-01-23 16:48:13 +08:00
parent 190840b8dc
commit 1c062ab381
21 changed files with 1601 additions and 214 deletions
--- a/cosyvoice/utils/mask.py
+++ b/cosyvoice/utils/mask.py
@@ -87,7 +87,7 @@ def subsequent_mask(
    return mask


-def subsequent_chunk_mask_deprecated(
+def subsequent_chunk_mask(
        size: int,
        chunk_size: int,
        num_left_chunks: int = -1,
@@ -125,41 +125,6 @@ def subsequent_chunk_mask_deprecated(
    return ret


-def subsequent_chunk_mask(
-        size: int,
-        chunk_size: int,
-        num_left_chunks: int = -1,
-        device: torch.device = torch.device("cpu"),
-) -> torch.Tensor:
-    """Create mask for subsequent steps (size, size) with chunk size,
-       this is for streaming encoder
-
-    Args:
-        size (int): size of mask
-        chunk_size (int): size of chunk
-        num_left_chunks (int): number of left chunks
-            <0: use full chunk
-            >=0: use num_left_chunks
-        device (torch.device): "cpu" or "cuda" or torch.Tensor.device
-
-    Returns:
-        torch.Tensor: mask
-
-    Examples:
-        >>> subsequent_chunk_mask(4, 2)
-        [[1, 1, 0, 0],
-         [1, 1, 0, 0],
-         [1, 1, 1, 1],
-         [1, 1, 1, 1]]
-    """
-    # NOTE this modified implementation meets onnx export requirements, but it doesn't support num_left_chunks
-    # actually this is not needed after we have inference cache implemented, will remove it later
-    pos_idx = torch.arange(size, device=device)
-    block_value = (torch.div(pos_idx, chunk_size, rounding_mode='trunc') + 1) * chunk_size
-    ret = pos_idx.unsqueeze(0) < block_value.unsqueeze(1)
-    return ret
-
-
 def add_optional_chunk_mask(xs: torch.Tensor,
                            masks: torch.Tensor,
                            use_dynamic_chunk: bool,