Mirror of https://github.com/shivammehta25/Matcha-TTS.git
Moving from diffusers to incode
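This commit swaps out components that were imported from HuggingFace's diffusers package for in-repo ("incode") equivalents: BasicTransformerBlock now comes from matcha.models.components.transformer, and diffusers' TimestepEmbedding is copied into this file directly. Only the small get_activation helper is still pulled from diffusers. Judging by the imports, the file being changed appears to be matcha/models/components/decoder.py.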
@@ -1,13 +1,15 @@
 import math
 from typing import Optional
 
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from conformer import ConformerBlock
-from diffusers.models.attention import BasicTransformerBlock
-from diffusers.models.embeddings import TimestepEmbedding
+from diffusers.models.activations import get_activation
 from einops import pack, rearrange, repeat
 
+from matcha.models.components.transformer import BasicTransformerBlock
+
+
 class SinusoidalPosEmb(torch.nn.Module):
     def __init__(self, dim):
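After this hunk, the import block's only remaining diffusers dependency is get_activation, which the vendored TimestepEmbedding below needs. As a quick illustration (not part of the commit), get_activation just maps an activation name to a torch.nn module:

    from diffusers.models.activations import get_activation

    act = get_activation("silu")  # returns an nn.SiLU() instance,
                                  # the default used by TimestepEmbedding below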
@@ -67,6 +69,51 @@ class Downsample1D(nn.Module):
     def forward(self, x):
         return self.conv(x)
 
+class TimestepEmbedding(nn.Module):
+    def __init__(
+        self,
+        in_channels: int,
+        time_embed_dim: int,
+        act_fn: str = "silu",
+        out_dim: int = None,
+        post_act_fn: Optional[str] = None,
+        cond_proj_dim=None,
+    ):
+        super().__init__()
+
+        self.linear_1 = nn.Linear(in_channels, time_embed_dim)
+
+        if cond_proj_dim is not None:
+            self.cond_proj = nn.Linear(cond_proj_dim, in_channels, bias=False)
+        else:
+            self.cond_proj = None
+
+        self.act = get_activation(act_fn)
+
+        if out_dim is not None:
+            time_embed_dim_out = out_dim
+        else:
+            time_embed_dim_out = time_embed_dim
+        self.linear_2 = nn.Linear(time_embed_dim, time_embed_dim_out)
+
+        if post_act_fn is None:
+            self.post_act = None
+        else:
+            self.post_act = get_activation(post_act_fn)
+
+    def forward(self, sample, condition=None):
+        if condition is not None:
+            sample = sample + self.cond_proj(condition)
+        sample = self.linear_1(sample)
+
+        if self.act is not None:
+            sample = self.act(sample)
+
+        sample = self.linear_2(sample)
+
+        if self.post_act is not None:
+            sample = self.post_act(sample)
+        return sample
 
 class Upsample1D(nn.Module):
     """A 1D upsampling layer with an optional convolution.
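For context (not part of the commit), here is a minimal usage sketch of the vendored TimestepEmbedding, with hypothetical dimensions; it assumes the class definition above is in scope. In this decoder the input would typically come from SinusoidalPosEmb applied to the flow-matching timestep:

    import torch

    # Project timestep features (e.g. a 64-dim sinusoidal embedding for a
    # batch of 2) up to the conditioning width used by the decoder blocks.
    time_mlp = TimestepEmbedding(in_channels=64, time_embed_dim=256)
    t_feats = torch.randn(2, 64)
    cond = time_mlp(t_feats)  # shape: (2, 256)

    # The optional cond_proj_dim path projects an extra conditioning signal
    # into the timestep feature space and adds it before the MLP.
    guided = TimestepEmbedding(in_channels=64, time_embed_dim=256, cond_proj_dim=32)
    cond2 = guided(t_feats, condition=torch.randn(2, 32))  # shape: (2, 256)

Since act_fn defaults to "silu" and post_act_fn to None, the forward pass is linear_1 -> SiLU -> linear_2.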