import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange

def set_sigma_for_DCLS(model, s):
    for name, module in model.named_modules():
        if module.__class__.__name__ == 'DelayConv':
            if hasattr(module, 'sigma'):
                module.sigma = s
    print('Set sigma to', s)

class DropoutNd(nn.Module):
    def __init__(self, p: float = 0.5, tie=True, transposed=True):
        """

        tie: tie dropout mask across sequence lengths (Dropout1d/2d/3d)

        """
        super().__init__()
        if p < 0 or p >= 1:
            raise ValueError("dropout probability has to be in [0, 1), " "but got {}".format(p))
        self.p = p
        self.tie = tie
        self.transposed = transposed
        self.binomial = torch.distributions.binomial.Binomial(probs=1-self.p)

    def forward(self, X):
        """X: (batch, dim, lengths...)."""
        if self.training:
            if not self.transposed: X = rearrange(X, 'b ... d -> b d ...')
            # binomial = torch.distributions.binomial.Binomial(probs=1-self.p) # This is incredibly slow because of CPU -> GPU copying
            mask_shape = X.shape[:2] + (1,) * (X.ndim - 2) if self.tie else X.shape
            # mask = self.binomial.sample(mask_shape)
            mask = torch.rand(*mask_shape, device=X.device) < 1. - self.p
            X = X * mask * (1.0 / (1 - self.p))
            if not self.transposed: X = rearrange(X, 'b d ... -> b ... d')
            return X
        return X

class DelayConv(nn.Module):
    def __init__(
        self,
        in_c,
        k,
        dropout=0.0,
        n_delay=1,
        dilation=1,
        kernel_type='triangle_r_temp'
    ):
        super().__init__()
        self.C = in_c  # number of input and output channels (equal here)
        self.win_len = k
        self.dilation = dilation
        self.n_delay = n_delay
        self.kernel_type = kernel_type

        self.t = torch.arange(self.win_len).float().unsqueeze(0)  # [1, k]
        self.sigma = self.win_len // 2

        self.delay_kernel = None
        self.bump = None

        # d: learnable (fractional) delay positions, shape [C_out, C_in, n_delay]
        d = torch.rand(self.C, self.C, self.n_delay)
        with torch.no_grad():
            for co in range(self.C):
                for ci in range(self.C):
                    # initialize each delay to a distinct integer in [1, win_len - 2] (assumes win_len >= n_delay + 2)
                    d[co, ci, :] = torch.randperm(self.win_len - 2)[:self.n_delay] + 1
        self.register("d", d, lr=1e-2)

        # initialize weights: [C_out, C_in, k] (geometric decay toward earlier taps)
        weight = torch.ones([self.C, self.C, k])
        with torch.no_grad():
            for co in range(self.C):            # output channel
                for ci in range(self.C):        # input channel
                    for i in range(k - 2, -1, -1):
                        weight[co, ci, i] = weight[co, ci, i + 1] / 2

        self.weight = nn.Parameter(weight)

        self.dropout = nn.Dropout(dropout / 5) if dropout > 0.0 else nn.Identity()

    def register(self, name, tensor, lr=None):
        """注册可训练或固定参数"""
        if lr == 0.0:
            self.register_buffer(name, tensor)
        else:
            self.register_parameter(name, nn.Parameter(tensor))
            optim = {"weight_decay": 0}
            if lr is not None:
                optim["lr"] = lr
            setattr(getattr(self, name), "_optim", optim)

    def update_kernel(self, device):
        """

        输出 delay kernel: shape [C_out, C_in, k]

        """
        t = self.t.to(device).view(1, 1, 1, -1)  # [1,1,1,k]
        d = self.d.to(device)                    # [C_out, C_in, n_delay]

        # ---------- compute the bump (soft delay) kernel ----------
        if self.kernel_type == 'gauss':
            bump = torch.exp(-0.5 * ((t - self.win_len + d.unsqueeze(-1) + 1) / self.sigma) ** 2)
            bump = (bump - 1e-3).relu() + 1e-3
            bump = bump / (bump.sum(dim=-1, keepdim=True) + 1e-7)

        elif self.kernel_type == 'triangle':
            bump = torch.relu(1 - torch.abs((t - self.win_len + d.unsqueeze(-1) + 1) / self.sigma))
            bump = bump / (bump.sum(dim=-1, keepdim=True).detach() + 1e-7)

        elif self.kernel_type == 'triangle_r':
            d_int = (d.round() - d).detach() + d
            bump = torch.relu(1 - torch.abs((t - self.win_len + d_int.unsqueeze(-1) + 1) / self.sigma))
            bump = bump / (bump.sum(dim=-1, keepdim=True).detach() + 1e-7)

        elif self.kernel_type == 'triangle_r_temp':
            scale = min(1.0, 1.0 / self.sigma)
            d_int = (d.round() - d).detach() * scale + d
            bump = torch.relu(1 - torch.abs((t - self.win_len + d_int.unsqueeze(-1) + 1) / self.sigma))
            bump = bump / (bump.sum(dim=-1, keepdim=True).detach() + 1e-7)  # [C_out, C_in, n_delay, k]
            # ------ harden the bump to a one-hot kernel in eval mode ------
            if not self.training:
                max_idx = bump.argmax(dim=-1, keepdim=True)  # index of the maximum
                hard_mask = torch.zeros_like(bump)
                hard_mask.scatter_(-1, max_idx, 1.0)
                bump = bump * hard_mask
            # --------------------------------
        else:
            raise ValueError(f"Unknown kernel_type: {self.kernel_type}")

        # bump: [C_out, C_in, n_delay, k]
        self.bump = bump.detach().clone().to(device)

        # ---------- sum over the n_delay dimension: [C_out, C_in, k] ----------
        bump_sum = bump.sum(dim=2)

        # ---------- build the final convolution kernel ----------
        # weight: [C_out, C_in, k]
        self.delay_kernel = (self.weight * bump_sum).to(device)  # [C_out, C_in, k]

    def forward(self, x):
        """

        x: (T, B, N, C)

        return: (T*B, C, N)

        """
        # 调整维度
        x = x.permute(0, 1, 3, 2).contiguous()  # (T, B, N, C)
        T, B, N, C = x.shape
        assert C == self.C, f"Input channel mismatch: {C} vs {self.C}"
        x = x.permute(1, 2, 3, 0).contiguous()  # (B, N, C, T)

        # merge B*N into the batch dimension
        x_reshaped = x.view(B * N, C, T)  # (B*N, C, T)
        device = x.device

        # rebuild the delay kernel
        self.update_kernel(device)  # -> [C_out, C_in, k]
        kernel = self.delay_kernel

        # padding
        pad_left = (self.win_len - 1) * self.dilation
        x_padded = F.pad(x_reshaped, (pad_left, 0))  # (B*N, C, T+pad)

        # full-channel convolution: groups=1 (cross-channel interaction)
        y = F.conv1d(x_padded, kernel, stride=1, dilation=self.dilation, groups=1)  # (B*N, C, T)

        # restore the output shape: (T*B, C, N)
        y = y.view(B, N, C, T).permute(3, 0, 2, 1).contiguous().view(-1, C, N)  # (T*B, C, N)

        return self.dropout(y)
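

if __name__ == "__main__":
    # Minimal smoke test (a sketch for illustration only; the dimensions below are
    # arbitrary assumptions, not values prescribed by this module).
    torch.manual_seed(0)

    T, B, N, C, k = 12, 2, 4, 8, 5       # time steps, batch, nodes, channels, kernel width
    layer = DelayConv(in_c=C, k=k, n_delay=1, kernel_type='triangle_r_temp')
    drop = DropoutNd(p=0.1, tie=True, transposed=True)

    x = torch.randn(T, B, C, N)          # layout expected by DelayConv.forward
    y = layer(x)
    print(y.shape)                       # torch.Size([24, 8, 4]), i.e. (T*B, C, N)
    print(drop(y).shape)                 # DropoutNd preserves the (batch, dim, lengths...) shape

    # Anneal the interpolation width of every DelayConv in a model, e.g. during training.
    set_sigma_for_DCLS(layer, 1.0)
    _ = layer(x)                         # the kernel is rebuilt with the new sigma on the next pass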