import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange

def set_sigma_for_DCLS(model, s):
    """Set the bump width `sigma` on every DelayConv module in `model`."""
    for name, module in model.named_modules():
        if module.__class__.__name__ == 'DelayConv':
            if hasattr(module, 'sigma'):
                module.sigma = s
                print('Set sigma to', s)

class DropoutNd(nn.Module):
    def __init__(self, p: float = 0.5, tie=True, transposed=True):
        """
        tie: tie dropout mask across sequence lengths (Dropout1d/2d/3d)
        """
        super().__init__()
        if p < 0 or p >= 1:
            raise ValueError("dropout probability has to be in [0, 1), " "but got {}".format(p))
        self.p = p
        self.tie = tie
        self.transposed = transposed
        self.binomial = torch.distributions.binomial.Binomial(probs=1 - self.p)

    def forward(self, X):
        """X: (batch, dim, lengths...)."""
        if self.training:
            if not self.transposed: X = rearrange(X, 'b ... d -> b d ...')
            # binomial = torch.distributions.binomial.Binomial(probs=1-self.p)  # This is incredibly slow because of CPU -> GPU copying
            mask_shape = X.shape[:2] + (1,) * (X.ndim - 2) if self.tie else X.shape
            # mask = self.binomial.sample(mask_shape)
            mask = torch.rand(*mask_shape, device=X.device) < 1. - self.p
            X = X * mask * (1.0 / (1 - self.p))
            if not self.transposed: X = rearrange(X, 'b d ... -> b ... d')
            return X
        return X
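
# Example (illustrative, not part of the original API): with the defaults
# p=0.5, tie=True, transposed=True, an input of shape (B, D, L) gets one
# Bernoulli mask per (B, D) pair that is broadcast along L (Dropout1d-style
# tying), then rescaled by 1/(1 - p):
#   drop = DropoutNd(p=0.5)
#   y = drop(torch.randn(4, 8, 100))  # mask shape (4, 8, 1), scale factor 2.0
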
class DelayConv(nn.Module):
    def __init__(
        self,
        in_c,
        k,
        dropout=0.0,
        n_delay=1,
        dilation=1,
        kernel_type='triangle_r_temp'
    ):
        super().__init__()
        self.C = in_c  # number of input and output channels
        self.win_len = k
        self.dilation = dilation
        self.n_delay = n_delay
        self.kernel_type = kernel_type
        self.t = torch.arange(self.win_len).float().unsqueeze(0)  # [1, k]
        self.sigma = self.win_len // 2
        self.delay_kernel = None
        self.bump = None
        # ========== changed: d shape -> [C_out, C_in, n_delay] ==========
        d = torch.rand(self.C, self.C, self.n_delay)
        with torch.no_grad():
            for co in range(self.C):
                for ci in range(self.C):
                    d[co, ci, :] = torch.randperm(self.win_len - 2)[:self.n_delay] + 1
        self.register("d", d, lr=1e-2)
        # initialize the weights: [C_out, C_in, k]
        weight = torch.ones([self.C, self.C, k])
        with torch.no_grad():
            for co in range(self.C):      # output channel
                for ci in range(self.C):  # input channel
                    for i in range(k - 2, -1, -1):
                        weight[co, ci, i] = weight[co, ci, i + 1] / 2
        self.weight = nn.Parameter(weight)
        self.dropout = nn.Dropout(dropout / 5) if dropout > 0.0 else nn.Identity()

    def register(self, name, tensor, lr=None):
        """Register a trainable parameter, or a fixed buffer when lr == 0.0."""
        if lr == 0.0:
            self.register_buffer(name, tensor)
        else:
            self.register_parameter(name, nn.Parameter(tensor))
            optim = {"weight_decay": 0}
            if lr is not None:
                optim["lr"] = lr
            setattr(getattr(self, name), "_optim", optim)

    def update_kernel(self, device):
        """
        Build the delay kernel, shape [C_out, C_in, k].
        """
        t = self.t.to(device).view(1, 1, 1, -1)  # [1, 1, 1, k]
        d = self.d.to(device)                    # [C_out, C_in, n_delay]
        # ---------- compute the bump ----------
        if self.kernel_type == 'gauss':
            bump = torch.exp(-0.5 * ((t - self.win_len + d.unsqueeze(-1) + 1) / self.sigma) ** 2)
            bump = (bump - 1e-3).relu() + 1e-3
            bump = bump / (bump.sum(dim=-1, keepdim=True) + 1e-7)
        elif self.kernel_type == 'triangle':
            bump = torch.relu(1 - torch.abs((t - self.win_len + d.unsqueeze(-1) + 1) / self.sigma))
            bump = bump / (bump.sum(dim=-1, keepdim=True).detach() + 1e-7)
        elif self.kernel_type == 'triangle_r':
            # straight-through rounding of the delays
            d_int = (d.round() - d).detach() + d
            bump = torch.relu(1 - torch.abs((t - self.win_len + d_int.unsqueeze(-1) + 1) / self.sigma))
            bump = bump / (bump.sum(dim=-1, keepdim=True).detach() + 1e-7)
        elif self.kernel_type == 'triangle_r_temp':
            # temperature-scaled straight-through rounding
            scale = min(1.0, 1.0 / self.sigma)
            d_int = (d.round() - d).detach() * scale + d
            bump = torch.relu(1 - torch.abs((t - self.win_len + d_int.unsqueeze(-1) + 1) / self.sigma))
            bump = bump / (bump.sum(dim=-1, keepdim=True).detach() + 1e-7)  # [C_out, C_in, n_delay, k]
            # ------ harden the bump in eval mode ------
            if not self.training:
                max_idx = bump.argmax(dim=-1, keepdim=True)  # index of the peak
                hard_mask = torch.zeros_like(bump)
                hard_mask.scatter_(-1, max_idx, 1.0)
                bump = bump * hard_mask
            # ------------------------------------------
        else:
            raise ValueError(f"Unknown kernel_type: {self.kernel_type}")
        # bump: [C_out, C_in, n_delay, k]
        self.bump = bump.detach().clone().to(device)
        # ---------- sum over the n_delay dimension: [C_out, C_in, k] ----------
        bump_sum = bump.sum(dim=2)
        # ---------- build the final convolution kernel ----------
        # weight: [C_out, C_in, k]
        self.delay_kernel = (self.weight * bump_sum).to(device)  # [C_out, C_in, k]
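
    # Note (added for clarity): for the triangle kernels the bump over tap
    # index i is relu(1 - |i - (k - 1 - d)| / sigma), peaking at tap k - 1 - d.
    # With the causal left-padding used in forward(), tap k - 1 - d reads
    # x[t - d * dilation], so a larger d looks further into the past; the
    # n_delay bumps are summed and multiplied elementwise by `weight` to give
    # the final kernel.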
    def forward(self, x):
        """
        x: (T, B, N, C)
        return: (T*B, C, N)
        """
        # rearrange dimensions
        x = x.permute(0, 1, 3, 2).contiguous()  # (T, B, N, C)
        T, B, N, C = x.shape
        assert C == self.C, f"Input channel mismatch: {C} vs {self.C}"
        x = x.permute(1, 2, 3, 0).contiguous()  # (B, N, C, T)
        # merge B*N into the batch dimension
        x_reshaped = x.view(B * N, C, T)  # (B*N, C, T)
        device = x.device
        # refresh the delay kernel
        self.update_kernel(device)  # -> [C_out, C_in, k]
        kernel = self.delay_kernel
        # causal (left) padding
        pad_left = (self.win_len - 1) * self.dilation
        x_padded = F.pad(x_reshaped, (pad_left, 0))  # (B*N, C, T+pad)
        # full-channel convolution: groups=1 (cross-channel interaction)
        y = F.conv1d(x_padded, kernel, stride=1, dilation=self.dilation, groups=1)  # (B*N, C, T)
        # restore the original layout
        y = y.view(B, N, C, T).permute(3, 0, 2, 1).contiguous().view(-1, C, N)  # (T*B, C, N)
        return self.dropout(y)
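
# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only; the sizes below are hypothetical).
# forward() permutes the last two input dims and asserts that dim 2 of the
# 4-D input equals in_c, so the channel axis must sit at index 2 here.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    T, B, C, N = 16, 2, 8, 4  # time steps, batch, channels, spatial units
    conv = DelayConv(in_c=C, k=7, dropout=0.1, n_delay=1, kernel_type='triangle_r_temp')
    set_sigma_for_DCLS(conv, 2.0)   # e.g. anneal the bump width during training
    x = torch.rand(T, B, C, N)      # channel axis at index 2
    y = conv(x)                     # -> (T*B, C, N)
    print(y.shape)                  # torch.Size([32, 8, 4])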