Upload folder using huggingface_hub

Browse files

Files changed (11) hide show

classification/config.json +30 -0
classification/configuration_neuroclr.py +59 -0
classification/export_classification_to_hf.py +109 -0
classification/model.safetensors +3 -0
classification/modeling_neuroclr.py +301 -0
pretraining/config.json +19 -0
pretraining/configuration_neuroclr.py +31 -0
pretraining/export_pretraining_to_hf.py +61 -0
pretraining/model.safetensors +3 -0
pretraining/modeling_neuroclr.py +79 -0
upload_to_hf.py +12 -0

classification/config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "TSlength": 128,
+  "architectures": [
+    "NeuroCLRForSequenceClassification"
+  ],
+  "auto_map": {
+    "AutoConfig": "configuration_neuroclr.NeuroCLRConfig",
+    "AutoModelForSequenceClassification": "modeling_neuroclr.NeuroCLRForSequenceClassification"
+  },
+  "base_filters": 256,
+  "downsample_gap": 6,
+  "freeze_encoder": true,
+  "groups": 32,
+  "increasefilter_gap": 12,
+  "kernel_size": 16,
+  "model_type": "neuroclr",
+  "n_block": 48,
+  "n_rois": 200,
+  "nhead": 2,
+  "nlayer": 2,
+  "normalize_input": true,
+  "pooling": "flatten",
+  "projector_out1": 128,
+  "projector_out2": 64,
+  "stride": 2,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "use_bn": true,
+  "use_do": true
+}

classification/configuration_neuroclr.py ADDED Viewed

	@@ -0,0 +1,59 @@

+# classification/configuration_neuroclr.py
+from transformers import PretrainedConfig
+class NeuroCLRConfig(PretrainedConfig):
+    model_type = "neuroclr"
+    def __init__(
+        self,
+        # Encoder / SSL
+        TSlength: int = 128,
+        nhead: int = 4,
+        nlayer: int = 4,
+        projector_out1: int = 256,
+        projector_out2: int = 128,
+        pooling: str = "flatten",      # input is [B,1,128]
+        normalize_input: bool = True,
+        # Classification
+        n_rois: int = 200,
+        num_labels: int = 2,
+        # ResNet1D head hyperparams
+        base_filters: int = 256,
+        kernel_size: int = 16,
+        stride: int = 2,
+        groups: int = 32,
+        n_block: int = 48,
+        downsample_gap: int = 6,
+        increasefilter_gap: int = 12,
+        use_bn: bool = True,
+        use_do: bool = True,
+        **kwargs
+    ):
+        super().__init__(**kwargs)
+        # Encoder
+        self.TSlength = TSlength
+        self.nhead = nhead
+        self.nlayer = nlayer
+        self.projector_out1 = projector_out1
+        self.projector_out2 = projector_out2
+        self.pooling = pooling
+        self.normalize_input = normalize_input
+        # Classification
+        self.n_rois = n_rois
+        self.num_labels = num_labels
+        # ResNet1D head
+        self.base_filters = base_filters
+        self.kernel_size = kernel_size
+        self.stride = stride
+        self.groups = groups
+        self.n_block = n_block
+        self.downsample_gap = downsample_gap
+        self.increasefilter_gap = increasefilter_gap
+        self.use_bn = use_bn
+        self.use_do = use_do

classification/export_classification_to_hf.py ADDED Viewed

	@@ -0,0 +1,109 @@

+import torch
+from configuration_neuroclr import NeuroCLRConfig
+from modeling_neuroclr import NeuroCLRForSequenceClassification
+# -------- EDIT THESE PATHS + nhead if needed ----------
+PRETRAIN_CKPT = ""
+HEAD_CKPT     = ""
+OUT_DIR       = "."
+CFG = dict(
+    # encoder MUST match the pretrained export
+    TSlength=128,
+    nhead=2,        # change if needed
+    nlayer=2,       # we confirmed this from your pretraining ckpt
+    projector_out1=128,
+    projector_out2=64,
+    pooling="flatten",
+    normalize_input=True,
+    # classification
+    n_rois=200,
+    num_labels=2,
+    freeze_encoder=True,  # encoder frozen by default
+    # ResNet1D head (your exact settings)
+    base_filters=256,
+    kernel_size=16,
+    stride=2,
+    groups=32,
+    n_block=48,
+    downsample_gap=6,
+    increasefilter_gap=12,
+    use_bn=True,
+    use_do=True,
+)
+# -----------------------------------------------------
+def load_model_state_dict(path):
+    ckpt = torch.load(path, map_location="cpu")
+    if isinstance(ckpt, dict):
+        if "model_state_dict" in ckpt:
+            return ckpt["model_state_dict"]
+        if "state_dict" in ckpt:
+            return ckpt["state_dict"]
+        return ckpt
+    return ckpt
+def remap_encoder(sd):
+    # pretraining ckpt keys: transformer_encoder.* and projector.*
+    new = {}
+    for k, v in sd.items():
+        k2 = k.replace("module.", "")
+        if k2.startswith("transformer_encoder.") or k2.startswith("projector."):
+            new["encoder." + k2] = v
+    return new
+def remap_head(sd):
+    # head ckpt keys likely start with first_block_conv.*, basicblock_list.*, dense.* etc.
+    new = {}
+    for k, v in sd.items():
+        k2 = k.replace("module.", "")
+        head_prefixes = (
+            "first_block_conv.", "first_block_bn.", "first_block_relu.",
+            "basicblock_list.", "final_bn.", "final_relu.", "dense."
+        )
+        if k2.startswith(head_prefixes):
+            new["head." + k2] = v
+        # If your checkpoint already has head.* then keep it
+        elif k2.startswith("head."):
+            new[k2] = v
+    return new
+def main():
+    config = NeuroCLRConfig(**CFG)
+    # Enables HF auto-classes loading from this folder
+    config.auto_map = {
+        "AutoConfig": "configuration_neuroclr.NeuroCLRConfig",
+        "AutoModelForSequenceClassification": "modeling_neuroclr.NeuroCLRForSequenceClassification",
+    }
+    model = NeuroCLRForSequenceClassification(config)
+    # 1) Load encoder weights from pretraining ckpt
+    enc_sd_raw = load_model_state_dict(PRETRAIN_CKPT)
+    enc_sd = remap_encoder(enc_sd_raw)
+    # 2) Load head weights from classification ckpt
+    head_sd_raw = load_model_state_dict(HEAD_CKPT)
+    head_sd = remap_head(head_sd_raw)
+    # 3) Merge and load
+    merged = {}
+    merged.update(enc_sd)
+    merged.update(head_sd)
+    missing, unexpected = model.load_state_dict(merged, strict=False)
+    print("Missing:", missing)
+    print("Unexpected:", unexpected)
+    # Save to HF folder
+    model.save_pretrained(OUT_DIR, safe_serialization=True)
+    print("Saved HF classification model to:", OUT_DIR)
+if __name__ == "__main__":
+    main()

classification/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:75a30b12cd8b5d195b93b305693b83543dcf8b758d5a0fe5aec8e5e968c777fe
+size 268265544

classification/modeling_neuroclr.py ADDED Viewed

	@@ -0,0 +1,301 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn import TransformerEncoder, TransformerEncoderLayer
+from transformers import PreTrainedModel
+from configuration_neuroclr import NeuroCLRConfig
+# --------------------------
+# SSL Encoder (per-ROI)
+# --------------------------
+class NeuroCLR(nn.Module):
+    def __init__(self, config: NeuroCLRConfig):
+        super().__init__()
+        encoder_layer = TransformerEncoderLayer(
+            d_model=config.TSlength,
+            dim_feedforward=2 * config.TSlength,
+            nhead=config.nhead,
+            batch_first=True,
+        )
+        self.transformer_encoder = TransformerEncoder(encoder_layer, config.nlayer)
+        self.projector = nn.Sequential(
+            nn.Linear(config.TSlength, config.projector_out1),
+            nn.BatchNorm1d(config.projector_out1),
+            nn.ReLU(),
+            nn.Linear(config.projector_out1, config.projector_out2),
+        )
+        self.normalize_input = config.normalize_input
+        self.pooling = config.pooling
+        self.TSlength = config.TSlength
+    def forward(self, x):
+        # x: [B, 1, 128]
+        if self.normalize_input:
+            x = F.normalize(x, dim=-1)
+        x = self.transformer_encoder(x)  # [B, 1, 128]
+        if self.pooling == "flatten":
+            h = x.reshape(x.shape[0], -1)  # [B, 128]
+        elif self.pooling == "mean":
+            h = x.mean(dim=1)
+        elif self.pooling == "last":
+            h = x[:, -1, :]
+        else:
+            raise ValueError(f"Unknown pooling='{self.pooling}'")
+        if h.shape[1] != self.TSlength:
+            raise ValueError(f"h dim {h.shape[1]} != TSlength {self.TSlength}")
+        z = self.projector(h)
+        return h, z
+# --------------------------
+# Your ResNet1D head (verbatim)
+# --------------------------
+class MyConv1dPadSame(nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size, stride, groups=1):
+        super().__init__()
+        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size, stride=stride, groups=groups)
+        self.kernel_size = kernel_size
+        self.stride = stride
+    def forward(self, x):
+        in_dim = x.shape[-1]
+        out_dim = (in_dim + self.stride - 1) // self.stride
+        p = max(0, (out_dim - 1) * self.stride + self.kernel_size - in_dim)
+        pad_left = p // 2
+        pad_right = p - pad_left
+        x = F.pad(x, (pad_left, pad_right), "constant", 0)
+        return self.conv(x)
+class MyMaxPool1dPadSame(nn.Module):
+    def __init__(self, kernel_size):
+        super().__init__()
+        self.kernel_size = kernel_size
+        self.stride = 1
+        self.max_pool = nn.MaxPool1d(kernel_size=kernel_size)
+    def forward(self, x):
+        in_dim = x.shape[-1]
+        out_dim = (in_dim + self.stride - 1) // self.stride
+        p = max(0, (out_dim - 1) * self.stride + self.kernel_size - in_dim)
+        pad_left = p // 2
+        pad_right = p - pad_left
+        x = F.pad(x, (pad_left, pad_right), "constant", 0)
+        return self.max_pool(x)
+class BasicBlock(nn.Module):
+    """Residual block: (BN -> ReLU -> Dropout -> conv) twice, plus a shortcut.
+    The first BN/ReLU/Dropout group is skipped when ``is_first_block`` is set.
+    When ``downsample`` is set, the first convolution uses ``stride`` and the
+    shortcut is max-pooled with the same value as kernel size; when the
+    channel count grows, the shortcut is zero-padded along the channel axis.
+    """
+    def __init__(self, in_channels, out_channels, kernel_size, stride, groups, downsample, use_bn, use_do, is_first_block=False):
+        super().__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.downsample = downsample
+        self.use_bn = use_bn
+        self.use_do = use_do
+        self.is_first_block = is_first_block
+        # Only downsampling blocks actually stride; all others keep the length.
+        conv_stride = stride if downsample else 1
+        # All sub-modules are always created (even if use_bn/use_do is False),
+        # which keeps the state-dict keys independent of those flags.
+        self.bn1 = nn.BatchNorm1d(in_channels)
+        self.relu1 = nn.ReLU()
+        self.do1 = nn.Dropout(p=0.75)
+        self.conv1 = MyConv1dPadSame(in_channels, out_channels, kernel_size, stride=conv_stride, groups=groups)
+        self.bn2 = nn.BatchNorm1d(out_channels)
+        self.relu2 = nn.ReLU()
+        self.do2 = nn.Dropout(p=0.75)
+        self.conv2 = MyConv1dPadSame(out_channels, out_channels, kernel_size, stride=1, groups=groups)
+        # Pooling applied to the shortcut; kernel size 1 when not downsampling.
+        self.max_pool = MyMaxPool1dPadSame(kernel_size=conv_stride)
+    def forward(self, x):
+        # assumes x is [batch, in_channels, length] -- TODO confirm against caller
+        identity = x
+        out = x
+        # Pre-activation for conv1, skipped on the very first block.
+        if not self.is_first_block:
+            if self.use_bn:
+                out = self.bn1(out)
+            out = self.relu1(out)
+            if self.use_do:
+                out = self.do1(out)
+        out = self.conv1(out)
+        # Pre-activation for conv2.
+        if self.use_bn:
+            out = self.bn2(out)
+        out = self.relu2(out)
+        if self.use_do:
+            out = self.do2(out)
+        out = self.conv2(out)
+        # Shortcut path: pool it when the main path was strided.
+        if self.downsample:
+            identity = self.max_pool(identity)
+        # Shortcut path: zero-pad the channel axis when the width changed.
+        # F.pad pads the last axis, so channels are moved last and back again.
+        if self.out_channels != self.in_channels:
+            identity = identity.transpose(-1, -2)
+            ch1 = (self.out_channels - self.in_channels) // 2
+            ch2 = self.out_channels - self.in_channels - ch1
+            identity = F.pad(identity, (ch1, ch2), "constant", 0)
+            identity = identity.transpose(-1, -2)
+        # In-place residual addition.
+        out += identity
+        return out
+class ResNet1D(nn.Module):
+    """1-D ResNet classifier: stem conv, ``n_block`` BasicBlocks, pooled linear head.
+    Block ``i`` downsamples when ``i % downsample_gap == 1`` and doubles its
+    channel count when ``i`` is a non-zero multiple of ``increasefilter_gap``.
+    ``forward`` averages over the last axis and returns the output of the
+    final linear layer (``n_classes`` values per sample).
+    """
+    def __init__(
+        self,
+        in_channels,
+        base_filters,
+        kernel_size,
+        stride,
+        groups,
+        n_block,
+        n_classes,
+        downsample_gap=2,
+        increasefilter_gap=4,
+        use_bn=True,
+        use_do=True,
+        verbose=False
+    ):
+        super().__init__()
+        # verbose is stored but not read anywhere in this class.
+        self.verbose = verbose
+        self.n_block = n_block
+        self.kernel_size = kernel_size
+        self.stride = stride
+        self.groups = groups
+        self.use_bn = use_bn
+        self.use_do = use_do
+        self.downsample_gap = downsample_gap
+        self.increasefilter_gap = increasefilter_gap
+        # Stem: stride-1 convolution; groups is not passed, so it uses the default of 1.
+        self.first_block_conv = MyConv1dPadSame(in_channels, base_filters, kernel_size=self.kernel_size, stride=1)
+        self.first_block_bn = nn.BatchNorm1d(base_filters)
+        self.first_block_relu = nn.ReLU()
+        out_channels = base_filters
+        self.basicblock_list = nn.ModuleList()
+        for i_block in range(self.n_block):
+            is_first_block = (i_block == 0)
+            # Downsample on blocks 1, 1 + gap, 1 + 2*gap, ...
+            downsample = (i_block % self.downsample_gap == 1)
+            if is_first_block:
+                in_ch = base_filters
+                out_ch = in_ch
+            else:
+                # Input width equals the previous block's output width:
+                # base_filters doubled once per completed increasefilter_gap.
+                in_ch = int(base_filters * 2 ** ((i_block - 1) // self.increasefilter_gap))
+                if (i_block % self.increasefilter_gap == 0) and (i_block != 0):
+                    out_ch = in_ch * 2
+                else:
+                    out_ch = in_ch
+            block = BasicBlock(
+                in_channels=in_ch,
+                out_channels=out_ch,
+                kernel_size=self.kernel_size,
+                stride=self.stride,
+                groups=self.groups,
+                downsample=downsample,
+                use_bn=self.use_bn,
+                use_do=self.use_do,
+                is_first_block=is_first_block,
+            )
+            self.basicblock_list.append(block)
+            # Remember the width of the last block for the classifier layers.
+            out_channels = out_ch
+        self.final_bn = nn.BatchNorm1d(out_channels)
+        self.final_relu = nn.ReLU(inplace=True)
+        self.dense = nn.Linear(out_channels, n_classes)
+    def forward(self, x):
+        # assumes x is [batch, in_channels, length] -- TODO confirm against caller
+        out = self.first_block_conv(x)
+        if self.use_bn:
+            out = self.first_block_bn(out)
+        out = self.first_block_relu(out)
+        for block in self.basicblock_list:
+            out = block(out)
+        if self.use_bn:
+            out = self.final_bn(out)
+        out = self.final_relu(out)
+        # Average over the last axis, then classify.
+        out = out.mean(-1)
+        out = self.dense(out)
+        return out
+# --------------------------
+# HF model: encoder + ResNet1D head
+# --------------------------
+class NeuroCLRForSequenceClassification(PreTrainedModel):
+    """
+    Expected input x: [B, 200, 128]
+    - runs encoder per ROI: [B,1,128] -> h_r [B,128]
+    - stacks into H: [B,200,128]
+    - feeds ResNet1D: [B,200,128] -> logits
+    """
+    config_class = NeuroCLRConfig
+    base_model_prefix = "neuroclr"
+    def __init__(self, config: NeuroCLRConfig):
+        super().__init__(config)
+        self.encoder = NeuroCLR(config)
+        # Freeze the encoder
+        for p in self.encoder.parameters():
+            p.requires_grad = False
+        self.head = ResNet1D(
+            in_channels=config.n_rois,
+            base_filters=config.base_filters,
+            kernel_size=config.kernel_size,
+            stride=config.stride,
+            groups=config.groups,
+            n_block=config.n_block,
+            n_classes=config.num_labels,
+            downsample_gap=config.downsample_gap,
+            increasefilter_gap=config.increasefilter_gap,
+            use_bn=config.use_bn,
+            use_do=config.use_do,
+        )
+        self.post_init()
+    def forward(self, x: torch.Tensor, labels: torch.Tensor = None, **kwargs):
+        # x: [B, 200, 128]
+        if x.ndim != 3 or x.shape[1] != self.config.n_rois or x.shape[2] != self.config.TSlength:
+            raise ValueError(
+                f"Expected x shape [B,{self.config.n_rois},{self.config.TSlength}] but got {tuple(x.shape)}"
+            )
+        B, R, L = x.shape
+        # Encode each ROI independently (ROI-wise SSL)
+        hs = []
+        for r in range(R):
+            xr = x[:, r, :].unsqueeze(1)  # [B,1,128]
+            with torch.no_grad():
+                h, _ = self.encoder(xr)
+            # h, _ = self.encoder(xr)  # h: [B,128]
+            hs.append(h.unsqueeze(1))  # [B,1,128]
+        H = torch.cat(hs, dim=1)  # [B,200,128]
+        logits = self.head(H)     # head expects [B,200,128]
+        loss = None
+        if labels is not None:
+            loss = nn.CrossEntropyLoss()(logits, labels)
+        return {"loss": loss, "logits": logits}

pretraining/config.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "TSlength": 128,
+  "architectures": [
+    "NeuroCLRModel"
+  ],
+  "auto_map": {
+    "AutoConfig": "configuration_neuroclr.NeuroCLRConfig",
+    "AutoModel": "modeling_neuroclr.NeuroCLRModel"
+  },
+  "model_type": "neuroclr",
+  "nhead": 2,
+  "nlayer": 2,
+  "normalize_input": true,
+  "pooling": "flatten",
+  "projector_out1": 128,
+  "projector_out2": 64,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2"
+}

pretraining/configuration_neuroclr.py ADDED Viewed

	@@ -0,0 +1,31 @@

+from transformers import PretrainedConfig
+class NeuroCLRConfig(PretrainedConfig):
+    model_type = "neuroclr"
+    def __init__(
+        self,
+        TSlength: int = 128,
+        nhead: int = 2,
+        nlayer: int = 2,
+        projector_out1: int = 128,
+        projector_out2: int = 64,
+        # classification
+        num_labels: int = 2,
+        # pooling to avoid flatten dimension mismatch
+        pooling: str = "flatten",  # "mean" recommended; "flatten" only if seq_len==1
+        normalize_input: bool = True,
+        **kwargs
+    ):
+        super().__init__(**kwargs)
+        self.TSlength = TSlength
+        self.nhead = nhead
+        self.nlayer = nlayer
+        self.projector_out1 = projector_out1
+        self.projector_out2 = projector_out2
+        self.num_labels = num_labels
+        self.pooling = pooling
+        self.normalize_input = normalize_input

pretraining/export_pretraining_to_hf.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import torch
+from configuration_neuroclr import NeuroCLRConfig
+from modeling_neuroclr import NeuroCLRModel
+# ---- EDIT these to match your training ----
+CFG = dict(
+    TSlength=128,
+    nhead=2,
+    nlayer=2,
+    projector_out1=128,
+    projector_out2=64,
+    pooling="flatten",       # because input is [B,1,128]
+    normalize_input=True,
+)
+CKPT_PATH = ""
+OUT_DIR = "."  # saves into pretraining/ folder
+# ------------------------------------------
+def remap_state_dict(sd):
+    new_sd = {}
+    for k, v in sd.items():
+        k2 = k.replace("module.", "")  # if DDP ever used
+        if k2.startswith("transformer_encoder.") or k2.startswith("projector."):
+            new_sd["neuroclr." + k2] = v
+        else:
+            # keep anything else as-is (usually none)
+            new_sd[k2] = v
+    return new_sd
+def main():
+    config = NeuroCLRConfig(**CFG)
+    # This enables AutoModel loading from this folder
+    config.auto_map = {
+        "AutoConfig": "configuration_neuroclr.NeuroCLRConfig",
+        "AutoModel": "modeling_neuroclr.NeuroCLRModel",
+    }
+    model = NeuroCLRModel(config)
+    ckpt = torch.load(CKPT_PATH, map_location="cpu")
+    # Your checkpoint uses model_state_dict
+    if isinstance(ckpt, dict) and "model_state_dict" in ckpt:
+        sd = ckpt["model_state_dict"]
+    elif isinstance(ckpt, dict) and "state_dict" in ckpt:
+        sd = ckpt["state_dict"]
+    else:
+        sd = ckpt
+    sd = remap_state_dict(sd)
+    missing, unexpected = model.load_state_dict(sd, strict=False)
+    print("Missing:", missing)
+    print("Unexpected:", unexpected)
+    model.save_pretrained(OUT_DIR, safe_serialization=True)
+    print("Saved HF pretraining model to:", OUT_DIR)
+if __name__ == "__main__":
+    main()

pretraining/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4f2a85ac990c09ae2debb3796dd0161d7c8f7c14213e62fb917c481f35296279
+size 1164680

pretraining/modeling_neuroclr.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn import TransformerEncoder, TransformerEncoderLayer
+from transformers import PreTrainedModel
+from configuration_neuroclr import NeuroCLRConfig
+class NeuroCLR(nn.Module):
+    """
+    Transformer expects x: [B, S, TSlength] because d_model = TSlength.
+    """
+    def __init__(self, config: NeuroCLRConfig):
+        super().__init__()
+        encoder_layer = TransformerEncoderLayer(
+            d_model=config.TSlength,
+            dim_feedforward=2 * config.TSlength,
+            nhead=config.nhead,
+            batch_first=True,
+        )
+        self.transformer_encoder = TransformerEncoder(encoder_layer, config.nlayer)
+        self.projector = nn.Sequential(
+            nn.Linear(config.TSlength, config.projector_out1),
+            nn.BatchNorm1d(config.projector_out1),
+            nn.ReLU(),
+            nn.Linear(config.projector_out1, config.projector_out2),
+        )
+        self.normalize_input = config.normalize_input
+        self.pooling = config.pooling
+        self.TSlength = config.TSlength
+    def forward(self, x: torch.Tensor):
+        # x: [B, S, TSlength]
+        if self.normalize_input:
+            x = F.normalize(x, dim=-1)
+        x = self.transformer_encoder(x)  # [B, S, TSlength]
+        # Make h shape always [B, TSlength]
+        if self.pooling == "mean":
+            h = x.mean(dim=1)  # [B, TSlength]
+        elif self.pooling == "last":
+            h = x[:, -1, :]    # [B, TSlength]
+        elif self.pooling == "flatten":
+            # ONLY valid if S == 1
+            h = x.reshape(x.shape[0], -1)
+            if h.shape[1] != self.TSlength:
+                raise ValueError(
+                    f"pooling='flatten' requires seq_len==1 so h dim == TSlength. "
+                    f"Got h dim {h.shape[1]} vs TSlength {self.TSlength}."
+                )
+        else:
+            raise ValueError(f"Unknown pooling='{self.pooling}'. Use 'mean', 'last', or 'flatten'.")
+        z = self.projector(h)
+        return h, z
+class NeuroCLRModel(PreTrainedModel):
+    """
+    Loads with:
+      AutoModel.from_pretrained(..., trust_remote_code=True)
+    """
+    config_class = NeuroCLRConfig
+    base_model_prefix = "neuroclr"
+    def __init__(self, config: NeuroCLRConfig):
+        super().__init__(config)
+        self.neuroclr = NeuroCLR(config)
+        self.post_init()
+    def forward(self, x: torch.Tensor, **kwargs):
+        h, z = self.neuroclr(x)
+        return {"h": h, "z": z}

upload_to_hf.py ADDED Viewed

	@@ -0,0 +1,12 @@

+from huggingface_hub import create_repo, upload_folder
+# Target model repository on the Hugging Face Hub.
+REPO_ID = "SaeedLab/NeuroCLR"
+# Uncomment to create the repository first if it does not exist yet:
+# create_repo(REPO_ID, repo_type="model", exist_ok=True)
+# Runs at import time: this file is a script with no main guard.
+# NOTE(review): folder_path="." is the current working directory, so this is
+# presumably meant to be run from the repo root; confirm nothing unwanted
+# (e.g. raw checkpoints) lives there before uploading.
+upload_folder(
+    repo_id=REPO_ID,
+    repo_type="model",
+    folder_path=".",   # uploads pretraining/ and classification/
+)
+print("Uploaded to:", REPO_ID)