Update chatNT.py
chatNT.py (CHANGED)
@@ -417,10 +417,6 @@ class TorchBioBrainDecoder(nn.Module):
 
         # Insert the bio embeddings at the SEQ token positions
         processed_tokens_ids = english_token_ids.clone()
-        print("(debug) Before call tokens embeddings shape : ", tokens_embeddings.shape)
-        print("(debug) Before call Processed tokens ids shape : ", processed_tokens_ids.shape)
-        print("(debug) Before call Projected bio embeddings shape : ", projected_bio_embeddings.shape)
-        print("num bio sequences : ", num_bio_sequences)
         for bio_seq_num in range(num_bio_sequences):
             tokens_embeddings, processed_tokens_ids = self.insert_embeddings(
                 processed_tokens_ids,
@@ -428,7 +424,6 @@ class TorchBioBrainDecoder(nn.Module):
                 projected_bio_embeddings[:, bio_seq_num, :, :],
                 bio_seq_num=bio_seq_num,
             )
-        print("After call : ", tokens_embeddings.shape)
 
         # Regular GPT pass through
         embeddings = self.gpt_model.apply_transformer_layers(tokens_embeddings)
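The loop in this first hunk splices one bio sequence per iteration into the English embedding stream, so the sequence axis grows on every pass. A minimal shape-bookkeeping sketch under assumed dimensions (B, T, D, N, R are illustrative; the stand-in below only appends, whereas the real insert_embeddings splices at the SEQ position):

```python
# Illustrative sketch, not the repository code: with batch B, prompt length T,
# and N bio sequences of R resampled embeddings each, the sequence axis ends
# up at T + N * R after the loop.
import torch

B, T, D, N, R = 2, 10, 16, 3, 4
tokens_embeddings = torch.randn(B, T, D)
projected_bio_embeddings = torch.randn(B, N, R, D)

for bio_seq_num in range(N):
    # Stand-in for self.insert_embeddings: append-only here, just to show the
    # shape bookkeeping; the real method splices at the SEQ token position.
    tokens_embeddings = torch.cat(
        [tokens_embeddings, projected_bio_embeddings[:, bio_seq_num, :, :]],
        dim=1,
    )

print(tokens_embeddings.shape)  # torch.Size([2, 22, 16]) == (B, T + N * R, D)
```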
@@ -471,8 +466,6 @@ class TorchBioBrainDecoder(nn.Module):
             - input_embeddings with resampled_embeddings inserted at the SEQ token
             - tokens with the SEQ token set to -1
         """
-        print("Tokens : ", list(tokens))
-        print("seq_token_id : ", self.seq_token_id)
 
         def _insert(
             tokens_1d: torch.Tensor,
@@ -488,7 +481,6 @@ class TorchBioBrainDecoder(nn.Module):
             """
             indices = torch.where(tokens_1d == self.seq_token_id)[0]
             if indices.numel() > 0:
-                print("going in if")
                 idx = indices[0].item()
                 insertion_pos = idx + resampled_embeddings_1d.shape[-2] * bio_seq_num
                 x = torch.cat(
@@ -505,7 +497,6 @@ class TorchBioBrainDecoder(nn.Module):
                 tokens_1d[idx] = -1
                 return x, tokens_1d
             else:
-                print("going in else")
                 return (
                     input_embeddings,
                     tokens_1d,
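These hunks cover the per-row helper _insert: find the first remaining SEQ placeholder, splice the resampled embeddings in with torch.cat, and mark the placeholder consumed by setting its token to -1. Below is a self-contained single-row sketch of the same pattern (SEQ_TOKEN_ID, R, and insert_one are illustrative names, not the repository API). Note the insertion_pos offset of R * bio_seq_num: it is needed because the embedding stream has already grown by R per earlier bio sequence while tokens_1d keeps its original length.

```python
# Minimal single-row sketch of the _insert logic shown above (assumed names).
import torch

SEQ_TOKEN_ID = 32000  # hypothetical placeholder id
R = 4                 # resampled embeddings per bio sequence

def insert_one(tokens_1d, input_embeddings, resampled, bio_seq_num):
    indices = torch.where(tokens_1d == SEQ_TOKEN_ID)[0]
    if indices.numel() == 0:
        return input_embeddings, tokens_1d  # nothing left to insert
    idx = indices[0].item()
    # Token indices lag the embedding stream by R per previous insertion.
    insertion_pos = idx + resampled.shape[-2] * bio_seq_num
    x = torch.cat(
        [input_embeddings[:insertion_pos], resampled, input_embeddings[insertion_pos:]],
        dim=0,
    )
    tokens_1d = tokens_1d.clone()
    tokens_1d[idx] = -1  # consume this SEQ token so the next pass finds the next one
    return x, tokens_1d

tokens = torch.tensor([1, SEQ_TOKEN_ID, 2, SEQ_TOKEN_ID, 3])
emb = torch.randn(5, 8)
emb, tokens = insert_one(tokens, emb, torch.randn(R, 8), bio_seq_num=0)
emb, tokens = insert_one(tokens, emb, torch.randn(R, 8), bio_seq_num=1)
print(emb.shape)        # torch.Size([13, 8]) -> 5 + 2 * R
print(tokens.tolist())  # [1, -1, 2, -1, 3] -> both SEQ positions consumed
```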
@@ -680,6 +671,11 @@ class TorchMultiOmicsModel(PreTrainedModel):
             Shape (batch_size, num_bio_sequences, ?, embed_dim)
         """
         english_token_ids, bio_token_ids = multi_omics_tokens_ids
+        english_token_ids = english_token_ids.clone()
+        bio_token_ids = bio_token_ids.clone()
+        projection_english_tokens_ids = projection_english_tokens_ids.clone()
+        if projected_bio_embeddings is not None:
+            projected_bio_embeddings = projected_bio_embeddings.clone()
 
         # Replace config.vocab_size value in english tokens
         # We do this because the default vocab size (32000) doesn't match with the
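The lines added here make the model work on private copies of its inputs. That matters because the tensors are mutated in place further down, for example the vocab-size clamp on english_token_ids visible in the next hunk. A toy demonstration of the aliasing behavior this avoids (plain PyTorch on dummy tensors, not the repository code):

```python
import torch

vocab_size = 10
caller_ids = torch.tensor([3, 12, 5, 12])

# Without clone: the in-place clamp writes through to the caller's tensor.
ids = caller_ids
ids[ids >= vocab_size] = vocab_size - 1
print(caller_ids.tolist())  # [3, 9, 5, 9] -> caller sees the mutation

# With clone: the method operates on its own copy.
caller_ids = torch.tensor([3, 12, 5, 12])
ids = caller_ids.clone()
ids[ids >= vocab_size] = vocab_size - 1
print(caller_ids.tolist())  # [3, 12, 5, 12] -> caller unaffected
```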
@@ -698,8 +694,6 @@ class TorchMultiOmicsModel(PreTrainedModel):
             vocab_size - 1
         )
 
-        print("seq token id : ", self.seq_token_id)
-        print("Tokens at step 1 in multiomics : ", list(english_token_ids))
         if bio_token_ids is None:
             projected_bio_embeddings = None
         else:
@@ -724,9 +718,7 @@ class TorchMultiOmicsModel(PreTrainedModel):
             ]
             projected_bio_embeddings = torch.stack(projected_bio_embeddings, dim=1)
 
-        # decode
-        print("Tokens at step 2 in multiomics : ", list(english_token_ids))
-
+        # decode
         logits = self.biobrain_decoder(
             english_token_ids=english_token_ids,
             projected_bio_embeddings=projected_bio_embeddings,