Update generate.py
generate.py (+12, -10)
@@ -45,15 +45,12 @@ def custom_generate(
 ):
     if input_ids is None or input_ids.nelement() == 0:
         # If input_ids is None or an empty tensor, create a default input tensor
-        input_ids = torch.LongTensor([[self.tokenizer.bos_token_id]])
-        attention_mask = torch.ones_like(input_ids)
+        input_ids = torch.LongTensor([[self.tokenizer.bos_token_id]]).to(self.device)
+        attention_mask = torch.ones_like(input_ids).to(self.device)
 
     device = input_ids.device
     with torch.no_grad():
         batch_size = input_ids.shape[0]
-        if max_new_tokens is None:
-            raise ValueError("max_new_tokens must be provided.")
-
         finished_generating = torch.zeros(batch_size, dtype=torch.bool, device=device)
         generated_token_ids = torch.full((batch_size, max_new_tokens), self.tokenizer.pad_token_id, dtype=torch.long, device=device)
 
@@ -156,10 +153,10 @@ def generate(
     torch_dtype=torch.bfloat16,
     **model_kwargs,
 ):
-    # Set default value for max_new_tokens if not provided
-    if max_new_tokens is None:
-        max_new_tokens = 128  # Set a reasonable default value
 
+    if max_new_tokens is None:
+        max_new_tokens = 128
+
     # Set model attributes
     self.max_thoughts = n_ahead + n_ahead_talk + 1
     self.merged_talk_heads = merged_talk_heads
@@ -186,11 +183,16 @@ def generate(
     if isinstance(input_ids, str):
         input_ids = self.tokenizer.encode(input_ids, return_tensors='pt')
 
+    # Move input_ids and attention_mask to the same device as the model
+    input_ids = input_ids.to(self.device)
+    if attention_mask is not None:
+        attention_mask = attention_mask.to(self.device)
+
     generated_token_ids = custom_generate(
         self,
-        input_ids=input_ids,
+        input_ids=input_ids,
         attention_mask=attention_mask,
-        max_new_tokens=max_new_tokens,
+        max_new_tokens=max_new_tokens,
         min_length=min_length,
         do_sample=do_sample,
         early_stopping=early_stopping,
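The net effect of this commit is that generate() no longer raises when max_new_tokens is omitted (it falls back to 128) and that inputs built on the CPU are moved to the model's device before custom_generate() runs. A minimal, hypothetical usage sketch follows; the checkpoint id, the assumption that generate() is bound as the model's generation entry point, and the decoding call are illustrative and not part of this commit.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder checkpoint id; any model wired up to this generate.py would do.
model_name = "your-org/your-model"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
model = model.to("cuda" if torch.cuda.is_available() else "cpu")

# Inputs can stay on the CPU: the patched generate() moves input_ids and
# attention_mask to the model's device before calling custom_generate().
inputs = tokenizer("What is 2 + 2?", return_tensors="pt")

# max_new_tokens may now be omitted; it defaults to 128 instead of raising.
output_ids = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))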