Spaces:

RocketFarmStudios
/

CPS-Test-Mobile

Paused

App Files Community

Ali2206 committed on Apr 26

Commit

2639902

verified ·

1 Parent(s): 936692d

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -15

app.py CHANGED Viewed

@@ -7,9 +7,8 @@ import gradio as gr
 # Constants
 MAX_MODEL_TOKENS = 131072
 MAX_NEW_TOKENS = 4096
-MAX_CHUNK_TOKENS = 8192
 PROMPT_OVERHEAD = 300
-BATCH_SIZE = 10  # Bigger batch for faster processing
 # Paths
 persistent_dir = "/data/hf_cache"
@@ -47,14 +46,13 @@ def extract_text_from_excel(path: str) -> str:
         try:
             df = xls.parse(sheet_name).astype(str).fillna("")
         except Exception:
-            continue  # Skip sheet if unreadable
         for idx, row in df.iterrows():
-            # If the row has at least 2 non-empty values and is not totally empty
             non_empty = [cell.strip() for cell in row if cell.strip() != ""]
             if len(non_empty) >= 2:
                 text_line = " | ".join(non_empty)
-                if len(text_line) > 15:  # Ignore very small lines
                     all_text.append(f"[{sheet_name}] {text_line}")
     return "\n".join(all_text)
@@ -94,13 +92,12 @@ def init_agent() -> TxAgent:
     agent.init_model()
     return agent
-# Serial analyze (safe for vLLM)
-def analyze_serial(agent, batch_chunks: List[List[str]]) -> List[str]:
     results = []
-    for idx, batch in enumerate(batch_chunks):
-        prompt = "\n\n".join(build_prompt(chunk) for chunk in batch)
         if estimate_tokens(prompt) > MAX_MODEL_TOKENS:
-            results.append(f"❌ Batch {idx+1} too long. Skipped.")
             continue
         response = ""
         try:
@@ -123,7 +120,7 @@ def analyze_serial(agent, batch_chunks: List[List[str]]) -> List[str]:
                     response += r.content
             results.append(clean_response(response))
         except Exception as e:
-            results.append(f"❌ Error in batch {idx+1}: {str(e)}")
     gc.collect()
     return results
@@ -158,14 +155,13 @@ def process_report(agent, file, messages: List[Dict[str, str]]) -> Tuple[List[Di
     try:
         extracted = extract_text_from_excel(file.name)
         chunks = split_text(extracted)
-        batch_chunks = [chunks[i:i+BATCH_SIZE] for i in range(0, len(chunks), BATCH_SIZE)]
-        messages.append({"role": "assistant", "content": f"🔍 Split into {len(batch_chunks)} batches. Analyzing..."})
-        chunk_results = analyze_serial(agent, batch_chunks)
         valid = [res for res in chunk_results if not res.startswith("❌")]
         if not valid:
-            messages.append({"role": "assistant", "content": "❌ No valid batch outputs."})
             return messages, None
         summary = generate_final_summary(agent, "\n\n".join(valid))

 # Constants
 MAX_MODEL_TOKENS = 131072
 MAX_NEW_TOKENS = 4096
+MAX_CHUNK_TOKENS = 8192  # IMPORTANT: Split input into 8k tokens chunks
 PROMPT_OVERHEAD = 300
 # Paths
 persistent_dir = "/data/hf_cache"
         try:
             df = xls.parse(sheet_name).astype(str).fillna("")
         except Exception:
+            continue
         for idx, row in df.iterrows():
             non_empty = [cell.strip() for cell in row if cell.strip() != ""]
             if len(non_empty) >= 2:
                 text_line = " | ".join(non_empty)
+                if len(text_line) > 15:
                     all_text.append(f"[{sheet_name}] {text_line}")
     return "\n".join(all_text)
     agent.init_model()
     return agent
+def analyze_serial(agent, chunks: List[str]) -> List[str]:
     results = []
+    for idx, chunk in enumerate(chunks):
+        prompt = build_prompt(chunk)
         if estimate_tokens(prompt) > MAX_MODEL_TOKENS:
+            results.append(f"❌ Chunk {idx+1} too long. Skipped.")
             continue
         response = ""
         try:
                     response += r.content
             results.append(clean_response(response))
         except Exception as e:
+            results.append(f"❌ Error in chunk {idx+1}: {str(e)}")
     gc.collect()
     return results
     try:
         extracted = extract_text_from_excel(file.name)
         chunks = split_text(extracted)
+        messages.append({"role": "assistant", "content": f"🔍 Split into {len(chunks)} chunks. Analyzing..."})
+        chunk_results = analyze_serial(agent, chunks)
         valid = [res for res in chunk_results if not res.startswith("❌")]
         if not valid:
+            messages.append({"role": "assistant", "content": "❌ No valid chunk outputs."})
             return messages, None
         summary = generate_final_summary(agent, "\n\n".join(valid))