taruschirag committed on
Commit
d3f43ac
·
verified ·
1 Parent(s): 6f8b750

Update app.py

Browse files

using binary search

Files changed (1) hide show
  1. app.py +95 -34
app.py CHANGED
@@ -133,61 +133,122 @@ class ModelWrapper:
133
  MODEL_NAME = "Qwen/Qwen3-0.6B"
134
  model = ModelWrapper(MODEL_NAME)
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  # — Gradio inference function —
137
  def compliance_check(rules_text, transcript_text, thinking):
138
- # This is the general byte limit for any response leaving this function
139
- MAX_RESPONSE_BYTES = 4096
 
 
 
140
 
141
  try:
142
- rules = [r for r in rules_text.split("\n") if r.strip()]
 
 
 
 
 
 
 
 
 
 
143
  inp = format_rules(rules) + format_transcript(transcript_text)
144
 
145
- out = model.get_response(inp, enable_thinking=thinking, max_new_tokens=256)
 
146
 
147
- # Clean up the successful output
148
  out = str(out).strip()
149
  if not out:
150
- out = "No response generated. Please try with different input."
151
 
 
 
 
152
  except Exception as e:
153
- # If any error happens, create an error message instead
154
- full_error = str(e)
155
- print(f"Full error: {full_error}") # Log the full error for debugging
156
- # The output 'out' will now be this error message
157
- out = f"An error occurred: {full_error}"
158
-
159
- # --- UNIVERSAL BYTE CHECK FOR ALL OUTPUTS ---
160
- # This block now runs for both successful results and error messages.
 
161
 
162
- # Encode the final output (whether result or error) to bytes
163
- out_bytes = out.encode('utf-8')
164
-
165
- # Check if the byte length exceeds our limit
166
- if len(out_bytes) > MAX_RESPONSE_BYTES:
167
- # Truncate the byte string safely
168
- truncated_bytes = out_bytes[:MAX_RESPONSE_BYTES]
169
- out = truncated_bytes.decode('utf-8', errors='ignore')
170
- # Add a clear message indicating it was truncated
171
- out += "\n\n[Response truncated to prevent server errors]"
172
-
173
  return out
174
 
175
-
176
-
177
- # — build Gradio interface —
178
  demo = gr.Interface(
179
  fn=compliance_check,
180
  inputs=[
181
- gr.Textbox(lines=5, label="Rules (one per line)", max_lines=10),
182
- gr.Textbox(lines=10, label="Transcript", max_lines=15),
 
 
 
 
 
 
 
 
 
 
183
  gr.Checkbox(label="Enable ⟨think⟩ mode", value=True)
184
  ],
185
- outputs=gr.Textbox(label="Compliance Output", lines=10, max_lines=15),
 
 
 
 
 
186
  title="DynaGuard Compliance Checker",
187
- description="Paste your rules & transcript, then hit Submit.",
188
  allow_flagging="never",
189
- show_progress=True
 
 
190
  )
191
 
192
  if __name__ == "__main__":
193
- demo.launch()
 
 
 
 
 
 
 
 
133
  MODEL_NAME = "Qwen/Qwen3-0.6B"
134
  model = ModelWrapper(MODEL_NAME)
135
 
136
def safe_truncate_to_bytes(text, max_bytes=3000, safety_buffer=100):
    """Truncate *text* so its UTF-8 encoding fits within ``max_bytes``.

    ``safety_buffer`` bytes are reserved below the ceiling so that the
    appended truncation notice cannot push the result past the hard limit.

    Args:
        text: String to bound.
        max_bytes: Hard ceiling on the UTF-8 byte length of the result.
        safety_buffer: Headroom reserved under the ceiling for the notice.

    Returns:
        ``text`` unchanged when it already fits; otherwise the longest
        whole-character prefix that fits in the target budget, followed by
        a truncation notice.
    """
    # Reserve headroom for the notice; clamp so a tiny max_bytes cannot
    # produce a negative target (which previously made empty input return
    # only the notice).
    target_bytes = max(max_bytes - safety_buffer, 0)

    encoded = text.encode('utf-8')
    if len(encoded) <= target_bytes:
        return text

    # Slice the byte string at the budget and drop any trailing partial
    # multi-byte sequence via errors='ignore'.  This yields exactly the
    # longest whole-character prefix that fits -- the same result as a
    # binary search over character counts, but in one O(n) pass instead
    # of O(n log n) repeated re-encodes of the full string.
    result = encoded[:target_bytes].decode('utf-8', errors='ignore')

    return result + "\n\n[Response truncated to prevent server errors]"
168
# — Gradio inference function —
def compliance_check(rules_text, transcript_text, thinking):
    """Run the compliance model over a rules/transcript pair.

    Validates the inputs, queries the model, converts any failure into a
    readable error string, and byte-bounds whatever string goes back to
    the UI.

    Args:
        rules_text: Newline-separated rules, one per line.
        transcript_text: Transcript to analyze.
        thinking: Whether to enable the model's ⟨think⟩ mode.

    Returns:
        The model's reply or an error message, truncated to the byte limit.
    """
    # Conservative ceiling for anything returned to the client.
    MAX_RESPONSE_BYTES = 3000

    try:
        # Guard clauses: reject blank inputs before touching the model.
        if not rules_text.strip():
            return "Error: Please provide at least one rule."
        if not transcript_text.strip():
            return "Error: Please provide a transcript to analyze."

        rules = [line.strip() for line in rules_text.split("\n") if line.strip()]
        if not rules:
            return "Error: No valid rules found. Please enter rules separated by newlines."

        prompt = format_rules(rules) + format_transcript(transcript_text)

        # Keep the token budget small to bound latency and response size.
        reply = model.get_response(prompt, enable_thinking=thinking, max_new_tokens=200)

        out = str(reply).strip()
        if not out:
            out = "No response generated. Please try with different input or check your model configuration."

    except torch.cuda.OutOfMemoryError:
        out = "Error: GPU out of memory. Try with shorter input text or restart the application."

    except Exception as e:
        # Surface a concise message to the user; log the full one.
        message = str(e)
        if len(message) > 200:
            message = message[:200] + "..."
        out = f"Processing error: {message}"
        print(f"Full error details: {str(e)}")  # Log full error for debugging

    # Bound every outgoing string — success and error cases alike.
    return safe_truncate_to_bytes(out, MAX_RESPONSE_BYTES)
213
 
214
# — build Gradio interface with better configuration —
# Widgets are built up-front under descriptive names, then wired together.
rules_input = gr.Textbox(
    lines=5,
    label="Rules (one per line)",
    max_lines=10,
    placeholder="Enter compliance rules, one per line..."
)
transcript_input = gr.Textbox(
    lines=10,
    label="Transcript",
    max_lines=15,
    placeholder="Paste the transcript to analyze..."
)
thinking_toggle = gr.Checkbox(label="Enable ⟨think⟩ mode", value=True)
result_box = gr.Textbox(
    label="Compliance Output",
    lines=10,
    max_lines=15,
    show_copy_button=True
)

demo = gr.Interface(
    fn=compliance_check,
    inputs=[rules_input, transcript_input, thinking_toggle],
    outputs=result_box,
    title="DynaGuard Compliance Checker",
    description="Paste your rules & transcript, then hit Submit. Responses are automatically sized for optimal performance.",
    allow_flagging="never",
    show_progress=True,
    # Disable example caching for better stability at startup.
    cache_examples=False
)
245
 
246
if __name__ == "__main__":
    # Conservative launch settings: bind all interfaces on the standard
    # Gradio port, surface server errors, log normally, stay headless.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
        "quiet": False,
        "inbrowser": False,
    }
    demo.launch(**launch_options)