Claude committed on
Commit d40e922 · unverified · 1 parent: cbe6e22

Refactor app.py to use BigQuery for PR mining


Major changes:
- Remove debug mode and argparse completely
- Add BigQuery client and mining functions
- Remove GitHub API complexity (TokenPool, rate limiting, time partitioning)
- Replace daily incremental updates with weekly BigQuery mining
- Add UPDATE_TIME_FRAME_DAYS configuration (30 days)
- Update save_pr_metadata_to_hf to use upload_large_folder with complete overwrite
- Add top_n filtering to monthly metrics (show top 5 agents)
- Replace hardcoded colors with HSL color generation
- Replace hardcoded month references with LEADERBOARD_TIME_FRAME_DAYS // 30
- Rename get_daily_files_last_n_months to get_daily_files_last_time_frame
- Clean up unused imports (remove threading, datasets)

This makes the app more efficient: weekly BigQuery batch mining replaces the rate-limited, incremental GitHub API updates. Rough sketches of the new mining path and the HSL color generation follow below.
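Only the new `from google.cloud import bigquery` import is visible in the rendered hunks below; the mining query and the weekly job themselves fall outside this excerpt. The following is a minimal, hedged sketch of what weekly BigQuery mining could look like — the `mine_prs_from_bigquery` helper, the table and column names, and the Sunday schedule are assumptions, not taken from the commit.

from datetime import datetime, timedelta, timezone

from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger
from google.cloud import bigquery

UPDATE_TIME_FRAME_DAYS = 30  # matches the new configuration value


def mine_prs_from_bigquery(agent_identifier: str) -> list[dict]:
    """Batch-mine PR metadata for one agent (sketch; table/columns are placeholders)."""
    client = bigquery.Client()  # credentials via GOOGLE_APPLICATION_CREDENTIALS
    cutoff = datetime.now(timezone.utc) - timedelta(days=UPDATE_TIME_FRAME_DAYS)
    sql = """
        SELECT html_url, created_at, merged_at, closed_at
        FROM `some_project.some_dataset.pull_requests`  -- placeholder table
        WHERE author = @author AND created_at >= @cutoff
    """
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter("author", "STRING", agent_identifier),
            bigquery.ScalarQueryParameter("cutoff", "TIMESTAMP", cutoff),
        ]
    )
    rows = client.query(sql, job_config=job_config).result()
    return [
        {
            "html_url": row["html_url"],
            "created_at": row["created_at"].isoformat() if row["created_at"] else None,
            "merged_at": row["merged_at"].isoformat() if row["merged_at"] else None,
            "closed_at": row["closed_at"].isoformat() if row["closed_at"] else None,
        }
        for row in rows
    ]


def weekly_mining_job() -> None:
    """Illustrative entry point: mine every registered agent in one batch."""
    for identifier in ["example-agent[bot]"]:  # placeholder; app.py loads agents from HF
        metadata = mine_prs_from_bigquery(identifier)
        print(f"{identifier}: {len(metadata)} PRs mined")


# Weekly schedule replacing the old daily CronTrigger(hour=0, minute=0);
# the day-of-week choice here is an assumption.
scheduler = BackgroundScheduler(timezone="UTC")
scheduler.add_job(
    weekly_mining_job,
    trigger=CronTrigger(day_of_week="sun", hour=0, minute=0),
    id="weekly_bigquery_pr_mining",
    replace_existing=True,
)
scheduler.start()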

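The "HSL color generation" bullet is likewise not visible in the rendered hunks. A small sketch of the idea, with illustrative saturation/lightness values (the exact numbers in app.py may differ):

def generate_agent_colors(n_agents: int) -> list[str]:
    """Evenly space hues around the color wheel so each agent gets a distinct color.

    Plotly accepts CSS 'hsl(h, s%, l%)' strings directly; 70%/50% are
    illustrative saturation/lightness choices.
    """
    n = max(n_agents, 1)
    return [f"hsl({int(360 * i / n)}, 70%, 50%)" for i in range(n)]


colors = generate_agent_colors(5)  # e.g. one color per top-5 agent in the monthly plot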
Files changed (1)
  1. app.py +444 -1226
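The `upload_large_folder` change is also outside the visible hunks. A minimal, hedged sketch of that call (the local staging directory is hypothetical, and whether an explicit delete precedes the upload for the "complete overwrite" is not shown here):

from huggingface_hub import HfApi

api = HfApi()  # token picked up from HF_TOKEN / cached login
api.upload_large_folder(
    repo_id="SWE-Arena/pr_metadata",
    repo_type="dataset",
    folder_path="/tmp/pr_metadata_export",  # hypothetical local staging dir
)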
app.py CHANGED
@@ -1,54 +1,33 @@
  import gradio as gr
- from gradio_leaderboard import Leaderboard
  import json
  import os
  import time
  import requests
  from datetime import datetime, timezone, timedelta
  from collections import defaultdict
  from huggingface_hub import HfApi, hf_hub_download
- from datasets import load_dataset, Dataset
- import threading
  from dotenv import load_dotenv
  import pandas as pd
  import random
- import argparse
  import plotly.graph_objects as go
  from plotly.subplots import make_subplots
  from apscheduler.schedulers.background import BackgroundScheduler
  from apscheduler.triggers.cron import CronTrigger

  # Load environment variables
  load_dotenv()

- # Parse command-line arguments
- parser = argparse.ArgumentParser(description='SWE Agent PR Leaderboard')
- parser.add_argument('--debug', '--DEBUG', action='store_true',
-                     help='Enable debug mode (limits PR retrieval to 10 per query pattern)')
- parser.add_argument('--no-debug', '--production', action='store_true',
-                     help='Explicitly disable debug mode (force production mode)')
- args = parser.parse_args()
-
  # =============================================================================
  # CONFIGURATION
  # =============================================================================

- # DEBUG MODE: Set to True to limit PR retrieval for testing
- # When enabled, only fetches up to 10 PRs per query pattern per agent
- # Priority: 1) Command-line args, 2) Environment variable, 3) Default (False)
- if args.no_debug:
-     DEBUG_MODE = False
- elif args.debug:
-     DEBUG_MODE = True
- else:
-     DEBUG_MODE = os.getenv('DEBUG_MODE', 'False').lower() in ('true', '1', 'yes')
-
- # In-memory cache for debug mode (data persists during session but NOT saved to HF)
- DEBUG_PR_METADATA_CACHE = defaultdict(list)
-
  AGENTS_REPO = "SWE-Arena/swe_agents"  # HuggingFace dataset for agent metadata
  PR_METADATA_REPO = "SWE-Arena/pr_metadata"  # HuggingFace dataset for PR metadata
- LEADERBOARD_TIME_FRAME_DAYS = 180  # Time frame for leaderboard (past 6 months)

  LEADERBOARD_COLUMNS = [
      ("Agent Name", "string"),
@@ -66,7 +45,7 @@ def load_jsonl(filename):
      """Load JSONL file and return list of dictionaries."""
      if not os.path.exists(filename):
          return []
-
      data = []
      with open(filename, 'r', encoding='utf-8') as f:
          for line in f:
@@ -87,784 +66,286 @@ def save_jsonl(filename, data):
87
  f.write(json.dumps(item) + '\n')
88
 
89
 
90
- def cache_to_dict(cache_list):
91
- """Convert list of cache entries to dictionary by identifier."""
92
- return {entry['github_identifier']: entry for entry in cache_list}
93
-
94
-
95
- def dict_to_cache(cache_dict):
96
- """Convert dictionary back to list of values."""
97
- return list(cache_dict.values())
98
-
99
-
100
- def normalize_date_format(date_string):
101
- """
102
- Convert date strings to standardized ISO 8601 format with Z suffix.
103
- Handles both old format (2025-10-15T23:23:47.983068) and new format (2025-10-15T23:23:47Z).
104
- """
105
- if not date_string or date_string == 'N/A':
106
- return 'N/A'
107
-
108
- try:
109
- # Parse the date string (handles both with and without microseconds)
110
- if '.' in date_string:
111
- # Old format with microseconds
112
- dt = datetime.fromisoformat(date_string.replace('Z', '+00:00'))
113
- else:
114
- # Already in correct format or GitHub format
115
- return date_string
116
-
117
- # Convert to standardized format
118
- return dt.strftime('%Y-%m-%dT%H:%M:%SZ')
119
- except Exception as e:
120
- print(f"Warning: Could not parse date '{date_string}': {e}")
121
- return date_string
122
-
123
-
124
  # =============================================================================
125
- # GITHUB API OPERATIONS
126
  # =============================================================================
127
 
128
- def request_with_backoff(method, url, *, headers=None, params=None, json_body=None, data=None, max_retries=10, timeout=30, token_pool=None, token=None):
129
  """
130
- Perform an HTTP request with exponential backoff and jitter for GitHub API.
131
- Retries on 403/429 (rate limits), 5xx server errors, and transient network exceptions.
132
 
133
- Args:
134
- token_pool: Optional TokenPool instance for marking rate-limited tokens
135
- token: Optional token string used for this request (for rate limit tracking)
136
-
137
- Returns the final requests.Response on success or non-retryable status, or None after exhausting retries.
138
  """
139
- delay = 1.0
140
- for attempt in range(max_retries):
141
- try:
142
- resp = requests.request(
143
- method,
144
- url,
145
- headers=headers or {},
146
- params=params,
147
- json=json_body,
148
- data=data,
149
- timeout=timeout
150
- )
151
 
152
- status = resp.status_code
 
 
 
 
153
 
154
- # Success
155
- if 200 <= status < 300:
156
- return resp
157
 
158
- # Rate limits or server errors -> retry with backoff
159
- if status in (403, 429) or 500 <= status < 600:
160
- wait = None
161
- reset_timestamp = None
162
 
163
- # Prefer Retry-After when present
164
- retry_after = resp.headers.get('Retry-After') or resp.headers.get('retry-after')
165
- if retry_after:
166
- try:
167
- wait = float(retry_after)
168
- except Exception:
169
- wait = None
170
-
171
- # Fallback to X-RateLimit-Reset when 403/429
172
- if wait is None and status in (403, 429):
173
- reset_hdr = resp.headers.get('X-RateLimit-Reset') or resp.headers.get('x-ratelimit-reset')
174
- if reset_hdr:
175
- try:
176
- reset_ts = int(float(reset_hdr))
177
- reset_timestamp = reset_ts
178
- wait = max(reset_ts - time.time() + 2, 1)
179
- except Exception:
180
- wait = None
181
 
182
- # Final fallback: exponential backoff with jitter
183
- if wait is None:
184
- wait = delay + random.uniform(0, 0.5)
185
-
186
- # Mark token as rate-limited if we have token pool and token info
187
- if status in (403, 429) and token_pool and token:
188
- token_pool.mark_rate_limited(token, reset_timestamp)
189
-
190
- # Cap individual wait to avoid extreme sleeps
191
- wait = max(1.0, min(wait, 120.0))
192
- print(f"GitHub API {status}. Backing off {wait:.1f}s (attempt {attempt + 1}/{max_retries})...")
193
- time.sleep(wait)
194
- delay = min(delay * 2, 60.0)
195
- continue
196
-
197
- # Non-retryable error; return response for caller to handle
198
- return resp
199
-
200
- except requests.RequestException as e:
201
- # Network error -> retry with backoff
202
- wait = delay + random.uniform(0, 0.5)
203
- wait = max(1.0, min(wait, 60.0))
204
- print(f"Request error: {e}. Retrying in {wait:.1f}s (attempt {attempt + 1}/{max_retries})...")
205
- time.sleep(wait)
206
- delay = min(delay * 2, 60.0)
207
-
208
- print(f"Exceeded max retries for {url}")
209
- return None
210
-
211
- def get_github_tokens():
212
- """Get all GitHub tokens from environment variables (all vars starting with GITHUB_TOKEN)."""
213
- tokens = []
214
- for key, value in os.environ.items():
215
- if key.startswith('GITHUB_TOKEN') and value:
216
- tokens.append(value)
217
-
218
- if not tokens:
219
- print("Warning: No GITHUB_TOKEN* found. API rate limits: 60/hour (authenticated: 5000/hour)")
220
  else:
221
- print(f" Loaded {len(tokens)} GitHub token(s) for token pool")
222
-
223
- return tokens
224
-
225
-
226
- def get_github_token():
227
- """Get primary GitHub token from environment variables (for backward compatibility)."""
228
- token = os.getenv('GITHUB_TOKEN')
229
- if not token:
230
- print("Warning: GITHUB_TOKEN not found. API rate limits: 60/hour (authenticated: 5000/hour)")
231
- return token
232
-
233
-
234
- class TokenPool:
235
- """
236
- Hybrid token pool that manages GitHub tokens with parallel execution and round-robin fallback.
237
-
238
- Strategy:
239
- - 50% of tokens allocated to parallel pool (for concurrent API calls)
240
- - 50% of tokens allocated to round-robin pool (for rate limit fallback)
241
- - Automatically switches to round-robin when parallel tokens hit rate limits
242
- - Thread-safe for concurrent access
243
- """
244
- def __init__(self, tokens):
245
- import threading
246
-
247
- self.all_tokens = tokens if tokens else [None]
248
- self.lock = threading.Lock()
249
-
250
- # Split tokens into parallel and round-robin pools (50/50)
251
- total_tokens = len(self.all_tokens)
252
- split_point = max(1, total_tokens // 2) # At least 1 token in each pool
253
-
254
- self.parallel_tokens = self.all_tokens[:split_point]
255
- self.roundrobin_tokens = self.all_tokens[split_point:]
256
-
257
- # If only 1 token, use it in both pools
258
- if total_tokens == 1:
259
- self.parallel_tokens = self.all_tokens
260
- self.roundrobin_tokens = self.all_tokens
261
-
262
- # Track rate-limited tokens with reset times
263
- self.rate_limited_parallel = {} # {token: reset_timestamp}
264
- self.rate_limited_roundrobin = {} # {token: reset_timestamp}
265
-
266
- # Round-robin index for fallback pool
267
- self.roundrobin_index = 0
268
-
269
- # Statistics
270
- self.parallel_calls = 0
271
- self.roundrobin_calls = 0
272
- self.fallback_triggers = 0
273
-
274
- print(f"🔄 Hybrid Token Pool initialized:")
275
- print(f" Total tokens: {total_tokens}")
276
- print(f" Parallel pool: {len(self.parallel_tokens)} token(s)")
277
- print(f" Round-robin pool: {len(self.roundrobin_tokens)} token(s)")
278
-
279
- def _clean_expired_rate_limits(self):
280
- """Remove tokens from rate limit tracking if their reset time has passed."""
281
- current_time = time.time()
282
-
283
- # Clean parallel pool
284
- expired_parallel = [token for token, reset_time in self.rate_limited_parallel.items()
285
- if current_time >= reset_time]
286
- for token in expired_parallel:
287
- del self.rate_limited_parallel[token]
288
-
289
- # Clean round-robin pool
290
- expired_roundrobin = [token for token, reset_time in self.rate_limited_roundrobin.items()
291
- if current_time >= reset_time]
292
- for token in expired_roundrobin:
293
- del self.rate_limited_roundrobin[token]
294
-
295
- def get_parallel_token(self):
296
- """
297
- Get a token from the parallel pool for concurrent execution.
298
- Returns None if all parallel tokens are rate-limited.
299
- """
300
- with self.lock:
301
- self._clean_expired_rate_limits()
302
-
303
- # Find first non-rate-limited token in parallel pool
304
- for token in self.parallel_tokens:
305
- if token not in self.rate_limited_parallel:
306
- self.parallel_calls += 1
307
- return token
308
-
309
- return None # All parallel tokens are rate-limited
310
-
311
- def get_available_parallel_tokens(self):
312
- """
313
- Get all available tokens from parallel pool (not rate-limited).
314
- Used for batch parallel execution.
315
- """
316
- with self.lock:
317
- self._clean_expired_rate_limits()
318
- available = [token for token in self.parallel_tokens
319
- if token not in self.rate_limited_parallel]
320
- return available
321
-
322
- def get_roundrobin_token(self):
323
- """
324
- Get the next token from round-robin pool (fallback mechanism).
325
- Skips rate-limited tokens and rotates to the next available one.
326
- """
327
- with self.lock:
328
- self._clean_expired_rate_limits()
329
-
330
- attempts = 0
331
- max_attempts = len(self.roundrobin_tokens)
332
-
333
- while attempts < max_attempts:
334
- token = self.roundrobin_tokens[self.roundrobin_index]
335
- self.roundrobin_index = (self.roundrobin_index + 1) % len(self.roundrobin_tokens)
336
-
337
- if token not in self.rate_limited_roundrobin:
338
- self.roundrobin_calls += 1
339
- return token
340
-
341
- attempts += 1
342
-
343
- # All round-robin tokens are rate-limited
344
- return None
345
-
346
- def get_next_token(self):
347
- """
348
- Get the next available token (try parallel first, fallback to round-robin).
349
- This is the main method for backwards compatibility.
350
- """
351
- # Try parallel pool first
352
- token = self.get_parallel_token()
353
- if token:
354
- return token
355
-
356
- # Fallback to round-robin
357
- with self.lock:
358
- self.fallback_triggers += 1
359
-
360
- token = self.get_roundrobin_token()
361
- if token:
362
- return token
363
-
364
- # All tokens exhausted - return first parallel token anyway (will hit rate limit)
365
- return self.parallel_tokens[0] if self.parallel_tokens else None
366
-
367
- def get_headers(self):
368
- """Get headers with the next available token."""
369
- token = self.get_next_token()
370
- return {'Authorization': f'token {token}'} if token else {}
371
-
372
- def mark_rate_limited(self, token, reset_timestamp=None):
373
- """
374
- Mark a token as rate-limited with optional reset timestamp.
375
-
376
- Args:
377
- token: The token that hit rate limit
378
- reset_timestamp: Unix timestamp when rate limit resets (optional)
379
- """
380
- with self.lock:
381
- # Default to 1 hour from now if no reset time provided
382
- if reset_timestamp is None:
383
- reset_timestamp = time.time() + 3600
384
-
385
- # Mark in appropriate pool
386
- if token in self.parallel_tokens:
387
- self.rate_limited_parallel[token] = reset_timestamp
388
- print(f" ⚠️ Parallel token marked as rate-limited until {datetime.fromtimestamp(reset_timestamp, timezone.utc).strftime('%H:%M:%S UTC')}")
389
-
390
- if token in self.roundrobin_tokens:
391
- self.rate_limited_roundrobin[token] = reset_timestamp
392
- print(f" ⚠️ Round-robin token marked as rate-limited until {datetime.fromtimestamp(reset_timestamp, timezone.utc).strftime('%H:%M:%S UTC')}")
393
-
394
- def get_stats(self):
395
- """Get usage statistics for monitoring."""
396
- with self.lock:
397
- return {
398
- 'parallel_calls': self.parallel_calls,
399
- 'roundrobin_calls': self.roundrobin_calls,
400
- 'fallback_triggers': self.fallback_triggers,
401
- 'parallel_rate_limited': len(self.rate_limited_parallel),
402
- 'roundrobin_rate_limited': len(self.rate_limited_roundrobin)
403
- }
404
-
405
- def print_stats(self):
406
- """Print usage statistics."""
407
- stats = self.get_stats()
408
- total_calls = stats['parallel_calls'] + stats['roundrobin_calls']
409
-
410
- if total_calls > 0:
411
- print(f"\n📊 Token Pool Statistics:")
412
- print(f" Total API calls: {total_calls}")
413
- print(f" Parallel calls: {stats['parallel_calls']} ({stats['parallel_calls']/total_calls*100:.1f}%)")
414
- print(f" Round-robin calls: {stats['roundrobin_calls']} ({stats['roundrobin_calls']/total_calls*100:.1f}%)")
415
- print(f" Fallback triggers: {stats['fallback_triggers']}")
416
- print(f" Currently rate-limited: {stats['parallel_rate_limited']} parallel, {stats['roundrobin_rate_limited']} round-robin")
417
-
418
-
419
- def validate_github_username(identifier):
420
- """Verify that a GitHub identifier exists with backoff-aware requests."""
421
- try:
422
- token = get_github_token()
423
- headers = {'Authorization': f'token {token}'} if token else {}
424
- url = f'https://api.github.com/users/{identifier}'
425
- response = request_with_backoff('GET', url, headers=headers, max_retries=1,
426
- token_pool=None, token=token)
427
- if response is None:
428
- return False, "Validation error: network/rate limit exhausted"
429
- if response.status_code == 200:
430
- return True, "Username is valid"
431
- elif response.status_code == 404:
432
- return False, "GitHub identifier not found"
433
- else:
434
- return False, f"Validation error: HTTP {response.status_code}"
435
- except Exception as e:
436
- return False, f"Validation error: {str(e)}"
437
 
438
 
439
- def fetch_prs_with_time_partition(base_query, start_date, end_date, token_pool, prs_by_id, debug_limit=None, depth=0):
440
  """
441
- Fetch PRs within a specific time range using time-based partitioning.
442
- Recursively splits the time range if hitting the 1000-result limit.
443
- Supports splitting by day, hour, minute, and second as needed.
444
 
445
  Args:
446
- token_pool: TokenPool instance for rotating tokens
447
- debug_limit: If set, stops fetching after this many PRs (for testing)
448
- depth: Current recursion depth (for tracking)
449
 
450
- Returns the number of PRs found in this time partition.
 
451
  """
452
- # Calculate time difference
453
- time_diff = end_date - start_date
454
- total_seconds = time_diff.total_seconds()
455
-
456
- # Determine granularity and format dates accordingly
457
- if total_seconds >= 86400: # >= 1 day
458
- # Use day granularity (YYYY-MM-DD)
459
- start_str = start_date.strftime('%Y-%m-%d')
460
- end_str = end_date.strftime('%Y-%m-%d')
461
- elif total_seconds >= 3600: # >= 1 hour but < 1 day
462
- # Use hour granularity (YYYY-MM-DDTHH:MM:SSZ)
463
- start_str = start_date.strftime('%Y-%m-%dT%H:00:00Z')
464
- end_str = end_date.strftime('%Y-%m-%dT%H:59:59Z')
465
- elif total_seconds >= 60: # >= 1 minute but < 1 hour
466
- # Use minute granularity (YYYY-MM-DDTHH:MM:SSZ)
467
- start_str = start_date.strftime('%Y-%m-%dT%H:%M:00Z')
468
- end_str = end_date.strftime('%Y-%m-%dT%H:%M:59Z')
469
- else: # < 1 minute
470
- # Use second granularity (YYYY-MM-DDTHH:MM:SSZ)
471
- start_str = start_date.strftime('%Y-%m-%dT%H:%M:%SZ')
472
- end_str = end_date.strftime('%Y-%m-%dT%H:%M:%SZ')
473
-
474
- # Add date range to query
475
- query = f'{base_query} created:{start_str}..{end_str}'
476
-
477
- indent = " " + " " * depth
478
- print(f"{indent}Searching range {start_str} to {end_str}...")
479
-
480
- page = 1
481
- per_page = 100
482
- total_in_partition = 0
483
-
484
- while True:
485
- # Check debug limit
486
- if debug_limit is not None and total_in_partition >= debug_limit:
487
- print(f"{indent} 🐛 DEBUG MODE: Reached limit of {debug_limit} PRs, stopping...")
488
- return total_in_partition
489
- url = 'https://api.github.com/search/issues'
490
- params = {
491
- 'q': query,
492
- 'per_page': per_page,
493
- 'page': page,
494
- 'sort': 'created',
495
- 'order': 'asc'
496
- }
497
-
498
- try:
499
- # Get token for tracking
500
- token = token_pool.get_next_token()
501
- headers = {'Authorization': f'token {token}'} if token else {}
502
-
503
- response = request_with_backoff('GET', url, headers=headers, params=params,
504
- token_pool=token_pool, token=token)
505
- if response is None:
506
- print(f"{indent} Error: retries exhausted for range {start_str} to {end_str}")
507
- return total_in_partition
508
-
509
- if response.status_code != 200:
510
- print(f"{indent} Error: HTTP {response.status_code} for range {start_str} to {end_str}")
511
- return total_in_partition
512
-
513
- data = response.json()
514
- total_count = data.get('total_count', 0)
515
- items = data.get('items', [])
516
-
517
- if not items:
518
- break
519
-
520
- # Add PRs to global dict
521
- for pr in items:
522
- pr_id = pr.get('id')
523
- if pr_id and pr_id not in prs_by_id:
524
- prs_by_id[pr_id] = pr
525
- total_in_partition += 1
526
-
527
- # Check if we hit the 1000-result limit
528
- if total_count > 1000 and page == 10:
529
- print(f"{indent} ⚠️ Hit 1000-result limit ({total_count} total). Splitting time range...")
530
-
531
- # Determine how to split based on time range duration
532
- if total_seconds < 2: # Less than 2 seconds - can't split further
533
- print(f"{indent} ⚠️ Cannot split further (range < 2 seconds). Some results may be missing.")
534
- break
535
-
536
- elif total_seconds < 120: # Less than 2 minutes - split by seconds
537
- # Split into 2-4 parts depending on range
538
- num_splits = min(4, max(2, int(total_seconds / 30)))
539
- split_duration = time_diff / num_splits
540
- split_dates = [start_date + split_duration * i for i in range(num_splits + 1)]
541
-
542
- total_from_splits = 0
543
- for i in range(num_splits):
544
- split_start = split_dates[i]
545
- split_end = split_dates[i + 1]
546
- # Avoid overlapping ranges (add 1 second to start)
547
- if i > 0:
548
- split_start = split_start + timedelta(seconds=1)
549
-
550
- count = fetch_prs_with_time_partition(
551
- base_query, split_start, split_end, token_pool, prs_by_id, debug_limit, depth + 1
552
- )
553
- total_from_splits += count
554
-
555
- return total_from_splits
556
-
557
- elif total_seconds < 7200: # Less than 2 hours - split by minutes
558
- # Split into 2-4 parts
559
- num_splits = min(4, max(2, int(total_seconds / 1800)))
560
- split_duration = time_diff / num_splits
561
- split_dates = [start_date + split_duration * i for i in range(num_splits + 1)]
562
-
563
- total_from_splits = 0
564
- for i in range(num_splits):
565
- split_start = split_dates[i]
566
- split_end = split_dates[i + 1]
567
- # Avoid overlapping ranges (add 1 minute to start)
568
- if i > 0:
569
- split_start = split_start + timedelta(minutes=1)
570
-
571
- count = fetch_prs_with_time_partition(
572
- base_query, split_start, split_end, token_pool, prs_by_id, debug_limit, depth + 1
573
- )
574
- total_from_splits += count
575
-
576
- return total_from_splits
577
-
578
- elif total_seconds < 172800: # Less than 2 days - split by hours
579
- # Split into 2-4 parts
580
- num_splits = min(4, max(2, int(total_seconds / 43200)))
581
- split_duration = time_diff / num_splits
582
- split_dates = [start_date + split_duration * i for i in range(num_splits + 1)]
583
-
584
- total_from_splits = 0
585
- for i in range(num_splits):
586
- split_start = split_dates[i]
587
- split_end = split_dates[i + 1]
588
- # Avoid overlapping ranges (add 1 hour to start)
589
- if i > 0:
590
- split_start = split_start + timedelta(hours=1)
591
-
592
- count = fetch_prs_with_time_partition(
593
- base_query, split_start, split_end, token_pool, prs_by_id, debug_limit, depth + 1
594
- )
595
- total_from_splits += count
596
-
597
- return total_from_splits
598
-
599
- else: # 2+ days - split by days
600
- days_diff = time_diff.days
601
-
602
- # Use aggressive splitting for large ranges or deep recursion
603
- # Split into 4 parts if range is > 30 days, otherwise split in half
604
- if days_diff > 30 or depth > 5:
605
- # Split into 4 parts for more aggressive partitioning
606
- quarter_diff = time_diff / 4
607
- split_dates = [
608
- start_date,
609
- start_date + quarter_diff,
610
- start_date + quarter_diff * 2,
611
- start_date + quarter_diff * 3,
612
- end_date
613
- ]
614
-
615
- total_from_splits = 0
616
- for i in range(4):
617
- split_start = split_dates[i]
618
- split_end = split_dates[i + 1]
619
- # Avoid overlapping ranges
620
- if i > 0:
621
- split_start = split_start + timedelta(days=1)
622
-
623
- count = fetch_prs_with_time_partition(
624
- base_query, split_start, split_end, token_pool, prs_by_id, debug_limit, depth + 1
625
- )
626
- total_from_splits += count
627
-
628
- return total_from_splits
629
- else:
630
- # Binary split for smaller ranges
631
- mid_date = start_date + time_diff / 2
632
-
633
- # Recursively fetch both halves
634
- count1 = fetch_prs_with_time_partition(
635
- base_query, start_date, mid_date, token_pool, prs_by_id, debug_limit, depth + 1
636
- )
637
- count2 = fetch_prs_with_time_partition(
638
- base_query, mid_date + timedelta(days=1), end_date, token_pool, prs_by_id, debug_limit, depth + 1
639
- )
640
-
641
- return count1 + count2
642
-
643
- # Normal pagination: check if there are more pages
644
- if len(items) < per_page or page >= 10:
645
- break
646
-
647
- page += 1
648
- time.sleep(0.5) # Courtesy delay between pages
649
-
650
- except Exception as e:
651
- print(f"{indent} Error fetching range {start_str} to {end_str}: {str(e)}")
652
- return total_in_partition
653
 
654
- if total_in_partition > 0:
655
- print(f"{indent} ✓ Found {total_in_partition} PRs in range {start_str} to {end_str}")
 
 
656
 
657
- return total_in_partition
 
 
658
 
659
 
660
- def extract_pr_metadata(pr):
661
  """
662
- Extract minimal PR metadata for efficient storage.
663
- Only keeps essential fields: html_url, created_at, merged_at, closed_at.
664
- Note: agent_name is not stored as it's inferred from the folder structure.
665
- """
666
- pull_request = pr.get('pull_request', {})
667
-
668
- # Extract dates
669
- created_at = pr.get('created_at')
670
- merged_at = pull_request.get('merged_at')
671
- closed_at = pr.get('closed_at')
672
-
673
- # Only store closed_at if PR is closed but not merged
674
- if merged_at:
675
- closed_at = None # Don't store redundant info
676
-
677
- return {
678
- 'html_url': pr.get('html_url'),
679
- 'created_at': created_at,
680
- 'merged_at': merged_at,
681
- 'closed_at': closed_at
682
- }
683
-
684
 
685
- def fetch_prs_parallel(query_patterns, start_date, end_date, token_pool, max_workers=None):
686
- """
687
- Fetch PRs for multiple query patterns in parallel using available tokens.
 
688
 
689
  Args:
690
- query_patterns: List of query pattern strings
691
- start_date: Start date for PR search
692
- end_date: End date for PR search
693
- token_pool: TokenPool instance
694
- max_workers: Maximum number of concurrent workers (defaults to number of available parallel tokens)
695
 
696
  Returns:
697
- Dictionary mapping query pattern to list of PRs found
698
  """
699
- import concurrent.futures
 
700
 
701
- # Determine number of workers based on available parallel tokens
702
- available_tokens = token_pool.get_available_parallel_tokens()
703
- if not available_tokens:
704
- # Fall back to sequential if no parallel tokens available
705
- print(" ⚠️ No parallel tokens available, using sequential fallback")
706
- return None
 
 
 
 
 
 
 
 
 
 
 
707
 
708
- if max_workers is None:
709
- max_workers = len(available_tokens)
710
 
711
- print(f" 🚀 Starting parallel execution with {max_workers} worker(s)")
 
 
712
 
713
- results = {}
714
 
715
- def fetch_single_pattern(pattern):
716
- """Fetch PRs for a single query pattern."""
717
- prs_by_id = {}
718
- try:
719
- prs_found = fetch_prs_with_time_partition(
720
- pattern,
721
- start_date,
722
- end_date,
723
- token_pool,
724
- prs_by_id,
725
- debug_limit=None
726
- )
727
- return pattern, prs_by_id
728
- except Exception as e:
729
- print(f" ✗ Error in parallel fetch for pattern '{pattern}': {str(e)}")
730
- return pattern, {}
731
-
732
- # Execute patterns in parallel
733
- with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
734
- # Submit all tasks
735
- future_to_pattern = {
736
- executor.submit(fetch_single_pattern, pattern): pattern
737
- for pattern in query_patterns
738
- }
739
 
740
- # Collect results as they complete
741
- for future in concurrent.futures.as_completed(future_to_pattern):
742
- pattern = future_to_pattern[future]
743
- try:
744
- pattern_key, prs = future.result()
745
- results[pattern_key] = prs
746
- print(f" ✓ Parallel fetch completed for pattern: {pattern_key}")
747
- except Exception as e:
748
- print(f" ✗ Parallel fetch failed for pattern '{pattern}': {str(e)}")
749
- results[pattern] = {}
750
 
751
- return results
 
 
752
 
 
 
 
753
 
754
- def fetch_daily_prs_metadata(identifier, agent_name, token_pool=None, target_date=None, use_parallel=True):
755
- """
756
- Fetch pull requests for a specific date (used for daily incremental updates).
 
 
 
757
 
758
- Args:
759
- identifier: GitHub username or bot identifier
760
- agent_name: Human-readable name of the agent for metadata purposes
761
- token_pool: TokenPool instance for rotating tokens
762
- target_date: Date object for which to fetch PRs (defaults to yesterday)
763
 
764
- Returns:
765
- List of dictionaries containing minimal PR metadata for that date
766
- """
767
- if target_date is None:
768
- target_date = (datetime.now(timezone.utc) - timedelta(days=1)).date()
769
-
770
- # Debug mode: limit PR retrieval for testing
771
- debug_limit_per_pattern = 10 if DEBUG_MODE else None
772
-
773
- if DEBUG_MODE:
774
- print(f"\n🐛 DEBUG MODE ENABLED: Limiting to {debug_limit_per_pattern} PRs per query pattern")
775
-
776
- # Define query patterns per rules:
777
- # 1) author pattern only if identifier contains "[bot]"
778
- # 2) co-author and head patterns use identifier with "[bot]" removed
779
- stripped_id = identifier.replace('[bot]', '')
780
- query_patterns = []
781
- if '[bot]' in identifier:
782
- query_patterns.append(f'is:pr author:{identifier}')
783
- if stripped_id:
784
- query_patterns.append(f'is:pr "co-authored-by: {stripped_id}"')
785
- query_patterns.append(f'is:pr head:{stripped_id}/')
786
-
787
- # Use a dict to deduplicate PRs by ID
788
- prs_by_id = {}
789
-
790
- # Convert target_date to datetime for API queries
791
- start_date = datetime.combine(target_date, datetime.min.time()).replace(tzinfo=timezone.utc)
792
- end_date = datetime.combine(target_date, datetime.max.time()).replace(tzinfo=timezone.utc)
793
-
794
- # Try parallel execution first if enabled
795
- if use_parallel and not DEBUG_MODE and len(query_patterns) > 1:
796
- print(f"\n🚀 Attempting parallel execution for {len(query_patterns)} query patterns...")
797
- parallel_start_time = time.time()
798
-
799
- parallel_results = fetch_prs_parallel(query_patterns, start_date, end_date, token_pool)
800
-
801
- if parallel_results is not None:
802
- # Merge results from parallel execution
803
- for pattern, pattern_prs in parallel_results.items():
804
- for pr_id, pr in pattern_prs.items():
805
- if pr_id not in prs_by_id:
806
- prs_by_id[pr_id] = pr
807
-
808
- parallel_duration = time.time() - parallel_start_time
809
- print(f"\n ✅ Parallel execution complete: {len(prs_by_id)} unique PRs found")
810
- print(f" ⏱️ Total time: {parallel_duration:.1f} seconds")
811
-
812
- # Print token pool statistics
813
- token_pool.print_stats()
814
- else:
815
- # Fallback to sequential execution
816
- print(" ⚠️ Parallel execution not available, falling back to sequential...")
817
- use_parallel = False
818
-
819
- # Sequential execution (fallback or if parallel disabled)
820
- if not use_parallel or DEBUG_MODE or len(query_patterns) <= 1:
821
- for query_pattern in query_patterns:
822
- print(f"\n🔍 Searching with query: {query_pattern}")
823
- print(f" Date: {target_date.strftime('%Y-%m-%d')}")
824
-
825
- pattern_start_time = time.time()
826
- initial_count = len(prs_by_id)
827
-
828
- # Fetch with time partitioning (for single day)
829
- prs_found = fetch_prs_with_time_partition(
830
- query_pattern,
831
- start_date,
832
- end_date,
833
- token_pool,
834
- prs_by_id,
835
- debug_limit_per_pattern
836
- )
837
 
838
- pattern_duration = time.time() - pattern_start_time
839
- new_prs = len(prs_by_id) - initial_count
 
840
 
841
- print(f" ✓ Pattern complete: {new_prs} new PRs found ({prs_found} total fetched, {len(prs_by_id) - initial_count - (prs_found - new_prs)} duplicates)")
842
- print(f" ⏱️ Time taken: {pattern_duration:.1f} seconds")
 
 
 
843
 
844
- # Delay between different query patterns (shorter in debug mode)
845
- time.sleep(0.2 if DEBUG_MODE else 1.0)
846
 
847
- # Convert to lightweight metadata
848
- all_prs = list(prs_by_id.values())
 
849
 
850
- if DEBUG_MODE:
851
- print(f"\n✅ COMPLETE (DEBUG MODE): Found {len(all_prs)} unique PRs for {identifier} on {target_date}")
852
- print(f" Note: In production mode, this would fetch ALL PRs")
853
- else:
854
- print(f"\n✅ COMPLETE: Found {len(all_prs)} unique PRs for {identifier} on {target_date}")
855
- print(f"📦 Extracting minimal metadata...")
856
 
857
- metadata_list = [extract_pr_metadata(pr) for pr in all_prs]
858
 
859
- return metadata_list
 
 
 
 
 
860
 
 
 
 
 
 
 
 
 
 
 
861
 
862
 
 
 
 
863
 
864
  def calculate_pr_stats_from_metadata(metadata_list):
865
  """
866
  Calculate statistics from a list of PR metadata (lightweight objects).
867
- Works with minimal metadata: html_url, created_at, merged_at, closed_at, agent_name.
868
 
869
  Returns a dictionary with comprehensive PR metrics.
870
 
@@ -893,11 +374,15 @@ def calculate_pr_stats_from_metadata(metadata_list):
893
  }
894
 
895
 
896
- def calculate_monthly_metrics_by_agent():
897
  """
898
- Calculate monthly metrics for all agents for visualization.
899
  Loads data directly from SWE-Arena/pr_metadata dataset.
900
 
 
 
 
 
901
  Returns:
902
  dict: {
903
  'agents': list of agent names,
@@ -962,8 +447,7 @@ def calculate_monthly_metrics_by_agent():
962
  for month in months:
963
  prs_in_month = month_dict.get(month, [])
964
 
965
- # Count merged PRs (those with merged_at during this time)
966
- # Note: We're filtering by created_at, but counting based on merged_at/closed_at
967
  merged_count = sum(1 for pr in prs_in_month if pr.get('merged_at'))
968
 
969
  # Count closed but not merged
@@ -989,8 +473,25 @@ def calculate_monthly_metrics_by_agent():
989
  'closed_not_merged': closed_not_merged_list
990
  }
991
 
992
  return {
993
- 'agents': sorted(list(agent_month_data.keys())),
994
  'months': months,
995
  'data': result_data
996
  }
@@ -1026,106 +527,76 @@ def save_pr_metadata_to_hf(metadata_list, agent_identifier):
1026
  """
1027
  Save PR metadata to HuggingFace dataset, organized by [agent_identifier]/YYYY.MM.DD.jsonl.
1028
  Each file is stored in the agent's folder and named YYYY.MM.DD.jsonl for that day's PRs.
1029
- In debug mode, saves to in-memory cache only.
1030
 
1031
- This function APPENDS new metadata and DEDUPLICATES by html_url.
1032
- Uses batch upload to avoid HuggingFace rate limits (256 commits/hour).
1033
 
1034
  Args:
1035
  metadata_list: List of PR metadata dictionaries
1036
  agent_identifier: GitHub identifier of the agent (used as folder name)
1037
  """
1038
- import tempfile
1039
  import shutil
1040
 
1041
- # Skip saving to HF in debug mode - use in-memory cache instead
1042
- if DEBUG_MODE:
1043
- global DEBUG_PR_METADATA_CACHE
1044
- # Merge with existing cache, deduplicating by html_url
1045
- existing = {pr['html_url']: pr for pr in DEBUG_PR_METADATA_CACHE[agent_identifier] if pr.get('html_url')}
1046
- new = {pr['html_url']: pr for pr in metadata_list if pr.get('html_url')}
1047
- existing.update(new)
1048
- DEBUG_PR_METADATA_CACHE[agent_identifier] = list(existing.values())
1049
- print(f"🐛 DEBUG MODE: Saved to in-memory cache only ({len(metadata_list)} PRs) - NOT saved to HuggingFace")
1050
- return True
1051
-
1052
  try:
1053
  token = get_hf_token()
1054
  if not token:
1055
  raise Exception("No HuggingFace token found")
1056
 
1057
- api = HfApi()
1058
 
1059
- # Group by exact date (year, month, day)
1060
  grouped = group_metadata_by_date(metadata_list)
1061
 
1062
- # Create a temporary directory to prepare all files for batch upload
 
 
 
 
1063
  temp_dir = tempfile.mkdtemp()
1064
- agent_dir = os.path.join(temp_dir, agent_identifier)
1065
- os.makedirs(agent_dir, exist_ok=True)
1066
 
1067
  try:
1068
- print(f"📦 Preparing {len(grouped)} daily files for batch upload...")
1069
 
 
1070
  for (pr_year, month, day), day_metadata in grouped.items():
1071
- # New structure: [agent_identifier]/YYYY.MM.DD.jsonl
1072
  filename = f"{agent_identifier}/{pr_year}.{month:02d}.{day:02d}.jsonl"
1073
- local_path = os.path.join(agent_dir, f"{pr_year}.{month:02d}.{day:02d}.jsonl")
1074
 
1075
- print(f" Preparing {len(day_metadata)} PRs for {filename}...")
 
1076
 
1077
- # Download existing file if it exists
1078
- existing_metadata = []
1079
- try:
1080
- file_path = hf_hub_download(
1081
- repo_id=PR_METADATA_REPO,
1082
- filename=filename,
1083
- repo_type="dataset",
1084
- token=token
1085
- )
1086
- existing_metadata = load_jsonl(file_path)
1087
- print(f" Found {len(existing_metadata)} existing PRs, merging...")
1088
- except Exception:
1089
- print(f" No existing file found, creating new...")
1090
-
1091
- # Merge and deduplicate by html_url
1092
- existing_by_url = {meta['html_url']: meta for meta in existing_metadata if meta.get('html_url')}
1093
- new_by_url = {meta['html_url']: meta for meta in day_metadata if meta.get('html_url')}
1094
-
1095
- # Update with new data (new data overwrites old)
1096
- existing_by_url.update(new_by_url)
1097
- merged_metadata = list(existing_by_url.values())
1098
-
1099
- # Save to temp directory
1100
- save_jsonl(local_path, merged_metadata)
1101
- print(f" ✓ Prepared {len(merged_metadata)} total PRs")
1102
-
1103
- # Batch upload entire folder in a single commit
1104
- print(f"\n📤 Uploading all files for {agent_identifier} in one batch...")
1105
- api.upload_folder(
1106
  folder_path=temp_dir,
1107
  repo_id=PR_METADATA_REPO,
1108
- repo_type="dataset",
1109
- token=token,
1110
- commit_message=f"Update PR metadata for {agent_identifier}"
1111
  )
1112
- print(f" ✓ Successfully uploaded {len(grouped)} files in 1 commit")
1113
 
1114
- finally:
1115
- # Clean up temporary directory
1116
- shutil.rmtree(temp_dir, ignore_errors=True)
1117
 
1118
- return True
 
 
 
1119
 
1120
  except Exception as e:
1121
- print(f"✗ Error saving PR metadata: {str(e)}")
 
 
1122
  return False
1123
 
1124
 
1125
  def load_pr_metadata():
1126
  """
1127
  Loads PR metadata from the last LEADERBOARD_TIME_FRAME_DAYS only.
1128
- In debug mode, loads from in-memory cache if available.
1129
 
1130
  Structure: [agent_identifier]/YYYY.MM.DD.jsonl
1131
 
@@ -1133,30 +604,6 @@ def load_pr_metadata():
1133
  List of dictionaries with 'agent_identifier' added to each PR metadata.
1134
  Only includes PRs within the last LEADERBOARD_TIME_FRAME_DAYS.
1135
  """
1136
- # In debug mode, check in-memory cache first
1137
- if DEBUG_MODE and DEBUG_PR_METADATA_CACHE:
1138
- all_metadata = []
1139
- cutoff_date = datetime.now(timezone.utc) - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)
1140
-
1141
- for agent_identifier, metadata_list in DEBUG_PR_METADATA_CACHE.items():
1142
- for pr_meta in metadata_list:
1143
- # Filter by created_at date
1144
- created_at = pr_meta.get('created_at')
1145
- if created_at:
1146
- try:
1147
- dt = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
1148
- if dt >= cutoff_date:
1149
- pr_with_agent = pr_meta.copy()
1150
- pr_with_agent['agent_identifier'] = agent_identifier
1151
- all_metadata.append(pr_with_agent)
1152
- except Exception:
1153
- # If date parsing fails, skip this PR
1154
- continue
1155
-
1156
- if all_metadata:
1157
- print(f"🐛 DEBUG MODE: Loading PR metadata from in-memory cache ({len(all_metadata)} PRs from last {LEADERBOARD_TIME_FRAME_DAYS} days)")
1158
- return all_metadata
1159
-
1160
  try:
1161
  api = HfApi()
1162
  token = get_hf_token()
@@ -1190,7 +637,8 @@ def load_pr_metadata():
1190
  # If date parsing fails, skip this file
1191
  continue
1192
 
1193
- print(f"📥 Loading PR metadata from last {LEADERBOARD_TIME_FRAME_DAYS} days ({len(relevant_files)} daily files across all agents)...")
 
1194
 
1195
  all_metadata = []
1196
  for filename in relevant_files:
@@ -1232,79 +680,21 @@ def load_pr_metadata():
1232
  except Exception as e:
1233
  print(f" Warning: Could not load {filename}: {str(e)}")
1234
 
1235
- print(f"✓ Loaded {len(all_metadata)} total PRs from last {LEADERBOARD_TIME_FRAME_DAYS} days")
1236
  return all_metadata
1237
 
1238
  except Exception as e:
1239
- print(f"✗ Error loading PR metadata from last {LEADERBOARD_TIME_FRAME_DAYS} days: {str(e)}")
 
1240
  return []
1241
 
1242
 
1243
- def get_latest_pr_date_for_agent(agent_identifier):
1244
- """
1245
- Get the latest PR creation date for an agent from stored metadata.
1246
- Used for incremental updates - only fetch PRs newer than this date.
1247
-
1248
- Structure: [agent_identifier]/YYYY.MM.DD.jsonl
1249
-
1250
- Args:
1251
- agent_identifier: GitHub identifier of the agent
1252
-
1253
- Returns:
1254
- datetime or None if no existing PRs found.
1255
- """
1256
- try:
1257
- api = HfApi()
1258
- token = get_hf_token()
1259
-
1260
- # List all files in the repository
1261
- files = api.list_repo_files(repo_id=PR_METADATA_REPO, repo_type="dataset")
1262
-
1263
- # Filter for files in this agent's folder
1264
- # New structure: [agent_identifier]/YYYY.MM.DD.jsonl
1265
- agent_pattern = f"{agent_identifier}/"
1266
- agent_files = [f for f in files if f.startswith(agent_pattern) and f.endswith('.jsonl')]
1267
-
1268
- if not agent_files:
1269
- return None
1270
-
1271
- # Find latest created_at across all files
1272
- latest_date = None
1273
- for filename in agent_files:
1274
- try:
1275
- file_path = hf_hub_download(
1276
- repo_id=PR_METADATA_REPO,
1277
- filename=filename,
1278
- repo_type="dataset",
1279
- token=token
1280
- )
1281
- metadata = load_jsonl(file_path)
1282
-
1283
- for pr in metadata:
1284
- created_at = pr.get('created_at')
1285
- if created_at:
1286
- try:
1287
- dt = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
1288
- if latest_date is None or dt > latest_date:
1289
- latest_date = dt
1290
- except Exception:
1291
- continue
1292
- except Exception:
1293
- continue
1294
-
1295
- return latest_date
1296
-
1297
- except Exception:
1298
- return None
1299
-
1300
-
1301
- def get_daily_files_last_n_months(agent_identifier, n_months=6):
1302
  """
1303
- Get list of daily file paths for an agent from the last N months.
1304
 
1305
  Args:
1306
  agent_identifier: GitHub identifier of the agent
1307
- n_months: Number of months to look back (default: 6)
1308
 
1309
  Returns:
1310
  List of file paths in format: [agent_identifier]/YYYY.MM.DD.jsonl
@@ -1313,9 +703,9 @@ def get_daily_files_last_n_months(agent_identifier, n_months=6):
1313
  api = HfApi()
1314
  token = get_hf_token()
1315
 
1316
- # Calculate date range
1317
  today = datetime.now(timezone.utc)
1318
- n_months_ago = today - timedelta(days=30 * n_months)
1319
 
1320
  # List all files in the repository
1321
  files = api.list_repo_files(repo_id=PR_METADATA_REPO, repo_type="dataset")
@@ -1341,8 +731,8 @@ def get_daily_files_last_n_months(agent_identifier, n_months=6):
1341
  file_year, file_month, file_day = map(int, date_components)
1342
  file_date = datetime(file_year, file_month, file_day, tzinfo=timezone.utc)
1343
 
1344
- # Include if within last n_months
1345
- if n_months_ago <= file_date <= today:
1346
  recent_files.append(filename)
1347
  except Exception:
1348
  continue
@@ -1354,173 +744,6 @@ def get_daily_files_last_n_months(agent_identifier, n_months=6):
1354
  return []
1355
 
1356
 
1357
-
1358
-
1359
- def fetch_pr_current_status(pr_url, token, token_pool=None):
1360
- """
1361
- Fetch the current status of a single PR from GitHub API.
1362
-
1363
- Args:
1364
- pr_url: PR HTML URL (e.g., https://github.com/owner/repo/pull/123)
1365
- token: GitHub API token
1366
- token_pool: Optional TokenPool for rate limit tracking
1367
-
1368
- Returns:
1369
- Dictionary with updated merged_at and closed_at, or None if failed
1370
- """
1371
- try:
1372
- # Convert HTML URL to API URL
1373
- # https://github.com/owner/repo/pull/123 -> https://api.github.com/repos/owner/repo/pulls/123
1374
- parts = pr_url.replace('https://github.com/', '').split('/')
1375
- if len(parts) < 4:
1376
- return None
1377
-
1378
- owner, repo, pull_word, pr_number = parts[0], parts[1], parts[2], parts[3]
1379
- api_url = f'https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}'
1380
-
1381
- headers = {'Authorization': f'token {token}'} if token else {}
1382
- response = request_with_backoff('GET', api_url, headers=headers, max_retries=3,
1383
- token_pool=token_pool, token=token)
1384
-
1385
- if response is None or response.status_code != 200:
1386
- return None
1387
-
1388
- pr_data = response.json()
1389
- merged_at = pr_data.get('merged_at')
1390
- closed_at = pr_data.get('closed_at')
1391
-
1392
- # Only store closed_at if not merged
1393
- if merged_at:
1394
- closed_at = None
1395
-
1396
- return {
1397
- 'merged_at': merged_at,
1398
- 'closed_at': closed_at
1399
- }
1400
-
1401
- except Exception as e:
1402
- print(f" Error fetching PR status for {pr_url}: {str(e)}")
1403
- return None
1404
-
1405
-
1406
- def refresh_open_prs_for_agent(agent_identifier, token, token_pool=None):
1407
- """
1408
- Refresh status for all open PRs from the last 6 months for an agent.
1409
- Only updates PRs that are still open (no merged_at, no closed_at).
1410
-
1411
- This implements the smart update strategy:
1412
- - Skip PRs that are already closed/merged
1413
- - Fetch current status for open PRs
1414
- - Update and save back to daily files
1415
-
1416
- Args:
1417
- agent_identifier: GitHub identifier of the agent
1418
- token: GitHub API token
1419
- token_pool: Optional TokenPool for rate limit tracking
1420
-
1421
- Returns:
1422
- Tuple: (total_checked, updated_count)
1423
- """
1424
- print(f"\n🔄 Refreshing open PRs for {agent_identifier} (last 6 months)...")
1425
-
1426
- try:
1427
- # Get daily files from last 6 months
1428
- recent_files = get_daily_files_last_n_months(agent_identifier, n_months=6)
1429
-
1430
- if not recent_files:
1431
- print(f" No recent files found for {agent_identifier}")
1432
- return (0, 0)
1433
-
1434
- print(f" Found {len(recent_files)} daily files to check")
1435
-
1436
- total_checked = 0
1437
- updated_count = 0
1438
-
1439
- # Process each file
1440
- for filename in recent_files:
1441
- try:
1442
- # Download file
1443
- file_path = hf_hub_download(
1444
- repo_id=PR_METADATA_REPO,
1445
- filename=filename,
1446
- repo_type="dataset",
1447
- token=get_hf_token()
1448
- )
1449
- prs = load_jsonl(file_path)
1450
-
1451
- if not prs:
1452
- continue
1453
-
1454
- updated_prs = []
1455
- file_had_updates = False
1456
-
1457
- # Check each PR
1458
- for pr in prs:
1459
- # Skip if already closed or merged
1460
- if pr.get('merged_at') or pr.get('closed_at'):
1461
- updated_prs.append(pr)
1462
- continue
1463
-
1464
- # PR is open, fetch current status
1465
- total_checked += 1
1466
- pr_url = pr.get('html_url')
1467
-
1468
- if not pr_url:
1469
- updated_prs.append(pr)
1470
- continue
1471
-
1472
- current_status = fetch_pr_current_status(pr_url, token, token_pool)
1473
-
1474
- if current_status:
1475
- # Check if status changed
1476
- if current_status['merged_at'] or current_status['closed_at']:
1477
- print(f" ✓ PR status changed: {pr_url}")
1478
- pr['merged_at'] = current_status['merged_at']
1479
- pr['closed_at'] = current_status['closed_at']
1480
- updated_count += 1
1481
- file_had_updates = True
1482
-
1483
- updated_prs.append(pr)
1484
- time.sleep(0.1) # Rate limiting courtesy delay
1485
-
1486
- # Save file if there were updates
1487
- if file_had_updates:
1488
- # Extract filename components for local save
1489
- parts = filename.split('/')
1490
- local_filename = parts[-1] # Just YYYY.MM.DD.jsonl
1491
-
1492
- # Save locally
1493
- save_jsonl(local_filename, updated_prs)
1494
-
1495
- try:
1496
- # Upload back to HuggingFace
1497
- api = HfApi()
1498
- upload_with_retry(
1499
- api=api,
1500
- path_or_fileobj=local_filename,
1501
- path_in_repo=filename,
1502
- repo_id=PR_METADATA_REPO,
1503
- repo_type="dataset",
1504
- token=get_hf_token()
1505
- )
1506
- print(f" 💾 Updated {filename}")
1507
- finally:
1508
- # Always clean up local file, even if upload fails
1509
- if os.path.exists(local_filename):
1510
- os.remove(local_filename)
1511
-
1512
- except Exception as e:
1513
- print(f" Warning: Could not process {filename}: {str(e)}")
1514
- continue
1515
-
1516
- print(f" ✅ Refresh complete: {total_checked} open PRs checked, {updated_count} updated")
1517
- return (total_checked, updated_count)
1518
-
1519
- except Exception as e:
1520
- print(f" ✗ Error refreshing PRs for {agent_identifier}: {str(e)}")
1521
- return (0, 0)
1522
-
1523
-
1524
  # =============================================================================
1525
  # HUGGINGFACE DATASET OPERATIONS
1526
  # =============================================================================
@@ -1550,6 +773,11 @@ def load_agents_from_hf():
1550
 
1551
  with open(file_path, 'r') as f:
1552
  agent_data = json.load(f)
 
 
 
 
 
1553
  agents.append(agent_data)
1554
 
1555
  except Exception as e:
@@ -1564,8 +792,6 @@ def load_agents_from_hf():
1564
  return None
1565
 
1566
 
1567
-
1568
-
1569
  def get_hf_token():
1570
  """Get HuggingFace token from environment variables."""
1571
  token = os.getenv('HF_TOKEN')
@@ -1655,111 +881,105 @@ def save_agent_to_hf(data):
1655
  return False
1656
 
1657
 
1658
-
1659
-
1660
  # =============================================================================
1661
  # DATA MANAGEMENT
1662
  # =============================================================================
1663
 
1664
- def update_all_agents_incremental():
1665
  """
1666
- Daily incremental update - refreshes open PRs and fetches new PRs for all agents.
1667
-
1668
- Strategy:
1669
- 1. Refresh status of all open PRs from the last LEADERBOARD_TIME_FRAME_DAYS - 1 days
1670
- (to check if any have been merged or closed)
1671
- 2. Fetch new PRs created yesterday (from 12:00 AM to 11:59:59 PM yesterday)
1672
- 3. Update the corresponding daily files (YYYY.MM.DD.jsonl)
1673
- 4. This runs daily to keep data fresh without re-mining everything
1674
  """
 
1675
  print(f"\n{'='*80}")
1676
- print(f"🕛 Daily Incremental PR Mining started at {datetime.now(timezone.utc).isoformat()}")
1677
- print(f"{'='*80}")
 
 
1678
 
 
1679
  try:
1680
- # Initialize token pool
1681
- tokens = get_github_tokens()
1682
- token_pool = TokenPool(tokens)
1683
- # Also get single token for backward-compatible functions
1684
- token = token_pool.get_next_token()
1685
-
1686
- # Load agent metadata from HuggingFace
1687
- agents = load_agents_from_hf()
1688
- if not agents:
1689
- print("No agents found in HuggingFace dataset")
1690
- return
1691
-
1692
- # Calculate yesterday's date
1693
- yesterday = (datetime.now(timezone.utc) - timedelta(days=1)).date()
1694
- print(f"\n📅 Daily Incremental Update for {yesterday.strftime('%Y-%m-%d')} for all agents...")
1695
-
1696
- agents_processed = 0
1697
- total_refreshed = 0
1698
- total_refreshed_updated = 0
1699
- total_new_prs = 0
1700
-
1701
- # Update each agent
1702
- for agent in agents:
1703
- identifier = agent.get('github_identifier')
1704
- agent_name = agent.get('agent_name', 'Unknown')
1705
-
1706
- if not identifier:
1707
- print(f"Warning: Skipping agent without identifier: {agent}")
1708
- continue
1709
 
1710
- try:
1711
- print(f"\n{'='*80}")
1712
- print(f"Processing: {agent_name} ({identifier})")
1713
- print(f"{'='*80}")
1714
-
1715
- # STEP 1: Refresh all open PRs from the last LEADERBOARD_TIME_FRAME_DAYS - 1 days
1716
- print(f"\n🔄 Step 1: Refreshing open PRs (last {LEADERBOARD_TIME_FRAME_DAYS - 1} days)...")
1717
- refreshed_checked, refreshed_updated = refresh_open_prs_for_agent(
1718
- identifier,
1719
- token,
1720
- token_pool
1721
- )
1722
- total_refreshed += refreshed_checked
1723
- total_refreshed_updated += refreshed_updated
1724
-
1725
- # STEP 2: Fetch new PRs created yesterday (12:00 AM to 11:59:59 PM yesterday)
1726
- print(f"\n📥 Step 2: Fetching new PRs created on {yesterday.strftime('%Y-%m-%d')} (12:00 AM to 11:59:59 PM)...")
1727
- new_metadata = fetch_daily_prs_metadata(
1728
- identifier,
1729
- agent_name,
1730
- token_pool,
1731
- target_date=yesterday
1732
- )
1733
 
1734
- if new_metadata:
1735
- # Save new metadata to HuggingFace
1736
- print(f"💾 Saving {len(new_metadata)} new PRs from {yesterday}...")
1737
- save_pr_metadata_to_hf(new_metadata, identifier)
1738
- total_new_prs += len(new_metadata)
1739
- else:
1740
- print(f" No new PRs found created on {yesterday}")
 
 
1741
 
1742
- agents_processed += 1
 
 
 
1743
 
1744
- except Exception as e:
1745
- print(f"✗ Error updating {identifier}: {str(e)}")
1746
- import traceback
1747
- traceback.print_exc()
1748
- continue
 
 
1749
 
1750
- print(f"\n{'='*80}")
1751
- print(f"📊 Mining Summary:")
1752
- print(f" Total agents processed: {agents_processed}")
1753
- print(f" Open PRs refreshed: {total_refreshed} checked, {total_refreshed_updated} updated")
1754
- print(f" New PRs added (from yesterday): {total_new_prs}")
1755
- print(f"{'='*80}")
1756
 
1757
- print(f"\n✅ Daily Incremental PR Mining completed at {datetime.now(timezone.utc).isoformat()}")
1758
 
1759
- except Exception as e:
1760
- print(f"✗ Daily mining failed: {str(e)}")
1761
- import traceback
1762
- traceback.print_exc()
1763
 
1764
 
1765
  def construct_leaderboard_from_metadata():
@@ -1805,15 +1025,26 @@ def construct_leaderboard_from_metadata():
1805
  # UI FUNCTIONS
1806
  # =============================================================================
1807
 
1808
- def create_monthly_metrics_plot():
 
 
 
 
 
 
 
 
1809
  """
1810
  Create a Plotly figure with dual y-axes showing:
1811
  - Left y-axis: Acceptance rate (%) as line curves
1812
  - Right y-axis: Total PRs created as bar charts
1813
 
1814
  Each agent gets a unique color for both their line and bars.
 
 
 
1815
  """
1816
- metrics = calculate_monthly_metrics_by_agent()
1817
 
1818
  if not metrics['agents'] or not metrics['months']:
1819
  # Return an empty figure with a message
@@ -1834,19 +1065,16 @@ def create_monthly_metrics_plot():
1834
  # Create figure with secondary y-axis
1835
  fig = make_subplots(specs=[[{"secondary_y": True}]])
1836
 
1837
- # Define colors for agents (using a color palette)
1838
- colors = [
1839
- '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
1840
- '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'
1841
- ]
1842
-
1843
  agents = metrics['agents']
1844
  months = metrics['months']
1845
  data = metrics['data']
1846
 
 
 
 
1847
  # Add traces for each agent
1848
  for idx, agent_name in enumerate(agents):
1849
- color = colors[idx % len(colors)]
1850
  agent_data = data[agent_name]
1851
 
1852
  # Add line trace for acceptance rate (left y-axis)
@@ -1966,13 +1194,11 @@ def get_leaderboard_dataframe():
1966
  return df
1967
 
1968
 
1969
-
1970
-
1971
  def submit_agent(identifier, agent_name, organization, description, website):
1972
  """
1973
  Submit a new agent to the leaderboard.
1974
  Validates input and saves submission.
1975
- PR data will be populated by the daily incremental update.
1976
  """
1977
  # Validate required fields
1978
  if not identifier or not identifier.strip():
@@ -2016,7 +1242,7 @@ def submit_agent(identifier, agent_name, organization, description, website):
2016
  if not save_agent_to_hf(submission):
2017
  return "❌ Failed to save submission", get_leaderboard_dataframe(), create_monthly_metrics_plot()
2018
 
2019
- success_msg = f"✅ Successfully submitted {agent_name}!\n\nPR data will be populated by the daily incremental update (runs at 12:00 AM UTC)."
2020
  return success_msg, get_leaderboard_dataframe(), create_monthly_metrics_plot()
2021
 
2022
 
@@ -2024,73 +1250,65 @@ def submit_agent(identifier, agent_name, organization, description, website):
2024
  # GRADIO APPLICATION
2025
  # =============================================================================
2026
 
2027
- # Initialize data before creating UI
2028
- if DEBUG_MODE:
2029
- print("\n" + "="*80)
2030
- print("🐛 DEBUG MODE ENABLED 🐛")
2031
- print("="*80)
2032
- print("PR retrieval is limited to 10 PRs per query pattern per agent")
2033
-
2034
- # Show how debug mode was enabled
2035
- if args.debug:
2036
- print("Enabled via: command-line flag '--debug'")
2037
- print("To disable: run without '--debug' flag")
2038
- else:
2039
- print("Enabled via: DEBUG_MODE environment variable")
2040
- print("To disable: run with '--no-debug' flag or unset DEBUG_MODE")
2041
 
2042
- print("="*80 + "\n")
2043
- else:
2044
- print("\n🚀 Starting in PRODUCTION MODE - full PR retrieval enabled")
2045
- if args.no_debug:
2046
- print(" (Explicitly set via '--no-debug' flag)")
2047
- print()
2048
-
2049
- # Start APScheduler for daily incremental PR mining at 12:00 AM UTC
2050
  scheduler = BackgroundScheduler(timezone="UTC")
2051
  scheduler.add_job(
2052
- update_all_agents_incremental,
2053
- trigger=CronTrigger(hour=0, minute=0), # 12:00 AM UTC daily
2054
- id='daily_incremental_pr_mining',
2055
- name='Daily Incremental PR Mining',
2056
  replace_existing=True
2057
  )
2058
  scheduler.start()
2059
- print("✓ Scheduler started: Daily Incremental PR Mining at 12:00 AM UTC")
2060
 
2061
  # Create Gradio interface
2062
  with gr.Blocks(title="SWE Agent PR Leaderboard", theme=gr.themes.Soft()) as app:
2063
-
 
2064
  gr.Markdown("# 🏆 SWE Agent PR Leaderboard")
2065
- gr.Markdown("Track and compare GitHub pull request statistics for SWE agents (last 6 months)")
2066
-
2067
  with gr.Tabs():
2068
-
2069
  # Leaderboard Tab
2070
  with gr.Tab("📊 Leaderboard"):
2071
- gr.Markdown("*All statistics are based on PRs from the last 6 months*")
2072
 
2073
  leaderboard_table = Leaderboard(
2074
  value=get_leaderboard_dataframe(),
2075
  datatype=LEADERBOARD_COLUMNS,
2076
  search_columns=["Agent Name", "Website"],
2077
- filter_columns=["Acceptance Rate (%)"]
2078
  )
2079
 
2080
- gr.Markdown("### Monthly Metrics")
2081
- gr.Markdown("Track acceptance rates and PR activity over time")
2082
 
2083
  monthly_plot = gr.Plot(
2084
- value=create_monthly_metrics_plot(),
2085
  label="Monthly PR Metrics"
2086
  )
2087
-
2088
  # Submit Agent Tab
2089
  with gr.Tab("➕ Submit Agent"):
2090
-
2091
  gr.Markdown("### Submit Your Agent")
2092
- gr.Markdown("Fill in the details below to add your agent to the leaderboard. Make sure you're logged in to HuggingFace CLI on your machine.")
2093
-
2094
  with gr.Row():
2095
  with gr.Column():
2096
  github_input = gr.Textbox(
@@ -2101,7 +1319,7 @@ with gr.Blocks(title="SWE Agent PR Leaderboard", theme=gr.themes.Soft()) as app:
2101
  label="Agent Name*",
2102
  placeholder="Your agent's display name"
2103
  )
2104
-
2105
  with gr.Column():
2106
  organization_input = gr.Textbox(
2107
  label="Organization*",
@@ -2113,10 +1331,10 @@ with gr.Blocks(title="SWE Agent PR Leaderboard", theme=gr.themes.Soft()) as app:
2113
  lines=3
2114
  )
2115
  website_input = gr.Textbox(
2116
- label="Website",
2117
  placeholder="https://your-agent-website.com"
2118
  )
2119
-
2120
  submit_button = gr.Button(
2121
  "Submit Agent",
2122
  variant="primary"
@@ -2125,7 +1343,7 @@ with gr.Blocks(title="SWE Agent PR Leaderboard", theme=gr.themes.Soft()) as app:
2125
  label="Submission Status",
2126
  interactive=False
2127
  )
2128
-
2129
  # Event handler
2130
  submit_button.click(
2131
  fn=submit_agent,
@@ -2136,4 +1354,4 @@ with gr.Blocks(title="SWE Agent PR Leaderboard", theme=gr.themes.Soft()) as app:
2136
 
2137
  # Launch application
2138
  if __name__ == "__main__":
2139
- app.launch()
 
1
  import gradio as gr
2
+ from gradio_leaderboard import Leaderboard, ColumnFilter
3
  import json
4
  import os
5
  import time
6
+ import tempfile
7
  import requests
8
  from datetime import datetime, timezone, timedelta
9
  from collections import defaultdict
10
  from huggingface_hub import HfApi, hf_hub_download
 
 
11
  from dotenv import load_dotenv
12
  import pandas as pd
13
  import random
 
14
  import plotly.graph_objects as go
15
  from plotly.subplots import make_subplots
16
  from apscheduler.schedulers.background import BackgroundScheduler
17
  from apscheduler.triggers.cron import CronTrigger
18
+ from google.cloud import bigquery
19
 
20
  # Load environment variables
21
  load_dotenv()
22
 
 
23
  # =============================================================================
24
  # CONFIGURATION
25
  # =============================================================================
26
 
27
  AGENTS_REPO = "SWE-Arena/swe_agents" # HuggingFace dataset for agent metadata
28
  PR_METADATA_REPO = "SWE-Arena/pr_metadata" # HuggingFace dataset for PR metadata
29
+ LEADERBOARD_TIME_FRAME_DAYS = 180 # Time frame for constructing leaderboard
30
+ UPDATE_TIME_FRAME_DAYS = 30 # Time frame for mining new PRs
31
 
32
  LEADERBOARD_COLUMNS = [
33
  ("Agent Name", "string"),
 
45
  """Load JSONL file and return list of dictionaries."""
46
  if not os.path.exists(filename):
47
  return []
48
+
49
  data = []
50
  with open(filename, 'r', encoding='utf-8') as f:
51
  for line in f:
 
66
  f.write(json.dumps(item) + '\n')
67
 
68
 
69
  # =============================================================================
70
+ # BIGQUERY FUNCTIONS
71
  # =============================================================================
72
 
73
+ def get_bigquery_client():
74
  """
75
+ Initialize BigQuery client using credentials from environment variable.
 
76
 
77
+ Expects GOOGLE_APPLICATION_CREDENTIALS_JSON environment variable containing
78
+ the service account JSON credentials as a string.
79
  """
80
+ # Get the JSON content from environment variable
81
+ creds_json = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS_JSON')
82
 
83
+ if creds_json:
84
+ # Create a temporary file to store credentials
85
+ with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as temp_file:
86
+ temp_file.write(creds_json)
87
+ temp_path = temp_file.name
88
 
89
+ # Set environment variable to point to temp file
90
+ os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = temp_path
 
91
 
92
+ # Initialize BigQuery client
93
+ client = bigquery.Client()
 
 
94
 
95
+ # Clean up temp file
96
+ os.unlink(temp_path)
97
 
98
+ return client
99
  else:
100
+ raise ValueError("GOOGLE_APPLICATION_CREDENTIALS_JSON not found in environment")
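A side note on the credential handling above: writing the JSON to a temporary file works, but google-auth (already a dependency of google-cloud-bigquery) can also build the credentials in memory. A minimal sketch under that assumption; the function name is illustrative and not part of app.py:

```python
# Sketch only: construct the BigQuery client without touching the filesystem.
import json
import os

from google.cloud import bigquery
from google.oauth2 import service_account


def get_bigquery_client_in_memory():
    creds_json = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS_JSON')
    if not creds_json:
        raise ValueError("GOOGLE_APPLICATION_CREDENTIALS_JSON not found in environment")

    info = json.loads(creds_json)
    credentials = service_account.Credentials.from_service_account_info(info)
    # The project ID is read from the service account JSON itself.
    return bigquery.Client(credentials=credentials, project=info.get('project_id'))
```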
101
 
102
 
103
+ def generate_table_union_statements(start_date, end_date):
104
  """
105
+ Generate UNION ALL statements for githubarchive.day tables in date range.
 
 
106
 
107
  Args:
108
+ start_date: Start datetime
109
+ end_date: End datetime
 
110
 
111
+ Returns:
112
+ String with UNION ALL SELECT statements for all tables in range
113
  """
114
+ table_names = []
115
+ current_date = start_date
116
 
117
+ while current_date < end_date:
118
+ table_name = f"`githubarchive.day.{current_date.strftime('%Y%m%d')}`"
119
+ table_names.append(table_name)
120
+ current_date += timedelta(days=1)
121
 
122
+ # Create UNION ALL chain
123
+ union_parts = [f"SELECT * FROM {table}" for table in table_names]
124
+ return " UNION ALL ".join(union_parts)
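For reference, this is roughly what the helper above produces for a three-day window (assuming the function is in scope as defined in this file):

```python
from datetime import datetime, timezone

start = datetime(2024, 1, 1, tzinfo=timezone.utc)
end = datetime(2024, 1, 4, tzinfo=timezone.utc)   # the end date is exclusive

print(generate_table_union_statements(start, end))
# SELECT * FROM `githubarchive.day.20240101` UNION ALL
# SELECT * FROM `githubarchive.day.20240102` UNION ALL
# SELECT * FROM `githubarchive.day.20240103`
```

The real output is a single line; it is wrapped above for readability.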
125
 
126
 
127
+ def fetch_all_pr_metadata_single_query(client, identifiers, start_date, end_date):
128
  """
129
+ Fetch PR metadata for ALL agents using ONE comprehensive BigQuery query.
130
 
131
+ This query fetches:
132
+ 1. PRs authored by agents (user.login matches identifier)
133
+ 2. PRs with co-authored-by (search in body for co-authored-by)
134
+ 3. PRs from branches starting with agent identifier (head.ref pattern)
135
 
136
  Args:
137
+ client: BigQuery client instance
138
+ identifiers: List of GitHub usernames/bot identifiers
139
+ start_date: Start datetime (timezone-aware)
140
+ end_date: End datetime (timezone-aware)
 
141
 
142
  Returns:
143
+ Dictionary mapping agent identifier to list of PR metadata
144
  """
145
+ print(f"\n🔍 Querying BigQuery for ALL {len(identifiers)} agents in ONE QUERY")
146
+ print(f" Time range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
147
+
148
+ # Generate table UNION statements for the time range
149
+ table_union = generate_table_union_statements(start_date, end_date)
150
+
151
+ # Build identifier lists for SQL IN clauses
152
+ # For author matching, include identifiers with [bot]
153
+ author_list = ', '.join([f"'{id}'" for id in identifiers if '[bot]' in id])
154
+
155
+ # For branch matching and co-author, use stripped identifiers (without [bot])
156
+ stripped_identifiers = [id.replace('[bot]', '') for id in identifiers]
157
+
158
+ # Build co-author pattern (search in body)
159
+ coauthor_patterns = ' OR '.join([f"LOWER(JSON_EXTRACT_SCALAR(payload, '$.pull_request.body')) LIKE '%co-authored-by: {id.lower()}%'"
160
+ for id in stripped_identifiers if id])
161
+
162
+ # Build branch pattern
163
+ branch_patterns = ' OR '.join([f"JSON_EXTRACT_SCALAR(payload, '$.pull_request.head.ref') LIKE '{id}/%'"
164
+ for id in stripped_identifiers if id])
165
+
166
+ # Build comprehensive query with CTE
167
+ query = f"""
168
+ WITH pr_events AS (
169
+ -- Get all PR events (opened, closed) for all agents
170
+ SELECT
171
+ JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') as html_url,
172
+ JSON_EXTRACT_SCALAR(payload, '$.pull_request.user.login') as pr_author,
173
+ JSON_EXTRACT_SCALAR(payload, '$.pull_request.head.ref') as branch_name,
174
+ JSON_EXTRACT_SCALAR(payload, '$.pull_request.body') as pr_body,
175
+ JSON_EXTRACT_SCALAR(payload, '$.pull_request.created_at') as created_at,
176
+ CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.merged') AS BOOL) as is_merged,
177
+ JSON_EXTRACT_SCALAR(payload, '$.pull_request.merged_at') as merged_at,
178
+ JSON_EXTRACT_SCALAR(payload, '$.pull_request.closed_at') as closed_at,
179
+ JSON_EXTRACT_SCALAR(payload, '$.action') as action,
180
+ created_at as event_time
181
+ FROM (
182
+ {table_union}
183
+ )
184
+ WHERE
185
+ type = 'PullRequestEvent'
186
+ AND JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') IS NOT NULL
187
+ AND (
188
+ -- Match PRs authored by agents with [bot] suffix
189
+ {f"JSON_EXTRACT_SCALAR(payload, '$.pull_request.user.login') IN ({author_list})" if author_list else "FALSE"}
190
+ {" OR " if author_list and (coauthor_patterns or branch_patterns) else ""}
191
+ -- Match PRs with co-authored-by in body
192
+ {f"({coauthor_patterns})" if coauthor_patterns else ""}
193
+ {" OR " if coauthor_patterns and branch_patterns else ""}
194
+ -- Match PRs with branch names starting with agent identifier
195
+ {f"({branch_patterns})" if branch_patterns else ""}
196
+ )
197
+ ),
198
+
199
+ pr_latest_state AS (
200
+ -- Get the latest state for each PR (most recent event)
201
+ SELECT
202
+ html_url,
203
+ pr_author,
204
+ branch_name,
205
+ pr_body,
206
+ created_at,
207
+ merged_at,
208
+ closed_at,
209
+ ROW_NUMBER() OVER (PARTITION BY html_url ORDER BY event_time DESC) as row_num
210
+ FROM pr_events
211
+ )
212
 
213
+ -- Return deduplicated PR metadata
214
+ SELECT DISTINCT
215
+ html_url,
216
+ pr_author,
217
+ branch_name,
218
+ pr_body,
219
+ created_at,
220
+ merged_at,
221
+ -- Only include closed_at if PR is closed but not merged
222
+ CASE
223
+ WHEN merged_at IS NOT NULL THEN NULL
224
+ ELSE closed_at
225
+ END as closed_at
226
+ FROM pr_latest_state
227
+ WHERE row_num = 1
228
+ ORDER BY created_at DESC
229
+ """
230
 
231
+ print(f" Querying {(end_date - start_date).days} days of GitHub Archive data...")
232
+ print(f" Agents: {', '.join(identifiers[:5])}{'...' if len(identifiers) > 5 else ''}")
233
 
234
+ try:
235
+ query_job = client.query(query)
236
+ results = list(query_job.result())
237
 
238
+ print(f" ✓ Found {len(results)} total PRs across all agents")
239
 
240
+ # Group results by agent
241
242
 
243
+ for row in results:
244
+ # Convert datetime objects to ISO strings
245
+ created_at = row.created_at
246
+ if hasattr(created_at, 'isoformat'):
247
+ created_at = created_at.isoformat()
248
 
249
+ merged_at = row.merged_at
250
+ if hasattr(merged_at, 'isoformat'):
251
+ merged_at = merged_at.isoformat()
252
 
253
+ closed_at = row.closed_at
254
+ if hasattr(closed_at, 'isoformat'):
255
+ closed_at = closed_at.isoformat()
256
 
257
+ pr_data = {
258
+ 'html_url': row.html_url,
259
+ 'created_at': created_at,
260
+ 'merged_at': merged_at,
261
+ 'closed_at': closed_at,
262
+ }
263
 
264
+ # Assign to agent based on author, co-author, or branch pattern
265
+ pr_author = row.pr_author
266
+ branch_name = row.branch_name or ''
267
+ pr_body = (row.pr_body or '').lower()
 
268
 
269
+ # First, try to match by author
270
+ if pr_author and pr_author in identifiers:
271
+ metadata_by_agent[pr_author].append(pr_data)
272
+ else:
273
+ # Try to match by co-author or branch pattern
274
+ for identifier in identifiers:
275
+ stripped_id = identifier.replace('[bot]', '')
276
+ if not stripped_id:
277
+ continue
278
 
279
+ # Check co-author
280
+ if f'co-authored-by: {stripped_id.lower()}' in pr_body:
281
+ metadata_by_agent[identifier].append(pr_data)
282
+ break
283
+
284
+ # Check branch pattern
285
+ if branch_name.startswith(f"{stripped_id}/"):
286
+ metadata_by_agent[identifier].append(pr_data)
287
+ break
288
+
289
+ # Print breakdown by agent
290
+ print(f"\n 📊 Results breakdown by agent:")
291
+ for identifier in identifiers:
292
+ count = len(metadata_by_agent.get(identifier, []))
293
+ if count > 0:
294
+ metadata = metadata_by_agent[identifier]
295
+ merged_count = sum(1 for m in metadata if m['merged_at'] is not None)
296
+ closed_count = sum(1 for m in metadata if m['closed_at'] is not None and m['merged_at'] is None)
297
+ open_count = count - merged_count - closed_count
298
+ print(f" {identifier}: {count} PRs ({merged_count} merged, {closed_count} closed, {open_count} open)")
299
+
300
+ # Convert defaultdict to regular dict
301
+ return dict(metadata_by_agent)
302
 
303
+ except Exception as e:
304
+ print(f" BigQuery error: {str(e)}")
305
+ import traceback
306
+ traceback.print_exc()
307
+ return {}
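One thing to keep in mind about the query construction above: the agent identifiers are interpolated into the SQL as string literals. They come from a curated dataset, so this is workable, but BigQuery query parameters are an alternative if that ever feels fragile. A minimal sketch of the author filter with an array parameter; run_authored_pr_query and authors are illustrative names, not part of the app:

```python
from google.cloud import bigquery


def run_authored_pr_query(client, table_union, authors):
    # Same author filter as above, but passed as a query parameter instead of literals.
    query = f"""
    SELECT JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') AS html_url
    FROM ({table_union})
    WHERE type = 'PullRequestEvent'
      AND JSON_EXTRACT_SCALAR(payload, '$.pull_request.user.login') IN UNNEST(@authors)
    """
    job_config = bigquery.QueryJobConfig(
        query_parameters=[bigquery.ArrayQueryParameter("authors", "STRING", authors)]
    )
    return list(client.query(query, job_config=job_config).result())
```

Table names still have to be interpolated either way, since table identifiers cannot be parameterized.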
308
 
 
 
309
 
310
+ # =============================================================================
311
+ # GITHUB API OPERATIONS (Minimal - Only for Validation)
312
+ # =============================================================================
313
 
314
+ def get_github_token():
315
+ """Get first GitHub token from environment variables."""
316
+ token = os.getenv('GITHUB_TOKEN')
317
+ if not token:
318
+ print("Warning: GITHUB_TOKEN not found. Validation will be limited.")
319
+ return token
320
 
 
321
 
322
+ def validate_github_username(identifier):
323
+ """Verify that a GitHub identifier exists (simple validation)."""
324
+ try:
325
+ token = get_github_token()
326
+ headers = {'Authorization': f'token {token}'} if token else {}
327
+ url = f'https://api.github.com/users/{identifier}'
328
 
329
+ response = requests.get(url, headers=headers, timeout=10)
330
+
331
+ if response.status_code == 200:
332
+ return True, "Username is valid"
333
+ elif response.status_code == 404:
334
+ return False, "GitHub identifier not found"
335
+ else:
336
+ return False, f"Validation error: HTTP {response.status_code}"
337
+ except Exception as e:
338
+ return False, f"Validation error: {str(e)}"
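Example call for the validator above (requires network access and assumes the function is in scope; GITHUB_TOKEN is optional but avoids anonymous rate limits):

```python
ok, message = validate_github_username("octocat")
print(ok, message)   # True Username is valid  (for an identifier that exists)
```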
339
 
340
 
341
+ # =============================================================================
342
+ # PR STATISTICS
343
+ # =============================================================================
344
 
345
  def calculate_pr_stats_from_metadata(metadata_list):
346
  """
347
  Calculate statistics from a list of PR metadata (lightweight objects).
348
+ Works with minimal metadata: html_url, created_at, merged_at, closed_at.
349
 
350
  Returns a dictionary with comprehensive PR metrics.
351
 
 
374
  }
375
 
376
 
377
+ def calculate_monthly_metrics_by_agent(top_n=None):
378
  """
379
+ Calculate monthly metrics for all agents (or top N agents) for visualization.
380
  Loads data directly from SWE-Arena/pr_metadata dataset.
381
 
382
+ Args:
383
+ top_n: If specified, only return metrics for the top N agents by total PRs.
384
+ Agents are ranked by their total PR count across all months.
385
+
386
  Returns:
387
  dict: {
388
  'agents': list of agent names,
 
447
  for month in months:
448
  prs_in_month = month_dict.get(month, [])
449
 
450
+ # Count merged PRs
 
451
  merged_count = sum(1 for pr in prs_in_month if pr.get('merged_at'))
452
 
453
  # Count closed but not merged
 
473
  'closed_not_merged': closed_not_merged_list
474
  }
475
 
476
+ # Filter to top N agents if specified
477
+ agents_list = sorted(list(agent_month_data.keys()))
478
+ if top_n is not None and top_n > 0:
479
+ # Calculate total PRs for each agent across all months
480
+ agent_totals = []
481
+ for agent_name in agents_list:
482
+ total_pr_count = sum(result_data[agent_name]['total_prs'])
483
+ agent_totals.append((agent_name, total_pr_count))
484
+
485
+ # Sort by total PRs (descending) and take top N
486
+ agent_totals.sort(key=lambda x: x[1], reverse=True)
487
+ top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]]
488
+
489
+ # Filter result_data to only include top agents
490
+ result_data = {agent: result_data[agent] for agent in top_agents if agent in result_data}
491
+ agents_list = top_agents
492
+
493
  return {
494
+ 'agents': agents_list,
495
  'months': months,
496
  'data': result_data
497
  }
 
527
  """
528
  Save PR metadata to HuggingFace dataset, organized by [agent_identifier]/YYYY.MM.DD.jsonl.
529
  Each file is stored in the agent's folder and named YYYY.MM.DD.jsonl for that day's PRs.
 
530
 
531
+ This function OVERWRITES existing files completely with fresh data from BigQuery.
532
+ Uses batch upload to avoid rate limit (uploads entire folder in single operation).
533
 
534
  Args:
535
  metadata_list: List of PR metadata dictionaries
536
  agent_identifier: GitHub identifier of the agent (used as folder name)
537
  """
 
538
  import shutil
539
 
 
540
  try:
541
  token = get_hf_token()
542
  if not token:
543
  raise Exception("No HuggingFace token found")
544
 
545
+ api = HfApi(token=token)
546
 
547
+ # Group by date (year, month, day)
548
  grouped = group_metadata_by_date(metadata_list)
549
 
550
+ if not grouped:
551
+ print(f" No valid metadata to save for {agent_identifier}")
552
+ return False
553
+
554
+ # Create a temporary directory for batch upload
555
  temp_dir = tempfile.mkdtemp()
556
+ agent_folder = os.path.join(temp_dir, agent_identifier)
557
+ os.makedirs(agent_folder, exist_ok=True)
558
 
559
  try:
560
+ print(f" 📦 Preparing batch upload for {len(grouped)} daily files...")
561
 
562
+ # Process each daily file
563
  for (pr_year, month, day), day_metadata in grouped.items():
 
564
  filename = f"{agent_identifier}/{pr_year}.{month:02d}.{day:02d}.jsonl"
565
+ local_filename = os.path.join(agent_folder, f"{pr_year}.{month:02d}.{day:02d}.jsonl")
566
 
567
+ # Sort by created_at for better organization
568
+ day_metadata.sort(key=lambda x: x.get('created_at', ''), reverse=True)
569
 
570
+ # Save to temp directory (complete overwrite, no merging)
571
+ save_jsonl(local_filename, day_metadata)
572
+ print(f" Prepared {len(day_metadata)} PRs for {filename}")
573
+
574
+ # Upload entire folder using upload_large_folder (optimized for large files)
575
+ print(f" 📤 Uploading {len(grouped)} files ({len(metadata_list)} total PRs)...")
576
+ api.upload_large_folder(
 
577
  folder_path=temp_dir,
578
  repo_id=PR_METADATA_REPO,
579
+ repo_type="dataset"
 
 
580
  )
581
+ print(f" ✓ Batch upload complete for {agent_identifier}")
582
 
583
+ return True
 
 
584
 
585
+ finally:
586
+ # Always clean up temp directory
587
+ if os.path.exists(temp_dir):
588
+ shutil.rmtree(temp_dir)
589
 
590
  except Exception as e:
591
+ print(f" ✗ Error saving PR metadata: {str(e)}")
592
+ import traceback
593
+ traceback.print_exc()
594
  return False
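group_metadata_by_date is defined elsewhere in app.py; for readers of this diff, a hedged sketch of the behavior the code above relies on, namely bucketing PR records by the calendar date of created_at (the real helper may differ in details such as timezone handling):

```python
from collections import defaultdict
from datetime import datetime


def group_metadata_by_date_sketch(metadata_list):
    # Illustration only, not the app's actual helper.
    grouped = defaultdict(list)
    for pr in metadata_list:
        created_at = pr.get('created_at')
        if not created_at:
            continue
        dt = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
        grouped[(dt.year, dt.month, dt.day)].append(pr)
    return dict(grouped)
```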
595
 
596
 
597
  def load_pr_metadata():
598
  """
599
  Loads PR metadata from the last LEADERBOARD_TIME_FRAME_DAYS only.
 
600
 
601
  Structure: [agent_identifier]/YYYY.MM.DD.jsonl
602
 
 
604
  List of dictionaries with 'agent_identifier' added to each PR metadata.
605
  Only includes PRs within the last LEADERBOARD_TIME_FRAME_DAYS.
606
  """
607
  try:
608
  api = HfApi()
609
  token = get_hf_token()
 
637
  # If date parsing fails, skip this file
638
  continue
639
 
640
+ total_months = LEADERBOARD_TIME_FRAME_DAYS // 30
641
+ print(f"📥 Loading PR metadata from last {total_months} months ({len(relevant_files)} daily files across all agents)...")
642
 
643
  all_metadata = []
644
  for filename in relevant_files:
 
680
  except Exception as e:
681
  print(f" Warning: Could not load {filename}: {str(e)}")
682
 
683
+ print(f"✓ Loaded {len(all_metadata)} total PRs from last {total_months} months")
684
  return all_metadata
685
 
686
  except Exception as e:
687
+ total_months = LEADERBOARD_TIME_FRAME_DAYS // 30
688
+ print(f"✗ Error loading PR metadata from last {total_months} months: {str(e)}")
689
  return []
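Reading one of the daily files back is straightforward with hf_hub_download, which is already imported at the top of app.py. A hedged sketch; the path is hypothetical, and a token is only needed if the dataset is private:

```python
local_path = hf_hub_download(
    repo_id=PR_METADATA_REPO,
    filename="example-agent[bot]/2024.05.01.jsonl",   # hypothetical path
    repo_type="dataset",
    token=get_hf_token(),
)
daily_prs = load_jsonl(local_path)   # list of {"html_url", "created_at", "merged_at", "closed_at"}
```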
690
 
691
 
692
+ def get_daily_files_last_time_frame(agent_identifier):
693
  """
694
+ Get list of daily file paths for an agent from the configured time frame.
695
 
696
  Args:
697
  agent_identifier: GitHub identifier of the agent
 
698
 
699
  Returns:
700
  List of file paths in format: [agent_identifier]/YYYY.MM.DD.jsonl
 
703
  api = HfApi()
704
  token = get_hf_token()
705
 
706
+ # Calculate date range using configured time frame
707
  today = datetime.now(timezone.utc)
708
+ cutoff_date = today - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)
709
 
710
  # List all files in the repository
711
  files = api.list_repo_files(repo_id=PR_METADATA_REPO, repo_type="dataset")
 
731
  file_year, file_month, file_day = map(int, date_components)
732
  file_date = datetime(file_year, file_month, file_day, tzinfo=timezone.utc)
733
 
734
+ # Include if within configured time frame
735
+ if cutoff_date <= file_date <= today:
736
  recent_files.append(filename)
737
  except Exception:
738
  continue
 
744
  return []
745
 
746
 
747
  # =============================================================================
748
  # HUGGINGFACE DATASET OPERATIONS
749
  # =============================================================================
 
773
 
774
  with open(file_path, 'r') as f:
775
  agent_data = json.load(f)
776
+
777
+ # Extract github_identifier from filename (remove .json extension)
778
+ github_identifier = json_file.replace('.json', '')
779
+ agent_data['github_identifier'] = github_identifier
780
+
781
  agents.append(agent_data)
782
 
783
  except Exception as e:
 
792
  return None
793
 
794
 
 
 
795
  def get_hf_token():
796
  """Get HuggingFace token from environment variables."""
797
  token = os.getenv('HF_TOKEN')
 
881
  return False
882
 
883
 
 
 
884
  # =============================================================================
885
  # DATA MANAGEMENT
886
  # =============================================================================
887
 
888
+ def mine_all_agents():
889
  """
890
+ Mine PR metadata for all agents within UPDATE_TIME_FRAME_DAYS and save to HuggingFace.
891
+ Uses ONE BigQuery query for ALL agents (most efficient approach).
892
+
893
+ This runs weekly to refresh the data with the latest PRs from the past UPDATE_TIME_FRAME_DAYS days.
894
  """
895
+ # Load agent metadata from HuggingFace
896
+ agents = load_agents_from_hf()
897
+ if not agents:
898
+ print("No agents found in HuggingFace dataset")
899
+ return
900
+
901
+ # Extract all identifiers
902
+ identifiers = [agent['github_identifier'] for agent in agents if agent.get('github_identifier')]
903
+ if not identifiers:
904
+ print("No valid agent identifiers found")
905
+ return
906
+
907
  print(f"\n{'='*80}")
908
+ print(f"Starting PR metadata mining for {len(identifiers)} agents")
909
+ print(f"Time frame: Last {UPDATE_TIME_FRAME_DAYS} days")
910
+ print(f"Data source: BigQuery + GitHub Archive (ONE QUERY FOR ALL AGENTS)")
911
+ print(f"{'='*80}\n")
912
 
913
+ # Initialize BigQuery client
914
  try:
915
+ client = get_bigquery_client()
916
+ except Exception as e:
917
+ print(f"✗ Failed to initialize BigQuery client: {str(e)}")
918
+ return
 
919
 
920
+ # Define time range: past UPDATE_TIME_FRAME_DAYS (excluding today)
921
+ current_time = datetime.now(timezone.utc)
922
+ end_date = current_time.replace(hour=0, minute=0, second=0, microsecond=0)
923
+ start_date = end_date - timedelta(days=UPDATE_TIME_FRAME_DAYS)
924
 
925
+ try:
926
+ all_metadata = fetch_all_pr_metadata_single_query(
927
+ client, identifiers, start_date, end_date
928
+ )
929
+ except Exception as e:
930
+ print(f"✗ Error during BigQuery fetch: {str(e)}")
931
+ import traceback
932
+ traceback.print_exc()
933
+ return
934
 
935
+ # Save results for each agent
936
+ print(f"\n{'='*80}")
937
+ print(f"💾 Saving results to HuggingFace for each agent...")
938
+ print(f"{'='*80}\n")
939
 
940
+ success_count = 0
941
+ error_count = 0
942
+ no_data_count = 0
943
+
944
+ for i, agent in enumerate(agents, 1):
945
+ identifier = agent.get('github_identifier')
946
+ agent_name = agent.get('agent_name', 'Unknown')
947
 
948
+ if not identifier:
949
+ print(f"[{i}/{len(agents)}] Skipping agent without identifier")
950
+ error_count += 1
951
+ continue
 
 
952
 
953
+ metadata = all_metadata.get(identifier, [])
954
 
955
+ print(f"[{i}/{len(agents)}] {agent_name} ({identifier}):")
956
+
957
+ try:
958
+ if metadata:
959
+ print(f" 💾 Saving {len(metadata)} PR records...")
960
+ if save_pr_metadata_to_hf(metadata, identifier):
961
+ success_count += 1
962
+ else:
963
+ error_count += 1
964
+ else:
965
+ print(f" No PRs found")
966
+ no_data_count += 1
967
+
968
+ except Exception as e:
969
+ print(f" ✗ Error saving {identifier}: {str(e)}")
970
+ import traceback
971
+ traceback.print_exc()
972
+ error_count += 1
973
+ continue
974
+
975
+ print(f"\n{'='*80}")
976
+ print(f"✅ Mining complete!")
977
+ print(f" Total agents: {len(agents)}")
978
+ print(f" Successfully saved: {success_count}")
979
+ print(f" No data (skipped): {no_data_count}")
980
+ print(f" Errors: {error_count}")
981
+ print(f" BigQuery queries executed: 1")
982
+ print(f"{'='*80}\n")
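The time window defined near the top of mine_all_agents resolves to the UPDATE_TIME_FRAME_DAYS days ending at last midnight UTC, which keeps today's partially-filled githubarchive.day table out of the query. A concrete example with a made-up clock time:

```python
from datetime import datetime, timezone, timedelta

UPDATE_TIME_FRAME_DAYS = 30
now = datetime(2024, 6, 15, 13, 45, tzinfo=timezone.utc)      # pretend "current time"
end_date = now.replace(hour=0, minute=0, second=0, microsecond=0)
start_date = end_date - timedelta(days=UPDATE_TIME_FRAME_DAYS)
print(start_date.date(), "->", end_date.date())               # 2024-05-16 -> 2024-06-15
```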
983
 
984
 
985
  def construct_leaderboard_from_metadata():
 
1025
  # UI FUNCTIONS
1026
  # =============================================================================
1027
 
1028
+ def generate_color(index, total):
1029
+ """Generate distinct colors using HSL color space for better distribution"""
1030
+ hue = (index * 360 / total) % 360
1031
+ saturation = 70 + (index % 3) * 10 # Vary saturation slightly
1032
+ lightness = 45 + (index % 2) * 10 # Vary lightness slightly
1033
+ return f'hsl({hue}, {saturation}%, {lightness}%)'
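For illustration, the colors this helper yields for five agents, assuming the function above is in scope (the values follow directly from the formula):

```python
palette = [generate_color(i, 5) for i in range(5)]
# ['hsl(0.0, 70%, 45%)', 'hsl(72.0, 80%, 55%)', 'hsl(144.0, 90%, 45%)',
#  'hsl(216.0, 70%, 55%)', 'hsl(288.0, 80%, 45%)']
```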
1034
+
1035
+
1036
+ def create_monthly_metrics_plot(top_n=5):
1037
  """
1038
  Create a Plotly figure with dual y-axes showing:
1039
  - Left y-axis: Acceptance rate (%) as line curves
1040
  - Right y-axis: Total PRs created as bar charts
1041
 
1042
  Each agent gets a unique color for both their line and bars.
1043
+
1044
+ Args:
1045
+ top_n: Number of top agents to show (default: 5)
1046
  """
1047
+ metrics = calculate_monthly_metrics_by_agent(top_n=top_n)
1048
 
1049
  if not metrics['agents'] or not metrics['months']:
1050
  # Return an empty figure with a message
 
1065
  # Create figure with secondary y-axis
1066
  fig = make_subplots(specs=[[{"secondary_y": True}]])
1067
 
1068
  agents = metrics['agents']
1069
  months = metrics['months']
1070
  data = metrics['data']
1071
 
1072
+ # Generate colors for all agents using HSL
1073
+ agent_colors = {agent: generate_color(idx, len(agents)) for idx, agent in enumerate(agents)}
1074
+
1075
  # Add traces for each agent
1076
  for idx, agent_name in enumerate(agents):
1077
+ color = agent_colors[agent_name]
1078
  agent_data = data[agent_name]
1079
 
1080
  # Add line trace for acceptance rate (left y-axis)
 
1194
  return df
1195
 
1196
 
 
 
1197
  def submit_agent(identifier, agent_name, organization, description, website):
1198
  """
1199
  Submit a new agent to the leaderboard.
1200
  Validates input and saves submission.
1201
+ PR data will be populated by the weekly mining task.
1202
  """
1203
  # Validate required fields
1204
  if not identifier or not identifier.strip():
 
1242
  if not save_agent_to_hf(submission):
1243
  return "❌ Failed to save submission", get_leaderboard_dataframe(), create_monthly_metrics_plot()
1244
 
1245
+ success_msg = f"✅ Successfully submitted {agent_name}!\n\nPR data will be populated by the weekly mining task (runs every Monday at 12:00 AM UTC)."
1246
  return success_msg, get_leaderboard_dataframe(), create_monthly_metrics_plot()
1247
 
1248
 
 
1250
  # GRADIO APPLICATION
1251
  # =============================================================================
1252
 
1253
+ print(f"\n🚀 Starting SWE Agent PR Leaderboard")
1254
+ print(f" Leaderboard time frame: {LEADERBOARD_TIME_FRAME_DAYS} days ({LEADERBOARD_TIME_FRAME_DAYS // 30} months)")
1255
+ print(f" Mining window: last {UPDATE_TIME_FRAME_DAYS} days, refreshed weekly\n")
1256
 
1257
+ # Start APScheduler for weekly PR mining at 12:00 AM UTC every Monday
1258
  scheduler = BackgroundScheduler(timezone="UTC")
1259
  scheduler.add_job(
1260
+ mine_all_agents,
1261
+ trigger=CronTrigger(day_of_week='mon', hour=0, minute=0), # 12:00 AM UTC every Monday
1262
+ id='weekly_pr_mining',
1263
+ name='Weekly PR Mining',
1264
  replace_existing=True
1265
  )
1266
  scheduler.start()
1267
+ print(f"✓ Scheduler started: Weekly PR Mining at 12:00 AM UTC every Monday (mines last {UPDATE_TIME_FRAME_DAYS} days)")
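An optional variant, not part of this commit: APScheduler's add_job also accepts next_run_time, which could be used to fire one mining pass immediately at startup in addition to the weekly cron. A sketch of the same call with that extra argument:

```python
from datetime import datetime, timezone

scheduler.add_job(
    mine_all_agents,
    trigger=CronTrigger(day_of_week='mon', hour=0, minute=0),
    id='weekly_pr_mining',
    name='Weekly PR Mining',
    replace_existing=True,
    next_run_time=datetime.now(timezone.utc),  # also run once right away
)
```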
1268
 
1269
  # Create Gradio interface
1270
  with gr.Blocks(title="SWE Agent PR Leaderboard", theme=gr.themes.Soft()) as app:
1271
+ total_months = LEADERBOARD_TIME_FRAME_DAYS // 30
1272
+
1273
  gr.Markdown("# 🏆 SWE Agent PR Leaderboard")
1274
+ gr.Markdown(f"Track and compare GitHub pull request statistics for SWE agents (last {total_months} months)")
1275
+
1276
  with gr.Tabs():
1277
+
1278
  # Leaderboard Tab
1279
  with gr.Tab("📊 Leaderboard"):
1280
+ gr.Markdown(f"*All statistics are based on PRs from the last {total_months} months*")
1281
 
1282
  leaderboard_table = Leaderboard(
1283
  value=get_leaderboard_dataframe(),
1284
  datatype=LEADERBOARD_COLUMNS,
1285
  search_columns=["Agent Name", "Website"],
1286
+ filter_columns=[
1287
+ ColumnFilter(
1288
+ "Acceptance Rate (%)",
1289
+ min=0,
1290
+ max=100,
1291
+ default=[0, 100],
1292
+ type="slider",
1293
+ label="Acceptance Rate (%)"
1294
+ )
1295
+ ]
1296
  )
1297
 
1298
+ gr.Markdown("### Monthly Metrics - Top 5 Agents")
1299
+ gr.Markdown("Track acceptance rates and PR activity over time for the most active agents")
1300
 
1301
  monthly_plot = gr.Plot(
1302
+ value=create_monthly_metrics_plot(top_n=5),
1303
  label="Monthly PR Metrics"
1304
  )
1305
+
1306
  # Submit Agent Tab
1307
  with gr.Tab("➕ Submit Agent"):
1308
+
1309
  gr.Markdown("### Submit Your Agent")
1310
+ gr.Markdown("Fill in the details below to add your agent to the leaderboard.")
1311
+
1312
  with gr.Row():
1313
  with gr.Column():
1314
  github_input = gr.Textbox(
 
1319
  label="Agent Name*",
1320
  placeholder="Your agent's display name"
1321
  )
1322
+
1323
  with gr.Column():
1324
  organization_input = gr.Textbox(
1325
  label="Organization*",
 
1331
  lines=3
1332
  )
1333
  website_input = gr.Textbox(
1334
+ label="Website*",
1335
  placeholder="https://your-agent-website.com"
1336
  )
1337
+
1338
  submit_button = gr.Button(
1339
  "Submit Agent",
1340
  variant="primary"
 
1343
  label="Submission Status",
1344
  interactive=False
1345
  )
1346
+
1347
  # Event handler
1348
  submit_button.click(
1349
  fn=submit_agent,
 
1354
 
1355
  # Launch application
1356
  if __name__ == "__main__":
1357
+ app.launch()