zhiminy committed
Commit 722854c · 1 Parent(s): 98e97d8
Files changed (1):
  1. app.py +551 -134
app.py CHANGED
@@ -4,7 +4,7 @@ import json
 import os
 import time
 import requests
-from datetime import datetime, timezone
+from datetime import datetime, timezone, timedelta
 from collections import defaultdict
 from huggingface_hub import HfApi, hf_hub_download
 from datasets import load_dataset, Dataset
@@ -12,17 +12,37 @@ import threading
 from dotenv import load_dotenv
 import pandas as pd
 import random
+import argparse
 
 # Load environment variables
 load_dotenv()
 
+# Parse command-line arguments
+parser = argparse.ArgumentParser(description='SWE Agent PR Leaderboard')
+parser.add_argument('--debug', '--DEBUG', action='store_true',
+                    help='Enable debug mode (limits PR retrieval to 10 per query pattern)')
+parser.add_argument('--no-debug', '--production', action='store_true',
+                    help='Explicitly disable debug mode (force production mode)')
+args = parser.parse_args()
+
 # =============================================================================
 # CONFIGURATION
 # =============================================================================
 
+# DEBUG MODE: Set to True to limit PR retrieval for testing
+# When enabled, only fetches up to 10 PRs per query pattern per agent
+# Priority: 1) Command-line args, 2) Environment variable, 3) Default (False)
+if args.no_debug:
+    DEBUG_MODE = False
+elif args.debug:
+    DEBUG_MODE = True
+else:
+    DEBUG_MODE = os.getenv('DEBUG_MODE', 'False').lower() in ('true', '1', 'yes')
+
 CACHE_FILE = "agent_pr_cache.jsonl"
 AGENTS_REPO = "SWE-Arena/pr_agents"  # HuggingFace dataset for agent metadata
 LEADERBOARD_REPO = "SWE-Arena/pr_leaderboard"
+PR_METADATA_REPO = "SWE-Arena/pr_metadata"  # HuggingFace dataset for PR metadata
 UPDATE_INTERVAL = 86400  # 24 hours in seconds
 
 LEADERBOARD_COLUMNS = [
@@ -31,7 +51,6 @@ LEADERBOARD_COLUMNS = [
     ("Total PRs", "number"),
     ("Merged PRs", "number"),
     ("Acceptance Rate (%)", "number"),
-    ("Median Merge Duration (minutes)", "number"),
 ]
 
 # =============================================================================
@@ -191,7 +210,7 @@ def validate_github_username(identifier):
         token = get_github_token()
         headers = {'Authorization': f'token {token}'} if token else {}
         url = f'https://api.github.com/users/{identifier}'
-        response = request_with_backoff('GET', url, headers=headers, max_retries=6)
+        response = request_with_backoff('GET', url, headers=headers, max_retries=1)
         if response is None:
            return False, "Validation error: network/rate limit exhausted"
         if response.status_code == 200:
@@ -204,18 +223,151 @@
         return False, f"Validation error: {str(e)}"
 
 
-def fetch_all_prs(identifier, token=None):
+def fetch_prs_with_time_partition(base_query, start_date, end_date, headers, prs_by_id, debug_limit=None):
+    """
+    Fetch PRs within a specific time range using time-based partitioning.
+    Recursively splits the time range if hitting the 1000-result limit.
+
+    Args:
+        debug_limit: If set, stops fetching after this many PRs (for testing)
+
+    Returns the number of PRs found in this time partition.
+    """
+    # Format dates for GitHub search (YYYY-MM-DD)
+    start_str = start_date.strftime('%Y-%m-%d')
+    end_str = end_date.strftime('%Y-%m-%d')
+
+    # Add date range to query
+    query = f'{base_query} created:{start_str}..{end_str}'
+
+    print(f" Searching range {start_str} to {end_str}...")
+
+    page = 1
+    per_page = 100
+    total_in_partition = 0
+
+    while True:
+        # Check debug limit
+        if debug_limit is not None and total_in_partition >= debug_limit:
+            print(f" 🐛 DEBUG MODE: Reached limit of {debug_limit} PRs, stopping...")
+            return total_in_partition
+        url = 'https://api.github.com/search/issues'
+        params = {
+            'q': query,
+            'per_page': per_page,
+            'page': page,
+            'sort': 'created',
+            'order': 'asc'
+        }
+
+        try:
+            response = request_with_backoff('GET', url, headers=headers, params=params)
+            if response is None:
+                print(f" Error: retries exhausted for range {start_str} to {end_str}")
+                return total_in_partition
+
+            if response.status_code != 200:
+                print(f" Error: HTTP {response.status_code} for range {start_str} to {end_str}")
+                return total_in_partition
+
+            data = response.json()
+            total_count = data.get('total_count', 0)
+            items = data.get('items', [])
+
+            if not items:
+                break
+
+            # Add PRs to global dict
+            for pr in items:
+                pr_id = pr.get('id')
+                if pr_id and pr_id not in prs_by_id:
+                    prs_by_id[pr_id] = pr
+                    total_in_partition += 1
+
+            # Check if we hit the 1000-result limit
+            if total_count > 1000 and page == 10:
+                print(f" ⚠️ Hit 1000-result limit ({total_count} total). Splitting time range...")
+
+                # Calculate midpoint
+                time_diff = end_date - start_date
+                mid_date = start_date + time_diff / 2
+
+                # Recursively fetch both halves
+                count1 = fetch_prs_with_time_partition(base_query, start_date, mid_date, headers, prs_by_id, debug_limit)
+                count2 = fetch_prs_with_time_partition(base_query, mid_date + timedelta(days=1), end_date, headers, prs_by_id, debug_limit)
+
+                return count1 + count2
+
+            # Normal pagination: check if there are more pages
+            if len(items) < per_page or page >= 10:
+                break
+
+            page += 1
+            time.sleep(0.5)  # Courtesy delay between pages
+
+        except Exception as e:
+            print(f" Error fetching range {start_str} to {end_str}: {str(e)}")
+            return total_in_partition
+
+    if total_in_partition > 0:
+        print(f" ✓ Found {total_in_partition} PRs in range {start_str} to {end_str}")
+
+    return total_in_partition
+
+
+def extract_pr_metadata(pr, agent_name):
+    """
+    Extract minimal PR metadata for efficient storage.
+    Only keeps essential fields: html_url, created_at, merged_at, closed_at, agent_name.
     """
-    Fetch all pull requests associated with a GitHub user/bot.
+    pull_request = pr.get('pull_request', {})
+
+    # Extract dates
+    created_at = pr.get('created_at')
+    merged_at = pull_request.get('merged_at')
+    closed_at = pr.get('closed_at')
+
+    # Only store closed_at if PR is closed but not merged
+    if merged_at:
+        closed_at = None  # Don't store redundant info
+
+    return {
+        'html_url': pr.get('html_url'),
+        'created_at': created_at,
+        'merged_at': merged_at,
+        'closed_at': closed_at,
+        'agent_name': agent_name
+    }
+
+
+def fetch_all_prs_metadata(identifier, agent_name, token=None, start_from_date=None):
+    """
+    Fetch ALL pull requests associated with a GitHub user/bot.
+    Returns lightweight metadata instead of full PR objects.
+
+    Uses time-based partitioning to bypass GitHub's 1000-result limit per query.
     Searches using multiple query patterns:
     - is:pr author:{identifier} (authored by the user)
     - is:pr head:{identifier}/ (branch names starting with identifier)
     - is:pr "co-authored-by: {identifier}" (co-authored commits)
 
-    Uses pagination to retrieve all results and deduplicates by PR ID.
+    Args:
+        identifier: GitHub username/bot identifier
+        agent_name: Human-readable agent name for metadata
+        token: GitHub API token
+        start_from_date: Only fetch PRs created after this date (for incremental updates)
+
+    Returns:
+        List of minimal PR metadata dictionaries
     """
     headers = {'Authorization': f'token {token}'} if token else {}
 
+    # Debug mode: limit PR retrieval for testing
+    debug_limit_per_pattern = 10 if DEBUG_MODE else None
+
+    if DEBUG_MODE:
+        print(f"\n🐛 DEBUG MODE ENABLED: Limiting to {debug_limit_per_pattern} PRs per query pattern")
+
     # Define all query patterns to search
     query_patterns = [
         f'is:pr author:{identifier}',
@@ -226,129 +378,256 @@ def fetch_all_prs(identifier, token=None):
     # Use a dict to deduplicate PRs by ID
     prs_by_id = {}
 
-    for query in query_patterns:
-        print(f"Searching with query: {query}")
-        page = 1
-        per_page = 100
-
-        while True:
-            url = f'https://api.github.com/search/issues'
-            params = {
-                'q': query,
-                'per_page': per_page,
-                'page': page
-            }
+    # Define time range: start from specified date or GitHub founding
+    start_date = start_from_date or datetime(2008, 1, 1, tzinfo=timezone.utc)
+    end_date = datetime.now(timezone.utc)
+
+    for query_pattern in query_patterns:
+        print(f"\n🔍 Searching with query: {query_pattern}")
+        print(f" Time range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
+
+        pattern_start_time = time.time()
+        initial_count = len(prs_by_id)
+
+        # Fetch with time partitioning
+        prs_found = fetch_prs_with_time_partition(
+            query_pattern,
+            start_date,
+            end_date,
+            headers,
+            prs_by_id,
+            debug_limit_per_pattern
+        )
 
-            try:
-                response = request_with_backoff('GET', url, headers=headers, params=params)
-                if response is None:
-                    print(f"Error fetching PRs for query '{query}': retries exhausted")
-                    break
+        pattern_duration = time.time() - pattern_start_time
+        new_prs = len(prs_by_id) - initial_count
 
-                if response.status_code != 200:
-                    print(f"Error fetching PRs for query '{query}': HTTP {response.status_code}")
-                    break
+        print(f" ✓ Pattern complete: {new_prs} new PRs found ({prs_found} total fetched, {len(prs_by_id) - initial_count - (prs_found - new_prs)} duplicates)")
+        print(f" ⏱️ Time taken: {pattern_duration:.1f} seconds")
 
-                data = response.json()
-                items = data.get('items', [])
+        # Delay between different query patterns (shorter in debug mode)
+        time.sleep(0.2 if DEBUG_MODE else 1.0)
 
-                if not items:
-                    break
+    # Convert to lightweight metadata
+    all_prs = list(prs_by_id.values())
+    if DEBUG_MODE:
+        print(f"\n✅ COMPLETE (DEBUG MODE): Found {len(all_prs)} unique PRs for {identifier}")
+        print(f" Note: In production mode, this would fetch ALL PRs")
+    else:
+        print(f"\n✅ COMPLETE: Found {len(all_prs)} unique PRs for {identifier}")
+    print(f"📦 Extracting minimal metadata...")
 
-                # Add PRs to dict, using ID as key to avoid duplicates
-                for pr in items:
-                    pr_id = pr.get('id')
-                    if pr_id and pr_id not in prs_by_id:
-                        prs_by_id[pr_id] = pr
+    metadata_list = [extract_pr_metadata(pr, agent_name) for pr in all_prs]
 
-                # Check if there are more pages
-                if len(items) < per_page:
-                    break
+    # Calculate memory savings
+    import sys
+    original_size = sys.getsizeof(str(all_prs))
+    metadata_size = sys.getsizeof(str(metadata_list))
+    savings_pct = ((original_size - metadata_size) / original_size * 100) if original_size > 0 else 0
 
-                page += 1
-                time.sleep(0.5)  # Courtesy delay between pages
+    print(f"💾 Memory efficiency: {original_size // 1024}KB → {metadata_size // 1024}KB (saved {savings_pct:.1f}%)")
 
-            except Exception as e:
-                print(f"Error fetching PRs for query '{query}': {str(e)}")
-                break
+    return metadata_list
 
-        # Delay between different query patterns
-        time.sleep(0.5)
 
-    # Convert dict back to list
-    all_prs = list(prs_by_id.values())
-    print(f"Found {len(all_prs)} unique PRs for {identifier}")
+def calculate_pr_stats_from_metadata(metadata_list):
+    """
+    Calculate statistics from a list of PR metadata (lightweight objects).
+    Works with minimal metadata: html_url, created_at, merged_at, closed_at, agent_name.
 
-    return all_prs
+    Returns a dictionary with comprehensive PR metrics.
 
+    Acceptance rate is calculated as:
+        merged PRs / (merged PRs + closed but not merged PRs) * 100
 
-def calculate_pr_stats(prs):
-    """
-    Calculate statistics from a list of pull requests.
-    Returns a dictionary with comprehensive PR metrics.
+    This only counts PRs where a decision has been made (either merged or rejected/closed).
     """
-    total_prs = len(prs)
-    merged = 0
-    repos = set()
-    merged_times = []  # Store merged times in minutes for merged PRs
-
-    for pr in prs:
-        # Track repository information
-        repo_url = pr.get('repository_url', '')
-        if repo_url:
-            repo_name = '/'.join(repo_url.split('/')[-2:])
-            repos.add(repo_name)
-
-        # Track PR status
-        state = pr.get('state')
-        if state == 'closed':
-            pull_request = pr.get('pull_request', {})
-            merged_at = pull_request.get('merged_at')
-            if merged_at:
-                merged += 1
-
-                # Calculate merged time (creation to merge)
-                try:
-                    created_at = pr.get('created_at')
-                    if created_at and merged_at:
-                        created_dt = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
-                        merged_dt = datetime.fromisoformat(merged_at.replace('Z', '+00:00'))
-                        merged_time_minutes = (merged_dt - created_dt).total_seconds() / 60  # Convert to minutes
-                        merged_times.append(merged_time_minutes)
-                except Exception as e:
-                    print(f"Warning: Could not calculate merged time for PR: {e}")
-
-    acceptance_rate = (merged / total_prs * 100) if total_prs > 0 else 0
-
-    # Calculate median merged time
-    median_merged_time = None
-    if merged_times:
-        merged_times.sort()
-        n = len(merged_times)
-        if n % 2 == 0:
-            median_merged_time = (merged_times[n // 2 - 1] + merged_times[n // 2]) / 2
-        else:
-            median_merged_time = merged_times[n // 2]
-        median_merged_time = round(median_merged_time, 2)
-
+    total_prs = len(metadata_list)
+    merged = sum(1 for pr_meta in metadata_list if pr_meta.get('merged_at'))
+
+    # Count closed PRs (rejected) - those with closed_at but no merged_at
+    closed_not_merged = sum(1 for pr_meta in metadata_list
+                            if pr_meta.get('closed_at') and not pr_meta.get('merged_at'))
+
+    # Total decisions made = merged + closed (rejected)
+    total_decisions = merged + closed_not_merged
+
+    # Calculate acceptance rate based on decisions made
+    acceptance_rate = (merged / total_decisions * 100) if total_decisions > 0 else 0
+
     return {
         'total_prs': total_prs,
         'merged': merged,
         'acceptance_rate': round(acceptance_rate, 2),
-        'median_merged_time': median_merged_time,
     }
 
 
-def fetch_agent_stats(identifier, token=None):
+# =============================================================================
+# PR METADATA STORAGE & RETRIEVAL
+# =============================================================================
+
+def group_metadata_by_year_month(metadata_list):
     """
-    Fetch and calculate PR statistics for a single agent.
-    Returns dictionary with all stats and metadata.
+    Group PR metadata by year.month for efficient storage.
+    Returns dict: {(year, month): [metadata_list]}
     """
-    print(f"Fetching data for {identifier}...")
-    prs = fetch_all_prs(identifier, token)
-    stats = calculate_pr_stats(prs)
-    stats['github_identifier'] = identifier
-    return stats
+    grouped = defaultdict(list)
+
+    for pr_meta in metadata_list:
+        created_at = pr_meta.get('created_at')
+        if not created_at:
+            continue
+
+        try:
+            dt = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
+            key = (dt.year, dt.month)
+            grouped[key].append(pr_meta)
+        except Exception as e:
+            print(f"Warning: Could not parse date '{created_at}': {e}")
+
+    return dict(grouped)
+
+
+def save_pr_metadata_to_hf(metadata_list):
+    """
+    Save PR metadata to HuggingFace dataset, organized by year.month.
+    Each file is named YYYY.MM.jsonl and contains all PRs created in that month.
+
+    This function APPENDS new metadata and DEDUPLICATES by html_url.
+    """
+    try:
+        token = get_hf_token()
+        if not token:
+            raise Exception("No HuggingFace token found")
+
+        api = HfApi()
+
+        # Group by year.month
+        grouped = group_metadata_by_year_month(metadata_list)
+
+        for (year, month), month_metadata in grouped.items():
+            filename = f"{year}.{month:02d}.jsonl"
+            print(f"📤 Uploading {len(month_metadata)} PRs to {filename}...")
+
+            # Download existing file if it exists
+            existing_metadata = []
+            try:
+                file_path = hf_hub_download(
+                    repo_id=PR_METADATA_REPO,
+                    filename=filename,
+                    repo_type="dataset",
+                    token=token
+                )
+                existing_metadata = load_jsonl(file_path)
+                print(f" Found {len(existing_metadata)} existing PRs in {filename}")
+            except Exception:
+                print(f" No existing file found for {filename}, creating new")
+
+            # Merge and deduplicate by html_url
+            existing_by_url = {meta['html_url']: meta for meta in existing_metadata if meta.get('html_url')}
+            new_by_url = {meta['html_url']: meta for meta in month_metadata if meta.get('html_url')}
+
+            # Update with new data (new data overwrites old)
+            existing_by_url.update(new_by_url)
+            merged_metadata = list(existing_by_url.values())
+
+            # Save locally
+            save_jsonl(filename, merged_metadata)
+
+            # Upload to HuggingFace
+            api.upload_file(
+                path_or_fileobj=filename,
+                path_in_repo=filename,
+                repo_id=PR_METADATA_REPO,
+                repo_type="dataset",
+                token=token
+            )
+
+            # Clean up local file
+            os.remove(filename)
+
+            print(f" ✓ Saved {len(merged_metadata)} total PRs to {filename}")
+
+        return True
+
+    except Exception as e:
+        print(f"✗ Error saving PR metadata: {str(e)}")
+        return False
+
+
+def load_pr_metadata_for_year(year):
+    """
+    Load all PR metadata for a specific year from HuggingFace.
+    Returns list of all PR metadata from that year.
+    """
+    try:
+        api = HfApi()
+        token = get_hf_token()
+
+        # List all files in the repository
+        files = api.list_repo_files(repo_id=PR_METADATA_REPO, repo_type="dataset")
+
+        # Filter for files matching the year pattern (e.g., 2025.01.jsonl, 2025.02.jsonl)
+        year_pattern = f"{year}."
+        year_files = [f for f in files if f.startswith(year_pattern) and f.endswith('.jsonl')]
+
+        print(f"📥 Loading PR metadata for {year} ({len(year_files)} files)...")
+
+        all_metadata = []
+        for filename in year_files:
+            try:
+                file_path = hf_hub_download(
+                    repo_id=PR_METADATA_REPO,
+                    filename=filename,
+                    repo_type="dataset",
+                    token=token
+                )
+                month_metadata = load_jsonl(file_path)
+                all_metadata.extend(month_metadata)
+                print(f" ✓ Loaded {len(month_metadata)} PRs from {filename}")
+            except Exception as e:
+                print(f" Warning: Could not load {filename}: {str(e)}")
+
+        print(f"✓ Loaded {len(all_metadata)} total PRs for {year}")
+        return all_metadata
+
+    except Exception as e:
+        print(f"✗ Error loading PR metadata for {year}: {str(e)}")
+        return []
+
+
+def get_latest_pr_date_for_agent(agent_name, current_year):
+    """
+    Get the latest PR creation date for an agent from stored metadata.
+    Used for incremental updates - only fetch PRs newer than this date.
+
+    Returns datetime or None if no existing PRs found.
+    """
+    try:
+        metadata = load_pr_metadata_for_year(current_year)
+
+        # Filter for this agent
+        agent_prs = [pr for pr in metadata if pr.get('agent_name') == agent_name]
+
+        if not agent_prs:
+            return None
+
+        # Find latest created_at
+        latest_date = None
+        for pr in agent_prs:
+            created_at = pr.get('created_at')
+            if created_at:
+                try:
+                    dt = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
+                    if latest_date is None or dt > latest_date:
+                        latest_date = dt
+                except Exception:
+                    continue
+
+        return latest_date
+
+    except Exception:
+        return None
 
 
 # =============================================================================
@@ -503,12 +782,20 @@ def save_leaderboard_to_hf(cache_dict):
 # DATA MANAGEMENT
 # =============================================================================
 
-def update_all_agents():
+def update_all_agents_incremental():
     """
-    Update PR statistics for all agents from HuggingFace dataset.
+    Memory-efficient incremental update of PR statistics for all agents.
+
+    Strategy:
+    1. For each agent, check latest PR date from stored metadata
+    2. Only fetch NEW PRs created after that date
+    3. Store minimal metadata (not full PR objects) to avoid storage limits
+    4. Construct leaderboard from stored metadata
+
     Returns dictionary of all agent data with current stats.
     """
     token = get_github_token()
+    current_year = datetime.now().year
 
     # Load agent metadata from HuggingFace
     agents = load_agents_from_hf()
@@ -523,17 +810,54 @@
     # Update each agent
     for agent in agents:
         identifier = agent.get('github_identifier')
+        agent_name = agent.get('agent_name', 'Unknown')
+
        if not identifier:
            print(f"Warning: Skipping agent without identifier: {agent}")
            continue
 
        try:
-            # Fetch fresh PR statistics
-            stats = fetch_agent_stats(identifier, token)
+            print(f"\n{'='*80}")
+            print(f"Processing: {agent_name} ({identifier})")
+            print(f"{'='*80}")
+
+            # Check for existing metadata to determine incremental update date
+            latest_pr_date = get_latest_pr_date_for_agent(agent_name, current_year)
+
+            if latest_pr_date:
+                print(f"📅 Latest PR found: {latest_pr_date.strftime('%Y-%m-%d %H:%M:%S')}")
+                print(f" Fetching only PRs created after this date...")
+                start_from = latest_pr_date + timedelta(seconds=1)  # Start 1 second after
+            else:
+                print(f"📅 No existing PRs found. Fetching all PR metadata...")
+                start_from = None
+
+            # Fetch PR metadata (lightweight, memory-efficient)
+            new_metadata = fetch_all_prs_metadata(
+                identifier,
+                agent_name,
+                token,
+                start_from_date=start_from
+            )
+
+            if new_metadata:
+                # Save new metadata to HuggingFace (organized by year.month)
+                print(f"💾 Saving {len(new_metadata)} new PR records...")
+                save_pr_metadata_to_hf(new_metadata)
+
+            # Load all metadata for current year to calculate stats
+            print(f"📊 Calculating statistics from stored metadata...")
+            all_year_metadata = load_pr_metadata_for_year(current_year)
+
+            # Filter for this specific agent
+            agent_metadata = [pr for pr in all_year_metadata if pr.get('agent_name') == agent_name]
+
+            # Calculate stats from metadata
+            stats = calculate_pr_stats_from_metadata(agent_metadata)
 
            # Merge metadata with stats
            cache_dict[identifier] = {
-                'agent_name': agent.get('agent_name', 'Unknown'),
+                'agent_name': agent_name,
                'organization': agent.get('organization', 'Unknown'),
                'github_identifier': identifier,
                **stats
@@ -541,19 +865,62 @@
 
            # Progressive save
            save_jsonl(CACHE_FILE, dict_to_cache(cache_dict))
-            print(f"✓ Updated {identifier}")
+            print(f"✓ Updated {identifier}: {stats['total_prs']} PRs, {stats['acceptance_rate']}% acceptance")
 
        except Exception as e:
            print(f"✗ Error updating {identifier}: {str(e)}")
+            import traceback
+            traceback.print_exc()
            continue
 
    return cache_dict
 
 
+def construct_leaderboard_from_metadata():
+    """
+    Construct leaderboard from stored PR metadata instead of fetching all PRs.
+    Much more memory-efficient and faster.
+
+    Returns dictionary of agent stats.
+    """
+    print("📊 Constructing leaderboard from PR metadata...")
+    current_year = datetime.now().year
+
+    # Load agents
+    agents = load_agents_from_hf()
+    if not agents:
+        print("No agents found")
+        return {}
+
+    # Load all PR metadata for current year
+    all_metadata = load_pr_metadata_for_year(current_year)
+
+    cache_dict = {}
+
+    for agent in agents:
+        identifier = agent.get('github_identifier')
+        agent_name = agent.get('agent_name', 'Unknown')
+
+        # Filter metadata for this agent
+        agent_metadata = [pr for pr in all_metadata if pr.get('agent_name') == agent_name]
+
+        # Calculate stats
+        stats = calculate_pr_stats_from_metadata(agent_metadata)
+
+        cache_dict[identifier] = {
+            'agent_name': agent_name,
+            'organization': agent.get('organization', 'Unknown'),
+            'github_identifier': identifier,
+            **stats
+        }
+
+    return cache_dict
+
+
 def initialize_data():
    """
    Initialize data on application startup.
-    Priority: Leaderboard dataset > HuggingFace agents dataset
+    Priority: 1) Leaderboard dataset, 2) PR metadata (if available), 3) Full GitHub mining
    """
    print("🚀 Initializing leaderboard data...")
 
@@ -564,12 +931,23 @@
         print("✓ Initialized from leaderboard dataset")
         return
 
-    # Try loading agents from HuggingFace and mining GitHub data
+    # Try constructing from PR metadata (fast, memory-efficient)
+    try:
+        cache_dict = construct_leaderboard_from_metadata()
+        if cache_dict:
+            save_jsonl(CACHE_FILE, dict_to_cache(cache_dict))
+            save_leaderboard_to_hf(cache_dict)
+            print("✓ Initialized from PR metadata")
+            return
+    except Exception as e:
+        print(f"Could not construct from metadata: {e}")
+
+    # Fallback: Full incremental mining from GitHub
     agents = load_agents_from_hf()
     if agents:
         print(f"✓ Loaded {len(agents)} agents from HuggingFace")
-        print("⛏️ Mining GitHub data...")
-        cache_dict = update_all_agents()
+        print("⛏️ Mining GitHub data (this may take a while)...")
+        cache_dict = update_all_agents_incremental()
         if cache_dict:
             save_leaderboard_to_hf(cache_dict)
             return
@@ -601,7 +979,6 @@ def get_leaderboard_dataframe():
             data.get('total_prs', 0),
             data.get('merged', 0),
             data.get('acceptance_rate', 0.0),
-            data.get('median_merged_time', None),
         ])
 
     # Create DataFrame
@@ -609,7 +986,7 @@
     df = pd.DataFrame(rows, columns=column_names)
 
     # Ensure numeric types
-    numeric_cols = ["Total PRs", "Merged PRs", "Acceptance Rate (%)", "Median Merge Duration (minutes)"]
+    numeric_cols = ["Total PRs", "Merged PRs", "Acceptance Rate (%)"]
     for col in numeric_cols:
         if col in df.columns:
             df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
@@ -622,10 +999,10 @@
 
 
 def refresh_leaderboard():
-    """Manually trigger data refresh for all agents."""
+    """Manually trigger data refresh for all agents using incremental updates."""
     try:
-        print("🔄 Manual refresh initiated")
-        cache_dict = update_all_agents()
+        print("🔄 Manual refresh initiated (incremental mode)")
+        cache_dict = update_all_agents_incremental()
         if cache_dict:
             save_leaderboard_to_hf(cache_dict)
             return "✅ Data refreshed successfully!", get_leaderboard_dataframe()
@@ -638,7 +1015,7 @@
 def submit_agent(identifier, agent_name, organization, description, website):
     """
     Submit a new agent to the leaderboard.
-    Validates input, saves submission, and fetches PR data.
+    Validates input, saves submission, and fetches PR metadata (memory-efficient).
     """
     # Validate required fields
     if not identifier or not identifier.strip():
@@ -681,26 +1058,38 @@ def submit_agent(identifier, agent_name, organization, description, website):
     # Save to HuggingFace
     if not save_agent_to_hf(submission):
         return "❌ Failed to save submission", get_leaderboard_dataframe()
-
-    # Fetch PR data immediately
+
+    # Fetch PR metadata immediately (memory-efficient)
     token = get_github_token()
     try:
-        stats = fetch_agent_stats(identifier, token)
-
+        print(f"Fetching PR metadata for {agent_name}...")
+
+        # Fetch lightweight metadata
+        metadata_list = fetch_all_prs_metadata(identifier, agent_name, token)
+
+        if metadata_list:
+            # Save metadata to HuggingFace
+            save_pr_metadata_to_hf(metadata_list)
+
+            # Calculate stats from metadata
+            stats = calculate_pr_stats_from_metadata(metadata_list)
+
         # Update cache
         cache_list = load_jsonl(CACHE_FILE)
         cache_dict = cache_to_dict(cache_list)
         cache_dict[identifier] = {**submission, **stats}
         save_jsonl(CACHE_FILE, dict_to_cache(cache_dict))
-
+
         # Save to HuggingFace
         save_leaderboard_to_hf(cache_dict)
-
+
         return f"✅ Successfully submitted {agent_name}!", get_leaderboard_dataframe()
-
+
     except Exception as e:
         error_msg = f"⚠️ Submitted {agent_name}, but failed to fetch PR data: {str(e)}"
         print(error_msg)
+        import traceback
+        traceback.print_exc()
         return error_msg, get_leaderboard_dataframe()
 
 
@@ -709,17 +1098,24 @@
 # =============================================================================
 
 def scheduled_update_task():
-    """Background daemon thread for periodic data updates."""
+    """
+    Background daemon thread for periodic incremental data updates.
+    Uses memory-efficient incremental fetching to avoid storage eviction.
+    """
     while True:
         time.sleep(UPDATE_INTERVAL)
-        print(f"\n🔄 Scheduled update started at {datetime.now().isoformat()}")
+        print(f"\n{'='*80}")
+        print(f"🔄 Scheduled incremental update started at {datetime.now().isoformat()}")
+        print(f"{'='*80}")
         try:
-            cache_dict = update_all_agents()
+            cache_dict = update_all_agents_incremental()
             if cache_dict:
                 save_leaderboard_to_hf(cache_dict)
-            print("✓ Scheduled update completed")
+            print("✓ Scheduled update completed successfully")
         except Exception as e:
             print(f"✗ Scheduled update failed: {str(e)}")
+            import traceback
+            traceback.print_exc()
 
 
 # =============================================================================
@@ -727,6 +1123,27 @@ def scheduled_update_task():
 # =============================================================================
 
 # Initialize data before creating UI
+if DEBUG_MODE:
+    print("\n" + "="*80)
+    print("🐛 DEBUG MODE ENABLED 🐛")
+    print("="*80)
+    print("PR retrieval is limited to 10 PRs per query pattern per agent")
+
+    # Show how debug mode was enabled
+    if args.debug:
+        print("Enabled via: command-line flag '--debug'")
+        print("To disable: run without '--debug' flag")
+    else:
+        print("Enabled via: DEBUG_MODE environment variable")
+        print("To disable: run with '--no-debug' flag or unset DEBUG_MODE")
+
+    print("="*80 + "\n")
+else:
+    print("\n🚀 Starting in PRODUCTION MODE - full PR retrieval enabled")
+    if args.no_debug:
+        print(" (Explicitly set via '--no-debug' flag)")
+    print()
+
 initialize_data()
 
 # Start background update thread
@@ -756,7 +1173,7 @@ with gr.Blocks(title="SWE Agent PR Leaderboard", theme=gr.themes.Soft()) as app:
         value=get_leaderboard_dataframe(),
         datatype=LEADERBOARD_COLUMNS,
         search_columns=["Agent Name", "Organization"],
-        filter_columns=["Acceptance Rate (%)", "Median Merge Duration (minutes)"]
+        filter_columns=["Acceptance Rate (%)"]
     )
 
     refresh_button.click(
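
Note on the time-partitioned search introduced above: each query is constrained with a created:START..END qualifier, and the window is halved recursively whenever GitHub reports more than 1,000 matches, since the Search API never returns results beyond that cap. The following is a minimal, illustrative sketch of one such date-bounded request; the bot name, the dates, and the unauthenticated call are assumptions for illustration, not values taken from the commit.

    # Illustrative sketch only: one date-bounded page of the GitHub search API,
    # mirroring the query shape used by fetch_prs_with_time_partition above.
    # 'example-bot' and the dates are hypothetical placeholders.
    import requests

    def build_search_params(base_query, start_str, end_str, page=1):
        # The created: window must be narrow enough that total_count stays
        # under GitHub's 1,000-result ceiling for a single search.
        return {
            'q': f'{base_query} created:{start_str}..{end_str}',
            'per_page': 100,
            'page': page,
            'sort': 'created',
            'order': 'asc',
        }

    params = build_search_params('is:pr author:example-bot', '2024-01-01', '2024-06-30')
    response = requests.get('https://api.github.com/search/issues', params=params)
    print(response.json().get('total_count', 0), 'matching PRs in this window')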
 
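The reworked acceptance rate also changes meaning: it divides merged PRs by PRs that reached a decision (merged plus closed-but-unmerged), rather than by all PRs. A short worked example with hypothetical metadata records, in the minimal shape this commit stores, shows the difference from a plain merged/total ratio.

    # Hypothetical sample records; only the fields relevant to the rate are shown.
    sample = [
        {'merged_at': '2025-01-02T00:00:00Z', 'closed_at': None},   # merged
        {'merged_at': None, 'closed_at': '2025-01-03T00:00:00Z'},   # closed without merging
        {'merged_at': None, 'closed_at': None},                     # still open, no decision yet
    ]

    merged = sum(1 for pr in sample if pr.get('merged_at'))
    closed_not_merged = sum(1 for pr in sample if pr.get('closed_at') and not pr.get('merged_at'))
    decisions = merged + closed_not_merged

    acceptance_rate = round(merged / decisions * 100, 2) if decisions else 0
    print(acceptance_rate)  # 50.0 -- the open PR is excluded; merged/total would give 33.33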
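Finally, debug mode is resolved with a fixed precedence: an explicit --no-debug flag wins, then --debug, and only when neither flag is passed does the DEBUG_MODE environment variable apply. A minimal sketch of that resolution order follows; the inline argument list merely simulates running `python app.py --debug` and is not part of the commit.

    # Illustrative sketch of the flag/env-var precedence; parse_args() normally
    # reads sys.argv, the inline list here just simulates `python app.py --debug`.
    import argparse
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--no-debug', action='store_true')
    args = parser.parse_args(['--debug'])

    if args.no_debug:          # explicit override always wins
        debug_mode = False
    elif args.debug:           # then the opt-in flag
        debug_mode = True
    else:                      # finally fall back to the environment
        debug_mode = os.getenv('DEBUG_MODE', 'False').lower() in ('true', '1', 'yes')

    print(debug_mode)  # True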