add
app.py
CHANGED
@@ -4,7 +4,7 @@ import json
   4    import os
   5    import time
   6    import requests
   7 -  from datetime import datetime, timezone
   8    from collections import defaultdict
   9    from huggingface_hub import HfApi, hf_hub_download
  10    from datasets import load_dataset, Dataset
@@ -12,17 +12,37 @@ import threading
  12    from dotenv import load_dotenv
  13    import pandas as pd
  14    import random
  15
  16    # Load environment variables
  17    load_dotenv()
  18
  19    # =============================================================================
  20    # CONFIGURATION
  21    # =============================================================================
  22
  23    CACHE_FILE = "agent_pr_cache.jsonl"
  24    AGENTS_REPO = "SWE-Arena/pr_agents"  # HuggingFace dataset for agent metadata
  25    LEADERBOARD_REPO = "SWE-Arena/pr_leaderboard"
  26    UPDATE_INTERVAL = 86400  # 24 hours in seconds
  27
  28    LEADERBOARD_COLUMNS = [
@@ -31,7 +51,6 @@ LEADERBOARD_COLUMNS = [
  31        ("Total PRs", "number"),
  32        ("Merged PRs", "number"),
  33        ("Acceptance Rate (%)", "number"),
  34 -      ("Median Merge Duration (minutes)", "number"),
  35    ]
  36
  37    # =============================================================================
@@ -191,7 +210,7 @@ def validate_github_username(identifier):
 191        token = get_github_token()
 192        headers = {'Authorization': f'token {token}'} if token else {}
 193        url = f'https://api.github.com/users/{identifier}'
 194 -      response = request_with_backoff('GET', url, headers=headers, max_retries=
 195        if response is None:
 196            return False, "Validation error: network/rate limit exhausted"
 197        if response.status_code == 200:
@@ -204,18 +223,151 @@ def validate_github_username(identifier):
 204            return False, f"Validation error: {str(e)}"
 205
 206
 207 -  def fetch_all_prs(identifier, token=None):
 208        """
 209 -
 210        Searches using multiple query patterns:
 211        - is:pr author:{identifier} (authored by the user)
 212        - is:pr head:{identifier}/ (branch names starting with identifier)
 213        - is:pr "co-authored-by: {identifier}" (co-authored commits)
 214
 215 -
 216        """
 217        headers = {'Authorization': f'token {token}'} if token else {}
 218
 219        # Define all query patterns to search
 220        query_patterns = [
 221            f'is:pr author:{identifier}',
@@ -226,129 +378,256 @@ def fetch_all_prs(identifier, token=None):
 226        # Use a dict to deduplicate PRs by ID
 227        prs_by_id = {}
 228
 229-240 -
 241
 242 -
 243 -
 244 -              if response is None:
 245 -                  print(f"Error fetching PRs for query '{query}': retries exhausted")
 246 -                  break
 247
 248 -
 249 -
 250 -                  break
 251
 252 -
 253 -
 254
 255 -
 256 -
 257
 258 -
 259 -              for pr in items:
 260 -                  pr_id = pr.get('id')
 261 -                  if pr_id and pr_id not in prs_by_id:
 262 -                      prs_by_id[pr_id] = pr
 263
 264 -
 265 -
 266 -
 267
 268 -
 269 -              time.sleep(0.5)  # Courtesy delay between pages
 270
 271 -
 272 -              print(f"Error fetching PRs for query '{query}': {str(e)}")
 273 -              break
 274
 275 -      # Delay between different query patterns
 276 -      time.sleep(0.5)
 277
 278 -
 279 -
 280 -
 281
 282 -
 283
 284
 285 -
 286 -      """
 287 -      Calculate statistics from a list of pull requests.
 288 -      Returns a dictionary with comprehensive PR metrics.
 289        """
 290 -      total_prs = len(
 291 -      merged =
 292-302 -
 303 -          state = pr.get('state')
 304 -          if state == 'closed':
 305 -              pull_request = pr.get('pull_request', {})
 306 -              merged_at = pull_request.get('merged_at')
 307 -              if merged_at:
 308 -                  merged += 1
 309 -
 310 -                  # Calculate merged time (creation to merge)
 311 -                  try:
 312 -                      created_at = pr.get('created_at')
 313 -                      if created_at and merged_at:
 314 -                          created_dt = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
 315 -                          merged_dt = datetime.fromisoformat(merged_at.replace('Z', '+00:00'))
 316 -                          merged_time_minutes = (merged_dt - created_dt).total_seconds() / 60  # Convert to minutes
 317 -                          merged_times.append(merged_time_minutes)
 318 -                  except Exception as e:
 319 -                      print(f"Warning: Could not calculate merged time for PR: {e}")
 320 -
 321 -      acceptance_rate = (merged / total_prs * 100) if total_prs > 0 else 0
 322 -
 323 -      # Calculate median merged time
 324 -      median_merged_time = None
 325 -      if merged_times:
 326 -          merged_times.sort()
 327 -          n = len(merged_times)
 328 -          if n % 2 == 0:
 329 -              median_merged_time = (merged_times[n // 2 - 1] + merged_times[n // 2]) / 2
 330 -          else:
 331 -              median_merged_time = merged_times[n // 2]
 332 -          median_merged_time = round(median_merged_time, 2)
 333 -
 334        return {
 335            'total_prs': total_prs,
 336            'merged': merged,
 337            'acceptance_rate': round(acceptance_rate, 2),
 338 -          'median_merged_time': median_merged_time,
 339        }
 340
 341
 342 -
 343        """
 344 -
 345 -      Returns
 346        """
 347-351 -
 352
 353
 354    # =============================================================================
@@ -503,12 +782,20 @@ def save_leaderboard_to_hf(cache_dict):
 503    # DATA MANAGEMENT
 504    # =============================================================================
 505
 506 -  def update_all_agents():
 507        """
 508 -
 509        Returns dictionary of all agent data with current stats.
 510        """
 511        token = get_github_token()
 512
 513        # Load agent metadata from HuggingFace
 514        agents = load_agents_from_hf()
@@ -523,17 +810,54 @@ def update_all_agents():
 523        # Update each agent
 524        for agent in agents:
 525            identifier = agent.get('github_identifier')
 526            if not identifier:
 527                print(f"Warning: Skipping agent without identifier: {agent}")
 528                continue
 529
 530            try:
 531 -
 532 -
 533
 534                # Merge metadata with stats
 535                cache_dict[identifier] = {
 536 -                  'agent_name':
 537                    'organization': agent.get('organization', 'Unknown'),
 538                    'github_identifier': identifier,
 539                    **stats
@@ -541,19 +865,62 @@ def update_all_agents():
 541
 542                # Progressive save
 543                save_jsonl(CACHE_FILE, dict_to_cache(cache_dict))
 544 -              print(f"✓ Updated {identifier}")
 545
 546        except Exception as e:
 547            print(f"✗ Error updating {identifier}: {str(e)}")
 548            continue
 549
 550    return cache_dict
 551
 552
 553    def initialize_data():
 554        """
 555        Initialize data on application startup.
 556 -      Priority: Leaderboard dataset
 557        """
 558        print("🚀 Initializing leaderboard data...")
 559
@@ -564,12 +931,23 @@ def initialize_data():
 564        print("✓ Initialized from leaderboard dataset")
 565        return
 566
 567 -  # Try
 568    agents = load_agents_from_hf()
 569    if agents:
 570        print(f"✓ Loaded {len(agents)} agents from HuggingFace")
 571 -      print("⛏️ Mining GitHub data...")
 572 -      cache_dict =
 573        if cache_dict:
 574            save_leaderboard_to_hf(cache_dict)
 575            return
@@ -601,7 +979,6 @@ def get_leaderboard_dataframe():
 601                data.get('total_prs', 0),
 602                data.get('merged', 0),
 603                data.get('acceptance_rate', 0.0),
 604 -              data.get('median_merged_time', None),
 605            ])
 606
 607        # Create DataFrame
@@ -609,7 +986,7 @@
 609        df = pd.DataFrame(rows, columns=column_names)
 610
 611        # Ensure numeric types
 612 -      numeric_cols = ["Total PRs", "Merged PRs", "Acceptance Rate (%)"
 613        for col in numeric_cols:
 614            if col in df.columns:
 615                df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
@@ -622,10 +999,10 @@
 622
 623
 624    def refresh_leaderboard():
 625 -      """Manually trigger data refresh for all agents."""
 626        try:
 627 -          print("🔄 Manual refresh initiated")
 628 -          cache_dict =
 629            if cache_dict:
 630                save_leaderboard_to_hf(cache_dict)
 631                return "✅ Data refreshed successfully!", get_leaderboard_dataframe()
@@ -638,7 +1015,7 @@ def submit_agent(identifier, agent_name, organization, description, website):
 638    def submit_agent(identifier, agent_name, organization, description, website):
 639        """
 640        Submit a new agent to the leaderboard.
 641 -      Validates input, saves submission, and fetches PR
 642        """
 643        # Validate required fields
 644        if not identifier or not identifier.strip():
@@ -681,26 +1058,38 @@ def submit_agent(identifier, agent_name, organization, description, website):
 681        # Save to HuggingFace
 682        if not save_agent_to_hf(submission):
 683            return "❌ Failed to save submission", get_leaderboard_dataframe()
 684 -
 685 -      # Fetch PR
 686        token = get_github_token()
 687        try:
 688 -
 689 -
 690            # Update cache
 691            cache_list = load_jsonl(CACHE_FILE)
 692            cache_dict = cache_to_dict(cache_list)
 693            cache_dict[identifier] = {**submission, **stats}
 694            save_jsonl(CACHE_FILE, dict_to_cache(cache_dict))
 695 -
 696            # Save to HuggingFace
 697            save_leaderboard_to_hf(cache_dict)
 698 -
 699            return f"✅ Successfully submitted {agent_name}!", get_leaderboard_dataframe()
 700 -
 701        except Exception as e:
 702            error_msg = f"⚠️ Submitted {agent_name}, but failed to fetch PR data: {str(e)}"
 703            print(error_msg)
 704            return error_msg, get_leaderboard_dataframe()
 705
 706
@@ -709,17 +1098,24 @@ def submit_agent(identifier, agent_name, organization, description, website):
 709    # =============================================================================
 710
 711    def scheduled_update_task():
 712 -      """
 713        while True:
 714            time.sleep(UPDATE_INTERVAL)
 715 -          print(f"\n
 716            try:
 717 -              cache_dict =
 718                if cache_dict:
 719                    save_leaderboard_to_hf(cache_dict)
 720 -                  print("✓ Scheduled update completed")
 721            except Exception as e:
 722                print(f"✗ Scheduled update failed: {str(e)}")
 723
 724
 725    # =============================================================================
@@ -727,6 +1123,27 @@ def scheduled_update_task():
 727    # =============================================================================
 728
 729    # Initialize data before creating UI
 730    initialize_data()
 731
 732    # Start background update thread
@@ -756,7 +1173,7 @@ with gr.Blocks(title="SWE Agent PR Leaderboard", theme=gr.themes.Soft()) as app:
 756        value=get_leaderboard_dataframe(),
 757        datatype=LEADERBOARD_COLUMNS,
 758        search_columns=["Agent Name", "Organization"],
 759 -      filter_columns=["Acceptance Rate (%)"
 760    )
 761
 762    refresh_button.click(
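app.py — updated file (new line numbers; lines added by this change are marked with +):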
   4    import os
   5    import time
   6    import requests
   7 +  from datetime import datetime, timezone, timedelta
   8    from collections import defaultdict
   9    from huggingface_hub import HfApi, hf_hub_download
  10    from datasets import load_dataset, Dataset
  12    from dotenv import load_dotenv
  13    import pandas as pd
  14    import random
  15 +  import argparse
  16
  17    # Load environment variables
  18    load_dotenv()
  19
  20 +  # Parse command-line arguments
  21 +  parser = argparse.ArgumentParser(description='SWE Agent PR Leaderboard')
  22 +  parser.add_argument('--debug', '--DEBUG', action='store_true',
  23 +                      help='Enable debug mode (limits PR retrieval to 10 per query pattern)')
  24 +  parser.add_argument('--no-debug', '--production', action='store_true',
  25 +                      help='Explicitly disable debug mode (force production mode)')
  26 +  args = parser.parse_args()
  27 +
  28    # =============================================================================
  29    # CONFIGURATION
  30    # =============================================================================
  31
  32 +  # DEBUG MODE: Set to True to limit PR retrieval for testing
  33 +  # When enabled, only fetches up to 10 PRs per query pattern per agent
  34 +  # Priority: 1) Command-line args, 2) Environment variable, 3) Default (False)
  35 +  if args.no_debug:
  36 +      DEBUG_MODE = False
  37 +  elif args.debug:
  38 +      DEBUG_MODE = True
  39 +  else:
  40 +      DEBUG_MODE = os.getenv('DEBUG_MODE', 'False').lower() in ('true', '1', 'yes')
  41 +
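A quick sketch of the precedence encoded above (illustrative invocations, not part of the diff): launching with `python app.py --debug` turns debug mode on via the CLI flag; `python app.py --no-debug` forces production mode even if the DEBUG_MODE environment variable is set; with neither flag, `DEBUG_MODE=true python app.py` enables debug through the environment, and the default is production.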
  42    CACHE_FILE = "agent_pr_cache.jsonl"
  43    AGENTS_REPO = "SWE-Arena/pr_agents"  # HuggingFace dataset for agent metadata
  44    LEADERBOARD_REPO = "SWE-Arena/pr_leaderboard"
  45 +  PR_METADATA_REPO = "SWE-Arena/pr_metadata"  # HuggingFace dataset for PR metadata
  46    UPDATE_INTERVAL = 86400  # 24 hours in seconds
  47
  48    LEADERBOARD_COLUMNS = [
  51        ("Total PRs", "number"),
  52        ("Merged PRs", "number"),
  53        ("Acceptance Rate (%)", "number"),
  54    ]
  55
  56    # =============================================================================
 210        token = get_github_token()
 211        headers = {'Authorization': f'token {token}'} if token else {}
 212        url = f'https://api.github.com/users/{identifier}'
 213 +      response = request_with_backoff('GET', url, headers=headers, max_retries=1)
 214        if response is None:
 215            return False, "Validation error: network/rate limit exhausted"
 216        if response.status_code == 200:
 223            return False, f"Validation error: {str(e)}"
 224
 225
 226 +  def fetch_prs_with_time_partition(base_query, start_date, end_date, headers, prs_by_id, debug_limit=None):
 227 +      """
 228 +      Fetch PRs within a specific time range using time-based partitioning.
 229 +      Recursively splits the time range if hitting the 1000-result limit.
 230 +
 231 +      Args:
 232 +          debug_limit: If set, stops fetching after this many PRs (for testing)
 233 +
 234 +      Returns the number of PRs found in this time partition.
 235 +      """
 236 +      # Format dates for GitHub search (YYYY-MM-DD)
 237 +      start_str = start_date.strftime('%Y-%m-%d')
 238 +      end_str = end_date.strftime('%Y-%m-%d')
 239 +
 240 +      # Add date range to query
 241 +      query = f'{base_query} created:{start_str}..{end_str}'
 242 +
 243 +      print(f"  Searching range {start_str} to {end_str}...")
 244 +
 245 +      page = 1
 246 +      per_page = 100
 247 +      total_in_partition = 0
 248 +
 249 +      while True:
 250 +          # Check debug limit
 251 +          if debug_limit is not None and total_in_partition >= debug_limit:
 252 +              print(f"  🐛 DEBUG MODE: Reached limit of {debug_limit} PRs, stopping...")
 253 +              return total_in_partition
 254 +          url = 'https://api.github.com/search/issues'
 255 +          params = {
 256 +              'q': query,
 257 +              'per_page': per_page,
 258 +              'page': page,
 259 +              'sort': 'created',
 260 +              'order': 'asc'
 261 +          }
 262 +
 263 +          try:
 264 +              response = request_with_backoff('GET', url, headers=headers, params=params)
 265 +              if response is None:
 266 +                  print(f"  Error: retries exhausted for range {start_str} to {end_str}")
 267 +                  return total_in_partition
 268 +
 269 +              if response.status_code != 200:
 270 +                  print(f"  Error: HTTP {response.status_code} for range {start_str} to {end_str}")
 271 +                  return total_in_partition
 272 +
 273 +              data = response.json()
 274 +              total_count = data.get('total_count', 0)
 275 +              items = data.get('items', [])
 276 +
 277 +              if not items:
 278 +                  break
 279 +
 280 +              # Add PRs to global dict
 281 +              for pr in items:
 282 +                  pr_id = pr.get('id')
 283 +                  if pr_id and pr_id not in prs_by_id:
 284 +                      prs_by_id[pr_id] = pr
 285 +                      total_in_partition += 1
 286 +
 287 +              # Check if we hit the 1000-result limit
 288 +              if total_count > 1000 and page == 10:
 289 +                  print(f"  ⚠️ Hit 1000-result limit ({total_count} total). Splitting time range...")
 290 +
 291 +                  # Calculate midpoint
 292 +                  time_diff = end_date - start_date
 293 +                  mid_date = start_date + time_diff / 2
 294 +
 295 +                  # Recursively fetch both halves
 296 +                  count1 = fetch_prs_with_time_partition(base_query, start_date, mid_date, headers, prs_by_id, debug_limit)
 297 +                  count2 = fetch_prs_with_time_partition(base_query, mid_date + timedelta(days=1), end_date, headers, prs_by_id, debug_limit)
 298 +
 299 +                  return count1 + count2
 300 +
 301 +              # Normal pagination: check if there are more pages
 302 +              if len(items) < per_page or page >= 10:
 303 +                  break
 304 +
 305 +              page += 1
 306 +              time.sleep(0.5)  # Courtesy delay between pages
 307 +
 308 +          except Exception as e:
 309 +              print(f"  Error fetching range {start_str} to {end_str}: {str(e)}")
 310 +              return total_in_partition
 311 +
 312 +      if total_in_partition > 0:
 313 +          print(f"  ✓ Found {total_in_partition} PRs in range {start_str} to {end_str}")
 314 +
 315 +      return total_in_partition
 316 +
 317 +
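A minimal usage sketch for the partitioned search above (the query, token, and dates are placeholders; the function itself is the one added in this change):

    headers = {'Authorization': 'token <GITHUB_TOKEN>'}    # hypothetical token
    prs_by_id = {}
    found = fetch_prs_with_time_partition(
        'is:pr author:example-bot[bot]',                   # hypothetical identifier
        datetime(2024, 1, 1, tzinfo=timezone.utc),
        datetime.now(timezone.utc),
        headers,
        prs_by_id,
    )
    print(f"{found} PRs fetched, {len(prs_by_id)} unique after deduplication")

Whenever a date range would exceed GitHub's 1000-result search window, the function halves the range and recurses, so the caller only ever works with the merged prs_by_id dict.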
 318 +  def extract_pr_metadata(pr, agent_name):
 319 +      """
 320 +      Extract minimal PR metadata for efficient storage.
 321 +      Only keeps essential fields: html_url, created_at, merged_at, closed_at, agent_name.
 322        """
 323 +      pull_request = pr.get('pull_request', {})
 324 +
 325 +      # Extract dates
 326 +      created_at = pr.get('created_at')
 327 +      merged_at = pull_request.get('merged_at')
 328 +      closed_at = pr.get('closed_at')
 329 +
 330 +      # Only store closed_at if PR is closed but not merged
 331 +      if merged_at:
 332 +          closed_at = None  # Don't store redundant info
 333 +
 334 +      return {
 335 +          'html_url': pr.get('html_url'),
 336 +          'created_at': created_at,
 337 +          'merged_at': merged_at,
 338 +          'closed_at': closed_at,
 339 +          'agent_name': agent_name
 340 +      }
 341 +
 342 +
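For reference, a record produced by extract_pr_metadata above looks roughly like this (values are illustrative only):

    {
        'html_url': 'https://github.com/owner/repo/pull/42',
        'created_at': '2025-03-01T12:00:00Z',
        'merged_at': '2025-03-02T08:00:00Z',
        'closed_at': None,          # dropped whenever merged_at is set
        'agent_name': 'Example Agent'
    }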
 343 +  def fetch_all_prs_metadata(identifier, agent_name, token=None, start_from_date=None):
 344 +      """
 345 +      Fetch ALL pull requests associated with a GitHub user/bot.
 346 +      Returns lightweight metadata instead of full PR objects.
 347 +
 348 +      Uses time-based partitioning to bypass GitHub's 1000-result limit per query.
 349        Searches using multiple query patterns:
 350        - is:pr author:{identifier} (authored by the user)
 351        - is:pr head:{identifier}/ (branch names starting with identifier)
 352        - is:pr "co-authored-by: {identifier}" (co-authored commits)
 353
 354 +      Args:
 355 +          identifier: GitHub username/bot identifier
 356 +          agent_name: Human-readable agent name for metadata
 357 +          token: GitHub API token
 358 +          start_from_date: Only fetch PRs created after this date (for incremental updates)
 359 +
 360 +      Returns:
 361 +          List of minimal PR metadata dictionaries
 362        """
 363        headers = {'Authorization': f'token {token}'} if token else {}
 364
 365 +      # Debug mode: limit PR retrieval for testing
 366 +      debug_limit_per_pattern = 10 if DEBUG_MODE else None
 367 +
 368 +      if DEBUG_MODE:
 369 +          print(f"\n🐛 DEBUG MODE ENABLED: Limiting to {debug_limit_per_pattern} PRs per query pattern")
 370 +
 371        # Define all query patterns to search
 372        query_patterns = [
 373            f'is:pr author:{identifier}',
 378        # Use a dict to deduplicate PRs by ID
 379        prs_by_id = {}
 380
 381 +      # Define time range: start from specified date or GitHub founding
 382 +      start_date = start_from_date or datetime(2008, 1, 1, tzinfo=timezone.utc)
 383 +      end_date = datetime.now(timezone.utc)
 384 +
 385 +      for query_pattern in query_patterns:
 386 +          print(f"\n🔍 Searching with query: {query_pattern}")
 387 +          print(f"  Time range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
 388 +
 389 +          pattern_start_time = time.time()
 390 +          initial_count = len(prs_by_id)
 391 +
 392 +          # Fetch with time partitioning
 393 +          prs_found = fetch_prs_with_time_partition(
 394 +              query_pattern,
 395 +              start_date,
 396 +              end_date,
 397 +              headers,
 398 +              prs_by_id,
 399 +              debug_limit_per_pattern
 400 +          )
 401
 402 +          pattern_duration = time.time() - pattern_start_time
 403 +          new_prs = len(prs_by_id) - initial_count
 404
 405 +          print(f"  ✓ Pattern complete: {new_prs} new PRs found ({prs_found} total fetched, {len(prs_by_id) - initial_count - (prs_found - new_prs)} duplicates)")
 406 +          print(f"  ⏱️ Time taken: {pattern_duration:.1f} seconds")
 407
 408 +          # Delay between different query patterns (shorter in debug mode)
 409 +          time.sleep(0.2 if DEBUG_MODE else 1.0)
 410
 411 +      # Convert to lightweight metadata
 412 +      all_prs = list(prs_by_id.values())
 413 +      if DEBUG_MODE:
 414 +          print(f"\n✅ COMPLETE (DEBUG MODE): Found {len(all_prs)} unique PRs for {identifier}")
 415 +          print(f"  Note: In production mode, this would fetch ALL PRs")
 416 +      else:
 417 +          print(f"\n✅ COMPLETE: Found {len(all_prs)} unique PRs for {identifier}")
 418 +      print(f"📦 Extracting minimal metadata...")
 419
 420 +      metadata_list = [extract_pr_metadata(pr, agent_name) for pr in all_prs]
 421
 422 +      # Calculate memory savings
 423 +      import sys
 424 +      original_size = sys.getsizeof(str(all_prs))
 425 +      metadata_size = sys.getsizeof(str(metadata_list))
 426 +      savings_pct = ((original_size - metadata_size) / original_size * 100) if original_size > 0 else 0
 427
 428 +      print(f"💾 Memory efficiency: {original_size // 1024}KB → {metadata_size // 1024}KB (saved {savings_pct:.1f}%)")
 429
 430 +      return metadata_list
 431
 432
 433 +  def calculate_pr_stats_from_metadata(metadata_list):
 434 +      """
 435 +      Calculate statistics from a list of PR metadata (lightweight objects).
 436 +      Works with minimal metadata: html_url, created_at, merged_at, closed_at, agent_name.
 437
 438 +      Returns a dictionary with comprehensive PR metrics.
 439
 440 +      Acceptance rate is calculated as:
 441 +          merged PRs / (merged PRs + closed but not merged PRs) * 100
 442
 443 +      This only counts PRs where a decision has been made (either merged or rejected/closed).
 444        """
 445 +      total_prs = len(metadata_list)
 446 +      merged = sum(1 for pr_meta in metadata_list if pr_meta.get('merged_at'))
 447 +
 448 +      # Count closed PRs (rejected) - those with closed_at but no merged_at
 449 +      closed_not_merged = sum(1 for pr_meta in metadata_list
 450 +                              if pr_meta.get('closed_at') and not pr_meta.get('merged_at'))
 451 +
 452 +      # Total decisions made = merged + closed (rejected)
 453 +      total_decisions = merged + closed_not_merged
 454 +
 455 +      # Calculate acceptance rate based on decisions made
 456 +      acceptance_rate = (merged / total_decisions * 100) if total_decisions > 0 else 0
 457 +
 458        return {
 459            'total_prs': total_prs,
 460            'merged': merged,
 461            'acceptance_rate': round(acceptance_rate, 2),
 462        }
 463
 464
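A small worked example of the acceptance-rate rule above (hypothetical records, showing only the fields the function reads):

    sample = [
        {'merged_at': '2025-01-02T00:00:00Z', 'closed_at': None},   # merged
        {'merged_at': None, 'closed_at': '2025-01-03T00:00:00Z'},   # closed without merging
        {'merged_at': None, 'closed_at': None},                     # still open, no decision yet
    ]
    calculate_pr_stats_from_metadata(sample)
    # -> {'total_prs': 3, 'merged': 1, 'acceptance_rate': 50.0}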
 465 +  # =============================================================================
 466 +  # PR METADATA STORAGE & RETRIEVAL
 467 +  # =============================================================================
 468 +
 469 +  def group_metadata_by_year_month(metadata_list):
 470        """
 471 +      Group PR metadata by year.month for efficient storage.
 472 +      Returns dict: {(year, month): [metadata_list]}
 473        """
 474 +      grouped = defaultdict(list)
 475 +
 476 +      for pr_meta in metadata_list:
 477 +          created_at = pr_meta.get('created_at')
 478 +          if not created_at:
 479 +              continue
 480 +
 481 +          try:
 482 +              dt = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
 483 +              key = (dt.year, dt.month)
 484 +              grouped[key].append(pr_meta)
 485 +          except Exception as e:
 486 +              print(f"Warning: Could not parse date '{created_at}': {e}")
 487 +
 488 +      return dict(grouped)
 489 +
 490 +
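An illustration of the grouping key (hypothetical records): PRs created in January and February 2025 land in separate buckets, which save_pr_metadata_to_hf below turns into 2025.01.jsonl and 2025.02.jsonl:

    demo = [
        {'html_url': 'https://github.com/owner/repo/pull/1', 'created_at': '2025-01-15T10:00:00Z'},
        {'html_url': 'https://github.com/owner/repo/pull/2', 'created_at': '2025-02-01T09:30:00Z'},
    ]
    sorted(group_metadata_by_year_month(demo))   # [(2025, 1), (2025, 2)]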
 491 +  def save_pr_metadata_to_hf(metadata_list):
 492 +      """
 493 +      Save PR metadata to HuggingFace dataset, organized by year.month.
 494 +      Each file is named YYYY.MM.jsonl and contains all PRs created in that month.
 495 +
 496 +      This function APPENDS new metadata and DEDUPLICATES by html_url.
 497 +      """
 498 +      try:
 499 +          token = get_hf_token()
 500 +          if not token:
 501 +              raise Exception("No HuggingFace token found")
 502 +
 503 +          api = HfApi()
 504 +
 505 +          # Group by year.month
 506 +          grouped = group_metadata_by_year_month(metadata_list)
 507 +
 508 +          for (year, month), month_metadata in grouped.items():
 509 +              filename = f"{year}.{month:02d}.jsonl"
 510 +              print(f"📤 Uploading {len(month_metadata)} PRs to {filename}...")
 511 +
 512 +              # Download existing file if it exists
 513 +              existing_metadata = []
 514 +              try:
 515 +                  file_path = hf_hub_download(
 516 +                      repo_id=PR_METADATA_REPO,
 517 +                      filename=filename,
 518 +                      repo_type="dataset",
 519 +                      token=token
 520 +                  )
 521 +                  existing_metadata = load_jsonl(file_path)
 522 +                  print(f"  Found {len(existing_metadata)} existing PRs in {filename}")
 523 +              except Exception:
 524 +                  print(f"  No existing file found for {filename}, creating new")
 525 +
 526 +              # Merge and deduplicate by html_url
 527 +              existing_by_url = {meta['html_url']: meta for meta in existing_metadata if meta.get('html_url')}
 528 +              new_by_url = {meta['html_url']: meta for meta in month_metadata if meta.get('html_url')}
 529 +
 530 +              # Update with new data (new data overwrites old)
 531 +              existing_by_url.update(new_by_url)
 532 +              merged_metadata = list(existing_by_url.values())
 533 +
 534 +              # Save locally
 535 +              save_jsonl(filename, merged_metadata)
 536 +
 537 +              # Upload to HuggingFace
 538 +              api.upload_file(
 539 +                  path_or_fileobj=filename,
 540 +                  path_in_repo=filename,
 541 +                  repo_id=PR_METADATA_REPO,
 542 +                  repo_type="dataset",
 543 +                  token=token
 544 +              )
 545 +
 546 +              # Clean up local file
 547 +              os.remove(filename)
 548 +
 549 +              print(f"  ✓ Saved {len(merged_metadata)} total PRs to {filename}")
 550 +
 551 +          return True
 552 +
 553 +      except Exception as e:
 554 +          print(f"✗ Error saving PR metadata: {str(e)}")
 555 +          return False
 556 +
 557 +
 558 +  def load_pr_metadata_for_year(year):
 559 +      """
 560 +      Load all PR metadata for a specific year from HuggingFace.
 561 +      Returns list of all PR metadata from that year.
 562 +      """
 563 +      try:
 564 +          api = HfApi()
 565 +          token = get_hf_token()
 566 +
 567 +          # List all files in the repository
 568 +          files = api.list_repo_files(repo_id=PR_METADATA_REPO, repo_type="dataset")
 569 +
 570 +          # Filter for files matching the year pattern (e.g., 2025.01.jsonl, 2025.02.jsonl)
 571 +          year_pattern = f"{year}."
 572 +          year_files = [f for f in files if f.startswith(year_pattern) and f.endswith('.jsonl')]
 573 +
 574 +          print(f"📥 Loading PR metadata for {year} ({len(year_files)} files)...")
 575 +
 576 +          all_metadata = []
 577 +          for filename in year_files:
 578 +              try:
 579 +                  file_path = hf_hub_download(
 580 +                      repo_id=PR_METADATA_REPO,
 581 +                      filename=filename,
 582 +                      repo_type="dataset",
 583 +                      token=token
 584 +                  )
 585 +                  month_metadata = load_jsonl(file_path)
 586 +                  all_metadata.extend(month_metadata)
 587 +                  print(f"  ✓ Loaded {len(month_metadata)} PRs from {filename}")
 588 +              except Exception as e:
 589 +                  print(f"  Warning: Could not load {filename}: {str(e)}")
 590 +
 591 +          print(f"✓ Loaded {len(all_metadata)} total PRs for {year}")
 592 +          return all_metadata
 593 +
 594 +      except Exception as e:
 595 +          print(f"✗ Error loading PR metadata for {year}: {str(e)}")
 596 +          return []
 597 +
 598 +
 599 +  def get_latest_pr_date_for_agent(agent_name, current_year):
 600 +      """
 601 +      Get the latest PR creation date for an agent from stored metadata.
 602 +      Used for incremental updates - only fetch PRs newer than this date.
 603 +
 604 +      Returns datetime or None if no existing PRs found.
 605 +      """
 606 +      try:
 607 +          metadata = load_pr_metadata_for_year(current_year)
 608 +
 609 +          # Filter for this agent
 610 +          agent_prs = [pr for pr in metadata if pr.get('agent_name') == agent_name]
 611 +
 612 +          if not agent_prs:
 613 +              return None
 614 +
 615 +          # Find latest created_at
 616 +          latest_date = None
 617 +          for pr in agent_prs:
 618 +              created_at = pr.get('created_at')
 619 +              if created_at:
 620 +                  try:
 621 +                      dt = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
 622 +                      if latest_date is None or dt > latest_date:
 623 +                          latest_date = dt
 624 +                  except Exception:
 625 +                      continue
 626 +
 627 +          return latest_date
 628 +
 629 +      except Exception:
 630 +          return None
 631
 632
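A sketch of how the helper above feeds the incremental update below ('Example Agent' and the bot login are placeholders; token is assumed to come from get_github_token()):

    latest = get_latest_pr_date_for_agent('Example Agent', datetime.now().year)
    start_from = latest + timedelta(seconds=1) if latest else None   # None -> full backfill from 2008
    new_metadata = fetch_all_prs_metadata('example-bot[bot]', 'Example Agent', token, start_from_date=start_from)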
 633    # =============================================================================
 782    # DATA MANAGEMENT
 783    # =============================================================================
 784
 785 +  def update_all_agents_incremental():
 786        """
 787 +      Memory-efficient incremental update of PR statistics for all agents.
 788 +
 789 +      Strategy:
 790 +      1. For each agent, check latest PR date from stored metadata
 791 +      2. Only fetch NEW PRs created after that date
 792 +      3. Store minimal metadata (not full PR objects) to avoid storage limits
 793 +      4. Construct leaderboard from stored metadata
 794 +
 795        Returns dictionary of all agent data with current stats.
 796        """
 797        token = get_github_token()
 798 +      current_year = datetime.now().year
 799
 800        # Load agent metadata from HuggingFace
 801        agents = load_agents_from_hf()
 810        # Update each agent
 811        for agent in agents:
 812            identifier = agent.get('github_identifier')
 813 +          agent_name = agent.get('agent_name', 'Unknown')
 814 +
 815            if not identifier:
 816                print(f"Warning: Skipping agent without identifier: {agent}")
 817                continue
 818
 819            try:
 820 +              print(f"\n{'='*80}")
 821 +              print(f"Processing: {agent_name} ({identifier})")
 822 +              print(f"{'='*80}")
 823 +
 824 +              # Check for existing metadata to determine incremental update date
 825 +              latest_pr_date = get_latest_pr_date_for_agent(agent_name, current_year)
 826 +
 827 +              if latest_pr_date:
 828 +                  print(f"📅 Latest PR found: {latest_pr_date.strftime('%Y-%m-%d %H:%M:%S')}")
 829 +                  print(f"  Fetching only PRs created after this date...")
 830 +                  start_from = latest_pr_date + timedelta(seconds=1)  # Start 1 second after
 831 +              else:
 832 +                  print(f"📅 No existing PRs found. Fetching all PR metadata...")
 833 +                  start_from = None
 834 +
 835 +              # Fetch PR metadata (lightweight, memory-efficient)
 836 +              new_metadata = fetch_all_prs_metadata(
 837 +                  identifier,
 838 +                  agent_name,
 839 +                  token,
 840 +                  start_from_date=start_from
 841 +              )
 842 +
 843 +              if new_metadata:
 844 +                  # Save new metadata to HuggingFace (organized by year.month)
 845 +                  print(f"💾 Saving {len(new_metadata)} new PR records...")
 846 +                  save_pr_metadata_to_hf(new_metadata)
 847 +
 848 +              # Load all metadata for current year to calculate stats
 849 +              print(f"📊 Calculating statistics from stored metadata...")
 850 +              all_year_metadata = load_pr_metadata_for_year(current_year)
 851 +
 852 +              # Filter for this specific agent
 853 +              agent_metadata = [pr for pr in all_year_metadata if pr.get('agent_name') == agent_name]
 854 +
 855 +              # Calculate stats from metadata
 856 +              stats = calculate_pr_stats_from_metadata(agent_metadata)
 857
 858                # Merge metadata with stats
 859                cache_dict[identifier] = {
 860 +                  'agent_name': agent_name,
 861                    'organization': agent.get('organization', 'Unknown'),
 862                    'github_identifier': identifier,
 863                    **stats
 865
 866                # Progressive save
 867                save_jsonl(CACHE_FILE, dict_to_cache(cache_dict))
 868 +              print(f"✓ Updated {identifier}: {stats['total_prs']} PRs, {stats['acceptance_rate']}% acceptance")
 869
 870        except Exception as e:
 871            print(f"✗ Error updating {identifier}: {str(e)}")
 872 +          import traceback
 873 +          traceback.print_exc()
 874            continue
 875
 876    return cache_dict
 877
 878
 879 +  def construct_leaderboard_from_metadata():
 880 +      """
 881 +      Construct leaderboard from stored PR metadata instead of fetching all PRs.
 882 +      Much more memory-efficient and faster.
 883 +
 884 +      Returns dictionary of agent stats.
 885 +      """
 886 +      print("📊 Constructing leaderboard from PR metadata...")
 887 +      current_year = datetime.now().year
 888 +
 889 +      # Load agents
 890 +      agents = load_agents_from_hf()
 891 +      if not agents:
 892 +          print("No agents found")
 893 +          return {}
 894 +
 895 +      # Load all PR metadata for current year
 896 +      all_metadata = load_pr_metadata_for_year(current_year)
 897 +
 898 +      cache_dict = {}
 899 +
 900 +      for agent in agents:
 901 +          identifier = agent.get('github_identifier')
 902 +          agent_name = agent.get('agent_name', 'Unknown')
 903 +
 904 +          # Filter metadata for this agent
 905 +          agent_metadata = [pr for pr in all_metadata if pr.get('agent_name') == agent_name]
 906 +
 907 +          # Calculate stats
 908 +          stats = calculate_pr_stats_from_metadata(agent_metadata)
 909 +
 910 +          cache_dict[identifier] = {
 911 +              'agent_name': agent_name,
 912 +              'organization': agent.get('organization', 'Unknown'),
 913 +              'github_identifier': identifier,
 914 +              **stats
 915 +          }
 916 +
 917 +      return cache_dict
 918 +
 919 +
 920    def initialize_data():
 921        """
 922        Initialize data on application startup.
 923 +      Priority: 1) Leaderboard dataset, 2) PR metadata (if available), 3) Full GitHub mining
 924        """
 925        print("🚀 Initializing leaderboard data...")
 926
 931            print("✓ Initialized from leaderboard dataset")
 932            return
 933
 934 +      # Try constructing from PR metadata (fast, memory-efficient)
 935 +      try:
 936 +          cache_dict = construct_leaderboard_from_metadata()
 937 +          if cache_dict:
 938 +              save_jsonl(CACHE_FILE, dict_to_cache(cache_dict))
 939 +              save_leaderboard_to_hf(cache_dict)
 940 +              print("✓ Initialized from PR metadata")
 941 +              return
 942 +      except Exception as e:
 943 +          print(f"Could not construct from metadata: {e}")
 944 +
 945 +      # Fallback: Full incremental mining from GitHub
 946        agents = load_agents_from_hf()
 947        if agents:
 948            print(f"✓ Loaded {len(agents)} agents from HuggingFace")
 949 +          print("⛏️ Mining GitHub data (this may take a while)...")
 950 +          cache_dict = update_all_agents_incremental()
 951            if cache_dict:
 952                save_leaderboard_to_hf(cache_dict)
 953                return
data.get('total_prs', 0),
|
| 980 |
data.get('merged', 0),
|
| 981 |
data.get('acceptance_rate', 0.0),
|
|
|
|
| 982 |
])
|
| 983 |
|
| 984 |
# Create DataFrame
|
|
|
|
| 986 |
df = pd.DataFrame(rows, columns=column_names)
|
| 987 |
|
| 988 |
# Ensure numeric types
|
| 989 |
+
numeric_cols = ["Total PRs", "Merged PRs", "Acceptance Rate (%)"]
|
| 990 |
for col in numeric_cols:
|
| 991 |
if col in df.columns:
|
| 992 |
df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
|
|
|
|
 999
1000
1001    def refresh_leaderboard():
1002 +      """Manually trigger data refresh for all agents using incremental updates."""
1003        try:
1004 +          print("🔄 Manual refresh initiated (incremental mode)")
1005 +          cache_dict = update_all_agents_incremental()
1006            if cache_dict:
1007                save_leaderboard_to_hf(cache_dict)
1008                return "✅ Data refreshed successfully!", get_leaderboard_dataframe()
1015    def submit_agent(identifier, agent_name, organization, description, website):
1016        """
1017        Submit a new agent to the leaderboard.
1018 +      Validates input, saves submission, and fetches PR metadata (memory-efficient).
1019        """
1020        # Validate required fields
1021        if not identifier or not identifier.strip():
1058        # Save to HuggingFace
1059        if not save_agent_to_hf(submission):
1060            return "❌ Failed to save submission", get_leaderboard_dataframe()
1061 +
1062 +      # Fetch PR metadata immediately (memory-efficient)
1063        token = get_github_token()
1064        try:
1065 +          print(f"Fetching PR metadata for {agent_name}...")
1066 +
1067 +          # Fetch lightweight metadata
1068 +          metadata_list = fetch_all_prs_metadata(identifier, agent_name, token)
1069 +
1070 +          if metadata_list:
1071 +              # Save metadata to HuggingFace
1072 +              save_pr_metadata_to_hf(metadata_list)
1073 +
1074 +          # Calculate stats from metadata
1075 +          stats = calculate_pr_stats_from_metadata(metadata_list)
1076 +
1077            # Update cache
1078            cache_list = load_jsonl(CACHE_FILE)
1079            cache_dict = cache_to_dict(cache_list)
1080            cache_dict[identifier] = {**submission, **stats}
1081            save_jsonl(CACHE_FILE, dict_to_cache(cache_dict))
1082 +
1083            # Save to HuggingFace
1084            save_leaderboard_to_hf(cache_dict)
1085 +
1086            return f"✅ Successfully submitted {agent_name}!", get_leaderboard_dataframe()
1087 +
1088        except Exception as e:
1089            error_msg = f"⚠️ Submitted {agent_name}, but failed to fetch PR data: {str(e)}"
1090            print(error_msg)
1091 +          import traceback
1092 +          traceback.print_exc()
1093            return error_msg, get_leaderboard_dataframe()
1094
1095
1098    # =============================================================================
1099
1100    def scheduled_update_task():
1101 +      """
1102 +      Background daemon thread for periodic incremental data updates.
1103 +      Uses memory-efficient incremental fetching to avoid storage eviction.
1104 +      """
1105        while True:
1106            time.sleep(UPDATE_INTERVAL)
1107 +          print(f"\n{'='*80}")
1108 +          print(f"🔄 Scheduled incremental update started at {datetime.now().isoformat()}")
1109 +          print(f"{'='*80}")
1110            try:
1111 +              cache_dict = update_all_agents_incremental()
1112                if cache_dict:
1113                    save_leaderboard_to_hf(cache_dict)
1114 +                  print("✓ Scheduled update completed successfully")
1115            except Exception as e:
1116                print(f"✗ Scheduled update failed: {str(e)}")
1117 +              import traceback
1118 +              traceback.print_exc()
1119
1120
1121    # =============================================================================
1123    # =============================================================================
1124
1125    # Initialize data before creating UI
1126 +  if DEBUG_MODE:
1127 +      print("\n" + "="*80)
1128 +      print("🐛 DEBUG MODE ENABLED 🐛")
1129 +      print("="*80)
1130 +      print("PR retrieval is limited to 10 PRs per query pattern per agent")
1131 +
1132 +      # Show how debug mode was enabled
1133 +      if args.debug:
1134 +          print("Enabled via: command-line flag '--debug'")
1135 +          print("To disable: run without '--debug' flag")
1136 +      else:
1137 +          print("Enabled via: DEBUG_MODE environment variable")
1138 +          print("To disable: run with '--no-debug' flag or unset DEBUG_MODE")
1139 +
1140 +      print("="*80 + "\n")
1141 +  else:
1142 +      print("\n🚀 Starting in PRODUCTION MODE - full PR retrieval enabled")
1143 +      if args.no_debug:
1144 +          print("  (Explicitly set via '--no-debug' flag)")
1145 +      print()
1146 +
1147    initialize_data()
1148
1149    # Start background update thread
1173        value=get_leaderboard_dataframe(),
1174        datatype=LEADERBOARD_COLUMNS,
1175        search_columns=["Agent Name", "Organization"],
1176 +      filter_columns=["Acceptance Rate (%)"]
1177    )
1178
1179    refresh_button.click(
|