update daily msr
app.py CHANGED

@@ -465,27 +465,22 @@ def extract_pr_metadata(pr):
     }
 
 
-def fetch_all_prs_metadata(identifier, agent_name, token=None, start_from_date=None, exclude_dates=None):
     """
-    Fetch pull requests
-    Returns lightweight metadata instead of full PR objects.
-
-    This function employs time-based partitioning to navigate GitHub's 1000-result limit per query.
-    It searches using multiple query patterns:
-    - is:pr author:{identifier} (PRs authored by the bot)
-    - is:pr "co-authored-by: {identifier}" (PRs with commits co-authored by the bot)
-    - is:pr head:{identifier}/ (PRs with branch names starting with the bot identifier)
 
     Args:
         identifier: GitHub username or bot identifier
         agent_name: Human-readable name of the agent for metadata purposes
         token: GitHub API token for authentication
-
-        exclude_dates: Set of date objects to exclude from mining (dates that have already been processed)
 
     Returns:
-        List of dictionaries containing minimal PR metadata
     """
     headers = {'Authorization': f'token {token}'} if token else {}
 
     # Debug mode: limit PR retrieval for testing
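The docstring removed in this hunk lists three search patterns and the time-based partitioning used to stay under GitHub's 1,000-results-per-query search cap. As an illustration only (the helper name and identifier below are hypothetical, not taken from app.py), the date-partitioned queries could be assembled like this:

```python
from datetime import date

def build_search_queries(identifier: str, start: date, end: date) -> list:
    """Illustrative sketch: one GitHub search query per pattern, bounded by a
    created: range so each slice stays under the 1,000-result search cap."""
    window = f"created:{start.isoformat()}..{end.isoformat()}"
    return [
        f"is:pr author:{identifier} {window}",
        f'is:pr "co-authored-by: {identifier}" {window}',
        f"is:pr head:{identifier}/ {window}",
    ]

# Example: a six-month range split into two windows, each queried separately.
print(build_search_queries("example-bot", date(2025, 1, 1), date(2025, 3, 31)))
print(build_search_queries("example-bot", date(2025, 4, 1), date(2025, 6, 30)))
```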
@@ -508,27 +503,18 @@ def fetch_all_prs_metadata(identifier, agent_name, token=None, start_from_date=N
     # Use a dict to deduplicate PRs by ID
     prs_by_id = {}
 
-    #
-
-
-
-    if start_from_date:
-        # Use start_from_date but ensure it's not older than 6 months
-        start_date = max(start_from_date, six_months_ago)
-    else:
-        start_date = six_months_ago
-
-    # End date is current time
-    end_date = current_time
 
     for query_pattern in query_patterns:
         print(f"\n🔍 Searching with query: {query_pattern}")
-        print(f"
 
         pattern_start_time = time.time()
         initial_count = len(prs_by_id)
 
-        # Fetch with time partitioning
         prs_found = fetch_prs_with_time_partition(
             query_pattern,
             start_date,
@@ -550,47 +536,18 @@ def fetch_all_prs_metadata(identifier, agent_name, token=None, start_from_date=N
     # Convert to lightweight metadata
     all_prs = list(prs_by_id.values())
 
-    # Filter out PRs from excluded dates if specified
-    if exclude_dates:
-        filtered_prs = []
-        excluded_count = 0
-        for pr in all_prs:
-            created_at = pr.get('created_at')
-            if created_at:
-                try:
-                    dt = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
-                    pr_date = dt.date()
-                    if pr_date not in exclude_dates:
-                        filtered_prs.append(pr)
-                    else:
-                        excluded_count += 1
-                except Exception:
-                    filtered_prs.append(pr)  # Keep PRs with unparseable dates
-            else:
-                filtered_prs.append(pr)  # Keep PRs without created_at
-
-        if excluded_count > 0:
-            print(f" ⏭️ Skipped {excluded_count} PRs from already-mined dates")
-        all_prs = filtered_prs
-
     if DEBUG_MODE:
-        print(f"\n✅ COMPLETE (DEBUG MODE): Found {len(all_prs)} unique PRs for {identifier}")
         print(f" Note: In production mode, this would fetch ALL PRs")
     else:
-        print(f"\n✅ COMPLETE: Found {len(all_prs)} unique PRs for {identifier}")
     print(f"📦 Extracting minimal metadata...")
 
     metadata_list = [extract_pr_metadata(pr) for pr in all_prs]
 
-
-    import sys
-    original_size = sys.getsizeof(str(all_prs))
-    metadata_size = sys.getsizeof(str(metadata_list))
-    savings_pct = ((original_size - metadata_size) / original_size * 100) if original_size > 0 else 0
 
-    print(f"💾 Memory efficiency: {original_size // 1024}KB → {metadata_size // 1024}KB (saved {savings_pct:.1f}%)")
 
-    return metadata_list
 
 
 def calculate_pr_stats_from_metadata(metadata_list):
@@ -1073,59 +1030,6 @@ def get_daily_files_last_n_months(agent_identifier, n_months=6):
     return []
 
 
-def get_already_mined_dates(agent_identifier, n_months=6):
-    """
-    Get set of dates that have already been mined for an agent.
-
-    Args:
-        agent_identifier: GitHub identifier of the agent
-        n_months: Number of months to look back (default: 6)
-
-    Returns:
-        Set of date objects (datetime.date) that already have data files
-    """
-    try:
-        api = HfApi()
-
-        # Calculate date range
-        today = datetime.now(timezone.utc)
-        n_months_ago = today - timedelta(days=30 * n_months)
-
-        # List all files in the repository
-        files = api.list_repo_files(repo_id=PR_METADATA_REPO, repo_type="dataset")
-
-        # Filter for files in this agent's folder
-        agent_pattern = f"{agent_identifier}/"
-        agent_files = [f for f in files if f.startswith(agent_pattern) and f.endswith('.jsonl')]
-
-        mined_dates = set()
-        for filename in agent_files:
-            try:
-                # Extract date from filename: [agent_identifier]/YYYY.MM.DD.jsonl
-                parts = filename.split('/')
-                if len(parts) != 2:
-                    continue
-
-                date_part = parts[1].replace('.jsonl', '')  # Get YYYY.MM.DD
-                date_components = date_part.split('.')
-                if len(date_components) != 3:
-                    continue
-
-                file_year, file_month, file_day = map(int, date_components)
-                file_date = datetime(file_year, file_month, file_day, tzinfo=timezone.utc).date()
-
-                # Only include dates within the last n_months
-                if n_months_ago.date() <= file_date <= today.date():
-                    mined_dates.add(file_date)
-            except Exception as e:
-                print(f" Warning: Could not parse date from filename {filename}: {e}")
-                continue
-
-        return mined_dates
-
-    except Exception as e:
-        print(f" Warning: Could not get already-mined dates for {agent_identifier}: {str(e)}")
-        return set()
 
 
 def fetch_pr_current_status(pr_url, token):
@@ -1432,101 +1336,98 @@ def save_agent_to_hf(data):
 
 def update_all_agents_incremental():
     """
-
 
     Strategy:
-    1.
-
-
-
-
-    6. Construct leaderboard from ALL stored metadata (last 6 months)
-
-    Returns dictionary of all agent data with current stats.
     """
-
 
-
-
-    if not agents:
-        print("No agents found in HuggingFace dataset")
-        return {}
 
-
 
-
-
-
-        agent_name = agent.get('agent_name', 'Unknown')
 
-
-
-
 
-
-
-
-
-
-
-
-
-
-
-            print(f"
-
-
-
                     identifier,
-
-                    token,
-                    start_from_date=None,  # Use full 6-month range
-                    exclude_dates=None  # Re-mine ALL dates (no exclusions)
                 )
-
-
-
-
                     identifier,
                     agent_name,
                     token,
-
                 )
 
-
-
-
-
-
-
-
-                # Load ALL metadata to calculate stats (aggregates entire last 6 months)
-                print(f"📊 Calculating statistics from ALL stored metadata (last 6 months)...")
-                all_metadata = load_pr_metadata()
 
-
-                agent_metadata = [pr for pr in all_metadata if pr.get('agent_identifier') == identifier]
 
-
-
-
-
-
-                    'agent_name': agent_name,
-                    'website': agent.get('website', 'Unknown'),
-                    'github_identifier': identifier,
-                    **stats
-                }
 
-
 
-
-                print(f"✗ Error updating {identifier}: {str(e)}")
-                import traceback
-                traceback.print_exc()
-                continue
 
-
 
 
 def construct_leaderboard_from_metadata():
@@ -1568,58 +1469,6 @@ def construct_leaderboard_from_metadata():
     return cache_dict
 
 
-def initialize_data():
-    """
-    Initialize data on application startup.
-    Constructs leaderboard from PR metadata only.
-
-    In DEBUG MODE:
-    - If no data available, automatically mine up to 10 PRs per query per agent
-    - Does NOT save to HuggingFace datasets
-    """
-    print("🚀 Initializing leaderboard data...")
-
-    # Try constructing from PR metadata in SWE-Arena/pr_metadata (fast, memory-efficient)
-    print(f"Checking SWE-Arena/pr_metadata for existing data...")
-    try:
-        cache_dict = construct_leaderboard_from_metadata()
-        # Check if there's actually meaningful data (at least one agent with PRs)
-        has_data = any(entry.get('total_prs', 0) > 0 for entry in cache_dict.values())
-        if cache_dict and has_data:
-            print(f"✓ Found PR metadata in pr_metadata repository")
-            print("✓ Initialized from PR metadata")
-            return
-        else:
-            print(" No meaningful PR metadata found in pr_metadata repository")
-    except Exception as e:
-        print(f" Could not construct from metadata: {e}")
-
-    # If in debug mode and no data available, mine immediately
-    if DEBUG_MODE:
-        print("\n🐛 DEBUG MODE: No data available, mining immediately (up to 10 PRs per query per agent)...")
-        agents = load_agents_from_hf()
-        if agents:
-            print(f"✓ Loaded {len(agents)} agents from HuggingFace")
-            print("⛏️ Mining GitHub data in debug mode (limited to 10 PRs per query)...")
-            cache_dict = update_all_agents_incremental()
-            print("✓ Debug mining complete (data NOT saved to HuggingFace)")
-            return
-        else:
-            print("⚠️ No agents found. Waiting for first submission...")
-            return
-
-    # Production mode: Fallback to full incremental mining from GitHub
-    agents = load_agents_from_hf()
-    if agents:
-        print(f"✓ Loaded {len(agents)} agents from HuggingFace")
-        print("⛏️ Mining GitHub data (this may take a while)...")
-        cache_dict = update_all_agents_incremental()
-        return
-
-    # No data available
-    print("⚠️ No data sources available. Waiting for first submission...")
-
-
 # =============================================================================
 # UI FUNCTIONS
 # =============================================================================
@@ -1792,7 +1641,8 @@ def get_leaderboard_dataframe():
 def submit_agent(identifier, agent_name, organization, description, website):
     """
     Submit a new agent to the leaderboard.
-    Validates input
     """
     # Validate required fields
     if not identifier or not identifier.strip():
@@ -1836,64 +1686,8 @@ def submit_agent(identifier, agent_name, organization, description, website):
     if not save_agent_to_hf(submission):
         return "❌ Failed to save submission", get_leaderboard_dataframe(), create_monthly_metrics_plot()
 
-
-
-    try:
-        print(f"Fetching PR metadata for {agent_name}...")
-
-        # Fetch lightweight metadata
-        metadata_list = fetch_all_prs_metadata(identifier, agent_name, token)
-
-        if metadata_list:
-            # Save metadata to HuggingFace
-            save_pr_metadata_to_hf(metadata_list, identifier)
-
-        return f"✅ Successfully submitted {agent_name}!", get_leaderboard_dataframe(), create_monthly_metrics_plot()
-
-    except Exception as e:
-        error_msg = f"⚠️ Submitted {agent_name}, but failed to fetch PR data: {str(e)}"
-        print(error_msg)
-        import traceback
-        traceback.print_exc()
-        return error_msg, get_leaderboard_dataframe(), create_monthly_metrics_plot()
-
-
-# =============================================================================
-# BACKGROUND TASKS
-# =============================================================================
-
-def daily_update_task():
-    """
-    Daily scheduled task (runs at 12:00 AM UTC) for regular PR mining.
-
-    Strategy:
-    1. Re-mine ALL PRs for all agents within the last 6 months (LEADERBOARD_TIME_FRAME_DAYS)
-    2. Update ALL day files, even if they already exist
-    3. This ensures metadata like 'merged_at' is always current (e.g., PRs merged after initial mining)
-
-    This replaces the old refresh_open_prs approach to ensure no stale data.
-    """
-    print(f"\n{'='*80}")
-    print(f"🕛 Daily Regular PR Mining started at {datetime.now(timezone.utc).isoformat()}")
-    print(f"{'='*80}")
-
-    try:
-        # Re-mine all PRs for all agents (will update existing day files)
-        print(f"📋 Re-mining all PRs within {LEADERBOARD_TIME_FRAME_DAYS} days for all agents...")
-        cache_dict = update_all_agents_incremental()
-
-        print(f"\n{'='*80}")
-        print(f"📊 Mining Summary:")
-        print(f" Total agents processed: {len(cache_dict)}")
-        print(f" All PR metadata updated (including existing day files)")
-        print(f"{'='*80}")
-
-        print(f"\n✅ Daily Regular PR Mining completed at {datetime.now(timezone.utc).isoformat()}")
-
-    except Exception as e:
-        print(f"✗ Daily mining failed: {str(e)}")
-        import traceback
-        traceback.print_exc()
 
 
 # =============================================================================
@@ -1922,19 +1716,17 @@ else:
     print(" (Explicitly set via '--no-debug' flag)")
     print()
 
-
-
-# Start APScheduler for daily regular PR mining at 12:00 AM UTC
 scheduler = BackgroundScheduler(timezone="UTC")
 scheduler.add_job(
-
     trigger=CronTrigger(hour=0, minute=0),  # 12:00 AM UTC daily
-    id='
-    name='Daily
     replace_existing=True
 )
 scheduler.start()
-print("✓ Scheduler started: Daily
 
 # Create Gradio interface
 with gr.Blocks(title="SWE Agent PR Leaderboard", theme=gr.themes.Soft()) as app:
@@ -465,27 +465,22 @@
     }
 
 
+def fetch_daily_prs_metadata(identifier, agent_name, token=None, target_date=None):
     """
+    Fetch pull requests for a specific date (used for daily incremental updates).
 
     Args:
         identifier: GitHub username or bot identifier
         agent_name: Human-readable name of the agent for metadata purposes
         token: GitHub API token for authentication
+        target_date: Date object for which to fetch PRs (defaults to yesterday)
 
     Returns:
+        List of dictionaries containing minimal PR metadata for that date
     """
+    if target_date is None:
+        target_date = (datetime.now(timezone.utc) - timedelta(days=1)).date()
+
     headers = {'Authorization': f'token {token}'} if token else {}
 
     # Debug mode: limit PR retrieval for testing
@@ -508,27 +503,18 @@
     # Use a dict to deduplicate PRs by ID
     prs_by_id = {}
 
+    # Convert target_date to datetime for API queries
+    start_date = datetime.combine(target_date, datetime.min.time()).replace(tzinfo=timezone.utc)
+    end_date = datetime.combine(target_date, datetime.max.time()).replace(tzinfo=timezone.utc)
 
     for query_pattern in query_patterns:
         print(f"\n🔍 Searching with query: {query_pattern}")
+        print(f" Date: {target_date.strftime('%Y-%m-%d')}")
 
         pattern_start_time = time.time()
         initial_count = len(prs_by_id)
 
+        # Fetch with time partitioning (for single day)
         prs_found = fetch_prs_with_time_partition(
             query_pattern,
             start_date,
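The two datetime.combine calls added above pin the search window to a single UTC day. A self-contained sketch of the same window calculation (mirroring the diff, with a hypothetical helper name):

```python
from datetime import datetime, timedelta, timezone

def day_window_utc(target_date=None):
    """Return (start, end) datetimes covering one full UTC day, defaulting to yesterday."""
    if target_date is None:
        target_date = (datetime.now(timezone.utc) - timedelta(days=1)).date()
    start = datetime.combine(target_date, datetime.min.time()).replace(tzinfo=timezone.utc)
    end = datetime.combine(target_date, datetime.max.time()).replace(tzinfo=timezone.utc)
    return start, end

start, end = day_window_utc()
# The equivalent GitHub search qualifier for that single day:
print(f"created:{start.date().isoformat()}..{end.date().isoformat()}")
```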
@@ -550,47 +536,18 @@
     # Convert to lightweight metadata
     all_prs = list(prs_by_id.values())
 
     if DEBUG_MODE:
+        print(f"\n✅ COMPLETE (DEBUG MODE): Found {len(all_prs)} unique PRs for {identifier} on {target_date}")
         print(f" Note: In production mode, this would fetch ALL PRs")
     else:
+        print(f"\n✅ COMPLETE: Found {len(all_prs)} unique PRs for {identifier} on {target_date}")
     print(f"📦 Extracting minimal metadata...")
 
     metadata_list = [extract_pr_metadata(pr) for pr in all_prs]
 
+    return metadata_list
 
 
 
 
 def calculate_pr_stats_from_metadata(metadata_list):
@@ -1073,59 +1030,6 @@
     return []
 
 
 
 
 def fetch_pr_current_status(pr_url, token):
@@ -1432,101 +1336,98 @@
 
 def update_all_agents_incremental():
     """
+    Daily incremental update - refreshes open PRs and fetches new PRs for all agents.
 
     Strategy:
+    1. Refresh status of all open PRs from the last LEADERBOARD_TIME_FRAME_DAYS - 1 days
+       (to check if any have been merged or closed)
+    2. Fetch new PRs created yesterday (from 12:00 AM to 11:59:59 PM yesterday)
+    3. Update the corresponding daily files (YYYY.MM.DD.jsonl)
+    4. This runs daily to keep data fresh without re-mining everything
     """
+    print(f"\n{'='*80}")
+    print(f"🕛 Daily Incremental PR Mining started at {datetime.now(timezone.utc).isoformat()}")
+    print(f"{'='*80}")
 
+    try:
+        token = get_github_token()
 
+        # Load agent metadata from HuggingFace
+        agents = load_agents_from_hf()
+        if not agents:
+            print("No agents found in HuggingFace dataset")
+            return
 
+        # Calculate yesterday's date
+        yesterday = (datetime.now(timezone.utc) - timedelta(days=1)).date()
+        print(f"\n📅 Daily Incremental Update for {yesterday.strftime('%Y-%m-%d')} for all agents...")
 
+        agents_processed = 0
+        total_refreshed = 0
+        total_refreshed_updated = 0
+        total_new_prs = 0
 
+        # Update each agent
+        for agent in agents:
+            identifier = agent.get('github_identifier')
+            agent_name = agent.get('agent_name', 'Unknown')
+
+            if not identifier:
+                print(f"Warning: Skipping agent without identifier: {agent}")
+                continue
+
+            try:
+                print(f"\n{'='*80}")
+                print(f"Processing: {agent_name} ({identifier})")
+                print(f"{'='*80}")
+
+                # STEP 1: Refresh all open PRs from the last LEADERBOARD_TIME_FRAME_DAYS - 1 days
+                print(f"\n🔄 Step 1: Refreshing open PRs (last {LEADERBOARD_TIME_FRAME_DAYS - 1} days)...")
+                refreshed_checked, refreshed_updated = refresh_open_prs_for_agent(
                     identifier,
+                    token
                 )
+                total_refreshed += refreshed_checked
+                total_refreshed_updated += refreshed_updated
+
+                # STEP 2: Fetch new PRs created yesterday (12:00 AM to 11:59:59 PM yesterday)
+                print(f"\n📥 Step 2: Fetching new PRs created on {yesterday.strftime('%Y-%m-%d')} (12:00 AM to 11:59:59 PM)...")
+                new_metadata = fetch_daily_prs_metadata(
                     identifier,
                     agent_name,
                     token,
+                    target_date=yesterday
                 )
 
+                if new_metadata:
+                    # Save new metadata to HuggingFace
+                    print(f"💾 Saving {len(new_metadata)} new PRs from {yesterday}...")
+                    save_pr_metadata_to_hf(new_metadata, identifier)
+                    total_new_prs += len(new_metadata)
+                else:
+                    print(f" No new PRs found created on {yesterday}")
 
+                agents_processed += 1
 
+            except Exception as e:
+                print(f"✗ Error updating {identifier}: {str(e)}")
+                import traceback
+                traceback.print_exc()
+                continue
 
+        print(f"\n{'='*80}")
+        print(f"📊 Mining Summary:")
+        print(f" Total agents processed: {agents_processed}")
+        print(f" Open PRs refreshed: {total_refreshed} checked, {total_refreshed_updated} updated")
+        print(f" New PRs added (from yesterday): {total_new_prs}")
+        print(f"{'='*80}")
 
+        print(f"\n✅ Daily Incremental PR Mining completed at {datetime.now(timezone.utc).isoformat()}")
 
+    except Exception as e:
+        print(f"✗ Daily mining failed: {str(e)}")
+        import traceback
+        traceback.print_exc()
 
 
 def construct_leaderboard_from_metadata():
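The strategy docstring in this hunk refers to daily files named YYYY.MM.DD.jsonl under each agent's folder, the same convention the removed get_already_mined_dates parsed. A small sketch of that naming round trip (the helper names are hypothetical; the actual upload is handled by save_pr_metadata_to_hf):

```python
from datetime import date, datetime

def daily_file_path(agent_identifier: str, day: date) -> str:
    # e.g. "example-bot/2025.06.01.jsonl"
    return f"{agent_identifier}/{day.strftime('%Y.%m.%d')}.jsonl"

def date_from_daily_file(path: str) -> date:
    # Inverse of daily_file_path; raises ValueError if the name does not match.
    stem = path.split('/')[-1].removesuffix('.jsonl')
    return datetime.strptime(stem, '%Y.%m.%d').date()

p = daily_file_path("example-bot", date(2025, 6, 1))
assert date_from_daily_file(p) == date(2025, 6, 1)
print(p)
```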
@@ -1568,58 +1469,6 @@
     return cache_dict
 
 
 # =============================================================================
 # UI FUNCTIONS
 # =============================================================================
@@ -1792,7 +1641,8 @@
 def submit_agent(identifier, agent_name, organization, description, website):
     """
     Submit a new agent to the leaderboard.
+    Validates input and saves submission.
+    PR data will be populated by the daily incremental update.
     """
     # Validate required fields
     if not identifier or not identifier.strip():
@@ -1836,64 +1686,8 @@
     if not save_agent_to_hf(submission):
         return "❌ Failed to save submission", get_leaderboard_dataframe(), create_monthly_metrics_plot()
 
+    success_msg = f"✅ Successfully submitted {agent_name}!\n\nPR data will be populated by the daily incremental update (runs at 12:00 AM UTC)."
+    return success_msg, get_leaderboard_dataframe(), create_monthly_metrics_plot()
 
 
 # =============================================================================
@@ -1922,19 +1716,17 @@
     print(" (Explicitly set via '--no-debug' flag)")
     print()
 
+# Start APScheduler for daily incremental PR mining at 12:00 AM UTC
 scheduler = BackgroundScheduler(timezone="UTC")
 scheduler.add_job(
+    update_all_agents_incremental,
     trigger=CronTrigger(hour=0, minute=0),  # 12:00 AM UTC daily
+    id='daily_incremental_pr_mining',
+    name='Daily Incremental PR Mining',
     replace_existing=True
 )
 scheduler.start()
+print("✓ Scheduler started: Daily Incremental PR Mining at 12:00 AM UTC")
 
 # Create Gradio interface
 with gr.Blocks(title="SWE Agent PR Leaderboard", theme=gr.themes.Soft()) as app: