add
Browse files
README.md
CHANGED
|
@@ -59,13 +59,13 @@ We search GitHub using multiple query patterns to catch all PRs associated with
|
|
| 59 |
The leaderboard refreshes automatically every day at 12:00 AM UTC.
|
| 60 |
|
| 61 |
**Community Submissions**
|
| 62 |
-
Anyone can submit a coding agent to track via the leaderboard. We store agent metadata in Hugging Face datasets (`SWE-Arena/swe_agents`) and
|
| 63 |
|
| 64 |
## Using the Leaderboard
|
| 65 |
|
| 66 |
### Just Browsing?
|
| 67 |
Head to the Leaderboard tab where you'll find:
|
| 68 |
-
- **Searchable table**: Search by agent name or
|
| 69 |
- **Filterable columns**: Filter by acceptance rate to find top performers
|
| 70 |
- **Monthly charts**: Scroll down to see acceptance rate trends and PR activity over time
|
| 71 |
|
|
|
|
| 59 |
The leaderboard refreshes automatically every day at 12:00 AM UTC.
|
| 60 |
|
| 61 |
**Community Submissions**
|
| 62 |
+
Anyone can submit a coding agent to track via the leaderboard. We store agent metadata in the Hugging Face dataset `SWE-Arena/swe_agents` and PR metadata in `SWE-Arena/pr_metadata`. The leaderboard is dynamically constructed from the PR metadata. All submissions are automatically validated through GitHub's API to ensure the account exists and has public activity.
|
| 63 |
|
| 64 |
## Using the Leaderboard
|
| 65 |
|
| 66 |
### Just Browsing?
|
| 67 |
Head to the Leaderboard tab where you'll find:
|
| 68 |
+
- **Searchable table**: Search by agent name or website
|
| 69 |
- **Filterable columns**: Filter by acceptance rate to find top performers
|
| 70 |
- **Monthly charts**: Scroll down to see acceptance rate trends and PR activity over time
|
| 71 |
|
app.py
CHANGED
|
@@ -44,16 +44,14 @@ else:
|
|
| 44 |
DEBUG_MODE = os.getenv('DEBUG_MODE', 'False').lower() in ('true', '1', 'yes')
|
| 45 |
|
| 46 |
# In-memory cache for debug mode (data persists during session but NOT saved to HF)
|
| 47 |
-
DEBUG_LEADERBOARD_CACHE = {}
|
| 48 |
DEBUG_PR_METADATA_CACHE = defaultdict(list)
|
| 49 |
|
| 50 |
AGENTS_REPO = "SWE-Arena/swe_agents" # HuggingFace dataset for agent metadata
|
| 51 |
-
LEADERBOARD_REPO = "SWE-Arena/pr_leaderboard"
|
| 52 |
PR_METADATA_REPO = "SWE-Arena/pr_metadata" # HuggingFace dataset for PR metadata
|
| 53 |
|
| 54 |
LEADERBOARD_COLUMNS = [
|
| 55 |
("Agent Name", "string"),
|
| 56 |
-
("
|
| 57 |
("Total PRs", "number"),
|
| 58 |
("Merged PRs", "number"),
|
| 59 |
("Acceptance Rate (%)", "number"),
|
|
@@ -1178,34 +1176,6 @@ def load_agents_from_hf():
|
|
| 1178 |
return None
|
| 1179 |
|
| 1180 |
|
| 1181 |
-
def load_leaderboard_dataset():
|
| 1182 |
-
"""Load leaderboard data from HuggingFace dataset for current year.
|
| 1183 |
-
In debug mode, loads from in-memory cache if available."""
|
| 1184 |
-
# In debug mode, check in-memory cache first
|
| 1185 |
-
if DEBUG_MODE and DEBUG_LEADERBOARD_CACHE:
|
| 1186 |
-
print(f"🐛 DEBUG MODE: Loading leaderboard from in-memory cache ({len(DEBUG_LEADERBOARD_CACHE)} entries)")
|
| 1187 |
-
return list(DEBUG_LEADERBOARD_CACHE.values())
|
| 1188 |
-
|
| 1189 |
-
try:
|
| 1190 |
-
year = datetime.now().year
|
| 1191 |
-
filename = f"{year}.csv"
|
| 1192 |
-
|
| 1193 |
-
# Try to download the CSV file for current year
|
| 1194 |
-
file_path = hf_hub_download(
|
| 1195 |
-
repo_id=LEADERBOARD_REPO,
|
| 1196 |
-
filename=filename,
|
| 1197 |
-
repo_type="dataset"
|
| 1198 |
-
)
|
| 1199 |
-
|
| 1200 |
-
# Load CSV into list of dicts
|
| 1201 |
-
df = pd.read_csv(file_path)
|
| 1202 |
-
data = df.to_dict('records')
|
| 1203 |
-
print(f"✓ Loaded {len(data)} entries from {filename}")
|
| 1204 |
-
return data
|
| 1205 |
-
|
| 1206 |
-
except Exception as e:
|
| 1207 |
-
print(f"Could not load leaderboard dataset for year {datetime.now().year}: {str(e)}")
|
| 1208 |
-
return None
|
| 1209 |
|
| 1210 |
|
| 1211 |
def get_hf_token():
|
|
@@ -1297,56 +1267,6 @@ def save_agent_to_hf(data):
|
|
| 1297 |
return False
|
| 1298 |
|
| 1299 |
|
| 1300 |
-
def save_leaderboard_to_hf(cache_dict):
|
| 1301 |
-
"""Save complete leaderboard to HuggingFace dataset as CSV.
|
| 1302 |
-
In debug mode, saves to in-memory cache only."""
|
| 1303 |
-
# Skip saving in debug mode - use in-memory cache instead
|
| 1304 |
-
if DEBUG_MODE:
|
| 1305 |
-
global DEBUG_LEADERBOARD_CACHE
|
| 1306 |
-
# Filter out agents with zero total PRs
|
| 1307 |
-
filtered_cache_dict = {k: v for k, v in cache_dict.items() if v.get('total_prs', 0) > 0}
|
| 1308 |
-
DEBUG_LEADERBOARD_CACHE = filtered_cache_dict.copy()
|
| 1309 |
-
data_list = dict_to_cache(filtered_cache_dict)
|
| 1310 |
-
print(f"🐛 DEBUG MODE: Saved to in-memory cache only ({len(data_list)} entries) - NOT saved to HuggingFace")
|
| 1311 |
-
return True
|
| 1312 |
-
|
| 1313 |
-
try:
|
| 1314 |
-
token = get_hf_token()
|
| 1315 |
-
if not token:
|
| 1316 |
-
raise Exception("No HuggingFace token found. Please set HF_TOKEN in your Space settings.")
|
| 1317 |
-
|
| 1318 |
-
# Filter out agents with zero total PRs
|
| 1319 |
-
filtered_cache_dict = {k: v for k, v in cache_dict.items() if v.get('total_prs', 0) > 0}
|
| 1320 |
-
# Convert to DataFrame
|
| 1321 |
-
data_list = dict_to_cache(filtered_cache_dict)
|
| 1322 |
-
df = pd.DataFrame(data_list)
|
| 1323 |
-
|
| 1324 |
-
# Save to CSV with year as filename
|
| 1325 |
-
year = datetime.now().year
|
| 1326 |
-
filename = f"{year}.csv"
|
| 1327 |
-
df.to_csv(filename, index=False)
|
| 1328 |
-
|
| 1329 |
-
try:
|
| 1330 |
-
# Upload to HuggingFace
|
| 1331 |
-
api = HfApi()
|
| 1332 |
-
upload_with_retry(
|
| 1333 |
-
api=api,
|
| 1334 |
-
path_or_fileobj=filename,
|
| 1335 |
-
path_in_repo=filename,
|
| 1336 |
-
repo_id=LEADERBOARD_REPO,
|
| 1337 |
-
repo_type="dataset",
|
| 1338 |
-
token=token
|
| 1339 |
-
)
|
| 1340 |
-
print(f"✓ Saved leaderboard to HuggingFace as {filename} ({len(data_list)} entries)")
|
| 1341 |
-
return True
|
| 1342 |
-
finally:
|
| 1343 |
-
# Always clean up local file, even if upload fails
|
| 1344 |
-
if os.path.exists(filename):
|
| 1345 |
-
os.remove(filename)
|
| 1346 |
-
|
| 1347 |
-
except Exception as e:
|
| 1348 |
-
print(f"✗ Error saving leaderboard: {str(e)}")
|
| 1349 |
-
return False
|
| 1350 |
|
| 1351 |
|
| 1352 |
# =============================================================================
|
|
@@ -1436,7 +1356,7 @@ def update_all_agents_incremental():
|
|
| 1436 |
# Merge metadata with stats
|
| 1437 |
cache_dict[identifier] = {
|
| 1438 |
'agent_name': agent_name,
|
| 1439 |
-
'
|
| 1440 |
'github_identifier': identifier,
|
| 1441 |
**stats
|
| 1442 |
}
|
|
@@ -1485,7 +1405,7 @@ def construct_leaderboard_from_metadata():
|
|
| 1485 |
|
| 1486 |
cache_dict[identifier] = {
|
| 1487 |
'agent_name': agent_name,
|
| 1488 |
-
'
|
| 1489 |
'github_identifier': identifier,
|
| 1490 |
**stats
|
| 1491 |
}
|
|
@@ -1496,7 +1416,7 @@ def construct_leaderboard_from_metadata():
|
|
| 1496 |
def initialize_data():
|
| 1497 |
"""
|
| 1498 |
Initialize data on application startup.
|
| 1499 |
-
|
| 1500 |
|
| 1501 |
In DEBUG MODE:
|
| 1502 |
- If no data available, automatically mine up to 10 PRs per query per agent
|
|
@@ -1506,26 +1426,15 @@ def initialize_data():
|
|
| 1506 |
|
| 1507 |
year = datetime.now().year
|
| 1508 |
|
| 1509 |
-
#
|
| 1510 |
-
print(f"
|
| 1511 |
-
leaderboard_data = load_leaderboard_dataset()
|
| 1512 |
-
if leaderboard_data:
|
| 1513 |
-
print(f"✓ Found and loaded {year}.csv from leaderboard repository")
|
| 1514 |
-
print("✓ Initialized from leaderboard dataset")
|
| 1515 |
-
return
|
| 1516 |
-
|
| 1517 |
-
print(f" {year}.csv not found in leaderboard repository")
|
| 1518 |
-
|
| 1519 |
-
# STEP 2: Try constructing from PR metadata in SWE-Arena/pr_metadata (fast, memory-efficient)
|
| 1520 |
-
print(f"STEP 2: Checking SWE-Arena/pr_metadata for existing data...")
|
| 1521 |
try:
|
| 1522 |
cache_dict = construct_leaderboard_from_metadata()
|
| 1523 |
# Check if there's actually meaningful data (at least one agent with PRs)
|
| 1524 |
has_data = any(entry.get('total_prs', 0) > 0 for entry in cache_dict.values())
|
| 1525 |
if cache_dict and has_data:
|
| 1526 |
print(f"✓ Found PR metadata in pr_metadata repository")
|
| 1527 |
-
|
| 1528 |
-
print("✓ Initialized from PR metadata and saved as CSV")
|
| 1529 |
return
|
| 1530 |
else:
|
| 1531 |
print(" No meaningful PR metadata found in pr_metadata repository")
|
|
@@ -1540,10 +1449,7 @@ def initialize_data():
|
|
| 1540 |
print(f"✓ Loaded {len(agents)} agents from HuggingFace")
|
| 1541 |
print("⛏️ Mining GitHub data in debug mode (limited to 10 PRs per query)...")
|
| 1542 |
cache_dict = update_all_agents_incremental()
|
| 1543 |
-
|
| 1544 |
-
# In debug mode, this won't actually save to HF
|
| 1545 |
-
save_leaderboard_to_hf(cache_dict)
|
| 1546 |
-
print("✓ Debug mining complete (data NOT saved to HuggingFace)")
|
| 1547 |
return
|
| 1548 |
else:
|
| 1549 |
print("⚠️ No agents found. Waiting for first submission...")
|
|
@@ -1555,8 +1461,6 @@ def initialize_data():
|
|
| 1555 |
print(f"✓ Loaded {len(agents)} agents from HuggingFace")
|
| 1556 |
print("⛏️ Mining GitHub data (this may take a while)...")
|
| 1557 |
cache_dict = update_all_agents_incremental()
|
| 1558 |
-
if cache_dict:
|
| 1559 |
-
save_leaderboard_to_hf(cache_dict)
|
| 1560 |
return
|
| 1561 |
|
| 1562 |
# No data available
|
|
@@ -1689,25 +1593,25 @@ def create_monthly_metrics_plot():
|
|
| 1689 |
|
| 1690 |
def get_leaderboard_dataframe():
|
| 1691 |
"""
|
| 1692 |
-
|
| 1693 |
Returns formatted DataFrame sorted by acceptance rate.
|
| 1694 |
"""
|
| 1695 |
-
#
|
| 1696 |
-
|
| 1697 |
|
| 1698 |
-
if not
|
| 1699 |
# Return empty DataFrame with correct columns if no data
|
| 1700 |
column_names = [col[0] for col in LEADERBOARD_COLUMNS]
|
| 1701 |
return pd.DataFrame(columns=column_names)
|
| 1702 |
|
| 1703 |
rows = []
|
| 1704 |
-
for data in
|
| 1705 |
# Filter out agents with zero total PRs
|
| 1706 |
if data.get('total_prs', 0) > 0:
|
| 1707 |
# Only include display-relevant fields
|
| 1708 |
rows.append([
|
| 1709 |
data.get('agent_name', 'Unknown'),
|
| 1710 |
-
data.get('
|
| 1711 |
data.get('total_prs', 0),
|
| 1712 |
data.get('merged', 0),
|
| 1713 |
data.get('acceptance_rate', 0.0),
|
|
@@ -1791,21 +1695,6 @@ def submit_agent(identifier, agent_name, organization, description, website):
|
|
| 1791 |
# Save metadata to HuggingFace
|
| 1792 |
save_pr_metadata_to_hf(metadata_list, identifier)
|
| 1793 |
|
| 1794 |
-
# Calculate stats from metadata
|
| 1795 |
-
stats = calculate_pr_stats_from_metadata(metadata_list)
|
| 1796 |
-
|
| 1797 |
-
# Load current leaderboard
|
| 1798 |
-
leaderboard_data = load_leaderboard_dataset()
|
| 1799 |
-
if not leaderboard_data:
|
| 1800 |
-
leaderboard_data = []
|
| 1801 |
-
|
| 1802 |
-
# Convert to dict for easy updating
|
| 1803 |
-
cache_dict = {entry['github_identifier']: entry for entry in leaderboard_data}
|
| 1804 |
-
cache_dict[identifier] = {**submission, **stats}
|
| 1805 |
-
|
| 1806 |
-
# Save to HuggingFace
|
| 1807 |
-
save_leaderboard_to_hf(cache_dict)
|
| 1808 |
-
|
| 1809 |
return f"✅ Successfully submitted {agent_name}!", get_leaderboard_dataframe(), create_monthly_metrics_plot()
|
| 1810 |
|
| 1811 |
except Exception as e:
|
|
@@ -1873,15 +1762,6 @@ def daily_update_task():
|
|
| 1873 |
print(f" PRs updated (closed/merged): {total_updated}")
|
| 1874 |
print(f"{'='*80}")
|
| 1875 |
|
| 1876 |
-
# Reconstruct leaderboard from all stored metadata
|
| 1877 |
-
print(f"\n📈 Rebuilding leaderboard from refreshed data...")
|
| 1878 |
-
cache_dict = construct_leaderboard_from_metadata()
|
| 1879 |
-
|
| 1880 |
-
if cache_dict:
|
| 1881 |
-
# Save leaderboard
|
| 1882 |
-
save_leaderboard_to_hf(cache_dict)
|
| 1883 |
-
print("✓ Leaderboard updated successfully")
|
| 1884 |
-
|
| 1885 |
print(f"\n✅ Daily update completed at {datetime.now(timezone.utc).isoformat()}")
|
| 1886 |
|
| 1887 |
except Exception as e:
|
|
@@ -1943,7 +1823,7 @@ with gr.Blocks(title="SWE Agent PR Leaderboard", theme=gr.themes.Soft()) as app:
|
|
| 1943 |
leaderboard_table = Leaderboard(
|
| 1944 |
value=get_leaderboard_dataframe(),
|
| 1945 |
datatype=LEADERBOARD_COLUMNS,
|
| 1946 |
-
search_columns=["Agent Name", "
|
| 1947 |
filter_columns=["Acceptance Rate (%)"]
|
| 1948 |
)
|
| 1949 |
|
|
|
|
| 44 |
DEBUG_MODE = os.getenv('DEBUG_MODE', 'False').lower() in ('true', '1', 'yes')
|
| 45 |
|
| 46 |
# In-memory cache for debug mode (data persists during session but NOT saved to HF)
|
|
|
|
| 47 |
DEBUG_PR_METADATA_CACHE = defaultdict(list)
|
| 48 |
|
| 49 |
AGENTS_REPO = "SWE-Arena/swe_agents" # HuggingFace dataset for agent metadata
|
|
|
|
| 50 |
PR_METADATA_REPO = "SWE-Arena/pr_metadata" # HuggingFace dataset for PR metadata
|
| 51 |
|
| 52 |
LEADERBOARD_COLUMNS = [
|
| 53 |
("Agent Name", "string"),
|
| 54 |
+
("Website", "string"),
|
| 55 |
("Total PRs", "number"),
|
| 56 |
("Merged PRs", "number"),
|
| 57 |
("Acceptance Rate (%)", "number"),
|
|
|
|
| 1176 |
return None
|
| 1177 |
|
| 1178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1179 |
|
| 1180 |
|
| 1181 |
def get_hf_token():
|
|
|
|
| 1267 |
return False
|
| 1268 |
|
| 1269 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1270 |
|
| 1271 |
|
| 1272 |
# =============================================================================
|
|
|
|
| 1356 |
# Merge metadata with stats
|
| 1357 |
cache_dict[identifier] = {
|
| 1358 |
'agent_name': agent_name,
|
| 1359 |
+
'website': agent.get('website', 'Unknown'),
|
| 1360 |
'github_identifier': identifier,
|
| 1361 |
**stats
|
| 1362 |
}
|
|
|
|
| 1405 |
|
| 1406 |
cache_dict[identifier] = {
|
| 1407 |
'agent_name': agent_name,
|
| 1408 |
+
'website': agent.get('website', 'Unknown'),
|
| 1409 |
'github_identifier': identifier,
|
| 1410 |
**stats
|
| 1411 |
}
|
|
|
|
| 1416 |
def initialize_data():
|
| 1417 |
"""
|
| 1418 |
Initialize data on application startup.
|
| 1419 |
+
Constructs leaderboard from PR metadata only.
|
| 1420 |
|
| 1421 |
In DEBUG MODE:
|
| 1422 |
- If no data available, automatically mine up to 10 PRs per query per agent
|
|
|
|
| 1426 |
|
| 1427 |
year = datetime.now().year
|
| 1428 |
|
| 1429 |
+
# Try constructing from PR metadata in SWE-Arena/pr_metadata (fast, memory-efficient)
|
| 1430 |
+
print(f"Checking SWE-Arena/pr_metadata for existing data...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1431 |
try:
|
| 1432 |
cache_dict = construct_leaderboard_from_metadata()
|
| 1433 |
# Check if there's actually meaningful data (at least one agent with PRs)
|
| 1434 |
has_data = any(entry.get('total_prs', 0) > 0 for entry in cache_dict.values())
|
| 1435 |
if cache_dict and has_data:
|
| 1436 |
print(f"✓ Found PR metadata in pr_metadata repository")
|
| 1437 |
+
print("✓ Initialized from PR metadata")
|
|
|
|
| 1438 |
return
|
| 1439 |
else:
|
| 1440 |
print(" No meaningful PR metadata found in pr_metadata repository")
|
|
|
|
| 1449 |
print(f"✓ Loaded {len(agents)} agents from HuggingFace")
|
| 1450 |
print("⛏️ Mining GitHub data in debug mode (limited to 10 PRs per query)...")
|
| 1451 |
cache_dict = update_all_agents_incremental()
|
| 1452 |
+
print("✓ Debug mining complete (data NOT saved to HuggingFace)")
|
|
|
|
|
|
|
|
|
|
| 1453 |
return
|
| 1454 |
else:
|
| 1455 |
print("⚠️ No agents found. Waiting for first submission...")
|
|
|
|
| 1461 |
print(f"✓ Loaded {len(agents)} agents from HuggingFace")
|
| 1462 |
print("⛏️ Mining GitHub data (this may take a while)...")
|
| 1463 |
cache_dict = update_all_agents_incremental()
|
|
|
|
|
|
|
| 1464 |
return
|
| 1465 |
|
| 1466 |
# No data available
|
|
|
|
| 1593 |
|
| 1594 |
def get_leaderboard_dataframe():
|
| 1595 |
"""
|
| 1596 |
+
Construct leaderboard data from PR metadata and convert to pandas DataFrame for display.
|
| 1597 |
Returns formatted DataFrame sorted by acceptance rate.
|
| 1598 |
"""
|
| 1599 |
+
# Construct leaderboard from PR metadata
|
| 1600 |
+
cache_dict = construct_leaderboard_from_metadata()
|
| 1601 |
|
| 1602 |
+
if not cache_dict:
|
| 1603 |
# Return empty DataFrame with correct columns if no data
|
| 1604 |
column_names = [col[0] for col in LEADERBOARD_COLUMNS]
|
| 1605 |
return pd.DataFrame(columns=column_names)
|
| 1606 |
|
| 1607 |
rows = []
|
| 1608 |
+
for identifier, data in cache_dict.items():
|
| 1609 |
# Filter out agents with zero total PRs
|
| 1610 |
if data.get('total_prs', 0) > 0:
|
| 1611 |
# Only include display-relevant fields
|
| 1612 |
rows.append([
|
| 1613 |
data.get('agent_name', 'Unknown'),
|
| 1614 |
+
data.get('website', 'Unknown'),
|
| 1615 |
data.get('total_prs', 0),
|
| 1616 |
data.get('merged', 0),
|
| 1617 |
data.get('acceptance_rate', 0.0),
|
|
|
|
| 1695 |
# Save metadata to HuggingFace
|
| 1696 |
save_pr_metadata_to_hf(metadata_list, identifier)
|
| 1697 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1698 |
return f"✅ Successfully submitted {agent_name}!", get_leaderboard_dataframe(), create_monthly_metrics_plot()
|
| 1699 |
|
| 1700 |
except Exception as e:
|
|
|
|
| 1762 |
print(f" PRs updated (closed/merged): {total_updated}")
|
| 1763 |
print(f"{'='*80}")
|
| 1764 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1765 |
print(f"\n✅ Daily update completed at {datetime.now(timezone.utc).isoformat()}")
|
| 1766 |
|
| 1767 |
except Exception as e:
|
|
|
|
| 1823 |
leaderboard_table = Leaderboard(
|
| 1824 |
value=get_leaderboard_dataframe(),
|
| 1825 |
datatype=LEADERBOARD_COLUMNS,
|
| 1826 |
+
search_columns=["Agent Name", "Website"],
|
| 1827 |
filter_columns=["Acceptance Rate (%)"]
|
| 1828 |
)
|
| 1829 |
|
msr.py
CHANGED
|
@@ -64,11 +64,9 @@ else:
|
|
| 64 |
# Constants (match app.py)
|
| 65 |
# =============================================================================
|
| 66 |
|
| 67 |
-
DEBUG_LEADERBOARD_CACHE = {}
|
| 68 |
DEBUG_PR_METADATA_CACHE = defaultdict(list)
|
| 69 |
|
| 70 |
AGENTS_REPO = "SWE-Arena/swe_agents"
|
| 71 |
-
LEADERBOARD_REPO = "SWE-Arena/pr_leaderboard"
|
| 72 |
PR_METADATA_REPO = "SWE-Arena/pr_metadata"
|
| 73 |
|
| 74 |
|
|
@@ -622,45 +620,6 @@ def get_already_mined_dates(agent_identifier, n_months=6):
|
|
| 622 |
return set()
|
| 623 |
|
| 624 |
|
| 625 |
-
def save_leaderboard_to_hf(cache_dict):
|
| 626 |
-
if DEBUG_MODE:
|
| 627 |
-
global DEBUG_LEADERBOARD_CACHE
|
| 628 |
-
# Filter out agents with zero total PRs
|
| 629 |
-
filtered_cache_dict = {k: v for k, v in cache_dict.items() if v.get('total_prs', 0) > 0}
|
| 630 |
-
DEBUG_LEADERBOARD_CACHE = filtered_cache_dict.copy()
|
| 631 |
-
data_list = dict_to_cache(filtered_cache_dict)
|
| 632 |
-
print(f"🐛 DEBUG MODE: Saved to in-memory cache only ({len(data_list)} entries) - NOT saved to HuggingFace")
|
| 633 |
-
return True
|
| 634 |
-
try:
|
| 635 |
-
token = get_hf_token()
|
| 636 |
-
if not token:
|
| 637 |
-
raise Exception("No HuggingFace token found. Please set HF_TOKEN in your environment.")
|
| 638 |
-
# Filter out agents with zero total PRs
|
| 639 |
-
filtered_cache_dict = {k: v for k, v in cache_dict.items() if v.get('total_prs', 0) > 0}
|
| 640 |
-
data_list = dict_to_cache(filtered_cache_dict)
|
| 641 |
-
df = pd.DataFrame(data_list)
|
| 642 |
-
year = datetime.now().year
|
| 643 |
-
filename = f"{year}.csv"
|
| 644 |
-
df.to_csv(filename, index=False)
|
| 645 |
-
api = HfApi()
|
| 646 |
-
try:
|
| 647 |
-
upload_with_retry(
|
| 648 |
-
api=api,
|
| 649 |
-
path_or_fileobj=filename,
|
| 650 |
-
path_in_repo=filename,
|
| 651 |
-
repo_id=LEADERBOARD_REPO,
|
| 652 |
-
repo_type="dataset",
|
| 653 |
-
token=token
|
| 654 |
-
)
|
| 655 |
-
print(f"✓ Saved leaderboard to HuggingFace as {filename} ({len(data_list)} entries)")
|
| 656 |
-
return True
|
| 657 |
-
finally:
|
| 658 |
-
# Always clean up local file, even if upload fails
|
| 659 |
-
if os.path.exists(filename):
|
| 660 |
-
os.remove(filename)
|
| 661 |
-
except Exception as e:
|
| 662 |
-
print(f"✗ Error saving leaderboard: {str(e)}")
|
| 663 |
-
return False
|
| 664 |
|
| 665 |
|
| 666 |
def calculate_pr_stats_from_metadata(metadata_list):
|
|
@@ -745,7 +704,7 @@ def update_all_agents_incremental():
|
|
| 745 |
stats = calculate_pr_stats_from_metadata(agent_metadata)
|
| 746 |
cache_dict[identifier] = {
|
| 747 |
'agent_name': agent_name,
|
| 748 |
-
'
|
| 749 |
'github_identifier': identifier,
|
| 750 |
**stats
|
| 751 |
}
|
|
@@ -761,8 +720,6 @@ def update_all_agents_incremental():
|
|
| 761 |
def run_once():
|
| 762 |
print("\n🚀 Immediate mining run started")
|
| 763 |
cache_dict = update_all_agents_incremental()
|
| 764 |
-
if cache_dict:
|
| 765 |
-
save_leaderboard_to_hf(cache_dict)
|
| 766 |
print("✅ Immediate mining run completed\n")
|
| 767 |
|
| 768 |
|
|
|
|
| 64 |
# Constants (match app.py)
|
| 65 |
# =============================================================================
|
| 66 |
|
|
|
|
| 67 |
DEBUG_PR_METADATA_CACHE = defaultdict(list)
|
| 68 |
|
| 69 |
AGENTS_REPO = "SWE-Arena/swe_agents"
|
|
|
|
| 70 |
PR_METADATA_REPO = "SWE-Arena/pr_metadata"
|
| 71 |
|
| 72 |
|
|
|
|
| 620 |
return set()
|
| 621 |
|
| 622 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 623 |
|
| 624 |
|
| 625 |
def calculate_pr_stats_from_metadata(metadata_list):
|
|
|
|
| 704 |
stats = calculate_pr_stats_from_metadata(agent_metadata)
|
| 705 |
cache_dict[identifier] = {
|
| 706 |
'agent_name': agent_name,
|
| 707 |
+
'website': agent.get('website', 'Unknown'),
|
| 708 |
'github_identifier': identifier,
|
| 709 |
**stats
|
| 710 |
}
|
|
|
|
| 720 |
def run_once():
|
| 721 |
print("\n🚀 Immediate mining run started")
|
| 722 |
cache_dict = update_all_agents_incremental()
|
|
|
|
|
|
|
| 723 |
print("✅ Immediate mining run completed\n")
|
| 724 |
|
| 725 |
|