Claude
committed on
Add save_leaderboard_and_metrics_to_hf function
Browse files
- Added function to save computed leaderboard and monthly metrics to HuggingFace
- Uploads to SWE-Arena/leaderboard_metadata as swe-pr.json
- Integrated into mine_all_agents() to run after PR mining completes
- Includes metadata with last_updated timestamp and time_frame_days
- Ensures leaderboard cache is refreshed after weekly mining runs
app.py
CHANGED
|
@@ -892,6 +892,78 @@ def save_agent_to_hf(data):
|
|
| 892 |
return False
|
| 893 |
|
| 894 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 895 |
# =============================================================================
|
| 896 |
# DATA MANAGEMENT
|
| 897 |
# =============================================================================
|
|
@@ -992,6 +1064,13 @@ def mine_all_agents():
|
|
| 992 |
print(f" BigQuery queries executed: 1")
|
| 993 |
print(f"{'='*80}\n")
|
| 994 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 995 |
|
| 996 |
def construct_leaderboard_from_metadata():
|
| 997 |
"""
|
|
|
|
| 892 |
return False
|
| 893 |
|
| 894 |
|
| 895 |
+
def save_leaderboard_and_metrics_to_hf():
    """
    Publish the leaderboard and monthly metrics to HuggingFace as one JSON file.

    Builds the leaderboard with construct_leaderboard_from_metadata() and the
    monthly metrics with calculate_monthly_metrics_by_agent(top_n=None), bundles
    both with a small metadata section (UTC timestamp, time frame in days and
    number of leaderboard entries) and uploads the result as "swe-pr.json" to
    the dataset repository named by LEADERBOARD_REPO. An existing file at that
    path is overwritten.

    Every failure, including a missing HuggingFace token, is reported on stdout
    together with a traceback and converted into a False return value instead
    of propagating to the caller.

    Returns:
        bool: True if the upload completed, False otherwise
    """
    import io

    rule = '=' * 80
    target_file = "swe-pr.json"

    try:
        hf_token = get_hf_token()
        if not hf_token:
            # Raised inside the try on purpose: the handler below turns it
            # into the regular "print + return False" failure path.
            raise Exception("No HuggingFace token found")

        hub = HfApi(token=hf_token)

        print(f"\n{rule}")
        print("📊 Preparing leaderboard and metrics data for upload...")
        print(f"{rule}\n")

        # Leaderboard rows
        print(" Constructing leaderboard data...")
        board = construct_leaderboard_from_metadata()

        # Monthly metrics for all agents, not just the top N
        print(" Calculating monthly metrics...")
        metrics = calculate_monthly_metrics_by_agent(top_n=None)

        # Single payload holding both data sets plus bookkeeping info
        info = {
            "last_updated": datetime.now(timezone.utc).isoformat(),
            "time_frame_days": LEADERBOARD_TIME_FRAME_DAYS,
            "total_agents": len(board),
        }
        payload = {
            "leaderboard": board,
            "monthly_metrics": metrics,
            "metadata": info,
        }

        print(f" Leaderboard entries: {len(board)}")
        print(f" Monthly metrics for: {len(metrics['agents'])} agents")
        print(f" Time frame: {LEADERBOARD_TIME_FRAME_DAYS} days")

        # Serialize to UTF-8 JSON held in memory so no temporary file is needed
        serialized = json.dumps(payload, indent=2)
        buffer = io.BytesIO(serialized.encode('utf-8'))

        # Push to HuggingFace (will overwrite if exists)
        print(f"\n🤗 Uploading to {LEADERBOARD_REPO}...")
        stamp = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
        hub.upload_file(
            path_or_fileobj=buffer,
            path_in_repo=target_file,
            repo_id=LEADERBOARD_REPO,
            repo_type="dataset",
            token=hf_token,
            commit_message=f"Update leaderboard data - {stamp} UTC",
        )

        print(f" ✓ Successfully uploaded {target_file}")
        print(f"{rule}\n")

    except Exception as err:
        print(f" ✗ Error saving leaderboard data: {err!s}")
        import traceback
        traceback.print_exc()
        return False

    else:
        return True
|
| 965 |
+
|
| 966 |
+
|
| 967 |
# =============================================================================
|
| 968 |
# DATA MANAGEMENT
|
| 969 |
# =============================================================================
|
|
|
|
| 1064 |
print(f" BigQuery queries executed: 1")
|
| 1065 |
print(f"{'='*80}\n")
|
| 1066 |
|
| 1067 |
+
# After mining is complete, save leaderboard and metrics to HuggingFace
|
| 1068 |
+
print(f"📤 Uploading leaderboard and metrics data...")
|
| 1069 |
+
if save_leaderboard_and_metrics_to_hf():
|
| 1070 |
+
print(f"✓ Leaderboard and metrics successfully uploaded to {LEADERBOARD_REPO}")
|
| 1071 |
+
else:
|
| 1072 |
+
print(f"⚠️ Failed to upload leaderboard and metrics data")
|
| 1073 |
+
|
| 1074 |
|
| 1075 |
def construct_leaderboard_from_metadata():
|
| 1076 |
"""
|