Claude committed on
Commit
b733054
·
unverified ·
1 Parent(s): 71ce9bc

Add save_leaderboard_and_metrics_to_hf function

Browse files

- Added function to save computed leaderboard and monthly metrics to HuggingFace
- Uploads to SWE-Arena/leaderboard_metadata as swe-pr.json
- Integrated into mine_all_agents() to run after PR mining completes
- Includes metadata with last_updated timestamp and time_frame_days
- Ensures leaderboard cache is refreshed after weekly mining runs

Files changed (1) hide show
  1. app.py +79 -0
app.py CHANGED
@@ -892,6 +892,78 @@ def save_agent_to_hf(data):
892
  return False
893
 
894
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
895
  # =============================================================================
896
  # DATA MANAGEMENT
897
  # =============================================================================
@@ -992,6 +1064,13 @@ def mine_all_agents():
992
  print(f" BigQuery queries executed: 1")
993
  print(f"{'='*80}\n")
994
 
 
 
 
 
 
 
 
995
 
996
  def construct_leaderboard_from_metadata():
997
  """
 
892
  return False
893
 
894
 
895
def save_leaderboard_and_metrics_to_hf():
    """
    Publish the leaderboard and monthly metrics as one JSON file on HuggingFace.

    The leaderboard comes from construct_leaderboard_from_metadata() and the
    monthly metrics from calculate_monthly_metrics_by_agent(top_n=None). Both
    are bundled with a small metadata section (UTC timestamp, time frame in
    days, number of leaderboard entries) and uploaded as "swe-pr.json" to the
    LEADERBOARD_REPO dataset. An existing file at that path is overwritten.

    Returns:
        bool: True if the upload succeeded, False if any step raised.
    """
    import traceback
    from io import BytesIO

    banner = '=' * 80

    try:
        hf_token = get_hf_token()
        if not hf_token:
            # Raised inside the try so the missing-token case is reported
            # through the same error path as any other failure.
            raise Exception("No HuggingFace token found")

        hf_api = HfApi(token=hf_token)

        print(f"\n{banner}")
        print("📊 Preparing leaderboard and metrics data for upload...")
        print(f"{banner}\n")

        # Leaderboard stats
        print(" Constructing leaderboard data...")
        board = construct_leaderboard_from_metadata()

        # Monthly metrics for every agent (top_n=None disables the top-N cut)
        print(" Calculating monthly metrics...")
        metrics = calculate_monthly_metrics_by_agent(top_n=None)

        # Single payload holding both data sets plus bookkeeping info
        payload = {
            "leaderboard": board,
            "monthly_metrics": metrics,
            "metadata": {
                "last_updated": datetime.now(timezone.utc).isoformat(),
                "time_frame_days": LEADERBOARD_TIME_FRAME_DAYS,
                "total_agents": len(board),
            },
        }

        print(f" Leaderboard entries: {len(board)}")
        print(f" Monthly metrics for: {len(metrics['agents'])} agents")
        print(f" Time frame: {LEADERBOARD_TIME_FRAME_DAYS} days")

        # Serialize in memory; upload_file accepts a binary file-like object
        buffer = BytesIO(json.dumps(payload, indent=2).encode('utf-8'))

        # Upload to HuggingFace (replaces any existing swe-pr.json)
        print(f"\n🤗 Uploading to {LEADERBOARD_REPO}...")
        stamp = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
        hf_api.upload_file(
            path_or_fileobj=buffer,
            path_in_repo="swe-pr.json",
            repo_id=LEADERBOARD_REPO,
            repo_type="dataset",
            token=hf_token,
            commit_message=f"Update leaderboard data - {stamp} UTC",
        )

        print(" ✓ Successfully uploaded swe-pr.json")
        print(f"{banner}\n")
    except Exception as err:
        print(f" ✗ Error saving leaderboard data: {str(err)}")
        traceback.print_exc()
        return False

    return True
965
+
966
+
967
  # =============================================================================
968
  # DATA MANAGEMENT
969
  # =============================================================================
 
1064
  print(f" BigQuery queries executed: 1")
1065
  print(f"{'='*80}\n")
1066
 
1067
+ # After mining is complete, save leaderboard and metrics to HuggingFace
1068
+ print(f"📤 Uploading leaderboard and metrics data...")
1069
+ if save_leaderboard_and_metrics_to_hf():
1070
+ print(f"✓ Leaderboard and metrics successfully uploaded to {LEADERBOARD_REPO}")
1071
+ else:
1072
+ print(f"⚠️ Failed to upload leaderboard and metrics data")
1073
+
1074
 
1075
  def construct_leaderboard_from_metadata():
1076
  """