zhiminy committed on
Commit
f972073
·
1 Parent(s): ca85b3a
Files changed (2) hide show
  1. app.py +10 -6
  2. msr.py +5 -0
app.py CHANGED
@@ -1154,7 +1154,7 @@ def create_monthly_metrics_plot(top_n=5):
1154
  def get_leaderboard_dataframe():
1155
  """
1156
  Construct leaderboard data from PR metadata and convert to pandas DataFrame for display.
1157
- Returns formatted DataFrame sorted by acceptance rate.
1158
  """
1159
  # Construct leaderboard from PR metadata
1160
  cache_dict = construct_leaderboard_from_metadata()
@@ -1187,9 +1187,9 @@ def get_leaderboard_dataframe():
1187
  if col in df.columns:
1188
  df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
1189
 
1190
- # Sort by Acceptance Rate (%) descending
1191
- if "Acceptance Rate (%)" in df.columns and not df.empty:
1192
- df = df.sort_values(by="Acceptance Rate (%)", ascending=False).reset_index(drop=True)
1193
 
1194
  return df
1195
 
@@ -1264,14 +1264,18 @@ scheduler.add_job(
1264
  replace_existing=True
1265
  )
1266
  scheduler.start()
1267
- print(f"✓ Scheduler started: Weekly PR Mining at 12:00 AM UTC every Monday (mines last {UPDATE_TIME_FRAME_DAYS} days)")
 
 
 
 
1268
 
1269
  # Create Gradio interface
1270
  with gr.Blocks(title="SWE Agent PR Leaderboard", theme=gr.themes.Soft()) as app:
1271
  total_months = LEADERBOARD_TIME_FRAME_DAYS // 30
1272
 
1273
  gr.Markdown("# 🏆 SWE Agent PR Leaderboard")
1274
- gr.Markdown(f"Track and compare GitHub pull request statistics for SWE agents (last {total_months} months)")
1275
 
1276
  with gr.Tabs():
1277
 
 
1154
  def get_leaderboard_dataframe():
1155
  """
1156
  Construct leaderboard data from PR metadata and convert to pandas DataFrame for display.
1157
+ Returns formatted DataFrame sorted by total PRs.
1158
  """
1159
  # Construct leaderboard from PR metadata
1160
  cache_dict = construct_leaderboard_from_metadata()
 
1187
  if col in df.columns:
1188
  df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
1189
 
1190
+ # Sort by Total PRs descending
1191
+ if "Total PRs" in df.columns and not df.empty:
1192
+ df = df.sort_values(by="Total PRs", ascending=False).reset_index(drop=True)
1193
 
1194
  return df
1195
 
 
1264
  replace_existing=True
1265
  )
1266
  scheduler.start()
1267
+ print(f"\n{'='*80}")
1268
+ print(f"✓ Scheduler initialized successfully")
1269
+ print(f"⛏️ Mining schedule: Every Monday at 12:00 AM UTC")
1270
+ print(f"📥 On startup: Only loads cached data from HuggingFace (no mining)")
1271
+ print(f"{'='*80}\n")
1272
 
1273
  # Create Gradio interface
1274
  with gr.Blocks(title="SWE Agent PR Leaderboard", theme=gr.themes.Soft()) as app:
1275
  total_months = LEADERBOARD_TIME_FRAME_DAYS // 30
1276
 
1277
  gr.Markdown("# 🏆 SWE Agent PR Leaderboard")
1278
+ gr.Markdown(f"Track and compare GitHub pull request statistics for SWE agents")
1279
 
1280
  with gr.Tabs():
1281
 
msr.py CHANGED
@@ -416,6 +416,11 @@ def load_agents_from_hf():
416
  with open(file_path, 'r') as f:
417
  agent_data = json.load(f)
418
 
 
 
 
 
 
419
  # Extract github_identifier from filename (remove .json extension)
420
  github_identifier = json_file.replace('.json', '')
421
  agent_data['github_identifier'] = github_identifier
 
416
  with open(file_path, 'r') as f:
417
  agent_data = json.load(f)
418
 
419
+ # Only process agents with status == "public"
420
+ if agent_data.get('status') != 'public':
421
+ print(f"Skipping {json_file}: status is not 'public'")
422
+ continue
423
+
424
  # Extract github_identifier from filename (remove .json extension)
425
  github_identifier = json_file.replace('.json', '')
426
  agent_data['github_identifier'] = github_identifier