zhimin-z committed · Commit 3bd3f7b · 1 Parent(s): 3f66a9e

Files changed (3)
  1. README.md +2 -0
  2. app.py +62 -62
  3. msr.py +33 -33
README.md CHANGED
@@ -28,6 +28,8 @@ If an assistant can consistently get pull requests accepted across different pro
28
  Key metrics from the last 180 days:
29
 
30
  **Leaderboard Table**
 
 
31
  - **Total PRs**: Pull requests the assistant has opened
32
  - **Merged PRs**: PRs that got merged (not just closed)
33
  - **Acceptance Rate**: Percentage of concluded PRs that got merged
 
28
  Key metrics from the last 180 days:
29
 
30
  **Leaderboard Table**
31
+ - **Assistant**: Display name of the assistant
32
+ - **Website**: Link to the assistant's homepage or documentation
33
  - **Total PRs**: Pull requests the assistant has opened
34
  - **Merged PRs**: PRs that got merged (not just closed)
35
  - **Acceptance Rate**: Percentage of concluded PRs that got merged
app.py CHANGED
@@ -22,13 +22,13 @@ load_dotenv()
22
  # CONFIGURATION
23
  # =============================================================================
24
 
25
- AGENTS_REPO = "SWE-Arena/bot_metadata" # HuggingFace dataset for agent metadata
26
  LEADERBOARD_FILENAME = f"{os.getenv('COMPOSE_PROJECT_NAME')}.json"
27
  LEADERBOARD_REPO = "SWE-Arena/leaderboard_data" # HuggingFace dataset for leaderboard data
28
  MAX_RETRIES = 5
29
 
30
  LEADERBOARD_COLUMNS = [
31
- ("Agent Name", "string"),
32
  ("Website", "string"),
33
  ("Total PRs", "number"),
34
  ("Merged PRs", "number"),
@@ -96,10 +96,10 @@ def validate_github_username(identifier):
96
  # =============================================================================
97
 
98
  def load_agents_from_hf():
99
- """Load all agent metadata JSON files from HuggingFace dataset."""
100
  try:
101
  api = HfApi()
102
- agents = []
103
 
104
  # List all files in the repository
105
  files = list_repo_files_with_backoff(api=api, repo_id=AGENTS_REPO, repo_type="dataset")
@@ -119,27 +119,27 @@ def load_agents_from_hf():
119
  with open(file_path, 'r') as f:
120
  agent_data = json.load(f)
121
 
122
- # Only process agents with status == "active"
123
  if agent_data.get('status') != 'active':
124
  continue
125
 
126
- # Extract github_identifier from filename (e.g., "agent[bot].json" -> "agent[bot]")
127
  filename_identifier = json_file.replace('.json', '')
128
 
129
  # Add or override github_identifier to match filename
130
  agent_data['github_identifier'] = filename_identifier
131
 
132
- agents.append(agent_data)
133
 
134
  except Exception as e:
135
  print(f"Warning: Could not load {json_file}: {str(e)}")
136
  continue
137
 
138
- print(f"Loaded {len(agents)} agents from HuggingFace")
139
- return agents
140
 
141
  except Exception as e:
142
- print(f"Could not load agents from HuggingFace: {str(e)}")
143
  return None
144
 
145
 
@@ -195,7 +195,7 @@ def upload_with_retry(api, path_or_fileobj, path_in_repo, repo_id, repo_type, to
195
 
196
 
197
  def save_agent_to_hf(data):
198
- """Save a new agent to HuggingFace dataset as {identifier}.json in root."""
199
  try:
200
  api = HfApi()
201
  token = get_hf_token()
@@ -220,7 +220,7 @@ def save_agent_to_hf(data):
220
  repo_type="dataset",
221
  token=token
222
  )
223
- print(f"Saved agent to HuggingFace: {filename}")
224
  return True
225
  finally:
226
  # Always clean up local file, even if upload fails
@@ -228,7 +228,7 @@ def save_agent_to_hf(data):
228
  os.remove(filename)
229
 
230
  except Exception as e:
231
- print(f"Error saving agent: {str(e)}")
232
  return False
233
 
234
 
@@ -275,10 +275,10 @@ def create_monthly_metrics_plot(top_n=5):
275
  - Left y-axis: Acceptance Rate (%) as line curves
276
  - Right y-axis: Total PRs created as bar charts
277
 
278
- Each agent gets a unique color for both their line and bars.
279
 
280
  Args:
281
- top_n: Number of top agents to show (default: 5)
282
  """
283
  # Load from saved dataset
284
  saved_data = load_leaderboard_data_from_hf()
@@ -303,10 +303,10 @@ def create_monthly_metrics_plot(top_n=5):
303
  print(f"Loaded monthly metrics from saved dataset")
304
 
305
  # Apply top_n filter if specified
306
- if top_n is not None and top_n > 0 and metrics.get('agents'):
307
- # Calculate total PRs for each agent
308
  agent_totals = []
309
- for agent_name in metrics['agents']:
310
  agent_data = metrics['data'].get(agent_name, {})
311
  total_prs = sum(agent_data.get('total_prs', []))
312
  agent_totals.append((agent_name, total_prs))
@@ -315,14 +315,14 @@ def create_monthly_metrics_plot(top_n=5):
315
  agent_totals.sort(key=lambda x: x[1], reverse=True)
316
  top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]]
317
 
318
- # Filter metrics to only include top agents
319
  metrics = {
320
- 'agents': top_agents,
321
  'months': metrics['months'],
322
- 'data': {agent: metrics['data'][agent] for agent in top_agents if agent in metrics['data']}
323
  }
324
 
325
- if not metrics['agents'] or not metrics['months']:
326
  # Return an empty figure with a message
327
  fig = go.Figure()
328
  fig.add_annotation(
@@ -341,7 +341,7 @@ def create_monthly_metrics_plot(top_n=5):
341
  # Create figure with secondary y-axis
342
  fig = make_subplots(specs=[[{"secondary_y": True}]])
343
 
344
- # Generate unique colors for many agents using HSL color space
345
  def generate_color(index, total):
346
  """Generate distinct colors using HSL color space for better distribution"""
347
  hue = (index * 360 / total) % 360
@@ -349,15 +349,15 @@ def create_monthly_metrics_plot(top_n=5):
349
  lightness = 45 + (index % 2) * 10 # Vary lightness slightly
350
  return f'hsl({hue}, {saturation}%, {lightness}%)'
351
 
352
- agents = metrics['agents']
353
  months = metrics['months']
354
  data = metrics['data']
355
 
356
- # Generate colors for all agents
357
- agent_colors = {agent: generate_color(idx, len(agents)) for idx, agent in enumerate(agents)}
358
 
359
- # Add traces for each agent
360
- for idx, agent_name in enumerate(agents):
361
  color = agent_colors[agent_name]
362
  agent_data = data[agent_name]
363
 
@@ -377,8 +377,8 @@ def create_monthly_metrics_plot(top_n=5):
377
  line=dict(color=color, width=2),
378
  marker=dict(size=8),
379
  legendgroup=agent_name,
380
- showlegend=(top_n is not None and top_n <= 10), # Show legend for top N agents
381
- hovertemplate='<b>Agent: %{fullData.name}</b><br>' +
382
  'Month: %{x}<br>' +
383
  'Acceptance Rate: %{y:.2f}%<br>' +
384
  '<extra></extra>'
@@ -387,7 +387,7 @@ def create_monthly_metrics_plot(top_n=5):
387
  )
388
 
389
  # Add bar trace for total PRs (right y-axis)
390
- # Only show bars for months where agent has PRs
391
  x_bars = []
392
  y_bars = []
393
  for month, count in zip(months, agent_data['total_prs']):
@@ -404,11 +404,11 @@ def create_monthly_metrics_plot(top_n=5):
404
  marker=dict(color=color, opacity=0.6),
405
  legendgroup=agent_name,
406
  showlegend=False, # Hide duplicate legend entry (already shown in Scatter)
407
- hovertemplate='<b>Agent: %{fullData.name}</b><br>' +
408
  'Month: %{x}<br>' +
409
  'Total PRs: %{y}<br>' +
410
  '<extra></extra>',
411
- offsetgroup=agent_name # Group bars by agent for proper spacing
412
  ),
413
  secondary_y=True
414
  )
@@ -430,7 +430,7 @@ def create_monthly_metrics_plot(top_n=5):
430
  show_legend = (top_n is not None and top_n <= 10)
431
  fig.update_layout(
432
  title=None,
433
- hovermode='closest', # Show individual agent info on hover
434
  barmode='group',
435
  height=600,
436
  showlegend=show_legend,
@@ -469,9 +469,9 @@ def get_leaderboard_dataframe():
469
  filtered_count = 0
470
  for identifier, data in cache_dict.items():
471
  total_prs = data.get('total_prs', 0)
472
- print(f" Agent '{identifier}': {total_prs} PRs")
473
 
474
- # Filter out agents with zero total PRs
475
  if total_prs == 0:
476
  filtered_count += 1
477
  continue
@@ -485,8 +485,8 @@ def get_leaderboard_dataframe():
485
  data.get('acceptance_rate', 0.0),
486
  ])
487
 
488
- print(f"Filtered out {filtered_count} agents with 0 PRs")
489
- print(f"Leaderboard will show {len(rows)} agents")
490
 
491
  # Create DataFrame
492
  column_names = [col[0] for col in LEADERBOARD_COLUMNS]
@@ -510,14 +510,14 @@ def get_leaderboard_dataframe():
510
 
511
  def submit_agent(identifier, agent_name, organization, website):
512
  """
513
- Submit a new agent to the leaderboard.
514
  Validates input and saves submission.
515
  """
516
  # Validate required fields
517
  if not identifier or not identifier.strip():
518
  return "ERROR: GitHub identifier is required", gr.update()
519
  if not agent_name or not agent_name.strip():
520
- return "ERROR: Agent name is required", gr.update()
521
  if not organization or not organization.strip():
522
  return "ERROR: Organization name is required", gr.update()
523
  if not website or not website.strip():
@@ -534,12 +534,12 @@ def submit_agent(identifier, agent_name, organization, website):
534
  if not is_valid:
535
  return f"ERROR: {message}", gr.update()
536
 
537
- # Check for duplicates by loading agents from HuggingFace
538
- agents = load_agents_from_hf()
539
- if agents:
540
- existing_names = {agent['github_identifier'] for agent in agents}
541
  if identifier in existing_names:
542
- return f"WARNING: Agent with identifier '{identifier}' already exists", gr.update()
543
 
544
  # Create submission
545
  submission = {
@@ -576,7 +576,7 @@ def reload_leaderboard_data():
576
  if data:
577
  print(f"Successfully reloaded leaderboard data")
578
  print(f" Last updated: {data.get('metadata', {}).get('last_updated', 'Unknown')}")
579
- print(f" Agents: {len(data.get('leaderboard', {}))}")
580
  else:
581
  print(f"No data available")
582
  except Exception as e:
@@ -589,7 +589,7 @@ def reload_leaderboard_data():
589
  # GRADIO APPLICATION
590
  # =============================================================================
591
 
592
- print(f"\nStarting SWE Agent PR Leaderboard")
593
  print(f" Data source: {LEADERBOARD_REPO}")
594
  print(f" Reload frequency: Daily at 12:00 AM UTC\n")
595
 
@@ -610,19 +610,19 @@ print(f"On startup: Loads cached data from HuggingFace on demand")
610
  print(f"{'='*80}\n")
611
 
612
  # Create Gradio interface
613
- with gr.Blocks(title="SWE Agent PR Leaderboard", theme=gr.themes.Soft()) as app:
614
- gr.Markdown("# SWE Agent PR Leaderboard")
615
- gr.Markdown(f"Track and compare GitHub pull request statistics for SWE agents")
616
 
617
  with gr.Tabs():
618
 
619
  # Leaderboard Tab
620
  with gr.Tab("Leaderboard"):
621
- gr.Markdown("*Statistics are based on agent PR activity tracked by the system*")
622
  leaderboard_table = Leaderboard(
623
  value=pd.DataFrame(columns=[col[0] for col in LEADERBOARD_COLUMNS]), # Empty initially
624
  datatype=LEADERBOARD_COLUMNS,
625
- search_columns=["Agent Name", "Website"],
626
  filter_columns=[
627
  ColumnFilter(
628
  "Acceptance Rate (%)",
@@ -645,8 +645,8 @@ with gr.Blocks(title="SWE Agent PR Leaderboard", theme=gr.themes.Soft()) as app:
645
  # Monthly Metrics Section
646
  gr.Markdown("---") # Divider
647
  with gr.Group():
648
- gr.Markdown("### Monthly Performance - Top 5 Agents")
649
- gr.Markdown("*Shows acceptance rate trends and PR volumes for the most active agents*")
650
  monthly_metrics_plot = gr.Plot(label="Monthly Metrics")
651
 
652
  # Load monthly metrics when app starts
@@ -657,20 +657,20 @@ with gr.Blocks(title="SWE Agent PR Leaderboard", theme=gr.themes.Soft()) as app:
657
  )
658
 
659
 
660
- # Submit Agent Tab
661
- with gr.Tab("Submit Your Agent"):
662
 
663
- gr.Markdown("Fill in the details below to add your agent to the leaderboard.")
664
 
665
  with gr.Row():
666
  with gr.Column():
667
  github_input = gr.Textbox(
668
  label="GitHub Identifier*",
669
- placeholder="Your agent username (e.g., my-agent[bot])"
670
  )
671
  name_input = gr.Textbox(
672
- label="Agent Name*",
673
- placeholder="Your agent's display name"
674
  )
675
 
676
  with gr.Column():
@@ -680,11 +680,11 @@ with gr.Blocks(title="SWE Agent PR Leaderboard", theme=gr.themes.Soft()) as app:
680
  )
681
  website_input = gr.Textbox(
682
  label="Website*",
683
- placeholder="https://your-agent-website.com"
684
  )
685
 
686
  submit_button = gr.Button(
687
- "Submit Agent",
688
  variant="primary"
689
  )
690
  submission_status = gr.Textbox(
 
22
  # CONFIGURATION
23
  # =============================================================================
24
 
25
+ AGENTS_REPO = "SWE-Arena/bot_metadata" # HuggingFace dataset for assistant metadata
26
  LEADERBOARD_FILENAME = f"{os.getenv('COMPOSE_PROJECT_NAME')}.json"
27
  LEADERBOARD_REPO = "SWE-Arena/leaderboard_data" # HuggingFace dataset for leaderboard data
28
  MAX_RETRIES = 5
29
 
30
  LEADERBOARD_COLUMNS = [
31
+ ("Assistant", "string"),
32
  ("Website", "string"),
33
  ("Total PRs", "number"),
34
  ("Merged PRs", "number"),
 
96
  # =============================================================================
97
 
98
  def load_agents_from_hf():
99
+ """Load all assistant metadata JSON files from HuggingFace dataset."""
100
  try:
101
  api = HfApi()
102
+ assistants = []
103
 
104
  # List all files in the repository
105
  files = list_repo_files_with_backoff(api=api, repo_id=AGENTS_REPO, repo_type="dataset")
 
119
  with open(file_path, 'r') as f:
120
  agent_data = json.load(f)
121
 
122
+ # Only process assistants with status == "active"
123
  if agent_data.get('status') != 'active':
124
  continue
125
 
126
+ # Extract github_identifier from filename (e.g., "assistant[bot].json" -> "assistant[bot]")
127
  filename_identifier = json_file.replace('.json', '')
128
 
129
  # Add or override github_identifier to match filename
130
  agent_data['github_identifier'] = filename_identifier
131
 
132
+ assistants.append(agent_data)
133
 
134
  except Exception as e:
135
  print(f"Warning: Could not load {json_file}: {str(e)}")
136
  continue
137
 
138
+ print(f"Loaded {len(assistants)} assistants from HuggingFace")
139
+ return assistants
140
 
141
  except Exception as e:
142
+ print(f"Could not load assistants from HuggingFace: {str(e)}")
143
  return None
144
 
145
 
 
195
 
196
 
197
  def save_agent_to_hf(data):
198
+ """Save a new assistant to HuggingFace dataset as {identifier}.json in root."""
199
  try:
200
  api = HfApi()
201
  token = get_hf_token()
 
220
  repo_type="dataset",
221
  token=token
222
  )
223
+ print(f"Saved assistant to HuggingFace: {filename}")
224
  return True
225
  finally:
226
  # Always clean up local file, even if upload fails
 
228
  os.remove(filename)
229
 
230
  except Exception as e:
231
+ print(f"Error saving assistant: {str(e)}")
232
  return False
233
 
234
 
 
275
  - Left y-axis: Acceptance Rate (%) as line curves
276
  - Right y-axis: Total PRs created as bar charts
277
 
278
+ Each assistant gets a unique color for both their line and bars.
279
 
280
  Args:
281
+ top_n: Number of top assistants to show (default: 5)
282
  """
283
  # Load from saved dataset
284
  saved_data = load_leaderboard_data_from_hf()
 
303
  print(f"Loaded monthly metrics from saved dataset")
304
 
305
  # Apply top_n filter if specified
306
+ if top_n is not None and top_n > 0 and metrics.get('assistants'):
307
+ # Calculate total PRs for each assistant
308
  agent_totals = []
309
+ for agent_name in metrics['assistants']:
310
  agent_data = metrics['data'].get(agent_name, {})
311
  total_prs = sum(agent_data.get('total_prs', []))
312
  agent_totals.append((agent_name, total_prs))
 
315
  agent_totals.sort(key=lambda x: x[1], reverse=True)
316
  top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]]
317
 
318
+ # Filter metrics to only include top assistants
319
  metrics = {
320
+ 'assistants': top_agents,
321
  'months': metrics['months'],
322
+ 'data': {assistant: metrics['data'][assistant] for assistant in top_agents if assistant in metrics['data']}
323
  }
324
 
325
+ if not metrics['assistants'] or not metrics['months']:
326
  # Return an empty figure with a message
327
  fig = go.Figure()
328
  fig.add_annotation(
 
341
  # Create figure with secondary y-axis
342
  fig = make_subplots(specs=[[{"secondary_y": True}]])
343
 
344
+ # Generate unique colors for many assistants using HSL color space
345
  def generate_color(index, total):
346
  """Generate distinct colors using HSL color space for better distribution"""
347
  hue = (index * 360 / total) % 360
 
349
  lightness = 45 + (index % 2) * 10 # Vary lightness slightly
350
  return f'hsl({hue}, {saturation}%, {lightness}%)'
351
 
352
+ assistants = metrics['assistants']
353
  months = metrics['months']
354
  data = metrics['data']
355
 
356
+ # Generate colors for all assistants
357
+ agent_colors = {assistant: generate_color(idx, len(assistants)) for idx, assistant in enumerate(assistants)}
358
 
359
+ # Add traces for each assistant
360
+ for idx, agent_name in enumerate(assistants):
361
  color = agent_colors[agent_name]
362
  agent_data = data[agent_name]
363
 
 
377
  line=dict(color=color, width=2),
378
  marker=dict(size=8),
379
  legendgroup=agent_name,
380
+ showlegend=(top_n is not None and top_n <= 10), # Show legend for top N assistants
381
+ hovertemplate='<b>Assistant: %{fullData.name}</b><br>' +
382
  'Month: %{x}<br>' +
383
  'Acceptance Rate: %{y:.2f}%<br>' +
384
  '<extra></extra>'
 
387
  )
388
 
389
  # Add bar trace for total PRs (right y-axis)
390
+ # Only show bars for months where assistant has PRs
391
  x_bars = []
392
  y_bars = []
393
  for month, count in zip(months, agent_data['total_prs']):
 
404
  marker=dict(color=color, opacity=0.6),
405
  legendgroup=agent_name,
406
  showlegend=False, # Hide duplicate legend entry (already shown in Scatter)
407
+ hovertemplate='<b>Assistant: %{fullData.name}</b><br>' +
408
  'Month: %{x}<br>' +
409
  'Total PRs: %{y}<br>' +
410
  '<extra></extra>',
411
+ offsetgroup=agent_name # Group bars by assistant for proper spacing
412
  ),
413
  secondary_y=True
414
  )
 
430
  show_legend = (top_n is not None and top_n <= 10)
431
  fig.update_layout(
432
  title=None,
433
+ hovermode='closest', # Show individual assistant info on hover
434
  barmode='group',
435
  height=600,
436
  showlegend=show_legend,
 
469
  filtered_count = 0
470
  for identifier, data in cache_dict.items():
471
  total_prs = data.get('total_prs', 0)
472
+ print(f" Assistant '{identifier}': {total_prs} PRs")
473
 
474
+ # Filter out assistants with zero total PRs
475
  if total_prs == 0:
476
  filtered_count += 1
477
  continue
 
485
  data.get('acceptance_rate', 0.0),
486
  ])
487
 
488
+ print(f"Filtered out {filtered_count} assistants with 0 PRs")
489
+ print(f"Leaderboard will show {len(rows)} assistants")
490
 
491
  # Create DataFrame
492
  column_names = [col[0] for col in LEADERBOARD_COLUMNS]
 
510
 
511
  def submit_agent(identifier, agent_name, organization, website):
512
  """
513
+ Submit a new assistant to the leaderboard.
514
  Validates input and saves submission.
515
  """
516
  # Validate required fields
517
  if not identifier or not identifier.strip():
518
  return "ERROR: GitHub identifier is required", gr.update()
519
  if not agent_name or not agent_name.strip():
520
+ return "ERROR: Assistant name is required", gr.update()
521
  if not organization or not organization.strip():
522
  return "ERROR: Organization name is required", gr.update()
523
  if not website or not website.strip():
 
534
  if not is_valid:
535
  return f"ERROR: {message}", gr.update()
536
 
537
+ # Check for duplicates by loading assistants from HuggingFace
538
+ assistants = load_agents_from_hf()
539
+ if assistants:
540
+ existing_names = {assistant['github_identifier'] for assistant in assistants}
541
  if identifier in existing_names:
542
+ return f"WARNING: Assistant with identifier '{identifier}' already exists", gr.update()
543
 
544
  # Create submission
545
  submission = {
 
576
  if data:
577
  print(f"Successfully reloaded leaderboard data")
578
  print(f" Last updated: {data.get('metadata', {}).get('last_updated', 'Unknown')}")
579
+ print(f" Assistants: {len(data.get('leaderboard', {}))}")
580
  else:
581
  print(f"No data available")
582
  except Exception as e:
 
589
  # GRADIO APPLICATION
590
  # =============================================================================
591
 
592
+ print(f"\nStarting SWE Assistant PR Leaderboard")
593
  print(f" Data source: {LEADERBOARD_REPO}")
594
  print(f" Reload frequency: Daily at 12:00 AM UTC\n")
595
 
 
610
  print(f"{'='*80}\n")
611
 
612
  # Create Gradio interface
613
+ with gr.Blocks(title="SWE Assistant PR Leaderboard", theme=gr.themes.Soft()) as app:
614
+ gr.Markdown("# SWE Assistant PR Leaderboard")
615
+ gr.Markdown(f"Track and compare GitHub pull request statistics for SWE assistants")
616
 
617
  with gr.Tabs():
618
 
619
  # Leaderboard Tab
620
  with gr.Tab("Leaderboard"):
621
+ gr.Markdown("*Statistics are based on assistant PR activity tracked by the system*")
622
  leaderboard_table = Leaderboard(
623
  value=pd.DataFrame(columns=[col[0] for col in LEADERBOARD_COLUMNS]), # Empty initially
624
  datatype=LEADERBOARD_COLUMNS,
625
+ search_columns=["Assistant", "Website"],
626
  filter_columns=[
627
  ColumnFilter(
628
  "Acceptance Rate (%)",
 
645
  # Monthly Metrics Section
646
  gr.Markdown("---") # Divider
647
  with gr.Group():
648
+ gr.Markdown("### Monthly Performance - Top 5 Assistants")
649
+ gr.Markdown("*Shows acceptance rate trends and PR volumes for the most active assistants*")
650
  monthly_metrics_plot = gr.Plot(label="Monthly Metrics")
651
 
652
  # Load monthly metrics when app starts
 
657
  )
658
 
659
 
660
+ # Submit Assistant Tab
661
+ with gr.Tab("Submit Your Assistant"):
662
 
663
+ gr.Markdown("Fill in the details below to add your assistant to the leaderboard.")
664
 
665
  with gr.Row():
666
  with gr.Column():
667
  github_input = gr.Textbox(
668
  label="GitHub Identifier*",
669
+ placeholder="Your assistant username (e.g., my-assistant[bot])"
670
  )
671
  name_input = gr.Textbox(
672
+ label="Assistant Name*",
673
+ placeholder="Your assistant's display name"
674
  )
675
 
676
  with gr.Column():
 
680
  )
681
  website_input = gr.Textbox(
682
  label="Website*",
683
+ placeholder="https://your-assistant-website.com"
684
  )
685
 
686
  submit_button = gr.Button(
687
+ "Submit Assistant",
688
  variant="primary"
689
  )
690
  submission_status = gr.Textbox(
msr.py CHANGED
@@ -363,7 +363,7 @@ def fetch_all_pr_metadata_streaming(conn, identifiers, start_date, end_date):
363
  end_date: End datetime (timezone-aware)
364
 
365
  Returns:
366
- Dictionary mapping agent identifier to list of PR metadata
367
  """
368
  identifier_list = ', '.join([f"'{id}'" for id in identifiers])
369
  metadata_by_agent = defaultdict(list)
@@ -495,7 +495,7 @@ def fetch_all_pr_metadata_streaming(conn, identifiers, start_date, end_date):
495
 
496
  # Final summary
497
  agents_with_data = sum(1 for prs in metadata_by_agent.values() if prs)
498
- print(f"\n ✓ Complete: {total_prs} PRs found for {agents_with_data}/{len(identifiers)} agents")
499
 
500
  return dict(metadata_by_agent)
501
 
@@ -558,14 +558,14 @@ def sync_agents_repo():
558
 
559
  def load_agents_from_hf():
560
  """
561
- Load all agent metadata JSON files from local git repository.
562
  ALWAYS syncs with remote first to ensure we have the latest bot data.
563
  """
564
  # MANDATORY: Sync with remote first to get latest bot data
565
- print(f" Syncing bot_data repository to get latest agents...")
566
  sync_agents_repo() # Will raise exception if sync fails
567
 
568
- agents = []
569
 
570
  # Scan local directory for JSON files
571
  if not os.path.exists(AGENTS_REPO_LOCAL_PATH):
@@ -573,7 +573,7 @@ def load_agents_from_hf():
573
 
574
  # Walk through the directory to find all JSON files
575
  files_processed = 0
576
- print(f" Loading agent metadata from {AGENTS_REPO_LOCAL_PATH}...")
577
 
578
  for root, dirs, files in os.walk(AGENTS_REPO_LOCAL_PATH):
579
  # Skip .git directory
@@ -591,7 +591,7 @@ def load_agents_from_hf():
591
  with open(file_path, 'r', encoding='utf-8') as f:
592
  agent_data = json.load(f)
593
 
594
- # Only include active agents
595
  if agent_data.get('status') != 'active':
596
  continue
597
 
@@ -599,14 +599,14 @@ def load_agents_from_hf():
599
  github_identifier = filename.replace('.json', '')
600
  agent_data['github_identifier'] = github_identifier
601
 
602
- agents.append(agent_data)
603
 
604
  except Exception as e:
605
  print(f" ⚠ Error loading {filename}: {str(e)}")
606
  continue
607
 
608
- print(f" ✓ Loaded {len(agents)} active agents (from {files_processed} total files)")
609
- return agents
610
 
611
 
612
  def calculate_pr_stats_from_metadata(metadata_list):
@@ -626,12 +626,12 @@ def calculate_pr_stats_from_metadata(metadata_list):
626
  }
627
 
628
 
629
- def calculate_monthly_metrics_by_agent(all_metadata_dict, agents):
630
- """Calculate monthly metrics for all agents for visualization."""
631
- identifier_to_name = {agent.get('github_identifier'): agent.get('name') for agent in agents if agent.get('github_identifier')}
632
 
633
  if not all_metadata_dict:
634
- return {'agents': [], 'months': [], 'data': {}}
635
 
636
  agent_month_data = defaultdict(lambda: defaultdict(list))
637
 
@@ -690,30 +690,30 @@ def calculate_monthly_metrics_by_agent(all_metadata_dict, agents):
690
  agents_list = sorted(list(agent_month_data.keys()))
691
 
692
  return {
693
- 'agents': agents_list,
694
  'months': months,
695
  'data': result_data
696
  }
697
 
698
 
699
- def construct_leaderboard_from_metadata(all_metadata_dict, agents):
700
  """Construct leaderboard from in-memory PR metadata."""
701
- if not agents:
702
- print("Error: No agents found")
703
  return {}
704
 
705
  cache_dict = {}
706
 
707
- for agent in agents:
708
- identifier = agent.get('github_identifier')
709
- agent_name = agent.get('name', 'Unknown')
710
 
711
  bot_metadata = all_metadata_dict.get(identifier, [])
712
  stats = calculate_pr_stats_from_metadata(bot_metadata)
713
 
714
  cache_dict[identifier] = {
715
  'name': agent_name,
716
- 'website': agent.get('website', 'N/A'),
717
  'github_identifier': identifier,
718
  **stats
719
  }
@@ -767,7 +767,7 @@ def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics):
767
 
768
  def mine_all_agents():
769
  """
770
- Mine PR metadata for all agents using STREAMING batch processing.
771
  Downloads GHArchive data, then uses BATCH-based DuckDB queries.
772
  """
773
  print(f"\n[1/4] Downloading GHArchive data...")
@@ -775,19 +775,19 @@ def mine_all_agents():
775
  if not download_all_gharchive_data():
776
  print("Warning: Download had errors, continuing with available data...")
777
 
778
- print(f"\n[2/4] Loading agent metadata...")
779
 
780
- agents = load_agents_from_hf()
781
- if not agents:
782
- print("Error: No agents found")
783
  return
784
 
785
- identifiers = [agent['github_identifier'] for agent in agents if agent.get('github_identifier')]
786
  if not identifiers:
787
- print("Error: No valid agent identifiers found")
788
  return
789
 
790
- print(f"\n[3/4] Mining PR metadata ({len(identifiers)} agents, {LEADERBOARD_TIME_FRAME_DAYS} days)...")
791
 
792
  try:
793
  conn = get_duckdb_connection()
@@ -815,8 +815,8 @@ def mine_all_agents():
815
  print(f"\n[4/4] Saving leaderboard...")
816
 
817
  try:
818
- leaderboard_dict = construct_leaderboard_from_metadata(all_metadata, agents)
819
- monthly_metrics = calculate_monthly_metrics_by_agent(all_metadata, agents)
820
  save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics)
821
 
822
  except Exception as e:
@@ -850,7 +850,7 @@ def setup_scheduler():
850
  mine_all_agents,
851
  trigger=trigger,
852
  id='mine_all_agents',
853
- name='Mine GHArchive data for all agents',
854
  replace_existing=True
855
  )
856
 
 
363
  end_date: End datetime (timezone-aware)
364
 
365
  Returns:
366
+ Dictionary mapping assistant identifier to list of PR metadata
367
  """
368
  identifier_list = ', '.join([f"'{id}'" for id in identifiers])
369
  metadata_by_agent = defaultdict(list)
 
495
 
496
  # Final summary
497
  agents_with_data = sum(1 for prs in metadata_by_agent.values() if prs)
498
+ print(f"\n ✓ Complete: {total_prs} PRs found for {agents_with_data}/{len(identifiers)} assistants")
499
 
500
  return dict(metadata_by_agent)
501
 
 
558
 
559
  def load_agents_from_hf():
560
  """
561
+ Load all assistant metadata JSON files from local git repository.
562
  ALWAYS syncs with remote first to ensure we have the latest bot data.
563
  """
564
  # MANDATORY: Sync with remote first to get latest bot data
565
+ print(f" Syncing bot_data repository to get latest assistants...")
566
  sync_agents_repo() # Will raise exception if sync fails
567
 
568
+ assistants = []
569
 
570
  # Scan local directory for JSON files
571
  if not os.path.exists(AGENTS_REPO_LOCAL_PATH):
 
573
 
574
  # Walk through the directory to find all JSON files
575
  files_processed = 0
576
+ print(f" Loading assistant metadata from {AGENTS_REPO_LOCAL_PATH}...")
577
 
578
  for root, dirs, files in os.walk(AGENTS_REPO_LOCAL_PATH):
579
  # Skip .git directory
 
591
  with open(file_path, 'r', encoding='utf-8') as f:
592
  agent_data = json.load(f)
593
 
594
+ # Only include active assistants
595
  if agent_data.get('status') != 'active':
596
  continue
597
 
 
599
  github_identifier = filename.replace('.json', '')
600
  agent_data['github_identifier'] = github_identifier
601
 
602
+ assistants.append(agent_data)
603
 
604
  except Exception as e:
605
  print(f" ⚠ Error loading {filename}: {str(e)}")
606
  continue
607
 
608
+ print(f" ✓ Loaded {len(assistants)} active assistants (from {files_processed} total files)")
609
+ return assistants
610
 
611
 
612
  def calculate_pr_stats_from_metadata(metadata_list):
 
626
  }
627
 
628
 
629
+ def calculate_monthly_metrics_by_agent(all_metadata_dict, assistants):
630
+ """Calculate monthly metrics for all assistants for visualization."""
631
+ identifier_to_name = {assistant.get('github_identifier'): assistant.get('name') for assistant in assistants if assistant.get('github_identifier')}
632
 
633
  if not all_metadata_dict:
634
+ return {'assistants': [], 'months': [], 'data': {}}
635
 
636
  agent_month_data = defaultdict(lambda: defaultdict(list))
637
 
 
690
  agents_list = sorted(list(agent_month_data.keys()))
691
 
692
  return {
693
+ 'assistants': agents_list,
694
  'months': months,
695
  'data': result_data
696
  }
697
 
698
 
699
+ def construct_leaderboard_from_metadata(all_metadata_dict, assistants):
700
  """Construct leaderboard from in-memory PR metadata."""
701
+ if not assistants:
702
+ print("Error: No assistants found")
703
  return {}
704
 
705
  cache_dict = {}
706
 
707
+ for assistant in assistants:
708
+ identifier = assistant.get('github_identifier')
709
+ agent_name = assistant.get('name', 'Unknown')
710
 
711
  bot_metadata = all_metadata_dict.get(identifier, [])
712
  stats = calculate_pr_stats_from_metadata(bot_metadata)
713
 
714
  cache_dict[identifier] = {
715
  'name': agent_name,
716
+ 'website': assistant.get('website', 'N/A'),
717
  'github_identifier': identifier,
718
  **stats
719
  }
 
767
 
768
  def mine_all_agents():
769
  """
770
+ Mine PR metadata for all assistants using STREAMING batch processing.
771
  Downloads GHArchive data, then uses BATCH-based DuckDB queries.
772
  """
773
  print(f"\n[1/4] Downloading GHArchive data...")
 
775
  if not download_all_gharchive_data():
776
  print("Warning: Download had errors, continuing with available data...")
777
 
778
+ print(f"\n[2/4] Loading assistant metadata...")
779
 
780
+ assistants = load_agents_from_hf()
781
+ if not assistants:
782
+ print("Error: No assistants found")
783
  return
784
 
785
+ identifiers = [assistant['github_identifier'] for assistant in assistants if assistant.get('github_identifier')]
786
  if not identifiers:
787
+ print("Error: No valid assistant identifiers found")
788
  return
789
 
790
+ print(f"\n[3/4] Mining PR metadata ({len(identifiers)} assistants, {LEADERBOARD_TIME_FRAME_DAYS} days)...")
791
 
792
  try:
793
  conn = get_duckdb_connection()
 
815
  print(f"\n[4/4] Saving leaderboard...")
816
 
817
  try:
818
+ leaderboard_dict = construct_leaderboard_from_metadata(all_metadata, assistants)
819
+ monthly_metrics = calculate_monthly_metrics_by_agent(all_metadata, assistants)
820
  save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics)
821
 
822
  except Exception as e:
 
850
  mine_all_agents,
851
  trigger=trigger,
852
  id='mine_all_agents',
853
+ name='Mine GHArchive data for all assistants',
854
  replace_existing=True
855
  )
856