zhiminy committed on
Commit
88db242
·
1 Parent(s): 19a4aed
Files changed (3) hide show
  1. README.md +2 -2
  2. app.py +15 -135
  3. msr.py +1 -44
README.md CHANGED
@@ -59,13 +59,13 @@ We search GitHub using multiple query patterns to catch all PRs associated with
59
  The leaderboard refreshes automatically every day at 12:00 AM UTC.
60
 
61
  **Community Submissions**
62
- Anyone can submit a coding agent to track via the leaderboard. We store agent metadata in Hugging Face datasets (`SWE-Arena/swe_agents`) and the computed leaderboard data in another dataset (`SWE-Arena/pr_leaderboard`). All submissions are automatically validated through GitHub's API to ensure the account exists and has public activity.
63
 
64
  ## Using the Leaderboard
65
 
66
  ### Just Browsing?
67
  Head to the Leaderboard tab where you'll find:
68
- - **Searchable table**: Search by agent name or organization
69
  - **Filterable columns**: Filter by acceptance rate to find top performers
70
  - **Monthly charts**: Scroll down to see acceptance rate trends and PR activity over time
71
 
 
59
  The leaderboard refreshes automatically every day at 12:00 AM UTC.
60
 
61
  **Community Submissions**
62
+ Anyone can submit a coding agent to track via the leaderboard. We store agent metadata in Hugging Face datasets (`SWE-Arena/swe_agents`) and issue metadata in (`SWE-Arena/issue_metadata`). The leaderboard is dynamically constructed from the issue metadata. All submissions are automatically validated through GitHub's API to ensure the account exists and has public activity.
63
 
64
  ## Using the Leaderboard
65
 
66
  ### Just Browsing?
67
  Head to the Leaderboard tab where you'll find:
68
+ - **Searchable table**: Search by agent name or website
69
  - **Filterable columns**: Filter by acceptance rate to find top performers
70
  - **Monthly charts**: Scroll down to see acceptance rate trends and PR activity over time
71
 
app.py CHANGED
@@ -44,16 +44,14 @@ else:
44
  DEBUG_MODE = os.getenv('DEBUG_MODE', 'False').lower() in ('true', '1', 'yes')
45
 
46
  # In-memory cache for debug mode (data persists during session but NOT saved to HF)
47
- DEBUG_LEADERBOARD_CACHE = {}
48
  DEBUG_PR_METADATA_CACHE = defaultdict(list)
49
 
50
  AGENTS_REPO = "SWE-Arena/swe_agents" # HuggingFace dataset for agent metadata
51
- LEADERBOARD_REPO = "SWE-Arena/pr_leaderboard"
52
  PR_METADATA_REPO = "SWE-Arena/pr_metadata" # HuggingFace dataset for PR metadata
53
 
54
  LEADERBOARD_COLUMNS = [
55
  ("Agent Name", "string"),
56
- ("Organization", "string"),
57
  ("Total PRs", "number"),
58
  ("Merged PRs", "number"),
59
  ("Acceptance Rate (%)", "number"),
@@ -1178,34 +1176,6 @@ def load_agents_from_hf():
1178
  return None
1179
 
1180
 
1181
- def load_leaderboard_dataset():
1182
- """Load leaderboard data from HuggingFace dataset for current year.
1183
- In debug mode, loads from in-memory cache if available."""
1184
- # In debug mode, check in-memory cache first
1185
- if DEBUG_MODE and DEBUG_LEADERBOARD_CACHE:
1186
- print(f"🐛 DEBUG MODE: Loading leaderboard from in-memory cache ({len(DEBUG_LEADERBOARD_CACHE)} entries)")
1187
- return list(DEBUG_LEADERBOARD_CACHE.values())
1188
-
1189
- try:
1190
- year = datetime.now().year
1191
- filename = f"{year}.csv"
1192
-
1193
- # Try to download the CSV file for current year
1194
- file_path = hf_hub_download(
1195
- repo_id=LEADERBOARD_REPO,
1196
- filename=filename,
1197
- repo_type="dataset"
1198
- )
1199
-
1200
- # Load CSV into list of dicts
1201
- df = pd.read_csv(file_path)
1202
- data = df.to_dict('records')
1203
- print(f"✓ Loaded {len(data)} entries from {filename}")
1204
- return data
1205
-
1206
- except Exception as e:
1207
- print(f"Could not load leaderboard dataset for year {datetime.now().year}: {str(e)}")
1208
- return None
1209
 
1210
 
1211
  def get_hf_token():
@@ -1297,56 +1267,6 @@ def save_agent_to_hf(data):
1297
  return False
1298
 
1299
 
1300
- def save_leaderboard_to_hf(cache_dict):
1301
- """Save complete leaderboard to HuggingFace dataset as CSV.
1302
- In debug mode, saves to in-memory cache only."""
1303
- # Skip saving in debug mode - use in-memory cache instead
1304
- if DEBUG_MODE:
1305
- global DEBUG_LEADERBOARD_CACHE
1306
- # Filter out agents with zero total PRs
1307
- filtered_cache_dict = {k: v for k, v in cache_dict.items() if v.get('total_prs', 0) > 0}
1308
- DEBUG_LEADERBOARD_CACHE = filtered_cache_dict.copy()
1309
- data_list = dict_to_cache(filtered_cache_dict)
1310
- print(f"🐛 DEBUG MODE: Saved to in-memory cache only ({len(data_list)} entries) - NOT saved to HuggingFace")
1311
- return True
1312
-
1313
- try:
1314
- token = get_hf_token()
1315
- if not token:
1316
- raise Exception("No HuggingFace token found. Please set HF_TOKEN in your Space settings.")
1317
-
1318
- # Filter out agents with zero total PRs
1319
- filtered_cache_dict = {k: v for k, v in cache_dict.items() if v.get('total_prs', 0) > 0}
1320
- # Convert to DataFrame
1321
- data_list = dict_to_cache(filtered_cache_dict)
1322
- df = pd.DataFrame(data_list)
1323
-
1324
- # Save to CSV with year as filename
1325
- year = datetime.now().year
1326
- filename = f"{year}.csv"
1327
- df.to_csv(filename, index=False)
1328
-
1329
- try:
1330
- # Upload to HuggingFace
1331
- api = HfApi()
1332
- upload_with_retry(
1333
- api=api,
1334
- path_or_fileobj=filename,
1335
- path_in_repo=filename,
1336
- repo_id=LEADERBOARD_REPO,
1337
- repo_type="dataset",
1338
- token=token
1339
- )
1340
- print(f"✓ Saved leaderboard to HuggingFace as {filename} ({len(data_list)} entries)")
1341
- return True
1342
- finally:
1343
- # Always clean up local file, even if upload fails
1344
- if os.path.exists(filename):
1345
- os.remove(filename)
1346
-
1347
- except Exception as e:
1348
- print(f"✗ Error saving leaderboard: {str(e)}")
1349
- return False
1350
 
1351
 
1352
  # =============================================================================
@@ -1436,7 +1356,7 @@ def update_all_agents_incremental():
1436
  # Merge metadata with stats
1437
  cache_dict[identifier] = {
1438
  'agent_name': agent_name,
1439
- 'organization': agent.get('organization', 'Unknown'),
1440
  'github_identifier': identifier,
1441
  **stats
1442
  }
@@ -1485,7 +1405,7 @@ def construct_leaderboard_from_metadata():
1485
 
1486
  cache_dict[identifier] = {
1487
  'agent_name': agent_name,
1488
- 'organization': agent.get('organization', 'Unknown'),
1489
  'github_identifier': identifier,
1490
  **stats
1491
  }
@@ -1496,7 +1416,7 @@ def construct_leaderboard_from_metadata():
1496
  def initialize_data():
1497
  """
1498
  Initialize data on application startup.
1499
- Priority: 1) Leaderboard dataset ({year}.csv), 2) PR metadata (if available), 3) Full GitHub mining
1500
 
1501
  In DEBUG MODE:
1502
  - If no data available, automatically mine up to 10 PRs per query per agent
@@ -1506,26 +1426,15 @@ def initialize_data():
1506
 
1507
  year = datetime.now().year
1508
 
1509
- # STEP 1: Try loading existing leaderboard CSV from SWE-Arena/pr_leaderboard
1510
- print(f"STEP 1: Checking for {year}.csv in SWE-Arena/pr_leaderboard...")
1511
- leaderboard_data = load_leaderboard_dataset()
1512
- if leaderboard_data:
1513
- print(f"✓ Found and loaded {year}.csv from leaderboard repository")
1514
- print("✓ Initialized from leaderboard dataset")
1515
- return
1516
-
1517
- print(f" {year}.csv not found in leaderboard repository")
1518
-
1519
- # STEP 2: Try constructing from PR metadata in SWE-Arena/pr_metadata (fast, memory-efficient)
1520
- print(f"STEP 2: Checking SWE-Arena/pr_metadata for existing data...")
1521
  try:
1522
  cache_dict = construct_leaderboard_from_metadata()
1523
  # Check if there's actually meaningful data (at least one agent with PRs)
1524
  has_data = any(entry.get('total_prs', 0) > 0 for entry in cache_dict.values())
1525
  if cache_dict and has_data:
1526
  print(f"✓ Found PR metadata in pr_metadata repository")
1527
- save_leaderboard_to_hf(cache_dict)
1528
- print("✓ Initialized from PR metadata and saved as CSV")
1529
  return
1530
  else:
1531
  print(" No meaningful PR metadata found in pr_metadata repository")
@@ -1540,10 +1449,7 @@ def initialize_data():
1540
  print(f"✓ Loaded {len(agents)} agents from HuggingFace")
1541
  print("⛏️ Mining GitHub data in debug mode (limited to 10 PRs per query)...")
1542
  cache_dict = update_all_agents_incremental()
1543
- if cache_dict:
1544
- # In debug mode, this won't actually save to HF
1545
- save_leaderboard_to_hf(cache_dict)
1546
- print("✓ Debug mining complete (data NOT saved to HuggingFace)")
1547
  return
1548
  else:
1549
  print("⚠️ No agents found. Waiting for first submission...")
@@ -1555,8 +1461,6 @@ def initialize_data():
1555
  print(f"✓ Loaded {len(agents)} agents from HuggingFace")
1556
  print("⛏️ Mining GitHub data (this may take a while)...")
1557
  cache_dict = update_all_agents_incremental()
1558
- if cache_dict:
1559
- save_leaderboard_to_hf(cache_dict)
1560
  return
1561
 
1562
  # No data available
@@ -1689,25 +1593,25 @@ def create_monthly_metrics_plot():
1689
 
1690
  def get_leaderboard_dataframe():
1691
  """
1692
- Load leaderboard data from HuggingFace and convert to pandas DataFrame for display.
1693
  Returns formatted DataFrame sorted by acceptance rate.
1694
  """
1695
- # Load leaderboard data from HuggingFace
1696
- leaderboard_data = load_leaderboard_dataset()
1697
 
1698
- if not leaderboard_data:
1699
  # Return empty DataFrame with correct columns if no data
1700
  column_names = [col[0] for col in LEADERBOARD_COLUMNS]
1701
  return pd.DataFrame(columns=column_names)
1702
 
1703
  rows = []
1704
- for data in leaderboard_data:
1705
  # Filter out agents with zero total PRs
1706
  if data.get('total_prs', 0) > 0:
1707
  # Only include display-relevant fields
1708
  rows.append([
1709
  data.get('agent_name', 'Unknown'),
1710
- data.get('organization', 'Unknown'),
1711
  data.get('total_prs', 0),
1712
  data.get('merged', 0),
1713
  data.get('acceptance_rate', 0.0),
@@ -1791,21 +1695,6 @@ def submit_agent(identifier, agent_name, organization, description, website):
1791
  # Save metadata to HuggingFace
1792
  save_pr_metadata_to_hf(metadata_list, identifier)
1793
 
1794
- # Calculate stats from metadata
1795
- stats = calculate_pr_stats_from_metadata(metadata_list)
1796
-
1797
- # Load current leaderboard
1798
- leaderboard_data = load_leaderboard_dataset()
1799
- if not leaderboard_data:
1800
- leaderboard_data = []
1801
-
1802
- # Convert to dict for easy updating
1803
- cache_dict = {entry['github_identifier']: entry for entry in leaderboard_data}
1804
- cache_dict[identifier] = {**submission, **stats}
1805
-
1806
- # Save to HuggingFace
1807
- save_leaderboard_to_hf(cache_dict)
1808
-
1809
  return f"✅ Successfully submitted {agent_name}!", get_leaderboard_dataframe(), create_monthly_metrics_plot()
1810
 
1811
  except Exception as e:
@@ -1873,15 +1762,6 @@ def daily_update_task():
1873
  print(f" PRs updated (closed/merged): {total_updated}")
1874
  print(f"{'='*80}")
1875
 
1876
- # Reconstruct leaderboard from all stored metadata
1877
- print(f"\n📈 Rebuilding leaderboard from refreshed data...")
1878
- cache_dict = construct_leaderboard_from_metadata()
1879
-
1880
- if cache_dict:
1881
- # Save leaderboard
1882
- save_leaderboard_to_hf(cache_dict)
1883
- print("✓ Leaderboard updated successfully")
1884
-
1885
  print(f"\n✅ Daily update completed at {datetime.now(timezone.utc).isoformat()}")
1886
 
1887
  except Exception as e:
@@ -1943,7 +1823,7 @@ with gr.Blocks(title="SWE Agent PR Leaderboard", theme=gr.themes.Soft()) as app:
1943
  leaderboard_table = Leaderboard(
1944
  value=get_leaderboard_dataframe(),
1945
  datatype=LEADERBOARD_COLUMNS,
1946
- search_columns=["Agent Name", "Organization"],
1947
  filter_columns=["Acceptance Rate (%)"]
1948
  )
1949
 
 
44
  DEBUG_MODE = os.getenv('DEBUG_MODE', 'False').lower() in ('true', '1', 'yes')
45
 
46
  # In-memory cache for debug mode (data persists during session but NOT saved to HF)
 
47
  DEBUG_PR_METADATA_CACHE = defaultdict(list)
48
 
49
  AGENTS_REPO = "SWE-Arena/swe_agents" # HuggingFace dataset for agent metadata
 
50
  PR_METADATA_REPO = "SWE-Arena/pr_metadata" # HuggingFace dataset for PR metadata
51
 
52
  LEADERBOARD_COLUMNS = [
53
  ("Agent Name", "string"),
54
+ ("Website", "string"),
55
  ("Total PRs", "number"),
56
  ("Merged PRs", "number"),
57
  ("Acceptance Rate (%)", "number"),
 
1176
  return None
1177
 
1178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1179
 
1180
 
1181
  def get_hf_token():
 
1267
  return False
1268
 
1269
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1270
 
1271
 
1272
  # =============================================================================
 
1356
  # Merge metadata with stats
1357
  cache_dict[identifier] = {
1358
  'agent_name': agent_name,
1359
+ 'website': agent.get('website', 'Unknown'),
1360
  'github_identifier': identifier,
1361
  **stats
1362
  }
 
1405
 
1406
  cache_dict[identifier] = {
1407
  'agent_name': agent_name,
1408
+ 'website': agent.get('website', 'Unknown'),
1409
  'github_identifier': identifier,
1410
  **stats
1411
  }
 
1416
  def initialize_data():
1417
  """
1418
  Initialize data on application startup.
1419
+ Constructs leaderboard from PR metadata only.
1420
 
1421
  In DEBUG MODE:
1422
  - If no data available, automatically mine up to 10 PRs per query per agent
 
1426
 
1427
  year = datetime.now().year
1428
 
1429
+ # Try constructing from PR metadata in SWE-Arena/pr_metadata (fast, memory-efficient)
1430
+ print(f"Checking SWE-Arena/pr_metadata for existing data...")
 
 
 
 
 
 
 
 
 
 
1431
  try:
1432
  cache_dict = construct_leaderboard_from_metadata()
1433
  # Check if there's actually meaningful data (at least one agent with PRs)
1434
  has_data = any(entry.get('total_prs', 0) > 0 for entry in cache_dict.values())
1435
  if cache_dict and has_data:
1436
  print(f"✓ Found PR metadata in pr_metadata repository")
1437
+ print("✓ Initialized from PR metadata")
 
1438
  return
1439
  else:
1440
  print(" No meaningful PR metadata found in pr_metadata repository")
 
1449
  print(f"✓ Loaded {len(agents)} agents from HuggingFace")
1450
  print("⛏️ Mining GitHub data in debug mode (limited to 10 PRs per query)...")
1451
  cache_dict = update_all_agents_incremental()
1452
+ print("✓ Debug mining complete (data NOT saved to HuggingFace)")
 
 
 
1453
  return
1454
  else:
1455
  print("⚠️ No agents found. Waiting for first submission...")
 
1461
  print(f"✓ Loaded {len(agents)} agents from HuggingFace")
1462
  print("⛏️ Mining GitHub data (this may take a while)...")
1463
  cache_dict = update_all_agents_incremental()
 
 
1464
  return
1465
 
1466
  # No data available
 
1593
 
1594
  def get_leaderboard_dataframe():
1595
  """
1596
+ Construct leaderboard data from PR metadata and convert to pandas DataFrame for display.
1597
  Returns formatted DataFrame sorted by acceptance rate.
1598
  """
1599
+ # Construct leaderboard from PR metadata
1600
+ cache_dict = construct_leaderboard_from_metadata()
1601
 
1602
+ if not cache_dict:
1603
  # Return empty DataFrame with correct columns if no data
1604
  column_names = [col[0] for col in LEADERBOARD_COLUMNS]
1605
  return pd.DataFrame(columns=column_names)
1606
 
1607
  rows = []
1608
+ for identifier, data in cache_dict.items():
1609
  # Filter out agents with zero total PRs
1610
  if data.get('total_prs', 0) > 0:
1611
  # Only include display-relevant fields
1612
  rows.append([
1613
  data.get('agent_name', 'Unknown'),
1614
+ data.get('website', 'Unknown'),
1615
  data.get('total_prs', 0),
1616
  data.get('merged', 0),
1617
  data.get('acceptance_rate', 0.0),
 
1695
  # Save metadata to HuggingFace
1696
  save_pr_metadata_to_hf(metadata_list, identifier)
1697
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1698
  return f"✅ Successfully submitted {agent_name}!", get_leaderboard_dataframe(), create_monthly_metrics_plot()
1699
 
1700
  except Exception as e:
 
1762
  print(f" PRs updated (closed/merged): {total_updated}")
1763
  print(f"{'='*80}")
1764
 
 
 
 
 
 
 
 
 
 
1765
  print(f"\n✅ Daily update completed at {datetime.now(timezone.utc).isoformat()}")
1766
 
1767
  except Exception as e:
 
1823
  leaderboard_table = Leaderboard(
1824
  value=get_leaderboard_dataframe(),
1825
  datatype=LEADERBOARD_COLUMNS,
1826
+ search_columns=["Agent Name", "Website"],
1827
  filter_columns=["Acceptance Rate (%)"]
1828
  )
1829
 
msr.py CHANGED
@@ -64,11 +64,9 @@ else:
64
  # Constants (match app.py)
65
  # =============================================================================
66
 
67
- DEBUG_LEADERBOARD_CACHE = {}
68
  DEBUG_PR_METADATA_CACHE = defaultdict(list)
69
 
70
  AGENTS_REPO = "SWE-Arena/swe_agents"
71
- LEADERBOARD_REPO = "SWE-Arena/pr_leaderboard"
72
  PR_METADATA_REPO = "SWE-Arena/pr_metadata"
73
 
74
 
@@ -622,45 +620,6 @@ def get_already_mined_dates(agent_identifier, n_months=6):
622
  return set()
623
 
624
 
625
- def save_leaderboard_to_hf(cache_dict):
626
- if DEBUG_MODE:
627
- global DEBUG_LEADERBOARD_CACHE
628
- # Filter out agents with zero total PRs
629
- filtered_cache_dict = {k: v for k, v in cache_dict.items() if v.get('total_prs', 0) > 0}
630
- DEBUG_LEADERBOARD_CACHE = filtered_cache_dict.copy()
631
- data_list = dict_to_cache(filtered_cache_dict)
632
- print(f"🐛 DEBUG MODE: Saved to in-memory cache only ({len(data_list)} entries) - NOT saved to HuggingFace")
633
- return True
634
- try:
635
- token = get_hf_token()
636
- if not token:
637
- raise Exception("No HuggingFace token found. Please set HF_TOKEN in your environment.")
638
- # Filter out agents with zero total PRs
639
- filtered_cache_dict = {k: v for k, v in cache_dict.items() if v.get('total_prs', 0) > 0}
640
- data_list = dict_to_cache(filtered_cache_dict)
641
- df = pd.DataFrame(data_list)
642
- year = datetime.now().year
643
- filename = f"{year}.csv"
644
- df.to_csv(filename, index=False)
645
- api = HfApi()
646
- try:
647
- upload_with_retry(
648
- api=api,
649
- path_or_fileobj=filename,
650
- path_in_repo=filename,
651
- repo_id=LEADERBOARD_REPO,
652
- repo_type="dataset",
653
- token=token
654
- )
655
- print(f"✓ Saved leaderboard to HuggingFace as {filename} ({len(data_list)} entries)")
656
- return True
657
- finally:
658
- # Always clean up local file, even if upload fails
659
- if os.path.exists(filename):
660
- os.remove(filename)
661
- except Exception as e:
662
- print(f"✗ Error saving leaderboard: {str(e)}")
663
- return False
664
 
665
 
666
  def calculate_pr_stats_from_metadata(metadata_list):
@@ -745,7 +704,7 @@ def update_all_agents_incremental():
745
  stats = calculate_pr_stats_from_metadata(agent_metadata)
746
  cache_dict[identifier] = {
747
  'agent_name': agent_name,
748
- 'organization': agent.get('organization', 'Unknown'),
749
  'github_identifier': identifier,
750
  **stats
751
  }
@@ -761,8 +720,6 @@ def update_all_agents_incremental():
761
  def run_once():
762
  print("\n🚀 Immediate mining run started")
763
  cache_dict = update_all_agents_incremental()
764
- if cache_dict:
765
- save_leaderboard_to_hf(cache_dict)
766
  print("✅ Immediate mining run completed\n")
767
 
768
 
 
64
  # Constants (match app.py)
65
  # =============================================================================
66
 
 
67
  DEBUG_PR_METADATA_CACHE = defaultdict(list)
68
 
69
  AGENTS_REPO = "SWE-Arena/swe_agents"
 
70
  PR_METADATA_REPO = "SWE-Arena/pr_metadata"
71
 
72
 
 
620
  return set()
621
 
622
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
623
 
624
 
625
  def calculate_pr_stats_from_metadata(metadata_list):
 
704
  stats = calculate_pr_stats_from_metadata(agent_metadata)
705
  cache_dict[identifier] = {
706
  'agent_name': agent_name,
707
+ 'website': agent.get('website', 'Unknown'),
708
  'github_identifier': identifier,
709
  **stats
710
  }
 
720
  def run_once():
721
  print("\n🚀 Immediate mining run started")
722
  cache_dict = update_all_agents_incremental()
 
 
723
  print("✅ Immediate mining run completed\n")
724
 
725