add
app.py CHANGED

@@ -105,6 +105,61 @@ def parse_date_string(date_string):
 # BIGQUERY FUNCTIONS
 # =============================================================================
 
+def fetch_issue_metadata_batched(client, identifiers, start_date, end_date, batch_size=100):
+    """
+    Fetch issue metadata for ALL agents using BATCHED BigQuery queries.
+    Splits agents into smaller batches to avoid performance issues with large numbers of agents.
+
+    Args:
+        client: BigQuery client instance
+        identifiers: List of GitHub usernames/bot identifiers
+        start_date: Start datetime (timezone-aware)
+        end_date: End datetime (timezone-aware)
+        batch_size: Number of agents to process per batch (default: 100)
+
+    Returns:
+        Dictionary mapping agent identifier to list of issue metadata
+    """
+    # Split identifiers into batches
+    batches = [identifiers[i:i + batch_size] for i in range(0, len(identifiers), batch_size)]
+    total_batches = len(batches)
+
+    print(f"\n🔍 Using BATCHED approach for {len(identifiers)} agents")
+    print(f"   Total batches: {total_batches} (batch size: {batch_size})")
+    print(f"   Time range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
+
+    # Collect results from all batches
+    all_metadata = {}
+
+    for batch_num, batch_identifiers in enumerate(batches, 1):
+        print(f"\n📦 Processing batch {batch_num}/{total_batches} ({len(batch_identifiers)} agents)...")
+
+        try:
+            # Query each batch
+            batch_results = fetch_all_pr_metadata_single_query(
+                client, batch_identifiers, start_date, end_date
+            )
+
+            # Merge results
+            for identifier, metadata_list in batch_results.items():
+                if identifier in all_metadata:
+                    all_metadata[identifier].extend(metadata_list)
+                else:
+                    all_metadata[identifier] = metadata_list
+
+            print(f"   ✓ Batch {batch_num}/{total_batches} complete")
+
+        except Exception as e:
+            print(f"   ✗ Batch {batch_num}/{total_batches} failed: {str(e)}")
+            print(f"   Continuing with remaining batches...")
+            continue
+
+    total_prs = sum(len(metadata_list) for metadata_list in all_metadata.values())
+    print(f"\n✓ All batches complete! Found {total_prs} total PRs across {len(all_metadata)} agents")
+
+    return all_metadata
+
+
 def get_bigquery_client():
     """
     Initialize BigQuery client using credentials from environment variable.

@@ -161,7 +216,10 @@ def generate_table_union_statements(start_date, end_date):
 
 def fetch_all_pr_metadata_single_query(client, identifiers, start_date, end_date):
     """
-    Fetch PR metadata for
+    Fetch PR metadata for a BATCH of agents using ONE comprehensive BigQuery query.
+
+    NOTE: This function is designed for smaller batches (~100 agents).
+    For large numbers of agents, use fetch_issue_metadata_batched() instead.
 
     This query fetches PRs authored by agents (user.login matches identifier).
 

@@ -174,7 +232,7 @@ def fetch_all_pr_metadata_single_query(client, identifiers, start_date, end_date
     Returns:
         Dictionary mapping agent identifier to list of PR metadata
     """
-    print(f"
+    print(f"   Querying BigQuery for {len(identifiers)} agents in this batch...")
     print(f"   Time range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
 
     # Generate table UNION statements for the time range

@@ -228,14 +286,14 @@ def fetch_all_pr_metadata_single_query(client, identifiers, start_date, end_date
     ORDER BY created_at DESC
     """
 
-    print(f"
-    print(f"
+    print(f"   Scanning {(end_date - start_date).days} days of GitHub Archive data...")
+    print(f"   Batch agents: {', '.join(identifiers[:5])}{'...' if len(identifiers) > 5 else ''}")
 
     try:
         query_job = client.query(query)
         results = list(query_job.result())
 
-        print(f"   ✓ Found {len(results)}
+        print(f"   ✓ Found {len(results)} PRs in this batch")
 
         # Group results by agent
         metadata_by_agent = defaultdict(list)

@@ -266,8 +324,8 @@ def fetch_all_pr_metadata_single_query(client, identifiers, start_date, end_date
             if pr_author and pr_author in identifiers:
                 metadata_by_agent[pr_author].append(pr_data)
 
-        # Print breakdown by agent
-        print(f"
+        # Print breakdown by agent (only show agents with PRs)
+        print(f"   📊 Batch breakdown:")
         for identifier in identifiers:
             count = len(metadata_by_agent.get(identifier, []))
             if count > 0:

@@ -989,7 +1047,7 @@ def mine_all_agents():
     print(f"\n{'='*80}")
     print(f"Starting PR metadata mining for {len(identifiers)} agents")
     print(f"Time frame: Last {UPDATE_TIME_FRAME_DAYS} days")
-    print(f"Data source: BigQuery + GitHub Archive (
+    print(f"Data source: BigQuery + GitHub Archive (BATCHED QUERIES)")
    print(f"{'='*80}\n")
 
     # Initialize BigQuery client

@@ -1005,8 +1063,9 @@ def mine_all_agents():
     start_date = end_date - timedelta(days=UPDATE_TIME_FRAME_DAYS)
 
     try:
-
-
+        # Use batched approach for better performance
+        all_metadata = fetch_issue_metadata_batched(
+            client, identifiers, start_date, end_date, batch_size=100
         )
     except Exception as e:
         print(f"✗ Error during BigQuery fetch: {str(e)}")

@@ -1054,13 +1113,17 @@ def mine_all_agents():
             error_count += 1
             continue
 
+    # Calculate number of batches
+    batch_size = 100
+    total_batches = (len(identifiers) + batch_size - 1) // batch_size
+
     print(f"\n{'='*80}")
     print(f"✅ Mining complete!")
     print(f"   Total agents: {len(agents)}")
     print(f"   Successfully saved: {success_count}")
     print(f"   No data (skipped): {no_data_count}")
     print(f"   Errors: {error_count}")
-    print(f"   BigQuery
+    print(f"   BigQuery batches executed: {total_batches} (batch size: {batch_size})")
     print(f"{'='*80}\n")
 
     # After mining is complete, save leaderboard and metrics to HuggingFace
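Note: the batching introduced above is plain list slicing plus a dictionary merge, so it can be sanity-checked without a BigQuery client. The snippet below is a minimal illustrative sketch, not part of this commit; `fake_fetch` is a hypothetical stand-in for fetch_all_pr_metadata_single_query.

# Illustrative sketch of the batch split / merge logic from this commit (hypothetical stand-ins).
def fake_fetch(client, batch_identifiers, start_date, end_date):
    # Pretend each agent authored exactly one PR in the window.
    return {agent: [{"agent": agent, "title": f"PR by {agent}"}] for agent in batch_identifiers}

identifiers = [f"agent-{i}" for i in range(250)]
batch_size = 100

# Same slicing used by fetch_issue_metadata_batched: 250 agents -> batches of 100, 100, 50.
batches = [identifiers[i:i + batch_size] for i in range(0, len(identifiers), batch_size)]
assert [len(b) for b in batches] == [100, 100, 50]

# Same ceiling division used by mine_all_agents to report the batch count.
total_batches = (len(identifiers) + batch_size - 1) // batch_size
assert total_batches == len(batches) == 3

# Merge per-batch results, equivalent to the if/extend/else merge in the new helper.
all_metadata = {}
for batch in batches:
    for agent, prs in fake_fetch(None, batch, None, None).items():
        all_metadata.setdefault(agent, []).extend(prs)

assert sum(len(v) for v in all_metadata.values()) == 250
print(f"{total_batches} batches, {len(all_metadata)} agents merged")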
msr.py CHANGED

@@ -118,13 +118,70 @@ def get_hf_token():
 # BIGQUERY FUNCTIONS
 # =============================================================================
 
+def fetch_issue_metadata_batched(client, identifiers, start_date, end_date, batch_size=100):
+    """
+    Fetch issue metadata for ALL agents using BATCHED BigQuery queries.
+    Splits agents into smaller batches to avoid performance issues with large numbers of agents.
+
+    Args:
+        client: BigQuery client instance
+        identifiers: List of GitHub usernames/bot identifiers
+        start_date: Start datetime (timezone-aware)
+        end_date: End datetime (timezone-aware)
+        batch_size: Number of agents to process per batch (default: 100)
+
+    Returns:
+        Dictionary mapping agent identifier to list of issue metadata
+    """
+    # Split identifiers into batches
+    batches = [identifiers[i:i + batch_size] for i in range(0, len(identifiers), batch_size)]
+    total_batches = len(batches)
+
+    print(f"\n🔍 Using BATCHED approach for {len(identifiers)} agents")
+    print(f"   Total batches: {total_batches} (batch size: {batch_size})")
+    print(f"   Time range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
+
+    # Collect results from all batches
+    all_metadata = {}
+
+    for batch_num, batch_identifiers in enumerate(batches, 1):
+        print(f"\n📦 Processing batch {batch_num}/{total_batches} ({len(batch_identifiers)} agents)...")
+
+        try:
+            # Query each batch
+            batch_results = fetch_all_pr_metadata_single_query(
+                client, batch_identifiers, start_date, end_date
+            )
+
+            # Merge results
+            for identifier, metadata_list in batch_results.items():
+                if identifier in all_metadata:
+                    all_metadata[identifier].extend(metadata_list)
+                else:
+                    all_metadata[identifier] = metadata_list
+
+            print(f"   ✓ Batch {batch_num}/{total_batches} complete")
+
+        except Exception as e:
+            print(f"   ✗ Batch {batch_num}/{total_batches} failed: {str(e)}")
+            print(f"   Continuing with remaining batches...")
+            continue
+
+    total_prs = sum(len(metadata_list) for metadata_list in all_metadata.values())
+    print(f"\n✓ All batches complete! Found {total_prs} total PRs across {len(all_metadata)} agents")
+
+    return all_metadata
+
+
 def fetch_all_pr_metadata_single_query(client, identifiers, start_date, end_date):
     """
-    Fetch PR metadata for
+    Fetch PR metadata for a BATCH of agents using ONE comprehensive BigQuery query.
+
+    NOTE: This function is designed for smaller batches (~100 agents).
+    For large numbers of agents, use fetch_issue_metadata_batched() instead.
 
     This query fetches:
     1. PRs authored by agents (user.login matches identifier)
-    2. PRs from branches starting with agent identifier (head.ref pattern)
 
     Args:
         client: BigQuery client instance

@@ -147,7 +204,7 @@ def fetch_all_pr_metadata_single_query(client, identifiers, start_date, end_date
        ...
        }
     """
-    print(f"
+    print(f"   Querying BigQuery for {len(identifiers)} agents in this batch...")
     print(f"   Time range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
 
     # Generate table UNION statements for the time range

@@ -201,14 +258,14 @@ def fetch_all_pr_metadata_single_query(client, identifiers, start_date, end_date
     ORDER BY created_at DESC
     """
 
-    print(f"
-    print(f"
+    print(f"   Scanning {(end_date - start_date).days} days of GitHub Archive data...")
+    print(f"   Batch agents: {', '.join(identifiers[:5])}{'...' if len(identifiers) > 5 else ''}")
 
     try:
         query_job = client.query(query)
         results = list(query_job.result())
 
-        print(f"   ✓ Found {len(results)}
+        print(f"   ✓ Found {len(results)} PRs in this batch")
 
         # Group results by agent
         metadata_by_agent = defaultdict(list)

@@ -239,8 +296,8 @@ def fetch_all_pr_metadata_single_query(client, identifiers, start_date, end_date
             if pr_author and pr_author in identifiers:
                 metadata_by_agent[pr_author].append(pr_data)
 
-        # Print breakdown by agent
-        print(f"
+        # Print breakdown by agent (only show agents with PRs)
+        print(f"   📊 Batch breakdown:")
         for identifier in identifiers:
             count = len(metadata_by_agent.get(identifier, []))
             if count > 0:

@@ -726,7 +783,7 @@ def mine_all_agents():
     print(f"\n{'='*80}")
     print(f"Starting PR metadata mining for {len(identifiers)} agents")
     print(f"Time frame: Last {LEADERBOARD_TIME_FRAME_DAYS} days")
-    print(f"Data source: BigQuery + GitHub Archive (
+    print(f"Data source: BigQuery + GitHub Archive (BATCHED QUERIES)")
     print(f"{'='*80}\n")
 
     # Initialize BigQuery client

@@ -742,8 +799,9 @@ def mine_all_agents():
     start_date = end_date - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)
 
     try:
-
-
+        # Use batched approach for better performance
+        all_metadata = fetch_issue_metadata_batched(
+            client, identifiers, start_date, end_date, batch_size=100
         )
     except Exception as e:
         print(f"✗ Error during BigQuery fetch: {str(e)}")

@@ -791,13 +849,17 @@ def mine_all_agents():
             error_count += 1
             continue
 
+    # Calculate number of batches
+    batch_size = 100
+    total_batches = (len(identifiers) + batch_size - 1) // batch_size
+
     print(f"\n{'='*80}")
     print(f"✅ Mining complete!")
     print(f"   Total agents: {len(agents)}")
     print(f"   Successfully saved: {success_count}")
     print(f"   No data (skipped): {no_data_count}")
     print(f"   Errors: {error_count}")
-    print(f"   BigQuery
+    print(f"   BigQuery batches executed: {total_batches} (batch size: {batch_size})")
     print(f"{'='*80}\n")
 
     # Compute and save leaderboard data
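The msr.py changes mirror app.py, so the same failure-isolation property applies: a failed batch is logged and skipped while the remaining batches still contribute results. One hypothetical way to exercise that behavior locally, not part of this commit and assuming msr.py is importable as a module named `msr`, is to patch the per-batch query function:

# Hypothetical check that one failing batch does not abort the run (assumes `import msr` works).
from datetime import datetime, timedelta, timezone
from unittest.mock import patch

import msr  # assumption: msr.py is on the import path

calls = {"n": 0}

def flaky_fetch(client, batch_identifiers, start_date, end_date):
    # Stand-in for fetch_all_pr_metadata_single_query: the second batch raises,
    # the other batches return one PR per agent.
    calls["n"] += 1
    if calls["n"] == 2:
        raise RuntimeError("simulated BigQuery failure")
    return {agent: [{"agent": agent}] for agent in batch_identifiers}

end = datetime.now(timezone.utc)
start = end - timedelta(days=7)
agents = [f"agent-{i}" for i in range(250)]

with patch.object(msr, "fetch_all_pr_metadata_single_query", side_effect=flaky_fetch):
    result = msr.fetch_issue_metadata_batched(None, agents, start, end, batch_size=100)

# Batch 2 (agents 100-199) is skipped; the other 150 agents still come back merged.
assert len(result) == 150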