zhimin-z committed on
Commit
fc7d979
·
1 Parent(s): 3f8cff7
Files changed (2) hide show
  1. msr.py +0 -2
  2. requirements.txt +1 -1
msr.py CHANGED
@@ -209,7 +209,6 @@ def get_duckdb_connection():
209
  conn.execute("SET memory_limit = '400GB';") # Utilize available RAM (709GB available)
210
  conn.execute("SET preserve_insertion_order = false;") # Better parallelization
211
  conn.execute("SET enable_object_cache = true;") # Cache objects for reuse
212
- conn.execute("SET force_parallelism = true;") # Force parallel execution where possible
213
  conn.execute("SET temp_directory = '/tmp/duckdb_temp';") # Use fast temp storage if needed
214
 
215
  return conn
@@ -1047,7 +1046,6 @@ def mine_all_agents():
1047
  print(f"{'='*80}")
1048
  print(f"Starting review metadata mining for {len(identifiers)} agents")
1049
  print(f"Time frame: Last {LEADERBOARD_TIME_FRAME_DAYS} days")
1050
- print(f"Data source: DuckDB + Local GHArchive (SINGLE QUERY)")
1051
  print(f"{'='*80}")
1052
 
1053
  # Initialize DuckDB connection
 
209
  conn.execute("SET memory_limit = '400GB';") # Utilize available RAM (709GB available)
210
  conn.execute("SET preserve_insertion_order = false;") # Better parallelization
211
  conn.execute("SET enable_object_cache = true;") # Cache objects for reuse
 
212
  conn.execute("SET temp_directory = '/tmp/duckdb_temp';") # Use fast temp storage if needed
213
 
214
  return conn
 
1046
  print(f"{'='*80}")
1047
  print(f"Starting review metadata mining for {len(identifiers)} agents")
1048
  print(f"Time frame: Last {LEADERBOARD_TIME_FRAME_DAYS} days")
 
1049
  print(f"{'='*80}")
1050
 
1051
  # Initialize DuckDB connection
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
  APScheduler
2
  backoff
3
- duckdb
4
  gradio
5
  gradio_leaderboard
6
  huggingface_hub
 
1
  APScheduler
2
  backoff
3
+ duckdb[all]
4
  gradio
5
  gradio_leaderboard
6
  huggingface_hub