zhiminy committed on
Commit
97bc318
·
1 Parent(s): 161efc6

upload_large_folder

Browse files
Files changed (2) hide show
  1. app.py +19 -6
  2. msr.py +19 -6
app.py CHANGED
@@ -132,9 +132,6 @@ def backoff_handler(details):
132
  jitter=backoff.full_jitter,
133
  on_backoff=backoff_handler
134
  )
135
- def upload_large_folder_with_backoff(api, **kwargs):
136
- """Wrapper for HfApi.upload_large_folder with exponential backoff on rate limits."""
137
- return api.upload_large_folder(**kwargs)
138
 
139
 
140
  @backoff.on_exception(
@@ -167,6 +164,21 @@ def hf_hub_download_with_backoff(**kwargs):
167
  return hf_hub_download(**kwargs)
168
 
169
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  @backoff.on_exception(
171
  backoff.expo,
172
  HfHubHTTPError,
@@ -719,13 +731,14 @@ def save_pr_metadata_to_hf(metadata_list, agent_identifier):
719
  save_jsonl(local_filename, day_metadata)
720
  print(f" Prepared {len(day_metadata)} PRs for {filename}")
721
 
722
- # Upload entire folder using upload_large_folder (optimized for large files)
723
  print(f" 📤 Uploading {len(grouped)} files ({len(metadata_list)} total PRs)...")
724
- upload_large_folder_with_backoff(
725
  api,
726
  folder_path=temp_dir,
727
  repo_id=PR_METADATA_REPO,
728
- repo_type="dataset"
 
729
  )
730
  print(f" ✓ Batch upload complete for {agent_identifier}")
731
 
 
132
  jitter=backoff.full_jitter,
133
  on_backoff=backoff_handler
134
  )
 
 
 
135
 
136
 
137
  @backoff.on_exception(
 
164
  return hf_hub_download(**kwargs)
165
 
166
 
167
+ @backoff.on_exception(
168
+ backoff.expo,
169
+ HfHubHTTPError,
170
+ giveup=lambda e: not is_rate_limit_error(e),
171
+ max_tries=8,
172
+ base=300, # Start at 5 minutes (300 seconds)
173
+ max_value=3600, # Cap at 60 minutes (3600 seconds)
174
+ jitter=backoff.full_jitter,
175
+ on_backoff=backoff_handler
176
+ )
177
+ def upload_folder_with_backoff(api, **kwargs):
178
+ """Wrapper for HfApi.upload_folder with exponential backoff on rate limits."""
179
+ return api.upload_folder(**kwargs)
180
+
181
+
182
  @backoff.on_exception(
183
  backoff.expo,
184
  HfHubHTTPError,
 
731
  save_jsonl(local_filename, day_metadata)
732
  print(f" Prepared {len(day_metadata)} PRs for {filename}")
733
 
734
+ # Upload entire folder using upload_folder (single commit per agent)
735
  print(f" 📤 Uploading {len(grouped)} files ({len(metadata_list)} total PRs)...")
736
+ upload_folder_with_backoff(
737
  api,
738
  folder_path=temp_dir,
739
  repo_id=PR_METADATA_REPO,
740
+ repo_type="dataset",
741
+ commit_message=f"Update PR metadata for {agent_identifier}"
742
  )
743
  print(f" ✓ Batch upload complete for {agent_identifier}")
744
 
msr.py CHANGED
@@ -153,9 +153,6 @@ def backoff_handler(details):
153
  jitter=backoff.full_jitter,
154
  on_backoff=backoff_handler
155
  )
156
- def upload_large_folder_with_backoff(api, **kwargs):
157
- """Wrapper for HfApi.upload_large_folder with exponential backoff on rate limits."""
158
- return api.upload_large_folder(**kwargs)
159
 
160
 
161
  @backoff.on_exception(
@@ -188,6 +185,21 @@ def hf_hub_download_with_backoff(**kwargs):
188
  return hf_hub_download(**kwargs)
189
 
190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  @backoff.on_exception(
192
  backoff.expo,
193
  HfHubHTTPError,
@@ -501,13 +513,14 @@ def save_pr_metadata_to_hf(metadata_list, agent_identifier):
501
  save_jsonl(local_filename, day_metadata)
502
  print(f" Prepared {len(day_metadata)} PRs for {filename}")
503
 
504
- # Upload entire folder using upload_large_folder (optimized for large files)
505
  print(f" 📤 Uploading {len(grouped)} files ({len(metadata_list)} total PRs)...")
506
- upload_large_folder_with_backoff(
507
  api,
508
  folder_path=temp_dir,
509
  repo_id=PR_METADATA_REPO,
510
- repo_type="dataset"
 
511
  )
512
  print(f" ✓ Batch upload complete for {agent_identifier}")
513
 
 
153
  jitter=backoff.full_jitter,
154
  on_backoff=backoff_handler
155
  )
 
 
 
156
 
157
 
158
  @backoff.on_exception(
 
185
  return hf_hub_download(**kwargs)
186
 
187
 
188
+ @backoff.on_exception(
189
+ backoff.expo,
190
+ HfHubHTTPError,
191
+ giveup=lambda e: not is_rate_limit_error(e),
192
+ max_tries=8,
193
+ base=300, # Start at 5 minutes (300 seconds)
194
+ max_value=3600, # Cap at 60 minutes (3600 seconds)
195
+ jitter=backoff.full_jitter,
196
+ on_backoff=backoff_handler
197
+ )
198
+ def upload_folder_with_backoff(api, **kwargs):
199
+ """Wrapper for HfApi.upload_folder with exponential backoff on rate limits."""
200
+ return api.upload_folder(**kwargs)
201
+
202
+
203
  @backoff.on_exception(
204
  backoff.expo,
205
  HfHubHTTPError,
 
513
  save_jsonl(local_filename, day_metadata)
514
  print(f" Prepared {len(day_metadata)} PRs for {filename}")
515
 
516
+ # Upload entire folder using upload_folder (single commit per agent)
517
  print(f" 📤 Uploading {len(grouped)} files ({len(metadata_list)} total PRs)...")
518
+ upload_folder_with_backoff(
519
  api,
520
  folder_path=temp_dir,
521
  repo_id=PR_METADATA_REPO,
522
+ repo_type="dataset",
523
+ commit_message=f"Update PR metadata for {agent_identifier}"
524
  )
525
  print(f" ✓ Batch upload complete for {agent_identifier}")
526