upload_large_folder
Browse files

app.py CHANGED

@@ -132,9 +132,6 @@ def backoff_handler(details):
     jitter=backoff.full_jitter,
     on_backoff=backoff_handler
 )
-def upload_large_folder_with_backoff(api, **kwargs):
-    """Wrapper for HfApi.upload_large_folder with exponential backoff on rate limits."""
-    return api.upload_large_folder(**kwargs)
 
 
 @backoff.on_exception(
@@ -167,6 +164,21 @@ def hf_hub_download_with_backoff(**kwargs):
     return hf_hub_download(**kwargs)
 
 
+@backoff.on_exception(
+    backoff.expo,
+    HfHubHTTPError,
+    giveup=lambda e: not is_rate_limit_error(e),
+    max_tries=8,
+    base=300,  # Start at 5 minutes (300 seconds)
+    max_value=3600,  # Cap at 60 minutes (3600 seconds)
+    jitter=backoff.full_jitter,
+    on_backoff=backoff_handler
+)
+def upload_folder_with_backoff(api, **kwargs):
+    """Wrapper for HfApi.upload_folder with exponential backoff on rate limits."""
+    return api.upload_folder(**kwargs)
+
+
 @backoff.on_exception(
     backoff.expo,
     HfHubHTTPError,
@@ -719,13 +731,14 @@ def save_pr_metadata_to_hf(metadata_list, agent_identifier):
             save_jsonl(local_filename, day_metadata)
             print(f"  Prepared {len(day_metadata)} PRs for (unknown)")
 
-        # Upload entire folder using upload_large_folder
+        # Upload entire folder using upload_folder (single commit per agent)
         print(f"  📤 Uploading {len(grouped)} files ({len(metadata_list)} total PRs)...")
-        upload_large_folder_with_backoff(
+        upload_folder_with_backoff(
             api,
             folder_path=temp_dir,
             repo_id=PR_METADATA_REPO,
-            repo_type="dataset"
+            repo_type="dataset",
+            commit_message=f"Update PR metadata for {agent_identifier}"
         )
         print(f"  ✓ Batch upload complete for {agent_identifier}")
 
msr.py CHANGED

@@ -153,9 +153,6 @@ def backoff_handler(details):
     jitter=backoff.full_jitter,
     on_backoff=backoff_handler
 )
-def upload_large_folder_with_backoff(api, **kwargs):
-    """Wrapper for HfApi.upload_large_folder with exponential backoff on rate limits."""
-    return api.upload_large_folder(**kwargs)
 
 
 @backoff.on_exception(
@@ -188,6 +185,21 @@ def hf_hub_download_with_backoff(**kwargs):
     return hf_hub_download(**kwargs)
 
 
+@backoff.on_exception(
+    backoff.expo,
+    HfHubHTTPError,
+    giveup=lambda e: not is_rate_limit_error(e),
+    max_tries=8,
+    base=300,  # Start at 5 minutes (300 seconds)
+    max_value=3600,  # Cap at 60 minutes (3600 seconds)
+    jitter=backoff.full_jitter,
+    on_backoff=backoff_handler
+)
+def upload_folder_with_backoff(api, **kwargs):
+    """Wrapper for HfApi.upload_folder with exponential backoff on rate limits."""
+    return api.upload_folder(**kwargs)
+
+
 @backoff.on_exception(
     backoff.expo,
     HfHubHTTPError,
@@ -501,13 +513,14 @@ def save_pr_metadata_to_hf(metadata_list, agent_identifier):
             save_jsonl(local_filename, day_metadata)
             print(f"  Prepared {len(day_metadata)} PRs for (unknown)")
 
-        # Upload entire folder using upload_large_folder
+        # Upload entire folder using upload_folder (single commit per agent)
        print(f"  📤 Uploading {len(grouped)} files ({len(metadata_list)} total PRs)...")
-        upload_large_folder_with_backoff(
+        upload_folder_with_backoff(
             api,
             folder_path=temp_dir,
             repo_id=PR_METADATA_REPO,
-            repo_type="dataset"
+            repo_type="dataset",
+            commit_message=f"Update PR metadata for {agent_identifier}"
         )
         print(f"  ✓ Batch upload complete for {agent_identifier}")
 