upload_large_folder
app.py
CHANGED
@@ -128,9 +128,6 @@ def is_rate_limit_error(e):
         f"⏳ Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
     )
 )
-def upload_large_folder_with_backoff(api, **kwargs):
-    """Wrapper for api.upload_large_folder() with exponential backoff for rate limits."""
-    return api.upload_large_folder(**kwargs)
 
 
 @backoff.on_exception(
@@ -181,6 +178,22 @@ def upload_file_with_backoff(api, **kwargs):
     return api.upload_file(**kwargs)
 
 
+@backoff.on_exception(
+    backoff.expo,
+    HfHubHTTPError,
+    max_tries=8,
+    base=300,
+    max_value=3600,
+    giveup=lambda e: not is_rate_limit_error(e),
+    on_backoff=lambda details: print(
+        f"⏳ Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
+    )
+)
+def upload_folder_with_backoff(api, **kwargs):
+    """Wrapper for api.upload_folder() with exponential backoff for rate limits."""
+    return api.upload_folder(**kwargs)
+
+
 # =============================================================================
 # BIGQUERY FUNCTIONS
 # =============================================================================
@@ -880,14 +893,14 @@ def save_review_metadata_to_hf(metadata_list, agent_identifier):
         save_jsonl(local_filename, merged_metadata)
         print(f"   Prepared {len(merged_metadata)} reviews for {filename}")
 
-    # Upload entire folder using
-    # Note: upload_large_folder creates multiple commits automatically and doesn't support custom commit_message
+    # Upload entire folder using upload_folder (single commit per agent)
     print(f"📤 Uploading {len(grouped)} files...")
-    upload_large_folder_with_backoff(
+    upload_folder_with_backoff(
         api=api,
         folder_path=temp_dir,
         repo_id=REVIEW_METADATA_REPO,
-        repo_type="dataset"
+        repo_type="dataset",
+        commit_message=f"Update review metadata for {agent_identifier}"
     )
     print(f"   ✅ Batch upload complete for {agent_identifier}")
 
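Both hunks lean on is_rate_limit_error, whose definition sits above the first hunk and is not part of this diff. A minimal sketch of the predicate the giveup lambda expects, assuming it keys off HTTP 429 responses (the real body, and the exact import path, may differ):

from huggingface_hub.utils import HfHubHTTPError  # import path assumed

def is_rate_limit_error(e):
    """Sketch only: treat an HfHubHTTPError with status 429 as a rate limit.

    The actual definition lives earlier in app.py; this version just
    illustrates the contract used by giveup=lambda e: not is_rate_limit_error(e).
    """
    return (
        isinstance(e, HfHubHTTPError)
        and e.response is not None
        and e.response.status_code == 429
    )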
msr.py
CHANGED
@@ -119,10 +119,6 @@ def is_rate_limit_error(e):
         f"⏳ Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
     )
 )
-def upload_large_folder_with_backoff(api, **kwargs):
-    """Wrapper for api.upload_large_folder() with exponential backoff for rate limits."""
-    return api.upload_large_folder(**kwargs)
-
 
 @backoff.on_exception(
     backoff.expo,
@@ -172,6 +168,22 @@ def upload_file_with_backoff(api, **kwargs):
     return api.upload_file(**kwargs)
 
 
+@backoff.on_exception(
+    backoff.expo,
+    HfHubHTTPError,
+    max_tries=8,
+    base=300,
+    max_value=3600,
+    giveup=lambda e: not is_rate_limit_error(e),
+    on_backoff=lambda details: print(
+        f"⏳ Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
+    )
+)
+def upload_folder_with_backoff(api, **kwargs):
+    """Wrapper for api.upload_folder() with exponential backoff for rate limits."""
+    return api.upload_folder(**kwargs)
+
+
 def get_bigquery_client():
     """
     Initialize BigQuery client using credentials from environment variable.
@@ -597,14 +609,14 @@ def save_review_metadata_to_hf(metadata_list, agent_identifier):
         save_jsonl(local_filename, day_metadata)
         print(f"   Prepared {len(day_metadata)} reviews for {filename}")
 
-    # Upload entire folder using
-    # Note: upload_large_folder creates multiple commits automatically and doesn't support custom commit_message
+    # Upload entire folder using upload_folder (single commit per agent)
     print(f"  📤 Uploading {len(grouped)} files ({len(metadata_list)} total reviews)...")
-    upload_large_folder_with_backoff(
+    upload_folder_with_backoff(
         api=api,
         folder_path=temp_dir,
         repo_id=REVIEW_METADATA_REPO,
-        repo_type="dataset"
+        repo_type="dataset",
+        commit_message=f"Update review metadata for {agent_identifier}"
     )
     print(f"   ✅ Batch upload complete for {agent_identifier}")
 
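For reference, a minimal usage sketch of the new wrapper, assuming an authenticated HfApi client; the folder path, repo id, and agent name below are placeholders, not values from this commit:

from huggingface_hub import HfApi

api = HfApi()  # picks up HF_TOKEN from the environment if set

# upload_folder makes a single commit and, unlike upload_large_folder,
# accepts a custom commit_message - which is why the wrapper switched.
upload_folder_with_backoff(
    api=api,
    folder_path="/tmp/review_metadata",        # placeholder temp dir
    repo_id="your-username/review-metadata",   # placeholder dataset repo
    repo_type="dataset",
    commit_message="Update review metadata for example-agent",
)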