zhimin-z
committed on
Commit
·
445e161
1
Parent(s):
ed4fe09
refine
Browse files
msr.py
CHANGED
|
@@ -132,8 +132,21 @@ def download_file(url):
|
|
| 132 |
with open(filepath, "wb") as f:
|
| 133 |
f.write(response.content)
|
| 134 |
return True
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
except Exception as e:
|
|
|
|
| 137 |
wait_time = DOWNLOAD_RETRY_DELAY * (2 ** attempt)
|
| 138 |
print(f" ⚠ {filename}: {e}, retrying in {wait_time}s (attempt {attempt + 1}/{MAX_RETRIES})")
|
| 139 |
time.sleep(wait_time)
|
|
|
|
| 132 |
with open(filepath, "wb") as f:
|
| 133 |
f.write(response.content)
|
| 134 |
return True
|
| 135 |
+
|
| 136 |
+
except requests.exceptions.HTTPError as e:
|
| 137 |
+
# 404 means the file doesn't exist in GHArchive - skip without retry
|
| 138 |
+
if e.response.status_code == 404:
|
| 139 |
+
if attempt == 0: # Only log once, not for each retry
|
| 140 |
+
print(f" ○ {filename}: Not available (404) - skipping")
|
| 141 |
+
return False
|
| 142 |
+
|
| 143 |
+
# Other HTTP errors (5xx, etc.) should be retried
|
| 144 |
+
wait_time = DOWNLOAD_RETRY_DELAY * (2 ** attempt)
|
| 145 |
+
print(f" ⚠ {filename}: {e}, retrying in {wait_time}s (attempt {attempt + 1}/{MAX_RETRIES})")
|
| 146 |
+
time.sleep(wait_time)
|
| 147 |
+
|
| 148 |
except Exception as e:
|
| 149 |
+
# Network errors, timeouts, etc. should be retried
|
| 150 |
wait_time = DOWNLOAD_RETRY_DELAY * (2 ** attempt)
|
| 151 |
print(f" ⚠ {filename}: {e}, retrying in {wait_time}s (attempt {attempt + 1}/{MAX_RETRIES})")
|
| 152 |
time.sleep(wait_time)
|