zhimin-z committed on
Commit
445e161
·
1 Parent(s): ed4fe09
Files changed (1) hide show
  1. msr.py +14 -1
msr.py CHANGED
@@ -132,8 +132,21 @@ def download_file(url):
132
  with open(filepath, "wb") as f:
133
  f.write(response.content)
134
  return True
135
-
 
 
 
 
 
 
 
 
 
 
 
 
136
  except Exception as e:
 
137
  wait_time = DOWNLOAD_RETRY_DELAY * (2 ** attempt)
138
  print(f" ⚠ {filename}: {e}, retrying in {wait_time}s (attempt {attempt + 1}/{MAX_RETRIES})")
139
  time.sleep(wait_time)
 
132
  with open(filepath, "wb") as f:
133
  f.write(response.content)
134
  return True
135
+
136
+ except requests.exceptions.HTTPError as e:
137
+ # 404 means the file doesn't exist in GHArchive - skip without retry
138
+ if e.response.status_code == 404:
139
+ if attempt == 0: # Only log once, not for each retry
140
+ print(f" ○ {filename}: Not available (404) - skipping")
141
+ return False
142
+
143
+ # Other HTTP errors (5xx, etc.) should be retried
144
+ wait_time = DOWNLOAD_RETRY_DELAY * (2 ** attempt)
145
+ print(f" ⚠ {filename}: {e}, retrying in {wait_time}s (attempt {attempt + 1}/{MAX_RETRIES})")
146
+ time.sleep(wait_time)
147
+
148
  except Exception as e:
149
+ # Network errors, timeouts, etc. should be retried
150
  wait_time = DOWNLOAD_RETRY_DELAY * (2 ** attempt)
151
  print(f" ⚠ {filename}: {e}, retrying in {wait_time}s (attempt {attempt + 1}/{MAX_RETRIES})")
152
  time.sleep(wait_time)