Commit f31522c
Parent(s): 2129413
Update scheduler/schedule_tasks.py
Fixed line 7 from `from train.retrain_if_needed import retrain_if_needed` to `from model.retrain import train_model`
scheduler/schedule_tasks.py CHANGED (+51 -51)

@@ -1,51 +1,51 @@
-import schedule
-import time
-from pathlib import Path
-from data.scrape_real_news import
-from data.generate_fake_news import
-from monitor.monitor_drift import monitor_drift
-from train.retrain_if_needed import retrain_if_needed
-import json
-from datetime import datetime
-
-LOG_PATH = Path("logs/activity_log.json")
-
-def log_event(event: str):
-    log_entry = {
-        "timestamp": datetime.now().strftime("%Y-%m-%d %I:%M %p"),
-        "event": event
-    }
-    if LOG_PATH.exists():
-        logs = json.loads(LOG_PATH.read_text())
-    else:
-        logs = []
-
-    logs.append(log_entry)
-    LOG_PATH.write_text(json.dumps(logs, indent=2))
-
-def run_scraper_and_generator():
-    print("⏳ Running scraping and generation tasks...")
-    scrape_real_articles()
-    generate_fake_articles()
-    log_event("New data scraped and uploaded, triggering retraining now")
-
-    print("Retraining pipeline started...")
-    retrain_if_needed()
-
-    print("Monitoring for data drift...")
-    drift_score = monitor_drift()
-    log_event(f"Drift Score: {drift_score:.5f}")
-
-    print("✅ All tasks completed and logged.\n")
-
-# Initial run
-run_scraper_and_generator()
-
-# Schedule hourly
-schedule.every().hour.do(run_scraper_and_generator)
-
-print("Scheduler started. Running tasks every hour.\n")
-
-while True:
-    schedule.run_pending()
-    time.sleep(60)
+import schedule
+import time
+from pathlib import Path
+from data.scrape_real_news import scrape_articles
+from data.generate_fake_news import generate_fake_news
+from monitor.monitor_drift import monitor_drift
+from model.retrain import train_model
+import json
+from datetime import datetime
+
+LOG_PATH = Path("logs/activity_log.json")
+
+def log_event(event: str):
+    log_entry = {
+        "timestamp": datetime.now().strftime("%Y-%m-%d %I:%M %p"),
+        "event": event
+    }
+    if LOG_PATH.exists():
+        logs = json.loads(LOG_PATH.read_text())
+    else:
+        logs = []
+
+    logs.append(log_entry)
+    LOG_PATH.write_text(json.dumps(logs, indent=2))
+
+def run_scraper_and_generator():
+    print("⏳ Running scraping and generation tasks...")
+    scrape_real_articles()
+    generate_fake_articles()
+    log_event("New data scraped and uploaded, triggering retraining now")
+
+    print("Retraining pipeline started...")
+    retrain_if_needed()
+
+    print("Monitoring for data drift...")
+    drift_score = monitor_drift()
+    log_event(f"Drift Score: {drift_score:.5f}")
+
+    print("✅ All tasks completed and logged.\n")
+
+# Initial run
+run_scraper_and_generator()
+
+# Schedule hourly
+schedule.every().hour.do(run_scraper_and_generator)
+
+print("Scheduler started. Running tasks every hour.\n")
+
+while True:
+    schedule.run_pending()
+    time.sleep(60)
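
Even after this fix, the imported names and the call sites in the file still do not match: lines 28-29 call `scrape_real_articles()` and `generate_fake_articles()`, while the imports bring in `scrape_articles` and `generate_fake_news`, and line 33 still calls `retrain_if_needed()`, the very name this commit's import change removes in favor of `train_model`. Below is a minimal sketch of `run_scraper_and_generator()` with each call aligned to a name that is actually imported; whether these callables take zero arguments, and whether `train_model` retrains unconditionally where `retrain_if_needed` presumably gated retraining on some condition, are assumptions not confirmed by this diff.

```python
# Hypothetical follow-up sketch, not part of commit f31522c: the task body
# with every call matched to a name the module actually imports.
from data.scrape_real_news import scrape_articles
from data.generate_fake_news import generate_fake_news
from monitor.monitor_drift import monitor_drift
from model.retrain import train_model

# log_event is the JSON logger defined earlier in schedule_tasks.py.

def run_scraper_and_generator():
    print("⏳ Running scraping and generation tasks...")
    scrape_articles()       # the file calls scrape_real_articles(), which is never imported
    generate_fake_news()    # the file calls generate_fake_articles(), which is never imported
    log_event("New data scraped and uploaded, triggering retraining now")

    print("Retraining pipeline started...")
    train_model()           # the file still calls retrain_if_needed(), dropped by this commit

    print("Monitoring for data drift...")
    drift_score = monitor_drift()
    log_event(f"Drift Score: {drift_score:.5f}")

    print("✅ All tasks completed and logged.\n")
```

Running the committed file as-is would raise NameError at the first `scrape_real_articles()` call, so a rename along these lines, in either the imports or the call sites, is needed before the scheduler loop can complete a cycle.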