Gil Stetler committed on
Commit 8a29bed · 1 Parent(s): 67dad62

add % mse and rmse

Files changed (1)
app.py +33 -17
app.py CHANGED
@@ -18,7 +18,7 @@ dtype = torch.bfloat16 if device == "cuda" else torch.float32
 # Load once at startup (HF Spaces cache between runs)
 pipe = ChronosPipeline.from_pretrained(
     MODEL_ID,
-    device_map="auto",
+    device_map="auto",  # uses GPU if available
     torch_dtype=dtype,
 )
 
@@ -33,9 +33,7 @@ def run_forecast_and_evaluate():
     if n <= PREDICTION_LENGTH + 5:
         raise gr.Error("Time series too short for a holdout evaluation.")
 
-    # 2) Train/forecast split:
-    #    Use all but the last PREDICTION_LENGTH points as context (train),
-    #    and compare forecast to the real last PREDICTION_LENGTH points (test).
+    # 2) Holdout split: forecast the last 12 points
     y_train = y[: n - PREDICTION_LENGTH]
     y_test = y[n - PREDICTION_LENGTH :]
 
@@ -44,21 +42,29 @@ def run_forecast_and_evaluate():
     samples = fcst[0].cpu().numpy()  # (S, H)
 
     # 3) Summaries & metrics
-    low, median, high = np.quantile(samples, [0.1, 0.5, 0.9], axis=0)
+    p10, p50, p90 = np.quantile(samples, [0.1, 0.5, 0.9], axis=0)
 
-    # "mean standard error" is ambiguous; commonly MSE + RMSE are reported:
-    mse = float(np.mean((median - y_test) ** 2))
+    # Point forecast = median
+    mse = float(np.mean((p50 - y_test) ** 2))
     rmse = float(np.sqrt(mse))
 
-    # 4) Plot: full history + forecast horizon vs ground truth
+    # Percent versions (relative to the mean of the true holdout)
+    mean_y = float(np.mean(y_test))
+    rmse_pct = float(100.0 * rmse / mean_y)       # RMSE as % of mean
+    mse_pct = float(100.0 * mse / (mean_y ** 2))  # MSE as % of mean^2
+
+    # (Optional) MAPE if you ever want it:
+    # mape_pct = float(100.0 * np.mean(np.abs((p50 - y_test) / y_test)))
+
+    # 4) Plot: history + forecast horizon vs ground truth
     fig = plt.figure(figsize=(9, 4))
     x_hist = np.arange(len(y_train))
     x_fcst = np.arange(len(y_train), len(y_train) + PREDICTION_LENGTH)
 
     plt.plot(x_hist, y_train, label="history")
     plt.plot(x_fcst, y_test, label="actual (holdout)")
-    plt.plot(x_fcst, median, linestyle="--", label="forecast (median)")
-    plt.fill_between(x_fcst, low, high, alpha=0.3, label="80% interval")
+    plt.plot(x_fcst, p50, linestyle="--", label="forecast (median)")
+    plt.fill_between(x_fcst, p10, p90, alpha=0.3, label="80% interval")
     plt.title("Chronos-T5-Large • Holdout Evaluation")
     plt.xlabel("time")
     plt.ylabel("#Passengers")
@@ -69,22 +75,32 @@ def run_forecast_and_evaluate():
     out_json = {
         "prediction_length": int(PREDICTION_LENGTH),
         "num_samples": int(NUM_SAMPLES),
-        "metrics": {"MSE": mse, "RMSE": rmse},
-        "median": median.tolist(),
-        "p10": low.tolist(),
-        "p90": high.tolist(),
+        "metrics": {
+            "MSE": mse,
+            "RMSE": rmse,
+            "RMSE_%_of_mean": rmse_pct,
+            "MSE_%_of_mean^2": mse_pct,
+            # "MAPE_%": mape_pct,  # uncomment if you add MAPE
+            "mean_of_truth": mean_y,
+        },
+        "median": p50.tolist(),
+        "p10": p10.tolist(),
+        "p90": p90.tolist(),
         "actual": y_test.tolist(),
     }
 
-    # Metrics text to display prominently
-    metrics_md = f"**MSE:** {mse:.3f}  **RMSE:** {rmse:.3f}"
+    metrics_md = (
+        f"**MSE:** {mse:.3f}  **RMSE:** {rmse:.3f}  "
+        f"**RMSE% of mean:** {rmse_pct:.2f}%  "
+        f"**MSE% of mean²:** {mse_pct:.3f}%"
+    )
     return fig, out_json, metrics_md
 
 with gr.Blocks(title="Chronos-T5-Large • Holdout Demo") as demo:
     gr.Markdown(
         "## Chronos-T5-Large (zero-shot forecasting) — Holdout Evaluation\n"
         "Click **Run** to forecast the last 12 months from AirPassengers and compare to the true values.\n"
-        "Computation runs on this Space's server hardware."
+        "Shows MSE, RMSE, and RMSE% / MSE% relative to the mean of the 12 true values."
    )
    run_btn = gr.Button("Run", variant="primary")
    plot = gr.Plot(label="Forecast vs Actual (holdout)")
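
For reference, the percent metrics added in this commit are just MSE and RMSE normalized by the mean of the true holdout values. Below is a minimal, self-contained sketch of that arithmetic using made-up toy numbers (not AirPassengers data); the optional MAPE line mirrors the commented-out one in app.py and assumes y_test contains no zeros.

import numpy as np

# Toy holdout values and toy median forecast, purely for illustration
y_test = np.array([420.0, 390.0, 432.0, 405.0])
p50 = np.array([410.0, 400.0, 425.0, 415.0])

mse = float(np.mean((p50 - y_test) ** 2))      # mean squared error
rmse = float(np.sqrt(mse))                     # root mean squared error

mean_y = float(np.mean(y_test))                # mean of the true holdout
rmse_pct = 100.0 * rmse / mean_y               # RMSE as a % of that mean
mse_pct = 100.0 * mse / (mean_y ** 2)          # MSE as a % of the squared mean

# Optional MAPE (undefined if any y_test value is zero)
mape_pct = 100.0 * float(np.mean(np.abs((p50 - y_test) / y_test)))

print(f"MSE={mse:.3f}  RMSE={rmse:.3f}  "
      f"RMSE%={rmse_pct:.2f}%  MSE%={mse_pct:.3f}%  MAPE%={mape_pct:.2f}%")

Normalizing by the holdout mean makes the error scale-free, so RMSE% can be compared across series with different magnitudes, which is the point of reporting it alongside the raw MSE and RMSE.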