Gil Stetler committed on
Commit
3c3c589
·
1 Parent(s): 09f6668

utils fix 1-d

Browse files
Files changed (1) hide show
  1. utils_vol.py +118 -19
utils_vol.py CHANGED
@@ -1,29 +1,128 @@
 
 
1
  import yfinance as yf
2
  import numpy as np
3
  import pandas as pd
4
 
5
def fetch_close_series(ticker: str, start="2015-01-01", interval="1d") -> pd.Series:
    """Download price data via yfinance and return the close prices as a Series.

    The close column is looked up in this order: "Adj Close", "Close",
    "close", "adj close". If none of them exists, the last numeric column
    of the downloaded frame is used instead.

    Args:
        ticker: Symbol passed straight to ``yf.download``.
        start: Start date passed to ``yf.download``.
        interval: Bar interval passed to ``yf.download``.

    Returns:
        The selected price column with NaN rows removed. The result is
        always one-dimensional.

    Raises:
        ValueError: If the download returns ``None`` or an empty frame.
    """
    df = yf.download(ticker, start=start, interval=interval, progress=False, threads=True)
    if df is None or df.empty:
        raise ValueError(f"Keine Daten für {ticker}.")

    candidates = ("Adj Close", "Close", "close", "adj close")
    col = next((name for name in candidates if name in df.columns), None)
    if col is None:
        # No known close column: fall back to the last numeric column.
        col = df.select_dtypes("number").columns[-1]

    closes = df[col]
    if isinstance(closes, pd.DataFrame):
        # With multi-level columns, selecting a top-level label yields a
        # DataFrame (presumably one sub-column per ticker). Reduce it to its
        # first sub-column so the declared Series return type really holds.
        closes = closes.iloc[:, 0]
    return closes.dropna()
17
-
18
def realized_vol(close: pd.Series, window=20, annualize=True, *, periods_per_year=252) -> pd.Series:
    """Compute the rolling standard deviation of log returns.

    Args:
        close: Price series; log returns are taken between consecutive rows.
        window: Number of returns per rolling window. A value is produced
            only once a full window is available.
        annualize: If true, scale the result by ``sqrt(periods_per_year)``.
        periods_per_year: Number of observations per year used for the
            annualisation. The default of 252 matches the previously
            hard-coded factor; pass another value for non-daily data.

    Returns:
        The rolling volatility with all NaN rows removed. The result is
        empty when fewer than ``window`` returns are available.
    """
    log_returns = np.log(close).diff().dropna()
    rolling_std = log_returns.rolling(window, min_periods=window).std()
    if annualize:
        rolling_std = rolling_std * np.sqrt(periods_per_year)
    return rolling_std.dropna()
 
 
 
 
 
 
 
 
 
 
 
24
 
25
def rv_to_autogluon_df(rv: pd.Series, item_id: str = "series_1") -> pd.DataFrame:
    """Format realized volatility as a long DataFrame for AutoGluon TimeSeries.

    Args:
        rv: Volatility values indexed by timestamp. A DataFrame is also
            accepted; its first column is used, so the values handed to the
            result are always one-dimensional.
        item_id: Identifier written into every row of the ``item_id`` column.

    Returns:
        A DataFrame with the columns ``timestamp``, ``target`` and
        ``item_id`` (in that order), one row per entry of ``rv``.
    """
    if isinstance(rv, pd.DataFrame):
        # A frame would contribute 2-D values, which pd.DataFrame rejects as a
        # single column; reduce it to a Series first.
        rv = rv.iloc[:, 0]
    df = pd.DataFrame({"timestamp": rv.index, "target": rv.values})
    df["item_id"] = item_id
    return df
 
1
+ # utils_vol.py — robuste Version
2
+
3
  import yfinance as yf
4
  import numpy as np
5
  import pandas as pd
6
 
7
+ def _to_1d_series(obj: pd.Series | pd.DataFrame) -> pd.Series:
8
+ """
9
+ Erzwingt eine 1D-Serie:
10
+ - DataFrame (n,1) -> squeeze
11
+ - MultiIndex -> erste passende Spalte
12
+ - alles in float konvertieren, NaNs droppen
13
+ """
14
+ if isinstance(obj, pd.DataFrame):
15
+ # (n,1) -> Serie
16
+ if obj.shape[1] == 1:
17
+ ser = obj.squeeze(axis=1)
18
+ else:
19
+ # Fallback: nimm die erste numerische Spalte
20
+ num_cols = obj.select_dtypes(include=[np.number]).columns
21
+ if len(num_cols) > 0:
22
+ ser = obj[num_cols[0]]
23
+ else:
24
+ # nimm einfach die erste Spalte
25
+ ser = obj.iloc[:, 0]
26
+ else:
27
+ ser = obj
28
+
29
+ ser = pd.to_numeric(ser, errors="coerce")
30
+ ser = ser.dropna()
31
+ # Index in DatetimeIndex verwandeln, wenn möglich
32
+ if not isinstance(ser.index, pd.DatetimeIndex):
33
+ try:
34
+ ser.index = pd.to_datetime(ser.index, errors="coerce")
35
+ ser = ser[ser.index.notna()]
36
+ except Exception:
37
+ # notfalls RangeIndex lassen
38
+ pass
39
+ return ser.astype(float)
40
+
41
+
42
def fetch_close_series(ticker: str, start: str = "2015-01-01", interval: str = "1d") -> pd.Series:
    """Download OHLCV data via yfinance and return a 1-D close-price Series.

    ``auto_adjust=True`` is passed explicitly; per the original author's note
    this silences yfinance's FutureWarning and keeps adjusted and plain close
    prices consistent.

    Args:
        ticker: Symbol to download; surrounding whitespace is stripped.
        start: Start date passed to ``yf.download``.
        interval: Bar interval passed to ``yf.download``.

    Returns:
        The close prices, normalised by ``_to_1d_series``.

    Raises:
        ValueError: If no data comes back, or if the frame has flat columns
            and contains neither a known close column nor any numeric column.
    """
    frame = yf.download(
        ticker.strip(),
        start=start,
        interval=interval,
        auto_adjust=True,  # set explicitly to avoid the warning
        progress=False,
        threads=True,
    )
    if frame is None or frame.empty:
        raise ValueError(f"Keine Daten für {ticker} (start={start}, interval={interval}).")

    columns = frame.columns

    if isinstance(columns, pd.MultiIndex):
        # Multi-level columns: look for "Close" on the outermost level.
        if "Close" in columns.get_level_values(0):
            closes = frame.xs("Close", axis=1, level=0)
            # Several sub-columns (several tickers): keep only the first one.
            if closes.shape[1] > 1:
                closes = closes.iloc[:, 0]
            return _to_1d_series(closes)

        numeric = frame.select_dtypes(include=[np.number]).columns
        if len(numeric) > 0:
            # Fallback: first numeric column.
            return _to_1d_series(frame[numeric[0]])
        # Last resort: the very first column.
        return _to_1d_series(frame.iloc[:, 0])

    # Flat columns: try the known spellings in priority order.
    known_names = ("Close", "Adj Close", "close", "adj close", "Price", "price")
    match = next((name for name in known_names if name in columns), None)
    if match is not None:
        return _to_1d_series(frame[match])

    # Fallback: first numeric column, if there is one.
    numeric = frame.select_dtypes(include=[np.number]).columns
    if len(numeric) == 0:
        raise ValueError("Keine numerische Close-Spalte gefunden.")
    return _to_1d_series(frame[numeric[0]])
86
+
87
+
88
def realized_vol(
    close: pd.Series,
    window: int = 20,
    annualize: bool = True,
    *,
    periods_per_year: float = 252.0,
) -> pd.Series:
    """Compute the rolling standard deviation of log returns as a 1-D Series.

    Args:
        close: Price series (or single-column frame); it is normalised with
            ``_to_1d_series`` before use.
        window: Number of returns per rolling window. A value is produced
            only once a full window is available.
        annualize: If true, scale the result by ``sqrt(periods_per_year)``.
        periods_per_year: Number of observations per year used for the
            annualisation. The default of 252.0 matches the previously
            hard-coded factor; pass another value for non-daily intervals.

    Returns:
        The rolling volatility without NaN rows, normalised once more with
        ``_to_1d_series`` so the result is always one-dimensional.
    """
    close = _to_1d_series(close)
    log_returns = np.log(close).diff().dropna()
    rolling_std = log_returns.rolling(window, min_periods=window).std()
    if annualize:
        rolling_std = rolling_std * np.sqrt(periods_per_year)
    # Normalise again as a safety net so callers always get a 1-D Series.
    return _to_1d_series(rolling_std.dropna())
100
+
101
+
102
def rv_to_autogluon_df(rv: pd.Series | pd.DataFrame, item_id: str = "series_1") -> pd.DataFrame:
    """Format realized volatility as a long DataFrame for AutoGluon TimeSeries.

    Args:
        rv: Volatility values; normalised with ``_to_1d_series`` first.
        item_id: Identifier written into every row of the ``item_id`` column.

    Returns:
        A DataFrame with the columns ``item_id``, ``timestamp`` and
        ``target``, without NaN targets or missing timestamps, sorted by
        ``timestamp``.
    """
    rv = _to_1d_series(rv)

    values = np.asarray(rv.values).reshape(-1)  # force 1-D
    idx = rv.index
    if not isinstance(idx, pd.DatetimeIndex):
        # Synthetic daily dates for indexes that carry no usable dates.
        synthetic = pd.date_range(start="2000-01-01", periods=len(values), freq="D")
        if pd.api.types.is_numeric_dtype(idx.dtype):
            # pd.to_datetime would not fail on integers; it would read them as
            # epoch offsets. Use the synthetic range instead of bogus dates.
            idx = synthetic
        else:
            try:
                idx = pd.to_datetime(idx, errors="coerce")
            except (TypeError, ValueError, OverflowError):
                idx = synthetic

    # Keep only rows with a valid target value.
    mask = ~np.isnan(values)
    df = pd.DataFrame({
        "item_id": item_id,
        "timestamp": idx[mask],
        "target": values[mask],
    })
    # Drop rows whose timestamp could not be parsed, then order by time.
    df = df[df["timestamp"].notna()].sort_values("timestamp")
    return df