muooon committed on
Commit
add1541
·
verified ·
1 Parent(s): 0c2074c

Upload 33 files

Files changed (34)
  1. .gitattributes +16 -0
  2. 1Gv2_AMP-compatible/emoclan.py +274 -0
  3. 1Gv2_AMP-compatible/emofact.py +129 -0
  4. 1Gv2_AMP-compatible/emolynx.py +139 -0
  5. 1Gv2_AMP-compatible/emonavi.py +113 -0
  6. 1Gv2_AMP-compatible/emoneco.py +161 -0
  7. 1Gv2_AMP-compatible/emozeal.py +161 -0
  8. 1Gv3_AMP-compatible/docs/rastrigin_EmoClan.png +3 -0
  9. 1Gv3_AMP-compatible/docs/rastrigin_EmoFact.png +3 -0
  10. 1Gv3_AMP-compatible/docs/rastrigin_EmoLynx.png +3 -0
  11. 1Gv3_AMP-compatible/docs/rastrigin_EmoNavi.png +3 -0
  12. 1Gv3_AMP-compatible/docs/rastrigin_EmoNeco.png +3 -0
  13. 1Gv3_AMP-compatible/docs/rastrigin_EmoZeal.png +3 -0
  14. 1Gv3_AMP-compatible/docs/rosenbrock_EmoClan.png +3 -0
  15. 1Gv3_AMP-compatible/docs/rosenbrock_EmoFact.png +3 -0
  16. 1Gv3_AMP-compatible/docs/rosenbrock_EmoLynx.png +3 -0
  17. 1Gv3_AMP-compatible/docs/rosenbrock_EmoNavi.png +3 -0
  18. 1Gv3_AMP-compatible/docs/rosenbrock_EmoNeco.png +3 -0
  19. 1Gv3_AMP-compatible/docs/rosenbrock_EmoZeal.png +3 -0
  20. 1Gv3_AMP-compatible/emoclan.py +277 -0
  21. 1Gv3_AMP-compatible/emofact.py +133 -0
  22. 1Gv3_AMP-compatible/emolynx.py +140 -0
  23. 1Gv3_AMP-compatible/emonavi.py +118 -0
  24. 1Gv3_AMP-compatible/emoneco.py +162 -0
  25. 1Gv3_AMP-compatible/emozeal.py +161 -0
  26. 1Gv3_AMP-compatible/logs/fluctuation_and_accuracy_panel.png +3 -0
  27. 1Gv3_AMP-compatible/logs/loss_comparison_panel.png +3 -0
  28. 1Gv3_AMP-compatible/logs/trec_gpt2_weight_pca_3panel.png +3 -0
  29. 1Gv3_AMP-compatible/logs/trec_squad_step_accuracy.json +2431 -0
  30. 1Gv3_AMP-compatible/logs/trec_weights_log.json +3 -0
  31. 1Gv3_AMP-compatible/profile.txt +45 -0
  32. 2Gv2_AMP-compatible/emoairy.py +162 -0
  33. 2Gv2_AMP-compatible/emocats.py +160 -0
  34. 2Gv2_AMP-compatible/emosens.py +132 -0
.gitattributes CHANGED
@@ -115,3 +115,19 @@ report/TensorBoard01.png filter=lfs diff=lfs merge=lfs -text
  report/TensorBoard03.png filter=lfs diff=lfs merge=lfs -text
  report/xyz_grid-0001-1234.png filter=lfs diff=lfs merge=lfs -text
  report/xyz_grid-0002-4321.png filter=lfs diff=lfs merge=lfs -text
+ 1Gv3_AMP-compatible/docs/rastrigin_EmoClan.png filter=lfs diff=lfs merge=lfs -text
+ 1Gv3_AMP-compatible/docs/rastrigin_EmoFact.png filter=lfs diff=lfs merge=lfs -text
+ 1Gv3_AMP-compatible/docs/rastrigin_EmoLynx.png filter=lfs diff=lfs merge=lfs -text
+ 1Gv3_AMP-compatible/docs/rastrigin_EmoNavi.png filter=lfs diff=lfs merge=lfs -text
+ 1Gv3_AMP-compatible/docs/rastrigin_EmoNeco.png filter=lfs diff=lfs merge=lfs -text
+ 1Gv3_AMP-compatible/docs/rastrigin_EmoZeal.png filter=lfs diff=lfs merge=lfs -text
+ 1Gv3_AMP-compatible/docs/rosenbrock_EmoClan.png filter=lfs diff=lfs merge=lfs -text
+ 1Gv3_AMP-compatible/docs/rosenbrock_EmoFact.png filter=lfs diff=lfs merge=lfs -text
+ 1Gv3_AMP-compatible/docs/rosenbrock_EmoLynx.png filter=lfs diff=lfs merge=lfs -text
+ 1Gv3_AMP-compatible/docs/rosenbrock_EmoNavi.png filter=lfs diff=lfs merge=lfs -text
+ 1Gv3_AMP-compatible/docs/rosenbrock_EmoNeco.png filter=lfs diff=lfs merge=lfs -text
+ 1Gv3_AMP-compatible/docs/rosenbrock_EmoZeal.png filter=lfs diff=lfs merge=lfs -text
+ 1Gv3_AMP-compatible/logs/fluctuation_and_accuracy_panel.png filter=lfs diff=lfs merge=lfs -text
+ 1Gv3_AMP-compatible/logs/loss_comparison_panel.png filter=lfs diff=lfs merge=lfs -text
+ 1Gv3_AMP-compatible/logs/trec_gpt2_weight_pca_3panel.png filter=lfs diff=lfs merge=lfs -text
+ 1Gv3_AMP-compatible/logs/trec_weights_log.json filter=lfs diff=lfs merge=lfs -text
1Gv2_AMP-compatible/emoclan.py ADDED
@@ -0,0 +1,274 @@
import torch
from torch.optim import Optimizer
import math
from typing import Callable, Union, Dict, Any, Tuple

"""
EmoClan v2.0 (250815) shadow-system v2.0 scalar-switch v2.0
AMP support completed (202507); p.data -> p already fixed
memo : "optimizer = EmoClan(model.parameters(), lr=1e-3, use_shadow=True)"
       Passing use_shadow=True when constructing the optimizer turns the shadow on.
emosens shadow-effect v1.0 applied; shadow-system and scalar-switch revised
"""

# Helper function
def exists(val):
    return val is not None

class EmoClan(Optimizer):
    # Class definition & initialization 🔸Shadow True (enabled) / False (disabled) switch
    def __init__(self, params: Union[list, torch.nn.Module],
                 lr: float = 1e-3,
                 betas: Tuple[float, float] = (0.9, 0.999),
                 eps: float = 1e-8,
                 weight_decay: float = 0.01,
                 lynx_betas: Tuple[float, float] = (0.9, 0.99),  # Lynx-specific betas
                 decoupled_weight_decay: bool = False,
                 use_shadow: bool = False
                 ):

        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 <= eps:
            raise ValueError(f"Invalid epsilon value: {eps}")
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError(f"Invalid beta parameter at index 0: {betas[0]}")
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError(f"Invalid beta parameter at index 1: {betas[1]}")

        # Validate the Lynx betas as well
        if not 0.0 <= lynx_betas[0] < 1.0:
            raise ValueError(f"Invalid lynx_beta parameter at index 0: {lynx_betas[0]}")
        if not 0.0 <= lynx_betas[1] < 1.0:
            raise ValueError(f"Invalid lynx_beta parameter at index 1: {lynx_betas[1]}")

        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay,
                        lynx_betas=lynx_betas, decoupled_weight_decay=decoupled_weight_decay)
        super().__init__(params, defaults)

        self._init_lr = lr            # kept for decoupled weight decay (used by Lynx)
        self.should_stop = False      # global stop flag
        self.use_shadow = use_shadow  # the EmoClan instance itself holds use_shadow

    # --- Emotion Mechanism ---
    def _update_ema(self, param_state: Dict[str, Any], loss_val: float) -> Dict[str, float]:
        """Update the short- and long-term EMAs from the loss value."""
        # param_state holds each parameter's state['ema']
        ema = param_state.setdefault('ema', {'short': loss_val, 'long': loss_val})
        ema['short'] = 0.3 * loss_val + 0.7 * ema['short']
        ema['long'] = 0.01 * loss_val + 0.99 * ema['long']
        return ema

    """Generate the emotion scalar from the EMA difference."""
    def _compute_scalar(self, ema: Dict[str, float]) -> float:
        diff = ema['short'] - ema['long']
        return math.tanh(5 * diff)

    """Decide the shadow blending ratio from the emotion scalar."""
    # Shadow blending ratio (abs > 0.6: 60-100%, abs > 0.1: 10-60%, otherwise 0%), following emosens
    # Old: shadow ratio (> 0.6: 80-90%, < -0.6: 10%, abs > 0.3: 30%, otherwise 0%)
    # Note: for scalar > +0.6, "return 0.7 (base) + 0.2 (range) * scalar" gives 0.82-0.9  <- wrong
    # Fix 1: for scalar beyond ±0.6, use "return base + (abs(scalar) - 0.6) / span * range"
    # Fix 2: for scalar beyond ±0.1, use "return base + (abs(scalar) - 0.1) / span * range"
    # Kept as three levels so it can be tuned per task (adjust using the notes above; currently follows shadow-effect)
    def _decide_ratio(self, scalar: float) -> float:
        if not self.use_shadow:
            return 0.0  # 🔸if use_shadow is False, the ratio is always 0
        if abs(scalar) > 0.6:
            return 0.6 + (abs(scalar) - 0.6) / 0.4 * 0.4  # was: return 0.7 + 0.2 * scalar
        elif abs(scalar) > 0.1:
            return 0.1 + (abs(scalar) - 0.1) / 0.5 * 0.5  # was: return 0.3
        return 0.0

    # --- Core gradient-update logic of each optimizer (merged as private methods) ---

    def _lynx_update(
        self,
        p: torch.Tensor,
        grad: torch.Tensor,
        param_state: Dict[str, Any],
        lr: float,
        beta1: float,
        beta2: float,
        wd_actual: float
    ):
        """Core gradient-update logic of EmoLynx."""
        # Stepweight decay: p = p * (1 - lr * wd)
        p.mul_(1. - lr * wd_actual)

        # The Lynx-specific EMA state is kept in param_state
        if 'exp_avg_lynx' not in param_state:
            param_state['exp_avg_lynx'] = torch.zeros_like(p)
        exp_avg = param_state['exp_avg_lynx']

        # Gradient blend
        blended_grad = grad.mul(1. - beta1).add_(exp_avg, alpha=beta1)

        # Sign-based update
        p.add_(blended_grad.sign_(), alpha=-lr)

        # Update exp_avg
        exp_avg.mul_(beta2).add_(grad, alpha=1. - beta2)

    def _navi_update(
        self,
        p: torch.Tensor,
        grad: torch.Tensor,
        param_state: Dict[str, Any],
        lr: float,
        betas: Tuple[float, float],
        eps: float,
        weight_decay: float
    ):
        """Core gradient-update logic of EmoNavi."""
        beta1, beta2 = betas

        exp_avg = param_state.setdefault('exp_avg_navi', torch.zeros_like(p))
        exp_avg_sq = param_state.setdefault('exp_avg_sq_navi', torch.zeros_like(p.to(torch.float32)))

        exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
        exp_avg_sq.mul_(beta2).addcmul_(grad.to(torch.float32), grad.to(torch.float32), value=1 - beta2)
        denom = exp_avg_sq.sqrt().add_(eps)

        # Weight decay (standard form)
        if weight_decay:
            p.mul_(1 - lr * weight_decay)

        p.addcdiv_(exp_avg, denom, value=-lr)

    def _fact_update(
        self,
        p: torch.Tensor,
        grad: torch.Tensor,
        param_state: Dict[str, Any],
        lr: float,
        betas: Tuple[float, float],  # beta2 is unused on the 2-D path but kept for compatibility (used for 1-D gradients)
        eps: float,
        weight_decay: float
    ):
        """Core gradient-update logic of EmoFact (Adafactor-like)."""
        beta1, beta2 = betas

        if grad.dim() >= 2:
            # Row- and column-wise mean squares (a lightweight approximation of the variance)
            # Cast grad to float32 for better numerical stability
            r_sq = torch.mean(grad.to(torch.float32) * grad.to(torch.float32), dim=tuple(range(1, grad.dim())), keepdim=True).add_(eps)
            c_sq = torch.mean(grad.to(torch.float32) * grad.to(torch.float32), dim=0, keepdim=True).add_(eps)

            param_state.setdefault('exp_avg_r_fact', torch.zeros_like(r_sq)).mul_(beta1).add_(torch.sqrt(r_sq), alpha=1 - beta1)
            param_state.setdefault('exp_avg_c_fact', torch.zeros_like(c_sq)).mul_(beta1).add_(torch.sqrt(c_sq), alpha=1 - beta1)

            # Normalize by the square root of the product of the reconstructed factors
            denom = torch.sqrt(param_state['exp_avg_r_fact'] * param_state['exp_avg_c_fact']).add_(eps)
            update_term = grad / denom  # grad keeps its original dtype (float16 or float32)

        else:  # 1-D (vector) gradient correction
            exp_avg = param_state.setdefault('exp_avg_fact', torch.zeros_like(p))
            exp_avg_sq = param_state.setdefault('exp_avg_sq_fact', torch.zeros_like(p.to(torch.float32)))

            exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
            exp_avg_sq.mul_(beta2).addcmul_(grad.to(torch.float32), grad.to(torch.float32), value=1 - beta2)
            denom = exp_avg_sq.sqrt().add_(eps)
            update_term = exp_avg / denom

        # Final parameter update (decoupled weight decay is also applied)
        # decoupled_weight_decay is passed into the group defaults in __init__,
        # but here the factor logic itself receives weight_decay directly
        p.mul_(1 - weight_decay * lr)
        p.add_(update_term, alpha=-lr)


    @torch.no_grad()
    def step(self, closure: Callable | None = None):
        loss = None
        if exists(closure):
            with torch.enable_grad():
                loss = closure()
        loss_val = loss.item() if loss is not None else 0.0

        # The global scalar_hist is managed by the EmoClan instance
        global_scalar_hist = self.state.setdefault('global_scalar_hist', [])

        # Keep the global emotion EMA state in self.state and compute the current emotion scalar
        global_ema_state = self.state.setdefault('global_ema', {'short': loss_val, 'long': loss_val})
        global_ema_state['short'] = 0.3 * loss_val + 0.7 * global_ema_state['short']
        global_ema_state['long'] = 0.01 * loss_val + 0.99 * global_ema_state['long']
        current_global_scalar = self._compute_scalar(global_ema_state)

        # Append the current emotion scalar to global_scalar_hist
        global_scalar_hist.append(current_global_scalar)
        if len(global_scalar_hist) >= 33:
            global_scalar_hist.pop(0)

        for group in self.param_groups:
            lr = group['lr']
            wd = group['weight_decay']
            eps = group['eps']
            decoupled_wd = group['decoupled_weight_decay']

            lynx_beta1, lynx_beta2 = group['lynx_betas']
            navi_fact_betas = group['betas']  # Navi/Fact share the default betas

            # _wd_actual for Lynx's decoupled_wd
            _wd_actual_lynx = wd
            if decoupled_wd:
                _wd_actual_lynx /= self._init_lr

            for p in group['params']:
                if p.grad is None:
                    continue

                grad = p.grad
                param_state = self.state[p]  # per-parameter state

                # --- Per-parameter emotion update and shadow handling ---
                # Each parameter's state['ema'] is updated from the same loss_val (shared across parameters).
                # Since loss_val is the single value returned by the closure, what is effectively used is the
                # global emotion rather than a truly per-parameter one.
                # Shadow-related processing runs only when use_shadow is True
                if self.use_shadow:
                    param_ema = self._update_ema(param_state, loss_val)
                    param_scalar = self._compute_scalar(param_ema)  # per-parameter scalar

                    ratio = self._decide_ratio(param_scalar)  # per-parameter ratio

                    if ratio > 0:
                        if 'shadow' not in param_state:
                            param_state['shadow'] = p.clone()
                        else:
                            # Blend the shadow into the current value
                            p.mul_(1 - ratio).add_(param_state['shadow'], alpha=ratio)
                        # Let the shadow track the current value
                        param_state['shadow'].lerp_(p, 0.05)

                # --- Optimizer selection and gradient update ---
                # The phase is decided from the global emotion scalar recorded in global_scalar_hist:
                #   abs(global_scalar) > 0.6  -> Lynx
                #   abs(global_scalar) > 0.3  -> Fact
                #   abs(global_scalar) < 0.3  -> Navi
                if abs(current_global_scalar) > 0.6:    # early phase / overfitting / divergence
                    self._lynx_update(p, grad, param_state, lr, lynx_beta1, lynx_beta2, _wd_actual_lynx)
                elif abs(current_global_scalar) > 0.3:  # late phase / tendency toward overfitting or divergence
                    self._fact_update(p, grad, param_state, lr, navi_fact_betas, eps, wd)
                else:                                   # -0.3 <= current_global_scalar <= 0.3: mid phase / steady state
                    self._navi_update(p, grad, param_state, lr, navi_fact_betas, eps, wd)

        # Early-stop decision
        # Evaluate global_scalar_hist
        if len(global_scalar_hist) >= 32:
            buf = global_scalar_hist
            avg_abs = sum(abs(s) for s in buf) / len(buf)
            std = sum((s - sum(buf)/len(buf))**2 for s in buf) / len(buf)
            if avg_abs < 0.05 and std < 0.005:
                self.should_stop = True  # external code can inspect this flag

        return loss

"""
The Emo series has learned much from Adam, Adafactor, Lion, Tiger, and others.
In developing it we are deeply grateful for the insights of those who came before us, and we keep exploring new possibilities.
The Emo series has learned much from Adam, Adafactor, Lion, and Tiger.
Rather than being their successors, in its development we deeply appreciate the insights of those who came before us, and continue to explore new possibilities beyond them.
"""
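For reference, a minimal usage sketch based on the memo in the header docstring; model, loss_fn, inputs, and targets are placeholder names, not part of this repository. step() is driven through a closure so the optimizer can read the loss value that feeds its emotion EMAs.

# Hypothetical training-loop fragment (model / loss_fn / inputs / targets are placeholders)
optimizer = EmoClan(model.parameters(), lr=1e-3, use_shadow=True)

def closure():
    optimizer.zero_grad()
    loss = loss_fn(model(inputs), targets)
    loss.backward()
    return loss

loss = optimizer.step(closure)  # step() calls loss.item() to update the emotion EMAs
if optimizer.should_stop:       # quiet-scalar flag; the training loop decides what to do with it
    pass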
1Gv2_AMP-compatible/emofact.py ADDED
@@ -0,0 +1,129 @@
import torch
from torch.optim import Optimizer
import math

"""
EmoFact v2.0 (250815) shadow-system v2.0
AMP support completed (202507); p.data -> p already fixed
emosens shadow-effect v1.0 applied; shadow-system revised
"""

class EmoFact(Optimizer):
    # Class definition & initialization
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999),
                 eps=1e-8, weight_decay=0.01):
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        super().__init__(params, defaults)
        self._init_lr = lr
        self.should_stop = False  # initialize the stop flag

    # Emotion EMA update (tension and calm)
    def _update_ema(self, state, loss_val):
        ema = state.setdefault('ema', {})
        ema['short'] = 0.3 * loss_val + 0.7 * ema.get('short', loss_val)
        ema['long'] = 0.01 * loss_val + 0.99 * ema.get('long', loss_val)
        return ema

    # Emotion scalar (difference of the EMAs; a smooth nonlinear scalar; tanh(5 * diff) sharpens the response)
    def _compute_scalar(self, ema):
        diff = ema['short'] - ema['long']
        return math.tanh(5 * diff)

    # Shadow blending ratio (abs > 0.6: 60-100%, abs > 0.1: 10-60%, otherwise 0%), following emosens
    # Old: shadow ratio (> 0.6: 80-90%, < -0.6: 10%, abs > 0.3: 30%, otherwise 0%)
    # Note: for scalar > +0.6, "return 0.7 (base) + 0.2 (range) * scalar" gives 0.82-0.9  <- wrong
    # Fix 1: for scalar beyond ±0.6, use "return base + (abs(scalar) - 0.6) / span * range"
    # Fix 2: for scalar beyond ±0.1, use "return base + (abs(scalar) - 0.1) / span * range"
    # Kept as three levels so it can be tuned per task (adjust using the notes above; currently follows shadow-effect)
    def _decide_ratio(self, scalar):
        if abs(scalar) > 0.6:
            return 0.6 + (abs(scalar) - 0.6) / 0.4 * 0.4  # was: return 0.7 + 0.2 * scalar
        elif abs(scalar) > 0.1:
            return 0.1 + (abs(scalar) - 0.1) / 0.5 * 0.5  # was: return 0.3
        return 0.0

    # Loss capture (loss_val is used as a number for the emotion decision; parameters without gradients are skipped)
    @torch.no_grad()
    def step(self, closure=None):
        loss = closure() if closure is not None else None
        loss_val = loss.item() if loss is not None else 0.0

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue

                grad = p.grad
                state = self.state[p]

                # Emotion EMA update and scalar generation (existing logic kept)
                ema = self._update_ema(state, loss_val)
                scalar = self._compute_scalar(ema)
                ratio = self._decide_ratio(scalar)

                # shadow_param: updated only when needed (existing logic kept)
                if ratio > 0:
                    if 'shadow' not in state:
                        state['shadow'] = p.clone()
                    else:
                        p.mul_(1 - ratio).add_(state['shadow'], alpha=ratio)
                    state['shadow'].lerp_(p, 0.05)

                # --- Gradient correction logic ---
                # For tensors with 2 or more dimensions, use the variance-based AB approximation
                if grad.dim() >= 2:
                    # Row- and column-wise mean squares (a lightweight approximation of the variance)
                    r_sq = torch.mean(grad * grad, dim=tuple(range(1, grad.dim())), keepdim=True).add_(group['eps'])
                    c_sq = torch.mean(grad * grad, dim=0, keepdim=True).add_(group['eps'])

                    # Build an approximate gradient matrix from the variance information.
                    # Treat A = sqrt(r_sq), B = sqrt(c_sq) as an AB factorization, compute the
                    # update term from it, and smooth the factors with an EMA.
                    beta1, beta2 = group['betas']

                    state.setdefault('exp_avg_r', torch.zeros_like(r_sq)).mul_(beta1).add_(torch.sqrt(r_sq), alpha=1 - beta1)
                    state.setdefault('exp_avg_c', torch.zeros_like(c_sq)).mul_(beta1).add_(torch.sqrt(c_sq), alpha=1 - beta1)

                    # Normalize by the square root of the product of the reconstructed factors;
                    # this plays a role similar to a second moment
                    denom = torch.sqrt(state['exp_avg_r'] * state['exp_avg_c']).add_(group['eps'])

                    # Final update term
                    update_term = grad / denom

                # 1-D (vector) gradient correction (close to a decoupled weight decay structure)
                else:
                    exp_avg = state.setdefault('exp_avg', torch.zeros_like(p))
                    exp_avg_sq = state.setdefault('exp_avg_sq', torch.zeros_like(p))
                    beta1, beta2 = group['betas']
                    exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                    exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    update_term = exp_avg / denom

                # Final parameter update (decoupled weight decay is also applied)
                p.add_(p, alpha=-group['weight_decay'] * group['lr'])
                p.add_(update_term, alpha=-group['lr'])

                # --- Early-stop logic (existing logic kept) ---
                hist = self.state.setdefault('scalar_hist', [])
                hist.append(scalar)
                if len(hist) >= 33:
                    hist.pop(0)

                # Early-stop decision
                if len(self.state['scalar_hist']) >= 32:
                    buf = self.state['scalar_hist']
                    avg_abs = sum(abs(s) for s in buf) / len(buf)
                    std = sum((s - sum(buf)/len(buf))**2 for s in buf) / len(buf)
                    if avg_abs < 0.05 and std < 0.005:
                        self.should_stop = True

        return loss

"""
https://github.com/muooon/EmoNavi
Fact is inspired by Adafactor,
and its VRAM-friendly design is something everyone loves.
"""
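As a rough standalone sketch of the factored normalization used above (tensor shapes are illustrative; inside the optimizer the row/column factors are additionally smoothed with an EMA before the product is taken):

import torch

g = torch.randn(4, 6)                                    # a 2-D gradient
eps = 1e-8
r = torch.sqrt((g * g).mean(dim=1, keepdim=True) + eps)  # row factor, shape (4, 1)
c = torch.sqrt((g * g).mean(dim=0, keepdim=True) + eps)  # column factor, shape (1, 6)
denom = torch.sqrt(r * c) + eps                          # broadcast rank-1 denominator, shape (4, 6)
update = g / denom                                       # stores 4 + 6 statistics instead of 4 * 6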
1Gv2_AMP-compatible/emolynx.py ADDED
@@ -0,0 +1,139 @@
import torch
from torch.optim import Optimizer
import math
from typing import Tuple, Callable, Union

"""
EmoLynx v2.0 (250815) shadow-system v2.0
AMP support completed (202507); p.data -> p already fixed
emosens shadow-effect v1.0 applied; shadow-system revised
"""

# Helper function (Lynx)
def exists(val):
    return val is not None

class EmoLynx(Optimizer):
    # Class definition & initialization
    def __init__(self, params: Union[list, torch.nn.Module], lr=1e-3, betas=(0.9, 0.99),
                 # lynx-specific betas, added for compatibility (lynx beta1 / beta2)
                 eps=1e-8, weight_decay=0.01, decoupled_weight_decay: bool = False):

        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        super().__init__(params, defaults)

        # saved so weight decay can be adjusted for lynx
        self._init_lr = lr
        self.should_stop = False  # initialize the stop flag
        self.decoupled_wd = decoupled_weight_decay

    # Emotion EMA update (tension and calm)
    def _update_ema(self, state, loss_val):
        ema = state.setdefault('ema', {})
        ema['short'] = 0.3 * loss_val + 0.7 * ema.get('short', loss_val)
        ema['long'] = 0.01 * loss_val + 0.99 * ema.get('long', loss_val)
        return ema

    # Emotion scalar (difference of the EMAs; a smooth nonlinear scalar; tanh(5 * diff) sharpens the response)
    def _compute_scalar(self, ema):
        diff = ema['short'] - ema['long']
        return math.tanh(5 * diff)

    # Shadow blending ratio (abs > 0.6: 60-100%, abs > 0.1: 10-60%, otherwise 0%), following emosens
    # Old: shadow ratio (> 0.6: 80-90%, < -0.6: 10%, abs > 0.3: 30%, otherwise 0%)
    # Note: for scalar > +0.6, "return 0.7 (base) + 0.2 (range) * scalar" gives 0.82-0.9  <- wrong
    # Fix 1: for scalar beyond ±0.6, use "return base + (abs(scalar) - 0.6) / span * range"
    # Fix 2: for scalar beyond ±0.1, use "return base + (abs(scalar) - 0.1) / span * range"
    # Kept as three levels so it can be tuned per task (adjust using the notes above; currently follows shadow-effect)
    def _decide_ratio(self, scalar):
        if abs(scalar) > 0.6:
            return 0.6 + (abs(scalar) - 0.6) / 0.4 * 0.4  # was: return 0.7 + 0.2 * scalar
        elif abs(scalar) > 0.1:
            return 0.1 + (abs(scalar) - 0.1) / 0.5 * 0.5  # was: return 0.3
        return 0.0

    # Loss capture (loss_val is used as a number for the emotion decision; parameters without gradients are skipped)
    @torch.no_grad()
    def step(self, closure: Callable | None = None):  # type hint added for the closure
        loss = None
        if exists(closure):  # use the exists helper for consistency
            with torch.enable_grad():
                loss = closure()
        loss_val = loss.item() if loss is not None else 0.0

        for group in self.param_groups:
            # Extract the shared Lynx hyperparameters
            lr, wd, beta1, beta2 = group['lr'], group['weight_decay'], *group['betas']

            # Separate the weight-decay handling (from lynx)
            _wd_actual = wd
            if self.decoupled_wd:
                _wd_actual /= self._init_lr  # adjust weight decay for the decoupled case

            for p in filter(lambda p: exists(p.grad), group['params']):  # keep only params with gradients

                grad = p.grad  # use p.grad directly (no ".data" needed)
                state = self.state[p]

                # EMA update and scalar generation (derive the scalar from the EMA difference and decide the spike ratio)
                ema = self._update_ema(state, loss_val)
                scalar = self._compute_scalar(ema)
                ratio = self._decide_ratio(scalar)

                # shadow_param: updated only when needed (a dynamic history that tracks the current value by 5% on spikes)
                if ratio > 0:
                    if 'shadow' not in state:
                        state['shadow'] = p.clone()
                    else:
                        p.mul_(1 - ratio).add_(state['shadow'], alpha=ratio)
                    state['shadow'].lerp_(p, 0.05)
                    # shadow is updated with p before the lynx update (tracks the current value by 5%)
                    # p.mul_(1 - ratio).add_(state['shadow'], alpha=ratio)
                    # EmoNavi: p = p * (1-ratio) + shadow * ratio

                # --- Start Lynx Gradient Update Logic ---

                # lynx state initialization (exp_avg)
                if 'exp_avg' not in state:
                    state['exp_avg'] = torch.zeros_like(p)
                exp_avg = state['exp_avg']

                # Stepweight decay (from lynx): p = p * (1 - lr * wd)
                # uses _wd_actual to honour decoupled_wd (EmoNavi applies wd at the end instead)
                p.mul_(1. - lr * _wd_actual)

                # Gradient blend
                # m_t = beta1 * exp_avg_prev + (1 - beta1) * grad
                blended_grad = grad.mul(1. - beta1).add_(exp_avg, alpha=beta1)

                # p: p = p - lr * sign(blended_grad)
                p.add_(blended_grad.sign_(), alpha=-lr)

                # exp_avg = beta2 * exp_avg + (1 - beta2) * grad
                exp_avg.mul_(beta2).add_(grad, alpha=1. - beta2)

                # --- End Lynx Gradient Update Logic ---

                # Scalar history for early stop (shared buffer / at most 32 entries / used to judge calm vs. motion)
                # this part accesses self.state, not the per-parameter state
                hist = self.state.setdefault('scalar_hist', [])
                hist.append(scalar)
                if len(hist) >= 33:
                    hist.pop(0)

            # Early-stop decision (a signal of quiet) - this part is outside the inner loop
            if len(self.state['scalar_hist']) >= 32:
                buf = self.state['scalar_hist']
                avg_abs = sum(abs(s) for s in buf) / len(buf)
                std = sum((s - sum(buf)/len(buf))**2 for s in buf) / len(buf)
                if avg_abs < 0.05 and std < 0.005:
                    self.should_stop = True  # external code can inspect this flag

        return loss

"""
https://github.com/muooon/EmoNavi
Lynx was developed with inspiration from Lion and Tiger,
which we deeply respect for their lightweight and intelligent design.
Lynx also integrates EmoNAVI to enhance its capabilities.
"""
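A tiny sketch of the sign-based step above, outside the optimizer class (the values are made up):

import torch

p = torch.tensor([0.50, -1.20, 0.00])
grad = torch.tensor([0.03, -0.80, 0.00])
exp_avg = torch.tensor([0.10, -0.20, 0.40])
beta1, lr = 0.9, 1e-3

blended = grad.mul(1 - beta1).add(exp_avg, alpha=beta1)  # m_t = beta1 * exp_avg + (1 - beta1) * grad
p = p - lr * blended.sign()                              # each coordinate moves by ±lr (or not at all when the blend is 0)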
1Gv2_AMP-compatible/emonavi.py ADDED
@@ -0,0 +1,113 @@
import torch
from torch.optim import Optimizer
import math

"""
EmoNavi v2.0 (250815) shadow-system v2.0
AMP support completed (202507); p.data -> p already fixed
emosens shadow-effect v1.0 applied; shadow-system revised
"""

class EmoNavi(Optimizer):
    # Class definition & initialization
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999),
                 eps=1e-8, weight_decay=0.01):
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        super().__init__(params, defaults)
        self._init_lr = lr
        self.should_stop = False  # initialize the stop flag

    # Emotion EMA update (tension and calm)
    def _update_ema(self, state, loss_val):
        ema = state.setdefault('ema', {})
        ema['short'] = 0.3 * loss_val + 0.7 * ema.get('short', loss_val)
        ema['long'] = 0.01 * loss_val + 0.99 * ema.get('long', loss_val)
        return ema

    # Emotion scalar (difference of the EMAs; a smooth nonlinear scalar; tanh(5 * diff) sharpens the response)
    def _compute_scalar(self, ema):
        diff = ema['short'] - ema['long']
        return math.tanh(5 * diff)

    # Shadow blending ratio (abs > 0.6: 60-100%, abs > 0.1: 10-60%, otherwise 0%), following emosens
    # Old: shadow ratio (> 0.6: 80-90%, < -0.6: 10%, abs > 0.3: 30%, otherwise 0%)
    # Note: for scalar > +0.6, "return 0.7 (base) + 0.2 (range) * scalar" gives 0.82-0.9  <- wrong
    # Fix 1: for scalar beyond ±0.6, use "return base + (abs(scalar) - 0.6) / span * range"
    # Fix 2: for scalar beyond ±0.1, use "return base + (abs(scalar) - 0.1) / span * range"
    # Kept as three levels so it can be tuned per task (adjust using the notes above; currently follows shadow-effect)
    def _decide_ratio(self, scalar):
        if abs(scalar) > 0.6:
            return 0.6 + (abs(scalar) - 0.6) / 0.4 * 0.4  # was: return 0.7 + 0.2 * scalar
        elif abs(scalar) > 0.1:
            return 0.1 + (abs(scalar) - 0.1) / 0.5 * 0.5  # was: return 0.3
        return 0.0

    # Loss capture (loss_val is used as a number for the emotion decision; parameters without gradients are skipped)
    @torch.no_grad()
    def step(self, closure=None):
        loss = closure() if closure is not None else None
        loss_val = loss.item() if loss is not None else 0.0

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue

                grad = p.grad
                state = self.state[p]

                # EMA update and scalar generation (derive the scalar from the EMA difference and decide the spike ratio)
                ema = self._update_ema(state, loss_val)
                scalar = self._compute_scalar(ema)
                ratio = self._decide_ratio(scalar)

                # shadow_param: updated only when needed (a dynamic history that tracks the current value by 5% on spikes)
                if ratio > 0:
                    if 'shadow' not in state:
                        state['shadow'] = p.clone()
                    else:
                        p.mul_(1 - ratio).add_(state['shadow'], alpha=ratio)
                    state['shadow'].lerp_(p, 0.05)

                # Scalar generation: the signal is the difference between the short- and long-term EMAs (strength of the "excitement")
                # Blending ratio: computed only when the scalar exceeds a threshold (a filter for trustworthy emotion signals)
                # -> when the scalar is small, ratio = 0 and no shadow blending happens
                # -> the emotion mechanism fires only on strong, reliable differences (an implicit confidence check)

                # First- and second-moment gradient correction (close to a decoupled weight decay structure)
                exp_avg = state.setdefault('exp_avg', torch.zeros_like(p))
                exp_avg_sq = state.setdefault('exp_avg_sq', torch.zeros_like(p))
                beta1, beta2 = group['betas']
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                denom = exp_avg_sq.sqrt().add_(group['eps'])

                step_size = group['lr']
                if group['weight_decay']:
                    p.add_(p, alpha=-group['weight_decay'] * step_size)
                p.addcdiv_(exp_avg, denom, value=-step_size)

                # Tells external code that the emotion mechanism has calmed down and training is "sufficiently stable" (not an automatic stopping rule)
                # Scalar history for early stop (shared buffer / at most 32 entries / used to judge calm vs. motion)
                hist = self.state.setdefault('scalar_hist', [])
                hist.append(scalar)
                if len(hist) >= 33:
                    hist.pop(0)

                # Early-stop decision (a signal of quiet)
                if len(self.state['scalar_hist']) >= 32:
                    buf = self.state['scalar_hist']
                    avg_abs = sum(abs(s) for s in buf) / len(buf)
                    std = sum((s - sum(buf)/len(buf))**2 for s in buf) / len(buf)
                    if avg_abs < 0.05 and std < 0.005:
                        self.should_stop = True  # 💡 external code can inspect this flag

        # Once 32 scalar values satisfy the quiet condition, the should_stop flag simply becomes True

        return loss

"""
https://github.com/muooon/EmoNavi
An emotion-driven optimizer that feels loss and navigates accordingly.
Don't think. Feel. Don't stop. Keep running. Believe in what's beyond.
"""
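The emotion scalar itself can be reproduced outside the optimizer. A small sketch with made-up loss values, showing that a falling loss drives the scalar negative while a spike pushes it back positive:

import math

short = long = 1.0                           # both EMAs start at the first observed loss
for loss_val in [1.0, 1.0, 0.6, 0.6, 2.0]:
    short = 0.3 * loss_val + 0.7 * short     # fast EMA ("tension")
    long = 0.01 * loss_val + 0.99 * long     # slow EMA ("calm")
    print(round(math.tanh(5 * (short - long)), 3))  # roughly 0.0, 0.0, -0.52, -0.75, 0.65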
1Gv2_AMP-compatible/emoneco.py ADDED
@@ -0,0 +1,161 @@
import torch
from torch.optim import Optimizer
import math
from typing import Tuple, Callable, Union

"""
EmoNeco v2.0 (250815) shadow-system v2.0 scalar-switch v2.0
AMP support completed (202507); p.data -> p already fixed
memo : "optimizer = EmoNeco(model.parameters(), lr=1e-3, use_shadow=True)"
       Passing use_shadow=True when constructing the optimizer turns the shadow on.
emosens shadow-effect v1.0 applied; shadow-system and scalar-switch revised
"""

# Helper function (Lynx)
def exists(val):
    return val is not None

# Soft Sign function
def softsign(x):
    return x / (1 + x.abs())

class EmoNeco(Optimizer):
    # Class definition & initialization 🔸Shadow True (enabled) / False (disabled) switch
    def __init__(self, params: Union[list, torch.nn.Module], lr=1e-3, betas=(0.9, 0.99),
                 # neco-specific betas, added for compatibility (neco beta1 / beta2)
                 eps=1e-8, weight_decay=0.01, decoupled_weight_decay: bool = False, use_shadow: bool = False):

        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        super().__init__(params, defaults)

        # saved for weight-decay handling
        self._init_lr = lr
        self.decoupled_wd = decoupled_weight_decay
        self.should_stop = False      # initialize the stop flag
        self.use_shadow = use_shadow  # 🔸store the shadow on/off flag

    # Emotion EMA update (tension and calm)
    def _update_ema(self, state, loss_val):
        ema = state.setdefault('ema', {})
        ema['short'] = 0.3 * loss_val + 0.7 * ema.get('short', loss_val)
        ema['long'] = 0.01 * loss_val + 0.99 * ema.get('long', loss_val)
        return ema

    # Emotion scalar (difference of the EMAs; a smooth nonlinear scalar; tanh(5 * diff) sharpens the response)
    def _compute_scalar(self, ema):
        diff = ema['short'] - ema['long']
        return math.tanh(5 * diff)

    # Shadow blending ratio (abs > 0.6: 60-100%, abs > 0.1: 10-60%, otherwise 0%), following emosens
    # Old: shadow ratio (> 0.6: 80-90%, < -0.6: 10%, abs > 0.3: 30%, otherwise 0%)
    # Note: for scalar > +0.6, "return 0.7 (base) + 0.2 (range) * scalar" gives 0.82-0.9  <- wrong
    # Fix 1: for scalar beyond ±0.6, use "return base + (abs(scalar) - 0.6) / span * range"
    # Fix 2: for scalar beyond ±0.1, use "return base + (abs(scalar) - 0.1) / span * range"
    # Kept as three levels so it can be tuned per task (adjust using the notes above; currently follows shadow-effect)
    def _decide_ratio(self, scalar):
        if not self.use_shadow:
            return 0.0  # 🔸if use_shadow is False, the ratio is always 0
        if abs(scalar) > 0.6:
            return 0.6 + (abs(scalar) - 0.6) / 0.4 * 0.4  # was: return 0.7 + 0.2 * scalar
        elif abs(scalar) > 0.1:
            return 0.1 + (abs(scalar) - 0.1) / 0.5 * 0.5  # was: return 0.3
        return 0.0

    # Loss capture (loss_val is used as a number for the emotion decision; parameters without gradients are skipped)
    @torch.no_grad()
    def step(self, closure: Callable | None = None):  # type hint added for the closure
        loss = None
        if exists(closure):  # use the exists helper for consistency
            with torch.enable_grad():
                loss = closure()
        loss_val = loss.item() if loss is not None else 0.0

        for group in self.param_groups:
            # Extract the shared hyperparameters
            lr, wd, beta1, beta2 = group['lr'], group['weight_decay'], *group['betas']

            # Separate the weight-decay handling (from lynx)
            _wd_actual = wd
            if self.decoupled_wd:
                _wd_actual /= self._init_lr  # adjust weight decay for the decoupled case

            for p in filter(lambda p: exists(p.grad), group['params']):  # keep only params with gradients

                grad = p.grad  # use p.grad directly (no ".data" needed)
                state = self.state[p]

                # EMA update and scalar generation (derive the scalar from the EMA difference and decide the spike ratio)
                ema = self._update_ema(state, loss_val)
                scalar = self._compute_scalar(ema)
                ratio = self._decide_ratio(scalar)  # 🔸ratio becomes 0 depending on use_shadow

                # shadow_param: updated only when needed (a dynamic history that tracks the current value by 5% on spikes)
                # 🔸the shadow is updated only when self.use_shadow is True and ratio > 0
                if self.use_shadow and ratio > 0:
                    if 'shadow' not in state:
                        state['shadow'] = p.clone()
                    else:
                        p.mul_(1 - ratio).add_(state['shadow'], alpha=ratio)
                    state['shadow'].lerp_(p, 0.05)
                    # shadow is updated with p before the update (tracks the current value by 5%)
                    # p.mul_(1 - ratio).add_(state['shadow'], alpha=ratio)
                    # EmoNavi: p = p * (1-ratio) + shadow * ratio

                # --- Start Neco Gradient Update Logic ---

                # neco state initialization (exp_avg)
                if 'exp_avg' not in state:
                    state['exp_avg'] = torch.zeros_like(p)
                exp_avg = state['exp_avg']

                # Stepweight decay (from lynx): p = p * (1 - lr * wd)
                # uses _wd_actual to honour decoupled_wd (EmoNavi applies wd at the end instead)
                p.mul_(1. - lr * _wd_actual)

                # Gradient blend
                # m_t = beta1 * exp_avg_prev + (1 - beta1) * grad
                blended_grad = grad.mul(1. - beta1).add_(exp_avg, alpha=beta1)
                grad_norm = torch.norm(grad, dtype=torch.float32)  # gradient norm

                # Removed: -0.2 < scalar <= -0.5 : SoftSign (slowly and smoothly)
                # 0.2 < abs(scalar) <= 0.5 : SoftSign + norm (smooths out oscillation)
                # otherwise                : Cautious (careful in steady state and when overfitting or collapsing)
                # p - lr * softsign(blended_grad)   (from softsign)
                # p - lr * direction * mask         (from Cautious)
                # safe_norm scales the blended gradient against extreme values
                if 0.2 < abs(scalar) <= 0.5:
                    safe_norm = grad_norm + group['eps']  # eps taken from the param group
                    modified_grad = softsign(blended_grad) * safe_norm
                    p.add_(-lr * modified_grad)
                else:
                    direction = blended_grad.sign()      # sign of the blended direction (Cautious handling)
                    mask = (direction == grad.sign())    # update only where the direction matches the current gradient
                    p.add_(direction * mask, alpha=-lr)  # Cautious update

                # exp_avg = beta2 * exp_avg + (1 - beta2) * grad
                exp_avg.mul_(beta2).add_(grad, alpha=1. - beta2)

                # --- End Neco Gradient Update Logic ---

                # Scalar history for early stop (shared buffer / at most 32 entries / used to judge calm vs. motion)
                # this part accesses self.state, not the per-parameter state
                hist = self.state.setdefault('scalar_hist', [])
                hist.append(scalar)
                if len(hist) >= 33:
                    hist.pop(0)

            # Early-stop decision (a signal of quiet) - this part is outside the inner loop
            if len(self.state['scalar_hist']) >= 32:
                buf = self.state['scalar_hist']
                avg_abs = sum(abs(s) for s in buf) / len(buf)
                std = sum((s - sum(buf)/len(buf))**2 for s in buf) / len(buf)
                if avg_abs < 0.05 and std < 0.005:
                    self.should_stop = True  # external code can inspect this flag

        return loss

"""
https://github.com/muooon/EmoNavi
Neco was developed with inspiration from Lion, Tiger, Cautious, softsign, and EmoLynx,
which we deeply respect for their lightweight and intelligent design.
Neco also integrates EmoNAVI to enhance its capabilities.
"""
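A small standalone sketch of the Cautious masking branch above (made-up numbers): only the coordinates where the momentum-blended direction agrees with the current gradient's sign are allowed to move.

import torch

grad = torch.tensor([0.2, -0.5, 0.1])
blended = torch.tensor([0.4, 0.3, -0.2])  # momentum-blended gradient
direction = blended.sign()
mask = (direction == grad.sign())         # per-coordinate agreement check
print(direction * mask)                   # only the first coordinate keeps a non-zero step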
1Gv2_AMP-compatible/emozeal.py ADDED
@@ -0,0 +1,161 @@
import torch
from torch.optim import Optimizer
import math

"""
EmoZeal v2.0 (250815) shadow-system v2.0 scalar-switch v2.0
AMP support completed (202507); p.data -> p already fixed
memo : "optimizer = EmoZeal(model.parameters(), lr=1e-3, use_shadow=True)"
       Passing use_shadow=True when constructing the optimizer turns the shadow on.
emosens shadow-effect v1.0 applied; shadow-system and scalar-switch revised
"""

# Soft Sign function
def softsign(x):
    return x / (1 + x.abs())

class EmoZeal(Optimizer):
    # Class definition & initialization 🔸Shadow True (enabled) / False (disabled) switch
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999),
                 eps=1e-8, weight_decay=0.01, use_shadow: bool = False):
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)

        super().__init__(params, defaults)

        self.alpha_prev = getattr(self, 'alpha_prev', 1.0)
        self._init_lr = lr
        self.should_stop = False      # initialize the stop flag
        self.use_shadow = use_shadow  # 🔸store the shadow on/off flag

    # Emotion EMA update (tension and calm)
    def _update_ema(self, state, loss_val):
        ema = state.setdefault('ema', {})
        ema['short'] = 0.3 * loss_val + 0.7 * ema.get('short', loss_val)
        ema['long'] = 0.01 * loss_val + 0.99 * ema.get('long', loss_val)
        return ema

    # Emotion scalar (difference of the EMAs; a smooth nonlinear scalar; tanh(5 * diff) sharpens the response)
    def _compute_scalar(self, ema):
        diff = ema['short'] - ema['long']
        return math.tanh(5 * diff)

    # Shadow blending ratio (abs > 0.6: 60-100%, abs > 0.1: 10-60%, otherwise 0%), following emosens
    # Old: shadow ratio (> 0.6: 80-90%, < -0.6: 10%, abs > 0.3: 30%, otherwise 0%)
    # Note: for scalar > +0.6, "return 0.7 (base) + 0.2 (range) * scalar" gives 0.82-0.9  <- wrong
    # Fix 1: for scalar beyond ±0.6, use "return base + (abs(scalar) - 0.6) / span * range"
    # Fix 2: for scalar beyond ±0.1, use "return base + (abs(scalar) - 0.1) / span * range"
    # Kept as three levels so it can be tuned per task (adjust using the notes above; currently follows shadow-effect)
    def _decide_ratio(self, scalar):
        if not self.use_shadow:
            return 0.0  # 🔸if use_shadow is False, the ratio is always 0
        if abs(scalar) > 0.6:
            return 0.6 + (abs(scalar) - 0.6) / 0.4 * 0.4  # was: return 0.7 + 0.2 * scalar
        elif abs(scalar) > 0.1:
            return 0.1 + (abs(scalar) - 0.1) / 0.5 * 0.5  # was: return 0.3
        return 0.0

    # Loss capture (loss_val is used as a number for the emotion decision; parameters without gradients are skipped)
    @torch.no_grad()
    def step(self, closure=None):
        loss = closure() if closure is not None else None
        loss_val = loss.item() if loss is not None else 0.0

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue

                grad = p.grad
                state = self.state[p]

                # Emotion EMA update and scalar generation (existing logic kept)
                ema = self._update_ema(state, loss_val)
                scalar = self._compute_scalar(ema)
                ratio = self._decide_ratio(scalar)  # 🔸ratio becomes 0 depending on use_shadow

                # shadow_param: updated only when needed (existing logic kept)
                # 🔸the shadow is updated only when self.use_shadow is True and ratio > 0
                if self.use_shadow and ratio > 0:
                    if 'shadow' not in state:
                        state['shadow'] = p.clone()
                    else:
                        p.mul_(1 - ratio).add_(state['shadow'], alpha=ratio)
                    state['shadow'].lerp_(p, 0.05)

                # --- Gradient correction logic ---
                # For tensors with 2 or more dimensions, use the variance-based AB approximation
                if grad.dim() >= 2:
                    # Row- and column-wise mean squares (a lightweight approximation of the variance)
                    r_sq = torch.mean(grad * grad, dim=tuple(range(1, grad.dim())), keepdim=True).add_(group['eps'])
                    c_sq = torch.mean(grad * grad, dim=0, keepdim=True).add_(group['eps'])

                    # Build an approximate gradient matrix from the variance information.
                    # Treat A = sqrt(r_sq), B = sqrt(c_sq) as an AB factorization, compute the
                    # update term from it, and smooth the factors with an EMA.
                    beta1, beta2 = group['betas']
                    eps = group['eps']
                    lr = group['lr']
                    exp_avg = state.setdefault('exp_avg', torch.zeros_like(p))
                    blended_grad = grad.mul(1 - beta1).add_(exp_avg, alpha=beta1)
                    grad_norm = torch.norm(grad, dtype=torch.float32)
                    # abs(scalar) > 0.6 : Cautious (careful with overfitting or collapse)
                    # abs(scalar) > 0.1 : SoftSign + NormEPS (smooths out oscillation)
                    # Removed: otherwise SoftSign (slowly and smoothly)
                    # p - lr * softsign(blended_grad)   (from softsign)
                    # p - lr * direction * mask         (from Cautious)
                    # safe_norm scales the blended gradient against extreme values
                    if abs(scalar) > 0.6:
                        direction = blended_grad.sign()      # sign of the blended direction (Cautious handling)
                        mask = (direction == grad.sign())    # update only where the direction matches the current gradient
                        p.add_(direction * mask, alpha=-lr)  # Cautious update
                    elif abs(scalar) > 0.1:
                        safe_norm = grad_norm + eps
                        modified_grad = softsign(blended_grad) * safe_norm
                        p.add_(-lr * modified_grad)

                    state.setdefault('exp_avg_r', torch.zeros_like(r_sq)).mul_(beta1).add_(torch.sqrt(r_sq), alpha=1 - beta1)
                    state.setdefault('exp_avg_c', torch.zeros_like(c_sq)).mul_(beta1).add_(torch.sqrt(c_sq), alpha=1 - beta1)

                    # Normalize by the square root of the product of the reconstructed factors;
                    # this plays a role similar to a second moment
                    denom = torch.sqrt(state['exp_avg_r'] * state['exp_avg_c']) + eps

                    # Final update term
                    update_term = grad / denom

                # 1-D (vector) gradient correction (close to a decoupled weight decay structure)
                else:
                    exp_avg = state.setdefault('exp_avg', torch.zeros_like(p))
                    exp_avg_sq = state.setdefault('exp_avg_sq', torch.zeros_like(p))
                    beta1, beta2 = group['betas']
                    exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                    exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    update_term = exp_avg / denom

                # Final parameter update (decoupled weight decay is also applied)
                p.add_(p, alpha=-group['weight_decay'] * group['lr'])
                p.add_(update_term, alpha=-group['lr'])

                # --- Early-stop logic (existing logic kept) ---
                hist = self.state.setdefault('scalar_hist', [])
                hist.append(scalar)
                if len(hist) >= 33:
                    hist.pop(0)

                # Early-stop decision
                if len(self.state['scalar_hist']) >= 32:
                    buf = self.state['scalar_hist']
                    avg_abs = sum(abs(s) for s in buf) / len(buf)
                    std = sum((s - sum(buf)/len(buf))**2 for s in buf) / len(buf)
                    if avg_abs < 0.05 and std < 0.005:
                        self.should_stop = True

        return loss

"""
https://github.com/muooon/EmoNavi
Zeal is inspired by Adafactor and EmoFact,
and its VRAM-friendly design is something everyone loves.
"""
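For intuition, softsign bounds the blended gradient to (-1, 1), which is what lets safe_norm re-scale it without ever amplifying a spike. A quick standalone check (the values are arbitrary):

import torch

def softsign(x):
    return x / (1 + x.abs())

print(softsign(torch.tensor([0.1, 2.0, -50.0])))  # ~0.09, ~0.67, ~-0.98: spikes are squashed, small values pass almost unchanged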
1Gv3_AMP-compatible/docs/rastrigin_EmoClan.png ADDED

Git LFS Details

  • SHA256: 4f8eef21b3f5b0cda9b4f77576178e404cbc3cf05c235ba4c8a0d6adb4a4bd1c
  • Pointer size: 131 Bytes
  • Size of remote file: 745 kB
1Gv3_AMP-compatible/docs/rastrigin_EmoFact.png ADDED

Git LFS Details

  • SHA256: f12bc7c3e0ad099eaf18db5eed0aaa49a22e926490c24e7c1461d020d9b89ed2
  • Pointer size: 131 Bytes
  • Size of remote file: 745 kB
1Gv3_AMP-compatible/docs/rastrigin_EmoLynx.png ADDED

Git LFS Details

  • SHA256: 986a1434173c2709c761e345fe02377f5f4f63db66076ad52ade521d6fc816ad
  • Pointer size: 131 Bytes
  • Size of remote file: 743 kB
1Gv3_AMP-compatible/docs/rastrigin_EmoNavi.png ADDED

Git LFS Details

  • SHA256: e128577ec5cfac12516f64aa64d36df57ecb969955c6b44714f91f65623fb2da
  • Pointer size: 131 Bytes
  • Size of remote file: 748 kB
1Gv3_AMP-compatible/docs/rastrigin_EmoNeco.png ADDED

Git LFS Details

  • SHA256: 399f7a0df2ba9c3f9b3d029aaf6bda53eb42f9a3b6d4528f18e69ee626b39858
  • Pointer size: 131 Bytes
  • Size of remote file: 743 kB
1Gv3_AMP-compatible/docs/rastrigin_EmoZeal.png ADDED

Git LFS Details

  • SHA256: 2bf9ec89e9fbb75d811ba31058ece87df6a858ff5cf6c426bddde8015aa18e21
  • Pointer size: 131 Bytes
  • Size of remote file: 745 kB
1Gv3_AMP-compatible/docs/rosenbrock_EmoClan.png ADDED

Git LFS Details

  • SHA256: 724066061af6a9b2e1f295ccf59d35bf5f34b5d92e2e8828a6e91396c74639f1
  • Pointer size: 131 Bytes
  • Size of remote file: 453 kB
1Gv3_AMP-compatible/docs/rosenbrock_EmoFact.png ADDED

Git LFS Details

  • SHA256: 394ffd0e91c799388a073d4f988a3954a430fa673b3bd2bf0066dea8c4a619aa
  • Pointer size: 131 Bytes
  • Size of remote file: 452 kB
1Gv3_AMP-compatible/docs/rosenbrock_EmoLynx.png ADDED

Git LFS Details

  • SHA256: 8c5fe9ed93bbb2734c705f6edc524ddc76ada069fe8c4326fb1d64e309e33109
  • Pointer size: 131 Bytes
  • Size of remote file: 405 kB
1Gv3_AMP-compatible/docs/rosenbrock_EmoNavi.png ADDED

Git LFS Details

  • SHA256: 7b8492f7754169900ed9acd9acf88a6cc4ac514e4483f7f72028568869891927
  • Pointer size: 131 Bytes
  • Size of remote file: 463 kB
1Gv3_AMP-compatible/docs/rosenbrock_EmoNeco.png ADDED

Git LFS Details

  • SHA256: 23dfe643fd868eefc446929b3fc9f54548d9dcfe69bdee658c906fd88e999b30
  • Pointer size: 131 Bytes
  • Size of remote file: 394 kB
1Gv3_AMP-compatible/docs/rosenbrock_EmoZeal.png ADDED

Git LFS Details

  • SHA256: 76baf8f9cdcf028a759ed087b376a76c2de3c51f4d52e65b2ba3e4a49f043baf
  • Pointer size: 131 Bytes
  • Size of remote file: 452 kB
1Gv3_AMP-compatible/emoclan.py ADDED
@@ -0,0 +1,277 @@
import torch
from torch.optim import Optimizer
import math
from typing import Callable, Union, Dict, Any, Tuple

"""
EmoClan v3.0 (250825) shadow-system v2.0 -effect NoN -moment v1.0 scalar-switch v2.0
AMP support completed (202507); p.data -> p already fixed
memo : "optimizer = EmoClan(model.parameters(), lr=1e-3, use_shadow=True)"
       Passing use_shadow=True when constructing the optimizer turns the shadow on.
emosens shadow-effect v1.0 applied; shadow-system and scalar-switch revised
"""

# Helper function
def exists(val):
    return val is not None

class EmoClan(Optimizer):
    # Class definition & initialization 🔸Shadow True (enabled) / False (disabled) switch
    def __init__(self, params: Union[list, torch.nn.Module],
                 lr: float = 1e-3,
                 betas: Tuple[float, float] = (0.9, 0.999),
                 eps: float = 1e-8,
                 weight_decay: float = 0.01,
                 lynx_betas: Tuple[float, float] = (0.9, 0.99),  # Lynx-specific betas
                 decoupled_weight_decay: bool = False,
                 use_shadow: bool = False
                 ):

        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 <= eps:
            raise ValueError(f"Invalid epsilon value: {eps}")
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError(f"Invalid beta parameter at index 0: {betas[0]}")
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError(f"Invalid beta parameter at index 1: {betas[1]}")

        # Validate the Lynx betas as well
        if not 0.0 <= lynx_betas[0] < 1.0:
            raise ValueError(f"Invalid lynx_beta parameter at index 0: {lynx_betas[0]}")
        if not 0.0 <= lynx_betas[1] < 1.0:
            raise ValueError(f"Invalid lynx_beta parameter at index 1: {lynx_betas[1]}")

        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay,
                        lynx_betas=lynx_betas, decoupled_weight_decay=decoupled_weight_decay)
        super().__init__(params, defaults)

        self._init_lr = lr            # kept for decoupled weight decay (used by Lynx)
        self.should_stop = False      # global stop flag
        self.use_shadow = use_shadow  # the EmoClan instance itself holds use_shadow

    # --- Emotion Mechanism ---
    def _update_ema(self, param_state: Dict[str, Any], loss_val: float) -> Dict[str, float]:
        """Update the short- and long-term EMAs from the loss value."""
        # param_state holds each parameter's state['ema']
        ema = param_state.setdefault('ema', {'short': loss_val, 'long': loss_val})
        ema['short'] = 0.3 * loss_val + 0.7 * ema['short']
        ema['long'] = 0.01 * loss_val + 0.99 * ema['long']
        return ema

    """Generate the emotion scalar from the EMA difference."""
    def _compute_scalar(self, ema: Dict[str, float]) -> float:
        diff = ema['short'] - ema['long']
        return math.tanh(5 * diff)

    """Decide the shadow blending ratio from the emotion scalar."""
    # Shadow blending ratio (abs > 0.6: 60-100%, abs > 0.1: 10-60%, otherwise 0%), following emosens
    # Old: shadow ratio (> 0.6: 80-90%, < -0.6: 10%, abs > 0.3: 30%, otherwise 0%)
    # Note: for scalar > +0.6, "return 0.7 (base) + 0.2 (range) * scalar" gives 0.82-0.9  <- wrong
    # Fix 1: for scalar beyond ±0.6, use "return base + (abs(scalar) - 0.6) / span * range"
    # Fix 2: for scalar beyond ±0.1, use "return base + (abs(scalar) - 0.1) / span * range"
    # Kept as three levels so it can be tuned per task (adjust using the notes above; currently follows shadow-effect)
    def _decide_ratio(self, scalar: float) -> float:
        if not self.use_shadow:
            return 0.0  # 🔸if use_shadow is False, the ratio is always 0
        if abs(scalar) > 0.6:
            return 0.6 + (abs(scalar) - 0.6) / 0.4 * 0.4  # was: return 0.7 + 0.2 * scalar
        elif abs(scalar) > 0.1:
            return 0.1 + (abs(scalar) - 0.1) / 0.5 * 0.5  # was: return 0.3
        return 0.0

    # --- Core gradient-update logic of each optimizer (merged as private methods) ---

    def _lynx_update(
        self,
        p: torch.Tensor,
        grad: torch.Tensor,
        param_state: Dict[str, Any],
        lr: float,
        beta1: float,
        beta2: float,
        wd_actual: float,
        scalar
    ):
        """Core gradient-update logic of EmoLynx."""
        # Stepweight decay: p = p * (1 - lr * wd)
        p.mul_(1. - lr * wd_actual)

        # The Lynx-specific EMA state is kept in param_state
        if 'exp_avg_lynx' not in param_state:
            param_state['exp_avg_lynx'] = torch.zeros_like(p)
        exp_avg = param_state['exp_avg_lynx']

        # Gradient blend
        blended_grad = grad.mul(1. - beta1).add_(exp_avg, alpha=beta1)

        # Sign-based update
        p.add_(blended_grad.sign_(), alpha=-lr * (1 - abs(scalar)))

        # Update exp_avg
        exp_avg.mul_(beta2).add_(grad, alpha=1. - beta2)

    def _navi_update(
        self,
        p: torch.Tensor,
        grad: torch.Tensor,
        param_state: Dict[str, Any],
        lr: float,
        betas: Tuple[float, float],
        eps: float,
        weight_decay: float,
        scalar
    ):
        """Core gradient-update logic of EmoNavi."""
        beta1, beta2 = betas

        exp_avg = param_state.setdefault('exp_avg_navi', torch.zeros_like(p))
        exp_avg_sq = param_state.setdefault('exp_avg_sq_navi', torch.zeros_like(p.to(torch.float32)))

        exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
        exp_avg_sq.mul_(beta2).addcmul_(grad.to(torch.float32), grad.to(torch.float32), value=1 - beta2)
        denom = exp_avg_sq.sqrt().add_(eps)

        # Weight decay (standard form)
        if weight_decay:
            p.mul_(1 - lr * weight_decay)

        p.addcdiv_(exp_avg, denom, value=-lr * (1 - abs(scalar)))

    def _fact_update(
        self,
        p: torch.Tensor,
        grad: torch.Tensor,
        param_state: Dict[str, Any],
        lr: float,
        betas: Tuple[float, float],  # beta2 is unused on the 2-D path but kept for compatibility (used for 1-D gradients)
        eps: float,
        weight_decay: float,
        scalar
    ):
        """Core gradient-update logic of EmoFact (Adafactor-like)."""
        beta1, beta2 = betas

        if grad.dim() >= 2:
            # Row- and column-wise mean squares (a lightweight approximation of the variance)
            # Cast grad to float32 for better numerical stability
            r_sq = torch.mean(grad.to(torch.float32) * grad.to(torch.float32), dim=tuple(range(1, grad.dim())), keepdim=True).add_(eps)
            c_sq = torch.mean(grad.to(torch.float32) * grad.to(torch.float32), dim=0, keepdim=True).add_(eps)

            param_state.setdefault('exp_avg_r_fact', torch.zeros_like(r_sq)).mul_(beta1).add_(torch.sqrt(r_sq), alpha=1 - beta1)
            param_state.setdefault('exp_avg_c_fact', torch.zeros_like(c_sq)).mul_(beta1).add_(torch.sqrt(c_sq), alpha=1 - beta1)

            # Normalize by the square root of the product of the reconstructed factors
            denom = torch.sqrt(param_state['exp_avg_r_fact'] * param_state['exp_avg_c_fact']).add_(eps)
            update_term = grad / denom  # grad keeps its original dtype (float16 or float32)

        else:  # 1-D (vector) gradient correction
            exp_avg = param_state.setdefault('exp_avg_fact', torch.zeros_like(p))
            exp_avg_sq = param_state.setdefault('exp_avg_sq_fact', torch.zeros_like(p.to(torch.float32)))

            exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
            exp_avg_sq.mul_(beta2).addcmul_(grad.to(torch.float32), grad.to(torch.float32), value=1 - beta2)
            denom = exp_avg_sq.sqrt().add_(eps)
            update_term = exp_avg / denom

        # Final parameter update (decoupled weight decay is also applied)
        # decoupled_weight_decay is passed into the group defaults in __init__,
        # but here the factor logic itself receives weight_decay directly
        p.mul_(1 - weight_decay * lr)
        p.add_(update_term, alpha=-lr * (1 - abs(scalar)))


    @torch.no_grad()
    def step(self, closure: Callable | None = None):
        loss = None
        if exists(closure):
            with torch.enable_grad():
                loss = closure()
        loss_val = loss.item() if loss is not None else 0.0

        # The global scalar_hist is managed by the EmoClan instance
        global_scalar_hist = self.state.setdefault('global_scalar_hist', [])

        # Keep the global emotion EMA state in self.state and compute the current emotion scalar
        global_ema_state = self.state.setdefault('global_ema', {'short': loss_val, 'long': loss_val})
        global_ema_state['short'] = 0.3 * loss_val + 0.7 * global_ema_state['short']
        global_ema_state['long'] = 0.01 * loss_val + 0.99 * global_ema_state['long']
        current_global_scalar = self._compute_scalar(global_ema_state)

        # Append the current emotion scalar to global_scalar_hist
        global_scalar_hist.append(current_global_scalar)
        if len(global_scalar_hist) >= 33:
            global_scalar_hist.pop(0)

        for group in self.param_groups:
            lr = group['lr']
            wd = group['weight_decay']
            eps = group['eps']
            decoupled_wd = group['decoupled_weight_decay']

            lynx_beta1, lynx_beta2 = group['lynx_betas']
            navi_fact_betas = group['betas']  # Navi/Fact share the default betas

            # _wd_actual for Lynx's decoupled_wd
            _wd_actual_lynx = wd
            if decoupled_wd:
                _wd_actual_lynx /= self._init_lr

            for p in group['params']:
                if p.grad is None:
                    continue

                grad = p.grad
                param_state = self.state[p]  # per-parameter state

                # --- Per-parameter emotion update and shadow handling ---
                # Each parameter's state['ema'] is updated from the same loss_val (shared across parameters).
                # Since loss_val is the single value returned by the closure, what is effectively used is the
                # global emotion rather than a truly per-parameter one.
                # Shadow-related processing runs only when use_shadow is True
                if self.use_shadow:
                    param_ema = self._update_ema(param_state, loss_val)
                    param_scalar = self._compute_scalar(param_ema)  # per-parameter scalar

                    ratio = self._decide_ratio(param_scalar)  # per-parameter ratio

                    if ratio > 0:
                        if 'shadow' not in param_state:
                            param_state['shadow'] = p.clone()
                        else:
                            # Blend the shadow into the current value
                            p.mul_(1 - ratio).add_(param_state['shadow'], alpha=ratio)
                        # Let the shadow track the current value
                        param_state['shadow'].lerp_(p, 0.05)

                # --- Optimizer selection and gradient update ---
                # The phase is decided from the global emotion scalar recorded in global_scalar_hist:
                #   abs(global_scalar) > 0.6  -> Lynx
                #   abs(global_scalar) > 0.3  -> Fact
                #   abs(global_scalar) < 0.3  -> Navi
                if abs(current_global_scalar) > 0.6:    # early phase / overfitting / divergence
                    self._lynx_update(p, grad, param_state, lr, lynx_beta1, lynx_beta2, _wd_actual_lynx, current_global_scalar)
                elif abs(current_global_scalar) > 0.3:  # late phase / tendency toward overfitting or divergence
                    self._fact_update(p, grad, param_state, lr, navi_fact_betas, eps, wd, current_global_scalar)
                else:                                   # -0.3 <= current_global_scalar <= 0.3: mid phase / steady state
                    self._navi_update(p, grad, param_state, lr, navi_fact_betas, eps, wd, current_global_scalar)

        # Early-stop decision
        # Evaluate global_scalar_hist
        if len(global_scalar_hist) >= 32:
            buf = global_scalar_hist
            avg_abs = sum(abs(s) for s in buf) / len(buf)
            std = sum((s - sum(buf)/len(buf))**2 for s in buf) / len(buf)
            if avg_abs < 0.05 and std < 0.005:
                self.should_stop = True  # external code can inspect this flag

        return loss

"""
The Emo series has learned much from Adam, Adafactor, Lion, Tiger, and others.
In developing it we are deeply grateful for the insights of those who came before us, and we keep exploring new possibilities.
The Emo series has learned much from Adam, Adafactor, Lion, and Tiger.
Rather than being their successors, in its development we deeply appreciate the insights of those who came before us, and continue to explore new possibilities beyond them.
"""
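Relative to the 1Gv2 version of this file, the practical difference is the (1 - abs(scalar)) factor applied to the learning rate in all three update paths; a quick sketch of the effective step size it produces:

lr = 1e-3
for scalar in (0.0, 0.3, 0.6, 0.9):
    print(scalar, lr * (1 - abs(scalar)))  # roughly 1e-3, 7e-4, 4e-4, 1e-4: updates shrink as the emotion scalar heats up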
1Gv3_AMP-compatible/emofact.py ADDED
@@ -0,0 +1,133 @@
1
+ import torch
2
+ from torch.optim import Optimizer
3
+ import math
4
+
5
+ """
6
+ EmoFact v3.0 (250825) shadow-system v2.0 -effect NoN -moment v1.0
7
+ AMP対応完了(202507) p.data -> p 修正済み
8
+ emosens shadow-effect v1.0 反映 shadow-system 修正
9
+ optimizer 指定の際に True / False で shadow を切替できる(現在 False)
10
+ """
11
+
12
+ class EmoFact(Optimizer):
13
+ # クラス定義&初期化
14
+ def __init__(self, params, lr=1e-3, betas=(0.9, 0.999),
15
+ eps=1e-8, weight_decay=0.01, use_shadow: bool = False):
16
+ defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
17
+ super().__init__(params, defaults)
18
+ self._init_lr = lr
19
+ self.should_stop = False # 停止フラグの初期化
20
+ self.use_shadow = use_shadow # 🔸shadowの使用フラグを保存
21
+
22
+ # 感情EMA更新(緊張と安静)
23
+ def _update_ema(self, state, loss_val):
24
+ ema = state.setdefault('ema', {})
25
+ ema['short'] = 0.3 * loss_val + 0.7 * ema.get('short', loss_val)
26
+ ema['long'] = 0.01 * loss_val + 0.99 * ema.get('long', loss_val)
27
+ return ema
28
+
29
+ # 感情スカラー値生成(EMA差分、滑らかな非線形スカラー、tanh 5 * diff で鋭敏さ強調)
30
+ def _compute_scalar(self, ema):
31
+ diff = ema['short'] - ema['long']
32
+ return math.tanh(5 * diff)
33
+
34
+ # Shadow混合比率(> abs 0.6:60〜100%、 > abs 0.1:10〜60%、 平時:0%) emosens反映
35
+ # 旧:Shadow混合比率(> 0.6:80〜90%、 < -0.6:10%、 abs> 0.3:30%、 平時:0%)
36
+ # 説明:scalar>+0.6 は "return 0.7(開始値) + 0.2(変化幅) * scalar" = 0.82~0.9 ← 誤
37
+ # 修正1:scalar>±0.6 を "return 開始値 + (abs(scalar) - 0.6(範囲)) / 範囲量 * 変化幅"
38
+ # 修正2:scalar>±0.1 を "return 開始値 + (abs(scalar) - 0.1(範囲)) / 範囲量 * 変化幅"
39
+ # タスク等に応じた調整のため3段階で適用しておく(上記を参考に調整してください/現状はshadow-effect反映)
40
+ def _decide_ratio(self, scalar):
41
+ if not self.use_shadow:
42
+ return 0.0 # 🔸use_shadow が False の場合は常に比率を 0 にする
43
+ if abs(scalar) > 0.6:
44
+ return 0.6 + (abs(scalar) - 0.6) / 0.4 * 0.4 # 元 return 0.7 + 0.2 * scalar
45
+ elif abs(scalar) > 0.1:
46
+ return 0.1 + (abs(scalar) - 0.1) / 0.5 * 0.5 # 元 return 0.3
47
+ return 0.0
48
+
49
+ # 損失取得(損失値 loss_val を数値化、感情判定に使用、存在しないパラメータ(更新不要)はスキップ)
50
+ @torch.no_grad()
51
+ def step(self, closure=None):
52
+ loss = closure() if closure is not None else None
53
+ loss_val = loss.item() if loss is not None else 0.0
54
+
55
+ for group in self.param_groups:
56
+ for p in group['params']:
57
+ if p.grad is None:
58
+ continue
59
+
60
+ grad = p.grad
61
+ state = self.state[p]
62
+
63
+ # 感情EMA更新・スカラー生成 (既存ロジックを維持)
64
+ ema = self._update_ema(state, loss_val)
65
+ scalar = self._compute_scalar(ema)
66
+ ratio = self._decide_ratio(scalar)
67
+
68
+ # shadow_param:必要時のみ更新 (スパイク部分に現在値を5%ずつ追従させる動的履歴)
69
+ if self.use_shadow and ratio > 0:
70
+ if 'shadow' not in state:
71
+ state['shadow'] = p.clone()
72
+ else:
73
+ p.mul_(1 - ratio).add_(state['shadow'], alpha=ratio)
74
+ state['shadow'].lerp_(p, 0.05)
75
+
76
+ # --- 勾配補正ロジック ---
77
+ # 行列の形状が2次元以上の場合、分散情報ベースのAB近似を使用
78
+ if grad.dim() >= 2:
79
+ # 行と列の2乗平均を計算 (分散の軽量な近似)
80
+ r_sq = torch.mean(grad * grad, dim=tuple(range(1, grad.dim())), keepdim=True).add_(group['eps'])
81
+ c_sq = torch.mean(grad * grad, dim=0, keepdim=True).add_(group['eps'])
82
+
83
+ # 分散情報から勾配の近似行列を生成
84
+ # AB行列として見立てたものを直接生成し更新項を計算する
85
+ # A = sqrt(r_sq), B = sqrt(c_sq) とすることでAB行列の近似を再現
86
+ # これをEMAで平滑化する
87
+ beta1, beta2 = group['betas']
88
+
89
+ state.setdefault('exp_avg_r', torch.zeros_like(r_sq)).mul_(beta1).add_(torch.sqrt(r_sq), alpha=1 - beta1)
90
+ state.setdefault('exp_avg_c', torch.zeros_like(c_sq)).mul_(beta1).add_(torch.sqrt(c_sq), alpha=1 - beta1)
91
+
92
+ # 再構築した近似勾配の平方根の積で正規化
93
+ # これにより2次モーメントのような役割を果たす
94
+ denom = torch.sqrt(state['exp_avg_r'] * state['exp_avg_c']).add_(group['eps'])
95
+
96
+ # 最終的な更新項を計算
97
+ update_term = grad / denom
98
+
99
+ # 1次元(ベクトル)の勾配補正(decoupled weight decay 構造に近い)
100
+ else:
101
+ exp_avg = state.setdefault('exp_avg', torch.zeros_like(p))
102
+ exp_avg_sq = state.setdefault('exp_avg_sq', torch.zeros_like(p))
103
+ beta1, beta2 = group['betas']
104
+ exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
105
+ exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
106
+ denom = exp_avg_sq.sqrt().add_(group['eps'])
107
+ update_term = exp_avg / denom
108
+
109
+ # 最終的なパラメータ更新 (decoupled weight decayも適用)
110
+ p.add_(p, alpha=-group['weight_decay'] * group['lr'])
111
+ p.add_(update_term, alpha=-group['lr'] * (1 - abs(scalar)))
112
+
113
+ # --- Early Stop ロジック (既存ロジックを維持) ---
114
+ hist = self.state.setdefault('scalar_hist', [])
115
+ hist.append(scalar)
116
+ if len(hist) >= 33:
117
+ hist.pop(0)
118
+
119
+ # Early Stop判断
120
+ if len(self.state['scalar_hist']) >= 32:
121
+ buf = self.state['scalar_hist']
122
+ avg_abs = sum(abs(s) for s in buf) / len(buf)
123
+ std = sum((s - sum(buf)/len(buf))**2 for s in buf) / len(buf)
124
+ if avg_abs < 0.05 and std < 0.005:
125
+ self.should_stop = True
126
+
127
+ return loss
128
+
129
+ """
130
+ https://github.com/muooon/EmoNavi
131
+ Fact is inspired by Adafactor, and emoairy,
132
+ and its VRAM-friendly design is something everyone loves.
133
+ """
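To see why the 2-D branch above is VRAM-friendly, here is a standalone sketch with toy sizes (not repository code; the EMA smoothing of exp_avg_r / exp_avg_c is omitted): the persistent per-weight state is n + m values instead of the n * m a full second moment would need, and the full-size denominator only exists transiently while the update is computed.

import torch

g = torch.randn(1024, 4096)                                      # toy 2-D gradient
r_sq = (g * g).mean(dim=1, keepdim=True) + 1e-8                  # row statistics, shape (1024, 1)
c_sq = (g * g).mean(dim=0, keepdim=True) + 1e-8                  # column statistics, shape (1, 4096)
denom = torch.sqrt(torch.sqrt(r_sq) * torch.sqrt(c_sq)) + 1e-8   # broadcasts to (1024, 4096)
update_term = g / denom                                          # same normalization shape as the 2-D branch
print(r_sq.numel() + c_sq.numel(), "stored values versus", g.numel(), "for a full second moment")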
1Gv3_AMP-compatible/emolynx.py ADDED
@@ -0,0 +1,140 @@
1
+ import torch
2
+ from torch.optim import Optimizer
3
+ import math
4
+ from typing import Tuple, Callable, Union
5
+
6
+ """
7
+ EmoLynx v3.0 (250825) shadow-system v2.0 -effect NoN -moment v1.0
8
+ AMP対応完了(202507) p.data -> p 修正済み
9
+ emosens shadow-effect v1.0 反映 shadow-system 修正
10
+ optimizer 指定の際に True / False で shadow を切替できる(現在 False)
11
+ """
12
+
13
+ # Helper function (Lynx)
14
+ def exists(val):
15
+ return val is not None
16
+
17
+ class EmoLynx(Optimizer):
18
+ # クラス定義&初期化 lynx用ベータ・互換性の追加(lynx用beta1・beta2)
19
+ def __init__(self, params: Union[list, torch.nn.Module], lr=1e-3, betas=(0.9, 0.99),
20
+ eps=1e-8, weight_decay=0.01, decoupled_weight_decay: bool = False, use_shadow: bool = False):
21
+ defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
22
+ super().__init__(params, defaults)
23
+ # lynxに応じてウェイト減衰のため保存
24
+ self._init_lr = lr
25
+ self.should_stop = False # 停止フラグの初期化
26
+ self.decoupled_wd = decoupled_weight_decay
27
+ self.use_shadow = use_shadow # 🔸shadowの使用フラグを保存
28
+
29
+ # 感情EMA更新(緊張と安静)
30
+ def _update_ema(self, state, loss_val):
31
+ ema = state.setdefault('ema', {})
32
+ ema['short'] = 0.3 * loss_val + 0.7 * ema.get('short', loss_val)
33
+ ema['long'] = 0.01 * loss_val + 0.99 * ema.get('long', loss_val)
34
+ return ema
35
+
36
+ # 感情スカラー値生成(EMA差分、滑らかな非線形スカラー、tanh 5 * diff で鋭敏さ強調)
37
+ def _compute_scalar(self, ema):
38
+ diff = ema['short'] - ema['long']
39
+ return math.tanh(5 * diff)
40
+
41
+ # Shadow混合比率(> abs 0.6:60〜100%、 > abs 0.1:10〜60%、 平時:0%) emosens反映
42
+ # 旧:Shadow混合比率(> 0.6:80〜90%、 < -0.6:10%、 abs> 0.3:30%、 平時:0%)
43
+ # 説明:scalar>+0.6 は "return 0.7(開始値) + 0.2(変化幅) * scalar" = 0.82~0.9 ← 誤
44
+ # 修正1:scalar>±0.6 を "return 開始値 + (abs(scalar) - 0.6(範囲)) / 範囲量 * 変化幅"
45
+ # 修正2:scalar>±0.1 を "return 開始値 + (abs(scalar) - 0.1(範囲)) / 範囲量 * 変化幅"
46
+ # タスク等に応じた調整のため3段階で適用しておく(上記を参考に調整してください/現状はshadow-effect反映)
47
+ def _decide_ratio(self, scalar):
48
+ if not self.use_shadow:
49
+ return 0.0 # 🔸use_shadow が False の場合は常に比率を 0 にする
50
+ if abs(scalar) > 0.6:
51
+ return 0.6 + (abs(scalar) - 0.6) / 0.4 * 0.4 # 元 return 0.7 + 0.2 * scalar
52
+ elif abs(scalar) > 0.1:
53
+ return 0.1 + (abs(scalar) - 0.1) / 0.5 * 0.5 # 元 return 0.3
54
+ return 0.0
55
+
56
+ # 損失取得(損失値 loss_val を数値化、感情判定に使用、存在しないパラメータ(更新不要)はスキップ)
57
+ @torch.no_grad()
58
+ def step(self, closure: Union[Callable, None] = None): # クロージャの型ヒントを追加(Python 3.10 未満でも評価できるよう Union を使用)
59
+ loss = None
60
+ if exists(closure): # 一貫性のためにexistsヘルパーを使う
61
+ with torch.enable_grad():
62
+ loss = closure()
63
+ loss_val = loss.item() if loss is not None else 0.0
64
+
65
+ for group in self.param_groups:
66
+ # リンクス共通パラメータ抽出
67
+ lr, wd, beta1, beta2 = group['lr'], group['weight_decay'], *group['betas']
68
+
69
+ # ウェイト減衰の処理を分離 (from lynx)
70
+ _wd_actual = wd
71
+ if self.decoupled_wd:
72
+ _wd_actual /= self._init_lr # 非連結時ウェイト減衰調整
73
+
74
+ for p in filter(lambda p: exists(p.grad), group['params']): # PGチェックにフィルタ
75
+
76
+ grad = p.grad # PG直接使用(計算に".data"不要)
77
+ state = self.state[p]
78
+
79
+ # EMA更新・スカラー生成(EMA差分からスカラーを生成しスパイク比率を決定)
80
+ ema = self._update_ema(state, loss_val)
81
+ scalar = self._compute_scalar(ema)
82
+ ratio = self._decide_ratio(scalar)
83
+
84
+ # shadow_param:必要時のみ更新(スパイク部分に現在値を5%ずつ追従させる動的履歴)
85
+ if self.use_shadow and ratio > 0:
86
+ if 'shadow' not in state:
87
+ state['shadow'] = p.clone()
88
+ else:
89
+ p.mul_(1 - ratio).add_(state['shadow'], alpha=ratio)
90
+ state['shadow'].lerp_(p, 0.05)
91
+ # lynx更新前 p で shadow 更新(現在値を5%ずつ追従)
92
+ # p.mul_(1 - ratio).add_(state['shadow'], alpha=ratio)
93
+ # EmoNavi: p = p * (1-ratio) + shadow * ratio
94
+
95
+ # --- Start Lynx Gradient Update Logic ---
96
+
97
+ # lynx初期化(exp_avg_sq)
98
+ if 'exp_avg' not in state:
99
+ state['exp_avg'] = torch.zeros_like(p)
100
+ exp_avg = state['exp_avg']
101
+
102
+ # Stepweight decay (from lynx): p = p * (1 - lr * wd)
103
+ # decoupled_wd 考慮 _wd_actual 使用(EmoNaviのwdは最後に適用)
104
+ p.mul_(1. - lr * _wd_actual)
105
+
106
+ # 勾配ブレンド
107
+ # m_t = beta1 * exp_avg_prev + (1 - beta1) * grad
108
+ blended_grad = grad.mul(1. - beta1).add_(exp_avg, alpha=beta1)
109
+
110
+ # p: p = p - lr * sign(blended_grad)
111
+ p.add_(blended_grad.sign_(), alpha = -lr * (1 - abs(scalar)))
112
+
113
+ # exp_avg = beta2 * exp_avg + (1 - beta2) * grad
114
+ exp_avg.mul_(beta2).add_(grad, alpha = 1. - beta2)
115
+
116
+ # --- End Lynx Gradient Update Logic ---
117
+
118
+ # Early Stop用 scalar記録(バッファ共通で管理/最大32件保持/動静評価)
119
+ # この部分は p.state ではなく self.state にアクセスする
120
+ hist = self.state.setdefault('scalar_hist', [])
121
+ hist.append(scalar)
122
+ if len(hist) >= 33:
123
+ hist.pop(0)
124
+
125
+ # Early Stop判断(静けさの合図) - This part is outside the inner loop
126
+ if len(self.state['scalar_hist']) >= 32:
127
+ buf = self.state['scalar_hist']
128
+ avg_abs = sum(abs(s) for s in buf) / len(buf)
129
+ std = sum((s - sum(buf)/len(buf))**2 for s in buf) / len(buf)
130
+ if avg_abs < 0.05 and std < 0.005:
131
+ self.should_stop = True # 外部からこれを見て判断可
132
+
133
+ return loss
134
+
135
+ """
136
+ https://github.com/muooon/EmoNavi
137
+ Lynx was developed with inspiration from Lion, Tiger, and emocats,
138
+ which we deeply respect for their lightweight and intelligent design.
139
+ Lynx also integrates EmoNAVI to enhance its capabilities.
140
+ """
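Because the Lynx branch steps along sign(blended_grad), each coordinate moves by at most lr * (1 - |scalar|) per step no matter how large or small the gradient is. A quick standalone check with toy numbers (not repository code):

import torch

lr, scalar = 1e-3, 0.4
blended_grad = torch.tensor([0.002, -37.0, 0.9])
step = blended_grad.sign() * (-lr * (1 - abs(scalar)))
print(step)   # tensor([-0.0006,  0.0006, -0.0006]): equal magnitudes, only the direction comes from the gradient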
1Gv3_AMP-compatible/emonavi.py ADDED
@@ -0,0 +1,118 @@
1
+ import torch
2
+ from torch.optim import Optimizer
3
+ import math
4
+
5
+ """
6
+ EmoNavi v3.0 (250825) shadow-system v2.0 -effect NoN -moment v1.0
7
+ AMP対応完了(202507) p.data -> p 修正済み
8
+ emosens shadow-effect v1.0 反映 shadow-system 修正
9
+ optimizer 指定の際に True / False で shadow を切替できる(現在 False)
10
+ """
11
+
12
+ class EmoNavi(Optimizer):
13
+ # クラス定義&初期化
14
+ def __init__(self, params, lr=1e-3, betas=(0.9, 0.999),
15
+ eps=1e-8, weight_decay=0.01, use_shadow: bool = False):
16
+ defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
17
+ super().__init__(params, defaults)
18
+ self._init_lr = lr
19
+ self.should_stop = False # 停止フラグの初期化
20
+ self.use_shadow = use_shadow # 🔸shadowの使用フラグを保存
21
+
22
+ # 感情EMA更新(緊張と安静)
23
+ def _update_ema(self, state, loss_val):
24
+ ema = state.setdefault('ema', {})
25
+ ema['short'] = 0.3 * loss_val + 0.7 * ema.get('short', loss_val)
26
+ ema['long'] = 0.01 * loss_val + 0.99 * ema.get('long', loss_val)
27
+ return ema
28
+
29
+ # 感情スカラー値生成(EMA差分、滑らかな非線形スカラー、tanh 5 * diff で鋭敏さ強調)
30
+ def _compute_scalar(self, ema):
31
+ diff = ema['short'] - ema['long']
32
+ return math.tanh(5 * diff)
33
+
34
+ # Shadow混合比率(> abs 0.6:60〜100%、 > abs 0.1:10〜60%、 平時:0%) emosens反映
35
+ # 旧:Shadow混合比率(> 0.6:80〜90%、 < -0.6:10%、 abs> 0.3:30%、 平時:0%)
36
+ # 説明:scalar>+0.6 は "return 0.7(開始値) + 0.2(変化幅) * scalar" = 0.82~0.9 ← 誤
37
+ # 修正1:scalar>±0.6 を "return 開始値 + (abs(scalar) - 0.6(範囲)) / 範囲量 * 変化幅"
38
+ # 修正2:scalar>±0.1 を "return 開始値 + (abs(scalar) - 0.1(範囲)) / 範囲量 * 変化幅"
39
+ # タスク等に応じた調整のため3段階で適用しておく(上記を参考に調整してください/現状はshadow-effect反映)
40
+ def _decide_ratio(self, scalar):
41
+ if not self.use_shadow:
42
+ return 0.0 # 🔸use_shadow が False の場合は常に比率を 0 にする
43
+ if abs(scalar) > 0.6:
44
+ return 0.6 + (abs(scalar) - 0.6) / 0.4 * 0.4 # 元 return 0.7 + 0.2 * scalar
45
+ elif abs(scalar) > 0.1:
46
+ return 0.1 + (abs(scalar) - 0.1) / 0.5 * 0.5 # 元 return 0.3
47
+ return 0.0
48
+
49
+ # 損失取得(損失値 loss_val を数値化、感情判定に使用、存在しないパラメータ(更新不要)はスキップ)
50
+ @torch.no_grad()
51
+ def step(self, closure=None):
52
+ loss = closure() if closure is not None else None
53
+ loss_val = loss.item() if loss is not None else 0.0
54
+
55
+ for group in self.param_groups:
56
+ for p in group['params']:
57
+ if p.grad is None:
58
+ continue
59
+
60
+ grad = p.grad
61
+ state = self.state[p]
62
+
63
+ # EMA更新・スカラー生成(EMA差分からスカラーを生成しスパイク比率を決定)
64
+ ema = self._update_ema(state, loss_val)
65
+ scalar = self._compute_scalar(ema)
66
+ ratio = self._decide_ratio(scalar)
67
+
68
+ # shadow_param:必要時のみ更新(スパイク部分に現在値を5%ずつ追従させる動的履歴)
69
+ if self.use_shadow and ratio > 0:
70
+ if 'shadow' not in state:
71
+ state['shadow'] = p.clone()
72
+ else:
73
+ p.mul_(1 - ratio).add_(state['shadow'], alpha=ratio)
74
+ state['shadow'].lerp_(p, 0.05)
75
+
76
+ # スカラー生成:短期と長期EMAの差分から信号を得る(高ぶりの強さ)
77
+ # 混合比率:スカラーが閾値を超える場合にのみ計算される(信頼できる感情信号かどうかの選別)
78
+ # → スカラー値が小さい場合は ratio = 0 となり、shadow混合は行われない
79
+ # → 信頼できる強い差分のときのみ感情機構が発動する(暗黙の信頼度判定)
80
+
81
+ # 1次・2次モーメントを使った勾配補正(decoupled weight decay 構造に近い)
82
+ exp_avg = state.setdefault('exp_avg', torch.zeros_like(p))
83
+ exp_avg_sq = state.setdefault('exp_avg_sq', torch.zeros_like(p))
84
+ beta1, beta2 = group['betas']
85
+
86
+ exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
87
+ exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
88
+ denom = exp_avg_sq.sqrt().add_(group['eps'])
89
+
90
+ step_size = group['lr']
91
+ if group['weight_decay']:
92
+ p.add_(p, alpha=-group['weight_decay'] * step_size)
93
+ p.addcdiv_(exp_avg, denom, value=-step_size * (1 - abs(scalar)))
94
+
95
+ # 感情機構の発火が収まり"十分に安定"していることを外部伝達できる(自動停止ロジックではない)
96
+ # Early Stop用 scalar 記録(バッファ共通で管理/最大32件保持/動静評価)
97
+ hist = self.state.setdefault('scalar_hist', [])
98
+ hist.append(scalar)
99
+ if len(hist) >= 33:
100
+ hist.pop(0)
101
+
102
+ # Early Stop判断(静けさの合図)
103
+ if len(self.state['scalar_hist']) >= 32:
104
+ buf = self.state['scalar_hist']
105
+ avg_abs = sum(abs(s) for s in buf) / len(buf)
106
+ std = sum((s - sum(buf)/len(buf))**2 for s in buf) / len(buf)
107
+ if avg_abs < 0.05 and std < 0.005:
108
+ self.should_stop = True # 💡 外部からこれを見て判断可
109
+
110
+ # 32ステップ分のスカラー値の静かな条件を満たした時"フラグ" should_stop = True になるだけ
111
+
112
+ return loss
113
+
114
+ """
115
+ https://github.com/muooon/EmoNavi
116
+ An emotion-driven optimizer that feels loss and navigates accordingly.
117
+ Don't think. Feel. Don't stop. Keep running. Believe in what's beyond.
118
+ """
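The Shadow-ratio comments above quote the bands 10-60% and 60-100%; note that both branch formulas algebraically reduce to abs(scalar), so once the emotion scalar leaves the +/-0.1 dead zone the blend ratio simply tracks it. A standalone spot-check of the same mapping (assumes use_shadow is enabled; not repository code):

def decide_ratio(scalar):
    # same piecewise formula as _decide_ratio with use_shadow=True
    if abs(scalar) > 0.6:
        return 0.6 + (abs(scalar) - 0.6) / 0.4 * 0.4
    elif abs(scalar) > 0.1:
        return 0.1 + (abs(scalar) - 0.1) / 0.5 * 0.5
    return 0.0

for s in (0.05, 0.2, 0.45, -0.7, 0.95):
    print(f"scalar={s:+.2f} -> ratio={decide_ratio(s):.2f}")
# prints ratios 0.00, 0.20, 0.45, 0.70, 0.95 respectively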
1Gv3_AMP-compatible/emoneco.py ADDED
@@ -0,0 +1,162 @@
1
+ import torch
2
+ from torch.optim import Optimizer
3
+ import math
4
+ from typing import Tuple, Callable, Union
5
+
6
+ """
7
+ EmoNeco v3.0 (250825) shadow-system v2.0 -effect NoN -moment v1.0 scalar-switch v2.0
8
+ AMP対応完了(202507) p.data -> p 修正済み
9
+ memo : "optimizer = EmoNeco(model.parameters(), lr=1e-3, use_shadow=True)"
10
+ optimizer 指定の際に True にすることで shadow をオンにできる
11
+ emosens shadow-effect v1.0 反映 shadow-system、scalar-switch 修正
12
+ """
13
+
14
+ # Helper function (Lynx)
15
+ def exists(val):
16
+ return val is not None
17
+ # Soft Sign 関数
18
+ def softsign(x):
19
+ return x / (1 + x.abs())
20
+
21
+ class EmoNeco(Optimizer):
22
+ # クラス定義&初期化 🔸Shadow True(有効)/False(無効) 切替え
23
+ def __init__(self, params: Union[list, torch.nn.Module], lr=1e-3, betas=(0.9, 0.99),
24
+ # neco用ベータ・互換性の追加(neco用beta1・beta2)
25
+ eps=1e-8, weight_decay=0.01, decoupled_weight_decay: bool = False, use_shadow: bool = False):
26
+
27
+ defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
28
+ super().__init__(params, defaults)
29
+
30
+ # ウェイト減衰のため保存
31
+ self._init_lr = lr
32
+ self.decoupled_wd = decoupled_weight_decay
33
+ self.should_stop = False # 停止フラグの初期化
34
+ self.use_shadow = use_shadow # 🔸shadowの使用フラグを保存
35
+
36
+ # 感情EMA更新(緊張と安静)
37
+ def _update_ema(self, state, loss_val):
38
+ ema = state.setdefault('ema', {})
39
+ ema['short'] = 0.3 * loss_val + 0.7 * ema.get('short', loss_val)
40
+ ema['long'] = 0.01 * loss_val + 0.99 * ema.get('long', loss_val)
41
+ return ema
42
+
43
+ # 感情スカラー値生成(EMA差分、滑らかな非線形スカラー、tanh 5 * diff で鋭敏さ強調)
44
+ def _compute_scalar(self, ema):
45
+ diff = ema['short'] - ema['long']
46
+ return math.tanh(5 * diff)
47
+
48
+ # Shadow混合比率(> abs 0.6:60〜100%、 > abs 0.1:10〜60%、 平時:0%) emosens反映
49
+ # 旧:Shadow混合比率(> 0.6:80〜90%、 < -0.6:10%、 abs> 0.3:30%、 平時:0%)
50
+ # 説明:scalar>+0.6 は "return 0.7(開始値) + 0.2(変化幅) * scalar" = 0.82~0.9 ← 誤
51
+ # 修正1:scalar>±0.6 を "return 開始値 + (abs(scalar) - 0.6(範囲)) / 範囲量 * 変化幅"
52
+ # 修正2:scalar>±0.1 を "return 開始値 + (abs(scalar) - 0.1(範囲)) / 範囲量 * 変化幅"
53
+ # タスク等に応じた調整のため3段階で適用しておく(上記を参考に調整してください/現状はshadow-effect反映)
54
+ def _decide_ratio(self, scalar):
55
+ if not self.use_shadow:
56
+ return 0.0 # 🔸use_shadow が False の場合は常に比率を 0 にする
57
+ if abs(scalar) > 0.6:
58
+ return 0.6 + (abs(scalar) - 0.6) / 0.4 * 0.4 # 元 return 0.7 + 0.2 * scalar
59
+ elif abs(scalar) > 0.1:
60
+ return 0.1 + (abs(scalar) - 0.1) / 0.5 * 0.5 # 元 return 0.3
61
+ return 0.0
62
+
63
+ # 損失取得(損失値 loss_val を数値化、感情判定に使用、存在しないパラメータ(更新不要)はスキップ)
64
+ @torch.no_grad()
65
+ def step(self, closure: Union[Callable, None] = None): # クロージャの型ヒントを追加(Python 3.10 未満でも評価できるよう Union を使用)
66
+ loss = None
67
+ if exists(closure): # 一貫性のためにexistsヘルパーを使う
68
+ with torch.enable_grad():
69
+ loss = closure()
70
+ loss_val = loss.item() if loss is not None else 0.0
71
+
72
+ for group in self.param_groups:
73
+ # 共通パラメータ抽出
74
+ lr, wd, beta1, beta2 = group['lr'], group['weight_decay'], *group['betas']
75
+
76
+ # ウェイト減衰の処理を分離 (from lynx)
77
+ _wd_actual = wd
78
+ if self.decoupled_wd:
79
+ _wd_actual /= self._init_lr # 非連結時ウェイト減衰調整
80
+
81
+ for p in filter(lambda p: exists(p.grad), group['params']): # PGチェックにフィルタ
82
+
83
+ grad = p.grad # PG直接使用(計算に".data"不要)
84
+ state = self.state[p]
85
+
86
+ # EMA更新・スカラー生成(EMA差分からスカラーを生成しスパイク比率を決定)
87
+ ema = self._update_ema(state, loss_val)
88
+ scalar = self._compute_scalar(ema)
89
+ ratio = self._decide_ratio(scalar) # 🔸use_shadow に応じて ratio が 0 になる
90
+
91
+ # shadow_param:必要時のみ更新(スパイク部分に現在値を5%ずつ追従させる動的履歴)
92
+ # 🔸self.use_shadow が True で、かつ ratio > 0 の場合のみ shadow を更新
93
+ if self.use_shadow and ratio > 0:
94
+ if 'shadow' not in state:
95
+ state['shadow'] = p.clone()
96
+ else:
97
+ p.mul_(1 - ratio).add_(state['shadow'], alpha=ratio)
98
+ state['shadow'].lerp_(p, 0.05)
99
+ # 更新前 p で shadow 更新(現在値を5%ずつ追従)
100
+ # p.mul_(1 - ratio).add_(state['shadow'], alpha=ratio)
101
+ # EmoNavi: p = p * (1-ratio) + shadow * ratio
102
+
103
+ # --- Start Neco Gradient Update Logic ---
104
+
105
+ # neco初期化(exp_avg)
106
+ if 'exp_avg' not in state:
107
+ state['exp_avg'] = torch.zeros_like(p)
108
+ exp_avg = state['exp_avg']
109
+
110
+ # Stepweight decay (from lynx): p = p * (1 - lr * wd)
111
+ # decoupled_wd 考慮 _wd_actual 使用(EmoNaviのwdは最後に適用)
112
+ p.mul_(1. - lr * _wd_actual)
113
+
114
+ # 勾配ブレンド
115
+ # m_t = beta1 * exp_avg_prev + (1 - beta1) * grad
116
+ blended_grad = grad.mul(1. - beta1).add_(exp_avg, alpha=beta1)
117
+ grad_norm = torch.norm(grad, dtype=torch.float32) # 勾配ノルムの計算
118
+
119
+ # 削除:-0.2 < scalar <= -0.5 : SoftSign (ゆっくり滑らかに)
120
+ # 0.2 < abs(scalar) <= 0.5 : SoftSign+norm (揺れを滑らかに)
121
+ # それ以外 Cautious (平時や過適合や崩壊傾向を慎重に)
122
+ # p - lr * softsign(blended_grad) (from softsign)
123
+ # p - lr * direction * mask (from Cautious)
124
+ # safe_norm 極値のブレンド勾配に対するスケーリング
125
+ if 0.2 < abs(scalar) <= 0.5:
126
+ safe_norm = grad_norm + group['eps']  # eps is not unpacked in this step(); read it from the param group to avoid a NameError
127
+ modified_grad = softsign(blended_grad) * safe_norm * (1 - abs(scalar))
128
+ p.add_(-lr * modified_grad)
129
+ else:
130
+ direction = blended_grad.sign() # 勾配方向の符号 Cautious 処理
131
+ mask = (direction == grad.sign()) # 過去の勾配と方向が一致している部分のみ更新
132
+ scaled_direction = direction * mask * (1 - abs(scalar))
133
+ p.add_(scaled_direction, alpha = -lr) # Cautious 更新
134
+
135
+ # exp_avg = beta2 * exp_avg + (1 - beta2) * grad:勾配の履歴
136
+ exp_avg.mul_(beta2).add_(grad, alpha = 1. - beta2)
137
+
138
+ # --- End Neco Gradient Update Logic ---
139
+
140
+ # Early Stop用 scalar記録(バッファ共通で管理/最大32件保持/動静評価)
141
+ # この部分は p.state ではなく self.state にアクセスする
142
+ hist = self.state.setdefault('scalar_hist', [])
143
+ hist.append(scalar)
144
+ if len(hist) >= 33:
145
+ hist.pop(0)
146
+
147
+ # Early Stop判断(静けさの合図) This part is outside the inner loop
148
+ if len(self.state['scalar_hist']) >= 32:
149
+ buf = self.state['scalar_hist']
150
+ avg_abs = sum(abs(s) for s in buf) / len(buf)
151
+ std = sum((s - sum(buf)/len(buf))**2 for s in buf) / len(buf)
152
+ if avg_abs < 0.05 and std < 0.005:
153
+ self.should_stop = True # 外部からこれを見て判断可
154
+
155
+ return loss
156
+
157
+ """
158
+ https://github.com/muooon/EmoNavi
159
+ Neco was developed with inspiration from Lion, Tiger, Cautious, softsign, and EmoLynx
160
+ which we deeply respect for their lightweight and intelligent design.
161
+ Neco also integrates EmoNAVI to enhance its capabilities.
162
+ """
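The Cautious branch above only moves coordinates where the momentum-blended direction agrees with the current gradient sign. A minimal standalone illustration of that mask on toy tensors (not repository code):

import torch

grad = torch.tensor([0.8, -0.3, 0.5, -0.9])       # current gradient
blended = torch.tensor([0.6, 0.2, 0.4, -0.1])     # momentum-blended gradient
direction = blended.sign()
mask = (direction == grad.sign())                 # True where both agree on direction
print(mask)              # tensor([ True, False,  True,  True])
print(direction * mask)  # tensor([ 1.,  0.,  1., -1.]): disagreeing coordinates are zeroed before the step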
1Gv3_AMP-compatible/emozeal.py ADDED
@@ -0,0 +1,161 @@
1
+ import torch
2
+ from torch.optim import Optimizer
3
+ import math
4
+
5
+ """
6
+ EmoZeal v3.0 (250825) shadow-system v2.0 -effect NoN -moment v1.0 scalar-switch v2.0
7
+ AMP対応完了(202507) p.data -> p 修正済み
8
+ memo : "optimizer = EmoZeal(model.parameters(), lr=1e-3, use_shadow=True)"
9
+ optimizer 指定の際に True にすることで shadow をオンにできる
10
+ emosens shadow-effect v1.0 反映 shadow-system、scalar-switch 修正
11
+ """
12
+
13
+ # Soft Sign 関数
14
+ def softsign(x):
15
+ return x / (1 + x.abs())
16
+
17
+ class EmoZeal(Optimizer):
18
+ # クラス定義&初期化 🔸Shadow True(有効)/False(無効) 切替え
19
+ def __init__(self, params, lr=1e-3, betas=(0.9, 0.999),
20
+ eps=1e-8, weight_decay=0.01, use_shadow: bool = False):
21
+ defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
22
+
23
+ super().__init__(params, defaults)
24
+
25
+ self.alpha_prev = getattr(self, 'alpha_prev', 1.0)
26
+ self._init_lr = lr
27
+ self.should_stop = False # 停止フラグの初期化
28
+ self.use_shadow = use_shadow # 🔸shadowの使用フラグを保存
29
+
30
+ # 感情EMA更新(緊張と安静)
31
+ def _update_ema(self, state, loss_val):
32
+ ema = state.setdefault('ema', {})
33
+ ema['short'] = 0.3 * loss_val + 0.7 * ema.get('short', loss_val)
34
+ ema['long'] = 0.01 * loss_val + 0.99 * ema.get('long', loss_val)
35
+ return ema
36
+
37
+ # 感情スカラー値生成(EMA差分、滑らかな非線形スカラー、tanh 5 * diff で鋭敏さ強調)
38
+ def _compute_scalar(self, ema):
39
+ diff = ema['short'] - ema['long']
40
+ return math.tanh(5 * diff)
41
+
42
+ # Shadow混合比率(> abs 0.6:60〜100%、 > abs 0.1:10〜60%、 平時:0%) emosens反映
43
+ # 旧:Shadow混合比率(> 0.6:80〜90%、 < -0.6:10%、 abs> 0.3:30%、 平時:0%)
44
+ # 説明:scalar>+0.6 は "return 0.7(開始値) + 0.2(変化幅) * scalar" = 0.82~0.9 ← 誤
45
+ # 修正1:scalar>±0.6 を "return 開始値 + (abs(scalar) - 0.6(範囲)) / 範囲量 * 変化幅"
46
+ # 修正2:scalar>±0.1 を "return 開始値 + (abs(scalar) - 0.1(範囲)) / 範囲量 * 変化幅"
47
+ # タスク等に応じた調整のため3段階で適用しておく(上記を参考に調整してください/現状はshadow-effect反映)
48
+ def _decide_ratio(self, scalar):
49
+ if not self.use_shadow:
50
+ return 0.0 # 🔸use_shadow が False の場合は常に比率を 0 にする
51
+ if abs(scalar) > 0.6:
52
+ return 0.6 + (abs(scalar) - 0.6) / 0.4 * 0.4 # 元 return 0.7 + 0.2 * scalar
53
+ elif abs(scalar) > 0.1:
54
+ return 0.1 + (abs(scalar) - 0.1) / 0.5 * 0.5 # 元 return 0.3
55
+ return 0.0
56
+
57
+ # 損失取得(損失値 loss_val を数値化、感情判定に使用、存在しないパラメータ(更新不要)はスキップ)
58
+ @torch.no_grad()
59
+ def step(self, closure=None):
60
+ loss = closure() if closure is not None else None
61
+ loss_val = loss.item() if loss is not None else 0.0
62
+
63
+ for group in self.param_groups:
64
+ for p in group['params']:
65
+ if p.grad is None:
66
+ continue
67
+
68
+ grad = p.grad
69
+ state = self.state[p]
70
+
71
+ # 感情EMA更新・スカラー生成 (既存ロジックを維持)
72
+ ema = self._update_ema(state, loss_val)
73
+ scalar = self._compute_scalar(ema)
74
+ ratio = self._decide_ratio(scalar) # 🔸use_shadow に応じて ratio が 0 になる
75
+
76
+ # shadow_param:必要時のみ更新 (既存ロジックを維持)
77
+ # 🔸self.use_shadow が True で、かつ ratio > 0 の場合のみ shadow を更新
78
+ if self.use_shadow and ratio > 0:
79
+ if 'shadow' not in state:
80
+ state['shadow'] = p.clone()
81
+ else:
82
+ p.mul_(1 - ratio).add_(state['shadow'], alpha=ratio)
83
+ state['shadow'].lerp_(p, 0.05)
84
+
85
+ # --- 勾配補正ロジック ---
86
+ # 行列の形状が2次元以上の場合、分散情報ベースのAB近似を使用
87
+ if grad.dim() >= 2:
88
+ # 行と列の2乗平均を計算 (分散の軽量な近似)
89
+ r_sq = torch.mean(grad * grad, dim=tuple(range(1, grad.dim())), keepdim=True).add_(group['eps'])
90
+ c_sq = torch.mean(grad * grad, dim=0, keepdim=True).add_(group['eps'])
91
+
92
+ # 分散情報から勾配の近似行列を生成
93
+ # AB行列として見立てたものを直接生成し更新項を計算する
94
+ # A = sqrt(r_sq), B = sqrt(c_sq) とすることでAB行列の近似を再現
95
+ # これをEMAで平滑化する
96
+ beta1, beta2 = group['betas']
97
+ eps = group['eps']
98
+ lr = group['lr']
99
+ exp_avg = state.setdefault('exp_avg', torch.zeros_like(p))
100
+ blended_grad = grad.mul(1 - beta1).add_(exp_avg, alpha=beta1)
101
+ grad_norm = torch.norm(grad, dtype=torch.float32)
102
+ # > abs 0.6 Cautious (過適合や崩壊傾向を慎重に)
103
+ # > abs 0.1 SoftSign+NormEPS (揺れを滑らかに)
104
+ # p - lr * softsign(blended_grad) (from softsign)
105
+ # p - lr * direction * mask (from Cautious)
106
+ # safe_norm 極値のブレンド勾配に対するスケーリング
107
+ if abs(scalar) > 0.6:
108
+ direction = blended_grad.sign() # 勾配方向の符号 Cautious 処理
109
+ mask = (direction == grad.sign()) # 過去の勾配と方向が一致する部分のみ更新
110
+ scaled_direction = direction * mask * (1 - abs(scalar))
111
+ p.add_(scaled_direction, alpha = -lr) # Cautious 更新
112
+ elif abs(scalar) > 0.1:
113
+ safe_norm = grad_norm + eps
114
+ modified_grad = softsign(blended_grad) * safe_norm * (1 - abs(scalar))
115
+ p.add_(-lr * modified_grad)
116
+
117
+ state.setdefault('exp_avg_r', torch.zeros_like(r_sq)).mul_(beta1).add_(torch.sqrt(r_sq), alpha=1 - beta1)
118
+ state.setdefault('exp_avg_c', torch.zeros_like(c_sq)).mul_(beta1).add_(torch.sqrt(c_sq), alpha=1 - beta1)
119
+
120
+ # 再構築した近似勾配の平方根の積で正規化
121
+ # これにより2次モーメントのような役割を果たす
122
+ denom = torch.sqrt(state['exp_avg_r'] * state['exp_avg_c']) + eps
123
+
124
+ # 最終的な更新項を計算
125
+ update_term = grad / denom
126
+
127
+ # 1次元(ベクトル)の勾配補正(decoupled weight decay 構造に近い)
128
+ else:
129
+ exp_avg = state.setdefault('exp_avg', torch.zeros_like(p))
130
+ exp_avg_sq = state.setdefault('exp_avg_sq', torch.zeros_like(p))
131
+ beta1, beta2 = group['betas']
132
+ exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
133
+ exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
134
+ denom = exp_avg_sq.sqrt().add_(group['eps'])
135
+ update_term = exp_avg / denom
136
+
137
+ # 最終的なパラメータ更新 (decoupled weight decayも適用)
138
+ p.add_(p, alpha=-group['weight_decay'] * group['lr'])
139
+ p.add_(update_term, alpha=-group['lr'] * (1 - abs(scalar)))
140
+
141
+ # --- Early Stop ロジック (既存ロジックを維持) ---
142
+ hist = self.state.setdefault('scalar_hist', [])
143
+ hist.append(scalar)
144
+ if len(hist) >= 33:
145
+ hist.pop(0)
146
+
147
+ # Early Stop判断
148
+ if len(self.state['scalar_hist']) >= 32:
149
+ buf = self.state['scalar_hist']
150
+ avg_abs = sum(abs(s) for s in buf) / len(buf)
151
+ std = sum((s - sum(buf)/len(buf))**2 for s in buf) / len(buf)
152
+ if avg_abs < 0.05 and std < 0.005:
153
+ self.should_stop = True
154
+
155
+ return loss
156
+
157
+ """
158
+ https://github.com/muooon/EmoNavi
159
+ Zeal is inspired by Adafactor, and EmoFact,
160
+ and its VRAM-friendly design is something everyone loves.
161
+ """
1Gv3_AMP-compatible/logs/fluctuation_and_accuracy_panel.png ADDED

Git LFS Details

  • SHA256: 2a074a42df6fedc22504de259c8e0428ea65eb248a608467cebc8a2c54138807
  • Pointer size: 131 Bytes
  • Size of remote file: 226 kB
1Gv3_AMP-compatible/logs/loss_comparison_panel.png ADDED

Git LFS Details

  • SHA256: 0bbde9135564fe13e4915305d4c5e81903a4b18732f1e2d493bd78ede0d753d4
  • Pointer size: 131 Bytes
  • Size of remote file: 200 kB
1Gv3_AMP-compatible/logs/trec_gpt2_weight_pca_3panel.png ADDED

Git LFS Details

  • SHA256: 8a4a4dc5c2658499fe5cb88e06c800f5d0de87395bb787ddcbd23b75ae04e9b7
  • Pointer size: 131 Bytes
  • Size of remote file: 206 kB
1Gv3_AMP-compatible/logs/trec_squad_step_accuracy.json ADDED
@@ -0,0 +1,2431 @@
1
+ {
2
+ "TREC": {
3
+ "EmoNAVI": [
4
+ [
5
+ 10,
6
+ 0.274
7
+ ],
8
+ [
9
+ 20,
10
+ 0.258
11
+ ],
12
+ [
13
+ 30,
14
+ 0.336
15
+ ],
16
+ [
17
+ 40,
18
+ 0.374
19
+ ],
20
+ [
21
+ 50,
22
+ 0.332
23
+ ],
24
+ [
25
+ 60,
26
+ 0.422
27
+ ],
28
+ [
29
+ 70,
30
+ 0.308
31
+ ],
32
+ [
33
+ 80,
34
+ 0.168
35
+ ],
36
+ [
37
+ 90,
38
+ 0.422
39
+ ],
40
+ [
41
+ 100,
42
+ 0.32
43
+ ],
44
+ [
45
+ 110,
46
+ 0.494
47
+ ],
48
+ [
49
+ 120,
50
+ 0.31
51
+ ],
52
+ [
53
+ 130,
54
+ 0.344
55
+ ],
56
+ [
57
+ 140,
58
+ 0.474
59
+ ],
60
+ [
61
+ 150,
62
+ 0.37
63
+ ],
64
+ [
65
+ 160,
66
+ 0.402
67
+ ],
68
+ [
69
+ 170,
70
+ 0.58
71
+ ],
72
+ [
73
+ 180,
74
+ 0.474
75
+ ],
76
+ [
77
+ 190,
78
+ 0.516
79
+ ],
80
+ [
81
+ 200,
82
+ 0.518
83
+ ],
84
+ [
85
+ 210,
86
+ 0.422
87
+ ],
88
+ [
89
+ 220,
90
+ 0.522
91
+ ],
92
+ [
93
+ 230,
94
+ 0.564
95
+ ],
96
+ [
97
+ 240,
98
+ 0.51
99
+ ],
100
+ [
101
+ 250,
102
+ 0.672
103
+ ],
104
+ [
105
+ 260,
106
+ 0.548
107
+ ],
108
+ [
109
+ 270,
110
+ 0.57
111
+ ],
112
+ [
113
+ 280,
114
+ 0.538
115
+ ],
116
+ [
117
+ 290,
118
+ 0.508
119
+ ],
120
+ [
121
+ 300,
122
+ 0.672
123
+ ],
124
+ [
125
+ 310,
126
+ 0.606
127
+ ],
128
+ [
129
+ 320,
130
+ 0.6
131
+ ],
132
+ [
133
+ 330,
134
+ 0.65
135
+ ],
136
+ [
137
+ 340,
138
+ 0.662
139
+ ],
140
+ [
141
+ 350,
142
+ 0.652
143
+ ],
144
+ [
145
+ 360,
146
+ 0.67
147
+ ],
148
+ [
149
+ 370,
150
+ 0.632
151
+ ],
152
+ [
153
+ 380,
154
+ 0.634
155
+ ],
156
+ [
157
+ 390,
158
+ 0.61
159
+ ],
160
+ [
161
+ 400,
162
+ 0.7
163
+ ],
164
+ [
165
+ 410,
166
+ 0.67
167
+ ],
168
+ [
169
+ 420,
170
+ 0.704
171
+ ],
172
+ [
173
+ 430,
174
+ 0.692
175
+ ],
176
+ [
177
+ 440,
178
+ 0.724
179
+ ],
180
+ [
181
+ 450,
182
+ 0.724
183
+ ],
184
+ [
185
+ 460,
186
+ 0.722
187
+ ],
188
+ [
189
+ 470,
190
+ 0.668
191
+ ],
192
+ [
193
+ 480,
194
+ 0.694
195
+ ],
196
+ [
197
+ 490,
198
+ 0.744
199
+ ],
200
+ [
201
+ 500,
202
+ 0.732
203
+ ]
204
+ ],
205
+ "EmoFACT": [
206
+ [
207
+ 10,
208
+ 0.246
209
+ ],
210
+ [
211
+ 20,
212
+ 0.228
213
+ ],
214
+ [
215
+ 30,
216
+ 0.282
217
+ ],
218
+ [
219
+ 40,
220
+ 0.328
221
+ ],
222
+ [
223
+ 50,
224
+ 0.234
225
+ ],
226
+ [
227
+ 60,
228
+ 0.326
229
+ ],
230
+ [
231
+ 70,
232
+ 0.332
233
+ ],
234
+ [
235
+ 80,
236
+ 0.51
237
+ ],
238
+ [
239
+ 90,
240
+ 0.308
241
+ ],
242
+ [
243
+ 100,
244
+ 0.332
245
+ ],
246
+ [
247
+ 110,
248
+ 0.3
249
+ ],
250
+ [
251
+ 120,
252
+ 0.478
253
+ ],
254
+ [
255
+ 130,
256
+ 0.36
257
+ ],
258
+ [
259
+ 140,
260
+ 0.44
261
+ ],
262
+ [
263
+ 150,
264
+ 0.466
265
+ ],
266
+ [
267
+ 160,
268
+ 0.396
269
+ ],
270
+ [
271
+ 170,
272
+ 0.408
273
+ ],
274
+ [
275
+ 180,
276
+ 0.414
277
+ ],
278
+ [
279
+ 190,
280
+ 0.376
281
+ ],
282
+ [
283
+ 200,
284
+ 0.448
285
+ ],
286
+ [
287
+ 210,
288
+ 0.354
289
+ ],
290
+ [
291
+ 220,
292
+ 0.426
293
+ ],
294
+ [
295
+ 230,
296
+ 0.492
297
+ ],
298
+ [
299
+ 240,
300
+ 0.558
301
+ ],
302
+ [
303
+ 250,
304
+ 0.488
305
+ ],
306
+ [
307
+ 260,
308
+ 0.434
309
+ ],
310
+ [
311
+ 270,
312
+ 0.564
313
+ ],
314
+ [
315
+ 280,
316
+ 0.394
317
+ ],
318
+ [
319
+ 290,
320
+ 0.524
321
+ ],
322
+ [
323
+ 300,
324
+ 0.472
325
+ ],
326
+ [
327
+ 310,
328
+ 0.498
329
+ ],
330
+ [
331
+ 320,
332
+ 0.618
333
+ ],
334
+ [
335
+ 330,
336
+ 0.588
337
+ ],
338
+ [
339
+ 340,
340
+ 0.528
341
+ ],
342
+ [
343
+ 350,
344
+ 0.594
345
+ ],
346
+ [
347
+ 360,
348
+ 0.452
349
+ ],
350
+ [
351
+ 370,
352
+ 0.562
353
+ ],
354
+ [
355
+ 380,
356
+ 0.566
357
+ ],
358
+ [
359
+ 390,
360
+ 0.696
361
+ ],
362
+ [
363
+ 400,
364
+ 0.592
365
+ ],
366
+ [
367
+ 410,
368
+ 0.33
369
+ ],
370
+ [
371
+ 420,
372
+ 0.546
373
+ ],
374
+ [
375
+ 430,
376
+ 0.598
377
+ ],
378
+ [
379
+ 440,
380
+ 0.66
381
+ ],
382
+ [
383
+ 450,
384
+ 0.584
385
+ ],
386
+ [
387
+ 460,
388
+ 0.652
389
+ ],
390
+ [
391
+ 470,
392
+ 0.686
393
+ ],
394
+ [
395
+ 480,
396
+ 0.65
397
+ ],
398
+ [
399
+ 490,
400
+ 0.7
401
+ ],
402
+ [
403
+ 500,
404
+ 0.716
405
+ ]
406
+ ],
407
+ "EmoZEAL": [
408
+ [
409
+ 10,
410
+ 0.248
411
+ ],
412
+ [
413
+ 20,
414
+ 0.268
415
+ ],
416
+ [
417
+ 30,
418
+ 0.348
419
+ ],
420
+ [
421
+ 40,
422
+ 0.302
423
+ ],
424
+ [
425
+ 50,
426
+ 0.332
427
+ ],
428
+ [
429
+ 60,
430
+ 0.31
431
+ ],
432
+ [
433
+ 70,
434
+ 0.236
435
+ ],
436
+ [
437
+ 80,
438
+ 0.16
439
+ ],
440
+ [
441
+ 90,
442
+ 0.35
443
+ ],
444
+ [
445
+ 100,
446
+ 0.244
447
+ ],
448
+ [
449
+ 110,
450
+ 0.318
451
+ ],
452
+ [
453
+ 120,
454
+ 0.312
455
+ ],
456
+ [
457
+ 130,
458
+ 0.54
459
+ ],
460
+ [
461
+ 140,
462
+ 0.418
463
+ ],
464
+ [
465
+ 150,
466
+ 0.43
467
+ ],
468
+ [
469
+ 160,
470
+ 0.352
471
+ ],
472
+ [
473
+ 170,
474
+ 0.47
475
+ ],
476
+ [
477
+ 180,
478
+ 0.538
479
+ ],
480
+ [
481
+ 190,
482
+ 0.444
483
+ ],
484
+ [
485
+ 200,
486
+ 0.486
487
+ ],
488
+ [
489
+ 210,
490
+ 0.596
491
+ ],
492
+ [
493
+ 220,
494
+ 0.414
495
+ ],
496
+ [
497
+ 230,
498
+ 0.384
499
+ ],
500
+ [
501
+ 240,
502
+ 0.67
503
+ ],
504
+ [
505
+ 250,
506
+ 0.344
507
+ ],
508
+ [
509
+ 260,
510
+ 0.398
511
+ ],
512
+ [
513
+ 270,
514
+ 0.55
515
+ ],
516
+ [
517
+ 280,
518
+ 0.612
519
+ ],
520
+ [
521
+ 290,
522
+ 0.616
523
+ ],
524
+ [
525
+ 300,
526
+ 0.488
527
+ ],
528
+ [
529
+ 310,
530
+ 0.46
531
+ ],
532
+ [
533
+ 320,
534
+ 0.53
535
+ ],
536
+ [
537
+ 330,
538
+ 0.582
539
+ ],
540
+ [
541
+ 340,
542
+ 0.376
543
+ ],
544
+ [
545
+ 350,
546
+ 0.66
547
+ ],
548
+ [
549
+ 360,
550
+ 0.682
551
+ ],
552
+ [
553
+ 370,
554
+ 0.642
555
+ ],
556
+ [
557
+ 380,
558
+ 0.686
559
+ ],
560
+ [
561
+ 390,
562
+ 0.612
563
+ ],
564
+ [
565
+ 400,
566
+ 0.496
567
+ ],
568
+ [
569
+ 410,
570
+ 0.572
571
+ ],
572
+ [
573
+ 420,
574
+ 0.58
575
+ ],
576
+ [
577
+ 430,
578
+ 0.684
579
+ ],
580
+ [
581
+ 440,
582
+ 0.72
583
+ ],
584
+ [
585
+ 450,
586
+ 0.624
587
+ ],
588
+ [
589
+ 460,
590
+ 0.616
591
+ ],
592
+ [
593
+ 470,
594
+ 0.718
595
+ ],
596
+ [
597
+ 480,
598
+ 0.71
599
+ ],
600
+ [
601
+ 490,
602
+ 0.696
603
+ ],
604
+ [
605
+ 500,
606
+ 0.658
607
+ ]
608
+ ],
609
+ "EmoLYNX": [
610
+ [
611
+ 10,
612
+ 0.22
613
+ ],
614
+ [
615
+ 20,
616
+ 0.228
617
+ ],
618
+ [
619
+ 30,
620
+ 0.242
621
+ ],
622
+ [
623
+ 40,
624
+ 0.222
625
+ ],
626
+ [
627
+ 50,
628
+ 0.398
629
+ ],
630
+ [
631
+ 60,
632
+ 0.278
633
+ ],
634
+ [
635
+ 70,
636
+ 0.298
637
+ ],
638
+ [
639
+ 80,
640
+ 0.312
641
+ ],
642
+ [
643
+ 90,
644
+ 0.254
645
+ ],
646
+ [
647
+ 100,
648
+ 0.306
649
+ ],
650
+ [
651
+ 110,
652
+ 0.484
653
+ ],
654
+ [
655
+ 120,
656
+ 0.336
657
+ ],
658
+ [
659
+ 130,
660
+ 0.31
661
+ ],
662
+ [
663
+ 140,
664
+ 0.33
665
+ ],
666
+ [
667
+ 150,
668
+ 0.376
669
+ ],
670
+ [
671
+ 160,
672
+ 0.354
673
+ ],
674
+ [
675
+ 170,
676
+ 0.394
677
+ ],
678
+ [
679
+ 180,
680
+ 0.558
681
+ ],
682
+ [
683
+ 190,
684
+ 0.566
685
+ ],
686
+ [
687
+ 200,
688
+ 0.578
689
+ ],
690
+ [
691
+ 210,
692
+ 0.548
693
+ ],
694
+ [
695
+ 220,
696
+ 0.53
697
+ ],
698
+ [
699
+ 230,
700
+ 0.57
701
+ ],
702
+ [
703
+ 240,
704
+ 0.594
705
+ ],
706
+ [
707
+ 250,
708
+ 0.596
709
+ ],
710
+ [
711
+ 260,
712
+ 0.64
713
+ ],
714
+ [
715
+ 270,
716
+ 0.6
717
+ ],
718
+ [
719
+ 280,
720
+ 0.544
721
+ ],
722
+ [
723
+ 290,
724
+ 0.464
725
+ ],
726
+ [
727
+ 300,
728
+ 0.548
729
+ ],
730
+ [
731
+ 310,
732
+ 0.622
733
+ ],
734
+ [
735
+ 320,
736
+ 0.678
737
+ ],
738
+ [
739
+ 330,
740
+ 0.658
741
+ ],
742
+ [
743
+ 340,
744
+ 0.658
745
+ ],
746
+ [
747
+ 350,
748
+ 0.686
749
+ ],
750
+ [
751
+ 360,
752
+ 0.694
753
+ ],
754
+ [
755
+ 370,
756
+ 0.686
757
+ ],
758
+ [
759
+ 380,
760
+ 0.61
761
+ ],
762
+ [
763
+ 390,
764
+ 0.606
765
+ ],
766
+ [
767
+ 400,
768
+ 0.666
769
+ ],
770
+ [
771
+ 410,
772
+ 0.68
773
+ ],
774
+ [
775
+ 420,
776
+ 0.638
777
+ ],
778
+ [
779
+ 430,
780
+ 0.626
781
+ ],
782
+ [
783
+ 440,
784
+ 0.546
785
+ ],
786
+ [
787
+ 450,
788
+ 0.49
789
+ ],
790
+ [
791
+ 460,
792
+ 0.508
793
+ ],
794
+ [
795
+ 470,
796
+ 0.546
797
+ ],
798
+ [
799
+ 480,
800
+ 0.562
801
+ ],
802
+ [
803
+ 490,
804
+ 0.62
805
+ ],
806
+ [
807
+ 500,
808
+ 0.624
809
+ ]
810
+ ],
811
+ "EmoNECO": [
812
+ [
813
+ 10,
814
+ 0.164
815
+ ],
816
+ [
817
+ 20,
818
+ 0.198
819
+ ],
820
+ [
821
+ 30,
822
+ 0.29
823
+ ],
824
+ [
825
+ 40,
826
+ 0.372
827
+ ],
828
+ [
829
+ 50,
830
+ 0.236
831
+ ],
832
+ [
833
+ 60,
834
+ 0.384
835
+ ],
836
+ [
837
+ 70,
838
+ 0.23
839
+ ],
840
+ [
841
+ 80,
842
+ 0.406
843
+ ],
844
+ [
845
+ 90,
846
+ 0.476
847
+ ],
848
+ [
849
+ 100,
850
+ 0.226
851
+ ],
852
+ [
853
+ 110,
854
+ 0.398
855
+ ],
856
+ [
857
+ 120,
858
+ 0.234
859
+ ],
860
+ [
861
+ 130,
862
+ 0.37
863
+ ],
864
+ [
865
+ 140,
866
+ 0.568
867
+ ],
868
+ [
869
+ 150,
870
+ 0.554
871
+ ],
872
+ [
873
+ 160,
874
+ 0.542
875
+ ],
876
+ [
877
+ 170,
878
+ 0.578
879
+ ],
880
+ [
881
+ 180,
882
+ 0.488
883
+ ],
884
+ [
885
+ 190,
886
+ 0.574
887
+ ],
888
+ [
889
+ 200,
890
+ 0.58
891
+ ],
892
+ [
893
+ 210,
894
+ 0.616
895
+ ],
896
+ [
897
+ 220,
898
+ 0.59
899
+ ],
900
+ [
901
+ 230,
902
+ 0.612
903
+ ],
904
+ [
905
+ 240,
906
+ 0.622
907
+ ],
908
+ [
909
+ 250,
910
+ 0.624
911
+ ],
912
+ [
913
+ 260,
914
+ 0.684
915
+ ],
916
+ [
917
+ 270,
918
+ 0.63
919
+ ],
920
+ [
921
+ 280,
922
+ 0.694
923
+ ],
924
+ [
925
+ 290,
926
+ 0.632
927
+ ],
928
+ [
929
+ 300,
930
+ 0.66
931
+ ],
932
+ [
933
+ 310,
934
+ 0.708
935
+ ],
936
+ [
937
+ 320,
938
+ 0.552
939
+ ],
940
+ [
941
+ 330,
942
+ 0.622
943
+ ],
944
+ [
945
+ 340,
946
+ 0.676
947
+ ],
948
+ [
949
+ 350,
950
+ 0.732
951
+ ],
952
+ [
953
+ 360,
954
+ 0.702
955
+ ],
956
+ [
957
+ 370,
958
+ 0.646
959
+ ],
960
+ [
961
+ 380,
962
+ 0.726
963
+ ],
964
+ [
965
+ 390,
966
+ 0.758
967
+ ],
968
+ [
969
+ 400,
970
+ 0.73
971
+ ],
972
+ [
973
+ 410,
974
+ 0.698
975
+ ],
976
+ [
977
+ 420,
978
+ 0.754
979
+ ],
980
+ [
981
+ 430,
982
+ 0.732
983
+ ],
984
+ [
985
+ 440,
986
+ 0.666
987
+ ],
988
+ [
989
+ 450,
990
+ 0.756
991
+ ],
992
+ [
993
+ 460,
994
+ 0.728
995
+ ],
996
+ [
997
+ 470,
998
+ 0.74
999
+ ],
1000
+ [
1001
+ 480,
1002
+ 0.776
1003
+ ],
1004
+ [
1005
+ 490,
1006
+ 0.73
1007
+ ],
1008
+ [
1009
+ 500,
1010
+ 0.668
1011
+ ]
1012
+ ],
1013
+ "EmoCLAN": [
1014
+ [
1015
+ 10,
1016
+ 0.15
1017
+ ],
1018
+ [
1019
+ 20,
1020
+ 0.228
1021
+ ],
1022
+ [
1023
+ 30,
1024
+ 0.226
1025
+ ],
1026
+ [
1027
+ 40,
1028
+ 0.166
1029
+ ],
1030
+ [
1031
+ 50,
1032
+ 0.312
1033
+ ],
1034
+ [
1035
+ 60,
1036
+ 0.284
1037
+ ],
1038
+ [
1039
+ 70,
1040
+ 0.338
1041
+ ],
1042
+ [
1043
+ 80,
1044
+ 0.466
1045
+ ],
1046
+ [
1047
+ 90,
1048
+ 0.288
1049
+ ],
1050
+ [
1051
+ 100,
1052
+ 0.326
1053
+ ],
1054
+ [
1055
+ 110,
1056
+ 0.298
1057
+ ],
1058
+ [
1059
+ 120,
1060
+ 0.406
1061
+ ],
1062
+ [
1063
+ 130,
1064
+ 0.322
1065
+ ],
1066
+ [
1067
+ 140,
1068
+ 0.392
1069
+ ],
1070
+ [
1071
+ 150,
1072
+ 0.378
1073
+ ],
1074
+ [
1075
+ 160,
1076
+ 0.372
1077
+ ],
1078
+ [
1079
+ 170,
1080
+ 0.428
1081
+ ],
1082
+ [
1083
+ 180,
1084
+ 0.364
1085
+ ],
1086
+ [
1087
+ 190,
1088
+ 0.472
1089
+ ],
1090
+ [
1091
+ 200,
1092
+ 0.47
1093
+ ],
1094
+ [
1095
+ 210,
1096
+ 0.494
1097
+ ],
1098
+ [
1099
+ 220,
1100
+ 0.528
1101
+ ],
1102
+ [
1103
+ 230,
1104
+ 0.528
1105
+ ],
1106
+ [
1107
+ 240,
1108
+ 0.544
1109
+ ],
1110
+ [
1111
+ 250,
1112
+ 0.57
1113
+ ],
1114
+ [
1115
+ 260,
1116
+ 0.57
1117
+ ],
1118
+ [
1119
+ 270,
1120
+ 0.588
1121
+ ],
1122
+ [
1123
+ 280,
1124
+ 0.564
1125
+ ],
1126
+ [
1127
+ 290,
1128
+ 0.608
1129
+ ],
1130
+ [
1131
+ 300,
1132
+ 0.548
1133
+ ],
1134
+ [
1135
+ 310,
1136
+ 0.578
1137
+ ],
1138
+ [
1139
+ 320,
1140
+ 0.594
1141
+ ],
1142
+ [
1143
+ 330,
1144
+ 0.7
1145
+ ],
1146
+ [
1147
+ 340,
1148
+ 0.688
1149
+ ],
1150
+ [
1151
+ 350,
1152
+ 0.672
1153
+ ],
1154
+ [
1155
+ 360,
1156
+ 0.692
1157
+ ],
1158
+ [
1159
+ 370,
1160
+ 0.46
1161
+ ],
1162
+ [
1163
+ 380,
1164
+ 0.586
1165
+ ],
1166
+ [
1167
+ 390,
1168
+ 0.588
1169
+ ],
1170
+ [
1171
+ 400,
1172
+ 0.636
1173
+ ],
1174
+ [
1175
+ 410,
1176
+ 0.632
1177
+ ],
1178
+ [
1179
+ 420,
1180
+ 0.668
1181
+ ],
1182
+ [
1183
+ 430,
1184
+ 0.684
1185
+ ],
1186
+ [
1187
+ 440,
1188
+ 0.662
1189
+ ],
1190
+ [
1191
+ 450,
1192
+ 0.602
1193
+ ],
1194
+ [
1195
+ 460,
1196
+ 0.572
1197
+ ],
1198
+ [
1199
+ 470,
1200
+ 0.57
1201
+ ],
1202
+ [
1203
+ 480,
1204
+ 0.616
1205
+ ],
1206
+ [
1207
+ 490,
1208
+ 0.668
1209
+ ],
1210
+ [
1211
+ 500,
1212
+ 0.614
1213
+ ]
1214
+ ]
1215
+ },
1216
+ "SQuAD_Tiny": {},
1217
+ "GPT2": {
1218
+ "EmoNAVI": [
1219
+ [
1220
+ 10,
1221
+ 390.03
1222
+ ],
1223
+ [
1224
+ 20,
1225
+ 389.7
1226
+ ],
1227
+ [
1228
+ 30,
1229
+ 144.35
1230
+ ],
1231
+ [
1232
+ 40,
1233
+ 126.51
1234
+ ],
1235
+ [
1236
+ 50,
1237
+ 128.2
1238
+ ],
1239
+ [
1240
+ 60,
1241
+ 109.94
1242
+ ],
1243
+ [
1244
+ 70,
1245
+ 91.23
1246
+ ],
1247
+ [
1248
+ 80,
1249
+ 87.17
1250
+ ],
1251
+ [
1252
+ 90,
1253
+ 98.43
1254
+ ],
1255
+ [
1256
+ 100,
1257
+ 86.26
1258
+ ],
1259
+ [
1260
+ 110,
1261
+ 73.17
1262
+ ],
1263
+ [
1264
+ 120,
1265
+ 68.45
1266
+ ],
1267
+ [
1268
+ 130,
1269
+ 72.42
1270
+ ],
1271
+ [
1272
+ 140,
1273
+ 59.25
1274
+ ],
1275
+ [
1276
+ 150,
1277
+ 76.23
1278
+ ],
1279
+ [
1280
+ 160,
1281
+ 84.56
1282
+ ],
1283
+ [
1284
+ 170,
1285
+ 57.77
1286
+ ],
1287
+ [
1288
+ 180,
1289
+ 60.9
1290
+ ],
1291
+ [
1292
+ 190,
1293
+ 59.26
1294
+ ],
1295
+ [
1296
+ 200,
1297
+ 54.84
1298
+ ],
1299
+ [
1300
+ 210,
1301
+ 52.89
1302
+ ],
1303
+ [
1304
+ 220,
1305
+ 55.69
1306
+ ],
1307
+ [
1308
+ 230,
1309
+ 52.2
1310
+ ],
1311
+ [
1312
+ 240,
1313
+ 45.29
1314
+ ],
1315
+ [
1316
+ 250,
1317
+ 50.06
1318
+ ],
1319
+ [
1320
+ 260,
1321
+ 43.19
1322
+ ],
1323
+ [
1324
+ 270,
1325
+ 43.22
1326
+ ],
1327
+ [
1328
+ 280,
1329
+ 46.27
1330
+ ],
1331
+ [
1332
+ 290,
1333
+ 42.59
1334
+ ],
1335
+ [
1336
+ 300,
1337
+ 41.59
1338
+ ],
1339
+ [
1340
+ 310,
1341
+ 48.33
1342
+ ],
1343
+ [
1344
+ 320,
1345
+ 41.03
1346
+ ],
1347
+ [
1348
+ 330,
1349
+ 31.07
1350
+ ],
1351
+ [
1352
+ 340,
1353
+ 31.38
1354
+ ],
1355
+ [
1356
+ 350,
1357
+ 29.13
1358
+ ],
1359
+ [
1360
+ 360,
1361
+ 25.73
1362
+ ],
1363
+ [
1364
+ 370,
1365
+ 22.76
1366
+ ],
1367
+ [
1368
+ 380,
1369
+ 25.37
1370
+ ],
1371
+ [
1372
+ 390,
1373
+ 28.14
1374
+ ],
1375
+ [
1376
+ 400,
1377
+ 31.61
1378
+ ],
1379
+ [
1380
+ 410,
1381
+ 25.59
1382
+ ],
1383
+ [
1384
+ 420,
1385
+ 19.69
1386
+ ],
1387
+ [
1388
+ 430,
1389
+ 14.81
1390
+ ],
1391
+ [
1392
+ 440,
1393
+ 14.93
1394
+ ],
1395
+ [
1396
+ 450,
1397
+ 16.52
1398
+ ],
1399
+ [
1400
+ 460,
1401
+ 15.26
1402
+ ],
1403
+ [
1404
+ 470,
1405
+ 14.09
1406
+ ],
1407
+ [
1408
+ 480,
1409
+ 18.8
1410
+ ],
1411
+ [
1412
+ 490,
1413
+ 11.57
1414
+ ],
1415
+ [
1416
+ 500,
1417
+ 9.65
1418
+ ]
1419
+ ],
1420
+ "EmoFACT": [
1421
+ [
1422
+ 10,
1423
+ 415036.56
1424
+ ],
1425
+ [
1426
+ 20,
1427
+ 1082.92
1428
+ ],
1429
+ [
1430
+ 30,
1431
+ 173.88
1432
+ ],
1433
+ [
1434
+ 40,
1435
+ 180.84
1436
+ ],
1437
+ [
1438
+ 50,
1439
+ 134.73
1440
+ ],
1441
+ [
1442
+ 60,
1443
+ 119.16
1444
+ ],
1445
+ [
1446
+ 70,
1447
+ 112.98
1448
+ ],
1449
+ [
1450
+ 80,
1451
+ 114.38
1452
+ ],
1453
+ [
1454
+ 90,
1455
+ 95.07
1456
+ ],
1457
+ [
1458
+ 100,
1459
+ 95.62
1460
+ ],
1461
+ [
1462
+ 110,
1463
+ 159.77
1464
+ ],
1465
+ [
1466
+ 120,
1467
+ 122.98
1468
+ ],
1469
+ [
1470
+ 130,
1471
+ 133.86
1472
+ ],
1473
+ [
1474
+ 140,
1475
+ 109.73
1476
+ ],
1477
+ [
1478
+ 150,
1479
+ 287.33
1480
+ ],
1481
+ [
1482
+ 160,
1483
+ 117.7
1484
+ ],
1485
+ [
1486
+ 170,
1487
+ 112.34
1488
+ ],
1489
+ [
1490
+ 180,
1491
+ 97.98
1492
+ ],
1493
+ [
1494
+ 190,
1495
+ 103.99
1496
+ ],
1497
+ [
1498
+ 200,
1499
+ 133.38
1500
+ ],
1501
+ [
1502
+ 210,
1503
+ 94.22
1504
+ ],
1505
+ [
1506
+ 220,
1507
+ 90.21
1508
+ ],
1509
+ [
1510
+ 230,
1511
+ 113.93
1512
+ ],
1513
+ [
1514
+ 240,
1515
+ 77.51
1516
+ ],
1517
+ [
1518
+ 250,
1519
+ 159.87
1520
+ ],
1521
+ [
1522
+ 260,
1523
+ 85.87
1524
+ ],
1525
+ [
1526
+ 270,
1527
+ 91.0
1528
+ ],
1529
+ [
1530
+ 280,
1531
+ 79.88
1532
+ ],
1533
+ [
1534
+ 290,
1535
+ 80.98
1536
+ ],
1537
+ [
1538
+ 300,
1539
+ 67.46
1540
+ ],
1541
+ [
1542
+ 310,
1543
+ 69.69
1544
+ ],
1545
+ [
1546
+ 320,
1547
+ 65.7
1548
+ ],
1549
+ [
1550
+ 330,
1551
+ 54.08
1552
+ ],
1553
+ [
1554
+ 340,
1555
+ 48.44
1556
+ ],
1557
+ [
1558
+ 350,
1559
+ 92.18
1560
+ ],
1561
+ [
1562
+ 360,
1563
+ 46.75
1564
+ ],
1565
+ [
1566
+ 370,
1567
+ 54.9
1568
+ ],
1569
+ [
1570
+ 380,
1571
+ 41.72
1572
+ ],
1573
+ [
1574
+ 390,
1575
+ 40.68
1576
+ ],
1577
+ [
1578
+ 400,
1579
+ 36.56
1580
+ ],
1581
+ [
1582
+ 410,
1583
+ 38.22
1584
+ ],
1585
+ [
1586
+ 420,
1587
+ 40.82
1588
+ ],
1589
+ [
1590
+ 430,
1591
+ 27.42
1592
+ ],
1593
+ [
1594
+ 440,
1595
+ 3203.09
1596
+ ],
1597
+ [
1598
+ 450,
1599
+ 29.91
1600
+ ],
1601
+ [
1602
+ 460,
1603
+ 30.59
1604
+ ],
1605
+ [
1606
+ 470,
1607
+ 32.2
1608
+ ],
1609
+ [
1610
+ 480,
1611
+ 40.88
1612
+ ],
1613
+ [
1614
+ 490,
1615
+ 36.93
1616
+ ],
1617
+ [
1618
+ 500,
1619
+ 35.41
1620
+ ]
1621
+ ],
1622
+ "EmoZEAL": [
1623
+ [
1624
+ 10,
1625
+ 32368984.0
1626
+ ],
1627
+ [
1628
+ 20,
1629
+ 864642.69
1630
+ ],
1631
+ [
1632
+ 30,
1633
+ 702.81
1634
+ ],
1635
+ [
1636
+ 40,
1637
+ 6212.83
1638
+ ],
1639
+ [
1640
+ 50,
1641
+ 418.3
1642
+ ],
1643
+ [
1644
+ 60,
1645
+ 125.87
1646
+ ],
1647
+ [
1648
+ 70,
1649
+ 113.59
1650
+ ],
1651
+ [
1652
+ 80,
1653
+ 115.08
1654
+ ],
1655
+ [
1656
+ 90,
1657
+ 128.86
1658
+ ],
1659
+ [
1660
+ 100,
1661
+ 126.81
1662
+ ],
1663
+ [
1664
+ 110,
1665
+ 219.04
1666
+ ],
1667
+ [
1668
+ 120,
1669
+ 129.47
1670
+ ],
1671
+ [
1672
+ 130,
1673
+ 101.47
1674
+ ],
1675
+ [
1676
+ 140,
1677
+ 104.58
1678
+ ],
1679
+ [
1680
+ 150,
1681
+ 137.48
1682
+ ],
1683
+ [
1684
+ 160,
1685
+ 111.11
1686
+ ],
1687
+ [
1688
+ 170,
1689
+ 125.97
1690
+ ],
1691
+ [
1692
+ 180,
1693
+ 91.28
1694
+ ],
1695
+ [
1696
+ 190,
1697
+ 104.27
1698
+ ],
1699
+ [
1700
+ 200,
1701
+ 86.7
1702
+ ],
1703
+ [
1704
+ 210,
1705
+ 130.79
1706
+ ],
1707
+ [
1708
+ 220,
1709
+ 96.5
1710
+ ],
1711
+ [
1712
+ 230,
1713
+ 88.04
1714
+ ],
1715
+ [
1716
+ 240,
1717
+ 94.72
1718
+ ],
1719
+ [
1720
+ 250,
1721
+ 86.83
1722
+ ],
1723
+ [
1724
+ 260,
1725
+ 105.92
1726
+ ],
1727
+ [
1728
+ 270,
1729
+ 100.96
1730
+ ],
1731
+ [
1732
+ 280,
1733
+ 92.68
1734
+ ],
1735
+ [
1736
+ 290,
1737
+ 93.79
1738
+ ],
1739
+ [
1740
+ 300,
1741
+ 90.26
1742
+ ],
1743
+ [
1744
+ 310,
1745
+ 81.71
1746
+ ],
1747
+ [
1748
+ 320,
1749
+ 62.94
1750
+ ],
1751
+ [
1752
+ 330,
1753
+ 84.68
1754
+ ],
1755
+ [
1756
+ 340,
1757
+ 72.64
1758
+ ],
1759
+ [
1760
+ 350,
1761
+ 88.21
1762
+ ],
1763
+ [
1764
+ 360,
1765
+ 72.25
1766
+ ],
1767
+ [
1768
+ 370,
1769
+ 60.64
1770
+ ],
1771
+ [
1772
+ 380,
1773
+ 65.93
1774
+ ],
1775
+ [
1776
+ 390,
1777
+ 53.45
1778
+ ],
1779
+ [
1780
+ 400,
1781
+ 59.47
1782
+ ],
1783
+ [
1784
+ 410,
1785
+ 59.57
1786
+ ],
1787
+ [
1788
+ 420,
1789
+ 54.25
1790
+ ],
1791
+ [
1792
+ 430,
1793
+ 50.96
1794
+ ],
1795
+ [
1796
+ 440,
1797
+ 48.73
1798
+ ],
1799
+ [
1800
+ 450,
1801
+ 49.77
1802
+ ],
1803
+ [
1804
+ 460,
1805
+ 45.83
1806
+ ],
1807
+ [
1808
+ 470,
1809
+ 59.98
1810
+ ],
1811
+ [
1812
+ 480,
1813
+ 55.26
1814
+ ],
1815
+ [
1816
+ 490,
1817
+ 37.23
1818
+ ],
1819
+ [
1820
+ 500,
1821
+ 33.99
1822
+ ]
1823
+ ],
1824
+ "EmoLYNX": [
1825
+ [
1826
+ 10,
1827
+ 176.55
1828
+ ],
1829
+ [
1830
+ 20,
1831
+ 78.81
1832
+ ],
1833
+ [
1834
+ 30,
1835
+ 51.84
1836
+ ],
1837
+ [
1838
+ 40,
1839
+ 61.16
1840
+ ],
1841
+ [
1842
+ 50,
1843
+ 57.48
1844
+ ],
1845
+ [
1846
+ 60,
1847
+ 61.37
1848
+ ],
1849
+ [
1850
+ 70,
1851
+ 55.12
1852
+ ],
1853
+ [
1854
+ 80,
1855
+ 58.97
1856
+ ],
1857
+ [
1858
+ 90,
1859
+ 54.63
1860
+ ],
1861
+ [
1862
+ 100,
1863
+ 61.92
1864
+ ],
1865
+ [
1866
+ 110,
1867
+ 66.47
1868
+ ],
1869
+ [
1870
+ 120,
1871
+ 51.53
1872
+ ],
1873
+ [
1874
+ 130,
1875
+ 45.01
1876
+ ],
1877
+ [
1878
+ 140,
1879
+ 45.66
1880
+ ],
1881
+ [
1882
+ 150,
1883
+ 34.53
1884
+ ],
1885
+ [
1886
+ 160,
1887
+ 42.89
1888
+ ],
1889
+ [
1890
+ 170,
1891
+ 42.98
1892
+ ],
1893
+ [
1894
+ 180,
1895
+ 42.43
1896
+ ],
1897
+ [
1898
+ 190,
1899
+ 34.23
1900
+ ],
1901
+ [
1902
+ 200,
1903
+ 30.79
1904
+ ],
1905
+ [
1906
+ 210,
1907
+ 36.44
1908
+ ],
1909
+ [
1910
+ 220,
1911
+ 33.72
1912
+ ],
1913
+ [
1914
+ 230,
1915
+ 31.61
1916
+ ],
1917
+ [
1918
+ 240,
1919
+ 33.06
1920
+ ],
1921
+ [
1922
+ 250,
1923
+ 37.53
1924
+ ],
1925
+ [
1926
+ 260,
1927
+ 43.56
1928
+ ],
1929
+ [
1930
+ 270,
1931
+ 31.7
1932
+ ],
1933
+ [
1934
+ 280,
1935
+ 27.2
1936
+ ],
1937
+ [
1938
+ 290,
1939
+ 26.43
1940
+ ],
1941
+ [
1942
+ 300,
1943
+ 31.83
1944
+ ],
1945
+ [
1946
+ 310,
1947
+ 45.07
1948
+ ],
1949
+ [
1950
+ 320,
1951
+ 30.65
1952
+ ],
1953
+ [
1954
+ 330,
1955
+ 23.93
1956
+ ],
1957
+ [
1958
+ 340,
1959
+ 26.46
1960
+ ],
1961
+ [
1962
+ 350,
1963
+ 23.51
1964
+ ],
1965
+ [
1966
+ 360,
1967
+ 28.75
1968
+ ],
1969
+ [
1970
+ 370,
1971
+ 40.6
1972
+ ],
1973
+ [
1974
+ 380,
1975
+ 36.43
1976
+ ],
1977
+ [
1978
+ 390,
1979
+ 31.47
1980
+ ],
1981
+ [
1982
+ 400,
1983
+ 57.82
1984
+ ],
1985
+ [
1986
+ 410,
1987
+ 30.0
1988
+ ],
1989
+ [
1990
+ 420,
1991
+ 30.81
1992
+ ],
1993
+ [
1994
+ 430,
1995
+ 32.15
1996
+ ],
1997
+ [
1998
+ 440,
1999
+ 24.29
2000
+ ],
2001
+ [
2002
+ 450,
2003
+ 27.99
2004
+ ],
2005
+ [
2006
+ 460,
2007
+ 25.83
2008
+ ],
2009
+ [
2010
+ 470,
2011
+ 24.17
2012
+ ],
2013
+ [
2014
+ 480,
2015
+ 24.79
2016
+ ],
2017
+ [
2018
+ 490,
2019
+ 26.67
2020
+ ],
2021
+ [
2022
+ 500,
2023
+ 31.57
2024
+ ]
2025
+ ],
2026
+ "EmoNECO": [
2027
+ [
2028
+ 10,
2029
+ 43.63
2030
+ ],
2031
+ [
2032
+ 20,
2033
+ 41.81
2034
+ ],
2035
+ [
2036
+ 30,
2037
+ 29.89
2038
+ ],
2039
+ [
2040
+ 40,
2041
+ 21.48
2042
+ ],
2043
+ [
2044
+ 50,
2045
+ 34.85
2046
+ ],
2047
+ [
2048
+ 60,
2049
+ 35.27
2050
+ ],
2051
+ [
2052
+ 70,
2053
+ 20.74
2054
+ ],
2055
+ [
2056
+ 80,
2057
+ 22.34
2058
+ ],
2059
+ [
2060
+ 90,
2061
+ 22.61
2062
+ ],
2063
+ [
2064
+ 100,
2065
+ 17.34
2066
+ ],
2067
+ [
2068
+ 110,
2069
+ 20.47
2070
+ ],
2071
+ [
2072
+ 120,
2073
+ 30.86
2074
+ ],
2075
+ [
2076
+ 130,
2077
+ 19.76
2078
+ ],
2079
+ [
2080
+ 140,
2081
+ 18.27
2082
+ ],
2083
+ [
2084
+ 150,
2085
+ 17.08
2086
+ ],
2087
+ [
2088
+ 160,
2089
+ 17.37
2090
+ ],
2091
+ [
2092
+ 170,
2093
+ 19.73
2094
+ ],
2095
+ [
2096
+ 180,
2097
+ 18.19
2098
+ ],
2099
+ [
2100
+ 190,
2101
+ 17.4
2102
+ ],
2103
+ [
2104
+ 200,
2105
+ 12.44
2106
+ ],
2107
+ [
2108
+ 210,
2109
+ 9.06
2110
+ ],
2111
+ [
2112
+ 220,
2113
+ 7.12
2114
+ ],
2115
+ [
2116
+ 230,
2117
+ 6.33
2118
+ ],
2119
+ [
2120
+ 240,
2121
+ 7.67
2122
+ ],
2123
+ [
2124
+ 250,
2125
+ 5.18
2126
+ ],
2127
+ [
2128
+ 260,
2129
+ 5.39
2130
+ ],
2131
+ [
2132
+ 270,
2133
+ 5.6
2134
+ ],
2135
+ [
2136
+ 280,
2137
+ 5.41
2138
+ ],
2139
+ [
2140
+ 290,
2141
+ 5.32
2142
+ ],
2143
+ [
2144
+ 300,
2145
+ 5.23
2146
+ ],
2147
+ [
2148
+ 310,
2149
+ 4.38
2150
+ ],
2151
+ [
2152
+ 320,
2153
+ 4.73
2154
+ ],
2155
+ [
2156
+ 330,
2157
+ 4.98
2158
+ ],
2159
+ [
2160
+ 340,
2161
+ 5.48
2162
+ ],
2163
+ [
2164
+ 350,
2165
+ 5.61
2166
+ ],
2167
+ [
2168
+ 360,
2169
+ 4.57
2170
+ ],
2171
+ [
2172
+ 370,
2173
+ 4.24
2174
+ ],
2175
+ [
2176
+ 380,
2177
+ 4.71
2178
+ ],
2179
+ [
2180
+ 390,
2181
+ 3.63
2182
+ ],
2183
+ [
2184
+ 400,
2185
+ 3.62
2186
+ ],
2187
+ [
2188
+ 410,
2189
+ 2.83
2190
+ ],
2191
+ [
2192
+ 420,
2193
+ 2.96
2194
+ ],
2195
+ [
2196
+ 430,
2197
+ 2.78
2198
+ ],
2199
+ [
2200
+ 440,
2201
+ 3.06
2202
+ ],
2203
+ [
2204
+ 450,
2205
+ 3.2
2206
+ ],
2207
+ [
2208
+ 460,
2209
+ 3.03
2210
+ ],
2211
+ [
2212
+ 470,
2213
+ 3.05
2214
+ ],
2215
+ [
2216
+ 480,
2217
+ 2.97
2218
+ ],
2219
+ [
2220
+ 490,
2221
+ 3.08
2222
+ ],
2223
+ [
2224
+ 500,
2225
+ 3.6
2226
+ ]
2227
+ ],
2228
+ "EmoCLAN": [
2229
+ [
2230
+ 10,
2231
+ 2646.54
2232
+ ],
2233
+ [
2234
+ 20,
2235
+ 237.9
2236
+ ],
2237
+ [
2238
+ 30,
2239
+ 317.26
2240
+ ],
2241
+ [
2242
+ 40,
2243
+ 145.22
2244
+ ],
2245
+ [
2246
+ 50,
2247
+ 148.97
2248
+ ],
2249
+ [
2250
+ 60,
2251
+ 225.55
2252
+ ],
2253
+ [
2254
+ 70,
2255
+ 104.09
2256
+ ],
2257
+ [
2258
+ 80,
2259
+ 92.09
2260
+ ],
2261
+ [
2262
+ 90,
2263
+ 107.5
2264
+ ],
2265
+ [
2266
+ 100,
2267
+ 130.0
2268
+ ],
2269
+ [
2270
+ 110,
2271
+ 97.33
2272
+ ],
2273
+ [
2274
+ 120,
2275
+ 87.69
2276
+ ],
2277
+ [
2278
+ 130,
2279
+ 86.27
2280
+ ],
2281
+ [
2282
+ 140,
2283
+ 77.78
2284
+ ],
2285
+ [
2286
+ 150,
2287
+ 66.3
2288
+ ],
2289
+ [
2290
+ 160,
2291
+ 84.44
2292
+ ],
2293
+ [
2294
+ 170,
2295
+ 70.21
2296
+ ],
2297
+ [
2298
+ 180,
2299
+ 71.12
2300
+ ],
2301
+ [
2302
+ 190,
2303
+ 60.57
2304
+ ],
2305
+ [
2306
+ 200,
2307
+ 58.8
2308
+ ],
2309
+ [
2310
+ 210,
2311
+ 56.19
2312
+ ],
2313
+ [
2314
+ 220,
2315
+ 64.68
2316
+ ],
2317
+ [
2318
+ 230,
2319
+ 58.71
2320
+ ],
2321
+ [
2322
+ 240,
2323
+ 72.35
2324
+ ],
2325
+ [
2326
+ 250,
2327
+ 62.81
2328
+ ],
2329
+ [
2330
+ 260,
2331
+ 62.0
2332
+ ],
2333
+ [
2334
+ 270,
2335
+ 62.57
2336
+ ],
2337
+ [
2338
+ 280,
2339
+ 55.06
2340
+ ],
2341
+ [
2342
+ 290,
2343
+ 52.29
2344
+ ],
2345
+ [
2346
+ 300,
2347
+ 55.84
2348
+ ],
2349
+ [
2350
+ 310,
2351
+ 55.93
2352
+ ],
2353
+ [
2354
+ 320,
2355
+ 61.57
2356
+ ],
2357
+ [
2358
+ 330,
2359
+ 66.8
2360
+ ],
2361
+ [
2362
+ 340,
2363
+ 64.74
2364
+ ],
2365
+ [
2366
+ 350,
2367
+ 67.67
2368
+ ],
2369
+ [
2370
+ 360,
2371
+ 64.73
2372
+ ],
2373
+ [
2374
+ 370,
2375
+ 60.54
2376
+ ],
2377
+ [
2378
+ 380,
2379
+ 57.82
2380
+ ],
2381
+ [
2382
+ 390,
2383
+ 52.32
2384
+ ],
2385
+ [
2386
+ 400,
2387
+ 52.11
2388
+ ],
2389
+ [
2390
+ 410,
2391
+ 51.81
2392
+ ],
2393
+ [
2394
+ 420,
2395
+ 50.83
2396
+ ],
2397
+ [
2398
+ 430,
2399
+ 49.49
2400
+ ],
2401
+ [
2402
+ 440,
2403
+ 41.85
2404
+ ],
2405
+ [
2406
+ 450,
2407
+ 39.5
2408
+ ],
2409
+ [
2410
+ 460,
2411
+ 37.8
2412
+ ],
2413
+ [
2414
+ 470,
2415
+ 42.96
2416
+ ],
2417
+ [
2418
+ 480,
2419
+ 41.26
2420
+ ],
2421
+ [
2422
+ 490,
2423
+ 38.94
2424
+ ],
2425
+ [
2426
+ 500,
2427
+ 45.65
2428
+ ]
2429
+ ]
2430
+ }
2431
+ }
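The step-by-step log above is easiest to read when plotted. Below is a minimal sketch for doing that (assumptions: the log is saved locally under the illustrative name step_accuracy.json, and each optimizer series is a list of [step, value] pairs nested one level below the top-level object, as in the excerpt above).

import json
import matplotlib.pyplot as plt

# Hypothetical local copy of the log shown above.
with open("step_accuracy.json") as f:
    log = json.load(f)

# The optimizer series ("EmoLYNX", "EmoNECO", "EmoCLAN", ...) sit one level below the top-level key(s).
for _, series_by_optimizer in log.items():
    for name, series in series_by_optimizer.items():
        steps, values = zip(*series)  # unzip the [step, value] pairs
        plt.plot(steps, values, label=name)

plt.xlabel("step")
plt.ylabel("logged value")
plt.legend()
plt.show()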
1Gv3_AMP-compatible/logs/trec_weights_log.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1039ad77b7d2814784414fd1e9b769a61286f264a93e82ba7dd5e9bffd847b1c
+ size 11052986
1Gv3_AMP-compatible/profile.txt ADDED
@@ -0,0 +1,45 @@
+ AMP-compatible version
+
+ This file introduces emonavi and the emo family.
+ emonavi is an optimizer with an emotional mechanism; it grew out of trial-and-error work on creating and improving RefAdamWmini-ScheduleFree.
+ https://github.com/muooon/ref-adamw-mini-ScheduleFree
+
+ RefAdamWmini already carried an EMA, a scaler, and a shadow, but it used them only in a limited way.
+ While improving on that, we arrived at a new mechanism: the emotional mechanism.
+ The optimizers are introduced below, in order, starting with emonavi.
+
+ The Three Sisters
+ emonavi: the eldest daughter, modeled on Adam.
+ emofact: the second daughter, modeled on Adafactor.
+ emolynx: the youngest daughter, modeled on Lion and Tiger.
+
+ emoclan: an integrated optimizer that assigns roles to the three sisters.
+
+ The Twin Cousins
+ emozeal: the elder twin, modeled on emofact.
+ emoneco: the younger twin, modeled on emolynx.
+
+ The emoclan integration served as the starting point for the three sisters' cousins.
+ emozeal and emoneco each choose their update method according to the situation.
+
+ All of them share the same "emotional mechanism."
+ emozeal gives the Adafactor lineage its passion.
+ emoneco gives the Lion lineage its suppleness.
+
+ Shadow switching function
+ emoclan, emozeal, and emoneco allow the shadow function to be enabled or disabled.
+
+ memo : "optimizer = EmoNeco(model.parameters(), lr=1e-3, use_shadow=False)"
+ Shadow can be turned off by passing use_shadow=False when constructing the optimizer.
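A minimal sketch of how these optimizers are typically driven, assuming EmoNeco is importable from emoneco.py in this repository (the import path and the toy model are illustrative). The closure only hands the already-computed loss to the emotional mechanism, and should_stop is read purely as a "quiet" signal, not as an automatic stop.

import torch
import torch.nn as nn
from emoneco import EmoNeco  # assumed import path for the file in this repository

model = nn.Linear(16, 1)
criterion = nn.MSELoss()
optimizer = EmoNeco(model.parameters(), lr=1e-3, use_shadow=False)  # shadow switched off

x, y = torch.randn(64, 16), torch.randn(64, 1)
for _ in range(200):
    optimizer.zero_grad()
    loss = criterion(model(x), y)
    loss.backward()
    optimizer.step(lambda: loss)  # the closure just reports the loss value
    if getattr(optimizer, "should_stop", False):
        break  # the flag only signals stillness; stopping is the caller's choice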
2Gv2_AMP-compatible/emoairy.py ADDED
@@ -0,0 +1,162 @@
+ import torch
+ from torch.optim import Optimizer
+ import math
+
+ """
+ EmoAiry v2.0 (250815) shadow-system v2.0 shadow-effect v1.0
+ AMP support completed (202507); p.data -> p already fixed
+ memo : "optimizer = EmoAiry(model.parameters(), lr=1e-3, use_shadow=True)"
+ Shadow can be switched with True / False when constructing the optimizer (currently False)
+ """
+
+ class EmoAiry(Optimizer):
+     # Class definition & initialization 🔸shadow switch: True (enabled) / False (disabled)
+     def __init__(self, params, lr=1e-3, betas=(0.9, 0.999),
+                  eps=1e-8, weight_decay=0.01, use_shadow: bool = False):
+         defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
+
+         super().__init__(params, defaults)
+
+         self.alpha_prev = getattr(self, 'alpha_prev', 1.0)
+         self._init_lr = lr
+         self.should_stop = False  # initialize the stop flag
+         self.use_shadow = use_shadow  # 🔸store the shadow-usage flag
+
+     # Emotional EMA update (tension and calm)
+     def _update_ema(self, state, loss_val):
+         ema = state.setdefault('ema', {})
+         ema['short'] = 0.3 * loss_val + 0.7 * ema.get('short', loss_val)
+         ema['long'] = 0.01 * loss_val + 0.99 * ema.get('long', loss_val)
+         return ema
+
+     # Emotional scalar (difference of the EMAs, smooth nonlinear scalar; tanh(5 * diff) sharpens the response)
+     def _compute_scalar(self, ema):
+         diff = ema['short'] - ema['long']
+         return math.tanh(5 * diff)
+
+     # Shadow mixing ratio (abs > 0.6: 60-100%, abs > 0.1: 10-60%, otherwise 0%); emosens behavior applied
+     # Old: shadow mixing ratio (> 0.6: 80-90%, < -0.6: 10%, abs > 0.3: 30%, otherwise 0%)
+     # Note: for scalar > +0.6 the old "return 0.7 (base) + 0.2 (span) * scalar" gave 0.82-0.9, which was wrong
+     # Fix 1: for |scalar| > 0.6, use "return base + (abs(scalar) - 0.6 (band start)) / band width * span"
+     # Fix 2: for |scalar| > 0.1, use "return base + (abs(scalar) - 0.1 (band start)) / band width * span"
+     # Kept as three stages so it can be tuned per task (as written, both bands reduce to ratio = abs(scalar); adjust using the above as a guide / shadow-effect currently applied)
+     def _decide_ratio(self, scalar):
+         if not self.use_shadow:
+             return 0.0  # 🔸always return a ratio of 0 when use_shadow is False
+         if abs(scalar) > 0.6:
+             return 0.6 + (abs(scalar) - 0.6) / 0.4 * 0.4  # was: return 0.7 + 0.2 * scalar
+         elif abs(scalar) > 0.1:
+             return 0.1 + (abs(scalar) - 0.1) / 0.5 * 0.5  # was: return 0.3
+         return 0.0
+
+     # Loss acquisition (loss_val is the numeric loss used for the emotional decision; parameters without gradients (no update needed) are skipped)
+     @torch.no_grad()
+     def step(self, closure=None):
+         loss = closure() if closure is not None else None
+         loss_val = loss.item() if loss is not None else 0.0
+
+         for group in self.param_groups:
+             for p in group['params']:
+                 if p.grad is None:
+                     continue
+
+                 grad = p.grad
+                 state = self.state[p]
+
+                 # Emotional EMA update and scalar generation (existing logic kept)
+                 ema = self._update_ema(state, loss_val)
+                 scalar = self._compute_scalar(ema)
+                 ratio = self._decide_ratio(scalar)  # 🔸ratio becomes 0 depending on use_shadow
+
+                 # shadow_param: updated only when needed (existing logic kept)
+                 # 🔸the shadow is touched only when self.use_shadow is True and ratio > 0
+                 if self.use_shadow and ratio > 0:
+                     if 'shadow' not in state:
+                         state['shadow'] = p.clone()
+                     else:
+                         p.mul_(1 - ratio).add_(state['shadow'], alpha=ratio)
+                         state['shadow'].lerp_(p, 0.05)
+
+                 # --- Gradient-correction logic ---
+                 # For 2-D or higher tensors, use a variance-based AB approximation
+                 if grad.dim() >= 2:
+                     # Filter threshold (exploration strength); tunable
+                     threshold = 1e-4 * (1 + abs(scalar))
+
+                     # Row-wise and column-wise mean squares (a lightweight approximation of the variance)
+                     r_sq = torch.mean(grad * grad, dim=tuple(range(1, grad.dim())), keepdim=True).add_(group['eps'])
+                     c_sq = torch.mean(grad * grad, dim=0, keepdim=True).add_(group['eps'])
+
+                     # Row-direction / column-direction exploration filters
+                     r_mask = (r_sq.pow(1/3) > threshold).float()  # row-direction mask
+                     c_mask = (c_sq.pow(1/3) > threshold).float()  # column-direction mask
+
+                     # Combine the row and column masks into the final per-parameter mask
+                     # (torch.matmul assumes 2-D tensors, so the original logic is adjusted here)
+                     update_mask = r_mask * c_mask
+
+                     # Compute an Adafactor-style update term
+                     beta1, beta2 = group['betas']
+                     eps = group['eps']
+
+                     # EMA-smoothed row and column statistics (the denom part of the original code)
+                     state.setdefault('exp_avg_r', torch.zeros_like(r_sq)).mul_(beta1).add_(torch.sqrt(r_sq), alpha=1 - beta1)
+                     state.setdefault('exp_avg_c', torch.zeros_like(c_sq)).mul_(beta1).add_(torch.sqrt(c_sq), alpha=1 - beta1)
+
+                     # Normalize by the square root of the product of the reconstructed approximations
+                     # (this plays the role of a second moment)
+                     denom = torch.sqrt(state['exp_avg_r'] * state['exp_avg_c']) + eps
+
+                     # Select the update terms: apply the mask built above to the usual grad / denom update
+                     update_term = (grad / denom) * update_mask
+
+                 # Gradient correction for 1-D (vector) parameters (close to a decoupled-weight-decay structure)
+                 else:
+                     # Apply a cube-root-based filter
+                     # The threshold controls the filter strength
+                     # 1e-4 is used here as an example; it is tunable
+                     threshold = 1e-4 * (1 + abs(scalar))
+
+                     exp_avg = state.setdefault('exp_avg', torch.zeros_like(p))
+                     exp_avg_sq = state.setdefault('exp_avg_sq', torch.zeros_like(p))
+                     beta1, beta2 = group['betas']
+
+                     # Compute Adam's first and second moments
+                     exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
+                     exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
+
+                     # Compute the usual Adam update term
+                     denom = exp_avg_sq.sqrt().add_(group['eps'])
+                     update_term = exp_avg / denom
+
+                     # Mask the entries where the cube root of the squared gradient exceeds the threshold
+                     filter_mask = (grad.pow(2).pow(1/3) > threshold).float()
+
+                     # Apply the mask to the update term
+                     update_term = update_term * filter_mask
+
+                 # Final parameter update (decoupled weight decay is also applied)
+                 p.add_(p, alpha=-group['weight_decay'] * group['lr'])
+                 p.add_(update_term, alpha=-group['lr'] * (1 - abs(scalar)))
+
+                 # --- Early-stop logic (existing logic kept) ---
+                 hist = self.state.setdefault('scalar_hist', [])
+                 hist.append(scalar)
+                 if len(hist) >= 33:
+                     hist.pop(0)
+
+                 # Early-stop decision
+                 if len(self.state['scalar_hist']) >= 32:
+                     buf = self.state['scalar_hist']
+                     avg_abs = sum(abs(s) for s in buf) / len(buf)
+                     std = sum((s - sum(buf)/len(buf))**2 for s in buf) / len(buf)
+                     if avg_abs < 0.05 and std < 0.005:
+                         self.should_stop = True
+
+         return loss
+
+ """
+ https://github.com/muooon/EmoNavi
+ Airy is inspired by Adafactor, and EmoFact,
+ and its VRAM-friendly design is something everyone loves.
+ """
2Gv2_AMP-compatible/emocats.py ADDED
@@ -0,0 +1,160 @@
+ import torch
+ from torch.optim import Optimizer
+ import math
+ from typing import Tuple, Callable, Union
+
+ """
+ EmoCats v2.0 (250815) shadow-system v2.0 shadow-effect v1.0
+ AMP support completed (202507); p.data -> p already fixed
+ memo : "optimizer = EmoCats(model.parameters(), lr=1e-3, use_shadow=True)"
+ Shadow can be switched with True / False when constructing the optimizer (currently False)
+ """
+
+ # Helper function (Lynx)
+ def exists(val):
+     return val is not None
+
+ class EmoCats(Optimizer):
+     # Class definition & initialization 🔸shadow switch: True (enabled) / False (disabled)
+     def __init__(self, params: Union[list, torch.nn.Module], lr=1e-3, betas=(0.9, 0.99),
+                  # Lynx-style betas added for compatibility (beta1 and beta2 for the Lynx part)
+                  eps=1e-8, weight_decay=0.01, decoupled_weight_decay: bool = False, use_shadow: bool = False):
+
+         defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
+         super().__init__(params, defaults)
+
+         # Stored so weight decay can be rescaled as in Lynx
+         self._init_lr = lr
+         self.should_stop = False  # initialize the stop flag
+         self.decoupled_wd = decoupled_weight_decay
+         self.use_shadow = use_shadow  # 🔸store the shadow-usage flag
+
+     # Emotional EMA update (tension and calm)
+     def _update_ema(self, state, loss_val):
+         ema = state.setdefault('ema', {})
+         ema['short'] = 0.3 * loss_val + 0.7 * ema.get('short', loss_val)
+         ema['long'] = 0.01 * loss_val + 0.99 * ema.get('long', loss_val)
+         return ema
+
+     # Emotional scalar (difference of the EMAs, smooth nonlinear scalar; tanh(5 * diff) sharpens the response)
+     def _compute_scalar(self, ema):
+         diff = ema['short'] - ema['long']
+         return math.tanh(5 * diff)
+
+     # Shadow mixing ratio (abs > 0.6: 60-100%, abs > 0.1: 10-60%, otherwise 0%); emosens behavior applied
+     # Old: shadow mixing ratio (> 0.6: 80-90%, < -0.6: 10%, abs > 0.3: 30%, otherwise 0%)
+     # Note: for scalar > +0.6 the old "return 0.7 (base) + 0.2 (span) * scalar" gave 0.82-0.9, which was wrong
+     # Fix 1: for |scalar| > 0.6, use "return base + (abs(scalar) - 0.6 (band start)) / band width * span"
+     # Fix 2: for |scalar| > 0.1, use "return base + (abs(scalar) - 0.1 (band start)) / band width * span"
+     # Kept as three stages so it can be tuned per task (adjust using the above as a guide / shadow-effect currently applied)
+     def _decide_ratio(self, scalar):
+         if not self.use_shadow:
+             return 0.0  # 🔸always return a ratio of 0 when use_shadow is False
+         if abs(scalar) > 0.6:
+             return 0.6 + (abs(scalar) - 0.6) / 0.4 * 0.4  # was: return 0.7 + 0.2 * scalar
+         elif abs(scalar) > 0.1:
+             return 0.1 + (abs(scalar) - 0.1) / 0.5 * 0.5  # was: return 0.3
+         return 0.0
+
+     # Loss acquisition (loss_val is the numeric loss used for the emotional decision; parameters without gradients (no update needed) are skipped)
+     @torch.no_grad()
+     def step(self, closure: Callable | None = None):  # type hint added for the closure
+         loss = None
+         if exists(closure):  # use the exists helper for consistency
+             with torch.enable_grad():
+                 loss = closure()
+         loss_val = loss.item() if loss is not None else 0.0
+
+         for group in self.param_groups:
+             # Extract the shared hyperparameters
+             lr, wd, beta1, beta2 = group['lr'], group['weight_decay'], *group['betas']
+
+             # Separate handling of weight decay (from Cats)
+             _wd_actual = wd
+             if self.decoupled_wd:
+                 _wd_actual /= self._init_lr  # decoupled weight-decay adjustment
+
+             for p in filter(lambda p: exists(p.grad), group['params']):  # filter for params that have gradients
+
+                 grad = p.grad  # use p.grad directly (.data is not needed for the computation)
+                 state = self.state[p]
+
+                 # EMA update and scalar generation (the EMA difference drives the spike ratio)
+                 ema = self._update_ema(state, loss_val)
+                 scalar = self._compute_scalar(ema)
+                 ratio = self._decide_ratio(scalar)  # 🔸ratio becomes 0 depending on use_shadow
+
+                 # shadow_param: updated only when needed (a dynamic history that follows the current value by 5% on spikes)
+                 # 🔸the shadow is touched only when self.use_shadow is True and ratio > 0
+                 if self.use_shadow and ratio > 0:
+                     if 'shadow' not in state:
+                         state['shadow'] = p.clone()
+                     else:
+                         p.mul_(1 - ratio).add_(state['shadow'], alpha=ratio)
+                         state['shadow'].lerp_(p, 0.05)
+                 # Alternative: update the shadow from the pre-update p (follow the current value by 5%)
+                 # p.mul_(1 - ratio).add_(state['shadow'], alpha=ratio)
+                 # EmoNavi: p = p * (1-ratio) + shadow * ratio
+
+                 # --- Start Cats Gradient Update Logic ---
+
+                 # Cats initialization (exp_avg)
+                 if 'exp_avg' not in state:
+                     state['exp_avg'] = torch.zeros_like(p)
+                 exp_avg = state['exp_avg']
+
+                 # Filter threshold scaled dynamically by the scalar
+                 threshold = 1e-4 * (1 + abs(scalar))
+
+                 # Use a fractional root of the gradient as the filter criterion
+                 # Lion-style updates care about the sign, so the absolute value of the gradient is used, not the gradient itself
+                 filter_strength = torch.abs(grad).pow(1/3)
+
+                 # Entries whose filter strength exceeds the threshold become True
+                 mask = torch.ge(filter_strength, threshold).float()
+
+                 # Stepweight decay (from lynx): p = p * (1 - lr * wd)
+                 # uses _wd_actual to honor decoupled_wd (EmoNavi applies wd last)
+                 p.mul_(1. - lr * _wd_actual)
+
+                 # Gradient blending
+                 # m_t = beta1 * exp_avg_prev + (1 - beta1) * grad
+                 blended_grad = grad.mul(1. - beta1).add_(exp_avg, alpha=beta1)
+
+                 # Compute the update: p = p - lr * sign(blended_grad)
+                 Cats_update = blended_grad.sign_()
+
+                 # Then multiply the update term by the filter mask
+                 filtered_Cats_update = Cats_update * mask
+
+                 # p: p = p - lr * sign(blended_grad)
+                 p.add_(filtered_Cats_update, alpha = -lr * (1 - abs(scalar)))
+
+                 # exp_avg = beta2 * exp_avg + (1 - beta2) * grad
+                 exp_avg.mul_(beta2).add_(grad, alpha = 1. - beta2)
+
+                 # --- End Cats Gradient Update Logic ---
+
+                 # Scalar history for early stop (shared buffer / at most 32 entries / stillness check)
+                 # this accesses self.state, not the per-parameter state
+                 hist = self.state.setdefault('scalar_hist', [])
+                 hist.append(scalar)
+                 if len(hist) >= 33:
+                     hist.pop(0)
+
+             # Early-stop decision (the signal of stillness) - This part is outside the inner loop
+             if len(self.state['scalar_hist']) >= 32:
+                 buf = self.state['scalar_hist']
+                 avg_abs = sum(abs(s) for s in buf) / len(buf)
+                 std = sum((s - sum(buf)/len(buf))**2 for s in buf) / len(buf)
+                 if avg_abs < 0.05 and std < 0.005:
+                     self.should_stop = True  # external code can read this flag
+
+         return loss
+
+ """
+ https://github.com/muooon/EmoNavi
+ Cats was developed with inspiration from Lion, Tiger, and emoneco,
+ which we deeply respect for their lightweight and intelligent design.
+ Cats also integrates EmoNAVI to enhance its capabilities.
+ """
2Gv2_AMP-compatible/emosens.py ADDED
@@ -0,0 +1,132 @@
+ import torch
+ from torch.optim import Optimizer
+ import math
+
+ """
+ EmoSens v2.0 (250815) shadow-system v2.0 shadow-effect v1.0
+ AMP support completed (202507); p.data -> p already fixed
+ memo : "optimizer = EmoSens(model.parameters(), lr=1e-3, use_shadow=True)"
+ Shadow can be switched with True / False when constructing the optimizer (currently False)
+ When shadow-system and shadow-effect are combined, the system part may occupy less VRAM? (total usage increases compared with navi)
+ """
+
+ class EmoSens(Optimizer):
+     # Class definition & initialization 🔸shadow switch: True (enabled) / False (disabled)
+     def __init__(self, params, lr=1e-3, betas=(0.9, 0.999),
+                  eps=1e-8, weight_decay=0.01, use_shadow: bool = False):
+         defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
+         super().__init__(params, defaults)
+         self._init_lr = lr
+         self.should_stop = False  # initialize the stop flag
+         self.use_shadow = use_shadow  # 🔸store the shadow-usage flag
+
+     # Emotional EMA update (tension and calm)
+     def _update_ema(self, state, loss_val):
+         ema = state.setdefault('ema', {})
+         ema['short'] = 0.3 * loss_val + 0.7 * ema.get('short', loss_val)
+         ema['long'] = 0.01 * loss_val + 0.99 * ema.get('long', loss_val)
+         return ema
+
+     # Emotional scalar (difference of the EMAs, smooth nonlinear scalar; tanh(5 * diff) sharpens the response)
+     def _compute_scalar(self, ema):
+         diff = ema['short'] - ema['long']
+         return math.tanh(5 * diff)
+
+     # Shadow mixing ratio (abs > 0.6: 60-100%, abs > 0.1: 10-60%, otherwise 0%); emosens behavior applied
+     # Old: shadow mixing ratio (> 0.6: 80-90%, < -0.6: 10%, abs > 0.3: 30%, otherwise 0%)
+     # Note: for scalar > +0.6 the old "return 0.7 (base) + 0.2 (span) * scalar" gave 0.82-0.9, which was wrong
+     # Fix 1: for |scalar| > 0.6, use "return base + (abs(scalar) - 0.6 (band start)) / band width * span"
+     # Fix 2: for |scalar| > 0.1, use "return base + (abs(scalar) - 0.1 (band start)) / band width * span"
+     # Kept as three stages so it can be tuned per task (adjust using the above as a guide / shadow-effect currently applied)
+     def _decide_ratio(self, scalar):
+         if not self.use_shadow:
+             return 0.0  # 🔸always return a ratio of 0 when use_shadow is False
+         if abs(scalar) > 0.6:
+             return 0.6 + (abs(scalar) - 0.6) / 0.4 * 0.4  # was: return 0.7 + 0.2 * scalar
+         elif abs(scalar) > 0.1:
+             return 0.1 + (abs(scalar) - 0.1) / 0.5 * 0.5  # was: return 0.3
+         return 0.0
+
+     # Loss acquisition (loss_val is the numeric loss used for the emotional decision; parameters without gradients (no update needed) are skipped)
+     @torch.no_grad()
+     def step(self, closure=None):
+         loss = closure() if closure is not None else None
+         loss_val = loss.item() if loss is not None else 0.0
+
+         for group in self.param_groups:
+             for p in group['params']:
+                 if p.grad is None:
+                     continue
+
+                 grad = p.grad
+                 state = self.state[p]
+
+                 # EMA update and scalar generation (the EMA difference drives the spike ratio)
+                 ema = self._update_ema(state, loss_val)
+                 scalar = self._compute_scalar(ema)
+                 ratio = self._decide_ratio(scalar)  # 🔸ratio becomes 0 depending on use_shadow
+
+                 # 🔸the shadow is touched only when self.use_shadow is True and ratio > 0
+                 if self.use_shadow and ratio > 0:
+                     if 'shadow' not in state:
+                         state['shadow'] = p.clone()
+                     else:
+                         p.mul_(1 - ratio).add_(state['shadow'], alpha=ratio)
+                         state['shadow'].lerp_(p, 0.05)
+
+                 # Scalar generation: the signal is the gap between the short and long EMA (the strength of the arousal)
+                 # Mixing ratio: computed only when the scalar exceeds a threshold (screening for a trustworthy emotional signal)
+                 # -> when the scalar is small, ratio = 0 and no shadow mixing takes place
+                 # -> the emotional mechanism fires only on a strong, trustworthy difference (an implicit confidence check)
+
+                 # Gradient correction using first and second moments (close to a decoupled-weight-decay structure)
+                 exp_avg = state.setdefault('exp_avg', torch.zeros_like(p))
+                 exp_avg_sq = state.setdefault('exp_avg_sq', torch.zeros_like(p))
+                 beta1, beta2 = group['betas']
+
+                 exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
+                 exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
+                 denom = exp_avg_sq.sqrt().add_(group['eps'])
+
+                 # Use a fractional root of the gradient as the filter criterion
+                 threshold = 1e-4 * (1 + abs(scalar))
+                 filter_strength = torch.abs(grad).pow(1/3)
+
+                 # Entries whose filter strength exceeds the threshold become True
+                 mask = torch.ge(filter_strength, threshold).float()
+
+                 # Compute the update amount
+                 update_term = exp_avg.div(denom)
+
+                 # Apply the mask so that only the surviving entries are updated
+                 filtered_update = update_term * mask
+
+                 # Final parameter update (decoupled weight decay is also applied)
+                 if group['weight_decay']:
+                     p.add_(p, alpha=-group['weight_decay'] * group['lr'])
+                 p.add_(filtered_update, alpha=-group['lr'] * (1 - abs(scalar)))
+
+                 # Signals externally that the emotional mechanism has settled and training is "sufficiently stable" (this is not automatic stopping logic)
+                 # Scalar history for early stop (shared buffer / at most 32 entries / stillness check)
+                 hist = self.state.setdefault('scalar_hist', [])
+                 hist.append(scalar)
+                 if len(hist) >= 33:
+                     hist.pop(0)
+
+                 # Early-stop decision (the signal of stillness)
+                 if len(self.state['scalar_hist']) >= 32:
+                     buf = self.state['scalar_hist']
+                     avg_abs = sum(abs(s) for s in buf) / len(buf)
+                     std = sum((s - sum(buf)/len(buf))**2 for s in buf) / len(buf)
+                     if avg_abs < 0.05 and std < 0.005:
+                         self.should_stop = True  # 💡 external code can read this flag
+
+         # All this does is set the flag should_stop = True once the scalar values have stayed quiet for 32 steps
+
+         return loss
+
+ """
+ https://github.com/muooon/EmoNavi
+ An emotion-driven optimizer that feels loss and navigates accordingly.
+ Don't think. Feel. Don't stop. Keep running. Believe in what's beyond.
+ """