Upload 4 files

Changed files:
- AMP-compatible/emoclan.py (+21 / -14)
- AMP-compatible/emoneco.py (+12 / -5)
- AMP-compatible/emozeal.py (+13 / -4)
- AMP-compatible/profile.txt (+15 / -7)
AMP-compatible/emoclan.py
CHANGED

@@ -5,6 +5,8 @@ from typing import Callable, Union, Dict, Any, Tuple
 
 """
 AMP support completed (202507); p.data -> p already fixed
+memo : "optimizer = EmoClan(model.parameters(), lr=1e-3, use_shadow=False)"
+passing use_shadow=False when creating the optimizer turns shadow off
 """
 
 # Helper function
@@ -12,13 +14,15 @@ def exists(val):
     return val is not None
 
 class EmoClan(Optimizer):
+    # Class definition & init - 🔸toggle Shadow True (enabled) / False (disabled)
     def __init__(self, params: Union[list, torch.nn.Module],
                  lr: float = 1e-3,
                  betas: Tuple[float, float] = (0.9, 0.999),
                  eps: float = 1e-8,
                  weight_decay: float = 0.01,
                  lynx_betas: Tuple[float, float] = (0.9, 0.99),  # Lynx-specific betas
-                 decoupled_weight_decay: bool = False
+                 decoupled_weight_decay: bool = False,
+                 use_shadow: bool = True
                  ):
 
         if not 0.0 <= lr:
@@ -42,6 +46,7 @@ class EmoClan(Optimizer):
 
         self._init_lr = lr  # saved for decoupled weight decay (for Lynx)
         self.should_stop = False  # global stop flag
+        self.use_shadow = use_shadow  # the EmoClan instance itself holds use_shadow
 
     # --- Emotion Mechanism ---
     def _update_ema(self, param_state: Dict[str, Any], loss_val: float) -> Dict[str, float]:
@@ -213,19 +218,21 @@ class EmoClan(Optimizer):
                 # Each parameter's state['ema'] is updated from its loss_val (shared across all parameters).
                 # However, since the current loss_val is a single value received from the closure,
                 # the overall emotion is used rather than a truly per-parameter "emotion".
-                [13 removed lines: the previous shadow-update block, applied unconditionally]
+                # Run the shadow-related processing only when use_shadow is True
+                if self.use_shadow:
+                    param_ema = self._update_ema(param_state, loss_val)
+                    param_scalar = self._compute_scalar(param_ema)  # per-parameter scalar
+
+                    ratio = self._decide_ratio(param_scalar)  # per-parameter ratio
+
+                    if ratio > 0:
+                        if 'shadow' not in param_state:
+                            param_state['shadow'] = p.clone()
+                        else:
+                            # Blend the shadow into the current value
+                            p.mul_(1 - ratio).add_(param_state['shadow'], alpha=ratio)
+                            # Let the shadow track the current value
+                            param_state['shadow'].lerp_(p, 0.05)
 
                 # --- Optimizer selection and gradient update ---
                 # Decide the phase based on the overall emotion scalar recorded in global_scalar_hist
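For readers skimming the diff, here is a minimal, self-contained sketch of the ratio-gated shadow blend that the new use_shadow flag guards. It is not the EmoClan implementation itself; blend_shadow and its arguments are illustrative names introduced only for this note.

# Minimal sketch (not the actual EmoClan code) of the ratio-gated shadow blend.
# blend_shadow and its arguments are illustrative names, not part of the library.
import torch

def blend_shadow(p: torch.Tensor, state: dict, ratio: float, use_shadow: bool = True) -> None:
    """Blend a parameter toward its stored shadow copy, then let the shadow track it."""
    if not (use_shadow and ratio > 0):
        return  # shadow disabled, or no spike detected
    if 'shadow' not in state:
        state['shadow'] = p.clone()  # first use: snapshot the current weights
    else:
        # pull the parameter back toward the shadow by `ratio` ...
        p.mul_(1 - ratio).add_(state['shadow'], alpha=ratio)
        # ... then let the shadow follow the blended parameter by 5%
        state['shadow'].lerp_(p, 0.05)

p = torch.randn(4)
state = {}
blend_shadow(p, state, ratio=0.8)                      # first call only creates the shadow
blend_shadow(p, state, ratio=0.8)                      # later calls blend and update it
blend_shadow(p, state, ratio=0.8, use_shadow=False)    # no-op when shadow is off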
AMP-compatible/emoneco.py
CHANGED

@@ -5,6 +5,8 @@ from typing import Tuple, Callable, Union
 
 """
 AMP support completed (202507); p.data -> p already fixed
+memo : "optimizer = EmoNeco(model.parameters(), lr=1e-3, use_shadow=False)"
+passing use_shadow=False when creating the optimizer turns shadow off
 """
 
 # Helper function (Lynx)
@@ -15,10 +17,10 @@ def softsign(x):
     return x / (1 + x.abs())
 
 class EmoNeco(Optimizer):
-    # Class definition & init
+    # Class definition & init - 🔸toggle Shadow True (enabled) / False (disabled)
     def __init__(self, params: Union[list, torch.nn.Module], lr=1e-3, betas=(0.9, 0.99),
                  # betas added for neco / compatibility (neco beta1 and beta2)
-                 eps=1e-8, weight_decay=0.01, decoupled_weight_decay: bool = False):
+                 eps=1e-8, weight_decay=0.01, decoupled_weight_decay: bool = False, use_shadow: bool = True):
 
         defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
         super().__init__(params, defaults)
@@ -27,6 +29,7 @@ class EmoNeco(Optimizer):
         self._init_lr = lr
         self.decoupled_wd = decoupled_weight_decay
         self.should_stop = False  # initialize the stop flag
+        self.use_shadow = use_shadow  # 🔸store the shadow usage flag
 
     # Emotion EMA update (tension and calm)
     def _update_ema(self, state, loss_val):
@@ -42,6 +45,9 @@ class EmoNeco(Optimizer):
 
     # Shadow blend ratio (> 0.6: 70-90%, < -0.6: 10%, abs > 0.3: 30%, otherwise: 0%)
    def _decide_ratio(self, scalar):
+        # 🔸always return a ratio of 0 when use_shadow is False
+        if not self.use_shadow:
+            return 0.0
         if scalar > 0.6:
             return 0.7 + 0.2 * scalar
         elif scalar < -0.6:
@@ -76,10 +82,11 @@ class EmoNeco(Optimizer):
                 # EMA update and scalar generation (build a scalar from the EMA difference and decide the spike ratio)
                 ema = self._update_ema(state, loss_val)
                 scalar = self._compute_scalar(ema)
-                ratio = self._decide_ratio(scalar)
+                ratio = self._decide_ratio(scalar)  # 🔸ratio becomes 0 depending on use_shadow
 
                 # shadow_param: updated only when needed (a dynamic history that lets the shadow track the current value by 5% at spikes)
-                if ratio > 0:
+                # 🔸update shadow only when self.use_shadow is True and ratio > 0
+                if self.use_shadow and ratio > 0:
                     if 'shadow' not in state:
                         state['shadow'] = p.clone()
                     else:
@@ -144,7 +151,7 @@ class EmoNeco(Optimizer):
 
 """
 https://github.com/muooon/EmoNavi
-Neco was developed with inspiration from Lion, Tiger, Cautious, softsign, and
+Neco was developed with inspiration from Lion, Tiger, Cautious, softsign, and EmoLynx,
 which we deeply respect for their lightweight and intelligent design.
 Neco also integrates EmoNAVI to enhance its capabilities.
 """
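As a reading aid, the ratio rule spelled out in the comment above ("> 0.6: 70-90%, < -0.6: 10%, abs > 0.3: 30%, otherwise 0%") can be sketched as a standalone function. The > 0.6 branch and the use_shadow short-circuit come straight from the diff; the return values of the branches not shown in the hunk are assumptions read off that comment, not the actual EmoNeco code.

# Hedged sketch of the blend-ratio rule; the values for the branches not shown
# in the hunk (0.1 and 0.3) are assumptions taken from the comment above.
def decide_ratio(scalar: float, use_shadow: bool = True) -> float:
    if not use_shadow:                  # the new flag short-circuits every branch
        return 0.0
    if scalar > 0.6:
        return 0.7 + 0.2 * scalar       # 70-90%, growing with the emotion scalar
    elif scalar < -0.6:
        return 0.1                      # 10% (assumed from the comment)
    elif abs(scalar) > 0.3:
        return 0.3                      # 30% (assumed from the comment)
    return 0.0                          # calm phase: no shadow blending

assert decide_ratio(0.9) == 0.7 + 0.2 * 0.9
assert decide_ratio(0.9, use_shadow=False) == 0.0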
AMP-compatible/emozeal.py
CHANGED

@@ -4,6 +4,8 @@ import math
 
 """
 AMP support completed (202507); p.data -> p already fixed
+memo : "optimizer = EmoZeal(model.parameters(), lr=1e-3, use_shadow=False)"
+passing use_shadow=False when creating the optimizer turns shadow off
 """
 
 # Soft Sign function
@@ -11,14 +13,17 @@ def softsign(x):
     return x / (1 + x.abs())
 
 class EmoZeal(Optimizer):
-    # Class definition & init
+    # Class definition & init - 🔸toggle Shadow True (enabled) / False (disabled)
     def __init__(self, params, lr=1e-3, betas=(0.9, 0.999),
-                 eps=1e-8, weight_decay=0.01):
+                 eps=1e-8, weight_decay=0.01, use_shadow: bool = True):
         defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
+
         super().__init__(params, defaults)
+
         self.alpha_prev = getattr(self, 'alpha_prev', 1.0)
         self._init_lr = lr
         self.should_stop = False  # initialize the stop flag
+        self.use_shadow = use_shadow  # 🔸store the shadow usage flag
 
     # Emotion EMA update (tension and calm)
     def _update_ema(self, state, loss_val):
@@ -34,6 +39,9 @@ class EmoZeal(Optimizer):
 
     # Shadow blend ratio (> 0.6: 70-90%, < -0.6: 10%, abs > 0.3: 30%, otherwise: 0%)
     def _decide_ratio(self, scalar):
+        # 🔸always return a ratio of 0 when use_shadow is False
+        if not self.use_shadow:
+            return 0.0
         if scalar > 0.6:
            return 0.7 + 0.2 * scalar
         elif scalar < -0.6:
@@ -59,10 +67,11 @@ class EmoZeal(Optimizer):
                 # Emotion EMA update and scalar generation (existing logic kept)
                 ema = self._update_ema(state, loss_val)
                 scalar = self._compute_scalar(ema)
-                ratio = self._decide_ratio(scalar)
+                ratio = self._decide_ratio(scalar)  # 🔸ratio becomes 0 depending on use_shadow
 
                 # shadow_param: updated only when needed (existing logic kept)
-                if ratio > 0:
+                # 🔸update shadow only when self.use_shadow is True and ratio > 0
+                if self.use_shadow and ratio > 0:
                     if 'shadow' not in state:
                         state['shadow'] = p.clone()
                     else:
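Taken together, the three files now accept the same constructor flag. A short usage sketch follows; the import paths are assumptions based on the AMP-compatible/ layout above and may need adjusting to your checkout.

# Usage sketch: the new use_shadow flag on all three optimizers.
# Import paths are assumptions based on the file layout shown above.
import torch.nn as nn
from emoclan import EmoClan
from emoneco import EmoNeco
from emozeal import EmoZeal

model = nn.Linear(16, 4)

opt_default   = EmoNeco(model.parameters(), lr=1e-3)                    # shadow on (default: use_shadow=True)
opt_no_shadow = EmoZeal(model.parameters(), lr=1e-3, use_shadow=False)  # shadow off
opt_clan      = EmoClan(model.parameters(), lr=1e-3, use_shadow=False)  # same flag on the integrated optimizer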
AMP-compatible/profile.txt
CHANGED

@@ -14,15 +14,15 @@ Through our efforts to enhance this, we developed a novel mechanism: the emotion
 We'll introduce them in order, starting with emonavi.
 
 三姉妹 / The Three Sisters
-emonavi:長女/Adam参考
-emofact:次女/Adafactor参考
-emolynx:三女/Lion・Tiger参考
+emonavi:長女/Adam参考 The eldest daughter, referencing Adam.
+emofact:次女/Adafactor参考 The second daughter, referencing Adafactor.
+emolynx:三女/Lion・Tiger参考 The youngest daughter, referencing Lion and Tiger.
 
-emoclan:統合/三姉妹に役割分担をさせた統合型
+emoclan:統合/三姉妹に役割分担をさせた統合型 An integrated model where roles are assigned to the three sisters.
 
 従妹の双子 / Cousins of the Three Sisters
-emozeal:双子の姉/emofact参考
-emoneco:双子の妹/emolynx参考
+emozeal:双子の姉/emofact参考 The elder twin sister, referencing emofact.
+emoneco:双子の妹/emolynx参考 The younger twin sister, referencing emolynx.
 
 emoclanという統合から三姉妹の従妹へ発展します
 emozeal と emoneco はそれぞれ場面に応じて更新方法を選択します
@@ -34,4 +34,12 @@ emozeal は Adafactor系に情熱を持たせました
 emoneco は Lion系にしなやかさを持たせました
 Each possesses the same "emotional mechanism."
 emozeal imbues Adafactor-based models with passion.
-emoneco instills flexibility in Lion-based models.
+emoneco instills flexibility in Lion-based models.
+
+shadow 切替機能 / shadow switching function
+emoclan、emozeal、emoneco、は、shadow 機能の 有効/無効 切替を可能にしました
+The shadow function can now be enabled or disabled in emoclan, emozeal, and emoneco.
+
+memo : "optimizer = EmoNeco(model.parameters(), lr=1e-3, use_shadow=False)"
+optimizer 指定の際に False にすることで shadow をオフにできる
+Shadow can be turned off by setting it to False when specifying the optimizer.