final cooldown checkpoint
Browse files
- config.json +8 -5
- generation_config.json +2 -0
- model-00001-of-00013.safetensors +1 -1
- model-00002-of-00013.safetensors +1 -1
- model-00003-of-00013.safetensors +1 -1
- model-00004-of-00013.safetensors +1 -1
- model-00005-of-00013.safetensors +1 -1
- model-00006-of-00013.safetensors +1 -1
- model-00007-of-00013.safetensors +1 -1
- model-00008-of-00013.safetensors +1 -1
- model-00009-of-00013.safetensors +1 -1
- model-00010-of-00013.safetensors +1 -1
- model-00011-of-00013.safetensors +1 -1
- model-00012-of-00013.safetensors +1 -1
- model-00013-of-00013.safetensors +1 -1
config.json
CHANGED
|
@@ -9,8 +9,10 @@
|
|
| 9 |
"AutoModel": "modeling_rnd.RND1Model",
|
| 10 |
"AutoModelForMaskedLM": "modeling_rnd.RND1LM"
|
| 11 |
},
|
|
|
|
| 12 |
"decoder_sparse_step": 1,
|
| 13 |
"dtype": "bfloat16",
|
|
|
|
| 14 |
"head_dim": 128,
|
| 15 |
"hidden_act": "silu",
|
| 16 |
"hidden_size": 2048,
|
|
@@ -18,12 +20,13 @@
|
|
| 18 |
"intermediate_size": 6144,
|
| 19 |
"is_causal": false,
|
| 20 |
"mask_token_id": 151669,
|
| 21 |
-
"max_position_embeddings":
|
|
|
|
| 22 |
"mlp_only_layers": [],
|
| 23 |
"model_type": "rnd1",
|
| 24 |
"moe_backend": "hf",
|
| 25 |
"moe_intermediate_size": 768,
|
| 26 |
-
"norm_topk_prob":
|
| 27 |
"num_attention_heads": 32,
|
| 28 |
"num_diffusion_steps": 256,
|
| 29 |
"num_experts": 128,
|
|
@@ -32,10 +35,10 @@
|
|
| 32 |
"num_key_value_heads": 4,
|
| 33 |
"output_router_logits": false,
|
| 34 |
"rms_norm_eps": 1e-06,
|
| 35 |
-
"rope_scaling":
|
| 36 |
-
"rope_theta":
|
| 37 |
"router_aux_loss_coef": 0.001,
|
| 38 |
-
"sliding_window":
|
| 39 |
"tie_word_embeddings": false,
|
| 40 |
"transformers_version": "4.56.1",
|
| 41 |
"use_cache": false,
|
|
|
|
| 9 |
"AutoModel": "modeling_rnd.RND1Model",
|
| 10 |
"AutoModelForMaskedLM": "modeling_rnd.RND1LM"
|
| 11 |
},
|
| 12 |
+
"bos_token_id": 151643,
|
| 13 |
"decoder_sparse_step": 1,
|
| 14 |
"dtype": "bfloat16",
|
| 15 |
+
"eos_token_id": 151645,
|
| 16 |
"head_dim": 128,
|
| 17 |
"hidden_act": "silu",
|
| 18 |
"hidden_size": 2048,
|
|
|
|
| 20 |
"intermediate_size": 6144,
|
| 21 |
"is_causal": false,
|
| 22 |
"mask_token_id": 151669,
|
| 23 |
+
"max_position_embeddings": 40960,
|
| 24 |
+
"max_window_layers": 48,
|
| 25 |
"mlp_only_layers": [],
|
| 26 |
"model_type": "rnd1",
|
| 27 |
"moe_backend": "hf",
|
| 28 |
"moe_intermediate_size": 768,
|
| 29 |
+
"norm_topk_prob": true,
|
| 30 |
"num_attention_heads": 32,
|
| 31 |
"num_diffusion_steps": 256,
|
| 32 |
"num_experts": 128,
|
|
|
|
| 35 |
"num_key_value_heads": 4,
|
| 36 |
"output_router_logits": false,
|
| 37 |
"rms_norm_eps": 1e-06,
|
| 38 |
+
"rope_scaling": false,
|
| 39 |
+
"rope_theta": 1000000.0,
|
| 40 |
"router_aux_loss_coef": 0.001,
|
| 41 |
+
"sliding_window": false,
|
| 42 |
"tie_word_embeddings": false,
|
| 43 |
"transformers_version": "4.56.1",
|
| 44 |
"use_cache": false,
|
generation_config.json
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"_from_model_config": true,
|
|
|
|
|
|
|
| 3 |
"transformers_version": "4.56.1",
|
| 4 |
"use_cache": false
|
| 5 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 151643,
|
| 4 |
+
"eos_token_id": 151645,
|
| 5 |
"transformers_version": "4.56.1",
|
| 6 |
"use_cache": false
|
| 7 |
}
|
model-00001-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997184968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f5ccbc78e9912abbaec09c2de65f27762db9beda37b27d1cc54de74c060ae7a
|
| 3 |
size 4997184968
|
model-00002-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997741608
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69a5d5cc063ac362e9533d118352522c67fc5035aeb5c8b42467b00ffdab3e48
|
| 3 |
size 4997741608
|
model-00003-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997742208
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8fd57989834b9bee8522404ae5e0b2eb0b3c374636d6976b74df50a39a6e9872
|
| 3 |
size 4997742208
|
model-00004-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997743184
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a6ab1611225c54edda60cb06b1d14411d21a34fbc98fd9096da6a97a010c55e
|
| 3 |
size 4997743184
|
model-00005-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997743184
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92591bdeb901e15ec018b660c51da0665bb37a38862fcab563eeafc662274a27
|
| 3 |
size 4997743184
|
model-00006-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997743184
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4eb0816513087c15eded0dca650ac71e9d3efbcc0fa99f45fa03ea53e8c4fe98
|
| 3 |
size 4997743184
|
model-00007-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997743184
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c728642f7f88dc5a712aa700901baa52eca3247091295605d08096f4fe4c2bc
|
| 3 |
size 4997743184
|
model-00008-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997743184
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd07dd840c4162bd91113117c20f498637a74b7d87ab530eb5f42d2f7f3531df
|
| 3 |
size 4997743184
|
model-00009-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997743184
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a006b84b1648c9dc41f9ffc87ff82396ad8d3e296ebda84353fe386beafbab02
|
| 3 |
size 4997743184
|
model-00010-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997743184
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b788bb0892eea1e80a1157dc2ad2903130a8949189585aa421e6a14fc18fd37a
|
| 3 |
size 4997743184
|
model-00011-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997743184
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e62130ef5a954e7bb0d525d4beccdec5b3c132eb55b2823cc9cb0d35be5f5b0
|
| 3 |
size 4997743184
|
model-00012-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997743184
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7efc847ca9e252b25353a00c40c2fc2fe0ffd2e6ad0adc21d8663d55afc36684
|
| 3 |
size 4997743184
|
model-00013-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1094220288
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:587dee59580965c52514d3033f5a85ef717b37d1699a29f07c517842238f6ec1
|
| 3 |
size 1094220288
|