jeromeku committed on
Commit
939a16f
·
verified ·
1 Parent(s): f790605

final cooldown checkpoint

Browse files
config.json CHANGED
@@ -9,8 +9,10 @@
9
  "AutoModel": "modeling_rnd.RND1Model",
10
  "AutoModelForMaskedLM": "modeling_rnd.RND1LM"
11
  },
 
12
  "decoder_sparse_step": 1,
13
  "dtype": "bfloat16",
 
14
  "head_dim": 128,
15
  "hidden_act": "silu",
16
  "hidden_size": 2048,
@@ -18,12 +20,13 @@
18
  "intermediate_size": 6144,
19
  "is_causal": false,
20
  "mask_token_id": 151669,
21
- "max_position_embeddings": 32768,
 
22
  "mlp_only_layers": [],
23
  "model_type": "rnd1",
24
  "moe_backend": "hf",
25
  "moe_intermediate_size": 768,
26
- "norm_topk_prob": false,
27
  "num_attention_heads": 32,
28
  "num_diffusion_steps": 256,
29
  "num_experts": 128,
@@ -32,10 +35,10 @@
32
  "num_key_value_heads": 4,
33
  "output_router_logits": false,
34
  "rms_norm_eps": 1e-06,
35
- "rope_scaling": null,
36
- "rope_theta": 10000.0,
37
  "router_aux_loss_coef": 0.001,
38
- "sliding_window": null,
39
  "tie_word_embeddings": false,
40
  "transformers_version": "4.56.1",
41
  "use_cache": false,
 
9
  "AutoModel": "modeling_rnd.RND1Model",
10
  "AutoModelForMaskedLM": "modeling_rnd.RND1LM"
11
  },
12
+ "bos_token_id": 151643,
13
  "decoder_sparse_step": 1,
14
  "dtype": "bfloat16",
15
+ "eos_token_id": 151645,
16
  "head_dim": 128,
17
  "hidden_act": "silu",
18
  "hidden_size": 2048,
 
20
  "intermediate_size": 6144,
21
  "is_causal": false,
22
  "mask_token_id": 151669,
23
+ "max_position_embeddings": 40960,
24
+ "max_window_layers": 48,
25
  "mlp_only_layers": [],
26
  "model_type": "rnd1",
27
  "moe_backend": "hf",
28
  "moe_intermediate_size": 768,
29
+ "norm_topk_prob": true,
30
  "num_attention_heads": 32,
31
  "num_diffusion_steps": 256,
32
  "num_experts": 128,
 
35
  "num_key_value_heads": 4,
36
  "output_router_logits": false,
37
  "rms_norm_eps": 1e-06,
38
+ "rope_scaling": false,
39
+ "rope_theta": 1000000.0,
40
  "router_aux_loss_coef": 0.001,
41
+ "sliding_window": false,
42
  "tie_word_embeddings": false,
43
  "transformers_version": "4.56.1",
44
  "use_cache": false,
generation_config.json CHANGED
@@ -1,5 +1,7 @@
1
  {
2
  "_from_model_config": true,
 
 
3
  "transformers_version": "4.56.1",
4
  "use_cache": false
5
  }
 
1
  {
2
  "_from_model_config": true,
3
+ "bos_token_id": 151643,
4
+ "eos_token_id": 151645,
5
  "transformers_version": "4.56.1",
6
  "use_cache": false
7
  }
model-00001-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03b400077d3f0edb8a8136da9b13fc95bc7d3d1bb0400d3c082a21f2abaab4ee
3
  size 4997184968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f5ccbc78e9912abbaec09c2de65f27762db9beda37b27d1cc54de74c060ae7a
3
  size 4997184968
model-00002-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4449a5216d2d7b9beb002ee451113b868a3816d3c6466274b2f0a07517044e3
3
  size 4997741608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69a5d5cc063ac362e9533d118352522c67fc5035aeb5c8b42467b00ffdab3e48
3
  size 4997741608
model-00003-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:794b3ec6cb6570c3258513d7217457be850b2841b7536e84176df6076f0be147
3
  size 4997742208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fd57989834b9bee8522404ae5e0b2eb0b3c374636d6976b74df50a39a6e9872
3
  size 4997742208
model-00004-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2463509491c50d87722517c8dc69da0a043d9519b745908e687d29978b6dab44
3
  size 4997743184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a6ab1611225c54edda60cb06b1d14411d21a34fbc98fd9096da6a97a010c55e
3
  size 4997743184
model-00005-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0192397f6d8626905278484d84118fc3f69f10926b0c36150cdb752a4c37de6
3
  size 4997743184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92591bdeb901e15ec018b660c51da0665bb37a38862fcab563eeafc662274a27
3
  size 4997743184
model-00006-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a750d721151f37922d6f7be4882cc4c545a2a62484a46fdeb104d4241162e49
3
  size 4997743184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4eb0816513087c15eded0dca650ac71e9d3efbcc0fa99f45fa03ea53e8c4fe98
3
  size 4997743184
model-00007-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c87e2060917fbfb40fb09f015415136439905809e85049c060b5eccf2892b93
3
  size 4997743184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c728642f7f88dc5a712aa700901baa52eca3247091295605d08096f4fe4c2bc
3
  size 4997743184
model-00008-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20773bfbf58315505103c529d7d097cdcba45c2f9746755b04ccd3bc3146acc8
3
  size 4997743184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd07dd840c4162bd91113117c20f498637a74b7d87ab530eb5f42d2f7f3531df
3
  size 4997743184
model-00009-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40b8b76b9a729948ec96618f7e8e8dafcac146fabb1d8e382924f8aee2d61c7f
3
  size 4997743184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a006b84b1648c9dc41f9ffc87ff82396ad8d3e296ebda84353fe386beafbab02
3
  size 4997743184
model-00010-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e4e4bbe9f0eccd26c8bf8f29ccbd4d61a02f07297ac14d72e57bf0084006e0d
3
  size 4997743184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b788bb0892eea1e80a1157dc2ad2903130a8949189585aa421e6a14fc18fd37a
3
  size 4997743184
model-00011-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf791f2c57d28e2565d6d6950898dfe3bcfd69fba1197987c9a289c5f862e378
3
  size 4997743184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e62130ef5a954e7bb0d525d4beccdec5b3c132eb55b2823cc9cb0d35be5f5b0
3
  size 4997743184
model-00012-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1dcd7fc3c80e517858c24d3081592d5053837a999d098bb58511a63de62d6bc2
3
  size 4997743184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7efc847ca9e252b25353a00c40c2fc2fe0ffd2e6ad0adc21d8663d55afc36684
3
  size 4997743184
model-00013-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcdbcdc096ac2b8bfb4d399e7b776a24ae5b3bb44f5f93e8fb7501fc8884257d
3
  size 1094220288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:587dee59580965c52514d3033f5a85ef717b37d1699a29f07c517842238f6ec1
3
  size 1094220288