NariLabs committed on
Commit
be4f9db
·
verified ·
1 Parent(s): a43a78d

Fix runtime weights schedule for Dia2-1B

Browse files
Files changed (1) hide show
  1. config.json +64 -10
config.json CHANGED
@@ -12,10 +12,38 @@
12
  "action_pad_token_id": 0,
13
  "action_new_word_token_id": 1,
14
  "delay_pattern": [
15
- 16, 18, 18, 18, 18, 18, 18, 18,
16
- 18, 18, 18, 18, 18, 18, 18, 18,
17
- 18, 18, 18, 18, 18, 18, 18, 18,
18
- 18, 18, 18, 18, 18, 18, 18, 18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  ],
20
  "first_word_min_start": 3,
21
  "max_pad": 8,
@@ -49,15 +77,41 @@
49
  "dropout": 0.0,
50
  "rope_min_timescale": 1,
51
  "rope_max_timescale": 10000.0,
52
- "normalization_layer_epsilon": 0.000001
53
  },
54
  "runtime": {
55
  "weights_schedule": [
56
- 0, 0,
57
- 1, 1, 1, 1,
58
- 2, 2, 2, 2, 2, 2, 2, 2,
59
- 3, 3, 3, 3, 3, 3, 3, 3,
60
- 4, 4, 4, 4, 4, 4, 4, 4, 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  ],
62
  "max_context_steps": 1500
63
  }
 
12
  "action_pad_token_id": 0,
13
  "action_new_word_token_id": 1,
14
  "delay_pattern": [
15
+ 16,
16
+ 18,
17
+ 18,
18
+ 18,
19
+ 18,
20
+ 18,
21
+ 18,
22
+ 18,
23
+ 18,
24
+ 18,
25
+ 18,
26
+ 18,
27
+ 18,
28
+ 18,
29
+ 18,
30
+ 18,
31
+ 18,
32
+ 18,
33
+ 18,
34
+ 18,
35
+ 18,
36
+ 18,
37
+ 18,
38
+ 18,
39
+ 18,
40
+ 18,
41
+ 18,
42
+ 18,
43
+ 18,
44
+ 18,
45
+ 18,
46
+ 18
47
  ],
48
  "first_word_min_start": 3,
49
  "max_pad": 8,
 
77
  "dropout": 0.0,
78
  "rope_min_timescale": 1,
79
  "rope_max_timescale": 10000.0,
80
+ "normalization_layer_epsilon": 1e-06
81
  },
82
  "runtime": {
83
  "weights_schedule": [
84
+ 0,
85
+ 0,
86
+ 0,
87
+ 0,
88
+ 0,
89
+ 0,
90
+ 0,
91
+ 0,
92
+ 1,
93
+ 1,
94
+ 1,
95
+ 1,
96
+ 1,
97
+ 1,
98
+ 1,
99
+ 1,
100
+ 2,
101
+ 2,
102
+ 2,
103
+ 2,
104
+ 2,
105
+ 2,
106
+ 2,
107
+ 2,
108
+ 2,
109
+ 2,
110
+ 2,
111
+ 2,
112
+ 2,
113
+ 2,
114
+ 2
115
  ],
116
  "max_context_steps": 1500
117
  }