thanhh12 committed
Commit d7ce542 · verified · 1 Parent(s): 582206e

Upload from Google Colab
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ imgs/humen_eval.png filter=lfs diff=lfs merge=lfs -text
+ imgs/model_comparsion.png filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,47 @@
+ OpenMDW License Agreement, version 1.0 (OpenMDW-1.0)
+
+ By exercising rights granted to you under this agreement, you accept and agree
+ to its terms.
+
+ As used in this agreement, "Model Materials" means the materials provided to
+ you under this agreement, consisting of: (1) one or more machine learning
+ models (including architecture and parameters); and (2) all related artifacts
+ (including associated data, documentation and software) that are provided to
+ you hereunder.
+
+ Subject to your compliance with this agreement, permission is hereby granted,
+ free of charge, to deal in the Model Materials without restriction, including
+ under all copyright, patent, database, and trade secret rights included or
+ embodied therein.
+
+ If you distribute any portion of the Model Materials, you shall retain in your
+ distribution (1) a copy of this agreement, and (2) all copyright notices and
+ other notices of origin included in the Model Materials that are applicable to
+ your distribution.
+ If you file, maintain, or voluntarily participate in a lawsuit against any
+ person or entity asserting that the Model Materials directly or indirectly
+ infringe any patent, then all rights and grants made to you hereunder are
+ terminated, unless that lawsuit was in response to a corresponding lawsuit
+ first brought against you.
+
+ This agreement does not impose any restrictions or obligations with respect to
+ any use, modification, or sharing of any outputs generated by using the Model
+ Materials.
+
+ THE MODEL MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE, TITLE, NONINFRINGEMENT, ACCURACY, OR THE
+ ABSENCE OF LATENT OR OTHER DEFECTS OR ERRORS, WHETHER OR NOT DISCOVERABLE, ALL
+ TO THE GREATEST EXTENT PERMISSIBLE UNDER APPLICABLE LAW.
+
+ YOU ARE SOLELY RESPONSIBLE FOR (1) CLEARING RIGHTS OF OTHER PERSONS THAT MAY
+ APPLY TO THE MODEL MATERIALS OR ANY USE THEREOF, INCLUDING WITHOUT LIMITATION
+ ANY PERSON'S COPYRIGHTS OR OTHER RIGHTS INCLUDED OR EMBODIED IN THE MODEL
+ MATERIALS; (2) OBTAINING ANY NECESSARY CONSENTS, PERMISSIONS OR OTHER RIGHTS
+ REQUIRED FOR ANY USE OF THE MODEL MATERIALS; OR (3) PERFORMING ANY DUE
+ DILIGENCE OR UNDERTAKING ANY OTHER INVESTIGATIONS INTO THE MODEL MATERIALS OR
+ ANYTHING INCORPORATED OR EMBODIED THEREIN.
+ IN NO EVENT SHALL THE PROVIDERS OF THE MODEL MATERIALS BE LIABLE FOR ANY CLAIM,
+ DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MODEL MATERIALS, THE
+ USE THEREOF OR OTHER DEALINGS THEREIN.
README.md CHANGED
@@ -1,3 +1,143 @@
- ---
- license: apache-2.0
- ---
+ ---
+ license: other
+ license_name: openmdw
+ license_link: LICENSE
+ datasets:
+ - facebook/flores
+ - google/wmt24pp
+ language:
+ - en
+ - zh
+ - de
+ - fr
+ - es
+ - ar
+ - pt
+ - it
+ - ru
+ - id
+ - ja
+ - ko
+ - ms
+ - ro
+ - th
+ - vi
+ - uk
+ - tr
+ - cs
+ - nl
+ - pl
+ - sv
+ - da
+ - fi
+ - hr
+ - hu
+ - 'no'
+ - nb
+ metrics:
+ - bleurt
+ - comet
+ pipeline_tag: translation
+ ---
+ # Seed-X-PPO-7B-GPTQ-Int8
+ <a href="https://arxiv.org/pdf/2507.13618">
+ <img src="https://img.shields.io/badge/Seed--X-Report-blue"></a>
+ <a href="https://huggingface.co/collections/ByteDance-Seed/seed-x-6878753f2858bc17afa78543">
+ <img src="https://img.shields.io/badge/Seed--X-Hugging Face-brightgreen"></a>
+ <a href="https://huggingface.co/spaces/ByteDance-Seed/Seed-X">
+ <img src="https://img.shields.io/badge/Seed--X-DEMO-purple"></a>
+ <a href="https://github.com/ByteDance-Seed/Seed-X-7B/blob/main/LICENSE.openmdw">
+ <img src="https://img.shields.io/badge/License-OpenMDW-yellow"></a>
+
+ ## Introduction
+ We are excited to introduce **Seed-X**, a powerful series of open-source multilingual translation language models, including an instruction model, a reinforcement learning model, and a reward model. It pushes the boundaries of translation capability within 7 billion parameters.
+ We developed Seed-X as an accessible, off-the-shelf tool to support the community in advancing translation research and applications:
+ * **Exceptional translation capabilities**: Seed-X exhibits state-of-the-art translation capabilities, on par with or outperforming ultra-large models like Gemini-2.5, Claude-3.5, and GPT-4, as validated by human evaluations and automatic metrics.
+ * **Deployment- and inference-friendly**: With a compact 7B parameter count and the Mistral architecture, Seed-X offers outstanding translation performance in a lightweight and efficient package, ideal for deployment and inference.
+ * **Broad domain coverage**: Seed-X excels on a highly challenging translation test set spanning diverse domains, including the internet, science and technology, office dialogues, e-commerce, biomedicine, finance, law, literature, and entertainment.
+ ![performance](imgs/model_comparsion.png)
+
+ This repo contains the **Seed-X-PPO-7B-GPTQ-Int8** model, with the following features:
+ * Type: Causal language model
+ * Training Stage: Pretraining & Post-training
+ * Support: Multilingual translation among 28 languages
+ * Quantization: GPTQ 8-bit
+
+ (We recommend the Seed-X-PPO model, as its translation performance is superior to that of Seed-X-Instruct.)
+ | Languages | Abbr. | Languages | Abbr. | Languages | Abbr. | Languages | Abbr. |
+ | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- |
+ | Arabic | ar | French | fr | Malay | ms | Russian | ru |
+ | Czech | cs | Croatian | hr | Norwegian Bokmal | nb | Swedish | sv |
+ | Danish | da | Hungarian | hu | Dutch | nl | Thai | th |
+ | German | de | Indonesian | id | Norwegian | no | Turkish | tr |
+ | English | en | Italian | it | Polish | pl | Ukrainian | uk |
+ | Spanish | es | Japanese | ja | Portuguese | pt | Vietnamese | vi |
+ | Finnish | fi | Korean | ko | Romanian | ro | Chinese | zh |
+
+ ## Model Downloads
+ | Model Name | Description | Download |
+ | ----------- | ----------- | ----------- |
+ | Seed-X-Instruct | Instruction-tuned for alignment with user intent. | 🤗 [Model](https://huggingface.co/ByteDance-Seed/Seed-X-Instruct-7B) |
+ | Seed-X-PPO | RL-trained to boost translation capabilities. | 🤗 [Model](https://huggingface.co/ByteDance-Seed/Seed-X-PPO-7B) |
+ | 👉 **Seed-X-PPO-GPTQ-Int8** | Quantization: GPTQ 8-bit. | 🤗 [Model](https://huggingface.co/ByteDance-Seed/Seed-X-PPO-7B-GPTQ-Int8) |
+ | Seed-X-PPO-AWQ-Int4 | Quantization: AWQ 4-bit. | 🤗 [Model](https://huggingface.co/ByteDance-Seed/Seed-X-PPO-7B-AWQ-Int4) |
+ | Seed-X-RM | Reward model to evaluate the quality of translation. | 🤗 [Model](https://huggingface.co/ByteDance-Seed/Seed-X-RM-7B) |
+
+ ## Quickstart
+
+ 📮 **Notice**
+ * **The language tag at the end of the prompt is necessary**; it is used in PPO training. For example, when the target language is German, \<de\> needs to be appended. Refer to the table above for language abbreviations, and see the prompt-building sketch after this list.
+ * **This model is specialized in multilingual translation** and is not expected to support other tasks.
+ * **We don't provide a chat template**, so you don't need to call ```tokenizer.apply_chat_template```. Please avoid prompting the model in a multi-round conversation format.
+ * **We recommend against using unofficial quantized versions for local deployment.** We will soon release an official quantized model and develop a demo on Hugging Face Space.
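+
+ To make the prompt format concrete, here is a minimal sketch (not part of the official API; `build_prompt` is a hypothetical helper):
+
+ ```python
+ # Hypothetical helper: builds a translation prompt and appends the
+ # target-language tag expected from PPO training, e.g. "<zh>".
+ def build_prompt(source_text: str, source_lang: str, target_lang: str, tag: str) -> str:
+     return (f"Translate the following {source_lang} sentence into {target_lang}:\n"
+             f"{source_text} <{tag}>")
+
+ print(build_prompt("May the force be with you", "English", "Chinese", "zh"))
+ # Translate the following English sentence into Chinese:
+ # May the force be with you <zh>
+ ```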
+
+ Here is a simple example demonstrating how to load the model and perform translation using ```vllm```:
+
+ Recommended: ```vllm==0.8.0, transformers==4.51.3```
+
+ ```python
+ from vllm import LLM, SamplingParams, BeamSearchParams
+
+ model_path = "./ByteDance-Seed/Seed-X-PPO-7B-GPTQ-Int8"
+ model = LLM(model=model_path,
+             max_num_seqs=512,
+             tensor_parallel_size=8,
+             enable_prefix_caching=True,
+             gpu_memory_utilization=0.95)
+ messages = [
+     # without CoT
+     "Translate the following English sentence into Chinese:\nMay the force be with you <zh>",
+     # with CoT
+     "Translate the following English sentence into Chinese and explain it in detail:\nMay the force be with you <zh>"
+ ]
+ # Option 1: beam search (we recommend beam search decoding)
+ decoding_params = BeamSearchParams(beam_width=4,
+                                    max_tokens=512)
+ # Option 2: greedy decoding (this assignment overrides the beam-search
+ # params above; keep only the option you want)
+ decoding_params = SamplingParams(temperature=0,
+                                  max_tokens=512,
+                                  skip_special_tokens=True)
+ results = model.generate(messages, decoding_params)
+ responses = [res.outputs[0].text.strip() for res in results]
+ print(responses)
+ ```
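+
+ If vLLM is unavailable, the checkpoint's `compressed-tensors` format (see `config.json` below) should also load through plain `transformers`. A minimal, hedged sketch, assuming `transformers>=4.51` with the `compressed-tensors` package installed:
+
+ ```python
+ # Hedged sketch (not from the official README): greedy decoding via transformers.
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ model_id = "ByteDance-Seed/Seed-X-PPO-7B-GPTQ-Int8"
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
+
+ # No chat template: pass the raw prompt with the target-language tag.
+ prompt = "Translate the following English sentence into Chinese:\nMay the force be with you <zh>"
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+ out = model.generate(**inputs, max_new_tokens=512, do_sample=False)
+ print(tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
+ ```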
+ ## Evaluation
+ We evaluated Seed-X on a diverse set of translation benchmarks, including FLORES-200, WMT-25, and a publicly released [challenge set](https://github.com/ByteDance-Seed/Seed-X-7B/tree/main/challenge_set) accompanied by human evaluations.
+ ![humen_eval](imgs/humen_eval.png)
+ For detailed benchmark results and analysis, please refer to our [Technical Report](https://arxiv.org/pdf/2507.13618).
+
+ ## License
+ This project is licensed under OpenMDW. See the [LICENSE](https://github.com/ByteDance-Seed/Seed-X-7B/blob/main/LICENSE.openmdw) file for details.
+
+ ## Citation
+ If you find Seed-X useful for your research and applications, feel free to give us a star ⭐ or cite us using:
+ ```bibtex
+ @misc{cheng2025seedxbuildingstrongmultilingual,
+       title={Seed-X: Building Strong Multilingual Translation LLM with 7B Parameters},
+       author={Shanbo Cheng and Yu Bao and Qian Cao and Luyang Huang and Liyan Kang and Zhicheng Liu and Yu Lu and Wenhao Zhu and Jingwen Chen and Zhichao Huang and Tao Li and Yifu Li and Huiying Lin and Sitong Liu and Ningxin Peng and Shuaijie She and Lu Xu and Nuo Xu and Sen Yang and Runsheng Yu and Yiming Yu and Liehao Zou and Hang Li and Lu Lu and Yuxuan Wang and Yonghui Wu},
+       year={2025},
+       eprint={2507.13618},
+       archivePrefix={arXiv},
+       primaryClass={cs.CL},
+       url={https://arxiv.org/abs/2507.13618},
+ }
+ ```
config.json ADDED
@@ -0,0 +1,68 @@
+ {
+   "architectures": [
+     "MistralForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "head_dim": null,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 32768,
+   "model_type": "mistral",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "quantization_config": {
+     "config_groups": {
+       "group_0": {
+         "input_activations": {
+           "actorder": null,
+           "block_structure": null,
+           "dynamic": true,
+           "group_size": null,
+           "num_bits": 8,
+           "observer": null,
+           "observer_kwargs": {},
+           "strategy": "token",
+           "symmetric": true,
+           "type": "int"
+         },
+         "output_activations": null,
+         "targets": [
+           "Linear"
+         ],
+         "weights": {
+           "actorder": null,
+           "block_structure": null,
+           "dynamic": false,
+           "group_size": null,
+           "num_bits": 8,
+           "observer": "minmax",
+           "observer_kwargs": {},
+           "strategy": "channel",
+           "symmetric": true,
+           "type": "int"
+         }
+       }
+     },
+     "format": "int-quantized",
+     "global_compression_ratio": null,
+     "ignore": [
+       "lm_head"
+     ],
+     "kv_cache_scheme": null,
+     "quant_method": "compressed-tensors",
+     "quantization_status": "compressed"
+   },
+   "rms_norm_eps": 1e-05,
+   "rope_theta": 10000.0,
+   "sliding_window": 4096,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.53.3",
+   "use_cache": true,
+   "vocab_size": 65269
+ }
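
The `quantization_config` above encodes a W8A8 compressed-tensors scheme: static per-channel int8 weights and dynamic per-token int8 activations on every `Linear` layer, with `lm_head` left unquantized. A small sketch that reads these fields back (the file path is assumed relative to the checkpoint directory):

```python
# Inspect the quantization scheme recorded in config.json.
import json

with open("config.json") as f:
    cfg = json.load(f)

quant = cfg["quantization_config"]
group = quant["config_groups"]["group_0"]
print(quant["quant_method"])                  # compressed-tensors
print(group["weights"]["num_bits"],           # 8-bit weights...
      group["weights"]["strategy"])           # ...quantized per channel
print(group["input_activations"]["dynamic"])  # True -> per-token dynamic activations
print(quant["ignore"])                        # ['lm_head'] stays unquantized
```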
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "transformers_version": "4.53.3"
+ }
imgs/humen_eval.png ADDED

Git LFS Details

  • SHA256: 231e55f71f4b05cd905f9456b697bdc2ff7932be99e2b0481ce6f6f5f110a5e2
  • Pointer size: 131 Bytes
  • Size of remote file: 197 kB
imgs/model_comparsion.png ADDED

Git LFS Details

  • SHA256: 9c1baa367494252642a93b798a1fd05cad4b2d1794d725a1a4c767b61a947242
  • Pointer size: 131 Bytes
  • Size of remote file: 169 kB
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dd0f3a1130aaa2f1fc34a908386023eecc9dfef9d319a60ab20150836014165c
+ size 4999558520
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7bc45cacff899027c26123244fdac600908557fd752e84316d75b1a74a88230f
+ size 3052475192
model.safetensors.index.json ADDED
@@ -0,0 +1,523 @@
+ {
+   "metadata": {
+     "total_parameters": 7517024256,
+     "total_size": 8051974144
+   },
+   "weight_map": {
+     "lm_head.weight": "model-00002-of-00002.safetensors",
+     "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.20.mlp.down_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.20.mlp.up_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.21.mlp.down_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.21.mlp.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.21.mlp.up_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.21.self_attn.k_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.21.self_attn.o_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.21.self_attn.q_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.21.self_attn.v_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.mlp.down_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.mlp.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.mlp.up_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.self_attn.k_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.self_attn.o_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.self_attn.q_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.self_attn.v_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.mlp.down_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.mlp.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.mlp.up_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.self_attn.k_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.self_attn.o_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.self_attn.q_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.self_attn.v_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.mlp.down_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.mlp.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.mlp.up_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.self_attn.k_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.self_attn.o_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.self_attn.q_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.self_attn.v_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.mlp.down_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.mlp.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.mlp.up_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.self_attn.k_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.self_attn.o_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.self_attn.q_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.self_attn.v_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.mlp.down_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.mlp.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.mlp.up_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.self_attn.k_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.self_attn.o_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.self_attn.q_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.self_attn.v_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.mlp.down_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.mlp.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.mlp.up_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.self_attn.k_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.self_attn.o_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.self_attn.q_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.self_attn.v_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.28.mlp.down_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.28.mlp.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.28.mlp.up_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.28.self_attn.k_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.28.self_attn.o_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.28.self_attn.q_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.28.self_attn.v_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.29.mlp.down_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.29.mlp.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.29.mlp.up_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.29.self_attn.k_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.29.self_attn.o_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.29.self_attn.q_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.29.self_attn.v_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.30.mlp.down_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.30.mlp.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.30.mlp.up_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.30.self_attn.k_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.30.self_attn.o_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.30.self_attn.q_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.30.self_attn.v_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.31.mlp.down_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.31.mlp.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.31.mlp.up_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.31.self_attn.k_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.31.self_attn.o_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.31.self_attn.q_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.31.self_attn.v_proj.weight_scale": "model-00002-of-00002.safetensors",
+     "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.mlp.down_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.mlp.up_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.self_attn.k_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.self_attn.o_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.self_attn.q_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.self_attn.v_proj.weight_scale": "model-00001-of-00002.safetensors",
+     "model.norm.weight": "model-00002-of-00002.safetensors"
+   }
+ }
recipe.yaml ADDED
@@ -0,0 +1,7 @@
+ default_stage:
+   default_modifiers:
+     SmoothQuantModifier: {smoothing_strength: 0.8}
+     GPTQModifier:
+       scheme: W8A8
+       targets: Linear
+       ignore: [lm_head]
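
This recipe is in the llm-compressor format: SmoothQuant (strength 0.8) migrates activation outliers into the weights, then GPTQ quantizes every `Linear` layer to the W8A8 scheme recorded in `config.json`, skipping `lm_head`. A hedged sketch of how such a checkpoint could be produced (calibration dataset and sample counts are illustrative, not the uploader's actual settings):

```python
# Hedged sketch, assuming the llm-compressor library; not the uploader's actual script.
from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.modifiers.smoothquant import SmoothQuantModifier

recipe = [
    SmoothQuantModifier(smoothing_strength=0.8),
    GPTQModifier(scheme="W8A8", targets="Linear", ignore=["lm_head"]),
]

# One-shot calibration pass; saves a compressed-tensors checkpoint
# like the one in this commit.
oneshot(
    model="ByteDance-Seed/Seed-X-PPO-7B",
    dataset="open_platypus",          # illustrative calibration set
    recipe=recipe,
    output_dir="Seed-X-PPO-7B-GPTQ-Int8",
    max_seq_length=2048,
    num_calibration_samples=512,
)
```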
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "bos_token": "<s>",
+   "eos_token": "</s>",
+   "unk_token": "<unk>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "add_prefix_space": null,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "extra_special_tokens": {},
+   "legacy": true,
+   "model_max_length": 1000000000000000019884624838656,
+   "tokenizer_class": "LlamaTokenizerFast",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }