Hyukkyu committed on
Commit 005478f · verified · 1 Parent(s): cb0d1e1

Upload migrated model for transformers 5.0+ compatibility

1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 4096,
+   "pooling_mode_cls_token": false,
+   "pooling_mode_mean_tokens": true,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": false,
+   "include_prompt": false
+ }
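
This pooling configuration selects masked mean pooling over the 4096-dimensional token states (all other modes are disabled; `include_prompt: false` additionally excludes prompt tokens from the mean). A minimal sketch of the selected computation, with illustrative names and shapes rather than the library's internals:

```python
import torch

def mean_pool(token_states: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    # token_states: (batch, seq, 4096); attention_mask: (batch, seq) with 0 on padding
    mask = attention_mask.unsqueeze(-1).float()   # (batch, seq, 1)
    summed = (token_states * mask).sum(dim=1)     # sum only non-masked positions
    counts = mask.sum(dim=1).clamp(min=1e-9)      # number of pooled tokens per row
    return summed / counts                        # (batch, 4096)
```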
README.md ADDED
@@ -0,0 +1,61 @@
+ ---
+ license: cc-by-nc-4.0
+ library_name: transformers
+ tags:
+ - nv-embed
+ - embedding
+ - retrieval
+ - transformers
+ base_model: nvidia/NV-Embed-v2
+ ---
+
+ # NV-Embed-v2 (Migrated for transformers 5.0+ compatibility)
+
+ This is a migrated version of [nvidia/NV-Embed-v2](https://huggingface.co/nvidia/NV-Embed-v2) that is compatible with transformers 5.0.0 and later versions.
+
+ ## Changes from Original
+
+ The only change is the addition of an `all_tied_weights_keys` property to the `NVEmbedModel` and `LatentAttentionModel` classes in `modeling_nvembed.py`. This property bridges an API change in transformers 5.0.0, which replaced the `_tied_weights_keys` class attribute with an `all_tied_weights_keys` property that returns a dict.
+
+ ### The Patch
+
+ ```python
+ @property
+ def all_tied_weights_keys(self):
+     """Compatibility property for transformers >= 5.0.0."""
+     if hasattr(self, '_tied_weights_keys') and self._tied_weights_keys:
+         return {key: key for key in self._tied_weights_keys}
+     return {}
+ ```
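+
+ Mapping each key to itself is the minimal translation between the two formats: the old `_tied_weights_keys` is a flat list of tied parameter names, the new property returns a dict, and an empty dict tells transformers that nothing is tied, so models without tied weights (like this one, whose config sets `tie_word_embeddings: false`) load unchanged.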
+
+ ## Usage
+
+ ```python
+ from transformers import AutoModel, AutoTokenizer
+
+ model = AutoModel.from_pretrained("Hyukkyu/nv-embed-v2", trust_remote_code=True)
+ tokenizer = AutoTokenizer.from_pretrained("Hyukkyu/nv-embed-v2", trust_remote_code=True)
+ ```
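+
+ For end-to-end embedding, the original repository also ships an `encode` helper in `modeling_nvembed.py`; assuming it is carried over unchanged here, a minimal sketch (the task wording comes from `instructions.json`, and the `Instruct:`/`Query:` prefix format follows the upstream NV-Embed examples):
+
+ ```python
+ import torch
+
+ task = "Given a question, retrieve passages that answer the question"
+ query_prefix = f"Instruct: {task}\nQuery: "
+
+ queries = ["where is the Eiffel Tower?"]
+ passages = ["The Eiffel Tower is a wrought-iron lattice tower in Paris, France."]
+
+ with torch.no_grad():
+     # Queries carry the instruction prefix; passages are encoded without one.
+     q_emb = model.encode(queries, instruction=query_prefix, max_length=4096)
+     p_emb = model.encode(passages, instruction="", max_length=4096)
+
+ scores = q_emb @ p_emb.T  # embeddings are L2-normalized, so this is cosine similarity
+ ```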
+
+ ## Original Model
+
+ This model is based on [nvidia/NV-Embed-v2](https://huggingface.co/nvidia/NV-Embed-v2). Please refer to the original repository for:
+ - Model architecture details
+ - Training information
+ - Benchmarks and evaluation results
+ - Citation information
+
+ ## License
+
+ This model inherits the license from the original repository. Please check [nvidia/NV-Embed-v2](https://huggingface.co/nvidia/NV-Embed-v2) for license details.
+
+ ## Migration Tool
+
+ This model was migrated using the GenZ model migration tool. The migration script is available at:
+ https://github.com/your-repo/GenZ/tree/main/scripts/preprocess/model
+
+ ## Compatibility
+
+ - **transformers**: >= 5.0.0
+ - **torch**: >= 2.0.0
+ - **Python**: >= 3.9
config.json ADDED
@@ -0,0 +1,101 @@
+ {
+   "_name_or_path": "nvidia/NV-Embed-v2",
+   "add_eos": true,
+   "add_pad_token": true,
+   "architectures": [
+     "NVEmbedModel"
+   ],
+   "auto_map": {
+     "AutoConfig": "configuration_nvembed.NVEmbedConfig",
+     "AutoModel": "modeling_nvembed.NVEmbedModel"
+   },
+   "hidden_size": 4096,
+   "is_mask_instruction": true,
+   "latent_attention_config": {
+     "model_type": "latent_attention"
+   },
+   "mask_type": "b",
+   "model_type": "nvembed",
+   "padding_side": "right",
+   "text_config": {
+     "_name_or_path": "nvidia/NV-Embed-v2",
+     "add_cross_attention": false,
+     "architectures": [
+       "MistralModel"
+     ],
+     "attention_dropout": 0.0,
+     "bad_words_ids": null,
+     "begin_suppress_tokens": null,
+     "bos_token_id": 1,
+     "chunk_size_feed_forward": 0,
+     "cross_attention_hidden_size": null,
+     "decoder_start_token_id": null,
+     "diversity_penalty": 0.0,
+     "do_sample": false,
+     "early_stopping": false,
+     "encoder_no_repeat_ngram_size": 0,
+     "eos_token_id": 2,
+     "exponential_decay_length_penalty": null,
+     "finetuning_task": null,
+     "forced_bos_token_id": null,
+     "forced_eos_token_id": null,
+     "hidden_act": "silu",
+     "hidden_size": 4096,
+     "id2label": {
+       "0": "LABEL_0",
+       "1": "LABEL_1"
+     },
+     "initializer_range": 0.02,
+     "intermediate_size": 14336,
+     "is_decoder": false,
+     "is_encoder_decoder": false,
+     "label2id": {
+       "LABEL_0": 0,
+       "LABEL_1": 1
+     },
+     "length_penalty": 1.0,
+     "max_length": 20,
+     "max_position_embeddings": 32768,
+     "min_length": 0,
+     "model_type": "bidir_mistral",
+     "no_repeat_ngram_size": 0,
+     "num_attention_heads": 32,
+     "num_beam_groups": 1,
+     "num_beams": 1,
+     "num_hidden_layers": 32,
+     "num_key_value_heads": 8,
+     "num_return_sequences": 1,
+     "output_attentions": false,
+     "output_hidden_states": false,
+     "output_scores": false,
+     "pad_token_id": null,
+     "prefix": null,
+     "problem_type": null,
+     "pruned_heads": {},
+     "remove_invalid_values": false,
+     "repetition_penalty": 1.0,
+     "return_dict": true,
+     "return_dict_in_generate": false,
+     "rms_norm_eps": 1e-05,
+     "rope_theta": 10000.0,
+     "sep_token_id": null,
+     "sliding_window": 4096,
+     "suppress_tokens": null,
+     "task_specific_params": null,
+     "temperature": 1.0,
+     "tf_legacy_loss": false,
+     "tie_encoder_decoder": false,
+     "tie_word_embeddings": false,
+     "tokenizer_class": null,
+     "top_k": 50,
+     "top_p": 1.0,
+     "torch_dtype": "float32",
+     "torchscript": false,
+     "typical_p": 1.0,
+     "use_bfloat16": false,
+     "use_cache": true,
+     "vocab_size": 32000
+   },
+   "torch_dtype": "float16",
+   "transformers_version": "4.42.4"
+ }
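
The top-level config wires the composite model together: `auto_map` points AutoConfig/AutoModel at the bundled remote code, and the nested `latent_attention_config` and `text_config` (a bidirectional Mistral) are resolved by `configuration_nvembed.py`. A quick way to inspect the result, assuming remote code is trusted:

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained("Hyukkyu/nv-embed-v2", trust_remote_code=True)
print(config.model_type)                          # "nvembed"
print(config.latent_attention_config.model_type)  # "latent_attention"
print(config.text_config.model_type)              # "bidir_mistral"
print(config.text_config.num_hidden_layers)       # 32
```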
config_sentence_transformers.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "__version__": {
+     "sentence_transformers": "2.7.0",
+     "transformers": "4.37.2",
+     "pytorch": "2.2.0+cu121"
+   },
+   "prompts": {},
+   "default_prompt_name": null
+ }
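
Together with `1_Pooling/config.json`, this file marks the repository as loadable through sentence-transformers as well. A hedged sketch; whether this path works depends on the rest of the sentence-transformers module layout (e.g. `modules.json`), which is not shown in this commit:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Hyukkyu/nv-embed-v2", trust_remote_code=True)
embeddings = model.encode(["example sentence"], normalize_embeddings=True)
print(embeddings.shape)  # expected (1, 4096) given the pooling config
```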
configuration_nvembed.py ADDED
@@ -0,0 +1,92 @@
+
+ from typing import Literal
+ from transformers import AutoConfig
+ from transformers.configuration_utils import PretrainedConfig
+ from transformers.models.auto import CONFIG_MAPPING
+ from transformers.models.mistral import MistralConfig
+
+ NVEMBED_TYPE = "nvembed"
+ LATENT_ATTENTION_TYPE = "latent_attention"
+ BIDIR_MISTRAL_TYPE = "bidir_mistral"
+
+ class NVEmbedConfig(PretrainedConfig):
+     model_type = "nvembed"
+     is_composition = False
+
+     def __init__(
+         self,
+         latent_attention_config=None,
+         text_config=None,
+         padding_side: Literal["right", "left"]="right",
+         add_pad_token: bool=True,
+         is_mask_instruction: bool = True,
+         add_eos: bool=True,
+         mask_type: str="b",
+         **kwargs,
+     ):
+         if isinstance(latent_attention_config, dict):
+             latent_attention_config["model_type"] = (
+                 latent_attention_config["model_type"] if "model_type" in latent_attention_config else LATENT_ATTENTION_TYPE
+             )
+             latent_attention_config = CONFIG_MAPPING[latent_attention_config["model_type"]](**latent_attention_config)
+         elif latent_attention_config is None:
+             latent_attention_config = CONFIG_MAPPING[LATENT_ATTENTION_TYPE]()
+
+         self.latent_attention_config = latent_attention_config
+
+         if isinstance(text_config, dict):
+             text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "llama"
+             text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
+         elif text_config is None:
+             text_config = None
+
+         self.text_config = text_config
+         self.padding_side = padding_side
+         self.is_mask_instruction = is_mask_instruction
+         self.add_pad_token = add_pad_token
+         self.add_eos = add_eos
+         self.mask_type = mask_type
+         if "hidden_size" in kwargs:
+             self.hidden_size = kwargs["hidden_size"]
+         else:
+             self.hidden_size = 4096
+
+         super().__init__(**kwargs)
+
+
+ class LatentAttentionConfig(PretrainedConfig):
+     model_type = LATENT_ATTENTION_TYPE
+     is_composition = False
+     _name_or_path = "latent_attention"
+
+     def __init__(
+         self,
+         num_latents_value: int=512,
+         num_cross_heads: int=8,
+         output_normalize: bool=True,
+         hidden_dim: int=4096,
+         latent_dim: int=4096,
+         cross_dim_head: int=4096,
+         **kwargs,
+     ):
+         self.num_latents_value = num_latents_value
+         self.num_cross_heads = num_cross_heads
+         self.output_normalize = output_normalize
+         self.hidden_dim = hidden_dim
+         self.latent_dim = latent_dim
+         self.cross_dim_head = cross_dim_head
+
+         super().__init__(**kwargs)
+
+
+ class BidirectionalMistralConfig(MistralConfig):
+     model_type = BIDIR_MISTRAL_TYPE
+     keys_to_ignore_at_inference = ["past_key_values"]
+
+ AutoConfig.register(NVEMBED_TYPE, NVEmbedConfig)
+ AutoConfig.register(LATENT_ATTENTION_TYPE, LatentAttentionConfig)
+ AutoConfig.register(BIDIR_MISTRAL_TYPE, BidirectionalMistralConfig)
+
+ NVEmbedConfig.register_for_auto_class()
+ LatentAttentionConfig.register_for_auto_class()
+ BidirectionalMistralConfig.register_for_auto_class()
instructions.json ADDED
@@ -0,0 +1,80 @@
+ {
+   "ClimateFEVER":
+   {
+     "query": "Given a claim about climate change, retrieve documents that support or refute the claim",
+     "corpus": ""
+   },
+   "HotpotQA":
+   {
+     "query": "Given a multi-hop question, retrieve documents that can help answer the question",
+     "corpus": ""
+   },
+   "FEVER":
+   {
+     "query": "Given a claim, retrieve documents that support or refute the claim",
+     "corpus": ""
+   },
+   "MSMARCO":
+   {
+     "query": "Given a web search query, retrieve relevant passages that answer the query",
+     "corpus": ""
+   },
+   "DBPedia":
+   {
+     "query": "Given a query, retrieve relevant entity descriptions from DBPedia",
+     "corpus": ""
+   },
+   "NQ":
+   {
+     "query": "Given a question, retrieve passages that answer the question",
+     "corpus": ""
+   },
+   "QuoraRetrieval":
+   {
+     "query": "Given a question, retrieve questions that are semantically equivalent to the given question",
+     "corpus": "Given a question, retrieve questions that are semantically equivalent to the given question"
+   },
+   "SCIDOCS":
+   {
+     "query": "Given a scientific paper title, retrieve paper abstracts that are cited by the given paper",
+     "corpus": ""
+   },
+   "TRECCOVID":
+   {
+     "query": "Given a query on COVID-19, retrieve documents that answer the query",
+     "corpus": ""
+   },
+   "Touche2020":
+   {
+     "query": "Given a question, retrieve passages that answer the question",
+     "corpus": ""
+   },
+   "SciFact":
+   {
+     "query": "Given a scientific claim, retrieve documents that support or refute the claim",
+     "corpus": ""
+   },
+   "NFCorpus":
+   {
+     "query": "Given a question, retrieve relevant documents that answer the question",
+     "corpus": ""
+   },
+   "ArguAna":
+   {
+     "query": "Given a claim, retrieve documents that support or refute the claim",
+     "corpus": ""
+   },
+   "FiQA2018":
+   {
+     "query": "Given a financial question, retrieve relevant passages that answer the query",
+     "corpus": ""
+   },
+   "STS":
+   {
+     "text": "Retrieve semantically similar text"
+   },
+   "SUMM":
+   {
+     "text": "Given a news summary, retrieve other semantically similar summaries"
+   }
+ }
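
These are the per-task instruction strings used on the query side; the empty `corpus` fields mean documents are embedded without an instruction (QuoraRetrieval, a symmetric task, is the exception). A sketch of the prefix format used in the upstream NV-Embed examples, assumed to apply unchanged here:

```python
import json

with open("instructions.json") as f:
    instructions = json.load(f)

task = instructions["NQ"]["query"]
query_prefix = f"Instruct: {task}\nQuery: "  # upstream prompt template (assumption)
print(query_prefix + "who wrote the declaration of independence?")
```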
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0ce5651268058d961eaeabd4f65a5cb5d003ac7e0e34b7095658b5d5a4802f6a
+ size 4997761248
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bbd7e85b57afbc74fab67e50a572590ce57dde8b5fa76fe7527c42189074d57d
+ size 4915917048
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:87c90f033107075c9531ed8163d4b087ce77e63596c8510821da15a4d892a85c
+ size 4999820296
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:44ff251c6b33ed89101915eb82a92575fd7d7daf9db953205f3bb4b982c4c3f5
+ size 788571960
model.safetensors.index.json ADDED
@@ -0,0 +1,311 @@
+ {
+   "metadata": {
+     "total_size": 15702032384
+   },
+   "weight_map": {
+     "embedding_model.embed_tokens.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.30.input_layernorm.weight": "model-00004-of-00004.safetensors",
+     "embedding_model.layers.30.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
+     "embedding_model.layers.30.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
+     "embedding_model.layers.30.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
+     "embedding_model.layers.30.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
+     "embedding_model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "embedding_model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
+     "embedding_model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
+     "embedding_model.layers.31.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
+     "embedding_model.layers.31.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
+     "embedding_model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
+     "embedding_model.layers.31.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
+     "embedding_model.layers.31.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
+     "embedding_model.layers.31.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
+     "embedding_model.layers.31.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
+     "embedding_model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.7.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.7.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.7.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.7.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "embedding_model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.8.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.8.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.8.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.8.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "embedding_model.norm.weight": "model-00004-of-00004.safetensors",
+     "latent_attention_model.cross_attend_blocks.0.fn.to_kv.weight": "model-00001-of-00004.safetensors",
+     "latent_attention_model.cross_attend_blocks.0.fn.to_out.weight": "model-00001-of-00004.safetensors",
+     "latent_attention_model.cross_attend_blocks.0.fn.to_q.weight": "model-00001-of-00004.safetensors",
+     "latent_attention_model.cross_attend_blocks.0.norm.bias": "model-00001-of-00004.safetensors",
+     "latent_attention_model.cross_attend_blocks.0.norm.weight": "model-00001-of-00004.safetensors",
+     "latent_attention_model.cross_attend_blocks.0.norm_context.bias": "model-00001-of-00004.safetensors",
+     "latent_attention_model.cross_attend_blocks.0.norm_context.weight": "model-00001-of-00004.safetensors",
+     "latent_attention_model.cross_attend_blocks.1.fn.net.0.bias": "model-00001-of-00004.safetensors",
+     "latent_attention_model.cross_attend_blocks.1.fn.net.0.weight": "model-00001-of-00004.safetensors",
+     "latent_attention_model.cross_attend_blocks.1.fn.net.2.bias": "model-00001-of-00004.safetensors",
+     "latent_attention_model.cross_attend_blocks.1.fn.net.2.weight": "model-00001-of-00004.safetensors",
+     "latent_attention_model.cross_attend_blocks.1.norm.bias": "model-00001-of-00004.safetensors",
+     "latent_attention_model.cross_attend_blocks.1.norm.weight": "model-00001-of-00004.safetensors",
+     "latent_attention_model.latents": "model-00001-of-00004.safetensors"
+   }
+ }
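
The index maps every parameter name to one of the four shards above (`total_size` is about 15.7 GB). A small sketch for pulling one tensor out of the correct shard without loading the whole checkpoint; paths are assumed to be local files:

```python
import json
from safetensors import safe_open

with open("model.safetensors.index.json") as f:
    index = json.load(f)

name = "latent_attention_model.latents"
shard = index["weight_map"][name]  # "model-00001-of-00004.safetensors"

with safe_open(shard, framework="pt", device="cpu") as f:
    tensor = f.get_tensor(name)
print(tensor.shape)  # (512, 4096) per the latent attention defaults
```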
modeling_nvembed.py ADDED
@@ -0,0 +1,470 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Union, Dict, Mapping, Optional, Tuple, TypedDict
2
+ import torch
3
+ import os
4
+ import json
5
+ import numpy as np
6
+ from functools import partial
7
+ from contextlib import nullcontext
8
+ from transformers import AutoModel, PreTrainedTokenizerFast, BatchEncoding, DataCollatorWithPadding
9
+ from transformers.modeling_utils import PreTrainedModel
10
+ from transformers.models.auto import AutoTokenizer
11
+ from transformers.modeling_outputs import BaseModelOutputWithPast
12
+ from transformers.modeling_attn_mask_utils import _prepare_4d_attention_mask, _prepare_4d_attention_mask_for_sdpa
13
+ from transformers import MistralModel, MistralConfig
14
+ from transformers.cache_utils import Cache, DynamicCache
15
+ from transformers.utils import (
16
+ add_start_docstrings_to_model_forward,
17
+ logging,
18
+ )
19
+ from einops import rearrange, repeat
20
+ from tqdm.auto import tqdm
21
+ from datasets import Dataset
22
+ from torch.utils.data import DataLoader
23
+ from .configuration_nvembed import NVEmbedConfig, LatentAttentionConfig, BidirectionalMistralConfig
24
+
25
+ logger = logging.get_logger(__name__)
26
+
27
+ class NVEmbedFeatures(TypedDict):
28
+ input_dict: torch.Tensor
29
+ attention_mask: torch.Tensor
30
+ pool_mask: torch.Tensor
31
+
32
+ class BidirectionalMistralModel(MistralModel):
33
+ config_class = BidirectionalMistralConfig
34
+
35
+ def __init__(self, config: MistralConfig):
36
+ super().__init__(config)
37
+ for layer in self.layers:
38
+ layer.self_attn.is_causal = False
39
+ self._attn_implementation = "eager"
40
+
41
+ def forward(
42
+ self,
43
+ input_ids: torch.LongTensor = None,
44
+ attention_mask: Optional[torch.Tensor] = None,
45
+ position_ids: Optional[torch.LongTensor] = None,
46
+ past_key_values: Optional[List[torch.FloatTensor]] = None,
47
+ inputs_embeds: Optional[torch.FloatTensor] = None,
48
+ use_cache: Optional[bool] = None,
49
+ output_attentions: Optional[bool] = None,
50
+ output_hidden_states: Optional[bool] = None,
51
+ return_dict: Optional[bool] = None,
52
+ ) -> Union[Tuple, BaseModelOutputWithPast]:
53
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
54
+ output_hidden_states = (
55
+ output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
56
+ )
57
+ use_cache = use_cache if use_cache is not None else self.config.use_cache
58
+
59
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
60
+
61
+ # retrieve input_ids and inputs_embeds
62
+ if input_ids is not None and inputs_embeds is not None:
63
+ raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time")
64
+ elif input_ids is not None:
65
+ batch_size, seq_length = input_ids.shape
66
+ elif inputs_embeds is not None:
67
+ batch_size, seq_length, _ = inputs_embeds.shape
68
+ else:
69
+ raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")
70
+
71
+ if self.gradient_checkpointing and self.training:
72
+ if use_cache:
73
+ logger.warning_once(
74
+ "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
75
+ )
76
+ use_cache = False
77
+
78
+ past_key_values_length = 0
79
+
80
+ if use_cache:
81
+ use_legacy_cache = not isinstance(past_key_values, Cache)
82
+ if use_legacy_cache:
83
+ past_key_values = DynamicCache.from_legacy_cache(past_key_values)
84
+ past_key_values_length = past_key_values.get_usable_length(seq_length)
85
+
86
+ if position_ids is None:
87
+ device = input_ids.device if input_ids is not None else inputs_embeds.device
88
+ position_ids = torch.arange(
89
+ past_key_values_length, seq_length + past_key_values_length, dtype=torch.long, device=device
90
+ )
91
+ position_ids = position_ids.unsqueeze(0).view(-1, seq_length)
92
+ else:
93
+ position_ids = position_ids.view(-1, seq_length).long()
94
+
95
+ if inputs_embeds is None:
96
+ inputs_embeds = self.embed_tokens(input_ids)
97
+
98
+ if attention_mask is not None and self._attn_implementation == "flash_attention_2" and use_cache:
99
+ is_padding_right = attention_mask[:, -1].sum().item() != batch_size
100
+ if is_padding_right:
101
+ raise ValueError(
102
+ "You are attempting to perform batched generation with padding_side='right'"
103
+ " this may lead to unexpected behaviour for Flash Attention version of Mistral. Make sure to "
104
+ " call `tokenizer.padding_side = 'left'` before tokenizing the input. "
105
+ )
106
+
107
+ if self._attn_implementation == "flash_attention_2":
108
+ # 2d mask is passed through the layers
109
+ attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None
110
+ elif self._attn_implementation == "sdpa" and not output_attentions:
111
+ # output_attentions=True can not be supported when using SDPA, and we fall back on
112
+ # the manual implementation that requires a 4D causal mask in all cases.
113
+ attention_mask = _prepare_4d_attention_mask_for_sdpa(
114
+ attention_mask, inputs_embeds.dtype
115
+ )
116
+ else:
117
+ # 4d mask is passed through the layers
118
+ attention_mask = _prepare_4d_attention_mask(
119
+ attention_mask, inputs_embeds.dtype,
120
+ )
121
+
122
+ hidden_states = inputs_embeds
123
+
124
+ # decoder layers
125
+ all_hidden_states = () if output_hidden_states else None
126
+ all_self_attns = () if output_attentions else None
127
+ next_decoder_cache = None
128
+
129
+ for decoder_layer in self.layers:
130
+ if output_hidden_states:
131
+ all_hidden_states += (hidden_states,)
132
+
133
+ if self.gradient_checkpointing and self.training:
134
+ layer_outputs = self._gradient_checkpointing_func(
135
+ decoder_layer.__call__,
136
+ hidden_states,
137
+ attention_mask,
138
+ position_ids,
139
+ past_key_values,
140
+ output_attentions,
141
+ use_cache,
142
+ )
143
+ else:
144
+ layer_outputs = decoder_layer(
145
+ hidden_states,
146
+ attention_mask=attention_mask,
147
+ position_ids=position_ids,
148
+ past_key_value=past_key_values,
149
+ output_attentions=output_attentions,
150
+ use_cache=use_cache,
151
+ )
152
+
153
+ hidden_states = layer_outputs[0]
154
+
155
+ if use_cache:
156
+ next_decoder_cache = layer_outputs[2 if output_attentions else 1]
157
+
158
+ if output_attentions:
159
+ all_self_attns += (layer_outputs[1],)
160
+
161
+ hidden_states = self.norm(hidden_states)
162
+
163
+ # add hidden states from the last decoder layer
164
+ if output_hidden_states:
165
+ all_hidden_states += (hidden_states,)
166
+
167
+ next_cache = None
168
+ if use_cache:
169
+ next_cache = next_decoder_cache.to_legacy_cache() if use_legacy_cache else next_decoder_cache
170
+
171
+ if not return_dict:
172
+ return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
173
+ return BaseModelOutputWithPast(
174
+ last_hidden_state=hidden_states,
175
+ past_key_values=next_cache,
176
+ hidden_states=all_hidden_states,
177
+ attentions=all_self_attns,
178
+ )
179
+
180
+ def _move_to_device(maybe_tensor, device: torch.device):
181
+ if torch.is_tensor(maybe_tensor):
182
+ return maybe_tensor.to(device, non_blocking=device.type == "cuda")
183
+ elif isinstance(maybe_tensor, dict):
184
+ return {key: _move_to_device(value, device) for key, value in maybe_tensor.items()}
185
+ elif isinstance(maybe_tensor, list):
186
+ return [_move_to_device(x, device) for x in maybe_tensor]
187
+ elif isinstance(maybe_tensor, tuple):
188
+ return tuple([_move_to_device(x, device) for x in maybe_tensor])
189
+ elif isinstance(maybe_tensor, Mapping):
190
+ return type(maybe_tensor)({k: _move_to_device(v, device) for k, v in maybe_tensor.items()})
191
+ else:
192
+ return maybe_tensor
193
+
194
+ def move_to_device(sample, device: torch.device):
195
+ if device.type == "cpu":
196
+ return sample
197
+
198
+ if len(sample) == 0:
199
+ return {}
200
+ return _move_to_device(sample, device)
201
+
202
+
203
+ def input_transform_func(
204
+ tokenizer: PreTrainedTokenizerFast,
205
+ examples: Dict[str, List],
206
+ always_add_eos: bool,
207
+ max_length: int,
208
+ instruction: str,
209
+ ) -> BatchEncoding:
210
+ if always_add_eos:
211
+ examples['input_texts'] = [instruction + input_example + tokenizer.eos_token for input_example in examples['input_texts']]
212
+ batch_dict = tokenizer(
213
+ examples['input_texts'],
214
+ max_length=max_length,
215
+ padding=True,
216
+ return_token_type_ids=False,
217
+ return_tensors="pt",
218
+ truncation=True)
219
+ return batch_dict
220
+
221
+
222
+ class PreNorm(torch.nn.Module):
223
+ def __init__(self, dim, fn, context_dim = None):
224
+ super().__init__()
225
+ self.fn = fn
226
+ self.norm = torch.nn.LayerNorm(dim)
227
+ self.norm_context = torch.nn.LayerNorm(context_dim) if exists(context_dim) else None
228
+
229
+ def forward(self, x, **kwargs):
230
+ x = self.norm(x)
231
+ if exists(self.norm_context):
232
+ context = kwargs['context']
233
+ normed_context = self.norm_context(context)
234
+ kwargs.update(context = normed_context)
235
+ return self.fn(x, **kwargs)
236
+
237
+ class GEGLU(torch.nn.Module):
238
+ def forward(self, x):
239
+ x, gates = x.chunk(2, dim = -1)
240
+ return x * torch.nn.functional.gelu(gates)
241
+
242
+ class FeedForward(torch.nn.Module):
243
+ def __init__(self, dim, mult = 4):
244
+ super().__init__()
245
+ self.net = torch.nn.Sequential(torch.nn.Linear(dim, dim * mult * 2),
246
+ GEGLU(),
247
+ torch.nn.Linear(dim * mult, dim))
248
+
249
+ def forward(self, x):
250
+ return self.net(x)
251
+
252
+ def exists(val):
253
+ return val is not None
254
+
255
+ def default(val, d):
256
+ return val if exists(val) else d
257
+
258
+
259
+ class Attention(torch.nn.Module):
260
+ def __init__(self, query_dim, context_dim = None, heads = 8, dim_head = 64):
261
+ super().__init__()
262
+ inner_dim = dim_head * heads
263
+ context_dim = default(context_dim, query_dim)
264
+ self.scale = dim_head ** -0.5
265
+ self.heads = heads
266
+
267
+ self.to_q = torch.nn.Linear(query_dim, inner_dim, bias = False)
268
+ self.to_kv = torch.nn.Linear(context_dim, inner_dim * 2, bias = False)
269
+ self.to_out = torch.nn.Linear(inner_dim, query_dim, bias = False)
270
+
271
+ def forward(self, x, context = None, mask = None):
272
+ h = self.heads
273
+ q = self.to_q(x)
274
+ context = default(context, x)
275
+ k, v = self.to_kv(context).chunk(2, dim = -1)
276
+ q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h = h), (q, k, v))
277
+ with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_mem_efficient=True):
278
+ out = torch.nn.functional.scaled_dot_product_attention(q, k, v)
279
+ out = rearrange(out, '(b h) n d -> b n (h d)', h = h)
280
+ return self.to_out(out)
281
+
282
+
283
+ class LatentAttentionModel(PreTrainedModel):
284
+ config_class = LatentAttentionConfig
285
+
286
+ def __init__(self, config: LatentAttentionConfig):
287
+ super().__init__(config)
288
+ ## cross-attention block
289
+ num_latents, latent_dim, cross_heads, cross_dim_head = config.num_latents_value, config.latent_dim, config.num_cross_heads, config.cross_dim_head
290
+ dim = config.hidden_dim
291
+ # init latent_attention and latents
292
+ self.cross_attend_blocks = torch.nn.ModuleList([
293
+ PreNorm(latent_dim, Attention(latent_dim, dim, heads = cross_heads, dim_head = cross_dim_head),
294
+ context_dim = dim),
295
+ PreNorm(latent_dim, FeedForward(latent_dim)),
296
+ ])
297
+ self.output_normalize = config.output_normalize
298
+ self.register_parameter("latents", torch.nn.Parameter(torch.randn(num_latents, latent_dim)))
299
+
300
+ @property
301
+ def all_tied_weights_keys(self):
302
+ """Compatibility property for transformers >= 5.0.0.
303
+
304
+ The transformers library changed from using _tied_weights_keys (a class attribute)
305
+ to all_tied_weights_keys (a property that returns a dict). This property provides
306
+ backward compatibility by converting the old format to the new format.
307
+
308
+ Returns:
309
+ dict: A dictionary mapping tied weight keys to themselves.
310
+ """
311
+ if hasattr(self, '_tied_weights_keys') and self._tied_weights_keys:
312
+ return {key: key for key in self._tied_weights_keys}
313
+ return {}
314
+
315
+
316
+ def forward(self, hiddens, attention_mask: torch.Tensor=None):
317
+ ## cross-attention block
318
+ cross_attn, cross_ff = self.cross_attend_blocks
319
+ b, *_, device = *hiddens.shape, hiddens.device
320
+ x = repeat(self.latents, 'n d -> b n d', b = b)
321
+ hiddens = cross_attn(hiddens, context = x, mask = None) + hiddens
322
+ hiddens = cross_ff(hiddens) + hiddens
323
+ if attention_mask !=None:
324
+ s = torch.sum(hiddens * attention_mask.unsqueeze(-1).float(), dim=1)
325
+ d = attention_mask.sum(dim=1, keepdim=True).float()
326
+ hiddens = s / d
327
+ if self.output_normalize:
328
+ hiddens = torch.nn.functional.normalize(hiddens, p=2, dim=-1)
329
+ return hiddens
330
+
331
+ class NVEmbedModel(PreTrainedModel):
+     config_class = NVEmbedConfig
+     _no_split_modules = ["MistralDecoderLayer", "LatentAttentionModel"]
+
+     def __init__(self, config: NVEmbedConfig):
+         super().__init__(config)
+         self.latent_attention_model = AutoModel.from_config(config.latent_attention_config)
+         self.embedding_model = AutoModel.from_config(
+             config.text_config,
+         ) if config.text_config is not None else None
+         self.tokenizer = AutoTokenizer.from_pretrained(config.text_config._name_or_path) if config.text_config is not None else None
+         self.padding_side = config.padding_side
+         self.is_mask_instruction = config.is_mask_instruction
+         self.add_eos = config.add_eos
+         self.mask_type = config.mask_type
+         if config.add_pad_token and self.tokenizer is not None:
+             self.add_pad_token()
+
+     def add_pad_token(self):
+         self.tokenizer.pad_token = self.tokenizer.eos_token
+         self.tokenizer.padding_side = self.padding_side
+
+     @property
+     def all_tied_weights_keys(self):
+         """Compatibility property for transformers >= 5.0.0.
+
+         The transformers library changed from using _tied_weights_keys (a class attribute)
+         to all_tied_weights_keys (a property that returns a dict). This property provides
+         backward compatibility by converting the old format to the new format.
+
+         Returns:
+             dict: A dictionary mapping tied weight keys to themselves.
+         """
+         if hasattr(self, '_tied_weights_keys') and self._tied_weights_keys:
+             return {key: key for key in self._tied_weights_keys}
+         return {}
+
+     def prepare_kwargs_from_batch(self, batch_dict: dict, instruction_lens: int, device: torch.device):
+         batch_dict = move_to_device(batch_dict, device)
+         attention_mask = batch_dict['attention_mask'].clone() if 'attention_mask' in batch_dict else None
+         if (attention_mask is not None and
+             self.padding_side == "right" and
+             self.is_mask_instruction and
+             instruction_lens > 0):
+             # Mask out the instruction tokens for mean-pooling
+             attention_mask[:, :instruction_lens] = 0
+         features: NVEmbedFeatures = {
+             # cast token ids to int64; the collated batch already holds tensors
+             'input_ids': batch_dict['input_ids'].long(),
+             'attention_mask': batch_dict['attention_mask'],
+             'pool_mask': attention_mask,
+         }
+         return features
+
+     @torch.no_grad()
+     def _do_encode(self,
+                    prompts: List[str],
+                    batch_size: int = 1,
+                    instruction: str = "",
+                    max_length: int = 4096,
+                    num_workers: int = 32,
+                    **kwargs
+                    ) -> Union[np.ndarray, torch.FloatTensor]:
+         dataset: Dataset = Dataset.from_dict({'input_texts': prompts})
+         dataset.set_transform(partial(input_transform_func,
+                                       self.tokenizer,
+                                       always_add_eos=True,
+                                       max_length=max_length,
+                                       instruction=instruction))
+
+         data_collator = DataCollatorWithPadding(self.tokenizer)
+         data_loader = DataLoader(
+             dataset,
+             batch_size=batch_size,
+             shuffle=False,
+             drop_last=False,
+             num_workers=num_workers,
+             collate_fn=data_collator,
+             pin_memory=True)
+
+         if self.padding_side == "right" and self.is_mask_instruction and len(instruction) > 0:
+             instruction_lens = len(self.tokenizer.tokenize(instruction))
+         else:
+             instruction_lens = 0
+
+         encoded_embeds = []
+         device = next(self.embedding_model.parameters()).device
+         for batch_dict in tqdm(data_loader, desc='encoding', mininterval=10):
+             features = self.prepare_kwargs_from_batch(batch_dict, instruction_lens, device=device)
+             embeds = self(**features)["sentence_embeddings"].squeeze(1)
+             encoded_embeds.append(embeds)
+         encoded_embeds = torch.cat(encoded_embeds, dim=0)
+         if kwargs.get("return_numpy", False):
+             encoded_embeds = encoded_embeds.cpu().detach().numpy()
+         return encoded_embeds
+
+     def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, pool_mask: Optional[torch.Tensor] = None, return_dict: bool = True):
+         autocast_ctx = torch.autocast if torch.cuda.is_available() else nullcontext
+         with autocast_ctx("cuda"):
+             ## decoder only layer
+             outputs = self.embedding_model(
+                 input_ids=input_ids,
+                 attention_mask=attention_mask,
+             )
+             ## latent attention layer
+             embeds = self.latent_attention_model(
+                 outputs.last_hidden_state,
+                 pool_mask,
+             )
+         if not return_dict:
+             return (embeds,)
+         return {"sentence_embeddings": embeds}
+
+
+     @torch.no_grad()
+     def encode(self, prompts: List[str], instruction: str = "", max_length: int = 4096, **kwargs):
+         if self.padding_side == "right" and self.is_mask_instruction and len(instruction) > 0:
+             instruction_lens = len(self.tokenizer.tokenize(instruction))
+         else:
+             instruction_lens = 0
+
+         device = next(self.embedding_model.parameters()).device
+         batch_dict = input_transform_func(self.tokenizer,
+                                           {"input_texts": list(prompts)},
+                                           always_add_eos=True,
+                                           max_length=max_length,
+                                           instruction=instruction)
+
+         features: NVEmbedFeatures = self.prepare_kwargs_from_batch(batch_dict, instruction_lens, device=device)
+         return self(**features)["sentence_embeddings"].squeeze(1)
+
+
+ ## AutoModel Register
+ AutoModel.register(NVEmbedConfig, NVEmbedModel)
+ AutoModel.register(LatentAttentionConfig, LatentAttentionModel)
+ AutoModel.register(BidirectionalMistralConfig, BidirectionalMistralModel)
+
+ ## Register for auto class
+ NVEmbedModel.register_for_auto_class("AutoModel")
+ LatentAttentionModel.register_for_auto_class("AutoModel")
+ BidirectionalMistralModel.register_for_auto_class("AutoModel")
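
With the classes above registered, the `encode` path can be exercised end to end. A minimal sketch, assuming the migrated repo id and an illustrative retrieval instruction (the exact prompt wording is an assumption, not part of this commit):

```python
import torch
import torch.nn.functional as F
from transformers import AutoModel

# The custom code paths (NVEmbedModel, LatentAttentionModel) require trust_remote_code.
model = AutoModel.from_pretrained("Hyukkyu/nv-embed-v2", trust_remote_code=True)
model.eval()

# Illustrative NV-Embed-style instruction prefix (assumed, not prescribed by this repo).
instruction = "Instruct: Given a question, retrieve passages that answer the question\nQuery: "

queries = ["how does latent attention pool token embeddings?"]
passages = ["Latent attention cross-attends hidden states with learned latents before mean pooling."]

with torch.no_grad():
    # Instruction tokens are zeroed out of the pool mask by prepare_kwargs_from_batch.
    q_emb = model.encode(queries, instruction=instruction, max_length=4096)
    p_emb = model.encode(passages, instruction="", max_length=4096)

# Embeddings are L2-normalized when output_normalize is set, so the dot product is cosine similarity.
scores = F.normalize(q_emb, dim=-1) @ F.normalize(p_emb, dim=-1).T
print(scores)
```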
modules.json ADDED
@@ -0,0 +1,20 @@
+ [
+   {
+     "idx": 0,
+     "name": "0",
+     "path": "",
+     "type": "sentence_transformers.models.Transformer"
+   },
+   {
+     "idx": 1,
+     "name": "1",
+     "path": "1_Pooling",
+     "type": "sentence_transformers.models.Pooling"
+   },
+   {
+     "idx": 2,
+     "name": "2",
+     "path": "2_Normalize",
+     "type": "sentence_transformers.models.Normalize"
+   }
+ ]
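
`modules.json` declares the standard sentence-transformers pipeline (Transformer → Pooling → Normalize), so the checkpoint should also load through the `sentence-transformers` API. A minimal sketch, assuming the migrated repo id:

```python
from sentence_transformers import SentenceTransformer

# trust_remote_code is required because module 0 resolves to the custom NVEmbed code.
model = SentenceTransformer("Hyukkyu/nv-embed-v2", trust_remote_code=True)

emb = model.encode(["a short test sentence"])
print(emb.shape)  # expected: (1, 4096), matching the pooling dimension
```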
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "max_seq_length": 32768,
+   "do_lower_case": false
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
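
`special_tokens_map.json` reuses `</s>` as both the EOS and PAD token, mirroring `add_pad_token()` in the modeling code above. A quick check, assuming the migrated repo id:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Hyukkyu/nv-embed-v2", trust_remote_code=True)

# PAD is aliased to EOS, as configured above and in NVEmbedModel.add_pad_token().
assert tokenizer.pad_token == tokenizer.eos_token == "</s>"
```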
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "add_prefix_space": null,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [],
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "</s>",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
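
`tokenizer_config.json` sets `add_bos_token: true` and `add_eos_token: false`: the tokenizer prepends `<s>` but does not append `</s>`, because the modeling code appends EOS itself (`always_add_eos=True` in the encode path). A quick check, assuming the migrated repo id:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Hyukkyu/nv-embed-v2", trust_remote_code=True)

ids = tokenizer("hello world")["input_ids"]
assert ids[0] == tokenizer.bos_token_id   # <s> prepended by the tokenizer
assert ids[-1] != tokenizer.eos_token_id  # </s> left to the modeling code
```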