commit

Browse files

Files changed (4) hide show

README.md +11 -29
adapter_config.json +8 -5
adapter_model.safetensors +2 -2
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.5047
 ## Model description
@@ -46,38 +46,20 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_ratio: 0.03
-- num_epochs: 6
 ### Training results
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 0.984         | 0.2256 | 50   | 0.9917          |
-| 0.6262        | 0.4512 | 100  | 0.6304          |
-| 0.5699        | 0.6768 | 150  | 0.5770          |
-| 0.6143        | 0.9024 | 200  | 0.5558          |
-| 0.5393        | 1.1280 | 250  | 0.5443          |
-| 0.5278        | 1.3536 | 300  | 0.5361          |
-| 0.4969        | 1.5792 | 350  | 0.5305          |
-| 0.5442        | 1.8049 | 400  | 0.5259          |
-| 0.5304        | 2.0305 | 450  | 0.5225          |
-| 0.4748        | 2.2561 | 500  | 0.5194          |
-| 0.5185        | 2.4817 | 550  | 0.5172          |
-| 0.5234        | 2.7073 | 600  | 0.5150          |
-| 0.5334        | 2.9329 | 650  | 0.5132          |
-| 0.5221        | 3.1585 | 700  | 0.5119          |
-| 0.5067        | 3.3841 | 750  | 0.5104          |
-| 0.4841        | 3.6097 | 800  | 0.5095          |
-| 0.5129        | 3.8353 | 850  | 0.5085          |
-| 0.4969        | 4.0609 | 900  | 0.5077          |
-| 0.4797        | 4.2865 | 950  | 0.5069          |
-| 0.4893        | 4.5121 | 1000 | 0.5063          |
-| 0.5203        | 4.7377 | 1050 | 0.5060          |
-| 0.4902        | 4.9633 | 1100 | 0.5055          |
-| 0.4904        | 5.1889 | 1150 | 0.5054          |
-| 0.508         | 5.4146 | 1200 | 0.5049          |
-| 0.5166        | 5.6402 | 1250 | 0.5048          |
-| 0.4921        | 5.8658 | 1300 | 0.5047          |
 ### Framework versions
@@ -85,5 +67,5 @@ The following hyperparameters were used during training:
 - PEFT 0.12.0
 - Transformers 4.44.2
 - Pytorch 2.4.1+cu121
-- Datasets 2.21.0
 - Tokenizers 0.19.1

 This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.5080
 ## Model description
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_ratio: 0.03
+- num_epochs: 2
 ### Training results
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
+| 0.6813        | 0.2256 | 50   | 0.6682          |
+| 0.5534        | 0.4512 | 100  | 0.5627          |
+| 0.5291        | 0.6768 | 150  | 0.5372          |
+| 0.575         | 0.9024 | 200  | 0.5245          |
+| 0.5066        | 1.1280 | 250  | 0.5170          |
+| 0.4949        | 1.3536 | 300  | 0.5124          |
+| 0.4666        | 1.5792 | 350  | 0.5095          |
+| 0.5142        | 1.8049 | 400  | 0.5080          |
 ### Framework versions
 - PEFT 0.12.0
 - Transformers 4.44.2
 - Pytorch 2.4.1+cu121
+- Datasets 3.0.0
 - Tokenizers 0.19.1

adapter_config.json CHANGED Viewed

@@ -14,19 +14,22 @@
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
-  "modules_to_save": null,
   "peft_type": "LORA",
   "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "up_proj",
-    "down_proj",
     "v_proj",
-    "gate_proj",
     "o_proj",
-    "q_proj",
-    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "t_proj"
+  ],
   "peft_type": "LORA",
   "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "up_proj",
+    "k_proj",
     "v_proj",
     "o_proj",
+    "down_proj",
+    "gate_up_proj",
+    "qkv_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e04e6eaf0ea35de37d1ee7ed7f2b5e3f6316acaa645d793d09e4639fd851824
-size 35668592

 version https://git-lfs.github.com/spec/v1
+oid sha256:4849fc3938bde241bc35daf2b5347e3c3d68f18a00297cf4e51c4690492d5f10
+size 138458960

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:39380fc7f097c15e4219c91d389293f8e5475653951f3ca7b9e28d4ac5921397
 size 5496

 version https://git-lfs.github.com/spec/v1
+oid sha256:bfc23f354b1e7595a84cbf3a84c89714a219b9c9b4f9a6b1467fd4ce7a13bd9d
 size 5496