kevinwang676 committed on
Commit e4b8a42 · 1 Parent(s): eb04563

Delete checkpoint-200

checkpoint-200/config.json DELETED
@@ -1,47 +0,0 @@
- {
-   "_name_or_path": "chatglm2-6b",
-   "add_bias_linear": false,
-   "add_qkv_bias": true,
-   "apply_query_key_layer_scaling": true,
-   "apply_residual_connection_post_layernorm": false,
-   "architectures": [
-     "ChatGLMForConditionalGeneration"
-   ],
-   "attention_dropout": 0.0,
-   "attention_softmax_in_fp32": true,
-   "auto_map": {
-     "AutoConfig": "configuration_chatglm.ChatGLMConfig",
-     "AutoModel": "modeling_chatglm.ChatGLMForConditionalGeneration",
-     "AutoModelForCausalLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
-     "AutoModelForSeq2SeqLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
-     "AutoModelForSequenceClassification": "modeling_chatglm.ChatGLMForSequenceClassification"
-   },
-   "bias_dropout_fusion": true,
-   "classifier_dropout": null,
-   "eos_token_id": 2,
-   "ffn_hidden_size": 13696,
-   "fp32_residual_connection": false,
-   "hidden_dropout": 0.0,
-   "hidden_size": 4096,
-   "kv_channels": 128,
-   "layernorm_epsilon": 1e-05,
-   "model_type": "chatglm",
-   "multi_query_attention": true,
-   "multi_query_group_num": 2,
-   "num_attention_heads": 32,
-   "num_layers": 28,
-   "original_rope": true,
-   "pad_token_id": 0,
-   "padded_vocab_size": 65024,
-   "post_layer_norm": true,
-   "pre_seq_len": 128,
-   "prefix_projection": false,
-   "quantization_bit": 0,
-   "rmsnorm": true,
-   "seq_length": 32768,
-   "tie_word_embeddings": false,
-   "torch_dtype": "float16",
-   "transformers_version": "4.30.2",
-   "use_cache": true,
-   "vocab_size": 65024
- }
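The deleted config describes a ChatGLM2-6B checkpoint fine-tuned with P-Tuning v2: "pre_seq_len": 128 with "prefix_projection": false means the checkpoint stores only a 128-token learned prefix, not the full 6B-parameter model, which is why the pytorch_model.bin pointer below records just ~7 MB. A minimal loading sketch along the lines of the upstream ChatGLM2-6B P-Tuning recipe; the paths "chatglm2-6b" and "checkpoint-200" are placeholders, and the "transformer.prefix_encoder." key prefix is assumed from the upstream modeling code:

import torch
from transformers import AutoConfig, AutoModel

# Rebuild the model with the prefix length recorded in this config, then
# load only the prefix-encoder weights saved in the checkpoint.
config = AutoConfig.from_pretrained("chatglm2-6b", trust_remote_code=True, pre_seq_len=128)
model = AutoModel.from_pretrained("chatglm2-6b", config=config, trust_remote_code=True)

prefix_state_dict = torch.load("checkpoint-200/pytorch_model.bin", map_location="cpu")
prefix_weights = {
    k[len("transformer.prefix_encoder."):]: v
    for k, v in prefix_state_dict.items()
    if k.startswith("transformer.prefix_encoder.")
}
model.transformer.prefix_encoder.load_state_dict(prefix_weights)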
checkpoint-200/generation_config.json DELETED
@@ -1,6 +0,0 @@
- {
-   "_from_model_config": true,
-   "eos_token_id": 2,
-   "pad_token_id": 0,
-   "transformers_version": "4.30.2"
- }
checkpoint-200/optimizer.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:933b7b82708ba6a23d949d7b05fcb8644b9ab8b06ecf625f35c30aeba85b3ba2
- size 14681892
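The binary entries in this commit (optimizer.pt, pytorch_model.bin, rng_state.pth, scheduler.pt, tokenizer.model, training_args.bin) are Git LFS pointer files: the repository tracks only this three-line stub (spec version, sha256 object id, byte size) while the blob itself lives in LFS storage, so the deletion removes the pointers from the tree. A minimal sketch of parsing such a stub, assuming the key/value format shown above:

def parse_lfs_pointer(path):
    # Each line is "key value", e.g. "oid sha256:933b7b..." or "size 14681892".
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields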
checkpoint-200/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:54e939cf8e3ee1c58646595ea0e7748202c1e1b85f82aeb536a388bbe8d36e86
- size 7341306
checkpoint-200/rng_state.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:51523eedac643c13a3a71297ac9e347331249d1d4cc19f9738a182bae3585fb2
- size 14244
checkpoint-200/scheduler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:3db1c4819d8e7a76f34cf5f8f4aa0bf9497992cd0862dbd9ba3fc68b9886b79e
- size 1064
checkpoint-200/special_tokens_map.json DELETED
@@ -1 +0,0 @@
- {}
checkpoint-200/tokenizer.model DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:e7dc4c393423b76e4373e5157ddc34803a0189ba96b21ddbb40269d31468a6f2
- size 1018370
checkpoint-200/tokenizer_config.json DELETED
@@ -1,14 +0,0 @@
- {
-   "auto_map": {
-     "AutoTokenizer": [
-       "tokenization_chatglm.ChatGLMTokenizer",
-       null
-     ]
-   },
-   "clean_up_tokenization_spaces": false,
-   "do_lower_case": false,
-   "model_max_length": 1000000000000000019884624838656,
-   "padding_side": "left",
-   "remove_space": false,
-   "tokenizer_class": "ChatGLMTokenizer"
- }
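The "model_max_length" value 1000000000000000019884624838656 is int(1e30), the VERY_LARGE_INTEGER sentinel transformers writes when no explicit length limit was recorded; the real context window is the "seq_length": 32768 in the model config above. Because "tokenizer_class" points at the repo's custom ChatGLMTokenizer, loading requires trust_remote_code; a minimal sketch, with "chatglm2-6b" as a placeholder path:

from transformers import AutoTokenizer

# The tokenizer class is defined by code shipped inside the model repo,
# so transformers must be allowed to execute it.
tokenizer = AutoTokenizer.from_pretrained("chatglm2-6b", trust_remote_code=True)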
checkpoint-200/trainer_state.json DELETED
@@ -1,136 +0,0 @@
- {
-   "best_metric": null,
-   "best_model_checkpoint": null,
-   "epoch": 32.6530612244898,
-   "global_step": 200,
-   "is_hyper_param_search": false,
-   "is_local_process_zero": true,
-   "is_world_process_zero": true,
-   "log_history": [
-     {
-       "epoch": 1.63,
-       "learning_rate": 0.009833333333333333,
-       "loss": 2.53,
-       "step": 10
-     },
-     {
-       "epoch": 3.27,
-       "learning_rate": 0.009666666666666667,
-       "loss": 2.0016,
-       "step": 20
-     },
-     {
-       "epoch": 4.9,
-       "learning_rate": 0.0095,
-       "loss": 1.7775,
-       "step": 30
-     },
-     {
-       "epoch": 6.53,
-       "learning_rate": 0.009333333333333334,
-       "loss": 1.6576,
-       "step": 40
-     },
-     {
-       "epoch": 8.16,
-       "learning_rate": 0.009166666666666667,
-       "loss": 1.5048,
-       "step": 50
-     },
-     {
-       "epoch": 9.8,
-       "learning_rate": 0.009000000000000001,
-       "loss": 1.3572,
-       "step": 60
-     },
-     {
-       "epoch": 11.43,
-       "learning_rate": 0.008833333333333334,
-       "loss": 1.2067,
-       "step": 70
-     },
-     {
-       "epoch": 13.06,
-       "learning_rate": 0.008666666666666668,
-       "loss": 1.0777,
-       "step": 80
-     },
-     {
-       "epoch": 14.69,
-       "learning_rate": 0.0085,
-       "loss": 0.9188,
-       "step": 90
-     },
-     {
-       "epoch": 16.33,
-       "learning_rate": 0.008333333333333333,
-       "loss": 0.7241,
-       "step": 100
-     },
-     {
-       "epoch": 17.96,
-       "learning_rate": 0.008166666666666666,
-       "loss": 0.5775,
-       "step": 110
-     },
-     {
-       "epoch": 19.59,
-       "learning_rate": 0.008,
-       "loss": 0.4235,
-       "step": 120
-     },
-     {
-       "epoch": 21.22,
-       "learning_rate": 0.007833333333333333,
-       "loss": 0.3182,
-       "step": 130
-     },
-     {
-       "epoch": 22.86,
-       "learning_rate": 0.007666666666666667,
-       "loss": 0.2155,
-       "step": 140
-     },
-     {
-       "epoch": 24.49,
-       "learning_rate": 0.0075,
-       "loss": 0.1633,
-       "step": 150
-     },
-     {
-       "epoch": 26.12,
-       "learning_rate": 0.007333333333333333,
-       "loss": 0.1234,
-       "step": 160
-     },
-     {
-       "epoch": 27.76,
-       "learning_rate": 0.007166666666666667,
-       "loss": 0.0911,
-       "step": 170
-     },
-     {
-       "epoch": 29.39,
-       "learning_rate": 0.006999999999999999,
-       "loss": 0.0738,
-       "step": 180
-     },
-     {
-       "epoch": 31.02,
-       "learning_rate": 0.006833333333333334,
-       "loss": 0.0673,
-       "step": 190
-     },
-     {
-       "epoch": 32.65,
-       "learning_rate": 0.006666666666666666,
-       "loss": 0.0544,
-       "step": 200
-     }
-   ],
-   "max_steps": 600,
-   "num_train_epochs": 100,
-   "total_flos": 2.3514963125469184e+17,
-   "trial_name": null,
-   "trial_params": null
- }
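The log history is consistent with a linear decay from a base learning rate of 0.01 (inferred from the logged values, not stated in the file) down to 0 at max_steps 600, and epoch 32.65 at global_step 200 implies about 6.125 optimizer steps per epoch. A quick sketch checking both inferences:

# Assumed: linear decay from base_lr 0.01 over max_steps 600, matching
# the learning rates logged every 10 steps.
base_lr, max_steps = 0.01, 600

def linear_lr(step):
    return base_lr * (max_steps - step) / max_steps

assert abs(linear_lr(10) - 0.009833333333333333) < 1e-9
assert abs(linear_lr(200) - 0.006666666666666666) < 1e-9

# Steps per epoch implied by the state, e.g. 49 examples at an effective
# batch size of 8 (an inference; the actual dataset size is not recorded).
print(200 / 32.6530612244898)  # ~6.125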
checkpoint-200/training_args.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:df0a343e1f2ccb38a19082ba999546089030c0e15418471a24d346cbb68fa7af
- size 4472