{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 37.64705882352941,
"eval_steps": 500,
"global_step": 400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09,
"learning_rate": 0.009985714285714285,
"loss": 2.6971,
"step": 1
},
{
"epoch": 0.19,
"learning_rate": 0.009971428571428572,
"loss": 2.3927,
"step": 2
},
{
"epoch": 0.28,
"learning_rate": 0.009957142857142857,
"loss": 2.2539,
"step": 3
},
{
"epoch": 0.38,
"learning_rate": 0.009942857142857144,
"loss": 2.1408,
"step": 4
},
{
"epoch": 0.47,
"learning_rate": 0.009928571428571429,
"loss": 2.2672,
"step": 5
},
{
"epoch": 0.56,
"learning_rate": 0.009914285714285714,
"loss": 1.6433,
"step": 6
},
{
"epoch": 0.66,
"learning_rate": 0.0099,
"loss": 2.1405,
"step": 7
},
{
"epoch": 0.75,
"learning_rate": 0.009885714285714286,
"loss": 2.1464,
"step": 8
},
{
"epoch": 0.85,
"learning_rate": 0.009871428571428571,
"loss": 1.8498,
"step": 9
},
{
"epoch": 0.94,
"learning_rate": 0.009857142857142858,
"loss": 1.6896,
"step": 10
},
{
"epoch": 1.04,
"learning_rate": 0.009842857142857143,
"loss": 2.1932,
"step": 11
},
{
"epoch": 1.13,
"learning_rate": 0.00982857142857143,
"loss": 1.8236,
"step": 12
},
{
"epoch": 1.22,
"learning_rate": 0.009814285714285715,
"loss": 1.735,
"step": 13
},
{
"epoch": 1.32,
"learning_rate": 0.0098,
"loss": 1.7488,
"step": 14
},
{
"epoch": 1.41,
"learning_rate": 0.009785714285714285,
"loss": 1.8336,
"step": 15
},
{
"epoch": 1.51,
"learning_rate": 0.009771428571428572,
"loss": 1.9438,
"step": 16
},
{
"epoch": 1.6,
"learning_rate": 0.009757142857142858,
"loss": 1.7178,
"step": 17
},
{
"epoch": 1.69,
"learning_rate": 0.009742857142857143,
"loss": 1.5714,
"step": 18
},
{
"epoch": 1.79,
"learning_rate": 0.009728571428571428,
"loss": 1.537,
"step": 19
},
{
"epoch": 1.88,
"learning_rate": 0.009714285714285715,
"loss": 1.6764,
"step": 20
},
{
"epoch": 1.98,
"learning_rate": 0.0097,
"loss": 1.8919,
"step": 21
},
{
"epoch": 2.07,
"learning_rate": 0.009685714285714285,
"loss": 1.346,
"step": 22
},
{
"epoch": 2.16,
"learning_rate": 0.009671428571428572,
"loss": 1.5036,
"step": 23
},
{
"epoch": 2.26,
"learning_rate": 0.009657142857142857,
"loss": 1.6788,
"step": 24
},
{
"epoch": 2.35,
"learning_rate": 0.009642857142857144,
"loss": 1.6667,
"step": 25
},
{
"epoch": 2.45,
"learning_rate": 0.009628571428571429,
"loss": 1.7153,
"step": 26
},
{
"epoch": 2.54,
"learning_rate": 0.009614285714285714,
"loss": 1.601,
"step": 27
},
{
"epoch": 2.64,
"learning_rate": 0.0096,
"loss": 1.3002,
"step": 28
},
{
"epoch": 2.73,
"learning_rate": 0.009585714285714286,
"loss": 1.3294,
"step": 29
},
{
"epoch": 2.82,
"learning_rate": 0.009571428571428573,
"loss": 1.7477,
"step": 30
},
{
"epoch": 2.92,
"learning_rate": 0.009557142857142858,
"loss": 1.7961,
"step": 31
},
{
"epoch": 3.01,
"learning_rate": 0.009542857142857143,
"loss": 1.4954,
"step": 32
},
{
"epoch": 3.11,
"learning_rate": 0.009528571428571428,
"loss": 1.6452,
"step": 33
},
{
"epoch": 3.2,
"learning_rate": 0.009514285714285715,
"loss": 1.3528,
"step": 34
},
{
"epoch": 3.29,
"learning_rate": 0.0095,
"loss": 1.4811,
"step": 35
},
{
"epoch": 3.39,
"learning_rate": 0.009485714285714287,
"loss": 1.4738,
"step": 36
},
{
"epoch": 3.48,
"learning_rate": 0.009471428571428572,
"loss": 1.174,
"step": 37
},
{
"epoch": 3.58,
"learning_rate": 0.009457142857142857,
"loss": 1.2346,
"step": 38
},
{
"epoch": 3.67,
"learning_rate": 0.009442857142857143,
"loss": 1.5327,
"step": 39
},
{
"epoch": 3.76,
"learning_rate": 0.009428571428571429,
"loss": 1.5249,
"step": 40
},
{
"epoch": 3.86,
"learning_rate": 0.009414285714285714,
"loss": 1.5086,
"step": 41
},
{
"epoch": 3.95,
"learning_rate": 0.0094,
"loss": 1.8425,
"step": 42
},
{
"epoch": 4.05,
"learning_rate": 0.009385714285714287,
"loss": 1.1943,
"step": 43
},
{
"epoch": 4.14,
"learning_rate": 0.009371428571428572,
"loss": 1.6835,
"step": 44
},
{
"epoch": 4.24,
"learning_rate": 0.009357142857142857,
"loss": 1.75,
"step": 45
},
{
"epoch": 4.33,
"learning_rate": 0.009342857142857142,
"loss": 1.2561,
"step": 46
},
{
"epoch": 4.42,
"learning_rate": 0.009328571428571429,
"loss": 1.3784,
"step": 47
},
{
"epoch": 4.52,
"learning_rate": 0.009314285714285714,
"loss": 1.2538,
"step": 48
},
{
"epoch": 4.61,
"learning_rate": 0.009300000000000001,
"loss": 1.4429,
"step": 49
},
{
"epoch": 4.71,
"learning_rate": 0.009285714285714286,
"loss": 1.3687,
"step": 50
},
{
"epoch": 4.8,
"learning_rate": 0.009271428571428571,
"loss": 1.1511,
"step": 51
},
{
"epoch": 4.89,
"learning_rate": 0.009257142857142858,
"loss": 1.181,
"step": 52
},
{
"epoch": 4.99,
"learning_rate": 0.009242857142857143,
"loss": 1.1753,
"step": 53
},
{
"epoch": 5.08,
"learning_rate": 0.009228571428571428,
"loss": 1.1562,
"step": 54
},
{
"epoch": 5.18,
"learning_rate": 0.009214285714285715,
"loss": 1.2936,
"step": 55
},
{
"epoch": 5.27,
"learning_rate": 0.0092,
"loss": 1.3591,
"step": 56
},
{
"epoch": 5.36,
"learning_rate": 0.009185714285714287,
"loss": 1.1376,
"step": 57
},
{
"epoch": 5.46,
"learning_rate": 0.009171428571428572,
"loss": 1.372,
"step": 58
},
{
"epoch": 5.55,
"learning_rate": 0.009157142857142857,
"loss": 1.5141,
"step": 59
},
{
"epoch": 5.65,
"learning_rate": 0.009142857142857144,
"loss": 1.2087,
"step": 60
},
{
"epoch": 5.74,
"learning_rate": 0.009128571428571429,
"loss": 1.136,
"step": 61
},
{
"epoch": 5.84,
"learning_rate": 0.009114285714285715,
"loss": 1.2948,
"step": 62
},
{
"epoch": 5.93,
"learning_rate": 0.0091,
"loss": 1.0592,
"step": 63
},
{
"epoch": 6.02,
"learning_rate": 0.009085714285714286,
"loss": 1.2321,
"step": 64
},
{
"epoch": 6.12,
"learning_rate": 0.009071428571428572,
"loss": 1.0827,
"step": 65
},
{
"epoch": 6.21,
"learning_rate": 0.009057142857142857,
"loss": 1.1136,
"step": 66
},
{
"epoch": 6.31,
"learning_rate": 0.009042857142857142,
"loss": 1.475,
"step": 67
},
{
"epoch": 6.4,
"learning_rate": 0.009028571428571427,
"loss": 1.1316,
"step": 68
},
{
"epoch": 6.49,
"learning_rate": 0.009014285714285714,
"loss": 1.1688,
"step": 69
},
{
"epoch": 6.59,
"learning_rate": 0.009000000000000001,
"loss": 1.0882,
"step": 70
},
{
"epoch": 6.68,
"learning_rate": 0.008985714285714286,
"loss": 1.1085,
"step": 71
},
{
"epoch": 6.78,
"learning_rate": 0.008971428571428571,
"loss": 1.2029,
"step": 72
},
{
"epoch": 6.87,
"learning_rate": 0.008957142857142856,
"loss": 1.098,
"step": 73
},
{
"epoch": 6.96,
"learning_rate": 0.008942857142857143,
"loss": 1.219,
"step": 74
},
{
"epoch": 7.06,
"learning_rate": 0.00892857142857143,
"loss": 1.0092,
"step": 75
},
{
"epoch": 7.15,
"learning_rate": 0.008914285714285715,
"loss": 1.0112,
"step": 76
},
{
"epoch": 7.25,
"learning_rate": 0.0089,
"loss": 1.1481,
"step": 77
},
{
"epoch": 7.34,
"learning_rate": 0.008885714285714287,
"loss": 0.9873,
"step": 78
},
{
"epoch": 7.44,
"learning_rate": 0.008871428571428572,
"loss": 1.0586,
"step": 79
},
{
"epoch": 7.53,
"learning_rate": 0.008857142857142857,
"loss": 1.1177,
"step": 80
},
{
"epoch": 7.62,
"learning_rate": 0.008842857142857142,
"loss": 0.7814,
"step": 81
},
{
"epoch": 7.72,
"learning_rate": 0.008828571428571429,
"loss": 1.2043,
"step": 82
},
{
"epoch": 7.81,
"learning_rate": 0.008814285714285715,
"loss": 1.0062,
"step": 83
},
{
"epoch": 7.91,
"learning_rate": 0.0088,
"loss": 1.0831,
"step": 84
},
{
"epoch": 8.0,
"learning_rate": 0.008785714285714286,
"loss": 0.9554,
"step": 85
},
{
"epoch": 8.09,
"learning_rate": 0.00877142857142857,
"loss": 1.1674,
"step": 86
},
{
"epoch": 8.19,
"learning_rate": 0.008757142857142857,
"loss": 0.8226,
"step": 87
},
{
"epoch": 8.28,
"learning_rate": 0.008742857142857144,
"loss": 0.9166,
"step": 88
},
{
"epoch": 8.38,
"learning_rate": 0.00872857142857143,
"loss": 0.734,
"step": 89
},
{
"epoch": 8.47,
"learning_rate": 0.008714285714285714,
"loss": 0.8641,
"step": 90
},
{
"epoch": 8.56,
"learning_rate": 0.0087,
"loss": 0.9517,
"step": 91
},
{
"epoch": 8.66,
"learning_rate": 0.008685714285714286,
"loss": 0.9995,
"step": 92
},
{
"epoch": 8.75,
"learning_rate": 0.008671428571428571,
"loss": 0.763,
"step": 93
},
{
"epoch": 8.85,
"learning_rate": 0.008657142857142858,
"loss": 1.0712,
"step": 94
},
{
"epoch": 8.94,
"learning_rate": 0.008642857142857143,
"loss": 1.1111,
"step": 95
},
{
"epoch": 9.04,
"learning_rate": 0.008628571428571428,
"loss": 0.9626,
"step": 96
},
{
"epoch": 9.13,
"learning_rate": 0.008614285714285715,
"loss": 0.6385,
"step": 97
},
{
"epoch": 9.22,
"learning_rate": 0.0086,
"loss": 0.8147,
"step": 98
},
{
"epoch": 9.32,
"learning_rate": 0.008585714285714285,
"loss": 0.8109,
"step": 99
},
{
"epoch": 9.41,
"learning_rate": 0.008571428571428572,
"loss": 1.0953,
"step": 100
},
{
"epoch": 9.51,
"learning_rate": 0.008557142857142859,
"loss": 0.7104,
"step": 101
},
{
"epoch": 9.6,
"learning_rate": 0.008542857142857144,
"loss": 0.9672,
"step": 102
},
{
"epoch": 9.69,
"learning_rate": 0.008528571428571429,
"loss": 0.7593,
"step": 103
},
{
"epoch": 9.79,
"learning_rate": 0.008514285714285714,
"loss": 1.0186,
"step": 104
},
{
"epoch": 9.88,
"learning_rate": 0.0085,
"loss": 0.7898,
"step": 105
},
{
"epoch": 9.98,
"learning_rate": 0.008485714285714286,
"loss": 0.7392,
"step": 106
},
{
"epoch": 10.07,
"learning_rate": 0.008471428571428572,
"loss": 0.7295,
"step": 107
},
{
"epoch": 10.16,
"learning_rate": 0.008457142857142858,
"loss": 0.7211,
"step": 108
},
{
"epoch": 10.26,
"learning_rate": 0.008442857142857143,
"loss": 0.769,
"step": 109
},
{
"epoch": 10.35,
"learning_rate": 0.00842857142857143,
"loss": 0.718,
"step": 110
},
{
"epoch": 10.45,
"learning_rate": 0.008414285714285714,
"loss": 0.6411,
"step": 111
},
{
"epoch": 10.54,
"learning_rate": 0.0084,
"loss": 0.8016,
"step": 112
},
{
"epoch": 10.64,
"learning_rate": 0.008385714285714286,
"loss": 0.6633,
"step": 113
},
{
"epoch": 10.73,
"learning_rate": 0.008371428571428571,
"loss": 0.7257,
"step": 114
},
{
"epoch": 10.82,
"learning_rate": 0.008357142857142858,
"loss": 0.7785,
"step": 115
},
{
"epoch": 10.92,
"learning_rate": 0.008342857142857143,
"loss": 0.8927,
"step": 116
},
{
"epoch": 11.01,
"learning_rate": 0.008328571428571428,
"loss": 0.7242,
"step": 117
},
{
"epoch": 11.11,
"learning_rate": 0.008314285714285715,
"loss": 0.8297,
"step": 118
},
{
"epoch": 11.2,
"learning_rate": 0.0083,
"loss": 0.6761,
"step": 119
},
{
"epoch": 11.29,
"learning_rate": 0.008285714285714287,
"loss": 0.6699,
"step": 120
},
{
"epoch": 11.39,
"learning_rate": 0.008271428571428572,
"loss": 0.5365,
"step": 121
},
{
"epoch": 11.48,
"learning_rate": 0.008257142857142857,
"loss": 0.9045,
"step": 122
},
{
"epoch": 11.58,
"learning_rate": 0.008242857142857144,
"loss": 0.5071,
"step": 123
},
{
"epoch": 11.67,
"learning_rate": 0.008228571428571429,
"loss": 0.6472,
"step": 124
},
{
"epoch": 11.76,
"learning_rate": 0.008214285714285714,
"loss": 0.6232,
"step": 125
},
{
"epoch": 11.86,
"learning_rate": 0.008199999999999999,
"loss": 0.4905,
"step": 126
},
{
"epoch": 11.95,
"learning_rate": 0.008185714285714286,
"loss": 0.557,
"step": 127
},
{
"epoch": 12.05,
"learning_rate": 0.008171428571428573,
"loss": 0.5517,
"step": 128
},
{
"epoch": 12.14,
"learning_rate": 0.008157142857142858,
"loss": 0.6321,
"step": 129
},
{
"epoch": 12.24,
"learning_rate": 0.008142857142857143,
"loss": 0.6619,
"step": 130
},
{
"epoch": 12.33,
"learning_rate": 0.008128571428571428,
"loss": 0.5524,
"step": 131
},
{
"epoch": 12.42,
"learning_rate": 0.008114285714285715,
"loss": 0.4688,
"step": 132
},
{
"epoch": 12.52,
"learning_rate": 0.008100000000000001,
"loss": 0.3717,
"step": 133
},
{
"epoch": 12.61,
"learning_rate": 0.008085714285714286,
"loss": 0.5118,
"step": 134
},
{
"epoch": 12.71,
"learning_rate": 0.008071428571428571,
"loss": 0.4521,
"step": 135
},
{
"epoch": 12.8,
"learning_rate": 0.008057142857142856,
"loss": 0.5865,
"step": 136
},
{
"epoch": 12.89,
"learning_rate": 0.008042857142857143,
"loss": 0.5977,
"step": 137
},
{
"epoch": 12.99,
"learning_rate": 0.008028571428571428,
"loss": 0.6977,
"step": 138
},
{
"epoch": 13.08,
"learning_rate": 0.008014285714285713,
"loss": 0.5625,
"step": 139
},
{
"epoch": 13.18,
"learning_rate": 0.008,
"loss": 0.3611,
"step": 140
},
{
"epoch": 13.27,
"learning_rate": 0.007985714285714287,
"loss": 0.5168,
"step": 141
},
{
"epoch": 13.36,
"learning_rate": 0.007971428571428572,
"loss": 0.4429,
"step": 142
},
{
"epoch": 13.46,
"learning_rate": 0.007957142857142857,
"loss": 0.4998,
"step": 143
},
{
"epoch": 13.55,
"learning_rate": 0.007942857142857142,
"loss": 0.4437,
"step": 144
},
{
"epoch": 13.65,
"learning_rate": 0.007928571428571429,
"loss": 0.4958,
"step": 145
},
{
"epoch": 13.74,
"learning_rate": 0.007914285714285716,
"loss": 0.4021,
"step": 146
},
{
"epoch": 13.84,
"learning_rate": 0.0079,
"loss": 0.6163,
"step": 147
},
{
"epoch": 13.93,
"learning_rate": 0.007885714285714286,
"loss": 0.406,
"step": 148
},
{
"epoch": 14.02,
"learning_rate": 0.007871428571428571,
"loss": 0.4905,
"step": 149
},
{
"epoch": 14.12,
"learning_rate": 0.007857142857142858,
"loss": 0.3824,
"step": 150
},
{
"epoch": 14.21,
"learning_rate": 0.007842857142857143,
"loss": 0.3591,
"step": 151
},
{
"epoch": 14.31,
"learning_rate": 0.007828571428571428,
"loss": 0.342,
"step": 152
},
{
"epoch": 14.4,
"learning_rate": 0.007814285714285715,
"loss": 0.4565,
"step": 153
},
{
"epoch": 14.49,
"learning_rate": 0.0078000000000000005,
"loss": 0.3287,
"step": 154
},
{
"epoch": 14.59,
"learning_rate": 0.007785714285714286,
"loss": 0.4179,
"step": 155
},
{
"epoch": 14.68,
"learning_rate": 0.0077714285714285715,
"loss": 0.3586,
"step": 156
},
{
"epoch": 14.78,
"learning_rate": 0.007757142857142857,
"loss": 0.4618,
"step": 157
},
{
"epoch": 14.87,
"learning_rate": 0.0077428571428571425,
"loss": 0.4133,
"step": 158
},
{
"epoch": 14.96,
"learning_rate": 0.007728571428571429,
"loss": 0.4326,
"step": 159
},
{
"epoch": 15.06,
"learning_rate": 0.007714285714285715,
"loss": 0.3838,
"step": 160
},
{
"epoch": 15.15,
"learning_rate": 0.0077,
"loss": 0.2978,
"step": 161
},
{
"epoch": 15.25,
"learning_rate": 0.007685714285714286,
"loss": 0.3993,
"step": 162
},
{
"epoch": 15.34,
"learning_rate": 0.007671428571428571,
"loss": 0.3249,
"step": 163
},
{
"epoch": 15.44,
"learning_rate": 0.007657142857142857,
"loss": 0.2796,
"step": 164
},
{
"epoch": 15.53,
"learning_rate": 0.007642857142857142,
"loss": 0.3918,
"step": 165
},
{
"epoch": 15.62,
"learning_rate": 0.007628571428571429,
"loss": 0.4122,
"step": 166
},
{
"epoch": 15.72,
"learning_rate": 0.007614285714285715,
"loss": 0.3403,
"step": 167
},
{
"epoch": 15.81,
"learning_rate": 0.0076,
"loss": 0.3759,
"step": 168
},
{
"epoch": 15.91,
"learning_rate": 0.007585714285714286,
"loss": 0.3621,
"step": 169
},
{
"epoch": 16.0,
"learning_rate": 0.007571428571428571,
"loss": 0.2991,
"step": 170
},
{
"epoch": 16.09,
"learning_rate": 0.007557142857142857,
"loss": 0.3039,
"step": 171
},
{
"epoch": 16.19,
"learning_rate": 0.007542857142857144,
"loss": 0.4571,
"step": 172
},
{
"epoch": 16.28,
"learning_rate": 0.007528571428571429,
"loss": 0.2759,
"step": 173
},
{
"epoch": 16.38,
"learning_rate": 0.007514285714285715,
"loss": 0.2835,
"step": 174
},
{
"epoch": 16.47,
"learning_rate": 0.0075,
"loss": 0.3221,
"step": 175
},
{
"epoch": 16.56,
"learning_rate": 0.007485714285714286,
"loss": 0.3072,
"step": 176
},
{
"epoch": 16.66,
"learning_rate": 0.007471428571428572,
"loss": 0.2852,
"step": 177
},
{
"epoch": 16.75,
"learning_rate": 0.007457142857142857,
"loss": 0.2559,
"step": 178
},
{
"epoch": 16.85,
"learning_rate": 0.007442857142857143,
"loss": 0.2787,
"step": 179
},
{
"epoch": 16.94,
"learning_rate": 0.007428571428571429,
"loss": 0.3331,
"step": 180
},
{
"epoch": 17.04,
"learning_rate": 0.007414285714285714,
"loss": 0.1929,
"step": 181
},
{
"epoch": 17.13,
"learning_rate": 0.0074,
"loss": 0.2065,
"step": 182
},
{
"epoch": 17.22,
"learning_rate": 0.007385714285714285,
"loss": 0.2868,
"step": 183
},
{
"epoch": 17.32,
"learning_rate": 0.007371428571428571,
"loss": 0.2206,
"step": 184
},
{
"epoch": 17.41,
"learning_rate": 0.007357142857142858,
"loss": 0.2355,
"step": 185
},
{
"epoch": 17.51,
"learning_rate": 0.007342857142857143,
"loss": 0.3041,
"step": 186
},
{
"epoch": 17.6,
"learning_rate": 0.007328571428571429,
"loss": 0.3028,
"step": 187
},
{
"epoch": 17.69,
"learning_rate": 0.007314285714285714,
"loss": 0.2435,
"step": 188
},
{
"epoch": 17.79,
"learning_rate": 0.0073,
"loss": 0.1869,
"step": 189
},
{
"epoch": 17.88,
"learning_rate": 0.007285714285714285,
"loss": 0.3036,
"step": 190
},
{
"epoch": 17.98,
"learning_rate": 0.007271428571428571,
"loss": 0.246,
"step": 191
},
{
"epoch": 18.07,
"learning_rate": 0.007257142857142858,
"loss": 0.2316,
"step": 192
},
{
"epoch": 18.16,
"learning_rate": 0.007242857142857143,
"loss": 0.186,
"step": 193
},
{
"epoch": 18.26,
"learning_rate": 0.007228571428571429,
"loss": 0.2616,
"step": 194
},
{
"epoch": 18.35,
"learning_rate": 0.007214285714285715,
"loss": 0.2824,
"step": 195
},
{
"epoch": 18.45,
"learning_rate": 0.0072,
"loss": 0.2,
"step": 196
},
{
"epoch": 18.54,
"learning_rate": 0.007185714285714286,
"loss": 0.1978,
"step": 197
},
{
"epoch": 18.64,
"learning_rate": 0.007171428571428572,
"loss": 0.1897,
"step": 198
},
{
"epoch": 18.73,
"learning_rate": 0.007157142857142858,
"loss": 0.1958,
"step": 199
},
{
"epoch": 18.82,
"learning_rate": 0.0071428571428571435,
"loss": 0.203,
"step": 200
},
{
"epoch": 18.92,
"learning_rate": 0.0071285714285714286,
"loss": 0.2451,
"step": 201
},
{
"epoch": 19.01,
"learning_rate": 0.0071142857142857145,
"loss": 0.2045,
"step": 202
},
{
"epoch": 19.11,
"learning_rate": 0.0070999999999999995,
"loss": 0.1937,
"step": 203
},
{
"epoch": 19.2,
"learning_rate": 0.0070857142857142855,
"loss": 0.1814,
"step": 204
},
{
"epoch": 19.29,
"learning_rate": 0.007071428571428572,
"loss": 0.1869,
"step": 205
},
{
"epoch": 19.39,
"learning_rate": 0.007057142857142857,
"loss": 0.2089,
"step": 206
},
{
"epoch": 19.48,
"learning_rate": 0.007042857142857143,
"loss": 0.1924,
"step": 207
},
{
"epoch": 19.58,
"learning_rate": 0.007028571428571428,
"loss": 0.1512,
"step": 208
},
{
"epoch": 19.67,
"learning_rate": 0.007014285714285714,
"loss": 0.1375,
"step": 209
},
{
"epoch": 19.76,
"learning_rate": 0.006999999999999999,
"loss": 0.187,
"step": 210
},
{
"epoch": 19.86,
"learning_rate": 0.006985714285714286,
"loss": 0.2488,
"step": 211
},
{
"epoch": 19.95,
"learning_rate": 0.006971428571428572,
"loss": 0.1864,
"step": 212
},
{
"epoch": 20.05,
"learning_rate": 0.006957142857142857,
"loss": 0.1984,
"step": 213
},
{
"epoch": 20.14,
"learning_rate": 0.006942857142857143,
"loss": 0.156,
"step": 214
},
{
"epoch": 20.24,
"learning_rate": 0.006928571428571429,
"loss": 0.2082,
"step": 215
},
{
"epoch": 20.33,
"learning_rate": 0.006914285714285714,
"loss": 0.094,
"step": 216
},
{
"epoch": 20.42,
"learning_rate": 0.0069,
"loss": 0.1784,
"step": 217
},
{
"epoch": 20.52,
"learning_rate": 0.006885714285714287,
"loss": 0.1293,
"step": 218
},
{
"epoch": 20.61,
"learning_rate": 0.006871428571428572,
"loss": 0.1635,
"step": 219
},
{
"epoch": 20.71,
"learning_rate": 0.006857142857142858,
"loss": 0.1668,
"step": 220
},
{
"epoch": 20.8,
"learning_rate": 0.006842857142857143,
"loss": 0.1946,
"step": 221
},
{
"epoch": 20.89,
"learning_rate": 0.006828571428571429,
"loss": 0.2347,
"step": 222
},
{
"epoch": 20.99,
"learning_rate": 0.006814285714285714,
"loss": 0.1523,
"step": 223
},
{
"epoch": 21.08,
"learning_rate": 0.0068000000000000005,
"loss": 0.1337,
"step": 224
},
{
"epoch": 21.18,
"learning_rate": 0.006785714285714286,
"loss": 0.1511,
"step": 225
},
{
"epoch": 21.27,
"learning_rate": 0.0067714285714285715,
"loss": 0.1058,
"step": 226
},
{
"epoch": 21.36,
"learning_rate": 0.006757142857142857,
"loss": 0.172,
"step": 227
},
{
"epoch": 21.46,
"learning_rate": 0.0067428571428571425,
"loss": 0.1077,
"step": 228
},
{
"epoch": 21.55,
"learning_rate": 0.006728571428571428,
"loss": 0.1993,
"step": 229
},
{
"epoch": 21.65,
"learning_rate": 0.006714285714285714,
"loss": 0.1414,
"step": 230
},
{
"epoch": 21.74,
"learning_rate": 0.0067,
"loss": 0.126,
"step": 231
},
{
"epoch": 21.84,
"learning_rate": 0.006685714285714286,
"loss": 0.1528,
"step": 232
},
{
"epoch": 21.93,
"learning_rate": 0.006671428571428571,
"loss": 0.1316,
"step": 233
},
{
"epoch": 22.02,
"learning_rate": 0.006657142857142857,
"loss": 0.1565,
"step": 234
},
{
"epoch": 22.12,
"learning_rate": 0.006642857142857143,
"loss": 0.1088,
"step": 235
},
{
"epoch": 22.21,
"learning_rate": 0.006628571428571428,
"loss": 0.088,
"step": 236
},
{
"epoch": 22.31,
"learning_rate": 0.006614285714285715,
"loss": 0.1348,
"step": 237
},
{
"epoch": 22.4,
"learning_rate": 0.006600000000000001,
"loss": 0.1702,
"step": 238
},
{
"epoch": 22.49,
"learning_rate": 0.006585714285714286,
"loss": 0.132,
"step": 239
},
{
"epoch": 22.59,
"learning_rate": 0.006571428571428572,
"loss": 0.1115,
"step": 240
},
{
"epoch": 22.68,
"learning_rate": 0.006557142857142857,
"loss": 0.1173,
"step": 241
},
{
"epoch": 22.78,
"learning_rate": 0.006542857142857143,
"loss": 0.0967,
"step": 242
},
{
"epoch": 22.87,
"learning_rate": 0.006528571428571428,
"loss": 0.1484,
"step": 243
},
{
"epoch": 22.96,
"learning_rate": 0.006514285714285715,
"loss": 0.1566,
"step": 244
},
{
"epoch": 23.06,
"learning_rate": 0.006500000000000001,
"loss": 0.162,
"step": 245
},
{
"epoch": 23.15,
"learning_rate": 0.006485714285714286,
"loss": 0.1099,
"step": 246
},
{
"epoch": 23.25,
"learning_rate": 0.0064714285714285716,
"loss": 0.1087,
"step": 247
},
{
"epoch": 23.34,
"learning_rate": 0.006457142857142857,
"loss": 0.116,
"step": 248
},
{
"epoch": 23.44,
"learning_rate": 0.0064428571428571425,
"loss": 0.1096,
"step": 249
},
{
"epoch": 23.53,
"learning_rate": 0.006428571428571429,
"loss": 0.0972,
"step": 250
},
{
"epoch": 23.62,
"learning_rate": 0.006414285714285714,
"loss": 0.0889,
"step": 251
},
{
"epoch": 23.72,
"learning_rate": 0.0064,
"loss": 0.1199,
"step": 252
},
{
"epoch": 23.81,
"learning_rate": 0.006385714285714286,
"loss": 0.1337,
"step": 253
},
{
"epoch": 23.91,
"learning_rate": 0.006371428571428571,
"loss": 0.0977,
"step": 254
},
{
"epoch": 24.0,
"learning_rate": 0.006357142857142857,
"loss": 0.146,
"step": 255
},
{
"epoch": 24.09,
"learning_rate": 0.006342857142857142,
"loss": 0.1102,
"step": 256
},
{
"epoch": 24.19,
"learning_rate": 0.006328571428571429,
"loss": 0.1025,
"step": 257
},
{
"epoch": 24.28,
"learning_rate": 0.006314285714285715,
"loss": 0.09,
"step": 258
},
{
"epoch": 24.38,
"learning_rate": 0.0063,
"loss": 0.1302,
"step": 259
},
{
"epoch": 24.47,
"learning_rate": 0.006285714285714286,
"loss": 0.0739,
"step": 260
},
{
"epoch": 24.56,
"learning_rate": 0.006271428571428571,
"loss": 0.1172,
"step": 261
},
{
"epoch": 24.66,
"learning_rate": 0.006257142857142857,
"loss": 0.1048,
"step": 262
},
{
"epoch": 24.75,
"learning_rate": 0.006242857142857144,
"loss": 0.0977,
"step": 263
},
{
"epoch": 24.85,
"learning_rate": 0.006228571428571429,
"loss": 0.1056,
"step": 264
},
{
"epoch": 24.94,
"learning_rate": 0.006214285714285715,
"loss": 0.1252,
"step": 265
},
{
"epoch": 25.04,
"learning_rate": 0.0062,
"loss": 0.1107,
"step": 266
},
{
"epoch": 25.13,
"learning_rate": 0.006185714285714286,
"loss": 0.0887,
"step": 267
},
{
"epoch": 25.22,
"learning_rate": 0.006171428571428571,
"loss": 0.0836,
"step": 268
},
{
"epoch": 25.32,
"learning_rate": 0.0061571428571428576,
"loss": 0.0957,
"step": 269
},
{
"epoch": 25.41,
"learning_rate": 0.0061428571428571435,
"loss": 0.1165,
"step": 270
},
{
"epoch": 25.51,
"learning_rate": 0.0061285714285714285,
"loss": 0.1135,
"step": 271
},
{
"epoch": 25.6,
"learning_rate": 0.0061142857142857145,
"loss": 0.0901,
"step": 272
},
{
"epoch": 25.69,
"learning_rate": 0.0061,
"loss": 0.0751,
"step": 273
},
{
"epoch": 25.79,
"learning_rate": 0.0060857142857142854,
"loss": 0.109,
"step": 274
},
{
"epoch": 25.88,
"learning_rate": 0.006071428571428571,
"loss": 0.102,
"step": 275
},
{
"epoch": 25.98,
"learning_rate": 0.006057142857142858,
"loss": 0.0916,
"step": 276
},
{
"epoch": 26.07,
"learning_rate": 0.006042857142857143,
"loss": 0.0821,
"step": 277
},
{
"epoch": 26.16,
"learning_rate": 0.006028571428571429,
"loss": 0.0797,
"step": 278
},
{
"epoch": 26.26,
"learning_rate": 0.006014285714285714,
"loss": 0.0804,
"step": 279
},
{
"epoch": 26.35,
"learning_rate": 0.006,
"loss": 0.0987,
"step": 280
},
{
"epoch": 26.45,
"learning_rate": 0.005985714285714285,
"loss": 0.1192,
"step": 281
},
{
"epoch": 26.54,
"learning_rate": 0.005971428571428572,
"loss": 0.0699,
"step": 282
},
{
"epoch": 26.64,
"learning_rate": 0.005957142857142858,
"loss": 0.0902,
"step": 283
},
{
"epoch": 26.73,
"learning_rate": 0.005942857142857143,
"loss": 0.0916,
"step": 284
},
{
"epoch": 26.82,
"learning_rate": 0.005928571428571429,
"loss": 0.0753,
"step": 285
},
{
"epoch": 26.92,
"learning_rate": 0.005914285714285714,
"loss": 0.0964,
"step": 286
},
{
"epoch": 27.01,
"learning_rate": 0.0059,
"loss": 0.1108,
"step": 287
},
{
"epoch": 27.11,
"learning_rate": 0.005885714285714286,
"loss": 0.1062,
"step": 288
},
{
"epoch": 27.2,
"learning_rate": 0.005871428571428572,
"loss": 0.0846,
"step": 289
},
{
"epoch": 27.29,
"learning_rate": 0.005857142857142858,
"loss": 0.0986,
"step": 290
},
{
"epoch": 27.39,
"learning_rate": 0.005842857142857143,
"loss": 0.0713,
"step": 291
},
{
"epoch": 27.48,
"learning_rate": 0.005828571428571429,
"loss": 0.0829,
"step": 292
},
{
"epoch": 27.58,
"learning_rate": 0.0058142857142857145,
"loss": 0.1026,
"step": 293
},
{
"epoch": 27.67,
"learning_rate": 0.0058,
"loss": 0.0785,
"step": 294
},
{
"epoch": 27.76,
"learning_rate": 0.005785714285714286,
"loss": 0.0729,
"step": 295
},
{
"epoch": 27.86,
"learning_rate": 0.005771428571428572,
"loss": 0.0738,
"step": 296
},
{
"epoch": 27.95,
"learning_rate": 0.005757142857142857,
"loss": 0.079,
"step": 297
},
{
"epoch": 28.05,
"learning_rate": 0.005742857142857143,
"loss": 0.0761,
"step": 298
},
{
"epoch": 28.14,
"learning_rate": 0.005728571428571428,
"loss": 0.0792,
"step": 299
},
{
"epoch": 28.24,
"learning_rate": 0.005714285714285714,
"loss": 0.0881,
"step": 300
},
{
"epoch": 28.33,
"learning_rate": 0.005699999999999999,
"loss": 0.1073,
"step": 301
},
{
"epoch": 28.42,
"learning_rate": 0.005685714285714286,
"loss": 0.0686,
"step": 302
},
{
"epoch": 28.52,
"learning_rate": 0.005671428571428572,
"loss": 0.0701,
"step": 303
},
{
"epoch": 28.61,
"learning_rate": 0.005657142857142857,
"loss": 0.1114,
"step": 304
},
{
"epoch": 28.71,
"learning_rate": 0.005642857142857143,
"loss": 0.0595,
"step": 305
},
{
"epoch": 28.8,
"learning_rate": 0.005628571428571428,
"loss": 0.086,
"step": 306
},
{
"epoch": 28.89,
"learning_rate": 0.005614285714285714,
"loss": 0.0877,
"step": 307
},
{
"epoch": 28.99,
"learning_rate": 0.005600000000000001,
"loss": 0.0582,
"step": 308
},
{
"epoch": 29.08,
"learning_rate": 0.005585714285714286,
"loss": 0.0645,
"step": 309
},
{
"epoch": 29.18,
"learning_rate": 0.005571428571428572,
"loss": 0.1025,
"step": 310
},
{
"epoch": 29.27,
"learning_rate": 0.005557142857142857,
"loss": 0.0612,
"step": 311
},
{
"epoch": 29.36,
"learning_rate": 0.005542857142857143,
"loss": 0.0706,
"step": 312
},
{
"epoch": 29.46,
"learning_rate": 0.005528571428571429,
"loss": 0.0636,
"step": 313
},
{
"epoch": 29.55,
"learning_rate": 0.005514285714285714,
"loss": 0.0721,
"step": 314
},
{
"epoch": 29.65,
"learning_rate": 0.0055000000000000005,
"loss": 0.1062,
"step": 315
},
{
"epoch": 29.74,
"learning_rate": 0.0054857142857142865,
"loss": 0.0739,
"step": 316
},
{
"epoch": 29.84,
"learning_rate": 0.0054714285714285715,
"loss": 0.0688,
"step": 317
},
{
"epoch": 29.93,
"learning_rate": 0.0054571428571428575,
"loss": 0.0715,
"step": 318
},
{
"epoch": 30.02,
"learning_rate": 0.0054428571428571425,
"loss": 0.0628,
"step": 319
},
{
"epoch": 30.12,
"learning_rate": 0.0054285714285714284,
"loss": 0.0831,
"step": 320
},
{
"epoch": 30.21,
"learning_rate": 0.005414285714285715,
"loss": 0.0833,
"step": 321
},
{
"epoch": 30.31,
"learning_rate": 0.0054,
"loss": 0.09,
"step": 322
},
{
"epoch": 30.4,
"learning_rate": 0.005385714285714286,
"loss": 0.0469,
"step": 323
},
{
"epoch": 30.49,
"learning_rate": 0.005371428571428571,
"loss": 0.0631,
"step": 324
},
{
"epoch": 30.59,
"learning_rate": 0.005357142857142857,
"loss": 0.0685,
"step": 325
},
{
"epoch": 30.68,
"learning_rate": 0.005342857142857142,
"loss": 0.0798,
"step": 326
},
{
"epoch": 30.78,
"learning_rate": 0.005328571428571428,
"loss": 0.0653,
"step": 327
},
{
"epoch": 30.87,
"learning_rate": 0.005314285714285715,
"loss": 0.0615,
"step": 328
},
{
"epoch": 30.96,
"learning_rate": 0.0053,
"loss": 0.0548,
"step": 329
},
{
"epoch": 31.06,
"learning_rate": 0.005285714285714286,
"loss": 0.0592,
"step": 330
},
{
"epoch": 31.15,
"learning_rate": 0.005271428571428572,
"loss": 0.0628,
"step": 331
},
{
"epoch": 31.25,
"learning_rate": 0.005257142857142857,
"loss": 0.0604,
"step": 332
},
{
"epoch": 31.34,
"learning_rate": 0.005242857142857143,
"loss": 0.0833,
"step": 333
},
{
"epoch": 31.44,
"learning_rate": 0.005228571428571429,
"loss": 0.0748,
"step": 334
},
{
"epoch": 31.53,
"learning_rate": 0.005214285714285715,
"loss": 0.0495,
"step": 335
},
{
"epoch": 31.62,
"learning_rate": 0.005200000000000001,
"loss": 0.0589,
"step": 336
},
{
"epoch": 31.72,
"learning_rate": 0.005185714285714286,
"loss": 0.0655,
"step": 337
},
{
"epoch": 31.81,
"learning_rate": 0.005171428571428572,
"loss": 0.0695,
"step": 338
},
{
"epoch": 31.91,
"learning_rate": 0.005157142857142857,
"loss": 0.0609,
"step": 339
},
{
"epoch": 32.0,
"learning_rate": 0.005142857142857143,
"loss": 0.0636,
"step": 340
},
{
"epoch": 32.09,
"learning_rate": 0.005128571428571429,
"loss": 0.0606,
"step": 341
},
{
"epoch": 32.19,
"learning_rate": 0.0051142857142857144,
"loss": 0.0739,
"step": 342
},
{
"epoch": 32.28,
"learning_rate": 0.0051,
"loss": 0.0535,
"step": 343
},
{
"epoch": 32.38,
"learning_rate": 0.005085714285714285,
"loss": 0.0598,
"step": 344
},
{
"epoch": 32.47,
"learning_rate": 0.005071428571428571,
"loss": 0.06,
"step": 345
},
{
"epoch": 32.56,
"learning_rate": 0.005057142857142856,
"loss": 0.0734,
"step": 346
},
{
"epoch": 32.66,
"learning_rate": 0.005042857142857143,
"loss": 0.078,
"step": 347
},
{
"epoch": 32.75,
"learning_rate": 0.005028571428571429,
"loss": 0.0618,
"step": 348
},
{
"epoch": 32.85,
"learning_rate": 0.005014285714285714,
"loss": 0.0655,
"step": 349
},
{
"epoch": 32.94,
"learning_rate": 0.005,
"loss": 0.0615,
"step": 350
},
{
"epoch": 33.04,
"learning_rate": 0.004985714285714286,
"loss": 0.0556,
"step": 351
},
{
"epoch": 33.13,
"learning_rate": 0.004971428571428572,
"loss": 0.0637,
"step": 352
},
{
"epoch": 33.22,
"learning_rate": 0.004957142857142857,
"loss": 0.0518,
"step": 353
},
{
"epoch": 33.32,
"learning_rate": 0.004942857142857143,
"loss": 0.0466,
"step": 354
},
{
"epoch": 33.41,
"learning_rate": 0.004928571428571429,
"loss": 0.0732,
"step": 355
},
{
"epoch": 33.51,
"learning_rate": 0.004914285714285715,
"loss": 0.0584,
"step": 356
},
{
"epoch": 33.6,
"learning_rate": 0.0049,
"loss": 0.0586,
"step": 357
},
{
"epoch": 33.69,
"learning_rate": 0.004885714285714286,
"loss": 0.0481,
"step": 358
},
{
"epoch": 33.79,
"learning_rate": 0.004871428571428572,
"loss": 0.0552,
"step": 359
},
{
"epoch": 33.88,
"learning_rate": 0.004857142857142858,
"loss": 0.0567,
"step": 360
},
{
"epoch": 33.98,
"learning_rate": 0.004842857142857143,
"loss": 0.0664,
"step": 361
},
{
"epoch": 34.07,
"learning_rate": 0.004828571428571429,
"loss": 0.0701,
"step": 362
},
{
"epoch": 34.16,
"learning_rate": 0.0048142857142857145,
"loss": 0.069,
"step": 363
},
{
"epoch": 34.26,
"learning_rate": 0.0048,
"loss": 0.066,
"step": 364
},
{
"epoch": 34.35,
"learning_rate": 0.004785714285714286,
"loss": 0.0546,
"step": 365
},
{
"epoch": 34.45,
"learning_rate": 0.004771428571428571,
"loss": 0.0616,
"step": 366
},
{
"epoch": 34.54,
"learning_rate": 0.004757142857142857,
"loss": 0.0374,
"step": 367
},
{
"epoch": 34.64,
"learning_rate": 0.004742857142857143,
"loss": 0.046,
"step": 368
},
{
"epoch": 34.73,
"learning_rate": 0.004728571428571428,
"loss": 0.0459,
"step": 369
},
{
"epoch": 34.82,
"learning_rate": 0.004714285714285714,
"loss": 0.0648,
"step": 370
},
{
"epoch": 34.92,
"learning_rate": 0.0047,
"loss": 0.0699,
"step": 371
},
{
"epoch": 35.01,
"learning_rate": 0.004685714285714286,
"loss": 0.0605,
"step": 372
},
{
"epoch": 35.11,
"learning_rate": 0.004671428571428571,
"loss": 0.0704,
"step": 373
},
{
"epoch": 35.2,
"learning_rate": 0.004657142857142857,
"loss": 0.0444,
"step": 374
},
{
"epoch": 35.29,
"learning_rate": 0.004642857142857143,
"loss": 0.062,
"step": 375
},
{
"epoch": 35.39,
"learning_rate": 0.004628571428571429,
"loss": 0.0464,
"step": 376
},
{
"epoch": 35.48,
"learning_rate": 0.004614285714285714,
"loss": 0.0548,
"step": 377
},
{
"epoch": 35.58,
"learning_rate": 0.0046,
"loss": 0.0555,
"step": 378
},
{
"epoch": 35.67,
"learning_rate": 0.004585714285714286,
"loss": 0.0654,
"step": 379
},
{
"epoch": 35.76,
"learning_rate": 0.004571428571428572,
"loss": 0.0592,
"step": 380
},
{
"epoch": 35.86,
"learning_rate": 0.004557142857142858,
"loss": 0.0521,
"step": 381
},
{
"epoch": 35.95,
"learning_rate": 0.004542857142857143,
"loss": 0.0633,
"step": 382
},
{
"epoch": 36.05,
"learning_rate": 0.004528571428571429,
"loss": 0.047,
"step": 383
},
{
"epoch": 36.14,
"learning_rate": 0.004514285714285714,
"loss": 0.0476,
"step": 384
},
{
"epoch": 36.24,
"learning_rate": 0.0045000000000000005,
"loss": 0.051,
"step": 385
},
{
"epoch": 36.33,
"learning_rate": 0.004485714285714286,
"loss": 0.064,
"step": 386
},
{
"epoch": 36.42,
"learning_rate": 0.0044714285714285715,
"loss": 0.0309,
"step": 387
},
{
"epoch": 36.52,
"learning_rate": 0.0044571428571428574,
"loss": 0.0632,
"step": 388
},
{
"epoch": 36.61,
"learning_rate": 0.004442857142857143,
"loss": 0.0583,
"step": 389
},
{
"epoch": 36.71,
"learning_rate": 0.004428571428571428,
"loss": 0.0524,
"step": 390
},
{
"epoch": 36.8,
"learning_rate": 0.004414285714285714,
"loss": 0.0574,
"step": 391
},
{
"epoch": 36.89,
"learning_rate": 0.0044,
"loss": 0.043,
"step": 392
},
{
"epoch": 36.99,
"learning_rate": 0.004385714285714285,
"loss": 0.0482,
"step": 393
},
{
"epoch": 37.08,
"learning_rate": 0.004371428571428572,
"loss": 0.0585,
"step": 394
},
{
"epoch": 37.18,
"learning_rate": 0.004357142857142857,
"loss": 0.0467,
"step": 395
},
{
"epoch": 37.27,
"learning_rate": 0.004342857142857143,
"loss": 0.0498,
"step": 396
},
{
"epoch": 37.36,
"learning_rate": 0.004328571428571429,
"loss": 0.0578,
"step": 397
},
{
"epoch": 37.46,
"learning_rate": 0.004314285714285714,
"loss": 0.0469,
"step": 398
},
{
"epoch": 37.55,
"learning_rate": 0.0043,
"loss": 0.0447,
"step": 399
},
{
"epoch": 37.65,
"learning_rate": 0.004285714285714286,
"loss": 0.0669,
"step": 400
}
],
"logging_steps": 1.0,
"max_steps": 700,
"num_train_epochs": 70,
"save_steps": 100,
"total_flos": 4.700697287196672e+17,
"trial_name": null,
"trial_params": null
}