Upload folder using huggingface_hub

aef7ee9 verified 5 months ago

9.75 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.9929627023223082,
	"eval_steps": 500,
	"global_step": 236,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.00844475721323012,
	"grad_norm": 2.089867158089946,
	"learning_rate": 4.1666666666666667e-07,
	"loss": 2.161,
	"step": 1
	},
	{
	"epoch": 0.0422237860661506,
	"grad_norm": 1.5848399731293203,
	"learning_rate": 2.0833333333333334e-06,
	"loss": 2.0465,
	"step": 5
	},
	{
	"epoch": 0.0844475721323012,
	"grad_norm": 0.7578527824638689,
	"learning_rate": 4.166666666666667e-06,
	"loss": 1.5984,
	"step": 10
	},
	{
	"epoch": 0.1266713581984518,
	"grad_norm": 0.346915259777201,
	"learning_rate": 6.25e-06,
	"loss": 0.9384,
	"step": 15
	},
	{
	"epoch": 0.1688951442646024,
	"grad_norm": 0.17556784814479495,
	"learning_rate": 8.333333333333334e-06,
	"loss": 0.7181,
	"step": 20
	},
	{
	"epoch": 0.211118930330753,
	"grad_norm": 0.13630834383254564,
	"learning_rate": 9.999451015497595e-06,
	"loss": 0.6024,
	"step": 25
	},
	{
	"epoch": 0.2533427163969036,
	"grad_norm": 0.11554222320406965,
	"learning_rate": 9.980249213076085e-06,
	"loss": 0.5901,
	"step": 30
	},
	{
	"epoch": 0.2955665024630542,
	"grad_norm": 0.12278253986244328,
	"learning_rate": 9.933718620186745e-06,
	"loss": 0.5266,
	"step": 35
	},
	{
	"epoch": 0.3377902885292048,
	"grad_norm": 0.09979826087347965,
	"learning_rate": 9.860114570402055e-06,
	"loss": 0.4992,
	"step": 40
	},
	{
	"epoch": 0.3800140745953554,
	"grad_norm": 0.09403541146186689,
	"learning_rate": 9.759840961111098e-06,
	"loss": 0.4552,
	"step": 45
	},
	{
	"epoch": 0.422237860661506,
	"grad_norm": 0.11355916833002154,
	"learning_rate": 9.633448037159167e-06,
	"loss": 0.4481,
	"step": 50
	},
	{
	"epoch": 0.4644616467276566,
	"grad_norm": 0.08073377113874591,
	"learning_rate": 9.481629371415315e-06,
	"loss": 0.4421,
	"step": 55
	},
	{
	"epoch": 0.5066854327938072,
	"grad_norm": 0.09267587470901371,
	"learning_rate": 9.305218058836778e-06,
	"loss": 0.4315,
	"step": 60
	},
	{
	"epoch": 0.5489092188599578,
	"grad_norm": 0.07970275638417536,
	"learning_rate": 9.10518214491513e-06,
	"loss": 0.4193,
	"step": 65
	},
	{
	"epoch": 0.5911330049261084,
	"grad_norm": 0.09176650758741302,
	"learning_rate": 8.882619313590212e-06,
	"loss": 0.4359,
	"step": 70
	},
	{
	"epoch": 0.633356790992259,
	"grad_norm": 0.06297292993950475,
	"learning_rate": 8.638750863781614e-06,
	"loss": 0.3866,
	"step": 75
	},
	{
	"epoch": 0.6755805770584096,
	"grad_norm": 0.09109073826112205,
	"learning_rate": 8.374915007591053e-06,
	"loss": 0.4041,
	"step": 80
	},
	{
	"epoch": 0.7178043631245602,
	"grad_norm": 0.05865190056091487,
	"learning_rate": 8.092559526951374e-06,
	"loss": 0.4239,
	"step": 85
	},
	{
	"epoch": 0.7600281491907108,
	"grad_norm": 0.06974442479960637,
	"learning_rate": 7.793233829018263e-06,
	"loss": 0.3986,
	"step": 90
	},
	{
	"epoch": 0.8022519352568613,
	"grad_norm": 0.05463034337178698,
	"learning_rate": 7.478580443900247e-06,
	"loss": 0.4247,
	"step": 95
	},
	{
	"epoch": 0.844475721323012,
	"grad_norm": 0.06205362965112395,
	"learning_rate": 7.1503260113826035e-06,
	"loss": 0.3913,
	"step": 100
	},
	{
	"epoch": 0.8866995073891626,
	"grad_norm": 0.067892476771574,
	"learning_rate": 6.810271806104931e-06,
	"loss": 0.401,
	"step": 105
	},
	{
	"epoch": 0.9289232934553132,
	"grad_norm": 0.05154900712833624,
	"learning_rate": 6.46028385318488e-06,
	"loss": 0.3938,
	"step": 110
	},
	{
	"epoch": 0.9711470795214637,
	"grad_norm": 0.05021016889651189,
	"learning_rate": 6.10228268852786e-06,
	"loss": 0.3675,
	"step": 115
	},
	{
	"epoch": 0.9964813511611541,
	"eval_loss": 0.36927565932273865,
	"eval_runtime": 345.1421,
	"eval_samples_per_second": 4.117,
	"eval_steps_per_second": 1.031,
	"step": 118
	},
	{
	"epoch": 1.0133708655876144,
	"grad_norm": 0.06461727535494917,
	"learning_rate": 5.738232820012407e-06,
	"loss": 0.437,
	"step": 120
	},
	{
	"epoch": 1.0555946516537649,
	"grad_norm": 0.05489917224274474,
	"learning_rate": 5.370131947382215e-06,
	"loss": 0.3656,
	"step": 125
	},
	{
	"epoch": 1.0978184377199156,
	"grad_norm": 0.06042558978858904,
	"learning_rate": 5e-06,
	"loss": 0.4009,
	"step": 130
	},
	{
	"epoch": 1.1400422237860661,
	"grad_norm": 0.06049921311472245,
	"learning_rate": 4.629868052617786e-06,
	"loss": 0.3482,
	"step": 135
	},
	{
	"epoch": 1.1822660098522166,
	"grad_norm": 0.053628889597754824,
	"learning_rate": 4.261767179987595e-06,
	"loss": 0.3484,
	"step": 140
	},
	{
	"epoch": 1.2244897959183674,
	"grad_norm": 0.05427865015224349,
	"learning_rate": 3.897717311472141e-06,
	"loss": 0.3639,
	"step": 145
	},
	{
	"epoch": 1.266713581984518,
	"grad_norm": 0.057489395038740544,
	"learning_rate": 3.539716146815122e-06,
	"loss": 0.3835,
	"step": 150
	},
	{
	"epoch": 1.3089373680506686,
	"grad_norm": 0.05111391290442162,
	"learning_rate": 3.1897281938950693e-06,
	"loss": 0.3645,
	"step": 155
	},
	{
	"epoch": 1.3511611541168191,
	"grad_norm": 0.05075774897048314,
	"learning_rate": 2.8496739886173994e-06,
	"loss": 0.3693,
	"step": 160
	},
	{
	"epoch": 1.3933849401829699,
	"grad_norm": 0.04943651400266673,
	"learning_rate": 2.5214195560997546e-06,
	"loss": 0.3786,
	"step": 165
	},
	{
	"epoch": 1.4356087262491204,
	"grad_norm": 0.044263408717175816,
	"learning_rate": 2.2067661709817384e-06,
	"loss": 0.3802,
	"step": 170
	},
	{
	"epoch": 1.477832512315271,
	"grad_norm": 0.05037201642103367,
	"learning_rate": 1.9074404730486264e-06,
	"loss": 0.3548,
	"step": 175
	},
	{
	"epoch": 1.5200562983814216,
	"grad_norm": 0.047573069715430114,
	"learning_rate": 1.6250849924089485e-06,
	"loss": 0.3737,
	"step": 180
	},
	{
	"epoch": 1.5622800844475722,
	"grad_norm": 0.05285111504365142,
	"learning_rate": 1.3612491362183887e-06,
	"loss": 0.3602,
	"step": 185
	},
	{
	"epoch": 1.6045038705137227,
	"grad_norm": 0.05156216290861093,
	"learning_rate": 1.1173806864097885e-06,
	"loss": 0.3609,
	"step": 190
	},
	{
	"epoch": 1.6467276565798734,
	"grad_norm": 0.04756228232260825,
	"learning_rate": 8.948178550848702e-07,
	"loss": 0.3344,
	"step": 195
	},
	{
	"epoch": 1.688951442646024,
	"grad_norm": 0.05015333465574718,
	"learning_rate": 6.947819411632223e-07,
	"loss": 0.326,
	"step": 200
	},
	{
	"epoch": 1.7311752287121744,
	"grad_norm": 0.05391623766653661,
	"learning_rate": 5.183706285846873e-07,
	"loss": 0.3743,
	"step": 205
	},
	{
	"epoch": 1.7733990147783252,
	"grad_norm": 0.05381208848410942,
	"learning_rate": 3.665519628408332e-07,
	"loss": 0.3482,
	"step": 210
	},
	{
	"epoch": 1.8156228008444757,
	"grad_norm": 0.04072660534703765,
	"learning_rate": 2.401590388889025e-07,
	"loss": 0.3681,
	"step": 215
	},
	{
	"epoch": 1.8578465869106262,
	"grad_norm": 0.05078699788677098,
	"learning_rate": 1.3988542959794627e-07,
	"loss": 0.3522,
	"step": 220
	},
	{
	"epoch": 1.900070372976777,
	"grad_norm": 0.05235725304027692,
	"learning_rate": 6.628137981325611e-08,
	"loss": 0.3994,
	"step": 225
	},
	{
	"epoch": 1.9422941590429277,
	"grad_norm": 0.04983097587956561,
	"learning_rate": 1.975078692391552e-08,
	"loss": 0.36,
	"step": 230
	},
	{
	"epoch": 1.984517945109078,
	"grad_norm": 0.058131705794752526,
	"learning_rate": 5.489845024053698e-10,
	"loss": 0.3467,
	"step": 235
	},
	{
	"epoch": 1.9929627023223082,
	"eval_loss": 0.3553777039051056,
	"eval_runtime": 342.1435,
	"eval_samples_per_second": 4.153,
	"eval_steps_per_second": 1.04,
	"step": 236
	},
	{
	"epoch": 1.9929627023223082,
	"step": 236,
	"total_flos": 4.237289220525261e+16,
	"train_loss": 0.47937014673726036,
	"train_runtime": 10453.3911,
	"train_samples_per_second": 1.087,
	"train_steps_per_second": 0.023
	}
	],
	"logging_steps": 5,
	"max_steps": 236,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 2,
	"save_steps": 100,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 4.237289220525261e+16,
	"train_batch_size": 1,
	"trial_name": null,
	"trial_params": null
	}