| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 50.0, | |
| "global_step": 4450, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 1.7501213550567627, | |
| "eval_mae": 1.0687230825424194, | |
| "eval_mse": 1.750120997428894, | |
| "eval_rmse": 1.3229213953018188, | |
| "eval_runtime": 0.213, | |
| "eval_samples_per_second": 323.879, | |
| "eval_steps_per_second": 42.245, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.9408809542655945, | |
| "eval_mae": 0.7522120475769043, | |
| "eval_mse": 0.9408809542655945, | |
| "eval_rmse": 0.9699901938438416, | |
| "eval_runtime": 0.4675, | |
| "eval_samples_per_second": 147.6, | |
| "eval_steps_per_second": 19.252, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.7912163138389587, | |
| "eval_mae": 0.7013395428657532, | |
| "eval_mse": 0.7912163734436035, | |
| "eval_rmse": 0.8895034193992615, | |
| "eval_runtime": 0.4829, | |
| "eval_samples_per_second": 142.893, | |
| "eval_steps_per_second": 18.638, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.7405096292495728, | |
| "eval_mae": 0.6587470769882202, | |
| "eval_mse": 0.7405097484588623, | |
| "eval_rmse": 0.8605287671089172, | |
| "eval_runtime": 0.4491, | |
| "eval_samples_per_second": 153.654, | |
| "eval_steps_per_second": 20.042, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 0.6923832297325134, | |
| "eval_mae": 0.6610296964645386, | |
| "eval_mse": 0.6923832893371582, | |
| "eval_rmse": 0.832095742225647, | |
| "eval_runtime": 0.4191, | |
| "eval_samples_per_second": 164.64, | |
| "eval_steps_per_second": 21.475, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 8.876404494382023e-06, | |
| "loss": 0.9104, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 0.6709772944450378, | |
| "eval_mae": 0.6442688703536987, | |
| "eval_mse": 0.6709771752357483, | |
| "eval_rmse": 0.8191319704055786, | |
| "eval_runtime": 0.4743, | |
| "eval_samples_per_second": 145.467, | |
| "eval_steps_per_second": 18.974, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 0.5820761322975159, | |
| "eval_mae": 0.6152850389480591, | |
| "eval_mse": 0.5820761919021606, | |
| "eval_rmse": 0.7629391551017761, | |
| "eval_runtime": 0.364, | |
| "eval_samples_per_second": 189.553, | |
| "eval_steps_per_second": 24.724, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 0.49572908878326416, | |
| "eval_mae": 0.5509689450263977, | |
| "eval_mse": 0.49572914838790894, | |
| "eval_rmse": 0.70408034324646, | |
| "eval_runtime": 0.4466, | |
| "eval_samples_per_second": 154.492, | |
| "eval_steps_per_second": 20.151, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 0.5924321413040161, | |
| "eval_mae": 0.6066040396690369, | |
| "eval_mse": 0.5924323201179504, | |
| "eval_rmse": 0.7696962356567383, | |
| "eval_runtime": 0.4645, | |
| "eval_samples_per_second": 148.549, | |
| "eval_steps_per_second": 19.376, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 0.4954969882965088, | |
| "eval_mae": 0.5473751425743103, | |
| "eval_mse": 0.4954971671104431, | |
| "eval_rmse": 0.7039155960083008, | |
| "eval_runtime": 0.3698, | |
| "eval_samples_per_second": 186.61, | |
| "eval_steps_per_second": 24.34, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_loss": 0.5200049877166748, | |
| "eval_mae": 0.5685440897941589, | |
| "eval_mse": 0.52000492811203, | |
| "eval_rmse": 0.7211136817932129, | |
| "eval_runtime": 0.4266, | |
| "eval_samples_per_second": 161.746, | |
| "eval_steps_per_second": 21.097, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 11.24, | |
| "learning_rate": 7.752808988764046e-06, | |
| "loss": 0.128, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_loss": 0.43686971068382263, | |
| "eval_mae": 0.5061944723129272, | |
| "eval_mse": 0.436869740486145, | |
| "eval_rmse": 0.6609612107276917, | |
| "eval_runtime": 0.4821, | |
| "eval_samples_per_second": 143.124, | |
| "eval_steps_per_second": 18.668, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_loss": 0.41416943073272705, | |
| "eval_mae": 0.46771711111068726, | |
| "eval_mse": 0.41416940093040466, | |
| "eval_rmse": 0.6435599327087402, | |
| "eval_runtime": 0.4624, | |
| "eval_samples_per_second": 149.23, | |
| "eval_steps_per_second": 19.465, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_loss": 0.40698668360710144, | |
| "eval_mae": 0.45521026849746704, | |
| "eval_mse": 0.4069867730140686, | |
| "eval_rmse": 0.6379551291465759, | |
| "eval_runtime": 0.4757, | |
| "eval_samples_per_second": 145.044, | |
| "eval_steps_per_second": 18.919, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_loss": 0.49930572509765625, | |
| "eval_mae": 0.5383840799331665, | |
| "eval_mse": 0.4993056654930115, | |
| "eval_rmse": 0.7066156268119812, | |
| "eval_runtime": 0.4656, | |
| "eval_samples_per_second": 148.181, | |
| "eval_steps_per_second": 19.328, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_loss": 0.5578888654708862, | |
| "eval_mae": 0.585189163684845, | |
| "eval_mse": 0.5578888058662415, | |
| "eval_rmse": 0.746919572353363, | |
| "eval_runtime": 0.4755, | |
| "eval_samples_per_second": 145.098, | |
| "eval_steps_per_second": 18.926, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 16.85, | |
| "learning_rate": 6.629213483146067e-06, | |
| "loss": 0.0895, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_loss": 0.5015009045600891, | |
| "eval_mae": 0.561897337436676, | |
| "eval_mse": 0.5015009641647339, | |
| "eval_rmse": 0.708167314529419, | |
| "eval_runtime": 0.3883, | |
| "eval_samples_per_second": 177.714, | |
| "eval_steps_per_second": 23.18, | |
| "step": 1513 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_loss": 0.4590393602848053, | |
| "eval_mae": 0.5263462066650391, | |
| "eval_mse": 0.45903947949409485, | |
| "eval_rmse": 0.6775245070457458, | |
| "eval_runtime": 0.4835, | |
| "eval_samples_per_second": 142.705, | |
| "eval_steps_per_second": 18.614, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_loss": 0.4880666434764862, | |
| "eval_mae": 0.5377508997917175, | |
| "eval_mse": 0.4880666732788086, | |
| "eval_rmse": 0.698617696762085, | |
| "eval_runtime": 0.4715, | |
| "eval_samples_per_second": 146.34, | |
| "eval_steps_per_second": 19.088, | |
| "step": 1691 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_loss": 0.3925124704837799, | |
| "eval_mae": 0.46625784039497375, | |
| "eval_mse": 0.3925124406814575, | |
| "eval_rmse": 0.6265081167221069, | |
| "eval_runtime": 0.3158, | |
| "eval_samples_per_second": 218.521, | |
| "eval_steps_per_second": 28.503, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_loss": 0.4392476975917816, | |
| "eval_mae": 0.5062677264213562, | |
| "eval_mse": 0.4392476975917816, | |
| "eval_rmse": 0.6627576351165771, | |
| "eval_runtime": 0.4321, | |
| "eval_samples_per_second": 159.696, | |
| "eval_steps_per_second": 20.83, | |
| "step": 1869 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_loss": 0.42705094814300537, | |
| "eval_mae": 0.48886218667030334, | |
| "eval_mse": 0.42705097794532776, | |
| "eval_rmse": 0.6534913778305054, | |
| "eval_runtime": 0.3907, | |
| "eval_samples_per_second": 176.595, | |
| "eval_steps_per_second": 23.034, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 22.47, | |
| "learning_rate": 5.50561797752809e-06, | |
| "loss": 0.0694, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_loss": 0.40933796763420105, | |
| "eval_mae": 0.4783601760864258, | |
| "eval_mse": 0.4093380868434906, | |
| "eval_rmse": 0.6397953629493713, | |
| "eval_runtime": 0.3371, | |
| "eval_samples_per_second": 204.706, | |
| "eval_steps_per_second": 26.701, | |
| "step": 2047 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_loss": 0.42077454924583435, | |
| "eval_mae": 0.49500545859336853, | |
| "eval_mse": 0.42077454924583435, | |
| "eval_rmse": 0.6486713886260986, | |
| "eval_runtime": 0.4775, | |
| "eval_samples_per_second": 144.493, | |
| "eval_steps_per_second": 18.847, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_loss": 0.40650996565818787, | |
| "eval_mae": 0.4869938790798187, | |
| "eval_mse": 0.4065099358558655, | |
| "eval_rmse": 0.6375812888145447, | |
| "eval_runtime": 0.4751, | |
| "eval_samples_per_second": 145.227, | |
| "eval_steps_per_second": 18.943, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_loss": 0.4467940032482147, | |
| "eval_mae": 0.5186977386474609, | |
| "eval_mse": 0.44679397344589233, | |
| "eval_rmse": 0.668426513671875, | |
| "eval_runtime": 0.4523, | |
| "eval_samples_per_second": 152.552, | |
| "eval_steps_per_second": 19.898, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_loss": 0.448551744222641, | |
| "eval_mae": 0.5286442041397095, | |
| "eval_mse": 0.4485517740249634, | |
| "eval_rmse": 0.6697400808334351, | |
| "eval_runtime": 0.3607, | |
| "eval_samples_per_second": 191.288, | |
| "eval_steps_per_second": 24.951, | |
| "step": 2403 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_loss": 0.4263148605823517, | |
| "eval_mae": 0.5110523700714111, | |
| "eval_mse": 0.4263148009777069, | |
| "eval_rmse": 0.6529278755187988, | |
| "eval_runtime": 0.4741, | |
| "eval_samples_per_second": 145.544, | |
| "eval_steps_per_second": 18.984, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 28.09, | |
| "learning_rate": 4.382022471910113e-06, | |
| "loss": 0.0575, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_loss": 0.4624464213848114, | |
| "eval_mae": 0.5141972303390503, | |
| "eval_mse": 0.462446391582489, | |
| "eval_rmse": 0.6800341010093689, | |
| "eval_runtime": 0.4737, | |
| "eval_samples_per_second": 145.658, | |
| "eval_steps_per_second": 18.999, | |
| "step": 2581 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_loss": 0.4065593481063843, | |
| "eval_mae": 0.4846087396144867, | |
| "eval_mse": 0.40655940771102905, | |
| "eval_rmse": 0.6376200914382935, | |
| "eval_runtime": 0.3326, | |
| "eval_samples_per_second": 207.482, | |
| "eval_steps_per_second": 27.063, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_loss": 0.4373004138469696, | |
| "eval_mae": 0.5060880780220032, | |
| "eval_mse": 0.437300443649292, | |
| "eval_rmse": 0.6612869501113892, | |
| "eval_runtime": 0.4249, | |
| "eval_samples_per_second": 162.393, | |
| "eval_steps_per_second": 21.182, | |
| "step": 2759 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_loss": 0.44729650020599365, | |
| "eval_mae": 0.5081753134727478, | |
| "eval_mse": 0.44729653000831604, | |
| "eval_rmse": 0.6688023209571838, | |
| "eval_runtime": 0.4601, | |
| "eval_samples_per_second": 149.977, | |
| "eval_steps_per_second": 19.562, | |
| "step": 2848 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_loss": 0.43939414620399475, | |
| "eval_mae": 0.5079318881034851, | |
| "eval_mse": 0.43939417600631714, | |
| "eval_rmse": 0.6628681421279907, | |
| "eval_runtime": 0.4736, | |
| "eval_samples_per_second": 145.702, | |
| "eval_steps_per_second": 19.005, | |
| "step": 2937 | |
| }, | |
| { | |
| "epoch": 33.71, | |
| "learning_rate": 3.258426966292135e-06, | |
| "loss": 0.0532, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_loss": 0.4430878460407257, | |
| "eval_mae": 0.5065318942070007, | |
| "eval_mse": 0.44308778643608093, | |
| "eval_rmse": 0.6656484007835388, | |
| "eval_runtime": 0.4788, | |
| "eval_samples_per_second": 144.103, | |
| "eval_steps_per_second": 18.796, | |
| "step": 3026 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_loss": 0.4311515688896179, | |
| "eval_mae": 0.5022226572036743, | |
| "eval_mse": 0.4311515688896179, | |
| "eval_rmse": 0.6566213369369507, | |
| "eval_runtime": 0.47, | |
| "eval_samples_per_second": 146.806, | |
| "eval_steps_per_second": 19.149, | |
| "step": 3115 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_loss": 0.42474353313446045, | |
| "eval_mae": 0.49367982149124146, | |
| "eval_mse": 0.42474350333213806, | |
| "eval_rmse": 0.6517235040664673, | |
| "eval_runtime": 0.4548, | |
| "eval_samples_per_second": 151.706, | |
| "eval_steps_per_second": 19.788, | |
| "step": 3204 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_loss": 0.4552953541278839, | |
| "eval_mae": 0.5187087655067444, | |
| "eval_mse": 0.45529526472091675, | |
| "eval_rmse": 0.6747556924819946, | |
| "eval_runtime": 0.329, | |
| "eval_samples_per_second": 209.749, | |
| "eval_steps_per_second": 27.359, | |
| "step": 3293 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_loss": 0.42223644256591797, | |
| "eval_mae": 0.4933069348335266, | |
| "eval_mse": 0.4222363829612732, | |
| "eval_rmse": 0.6497972011566162, | |
| "eval_runtime": 0.3523, | |
| "eval_samples_per_second": 195.847, | |
| "eval_steps_per_second": 25.545, | |
| "step": 3382 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_loss": 0.4451429545879364, | |
| "eval_mae": 0.5115242004394531, | |
| "eval_mse": 0.4451429843902588, | |
| "eval_rmse": 0.6671903729438782, | |
| "eval_runtime": 0.1851, | |
| "eval_samples_per_second": 372.87, | |
| "eval_steps_per_second": 48.635, | |
| "step": 3471 | |
| }, | |
| { | |
| "epoch": 39.33, | |
| "learning_rate": 2.1348314606741574e-06, | |
| "loss": 0.0421, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_loss": 0.42086702585220337, | |
| "eval_mae": 0.49490445852279663, | |
| "eval_mse": 0.4208669662475586, | |
| "eval_rmse": 0.6487426161766052, | |
| "eval_runtime": 0.3399, | |
| "eval_samples_per_second": 203.01, | |
| "eval_steps_per_second": 26.48, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_loss": 0.44048014283180237, | |
| "eval_mae": 0.5092083215713501, | |
| "eval_mse": 0.4404800832271576, | |
| "eval_rmse": 0.6636867523193359, | |
| "eval_runtime": 0.3606, | |
| "eval_samples_per_second": 191.355, | |
| "eval_steps_per_second": 24.959, | |
| "step": 3649 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_loss": 0.41600948572158813, | |
| "eval_mae": 0.49518799781799316, | |
| "eval_mse": 0.4160095453262329, | |
| "eval_rmse": 0.6449880003929138, | |
| "eval_runtime": 0.3412, | |
| "eval_samples_per_second": 202.233, | |
| "eval_steps_per_second": 26.378, | |
| "step": 3738 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_loss": 0.42693421244621277, | |
| "eval_mae": 0.5002013444900513, | |
| "eval_mse": 0.42693421244621277, | |
| "eval_rmse": 0.6534020304679871, | |
| "eval_runtime": 0.2082, | |
| "eval_samples_per_second": 331.427, | |
| "eval_steps_per_second": 43.23, | |
| "step": 3827 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_loss": 0.4115591049194336, | |
| "eval_mae": 0.491961270570755, | |
| "eval_mse": 0.4115590751171112, | |
| "eval_rmse": 0.6415287256240845, | |
| "eval_runtime": 0.3428, | |
| "eval_samples_per_second": 201.27, | |
| "eval_steps_per_second": 26.253, | |
| "step": 3916 | |
| }, | |
| { | |
| "epoch": 44.94, | |
| "learning_rate": 1.01123595505618e-06, | |
| "loss": 0.0419, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_loss": 0.41869696974754333, | |
| "eval_mae": 0.500180721282959, | |
| "eval_mse": 0.41869693994522095, | |
| "eval_rmse": 0.6470679640769958, | |
| "eval_runtime": 0.3406, | |
| "eval_samples_per_second": 202.605, | |
| "eval_steps_per_second": 26.427, | |
| "step": 4005 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_loss": 0.4199928641319275, | |
| "eval_mae": 0.504236102104187, | |
| "eval_mse": 0.41999292373657227, | |
| "eval_rmse": 0.6480686068534851, | |
| "eval_runtime": 0.3427, | |
| "eval_samples_per_second": 201.361, | |
| "eval_steps_per_second": 26.264, | |
| "step": 4094 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_loss": 0.4173473119735718, | |
| "eval_mae": 0.49992823600769043, | |
| "eval_mse": 0.41734734177589417, | |
| "eval_rmse": 0.6460242867469788, | |
| "eval_runtime": 0.3094, | |
| "eval_samples_per_second": 222.982, | |
| "eval_steps_per_second": 29.085, | |
| "step": 4183 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_loss": 0.41815370321273804, | |
| "eval_mae": 0.4995117783546448, | |
| "eval_mse": 0.4181537628173828, | |
| "eval_rmse": 0.646648108959198, | |
| "eval_runtime": 0.3301, | |
| "eval_samples_per_second": 209.019, | |
| "eval_steps_per_second": 27.263, | |
| "step": 4272 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "eval_loss": 0.41537874937057495, | |
| "eval_mae": 0.49763771891593933, | |
| "eval_mse": 0.41537871956825256, | |
| "eval_rmse": 0.6444988250732422, | |
| "eval_runtime": 0.3396, | |
| "eval_samples_per_second": 203.201, | |
| "eval_steps_per_second": 26.504, | |
| "step": 4361 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_loss": 0.41592642664909363, | |
| "eval_mae": 0.4979737401008606, | |
| "eval_mse": 0.4159264862537384, | |
| "eval_rmse": 0.6449236273765564, | |
| "eval_runtime": 0.3167, | |
| "eval_samples_per_second": 217.87, | |
| "eval_steps_per_second": 28.418, | |
| "step": 4450 | |
| } | |
| ], | |
| "max_steps": 4450, | |
| "num_train_epochs": 50, | |
| "total_flos": 2354503087987200.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |