{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "global_step": 4450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 1.7501213550567627, "eval_mae": 1.0687230825424194, "eval_mse": 1.750120997428894, "eval_rmse": 1.3229213953018188, "eval_runtime": 0.213, "eval_samples_per_second": 323.879, "eval_steps_per_second": 42.245, "step": 89 }, { "epoch": 2.0, "eval_loss": 0.9408809542655945, "eval_mae": 0.7522120475769043, "eval_mse": 0.9408809542655945, "eval_rmse": 0.9699901938438416, "eval_runtime": 0.4675, "eval_samples_per_second": 147.6, "eval_steps_per_second": 19.252, "step": 178 }, { "epoch": 3.0, "eval_loss": 0.7912163138389587, "eval_mae": 0.7013395428657532, "eval_mse": 0.7912163734436035, "eval_rmse": 0.8895034193992615, "eval_runtime": 0.4829, "eval_samples_per_second": 142.893, "eval_steps_per_second": 18.638, "step": 267 }, { "epoch": 4.0, "eval_loss": 0.7405096292495728, "eval_mae": 0.6587470769882202, "eval_mse": 0.7405097484588623, "eval_rmse": 0.8605287671089172, "eval_runtime": 0.4491, "eval_samples_per_second": 153.654, "eval_steps_per_second": 20.042, "step": 356 }, { "epoch": 5.0, "eval_loss": 0.6923832297325134, "eval_mae": 0.6610296964645386, "eval_mse": 0.6923832893371582, "eval_rmse": 0.832095742225647, "eval_runtime": 0.4191, "eval_samples_per_second": 164.64, "eval_steps_per_second": 21.475, "step": 445 }, { "epoch": 5.62, "learning_rate": 8.876404494382023e-06, "loss": 0.9104, "step": 500 }, { "epoch": 6.0, "eval_loss": 0.6709772944450378, "eval_mae": 0.6442688703536987, "eval_mse": 0.6709771752357483, "eval_rmse": 0.8191319704055786, "eval_runtime": 0.4743, "eval_samples_per_second": 145.467, "eval_steps_per_second": 18.974, "step": 534 }, { "epoch": 7.0, "eval_loss": 0.5820761322975159, "eval_mae": 0.6152850389480591, "eval_mse": 0.5820761919021606, "eval_rmse": 0.7629391551017761, "eval_runtime": 0.364, "eval_samples_per_second": 189.553, "eval_steps_per_second": 24.724, "step": 623 }, { "epoch": 8.0, "eval_loss": 0.49572908878326416, "eval_mae": 0.5509689450263977, "eval_mse": 0.49572914838790894, "eval_rmse": 0.70408034324646, "eval_runtime": 0.4466, "eval_samples_per_second": 154.492, "eval_steps_per_second": 20.151, "step": 712 }, { "epoch": 9.0, "eval_loss": 0.5924321413040161, "eval_mae": 0.6066040396690369, "eval_mse": 0.5924323201179504, "eval_rmse": 0.7696962356567383, "eval_runtime": 0.4645, "eval_samples_per_second": 148.549, "eval_steps_per_second": 19.376, "step": 801 }, { "epoch": 10.0, "eval_loss": 0.4954969882965088, "eval_mae": 0.5473751425743103, "eval_mse": 0.4954971671104431, "eval_rmse": 0.7039155960083008, "eval_runtime": 0.3698, "eval_samples_per_second": 186.61, "eval_steps_per_second": 24.34, "step": 890 }, { "epoch": 11.0, "eval_loss": 0.5200049877166748, "eval_mae": 0.5685440897941589, "eval_mse": 0.52000492811203, "eval_rmse": 0.7211136817932129, "eval_runtime": 0.4266, "eval_samples_per_second": 161.746, "eval_steps_per_second": 21.097, "step": 979 }, { "epoch": 11.24, "learning_rate": 7.752808988764046e-06, "loss": 0.128, "step": 1000 }, { "epoch": 12.0, "eval_loss": 0.43686971068382263, "eval_mae": 0.5061944723129272, "eval_mse": 0.436869740486145, "eval_rmse": 0.6609612107276917, "eval_runtime": 0.4821, "eval_samples_per_second": 143.124, "eval_steps_per_second": 18.668, "step": 1068 }, { "epoch": 13.0, "eval_loss": 0.41416943073272705, "eval_mae": 0.46771711111068726, "eval_mse": 0.41416940093040466, "eval_rmse": 0.6435599327087402, "eval_runtime": 0.4624, "eval_samples_per_second": 149.23, "eval_steps_per_second": 19.465, "step": 1157 }, { "epoch": 14.0, "eval_loss": 0.40698668360710144, "eval_mae": 0.45521026849746704, "eval_mse": 0.4069867730140686, "eval_rmse": 0.6379551291465759, "eval_runtime": 0.4757, "eval_samples_per_second": 145.044, "eval_steps_per_second": 18.919, "step": 1246 }, { "epoch": 15.0, "eval_loss": 0.49930572509765625, "eval_mae": 0.5383840799331665, "eval_mse": 0.4993056654930115, "eval_rmse": 0.7066156268119812, "eval_runtime": 0.4656, "eval_samples_per_second": 148.181, "eval_steps_per_second": 19.328, "step": 1335 }, { "epoch": 16.0, "eval_loss": 0.5578888654708862, "eval_mae": 0.585189163684845, "eval_mse": 0.5578888058662415, "eval_rmse": 0.746919572353363, "eval_runtime": 0.4755, "eval_samples_per_second": 145.098, "eval_steps_per_second": 18.926, "step": 1424 }, { "epoch": 16.85, "learning_rate": 6.629213483146067e-06, "loss": 0.0895, "step": 1500 }, { "epoch": 17.0, "eval_loss": 0.5015009045600891, "eval_mae": 0.561897337436676, "eval_mse": 0.5015009641647339, "eval_rmse": 0.708167314529419, "eval_runtime": 0.3883, "eval_samples_per_second": 177.714, "eval_steps_per_second": 23.18, "step": 1513 }, { "epoch": 18.0, "eval_loss": 0.4590393602848053, "eval_mae": 0.5263462066650391, "eval_mse": 0.45903947949409485, "eval_rmse": 0.6775245070457458, "eval_runtime": 0.4835, "eval_samples_per_second": 142.705, "eval_steps_per_second": 18.614, "step": 1602 }, { "epoch": 19.0, "eval_loss": 0.4880666434764862, "eval_mae": 0.5377508997917175, "eval_mse": 0.4880666732788086, "eval_rmse": 0.698617696762085, "eval_runtime": 0.4715, "eval_samples_per_second": 146.34, "eval_steps_per_second": 19.088, "step": 1691 }, { "epoch": 20.0, "eval_loss": 0.3925124704837799, "eval_mae": 0.46625784039497375, "eval_mse": 0.3925124406814575, "eval_rmse": 0.6265081167221069, "eval_runtime": 0.3158, "eval_samples_per_second": 218.521, "eval_steps_per_second": 28.503, "step": 1780 }, { "epoch": 21.0, "eval_loss": 0.4392476975917816, "eval_mae": 0.5062677264213562, "eval_mse": 0.4392476975917816, "eval_rmse": 0.6627576351165771, "eval_runtime": 0.4321, "eval_samples_per_second": 159.696, "eval_steps_per_second": 20.83, "step": 1869 }, { "epoch": 22.0, "eval_loss": 0.42705094814300537, "eval_mae": 0.48886218667030334, "eval_mse": 0.42705097794532776, "eval_rmse": 0.6534913778305054, "eval_runtime": 0.3907, "eval_samples_per_second": 176.595, "eval_steps_per_second": 23.034, "step": 1958 }, { "epoch": 22.47, "learning_rate": 5.50561797752809e-06, "loss": 0.0694, "step": 2000 }, { "epoch": 23.0, "eval_loss": 0.40933796763420105, "eval_mae": 0.4783601760864258, "eval_mse": 0.4093380868434906, "eval_rmse": 0.6397953629493713, "eval_runtime": 0.3371, "eval_samples_per_second": 204.706, "eval_steps_per_second": 26.701, "step": 2047 }, { "epoch": 24.0, "eval_loss": 0.42077454924583435, "eval_mae": 0.49500545859336853, "eval_mse": 0.42077454924583435, "eval_rmse": 0.6486713886260986, "eval_runtime": 0.4775, "eval_samples_per_second": 144.493, "eval_steps_per_second": 18.847, "step": 2136 }, { "epoch": 25.0, "eval_loss": 0.40650996565818787, "eval_mae": 0.4869938790798187, "eval_mse": 0.4065099358558655, "eval_rmse": 0.6375812888145447, "eval_runtime": 0.4751, "eval_samples_per_second": 145.227, "eval_steps_per_second": 18.943, "step": 2225 }, { "epoch": 26.0, "eval_loss": 0.4467940032482147, "eval_mae": 0.5186977386474609, "eval_mse": 0.44679397344589233, "eval_rmse": 0.668426513671875, "eval_runtime": 0.4523, "eval_samples_per_second": 152.552, "eval_steps_per_second": 19.898, "step": 2314 }, { "epoch": 27.0, "eval_loss": 0.448551744222641, "eval_mae": 0.5286442041397095, "eval_mse": 0.4485517740249634, "eval_rmse": 0.6697400808334351, "eval_runtime": 0.3607, "eval_samples_per_second": 191.288, "eval_steps_per_second": 24.951, "step": 2403 }, { "epoch": 28.0, "eval_loss": 0.4263148605823517, "eval_mae": 0.5110523700714111, "eval_mse": 0.4263148009777069, "eval_rmse": 0.6529278755187988, "eval_runtime": 0.4741, "eval_samples_per_second": 145.544, "eval_steps_per_second": 18.984, "step": 2492 }, { "epoch": 28.09, "learning_rate": 4.382022471910113e-06, "loss": 0.0575, "step": 2500 }, { "epoch": 29.0, "eval_loss": 0.4624464213848114, "eval_mae": 0.5141972303390503, "eval_mse": 0.462446391582489, "eval_rmse": 0.6800341010093689, "eval_runtime": 0.4737, "eval_samples_per_second": 145.658, "eval_steps_per_second": 18.999, "step": 2581 }, { "epoch": 30.0, "eval_loss": 0.4065593481063843, "eval_mae": 0.4846087396144867, "eval_mse": 0.40655940771102905, "eval_rmse": 0.6376200914382935, "eval_runtime": 0.3326, "eval_samples_per_second": 207.482, "eval_steps_per_second": 27.063, "step": 2670 }, { "epoch": 31.0, "eval_loss": 0.4373004138469696, "eval_mae": 0.5060880780220032, "eval_mse": 0.437300443649292, "eval_rmse": 0.6612869501113892, "eval_runtime": 0.4249, "eval_samples_per_second": 162.393, "eval_steps_per_second": 21.182, "step": 2759 }, { "epoch": 32.0, "eval_loss": 0.44729650020599365, "eval_mae": 0.5081753134727478, "eval_mse": 0.44729653000831604, "eval_rmse": 0.6688023209571838, "eval_runtime": 0.4601, "eval_samples_per_second": 149.977, "eval_steps_per_second": 19.562, "step": 2848 }, { "epoch": 33.0, "eval_loss": 0.43939414620399475, "eval_mae": 0.5079318881034851, "eval_mse": 0.43939417600631714, "eval_rmse": 0.6628681421279907, "eval_runtime": 0.4736, "eval_samples_per_second": 145.702, "eval_steps_per_second": 19.005, "step": 2937 }, { "epoch": 33.71, "learning_rate": 3.258426966292135e-06, "loss": 0.0532, "step": 3000 }, { "epoch": 34.0, "eval_loss": 0.4430878460407257, "eval_mae": 0.5065318942070007, "eval_mse": 0.44308778643608093, "eval_rmse": 0.6656484007835388, "eval_runtime": 0.4788, "eval_samples_per_second": 144.103, "eval_steps_per_second": 18.796, "step": 3026 }, { "epoch": 35.0, "eval_loss": 0.4311515688896179, "eval_mae": 0.5022226572036743, "eval_mse": 0.4311515688896179, "eval_rmse": 0.6566213369369507, "eval_runtime": 0.47, "eval_samples_per_second": 146.806, "eval_steps_per_second": 19.149, "step": 3115 }, { "epoch": 36.0, "eval_loss": 0.42474353313446045, "eval_mae": 0.49367982149124146, "eval_mse": 0.42474350333213806, "eval_rmse": 0.6517235040664673, "eval_runtime": 0.4548, "eval_samples_per_second": 151.706, "eval_steps_per_second": 19.788, "step": 3204 }, { "epoch": 37.0, "eval_loss": 0.4552953541278839, "eval_mae": 0.5187087655067444, "eval_mse": 0.45529526472091675, "eval_rmse": 0.6747556924819946, "eval_runtime": 0.329, "eval_samples_per_second": 209.749, "eval_steps_per_second": 27.359, "step": 3293 }, { "epoch": 38.0, "eval_loss": 0.42223644256591797, "eval_mae": 0.4933069348335266, "eval_mse": 0.4222363829612732, "eval_rmse": 0.6497972011566162, "eval_runtime": 0.3523, "eval_samples_per_second": 195.847, "eval_steps_per_second": 25.545, "step": 3382 }, { "epoch": 39.0, "eval_loss": 0.4451429545879364, "eval_mae": 0.5115242004394531, "eval_mse": 0.4451429843902588, "eval_rmse": 0.6671903729438782, "eval_runtime": 0.1851, "eval_samples_per_second": 372.87, "eval_steps_per_second": 48.635, "step": 3471 }, { "epoch": 39.33, "learning_rate": 2.1348314606741574e-06, "loss": 0.0421, "step": 3500 }, { "epoch": 40.0, "eval_loss": 0.42086702585220337, "eval_mae": 0.49490445852279663, "eval_mse": 0.4208669662475586, "eval_rmse": 0.6487426161766052, "eval_runtime": 0.3399, "eval_samples_per_second": 203.01, "eval_steps_per_second": 26.48, "step": 3560 }, { "epoch": 41.0, "eval_loss": 0.44048014283180237, "eval_mae": 0.5092083215713501, "eval_mse": 0.4404800832271576, "eval_rmse": 0.6636867523193359, "eval_runtime": 0.3606, "eval_samples_per_second": 191.355, "eval_steps_per_second": 24.959, "step": 3649 }, { "epoch": 42.0, "eval_loss": 0.41600948572158813, "eval_mae": 0.49518799781799316, "eval_mse": 0.4160095453262329, "eval_rmse": 0.6449880003929138, "eval_runtime": 0.3412, "eval_samples_per_second": 202.233, "eval_steps_per_second": 26.378, "step": 3738 }, { "epoch": 43.0, "eval_loss": 0.42693421244621277, "eval_mae": 0.5002013444900513, "eval_mse": 0.42693421244621277, "eval_rmse": 0.6534020304679871, "eval_runtime": 0.2082, "eval_samples_per_second": 331.427, "eval_steps_per_second": 43.23, "step": 3827 }, { "epoch": 44.0, "eval_loss": 0.4115591049194336, "eval_mae": 0.491961270570755, "eval_mse": 0.4115590751171112, "eval_rmse": 0.6415287256240845, "eval_runtime": 0.3428, "eval_samples_per_second": 201.27, "eval_steps_per_second": 26.253, "step": 3916 }, { "epoch": 44.94, "learning_rate": 1.01123595505618e-06, "loss": 0.0419, "step": 4000 }, { "epoch": 45.0, "eval_loss": 0.41869696974754333, "eval_mae": 0.500180721282959, "eval_mse": 0.41869693994522095, "eval_rmse": 0.6470679640769958, "eval_runtime": 0.3406, "eval_samples_per_second": 202.605, "eval_steps_per_second": 26.427, "step": 4005 }, { "epoch": 46.0, "eval_loss": 0.4199928641319275, "eval_mae": 0.504236102104187, "eval_mse": 0.41999292373657227, "eval_rmse": 0.6480686068534851, "eval_runtime": 0.3427, "eval_samples_per_second": 201.361, "eval_steps_per_second": 26.264, "step": 4094 }, { "epoch": 47.0, "eval_loss": 0.4173473119735718, "eval_mae": 0.49992823600769043, "eval_mse": 0.41734734177589417, "eval_rmse": 0.6460242867469788, "eval_runtime": 0.3094, "eval_samples_per_second": 222.982, "eval_steps_per_second": 29.085, "step": 4183 }, { "epoch": 48.0, "eval_loss": 0.41815370321273804, "eval_mae": 0.4995117783546448, "eval_mse": 0.4181537628173828, "eval_rmse": 0.646648108959198, "eval_runtime": 0.3301, "eval_samples_per_second": 209.019, "eval_steps_per_second": 27.263, "step": 4272 }, { "epoch": 49.0, "eval_loss": 0.41537874937057495, "eval_mae": 0.49763771891593933, "eval_mse": 0.41537871956825256, "eval_rmse": 0.6444988250732422, "eval_runtime": 0.3396, "eval_samples_per_second": 203.201, "eval_steps_per_second": 26.504, "step": 4361 }, { "epoch": 50.0, "eval_loss": 0.41592642664909363, "eval_mae": 0.4979737401008606, "eval_mse": 0.4159264862537384, "eval_rmse": 0.6449236273765564, "eval_runtime": 0.3167, "eval_samples_per_second": 217.87, "eval_steps_per_second": 28.418, "step": 4450 } ], "max_steps": 4450, "num_train_epochs": 50, "total_flos": 2354503087987200.0, "trial_name": null, "trial_params": null }