{
  "best_global_step": 50000,
  "best_metric": 0.5881,
  "best_model_checkpoint": "models/NED/SPACCC_full_upsampled_tfidf/Meta-Llama-3-8B-Instruct/checkpoint-50000",
  "epoch": 3.0,
  "eval_steps": 2000,
  "global_step": 103965,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "entropy": 1.771104082763195,
      "epoch": 0.057711729909104025,
      "grad_norm": 8.875,
      "learning_rate": 1.9227316447579353e-05,
      "loss": 0.807,
      "mean_token_accuracy": 0.8383643639683723,
      "num_tokens": 15534479.0,
      "step": 2000
    },
    {
      "epoch": 0.057711729909104025,
      "eval_entropy": 1.7064778925563795,
      "eval_loss": 0.5752137899398804,
      "eval_mean_token_accuracy": 0.8736604764762166,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 15534479.0,
      "eval_recall": 0.4857,
      "eval_runtime": 19.5541,
      "eval_samples_per_second": 46.435,
      "eval_steps_per_second": 46.435,
      "step": 2000
    },
    {
      "entropy": 1.7855026668310165,
      "epoch": 0.11542345981820805,
      "grad_norm": 4.9375,
      "learning_rate": 2.973821470360748e-05,
      "loss": 0.4284,
      "mean_token_accuracy": 0.9000137696564198,
      "num_tokens": 31091376.0,
      "step": 4000
    },
    {
      "epoch": 0.11542345981820805,
      "eval_entropy": 1.9107416086391205,
      "eval_loss": 0.6141767501831055,
      "eval_mean_token_accuracy": 0.8813558152921924,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 31091376.0,
      "eval_recall": 0.5132,
      "eval_runtime": 18.6743,
      "eval_samples_per_second": 48.623,
      "eval_steps_per_second": 48.623,
      "step": 4000
    },
    {
      "entropy": 1.7454647228717803,
      "epoch": 0.17313518972731207,
      "grad_norm": 6.40625,
      "learning_rate": 2.914324812089721e-05,
      "loss": 0.3478,
      "mean_token_accuracy": 0.9183464118242264,
      "num_tokens": 46695519.0,
      "step": 6000
    },
    {
      "epoch": 0.17313518972731207,
      "eval_entropy": 1.6889207995279245,
      "eval_loss": 0.7172139286994934,
      "eval_mean_token_accuracy": 0.8881947658627831,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 46695519.0,
      "eval_recall": 0.5551,
      "eval_runtime": 18.7505,
      "eval_samples_per_second": 48.425,
      "eval_steps_per_second": 48.425,
      "step": 6000
    },
    {
      "entropy": 1.686599359869957,
      "epoch": 0.2308469196364161,
      "grad_norm": 5.0,
      "learning_rate": 2.8548281538186937e-05,
      "loss": 0.2911,
      "mean_token_accuracy": 0.9297748121023178,
      "num_tokens": 62201311.0,
      "step": 8000
    },
    {
      "epoch": 0.2308469196364161,
      "eval_entropy": 1.765830583837589,
      "eval_loss": 0.6870580315589905,
      "eval_mean_token_accuracy": 0.8954090231184392,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 62201311.0,
      "eval_recall": 0.554,
      "eval_runtime": 18.2183,
      "eval_samples_per_second": 49.84,
      "eval_steps_per_second": 49.84,
      "step": 8000
    },
    {
      "entropy": 1.6513705806136132,
      "epoch": 0.28855864954552013,
      "grad_norm": 2.890625,
      "learning_rate": 2.795331495547667e-05,
      "loss": 0.2723,
      "mean_token_accuracy": 0.9330688781142235,
      "num_tokens": 77723725.0,
      "step": 10000
    },
    {
      "epoch": 0.28855864954552013,
      "eval_entropy": 1.7299244680199855,
      "eval_loss": 0.671196460723877,
      "eval_mean_token_accuracy": 0.8996821519003828,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 77723725.0,
      "eval_recall": 0.5837,
      "eval_runtime": 18.1693,
      "eval_samples_per_second": 49.975,
      "eval_steps_per_second": 49.975,
      "step": 10000
    },
    {
      "entropy": 1.6137347612977029,
      "epoch": 0.34627037945462413,
      "grad_norm": 4.15625,
      "learning_rate": 2.7358348372766396e-05,
      "loss": 0.247,
      "mean_token_accuracy": 0.938195524007082,
      "num_tokens": 93268948.0,
      "step": 12000
    },
    {
      "epoch": 0.34627037945462413,
      "eval_entropy": 1.642808350631844,
      "eval_loss": 0.7069945335388184,
      "eval_mean_token_accuracy": 0.9005183333998735,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 93268948.0,
      "eval_recall": 0.5683,
      "eval_runtime": 18.2098,
      "eval_samples_per_second": 49.863,
      "eval_steps_per_second": 49.863,
      "step": 12000
    },
    {
      "entropy": 1.5417808018922805,
      "epoch": 0.4039821093637282,
      "grad_norm": 6.15625,
      "learning_rate": 2.6763381790056127e-05,
      "loss": 0.2369,
      "mean_token_accuracy": 0.9400165711343288,
      "num_tokens": 108712209.0,
      "step": 14000
    },
    {
      "epoch": 0.4039821093637282,
      "eval_entropy": 1.6070995822483223,
      "eval_loss": 0.7336843013763428,
      "eval_mean_token_accuracy": 0.9035924484026064,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 108712209.0,
      "eval_recall": 0.5749,
      "eval_runtime": 18.1147,
      "eval_samples_per_second": 50.125,
      "eval_steps_per_second": 50.125,
      "step": 14000
    },
    {
      "entropy": 1.5307720832824707,
      "epoch": 0.4616938392728322,
      "grad_norm": 1.3203125,
      "learning_rate": 2.6168415207345855e-05,
      "loss": 0.224,
      "mean_token_accuracy": 0.9427571404874325,
      "num_tokens": 124237300.0,
      "step": 16000
    },
    {
      "epoch": 0.4616938392728322,
      "eval_entropy": 1.6007142412242386,
      "eval_loss": 0.723686933517456,
      "eval_mean_token_accuracy": 0.9036177859891878,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 124237300.0,
      "eval_recall": 0.5738,
      "eval_runtime": 18.0095,
      "eval_samples_per_second": 50.418,
      "eval_steps_per_second": 50.418,
      "step": 16000
    },
    {
      "entropy": 1.5197161840200424,
      "epoch": 0.5194055691819363,
      "grad_norm": 4.375,
      "learning_rate": 2.5573448624635583e-05,
      "loss": 0.2127,
      "mean_token_accuracy": 0.9449084457457065,
      "num_tokens": 139804083.0,
      "step": 18000
    },
    {
      "epoch": 0.5194055691819363,
      "eval_entropy": 1.5377285603909765,
      "eval_loss": 0.7464824318885803,
      "eval_mean_token_accuracy": 0.9009800682968505,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 139804083.0,
      "eval_recall": 0.5694,
      "eval_runtime": 17.3142,
      "eval_samples_per_second": 52.443,
      "eval_steps_per_second": 52.443,
      "step": 18000
    },
    {
      "entropy": 1.4599213127493857,
      "epoch": 0.5771172990910403,
      "grad_norm": 4.96875,
      "learning_rate": 2.4978482041925314e-05,
      "loss": 0.2026,
      "mean_token_accuracy": 0.9471077627837657,
      "num_tokens": 155379953.0,
      "step": 20000
    },
    {
      "epoch": 0.5771172990910403,
      "eval_entropy": 1.475688523001608,
      "eval_loss": 0.762593150138855,
      "eval_mean_token_accuracy": 0.9033181490089399,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 155379953.0,
      "eval_recall": 0.5705,
      "eval_runtime": 17.8659,
      "eval_samples_per_second": 50.823,
      "eval_steps_per_second": 50.823,
      "step": 20000
    },
    {
      "entropy": 1.4277473657727242,
      "epoch": 0.6348290290001443,
      "grad_norm": 5.15625,
      "learning_rate": 2.4383515459215042e-05,
      "loss": 0.1907,
      "mean_token_accuracy": 0.9498722539842128,
      "num_tokens": 170982670.0,
      "step": 22000
    },
    {
      "epoch": 0.6348290290001443,
      "eval_entropy": 1.5171283333837198,
      "eval_loss": 0.7434535026550293,
      "eval_mean_token_accuracy": 0.9021543205965983,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 170982670.0,
      "eval_recall": 0.5727,
      "eval_runtime": 17.4411,
      "eval_samples_per_second": 52.061,
      "eval_steps_per_second": 52.061,
      "step": 22000
    },
    {
      "entropy": 1.4187169399261474,
      "epoch": 0.6925407589092483,
      "grad_norm": 2.9375,
      "learning_rate": 2.378854887650477e-05,
      "loss": 0.1905,
      "mean_token_accuracy": 0.9493927232325077,
      "num_tokens": 186663193.0,
      "step": 24000
    },
    {
      "epoch": 0.6925407589092483,
      "eval_entropy": 1.4800235901503835,
      "eval_loss": 0.7626135945320129,
      "eval_mean_token_accuracy": 0.9030716799298047,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 186663193.0,
      "eval_recall": 0.5771,
      "eval_runtime": 17.6742,
      "eval_samples_per_second": 51.374,
      "eval_steps_per_second": 51.374,
      "step": 24000
    },
    {
      "entropy": 1.3858990859389304,
      "epoch": 0.7502524888183524,
      "grad_norm": 5.4375,
      "learning_rate": 2.31935822937945e-05,
      "loss": 0.1847,
      "mean_token_accuracy": 0.9506744608581066,
      "num_tokens": 202382574.0,
      "step": 26000
    },
    {
      "epoch": 0.7502524888183524,
      "eval_entropy": 1.4523340987476483,
      "eval_loss": 0.7447758316993713,
      "eval_mean_token_accuracy": 0.9053517018777159,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 202382574.0,
      "eval_recall": 0.5837,
      "eval_runtime": 17.5503,
      "eval_samples_per_second": 51.737,
      "eval_steps_per_second": 51.737,
      "step": 26000
    },
    {
      "entropy": 1.371607663989067,
      "epoch": 0.8079642187274564,
      "grad_norm": 6.46875,
      "learning_rate": 2.259861571108423e-05,
      "loss": 0.1747,
      "mean_token_accuracy": 0.9534294557571411,
      "num_tokens": 217894933.0,
      "step": 28000
    },
    {
      "epoch": 0.8079642187274564,
      "eval_entropy": 1.449135780728336,
      "eval_loss": 0.726492166519165,
      "eval_mean_token_accuracy": 0.9063065528607054,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 217894933.0,
      "eval_recall": 0.5782,
      "eval_runtime": 17.4046,
      "eval_samples_per_second": 52.17,
      "eval_steps_per_second": 52.17,
      "step": 28000
    },
    {
      "entropy": 1.3474767149090767,
      "epoch": 0.8656759486365604,
      "grad_norm": 1.15625,
      "learning_rate": 2.2003649128373957e-05,
      "loss": 0.1662,
      "mean_token_accuracy": 0.9552805411219597,
      "num_tokens": 233442927.0,
      "step": 30000
    },
    {
      "epoch": 0.8656759486365604,
      "eval_entropy": 1.4218564205495272,
      "eval_loss": 0.7416213750839233,
      "eval_mean_token_accuracy": 0.9069001922129535,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 233442927.0,
      "eval_recall": 0.587,
      "eval_runtime": 17.6827,
      "eval_samples_per_second": 51.35,
      "eval_steps_per_second": 51.35,
      "step": 30000
    },
    {
      "entropy": 1.337467650592327,
      "epoch": 0.9233876785456644,
      "grad_norm": 4.375,
      "learning_rate": 2.1408682545663684e-05,
      "loss": 0.1637,
      "mean_token_accuracy": 0.9557446602284908,
      "num_tokens": 249071659.0,
      "step": 32000
    },
    {
      "epoch": 0.9233876785456644,
      "eval_entropy": 1.3889587524835234,
      "eval_loss": 0.7390624284744263,
      "eval_mean_token_accuracy": 0.9073679444876537,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 249071659.0,
      "eval_recall": 0.5848,
      "eval_runtime": 17.5032,
      "eval_samples_per_second": 51.876,
      "eval_steps_per_second": 51.876,
      "step": 32000
    },
    {
      "entropy": 1.3001156712770463,
      "epoch": 0.9810994084547684,
      "grad_norm": 3.890625,
      "learning_rate": 2.0813715962953412e-05,
      "loss": 0.1557,
      "mean_token_accuracy": 0.9573536138236522,
      "num_tokens": 264672169.0,
      "step": 34000
    },
    {
      "epoch": 0.9810994084547684,
      "eval_entropy": 1.4234498099214705,
      "eval_loss": 0.7484801411628723,
      "eval_mean_token_accuracy": 0.9075630426012997,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 264672169.0,
      "eval_recall": 0.5859,
      "eval_runtime": 16.2686,
      "eval_samples_per_second": 55.813,
      "eval_steps_per_second": 55.813,
      "step": 34000
    },
    {
      "entropy": 1.2362971892952919,
      "epoch": 1.0388111383638725,
      "grad_norm": 4.96875,
      "learning_rate": 2.0218749380243143e-05,
      "loss": 0.1179,
      "mean_token_accuracy": 0.9659893708825111,
      "num_tokens": 280288735.0,
      "step": 36000
    },
    {
      "epoch": 1.0388111383638725,
      "eval_entropy": 1.268248316540592,
      "eval_loss": 0.7719414830207825,
      "eval_mean_token_accuracy": 0.9055645169796923,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 280288735.0,
      "eval_recall": 0.5771,
      "eval_runtime": 16.2819,
      "eval_samples_per_second": 55.767,
      "eval_steps_per_second": 55.767,
      "step": 36000
    },
    {
      "entropy": 1.1861163977086544,
      "epoch": 1.0965228682729764,
      "grad_norm": 2.703125,
      "learning_rate": 1.962378279753287e-05,
      "loss": 0.1001,
      "mean_token_accuracy": 0.9700573923885822,
      "num_tokens": 295889252.0,
      "step": 38000
    },
    {
      "epoch": 1.0965228682729764,
      "eval_entropy": 1.2859099207339308,
      "eval_loss": 0.7740228176116943,
      "eval_mean_token_accuracy": 0.9053704237425905,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 295889252.0,
      "eval_recall": 0.5815,
      "eval_runtime": 17.2227,
      "eval_samples_per_second": 52.721,
      "eval_steps_per_second": 52.721,
      "step": 38000
    },
    {
      "entropy": 1.172551353752613,
      "epoch": 1.1542345981820805,
      "grad_norm": 5.09375,
      "learning_rate": 1.90288162148226e-05,
      "loss": 0.0999,
      "mean_token_accuracy": 0.9701334120929241,
      "num_tokens": 311460812.0,
      "step": 40000
    },
    {
      "epoch": 1.1542345981820805,
      "eval_entropy": 1.2553664291613953,
      "eval_loss": 0.7721803784370422,
      "eval_mean_token_accuracy": 0.9035136323537071,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 311460812.0,
      "eval_recall": 0.576,
      "eval_runtime": 16.7852,
      "eval_samples_per_second": 54.095,
      "eval_steps_per_second": 54.095,
      "step": 40000
    },
    {
      "entropy": 1.1619984501898288,
      "epoch": 1.2119463280911846,
      "grad_norm": 2.21875,
      "learning_rate": 1.843384963211233e-05,
      "loss": 0.0965,
      "mean_token_accuracy": 0.9710877353549003,
      "num_tokens": 327135410.0,
      "step": 42000
    },
    {
      "epoch": 1.2119463280911846,
      "eval_entropy": 1.2579897131426219,
      "eval_loss": 0.7692885398864746,
      "eval_mean_token_accuracy": 0.9031982754033042,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 327135410.0,
      "eval_recall": 0.576,
      "eval_runtime": 17.2218,
      "eval_samples_per_second": 52.724,
      "eval_steps_per_second": 52.724,
      "step": 42000
    },
    {
      "entropy": 1.1411213338077069,
      "epoch": 1.2696580580002885,
      "grad_norm": 3.578125,
      "learning_rate": 1.7838883049402058e-05,
      "loss": 0.0944,
      "mean_token_accuracy": 0.9715765230953693,
      "num_tokens": 342774177.0,
      "step": 44000
    },
    {
      "epoch": 1.2696580580002885,
      "eval_entropy": 1.253242742319464,
      "eval_loss": 0.7519774436950684,
      "eval_mean_token_accuracy": 0.9045354708175827,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 342774177.0,
      "eval_recall": 0.576,
      "eval_runtime": 17.1793,
      "eval_samples_per_second": 52.854,
      "eval_steps_per_second": 52.854,
      "step": 44000
    },
    {
      "entropy": 1.1595853001475334,
      "epoch": 1.3273697879093926,
      "grad_norm": 5.5,
      "learning_rate": 1.724391646669179e-05,
      "loss": 0.0963,
      "mean_token_accuracy": 0.9711391851603984,
      "num_tokens": 358312922.0,
      "step": 46000
    },
    {
      "epoch": 1.3273697879093926,
      "eval_entropy": 1.2453804171689282,
      "eval_loss": 0.7676454186439514,
      "eval_mean_token_accuracy": 0.9064169454876547,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 358312922.0,
      "eval_recall": 0.5837,
      "eval_runtime": 16.8037,
      "eval_samples_per_second": 54.036,
      "eval_steps_per_second": 54.036,
      "step": 46000
    },
    {
      "entropy": 1.1609133576154709,
      "epoch": 1.3850815178184965,
      "grad_norm": 4.03125,
      "learning_rate": 1.6648949883981517e-05,
      "loss": 0.0922,
      "mean_token_accuracy": 0.9723608312606812,
      "num_tokens": 373752333.0,
      "step": 48000
    },
    {
      "epoch": 1.3850815178184965,
      "eval_entropy": 1.2345776492934921,
      "eval_loss": 0.7665285468101501,
      "eval_mean_token_accuracy": 0.9063460667687365,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 373752333.0,
      "eval_recall": 0.5793,
      "eval_runtime": 16.7578,
      "eval_samples_per_second": 54.184,
      "eval_steps_per_second": 54.184,
      "step": 48000
    },
    {
      "entropy": 1.1655547478497028,
      "epoch": 1.4427932477276006,
      "grad_norm": 5.59375,
      "learning_rate": 1.6053983301271245e-05,
      "loss": 0.094,
      "mean_token_accuracy": 0.9717481000125409,
      "num_tokens": 389447345.0,
      "step": 50000
    },
    {
      "epoch": 1.4427932477276006,
      "eval_entropy": 1.2292915042407713,
      "eval_loss": 0.7735024094581604,
      "eval_mean_token_accuracy": 0.907910385517822,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 389447345.0,
      "eval_recall": 0.5881,
      "eval_runtime": 17.3697,
      "eval_samples_per_second": 52.275,
      "eval_steps_per_second": 52.275,
      "step": 50000
    },
    {
      "entropy": 1.1552352701127528,
      "epoch": 1.5005049776367048,
      "grad_norm": 3.0,
      "learning_rate": 1.5459016718560976e-05,
      "loss": 0.091,
      "mean_token_accuracy": 0.9726284679472447,
      "num_tokens": 404935652.0,
      "step": 52000
    },
    {
      "epoch": 1.5005049776367048,
      "eval_entropy": 1.2490241264325406,
      "eval_loss": 0.7779573202133179,
      "eval_mean_token_accuracy": 0.9046718338053132,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 404935652.0,
      "eval_recall": 0.5848,
      "eval_runtime": 17.2173,
      "eval_samples_per_second": 52.738,
      "eval_steps_per_second": 52.738,
      "step": 52000
    },
    {
      "entropy": 1.1580015743076801,
      "epoch": 1.5582167075458087,
      "grad_norm": 0.0019989013671875,
      "learning_rate": 1.4864050135850704e-05,
      "loss": 0.0674,
      "mean_token_accuracy": 0.979576114565134,
      "num_tokens": 15533221.0,
      "step": 54000
    },
    {
      "epoch": 1.5582167075458087,
      "eval_entropy": 1.204221866138706,
      "eval_loss": 0.8085830211639404,
      "eval_mean_token_accuracy": 0.9048162211668124,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 15533221.0,
      "eval_recall": 0.5738,
      "eval_runtime": 17.1435,
      "eval_samples_per_second": 52.965,
      "eval_steps_per_second": 52.965,
      "step": 54000
    },
    {
      "entropy": 1.144241349697113,
      "epoch": 1.6159284374549125,
      "grad_norm": 3.765625,
      "learning_rate": 1.4269083553140432e-05,
      "loss": 0.0633,
      "mean_token_accuracy": 0.9807874869704246,
      "num_tokens": 31150685.0,
      "step": 56000
    },
    {
      "epoch": 1.6159284374549125,
      "eval_entropy": 1.2577752770306256,
      "eval_loss": 0.8108322024345398,
      "eval_mean_token_accuracy": 0.904205797002179,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 31150685.0,
      "eval_recall": 0.5738,
      "eval_runtime": 16.4274,
      "eval_samples_per_second": 55.274,
      "eval_steps_per_second": 55.274,
      "step": 56000
    },
    {
      "entropy": 1.162702257514,
      "epoch": 1.6736401673640167,
      "grad_norm": 2.8125,
      "learning_rate": 1.3674116970430161e-05,
      "loss": 0.0665,
      "mean_token_accuracy": 0.9797295650243759,
      "num_tokens": 46832332.0,
      "step": 58000
    },
    {
      "epoch": 1.6736401673640167,
      "eval_entropy": 1.268515376989537,
      "eval_loss": 0.814584493637085,
      "eval_mean_token_accuracy": 0.904229478295154,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 46832332.0,
      "eval_recall": 0.5716,
      "eval_runtime": 16.4853,
      "eval_samples_per_second": 55.079,
      "eval_steps_per_second": 55.079,
      "step": 58000
    },
    {
      "entropy": 1.1661596206724645,
      "epoch": 1.7313518972731208,
      "grad_norm": 5.96875,
      "learning_rate": 1.3079150387719889e-05,
      "loss": 0.0672,
      "mean_token_accuracy": 0.9796462517380714,
      "num_tokens": 62558817.0,
      "step": 60000
    },
    {
      "epoch": 1.7313518972731208,
      "eval_entropy": 1.2622852631996382,
      "eval_loss": 0.8227198123931885,
      "eval_mean_token_accuracy": 0.9038923141846048,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 62558817.0,
      "eval_recall": 0.5716,
      "eval_runtime": 16.9948,
      "eval_samples_per_second": 53.428,
      "eval_steps_per_second": 53.428,
      "step": 60000
    },
    {
      "entropy": 1.1797457176148891,
      "epoch": 1.789063627182225,
      "grad_norm": 0.5546875,
      "learning_rate": 1.2484183805009618e-05,
      "loss": 0.0657,
      "mean_token_accuracy": 0.980204150468111,
      "num_tokens": 78074806.0,
      "step": 62000
    },
    {
      "epoch": 1.789063627182225,
      "eval_entropy": 1.2418163208052975,
      "eval_loss": 0.8185028433799744,
      "eval_mean_token_accuracy": 0.9041991046740621,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 78074806.0,
      "eval_recall": 0.5705,
      "eval_runtime": 17.1144,
      "eval_samples_per_second": 53.055,
      "eval_steps_per_second": 53.055,
      "step": 62000
    },
    {
      "entropy": 1.1774089051187038,
      "epoch": 1.8467753570913288,
      "grad_norm": 5.5625,
      "learning_rate": 1.1889217222299348e-05,
      "loss": 0.0649,
      "mean_token_accuracy": 0.9804997465908527,
      "num_tokens": 93602629.0,
      "step": 64000
    },
    {
      "epoch": 1.8467753570913288,
      "eval_entropy": 1.2988805646807087,
      "eval_loss": 0.8260899782180786,
      "eval_mean_token_accuracy": 0.9030656689523601,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 93602629.0,
      "eval_recall": 0.576,
      "eval_runtime": 16.1643,
      "eval_samples_per_second": 56.173,
      "eval_steps_per_second": 56.173,
      "step": 64000
    },
    {
      "entropy": 1.174987347126007,
      "epoch": 1.9044870870004327,
      "grad_norm": 5.53125,
      "learning_rate": 1.1294250639589077e-05,
      "loss": 0.064,
      "mean_token_accuracy": 0.9806980607807636,
      "num_tokens": 109249414.0,
      "step": 66000
    },
    {
      "epoch": 1.9044870870004327,
      "eval_entropy": 1.2433809736489199,
      "eval_loss": 0.8272661566734314,
      "eval_mean_token_accuracy": 0.9028221254569319,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 109249414.0,
      "eval_recall": 0.5727,
      "eval_runtime": 16.3988,
      "eval_samples_per_second": 55.37,
      "eval_steps_per_second": 55.37,
      "step": 66000
    },
    {
      "entropy": 1.1633582679629326,
      "epoch": 1.9621988169095368,
      "grad_norm": 5.34375,
      "learning_rate": 1.0699284056878807e-05,
      "loss": 0.0643,
      "mean_token_accuracy": 0.9805754337012768,
      "num_tokens": 124880720.0,
      "step": 68000
    },
    {
      "epoch": 1.9621988169095368,
      "eval_entropy": 1.224490842367584,
      "eval_loss": 0.8288715481758118,
      "eval_mean_token_accuracy": 0.9034351931991557,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 124880720.0,
      "eval_recall": 0.5738,
      "eval_runtime": 16.4997,
      "eval_samples_per_second": 55.031,
      "eval_steps_per_second": 55.031,
      "step": 68000
    },
    {
      "entropy": 1.1513627296090125,
      "epoch": 2.019910546818641,
      "grad_norm": 0.79296875,
      "learning_rate": 1.0104317474168535e-05,
      "loss": 0.0633,
      "mean_token_accuracy": 0.9811660476624966,
      "num_tokens": 140499220.0,
      "step": 70000
    },
    {
      "epoch": 2.019910546818641,
      "eval_entropy": 1.2267822175561593,
      "eval_loss": 0.8458257913589478,
      "eval_mean_token_accuracy": 0.9038964834572986,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 140499220.0,
      "eval_recall": 0.5705,
      "eval_runtime": 16.4967,
      "eval_samples_per_second": 55.041,
      "eval_steps_per_second": 55.041,
      "step": 70000
    },
    {
      "entropy": 1.143776093840599,
      "epoch": 2.077622276727745,
      "grad_norm": 7.9375,
      "learning_rate": 9.509350891458264e-06,
      "loss": 0.0597,
      "mean_token_accuracy": 0.9825106913745403,
      "num_tokens": 156048918.0,
      "step": 72000
    },
    {
      "epoch": 2.077622276727745,
      "eval_entropy": 1.2148328015195116,
      "eval_loss": 0.8337165713310242,
      "eval_mean_token_accuracy": 0.9035390550475814,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 156048918.0,
      "eval_recall": 0.5705,
      "eval_runtime": 16.506,
      "eval_samples_per_second": 55.01,
      "eval_steps_per_second": 55.01,
      "step": 72000
    },
    {
      "entropy": 1.1460822140574456,
      "epoch": 2.135334006636849,
      "grad_norm": 12.375,
      "learning_rate": 8.914384308747992e-06,
      "loss": 0.0596,
      "mean_token_accuracy": 0.98244061678648,
      "num_tokens": 171653895.0,
      "step": 74000
    },
    {
      "epoch": 2.135334006636849,
      "eval_entropy": 1.2635613490175046,
      "eval_loss": 0.8348618745803833,
      "eval_mean_token_accuracy": 0.9038379774285308,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 171653895.0,
      "eval_recall": 0.5694,
      "eval_runtime": 16.4822,
      "eval_samples_per_second": 55.09,
      "eval_steps_per_second": 55.09,
      "step": 74000
    },
    {
      "entropy": 1.1560133908391,
      "epoch": 2.193045736545953,
      "grad_norm": 7.625,
      "learning_rate": 8.319417726037721e-06,
      "loss": 0.06,
      "mean_token_accuracy": 0.9822552761137485,
      "num_tokens": 187228261.0,
      "step": 76000
    },
    {
      "epoch": 2.193045736545953,
      "eval_entropy": 1.2220293277554575,
      "eval_loss": 0.8315507769584656,
      "eval_mean_token_accuracy": 0.9036543207809263,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 187228261.0,
      "eval_recall": 0.5716,
      "eval_runtime": 16.703,
      "eval_samples_per_second": 54.361,
      "eval_steps_per_second": 54.361,
      "step": 76000
    },
    {
      "entropy": 1.1676500248610973,
      "epoch": 2.250757466455057,
      "grad_norm": 4.84375,
      "learning_rate": 7.72445114332745e-06,
      "loss": 0.0611,
      "mean_token_accuracy": 0.9819406977891922,
      "num_tokens": 202699683.0,
      "step": 78000
    },
    {
      "epoch": 2.250757466455057,
      "eval_entropy": 1.2448954319638827,
      "eval_loss": 0.8309385776519775,
      "eval_mean_token_accuracy": 0.9030922418255113,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 202699683.0,
      "eval_recall": 0.5694,
      "eval_runtime": 16.603,
      "eval_samples_per_second": 54.689,
      "eval_steps_per_second": 54.689,
      "step": 78000
    },
    {
      "entropy": 1.1656713368594647,
      "epoch": 2.308469196364161,
      "grad_norm": 6.53125,
      "learning_rate": 7.129484560617179e-06,
      "loss": 0.0618,
      "mean_token_accuracy": 0.9817487963140011,
      "num_tokens": 218284466.0,
      "step": 80000
    },
    {
      "epoch": 2.308469196364161,
      "eval_entropy": 1.255102663181952,
      "eval_loss": 0.8435425162315369,
      "eval_mean_token_accuracy": 0.902260869642974,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 218284466.0,
      "eval_recall": 0.5661,
      "eval_runtime": 16.8256,
      "eval_samples_per_second": 53.965,
      "eval_steps_per_second": 53.965,
      "step": 80000
    },
    {
      "entropy": 1.1597592905461789,
      "epoch": 2.366180926273265,
      "grad_norm": 2.5,
      "learning_rate": 6.534517977906908e-06,
      "loss": 0.0602,
      "mean_token_accuracy": 0.9821576415896416,
      "num_tokens": 233928452.0,
      "step": 82000
    },
    {
      "epoch": 2.366180926273265,
      "eval_entropy": 1.2422783964924875,
      "eval_loss": 0.8390738368034363,
      "eval_mean_token_accuracy": 0.9032785006950605,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 233928452.0,
      "eval_recall": 0.5683,
      "eval_runtime": 16.7577,
      "eval_samples_per_second": 54.184,
      "eval_steps_per_second": 54.184,
      "step": 82000
    },
    {
      "entropy": 1.17008468157053,
      "epoch": 2.4238926561823693,
      "grad_norm": 0.0400390625,
      "learning_rate": 5.939551395196637e-06,
      "loss": 0.0591,
      "mean_token_accuracy": 0.9825585896968841,
      "num_tokens": 249419664.0,
      "step": 84000
    },
    {
      "epoch": 2.4238926561823693,
      "eval_entropy": 1.2469606770424067,
      "eval_loss": 0.8383654356002808,
      "eval_mean_token_accuracy": 0.9040639832418921,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 249419664.0,
      "eval_recall": 0.5705,
      "eval_runtime": 16.6392,
      "eval_samples_per_second": 54.57,
      "eval_steps_per_second": 54.57,
      "step": 84000
    },
    {
      "entropy": 1.163529093414545,
      "epoch": 2.481604386091473,
      "grad_norm": 6.28125,
      "learning_rate": 5.3445848124863655e-06,
      "loss": 0.0568,
      "mean_token_accuracy": 0.9832313210368157,
      "num_tokens": 264982654.0,
      "step": 86000
    },
    {
      "epoch": 2.481604386091473,
      "eval_entropy": 1.236849331908289,
      "eval_loss": 0.8381890058517456,
      "eval_mean_token_accuracy": 0.9027883698630438,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 264982654.0,
      "eval_recall": 0.5672,
      "eval_runtime": 16.5964,
      "eval_samples_per_second": 54.711,
      "eval_steps_per_second": 54.711,
      "step": 86000
    },
    {
      "entropy": 1.1701532056927682,
      "epoch": 2.539316116000577,
      "grad_norm": 0.87109375,
      "learning_rate": 4.749618229776094e-06,
      "loss": 0.0574,
      "mean_token_accuracy": 0.9830155865848065,
      "num_tokens": 280520807.0,
      "step": 88000
    },
    {
      "epoch": 2.539316116000577,
      "eval_entropy": 1.2492524392673097,
      "eval_loss": 0.839518666267395,
      "eval_mean_token_accuracy": 0.9025986767681685,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 280520807.0,
      "eval_recall": 0.5661,
      "eval_runtime": 16.5519,
      "eval_samples_per_second": 54.858,
      "eval_steps_per_second": 54.858,
      "step": 88000
    },
    {
      "entropy": 1.167941878914833,
      "epoch": 2.597027845909681,
      "grad_norm": 0.451171875,
      "learning_rate": 4.154651647065824e-06,
      "loss": 0.0602,
      "mean_token_accuracy": 0.9820802296400071,
      "num_tokens": 296146535.0,
      "step": 90000
    },
    {
      "epoch": 2.597027845909681,
      "eval_entropy": 1.2443812186234848,
      "eval_loss": 0.8395401835441589,
      "eval_mean_token_accuracy": 0.9034286766981764,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 296146535.0,
      "eval_recall": 0.5683,
      "eval_runtime": 16.788,
      "eval_samples_per_second": 54.086,
      "eval_steps_per_second": 54.086,
      "step": 90000
    },
    {
      "entropy": 1.1601335457861424,
      "epoch": 2.6547395758187853,
      "grad_norm": 4.6875,
      "learning_rate": 3.559685064355552e-06,
      "loss": 0.0584,
      "mean_token_accuracy": 0.9827592859268188,
      "num_tokens": 311778551.0,
      "step": 92000
    },
    {
      "epoch": 2.6547395758187853,
      "eval_entropy": 1.2437387075324415,
      "eval_loss": 0.836577296257019,
      "eval_mean_token_accuracy": 0.9039776291419231,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 311778551.0,
      "eval_recall": 0.5705,
      "eval_runtime": 16.2272,
      "eval_samples_per_second": 55.956,
      "eval_steps_per_second": 55.956,
      "step": 92000
    },
    {
      "entropy": 1.1733056641221047,
      "epoch": 2.712451305727889,
      "grad_norm": 5.03125,
      "learning_rate": 2.964718481645281e-06,
      "loss": 0.0564,
      "mean_token_accuracy": 0.9832823853492737,
      "num_tokens": 327170479.0,
      "step": 94000
    },
    {
      "epoch": 2.712451305727889,
      "eval_entropy": 1.2440849004337966,
      "eval_loss": 0.8399211168289185,
      "eval_mean_token_accuracy": 0.9033104040155326,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 327170479.0,
      "eval_recall": 0.5683,
      "eval_runtime": 16.2233,
      "eval_samples_per_second": 55.969,
      "eval_steps_per_second": 55.969,
      "step": 94000
    },
    {
      "entropy": 1.1586334483027458,
      "epoch": 2.770163035636993,
      "grad_norm": 3.953125,
      "learning_rate": 2.36975189893501e-06,
      "loss": 0.0585,
      "mean_token_accuracy": 0.9826480825543403,
      "num_tokens": 342791353.0,
      "step": 96000
    },
    {
      "epoch": 2.770163035636993,
      "eval_entropy": 1.2412338042180444,
      "eval_loss": 0.8378188610076904,
      "eval_mean_token_accuracy": 0.9035194405572005,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 342791353.0,
      "eval_recall": 0.5672,
      "eval_runtime": 16.2077,
      "eval_samples_per_second": 56.023,
      "eval_steps_per_second": 56.023,
      "step": 96000
    },
    {
      "entropy": 1.1629991734027862,
      "epoch": 2.827874765546097,
      "grad_norm": 6.4375,
      "learning_rate": 1.7747853162247388e-06,
      "loss": 0.0608,
      "mean_token_accuracy": 0.9821404512822628,
      "num_tokens": 358436354.0,
      "step": 98000
    },
    {
      "epoch": 2.827874765546097,
      "eval_entropy": 1.2435034370369848,
      "eval_loss": 0.8380420207977295,
      "eval_mean_token_accuracy": 0.9037704004327631,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 358436354.0,
      "eval_recall": 0.5694,
      "eval_runtime": 16.2198,
      "eval_samples_per_second": 55.981,
      "eval_steps_per_second": 55.981,
      "step": 98000
    },
    {
      "entropy": 1.1640874392092229,
      "epoch": 2.8855864954552013,
      "grad_norm": 0.8203125,
      "learning_rate": 1.1798187335144677e-06,
      "loss": 0.0574,
      "mean_token_accuracy": 0.9829988768994808,
      "num_tokens": 374029027.0,
      "step": 100000
    },
    {
      "epoch": 2.8855864954552013,
      "eval_entropy": 1.2438825091207606,
      "eval_loss": 0.8370459079742432,
      "eval_mean_token_accuracy": 0.9030486140810445,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 374029027.0,
      "eval_recall": 0.5661,
      "eval_runtime": 17.3712,
      "eval_samples_per_second": 52.271,
      "eval_steps_per_second": 52.271,
      "step": 100000
    },
    {
      "entropy": 1.1683570961356162,
      "epoch": 2.9432982253643054,
      "grad_norm": 6.65625,
      "learning_rate": 5.848521508041964e-07,
      "loss": 0.0583,
      "mean_token_accuracy": 0.982835016399622,
      "num_tokens": 389554889.0,
      "step": 102000
    },
    {
      "epoch": 2.9432982253643054,
      "eval_entropy": 1.2444104566710636,
      "eval_loss": 0.8376456499099731,
      "eval_mean_token_accuracy": 0.9037148623608282,
      "eval_num_gold": 908,
      "eval_num_guess": 908,
      "eval_num_tokens": 389554889.0,
      "eval_recall": 0.5683,
      "eval_runtime": 16.2408,
      "eval_samples_per_second": 55.909,
      "eval_steps_per_second": 55.909,
      "step": 102000
    }
  ],
  "logging_steps": 2000,
  "max_steps": 103965,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.6466469785747587e+19,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}