| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.0, | |
| "eval_steps": 500, | |
| "global_step": 1652, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.021231422505307854, | |
| "grad_norm": 6.601170763883668, | |
| "learning_rate": 9.638554216867472e-07, | |
| "loss": 0.5153, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3147011399269104, | |
| "step": 5, | |
| "valid_targets_mean": 14395.1, | |
| "valid_targets_min": 6135 | |
| }, | |
| { | |
| "epoch": 0.04246284501061571, | |
| "grad_norm": 4.640678277810576, | |
| "learning_rate": 2.168674698795181e-06, | |
| "loss": 0.4921, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18654188513755798, | |
| "step": 10, | |
| "valid_targets_mean": 12533.2, | |
| "valid_targets_min": 4349 | |
| }, | |
| { | |
| "epoch": 0.06369426751592357, | |
| "grad_norm": 2.8761928107591204, | |
| "learning_rate": 3.3734939759036146e-06, | |
| "loss": 0.4234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22491103410720825, | |
| "step": 15, | |
| "valid_targets_mean": 13778.4, | |
| "valid_targets_min": 6404 | |
| }, | |
| { | |
| "epoch": 0.08492569002123142, | |
| "grad_norm": 2.0507756278474343, | |
| "learning_rate": 4.578313253012049e-06, | |
| "loss": 0.4198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2335529774427414, | |
| "step": 20, | |
| "valid_targets_mean": 13837.5, | |
| "valid_targets_min": 3655 | |
| }, | |
| { | |
| "epoch": 0.10615711252653928, | |
| "grad_norm": 1.4535039164298584, | |
| "learning_rate": 5.783132530120482e-06, | |
| "loss": 0.4121, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17189857363700867, | |
| "step": 25, | |
| "valid_targets_mean": 12292.6, | |
| "valid_targets_min": 6259 | |
| }, | |
| { | |
| "epoch": 0.12738853503184713, | |
| "grad_norm": 0.7914310934537628, | |
| "learning_rate": 6.987951807228917e-06, | |
| "loss": 0.3847, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24835364520549774, | |
| "step": 30, | |
| "valid_targets_mean": 14466.2, | |
| "valid_targets_min": 9464 | |
| }, | |
| { | |
| "epoch": 0.14861995753715498, | |
| "grad_norm": 0.5293723876639078, | |
| "learning_rate": 8.19277108433735e-06, | |
| "loss": 0.3696, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21188116073608398, | |
| "step": 35, | |
| "valid_targets_mean": 14101.8, | |
| "valid_targets_min": 4063 | |
| }, | |
| { | |
| "epoch": 0.16985138004246284, | |
| "grad_norm": 0.5249820883384718, | |
| "learning_rate": 9.397590361445785e-06, | |
| "loss": 0.3606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17040115594863892, | |
| "step": 40, | |
| "valid_targets_mean": 14958.1, | |
| "valid_targets_min": 6110 | |
| }, | |
| { | |
| "epoch": 0.1910828025477707, | |
| "grad_norm": 0.4219272827083708, | |
| "learning_rate": 1.0602409638554219e-05, | |
| "loss": 0.3188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16639268398284912, | |
| "step": 45, | |
| "valid_targets_mean": 12699.5, | |
| "valid_targets_min": 5064 | |
| }, | |
| { | |
| "epoch": 0.21231422505307856, | |
| "grad_norm": 0.38858708893114424, | |
| "learning_rate": 1.1807228915662651e-05, | |
| "loss": 0.3358, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18469196557998657, | |
| "step": 50, | |
| "valid_targets_mean": 12852.6, | |
| "valid_targets_min": 4287 | |
| }, | |
| { | |
| "epoch": 0.23354564755838642, | |
| "grad_norm": 0.3799894773689016, | |
| "learning_rate": 1.3012048192771085e-05, | |
| "loss": 0.3134, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20896856486797333, | |
| "step": 55, | |
| "valid_targets_mean": 15256.2, | |
| "valid_targets_min": 5458 | |
| }, | |
| { | |
| "epoch": 0.25477707006369427, | |
| "grad_norm": 0.319379398181401, | |
| "learning_rate": 1.4216867469879519e-05, | |
| "loss": 0.2825, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09708239883184433, | |
| "step": 60, | |
| "valid_targets_mean": 14549.4, | |
| "valid_targets_min": 6224 | |
| }, | |
| { | |
| "epoch": 0.2760084925690021, | |
| "grad_norm": 0.2771017285361385, | |
| "learning_rate": 1.5421686746987955e-05, | |
| "loss": 0.269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16080784797668457, | |
| "step": 65, | |
| "valid_targets_mean": 15810.6, | |
| "valid_targets_min": 8701 | |
| }, | |
| { | |
| "epoch": 0.29723991507430997, | |
| "grad_norm": 0.290052395391973, | |
| "learning_rate": 1.6626506024096387e-05, | |
| "loss": 0.2876, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14474868774414062, | |
| "step": 70, | |
| "valid_targets_mean": 13943.5, | |
| "valid_targets_min": 8170 | |
| }, | |
| { | |
| "epoch": 0.3184713375796178, | |
| "grad_norm": 0.2887366598094193, | |
| "learning_rate": 1.783132530120482e-05, | |
| "loss": 0.3045, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20289857685565948, | |
| "step": 75, | |
| "valid_targets_mean": 19152.2, | |
| "valid_targets_min": 8288 | |
| }, | |
| { | |
| "epoch": 0.33970276008492567, | |
| "grad_norm": 0.24501889780498082, | |
| "learning_rate": 1.9036144578313255e-05, | |
| "loss": 0.2576, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14412395656108856, | |
| "step": 80, | |
| "valid_targets_mean": 16776.8, | |
| "valid_targets_min": 6292 | |
| }, | |
| { | |
| "epoch": 0.3609341825902335, | |
| "grad_norm": 0.31757555588060904, | |
| "learning_rate": 2.0240963855421687e-05, | |
| "loss": 0.2864, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15807807445526123, | |
| "step": 85, | |
| "valid_targets_mean": 11288.1, | |
| "valid_targets_min": 5740 | |
| }, | |
| { | |
| "epoch": 0.3821656050955414, | |
| "grad_norm": 0.21005685787572534, | |
| "learning_rate": 2.1445783132530123e-05, | |
| "loss": 0.2537, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08626846969127655, | |
| "step": 90, | |
| "valid_targets_mean": 15819.5, | |
| "valid_targets_min": 7497 | |
| }, | |
| { | |
| "epoch": 0.4033970276008493, | |
| "grad_norm": 0.2942954097586065, | |
| "learning_rate": 2.265060240963856e-05, | |
| "loss": 0.2792, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14525042474269867, | |
| "step": 95, | |
| "valid_targets_mean": 14950.8, | |
| "valid_targets_min": 7206 | |
| }, | |
| { | |
| "epoch": 0.42462845010615713, | |
| "grad_norm": 0.297024377935559, | |
| "learning_rate": 2.3855421686746988e-05, | |
| "loss": 0.2496, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1313462257385254, | |
| "step": 100, | |
| "valid_targets_mean": 12276.5, | |
| "valid_targets_min": 4481 | |
| }, | |
| { | |
| "epoch": 0.445859872611465, | |
| "grad_norm": 0.3083155449002965, | |
| "learning_rate": 2.5060240963855423e-05, | |
| "loss": 0.2674, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13870102167129517, | |
| "step": 105, | |
| "valid_targets_mean": 8789.2, | |
| "valid_targets_min": 4863 | |
| }, | |
| { | |
| "epoch": 0.46709129511677283, | |
| "grad_norm": 0.22405694574565216, | |
| "learning_rate": 2.6265060240963856e-05, | |
| "loss": 0.2563, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11005185544490814, | |
| "step": 110, | |
| "valid_targets_mean": 19546.9, | |
| "valid_targets_min": 9253 | |
| }, | |
| { | |
| "epoch": 0.4883227176220807, | |
| "grad_norm": 0.32976545776333965, | |
| "learning_rate": 2.746987951807229e-05, | |
| "loss": 0.2934, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1696811020374298, | |
| "step": 115, | |
| "valid_targets_mean": 11419.8, | |
| "valid_targets_min": 4938 | |
| }, | |
| { | |
| "epoch": 0.5095541401273885, | |
| "grad_norm": 0.26726017461928253, | |
| "learning_rate": 2.8674698795180727e-05, | |
| "loss": 0.2538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0898926854133606, | |
| "step": 120, | |
| "valid_targets_mean": 14191.1, | |
| "valid_targets_min": 3141 | |
| }, | |
| { | |
| "epoch": 0.5307855626326964, | |
| "grad_norm": 0.26693132304388195, | |
| "learning_rate": 2.9879518072289156e-05, | |
| "loss": 0.2489, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10769972205162048, | |
| "step": 125, | |
| "valid_targets_mean": 15435.2, | |
| "valid_targets_min": 5449 | |
| }, | |
| { | |
| "epoch": 0.5520169851380042, | |
| "grad_norm": 0.29793681607777117, | |
| "learning_rate": 3.108433734939759e-05, | |
| "loss": 0.256, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13548800349235535, | |
| "step": 130, | |
| "valid_targets_mean": 12220.0, | |
| "valid_targets_min": 4963 | |
| }, | |
| { | |
| "epoch": 0.5732484076433121, | |
| "grad_norm": 0.2843851218180784, | |
| "learning_rate": 3.228915662650603e-05, | |
| "loss": 0.2449, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08166906237602234, | |
| "step": 135, | |
| "valid_targets_mean": 16374.8, | |
| "valid_targets_min": 7174 | |
| }, | |
| { | |
| "epoch": 0.5944798301486199, | |
| "grad_norm": 0.2537876339931588, | |
| "learning_rate": 3.3493975903614457e-05, | |
| "loss": 0.2345, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11751864105463028, | |
| "step": 140, | |
| "valid_targets_mean": 10767.6, | |
| "valid_targets_min": 900 | |
| }, | |
| { | |
| "epoch": 0.6157112526539278, | |
| "grad_norm": 0.26983132136557225, | |
| "learning_rate": 3.4698795180722896e-05, | |
| "loss": 0.2754, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10696007311344147, | |
| "step": 145, | |
| "valid_targets_mean": 15567.0, | |
| "valid_targets_min": 3478 | |
| }, | |
| { | |
| "epoch": 0.6369426751592356, | |
| "grad_norm": 0.2874455255323601, | |
| "learning_rate": 3.590361445783133e-05, | |
| "loss": 0.2422, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10694894194602966, | |
| "step": 150, | |
| "valid_targets_mean": 14445.2, | |
| "valid_targets_min": 5102 | |
| }, | |
| { | |
| "epoch": 0.6581740976645435, | |
| "grad_norm": 0.36018854059753647, | |
| "learning_rate": 3.710843373493976e-05, | |
| "loss": 0.2485, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16881927847862244, | |
| "step": 155, | |
| "valid_targets_mean": 10835.9, | |
| "valid_targets_min": 1377 | |
| }, | |
| { | |
| "epoch": 0.6794055201698513, | |
| "grad_norm": 0.3128091862915849, | |
| "learning_rate": 3.83132530120482e-05, | |
| "loss": 0.2405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10742859542369843, | |
| "step": 160, | |
| "valid_targets_mean": 12899.6, | |
| "valid_targets_min": 8041 | |
| }, | |
| { | |
| "epoch": 0.7006369426751592, | |
| "grad_norm": 0.3176035358807374, | |
| "learning_rate": 3.9518072289156625e-05, | |
| "loss": 0.28, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10753054916858673, | |
| "step": 165, | |
| "valid_targets_mean": 14364.8, | |
| "valid_targets_min": 2986 | |
| }, | |
| { | |
| "epoch": 0.721868365180467, | |
| "grad_norm": 0.27568705715038133, | |
| "learning_rate": 3.9999597743398453e-05, | |
| "loss": 0.2259, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1222403347492218, | |
| "step": 170, | |
| "valid_targets_mean": 15836.6, | |
| "valid_targets_min": 8080 | |
| }, | |
| { | |
| "epoch": 0.7430997876857749, | |
| "grad_norm": 0.3595846231890364, | |
| "learning_rate": 3.999713956720898e-05, | |
| "loss": 0.2501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14652863144874573, | |
| "step": 175, | |
| "valid_targets_mean": 17202.6, | |
| "valid_targets_min": 9673 | |
| }, | |
| { | |
| "epoch": 0.7643312101910829, | |
| "grad_norm": 0.35289985950885067, | |
| "learning_rate": 3.9992446965056756e-05, | |
| "loss": 0.2561, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16108444333076477, | |
| "step": 180, | |
| "valid_targets_mean": 14388.9, | |
| "valid_targets_min": 4858 | |
| }, | |
| { | |
| "epoch": 0.7855626326963907, | |
| "grad_norm": 0.33155413679799534, | |
| "learning_rate": 3.998552046128038e-05, | |
| "loss": 0.2604, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18604812026023865, | |
| "step": 185, | |
| "valid_targets_mean": 17653.5, | |
| "valid_targets_min": 12562 | |
| }, | |
| { | |
| "epoch": 0.8067940552016986, | |
| "grad_norm": 0.26974442765891954, | |
| "learning_rate": 3.997636082982853e-05, | |
| "loss": 0.2231, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11307717114686966, | |
| "step": 190, | |
| "valid_targets_mean": 14695.4, | |
| "valid_targets_min": 1868 | |
| }, | |
| { | |
| "epoch": 0.8280254777070064, | |
| "grad_norm": 0.4484022169366936, | |
| "learning_rate": 3.9964969094173506e-05, | |
| "loss": 0.2456, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13788184523582458, | |
| "step": 195, | |
| "valid_targets_mean": 11258.2, | |
| "valid_targets_min": 5920 | |
| }, | |
| { | |
| "epoch": 0.8492569002123143, | |
| "grad_norm": 0.31884550186901484, | |
| "learning_rate": 3.995134652719684e-05, | |
| "loss": 0.2378, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17065517604351044, | |
| "step": 200, | |
| "valid_targets_mean": 15263.8, | |
| "valid_targets_min": 7771 | |
| }, | |
| { | |
| "epoch": 0.8704883227176221, | |
| "grad_norm": 0.2912088961412526, | |
| "learning_rate": 3.993549465104712e-05, | |
| "loss": 0.212, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1016058549284935, | |
| "step": 205, | |
| "valid_targets_mean": 11488.8, | |
| "valid_targets_min": 2254 | |
| }, | |
| { | |
| "epoch": 0.89171974522293, | |
| "grad_norm": 1.2538232123110695, | |
| "learning_rate": 3.991741523696984e-05, | |
| "loss": 0.2266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0836310163140297, | |
| "step": 210, | |
| "valid_targets_mean": 18902.0, | |
| "valid_targets_min": 7919 | |
| }, | |
| { | |
| "epoch": 0.9129511677282378, | |
| "grad_norm": 0.3347545564566702, | |
| "learning_rate": 3.989711030510954e-05, | |
| "loss": 0.2398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12700827419757843, | |
| "step": 215, | |
| "valid_targets_mean": 11326.8, | |
| "valid_targets_min": 5084 | |
| }, | |
| { | |
| "epoch": 0.9341825902335457, | |
| "grad_norm": 0.2971192130089368, | |
| "learning_rate": 3.987458212428406e-05, | |
| "loss": 0.2243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1275290548801422, | |
| "step": 220, | |
| "valid_targets_mean": 14325.8, | |
| "valid_targets_min": 5076 | |
| }, | |
| { | |
| "epoch": 0.9554140127388535, | |
| "grad_norm": 0.33426267528626336, | |
| "learning_rate": 3.984983321173101e-05, | |
| "loss": 0.2403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15455971658229828, | |
| "step": 225, | |
| "valid_targets_mean": 15615.0, | |
| "valid_targets_min": 6466 | |
| }, | |
| { | |
| "epoch": 0.9766454352441614, | |
| "grad_norm": 0.3247808671944862, | |
| "learning_rate": 3.9822866332826555e-05, | |
| "loss": 0.2246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11736075580120087, | |
| "step": 230, | |
| "valid_targets_mean": 17059.8, | |
| "valid_targets_min": 7224 | |
| }, | |
| { | |
| "epoch": 0.9978768577494692, | |
| "grad_norm": 0.2812386799146817, | |
| "learning_rate": 3.9793684500776356e-05, | |
| "loss": 0.2331, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1330154538154602, | |
| "step": 235, | |
| "valid_targets_mean": 13021.2, | |
| "valid_targets_min": 5174 | |
| }, | |
| { | |
| "epoch": 1.0169851380042463, | |
| "grad_norm": 0.2755139866589168, | |
| "learning_rate": 3.976229097627892e-05, | |
| "loss": 0.2037, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10062160342931747, | |
| "step": 240, | |
| "valid_targets_mean": 15395.4, | |
| "valid_targets_min": 7250 | |
| }, | |
| { | |
| "epoch": 1.0382165605095541, | |
| "grad_norm": 0.30337736123776227, | |
| "learning_rate": 3.972868926716127e-05, | |
| "loss": 0.1971, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08376499265432358, | |
| "step": 245, | |
| "valid_targets_mean": 17409.0, | |
| "valid_targets_min": 7023 | |
| }, | |
| { | |
| "epoch": 1.059447983014862, | |
| "grad_norm": 0.2915807595114085, | |
| "learning_rate": 3.969288312798693e-05, | |
| "loss": 0.2454, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11872377246618271, | |
| "step": 250, | |
| "valid_targets_mean": 14989.5, | |
| "valid_targets_min": 6930 | |
| }, | |
| { | |
| "epoch": 1.0806794055201698, | |
| "grad_norm": 0.27726744741730325, | |
| "learning_rate": 3.965487655963647e-05, | |
| "loss": 0.2257, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11883805692195892, | |
| "step": 255, | |
| "valid_targets_mean": 13760.0, | |
| "valid_targets_min": 5931 | |
| }, | |
| { | |
| "epoch": 1.1019108280254777, | |
| "grad_norm": 0.29991341277236133, | |
| "learning_rate": 3.961467380886042e-05, | |
| "loss": 0.2189, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08793854713439941, | |
| "step": 260, | |
| "valid_targets_mean": 11854.8, | |
| "valid_targets_min": 7223 | |
| }, | |
| { | |
| "epoch": 1.1231422505307855, | |
| "grad_norm": 0.2811615074598358, | |
| "learning_rate": 3.957227936780476e-05, | |
| "loss": 0.2266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11964725703001022, | |
| "step": 265, | |
| "valid_targets_mean": 13769.2, | |
| "valid_targets_min": 7965 | |
| }, | |
| { | |
| "epoch": 1.1443736730360934, | |
| "grad_norm": 0.2810663280814256, | |
| "learning_rate": 3.952769797350899e-05, | |
| "loss": 0.2161, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.109311044216156, | |
| "step": 270, | |
| "valid_targets_mean": 14035.2, | |
| "valid_targets_min": 6319 | |
| }, | |
| { | |
| "epoch": 1.1656050955414012, | |
| "grad_norm": 0.2518522428935926, | |
| "learning_rate": 3.948093460737679e-05, | |
| "loss": 0.1904, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10714008659124374, | |
| "step": 275, | |
| "valid_targets_mean": 15836.6, | |
| "valid_targets_min": 7593 | |
| }, | |
| { | |
| "epoch": 1.186836518046709, | |
| "grad_norm": 0.265837889297658, | |
| "learning_rate": 3.943199449461944e-05, | |
| "loss": 0.2426, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11197318136692047, | |
| "step": 280, | |
| "valid_targets_mean": 15728.6, | |
| "valid_targets_min": 8141 | |
| }, | |
| { | |
| "epoch": 1.208067940552017, | |
| "grad_norm": 0.28084898004784403, | |
| "learning_rate": 3.938088310367199e-05, | |
| "loss": 0.2234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08627848327159882, | |
| "step": 285, | |
| "valid_targets_mean": 15427.1, | |
| "valid_targets_min": 6842 | |
| }, | |
| { | |
| "epoch": 1.2292993630573248, | |
| "grad_norm": 0.27746934501090276, | |
| "learning_rate": 3.932760614558218e-05, | |
| "loss": 0.2209, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12175991386175156, | |
| "step": 290, | |
| "valid_targets_mean": 15384.0, | |
| "valid_targets_min": 8328 | |
| }, | |
| { | |
| "epoch": 1.2505307855626326, | |
| "grad_norm": 0.29525237911812746, | |
| "learning_rate": 3.9272169573372345e-05, | |
| "loss": 0.23, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10661651194095612, | |
| "step": 295, | |
| "valid_targets_mean": 12726.1, | |
| "valid_targets_min": 4463 | |
| }, | |
| { | |
| "epoch": 1.2717622080679405, | |
| "grad_norm": 0.3428937157148055, | |
| "learning_rate": 3.921457958137421e-05, | |
| "loss": 0.2649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12638840079307556, | |
| "step": 300, | |
| "valid_targets_mean": 13763.4, | |
| "valid_targets_min": 6285 | |
| }, | |
| { | |
| "epoch": 1.2929936305732483, | |
| "grad_norm": 0.3714748650604558, | |
| "learning_rate": 3.915484260453679e-05, | |
| "loss": 0.238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12943370640277863, | |
| "step": 305, | |
| "valid_targets_mean": 11842.8, | |
| "valid_targets_min": 2224 | |
| }, | |
| { | |
| "epoch": 1.3142250530785562, | |
| "grad_norm": 0.310114967686842, | |
| "learning_rate": 3.909296531770732e-05, | |
| "loss": 0.2319, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1138845905661583, | |
| "step": 310, | |
| "valid_targets_mean": 12274.8, | |
| "valid_targets_min": 5680 | |
| }, | |
| { | |
| "epoch": 1.335456475583864, | |
| "grad_norm": 0.27182436922475456, | |
| "learning_rate": 3.902895463488547e-05, | |
| "loss": 0.2209, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10476358979940414, | |
| "step": 315, | |
| "valid_targets_mean": 14856.8, | |
| "valid_targets_min": 5796 | |
| }, | |
| { | |
| "epoch": 1.356687898089172, | |
| "grad_norm": 0.31748294870719895, | |
| "learning_rate": 3.896281770845076e-05, | |
| "loss": 0.2109, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10224044322967529, | |
| "step": 320, | |
| "valid_targets_mean": 10133.5, | |
| "valid_targets_min": 5714 | |
| }, | |
| { | |
| "epoch": 1.3779193205944797, | |
| "grad_norm": 0.3826976146252936, | |
| "learning_rate": 3.8894561928363396e-05, | |
| "loss": 0.2003, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11495161056518555, | |
| "step": 325, | |
| "valid_targets_mean": 15048.9, | |
| "valid_targets_min": 5342 | |
| }, | |
| { | |
| "epoch": 1.3991507430997876, | |
| "grad_norm": 0.27319461570158804, | |
| "learning_rate": 3.8824194921338516e-05, | |
| "loss": 0.2091, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08650592714548111, | |
| "step": 330, | |
| "valid_targets_mean": 15681.1, | |
| "valid_targets_min": 7221 | |
| }, | |
| { | |
| "epoch": 1.4203821656050954, | |
| "grad_norm": 0.2792309770737464, | |
| "learning_rate": 3.875172454999402e-05, | |
| "loss": 0.2081, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10807353258132935, | |
| "step": 335, | |
| "valid_targets_mean": 13810.6, | |
| "valid_targets_min": 4893 | |
| }, | |
| { | |
| "epoch": 1.4416135881104033, | |
| "grad_norm": 0.3182023284508512, | |
| "learning_rate": 3.8677158911972e-05, | |
| "loss": 0.2073, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14491504430770874, | |
| "step": 340, | |
| "valid_targets_mean": 12870.5, | |
| "valid_targets_min": 2474 | |
| }, | |
| { | |
| "epoch": 1.4628450106157111, | |
| "grad_norm": 0.2729629600919183, | |
| "learning_rate": 3.860050633903395e-05, | |
| "loss": 0.2098, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13041123747825623, | |
| "step": 345, | |
| "valid_targets_mean": 14308.1, | |
| "valid_targets_min": 7717 | |
| }, | |
| { | |
| "epoch": 1.484076433121019, | |
| "grad_norm": 0.31582590679097317, | |
| "learning_rate": 3.8521775396129824e-05, | |
| "loss": 0.2233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13939642906188965, | |
| "step": 350, | |
| "valid_targets_mean": 15401.0, | |
| "valid_targets_min": 7556 | |
| }, | |
| { | |
| "epoch": 1.5053078556263269, | |
| "grad_norm": 0.30106861470253815, | |
| "learning_rate": 3.8440974880440925e-05, | |
| "loss": 0.2165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10552646219730377, | |
| "step": 355, | |
| "valid_targets_mean": 15419.1, | |
| "valid_targets_min": 3604 | |
| }, | |
| { | |
| "epoch": 1.5265392781316347, | |
| "grad_norm": 0.24666022628823678, | |
| "learning_rate": 3.835811382039703e-05, | |
| "loss": 0.2098, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09952588379383087, | |
| "step": 360, | |
| "valid_targets_mean": 13430.5, | |
| "valid_targets_min": 6392 | |
| }, | |
| { | |
| "epoch": 1.5477707006369426, | |
| "grad_norm": 0.23263039306298894, | |
| "learning_rate": 3.827320147466752e-05, | |
| "loss": 0.2201, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10834848880767822, | |
| "step": 365, | |
| "valid_targets_mean": 18282.2, | |
| "valid_targets_min": 10348 | |
| }, | |
| { | |
| "epoch": 1.5690021231422504, | |
| "grad_norm": 0.2763418285871218, | |
| "learning_rate": 3.818624733112687e-05, | |
| "loss": 0.2437, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09543554484844208, | |
| "step": 370, | |
| "valid_targets_mean": 15127.2, | |
| "valid_targets_min": 4569 | |
| }, | |
| { | |
| "epoch": 1.5902335456475583, | |
| "grad_norm": 0.2344608159731727, | |
| "learning_rate": 3.809726110579446e-05, | |
| "loss": 0.1932, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10267725586891174, | |
| "step": 375, | |
| "valid_targets_mean": 18113.5, | |
| "valid_targets_min": 5644 | |
| }, | |
| { | |
| "epoch": 1.611464968152866, | |
| "grad_norm": 0.40210264521938344, | |
| "learning_rate": 3.8006252741748986e-05, | |
| "loss": 0.227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11398470401763916, | |
| "step": 380, | |
| "valid_targets_mean": 13665.8, | |
| "valid_targets_min": 1344 | |
| }, | |
| { | |
| "epoch": 1.632696390658174, | |
| "grad_norm": 0.30513744618147365, | |
| "learning_rate": 3.79132324080174e-05, | |
| "loss": 0.2166, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14099963009357452, | |
| "step": 385, | |
| "valid_targets_mean": 14852.2, | |
| "valid_targets_min": 3364 | |
| }, | |
| { | |
| "epoch": 1.6539278131634818, | |
| "grad_norm": 0.22426713356424843, | |
| "learning_rate": 3.781821049843869e-05, | |
| "loss": 0.1943, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07226351648569107, | |
| "step": 390, | |
| "valid_targets_mean": 18799.4, | |
| "valid_targets_min": 10477 | |
| }, | |
| { | |
| "epoch": 1.6751592356687897, | |
| "grad_norm": 0.2739515977898349, | |
| "learning_rate": 3.7721197630502485e-05, | |
| "loss": 0.2147, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11230744421482086, | |
| "step": 395, | |
| "valid_targets_mean": 14162.9, | |
| "valid_targets_min": 4448 | |
| }, | |
| { | |
| "epoch": 1.6963906581740975, | |
| "grad_norm": 0.2920300807449177, | |
| "learning_rate": 3.762220464416266e-05, | |
| "loss": 0.2095, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1347799301147461, | |
| "step": 400, | |
| "valid_targets_mean": 17425.0, | |
| "valid_targets_min": 12687 | |
| }, | |
| { | |
| "epoch": 1.7176220806794054, | |
| "grad_norm": 0.37395274633289044, | |
| "learning_rate": 3.7521242600626154e-05, | |
| "loss": 0.1993, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09538485109806061, | |
| "step": 405, | |
| "valid_targets_mean": 15063.0, | |
| "valid_targets_min": 7612 | |
| }, | |
| { | |
| "epoch": 1.7388535031847132, | |
| "grad_norm": 0.330636894297862, | |
| "learning_rate": 3.7418322781117e-05, | |
| "loss": 0.2471, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13609623908996582, | |
| "step": 410, | |
| "valid_targets_mean": 12677.8, | |
| "valid_targets_min": 5740 | |
| }, | |
| { | |
| "epoch": 1.7600849256900213, | |
| "grad_norm": 0.28342681236643996, | |
| "learning_rate": 3.731345668561577e-05, | |
| "loss": 0.2065, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0960487648844719, | |
| "step": 415, | |
| "valid_targets_mean": 12748.8, | |
| "valid_targets_min": 7296 | |
| }, | |
| { | |
| "epoch": 1.7813163481953291, | |
| "grad_norm": 0.23683106588934166, | |
| "learning_rate": 3.720665603157464e-05, | |
| "loss": 0.1736, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08303853869438171, | |
| "step": 420, | |
| "valid_targets_mean": 15981.0, | |
| "valid_targets_min": 7057 | |
| }, | |
| { | |
| "epoch": 1.802547770700637, | |
| "grad_norm": 0.316249617780758, | |
| "learning_rate": 3.7097932752608096e-05, | |
| "loss": 0.219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1166648119688034, | |
| "step": 425, | |
| "valid_targets_mean": 16572.9, | |
| "valid_targets_min": 11308 | |
| }, | |
| { | |
| "epoch": 1.8237791932059448, | |
| "grad_norm": 0.25450442249313526, | |
| "learning_rate": 3.698729899715947e-05, | |
| "loss": 0.1986, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11825818568468094, | |
| "step": 430, | |
| "valid_targets_mean": 16501.6, | |
| "valid_targets_min": 5425 | |
| }, | |
| { | |
| "epoch": 1.8450106157112527, | |
| "grad_norm": 0.2562691834652498, | |
| "learning_rate": 3.687476712714358e-05, | |
| "loss": 0.2078, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08408856391906738, | |
| "step": 435, | |
| "valid_targets_mean": 13867.4, | |
| "valid_targets_min": 5665 | |
| }, | |
| { | |
| "epoch": 1.8662420382165605, | |
| "grad_norm": 0.26720306087521367, | |
| "learning_rate": 3.676034971656537e-05, | |
| "loss": 0.1934, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1064402386546135, | |
| "step": 440, | |
| "valid_targets_mean": 14102.1, | |
| "valid_targets_min": 6476 | |
| }, | |
| { | |
| "epoch": 1.8874734607218684, | |
| "grad_norm": 0.21974899342329504, | |
| "learning_rate": 3.664405955011498e-05, | |
| "loss": 0.1917, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08773387968540192, | |
| "step": 445, | |
| "valid_targets_mean": 17870.5, | |
| "valid_targets_min": 9456 | |
| }, | |
| { | |
| "epoch": 1.9087048832271762, | |
| "grad_norm": 0.30795940792172116, | |
| "learning_rate": 3.652590962173917e-05, | |
| "loss": 0.2353, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14995551109313965, | |
| "step": 450, | |
| "valid_targets_mean": 15358.0, | |
| "valid_targets_min": 8778 | |
| }, | |
| { | |
| "epoch": 1.929936305732484, | |
| "grad_norm": 0.27504189449803096, | |
| "learning_rate": 3.640591313318944e-05, | |
| "loss": 0.2091, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10541120916604996, | |
| "step": 455, | |
| "valid_targets_mean": 15389.8, | |
| "valid_targets_min": 5133 | |
| }, | |
| { | |
| "epoch": 1.951167728237792, | |
| "grad_norm": 0.29564860036710783, | |
| "learning_rate": 3.628408349254693e-05, | |
| "loss": 0.202, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10049857199192047, | |
| "step": 460, | |
| "valid_targets_mean": 14645.1, | |
| "valid_targets_min": 6715 | |
| }, | |
| { | |
| "epoch": 1.9723991507430998, | |
| "grad_norm": 0.25475277572273397, | |
| "learning_rate": 3.616043431272417e-05, | |
| "loss": 0.2048, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07846721261739731, | |
| "step": 465, | |
| "valid_targets_mean": 16528.9, | |
| "valid_targets_min": 6680 | |
| }, | |
| { | |
| "epoch": 1.9936305732484076, | |
| "grad_norm": 0.26142683635944247, | |
| "learning_rate": 3.603497940994407e-05, | |
| "loss": 0.2125, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09435806423425674, | |
| "step": 470, | |
| "valid_targets_mean": 16962.5, | |
| "valid_targets_min": 8108 | |
| }, | |
| { | |
| "epoch": 2.0127388535031847, | |
| "grad_norm": 0.24722160377441987, | |
| "learning_rate": 3.59077328021961e-05, | |
| "loss": 0.1976, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08899471163749695, | |
| "step": 475, | |
| "valid_targets_mean": 12891.4, | |
| "valid_targets_min": 9088 | |
| }, | |
| { | |
| "epoch": 2.0339702760084926, | |
| "grad_norm": 0.2517941803747066, | |
| "learning_rate": 3.577870870766997e-05, | |
| "loss": 0.2029, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09349747002124786, | |
| "step": 480, | |
| "valid_targets_mean": 13435.2, | |
| "valid_targets_min": 6613 | |
| }, | |
| { | |
| "epoch": 2.0552016985138004, | |
| "grad_norm": 0.2758026420024873, | |
| "learning_rate": 3.5647921543166923e-05, | |
| "loss": 0.1978, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07516678422689438, | |
| "step": 485, | |
| "valid_targets_mean": 16082.9, | |
| "valid_targets_min": 5458 | |
| }, | |
| { | |
| "epoch": 2.0764331210191083, | |
| "grad_norm": 0.24371704180908865, | |
| "learning_rate": 3.5515385922488846e-05, | |
| "loss": 0.1965, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08999522030353546, | |
| "step": 490, | |
| "valid_targets_mean": 16669.6, | |
| "valid_targets_min": 6356 | |
| }, | |
| { | |
| "epoch": 2.097664543524416, | |
| "grad_norm": 0.278212723013607, | |
| "learning_rate": 3.5381116654805375e-05, | |
| "loss": 0.1878, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07290489971637726, | |
| "step": 495, | |
| "valid_targets_mean": 15383.8, | |
| "valid_targets_min": 4535 | |
| }, | |
| { | |
| "epoch": 2.118895966029724, | |
| "grad_norm": 0.28587596752541133, | |
| "learning_rate": 3.524512874299912e-05, | |
| "loss": 0.1913, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1112004891037941, | |
| "step": 500, | |
| "valid_targets_mean": 16063.8, | |
| "valid_targets_min": 8790 | |
| }, | |
| { | |
| "epoch": 2.140127388535032, | |
| "grad_norm": 0.23694124431153454, | |
| "learning_rate": 3.5107437381989325e-05, | |
| "loss": 0.1988, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06883738934993744, | |
| "step": 505, | |
| "valid_targets_mean": 15695.1, | |
| "valid_targets_min": 5449 | |
| }, | |
| { | |
| "epoch": 2.1613588110403397, | |
| "grad_norm": 0.2572567842840769, | |
| "learning_rate": 3.4968057957034e-05, | |
| "loss": 0.194, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09729857742786407, | |
| "step": 510, | |
| "valid_targets_mean": 16853.2, | |
| "valid_targets_min": 3854 | |
| }, | |
| { | |
| "epoch": 2.1825902335456475, | |
| "grad_norm": 0.2897655571941794, | |
| "learning_rate": 3.482700604201086e-05, | |
| "loss": 0.1947, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10338980704545975, | |
| "step": 515, | |
| "valid_targets_mean": 12990.2, | |
| "valid_targets_min": 6250 | |
| }, | |
| { | |
| "epoch": 2.2038216560509554, | |
| "grad_norm": 0.3345559098061995, | |
| "learning_rate": 3.4684297397677064e-05, | |
| "loss": 0.2124, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11578390747308731, | |
| "step": 520, | |
| "valid_targets_mean": 13486.2, | |
| "valid_targets_min": 5214 | |
| }, | |
| { | |
| "epoch": 2.225053078556263, | |
| "grad_norm": 0.2647961046337731, | |
| "learning_rate": 3.453994796990823e-05, | |
| "loss": 0.2005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08278575539588928, | |
| "step": 525, | |
| "valid_targets_mean": 15026.2, | |
| "valid_targets_min": 2535 | |
| }, | |
| { | |
| "epoch": 2.246284501061571, | |
| "grad_norm": 0.29321760721644347, | |
| "learning_rate": 3.439397388791662e-05, | |
| "loss": 0.1842, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10423131287097931, | |
| "step": 530, | |
| "valid_targets_mean": 13677.5, | |
| "valid_targets_min": 4331 | |
| }, | |
| { | |
| "epoch": 2.267515923566879, | |
| "grad_norm": 0.33439639128696136, | |
| "learning_rate": 3.424639146244898e-05, | |
| "loss": 0.2108, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09143179655075073, | |
| "step": 535, | |
| "valid_targets_mean": 12497.9, | |
| "valid_targets_min": 6576 | |
| }, | |
| { | |
| "epoch": 2.2887473460721868, | |
| "grad_norm": 0.31293816068157115, | |
| "learning_rate": 3.409721718396395e-05, | |
| "loss": 0.2073, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12323853373527527, | |
| "step": 540, | |
| "valid_targets_mean": 15538.4, | |
| "valid_targets_min": 9217 | |
| }, | |
| { | |
| "epoch": 2.3099787685774946, | |
| "grad_norm": 0.2932762754898537, | |
| "learning_rate": 3.394646772078951e-05, | |
| "loss": 0.2136, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11464841663837433, | |
| "step": 545, | |
| "valid_targets_mean": 13407.5, | |
| "valid_targets_min": 5234 | |
| }, | |
| { | |
| "epoch": 2.3312101910828025, | |
| "grad_norm": 0.31716101339217717, | |
| "learning_rate": 3.379415991726047e-05, | |
| "loss": 0.1953, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0933525562286377, | |
| "step": 550, | |
| "valid_targets_mean": 10441.4, | |
| "valid_targets_min": 3289 | |
| }, | |
| { | |
| "epoch": 2.3524416135881103, | |
| "grad_norm": 0.31294543987180895, | |
| "learning_rate": 3.3640310791836375e-05, | |
| "loss": 0.2001, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11642187833786011, | |
| "step": 555, | |
| "valid_targets_mean": 15880.0, | |
| "valid_targets_min": 6450 | |
| }, | |
| { | |
| "epoch": 2.373673036093418, | |
| "grad_norm": 0.23138968267395532, | |
| "learning_rate": 3.348493753519987e-05, | |
| "loss": 0.2171, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0960564911365509, | |
| "step": 560, | |
| "valid_targets_mean": 18356.4, | |
| "valid_targets_min": 13592 | |
| }, | |
| { | |
| "epoch": 2.394904458598726, | |
| "grad_norm": 0.296455985526213, | |
| "learning_rate": 3.332805750833588e-05, | |
| "loss": 0.1966, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1138642281293869, | |
| "step": 565, | |
| "valid_targets_mean": 16774.5, | |
| "valid_targets_min": 9339 | |
| }, | |
| { | |
| "epoch": 2.416135881104034, | |
| "grad_norm": 0.26969809385295945, | |
| "learning_rate": 3.3169688240591735e-05, | |
| "loss": 0.187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08020391315221786, | |
| "step": 570, | |
| "valid_targets_mean": 11902.9, | |
| "valid_targets_min": 6702 | |
| }, | |
| { | |
| "epoch": 2.4373673036093417, | |
| "grad_norm": 0.26145366450884633, | |
| "learning_rate": 3.300984742771849e-05, | |
| "loss": 0.1826, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08663637936115265, | |
| "step": 575, | |
| "valid_targets_mean": 15054.6, | |
| "valid_targets_min": 4839 | |
| }, | |
| { | |
| "epoch": 2.4585987261146496, | |
| "grad_norm": 0.32984191621067444, | |
| "learning_rate": 3.284855292989363e-05, | |
| "loss": 0.2016, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10979083180427551, | |
| "step": 580, | |
| "valid_targets_mean": 14732.2, | |
| "valid_targets_min": 5139 | |
| }, | |
| { | |
| "epoch": 2.4798301486199574, | |
| "grad_norm": 0.27365490330331865, | |
| "learning_rate": 3.268582276972549e-05, | |
| "loss": 0.1907, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13272282481193542, | |
| "step": 585, | |
| "valid_targets_mean": 14038.0, | |
| "valid_targets_min": 7854 | |
| }, | |
| { | |
| "epoch": 2.5010615711252653, | |
| "grad_norm": 0.25796595337009653, | |
| "learning_rate": 3.252167513023934e-05, | |
| "loss": 0.2006, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09469525516033173, | |
| "step": 590, | |
| "valid_targets_mean": 17552.2, | |
| "valid_targets_min": 9415 | |
| }, | |
| { | |
| "epoch": 2.522292993630573, | |
| "grad_norm": 0.26440522510501036, | |
| "learning_rate": 3.2356128352845794e-05, | |
| "loss": 0.1982, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08657258749008179, | |
| "step": 595, | |
| "valid_targets_mean": 15092.1, | |
| "valid_targets_min": 8187 | |
| }, | |
| { | |
| "epoch": 2.543524416135881, | |
| "grad_norm": 0.31227039684550767, | |
| "learning_rate": 3.218920093529129e-05, | |
| "loss": 0.1869, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11756514757871628, | |
| "step": 600, | |
| "valid_targets_mean": 12069.6, | |
| "valid_targets_min": 5064 | |
| }, | |
| { | |
| "epoch": 2.564755838641189, | |
| "grad_norm": 0.27723943310578775, | |
| "learning_rate": 3.202091152959126e-05, | |
| "loss": 0.1757, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11652103066444397, | |
| "step": 605, | |
| "valid_targets_mean": 15984.1, | |
| "valid_targets_min": 5816 | |
| }, | |
| { | |
| "epoch": 2.5859872611464967, | |
| "grad_norm": 0.2780638657313319, | |
| "learning_rate": 3.1851278939945974e-05, | |
| "loss": 0.1775, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1163756474852562, | |
| "step": 610, | |
| "valid_targets_mean": 15140.8, | |
| "valid_targets_min": 9131 | |
| }, | |
| { | |
| "epoch": 2.6072186836518045, | |
| "grad_norm": 0.32200630212205833, | |
| "learning_rate": 3.1680322120639436e-05, | |
| "loss": 0.2035, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12698647379875183, | |
| "step": 615, | |
| "valid_targets_mean": 13908.4, | |
| "valid_targets_min": 3655 | |
| }, | |
| { | |
| "epoch": 2.6284501061571124, | |
| "grad_norm": 0.28026722665310455, | |
| "learning_rate": 3.150806017392145e-05, | |
| "loss": 0.1815, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10848917067050934, | |
| "step": 620, | |
| "valid_targets_mean": 15885.1, | |
| "valid_targets_min": 8272 | |
| }, | |
| { | |
| "epoch": 2.6496815286624202, | |
| "grad_norm": 0.2987901602685072, | |
| "learning_rate": 3.1334512347873215e-05, | |
| "loss": 0.1946, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1020238921046257, | |
| "step": 625, | |
| "valid_targets_mean": 16725.5, | |
| "valid_targets_min": 6596 | |
| }, | |
| { | |
| "epoch": 2.670912951167728, | |
| "grad_norm": 0.3422773542295122, | |
| "learning_rate": 3.1159698034256595e-05, | |
| "loss": 0.1946, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08330472558736801, | |
| "step": 630, | |
| "valid_targets_mean": 12442.0, | |
| "valid_targets_min": 1587 | |
| }, | |
| { | |
| "epoch": 2.692144373673036, | |
| "grad_norm": 0.2815022503459199, | |
| "learning_rate": 3.098363676634732e-05, | |
| "loss": 0.2026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09158733487129211, | |
| "step": 635, | |
| "valid_targets_mean": 14677.6, | |
| "valid_targets_min": 3623 | |
| }, | |
| { | |
| "epoch": 2.713375796178344, | |
| "grad_norm": 0.27699220688660753, | |
| "learning_rate": 3.080634821675239e-05, | |
| "loss": 0.1906, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08198846876621246, | |
| "step": 640, | |
| "valid_targets_mean": 14876.6, | |
| "valid_targets_min": 5491 | |
| }, | |
| { | |
| "epoch": 2.7346072186836516, | |
| "grad_norm": 0.27056674048902585, | |
| "learning_rate": 3.0627852195211944e-05, | |
| "loss": 0.1943, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09581325948238373, | |
| "step": 645, | |
| "valid_targets_mean": 16048.8, | |
| "valid_targets_min": 7561 | |
| }, | |
| { | |
| "epoch": 2.7558386411889595, | |
| "grad_norm": 0.25624506575317174, | |
| "learning_rate": 3.0448168646385733e-05, | |
| "loss": 0.1871, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09879995882511139, | |
| "step": 650, | |
| "valid_targets_mean": 16128.4, | |
| "valid_targets_min": 11230 | |
| }, | |
| { | |
| "epoch": 2.777070063694268, | |
| "grad_norm": 0.25670398472243816, | |
| "learning_rate": 3.0267317647624584e-05, | |
| "loss": 0.2121, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0653584823012352, | |
| "step": 655, | |
| "valid_targets_mean": 14782.5, | |
| "valid_targets_min": 3532 | |
| }, | |
| { | |
| "epoch": 2.798301486199575, | |
| "grad_norm": 0.2724083240499696, | |
| "learning_rate": 3.0085319406727003e-05, | |
| "loss": 0.2165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08737077564001083, | |
| "step": 660, | |
| "valid_targets_mean": 16403.5, | |
| "valid_targets_min": 4331 | |
| }, | |
| { | |
| "epoch": 2.8195329087048835, | |
| "grad_norm": 0.2383604800522291, | |
| "learning_rate": 2.9902194259681203e-05, | |
| "loss": 0.1886, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07255426049232483, | |
| "step": 665, | |
| "valid_targets_mean": 15846.5, | |
| "valid_targets_min": 7310 | |
| }, | |
| { | |
| "epoch": 2.840764331210191, | |
| "grad_norm": 0.30257307120700694, | |
| "learning_rate": 2.9717962668392837e-05, | |
| "loss": 0.1662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12048687040805817, | |
| "step": 670, | |
| "valid_targets_mean": 13953.0, | |
| "valid_targets_min": 3689 | |
| }, | |
| { | |
| "epoch": 2.861995753715499, | |
| "grad_norm": 0.2522936277398451, | |
| "learning_rate": 2.9532645218398608e-05, | |
| "loss": 0.186, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09158721566200256, | |
| "step": 675, | |
| "valid_targets_mean": 14446.5, | |
| "valid_targets_min": 5751 | |
| }, | |
| { | |
| "epoch": 2.8832271762208066, | |
| "grad_norm": 0.2070318168754386, | |
| "learning_rate": 2.9346262616566128e-05, | |
| "loss": 0.1798, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08559735119342804, | |
| "step": 680, | |
| "valid_targets_mean": 17995.4, | |
| "valid_targets_min": 10423 | |
| }, | |
| { | |
| "epoch": 2.904458598726115, | |
| "grad_norm": 0.24087732064409983, | |
| "learning_rate": 2.9158835688780188e-05, | |
| "loss": 0.1856, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08264927566051483, | |
| "step": 685, | |
| "valid_targets_mean": 13348.2, | |
| "valid_targets_min": 6221 | |
| }, | |
| { | |
| "epoch": 2.9256900212314223, | |
| "grad_norm": 0.24300050232911719, | |
| "learning_rate": 2.89703853776157e-05, | |
| "loss": 0.1673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08173580467700958, | |
| "step": 690, | |
| "valid_targets_mean": 16869.4, | |
| "valid_targets_min": 1387 | |
| }, | |
| { | |
| "epoch": 2.9469214437367306, | |
| "grad_norm": 0.26724986441117543, | |
| "learning_rate": 2.878093273999765e-05, | |
| "loss": 0.1836, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08488611876964569, | |
| "step": 695, | |
| "valid_targets_mean": 15076.5, | |
| "valid_targets_min": 6502 | |
| }, | |
| { | |
| "epoch": 2.968152866242038, | |
| "grad_norm": 0.2455958459356137, | |
| "learning_rate": 2.859049894484828e-05, | |
| "loss": 0.1885, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08177411556243896, | |
| "step": 700, | |
| "valid_targets_mean": 14060.8, | |
| "valid_targets_min": 3622 | |
| }, | |
| { | |
| "epoch": 2.9893842887473463, | |
| "grad_norm": 0.2715224629184729, | |
| "learning_rate": 2.8399105270721668e-05, | |
| "loss": 0.2006, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09079961478710175, | |
| "step": 705, | |
| "valid_targets_mean": 15069.6, | |
| "valid_targets_min": 8482 | |
| }, | |
| { | |
| "epoch": 3.008492569002123, | |
| "grad_norm": 0.23573264403831284, | |
| "learning_rate": 2.8206773103426187e-05, | |
| "loss": 0.168, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07453001290559769, | |
| "step": 710, | |
| "valid_targets_mean": 13914.1, | |
| "valid_targets_min": 3623 | |
| }, | |
| { | |
| "epoch": 3.029723991507431, | |
| "grad_norm": 0.28900177745868094, | |
| "learning_rate": 2.8013523933634875e-05, | |
| "loss": 0.2028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07700274884700775, | |
| "step": 715, | |
| "valid_targets_mean": 14241.4, | |
| "valid_targets_min": 9250 | |
| }, | |
| { | |
| "epoch": 3.050955414012739, | |
| "grad_norm": 0.2794038536966578, | |
| "learning_rate": 2.7819379354484124e-05, | |
| "loss": 0.1776, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07138693332672119, | |
| "step": 720, | |
| "valid_targets_mean": 14523.6, | |
| "valid_targets_min": 3518 | |
| }, | |
| { | |
| "epoch": 3.0721868365180467, | |
| "grad_norm": 0.3075656321601474, | |
| "learning_rate": 2.762436105916094e-05, | |
| "loss": 0.1852, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08331207185983658, | |
| "step": 725, | |
| "valid_targets_mean": 14277.2, | |
| "valid_targets_min": 6321 | |
| }, | |
| { | |
| "epoch": 3.0934182590233545, | |
| "grad_norm": 0.268200382167701, | |
| "learning_rate": 2.742849083847899e-05, | |
| "loss": 0.2002, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08485275506973267, | |
| "step": 730, | |
| "valid_targets_mean": 15997.1, | |
| "valid_targets_min": 7462 | |
| }, | |
| { | |
| "epoch": 3.1146496815286624, | |
| "grad_norm": 0.26063481388615084, | |
| "learning_rate": 2.7231790578443785e-05, | |
| "loss": 0.1666, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09242895990610123, | |
| "step": 735, | |
| "valid_targets_mean": 16829.9, | |
| "valid_targets_min": 4851 | |
| }, | |
| { | |
| "epoch": 3.1358811040339702, | |
| "grad_norm": 0.33301761675595387, | |
| "learning_rate": 2.7034282257807136e-05, | |
| "loss": 0.1877, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12262482196092606, | |
| "step": 740, | |
| "valid_targets_mean": 14170.1, | |
| "valid_targets_min": 7297 | |
| }, | |
| { | |
| "epoch": 3.157112526539278, | |
| "grad_norm": 0.25559284472185234, | |
| "learning_rate": 2.683598794561138e-05, | |
| "loss": 0.1819, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.060392506420612335, | |
| "step": 745, | |
| "valid_targets_mean": 13418.9, | |
| "valid_targets_min": 7538 | |
| }, | |
| { | |
| "epoch": 3.178343949044586, | |
| "grad_norm": 0.31989112975280076, | |
| "learning_rate": 2.66369297987234e-05, | |
| "loss": 0.1758, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09339696168899536, | |
| "step": 750, | |
| "valid_targets_mean": 14859.2, | |
| "valid_targets_min": 5714 | |
| }, | |
| { | |
| "epoch": 3.199575371549894, | |
| "grad_norm": 0.24047693551940477, | |
| "learning_rate": 2.643713005935888e-05, | |
| "loss": 0.1607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06566546857357025, | |
| "step": 755, | |
| "valid_targets_mean": 17566.6, | |
| "valid_targets_min": 12773 | |
| }, | |
| { | |
| "epoch": 3.2208067940552016, | |
| "grad_norm": 0.24423971251668095, | |
| "learning_rate": 2.6236611052597055e-05, | |
| "loss": 0.1732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05638258159160614, | |
| "step": 760, | |
| "valid_targets_mean": 16585.4, | |
| "valid_targets_min": 6836 | |
| }, | |
| { | |
| "epoch": 3.2420382165605095, | |
| "grad_norm": 0.22931906700860674, | |
| "learning_rate": 2.603539518388611e-05, | |
| "loss": 0.1782, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.062216877937316895, | |
| "step": 765, | |
| "valid_targets_mean": 15363.1, | |
| "valid_targets_min": 6726 | |
| }, | |
| { | |
| "epoch": 3.2632696390658174, | |
| "grad_norm": 0.29138046222131386, | |
| "learning_rate": 2.5833504936539712e-05, | |
| "loss": 0.1794, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12089153379201889, | |
| "step": 770, | |
| "valid_targets_mean": 15873.8, | |
| "valid_targets_min": 7979 | |
| }, | |
| { | |
| "epoch": 3.284501061571125, | |
| "grad_norm": 0.27630841743869844, | |
| "learning_rate": 2.563096286922474e-05, | |
| "loss": 0.1948, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0977085679769516, | |
| "step": 775, | |
| "valid_targets_mean": 13591.0, | |
| "valid_targets_min": 3435 | |
| }, | |
| { | |
| "epoch": 3.305732484076433, | |
| "grad_norm": 0.31406681179293716, | |
| "learning_rate": 2.54277916134407e-05, | |
| "loss": 0.1825, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06433968245983124, | |
| "step": 780, | |
| "valid_targets_mean": 13319.6, | |
| "valid_targets_min": 2535 | |
| }, | |
| { | |
| "epoch": 3.326963906581741, | |
| "grad_norm": 0.26059306614788147, | |
| "learning_rate": 2.5224013870990868e-05, | |
| "loss": 0.1861, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08149316906929016, | |
| "step": 785, | |
| "valid_targets_mean": 13039.1, | |
| "valid_targets_min": 6808 | |
| }, | |
| { | |
| "epoch": 3.3481953290870488, | |
| "grad_norm": 0.2351953795497981, | |
| "learning_rate": 2.5019652411445704e-05, | |
| "loss": 0.1929, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07320615649223328, | |
| "step": 790, | |
| "valid_targets_mean": 15469.1, | |
| "valid_targets_min": 8955 | |
| }, | |
| { | |
| "epoch": 3.3694267515923566, | |
| "grad_norm": 0.2736683222828364, | |
| "learning_rate": 2.4814730069598624e-05, | |
| "loss": 0.1737, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0710974708199501, | |
| "step": 795, | |
| "valid_targets_mean": 11276.5, | |
| "valid_targets_min": 5920 | |
| }, | |
| { | |
| "epoch": 3.3906581740976645, | |
| "grad_norm": 0.27054094450445326, | |
| "learning_rate": 2.460926974291451e-05, | |
| "loss": 0.1916, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07035864889621735, | |
| "step": 800, | |
| "valid_targets_mean": 17484.8, | |
| "valid_targets_min": 8262 | |
| }, | |
| { | |
| "epoch": 3.4118895966029723, | |
| "grad_norm": 0.29197318400903655, | |
| "learning_rate": 2.440329438897122e-05, | |
| "loss": 0.1705, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09986551851034164, | |
| "step": 805, | |
| "valid_targets_mean": 16090.8, | |
| "valid_targets_min": 7707 | |
| }, | |
| { | |
| "epoch": 3.43312101910828, | |
| "grad_norm": 0.2314470232042232, | |
| "learning_rate": 2.419682702289432e-05, | |
| "loss": 0.1584, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05505535006523132, | |
| "step": 810, | |
| "valid_targets_mean": 16037.1, | |
| "valid_targets_min": 9644 | |
| }, | |
| { | |
| "epoch": 3.454352441613588, | |
| "grad_norm": 0.3222253587001989, | |
| "learning_rate": 2.3989890714785505e-05, | |
| "loss": 0.1632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09005002677440643, | |
| "step": 815, | |
| "valid_targets_mean": 16947.9, | |
| "valid_targets_min": 6163 | |
| }, | |
| { | |
| "epoch": 3.475583864118896, | |
| "grad_norm": 0.3294379601590877, | |
| "learning_rate": 2.3782508587144774e-05, | |
| "loss": 0.1693, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11677606403827667, | |
| "step": 820, | |
| "valid_targets_mean": 13542.0, | |
| "valid_targets_min": 6462 | |
| }, | |
| { | |
| "epoch": 3.4968152866242037, | |
| "grad_norm": 0.29000006498568415, | |
| "learning_rate": 2.3574703812286766e-05, | |
| "loss": 0.1746, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07387042045593262, | |
| "step": 825, | |
| "valid_targets_mean": 11624.4, | |
| "valid_targets_min": 4488 | |
| }, | |
| { | |
| "epoch": 3.5180467091295116, | |
| "grad_norm": 0.2401705211916322, | |
| "learning_rate": 2.3366499609751593e-05, | |
| "loss": 0.1736, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09939301013946533, | |
| "step": 830, | |
| "valid_targets_mean": 15575.9, | |
| "valid_targets_min": 8394 | |
| }, | |
| { | |
| "epoch": 3.5392781316348194, | |
| "grad_norm": 0.28451927113712266, | |
| "learning_rate": 2.3157919243710318e-05, | |
| "loss": 0.1789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09718938171863556, | |
| "step": 835, | |
| "valid_targets_mean": 16160.0, | |
| "valid_targets_min": 7174 | |
| }, | |
| { | |
| "epoch": 3.5605095541401273, | |
| "grad_norm": 0.3211013643205975, | |
| "learning_rate": 2.2948986020365493e-05, | |
| "loss": 0.1955, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09303892403841019, | |
| "step": 840, | |
| "valid_targets_mean": 11994.6, | |
| "valid_targets_min": 4268 | |
| }, | |
| { | |
| "epoch": 3.581740976645435, | |
| "grad_norm": 0.2746677955508634, | |
| "learning_rate": 2.273972328534698e-05, | |
| "loss": 0.2052, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12424758076667786, | |
| "step": 845, | |
| "valid_targets_mean": 15329.9, | |
| "valid_targets_min": 5514 | |
| }, | |
| { | |
| "epoch": 3.602972399150743, | |
| "grad_norm": 0.2916069982798815, | |
| "learning_rate": 2.2530154421103386e-05, | |
| "loss": 0.1627, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0865262895822525, | |
| "step": 850, | |
| "valid_targets_mean": 12826.9, | |
| "valid_targets_min": 1935 | |
| }, | |
| { | |
| "epoch": 3.624203821656051, | |
| "grad_norm": 0.329290804996052, | |
| "learning_rate": 2.2320302844289366e-05, | |
| "loss": 0.2028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1294848918914795, | |
| "step": 855, | |
| "valid_targets_mean": 14766.9, | |
| "valid_targets_min": 4963 | |
| }, | |
| { | |
| "epoch": 3.6454352441613587, | |
| "grad_norm": 0.26352816238784327, | |
| "learning_rate": 2.21101920031491e-05, | |
| "loss": 0.1643, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05863261595368385, | |
| "step": 860, | |
| "valid_targets_mean": 14058.9, | |
| "valid_targets_min": 2079 | |
| }, | |
| { | |
| "epoch": 3.6666666666666665, | |
| "grad_norm": 0.27075913676748065, | |
| "learning_rate": 2.1899845374896264e-05, | |
| "loss": 0.1724, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0771893858909607, | |
| "step": 865, | |
| "valid_targets_mean": 15660.5, | |
| "valid_targets_min": 3825 | |
| }, | |
| { | |
| "epoch": 3.6878980891719744, | |
| "grad_norm": 0.2879633243259211, | |
| "learning_rate": 2.168928646309074e-05, | |
| "loss": 0.1844, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0968056321144104, | |
| "step": 870, | |
| "valid_targets_mean": 13577.5, | |
| "valid_targets_min": 1343 | |
| }, | |
| { | |
| "epoch": 3.709129511677282, | |
| "grad_norm": 0.3324325284111313, | |
| "learning_rate": 2.14785387950124e-05, | |
| "loss": 0.2002, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10815715789794922, | |
| "step": 875, | |
| "valid_targets_mean": 13251.2, | |
| "valid_targets_min": 5776 | |
| }, | |
| { | |
| "epoch": 3.73036093418259, | |
| "grad_norm": 0.24385270873622308, | |
| "learning_rate": 2.1267625919032233e-05, | |
| "loss": 0.1949, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07621297240257263, | |
| "step": 880, | |
| "valid_targets_mean": 14647.6, | |
| "valid_targets_min": 1283 | |
| }, | |
| { | |
| "epoch": 3.7515923566878984, | |
| "grad_norm": 0.28158194480710597, | |
| "learning_rate": 2.10565714019811e-05, | |
| "loss": 0.1882, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09309081733226776, | |
| "step": 885, | |
| "valid_targets_mean": 13250.6, | |
| "valid_targets_min": 5586 | |
| }, | |
| { | |
| "epoch": 3.7728237791932058, | |
| "grad_norm": 0.28387353955537614, | |
| "learning_rate": 2.0845398826516457e-05, | |
| "loss": 0.1844, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0900927409529686, | |
| "step": 890, | |
| "valid_targets_mean": 15713.0, | |
| "valid_targets_min": 5478 | |
| }, | |
| { | |
| "epoch": 3.794055201698514, | |
| "grad_norm": 0.22676398483352012, | |
| "learning_rate": 2.0634131788487278e-05, | |
| "loss": 0.187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0685080736875534, | |
| "step": 895, | |
| "valid_targets_mean": 17496.4, | |
| "valid_targets_min": 11864 | |
| }, | |
| { | |
| "epoch": 3.8152866242038215, | |
| "grad_norm": 0.2894684737379915, | |
| "learning_rate": 2.0422793894297533e-05, | |
| "loss": 0.1743, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09989657998085022, | |
| "step": 900, | |
| "valid_targets_mean": 15670.9, | |
| "valid_targets_min": 5314 | |
| }, | |
| { | |
| "epoch": 3.8365180467091298, | |
| "grad_norm": 0.2529641193716833, | |
| "learning_rate": 2.0211408758268468e-05, | |
| "loss": 0.1832, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08049357682466507, | |
| "step": 905, | |
| "valid_targets_mean": 19922.8, | |
| "valid_targets_min": 5124 | |
| }, | |
| { | |
| "epoch": 3.857749469214437, | |
| "grad_norm": 0.3262452116430981, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1815, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06966280192136765, | |
| "step": 910, | |
| "valid_targets_mean": 10526.9, | |
| "valid_targets_min": 3364 | |
| }, | |
| { | |
| "epoch": 3.8789808917197455, | |
| "grad_norm": 0.2935358593938512, | |
| "learning_rate": 1.9788591241731535e-05, | |
| "loss": 0.1781, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09608276188373566, | |
| "step": 915, | |
| "valid_targets_mean": 13981.8, | |
| "valid_targets_min": 8162 | |
| }, | |
| { | |
| "epoch": 3.900212314225053, | |
| "grad_norm": 0.27813488718681834, | |
| "learning_rate": 1.9577206105702474e-05, | |
| "loss": 0.1811, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09818491339683533, | |
| "step": 920, | |
| "valid_targets_mean": 17414.1, | |
| "valid_targets_min": 7717 | |
| }, | |
| { | |
| "epoch": 3.921443736730361, | |
| "grad_norm": 0.3884374840815922, | |
| "learning_rate": 1.9365868211512725e-05, | |
| "loss": 0.1958, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12244383990764618, | |
| "step": 925, | |
| "valid_targets_mean": 13225.5, | |
| "valid_targets_min": 7774 | |
| }, | |
| { | |
| "epoch": 3.9426751592356686, | |
| "grad_norm": 0.25803403519577645, | |
| "learning_rate": 1.915460117348355e-05, | |
| "loss": 0.1675, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0736437737941742, | |
| "step": 930, | |
| "valid_targets_mean": 14138.8, | |
| "valid_targets_min": 4599 | |
| }, | |
| { | |
| "epoch": 3.963906581740977, | |
| "grad_norm": 0.2948166835249158, | |
| "learning_rate": 1.8943428598018904e-05, | |
| "loss": 0.1729, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10551305115222931, | |
| "step": 935, | |
| "valid_targets_mean": 13149.0, | |
| "valid_targets_min": 5237 | |
| }, | |
| { | |
| "epoch": 3.9851380042462843, | |
| "grad_norm": 0.26157763886832613, | |
| "learning_rate": 1.8732374080967774e-05, | |
| "loss": 0.1664, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08099900186061859, | |
| "step": 940, | |
| "valid_targets_mean": 16457.1, | |
| "valid_targets_min": 5918 | |
| }, | |
| { | |
| "epoch": 4.004246284501062, | |
| "grad_norm": 0.22804078903359448, | |
| "learning_rate": 1.8521461204987606e-05, | |
| "loss": 0.1569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.044775694608688354, | |
| "step": 945, | |
| "valid_targets_mean": 16204.6, | |
| "valid_targets_min": 7257 | |
| }, | |
| { | |
| "epoch": 4.025477707006369, | |
| "grad_norm": 0.2836852446635035, | |
| "learning_rate": 1.8310713536909265e-05, | |
| "loss": 0.1668, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08778543770313263, | |
| "step": 950, | |
| "valid_targets_mean": 12359.9, | |
| "valid_targets_min": 5076 | |
| }, | |
| { | |
| "epoch": 4.046709129511678, | |
| "grad_norm": 0.2442174895389702, | |
| "learning_rate": 1.810015462510374e-05, | |
| "loss": 0.1697, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09868457168340683, | |
| "step": 955, | |
| "valid_targets_mean": 17049.4, | |
| "valid_targets_min": 5491 | |
| }, | |
| { | |
| "epoch": 4.067940552016985, | |
| "grad_norm": 0.3695645197841845, | |
| "learning_rate": 1.7889807996850906e-05, | |
| "loss": 0.2036, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11026948690414429, | |
| "step": 960, | |
| "valid_targets_mean": 9681.6, | |
| "valid_targets_min": 3605 | |
| }, | |
| { | |
| "epoch": 4.089171974522293, | |
| "grad_norm": 0.29979727831410075, | |
| "learning_rate": 1.767969715571064e-05, | |
| "loss": 0.1686, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0982229933142662, | |
| "step": 965, | |
| "valid_targets_mean": 11373.5, | |
| "valid_targets_min": 2622 | |
| }, | |
| { | |
| "epoch": 4.110403397027601, | |
| "grad_norm": 0.29739837712764017, | |
| "learning_rate": 1.746984557889662e-05, | |
| "loss": 0.1837, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08777828514575958, | |
| "step": 970, | |
| "valid_targets_mean": 14991.1, | |
| "valid_targets_min": 6941 | |
| }, | |
| { | |
| "epoch": 4.131634819532909, | |
| "grad_norm": 0.27118781352159604, | |
| "learning_rate": 1.7260276714653023e-05, | |
| "loss": 0.1704, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0875612124800682, | |
| "step": 975, | |
| "valid_targets_mean": 14782.4, | |
| "valid_targets_min": 7258 | |
| }, | |
| { | |
| "epoch": 4.1528662420382165, | |
| "grad_norm": 0.28359027589369706, | |
| "learning_rate": 1.7051013979634514e-05, | |
| "loss": 0.1607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10205516219139099, | |
| "step": 980, | |
| "valid_targets_mean": 18528.0, | |
| "valid_targets_min": 12316 | |
| }, | |
| { | |
| "epoch": 4.174097664543525, | |
| "grad_norm": 0.3340589809342197, | |
| "learning_rate": 1.684208075628969e-05, | |
| "loss": 0.1826, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09689417481422424, | |
| "step": 985, | |
| "valid_targets_mean": 13361.2, | |
| "valid_targets_min": 5776 | |
| }, | |
| { | |
| "epoch": 4.195329087048832, | |
| "grad_norm": 0.3077583048219397, | |
| "learning_rate": 1.6633500390248414e-05, | |
| "loss": 0.1573, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06832315027713776, | |
| "step": 990, | |
| "valid_targets_mean": 12119.1, | |
| "valid_targets_min": 4323 | |
| }, | |
| { | |
| "epoch": 4.2165605095541405, | |
| "grad_norm": 0.2573294853602038, | |
| "learning_rate": 1.642529618771324e-05, | |
| "loss": 0.1518, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05930829420685768, | |
| "step": 995, | |
| "valid_targets_mean": 15648.5, | |
| "valid_targets_min": 7985 | |
| }, | |
| { | |
| "epoch": 4.237791932059448, | |
| "grad_norm": 0.2690214106625097, | |
| "learning_rate": 1.6217491412855233e-05, | |
| "loss": 0.1813, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07982520759105682, | |
| "step": 1000, | |
| "valid_targets_mean": 14926.2, | |
| "valid_targets_min": 1956 | |
| }, | |
| { | |
| "epoch": 4.259023354564756, | |
| "grad_norm": 0.3360157443708867, | |
| "learning_rate": 1.60101092852145e-05, | |
| "loss": 0.1753, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04510154575109482, | |
| "step": 1005, | |
| "valid_targets_mean": 13575.0, | |
| "valid_targets_min": 7250 | |
| }, | |
| { | |
| "epoch": 4.280254777070064, | |
| "grad_norm": 0.3205217949902666, | |
| "learning_rate": 1.5803172977105686e-05, | |
| "loss": 0.1579, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10675063729286194, | |
| "step": 1010, | |
| "valid_targets_mean": 15840.2, | |
| "valid_targets_min": 5089 | |
| }, | |
| { | |
| "epoch": 4.301486199575372, | |
| "grad_norm": 0.2583709576870678, | |
| "learning_rate": 1.5596705611028792e-05, | |
| "loss": 0.1607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06442227959632874, | |
| "step": 1015, | |
| "valid_targets_mean": 17846.0, | |
| "valid_targets_min": 5469 | |
| }, | |
| { | |
| "epoch": 4.322717622080679, | |
| "grad_norm": 0.26010908830801727, | |
| "learning_rate": 1.5390730257085494e-05, | |
| "loss": 0.162, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07889682799577713, | |
| "step": 1020, | |
| "valid_targets_mean": 16397.1, | |
| "valid_targets_min": 12382 | |
| }, | |
| { | |
| "epoch": 4.343949044585988, | |
| "grad_norm": 0.3022177554051947, | |
| "learning_rate": 1.5185269930401381e-05, | |
| "loss": 0.1734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0844014585018158, | |
| "step": 1025, | |
| "valid_targets_mean": 11215.9, | |
| "valid_targets_min": 4517 | |
| }, | |
| { | |
| "epoch": 4.365180467091295, | |
| "grad_norm": 0.27137051227613906, | |
| "learning_rate": 1.4980347588554302e-05, | |
| "loss": 0.1632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10084296762943268, | |
| "step": 1030, | |
| "valid_targets_mean": 17699.4, | |
| "valid_targets_min": 5458 | |
| }, | |
| { | |
| "epoch": 4.386411889596603, | |
| "grad_norm": 0.28712761699778766, | |
| "learning_rate": 1.4775986129009137e-05, | |
| "loss": 0.1897, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08913597464561462, | |
| "step": 1035, | |
| "valid_targets_mean": 15072.6, | |
| "valid_targets_min": 2535 | |
| }, | |
| { | |
| "epoch": 4.407643312101911, | |
| "grad_norm": 0.28374049176794086, | |
| "learning_rate": 1.4572208386559304e-05, | |
| "loss": 0.1672, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08083443343639374, | |
| "step": 1040, | |
| "valid_targets_mean": 15562.0, | |
| "valid_targets_min": 6274 | |
| }, | |
| { | |
| "epoch": 4.428874734607219, | |
| "grad_norm": 0.289299652196656, | |
| "learning_rate": 1.436903713077526e-05, | |
| "loss": 0.1598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08103075623512268, | |
| "step": 1045, | |
| "valid_targets_mean": 13985.0, | |
| "valid_targets_min": 7381 | |
| }, | |
| { | |
| "epoch": 4.450106157112526, | |
| "grad_norm": 0.29939314691306246, | |
| "learning_rate": 1.4166495063460295e-05, | |
| "loss": 0.1619, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07846647500991821, | |
| "step": 1050, | |
| "valid_targets_mean": 15401.0, | |
| "valid_targets_min": 5135 | |
| }, | |
| { | |
| "epoch": 4.471337579617835, | |
| "grad_norm": 0.322213805306961, | |
| "learning_rate": 1.3964604816113896e-05, | |
| "loss": 0.1806, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10395080596208572, | |
| "step": 1055, | |
| "valid_targets_mean": 14610.8, | |
| "valid_targets_min": 7159 | |
| }, | |
| { | |
| "epoch": 4.492569002123142, | |
| "grad_norm": 0.2866514130234711, | |
| "learning_rate": 1.3763388947402953e-05, | |
| "loss": 0.1715, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07112490385770798, | |
| "step": 1060, | |
| "valid_targets_mean": 15032.0, | |
| "valid_targets_min": 8317 | |
| }, | |
| { | |
| "epoch": 4.51380042462845, | |
| "grad_norm": 0.314478967030206, | |
| "learning_rate": 1.3562869940641123e-05, | |
| "loss": 0.1691, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10800331830978394, | |
| "step": 1065, | |
| "valid_targets_mean": 15537.0, | |
| "valid_targets_min": 5167 | |
| }, | |
| { | |
| "epoch": 4.535031847133758, | |
| "grad_norm": 0.28204747261924135, | |
| "learning_rate": 1.3363070201276606e-05, | |
| "loss": 0.1601, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07255604863166809, | |
| "step": 1070, | |
| "valid_targets_mean": 14644.9, | |
| "valid_targets_min": 4870 | |
| }, | |
| { | |
| "epoch": 4.556263269639066, | |
| "grad_norm": 0.27569273817889167, | |
| "learning_rate": 1.316401205438862e-05, | |
| "loss": 0.1715, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06936567276716232, | |
| "step": 1075, | |
| "valid_targets_mean": 14241.4, | |
| "valid_targets_min": 4601 | |
| }, | |
| { | |
| "epoch": 4.5774946921443735, | |
| "grad_norm": 0.26253469480509967, | |
| "learning_rate": 1.2965717742192866e-05, | |
| "loss": 0.1734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08657175302505493, | |
| "step": 1080, | |
| "valid_targets_mean": 15617.2, | |
| "valid_targets_min": 5992 | |
| }, | |
| { | |
| "epoch": 4.598726114649682, | |
| "grad_norm": 0.28581659144121935, | |
| "learning_rate": 1.276820942155622e-05, | |
| "loss": 0.1702, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08918562531471252, | |
| "step": 1085, | |
| "valid_targets_mean": 13600.9, | |
| "valid_targets_min": 4402 | |
| }, | |
| { | |
| "epoch": 4.619957537154989, | |
| "grad_norm": 0.3073024926683609, | |
| "learning_rate": 1.2571509161521007e-05, | |
| "loss": 0.1714, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07768109440803528, | |
| "step": 1090, | |
| "valid_targets_mean": 12548.6, | |
| "valid_targets_min": 4815 | |
| }, | |
| { | |
| "epoch": 4.6411889596602975, | |
| "grad_norm": 0.3269547869469645, | |
| "learning_rate": 1.2375638940839062e-05, | |
| "loss": 0.1954, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11400610208511353, | |
| "step": 1095, | |
| "valid_targets_mean": 12697.8, | |
| "valid_targets_min": 5610 | |
| }, | |
| { | |
| "epoch": 4.662420382165605, | |
| "grad_norm": 0.27326799041082284, | |
| "learning_rate": 1.2180620645515875e-05, | |
| "loss": 0.1619, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06606325507164001, | |
| "step": 1100, | |
| "valid_targets_mean": 10452.8, | |
| "valid_targets_min": 5105 | |
| }, | |
| { | |
| "epoch": 4.683651804670913, | |
| "grad_norm": 0.31708108689013287, | |
| "learning_rate": 1.1986476066365125e-05, | |
| "loss": 0.1794, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07225104421377182, | |
| "step": 1105, | |
| "valid_targets_mean": 10873.4, | |
| "valid_targets_min": 5961 | |
| }, | |
| { | |
| "epoch": 4.704883227176221, | |
| "grad_norm": 0.34556552719228606, | |
| "learning_rate": 1.179322689657381e-05, | |
| "loss": 0.1964, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13157233595848083, | |
| "step": 1110, | |
| "valid_targets_mean": 16111.0, | |
| "valid_targets_min": 5477 | |
| }, | |
| { | |
| "epoch": 4.726114649681529, | |
| "grad_norm": 0.29047699922219883, | |
| "learning_rate": 1.1600894729278333e-05, | |
| "loss": 0.1633, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07969208061695099, | |
| "step": 1115, | |
| "valid_targets_mean": 12312.8, | |
| "valid_targets_min": 7979 | |
| }, | |
| { | |
| "epoch": 4.747346072186836, | |
| "grad_norm": 0.31878635054753074, | |
| "learning_rate": 1.1409501055151726e-05, | |
| "loss": 0.1663, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11133340746164322, | |
| "step": 1120, | |
| "valid_targets_mean": 16930.0, | |
| "valid_targets_min": 4513 | |
| }, | |
| { | |
| "epoch": 4.768577494692145, | |
| "grad_norm": 0.2894979007219718, | |
| "learning_rate": 1.1219067260002352e-05, | |
| "loss": 0.1481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08189079165458679, | |
| "step": 1125, | |
| "valid_targets_mean": 13020.8, | |
| "valid_targets_min": 5919 | |
| }, | |
| { | |
| "epoch": 4.789808917197452, | |
| "grad_norm": 0.2974224757724096, | |
| "learning_rate": 1.1029614622384307e-05, | |
| "loss": 0.1763, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0739152580499649, | |
| "step": 1130, | |
| "valid_targets_mean": 15006.2, | |
| "valid_targets_min": 6402 | |
| }, | |
| { | |
| "epoch": 4.81104033970276, | |
| "grad_norm": 0.2740806790545324, | |
| "learning_rate": 1.0841164311219812e-05, | |
| "loss": 0.1665, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09299416840076447, | |
| "step": 1135, | |
| "valid_targets_mean": 18061.0, | |
| "valid_targets_min": 10000 | |
| }, | |
| { | |
| "epoch": 4.832271762208068, | |
| "grad_norm": 0.23454734933723237, | |
| "learning_rate": 1.0653737383433869e-05, | |
| "loss": 0.1727, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08343719691038132, | |
| "step": 1140, | |
| "valid_targets_mean": 17752.4, | |
| "valid_targets_min": 7344 | |
| }, | |
| { | |
| "epoch": 4.853503184713376, | |
| "grad_norm": 0.24458323267656765, | |
| "learning_rate": 1.0467354781601395e-05, | |
| "loss": 0.1664, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06879070401191711, | |
| "step": 1145, | |
| "valid_targets_mean": 18738.1, | |
| "valid_targets_min": 11083 | |
| }, | |
| { | |
| "epoch": 4.8747346072186835, | |
| "grad_norm": 0.26926970833953917, | |
| "learning_rate": 1.0282037331607167e-05, | |
| "loss": 0.165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09153994917869568, | |
| "step": 1150, | |
| "valid_targets_mean": 12938.4, | |
| "valid_targets_min": 6938 | |
| }, | |
| { | |
| "epoch": 4.895966029723992, | |
| "grad_norm": 0.2475631115627024, | |
| "learning_rate": 1.0097805740318797e-05, | |
| "loss": 0.1613, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06908365339040756, | |
| "step": 1155, | |
| "valid_targets_mean": 14550.2, | |
| "valid_targets_min": 2924 | |
| }, | |
| { | |
| "epoch": 4.917197452229299, | |
| "grad_norm": 0.26381821539779826, | |
| "learning_rate": 9.914680593273e-06, | |
| "loss": 0.1855, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0847349539399147, | |
| "step": 1160, | |
| "valid_targets_mean": 16227.0, | |
| "valid_targets_min": 2970 | |
| }, | |
| { | |
| "epoch": 4.9384288747346075, | |
| "grad_norm": 0.2536112248092482, | |
| "learning_rate": 9.732682352375418e-06, | |
| "loss": 0.1692, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07469794154167175, | |
| "step": 1165, | |
| "valid_targets_mean": 16989.6, | |
| "valid_targets_min": 1639 | |
| }, | |
| { | |
| "epoch": 4.959660297239915, | |
| "grad_norm": 0.26178169003572327, | |
| "learning_rate": 9.551831353614272e-06, | |
| "loss": 0.1666, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0636444240808487, | |
| "step": 1170, | |
| "valid_targets_mean": 13780.4, | |
| "valid_targets_min": 5417 | |
| }, | |
| { | |
| "epoch": 4.980891719745223, | |
| "grad_norm": 0.292574119747484, | |
| "learning_rate": 9.372147804788063e-06, | |
| "loss": 0.1838, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10308250784873962, | |
| "step": 1175, | |
| "valid_targets_mean": 15759.5, | |
| "valid_targets_min": 6097 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.3633457948840684, | |
| "learning_rate": 9.193651783247616e-06, | |
| "loss": 0.1652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1370236575603485, | |
| "step": 1180, | |
| "valid_targets_mean": 13572.6, | |
| "valid_targets_min": 8150 | |
| }, | |
| { | |
| "epoch": 5.021231422505308, | |
| "grad_norm": 0.32191020526943465, | |
| "learning_rate": 9.016363233652686e-06, | |
| "loss": 0.1657, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10346332937479019, | |
| "step": 1185, | |
| "valid_targets_mean": 14068.4, | |
| "valid_targets_min": 4870 | |
| }, | |
| { | |
| "epoch": 5.042462845010616, | |
| "grad_norm": 0.32642486899462897, | |
| "learning_rate": 8.840301965743405e-06, | |
| "loss": 0.1813, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09915921092033386, | |
| "step": 1190, | |
| "valid_targets_mean": 11485.2, | |
| "valid_targets_min": 5336 | |
| }, | |
| { | |
| "epoch": 5.063694267515924, | |
| "grad_norm": 0.28541664235799485, | |
| "learning_rate": 8.665487652126785e-06, | |
| "loss": 0.1678, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05272082984447479, | |
| "step": 1195, | |
| "valid_targets_mean": 14355.5, | |
| "valid_targets_min": 6036 | |
| }, | |
| { | |
| "epoch": 5.084925690021231, | |
| "grad_norm": 0.290919100504366, | |
| "learning_rate": 8.491939826078552e-06, | |
| "loss": 0.1691, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08834843337535858, | |
| "step": 1200, | |
| "valid_targets_mean": 12843.2, | |
| "valid_targets_min": 6392 | |
| }, | |
| { | |
| "epoch": 5.10615711252654, | |
| "grad_norm": 0.24588022878035837, | |
| "learning_rate": 8.319677879360566e-06, | |
| "loss": 0.1662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06265243887901306, | |
| "step": 1205, | |
| "valid_targets_mean": 14097.9, | |
| "valid_targets_min": 1479 | |
| }, | |
| { | |
| "epoch": 5.127388535031847, | |
| "grad_norm": 0.26986615918502915, | |
| "learning_rate": 8.148721060054026e-06, | |
| "loss": 0.1576, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09268582612276077, | |
| "step": 1210, | |
| "valid_targets_mean": 18160.4, | |
| "valid_targets_min": 10243 | |
| }, | |
| { | |
| "epoch": 5.148619957537155, | |
| "grad_norm": 0.2900102170992239, | |
| "learning_rate": 7.979088470408743e-06, | |
| "loss": 0.1633, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0702872946858406, | |
| "step": 1215, | |
| "valid_targets_mean": 13231.0, | |
| "valid_targets_min": 6621 | |
| }, | |
| { | |
| "epoch": 5.169851380042463, | |
| "grad_norm": 0.2778789233044835, | |
| "learning_rate": 7.81079906470872e-06, | |
| "loss": 0.1707, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08230331540107727, | |
| "step": 1220, | |
| "valid_targets_mean": 11612.5, | |
| "valid_targets_min": 4938 | |
| }, | |
| { | |
| "epoch": 5.191082802547771, | |
| "grad_norm": 0.2746339546722902, | |
| "learning_rate": 7.643871647154212e-06, | |
| "loss": 0.1675, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07877236604690552, | |
| "step": 1225, | |
| "valid_targets_mean": 13966.2, | |
| "valid_targets_min": 5918 | |
| }, | |
| { | |
| "epoch": 5.2123142250530785, | |
| "grad_norm": 0.29827654767268313, | |
| "learning_rate": 7.478324869760665e-06, | |
| "loss": 0.1598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08680558204650879, | |
| "step": 1230, | |
| "valid_targets_mean": 14225.1, | |
| "valid_targets_min": 6634 | |
| }, | |
| { | |
| "epoch": 5.233545647558387, | |
| "grad_norm": 0.3635364050170567, | |
| "learning_rate": 7.314177230274522e-06, | |
| "loss": 0.1498, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07186665385961533, | |
| "step": 1235, | |
| "valid_targets_mean": 14451.9, | |
| "valid_targets_min": 7291 | |
| }, | |
| { | |
| "epoch": 5.254777070063694, | |
| "grad_norm": 0.26514167860622967, | |
| "learning_rate": 7.151447070106372e-06, | |
| "loss": 0.1557, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0701196938753128, | |
| "step": 1240, | |
| "valid_targets_mean": 13301.2, | |
| "valid_targets_min": 3783 | |
| }, | |
| { | |
| "epoch": 5.2760084925690025, | |
| "grad_norm": 0.26324169878744724, | |
| "learning_rate": 6.990152572281523e-06, | |
| "loss": 0.1682, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0794605165719986, | |
| "step": 1245, | |
| "valid_targets_mean": 16191.2, | |
| "valid_targets_min": 8450 | |
| }, | |
| { | |
| "epoch": 5.29723991507431, | |
| "grad_norm": 0.2556220773865451, | |
| "learning_rate": 6.830311759408275e-06, | |
| "loss": 0.1478, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0679427832365036, | |
| "step": 1250, | |
| "valid_targets_mean": 19563.9, | |
| "valid_targets_min": 12610 | |
| }, | |
| { | |
| "epoch": 5.318471337579618, | |
| "grad_norm": 0.2512037430973632, | |
| "learning_rate": 6.671942491664128e-06, | |
| "loss": 0.1583, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08314774930477142, | |
| "step": 1255, | |
| "valid_targets_mean": 17775.9, | |
| "valid_targets_min": 6844 | |
| }, | |
| { | |
| "epoch": 5.339702760084926, | |
| "grad_norm": 0.301451978575514, | |
| "learning_rate": 6.515062464800139e-06, | |
| "loss": 0.1617, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09716193377971649, | |
| "step": 1260, | |
| "valid_targets_mean": 13520.8, | |
| "valid_targets_min": 6446 | |
| }, | |
| { | |
| "epoch": 5.360934182590234, | |
| "grad_norm": 0.3033561761745026, | |
| "learning_rate": 6.359689208163635e-06, | |
| "loss": 0.1786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09750799834728241, | |
| "step": 1265, | |
| "valid_targets_mean": 13563.6, | |
| "valid_targets_min": 6054 | |
| }, | |
| { | |
| "epoch": 5.382165605095541, | |
| "grad_norm": 0.2867076697834142, | |
| "learning_rate": 6.205840082739538e-06, | |
| "loss": 0.1704, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08606909960508347, | |
| "step": 1270, | |
| "valid_targets_mean": 15594.4, | |
| "valid_targets_min": 7437 | |
| }, | |
| { | |
| "epoch": 5.40339702760085, | |
| "grad_norm": 0.2902611080814107, | |
| "learning_rate": 6.053532279210494e-06, | |
| "loss": 0.1819, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09306415915489197, | |
| "step": 1275, | |
| "valid_targets_mean": 13659.0, | |
| "valid_targets_min": 6067 | |
| }, | |
| { | |
| "epoch": 5.424628450106157, | |
| "grad_norm": 0.3138642504116254, | |
| "learning_rate": 5.90278281603605e-06, | |
| "loss": 0.1516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06723940372467041, | |
| "step": 1280, | |
| "valid_targets_mean": 12938.2, | |
| "valid_targets_min": 7569 | |
| }, | |
| { | |
| "epoch": 5.445859872611465, | |
| "grad_norm": 0.37944831844559856, | |
| "learning_rate": 5.753608537551023e-06, | |
| "loss": 0.1751, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08615720272064209, | |
| "step": 1285, | |
| "valid_targets_mean": 20076.0, | |
| "valid_targets_min": 8700 | |
| }, | |
| { | |
| "epoch": 5.467091295116773, | |
| "grad_norm": 0.30159242882323506, | |
| "learning_rate": 5.606026112083383e-06, | |
| "loss": 0.172, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09909145534038544, | |
| "step": 1290, | |
| "valid_targets_mean": 16584.8, | |
| "valid_targets_min": 7272 | |
| }, | |
| { | |
| "epoch": 5.488322717622081, | |
| "grad_norm": 0.29081583680889445, | |
| "learning_rate": 5.460052030091782e-06, | |
| "loss": 0.1669, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0889005959033966, | |
| "step": 1295, | |
| "valid_targets_mean": 17472.0, | |
| "valid_targets_min": 8288 | |
| }, | |
| { | |
| "epoch": 5.509554140127388, | |
| "grad_norm": 0.2607145273144808, | |
| "learning_rate": 5.315702602322943e-06, | |
| "loss": 0.159, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0712738037109375, | |
| "step": 1300, | |
| "valid_targets_mean": 14735.1, | |
| "valid_targets_min": 7641 | |
| }, | |
| { | |
| "epoch": 5.530785562632697, | |
| "grad_norm": 0.29501459858586015, | |
| "learning_rate": 5.1729939579891476e-06, | |
| "loss": 0.162, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07912551611661911, | |
| "step": 1305, | |
| "valid_targets_mean": 13685.9, | |
| "valid_targets_min": 7765 | |
| }, | |
| { | |
| "epoch": 5.552016985138004, | |
| "grad_norm": 0.27509948934796713, | |
| "learning_rate": 5.031942042966e-06, | |
| "loss": 0.1647, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08225835859775543, | |
| "step": 1310, | |
| "valid_targets_mean": 16989.0, | |
| "valid_targets_min": 8083 | |
| }, | |
| { | |
| "epoch": 5.573248407643312, | |
| "grad_norm": 0.22169708987849615, | |
| "learning_rate": 4.892562618010684e-06, | |
| "loss": 0.1361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04339899122714996, | |
| "step": 1315, | |
| "valid_targets_mean": 17842.2, | |
| "valid_targets_min": 8176 | |
| }, | |
| { | |
| "epoch": 5.59447983014862, | |
| "grad_norm": 0.3559664931393158, | |
| "learning_rate": 4.754871257000888e-06, | |
| "loss": 0.1583, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11029928922653198, | |
| "step": 1320, | |
| "valid_targets_mean": 14398.4, | |
| "valid_targets_min": 8162 | |
| }, | |
| { | |
| "epoch": 5.615711252653928, | |
| "grad_norm": 0.2679685791920965, | |
| "learning_rate": 4.618883345194627e-06, | |
| "loss": 0.1579, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09200026094913483, | |
| "step": 1325, | |
| "valid_targets_mean": 17098.9, | |
| "valid_targets_min": 7659 | |
| }, | |
| { | |
| "epoch": 5.6369426751592355, | |
| "grad_norm": 0.3057411211787925, | |
| "learning_rate": 4.484614077511153e-06, | |
| "loss": 0.1633, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08388785272836685, | |
| "step": 1330, | |
| "valid_targets_mean": 13349.4, | |
| "valid_targets_min": 7026 | |
| }, | |
| { | |
| "epoch": 5.658174097664544, | |
| "grad_norm": 0.2888169299715042, | |
| "learning_rate": 4.352078456833082e-06, | |
| "loss": 0.1719, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10811308026313782, | |
| "step": 1335, | |
| "valid_targets_mean": 15849.9, | |
| "valid_targets_min": 5320 | |
| }, | |
| { | |
| "epoch": 5.679405520169851, | |
| "grad_norm": 0.28951569017216344, | |
| "learning_rate": 4.221291292330036e-06, | |
| "loss": 0.1775, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1030922383069992, | |
| "step": 1340, | |
| "valid_targets_mean": 15074.6, | |
| "valid_targets_min": 9428 | |
| }, | |
| { | |
| "epoch": 5.7006369426751595, | |
| "grad_norm": 0.27915439610100806, | |
| "learning_rate": 4.0922671978039055e-06, | |
| "loss": 0.1676, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06682918220758438, | |
| "step": 1345, | |
| "valid_targets_mean": 13675.4, | |
| "valid_targets_min": 5223 | |
| }, | |
| { | |
| "epoch": 5.721868365180467, | |
| "grad_norm": 0.2747466866708511, | |
| "learning_rate": 3.965020590055934e-06, | |
| "loss": 0.1975, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10387305170297623, | |
| "step": 1350, | |
| "valid_targets_mean": 16905.1, | |
| "valid_targets_min": 10448 | |
| }, | |
| { | |
| "epoch": 5.743099787685775, | |
| "grad_norm": 0.39319047182394823, | |
| "learning_rate": 3.839565687275835e-06, | |
| "loss": 0.1555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10636863112449646, | |
| "step": 1355, | |
| "valid_targets_mean": 10298.1, | |
| "valid_targets_min": 5818 | |
| }, | |
| { | |
| "epoch": 5.764331210191083, | |
| "grad_norm": 0.2508928317334097, | |
| "learning_rate": 3.715916507453079e-06, | |
| "loss": 0.1423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06272110342979431, | |
| "step": 1360, | |
| "valid_targets_mean": 14815.4, | |
| "valid_targets_min": 3317 | |
| }, | |
| { | |
| "epoch": 5.785562632696391, | |
| "grad_norm": 0.2958681304685539, | |
| "learning_rate": 3.5940868668105644e-06, | |
| "loss": 0.1408, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07483332604169846, | |
| "step": 1365, | |
| "valid_targets_mean": 11648.4, | |
| "valid_targets_min": 5762 | |
| }, | |
| { | |
| "epoch": 5.806794055201698, | |
| "grad_norm": 0.30333009062567456, | |
| "learning_rate": 3.4740903782608416e-06, | |
| "loss": 0.1697, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08001961559057236, | |
| "step": 1370, | |
| "valid_targets_mean": 15420.4, | |
| "valid_targets_min": 5495 | |
| }, | |
| { | |
| "epoch": 5.828025477707007, | |
| "grad_norm": 0.27437345080245856, | |
| "learning_rate": 3.3559404498850245e-06, | |
| "loss": 0.1836, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08367906510829926, | |
| "step": 1375, | |
| "valid_targets_mean": 17487.1, | |
| "valid_targets_min": 10947 | |
| }, | |
| { | |
| "epoch": 5.849256900212314, | |
| "grad_norm": 0.33157275873211317, | |
| "learning_rate": 3.2396502834346277e-06, | |
| "loss": 0.158, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10356227308511734, | |
| "step": 1380, | |
| "valid_targets_mean": 13732.8, | |
| "valid_targets_min": 5999 | |
| }, | |
| { | |
| "epoch": 5.870488322717622, | |
| "grad_norm": 0.25499987617159037, | |
| "learning_rate": 3.1252328728564206e-06, | |
| "loss": 0.1609, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06108412891626358, | |
| "step": 1385, | |
| "valid_targets_mean": 13659.1, | |
| "valid_targets_min": 6581 | |
| }, | |
| { | |
| "epoch": 5.89171974522293, | |
| "grad_norm": 0.2754456032364648, | |
| "learning_rate": 3.0127010028405303e-06, | |
| "loss": 0.1604, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07066244632005692, | |
| "step": 1390, | |
| "valid_targets_mean": 16501.9, | |
| "valid_targets_min": 7600 | |
| }, | |
| { | |
| "epoch": 5.912951167728238, | |
| "grad_norm": 0.3154111494075387, | |
| "learning_rate": 2.9020672473919107e-06, | |
| "loss": 0.1683, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11153072118759155, | |
| "step": 1395, | |
| "valid_targets_mean": 12126.4, | |
| "valid_targets_min": 1587 | |
| }, | |
| { | |
| "epoch": 5.934182590233545, | |
| "grad_norm": 0.28035453553485096, | |
| "learning_rate": 2.7933439684253616e-06, | |
| "loss": 0.1706, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06870816648006439, | |
| "step": 1400, | |
| "valid_targets_mean": 13538.0, | |
| "valid_targets_min": 6607 | |
| }, | |
| { | |
| "epoch": 5.955414012738854, | |
| "grad_norm": 0.3259723507914851, | |
| "learning_rate": 2.6865433143842356e-06, | |
| "loss": 0.1693, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10865054279565811, | |
| "step": 1405, | |
| "valid_targets_mean": 12518.4, | |
| "valid_targets_min": 4916 | |
| }, | |
| { | |
| "epoch": 5.976645435244161, | |
| "grad_norm": 0.334412099283546, | |
| "learning_rate": 2.5816772188830098e-06, | |
| "loss": 0.165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08065322041511536, | |
| "step": 1410, | |
| "valid_targets_mean": 12634.0, | |
| "valid_targets_min": 6610 | |
| }, | |
| { | |
| "epoch": 5.997876857749469, | |
| "grad_norm": 0.2640023033609606, | |
| "learning_rate": 2.4787573993738524e-06, | |
| "loss": 0.1631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.048118408769369125, | |
| "step": 1415, | |
| "valid_targets_mean": 10203.4, | |
| "valid_targets_min": 1607 | |
| }, | |
| { | |
| "epoch": 6.016985138004246, | |
| "grad_norm": 0.2714448832343656, | |
| "learning_rate": 2.377795355837349e-06, | |
| "loss": 0.1523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07569775730371475, | |
| "step": 1420, | |
| "valid_targets_mean": 13937.0, | |
| "valid_targets_min": 4540 | |
| }, | |
| { | |
| "epoch": 6.038216560509555, | |
| "grad_norm": 0.3472240836287177, | |
| "learning_rate": 2.2788023694975236e-06, | |
| "loss": 0.1663, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09323162585496902, | |
| "step": 1425, | |
| "valid_targets_mean": 15866.6, | |
| "valid_targets_min": 3141 | |
| }, | |
| { | |
| "epoch": 6.059447983014862, | |
| "grad_norm": 0.2350240326376912, | |
| "learning_rate": 2.1817895015613134e-06, | |
| "loss": 0.1575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07922834157943726, | |
| "step": 1430, | |
| "valid_targets_mean": 18886.6, | |
| "valid_targets_min": 9895 | |
| }, | |
| { | |
| "epoch": 6.08067940552017, | |
| "grad_norm": 0.36818142115866936, | |
| "learning_rate": 2.086767591982608e-06, | |
| "loss": 0.1529, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07837212830781937, | |
| "step": 1435, | |
| "valid_targets_mean": 12160.6, | |
| "valid_targets_min": 3368 | |
| }, | |
| { | |
| "epoch": 6.101910828025478, | |
| "grad_norm": 0.2856655219402555, | |
| "learning_rate": 1.9937472582510243e-06, | |
| "loss": 0.1684, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09155093133449554, | |
| "step": 1440, | |
| "valid_targets_mean": 15443.0, | |
| "valid_targets_min": 5076 | |
| }, | |
| { | |
| "epoch": 6.123142250530786, | |
| "grad_norm": 0.2547340160025767, | |
| "learning_rate": 1.902738894205547e-06, | |
| "loss": 0.1554, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.059237219393253326, | |
| "step": 1445, | |
| "valid_targets_mean": 14534.0, | |
| "valid_targets_min": 5035 | |
| }, | |
| { | |
| "epoch": 6.144373673036093, | |
| "grad_norm": 0.3002693947204139, | |
| "learning_rate": 1.8137526688731365e-06, | |
| "loss": 0.1596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0889972448348999, | |
| "step": 1450, | |
| "valid_targets_mean": 16488.1, | |
| "valid_targets_min": 8118 | |
| }, | |
| { | |
| "epoch": 6.165605095541402, | |
| "grad_norm": 0.298982073220577, | |
| "learning_rate": 1.7267985253324803e-06, | |
| "loss": 0.1534, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07069272547960281, | |
| "step": 1455, | |
| "valid_targets_mean": 15787.2, | |
| "valid_targets_min": 6470 | |
| }, | |
| { | |
| "epoch": 6.186836518046709, | |
| "grad_norm": 0.32579161912478005, | |
| "learning_rate": 1.641886179602974e-06, | |
| "loss": 0.1738, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07076440751552582, | |
| "step": 1460, | |
| "valid_targets_mean": 14006.6, | |
| "valid_targets_min": 1307 | |
| }, | |
| { | |
| "epoch": 6.208067940552017, | |
| "grad_norm": 0.3669756453303981, | |
| "learning_rate": 1.5590251195590811e-06, | |
| "loss": 0.1723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06793823093175888, | |
| "step": 1465, | |
| "valid_targets_mean": 15177.5, | |
| "valid_targets_min": 2535 | |
| }, | |
| { | |
| "epoch": 6.229299363057325, | |
| "grad_norm": 0.2450071301139486, | |
| "learning_rate": 1.4782246038701865e-06, | |
| "loss": 0.1708, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06423592567443848, | |
| "step": 1470, | |
| "valid_targets_mean": 17624.6, | |
| "valid_targets_min": 9107 | |
| }, | |
| { | |
| "epoch": 6.250530785562633, | |
| "grad_norm": 0.29353402854137134, | |
| "learning_rate": 1.3994936609660493e-06, | |
| "loss": 0.1735, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06590164452791214, | |
| "step": 1475, | |
| "valid_targets_mean": 11156.4, | |
| "valid_targets_min": 5149 | |
| }, | |
| { | |
| "epoch": 6.2717622080679405, | |
| "grad_norm": 0.3270038830460443, | |
| "learning_rate": 1.3228410880280084e-06, | |
| "loss": 0.1719, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09932979941368103, | |
| "step": 1480, | |
| "valid_targets_mean": 13644.0, | |
| "valid_targets_min": 6297 | |
| }, | |
| { | |
| "epoch": 6.292993630573249, | |
| "grad_norm": 0.2789895464186324, | |
| "learning_rate": 1.248275450005987e-06, | |
| "loss": 0.158, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08671200275421143, | |
| "step": 1485, | |
| "valid_targets_mean": 17008.0, | |
| "valid_targets_min": 6585 | |
| }, | |
| { | |
| "epoch": 6.314225053078556, | |
| "grad_norm": 0.2922442549795321, | |
| "learning_rate": 1.1758050786614872e-06, | |
| "loss": 0.1674, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08488278090953827, | |
| "step": 1490, | |
| "valid_targets_mean": 15370.1, | |
| "valid_targets_min": 8217 | |
| }, | |
| { | |
| "epoch": 6.3354564755838645, | |
| "grad_norm": 0.2582229867861875, | |
| "learning_rate": 1.1054380716366064e-06, | |
| "loss": 0.1698, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06779177486896515, | |
| "step": 1495, | |
| "valid_targets_mean": 14717.9, | |
| "valid_targets_min": 5041 | |
| }, | |
| { | |
| "epoch": 6.356687898089172, | |
| "grad_norm": 0.2331255850760353, | |
| "learning_rate": 1.0371822915492414e-06, | |
| "loss": 0.1568, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07795362919569016, | |
| "step": 1500, | |
| "valid_targets_mean": 16383.0, | |
| "valid_targets_min": 8819 | |
| }, | |
| { | |
| "epoch": 6.37791932059448, | |
| "grad_norm": 0.2892889195801475, | |
| "learning_rate": 9.710453651145335e-07, | |
| "loss": 0.1634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07211548089981079, | |
| "step": 1505, | |
| "valid_targets_mean": 13909.4, | |
| "valid_targets_min": 634 | |
| }, | |
| { | |
| "epoch": 6.399150743099788, | |
| "grad_norm": 0.26060830596236895, | |
| "learning_rate": 9.070346822926846e-07, | |
| "loss": 0.1658, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09147711843252182, | |
| "step": 1510, | |
| "valid_targets_mean": 16547.1, | |
| "valid_targets_min": 8024 | |
| }, | |
| { | |
| "epoch": 6.420382165605096, | |
| "grad_norm": 0.26764389867523897, | |
| "learning_rate": 8.451573954632186e-07, | |
| "loss": 0.1619, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08518597483634949, | |
| "step": 1515, | |
| "valid_targets_mean": 16291.4, | |
| "valid_targets_min": 3623 | |
| }, | |
| { | |
| "epoch": 6.441613588110403, | |
| "grad_norm": 0.30728258811352377, | |
| "learning_rate": 7.854204186257952e-07, | |
| "loss": 0.1543, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1069483608007431, | |
| "step": 1520, | |
| "valid_targets_mean": 14551.8, | |
| "valid_targets_min": 7637 | |
| }, | |
| { | |
| "epoch": 6.462845010615712, | |
| "grad_norm": 0.27808926869463985, | |
| "learning_rate": 7.278304266276625e-07, | |
| "loss": 0.1555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06044379994273186, | |
| "step": 1525, | |
| "valid_targets_mean": 13939.4, | |
| "valid_targets_min": 5659 | |
| }, | |
| { | |
| "epoch": 6.484076433121019, | |
| "grad_norm": 0.23829230423437128, | |
| "learning_rate": 6.723938544178232e-07, | |
| "loss": 0.1524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06626199930906296, | |
| "step": 1530, | |
| "valid_targets_mean": 18216.4, | |
| "valid_targets_min": 6292 | |
| }, | |
| { | |
| "epoch": 6.505307855626327, | |
| "grad_norm": 0.28068157891206974, | |
| "learning_rate": 6.191168963280136e-07, | |
| "loss": 0.1545, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07529743015766144, | |
| "step": 1535, | |
| "valid_targets_mean": 13190.6, | |
| "valid_targets_min": 5603 | |
| }, | |
| { | |
| "epoch": 6.526539278131635, | |
| "grad_norm": 0.26745162960671476, | |
| "learning_rate": 5.680055053805622e-07, | |
| "loss": 0.1439, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0684957280755043, | |
| "step": 1540, | |
| "valid_targets_mean": 15671.4, | |
| "valid_targets_min": 9088 | |
| }, | |
| { | |
| "epoch": 6.547770700636943, | |
| "grad_norm": 0.3186094467001062, | |
| "learning_rate": 5.190653926232169e-07, | |
| "loss": 0.1787, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09460516273975372, | |
| "step": 1545, | |
| "valid_targets_mean": 13747.5, | |
| "valid_targets_min": 6715 | |
| }, | |
| { | |
| "epoch": 6.56900212314225, | |
| "grad_norm": 0.2830669692378638, | |
| "learning_rate": 4.723020264910139e-07, | |
| "loss": 0.1493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10172918438911438, | |
| "step": 1550, | |
| "valid_targets_mean": 19980.6, | |
| "valid_targets_min": 13214 | |
| }, | |
| { | |
| "epoch": 6.590233545647559, | |
| "grad_norm": 0.3388742629588612, | |
| "learning_rate": 4.2772063219523875e-07, | |
| "loss": 0.1871, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06810663640499115, | |
| "step": 1555, | |
| "valid_targets_mean": 11326.8, | |
| "valid_targets_min": 1387 | |
| }, | |
| { | |
| "epoch": 6.611464968152866, | |
| "grad_norm": 0.25098732861510203, | |
| "learning_rate": 3.853261911395834e-07, | |
| "loss": 0.162, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08643859624862671, | |
| "step": 1560, | |
| "valid_targets_mean": 17087.6, | |
| "valid_targets_min": 4205 | |
| }, | |
| { | |
| "epoch": 6.632696390658174, | |
| "grad_norm": 0.25308441903042767, | |
| "learning_rate": 3.4512344036353727e-07, | |
| "loss": 0.1771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0752149447798729, | |
| "step": 1565, | |
| "valid_targets_mean": 14135.8, | |
| "valid_targets_min": 6446 | |
| }, | |
| { | |
| "epoch": 6.653927813163482, | |
| "grad_norm": 0.27008472046636767, | |
| "learning_rate": 3.071168720130779e-07, | |
| "loss": 0.1496, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.055712342262268066, | |
| "step": 1570, | |
| "valid_targets_mean": 10874.0, | |
| "valid_targets_min": 1381 | |
| }, | |
| { | |
| "epoch": 6.67515923566879, | |
| "grad_norm": 0.25724134253077174, | |
| "learning_rate": 2.7131073283873654e-07, | |
| "loss": 0.1573, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.059917815029621124, | |
| "step": 1575, | |
| "valid_targets_mean": 14455.5, | |
| "valid_targets_min": 6049 | |
| }, | |
| { | |
| "epoch": 6.6963906581740975, | |
| "grad_norm": 0.27550494093570005, | |
| "learning_rate": 2.3770902372107772e-07, | |
| "loss": 0.1609, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0692015290260315, | |
| "step": 1580, | |
| "valid_targets_mean": 13820.6, | |
| "valid_targets_min": 4902 | |
| }, | |
| { | |
| "epoch": 6.717622080679406, | |
| "grad_norm": 0.2523704810311892, | |
| "learning_rate": 2.0631549922364824e-07, | |
| "loss": 0.1427, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06639319658279419, | |
| "step": 1585, | |
| "valid_targets_mean": 11344.8, | |
| "valid_targets_min": 5619 | |
| }, | |
| { | |
| "epoch": 6.738853503184713, | |
| "grad_norm": 0.29993361280228925, | |
| "learning_rate": 1.7713366717344803e-07, | |
| "loss": 0.1706, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07957091927528381, | |
| "step": 1590, | |
| "valid_targets_mean": 9983.5, | |
| "valid_targets_min": 4996 | |
| }, | |
| { | |
| "epoch": 6.7600849256900215, | |
| "grad_norm": 0.2502043937813371, | |
| "learning_rate": 1.5016678826899055e-07, | |
| "loss": 0.1495, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07833079993724823, | |
| "step": 1595, | |
| "valid_targets_mean": 20954.8, | |
| "valid_targets_min": 14257 | |
| }, | |
| { | |
| "epoch": 6.781316348195329, | |
| "grad_norm": 0.25034032952355, | |
| "learning_rate": 1.2541787571594522e-07, | |
| "loss": 0.1599, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07827061414718628, | |
| "step": 1600, | |
| "valid_targets_mean": 15005.9, | |
| "valid_targets_min": 9166 | |
| }, | |
| { | |
| "epoch": 6.802547770700637, | |
| "grad_norm": 0.22039819025048707, | |
| "learning_rate": 1.0288969489046008e-07, | |
| "loss": 0.1417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.048642657697200775, | |
| "step": 1605, | |
| "valid_targets_mean": 19840.2, | |
| "valid_targets_min": 11252 | |
| }, | |
| { | |
| "epoch": 6.823779193205945, | |
| "grad_norm": 0.2588836654976448, | |
| "learning_rate": 8.258476303016017e-08, | |
| "loss": 0.148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07373689115047455, | |
| "step": 1610, | |
| "valid_targets_mean": 14245.9, | |
| "valid_targets_min": 8271 | |
| }, | |
| { | |
| "epoch": 6.845010615711253, | |
| "grad_norm": 0.2926967992400304, | |
| "learning_rate": 6.45053489528813e-08, | |
| "loss": 0.1707, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08257532864809036, | |
| "step": 1615, | |
| "valid_targets_mean": 17754.5, | |
| "valid_targets_min": 10599 | |
| }, | |
| { | |
| "epoch": 6.86624203821656, | |
| "grad_norm": 0.2838687369452468, | |
| "learning_rate": 4.8653472803159576e-08, | |
| "loss": 0.1737, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0895313173532486, | |
| "step": 1620, | |
| "valid_targets_mean": 16994.5, | |
| "valid_targets_min": 8175 | |
| }, | |
| { | |
| "epoch": 6.887473460721869, | |
| "grad_norm": 0.2750729053185284, | |
| "learning_rate": 3.503090582650081e-08, | |
| "loss": 0.1656, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07482630014419556, | |
| "step": 1625, | |
| "valid_targets_mean": 16871.8, | |
| "valid_targets_min": 5469 | |
| }, | |
| { | |
| "epoch": 6.908704883227176, | |
| "grad_norm": 0.3270451602832223, | |
| "learning_rate": 2.3639170171474434e-08, | |
| "loss": 0.1578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08098286390304565, | |
| "step": 1630, | |
| "valid_targets_mean": 13039.2, | |
| "valid_targets_min": 4344 | |
| }, | |
| { | |
| "epoch": 6.929936305732484, | |
| "grad_norm": 0.3057446527787021, | |
| "learning_rate": 1.4479538719622822e-08, | |
| "loss": 0.1607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07232065498828888, | |
| "step": 1635, | |
| "valid_targets_mean": 12158.9, | |
| "valid_targets_min": 5313 | |
| }, | |
| { | |
| "epoch": 6.951167728237792, | |
| "grad_norm": 0.3010327503890946, | |
| "learning_rate": 7.553034943243998e-09, | |
| "loss": 0.174, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10355747491121292, | |
| "step": 1640, | |
| "valid_targets_mean": 16122.8, | |
| "valid_targets_min": 7424 | |
| }, | |
| { | |
| "epoch": 6.9723991507431, | |
| "grad_norm": 0.296512581796535, | |
| "learning_rate": 2.8604327910186634e-09, | |
| "loss": 0.1679, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.055927574634552, | |
| "step": 1645, | |
| "valid_targets_mean": 14152.6, | |
| "valid_targets_min": 3364 | |
| }, | |
| { | |
| "epoch": 6.993630573248407, | |
| "grad_norm": 0.29446698895863765, | |
| "learning_rate": 4.02256601546025e-10, | |
| "loss": 0.1742, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0790117084980011, | |
| "step": 1650, | |
| "valid_targets_mean": 12653.6, | |
| "valid_targets_min": 5488 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17071497440338135, | |
| "step": 1652, | |
| "total_flos": 1.3627384946606735e+18, | |
| "train_loss": 0.19606392417490914, | |
| "train_runtime": 62933.2387, | |
| "train_samples_per_second": 0.419, | |
| "train_steps_per_second": 0.026, | |
| "valid_targets_mean": 16957.1, | |
| "valid_targets_min": 7571 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1652, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.3627384946606735e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |