penfever
Reset repository without checkpoint dirs
4cf2836
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 1652,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.021231422505307854,
"grad_norm": 6.601170763883668,
"learning_rate": 9.638554216867472e-07,
"loss": 0.5153,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3147011399269104,
"step": 5,
"valid_targets_mean": 14395.1,
"valid_targets_min": 6135
},
{
"epoch": 0.04246284501061571,
"grad_norm": 4.640678277810576,
"learning_rate": 2.168674698795181e-06,
"loss": 0.4921,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18654188513755798,
"step": 10,
"valid_targets_mean": 12533.2,
"valid_targets_min": 4349
},
{
"epoch": 0.06369426751592357,
"grad_norm": 2.8761928107591204,
"learning_rate": 3.3734939759036146e-06,
"loss": 0.4234,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22491103410720825,
"step": 15,
"valid_targets_mean": 13778.4,
"valid_targets_min": 6404
},
{
"epoch": 0.08492569002123142,
"grad_norm": 2.0507756278474343,
"learning_rate": 4.578313253012049e-06,
"loss": 0.4198,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2335529774427414,
"step": 20,
"valid_targets_mean": 13837.5,
"valid_targets_min": 3655
},
{
"epoch": 0.10615711252653928,
"grad_norm": 1.4535039164298584,
"learning_rate": 5.783132530120482e-06,
"loss": 0.4121,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17189857363700867,
"step": 25,
"valid_targets_mean": 12292.6,
"valid_targets_min": 6259
},
{
"epoch": 0.12738853503184713,
"grad_norm": 0.7914310934537628,
"learning_rate": 6.987951807228917e-06,
"loss": 0.3847,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24835364520549774,
"step": 30,
"valid_targets_mean": 14466.2,
"valid_targets_min": 9464
},
{
"epoch": 0.14861995753715498,
"grad_norm": 0.5293723876639078,
"learning_rate": 8.19277108433735e-06,
"loss": 0.3696,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21188116073608398,
"step": 35,
"valid_targets_mean": 14101.8,
"valid_targets_min": 4063
},
{
"epoch": 0.16985138004246284,
"grad_norm": 0.5249820883384718,
"learning_rate": 9.397590361445785e-06,
"loss": 0.3606,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17040115594863892,
"step": 40,
"valid_targets_mean": 14958.1,
"valid_targets_min": 6110
},
{
"epoch": 0.1910828025477707,
"grad_norm": 0.4219272827083708,
"learning_rate": 1.0602409638554219e-05,
"loss": 0.3188,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16639268398284912,
"step": 45,
"valid_targets_mean": 12699.5,
"valid_targets_min": 5064
},
{
"epoch": 0.21231422505307856,
"grad_norm": 0.38858708893114424,
"learning_rate": 1.1807228915662651e-05,
"loss": 0.3358,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18469196557998657,
"step": 50,
"valid_targets_mean": 12852.6,
"valid_targets_min": 4287
},
{
"epoch": 0.23354564755838642,
"grad_norm": 0.3799894773689016,
"learning_rate": 1.3012048192771085e-05,
"loss": 0.3134,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20896856486797333,
"step": 55,
"valid_targets_mean": 15256.2,
"valid_targets_min": 5458
},
{
"epoch": 0.25477707006369427,
"grad_norm": 0.319379398181401,
"learning_rate": 1.4216867469879519e-05,
"loss": 0.2825,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09708239883184433,
"step": 60,
"valid_targets_mean": 14549.4,
"valid_targets_min": 6224
},
{
"epoch": 0.2760084925690021,
"grad_norm": 0.2771017285361385,
"learning_rate": 1.5421686746987955e-05,
"loss": 0.269,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16080784797668457,
"step": 65,
"valid_targets_mean": 15810.6,
"valid_targets_min": 8701
},
{
"epoch": 0.29723991507430997,
"grad_norm": 0.290052395391973,
"learning_rate": 1.6626506024096387e-05,
"loss": 0.2876,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14474868774414062,
"step": 70,
"valid_targets_mean": 13943.5,
"valid_targets_min": 8170
},
{
"epoch": 0.3184713375796178,
"grad_norm": 0.2887366598094193,
"learning_rate": 1.783132530120482e-05,
"loss": 0.3045,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20289857685565948,
"step": 75,
"valid_targets_mean": 19152.2,
"valid_targets_min": 8288
},
{
"epoch": 0.33970276008492567,
"grad_norm": 0.24501889780498082,
"learning_rate": 1.9036144578313255e-05,
"loss": 0.2576,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14412395656108856,
"step": 80,
"valid_targets_mean": 16776.8,
"valid_targets_min": 6292
},
{
"epoch": 0.3609341825902335,
"grad_norm": 0.31757555588060904,
"learning_rate": 2.0240963855421687e-05,
"loss": 0.2864,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15807807445526123,
"step": 85,
"valid_targets_mean": 11288.1,
"valid_targets_min": 5740
},
{
"epoch": 0.3821656050955414,
"grad_norm": 0.21005685787572534,
"learning_rate": 2.1445783132530123e-05,
"loss": 0.2537,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08626846969127655,
"step": 90,
"valid_targets_mean": 15819.5,
"valid_targets_min": 7497
},
{
"epoch": 0.4033970276008493,
"grad_norm": 0.2942954097586065,
"learning_rate": 2.265060240963856e-05,
"loss": 0.2792,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14525042474269867,
"step": 95,
"valid_targets_mean": 14950.8,
"valid_targets_min": 7206
},
{
"epoch": 0.42462845010615713,
"grad_norm": 0.297024377935559,
"learning_rate": 2.3855421686746988e-05,
"loss": 0.2496,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1313462257385254,
"step": 100,
"valid_targets_mean": 12276.5,
"valid_targets_min": 4481
},
{
"epoch": 0.445859872611465,
"grad_norm": 0.3083155449002965,
"learning_rate": 2.5060240963855423e-05,
"loss": 0.2674,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13870102167129517,
"step": 105,
"valid_targets_mean": 8789.2,
"valid_targets_min": 4863
},
{
"epoch": 0.46709129511677283,
"grad_norm": 0.22405694574565216,
"learning_rate": 2.6265060240963856e-05,
"loss": 0.2563,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11005185544490814,
"step": 110,
"valid_targets_mean": 19546.9,
"valid_targets_min": 9253
},
{
"epoch": 0.4883227176220807,
"grad_norm": 0.32976545776333965,
"learning_rate": 2.746987951807229e-05,
"loss": 0.2934,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1696811020374298,
"step": 115,
"valid_targets_mean": 11419.8,
"valid_targets_min": 4938
},
{
"epoch": 0.5095541401273885,
"grad_norm": 0.26726017461928253,
"learning_rate": 2.8674698795180727e-05,
"loss": 0.2538,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0898926854133606,
"step": 120,
"valid_targets_mean": 14191.1,
"valid_targets_min": 3141
},
{
"epoch": 0.5307855626326964,
"grad_norm": 0.26693132304388195,
"learning_rate": 2.9879518072289156e-05,
"loss": 0.2489,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10769972205162048,
"step": 125,
"valid_targets_mean": 15435.2,
"valid_targets_min": 5449
},
{
"epoch": 0.5520169851380042,
"grad_norm": 0.29793681607777117,
"learning_rate": 3.108433734939759e-05,
"loss": 0.256,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13548800349235535,
"step": 130,
"valid_targets_mean": 12220.0,
"valid_targets_min": 4963
},
{
"epoch": 0.5732484076433121,
"grad_norm": 0.2843851218180784,
"learning_rate": 3.228915662650603e-05,
"loss": 0.2449,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08166906237602234,
"step": 135,
"valid_targets_mean": 16374.8,
"valid_targets_min": 7174
},
{
"epoch": 0.5944798301486199,
"grad_norm": 0.2537876339931588,
"learning_rate": 3.3493975903614457e-05,
"loss": 0.2345,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11751864105463028,
"step": 140,
"valid_targets_mean": 10767.6,
"valid_targets_min": 900
},
{
"epoch": 0.6157112526539278,
"grad_norm": 0.26983132136557225,
"learning_rate": 3.4698795180722896e-05,
"loss": 0.2754,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10696007311344147,
"step": 145,
"valid_targets_mean": 15567.0,
"valid_targets_min": 3478
},
{
"epoch": 0.6369426751592356,
"grad_norm": 0.2874455255323601,
"learning_rate": 3.590361445783133e-05,
"loss": 0.2422,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10694894194602966,
"step": 150,
"valid_targets_mean": 14445.2,
"valid_targets_min": 5102
},
{
"epoch": 0.6581740976645435,
"grad_norm": 0.36018854059753647,
"learning_rate": 3.710843373493976e-05,
"loss": 0.2485,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16881927847862244,
"step": 155,
"valid_targets_mean": 10835.9,
"valid_targets_min": 1377
},
{
"epoch": 0.6794055201698513,
"grad_norm": 0.3128091862915849,
"learning_rate": 3.83132530120482e-05,
"loss": 0.2405,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10742859542369843,
"step": 160,
"valid_targets_mean": 12899.6,
"valid_targets_min": 8041
},
{
"epoch": 0.7006369426751592,
"grad_norm": 0.3176035358807374,
"learning_rate": 3.9518072289156625e-05,
"loss": 0.28,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10753054916858673,
"step": 165,
"valid_targets_mean": 14364.8,
"valid_targets_min": 2986
},
{
"epoch": 0.721868365180467,
"grad_norm": 0.27568705715038133,
"learning_rate": 3.9999597743398453e-05,
"loss": 0.2259,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1222403347492218,
"step": 170,
"valid_targets_mean": 15836.6,
"valid_targets_min": 8080
},
{
"epoch": 0.7430997876857749,
"grad_norm": 0.3595846231890364,
"learning_rate": 3.999713956720898e-05,
"loss": 0.2501,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14652863144874573,
"step": 175,
"valid_targets_mean": 17202.6,
"valid_targets_min": 9673
},
{
"epoch": 0.7643312101910829,
"grad_norm": 0.35289985950885067,
"learning_rate": 3.9992446965056756e-05,
"loss": 0.2561,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16108444333076477,
"step": 180,
"valid_targets_mean": 14388.9,
"valid_targets_min": 4858
},
{
"epoch": 0.7855626326963907,
"grad_norm": 0.33155413679799534,
"learning_rate": 3.998552046128038e-05,
"loss": 0.2604,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18604812026023865,
"step": 185,
"valid_targets_mean": 17653.5,
"valid_targets_min": 12562
},
{
"epoch": 0.8067940552016986,
"grad_norm": 0.26974442765891954,
"learning_rate": 3.997636082982853e-05,
"loss": 0.2231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11307717114686966,
"step": 190,
"valid_targets_mean": 14695.4,
"valid_targets_min": 1868
},
{
"epoch": 0.8280254777070064,
"grad_norm": 0.4484022169366936,
"learning_rate": 3.9964969094173506e-05,
"loss": 0.2456,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13788184523582458,
"step": 195,
"valid_targets_mean": 11258.2,
"valid_targets_min": 5920
},
{
"epoch": 0.8492569002123143,
"grad_norm": 0.31884550186901484,
"learning_rate": 3.995134652719684e-05,
"loss": 0.2378,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17065517604351044,
"step": 200,
"valid_targets_mean": 15263.8,
"valid_targets_min": 7771
},
{
"epoch": 0.8704883227176221,
"grad_norm": 0.2912088961412526,
"learning_rate": 3.993549465104712e-05,
"loss": 0.212,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1016058549284935,
"step": 205,
"valid_targets_mean": 11488.8,
"valid_targets_min": 2254
},
{
"epoch": 0.89171974522293,
"grad_norm": 1.2538232123110695,
"learning_rate": 3.991741523696984e-05,
"loss": 0.2266,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0836310163140297,
"step": 210,
"valid_targets_mean": 18902.0,
"valid_targets_min": 7919
},
{
"epoch": 0.9129511677282378,
"grad_norm": 0.3347545564566702,
"learning_rate": 3.989711030510954e-05,
"loss": 0.2398,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12700827419757843,
"step": 215,
"valid_targets_mean": 11326.8,
"valid_targets_min": 5084
},
{
"epoch": 0.9341825902335457,
"grad_norm": 0.2971192130089368,
"learning_rate": 3.987458212428406e-05,
"loss": 0.2243,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1275290548801422,
"step": 220,
"valid_targets_mean": 14325.8,
"valid_targets_min": 5076
},
{
"epoch": 0.9554140127388535,
"grad_norm": 0.33426267528626336,
"learning_rate": 3.984983321173101e-05,
"loss": 0.2403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15455971658229828,
"step": 225,
"valid_targets_mean": 15615.0,
"valid_targets_min": 6466
},
{
"epoch": 0.9766454352441614,
"grad_norm": 0.3247808671944862,
"learning_rate": 3.9822866332826555e-05,
"loss": 0.2246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11736075580120087,
"step": 230,
"valid_targets_mean": 17059.8,
"valid_targets_min": 7224
},
{
"epoch": 0.9978768577494692,
"grad_norm": 0.2812386799146817,
"learning_rate": 3.9793684500776356e-05,
"loss": 0.2331,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1330154538154602,
"step": 235,
"valid_targets_mean": 13021.2,
"valid_targets_min": 5174
},
{
"epoch": 1.0169851380042463,
"grad_norm": 0.2755139866589168,
"learning_rate": 3.976229097627892e-05,
"loss": 0.2037,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10062160342931747,
"step": 240,
"valid_targets_mean": 15395.4,
"valid_targets_min": 7250
},
{
"epoch": 1.0382165605095541,
"grad_norm": 0.30337736123776227,
"learning_rate": 3.972868926716127e-05,
"loss": 0.1971,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08376499265432358,
"step": 245,
"valid_targets_mean": 17409.0,
"valid_targets_min": 7023
},
{
"epoch": 1.059447983014862,
"grad_norm": 0.2915807595114085,
"learning_rate": 3.969288312798693e-05,
"loss": 0.2454,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11872377246618271,
"step": 250,
"valid_targets_mean": 14989.5,
"valid_targets_min": 6930
},
{
"epoch": 1.0806794055201698,
"grad_norm": 0.27726744741730325,
"learning_rate": 3.965487655963647e-05,
"loss": 0.2257,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11883805692195892,
"step": 255,
"valid_targets_mean": 13760.0,
"valid_targets_min": 5931
},
{
"epoch": 1.1019108280254777,
"grad_norm": 0.29991341277236133,
"learning_rate": 3.961467380886042e-05,
"loss": 0.2189,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08793854713439941,
"step": 260,
"valid_targets_mean": 11854.8,
"valid_targets_min": 7223
},
{
"epoch": 1.1231422505307855,
"grad_norm": 0.2811615074598358,
"learning_rate": 3.957227936780476e-05,
"loss": 0.2266,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11964725703001022,
"step": 265,
"valid_targets_mean": 13769.2,
"valid_targets_min": 7965
},
{
"epoch": 1.1443736730360934,
"grad_norm": 0.2810663280814256,
"learning_rate": 3.952769797350899e-05,
"loss": 0.2161,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.109311044216156,
"step": 270,
"valid_targets_mean": 14035.2,
"valid_targets_min": 6319
},
{
"epoch": 1.1656050955414012,
"grad_norm": 0.2518522428935926,
"learning_rate": 3.948093460737679e-05,
"loss": 0.1904,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10714008659124374,
"step": 275,
"valid_targets_mean": 15836.6,
"valid_targets_min": 7593
},
{
"epoch": 1.186836518046709,
"grad_norm": 0.265837889297658,
"learning_rate": 3.943199449461944e-05,
"loss": 0.2426,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11197318136692047,
"step": 280,
"valid_targets_mean": 15728.6,
"valid_targets_min": 8141
},
{
"epoch": 1.208067940552017,
"grad_norm": 0.28084898004784403,
"learning_rate": 3.938088310367199e-05,
"loss": 0.2234,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08627848327159882,
"step": 285,
"valid_targets_mean": 15427.1,
"valid_targets_min": 6842
},
{
"epoch": 1.2292993630573248,
"grad_norm": 0.27746934501090276,
"learning_rate": 3.932760614558218e-05,
"loss": 0.2209,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12175991386175156,
"step": 290,
"valid_targets_mean": 15384.0,
"valid_targets_min": 8328
},
{
"epoch": 1.2505307855626326,
"grad_norm": 0.29525237911812746,
"learning_rate": 3.9272169573372345e-05,
"loss": 0.23,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10661651194095612,
"step": 295,
"valid_targets_mean": 12726.1,
"valid_targets_min": 4463
},
{
"epoch": 1.2717622080679405,
"grad_norm": 0.3428937157148055,
"learning_rate": 3.921457958137421e-05,
"loss": 0.2649,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12638840079307556,
"step": 300,
"valid_targets_mean": 13763.4,
"valid_targets_min": 6285
},
{
"epoch": 1.2929936305732483,
"grad_norm": 0.3714748650604558,
"learning_rate": 3.915484260453679e-05,
"loss": 0.238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12943370640277863,
"step": 305,
"valid_targets_mean": 11842.8,
"valid_targets_min": 2224
},
{
"epoch": 1.3142250530785562,
"grad_norm": 0.310114967686842,
"learning_rate": 3.909296531770732e-05,
"loss": 0.2319,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1138845905661583,
"step": 310,
"valid_targets_mean": 12274.8,
"valid_targets_min": 5680
},
{
"epoch": 1.335456475583864,
"grad_norm": 0.27182436922475456,
"learning_rate": 3.902895463488547e-05,
"loss": 0.2209,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10476358979940414,
"step": 315,
"valid_targets_mean": 14856.8,
"valid_targets_min": 5796
},
{
"epoch": 1.356687898089172,
"grad_norm": 0.31748294870719895,
"learning_rate": 3.896281770845076e-05,
"loss": 0.2109,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10224044322967529,
"step": 320,
"valid_targets_mean": 10133.5,
"valid_targets_min": 5714
},
{
"epoch": 1.3779193205944797,
"grad_norm": 0.3826976146252936,
"learning_rate": 3.8894561928363396e-05,
"loss": 0.2003,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11495161056518555,
"step": 325,
"valid_targets_mean": 15048.9,
"valid_targets_min": 5342
},
{
"epoch": 1.3991507430997876,
"grad_norm": 0.27319461570158804,
"learning_rate": 3.8824194921338516e-05,
"loss": 0.2091,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08650592714548111,
"step": 330,
"valid_targets_mean": 15681.1,
"valid_targets_min": 7221
},
{
"epoch": 1.4203821656050954,
"grad_norm": 0.2792309770737464,
"learning_rate": 3.875172454999402e-05,
"loss": 0.2081,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10807353258132935,
"step": 335,
"valid_targets_mean": 13810.6,
"valid_targets_min": 4893
},
{
"epoch": 1.4416135881104033,
"grad_norm": 0.3182023284508512,
"learning_rate": 3.8677158911972e-05,
"loss": 0.2073,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14491504430770874,
"step": 340,
"valid_targets_mean": 12870.5,
"valid_targets_min": 2474
},
{
"epoch": 1.4628450106157111,
"grad_norm": 0.2729629600919183,
"learning_rate": 3.860050633903395e-05,
"loss": 0.2098,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13041123747825623,
"step": 345,
"valid_targets_mean": 14308.1,
"valid_targets_min": 7717
},
{
"epoch": 1.484076433121019,
"grad_norm": 0.31582590679097317,
"learning_rate": 3.8521775396129824e-05,
"loss": 0.2233,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13939642906188965,
"step": 350,
"valid_targets_mean": 15401.0,
"valid_targets_min": 7556
},
{
"epoch": 1.5053078556263269,
"grad_norm": 0.30106861470253815,
"learning_rate": 3.8440974880440925e-05,
"loss": 0.2165,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10552646219730377,
"step": 355,
"valid_targets_mean": 15419.1,
"valid_targets_min": 3604
},
{
"epoch": 1.5265392781316347,
"grad_norm": 0.24666022628823678,
"learning_rate": 3.835811382039703e-05,
"loss": 0.2098,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09952588379383087,
"step": 360,
"valid_targets_mean": 13430.5,
"valid_targets_min": 6392
},
{
"epoch": 1.5477707006369426,
"grad_norm": 0.23263039306298894,
"learning_rate": 3.827320147466752e-05,
"loss": 0.2201,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10834848880767822,
"step": 365,
"valid_targets_mean": 18282.2,
"valid_targets_min": 10348
},
{
"epoch": 1.5690021231422504,
"grad_norm": 0.2763418285871218,
"learning_rate": 3.818624733112687e-05,
"loss": 0.2437,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09543554484844208,
"step": 370,
"valid_targets_mean": 15127.2,
"valid_targets_min": 4569
},
{
"epoch": 1.5902335456475583,
"grad_norm": 0.2344608159731727,
"learning_rate": 3.809726110579446e-05,
"loss": 0.1932,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10267725586891174,
"step": 375,
"valid_targets_mean": 18113.5,
"valid_targets_min": 5644
},
{
"epoch": 1.611464968152866,
"grad_norm": 0.40210264521938344,
"learning_rate": 3.8006252741748986e-05,
"loss": 0.227,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11398470401763916,
"step": 380,
"valid_targets_mean": 13665.8,
"valid_targets_min": 1344
},
{
"epoch": 1.632696390658174,
"grad_norm": 0.30513744618147365,
"learning_rate": 3.79132324080174e-05,
"loss": 0.2166,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14099963009357452,
"step": 385,
"valid_targets_mean": 14852.2,
"valid_targets_min": 3364
},
{
"epoch": 1.6539278131634818,
"grad_norm": 0.22426713356424843,
"learning_rate": 3.781821049843869e-05,
"loss": 0.1943,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07226351648569107,
"step": 390,
"valid_targets_mean": 18799.4,
"valid_targets_min": 10477
},
{
"epoch": 1.6751592356687897,
"grad_norm": 0.2739515977898349,
"learning_rate": 3.7721197630502485e-05,
"loss": 0.2147,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11230744421482086,
"step": 395,
"valid_targets_mean": 14162.9,
"valid_targets_min": 4448
},
{
"epoch": 1.6963906581740975,
"grad_norm": 0.2920300807449177,
"learning_rate": 3.762220464416266e-05,
"loss": 0.2095,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1347799301147461,
"step": 400,
"valid_targets_mean": 17425.0,
"valid_targets_min": 12687
},
{
"epoch": 1.7176220806794054,
"grad_norm": 0.37395274633289044,
"learning_rate": 3.7521242600626154e-05,
"loss": 0.1993,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09538485109806061,
"step": 405,
"valid_targets_mean": 15063.0,
"valid_targets_min": 7612
},
{
"epoch": 1.7388535031847132,
"grad_norm": 0.330636894297862,
"learning_rate": 3.7418322781117e-05,
"loss": 0.2471,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13609623908996582,
"step": 410,
"valid_targets_mean": 12677.8,
"valid_targets_min": 5740
},
{
"epoch": 1.7600849256900213,
"grad_norm": 0.28342681236643996,
"learning_rate": 3.731345668561577e-05,
"loss": 0.2065,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0960487648844719,
"step": 415,
"valid_targets_mean": 12748.8,
"valid_targets_min": 7296
},
{
"epoch": 1.7813163481953291,
"grad_norm": 0.23683106588934166,
"learning_rate": 3.720665603157464e-05,
"loss": 0.1736,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08303853869438171,
"step": 420,
"valid_targets_mean": 15981.0,
"valid_targets_min": 7057
},
{
"epoch": 1.802547770700637,
"grad_norm": 0.316249617780758,
"learning_rate": 3.7097932752608096e-05,
"loss": 0.219,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1166648119688034,
"step": 425,
"valid_targets_mean": 16572.9,
"valid_targets_min": 11308
},
{
"epoch": 1.8237791932059448,
"grad_norm": 0.25450442249313526,
"learning_rate": 3.698729899715947e-05,
"loss": 0.1986,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11825818568468094,
"step": 430,
"valid_targets_mean": 16501.6,
"valid_targets_min": 5425
},
{
"epoch": 1.8450106157112527,
"grad_norm": 0.2562691834652498,
"learning_rate": 3.687476712714358e-05,
"loss": 0.2078,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08408856391906738,
"step": 435,
"valid_targets_mean": 13867.4,
"valid_targets_min": 5665
},
{
"epoch": 1.8662420382165605,
"grad_norm": 0.26720306087521367,
"learning_rate": 3.676034971656537e-05,
"loss": 0.1934,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1064402386546135,
"step": 440,
"valid_targets_mean": 14102.1,
"valid_targets_min": 6476
},
{
"epoch": 1.8874734607218684,
"grad_norm": 0.21974899342329504,
"learning_rate": 3.664405955011498e-05,
"loss": 0.1917,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08773387968540192,
"step": 445,
"valid_targets_mean": 17870.5,
"valid_targets_min": 9456
},
{
"epoch": 1.9087048832271762,
"grad_norm": 0.30795940792172116,
"learning_rate": 3.652590962173917e-05,
"loss": 0.2353,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14995551109313965,
"step": 450,
"valid_targets_mean": 15358.0,
"valid_targets_min": 8778
},
{
"epoch": 1.929936305732484,
"grad_norm": 0.27504189449803096,
"learning_rate": 3.640591313318944e-05,
"loss": 0.2091,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10541120916604996,
"step": 455,
"valid_targets_mean": 15389.8,
"valid_targets_min": 5133
},
{
"epoch": 1.951167728237792,
"grad_norm": 0.29564860036710783,
"learning_rate": 3.628408349254693e-05,
"loss": 0.202,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10049857199192047,
"step": 460,
"valid_targets_mean": 14645.1,
"valid_targets_min": 6715
},
{
"epoch": 1.9723991507430998,
"grad_norm": 0.25475277572273397,
"learning_rate": 3.616043431272417e-05,
"loss": 0.2048,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07846721261739731,
"step": 465,
"valid_targets_mean": 16528.9,
"valid_targets_min": 6680
},
{
"epoch": 1.9936305732484076,
"grad_norm": 0.26142683635944247,
"learning_rate": 3.603497940994407e-05,
"loss": 0.2125,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09435806423425674,
"step": 470,
"valid_targets_mean": 16962.5,
"valid_targets_min": 8108
},
{
"epoch": 2.0127388535031847,
"grad_norm": 0.24722160377441987,
"learning_rate": 3.59077328021961e-05,
"loss": 0.1976,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08899471163749695,
"step": 475,
"valid_targets_mean": 12891.4,
"valid_targets_min": 9088
},
{
"epoch": 2.0339702760084926,
"grad_norm": 0.2517941803747066,
"learning_rate": 3.577870870766997e-05,
"loss": 0.2029,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09349747002124786,
"step": 480,
"valid_targets_mean": 13435.2,
"valid_targets_min": 6613
},
{
"epoch": 2.0552016985138004,
"grad_norm": 0.2758026420024873,
"learning_rate": 3.5647921543166923e-05,
"loss": 0.1978,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07516678422689438,
"step": 485,
"valid_targets_mean": 16082.9,
"valid_targets_min": 5458
},
{
"epoch": 2.0764331210191083,
"grad_norm": 0.24371704180908865,
"learning_rate": 3.5515385922488846e-05,
"loss": 0.1965,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08999522030353546,
"step": 490,
"valid_targets_mean": 16669.6,
"valid_targets_min": 6356
},
{
"epoch": 2.097664543524416,
"grad_norm": 0.278212723013607,
"learning_rate": 3.5381116654805375e-05,
"loss": 0.1878,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07290489971637726,
"step": 495,
"valid_targets_mean": 15383.8,
"valid_targets_min": 4535
},
{
"epoch": 2.118895966029724,
"grad_norm": 0.28587596752541133,
"learning_rate": 3.524512874299912e-05,
"loss": 0.1913,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1112004891037941,
"step": 500,
"valid_targets_mean": 16063.8,
"valid_targets_min": 8790
},
{
"epoch": 2.140127388535032,
"grad_norm": 0.23694124431153454,
"learning_rate": 3.5107437381989325e-05,
"loss": 0.1988,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06883738934993744,
"step": 505,
"valid_targets_mean": 15695.1,
"valid_targets_min": 5449
},
{
"epoch": 2.1613588110403397,
"grad_norm": 0.2572567842840769,
"learning_rate": 3.4968057957034e-05,
"loss": 0.194,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09729857742786407,
"step": 510,
"valid_targets_mean": 16853.2,
"valid_targets_min": 3854
},
{
"epoch": 2.1825902335456475,
"grad_norm": 0.2897655571941794,
"learning_rate": 3.482700604201086e-05,
"loss": 0.1947,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10338980704545975,
"step": 515,
"valid_targets_mean": 12990.2,
"valid_targets_min": 6250
},
{
"epoch": 2.2038216560509554,
"grad_norm": 0.3345559098061995,
"learning_rate": 3.4684297397677064e-05,
"loss": 0.2124,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11578390747308731,
"step": 520,
"valid_targets_mean": 13486.2,
"valid_targets_min": 5214
},
{
"epoch": 2.225053078556263,
"grad_norm": 0.2647961046337731,
"learning_rate": 3.453994796990823e-05,
"loss": 0.2005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08278575539588928,
"step": 525,
"valid_targets_mean": 15026.2,
"valid_targets_min": 2535
},
{
"epoch": 2.246284501061571,
"grad_norm": 0.29321760721644347,
"learning_rate": 3.439397388791662e-05,
"loss": 0.1842,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10423131287097931,
"step": 530,
"valid_targets_mean": 13677.5,
"valid_targets_min": 4331
},
{
"epoch": 2.267515923566879,
"grad_norm": 0.33439639128696136,
"learning_rate": 3.424639146244898e-05,
"loss": 0.2108,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09143179655075073,
"step": 535,
"valid_targets_mean": 12497.9,
"valid_targets_min": 6576
},
{
"epoch": 2.2887473460721868,
"grad_norm": 0.31293816068157115,
"learning_rate": 3.409721718396395e-05,
"loss": 0.2073,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12323853373527527,
"step": 540,
"valid_targets_mean": 15538.4,
"valid_targets_min": 9217
},
{
"epoch": 2.3099787685774946,
"grad_norm": 0.2932762754898537,
"learning_rate": 3.394646772078951e-05,
"loss": 0.2136,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11464841663837433,
"step": 545,
"valid_targets_mean": 13407.5,
"valid_targets_min": 5234
},
{
"epoch": 2.3312101910828025,
"grad_norm": 0.31716101339217717,
"learning_rate": 3.379415991726047e-05,
"loss": 0.1953,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0933525562286377,
"step": 550,
"valid_targets_mean": 10441.4,
"valid_targets_min": 3289
},
{
"epoch": 2.3524416135881103,
"grad_norm": 0.31294543987180895,
"learning_rate": 3.3640310791836375e-05,
"loss": 0.2001,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11642187833786011,
"step": 555,
"valid_targets_mean": 15880.0,
"valid_targets_min": 6450
},
{
"epoch": 2.373673036093418,
"grad_norm": 0.23138968267395532,
"learning_rate": 3.348493753519987e-05,
"loss": 0.2171,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0960564911365509,
"step": 560,
"valid_targets_mean": 18356.4,
"valid_targets_min": 13592
},
{
"epoch": 2.394904458598726,
"grad_norm": 0.296455985526213,
"learning_rate": 3.332805750833588e-05,
"loss": 0.1966,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1138642281293869,
"step": 565,
"valid_targets_mean": 16774.5,
"valid_targets_min": 9339
},
{
"epoch": 2.416135881104034,
"grad_norm": 0.26969809385295945,
"learning_rate": 3.3169688240591735e-05,
"loss": 0.187,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08020391315221786,
"step": 570,
"valid_targets_mean": 11902.9,
"valid_targets_min": 6702
},
{
"epoch": 2.4373673036093417,
"grad_norm": 0.26145366450884633,
"learning_rate": 3.300984742771849e-05,
"loss": 0.1826,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08663637936115265,
"step": 575,
"valid_targets_mean": 15054.6,
"valid_targets_min": 4839
},
{
"epoch": 2.4585987261146496,
"grad_norm": 0.32984191621067444,
"learning_rate": 3.284855292989363e-05,
"loss": 0.2016,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10979083180427551,
"step": 580,
"valid_targets_mean": 14732.2,
"valid_targets_min": 5139
},
{
"epoch": 2.4798301486199574,
"grad_norm": 0.27365490330331865,
"learning_rate": 3.268582276972549e-05,
"loss": 0.1907,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13272282481193542,
"step": 585,
"valid_targets_mean": 14038.0,
"valid_targets_min": 7854
},
{
"epoch": 2.5010615711252653,
"grad_norm": 0.25796595337009653,
"learning_rate": 3.252167513023934e-05,
"loss": 0.2006,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09469525516033173,
"step": 590,
"valid_targets_mean": 17552.2,
"valid_targets_min": 9415
},
{
"epoch": 2.522292993630573,
"grad_norm": 0.26440522510501036,
"learning_rate": 3.2356128352845794e-05,
"loss": 0.1982,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08657258749008179,
"step": 595,
"valid_targets_mean": 15092.1,
"valid_targets_min": 8187
},
{
"epoch": 2.543524416135881,
"grad_norm": 0.31227039684550767,
"learning_rate": 3.218920093529129e-05,
"loss": 0.1869,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11756514757871628,
"step": 600,
"valid_targets_mean": 12069.6,
"valid_targets_min": 5064
},
{
"epoch": 2.564755838641189,
"grad_norm": 0.27723943310578775,
"learning_rate": 3.202091152959126e-05,
"loss": 0.1757,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11652103066444397,
"step": 605,
"valid_targets_mean": 15984.1,
"valid_targets_min": 5816
},
{
"epoch": 2.5859872611464967,
"grad_norm": 0.2780638657313319,
"learning_rate": 3.1851278939945974e-05,
"loss": 0.1775,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1163756474852562,
"step": 610,
"valid_targets_mean": 15140.8,
"valid_targets_min": 9131
},
{
"epoch": 2.6072186836518045,
"grad_norm": 0.32200630212205833,
"learning_rate": 3.1680322120639436e-05,
"loss": 0.2035,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12698647379875183,
"step": 615,
"valid_targets_mean": 13908.4,
"valid_targets_min": 3655
},
{
"epoch": 2.6284501061571124,
"grad_norm": 0.28026722665310455,
"learning_rate": 3.150806017392145e-05,
"loss": 0.1815,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10848917067050934,
"step": 620,
"valid_targets_mean": 15885.1,
"valid_targets_min": 8272
},
{
"epoch": 2.6496815286624202,
"grad_norm": 0.2987901602685072,
"learning_rate": 3.1334512347873215e-05,
"loss": 0.1946,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1020238921046257,
"step": 625,
"valid_targets_mean": 16725.5,
"valid_targets_min": 6596
},
{
"epoch": 2.670912951167728,
"grad_norm": 0.3422773542295122,
"learning_rate": 3.1159698034256595e-05,
"loss": 0.1946,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08330472558736801,
"step": 630,
"valid_targets_mean": 12442.0,
"valid_targets_min": 1587
},
{
"epoch": 2.692144373673036,
"grad_norm": 0.2815022503459199,
"learning_rate": 3.098363676634732e-05,
"loss": 0.2026,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09158733487129211,
"step": 635,
"valid_targets_mean": 14677.6,
"valid_targets_min": 3623
},
{
"epoch": 2.713375796178344,
"grad_norm": 0.27699220688660753,
"learning_rate": 3.080634821675239e-05,
"loss": 0.1906,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08198846876621246,
"step": 640,
"valid_targets_mean": 14876.6,
"valid_targets_min": 5491
},
{
"epoch": 2.7346072186836516,
"grad_norm": 0.27056674048902585,
"learning_rate": 3.0627852195211944e-05,
"loss": 0.1943,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09581325948238373,
"step": 645,
"valid_targets_mean": 16048.8,
"valid_targets_min": 7561
},
{
"epoch": 2.7558386411889595,
"grad_norm": 0.25624506575317174,
"learning_rate": 3.0448168646385733e-05,
"loss": 0.1871,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09879995882511139,
"step": 650,
"valid_targets_mean": 16128.4,
"valid_targets_min": 11230
},
{
"epoch": 2.777070063694268,
"grad_norm": 0.25670398472243816,
"learning_rate": 3.0267317647624584e-05,
"loss": 0.2121,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0653584823012352,
"step": 655,
"valid_targets_mean": 14782.5,
"valid_targets_min": 3532
},
{
"epoch": 2.798301486199575,
"grad_norm": 0.2724083240499696,
"learning_rate": 3.0085319406727003e-05,
"loss": 0.2165,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08737077564001083,
"step": 660,
"valid_targets_mean": 16403.5,
"valid_targets_min": 4331
},
{
"epoch": 2.8195329087048835,
"grad_norm": 0.2383604800522291,
"learning_rate": 2.9902194259681203e-05,
"loss": 0.1886,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07255426049232483,
"step": 665,
"valid_targets_mean": 15846.5,
"valid_targets_min": 7310
},
{
"epoch": 2.840764331210191,
"grad_norm": 0.30257307120700694,
"learning_rate": 2.9717962668392837e-05,
"loss": 0.1662,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12048687040805817,
"step": 670,
"valid_targets_mean": 13953.0,
"valid_targets_min": 3689
},
{
"epoch": 2.861995753715499,
"grad_norm": 0.2522936277398451,
"learning_rate": 2.9532645218398608e-05,
"loss": 0.186,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09158721566200256,
"step": 675,
"valid_targets_mean": 14446.5,
"valid_targets_min": 5751
},
{
"epoch": 2.8832271762208066,
"grad_norm": 0.2070318168754386,
"learning_rate": 2.9346262616566128e-05,
"loss": 0.1798,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08559735119342804,
"step": 680,
"valid_targets_mean": 17995.4,
"valid_targets_min": 10423
},
{
"epoch": 2.904458598726115,
"grad_norm": 0.24087732064409983,
"learning_rate": 2.9158835688780188e-05,
"loss": 0.1856,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08264927566051483,
"step": 685,
"valid_targets_mean": 13348.2,
"valid_targets_min": 6221
},
{
"epoch": 2.9256900212314223,
"grad_norm": 0.24300050232911719,
"learning_rate": 2.89703853776157e-05,
"loss": 0.1673,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08173580467700958,
"step": 690,
"valid_targets_mean": 16869.4,
"valid_targets_min": 1387
},
{
"epoch": 2.9469214437367306,
"grad_norm": 0.26724986441117543,
"learning_rate": 2.878093273999765e-05,
"loss": 0.1836,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08488611876964569,
"step": 695,
"valid_targets_mean": 15076.5,
"valid_targets_min": 6502
},
{
"epoch": 2.968152866242038,
"grad_norm": 0.2455958459356137,
"learning_rate": 2.859049894484828e-05,
"loss": 0.1885,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08177411556243896,
"step": 700,
"valid_targets_mean": 14060.8,
"valid_targets_min": 3622
},
{
"epoch": 2.9893842887473463,
"grad_norm": 0.2715224629184729,
"learning_rate": 2.8399105270721668e-05,
"loss": 0.2006,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09079961478710175,
"step": 705,
"valid_targets_mean": 15069.6,
"valid_targets_min": 8482
},
{
"epoch": 3.008492569002123,
"grad_norm": 0.23573264403831284,
"learning_rate": 2.8206773103426187e-05,
"loss": 0.168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07453001290559769,
"step": 710,
"valid_targets_mean": 13914.1,
"valid_targets_min": 3623
},
{
"epoch": 3.029723991507431,
"grad_norm": 0.28900177745868094,
"learning_rate": 2.8013523933634875e-05,
"loss": 0.2028,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07700274884700775,
"step": 715,
"valid_targets_mean": 14241.4,
"valid_targets_min": 9250
},
{
"epoch": 3.050955414012739,
"grad_norm": 0.2794038536966578,
"learning_rate": 2.7819379354484124e-05,
"loss": 0.1776,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07138693332672119,
"step": 720,
"valid_targets_mean": 14523.6,
"valid_targets_min": 3518
},
{
"epoch": 3.0721868365180467,
"grad_norm": 0.3075656321601474,
"learning_rate": 2.762436105916094e-05,
"loss": 0.1852,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08331207185983658,
"step": 725,
"valid_targets_mean": 14277.2,
"valid_targets_min": 6321
},
{
"epoch": 3.0934182590233545,
"grad_norm": 0.268200382167701,
"learning_rate": 2.742849083847899e-05,
"loss": 0.2002,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08485275506973267,
"step": 730,
"valid_targets_mean": 15997.1,
"valid_targets_min": 7462
},
{
"epoch": 3.1146496815286624,
"grad_norm": 0.26063481388615084,
"learning_rate": 2.7231790578443785e-05,
"loss": 0.1666,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09242895990610123,
"step": 735,
"valid_targets_mean": 16829.9,
"valid_targets_min": 4851
},
{
"epoch": 3.1358811040339702,
"grad_norm": 0.33301761675595387,
"learning_rate": 2.7034282257807136e-05,
"loss": 0.1877,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12262482196092606,
"step": 740,
"valid_targets_mean": 14170.1,
"valid_targets_min": 7297
},
{
"epoch": 3.157112526539278,
"grad_norm": 0.25559284472185234,
"learning_rate": 2.683598794561138e-05,
"loss": 0.1819,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.060392506420612335,
"step": 745,
"valid_targets_mean": 13418.9,
"valid_targets_min": 7538
},
{
"epoch": 3.178343949044586,
"grad_norm": 0.31989112975280076,
"learning_rate": 2.66369297987234e-05,
"loss": 0.1758,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09339696168899536,
"step": 750,
"valid_targets_mean": 14859.2,
"valid_targets_min": 5714
},
{
"epoch": 3.199575371549894,
"grad_norm": 0.24047693551940477,
"learning_rate": 2.643713005935888e-05,
"loss": 0.1607,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06566546857357025,
"step": 755,
"valid_targets_mean": 17566.6,
"valid_targets_min": 12773
},
{
"epoch": 3.2208067940552016,
"grad_norm": 0.24423971251668095,
"learning_rate": 2.6236611052597055e-05,
"loss": 0.1732,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05638258159160614,
"step": 760,
"valid_targets_mean": 16585.4,
"valid_targets_min": 6836
},
{
"epoch": 3.2420382165605095,
"grad_norm": 0.22931906700860674,
"learning_rate": 2.603539518388611e-05,
"loss": 0.1782,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.062216877937316895,
"step": 765,
"valid_targets_mean": 15363.1,
"valid_targets_min": 6726
},
{
"epoch": 3.2632696390658174,
"grad_norm": 0.29138046222131386,
"learning_rate": 2.5833504936539712e-05,
"loss": 0.1794,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12089153379201889,
"step": 770,
"valid_targets_mean": 15873.8,
"valid_targets_min": 7979
},
{
"epoch": 3.284501061571125,
"grad_norm": 0.27630841743869844,
"learning_rate": 2.563096286922474e-05,
"loss": 0.1948,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0977085679769516,
"step": 775,
"valid_targets_mean": 13591.0,
"valid_targets_min": 3435
},
{
"epoch": 3.305732484076433,
"grad_norm": 0.31406681179293716,
"learning_rate": 2.54277916134407e-05,
"loss": 0.1825,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06433968245983124,
"step": 780,
"valid_targets_mean": 13319.6,
"valid_targets_min": 2535
},
{
"epoch": 3.326963906581741,
"grad_norm": 0.26059306614788147,
"learning_rate": 2.5224013870990868e-05,
"loss": 0.1861,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08149316906929016,
"step": 785,
"valid_targets_mean": 13039.1,
"valid_targets_min": 6808
},
{
"epoch": 3.3481953290870488,
"grad_norm": 0.2351953795497981,
"learning_rate": 2.5019652411445704e-05,
"loss": 0.1929,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07320615649223328,
"step": 790,
"valid_targets_mean": 15469.1,
"valid_targets_min": 8955
},
{
"epoch": 3.3694267515923566,
"grad_norm": 0.2736683222828364,
"learning_rate": 2.4814730069598624e-05,
"loss": 0.1737,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0710974708199501,
"step": 795,
"valid_targets_mean": 11276.5,
"valid_targets_min": 5920
},
{
"epoch": 3.3906581740976645,
"grad_norm": 0.27054094450445326,
"learning_rate": 2.460926974291451e-05,
"loss": 0.1916,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07035864889621735,
"step": 800,
"valid_targets_mean": 17484.8,
"valid_targets_min": 8262
},
{
"epoch": 3.4118895966029723,
"grad_norm": 0.29197318400903655,
"learning_rate": 2.440329438897122e-05,
"loss": 0.1705,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09986551851034164,
"step": 805,
"valid_targets_mean": 16090.8,
"valid_targets_min": 7707
},
{
"epoch": 3.43312101910828,
"grad_norm": 0.2314470232042232,
"learning_rate": 2.419682702289432e-05,
"loss": 0.1584,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05505535006523132,
"step": 810,
"valid_targets_mean": 16037.1,
"valid_targets_min": 9644
},
{
"epoch": 3.454352441613588,
"grad_norm": 0.3222253587001989,
"learning_rate": 2.3989890714785505e-05,
"loss": 0.1632,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09005002677440643,
"step": 815,
"valid_targets_mean": 16947.9,
"valid_targets_min": 6163
},
{
"epoch": 3.475583864118896,
"grad_norm": 0.3294379601590877,
"learning_rate": 2.3782508587144774e-05,
"loss": 0.1693,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11677606403827667,
"step": 820,
"valid_targets_mean": 13542.0,
"valid_targets_min": 6462
},
{
"epoch": 3.4968152866242037,
"grad_norm": 0.29000006498568415,
"learning_rate": 2.3574703812286766e-05,
"loss": 0.1746,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07387042045593262,
"step": 825,
"valid_targets_mean": 11624.4,
"valid_targets_min": 4488
},
{
"epoch": 3.5180467091295116,
"grad_norm": 0.2401705211916322,
"learning_rate": 2.3366499609751593e-05,
"loss": 0.1736,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09939301013946533,
"step": 830,
"valid_targets_mean": 15575.9,
"valid_targets_min": 8394
},
{
"epoch": 3.5392781316348194,
"grad_norm": 0.28451927113712266,
"learning_rate": 2.3157919243710318e-05,
"loss": 0.1789,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09718938171863556,
"step": 835,
"valid_targets_mean": 16160.0,
"valid_targets_min": 7174
},
{
"epoch": 3.5605095541401273,
"grad_norm": 0.3211013643205975,
"learning_rate": 2.2948986020365493e-05,
"loss": 0.1955,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09303892403841019,
"step": 840,
"valid_targets_mean": 11994.6,
"valid_targets_min": 4268
},
{
"epoch": 3.581740976645435,
"grad_norm": 0.2746677955508634,
"learning_rate": 2.273972328534698e-05,
"loss": 0.2052,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12424758076667786,
"step": 845,
"valid_targets_mean": 15329.9,
"valid_targets_min": 5514
},
{
"epoch": 3.602972399150743,
"grad_norm": 0.2916069982798815,
"learning_rate": 2.2530154421103386e-05,
"loss": 0.1627,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0865262895822525,
"step": 850,
"valid_targets_mean": 12826.9,
"valid_targets_min": 1935
},
{
"epoch": 3.624203821656051,
"grad_norm": 0.329290804996052,
"learning_rate": 2.2320302844289366e-05,
"loss": 0.2028,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1294848918914795,
"step": 855,
"valid_targets_mean": 14766.9,
"valid_targets_min": 4963
},
{
"epoch": 3.6454352441613587,
"grad_norm": 0.26352816238784327,
"learning_rate": 2.21101920031491e-05,
"loss": 0.1643,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05863261595368385,
"step": 860,
"valid_targets_mean": 14058.9,
"valid_targets_min": 2079
},
{
"epoch": 3.6666666666666665,
"grad_norm": 0.27075913676748065,
"learning_rate": 2.1899845374896264e-05,
"loss": 0.1724,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0771893858909607,
"step": 865,
"valid_targets_mean": 15660.5,
"valid_targets_min": 3825
},
{
"epoch": 3.6878980891719744,
"grad_norm": 0.2879633243259211,
"learning_rate": 2.168928646309074e-05,
"loss": 0.1844,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0968056321144104,
"step": 870,
"valid_targets_mean": 13577.5,
"valid_targets_min": 1343
},
{
"epoch": 3.709129511677282,
"grad_norm": 0.3324325284111313,
"learning_rate": 2.14785387950124e-05,
"loss": 0.2002,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10815715789794922,
"step": 875,
"valid_targets_mean": 13251.2,
"valid_targets_min": 5776
},
{
"epoch": 3.73036093418259,
"grad_norm": 0.24385270873622308,
"learning_rate": 2.1267625919032233e-05,
"loss": 0.1949,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07621297240257263,
"step": 880,
"valid_targets_mean": 14647.6,
"valid_targets_min": 1283
},
{
"epoch": 3.7515923566878984,
"grad_norm": 0.28158194480710597,
"learning_rate": 2.10565714019811e-05,
"loss": 0.1882,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09309081733226776,
"step": 885,
"valid_targets_mean": 13250.6,
"valid_targets_min": 5586
},
{
"epoch": 3.7728237791932058,
"grad_norm": 0.28387353955537614,
"learning_rate": 2.0845398826516457e-05,
"loss": 0.1844,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0900927409529686,
"step": 890,
"valid_targets_mean": 15713.0,
"valid_targets_min": 5478
},
{
"epoch": 3.794055201698514,
"grad_norm": 0.22676398483352012,
"learning_rate": 2.0634131788487278e-05,
"loss": 0.187,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0685080736875534,
"step": 895,
"valid_targets_mean": 17496.4,
"valid_targets_min": 11864
},
{
"epoch": 3.8152866242038215,
"grad_norm": 0.2894684737379915,
"learning_rate": 2.0422793894297533e-05,
"loss": 0.1743,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09989657998085022,
"step": 900,
"valid_targets_mean": 15670.9,
"valid_targets_min": 5314
},
{
"epoch": 3.8365180467091298,
"grad_norm": 0.2529641193716833,
"learning_rate": 2.0211408758268468e-05,
"loss": 0.1832,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08049357682466507,
"step": 905,
"valid_targets_mean": 19922.8,
"valid_targets_min": 5124
},
{
"epoch": 3.857749469214437,
"grad_norm": 0.3262452116430981,
"learning_rate": 2e-05,
"loss": 0.1815,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06966280192136765,
"step": 910,
"valid_targets_mean": 10526.9,
"valid_targets_min": 3364
},
{
"epoch": 3.8789808917197455,
"grad_norm": 0.2935358593938512,
"learning_rate": 1.9788591241731535e-05,
"loss": 0.1781,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09608276188373566,
"step": 915,
"valid_targets_mean": 13981.8,
"valid_targets_min": 8162
},
{
"epoch": 3.900212314225053,
"grad_norm": 0.27813488718681834,
"learning_rate": 1.9577206105702474e-05,
"loss": 0.1811,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09818491339683533,
"step": 920,
"valid_targets_mean": 17414.1,
"valid_targets_min": 7717
},
{
"epoch": 3.921443736730361,
"grad_norm": 0.3884374840815922,
"learning_rate": 1.9365868211512725e-05,
"loss": 0.1958,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12244383990764618,
"step": 925,
"valid_targets_mean": 13225.5,
"valid_targets_min": 7774
},
{
"epoch": 3.9426751592356686,
"grad_norm": 0.25803403519577645,
"learning_rate": 1.915460117348355e-05,
"loss": 0.1675,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0736437737941742,
"step": 930,
"valid_targets_mean": 14138.8,
"valid_targets_min": 4599
},
{
"epoch": 3.963906581740977,
"grad_norm": 0.2948166835249158,
"learning_rate": 1.8943428598018904e-05,
"loss": 0.1729,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10551305115222931,
"step": 935,
"valid_targets_mean": 13149.0,
"valid_targets_min": 5237
},
{
"epoch": 3.9851380042462843,
"grad_norm": 0.26157763886832613,
"learning_rate": 1.8732374080967774e-05,
"loss": 0.1664,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08099900186061859,
"step": 940,
"valid_targets_mean": 16457.1,
"valid_targets_min": 5918
},
{
"epoch": 4.004246284501062,
"grad_norm": 0.22804078903359448,
"learning_rate": 1.8521461204987606e-05,
"loss": 0.1569,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.044775694608688354,
"step": 945,
"valid_targets_mean": 16204.6,
"valid_targets_min": 7257
},
{
"epoch": 4.025477707006369,
"grad_norm": 0.2836852446635035,
"learning_rate": 1.8310713536909265e-05,
"loss": 0.1668,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08778543770313263,
"step": 950,
"valid_targets_mean": 12359.9,
"valid_targets_min": 5076
},
{
"epoch": 4.046709129511678,
"grad_norm": 0.2442174895389702,
"learning_rate": 1.810015462510374e-05,
"loss": 0.1697,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09868457168340683,
"step": 955,
"valid_targets_mean": 17049.4,
"valid_targets_min": 5491
},
{
"epoch": 4.067940552016985,
"grad_norm": 0.3695645197841845,
"learning_rate": 1.7889807996850906e-05,
"loss": 0.2036,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11026948690414429,
"step": 960,
"valid_targets_mean": 9681.6,
"valid_targets_min": 3605
},
{
"epoch": 4.089171974522293,
"grad_norm": 0.29979727831410075,
"learning_rate": 1.767969715571064e-05,
"loss": 0.1686,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0982229933142662,
"step": 965,
"valid_targets_mean": 11373.5,
"valid_targets_min": 2622
},
{
"epoch": 4.110403397027601,
"grad_norm": 0.29739837712764017,
"learning_rate": 1.746984557889662e-05,
"loss": 0.1837,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08777828514575958,
"step": 970,
"valid_targets_mean": 14991.1,
"valid_targets_min": 6941
},
{
"epoch": 4.131634819532909,
"grad_norm": 0.27118781352159604,
"learning_rate": 1.7260276714653023e-05,
"loss": 0.1704,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0875612124800682,
"step": 975,
"valid_targets_mean": 14782.4,
"valid_targets_min": 7258
},
{
"epoch": 4.1528662420382165,
"grad_norm": 0.28359027589369706,
"learning_rate": 1.7051013979634514e-05,
"loss": 0.1607,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10205516219139099,
"step": 980,
"valid_targets_mean": 18528.0,
"valid_targets_min": 12316
},
{
"epoch": 4.174097664543525,
"grad_norm": 0.3340589809342197,
"learning_rate": 1.684208075628969e-05,
"loss": 0.1826,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09689417481422424,
"step": 985,
"valid_targets_mean": 13361.2,
"valid_targets_min": 5776
},
{
"epoch": 4.195329087048832,
"grad_norm": 0.3077583048219397,
"learning_rate": 1.6633500390248414e-05,
"loss": 0.1573,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06832315027713776,
"step": 990,
"valid_targets_mean": 12119.1,
"valid_targets_min": 4323
},
{
"epoch": 4.2165605095541405,
"grad_norm": 0.2573294853602038,
"learning_rate": 1.642529618771324e-05,
"loss": 0.1518,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05930829420685768,
"step": 995,
"valid_targets_mean": 15648.5,
"valid_targets_min": 7985
},
{
"epoch": 4.237791932059448,
"grad_norm": 0.2690214106625097,
"learning_rate": 1.6217491412855233e-05,
"loss": 0.1813,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07982520759105682,
"step": 1000,
"valid_targets_mean": 14926.2,
"valid_targets_min": 1956
},
{
"epoch": 4.259023354564756,
"grad_norm": 0.3360157443708867,
"learning_rate": 1.60101092852145e-05,
"loss": 0.1753,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04510154575109482,
"step": 1005,
"valid_targets_mean": 13575.0,
"valid_targets_min": 7250
},
{
"epoch": 4.280254777070064,
"grad_norm": 0.3205217949902666,
"learning_rate": 1.5803172977105686e-05,
"loss": 0.1579,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10675063729286194,
"step": 1010,
"valid_targets_mean": 15840.2,
"valid_targets_min": 5089
},
{
"epoch": 4.301486199575372,
"grad_norm": 0.2583709576870678,
"learning_rate": 1.5596705611028792e-05,
"loss": 0.1607,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06442227959632874,
"step": 1015,
"valid_targets_mean": 17846.0,
"valid_targets_min": 5469
},
{
"epoch": 4.322717622080679,
"grad_norm": 0.26010908830801727,
"learning_rate": 1.5390730257085494e-05,
"loss": 0.162,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07889682799577713,
"step": 1020,
"valid_targets_mean": 16397.1,
"valid_targets_min": 12382
},
{
"epoch": 4.343949044585988,
"grad_norm": 0.3022177554051947,
"learning_rate": 1.5185269930401381e-05,
"loss": 0.1734,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0844014585018158,
"step": 1025,
"valid_targets_mean": 11215.9,
"valid_targets_min": 4517
},
{
"epoch": 4.365180467091295,
"grad_norm": 0.27137051227613906,
"learning_rate": 1.4980347588554302e-05,
"loss": 0.1632,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10084296762943268,
"step": 1030,
"valid_targets_mean": 17699.4,
"valid_targets_min": 5458
},
{
"epoch": 4.386411889596603,
"grad_norm": 0.28712761699778766,
"learning_rate": 1.4775986129009137e-05,
"loss": 0.1897,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08913597464561462,
"step": 1035,
"valid_targets_mean": 15072.6,
"valid_targets_min": 2535
},
{
"epoch": 4.407643312101911,
"grad_norm": 0.28374049176794086,
"learning_rate": 1.4572208386559304e-05,
"loss": 0.1672,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08083443343639374,
"step": 1040,
"valid_targets_mean": 15562.0,
"valid_targets_min": 6274
},
{
"epoch": 4.428874734607219,
"grad_norm": 0.289299652196656,
"learning_rate": 1.436903713077526e-05,
"loss": 0.1598,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08103075623512268,
"step": 1045,
"valid_targets_mean": 13985.0,
"valid_targets_min": 7381
},
{
"epoch": 4.450106157112526,
"grad_norm": 0.29939314691306246,
"learning_rate": 1.4166495063460295e-05,
"loss": 0.1619,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07846647500991821,
"step": 1050,
"valid_targets_mean": 15401.0,
"valid_targets_min": 5135
},
{
"epoch": 4.471337579617835,
"grad_norm": 0.322213805306961,
"learning_rate": 1.3964604816113896e-05,
"loss": 0.1806,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10395080596208572,
"step": 1055,
"valid_targets_mean": 14610.8,
"valid_targets_min": 7159
},
{
"epoch": 4.492569002123142,
"grad_norm": 0.2866514130234711,
"learning_rate": 1.3763388947402953e-05,
"loss": 0.1715,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07112490385770798,
"step": 1060,
"valid_targets_mean": 15032.0,
"valid_targets_min": 8317
},
{
"epoch": 4.51380042462845,
"grad_norm": 0.314478967030206,
"learning_rate": 1.3562869940641123e-05,
"loss": 0.1691,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10800331830978394,
"step": 1065,
"valid_targets_mean": 15537.0,
"valid_targets_min": 5167
},
{
"epoch": 4.535031847133758,
"grad_norm": 0.28204747261924135,
"learning_rate": 1.3363070201276606e-05,
"loss": 0.1601,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07255604863166809,
"step": 1070,
"valid_targets_mean": 14644.9,
"valid_targets_min": 4870
},
{
"epoch": 4.556263269639066,
"grad_norm": 0.27569273817889167,
"learning_rate": 1.316401205438862e-05,
"loss": 0.1715,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06936567276716232,
"step": 1075,
"valid_targets_mean": 14241.4,
"valid_targets_min": 4601
},
{
"epoch": 4.5774946921443735,
"grad_norm": 0.26253469480509967,
"learning_rate": 1.2965717742192866e-05,
"loss": 0.1734,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08657175302505493,
"step": 1080,
"valid_targets_mean": 15617.2,
"valid_targets_min": 5992
},
{
"epoch": 4.598726114649682,
"grad_norm": 0.28581659144121935,
"learning_rate": 1.276820942155622e-05,
"loss": 0.1702,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08918562531471252,
"step": 1085,
"valid_targets_mean": 13600.9,
"valid_targets_min": 4402
},
{
"epoch": 4.619957537154989,
"grad_norm": 0.3073024926683609,
"learning_rate": 1.2571509161521007e-05,
"loss": 0.1714,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07768109440803528,
"step": 1090,
"valid_targets_mean": 12548.6,
"valid_targets_min": 4815
},
{
"epoch": 4.6411889596602975,
"grad_norm": 0.3269547869469645,
"learning_rate": 1.2375638940839062e-05,
"loss": 0.1954,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11400610208511353,
"step": 1095,
"valid_targets_mean": 12697.8,
"valid_targets_min": 5610
},
{
"epoch": 4.662420382165605,
"grad_norm": 0.27326799041082284,
"learning_rate": 1.2180620645515875e-05,
"loss": 0.1619,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06606325507164001,
"step": 1100,
"valid_targets_mean": 10452.8,
"valid_targets_min": 5105
},
{
"epoch": 4.683651804670913,
"grad_norm": 0.31708108689013287,
"learning_rate": 1.1986476066365125e-05,
"loss": 0.1794,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07225104421377182,
"step": 1105,
"valid_targets_mean": 10873.4,
"valid_targets_min": 5961
},
{
"epoch": 4.704883227176221,
"grad_norm": 0.34556552719228606,
"learning_rate": 1.179322689657381e-05,
"loss": 0.1964,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13157233595848083,
"step": 1110,
"valid_targets_mean": 16111.0,
"valid_targets_min": 5477
},
{
"epoch": 4.726114649681529,
"grad_norm": 0.29047699922219883,
"learning_rate": 1.1600894729278333e-05,
"loss": 0.1633,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07969208061695099,
"step": 1115,
"valid_targets_mean": 12312.8,
"valid_targets_min": 7979
},
{
"epoch": 4.747346072186836,
"grad_norm": 0.31878635054753074,
"learning_rate": 1.1409501055151726e-05,
"loss": 0.1663,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11133340746164322,
"step": 1120,
"valid_targets_mean": 16930.0,
"valid_targets_min": 4513
},
{
"epoch": 4.768577494692145,
"grad_norm": 0.2894979007219718,
"learning_rate": 1.1219067260002352e-05,
"loss": 0.1481,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08189079165458679,
"step": 1125,
"valid_targets_mean": 13020.8,
"valid_targets_min": 5919
},
{
"epoch": 4.789808917197452,
"grad_norm": 0.2974224757724096,
"learning_rate": 1.1029614622384307e-05,
"loss": 0.1763,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0739152580499649,
"step": 1130,
"valid_targets_mean": 15006.2,
"valid_targets_min": 6402
},
{
"epoch": 4.81104033970276,
"grad_norm": 0.2740806790545324,
"learning_rate": 1.0841164311219812e-05,
"loss": 0.1665,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09299416840076447,
"step": 1135,
"valid_targets_mean": 18061.0,
"valid_targets_min": 10000
},
{
"epoch": 4.832271762208068,
"grad_norm": 0.23454734933723237,
"learning_rate": 1.0653737383433869e-05,
"loss": 0.1727,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08343719691038132,
"step": 1140,
"valid_targets_mean": 17752.4,
"valid_targets_min": 7344
},
{
"epoch": 4.853503184713376,
"grad_norm": 0.24458323267656765,
"learning_rate": 1.0467354781601395e-05,
"loss": 0.1664,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06879070401191711,
"step": 1145,
"valid_targets_mean": 18738.1,
"valid_targets_min": 11083
},
{
"epoch": 4.8747346072186835,
"grad_norm": 0.26926970833953917,
"learning_rate": 1.0282037331607167e-05,
"loss": 0.165,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09153994917869568,
"step": 1150,
"valid_targets_mean": 12938.4,
"valid_targets_min": 6938
},
{
"epoch": 4.895966029723992,
"grad_norm": 0.2475631115627024,
"learning_rate": 1.0097805740318797e-05,
"loss": 0.1613,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06908365339040756,
"step": 1155,
"valid_targets_mean": 14550.2,
"valid_targets_min": 2924
},
{
"epoch": 4.917197452229299,
"grad_norm": 0.26381821539779826,
"learning_rate": 9.914680593273e-06,
"loss": 0.1855,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0847349539399147,
"step": 1160,
"valid_targets_mean": 16227.0,
"valid_targets_min": 2970
},
{
"epoch": 4.9384288747346075,
"grad_norm": 0.2536112248092482,
"learning_rate": 9.732682352375418e-06,
"loss": 0.1692,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07469794154167175,
"step": 1165,
"valid_targets_mean": 16989.6,
"valid_targets_min": 1639
},
{
"epoch": 4.959660297239915,
"grad_norm": 0.26178169003572327,
"learning_rate": 9.551831353614272e-06,
"loss": 0.1666,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0636444240808487,
"step": 1170,
"valid_targets_mean": 13780.4,
"valid_targets_min": 5417
},
{
"epoch": 4.980891719745223,
"grad_norm": 0.292574119747484,
"learning_rate": 9.372147804788063e-06,
"loss": 0.1838,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10308250784873962,
"step": 1175,
"valid_targets_mean": 15759.5,
"valid_targets_min": 6097
},
{
"epoch": 5.0,
"grad_norm": 0.3633457948840684,
"learning_rate": 9.193651783247616e-06,
"loss": 0.1652,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1370236575603485,
"step": 1180,
"valid_targets_mean": 13572.6,
"valid_targets_min": 8150
},
{
"epoch": 5.021231422505308,
"grad_norm": 0.32191020526943465,
"learning_rate": 9.016363233652686e-06,
"loss": 0.1657,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10346332937479019,
"step": 1185,
"valid_targets_mean": 14068.4,
"valid_targets_min": 4870
},
{
"epoch": 5.042462845010616,
"grad_norm": 0.32642486899462897,
"learning_rate": 8.840301965743405e-06,
"loss": 0.1813,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09915921092033386,
"step": 1190,
"valid_targets_mean": 11485.2,
"valid_targets_min": 5336
},
{
"epoch": 5.063694267515924,
"grad_norm": 0.28541664235799485,
"learning_rate": 8.665487652126785e-06,
"loss": 0.1678,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05272082984447479,
"step": 1195,
"valid_targets_mean": 14355.5,
"valid_targets_min": 6036
},
{
"epoch": 5.084925690021231,
"grad_norm": 0.290919100504366,
"learning_rate": 8.491939826078552e-06,
"loss": 0.1691,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08834843337535858,
"step": 1200,
"valid_targets_mean": 12843.2,
"valid_targets_min": 6392
},
{
"epoch": 5.10615711252654,
"grad_norm": 0.24588022878035837,
"learning_rate": 8.319677879360566e-06,
"loss": 0.1662,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06265243887901306,
"step": 1205,
"valid_targets_mean": 14097.9,
"valid_targets_min": 1479
},
{
"epoch": 5.127388535031847,
"grad_norm": 0.26986615918502915,
"learning_rate": 8.148721060054026e-06,
"loss": 0.1576,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09268582612276077,
"step": 1210,
"valid_targets_mean": 18160.4,
"valid_targets_min": 10243
},
{
"epoch": 5.148619957537155,
"grad_norm": 0.2900102170992239,
"learning_rate": 7.979088470408743e-06,
"loss": 0.1633,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0702872946858406,
"step": 1215,
"valid_targets_mean": 13231.0,
"valid_targets_min": 6621
},
{
"epoch": 5.169851380042463,
"grad_norm": 0.2778789233044835,
"learning_rate": 7.81079906470872e-06,
"loss": 0.1707,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08230331540107727,
"step": 1220,
"valid_targets_mean": 11612.5,
"valid_targets_min": 4938
},
{
"epoch": 5.191082802547771,
"grad_norm": 0.2746339546722902,
"learning_rate": 7.643871647154212e-06,
"loss": 0.1675,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07877236604690552,
"step": 1225,
"valid_targets_mean": 13966.2,
"valid_targets_min": 5918
},
{
"epoch": 5.2123142250530785,
"grad_norm": 0.29827654767268313,
"learning_rate": 7.478324869760665e-06,
"loss": 0.1598,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08680558204650879,
"step": 1230,
"valid_targets_mean": 14225.1,
"valid_targets_min": 6634
},
{
"epoch": 5.233545647558387,
"grad_norm": 0.3635364050170567,
"learning_rate": 7.314177230274522e-06,
"loss": 0.1498,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07186665385961533,
"step": 1235,
"valid_targets_mean": 14451.9,
"valid_targets_min": 7291
},
{
"epoch": 5.254777070063694,
"grad_norm": 0.26514167860622967,
"learning_rate": 7.151447070106372e-06,
"loss": 0.1557,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0701196938753128,
"step": 1240,
"valid_targets_mean": 13301.2,
"valid_targets_min": 3783
},
{
"epoch": 5.2760084925690025,
"grad_norm": 0.26324169878744724,
"learning_rate": 6.990152572281523e-06,
"loss": 0.1682,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0794605165719986,
"step": 1245,
"valid_targets_mean": 16191.2,
"valid_targets_min": 8450
},
{
"epoch": 5.29723991507431,
"grad_norm": 0.2556220773865451,
"learning_rate": 6.830311759408275e-06,
"loss": 0.1478,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0679427832365036,
"step": 1250,
"valid_targets_mean": 19563.9,
"valid_targets_min": 12610
},
{
"epoch": 5.318471337579618,
"grad_norm": 0.2512037430973632,
"learning_rate": 6.671942491664128e-06,
"loss": 0.1583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08314774930477142,
"step": 1255,
"valid_targets_mean": 17775.9,
"valid_targets_min": 6844
},
{
"epoch": 5.339702760084926,
"grad_norm": 0.301451978575514,
"learning_rate": 6.515062464800139e-06,
"loss": 0.1617,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09716193377971649,
"step": 1260,
"valid_targets_mean": 13520.8,
"valid_targets_min": 6446
},
{
"epoch": 5.360934182590234,
"grad_norm": 0.3033561761745026,
"learning_rate": 6.359689208163635e-06,
"loss": 0.1786,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09750799834728241,
"step": 1265,
"valid_targets_mean": 13563.6,
"valid_targets_min": 6054
},
{
"epoch": 5.382165605095541,
"grad_norm": 0.2867076697834142,
"learning_rate": 6.205840082739538e-06,
"loss": 0.1704,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08606909960508347,
"step": 1270,
"valid_targets_mean": 15594.4,
"valid_targets_min": 7437
},
{
"epoch": 5.40339702760085,
"grad_norm": 0.2902611080814107,
"learning_rate": 6.053532279210494e-06,
"loss": 0.1819,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09306415915489197,
"step": 1275,
"valid_targets_mean": 13659.0,
"valid_targets_min": 6067
},
{
"epoch": 5.424628450106157,
"grad_norm": 0.3138642504116254,
"learning_rate": 5.90278281603605e-06,
"loss": 0.1516,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06723940372467041,
"step": 1280,
"valid_targets_mean": 12938.2,
"valid_targets_min": 7569
},
{
"epoch": 5.445859872611465,
"grad_norm": 0.37944831844559856,
"learning_rate": 5.753608537551023e-06,
"loss": 0.1751,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08615720272064209,
"step": 1285,
"valid_targets_mean": 20076.0,
"valid_targets_min": 8700
},
{
"epoch": 5.467091295116773,
"grad_norm": 0.30159242882323506,
"learning_rate": 5.606026112083383e-06,
"loss": 0.172,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09909145534038544,
"step": 1290,
"valid_targets_mean": 16584.8,
"valid_targets_min": 7272
},
{
"epoch": 5.488322717622081,
"grad_norm": 0.29081583680889445,
"learning_rate": 5.460052030091782e-06,
"loss": 0.1669,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0889005959033966,
"step": 1295,
"valid_targets_mean": 17472.0,
"valid_targets_min": 8288
},
{
"epoch": 5.509554140127388,
"grad_norm": 0.2607145273144808,
"learning_rate": 5.315702602322943e-06,
"loss": 0.159,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0712738037109375,
"step": 1300,
"valid_targets_mean": 14735.1,
"valid_targets_min": 7641
},
{
"epoch": 5.530785562632697,
"grad_norm": 0.29501459858586015,
"learning_rate": 5.1729939579891476e-06,
"loss": 0.162,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07912551611661911,
"step": 1305,
"valid_targets_mean": 13685.9,
"valid_targets_min": 7765
},
{
"epoch": 5.552016985138004,
"grad_norm": 0.27509948934796713,
"learning_rate": 5.031942042966e-06,
"loss": 0.1647,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08225835859775543,
"step": 1310,
"valid_targets_mean": 16989.0,
"valid_targets_min": 8083
},
{
"epoch": 5.573248407643312,
"grad_norm": 0.22169708987849615,
"learning_rate": 4.892562618010684e-06,
"loss": 0.1361,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04339899122714996,
"step": 1315,
"valid_targets_mean": 17842.2,
"valid_targets_min": 8176
},
{
"epoch": 5.59447983014862,
"grad_norm": 0.3559664931393158,
"learning_rate": 4.754871257000888e-06,
"loss": 0.1583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11029928922653198,
"step": 1320,
"valid_targets_mean": 14398.4,
"valid_targets_min": 8162
},
{
"epoch": 5.615711252653928,
"grad_norm": 0.2679685791920965,
"learning_rate": 4.618883345194627e-06,
"loss": 0.1579,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09200026094913483,
"step": 1325,
"valid_targets_mean": 17098.9,
"valid_targets_min": 7659
},
{
"epoch": 5.6369426751592355,
"grad_norm": 0.3057411211787925,
"learning_rate": 4.484614077511153e-06,
"loss": 0.1633,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08388785272836685,
"step": 1330,
"valid_targets_mean": 13349.4,
"valid_targets_min": 7026
},
{
"epoch": 5.658174097664544,
"grad_norm": 0.2888169299715042,
"learning_rate": 4.352078456833082e-06,
"loss": 0.1719,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10811308026313782,
"step": 1335,
"valid_targets_mean": 15849.9,
"valid_targets_min": 5320
},
{
"epoch": 5.679405520169851,
"grad_norm": 0.28951569017216344,
"learning_rate": 4.221291292330036e-06,
"loss": 0.1775,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1030922383069992,
"step": 1340,
"valid_targets_mean": 15074.6,
"valid_targets_min": 9428
},
{
"epoch": 5.7006369426751595,
"grad_norm": 0.27915439610100806,
"learning_rate": 4.0922671978039055e-06,
"loss": 0.1676,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06682918220758438,
"step": 1345,
"valid_targets_mean": 13675.4,
"valid_targets_min": 5223
},
{
"epoch": 5.721868365180467,
"grad_norm": 0.2747466866708511,
"learning_rate": 3.965020590055934e-06,
"loss": 0.1975,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10387305170297623,
"step": 1350,
"valid_targets_mean": 16905.1,
"valid_targets_min": 10448
},
{
"epoch": 5.743099787685775,
"grad_norm": 0.39319047182394823,
"learning_rate": 3.839565687275835e-06,
"loss": 0.1555,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10636863112449646,
"step": 1355,
"valid_targets_mean": 10298.1,
"valid_targets_min": 5818
},
{
"epoch": 5.764331210191083,
"grad_norm": 0.2508928317334097,
"learning_rate": 3.715916507453079e-06,
"loss": 0.1423,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06272110342979431,
"step": 1360,
"valid_targets_mean": 14815.4,
"valid_targets_min": 3317
},
{
"epoch": 5.785562632696391,
"grad_norm": 0.2958681304685539,
"learning_rate": 3.5940868668105644e-06,
"loss": 0.1408,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07483332604169846,
"step": 1365,
"valid_targets_mean": 11648.4,
"valid_targets_min": 5762
},
{
"epoch": 5.806794055201698,
"grad_norm": 0.30333009062567456,
"learning_rate": 3.4740903782608416e-06,
"loss": 0.1697,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08001961559057236,
"step": 1370,
"valid_targets_mean": 15420.4,
"valid_targets_min": 5495
},
{
"epoch": 5.828025477707007,
"grad_norm": 0.27437345080245856,
"learning_rate": 3.3559404498850245e-06,
"loss": 0.1836,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08367906510829926,
"step": 1375,
"valid_targets_mean": 17487.1,
"valid_targets_min": 10947
},
{
"epoch": 5.849256900212314,
"grad_norm": 0.33157275873211317,
"learning_rate": 3.2396502834346277e-06,
"loss": 0.158,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10356227308511734,
"step": 1380,
"valid_targets_mean": 13732.8,
"valid_targets_min": 5999
},
{
"epoch": 5.870488322717622,
"grad_norm": 0.25499987617159037,
"learning_rate": 3.1252328728564206e-06,
"loss": 0.1609,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06108412891626358,
"step": 1385,
"valid_targets_mean": 13659.1,
"valid_targets_min": 6581
},
{
"epoch": 5.89171974522293,
"grad_norm": 0.2754456032364648,
"learning_rate": 3.0127010028405303e-06,
"loss": 0.1604,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07066244632005692,
"step": 1390,
"valid_targets_mean": 16501.9,
"valid_targets_min": 7600
},
{
"epoch": 5.912951167728238,
"grad_norm": 0.3154111494075387,
"learning_rate": 2.9020672473919107e-06,
"loss": 0.1683,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11153072118759155,
"step": 1395,
"valid_targets_mean": 12126.4,
"valid_targets_min": 1587
},
{
"epoch": 5.934182590233545,
"grad_norm": 0.28035453553485096,
"learning_rate": 2.7933439684253616e-06,
"loss": 0.1706,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06870816648006439,
"step": 1400,
"valid_targets_mean": 13538.0,
"valid_targets_min": 6607
},
{
"epoch": 5.955414012738854,
"grad_norm": 0.3259723507914851,
"learning_rate": 2.6865433143842356e-06,
"loss": 0.1693,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10865054279565811,
"step": 1405,
"valid_targets_mean": 12518.4,
"valid_targets_min": 4916
},
{
"epoch": 5.976645435244161,
"grad_norm": 0.334412099283546,
"learning_rate": 2.5816772188830098e-06,
"loss": 0.165,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08065322041511536,
"step": 1410,
"valid_targets_mean": 12634.0,
"valid_targets_min": 6610
},
{
"epoch": 5.997876857749469,
"grad_norm": 0.2640023033609606,
"learning_rate": 2.4787573993738524e-06,
"loss": 0.1631,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.048118408769369125,
"step": 1415,
"valid_targets_mean": 10203.4,
"valid_targets_min": 1607
},
{
"epoch": 6.016985138004246,
"grad_norm": 0.2714448832343656,
"learning_rate": 2.377795355837349e-06,
"loss": 0.1523,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07569775730371475,
"step": 1420,
"valid_targets_mean": 13937.0,
"valid_targets_min": 4540
},
{
"epoch": 6.038216560509555,
"grad_norm": 0.3472240836287177,
"learning_rate": 2.2788023694975236e-06,
"loss": 0.1663,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09323162585496902,
"step": 1425,
"valid_targets_mean": 15866.6,
"valid_targets_min": 3141
},
{
"epoch": 6.059447983014862,
"grad_norm": 0.2350240326376912,
"learning_rate": 2.1817895015613134e-06,
"loss": 0.1575,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07922834157943726,
"step": 1430,
"valid_targets_mean": 18886.6,
"valid_targets_min": 9895
},
{
"epoch": 6.08067940552017,
"grad_norm": 0.36818142115866936,
"learning_rate": 2.086767591982608e-06,
"loss": 0.1529,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07837212830781937,
"step": 1435,
"valid_targets_mean": 12160.6,
"valid_targets_min": 3368
},
{
"epoch": 6.101910828025478,
"grad_norm": 0.2856655219402555,
"learning_rate": 1.9937472582510243e-06,
"loss": 0.1684,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09155093133449554,
"step": 1440,
"valid_targets_mean": 15443.0,
"valid_targets_min": 5076
},
{
"epoch": 6.123142250530786,
"grad_norm": 0.2547340160025767,
"learning_rate": 1.902738894205547e-06,
"loss": 0.1554,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.059237219393253326,
"step": 1445,
"valid_targets_mean": 14534.0,
"valid_targets_min": 5035
},
{
"epoch": 6.144373673036093,
"grad_norm": 0.3002693947204139,
"learning_rate": 1.8137526688731365e-06,
"loss": 0.1596,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0889972448348999,
"step": 1450,
"valid_targets_mean": 16488.1,
"valid_targets_min": 8118
},
{
"epoch": 6.165605095541402,
"grad_norm": 0.298982073220577,
"learning_rate": 1.7267985253324803e-06,
"loss": 0.1534,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07069272547960281,
"step": 1455,
"valid_targets_mean": 15787.2,
"valid_targets_min": 6470
},
{
"epoch": 6.186836518046709,
"grad_norm": 0.32579161912478005,
"learning_rate": 1.641886179602974e-06,
"loss": 0.1738,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07076440751552582,
"step": 1460,
"valid_targets_mean": 14006.6,
"valid_targets_min": 1307
},
{
"epoch": 6.208067940552017,
"grad_norm": 0.3669756453303981,
"learning_rate": 1.5590251195590811e-06,
"loss": 0.1723,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06793823093175888,
"step": 1465,
"valid_targets_mean": 15177.5,
"valid_targets_min": 2535
},
{
"epoch": 6.229299363057325,
"grad_norm": 0.2450071301139486,
"learning_rate": 1.4782246038701865e-06,
"loss": 0.1708,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06423592567443848,
"step": 1470,
"valid_targets_mean": 17624.6,
"valid_targets_min": 9107
},
{
"epoch": 6.250530785562633,
"grad_norm": 0.29353402854137134,
"learning_rate": 1.3994936609660493e-06,
"loss": 0.1735,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06590164452791214,
"step": 1475,
"valid_targets_mean": 11156.4,
"valid_targets_min": 5149
},
{
"epoch": 6.2717622080679405,
"grad_norm": 0.3270038830460443,
"learning_rate": 1.3228410880280084e-06,
"loss": 0.1719,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09932979941368103,
"step": 1480,
"valid_targets_mean": 13644.0,
"valid_targets_min": 6297
},
{
"epoch": 6.292993630573249,
"grad_norm": 0.2789895464186324,
"learning_rate": 1.248275450005987e-06,
"loss": 0.158,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08671200275421143,
"step": 1485,
"valid_targets_mean": 17008.0,
"valid_targets_min": 6585
},
{
"epoch": 6.314225053078556,
"grad_norm": 0.2922442549795321,
"learning_rate": 1.1758050786614872e-06,
"loss": 0.1674,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08488278090953827,
"step": 1490,
"valid_targets_mean": 15370.1,
"valid_targets_min": 8217
},
{
"epoch": 6.3354564755838645,
"grad_norm": 0.2582229867861875,
"learning_rate": 1.1054380716366064e-06,
"loss": 0.1698,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06779177486896515,
"step": 1495,
"valid_targets_mean": 14717.9,
"valid_targets_min": 5041
},
{
"epoch": 6.356687898089172,
"grad_norm": 0.2331255850760353,
"learning_rate": 1.0371822915492414e-06,
"loss": 0.1568,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07795362919569016,
"step": 1500,
"valid_targets_mean": 16383.0,
"valid_targets_min": 8819
},
{
"epoch": 6.37791932059448,
"grad_norm": 0.2892889195801475,
"learning_rate": 9.710453651145335e-07,
"loss": 0.1634,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07211548089981079,
"step": 1505,
"valid_targets_mean": 13909.4,
"valid_targets_min": 634
},
{
"epoch": 6.399150743099788,
"grad_norm": 0.26060830596236895,
"learning_rate": 9.070346822926846e-07,
"loss": 0.1658,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09147711843252182,
"step": 1510,
"valid_targets_mean": 16547.1,
"valid_targets_min": 8024
},
{
"epoch": 6.420382165605096,
"grad_norm": 0.26764389867523897,
"learning_rate": 8.451573954632186e-07,
"loss": 0.1619,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08518597483634949,
"step": 1515,
"valid_targets_mean": 16291.4,
"valid_targets_min": 3623
},
{
"epoch": 6.441613588110403,
"grad_norm": 0.30728258811352377,
"learning_rate": 7.854204186257952e-07,
"loss": 0.1543,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1069483608007431,
"step": 1520,
"valid_targets_mean": 14551.8,
"valid_targets_min": 7637
},
{
"epoch": 6.462845010615712,
"grad_norm": 0.27808926869463985,
"learning_rate": 7.278304266276625e-07,
"loss": 0.1555,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06044379994273186,
"step": 1525,
"valid_targets_mean": 13939.4,
"valid_targets_min": 5659
},
{
"epoch": 6.484076433121019,
"grad_norm": 0.23829230423437128,
"learning_rate": 6.723938544178232e-07,
"loss": 0.1524,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06626199930906296,
"step": 1530,
"valid_targets_mean": 18216.4,
"valid_targets_min": 6292
},
{
"epoch": 6.505307855626327,
"grad_norm": 0.28068157891206974,
"learning_rate": 6.191168963280136e-07,
"loss": 0.1545,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07529743015766144,
"step": 1535,
"valid_targets_mean": 13190.6,
"valid_targets_min": 5603
},
{
"epoch": 6.526539278131635,
"grad_norm": 0.26745162960671476,
"learning_rate": 5.680055053805622e-07,
"loss": 0.1439,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0684957280755043,
"step": 1540,
"valid_targets_mean": 15671.4,
"valid_targets_min": 9088
},
{
"epoch": 6.547770700636943,
"grad_norm": 0.3186094467001062,
"learning_rate": 5.190653926232169e-07,
"loss": 0.1787,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09460516273975372,
"step": 1545,
"valid_targets_mean": 13747.5,
"valid_targets_min": 6715
},
{
"epoch": 6.56900212314225,
"grad_norm": 0.2830669692378638,
"learning_rate": 4.723020264910139e-07,
"loss": 0.1493,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10172918438911438,
"step": 1550,
"valid_targets_mean": 19980.6,
"valid_targets_min": 13214
},
{
"epoch": 6.590233545647559,
"grad_norm": 0.3388742629588612,
"learning_rate": 4.2772063219523875e-07,
"loss": 0.1871,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06810663640499115,
"step": 1555,
"valid_targets_mean": 11326.8,
"valid_targets_min": 1387
},
{
"epoch": 6.611464968152866,
"grad_norm": 0.25098732861510203,
"learning_rate": 3.853261911395834e-07,
"loss": 0.162,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08643859624862671,
"step": 1560,
"valid_targets_mean": 17087.6,
"valid_targets_min": 4205
},
{
"epoch": 6.632696390658174,
"grad_norm": 0.25308441903042767,
"learning_rate": 3.4512344036353727e-07,
"loss": 0.1771,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0752149447798729,
"step": 1565,
"valid_targets_mean": 14135.8,
"valid_targets_min": 6446
},
{
"epoch": 6.653927813163482,
"grad_norm": 0.27008472046636767,
"learning_rate": 3.071168720130779e-07,
"loss": 0.1496,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.055712342262268066,
"step": 1570,
"valid_targets_mean": 10874.0,
"valid_targets_min": 1381
},
{
"epoch": 6.67515923566879,
"grad_norm": 0.25724134253077174,
"learning_rate": 2.7131073283873654e-07,
"loss": 0.1573,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.059917815029621124,
"step": 1575,
"valid_targets_mean": 14455.5,
"valid_targets_min": 6049
},
{
"epoch": 6.6963906581740975,
"grad_norm": 0.27550494093570005,
"learning_rate": 2.3770902372107772e-07,
"loss": 0.1609,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0692015290260315,
"step": 1580,
"valid_targets_mean": 13820.6,
"valid_targets_min": 4902
},
{
"epoch": 6.717622080679406,
"grad_norm": 0.2523704810311892,
"learning_rate": 2.0631549922364824e-07,
"loss": 0.1427,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06639319658279419,
"step": 1585,
"valid_targets_mean": 11344.8,
"valid_targets_min": 5619
},
{
"epoch": 6.738853503184713,
"grad_norm": 0.29993361280228925,
"learning_rate": 1.7713366717344803e-07,
"loss": 0.1706,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07957091927528381,
"step": 1590,
"valid_targets_mean": 9983.5,
"valid_targets_min": 4996
},
{
"epoch": 6.7600849256900215,
"grad_norm": 0.2502043937813371,
"learning_rate": 1.5016678826899055e-07,
"loss": 0.1495,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07833079993724823,
"step": 1595,
"valid_targets_mean": 20954.8,
"valid_targets_min": 14257
},
{
"epoch": 6.781316348195329,
"grad_norm": 0.25034032952355,
"learning_rate": 1.2541787571594522e-07,
"loss": 0.1599,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07827061414718628,
"step": 1600,
"valid_targets_mean": 15005.9,
"valid_targets_min": 9166
},
{
"epoch": 6.802547770700637,
"grad_norm": 0.22039819025048707,
"learning_rate": 1.0288969489046008e-07,
"loss": 0.1417,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.048642657697200775,
"step": 1605,
"valid_targets_mean": 19840.2,
"valid_targets_min": 11252
},
{
"epoch": 6.823779193205945,
"grad_norm": 0.2588836654976448,
"learning_rate": 8.258476303016017e-08,
"loss": 0.148,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07373689115047455,
"step": 1610,
"valid_targets_mean": 14245.9,
"valid_targets_min": 8271
},
{
"epoch": 6.845010615711253,
"grad_norm": 0.2926967992400304,
"learning_rate": 6.45053489528813e-08,
"loss": 0.1707,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08257532864809036,
"step": 1615,
"valid_targets_mean": 17754.5,
"valid_targets_min": 10599
},
{
"epoch": 6.86624203821656,
"grad_norm": 0.2838687369452468,
"learning_rate": 4.8653472803159576e-08,
"loss": 0.1737,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0895313173532486,
"step": 1620,
"valid_targets_mean": 16994.5,
"valid_targets_min": 8175
},
{
"epoch": 6.887473460721869,
"grad_norm": 0.2750729053185284,
"learning_rate": 3.503090582650081e-08,
"loss": 0.1656,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07482630014419556,
"step": 1625,
"valid_targets_mean": 16871.8,
"valid_targets_min": 5469
},
{
"epoch": 6.908704883227176,
"grad_norm": 0.3270451602832223,
"learning_rate": 2.3639170171474434e-08,
"loss": 0.1578,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08098286390304565,
"step": 1630,
"valid_targets_mean": 13039.2,
"valid_targets_min": 4344
},
{
"epoch": 6.929936305732484,
"grad_norm": 0.3057446527787021,
"learning_rate": 1.4479538719622822e-08,
"loss": 0.1607,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07232065498828888,
"step": 1635,
"valid_targets_mean": 12158.9,
"valid_targets_min": 5313
},
{
"epoch": 6.951167728237792,
"grad_norm": 0.3010327503890946,
"learning_rate": 7.553034943243998e-09,
"loss": 0.174,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10355747491121292,
"step": 1640,
"valid_targets_mean": 16122.8,
"valid_targets_min": 7424
},
{
"epoch": 6.9723991507431,
"grad_norm": 0.296512581796535,
"learning_rate": 2.8604327910186634e-09,
"loss": 0.1679,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.055927574634552,
"step": 1645,
"valid_targets_mean": 14152.6,
"valid_targets_min": 3364
},
{
"epoch": 6.993630573248407,
"grad_norm": 0.29446698895863765,
"learning_rate": 4.02256601546025e-10,
"loss": 0.1742,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0790117084980011,
"step": 1650,
"valid_targets_mean": 12653.6,
"valid_targets_min": 5488
},
{
"epoch": 7.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17071497440338135,
"step": 1652,
"total_flos": 1.3627384946606735e+18,
"train_loss": 0.19606392417490914,
"train_runtime": 62933.2387,
"train_samples_per_second": 0.419,
"train_steps_per_second": 0.026,
"valid_targets_mean": 16957.1,
"valid_targets_min": 7571
}
],
"logging_steps": 5,
"max_steps": 1652,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.3627384946606735e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}