diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,206534 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5392362951724642, + "eval_steps": 500, + "global_step": 29500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.827919644652421e-05, + "grad_norm": 91.8160206307624, + "learning_rate": 0.0, + "loss": 34.0346, + "step": 1 + }, + { + "epoch": 3.655839289304842e-05, + "grad_norm": 73.5999744271434, + "learning_rate": 6.092916984006093e-09, + "loss": 31.4361, + "step": 2 + }, + { + "epoch": 5.483758933957263e-05, + "grad_norm": 51.695688615050905, + "learning_rate": 1.2185833968012185e-08, + "loss": 28.6464, + "step": 3 + }, + { + "epoch": 7.311678578609684e-05, + "grad_norm": 50.539720587475585, + "learning_rate": 1.827875095201828e-08, + "loss": 27.7592, + "step": 4 + }, + { + "epoch": 9.139598223262106e-05, + "grad_norm": 33.302774248523406, + "learning_rate": 2.437166793602437e-08, + "loss": 23.5683, + "step": 5 + }, + { + "epoch": 0.00010967517867914526, + "grad_norm": 30.74690231214884, + "learning_rate": 3.0464584920030467e-08, + "loss": 22.8067, + "step": 6 + }, + { + "epoch": 0.0001279543751256695, + "grad_norm": 23.259204581165505, + "learning_rate": 3.655750190403656e-08, + "loss": 22.4653, + "step": 7 + }, + { + "epoch": 0.0001462335715721937, + "grad_norm": 38.374691871614225, + "learning_rate": 4.265041888804266e-08, + "loss": 21.5733, + "step": 8 + }, + { + "epoch": 0.0001645127680187179, + "grad_norm": 27.796505852900417, + "learning_rate": 4.874333587204874e-08, + "loss": 21.1979, + "step": 9 + }, + { + "epoch": 0.00018279196446524212, + "grad_norm": 24.10366506784843, + "learning_rate": 5.483625285605484e-08, + "loss": 21.5174, + "step": 10 + }, + { + "epoch": 0.00020107116091176632, + "grad_norm": 16.145192145772253, + "learning_rate": 6.092916984006093e-08, + "loss": 20.6094, + "step": 11 + }, + { + "epoch": 0.00021935035735829052, + "grad_norm": 15.63771708728853, + "learning_rate": 6.702208682406704e-08, + "loss": 20.3061, + "step": 12 + }, + { + "epoch": 0.00023762955380481475, + "grad_norm": 14.09407904830339, + "learning_rate": 7.311500380807313e-08, + "loss": 20.0126, + "step": 13 + }, + { + "epoch": 0.000255908750251339, + "grad_norm": 11.623036021341093, + "learning_rate": 7.920792079207921e-08, + "loss": 19.2338, + "step": 14 + }, + { + "epoch": 0.0002741879466978632, + "grad_norm": 13.989361579308797, + "learning_rate": 8.530083777608532e-08, + "loss": 19.9629, + "step": 15 + }, + { + "epoch": 0.0002924671431443874, + "grad_norm": 16.006656703513595, + "learning_rate": 9.13937547600914e-08, + "loss": 19.8983, + "step": 16 + }, + { + "epoch": 0.0003107463395909116, + "grad_norm": 12.953914362703909, + "learning_rate": 9.748667174409748e-08, + "loss": 19.8281, + "step": 17 + }, + { + "epoch": 0.0003290255360374358, + "grad_norm": 13.199082096754312, + "learning_rate": 1.0357958872810359e-07, + "loss": 19.4738, + "step": 18 + }, + { + "epoch": 0.00034730473248396, + "grad_norm": 12.229065836228527, + "learning_rate": 1.0967250571210967e-07, + "loss": 18.9835, + "step": 19 + }, + { + "epoch": 0.00036558392893048424, + "grad_norm": 15.118897649083674, + "learning_rate": 1.1576542269611578e-07, + "loss": 20.3097, + "step": 20 + }, + { + "epoch": 0.00038386312537700844, + "grad_norm": 12.148133829458713, + "learning_rate": 1.2185833968012187e-07, + 
"loss": 19.2054, + "step": 21 + }, + { + "epoch": 0.00040214232182353264, + "grad_norm": 11.55384831434423, + "learning_rate": 1.2795125666412797e-07, + "loss": 18.9609, + "step": 22 + }, + { + "epoch": 0.00042042151827005684, + "grad_norm": 11.852696223714258, + "learning_rate": 1.3404417364813407e-07, + "loss": 19.2075, + "step": 23 + }, + { + "epoch": 0.00043870071471658104, + "grad_norm": 12.726817530901487, + "learning_rate": 1.4013709063214015e-07, + "loss": 19.6638, + "step": 24 + }, + { + "epoch": 0.0004569799111631053, + "grad_norm": 13.380375455690142, + "learning_rate": 1.4623000761614625e-07, + "loss": 19.8093, + "step": 25 + }, + { + "epoch": 0.0004752591076096295, + "grad_norm": 9.618449769800641, + "learning_rate": 1.5232292460015233e-07, + "loss": 18.6443, + "step": 26 + }, + { + "epoch": 0.0004935383040561537, + "grad_norm": 9.468083311223333, + "learning_rate": 1.5841584158415843e-07, + "loss": 19.0952, + "step": 27 + }, + { + "epoch": 0.000511817500502678, + "grad_norm": 10.742512704213103, + "learning_rate": 1.6450875856816453e-07, + "loss": 19.2092, + "step": 28 + }, + { + "epoch": 0.0005300966969492021, + "grad_norm": 9.384042455482811, + "learning_rate": 1.7060167555217063e-07, + "loss": 18.7381, + "step": 29 + }, + { + "epoch": 0.0005483758933957264, + "grad_norm": 9.362811579992922, + "learning_rate": 1.766945925361767e-07, + "loss": 19.0655, + "step": 30 + }, + { + "epoch": 0.0005666550898422505, + "grad_norm": 9.59019465210035, + "learning_rate": 1.827875095201828e-07, + "loss": 18.9933, + "step": 31 + }, + { + "epoch": 0.0005849342862887748, + "grad_norm": 10.416158655166349, + "learning_rate": 1.8888042650418892e-07, + "loss": 19.3626, + "step": 32 + }, + { + "epoch": 0.0006032134827352989, + "grad_norm": 9.362295378103198, + "learning_rate": 1.9497334348819496e-07, + "loss": 18.5162, + "step": 33 + }, + { + "epoch": 0.0006214926791818232, + "grad_norm": 10.082680254606226, + "learning_rate": 2.0106626047220107e-07, + "loss": 18.8908, + "step": 34 + }, + { + "epoch": 0.0006397718756283474, + "grad_norm": 11.776658098765381, + "learning_rate": 2.0715917745620717e-07, + "loss": 19.1424, + "step": 35 + }, + { + "epoch": 0.0006580510720748716, + "grad_norm": 10.029439496961013, + "learning_rate": 2.1325209444021327e-07, + "loss": 19.0282, + "step": 36 + }, + { + "epoch": 0.0006763302685213958, + "grad_norm": 8.464048988417401, + "learning_rate": 2.1934501142421935e-07, + "loss": 18.3312, + "step": 37 + }, + { + "epoch": 0.00069460946496792, + "grad_norm": 10.778425266213326, + "learning_rate": 2.2543792840822545e-07, + "loss": 19.3784, + "step": 38 + }, + { + "epoch": 0.0007128886614144442, + "grad_norm": 10.45936367907346, + "learning_rate": 2.3153084539223155e-07, + "loss": 18.772, + "step": 39 + }, + { + "epoch": 0.0007311678578609685, + "grad_norm": 10.180810668857037, + "learning_rate": 2.3762376237623766e-07, + "loss": 19.0677, + "step": 40 + }, + { + "epoch": 0.0007494470543074926, + "grad_norm": 9.486856492442154, + "learning_rate": 2.4371667936024373e-07, + "loss": 18.6111, + "step": 41 + }, + { + "epoch": 0.0007677262507540169, + "grad_norm": 8.892386702121996, + "learning_rate": 2.4980959634424986e-07, + "loss": 18.8254, + "step": 42 + }, + { + "epoch": 0.000786005447200541, + "grad_norm": 10.950408492270602, + "learning_rate": 2.5590251332825594e-07, + "loss": 19.4113, + "step": 43 + }, + { + "epoch": 0.0008042846436470653, + "grad_norm": 10.6052965565659, + "learning_rate": 2.61995430312262e-07, + "loss": 19.4145, + "step": 44 + }, + { + "epoch": 
0.0008225638400935895, + "grad_norm": 9.928430603672055, + "learning_rate": 2.6808834729626814e-07, + "loss": 18.7649, + "step": 45 + }, + { + "epoch": 0.0008408430365401137, + "grad_norm": 9.414699639763915, + "learning_rate": 2.7418126428027417e-07, + "loss": 19.1676, + "step": 46 + }, + { + "epoch": 0.0008591222329866379, + "grad_norm": 10.136690731179094, + "learning_rate": 2.802741812642803e-07, + "loss": 18.9255, + "step": 47 + }, + { + "epoch": 0.0008774014294331621, + "grad_norm": 9.41687674012297, + "learning_rate": 2.8636709824828637e-07, + "loss": 18.5135, + "step": 48 + }, + { + "epoch": 0.0008956806258796863, + "grad_norm": 10.230953762022938, + "learning_rate": 2.924600152322925e-07, + "loss": 19.4077, + "step": 49 + }, + { + "epoch": 0.0009139598223262106, + "grad_norm": 9.331096241633832, + "learning_rate": 2.985529322162986e-07, + "loss": 18.5752, + "step": 50 + }, + { + "epoch": 0.0009322390187727347, + "grad_norm": 9.377359156140896, + "learning_rate": 3.0464584920030465e-07, + "loss": 18.6574, + "step": 51 + }, + { + "epoch": 0.000950518215219259, + "grad_norm": 10.24702422065715, + "learning_rate": 3.107387661843108e-07, + "loss": 18.7512, + "step": 52 + }, + { + "epoch": 0.0009687974116657831, + "grad_norm": 10.438749932564663, + "learning_rate": 3.1683168316831686e-07, + "loss": 19.1765, + "step": 53 + }, + { + "epoch": 0.0009870766081123074, + "grad_norm": 9.395189052530064, + "learning_rate": 3.2292460015232293e-07, + "loss": 18.7879, + "step": 54 + }, + { + "epoch": 0.0010053558045588315, + "grad_norm": 9.545581422460163, + "learning_rate": 3.2901751713632906e-07, + "loss": 18.5035, + "step": 55 + }, + { + "epoch": 0.001023635001005356, + "grad_norm": 8.756905292427446, + "learning_rate": 3.3511043412033514e-07, + "loss": 18.8401, + "step": 56 + }, + { + "epoch": 0.00104191419745188, + "grad_norm": 9.835134510268613, + "learning_rate": 3.4120335110434127e-07, + "loss": 18.6639, + "step": 57 + }, + { + "epoch": 0.0010601933938984042, + "grad_norm": 9.329687900850928, + "learning_rate": 3.472962680883473e-07, + "loss": 18.5866, + "step": 58 + }, + { + "epoch": 0.0010784725903449283, + "grad_norm": 10.385670344311434, + "learning_rate": 3.533891850723534e-07, + "loss": 19.4529, + "step": 59 + }, + { + "epoch": 0.0010967517867914527, + "grad_norm": 8.319642063163402, + "learning_rate": 3.594821020563595e-07, + "loss": 18.2629, + "step": 60 + }, + { + "epoch": 0.0011150309832379769, + "grad_norm": 10.166549933688021, + "learning_rate": 3.655750190403656e-07, + "loss": 19.0247, + "step": 61 + }, + { + "epoch": 0.001133310179684501, + "grad_norm": 10.381261400913276, + "learning_rate": 3.716679360243717e-07, + "loss": 19.3155, + "step": 62 + }, + { + "epoch": 0.0011515893761310254, + "grad_norm": 9.779550735131549, + "learning_rate": 3.7776085300837783e-07, + "loss": 18.7911, + "step": 63 + }, + { + "epoch": 0.0011698685725775495, + "grad_norm": 8.899746443889216, + "learning_rate": 3.838537699923839e-07, + "loss": 18.8994, + "step": 64 + }, + { + "epoch": 0.0011881477690240737, + "grad_norm": 9.321725176330542, + "learning_rate": 3.8994668697638993e-07, + "loss": 18.5516, + "step": 65 + }, + { + "epoch": 0.0012064269654705978, + "grad_norm": 10.686659102112074, + "learning_rate": 3.9603960396039606e-07, + "loss": 19.1882, + "step": 66 + }, + { + "epoch": 0.0012247061619171222, + "grad_norm": 10.278368781143437, + "learning_rate": 4.0213252094440214e-07, + "loss": 19.1586, + "step": 67 + }, + { + "epoch": 0.0012429853583636463, + "grad_norm": 9.853789742977987, + 
"learning_rate": 4.0822543792840826e-07, + "loss": 18.8072, + "step": 68 + }, + { + "epoch": 0.0012612645548101705, + "grad_norm": 8.424851011261469, + "learning_rate": 4.1431835491241434e-07, + "loss": 18.0205, + "step": 69 + }, + { + "epoch": 0.0012795437512566948, + "grad_norm": 11.179083215450209, + "learning_rate": 4.2041127189642047e-07, + "loss": 19.1082, + "step": 70 + }, + { + "epoch": 0.001297822947703219, + "grad_norm": 11.205875315547486, + "learning_rate": 4.2650418888042655e-07, + "loss": 19.1888, + "step": 71 + }, + { + "epoch": 0.0013161021441497431, + "grad_norm": 9.218667827948646, + "learning_rate": 4.325971058644327e-07, + "loss": 18.5834, + "step": 72 + }, + { + "epoch": 0.0013343813405962675, + "grad_norm": 9.114488209681602, + "learning_rate": 4.386900228484387e-07, + "loss": 18.3438, + "step": 73 + }, + { + "epoch": 0.0013526605370427916, + "grad_norm": 9.147427856276416, + "learning_rate": 4.4478293983244483e-07, + "loss": 18.3276, + "step": 74 + }, + { + "epoch": 0.0013709397334893158, + "grad_norm": 10.815359011016698, + "learning_rate": 4.508758568164509e-07, + "loss": 18.9537, + "step": 75 + }, + { + "epoch": 0.00138921892993584, + "grad_norm": 11.106338765167994, + "learning_rate": 4.5696877380045703e-07, + "loss": 20.4775, + "step": 76 + }, + { + "epoch": 0.0014074981263823643, + "grad_norm": 9.940729728721143, + "learning_rate": 4.630616907844631e-07, + "loss": 19.0393, + "step": 77 + }, + { + "epoch": 0.0014257773228288884, + "grad_norm": 9.792218976670425, + "learning_rate": 4.691546077684692e-07, + "loss": 18.8914, + "step": 78 + }, + { + "epoch": 0.0014440565192754126, + "grad_norm": 10.60799501326941, + "learning_rate": 4.752475247524753e-07, + "loss": 18.8633, + "step": 79 + }, + { + "epoch": 0.001462335715721937, + "grad_norm": 9.136615230502608, + "learning_rate": 4.813404417364814e-07, + "loss": 18.6693, + "step": 80 + }, + { + "epoch": 0.001480614912168461, + "grad_norm": 9.050880233447275, + "learning_rate": 4.874333587204875e-07, + "loss": 18.3404, + "step": 81 + }, + { + "epoch": 0.0014988941086149852, + "grad_norm": 9.821799815962711, + "learning_rate": 4.935262757044935e-07, + "loss": 19.028, + "step": 82 + }, + { + "epoch": 0.0015171733050615094, + "grad_norm": 9.00509710897049, + "learning_rate": 4.996191926884997e-07, + "loss": 18.6076, + "step": 83 + }, + { + "epoch": 0.0015354525015080338, + "grad_norm": 8.4164016249053, + "learning_rate": 5.057121096725057e-07, + "loss": 18.1026, + "step": 84 + }, + { + "epoch": 0.001553731697954558, + "grad_norm": 9.747465626841214, + "learning_rate": 5.118050266565119e-07, + "loss": 18.742, + "step": 85 + }, + { + "epoch": 0.001572010894401082, + "grad_norm": 11.031842189183193, + "learning_rate": 5.17897943640518e-07, + "loss": 19.5137, + "step": 86 + }, + { + "epoch": 0.0015902900908476064, + "grad_norm": 8.60703077645388, + "learning_rate": 5.23990860624524e-07, + "loss": 18.6961, + "step": 87 + }, + { + "epoch": 0.0016085692872941306, + "grad_norm": 9.06509676047877, + "learning_rate": 5.300837776085301e-07, + "loss": 18.5877, + "step": 88 + }, + { + "epoch": 0.0016268484837406547, + "grad_norm": 9.844496870043358, + "learning_rate": 5.361766945925363e-07, + "loss": 18.7855, + "step": 89 + }, + { + "epoch": 0.001645127680187179, + "grad_norm": 9.277358662258633, + "learning_rate": 5.422696115765424e-07, + "loss": 18.7348, + "step": 90 + }, + { + "epoch": 0.0016634068766337032, + "grad_norm": 9.252853720597152, + "learning_rate": 5.483625285605483e-07, + "loss": 18.6759, + "step": 91 + }, + { + 
"epoch": 0.0016816860730802274, + "grad_norm": 8.84638620725077, + "learning_rate": 5.544554455445545e-07, + "loss": 18.4325, + "step": 92 + }, + { + "epoch": 0.0016999652695267515, + "grad_norm": 10.139260624547632, + "learning_rate": 5.605483625285606e-07, + "loss": 19.0223, + "step": 93 + }, + { + "epoch": 0.0017182444659732759, + "grad_norm": 10.300594130638922, + "learning_rate": 5.666412795125667e-07, + "loss": 18.9603, + "step": 94 + }, + { + "epoch": 0.0017365236624198, + "grad_norm": 8.558786354910753, + "learning_rate": 5.727341964965727e-07, + "loss": 18.1716, + "step": 95 + }, + { + "epoch": 0.0017548028588663242, + "grad_norm": 9.221680135788464, + "learning_rate": 5.788271134805789e-07, + "loss": 19.0363, + "step": 96 + }, + { + "epoch": 0.0017730820553128485, + "grad_norm": 9.318146513867163, + "learning_rate": 5.84920030464585e-07, + "loss": 18.308, + "step": 97 + }, + { + "epoch": 0.0017913612517593727, + "grad_norm": 8.89684007120383, + "learning_rate": 5.910129474485911e-07, + "loss": 18.3987, + "step": 98 + }, + { + "epoch": 0.0018096404482058968, + "grad_norm": 8.770987219267509, + "learning_rate": 5.971058644325972e-07, + "loss": 18.2499, + "step": 99 + }, + { + "epoch": 0.0018279196446524212, + "grad_norm": 8.715509627670407, + "learning_rate": 6.031987814166032e-07, + "loss": 18.3155, + "step": 100 + }, + { + "epoch": 0.0018461988410989453, + "grad_norm": 9.157266713898336, + "learning_rate": 6.092916984006093e-07, + "loss": 18.7471, + "step": 101 + }, + { + "epoch": 0.0018644780375454695, + "grad_norm": 8.141147516544505, + "learning_rate": 6.153846153846155e-07, + "loss": 17.8998, + "step": 102 + }, + { + "epoch": 0.0018827572339919936, + "grad_norm": 9.099602296515032, + "learning_rate": 6.214775323686216e-07, + "loss": 18.5905, + "step": 103 + }, + { + "epoch": 0.001901036430438518, + "grad_norm": 9.459641647038191, + "learning_rate": 6.275704493526276e-07, + "loss": 18.919, + "step": 104 + }, + { + "epoch": 0.0019193156268850421, + "grad_norm": 8.533043987820777, + "learning_rate": 6.336633663366337e-07, + "loss": 18.2984, + "step": 105 + }, + { + "epoch": 0.0019375948233315663, + "grad_norm": 8.831215555540918, + "learning_rate": 6.397562833206399e-07, + "loss": 18.4262, + "step": 106 + }, + { + "epoch": 0.0019558740197780904, + "grad_norm": 9.760196162863846, + "learning_rate": 6.458492003046459e-07, + "loss": 18.7845, + "step": 107 + }, + { + "epoch": 0.001974153216224615, + "grad_norm": 9.619501623532049, + "learning_rate": 6.519421172886519e-07, + "loss": 18.6491, + "step": 108 + }, + { + "epoch": 0.001992432412671139, + "grad_norm": 9.120851474475579, + "learning_rate": 6.580350342726581e-07, + "loss": 18.7755, + "step": 109 + }, + { + "epoch": 0.002010711609117663, + "grad_norm": 10.429387763449409, + "learning_rate": 6.641279512566642e-07, + "loss": 18.8206, + "step": 110 + }, + { + "epoch": 0.0020289908055641875, + "grad_norm": 10.444785293681528, + "learning_rate": 6.702208682406703e-07, + "loss": 19.0005, + "step": 111 + }, + { + "epoch": 0.002047270002010712, + "grad_norm": 10.653039310285068, + "learning_rate": 6.763137852246764e-07, + "loss": 19.1512, + "step": 112 + }, + { + "epoch": 0.0020655491984572357, + "grad_norm": 8.025944215659788, + "learning_rate": 6.824067022086825e-07, + "loss": 18.0986, + "step": 113 + }, + { + "epoch": 0.00208382839490376, + "grad_norm": 9.547795043033215, + "learning_rate": 6.884996191926886e-07, + "loss": 18.8097, + "step": 114 + }, + { + "epoch": 0.002102107591350284, + "grad_norm": 9.19001868551479, + 
"learning_rate": 6.945925361766946e-07, + "loss": 18.7753, + "step": 115 + }, + { + "epoch": 0.0021203867877968084, + "grad_norm": 9.39295941251693, + "learning_rate": 7.006854531607008e-07, + "loss": 18.4715, + "step": 116 + }, + { + "epoch": 0.0021386659842433328, + "grad_norm": 7.970423062653836, + "learning_rate": 7.067783701447068e-07, + "loss": 18.0459, + "step": 117 + }, + { + "epoch": 0.0021569451806898567, + "grad_norm": 9.510873550335704, + "learning_rate": 7.128712871287129e-07, + "loss": 18.8316, + "step": 118 + }, + { + "epoch": 0.002175224377136381, + "grad_norm": 10.785468497817561, + "learning_rate": 7.18964204112719e-07, + "loss": 19.3941, + "step": 119 + }, + { + "epoch": 0.0021935035735829054, + "grad_norm": 9.073789812838537, + "learning_rate": 7.250571210967252e-07, + "loss": 18.5127, + "step": 120 + }, + { + "epoch": 0.0022117827700294294, + "grad_norm": 10.862325819460475, + "learning_rate": 7.311500380807313e-07, + "loss": 19.019, + "step": 121 + }, + { + "epoch": 0.0022300619664759537, + "grad_norm": 7.8996003598539035, + "learning_rate": 7.372429550647372e-07, + "loss": 18.0205, + "step": 122 + }, + { + "epoch": 0.002248341162922478, + "grad_norm": 8.825842860534118, + "learning_rate": 7.433358720487434e-07, + "loss": 18.3188, + "step": 123 + }, + { + "epoch": 0.002266620359369002, + "grad_norm": 8.5897317430774, + "learning_rate": 7.494287890327495e-07, + "loss": 18.6975, + "step": 124 + }, + { + "epoch": 0.0022848995558155264, + "grad_norm": 8.95155813372537, + "learning_rate": 7.555217060167557e-07, + "loss": 18.6758, + "step": 125 + }, + { + "epoch": 0.0023031787522620507, + "grad_norm": 9.042809889012444, + "learning_rate": 7.616146230007616e-07, + "loss": 18.2421, + "step": 126 + }, + { + "epoch": 0.0023214579487085747, + "grad_norm": 8.188339694755228, + "learning_rate": 7.677075399847678e-07, + "loss": 18.4488, + "step": 127 + }, + { + "epoch": 0.002339737145155099, + "grad_norm": 9.252561792937517, + "learning_rate": 7.738004569687739e-07, + "loss": 18.3231, + "step": 128 + }, + { + "epoch": 0.0023580163416016234, + "grad_norm": 9.19760399419201, + "learning_rate": 7.798933739527799e-07, + "loss": 18.5613, + "step": 129 + }, + { + "epoch": 0.0023762955380481473, + "grad_norm": 9.755431876364451, + "learning_rate": 7.85986290936786e-07, + "loss": 18.842, + "step": 130 + }, + { + "epoch": 0.0023945747344946717, + "grad_norm": 8.252542603191571, + "learning_rate": 7.920792079207921e-07, + "loss": 17.9852, + "step": 131 + }, + { + "epoch": 0.0024128539309411956, + "grad_norm": 8.43311297243739, + "learning_rate": 7.981721249047983e-07, + "loss": 18.5464, + "step": 132 + }, + { + "epoch": 0.00243113312738772, + "grad_norm": 9.599173769619222, + "learning_rate": 8.042650418888043e-07, + "loss": 18.9277, + "step": 133 + }, + { + "epoch": 0.0024494123238342443, + "grad_norm": 9.308163019342764, + "learning_rate": 8.103579588728105e-07, + "loss": 18.9868, + "step": 134 + }, + { + "epoch": 0.0024676915202807683, + "grad_norm": 8.775269468740735, + "learning_rate": 8.164508758568165e-07, + "loss": 18.3321, + "step": 135 + }, + { + "epoch": 0.0024859707167272926, + "grad_norm": 9.4470228819566, + "learning_rate": 8.225437928408227e-07, + "loss": 18.7455, + "step": 136 + }, + { + "epoch": 0.002504249913173817, + "grad_norm": 8.587540626226483, + "learning_rate": 8.286367098248287e-07, + "loss": 18.2026, + "step": 137 + }, + { + "epoch": 0.002522529109620341, + "grad_norm": 9.605233117505259, + "learning_rate": 8.347296268088348e-07, + "loss": 18.452, + "step": 138 + 
}, + { + "epoch": 0.0025408083060668653, + "grad_norm": 9.439766715402934, + "learning_rate": 8.408225437928409e-07, + "loss": 18.5597, + "step": 139 + }, + { + "epoch": 0.0025590875025133897, + "grad_norm": 8.865103055248236, + "learning_rate": 8.46915460776847e-07, + "loss": 18.1067, + "step": 140 + }, + { + "epoch": 0.0025773666989599136, + "grad_norm": 8.963452516669594, + "learning_rate": 8.530083777608531e-07, + "loss": 18.3106, + "step": 141 + }, + { + "epoch": 0.002595645895406438, + "grad_norm": 9.31260967372063, + "learning_rate": 8.591012947448592e-07, + "loss": 18.1281, + "step": 142 + }, + { + "epoch": 0.0026139250918529623, + "grad_norm": 9.983750461174719, + "learning_rate": 8.651942117288653e-07, + "loss": 18.9052, + "step": 143 + }, + { + "epoch": 0.0026322042882994862, + "grad_norm": 8.525711362497992, + "learning_rate": 8.712871287128713e-07, + "loss": 17.9311, + "step": 144 + }, + { + "epoch": 0.0026504834847460106, + "grad_norm": 8.16279592260429, + "learning_rate": 8.773800456968774e-07, + "loss": 18.0707, + "step": 145 + }, + { + "epoch": 0.002668762681192535, + "grad_norm": 10.810956985437747, + "learning_rate": 8.834729626808836e-07, + "loss": 19.6098, + "step": 146 + }, + { + "epoch": 0.002687041877639059, + "grad_norm": 8.418666058583149, + "learning_rate": 8.895658796648897e-07, + "loss": 18.3767, + "step": 147 + }, + { + "epoch": 0.0027053210740855833, + "grad_norm": 7.566753590641935, + "learning_rate": 8.956587966488957e-07, + "loss": 17.986, + "step": 148 + }, + { + "epoch": 0.002723600270532107, + "grad_norm": 8.820077881486961, + "learning_rate": 9.017517136329018e-07, + "loss": 17.9399, + "step": 149 + }, + { + "epoch": 0.0027418794669786316, + "grad_norm": 7.835441517740366, + "learning_rate": 9.07844630616908e-07, + "loss": 18.1436, + "step": 150 + }, + { + "epoch": 0.002760158663425156, + "grad_norm": 10.311120459482858, + "learning_rate": 9.139375476009141e-07, + "loss": 19.287, + "step": 151 + }, + { + "epoch": 0.00277843785987168, + "grad_norm": 9.180260647695155, + "learning_rate": 9.2003046458492e-07, + "loss": 18.2681, + "step": 152 + }, + { + "epoch": 0.0027967170563182042, + "grad_norm": 9.223803948298361, + "learning_rate": 9.261233815689262e-07, + "loss": 18.5626, + "step": 153 + }, + { + "epoch": 0.0028149962527647286, + "grad_norm": 9.743220538497313, + "learning_rate": 9.322162985529323e-07, + "loss": 18.5406, + "step": 154 + }, + { + "epoch": 0.0028332754492112525, + "grad_norm": 8.421411343466511, + "learning_rate": 9.383092155369384e-07, + "loss": 18.3646, + "step": 155 + }, + { + "epoch": 0.002851554645657777, + "grad_norm": 8.990896416587637, + "learning_rate": 9.444021325209444e-07, + "loss": 18.4719, + "step": 156 + }, + { + "epoch": 0.0028698338421043012, + "grad_norm": 8.754872875971705, + "learning_rate": 9.504950495049506e-07, + "loss": 18.2906, + "step": 157 + }, + { + "epoch": 0.002888113038550825, + "grad_norm": 9.710686470408715, + "learning_rate": 9.565879664889568e-07, + "loss": 18.6634, + "step": 158 + }, + { + "epoch": 0.0029063922349973495, + "grad_norm": 8.724629853904714, + "learning_rate": 9.626808834729628e-07, + "loss": 18.9012, + "step": 159 + }, + { + "epoch": 0.002924671431443874, + "grad_norm": 9.188575498830037, + "learning_rate": 9.687738004569687e-07, + "loss": 18.2614, + "step": 160 + }, + { + "epoch": 0.002942950627890398, + "grad_norm": 11.013102190121193, + "learning_rate": 9.74866717440975e-07, + "loss": 19.1223, + "step": 161 + }, + { + "epoch": 0.002961229824336922, + "grad_norm": 8.394700341610353, + 
"learning_rate": 9.809596344249811e-07, + "loss": 18.1358, + "step": 162 + }, + { + "epoch": 0.0029795090207834466, + "grad_norm": 8.069990243816974, + "learning_rate": 9.87052551408987e-07, + "loss": 17.8555, + "step": 163 + }, + { + "epoch": 0.0029977882172299705, + "grad_norm": 9.303198108361302, + "learning_rate": 9.931454683929933e-07, + "loss": 18.468, + "step": 164 + }, + { + "epoch": 0.003016067413676495, + "grad_norm": 8.335228410252673, + "learning_rate": 9.992383853769994e-07, + "loss": 18.0916, + "step": 165 + }, + { + "epoch": 0.0030343466101230188, + "grad_norm": 8.432740981655469, + "learning_rate": 1.0053313023610054e-06, + "loss": 18.0996, + "step": 166 + }, + { + "epoch": 0.003052625806569543, + "grad_norm": 9.519777017936155, + "learning_rate": 1.0114242193450114e-06, + "loss": 19.1096, + "step": 167 + }, + { + "epoch": 0.0030709050030160675, + "grad_norm": 8.065844106323338, + "learning_rate": 1.0175171363290176e-06, + "loss": 18.1032, + "step": 168 + }, + { + "epoch": 0.0030891841994625914, + "grad_norm": 9.163532440473222, + "learning_rate": 1.0236100533130238e-06, + "loss": 18.282, + "step": 169 + }, + { + "epoch": 0.003107463395909116, + "grad_norm": 9.388304585526452, + "learning_rate": 1.0297029702970297e-06, + "loss": 18.6834, + "step": 170 + }, + { + "epoch": 0.00312574259235564, + "grad_norm": 8.87715952646457, + "learning_rate": 1.035795887281036e-06, + "loss": 18.477, + "step": 171 + }, + { + "epoch": 0.003144021788802164, + "grad_norm": 26.114301441663105, + "learning_rate": 1.041888804265042e-06, + "loss": 19.293, + "step": 172 + }, + { + "epoch": 0.0031623009852486885, + "grad_norm": 9.694088824254733, + "learning_rate": 1.047981721249048e-06, + "loss": 18.7479, + "step": 173 + }, + { + "epoch": 0.003180580181695213, + "grad_norm": 9.095561608266575, + "learning_rate": 1.054074638233054e-06, + "loss": 18.3212, + "step": 174 + }, + { + "epoch": 0.0031988593781417368, + "grad_norm": 9.421354731090213, + "learning_rate": 1.0601675552170602e-06, + "loss": 18.6331, + "step": 175 + }, + { + "epoch": 0.003217138574588261, + "grad_norm": 10.830755674328625, + "learning_rate": 1.0662604722010664e-06, + "loss": 19.3953, + "step": 176 + }, + { + "epoch": 0.0032354177710347855, + "grad_norm": 8.334796100622082, + "learning_rate": 1.0723533891850726e-06, + "loss": 18.0036, + "step": 177 + }, + { + "epoch": 0.0032536969674813094, + "grad_norm": 8.274933800538554, + "learning_rate": 1.0784463061690785e-06, + "loss": 18.0767, + "step": 178 + }, + { + "epoch": 0.0032719761639278338, + "grad_norm": 8.952990668764464, + "learning_rate": 1.0845392231530847e-06, + "loss": 18.3319, + "step": 179 + }, + { + "epoch": 0.003290255360374358, + "grad_norm": 7.448210693608828, + "learning_rate": 1.0906321401370907e-06, + "loss": 17.8476, + "step": 180 + }, + { + "epoch": 0.003308534556820882, + "grad_norm": 11.033797657019463, + "learning_rate": 1.0967250571210967e-06, + "loss": 19.3991, + "step": 181 + }, + { + "epoch": 0.0033268137532674064, + "grad_norm": 10.099842515451865, + "learning_rate": 1.1028179741051028e-06, + "loss": 19.1332, + "step": 182 + }, + { + "epoch": 0.0033450929497139304, + "grad_norm": 9.208122392507601, + "learning_rate": 1.108910891089109e-06, + "loss": 18.6594, + "step": 183 + }, + { + "epoch": 0.0033633721461604547, + "grad_norm": 9.323685684955574, + "learning_rate": 1.1150038080731152e-06, + "loss": 19.0113, + "step": 184 + }, + { + "epoch": 0.003381651342606979, + "grad_norm": 9.48121086474806, + "learning_rate": 1.1210967250571212e-06, + "loss": 
18.2684, + "step": 185 + }, + { + "epoch": 0.003399930539053503, + "grad_norm": 9.458692967694427, + "learning_rate": 1.1271896420411274e-06, + "loss": 18.5252, + "step": 186 + }, + { + "epoch": 0.0034182097355000274, + "grad_norm": 10.162976696658363, + "learning_rate": 1.1332825590251333e-06, + "loss": 18.7188, + "step": 187 + }, + { + "epoch": 0.0034364889319465517, + "grad_norm": 7.536693138762417, + "learning_rate": 1.1393754760091395e-06, + "loss": 17.7561, + "step": 188 + }, + { + "epoch": 0.0034547681283930757, + "grad_norm": 10.261621116891442, + "learning_rate": 1.1454683929931455e-06, + "loss": 19.0036, + "step": 189 + }, + { + "epoch": 0.0034730473248396, + "grad_norm": 9.211901187355524, + "learning_rate": 1.1515613099771517e-06, + "loss": 18.6356, + "step": 190 + }, + { + "epoch": 0.0034913265212861244, + "grad_norm": 9.798349541549694, + "learning_rate": 1.1576542269611578e-06, + "loss": 19.301, + "step": 191 + }, + { + "epoch": 0.0035096057177326483, + "grad_norm": 9.408807780556515, + "learning_rate": 1.1637471439451638e-06, + "loss": 18.2518, + "step": 192 + }, + { + "epoch": 0.0035278849141791727, + "grad_norm": 8.703383551292617, + "learning_rate": 1.16984006092917e-06, + "loss": 18.2204, + "step": 193 + }, + { + "epoch": 0.003546164110625697, + "grad_norm": 9.964051574549913, + "learning_rate": 1.175932977913176e-06, + "loss": 18.758, + "step": 194 + }, + { + "epoch": 0.003564443307072221, + "grad_norm": 8.45774454932824, + "learning_rate": 1.1820258948971822e-06, + "loss": 18.0124, + "step": 195 + }, + { + "epoch": 0.0035827225035187454, + "grad_norm": 9.41053515094707, + "learning_rate": 1.1881188118811881e-06, + "loss": 18.6049, + "step": 196 + }, + { + "epoch": 0.0036010016999652697, + "grad_norm": 7.926905776853164, + "learning_rate": 1.1942117288651943e-06, + "loss": 17.9763, + "step": 197 + }, + { + "epoch": 0.0036192808964117936, + "grad_norm": 7.762301016854572, + "learning_rate": 1.2003046458492005e-06, + "loss": 18.0592, + "step": 198 + }, + { + "epoch": 0.003637560092858318, + "grad_norm": 8.298271358933013, + "learning_rate": 1.2063975628332065e-06, + "loss": 18.0102, + "step": 199 + }, + { + "epoch": 0.0036558392893048424, + "grad_norm": 10.730216617638389, + "learning_rate": 1.2124904798172126e-06, + "loss": 19.0529, + "step": 200 + }, + { + "epoch": 0.0036741184857513663, + "grad_norm": 8.315309216617923, + "learning_rate": 1.2185833968012186e-06, + "loss": 18.2883, + "step": 201 + }, + { + "epoch": 0.0036923976821978907, + "grad_norm": 9.937267774764383, + "learning_rate": 1.2246763137852248e-06, + "loss": 18.8833, + "step": 202 + }, + { + "epoch": 0.0037106768786444146, + "grad_norm": 8.298280967026155, + "learning_rate": 1.230769230769231e-06, + "loss": 18.4287, + "step": 203 + }, + { + "epoch": 0.003728956075090939, + "grad_norm": 8.872669745889823, + "learning_rate": 1.236862147753237e-06, + "loss": 18.3237, + "step": 204 + }, + { + "epoch": 0.0037472352715374633, + "grad_norm": 9.26640734442622, + "learning_rate": 1.2429550647372431e-06, + "loss": 18.5356, + "step": 205 + }, + { + "epoch": 0.0037655144679839873, + "grad_norm": 9.193263429338693, + "learning_rate": 1.249047981721249e-06, + "loss": 18.1574, + "step": 206 + }, + { + "epoch": 0.0037837936644305116, + "grad_norm": 9.406715721001522, + "learning_rate": 1.2551408987052553e-06, + "loss": 18.5967, + "step": 207 + }, + { + "epoch": 0.003802072860877036, + "grad_norm": 8.430542392894331, + "learning_rate": 1.2612338156892615e-06, + "loss": 18.4397, + "step": 208 + }, + { + "epoch": 
0.00382035205732356, + "grad_norm": 8.324568367394546, + "learning_rate": 1.2673267326732674e-06, + "loss": 18.1317, + "step": 209 + }, + { + "epoch": 0.0038386312537700843, + "grad_norm": 8.636707220640757, + "learning_rate": 1.2734196496572734e-06, + "loss": 18.2539, + "step": 210 + }, + { + "epoch": 0.0038569104502166086, + "grad_norm": 8.559056941183735, + "learning_rate": 1.2795125666412798e-06, + "loss": 18.1112, + "step": 211 + }, + { + "epoch": 0.0038751896466631326, + "grad_norm": 8.349106399691129, + "learning_rate": 1.2856054836252858e-06, + "loss": 18.0289, + "step": 212 + }, + { + "epoch": 0.003893468843109657, + "grad_norm": 9.109517130979178, + "learning_rate": 1.2916984006092917e-06, + "loss": 18.4914, + "step": 213 + }, + { + "epoch": 0.003911748039556181, + "grad_norm": 10.17773107995757, + "learning_rate": 1.297791317593298e-06, + "loss": 18.686, + "step": 214 + }, + { + "epoch": 0.003930027236002705, + "grad_norm": 8.880061859485329, + "learning_rate": 1.3038842345773039e-06, + "loss": 18.1707, + "step": 215 + }, + { + "epoch": 0.00394830643244923, + "grad_norm": 10.41268299096758, + "learning_rate": 1.3099771515613103e-06, + "loss": 19.4722, + "step": 216 + }, + { + "epoch": 0.003966585628895754, + "grad_norm": 9.19944078898899, + "learning_rate": 1.3160700685453163e-06, + "loss": 18.5689, + "step": 217 + }, + { + "epoch": 0.003984864825342278, + "grad_norm": 8.52480330975333, + "learning_rate": 1.3221629855293222e-06, + "loss": 18.3901, + "step": 218 + }, + { + "epoch": 0.004003144021788802, + "grad_norm": 9.08474388291791, + "learning_rate": 1.3282559025133284e-06, + "loss": 18.4845, + "step": 219 + }, + { + "epoch": 0.004021423218235326, + "grad_norm": 9.937722849785393, + "learning_rate": 1.3343488194973344e-06, + "loss": 18.5381, + "step": 220 + }, + { + "epoch": 0.0040397024146818505, + "grad_norm": 9.6690554776204, + "learning_rate": 1.3404417364813406e-06, + "loss": 18.1501, + "step": 221 + }, + { + "epoch": 0.004057981611128375, + "grad_norm": 8.28729947943615, + "learning_rate": 1.3465346534653467e-06, + "loss": 18.0598, + "step": 222 + }, + { + "epoch": 0.004076260807574899, + "grad_norm": 9.286936139854614, + "learning_rate": 1.3526275704493527e-06, + "loss": 18.5233, + "step": 223 + }, + { + "epoch": 0.004094540004021424, + "grad_norm": 10.265707040677311, + "learning_rate": 1.3587204874333587e-06, + "loss": 18.3456, + "step": 224 + }, + { + "epoch": 0.004112819200467947, + "grad_norm": 9.052625229520075, + "learning_rate": 1.364813404417365e-06, + "loss": 18.1519, + "step": 225 + }, + { + "epoch": 0.0041310983969144715, + "grad_norm": 9.00678834172409, + "learning_rate": 1.370906321401371e-06, + "loss": 18.598, + "step": 226 + }, + { + "epoch": 0.004149377593360996, + "grad_norm": 9.73419082794281, + "learning_rate": 1.3769992383853772e-06, + "loss": 18.5196, + "step": 227 + }, + { + "epoch": 0.00416765678980752, + "grad_norm": 9.43062878041649, + "learning_rate": 1.3830921553693832e-06, + "loss": 18.437, + "step": 228 + }, + { + "epoch": 0.004185935986254045, + "grad_norm": 8.860809691215216, + "learning_rate": 1.3891850723533892e-06, + "loss": 18.2498, + "step": 229 + }, + { + "epoch": 0.004204215182700568, + "grad_norm": 8.461344696554631, + "learning_rate": 1.3952779893373956e-06, + "loss": 18.3432, + "step": 230 + }, + { + "epoch": 0.0042224943791470924, + "grad_norm": 8.923589818596337, + "learning_rate": 1.4013709063214015e-06, + "loss": 18.1395, + "step": 231 + }, + { + "epoch": 0.004240773575593617, + "grad_norm": 8.70195064898488, + 
"learning_rate": 1.4074638233054075e-06, + "loss": 18.0284, + "step": 232 + }, + { + "epoch": 0.004259052772040141, + "grad_norm": 9.260996824567075, + "learning_rate": 1.4135567402894137e-06, + "loss": 18.4668, + "step": 233 + }, + { + "epoch": 0.0042773319684866655, + "grad_norm": 9.055537513006174, + "learning_rate": 1.4196496572734197e-06, + "loss": 18.1067, + "step": 234 + }, + { + "epoch": 0.00429561116493319, + "grad_norm": 8.58620539390862, + "learning_rate": 1.4257425742574258e-06, + "loss": 17.9723, + "step": 235 + }, + { + "epoch": 0.004313890361379713, + "grad_norm": 9.219953464408006, + "learning_rate": 1.431835491241432e-06, + "loss": 18.6434, + "step": 236 + }, + { + "epoch": 0.004332169557826238, + "grad_norm": 9.00950759799232, + "learning_rate": 1.437928408225438e-06, + "loss": 18.3838, + "step": 237 + }, + { + "epoch": 0.004350448754272762, + "grad_norm": 8.951165357685648, + "learning_rate": 1.4440213252094442e-06, + "loss": 18.0981, + "step": 238 + }, + { + "epoch": 0.0043687279507192865, + "grad_norm": 10.090790708095982, + "learning_rate": 1.4501142421934503e-06, + "loss": 18.4739, + "step": 239 + }, + { + "epoch": 0.004387007147165811, + "grad_norm": 8.43510075285029, + "learning_rate": 1.4562071591774563e-06, + "loss": 18.1353, + "step": 240 + }, + { + "epoch": 0.004405286343612335, + "grad_norm": 8.575581867632986, + "learning_rate": 1.4623000761614625e-06, + "loss": 18.0785, + "step": 241 + }, + { + "epoch": 0.004423565540058859, + "grad_norm": 7.335566930392319, + "learning_rate": 1.4683929931454685e-06, + "loss": 17.9746, + "step": 242 + }, + { + "epoch": 0.004441844736505383, + "grad_norm": 8.967314116339633, + "learning_rate": 1.4744859101294744e-06, + "loss": 18.5792, + "step": 243 + }, + { + "epoch": 0.0044601239329519074, + "grad_norm": 9.172005768183263, + "learning_rate": 1.4805788271134808e-06, + "loss": 18.1535, + "step": 244 + }, + { + "epoch": 0.004478403129398432, + "grad_norm": 11.596045287093936, + "learning_rate": 1.4866717440974868e-06, + "loss": 19.2335, + "step": 245 + }, + { + "epoch": 0.004496682325844956, + "grad_norm": 9.252752460154074, + "learning_rate": 1.4927646610814928e-06, + "loss": 18.8617, + "step": 246 + }, + { + "epoch": 0.00451496152229148, + "grad_norm": 8.579697314813979, + "learning_rate": 1.498857578065499e-06, + "loss": 18.2312, + "step": 247 + }, + { + "epoch": 0.004533240718738004, + "grad_norm": 7.887835186000388, + "learning_rate": 1.504950495049505e-06, + "loss": 17.8583, + "step": 248 + }, + { + "epoch": 0.004551519915184528, + "grad_norm": 9.296133202137408, + "learning_rate": 1.5110434120335113e-06, + "loss": 18.5319, + "step": 249 + }, + { + "epoch": 0.004569799111631053, + "grad_norm": 9.58481341796332, + "learning_rate": 1.5171363290175173e-06, + "loss": 19.2434, + "step": 250 + }, + { + "epoch": 0.004588078308077577, + "grad_norm": 7.905919594260702, + "learning_rate": 1.5232292460015233e-06, + "loss": 18.1045, + "step": 251 + }, + { + "epoch": 0.0046063575045241015, + "grad_norm": 8.59126376514489, + "learning_rate": 1.5293221629855294e-06, + "loss": 18.3831, + "step": 252 + }, + { + "epoch": 0.004624636700970625, + "grad_norm": 8.375949981654848, + "learning_rate": 1.5354150799695356e-06, + "loss": 18.3196, + "step": 253 + }, + { + "epoch": 0.004642915897417149, + "grad_norm": 8.653570175094352, + "learning_rate": 1.5415079969535416e-06, + "loss": 18.6534, + "step": 254 + }, + { + "epoch": 0.004661195093863674, + "grad_norm": 8.793798043219379, + "learning_rate": 1.5476009139375478e-06, + "loss": 18.3333, + 
"step": 255 + }, + { + "epoch": 0.004679474290310198, + "grad_norm": 9.311086736443427, + "learning_rate": 1.5536938309215537e-06, + "loss": 18.7836, + "step": 256 + }, + { + "epoch": 0.004697753486756722, + "grad_norm": 9.503701462288985, + "learning_rate": 1.5597867479055597e-06, + "loss": 18.6113, + "step": 257 + }, + { + "epoch": 0.004716032683203247, + "grad_norm": 9.64408701783925, + "learning_rate": 1.5658796648895661e-06, + "loss": 18.3561, + "step": 258 + }, + { + "epoch": 0.00473431187964977, + "grad_norm": 7.412750251028354, + "learning_rate": 1.571972581873572e-06, + "loss": 17.6636, + "step": 259 + }, + { + "epoch": 0.004752591076096295, + "grad_norm": 8.565521370367357, + "learning_rate": 1.5780654988575783e-06, + "loss": 18.2246, + "step": 260 + }, + { + "epoch": 0.004770870272542819, + "grad_norm": 9.021155353544811, + "learning_rate": 1.5841584158415842e-06, + "loss": 18.8278, + "step": 261 + }, + { + "epoch": 0.004789149468989343, + "grad_norm": 8.911016454755051, + "learning_rate": 1.5902513328255902e-06, + "loss": 17.9716, + "step": 262 + }, + { + "epoch": 0.004807428665435868, + "grad_norm": 9.753553688000444, + "learning_rate": 1.5963442498095966e-06, + "loss": 18.6498, + "step": 263 + }, + { + "epoch": 0.004825707861882391, + "grad_norm": 9.410473633134915, + "learning_rate": 1.6024371667936026e-06, + "loss": 18.7673, + "step": 264 + }, + { + "epoch": 0.004843987058328916, + "grad_norm": 8.841939654519999, + "learning_rate": 1.6085300837776085e-06, + "loss": 18.7812, + "step": 265 + }, + { + "epoch": 0.00486226625477544, + "grad_norm": 10.138007808406874, + "learning_rate": 1.6146230007616147e-06, + "loss": 18.2232, + "step": 266 + }, + { + "epoch": 0.004880545451221964, + "grad_norm": 7.882651548223116, + "learning_rate": 1.620715917745621e-06, + "loss": 18.0703, + "step": 267 + }, + { + "epoch": 0.004898824647668489, + "grad_norm": 7.30363490583701, + "learning_rate": 1.6268088347296269e-06, + "loss": 17.7259, + "step": 268 + }, + { + "epoch": 0.004917103844115013, + "grad_norm": 7.617273837055213, + "learning_rate": 1.632901751713633e-06, + "loss": 18.2572, + "step": 269 + }, + { + "epoch": 0.0049353830405615366, + "grad_norm": 9.61420946928789, + "learning_rate": 1.638994668697639e-06, + "loss": 18.7611, + "step": 270 + }, + { + "epoch": 0.004953662237008061, + "grad_norm": 9.261819370122486, + "learning_rate": 1.6450875856816454e-06, + "loss": 18.2477, + "step": 271 + }, + { + "epoch": 0.004971941433454585, + "grad_norm": 8.86145477554711, + "learning_rate": 1.6511805026656514e-06, + "loss": 18.3001, + "step": 272 + }, + { + "epoch": 0.00499022062990111, + "grad_norm": 10.384890389543454, + "learning_rate": 1.6572734196496574e-06, + "loss": 19.3691, + "step": 273 + }, + { + "epoch": 0.005008499826347634, + "grad_norm": 8.430726901053445, + "learning_rate": 1.6633663366336635e-06, + "loss": 18.0541, + "step": 274 + }, + { + "epoch": 0.005026779022794158, + "grad_norm": 8.983040809310916, + "learning_rate": 1.6694592536176695e-06, + "loss": 18.1607, + "step": 275 + }, + { + "epoch": 0.005045058219240682, + "grad_norm": 8.829373292406805, + "learning_rate": 1.6755521706016755e-06, + "loss": 19.0493, + "step": 276 + }, + { + "epoch": 0.005063337415687206, + "grad_norm": 8.995825308388472, + "learning_rate": 1.6816450875856819e-06, + "loss": 18.2772, + "step": 277 + }, + { + "epoch": 0.005081616612133731, + "grad_norm": 9.21103924369425, + "learning_rate": 1.6877380045696878e-06, + "loss": 18.4878, + "step": 278 + }, + { + "epoch": 0.005099895808580255, + 
"grad_norm": 8.451162184646366, + "learning_rate": 1.693830921553694e-06, + "loss": 18.3464, + "step": 279 + }, + { + "epoch": 0.005118175005026779, + "grad_norm": 11.161474200142381, + "learning_rate": 1.6999238385377e-06, + "loss": 18.4198, + "step": 280 + }, + { + "epoch": 0.005136454201473303, + "grad_norm": 9.183530687167856, + "learning_rate": 1.7060167555217062e-06, + "loss": 18.1488, + "step": 281 + }, + { + "epoch": 0.005154733397919827, + "grad_norm": 8.647176482887394, + "learning_rate": 1.7121096725057124e-06, + "loss": 18.1579, + "step": 282 + }, + { + "epoch": 0.0051730125943663515, + "grad_norm": 8.517489610499862, + "learning_rate": 1.7182025894897183e-06, + "loss": 18.3083, + "step": 283 + }, + { + "epoch": 0.005191291790812876, + "grad_norm": 8.675364535322364, + "learning_rate": 1.7242955064737243e-06, + "loss": 18.0499, + "step": 284 + }, + { + "epoch": 0.0052095709872594, + "grad_norm": 7.709633922459531, + "learning_rate": 1.7303884234577307e-06, + "loss": 17.971, + "step": 285 + }, + { + "epoch": 0.005227850183705925, + "grad_norm": 7.786420777908942, + "learning_rate": 1.7364813404417367e-06, + "loss": 17.9058, + "step": 286 + }, + { + "epoch": 0.005246129380152448, + "grad_norm": 9.372489454130031, + "learning_rate": 1.7425742574257426e-06, + "loss": 18.5079, + "step": 287 + }, + { + "epoch": 0.0052644085765989725, + "grad_norm": 8.644590798969862, + "learning_rate": 1.7486671744097488e-06, + "loss": 18.163, + "step": 288 + }, + { + "epoch": 0.005282687773045497, + "grad_norm": 7.555436323653494, + "learning_rate": 1.7547600913937548e-06, + "loss": 17.7009, + "step": 289 + }, + { + "epoch": 0.005300966969492021, + "grad_norm": 9.88718105471376, + "learning_rate": 1.7608530083777612e-06, + "loss": 18.5575, + "step": 290 + }, + { + "epoch": 0.005319246165938546, + "grad_norm": 9.61361164826843, + "learning_rate": 1.7669459253617672e-06, + "loss": 18.4487, + "step": 291 + }, + { + "epoch": 0.00533752536238507, + "grad_norm": 9.052047294027663, + "learning_rate": 1.7730388423457731e-06, + "loss": 18.5464, + "step": 292 + }, + { + "epoch": 0.0053558045588315934, + "grad_norm": 8.356682986131199, + "learning_rate": 1.7791317593297793e-06, + "loss": 18.055, + "step": 293 + }, + { + "epoch": 0.005374083755278118, + "grad_norm": 8.852621554464067, + "learning_rate": 1.7852246763137853e-06, + "loss": 18.463, + "step": 294 + }, + { + "epoch": 0.005392362951724642, + "grad_norm": 8.885811095965344, + "learning_rate": 1.7913175932977915e-06, + "loss": 18.1471, + "step": 295 + }, + { + "epoch": 0.0054106421481711665, + "grad_norm": 8.926621838046268, + "learning_rate": 1.7974105102817976e-06, + "loss": 18.4295, + "step": 296 + }, + { + "epoch": 0.005428921344617691, + "grad_norm": 8.210608181416873, + "learning_rate": 1.8035034272658036e-06, + "loss": 18.4575, + "step": 297 + }, + { + "epoch": 0.005447200541064214, + "grad_norm": 9.173397538987054, + "learning_rate": 1.8095963442498096e-06, + "loss": 18.2811, + "step": 298 + }, + { + "epoch": 0.005465479737510739, + "grad_norm": 8.352517539856125, + "learning_rate": 1.815689261233816e-06, + "loss": 17.9896, + "step": 299 + }, + { + "epoch": 0.005483758933957263, + "grad_norm": 9.58651656979235, + "learning_rate": 1.821782178217822e-06, + "loss": 18.5783, + "step": 300 + }, + { + "epoch": 0.0055020381304037875, + "grad_norm": 9.415882626435163, + "learning_rate": 1.8278750952018281e-06, + "loss": 18.2743, + "step": 301 + }, + { + "epoch": 0.005520317326850312, + "grad_norm": 9.350881387259488, + "learning_rate": 
1.833968012185834e-06, + "loss": 18.5922, + "step": 302 + }, + { + "epoch": 0.005538596523296836, + "grad_norm": 9.428154148379228, + "learning_rate": 1.84006092916984e-06, + "loss": 18.6393, + "step": 303 + }, + { + "epoch": 0.00555687571974336, + "grad_norm": 8.481958715323854, + "learning_rate": 1.8461538461538465e-06, + "loss": 18.031, + "step": 304 + }, + { + "epoch": 0.005575154916189884, + "grad_norm": 8.183490386485298, + "learning_rate": 1.8522467631378524e-06, + "loss": 18.0689, + "step": 305 + }, + { + "epoch": 0.0055934341126364084, + "grad_norm": 8.527268923870656, + "learning_rate": 1.8583396801218584e-06, + "loss": 18.4661, + "step": 306 + }, + { + "epoch": 0.005611713309082933, + "grad_norm": 7.812079110964028, + "learning_rate": 1.8644325971058646e-06, + "loss": 17.8002, + "step": 307 + }, + { + "epoch": 0.005629992505529457, + "grad_norm": 8.41008868031268, + "learning_rate": 1.8705255140898706e-06, + "loss": 18.105, + "step": 308 + }, + { + "epoch": 0.0056482717019759815, + "grad_norm": 8.814591940500186, + "learning_rate": 1.8766184310738767e-06, + "loss": 18.508, + "step": 309 + }, + { + "epoch": 0.005666550898422505, + "grad_norm": 9.81463607128983, + "learning_rate": 1.882711348057883e-06, + "loss": 18.5906, + "step": 310 + }, + { + "epoch": 0.005684830094869029, + "grad_norm": 9.259484237090978, + "learning_rate": 1.8888042650418889e-06, + "loss": 18.2118, + "step": 311 + }, + { + "epoch": 0.005703109291315554, + "grad_norm": 8.229928076135344, + "learning_rate": 1.8948971820258953e-06, + "loss": 17.8685, + "step": 312 + }, + { + "epoch": 0.005721388487762078, + "grad_norm": 9.328445386502587, + "learning_rate": 1.9009900990099013e-06, + "loss": 18.5938, + "step": 313 + }, + { + "epoch": 0.0057396676842086025, + "grad_norm": 10.30968060318531, + "learning_rate": 1.9070830159939072e-06, + "loss": 19.0348, + "step": 314 + }, + { + "epoch": 0.005757946880655126, + "grad_norm": 9.588842801909218, + "learning_rate": 1.9131759329779136e-06, + "loss": 18.6145, + "step": 315 + }, + { + "epoch": 0.00577622607710165, + "grad_norm": 9.160642251033686, + "learning_rate": 1.9192688499619194e-06, + "loss": 18.4347, + "step": 316 + }, + { + "epoch": 0.005794505273548175, + "grad_norm": 9.510478117148674, + "learning_rate": 1.9253617669459256e-06, + "loss": 18.4841, + "step": 317 + }, + { + "epoch": 0.005812784469994699, + "grad_norm": 7.720316738121152, + "learning_rate": 1.9314546839299317e-06, + "loss": 17.8745, + "step": 318 + }, + { + "epoch": 0.005831063666441223, + "grad_norm": 8.380398366818962, + "learning_rate": 1.9375476009139375e-06, + "loss": 18.0486, + "step": 319 + }, + { + "epoch": 0.005849342862887748, + "grad_norm": 9.883905650411798, + "learning_rate": 1.9436405178979437e-06, + "loss": 18.8674, + "step": 320 + }, + { + "epoch": 0.005867622059334271, + "grad_norm": 8.708964463668561, + "learning_rate": 1.94973343488195e-06, + "loss": 18.4288, + "step": 321 + }, + { + "epoch": 0.005885901255780796, + "grad_norm": 9.297137906751821, + "learning_rate": 1.955826351865956e-06, + "loss": 19.0322, + "step": 322 + }, + { + "epoch": 0.00590418045222732, + "grad_norm": 8.618946541704723, + "learning_rate": 1.9619192688499622e-06, + "loss": 18.214, + "step": 323 + }, + { + "epoch": 0.005922459648673844, + "grad_norm": 8.060235940220567, + "learning_rate": 1.968012185833968e-06, + "loss": 17.9303, + "step": 324 + }, + { + "epoch": 0.005940738845120369, + "grad_norm": 7.665853382725324, + "learning_rate": 1.974105102817974e-06, + "loss": 17.8826, + "step": 325 + }, + { + 
"epoch": 0.005959018041566893, + "grad_norm": 8.121066064855041, + "learning_rate": 1.9801980198019803e-06, + "loss": 18.1789, + "step": 326 + }, + { + "epoch": 0.005977297238013417, + "grad_norm": 8.541733313908619, + "learning_rate": 1.9862909367859865e-06, + "loss": 18.6535, + "step": 327 + }, + { + "epoch": 0.005995576434459941, + "grad_norm": 8.531476217309224, + "learning_rate": 1.9923838537699923e-06, + "loss": 18.0689, + "step": 328 + }, + { + "epoch": 0.006013855630906465, + "grad_norm": 9.387532432564992, + "learning_rate": 1.998476770753999e-06, + "loss": 18.4685, + "step": 329 + }, + { + "epoch": 0.00603213482735299, + "grad_norm": 8.560597959419821, + "learning_rate": 2.0045696877380047e-06, + "loss": 18.1564, + "step": 330 + }, + { + "epoch": 0.006050414023799514, + "grad_norm": 8.543531262561194, + "learning_rate": 2.010662604722011e-06, + "loss": 18.0159, + "step": 331 + }, + { + "epoch": 0.0060686932202460376, + "grad_norm": 9.089859495341674, + "learning_rate": 2.016755521706017e-06, + "loss": 18.4976, + "step": 332 + }, + { + "epoch": 0.006086972416692562, + "grad_norm": 7.892896022706285, + "learning_rate": 2.0228484386900228e-06, + "loss": 18.1215, + "step": 333 + }, + { + "epoch": 0.006105251613139086, + "grad_norm": 9.459962297194554, + "learning_rate": 2.0289413556740294e-06, + "loss": 18.4911, + "step": 334 + }, + { + "epoch": 0.006123530809585611, + "grad_norm": 9.975390120935543, + "learning_rate": 2.035034272658035e-06, + "loss": 18.3436, + "step": 335 + }, + { + "epoch": 0.006141810006032135, + "grad_norm": 9.206433781385257, + "learning_rate": 2.0411271896420413e-06, + "loss": 18.6202, + "step": 336 + }, + { + "epoch": 0.006160089202478659, + "grad_norm": 9.225056037073902, + "learning_rate": 2.0472201066260475e-06, + "loss": 18.5441, + "step": 337 + }, + { + "epoch": 0.006178368398925183, + "grad_norm": 8.79411752064298, + "learning_rate": 2.0533130236100533e-06, + "loss": 18.2518, + "step": 338 + }, + { + "epoch": 0.006196647595371707, + "grad_norm": 7.682661005049543, + "learning_rate": 2.0594059405940594e-06, + "loss": 17.9822, + "step": 339 + }, + { + "epoch": 0.006214926791818232, + "grad_norm": 8.617648520128705, + "learning_rate": 2.0654988575780656e-06, + "loss": 18.1993, + "step": 340 + }, + { + "epoch": 0.006233205988264756, + "grad_norm": 9.284518303747348, + "learning_rate": 2.071591774562072e-06, + "loss": 18.3468, + "step": 341 + }, + { + "epoch": 0.00625148518471128, + "grad_norm": 9.146487062937387, + "learning_rate": 2.077684691546078e-06, + "loss": 18.6132, + "step": 342 + }, + { + "epoch": 0.006269764381157805, + "grad_norm": 8.55358864306958, + "learning_rate": 2.083777608530084e-06, + "loss": 18.2002, + "step": 343 + }, + { + "epoch": 0.006288043577604328, + "grad_norm": 10.309465924549984, + "learning_rate": 2.08987052551409e-06, + "loss": 18.7386, + "step": 344 + }, + { + "epoch": 0.0063063227740508526, + "grad_norm": 8.050315978102859, + "learning_rate": 2.095963442498096e-06, + "loss": 18.0105, + "step": 345 + }, + { + "epoch": 0.006324601970497377, + "grad_norm": 9.290651889149846, + "learning_rate": 2.1020563594821023e-06, + "loss": 18.4974, + "step": 346 + }, + { + "epoch": 0.006342881166943901, + "grad_norm": 8.017310403022595, + "learning_rate": 2.108149276466108e-06, + "loss": 18.0765, + "step": 347 + }, + { + "epoch": 0.006361160363390426, + "grad_norm": 8.43604073342653, + "learning_rate": 2.1142421934501147e-06, + "loss": 18.2749, + "step": 348 + }, + { + "epoch": 0.006379439559836949, + "grad_norm": 8.412960841835288, + 
"learning_rate": 2.1203351104341204e-06, + "loss": 18.1562, + "step": 349 + }, + { + "epoch": 0.0063977187562834735, + "grad_norm": 8.57195602786084, + "learning_rate": 2.1264280274181266e-06, + "loss": 18.2799, + "step": 350 + }, + { + "epoch": 0.006415997952729998, + "grad_norm": 7.794052012116019, + "learning_rate": 2.1325209444021328e-06, + "loss": 17.8709, + "step": 351 + }, + { + "epoch": 0.006434277149176522, + "grad_norm": 9.982820168942267, + "learning_rate": 2.1386138613861385e-06, + "loss": 18.6899, + "step": 352 + }, + { + "epoch": 0.006452556345623047, + "grad_norm": 10.415493642023899, + "learning_rate": 2.144706778370145e-06, + "loss": 18.9632, + "step": 353 + }, + { + "epoch": 0.006470835542069571, + "grad_norm": 9.477248548554387, + "learning_rate": 2.150799695354151e-06, + "loss": 18.648, + "step": 354 + }, + { + "epoch": 0.0064891147385160945, + "grad_norm": 9.470811841953198, + "learning_rate": 2.156892612338157e-06, + "loss": 18.5502, + "step": 355 + }, + { + "epoch": 0.006507393934962619, + "grad_norm": 8.463485062752705, + "learning_rate": 2.1629855293221633e-06, + "loss": 18.2422, + "step": 356 + }, + { + "epoch": 0.006525673131409143, + "grad_norm": 8.813364649737569, + "learning_rate": 2.1690784463061694e-06, + "loss": 18.5201, + "step": 357 + }, + { + "epoch": 0.0065439523278556675, + "grad_norm": 9.680619522560455, + "learning_rate": 2.175171363290175e-06, + "loss": 18.7938, + "step": 358 + }, + { + "epoch": 0.006562231524302192, + "grad_norm": 7.425556851346463, + "learning_rate": 2.1812642802741814e-06, + "loss": 17.9377, + "step": 359 + }, + { + "epoch": 0.006580510720748716, + "grad_norm": 9.254648716313772, + "learning_rate": 2.1873571972581876e-06, + "loss": 19.1159, + "step": 360 + }, + { + "epoch": 0.00659878991719524, + "grad_norm": 8.771527877599317, + "learning_rate": 2.1934501142421933e-06, + "loss": 18.3495, + "step": 361 + }, + { + "epoch": 0.006617069113641764, + "grad_norm": 8.411301969877785, + "learning_rate": 2.1995430312262e-06, + "loss": 18.1057, + "step": 362 + }, + { + "epoch": 0.0066353483100882885, + "grad_norm": 8.639194963524913, + "learning_rate": 2.2056359482102057e-06, + "loss": 18.071, + "step": 363 + }, + { + "epoch": 0.006653627506534813, + "grad_norm": 8.385867594796746, + "learning_rate": 2.211728865194212e-06, + "loss": 18.3967, + "step": 364 + }, + { + "epoch": 0.006671906702981337, + "grad_norm": 7.846216957233574, + "learning_rate": 2.217821782178218e-06, + "loss": 17.9985, + "step": 365 + }, + { + "epoch": 0.006690185899427861, + "grad_norm": 9.3430447459956, + "learning_rate": 2.223914699162224e-06, + "loss": 18.239, + "step": 366 + }, + { + "epoch": 0.006708465095874385, + "grad_norm": 8.99258309655721, + "learning_rate": 2.2300076161462304e-06, + "loss": 18.3144, + "step": 367 + }, + { + "epoch": 0.0067267442923209094, + "grad_norm": 8.561083662711514, + "learning_rate": 2.236100533130236e-06, + "loss": 18.0658, + "step": 368 + }, + { + "epoch": 0.006745023488767434, + "grad_norm": 8.278398615614147, + "learning_rate": 2.2421934501142424e-06, + "loss": 18.2668, + "step": 369 + }, + { + "epoch": 0.006763302685213958, + "grad_norm": 9.086882339107945, + "learning_rate": 2.2482863670982485e-06, + "loss": 18.5646, + "step": 370 + }, + { + "epoch": 0.0067815818816604825, + "grad_norm": 8.594250049493938, + "learning_rate": 2.2543792840822547e-06, + "loss": 18.2993, + "step": 371 + }, + { + "epoch": 0.006799861078107006, + "grad_norm": 7.963099594471555, + "learning_rate": 2.2604722010662605e-06, + "loss": 17.8993, + "step": 
372 + }, + { + "epoch": 0.00681814027455353, + "grad_norm": 8.712076673097176, + "learning_rate": 2.2665651180502667e-06, + "loss": 18.2929, + "step": 373 + }, + { + "epoch": 0.006836419471000055, + "grad_norm": 8.612392279685967, + "learning_rate": 2.272658035034273e-06, + "loss": 18.394, + "step": 374 + }, + { + "epoch": 0.006854698667446579, + "grad_norm": 7.550688028009268, + "learning_rate": 2.278750952018279e-06, + "loss": 17.8218, + "step": 375 + }, + { + "epoch": 0.0068729778638931035, + "grad_norm": 8.913047191507939, + "learning_rate": 2.284843869002285e-06, + "loss": 18.3896, + "step": 376 + }, + { + "epoch": 0.006891257060339628, + "grad_norm": 8.22715602929957, + "learning_rate": 2.290936785986291e-06, + "loss": 17.933, + "step": 377 + }, + { + "epoch": 0.006909536256786151, + "grad_norm": 7.2560412530408795, + "learning_rate": 2.297029702970297e-06, + "loss": 17.7516, + "step": 378 + }, + { + "epoch": 0.006927815453232676, + "grad_norm": 8.175683719428385, + "learning_rate": 2.3031226199543033e-06, + "loss": 17.8862, + "step": 379 + }, + { + "epoch": 0.0069460946496792, + "grad_norm": 9.035425554274873, + "learning_rate": 2.309215536938309e-06, + "loss": 18.2054, + "step": 380 + }, + { + "epoch": 0.0069643738461257244, + "grad_norm": 7.955366223753293, + "learning_rate": 2.3153084539223157e-06, + "loss": 17.9941, + "step": 381 + }, + { + "epoch": 0.006982653042572249, + "grad_norm": 8.114741230412864, + "learning_rate": 2.3214013709063215e-06, + "loss": 18.2079, + "step": 382 + }, + { + "epoch": 0.007000932239018773, + "grad_norm": 9.849098947167999, + "learning_rate": 2.3274942878903276e-06, + "loss": 18.8764, + "step": 383 + }, + { + "epoch": 0.007019211435465297, + "grad_norm": 8.816246921630963, + "learning_rate": 2.333587204874334e-06, + "loss": 18.23, + "step": 384 + }, + { + "epoch": 0.007037490631911821, + "grad_norm": 8.478770203163386, + "learning_rate": 2.33968012185834e-06, + "loss": 18.0982, + "step": 385 + }, + { + "epoch": 0.007055769828358345, + "grad_norm": 8.73342392337039, + "learning_rate": 2.345773038842346e-06, + "loss": 18.2808, + "step": 386 + }, + { + "epoch": 0.00707404902480487, + "grad_norm": 8.551873904980154, + "learning_rate": 2.351865955826352e-06, + "loss": 18.4912, + "step": 387 + }, + { + "epoch": 0.007092328221251394, + "grad_norm": 7.85190380133635, + "learning_rate": 2.357958872810358e-06, + "loss": 18.053, + "step": 388 + }, + { + "epoch": 0.007110607417697918, + "grad_norm": 7.448370159944098, + "learning_rate": 2.3640517897943643e-06, + "loss": 17.6652, + "step": 389 + }, + { + "epoch": 0.007128886614144442, + "grad_norm": 8.734311497128099, + "learning_rate": 2.3701447067783705e-06, + "loss": 18.2995, + "step": 390 + }, + { + "epoch": 0.007147165810590966, + "grad_norm": 9.316955074087764, + "learning_rate": 2.3762376237623762e-06, + "loss": 19.1144, + "step": 391 + }, + { + "epoch": 0.007165445007037491, + "grad_norm": 7.268656975869662, + "learning_rate": 2.3823305407463824e-06, + "loss": 17.8357, + "step": 392 + }, + { + "epoch": 0.007183724203484015, + "grad_norm": 8.476905541872776, + "learning_rate": 2.3884234577303886e-06, + "loss": 17.9871, + "step": 393 + }, + { + "epoch": 0.007202003399930539, + "grad_norm": 9.06955839869183, + "learning_rate": 2.3945163747143944e-06, + "loss": 18.0771, + "step": 394 + }, + { + "epoch": 0.007220282596377063, + "grad_norm": 9.233554569688772, + "learning_rate": 2.400609291698401e-06, + "loss": 18.3878, + "step": 395 + }, + { + "epoch": 0.007238561792823587, + "grad_norm": 9.262697112074246, + 
"learning_rate": 2.4067022086824067e-06, + "loss": 18.0157, + "step": 396 + }, + { + "epoch": 0.007256840989270112, + "grad_norm": 8.806520720371429, + "learning_rate": 2.412795125666413e-06, + "loss": 18.2733, + "step": 397 + }, + { + "epoch": 0.007275120185716636, + "grad_norm": 9.965781203723397, + "learning_rate": 2.418888042650419e-06, + "loss": 18.6585, + "step": 398 + }, + { + "epoch": 0.00729339938216316, + "grad_norm": 7.6022212125416475, + "learning_rate": 2.4249809596344253e-06, + "loss": 17.5579, + "step": 399 + }, + { + "epoch": 0.007311678578609685, + "grad_norm": 8.688091981163637, + "learning_rate": 2.4310738766184315e-06, + "loss": 18.3117, + "step": 400 + }, + { + "epoch": 0.007329957775056208, + "grad_norm": 10.294080040154196, + "learning_rate": 2.4371667936024372e-06, + "loss": 18.8181, + "step": 401 + }, + { + "epoch": 0.007348236971502733, + "grad_norm": 8.829931437470595, + "learning_rate": 2.4432597105864434e-06, + "loss": 18.1261, + "step": 402 + }, + { + "epoch": 0.007366516167949257, + "grad_norm": 8.191147431313302, + "learning_rate": 2.4493526275704496e-06, + "loss": 18.0081, + "step": 403 + }, + { + "epoch": 0.007384795364395781, + "grad_norm": 7.470300626118907, + "learning_rate": 2.4554455445544558e-06, + "loss": 17.5224, + "step": 404 + }, + { + "epoch": 0.007403074560842306, + "grad_norm": 9.007826178671621, + "learning_rate": 2.461538461538462e-06, + "loss": 18.4673, + "step": 405 + }, + { + "epoch": 0.007421353757288829, + "grad_norm": 7.5259895720074335, + "learning_rate": 2.4676313785224677e-06, + "loss": 18.0889, + "step": 406 + }, + { + "epoch": 0.0074396329537353536, + "grad_norm": 8.50269065120894, + "learning_rate": 2.473724295506474e-06, + "loss": 18.4111, + "step": 407 + }, + { + "epoch": 0.007457912150181878, + "grad_norm": 10.621098192229644, + "learning_rate": 2.47981721249048e-06, + "loss": 18.6, + "step": 408 + }, + { + "epoch": 0.007476191346628402, + "grad_norm": 7.89152843253615, + "learning_rate": 2.4859101294744863e-06, + "loss": 17.7234, + "step": 409 + }, + { + "epoch": 0.007494470543074927, + "grad_norm": 7.698598794778496, + "learning_rate": 2.492003046458492e-06, + "loss": 17.9908, + "step": 410 + }, + { + "epoch": 0.007512749739521451, + "grad_norm": 8.585157258990362, + "learning_rate": 2.498095963442498e-06, + "loss": 18.3271, + "step": 411 + }, + { + "epoch": 0.0075310289359679745, + "grad_norm": 8.91463773765338, + "learning_rate": 2.504188880426505e-06, + "loss": 18.1335, + "step": 412 + }, + { + "epoch": 0.007549308132414499, + "grad_norm": 7.890245109336917, + "learning_rate": 2.5102817974105106e-06, + "loss": 17.9491, + "step": 413 + }, + { + "epoch": 0.007567587328861023, + "grad_norm": 9.457275052109484, + "learning_rate": 2.5163747143945167e-06, + "loss": 18.5356, + "step": 414 + }, + { + "epoch": 0.007585866525307548, + "grad_norm": 8.431178749153313, + "learning_rate": 2.522467631378523e-06, + "loss": 18.3072, + "step": 415 + }, + { + "epoch": 0.007604145721754072, + "grad_norm": 7.7381910924238175, + "learning_rate": 2.5285605483625287e-06, + "loss": 17.778, + "step": 416 + }, + { + "epoch": 0.007622424918200596, + "grad_norm": 8.475808326620589, + "learning_rate": 2.534653465346535e-06, + "loss": 17.9745, + "step": 417 + }, + { + "epoch": 0.00764070411464712, + "grad_norm": 11.878356849600886, + "learning_rate": 2.540746382330541e-06, + "loss": 19.1678, + "step": 418 + }, + { + "epoch": 0.007658983311093644, + "grad_norm": 8.731099408033131, + "learning_rate": 2.546839299314547e-06, + "loss": 18.3064, + "step": 
419 + }, + { + "epoch": 0.0076772625075401685, + "grad_norm": 9.373694875302448, + "learning_rate": 2.552932216298553e-06, + "loss": 18.4102, + "step": 420 + }, + { + "epoch": 0.007695541703986693, + "grad_norm": 9.479865504833226, + "learning_rate": 2.5590251332825596e-06, + "loss": 18.679, + "step": 421 + }, + { + "epoch": 0.007713820900433217, + "grad_norm": 9.656544881683654, + "learning_rate": 2.565118050266565e-06, + "loss": 18.8041, + "step": 422 + }, + { + "epoch": 0.007732100096879741, + "grad_norm": 8.426811539874398, + "learning_rate": 2.5712109672505715e-06, + "loss": 18.1999, + "step": 423 + }, + { + "epoch": 0.007750379293326265, + "grad_norm": 7.631132413151855, + "learning_rate": 2.5773038842345777e-06, + "loss": 17.8451, + "step": 424 + }, + { + "epoch": 0.0077686584897727895, + "grad_norm": 9.967810100187242, + "learning_rate": 2.5833968012185835e-06, + "loss": 18.3001, + "step": 425 + }, + { + "epoch": 0.007786937686219314, + "grad_norm": 10.34311365776934, + "learning_rate": 2.5894897182025897e-06, + "loss": 18.9942, + "step": 426 + }, + { + "epoch": 0.007805216882665838, + "grad_norm": 8.436342406167185, + "learning_rate": 2.595582635186596e-06, + "loss": 18.5091, + "step": 427 + }, + { + "epoch": 0.007823496079112362, + "grad_norm": 8.084051116156678, + "learning_rate": 2.6016755521706016e-06, + "loss": 18.2411, + "step": 428 + }, + { + "epoch": 0.007841775275558887, + "grad_norm": 9.286977568279523, + "learning_rate": 2.6077684691546078e-06, + "loss": 18.3457, + "step": 429 + }, + { + "epoch": 0.00786005447200541, + "grad_norm": 7.550128803367409, + "learning_rate": 2.6138613861386144e-06, + "loss": 17.8468, + "step": 430 + }, + { + "epoch": 0.007878333668451936, + "grad_norm": 9.730459473486697, + "learning_rate": 2.6199543031226206e-06, + "loss": 18.5781, + "step": 431 + }, + { + "epoch": 0.00789661286489846, + "grad_norm": 9.466978400057728, + "learning_rate": 2.6260472201066263e-06, + "loss": 18.767, + "step": 432 + }, + { + "epoch": 0.007914892061344983, + "grad_norm": 8.812812776275303, + "learning_rate": 2.6321401370906325e-06, + "loss": 18.279, + "step": 433 + }, + { + "epoch": 0.007933171257791508, + "grad_norm": 9.176005654446904, + "learning_rate": 2.6382330540746387e-06, + "loss": 18.8576, + "step": 434 + }, + { + "epoch": 0.007951450454238031, + "grad_norm": 8.825127776543228, + "learning_rate": 2.6443259710586444e-06, + "loss": 18.334, + "step": 435 + }, + { + "epoch": 0.007969729650684557, + "grad_norm": 8.78079113285075, + "learning_rate": 2.6504188880426506e-06, + "loss": 17.8408, + "step": 436 + }, + { + "epoch": 0.00798800884713108, + "grad_norm": 8.891443387231634, + "learning_rate": 2.656511805026657e-06, + "loss": 18.2782, + "step": 437 + }, + { + "epoch": 0.008006288043577604, + "grad_norm": 8.64136165084598, + "learning_rate": 2.6626047220106626e-06, + "loss": 18.2856, + "step": 438 + }, + { + "epoch": 0.008024567240024129, + "grad_norm": 11.422630324831495, + "learning_rate": 2.6686976389946687e-06, + "loss": 19.0644, + "step": 439 + }, + { + "epoch": 0.008042846436470652, + "grad_norm": 8.816431365246043, + "learning_rate": 2.6747905559786754e-06, + "loss": 18.2401, + "step": 440 + }, + { + "epoch": 0.008061125632917178, + "grad_norm": 8.278936924735184, + "learning_rate": 2.680883472962681e-06, + "loss": 17.7142, + "step": 441 + }, + { + "epoch": 0.008079404829363701, + "grad_norm": 9.882950868408622, + "learning_rate": 2.6869763899466873e-06, + "loss": 18.6747, + "step": 442 + }, + { + "epoch": 0.008097684025810225, + "grad_norm": 
8.455266639438943, + "learning_rate": 2.6930693069306935e-06, + "loss": 18.5501, + "step": 443 + }, + { + "epoch": 0.00811596322225675, + "grad_norm": 10.369712253033393, + "learning_rate": 2.6991622239146992e-06, + "loss": 18.9826, + "step": 444 + }, + { + "epoch": 0.008134242418703273, + "grad_norm": 7.588267951842359, + "learning_rate": 2.7052551408987054e-06, + "loss": 17.7885, + "step": 445 + }, + { + "epoch": 0.008152521615149799, + "grad_norm": 7.933435039710536, + "learning_rate": 2.7113480578827116e-06, + "loss": 17.9875, + "step": 446 + }, + { + "epoch": 0.008170800811596322, + "grad_norm": 9.844989948105257, + "learning_rate": 2.7174409748667174e-06, + "loss": 18.874, + "step": 447 + }, + { + "epoch": 0.008189080008042847, + "grad_norm": 7.816887348231905, + "learning_rate": 2.7235338918507235e-06, + "loss": 17.8547, + "step": 448 + }, + { + "epoch": 0.00820735920448937, + "grad_norm": 8.319887922416477, + "learning_rate": 2.72962680883473e-06, + "loss": 18.1645, + "step": 449 + }, + { + "epoch": 0.008225638400935894, + "grad_norm": 8.794004300642134, + "learning_rate": 2.7357197258187355e-06, + "loss": 18.6253, + "step": 450 + }, + { + "epoch": 0.00824391759738242, + "grad_norm": 8.052292213418909, + "learning_rate": 2.741812642802742e-06, + "loss": 17.8093, + "step": 451 + }, + { + "epoch": 0.008262196793828943, + "grad_norm": 9.269781357392727, + "learning_rate": 2.7479055597867483e-06, + "loss": 18.4396, + "step": 452 + }, + { + "epoch": 0.008280475990275468, + "grad_norm": 8.652844062265205, + "learning_rate": 2.7539984767707544e-06, + "loss": 18.473, + "step": 453 + }, + { + "epoch": 0.008298755186721992, + "grad_norm": 7.383249069054072, + "learning_rate": 2.76009139375476e-06, + "loss": 17.7205, + "step": 454 + }, + { + "epoch": 0.008317034383168515, + "grad_norm": 8.18718452206247, + "learning_rate": 2.7661843107387664e-06, + "loss": 17.7939, + "step": 455 + }, + { + "epoch": 0.00833531357961504, + "grad_norm": 10.176755743735992, + "learning_rate": 2.7722772277227726e-06, + "loss": 18.5029, + "step": 456 + }, + { + "epoch": 0.008353592776061564, + "grad_norm": 9.711682630724553, + "learning_rate": 2.7783701447067783e-06, + "loss": 18.4737, + "step": 457 + }, + { + "epoch": 0.00837187197250809, + "grad_norm": 9.317443047461476, + "learning_rate": 2.784463061690785e-06, + "loss": 18.1023, + "step": 458 + }, + { + "epoch": 0.008390151168954613, + "grad_norm": 8.11083945397948, + "learning_rate": 2.790555978674791e-06, + "loss": 17.7362, + "step": 459 + }, + { + "epoch": 0.008408430365401136, + "grad_norm": 7.3596780356905, + "learning_rate": 2.796648895658797e-06, + "loss": 18.0177, + "step": 460 + }, + { + "epoch": 0.008426709561847661, + "grad_norm": 9.332385377216356, + "learning_rate": 2.802741812642803e-06, + "loss": 18.4037, + "step": 461 + }, + { + "epoch": 0.008444988758294185, + "grad_norm": 8.070677476220393, + "learning_rate": 2.8088347296268092e-06, + "loss": 18.1327, + "step": 462 + }, + { + "epoch": 0.00846326795474071, + "grad_norm": 10.043995385236958, + "learning_rate": 2.814927646610815e-06, + "loss": 18.7866, + "step": 463 + }, + { + "epoch": 0.008481547151187234, + "grad_norm": 9.612765078676649, + "learning_rate": 2.821020563594821e-06, + "loss": 18.3719, + "step": 464 + }, + { + "epoch": 0.008499826347633759, + "grad_norm": 9.096496728607754, + "learning_rate": 2.8271134805788274e-06, + "loss": 18.3143, + "step": 465 + }, + { + "epoch": 0.008518105544080282, + "grad_norm": 8.650570824357551, + "learning_rate": 2.833206397562833e-06, + "loss": 
18.2524, + "step": 466 + }, + { + "epoch": 0.008536384740526806, + "grad_norm": 8.783777135279273, + "learning_rate": 2.8392993145468393e-06, + "loss": 18.1164, + "step": 467 + }, + { + "epoch": 0.008554663936973331, + "grad_norm": 9.136119852739785, + "learning_rate": 2.845392231530846e-06, + "loss": 18.4177, + "step": 468 + }, + { + "epoch": 0.008572943133419855, + "grad_norm": 6.913422302003662, + "learning_rate": 2.8514851485148517e-06, + "loss": 17.6912, + "step": 469 + }, + { + "epoch": 0.00859122232986638, + "grad_norm": 9.128585392774562, + "learning_rate": 2.857578065498858e-06, + "loss": 18.6487, + "step": 470 + }, + { + "epoch": 0.008609501526312903, + "grad_norm": 9.215913965517334, + "learning_rate": 2.863670982482864e-06, + "loss": 18.528, + "step": 471 + }, + { + "epoch": 0.008627780722759427, + "grad_norm": 7.578818180282947, + "learning_rate": 2.86976389946687e-06, + "loss": 17.693, + "step": 472 + }, + { + "epoch": 0.008646059919205952, + "grad_norm": 8.458790231416076, + "learning_rate": 2.875856816450876e-06, + "loss": 18.4237, + "step": 473 + }, + { + "epoch": 0.008664339115652476, + "grad_norm": 8.536550233318124, + "learning_rate": 2.881949733434882e-06, + "loss": 18.6088, + "step": 474 + }, + { + "epoch": 0.008682618312099, + "grad_norm": 8.39836290974198, + "learning_rate": 2.8880426504188883e-06, + "loss": 18.0771, + "step": 475 + }, + { + "epoch": 0.008700897508545524, + "grad_norm": 8.847571763064265, + "learning_rate": 2.894135567402894e-06, + "loss": 18.2639, + "step": 476 + }, + { + "epoch": 0.008719176704992048, + "grad_norm": 8.476705894783775, + "learning_rate": 2.9002284843869007e-06, + "loss": 17.8502, + "step": 477 + }, + { + "epoch": 0.008737455901438573, + "grad_norm": 8.431699688669129, + "learning_rate": 2.906321401370907e-06, + "loss": 18.1192, + "step": 478 + }, + { + "epoch": 0.008755735097885096, + "grad_norm": 10.200575097290223, + "learning_rate": 2.9124143183549126e-06, + "loss": 18.5815, + "step": 479 + }, + { + "epoch": 0.008774014294331622, + "grad_norm": 8.227135289405737, + "learning_rate": 2.918507235338919e-06, + "loss": 18.035, + "step": 480 + }, + { + "epoch": 0.008792293490778145, + "grad_norm": 9.188182347644483, + "learning_rate": 2.924600152322925e-06, + "loss": 18.5853, + "step": 481 + }, + { + "epoch": 0.00881057268722467, + "grad_norm": 8.748967205379937, + "learning_rate": 2.9306930693069308e-06, + "loss": 18.3194, + "step": 482 + }, + { + "epoch": 0.008828851883671194, + "grad_norm": 8.974929578637878, + "learning_rate": 2.936785986290937e-06, + "loss": 18.4196, + "step": 483 + }, + { + "epoch": 0.008847131080117717, + "grad_norm": 8.726187193187672, + "learning_rate": 2.942878903274943e-06, + "loss": 18.5871, + "step": 484 + }, + { + "epoch": 0.008865410276564243, + "grad_norm": 8.242732808561213, + "learning_rate": 2.948971820258949e-06, + "loss": 18.1831, + "step": 485 + }, + { + "epoch": 0.008883689473010766, + "grad_norm": 8.258898171491985, + "learning_rate": 2.9550647372429555e-06, + "loss": 18.0755, + "step": 486 + }, + { + "epoch": 0.008901968669457291, + "grad_norm": 8.393333887873215, + "learning_rate": 2.9611576542269617e-06, + "loss": 18.3118, + "step": 487 + }, + { + "epoch": 0.008920247865903815, + "grad_norm": 8.683881824599178, + "learning_rate": 2.9672505712109674e-06, + "loss": 18.2373, + "step": 488 + }, + { + "epoch": 0.008938527062350338, + "grad_norm": 10.074471771430872, + "learning_rate": 2.9733434881949736e-06, + "loss": 18.8629, + "step": 489 + }, + { + "epoch": 0.008956806258796864, + "grad_norm": 
9.057481759135253, + "learning_rate": 2.97943640517898e-06, + "loss": 18.1558, + "step": 490 + }, + { + "epoch": 0.008975085455243387, + "grad_norm": 8.608543843136571, + "learning_rate": 2.9855293221629856e-06, + "loss": 18.1241, + "step": 491 + }, + { + "epoch": 0.008993364651689912, + "grad_norm": 8.893306631487548, + "learning_rate": 2.9916222391469917e-06, + "loss": 18.4825, + "step": 492 + }, + { + "epoch": 0.009011643848136436, + "grad_norm": 8.26756330038832, + "learning_rate": 2.997715156130998e-06, + "loss": 18.3376, + "step": 493 + }, + { + "epoch": 0.00902992304458296, + "grad_norm": 10.071683744373866, + "learning_rate": 3.0038080731150045e-06, + "loss": 18.6427, + "step": 494 + }, + { + "epoch": 0.009048202241029485, + "grad_norm": 7.610212877203644, + "learning_rate": 3.00990099009901e-06, + "loss": 17.6488, + "step": 495 + }, + { + "epoch": 0.009066481437476008, + "grad_norm": 8.866784008361785, + "learning_rate": 3.0159939070830165e-06, + "loss": 18.5203, + "step": 496 + }, + { + "epoch": 0.009084760633922533, + "grad_norm": 8.267221539992438, + "learning_rate": 3.0220868240670226e-06, + "loss": 18.162, + "step": 497 + }, + { + "epoch": 0.009103039830369057, + "grad_norm": 9.833899972727973, + "learning_rate": 3.0281797410510284e-06, + "loss": 18.4136, + "step": 498 + }, + { + "epoch": 0.009121319026815582, + "grad_norm": 8.598654687238412, + "learning_rate": 3.0342726580350346e-06, + "loss": 18.1908, + "step": 499 + }, + { + "epoch": 0.009139598223262105, + "grad_norm": 8.83631830602541, + "learning_rate": 3.0403655750190408e-06, + "loss": 18.255, + "step": 500 + }, + { + "epoch": 0.009157877419708629, + "grad_norm": 8.967581726984752, + "learning_rate": 3.0464584920030465e-06, + "loss": 18.3796, + "step": 501 + }, + { + "epoch": 0.009176156616155154, + "grad_norm": 8.391512538029074, + "learning_rate": 3.0525514089870527e-06, + "loss": 18.1908, + "step": 502 + }, + { + "epoch": 0.009194435812601678, + "grad_norm": 8.489402237604713, + "learning_rate": 3.058644325971059e-06, + "loss": 18.0274, + "step": 503 + }, + { + "epoch": 0.009212715009048203, + "grad_norm": 9.185868381210456, + "learning_rate": 3.0647372429550646e-06, + "loss": 18.7543, + "step": 504 + }, + { + "epoch": 0.009230994205494726, + "grad_norm": 8.64799578928608, + "learning_rate": 3.0708301599390713e-06, + "loss": 18.178, + "step": 505 + }, + { + "epoch": 0.00924927340194125, + "grad_norm": 8.024152009533692, + "learning_rate": 3.0769230769230774e-06, + "loss": 17.8823, + "step": 506 + }, + { + "epoch": 0.009267552598387775, + "grad_norm": 8.913452620302673, + "learning_rate": 3.083015993907083e-06, + "loss": 18.2871, + "step": 507 + }, + { + "epoch": 0.009285831794834299, + "grad_norm": 8.739662477117557, + "learning_rate": 3.0891089108910894e-06, + "loss": 17.8769, + "step": 508 + }, + { + "epoch": 0.009304110991280824, + "grad_norm": 8.00629871672884, + "learning_rate": 3.0952018278750956e-06, + "loss": 18.0679, + "step": 509 + }, + { + "epoch": 0.009322390187727347, + "grad_norm": 9.412734562267573, + "learning_rate": 3.1012947448591013e-06, + "loss": 18.6921, + "step": 510 + }, + { + "epoch": 0.009340669384173871, + "grad_norm": 7.921780414150499, + "learning_rate": 3.1073876618431075e-06, + "loss": 17.9678, + "step": 511 + }, + { + "epoch": 0.009358948580620396, + "grad_norm": 7.302084134718715, + "learning_rate": 3.1134805788271137e-06, + "loss": 17.5462, + "step": 512 + }, + { + "epoch": 0.00937722777706692, + "grad_norm": 10.521947594287234, + "learning_rate": 3.1195734958111194e-06, + "loss": 
19.0029, + "step": 513 + }, + { + "epoch": 0.009395506973513445, + "grad_norm": 8.848198510870068, + "learning_rate": 3.125666412795126e-06, + "loss": 18.1732, + "step": 514 + }, + { + "epoch": 0.009413786169959968, + "grad_norm": 7.693917315803662, + "learning_rate": 3.1317593297791322e-06, + "loss": 17.8773, + "step": 515 + }, + { + "epoch": 0.009432065366406494, + "grad_norm": 10.244409066966945, + "learning_rate": 3.1378522467631384e-06, + "loss": 18.6571, + "step": 516 + }, + { + "epoch": 0.009450344562853017, + "grad_norm": 7.874769630024627, + "learning_rate": 3.143945163747144e-06, + "loss": 17.8227, + "step": 517 + }, + { + "epoch": 0.00946862375929954, + "grad_norm": 7.241900829546251, + "learning_rate": 3.1500380807311503e-06, + "loss": 17.5432, + "step": 518 + }, + { + "epoch": 0.009486902955746066, + "grad_norm": 8.907781822473183, + "learning_rate": 3.1561309977151565e-06, + "loss": 18.0927, + "step": 519 + }, + { + "epoch": 0.00950518215219259, + "grad_norm": 8.46348947828814, + "learning_rate": 3.1622239146991623e-06, + "loss": 18.3927, + "step": 520 + }, + { + "epoch": 0.009523461348639115, + "grad_norm": 9.445588967967891, + "learning_rate": 3.1683168316831685e-06, + "loss": 18.5475, + "step": 521 + }, + { + "epoch": 0.009541740545085638, + "grad_norm": 8.61062653825342, + "learning_rate": 3.174409748667175e-06, + "loss": 18.1747, + "step": 522 + }, + { + "epoch": 0.009560019741532162, + "grad_norm": 8.633999487272066, + "learning_rate": 3.1805026656511804e-06, + "loss": 18.1129, + "step": 523 + }, + { + "epoch": 0.009578298937978687, + "grad_norm": 8.65307697819484, + "learning_rate": 3.186595582635187e-06, + "loss": 18.0431, + "step": 524 + }, + { + "epoch": 0.00959657813442521, + "grad_norm": 9.227500699335675, + "learning_rate": 3.192688499619193e-06, + "loss": 18.4762, + "step": 525 + }, + { + "epoch": 0.009614857330871735, + "grad_norm": 8.172972022293687, + "learning_rate": 3.198781416603199e-06, + "loss": 18.3787, + "step": 526 + }, + { + "epoch": 0.009633136527318259, + "grad_norm": 7.785109284311084, + "learning_rate": 3.204874333587205e-06, + "loss": 17.7882, + "step": 527 + }, + { + "epoch": 0.009651415723764782, + "grad_norm": 9.420314791564312, + "learning_rate": 3.2109672505712113e-06, + "loss": 18.5747, + "step": 528 + }, + { + "epoch": 0.009669694920211308, + "grad_norm": 9.244874151341696, + "learning_rate": 3.217060167555217e-06, + "loss": 18.0547, + "step": 529 + }, + { + "epoch": 0.009687974116657831, + "grad_norm": 8.311126105224647, + "learning_rate": 3.2231530845392233e-06, + "loss": 18.0579, + "step": 530 + }, + { + "epoch": 0.009706253313104356, + "grad_norm": 8.997860459062794, + "learning_rate": 3.2292460015232294e-06, + "loss": 18.2353, + "step": 531 + }, + { + "epoch": 0.00972453250955088, + "grad_norm": 8.59961107594842, + "learning_rate": 3.235338918507235e-06, + "loss": 18.2798, + "step": 532 + }, + { + "epoch": 0.009742811705997405, + "grad_norm": 9.227733614804333, + "learning_rate": 3.241431835491242e-06, + "loss": 18.3154, + "step": 533 + }, + { + "epoch": 0.009761090902443929, + "grad_norm": 8.635515736231104, + "learning_rate": 3.247524752475248e-06, + "loss": 18.5925, + "step": 534 + }, + { + "epoch": 0.009779370098890452, + "grad_norm": 8.32479719423018, + "learning_rate": 3.2536176694592537e-06, + "loss": 17.6423, + "step": 535 + }, + { + "epoch": 0.009797649295336977, + "grad_norm": 8.790696290335909, + "learning_rate": 3.25971058644326e-06, + "loss": 18.4782, + "step": 536 + }, + { + "epoch": 0.009815928491783501, + "grad_norm": 
9.479560069100044, + "learning_rate": 3.265803503427266e-06, + "loss": 18.1152, + "step": 537 + }, + { + "epoch": 0.009834207688230026, + "grad_norm": 8.192728144832243, + "learning_rate": 3.2718964204112723e-06, + "loss": 18.0095, + "step": 538 + }, + { + "epoch": 0.00985248688467655, + "grad_norm": 7.876454527850719, + "learning_rate": 3.277989337395278e-06, + "loss": 17.8086, + "step": 539 + }, + { + "epoch": 0.009870766081123073, + "grad_norm": 8.067141115388166, + "learning_rate": 3.2840822543792842e-06, + "loss": 18.0498, + "step": 540 + }, + { + "epoch": 0.009889045277569598, + "grad_norm": 9.316743951006133, + "learning_rate": 3.290175171363291e-06, + "loss": 18.3959, + "step": 541 + }, + { + "epoch": 0.009907324474016122, + "grad_norm": 8.47902828845691, + "learning_rate": 3.2962680883472966e-06, + "loss": 18.0238, + "step": 542 + }, + { + "epoch": 0.009925603670462647, + "grad_norm": 8.856677568217423, + "learning_rate": 3.3023610053313028e-06, + "loss": 18.2624, + "step": 543 + }, + { + "epoch": 0.00994388286690917, + "grad_norm": 8.443474590903095, + "learning_rate": 3.308453922315309e-06, + "loss": 18.1976, + "step": 544 + }, + { + "epoch": 0.009962162063355694, + "grad_norm": 9.36612442369136, + "learning_rate": 3.3145468392993147e-06, + "loss": 18.8425, + "step": 545 + }, + { + "epoch": 0.00998044125980222, + "grad_norm": 7.064347581584799, + "learning_rate": 3.320639756283321e-06, + "loss": 17.4856, + "step": 546 + }, + { + "epoch": 0.009998720456248743, + "grad_norm": 8.9669000570803, + "learning_rate": 3.326732673267327e-06, + "loss": 17.9647, + "step": 547 + }, + { + "epoch": 0.010016999652695268, + "grad_norm": 8.66512459966387, + "learning_rate": 3.332825590251333e-06, + "loss": 18.1868, + "step": 548 + }, + { + "epoch": 0.010035278849141792, + "grad_norm": 8.010190288017151, + "learning_rate": 3.338918507235339e-06, + "loss": 18.1856, + "step": 549 + }, + { + "epoch": 0.010053558045588317, + "grad_norm": 7.387120482691083, + "learning_rate": 3.3450114242193456e-06, + "loss": 17.6946, + "step": 550 + }, + { + "epoch": 0.01007183724203484, + "grad_norm": 8.32475615889767, + "learning_rate": 3.351104341203351e-06, + "loss": 17.911, + "step": 551 + }, + { + "epoch": 0.010090116438481364, + "grad_norm": 9.066205394885595, + "learning_rate": 3.3571972581873576e-06, + "loss": 18.9419, + "step": 552 + }, + { + "epoch": 0.010108395634927889, + "grad_norm": 7.968193698210883, + "learning_rate": 3.3632901751713638e-06, + "loss": 18.0055, + "step": 553 + }, + { + "epoch": 0.010126674831374412, + "grad_norm": 8.571786943624106, + "learning_rate": 3.3693830921553695e-06, + "loss": 18.0664, + "step": 554 + }, + { + "epoch": 0.010144954027820938, + "grad_norm": 7.699019935806702, + "learning_rate": 3.3754760091393757e-06, + "loss": 18.0581, + "step": 555 + }, + { + "epoch": 0.010163233224267461, + "grad_norm": 8.067866766547853, + "learning_rate": 3.381568926123382e-06, + "loss": 18.2582, + "step": 556 + }, + { + "epoch": 0.010181512420713985, + "grad_norm": 9.433101264824572, + "learning_rate": 3.387661843107388e-06, + "loss": 18.5321, + "step": 557 + }, + { + "epoch": 0.01019979161716051, + "grad_norm": 8.540100236577365, + "learning_rate": 3.393754760091394e-06, + "loss": 18.1843, + "step": 558 + }, + { + "epoch": 0.010218070813607033, + "grad_norm": 9.476723770747716, + "learning_rate": 3.3998476770754e-06, + "loss": 18.5148, + "step": 559 + }, + { + "epoch": 0.010236350010053559, + "grad_norm": 7.916222040531197, + "learning_rate": 3.4059405940594066e-06, + "loss": 17.8445, + 
"step": 560 + }, + { + "epoch": 0.010254629206500082, + "grad_norm": 8.660451835322666, + "learning_rate": 3.4120335110434124e-06, + "loss": 18.2338, + "step": 561 + }, + { + "epoch": 0.010272908402946606, + "grad_norm": 8.200092029419919, + "learning_rate": 3.4181264280274185e-06, + "loss": 17.8906, + "step": 562 + }, + { + "epoch": 0.01029118759939313, + "grad_norm": 7.665208685321601, + "learning_rate": 3.4242193450114247e-06, + "loss": 18.0897, + "step": 563 + }, + { + "epoch": 0.010309466795839654, + "grad_norm": 8.377511704569045, + "learning_rate": 3.4303122619954305e-06, + "loss": 18.3939, + "step": 564 + }, + { + "epoch": 0.01032774599228618, + "grad_norm": 8.14051934165229, + "learning_rate": 3.4364051789794367e-06, + "loss": 18.0354, + "step": 565 + }, + { + "epoch": 0.010346025188732703, + "grad_norm": 8.893467288989342, + "learning_rate": 3.442498095963443e-06, + "loss": 18.9523, + "step": 566 + }, + { + "epoch": 0.010364304385179228, + "grad_norm": 13.06411652942076, + "learning_rate": 3.4485910129474486e-06, + "loss": 18.6884, + "step": 567 + }, + { + "epoch": 0.010382583581625752, + "grad_norm": 8.159390916538014, + "learning_rate": 3.454683929931455e-06, + "loss": 17.9738, + "step": 568 + }, + { + "epoch": 0.010400862778072275, + "grad_norm": 8.901397762873613, + "learning_rate": 3.4607768469154614e-06, + "loss": 18.5212, + "step": 569 + }, + { + "epoch": 0.0104191419745188, + "grad_norm": 8.263202136880551, + "learning_rate": 3.466869763899467e-06, + "loss": 18.1432, + "step": 570 + }, + { + "epoch": 0.010437421170965324, + "grad_norm": 8.642791941923635, + "learning_rate": 3.4729626808834733e-06, + "loss": 18.1083, + "step": 571 + }, + { + "epoch": 0.01045570036741185, + "grad_norm": 9.079489731525967, + "learning_rate": 3.4790555978674795e-06, + "loss": 18.4298, + "step": 572 + }, + { + "epoch": 0.010473979563858373, + "grad_norm": 8.28360939638146, + "learning_rate": 3.4851485148514853e-06, + "loss": 17.9617, + "step": 573 + }, + { + "epoch": 0.010492258760304896, + "grad_norm": 8.334940108308801, + "learning_rate": 3.4912414318354915e-06, + "loss": 18.3559, + "step": 574 + }, + { + "epoch": 0.010510537956751421, + "grad_norm": 7.857994330225646, + "learning_rate": 3.4973343488194976e-06, + "loss": 18.1056, + "step": 575 + }, + { + "epoch": 0.010528817153197945, + "grad_norm": 8.330149196467916, + "learning_rate": 3.5034272658035034e-06, + "loss": 18.0329, + "step": 576 + }, + { + "epoch": 0.01054709634964447, + "grad_norm": 8.661868500547321, + "learning_rate": 3.5095201827875096e-06, + "loss": 18.4591, + "step": 577 + }, + { + "epoch": 0.010565375546090994, + "grad_norm": 8.163560354563982, + "learning_rate": 3.515613099771516e-06, + "loss": 18.0669, + "step": 578 + }, + { + "epoch": 0.010583654742537517, + "grad_norm": 8.242693534936947, + "learning_rate": 3.5217060167555224e-06, + "loss": 18.1963, + "step": 579 + }, + { + "epoch": 0.010601933938984042, + "grad_norm": 9.176823530741894, + "learning_rate": 3.527798933739528e-06, + "loss": 18.4939, + "step": 580 + }, + { + "epoch": 0.010620213135430566, + "grad_norm": 8.788621060069486, + "learning_rate": 3.5338918507235343e-06, + "loss": 18.2809, + "step": 581 + }, + { + "epoch": 0.010638492331877091, + "grad_norm": 8.090462283188902, + "learning_rate": 3.5399847677075405e-06, + "loss": 18.0944, + "step": 582 + }, + { + "epoch": 0.010656771528323615, + "grad_norm": 8.596137634283256, + "learning_rate": 3.5460776846915462e-06, + "loss": 18.278, + "step": 583 + }, + { + "epoch": 0.01067505072477014, + "grad_norm": 
8.247695085227974, + "learning_rate": 3.5521706016755524e-06, + "loss": 18.1307, + "step": 584 + }, + { + "epoch": 0.010693329921216663, + "grad_norm": 8.127347406336867, + "learning_rate": 3.5582635186595586e-06, + "loss": 18.1265, + "step": 585 + }, + { + "epoch": 0.010711609117663187, + "grad_norm": 8.371783660452177, + "learning_rate": 3.5643564356435644e-06, + "loss": 18.1125, + "step": 586 + }, + { + "epoch": 0.010729888314109712, + "grad_norm": 8.99520294894812, + "learning_rate": 3.5704493526275706e-06, + "loss": 18.2121, + "step": 587 + }, + { + "epoch": 0.010748167510556236, + "grad_norm": 10.608664862244305, + "learning_rate": 3.576542269611577e-06, + "loss": 18.846, + "step": 588 + }, + { + "epoch": 0.01076644670700276, + "grad_norm": 8.968867264754163, + "learning_rate": 3.582635186595583e-06, + "loss": 18.4245, + "step": 589 + }, + { + "epoch": 0.010784725903449284, + "grad_norm": 7.8113289740846925, + "learning_rate": 3.588728103579589e-06, + "loss": 17.9216, + "step": 590 + }, + { + "epoch": 0.010803005099895808, + "grad_norm": 8.547150312484515, + "learning_rate": 3.5948210205635953e-06, + "loss": 18.164, + "step": 591 + }, + { + "epoch": 0.010821284296342333, + "grad_norm": 8.034774327385934, + "learning_rate": 3.600913937547601e-06, + "loss": 18.1415, + "step": 592 + }, + { + "epoch": 0.010839563492788857, + "grad_norm": 8.482926628353267, + "learning_rate": 3.6070068545316072e-06, + "loss": 17.8454, + "step": 593 + }, + { + "epoch": 0.010857842689235382, + "grad_norm": 7.159187145580739, + "learning_rate": 3.6130997715156134e-06, + "loss": 17.5982, + "step": 594 + }, + { + "epoch": 0.010876121885681905, + "grad_norm": 6.956524183644067, + "learning_rate": 3.619192688499619e-06, + "loss": 17.7585, + "step": 595 + }, + { + "epoch": 0.010894401082128429, + "grad_norm": 8.289400186115694, + "learning_rate": 3.6252856054836253e-06, + "loss": 18.2065, + "step": 596 + }, + { + "epoch": 0.010912680278574954, + "grad_norm": 8.926577429484452, + "learning_rate": 3.631378522467632e-06, + "loss": 18.0875, + "step": 597 + }, + { + "epoch": 0.010930959475021478, + "grad_norm": 9.22683538460072, + "learning_rate": 3.6374714394516377e-06, + "loss": 18.6423, + "step": 598 + }, + { + "epoch": 0.010949238671468003, + "grad_norm": 8.270948741163098, + "learning_rate": 3.643564356435644e-06, + "loss": 17.7624, + "step": 599 + }, + { + "epoch": 0.010967517867914526, + "grad_norm": 10.822683093286273, + "learning_rate": 3.64965727341965e-06, + "loss": 19.2622, + "step": 600 + }, + { + "epoch": 0.010985797064361051, + "grad_norm": 8.958097787886413, + "learning_rate": 3.6557501904036563e-06, + "loss": 18.1253, + "step": 601 + }, + { + "epoch": 0.011004076260807575, + "grad_norm": 7.719258571985554, + "learning_rate": 3.661843107387662e-06, + "loss": 17.9526, + "step": 602 + }, + { + "epoch": 0.011022355457254098, + "grad_norm": 8.450593543919977, + "learning_rate": 3.667936024371668e-06, + "loss": 18.0805, + "step": 603 + }, + { + "epoch": 0.011040634653700624, + "grad_norm": 8.90466662740431, + "learning_rate": 3.6740289413556744e-06, + "loss": 18.538, + "step": 604 + }, + { + "epoch": 0.011058913850147147, + "grad_norm": 8.414490229419439, + "learning_rate": 3.68012185833968e-06, + "loss": 18.5623, + "step": 605 + }, + { + "epoch": 0.011077193046593672, + "grad_norm": 7.692666688497806, + "learning_rate": 3.6862147753236867e-06, + "loss": 18.0062, + "step": 606 + }, + { + "epoch": 0.011095472243040196, + "grad_norm": 8.293027250646789, + "learning_rate": 3.692307692307693e-06, + "loss": 
17.9804, + "step": 607 + }, + { + "epoch": 0.01111375143948672, + "grad_norm": 8.690172577012346, + "learning_rate": 3.6984006092916987e-06, + "loss": 18.4833, + "step": 608 + }, + { + "epoch": 0.011132030635933245, + "grad_norm": 8.655996628896485, + "learning_rate": 3.704493526275705e-06, + "loss": 17.9411, + "step": 609 + }, + { + "epoch": 0.011150309832379768, + "grad_norm": 8.91574917849141, + "learning_rate": 3.710586443259711e-06, + "loss": 18.256, + "step": 610 + }, + { + "epoch": 0.011168589028826293, + "grad_norm": 7.01065365822396, + "learning_rate": 3.716679360243717e-06, + "loss": 17.572, + "step": 611 + }, + { + "epoch": 0.011186868225272817, + "grad_norm": 9.289538437867213, + "learning_rate": 3.722772277227723e-06, + "loss": 18.6452, + "step": 612 + }, + { + "epoch": 0.01120514742171934, + "grad_norm": 8.308113412260557, + "learning_rate": 3.728865194211729e-06, + "loss": 18.0685, + "step": 613 + }, + { + "epoch": 0.011223426618165866, + "grad_norm": 7.931418707584115, + "learning_rate": 3.734958111195735e-06, + "loss": 18.0801, + "step": 614 + }, + { + "epoch": 0.011241705814612389, + "grad_norm": 8.886288471507592, + "learning_rate": 3.741051028179741e-06, + "loss": 18.1922, + "step": 615 + }, + { + "epoch": 0.011259985011058914, + "grad_norm": 7.177341956825686, + "learning_rate": 3.7471439451637477e-06, + "loss": 17.8097, + "step": 616 + }, + { + "epoch": 0.011278264207505438, + "grad_norm": 8.904115254334988, + "learning_rate": 3.7532368621477535e-06, + "loss": 18.1632, + "step": 617 + }, + { + "epoch": 0.011296543403951963, + "grad_norm": 8.775998773629942, + "learning_rate": 3.7593297791317597e-06, + "loss": 18.4946, + "step": 618 + }, + { + "epoch": 0.011314822600398487, + "grad_norm": 7.860619604169376, + "learning_rate": 3.765422696115766e-06, + "loss": 17.8058, + "step": 619 + }, + { + "epoch": 0.01133310179684501, + "grad_norm": 9.143618681610725, + "learning_rate": 3.771515613099772e-06, + "loss": 18.5067, + "step": 620 + }, + { + "epoch": 0.011351380993291535, + "grad_norm": 7.641110873624864, + "learning_rate": 3.7776085300837778e-06, + "loss": 18.0647, + "step": 621 + }, + { + "epoch": 0.011369660189738059, + "grad_norm": 9.881622540341965, + "learning_rate": 3.783701447067784e-06, + "loss": 18.7808, + "step": 622 + }, + { + "epoch": 0.011387939386184584, + "grad_norm": 9.079147581664522, + "learning_rate": 3.7897943640517906e-06, + "loss": 18.6017, + "step": 623 + }, + { + "epoch": 0.011406218582631108, + "grad_norm": 9.919861848991172, + "learning_rate": 3.795887281035796e-06, + "loss": 18.5653, + "step": 624 + }, + { + "epoch": 0.011424497779077631, + "grad_norm": 8.772754960101011, + "learning_rate": 3.8019801980198025e-06, + "loss": 18.4951, + "step": 625 + }, + { + "epoch": 0.011442776975524156, + "grad_norm": 9.245420728499028, + "learning_rate": 3.8080731150038087e-06, + "loss": 18.5503, + "step": 626 + }, + { + "epoch": 0.01146105617197068, + "grad_norm": 7.1149859102439015, + "learning_rate": 3.8141660319878144e-06, + "loss": 17.7896, + "step": 627 + }, + { + "epoch": 0.011479335368417205, + "grad_norm": 8.922405789331233, + "learning_rate": 3.820258948971821e-06, + "loss": 18.1187, + "step": 628 + }, + { + "epoch": 0.011497614564863728, + "grad_norm": 9.334662108517135, + "learning_rate": 3.826351865955827e-06, + "loss": 18.4043, + "step": 629 + }, + { + "epoch": 0.011515893761310252, + "grad_norm": 7.991348921154775, + "learning_rate": 3.832444782939833e-06, + "loss": 17.9648, + "step": 630 + }, + { + "epoch": 0.011534172957756777, + "grad_norm": 
10.35442384336909, + "learning_rate": 3.838537699923839e-06, + "loss": 18.8284, + "step": 631 + }, + { + "epoch": 0.0115524521542033, + "grad_norm": 7.6281038550182485, + "learning_rate": 3.844630616907845e-06, + "loss": 17.7663, + "step": 632 + }, + { + "epoch": 0.011570731350649826, + "grad_norm": 9.003867909251422, + "learning_rate": 3.850723533891851e-06, + "loss": 18.4097, + "step": 633 + }, + { + "epoch": 0.01158901054709635, + "grad_norm": 9.353285303990113, + "learning_rate": 3.856816450875857e-06, + "loss": 18.3862, + "step": 634 + }, + { + "epoch": 0.011607289743542875, + "grad_norm": 7.913205735721484, + "learning_rate": 3.8629093678598635e-06, + "loss": 17.7015, + "step": 635 + }, + { + "epoch": 0.011625568939989398, + "grad_norm": 7.850792293732742, + "learning_rate": 3.869002284843869e-06, + "loss": 17.7488, + "step": 636 + }, + { + "epoch": 0.011643848136435922, + "grad_norm": 8.565520597603587, + "learning_rate": 3.875095201827875e-06, + "loss": 18.0983, + "step": 637 + }, + { + "epoch": 0.011662127332882447, + "grad_norm": 9.956027586916676, + "learning_rate": 3.881188118811882e-06, + "loss": 18.7461, + "step": 638 + }, + { + "epoch": 0.01168040652932897, + "grad_norm": 10.054966931273652, + "learning_rate": 3.887281035795887e-06, + "loss": 18.8291, + "step": 639 + }, + { + "epoch": 0.011698685725775496, + "grad_norm": 9.251972467038426, + "learning_rate": 3.893373952779894e-06, + "loss": 18.4111, + "step": 640 + }, + { + "epoch": 0.011716964922222019, + "grad_norm": 8.358312202277967, + "learning_rate": 3.8994668697639e-06, + "loss": 18.0409, + "step": 641 + }, + { + "epoch": 0.011735244118668543, + "grad_norm": 8.422320418791143, + "learning_rate": 3.905559786747906e-06, + "loss": 18.1452, + "step": 642 + }, + { + "epoch": 0.011753523315115068, + "grad_norm": 7.671429243467571, + "learning_rate": 3.911652703731912e-06, + "loss": 18.0494, + "step": 643 + }, + { + "epoch": 0.011771802511561591, + "grad_norm": 8.50051421134067, + "learning_rate": 3.917745620715918e-06, + "loss": 18.4015, + "step": 644 + }, + { + "epoch": 0.011790081708008117, + "grad_norm": 7.760926670958736, + "learning_rate": 3.9238385376999244e-06, + "loss": 17.921, + "step": 645 + }, + { + "epoch": 0.01180836090445464, + "grad_norm": 11.513527376413613, + "learning_rate": 3.92993145468393e-06, + "loss": 18.1754, + "step": 646 + }, + { + "epoch": 0.011826640100901164, + "grad_norm": 8.028483144791913, + "learning_rate": 3.936024371667936e-06, + "loss": 18.1008, + "step": 647 + }, + { + "epoch": 0.011844919297347689, + "grad_norm": 8.918002922760337, + "learning_rate": 3.9421172886519426e-06, + "loss": 18.5311, + "step": 648 + }, + { + "epoch": 0.011863198493794212, + "grad_norm": 8.4695793378545, + "learning_rate": 3.948210205635948e-06, + "loss": 18.1985, + "step": 649 + }, + { + "epoch": 0.011881477690240737, + "grad_norm": 8.294055827860042, + "learning_rate": 3.954303122619955e-06, + "loss": 18.1201, + "step": 650 + }, + { + "epoch": 0.011899756886687261, + "grad_norm": 9.017151892282369, + "learning_rate": 3.960396039603961e-06, + "loss": 18.646, + "step": 651 + }, + { + "epoch": 0.011918036083133786, + "grad_norm": 9.311975409243328, + "learning_rate": 3.9664889565879665e-06, + "loss": 18.3601, + "step": 652 + }, + { + "epoch": 0.01193631527958031, + "grad_norm": 9.28881290790521, + "learning_rate": 3.972581873571973e-06, + "loss": 18.3552, + "step": 653 + }, + { + "epoch": 0.011954594476026833, + "grad_norm": 8.911077660865459, + "learning_rate": 3.978674790555979e-06, + "loss": 18.6692, + "step": 
654 + }, + { + "epoch": 0.011972873672473358, + "grad_norm": 8.750159759354592, + "learning_rate": 3.9847677075399846e-06, + "loss": 17.8843, + "step": 655 + }, + { + "epoch": 0.011991152868919882, + "grad_norm": 7.5063771211308845, + "learning_rate": 3.990860624523991e-06, + "loss": 17.684, + "step": 656 + }, + { + "epoch": 0.012009432065366407, + "grad_norm": 9.058283847455503, + "learning_rate": 3.996953541507998e-06, + "loss": 18.5061, + "step": 657 + }, + { + "epoch": 0.01202771126181293, + "grad_norm": 10.043624776829995, + "learning_rate": 4.0030464584920035e-06, + "loss": 18.1349, + "step": 658 + }, + { + "epoch": 0.012045990458259454, + "grad_norm": 8.24023318173618, + "learning_rate": 4.009139375476009e-06, + "loss": 18.1363, + "step": 659 + }, + { + "epoch": 0.01206426965470598, + "grad_norm": 9.524182484965243, + "learning_rate": 4.015232292460016e-06, + "loss": 18.3919, + "step": 660 + }, + { + "epoch": 0.012082548851152503, + "grad_norm": 8.654553969481384, + "learning_rate": 4.021325209444022e-06, + "loss": 17.8421, + "step": 661 + }, + { + "epoch": 0.012100828047599028, + "grad_norm": 8.413561700429844, + "learning_rate": 4.0274181264280274e-06, + "loss": 17.94, + "step": 662 + }, + { + "epoch": 0.012119107244045552, + "grad_norm": 8.42006127552643, + "learning_rate": 4.033511043412034e-06, + "loss": 18.0531, + "step": 663 + }, + { + "epoch": 0.012137386440492075, + "grad_norm": 8.256668684070519, + "learning_rate": 4.03960396039604e-06, + "loss": 17.8619, + "step": 664 + }, + { + "epoch": 0.0121556656369386, + "grad_norm": 7.924822317596749, + "learning_rate": 4.0456968773800455e-06, + "loss": 18.0097, + "step": 665 + }, + { + "epoch": 0.012173944833385124, + "grad_norm": 8.47007375308315, + "learning_rate": 4.051789794364052e-06, + "loss": 18.0684, + "step": 666 + }, + { + "epoch": 0.012192224029831649, + "grad_norm": 9.597647355318228, + "learning_rate": 4.057882711348059e-06, + "loss": 18.5228, + "step": 667 + }, + { + "epoch": 0.012210503226278173, + "grad_norm": 8.081301569529527, + "learning_rate": 4.0639756283320645e-06, + "loss": 18.029, + "step": 668 + }, + { + "epoch": 0.012228782422724698, + "grad_norm": 8.09801481857068, + "learning_rate": 4.07006854531607e-06, + "loss": 17.9184, + "step": 669 + }, + { + "epoch": 0.012247061619171221, + "grad_norm": 7.773602705447997, + "learning_rate": 4.076161462300077e-06, + "loss": 17.8822, + "step": 670 + }, + { + "epoch": 0.012265340815617745, + "grad_norm": 9.38410411202206, + "learning_rate": 4.082254379284083e-06, + "loss": 18.2827, + "step": 671 + }, + { + "epoch": 0.01228362001206427, + "grad_norm": 9.93505517562674, + "learning_rate": 4.088347296268088e-06, + "loss": 18.6631, + "step": 672 + }, + { + "epoch": 0.012301899208510794, + "grad_norm": 10.076632024957327, + "learning_rate": 4.094440213252095e-06, + "loss": 18.8522, + "step": 673 + }, + { + "epoch": 0.012320178404957319, + "grad_norm": 9.590342148059808, + "learning_rate": 4.100533130236101e-06, + "loss": 18.4533, + "step": 674 + }, + { + "epoch": 0.012338457601403842, + "grad_norm": 8.648378057532792, + "learning_rate": 4.1066260472201065e-06, + "loss": 18.3743, + "step": 675 + }, + { + "epoch": 0.012356736797850366, + "grad_norm": 9.225214321976841, + "learning_rate": 4.112718964204113e-06, + "loss": 18.5045, + "step": 676 + }, + { + "epoch": 0.012375015994296891, + "grad_norm": 9.641471319011561, + "learning_rate": 4.118811881188119e-06, + "loss": 18.5693, + "step": 677 + }, + { + "epoch": 0.012393295190743414, + "grad_norm": 8.71795174350895, + 
"learning_rate": 4.1249047981721255e-06, + "loss": 18.3874, + "step": 678 + }, + { + "epoch": 0.01241157438718994, + "grad_norm": 8.631448803632992, + "learning_rate": 4.130997715156131e-06, + "loss": 18.1557, + "step": 679 + }, + { + "epoch": 0.012429853583636463, + "grad_norm": 8.282374061730781, + "learning_rate": 4.137090632140137e-06, + "loss": 18.2244, + "step": 680 + }, + { + "epoch": 0.012448132780082987, + "grad_norm": 7.363293065006019, + "learning_rate": 4.143183549124144e-06, + "loss": 17.7314, + "step": 681 + }, + { + "epoch": 0.012466411976529512, + "grad_norm": 10.797530506423593, + "learning_rate": 4.149276466108149e-06, + "loss": 18.9832, + "step": 682 + }, + { + "epoch": 0.012484691172976035, + "grad_norm": 7.879375084137035, + "learning_rate": 4.155369383092156e-06, + "loss": 18.1933, + "step": 683 + }, + { + "epoch": 0.01250297036942256, + "grad_norm": 8.588918231392771, + "learning_rate": 4.161462300076162e-06, + "loss": 18.3093, + "step": 684 + }, + { + "epoch": 0.012521249565869084, + "grad_norm": 8.136157675867892, + "learning_rate": 4.167555217060168e-06, + "loss": 17.8996, + "step": 685 + }, + { + "epoch": 0.01253952876231561, + "grad_norm": 8.244391120869468, + "learning_rate": 4.173648134044174e-06, + "loss": 18.3063, + "step": 686 + }, + { + "epoch": 0.012557807958762133, + "grad_norm": 9.254053025083964, + "learning_rate": 4.17974105102818e-06, + "loss": 18.552, + "step": 687 + }, + { + "epoch": 0.012576087155208656, + "grad_norm": 8.510600321405521, + "learning_rate": 4.1858339680121865e-06, + "loss": 18.4846, + "step": 688 + }, + { + "epoch": 0.012594366351655182, + "grad_norm": 8.01458664798195, + "learning_rate": 4.191926884996192e-06, + "loss": 17.715, + "step": 689 + }, + { + "epoch": 0.012612645548101705, + "grad_norm": 8.626646670224346, + "learning_rate": 4.198019801980198e-06, + "loss": 18.2243, + "step": 690 + }, + { + "epoch": 0.01263092474454823, + "grad_norm": 8.939771947894256, + "learning_rate": 4.204112718964205e-06, + "loss": 18.5696, + "step": 691 + }, + { + "epoch": 0.012649203940994754, + "grad_norm": 8.120519986053191, + "learning_rate": 4.21020563594821e-06, + "loss": 17.9166, + "step": 692 + }, + { + "epoch": 0.012667483137441277, + "grad_norm": 8.029242137098644, + "learning_rate": 4.216298552932216e-06, + "loss": 18.2364, + "step": 693 + }, + { + "epoch": 0.012685762333887803, + "grad_norm": 8.858387502251698, + "learning_rate": 4.222391469916223e-06, + "loss": 18.4259, + "step": 694 + }, + { + "epoch": 0.012704041530334326, + "grad_norm": 7.660860311701185, + "learning_rate": 4.228484386900229e-06, + "loss": 17.7606, + "step": 695 + }, + { + "epoch": 0.012722320726780851, + "grad_norm": 9.906135660762699, + "learning_rate": 4.234577303884235e-06, + "loss": 18.7554, + "step": 696 + }, + { + "epoch": 0.012740599923227375, + "grad_norm": 9.840851089802282, + "learning_rate": 4.240670220868241e-06, + "loss": 18.6306, + "step": 697 + }, + { + "epoch": 0.012758879119673898, + "grad_norm": 8.604357795575616, + "learning_rate": 4.2467631378522474e-06, + "loss": 18.024, + "step": 698 + }, + { + "epoch": 0.012777158316120424, + "grad_norm": 9.356382353257045, + "learning_rate": 4.252856054836253e-06, + "loss": 18.5935, + "step": 699 + }, + { + "epoch": 0.012795437512566947, + "grad_norm": 9.281401062431087, + "learning_rate": 4.258948971820259e-06, + "loss": 18.2712, + "step": 700 + }, + { + "epoch": 0.012813716709013472, + "grad_norm": 9.449169354332904, + "learning_rate": 4.2650418888042656e-06, + "loss": 18.5111, + "step": 701 + }, + { + 
"epoch": 0.012831995905459996, + "grad_norm": 6.581590647391288, + "learning_rate": 4.271134805788271e-06, + "loss": 17.2692, + "step": 702 + }, + { + "epoch": 0.012850275101906521, + "grad_norm": 8.941852584792075, + "learning_rate": 4.277227722772277e-06, + "loss": 18.1318, + "step": 703 + }, + { + "epoch": 0.012868554298353044, + "grad_norm": 11.52066482368107, + "learning_rate": 4.283320639756284e-06, + "loss": 17.9786, + "step": 704 + }, + { + "epoch": 0.012886833494799568, + "grad_norm": 9.50710199009215, + "learning_rate": 4.28941355674029e-06, + "loss": 18.4536, + "step": 705 + }, + { + "epoch": 0.012905112691246093, + "grad_norm": 6.858864428444414, + "learning_rate": 4.295506473724296e-06, + "loss": 17.5341, + "step": 706 + }, + { + "epoch": 0.012923391887692617, + "grad_norm": 8.694385305192165, + "learning_rate": 4.301599390708302e-06, + "loss": 18.3906, + "step": 707 + }, + { + "epoch": 0.012941671084139142, + "grad_norm": 8.10655763097337, + "learning_rate": 4.307692307692308e-06, + "loss": 18.2554, + "step": 708 + }, + { + "epoch": 0.012959950280585665, + "grad_norm": 10.10716824262294, + "learning_rate": 4.313785224676314e-06, + "loss": 18.3628, + "step": 709 + }, + { + "epoch": 0.012978229477032189, + "grad_norm": 8.193394417599954, + "learning_rate": 4.31987814166032e-06, + "loss": 17.9166, + "step": 710 + }, + { + "epoch": 0.012996508673478714, + "grad_norm": 9.12124387859505, + "learning_rate": 4.3259710586443265e-06, + "loss": 18.2743, + "step": 711 + }, + { + "epoch": 0.013014787869925238, + "grad_norm": 8.275436544030585, + "learning_rate": 4.332063975628332e-06, + "loss": 18.3223, + "step": 712 + }, + { + "epoch": 0.013033067066371763, + "grad_norm": 9.919219202939884, + "learning_rate": 4.338156892612339e-06, + "loss": 18.7695, + "step": 713 + }, + { + "epoch": 0.013051346262818286, + "grad_norm": 9.006295734611193, + "learning_rate": 4.344249809596345e-06, + "loss": 18.4789, + "step": 714 + }, + { + "epoch": 0.01306962545926481, + "grad_norm": 8.727624480806657, + "learning_rate": 4.35034272658035e-06, + "loss": 18.3371, + "step": 715 + }, + { + "epoch": 0.013087904655711335, + "grad_norm": 7.154181830598685, + "learning_rate": 4.356435643564357e-06, + "loss": 17.5559, + "step": 716 + }, + { + "epoch": 0.013106183852157859, + "grad_norm": 7.965608283923321, + "learning_rate": 4.362528560548363e-06, + "loss": 18.0304, + "step": 717 + }, + { + "epoch": 0.013124463048604384, + "grad_norm": 7.751987635624145, + "learning_rate": 4.3686214775323685e-06, + "loss": 17.7836, + "step": 718 + }, + { + "epoch": 0.013142742245050907, + "grad_norm": 9.822875745653093, + "learning_rate": 4.374714394516375e-06, + "loss": 18.4279, + "step": 719 + }, + { + "epoch": 0.013161021441497433, + "grad_norm": 11.257898723251781, + "learning_rate": 4.380807311500381e-06, + "loss": 18.8577, + "step": 720 + }, + { + "epoch": 0.013179300637943956, + "grad_norm": 7.736540051326125, + "learning_rate": 4.386900228484387e-06, + "loss": 17.7715, + "step": 721 + }, + { + "epoch": 0.01319757983439048, + "grad_norm": 7.872605497863902, + "learning_rate": 4.392993145468393e-06, + "loss": 17.7712, + "step": 722 + }, + { + "epoch": 0.013215859030837005, + "grad_norm": 9.113611652172722, + "learning_rate": 4.3990860624524e-06, + "loss": 18.3717, + "step": 723 + }, + { + "epoch": 0.013234138227283528, + "grad_norm": 8.358889954921496, + "learning_rate": 4.405178979436406e-06, + "loss": 18.1748, + "step": 724 + }, + { + "epoch": 0.013252417423730053, + "grad_norm": 7.803959399616063, + "learning_rate": 
4.411271896420411e-06, + "loss": 17.8882, + "step": 725 + }, + { + "epoch": 0.013270696620176577, + "grad_norm": 10.211516845076726, + "learning_rate": 4.417364813404418e-06, + "loss": 18.6295, + "step": 726 + }, + { + "epoch": 0.0132889758166231, + "grad_norm": 8.15584773898762, + "learning_rate": 4.423457730388424e-06, + "loss": 17.8261, + "step": 727 + }, + { + "epoch": 0.013307255013069626, + "grad_norm": 8.265193313412619, + "learning_rate": 4.4295506473724295e-06, + "loss": 18.2289, + "step": 728 + }, + { + "epoch": 0.01332553420951615, + "grad_norm": 9.83364866575862, + "learning_rate": 4.435643564356436e-06, + "loss": 18.6857, + "step": 729 + }, + { + "epoch": 0.013343813405962674, + "grad_norm": 8.859709514341905, + "learning_rate": 4.441736481340443e-06, + "loss": 18.426, + "step": 730 + }, + { + "epoch": 0.013362092602409198, + "grad_norm": 6.84547356640548, + "learning_rate": 4.447829398324448e-06, + "loss": 17.7192, + "step": 731 + }, + { + "epoch": 0.013380371798855721, + "grad_norm": 9.62516363738731, + "learning_rate": 4.453922315308454e-06, + "loss": 18.5106, + "step": 732 + }, + { + "epoch": 0.013398650995302247, + "grad_norm": 9.710791197347461, + "learning_rate": 4.460015232292461e-06, + "loss": 18.3155, + "step": 733 + }, + { + "epoch": 0.01341693019174877, + "grad_norm": 7.088619072988261, + "learning_rate": 4.466108149276467e-06, + "loss": 17.5585, + "step": 734 + }, + { + "epoch": 0.013435209388195295, + "grad_norm": 8.813905602728473, + "learning_rate": 4.472201066260472e-06, + "loss": 18.3018, + "step": 735 + }, + { + "epoch": 0.013453488584641819, + "grad_norm": 7.7363833694697375, + "learning_rate": 4.478293983244479e-06, + "loss": 17.8962, + "step": 736 + }, + { + "epoch": 0.013471767781088344, + "grad_norm": 8.71227916566431, + "learning_rate": 4.484386900228485e-06, + "loss": 18.3363, + "step": 737 + }, + { + "epoch": 0.013490046977534868, + "grad_norm": 7.840887611839472, + "learning_rate": 4.4904798172124905e-06, + "loss": 18.0087, + "step": 738 + }, + { + "epoch": 0.013508326173981391, + "grad_norm": 8.133505317881973, + "learning_rate": 4.496572734196497e-06, + "loss": 18.1477, + "step": 739 + }, + { + "epoch": 0.013526605370427916, + "grad_norm": 7.991598762492575, + "learning_rate": 4.502665651180503e-06, + "loss": 18.1431, + "step": 740 + }, + { + "epoch": 0.01354488456687444, + "grad_norm": 8.610191757930943, + "learning_rate": 4.5087585681645095e-06, + "loss": 18.3059, + "step": 741 + }, + { + "epoch": 0.013563163763320965, + "grad_norm": 7.179232227677593, + "learning_rate": 4.514851485148515e-06, + "loss": 17.4715, + "step": 742 + }, + { + "epoch": 0.013581442959767489, + "grad_norm": 8.35948954945627, + "learning_rate": 4.520944402132521e-06, + "loss": 18.2916, + "step": 743 + }, + { + "epoch": 0.013599722156214012, + "grad_norm": 8.46206693768903, + "learning_rate": 4.5270373191165276e-06, + "loss": 17.9517, + "step": 744 + }, + { + "epoch": 0.013618001352660537, + "grad_norm": 10.010037950216763, + "learning_rate": 4.533130236100533e-06, + "loss": 19.1337, + "step": 745 + }, + { + "epoch": 0.01363628054910706, + "grad_norm": 10.033585816329204, + "learning_rate": 4.53922315308454e-06, + "loss": 18.9602, + "step": 746 + }, + { + "epoch": 0.013654559745553586, + "grad_norm": 11.821488713055107, + "learning_rate": 4.545316070068546e-06, + "loss": 18.67, + "step": 747 + }, + { + "epoch": 0.01367283894200011, + "grad_norm": 8.7428412408898, + "learning_rate": 4.5514089870525515e-06, + "loss": 18.1375, + "step": 748 + }, + { + "epoch": 
0.013691118138446635, + "grad_norm": 8.861577418193692, + "learning_rate": 4.557501904036558e-06, + "loss": 18.4961, + "step": 749 + }, + { + "epoch": 0.013709397334893158, + "grad_norm": 8.00181313724855, + "learning_rate": 4.563594821020564e-06, + "loss": 17.7919, + "step": 750 + }, + { + "epoch": 0.013727676531339682, + "grad_norm": 8.107133488958844, + "learning_rate": 4.56968773800457e-06, + "loss": 17.9478, + "step": 751 + }, + { + "epoch": 0.013745955727786207, + "grad_norm": 8.527100099140132, + "learning_rate": 4.575780654988576e-06, + "loss": 18.3121, + "step": 752 + }, + { + "epoch": 0.01376423492423273, + "grad_norm": 9.229603260412576, + "learning_rate": 4.581873571972582e-06, + "loss": 18.3729, + "step": 753 + }, + { + "epoch": 0.013782514120679256, + "grad_norm": 7.851968233333457, + "learning_rate": 4.5879664889565885e-06, + "loss": 17.7429, + "step": 754 + }, + { + "epoch": 0.01380079331712578, + "grad_norm": 8.500953189255293, + "learning_rate": 4.594059405940594e-06, + "loss": 18.412, + "step": 755 + }, + { + "epoch": 0.013819072513572303, + "grad_norm": 8.261006107947916, + "learning_rate": 4.6001523229246e-06, + "loss": 18.09, + "step": 756 + }, + { + "epoch": 0.013837351710018828, + "grad_norm": 8.336345469458829, + "learning_rate": 4.606245239908607e-06, + "loss": 18.2866, + "step": 757 + }, + { + "epoch": 0.013855630906465351, + "grad_norm": 9.247618791384095, + "learning_rate": 4.612338156892613e-06, + "loss": 18.4629, + "step": 758 + }, + { + "epoch": 0.013873910102911877, + "grad_norm": 7.692097753420125, + "learning_rate": 4.618431073876618e-06, + "loss": 18.0023, + "step": 759 + }, + { + "epoch": 0.0138921892993584, + "grad_norm": 8.185703810583892, + "learning_rate": 4.624523990860625e-06, + "loss": 17.9522, + "step": 760 + }, + { + "epoch": 0.013910468495804924, + "grad_norm": 9.73059709231243, + "learning_rate": 4.630616907844631e-06, + "loss": 18.8149, + "step": 761 + }, + { + "epoch": 0.013928747692251449, + "grad_norm": 8.807976975651538, + "learning_rate": 4.636709824828637e-06, + "loss": 18.2019, + "step": 762 + }, + { + "epoch": 0.013947026888697972, + "grad_norm": 8.807756941411888, + "learning_rate": 4.642802741812643e-06, + "loss": 18.582, + "step": 763 + }, + { + "epoch": 0.013965306085144498, + "grad_norm": 7.734928300785946, + "learning_rate": 4.6488956587966495e-06, + "loss": 18.0705, + "step": 764 + }, + { + "epoch": 0.013983585281591021, + "grad_norm": 9.32532334645556, + "learning_rate": 4.654988575780655e-06, + "loss": 18.4407, + "step": 765 + }, + { + "epoch": 0.014001864478037546, + "grad_norm": 7.226946936382555, + "learning_rate": 4.661081492764661e-06, + "loss": 17.8439, + "step": 766 + }, + { + "epoch": 0.01402014367448407, + "grad_norm": 7.562857315784674, + "learning_rate": 4.667174409748668e-06, + "loss": 17.507, + "step": 767 + }, + { + "epoch": 0.014038422870930593, + "grad_norm": 8.533365880373301, + "learning_rate": 4.673267326732674e-06, + "loss": 18.3735, + "step": 768 + }, + { + "epoch": 0.014056702067377119, + "grad_norm": 8.474421295874395, + "learning_rate": 4.67936024371668e-06, + "loss": 18.3085, + "step": 769 + }, + { + "epoch": 0.014074981263823642, + "grad_norm": 7.930561966593505, + "learning_rate": 4.685453160700686e-06, + "loss": 17.8629, + "step": 770 + }, + { + "epoch": 0.014093260460270167, + "grad_norm": 8.364342894711397, + "learning_rate": 4.691546077684692e-06, + "loss": 17.9165, + "step": 771 + }, + { + "epoch": 0.01411153965671669, + "grad_norm": 8.644803044948478, + "learning_rate": 4.697638994668698e-06, 
+ "loss": 18.3113, + "step": 772 + }, + { + "epoch": 0.014129818853163214, + "grad_norm": 7.0042353905597246, + "learning_rate": 4.703731911652704e-06, + "loss": 17.5256, + "step": 773 + }, + { + "epoch": 0.01414809804960974, + "grad_norm": 7.770288505567848, + "learning_rate": 4.7098248286367105e-06, + "loss": 17.6203, + "step": 774 + }, + { + "epoch": 0.014166377246056263, + "grad_norm": 8.114082084007649, + "learning_rate": 4.715917745620716e-06, + "loss": 17.8323, + "step": 775 + }, + { + "epoch": 0.014184656442502788, + "grad_norm": 8.107987396579302, + "learning_rate": 4.722010662604722e-06, + "loss": 18.0544, + "step": 776 + }, + { + "epoch": 0.014202935638949312, + "grad_norm": 9.195067350381473, + "learning_rate": 4.728103579588729e-06, + "loss": 18.643, + "step": 777 + }, + { + "epoch": 0.014221214835395835, + "grad_norm": 8.057056508611788, + "learning_rate": 4.734196496572734e-06, + "loss": 17.9456, + "step": 778 + }, + { + "epoch": 0.01423949403184236, + "grad_norm": 8.593365449774803, + "learning_rate": 4.740289413556741e-06, + "loss": 18.4926, + "step": 779 + }, + { + "epoch": 0.014257773228288884, + "grad_norm": 8.77392213344187, + "learning_rate": 4.746382330540747e-06, + "loss": 18.4204, + "step": 780 + }, + { + "epoch": 0.01427605242473541, + "grad_norm": 8.686221466198258, + "learning_rate": 4.7524752475247525e-06, + "loss": 17.8882, + "step": 781 + }, + { + "epoch": 0.014294331621181933, + "grad_norm": 7.543089350198794, + "learning_rate": 4.758568164508759e-06, + "loss": 17.8673, + "step": 782 + }, + { + "epoch": 0.014312610817628458, + "grad_norm": 7.72067947204325, + "learning_rate": 4.764661081492765e-06, + "loss": 18.07, + "step": 783 + }, + { + "epoch": 0.014330890014074981, + "grad_norm": 8.14505655255484, + "learning_rate": 4.770753998476771e-06, + "loss": 18.2828, + "step": 784 + }, + { + "epoch": 0.014349169210521505, + "grad_norm": 13.760586711568584, + "learning_rate": 4.776846915460777e-06, + "loss": 17.8392, + "step": 785 + }, + { + "epoch": 0.01436744840696803, + "grad_norm": 8.131829236787254, + "learning_rate": 4.782939832444784e-06, + "loss": 18.3155, + "step": 786 + }, + { + "epoch": 0.014385727603414554, + "grad_norm": 8.479480779011643, + "learning_rate": 4.789032749428789e-06, + "loss": 18.1185, + "step": 787 + }, + { + "epoch": 0.014404006799861079, + "grad_norm": 8.001242895150341, + "learning_rate": 4.795125666412795e-06, + "loss": 17.9873, + "step": 788 + }, + { + "epoch": 0.014422285996307602, + "grad_norm": 7.5503732470374985, + "learning_rate": 4.801218583396802e-06, + "loss": 17.8135, + "step": 789 + }, + { + "epoch": 0.014440565192754126, + "grad_norm": 9.701365399956416, + "learning_rate": 4.807311500380808e-06, + "loss": 18.2403, + "step": 790 + }, + { + "epoch": 0.014458844389200651, + "grad_norm": 9.540336103150405, + "learning_rate": 4.8134044173648135e-06, + "loss": 18.6102, + "step": 791 + }, + { + "epoch": 0.014477123585647175, + "grad_norm": 9.501216323824327, + "learning_rate": 4.81949733434882e-06, + "loss": 18.5699, + "step": 792 + }, + { + "epoch": 0.0144954027820937, + "grad_norm": 9.226566125082897, + "learning_rate": 4.825590251332826e-06, + "loss": 18.44, + "step": 793 + }, + { + "epoch": 0.014513681978540223, + "grad_norm": 8.286721370460413, + "learning_rate": 4.831683168316832e-06, + "loss": 18.0559, + "step": 794 + }, + { + "epoch": 0.014531961174986747, + "grad_norm": 9.034488978203306, + "learning_rate": 4.837776085300838e-06, + "loss": 18.4985, + "step": 795 + }, + { + "epoch": 0.014550240371433272, + "grad_norm": 
8.494852627339926, + "learning_rate": 4.843869002284845e-06, + "loss": 18.5201, + "step": 796 + }, + { + "epoch": 0.014568519567879796, + "grad_norm": 7.87204466443327, + "learning_rate": 4.8499619192688506e-06, + "loss": 17.3842, + "step": 797 + }, + { + "epoch": 0.01458679876432632, + "grad_norm": 6.817930845924937, + "learning_rate": 4.856054836252856e-06, + "loss": 17.6466, + "step": 798 + }, + { + "epoch": 0.014605077960772844, + "grad_norm": 8.6349003420488, + "learning_rate": 4.862147753236863e-06, + "loss": 18.4442, + "step": 799 + }, + { + "epoch": 0.01462335715721937, + "grad_norm": 9.243550819410242, + "learning_rate": 4.868240670220869e-06, + "loss": 18.442, + "step": 800 + }, + { + "epoch": 0.014641636353665893, + "grad_norm": 9.380566812642247, + "learning_rate": 4.8743335872048744e-06, + "loss": 18.4791, + "step": 801 + }, + { + "epoch": 0.014659915550112416, + "grad_norm": 8.10614450572759, + "learning_rate": 4.880426504188881e-06, + "loss": 18.5559, + "step": 802 + }, + { + "epoch": 0.014678194746558942, + "grad_norm": 8.647298812820837, + "learning_rate": 4.886519421172887e-06, + "loss": 18.3998, + "step": 803 + }, + { + "epoch": 0.014696473943005465, + "grad_norm": 7.246022705060174, + "learning_rate": 4.8926123381568926e-06, + "loss": 17.6121, + "step": 804 + }, + { + "epoch": 0.01471475313945199, + "grad_norm": 7.971771019689157, + "learning_rate": 4.898705255140899e-06, + "loss": 17.9529, + "step": 805 + }, + { + "epoch": 0.014733032335898514, + "grad_norm": 8.063907379592704, + "learning_rate": 4.904798172124905e-06, + "loss": 18.2078, + "step": 806 + }, + { + "epoch": 0.014751311532345037, + "grad_norm": 9.569172498445036, + "learning_rate": 4.9108910891089115e-06, + "loss": 18.7412, + "step": 807 + }, + { + "epoch": 0.014769590728791563, + "grad_norm": 8.647993281620685, + "learning_rate": 4.916984006092917e-06, + "loss": 18.1838, + "step": 808 + }, + { + "epoch": 0.014787869925238086, + "grad_norm": 8.090149228825357, + "learning_rate": 4.923076923076924e-06, + "loss": 18.025, + "step": 809 + }, + { + "epoch": 0.014806149121684611, + "grad_norm": 7.454200251754739, + "learning_rate": 4.92916984006093e-06, + "loss": 17.6568, + "step": 810 + }, + { + "epoch": 0.014824428318131135, + "grad_norm": 7.943819236198461, + "learning_rate": 4.935262757044935e-06, + "loss": 17.8122, + "step": 811 + }, + { + "epoch": 0.014842707514577658, + "grad_norm": 7.699759087568516, + "learning_rate": 4.941355674028942e-06, + "loss": 17.7524, + "step": 812 + }, + { + "epoch": 0.014860986711024184, + "grad_norm": 8.368089050350493, + "learning_rate": 4.947448591012948e-06, + "loss": 18.1158, + "step": 813 + }, + { + "epoch": 0.014879265907470707, + "grad_norm": 7.805938306042278, + "learning_rate": 4.953541507996954e-06, + "loss": 18.0641, + "step": 814 + }, + { + "epoch": 0.014897545103917232, + "grad_norm": 7.943826091683709, + "learning_rate": 4.95963442498096e-06, + "loss": 17.8384, + "step": 815 + }, + { + "epoch": 0.014915824300363756, + "grad_norm": 7.757634044377665, + "learning_rate": 4.965727341964966e-06, + "loss": 17.9461, + "step": 816 + }, + { + "epoch": 0.014934103496810281, + "grad_norm": 9.9770776203452, + "learning_rate": 4.9718202589489725e-06, + "loss": 18.1553, + "step": 817 + }, + { + "epoch": 0.014952382693256805, + "grad_norm": 8.503532888039603, + "learning_rate": 4.977913175932978e-06, + "loss": 17.8678, + "step": 818 + }, + { + "epoch": 0.014970661889703328, + "grad_norm": 8.106962674768528, + "learning_rate": 4.984006092916984e-06, + "loss": 17.8451, + "step": 
819 + }, + { + "epoch": 0.014988941086149853, + "grad_norm": 8.833702556308314, + "learning_rate": 4.990099009900991e-06, + "loss": 17.9596, + "step": 820 + }, + { + "epoch": 0.015007220282596377, + "grad_norm": 7.937916712032483, + "learning_rate": 4.996191926884996e-06, + "loss": 17.9292, + "step": 821 + }, + { + "epoch": 0.015025499479042902, + "grad_norm": 7.462546508111826, + "learning_rate": 5.002284843869003e-06, + "loss": 17.8169, + "step": 822 + }, + { + "epoch": 0.015043778675489426, + "grad_norm": 9.39886461087385, + "learning_rate": 5.00837776085301e-06, + "loss": 18.4869, + "step": 823 + }, + { + "epoch": 0.015062057871935949, + "grad_norm": 8.050725336094882, + "learning_rate": 5.0144706778370145e-06, + "loss": 17.8235, + "step": 824 + }, + { + "epoch": 0.015080337068382474, + "grad_norm": 9.269730982093947, + "learning_rate": 5.020563594821021e-06, + "loss": 18.3107, + "step": 825 + }, + { + "epoch": 0.015098616264828998, + "grad_norm": 6.197477814797283, + "learning_rate": 5.026656511805027e-06, + "loss": 17.2913, + "step": 826 + }, + { + "epoch": 0.015116895461275523, + "grad_norm": 8.072231109131868, + "learning_rate": 5.0327494287890335e-06, + "loss": 18.1054, + "step": 827 + }, + { + "epoch": 0.015135174657722046, + "grad_norm": 7.95630608538138, + "learning_rate": 5.038842345773039e-06, + "loss": 18.0839, + "step": 828 + }, + { + "epoch": 0.01515345385416857, + "grad_norm": 7.566277226086446, + "learning_rate": 5.044935262757046e-06, + "loss": 18.2472, + "step": 829 + }, + { + "epoch": 0.015171733050615095, + "grad_norm": 8.616470788304431, + "learning_rate": 5.051028179741051e-06, + "loss": 18.4674, + "step": 830 + }, + { + "epoch": 0.015190012247061619, + "grad_norm": 7.0757863279990865, + "learning_rate": 5.057121096725057e-06, + "loss": 17.5577, + "step": 831 + }, + { + "epoch": 0.015208291443508144, + "grad_norm": 8.079457005193985, + "learning_rate": 5.063214013709063e-06, + "loss": 18.3606, + "step": 832 + }, + { + "epoch": 0.015226570639954667, + "grad_norm": 8.778226755851602, + "learning_rate": 5.06930693069307e-06, + "loss": 18.2777, + "step": 833 + }, + { + "epoch": 0.015244849836401193, + "grad_norm": 8.512983358399698, + "learning_rate": 5.075399847677076e-06, + "loss": 18.5117, + "step": 834 + }, + { + "epoch": 0.015263129032847716, + "grad_norm": 8.56006329627807, + "learning_rate": 5.081492764661082e-06, + "loss": 18.4841, + "step": 835 + }, + { + "epoch": 0.01528140822929424, + "grad_norm": 9.288836121000003, + "learning_rate": 5.087585681645088e-06, + "loss": 18.3186, + "step": 836 + }, + { + "epoch": 0.015299687425740765, + "grad_norm": 12.407714577797238, + "learning_rate": 5.093678598629094e-06, + "loss": 18.7556, + "step": 837 + }, + { + "epoch": 0.015317966622187288, + "grad_norm": 8.347581071473323, + "learning_rate": 5.0997715156131e-06, + "loss": 18.1685, + "step": 838 + }, + { + "epoch": 0.015336245818633814, + "grad_norm": 8.13250343081008, + "learning_rate": 5.105864432597106e-06, + "loss": 18.0985, + "step": 839 + }, + { + "epoch": 0.015354525015080337, + "grad_norm": 8.810476732287764, + "learning_rate": 5.1119573495811126e-06, + "loss": 18.333, + "step": 840 + }, + { + "epoch": 0.01537280421152686, + "grad_norm": 7.078936610191827, + "learning_rate": 5.118050266565119e-06, + "loss": 17.4763, + "step": 841 + }, + { + "epoch": 0.015391083407973386, + "grad_norm": 7.42314765775181, + "learning_rate": 5.124143183549125e-06, + "loss": 17.6829, + "step": 842 + }, + { + "epoch": 0.01540936260441991, + "grad_norm": 7.98124248088551, + 
"learning_rate": 5.13023610053313e-06, + "loss": 17.992, + "step": 843 + }, + { + "epoch": 0.015427641800866435, + "grad_norm": 9.077498128006873, + "learning_rate": 5.1363290175171365e-06, + "loss": 18.2794, + "step": 844 + }, + { + "epoch": 0.015445920997312958, + "grad_norm": 8.072782086561258, + "learning_rate": 5.142421934501143e-06, + "loss": 17.945, + "step": 845 + }, + { + "epoch": 0.015464200193759482, + "grad_norm": 9.451917620903867, + "learning_rate": 5.148514851485149e-06, + "loss": 18.9475, + "step": 846 + }, + { + "epoch": 0.015482479390206007, + "grad_norm": 8.796830074489074, + "learning_rate": 5.1546077684691554e-06, + "loss": 18.49, + "step": 847 + }, + { + "epoch": 0.01550075858665253, + "grad_norm": 8.306849648605311, + "learning_rate": 5.160700685453162e-06, + "loss": 18.0991, + "step": 848 + }, + { + "epoch": 0.015519037783099056, + "grad_norm": 7.459114632117044, + "learning_rate": 5.166793602437167e-06, + "loss": 17.8777, + "step": 849 + }, + { + "epoch": 0.015537316979545579, + "grad_norm": 7.5045201012713045, + "learning_rate": 5.172886519421173e-06, + "loss": 17.9472, + "step": 850 + }, + { + "epoch": 0.015555596175992104, + "grad_norm": 8.71227984792957, + "learning_rate": 5.178979436405179e-06, + "loss": 18.0386, + "step": 851 + }, + { + "epoch": 0.015573875372438628, + "grad_norm": 7.765463154922769, + "learning_rate": 5.185072353389186e-06, + "loss": 17.9142, + "step": 852 + }, + { + "epoch": 0.015592154568885151, + "grad_norm": 10.161510898240463, + "learning_rate": 5.191165270373192e-06, + "loss": 18.6878, + "step": 853 + }, + { + "epoch": 0.015610433765331676, + "grad_norm": 9.565597600633472, + "learning_rate": 5.197258187357198e-06, + "loss": 18.3524, + "step": 854 + }, + { + "epoch": 0.0156287129617782, + "grad_norm": 8.17644137642925, + "learning_rate": 5.203351104341203e-06, + "loss": 17.7937, + "step": 855 + }, + { + "epoch": 0.015646992158224723, + "grad_norm": 9.283708677240496, + "learning_rate": 5.20944402132521e-06, + "loss": 18.368, + "step": 856 + }, + { + "epoch": 0.01566527135467125, + "grad_norm": 10.0686745278984, + "learning_rate": 5.2155369383092155e-06, + "loss": 18.7853, + "step": 857 + }, + { + "epoch": 0.015683550551117774, + "grad_norm": 7.431681847854302, + "learning_rate": 5.221629855293222e-06, + "loss": 17.6462, + "step": 858 + }, + { + "epoch": 0.015701829747564296, + "grad_norm": 8.486462263281691, + "learning_rate": 5.227722772277229e-06, + "loss": 18.0921, + "step": 859 + }, + { + "epoch": 0.01572010894401082, + "grad_norm": 7.586138553514978, + "learning_rate": 5.2338156892612345e-06, + "loss": 17.8335, + "step": 860 + }, + { + "epoch": 0.015738388140457346, + "grad_norm": 7.120489311236346, + "learning_rate": 5.239908606245241e-06, + "loss": 17.7785, + "step": 861 + }, + { + "epoch": 0.01575666733690387, + "grad_norm": 7.800959298571683, + "learning_rate": 5.246001523229246e-06, + "loss": 17.9972, + "step": 862 + }, + { + "epoch": 0.015774946533350393, + "grad_norm": 8.62951022292379, + "learning_rate": 5.252094440213253e-06, + "loss": 18.4499, + "step": 863 + }, + { + "epoch": 0.01579322572979692, + "grad_norm": 7.9691727795812195, + "learning_rate": 5.258187357197258e-06, + "loss": 18.3753, + "step": 864 + }, + { + "epoch": 0.015811504926243444, + "grad_norm": 8.388302723530174, + "learning_rate": 5.264280274181265e-06, + "loss": 18.4872, + "step": 865 + }, + { + "epoch": 0.015829784122689965, + "grad_norm": 8.049158898020348, + "learning_rate": 5.270373191165271e-06, + "loss": 17.7163, + "step": 866 + }, + { + "epoch": 
0.01584806331913649, + "grad_norm": 9.6038408512579, + "learning_rate": 5.276466108149277e-06, + "loss": 19.0707, + "step": 867 + }, + { + "epoch": 0.015866342515583016, + "grad_norm": 7.382442367842405, + "learning_rate": 5.282559025133282e-06, + "loss": 17.6808, + "step": 868 + }, + { + "epoch": 0.015884621712029538, + "grad_norm": 8.080470041328557, + "learning_rate": 5.288651942117289e-06, + "loss": 18.38, + "step": 869 + }, + { + "epoch": 0.015902900908476063, + "grad_norm": 9.767620188020627, + "learning_rate": 5.2947448591012955e-06, + "loss": 18.9061, + "step": 870 + }, + { + "epoch": 0.015921180104922588, + "grad_norm": 8.31662946648782, + "learning_rate": 5.300837776085301e-06, + "loss": 17.8949, + "step": 871 + }, + { + "epoch": 0.015939459301369113, + "grad_norm": 7.701637780454898, + "learning_rate": 5.306930693069308e-06, + "loss": 17.6824, + "step": 872 + }, + { + "epoch": 0.015957738497815635, + "grad_norm": 8.790161934550762, + "learning_rate": 5.313023610053314e-06, + "loss": 18.6216, + "step": 873 + }, + { + "epoch": 0.01597601769426216, + "grad_norm": 7.674755782194827, + "learning_rate": 5.319116527037319e-06, + "loss": 17.5485, + "step": 874 + }, + { + "epoch": 0.015994296890708685, + "grad_norm": 8.867388068015622, + "learning_rate": 5.325209444021325e-06, + "loss": 18.4883, + "step": 875 + }, + { + "epoch": 0.016012576087155207, + "grad_norm": 9.1371586404217, + "learning_rate": 5.331302361005332e-06, + "loss": 18.6562, + "step": 876 + }, + { + "epoch": 0.016030855283601732, + "grad_norm": 8.012671192217049, + "learning_rate": 5.3373952779893375e-06, + "loss": 17.9416, + "step": 877 + }, + { + "epoch": 0.016049134480048258, + "grad_norm": 9.978771087488296, + "learning_rate": 5.343488194973344e-06, + "loss": 18.8468, + "step": 878 + }, + { + "epoch": 0.016067413676494783, + "grad_norm": 9.18322782849521, + "learning_rate": 5.349581111957351e-06, + "loss": 18.4841, + "step": 879 + }, + { + "epoch": 0.016085692872941305, + "grad_norm": 10.14432801291603, + "learning_rate": 5.355674028941356e-06, + "loss": 18.7332, + "step": 880 + }, + { + "epoch": 0.01610397206938783, + "grad_norm": 7.964452973970781, + "learning_rate": 5.361766945925362e-06, + "loss": 18.2144, + "step": 881 + }, + { + "epoch": 0.016122251265834355, + "grad_norm": 7.916189059576979, + "learning_rate": 5.367859862909368e-06, + "loss": 17.9018, + "step": 882 + }, + { + "epoch": 0.016140530462280877, + "grad_norm": 8.399208049518332, + "learning_rate": 5.373952779893375e-06, + "loss": 18.0163, + "step": 883 + }, + { + "epoch": 0.016158809658727402, + "grad_norm": 9.116458026740009, + "learning_rate": 5.38004569687738e-06, + "loss": 18.4248, + "step": 884 + }, + { + "epoch": 0.016177088855173927, + "grad_norm": 8.630937961007142, + "learning_rate": 5.386138613861387e-06, + "loss": 18.3698, + "step": 885 + }, + { + "epoch": 0.01619536805162045, + "grad_norm": 9.413585620699246, + "learning_rate": 5.3922315308453936e-06, + "loss": 18.6116, + "step": 886 + }, + { + "epoch": 0.016213647248066974, + "grad_norm": 7.6368778975300184, + "learning_rate": 5.3983244478293985e-06, + "loss": 17.8458, + "step": 887 + }, + { + "epoch": 0.0162319264445135, + "grad_norm": 7.403055825794283, + "learning_rate": 5.404417364813404e-06, + "loss": 17.747, + "step": 888 + }, + { + "epoch": 0.016250205640960025, + "grad_norm": 7.766207662372749, + "learning_rate": 5.410510281797411e-06, + "loss": 18.1277, + "step": 889 + }, + { + "epoch": 0.016268484837406547, + "grad_norm": 8.930033722117813, + "learning_rate": 
5.4166031987814174e-06, + "loss": 18.3946, + "step": 890 + }, + { + "epoch": 0.016286764033853072, + "grad_norm": 8.124914772358064, + "learning_rate": 5.422696115765423e-06, + "loss": 18.2002, + "step": 891 + }, + { + "epoch": 0.016305043230299597, + "grad_norm": 9.296893407946603, + "learning_rate": 5.42878903274943e-06, + "loss": 18.4557, + "step": 892 + }, + { + "epoch": 0.01632332242674612, + "grad_norm": 8.282641846666893, + "learning_rate": 5.434881949733435e-06, + "loss": 18.3414, + "step": 893 + }, + { + "epoch": 0.016341601623192644, + "grad_norm": 7.70249475337819, + "learning_rate": 5.440974866717441e-06, + "loss": 18.0186, + "step": 894 + }, + { + "epoch": 0.01635988081963917, + "grad_norm": 8.136410031229282, + "learning_rate": 5.447067783701447e-06, + "loss": 18.4672, + "step": 895 + }, + { + "epoch": 0.016378160016085695, + "grad_norm": 9.9700678925168, + "learning_rate": 5.453160700685454e-06, + "loss": 18.9682, + "step": 896 + }, + { + "epoch": 0.016396439212532216, + "grad_norm": 7.017176755988746, + "learning_rate": 5.45925361766946e-06, + "loss": 17.8646, + "step": 897 + }, + { + "epoch": 0.01641471840897874, + "grad_norm": 9.103585413979374, + "learning_rate": 5.465346534653466e-06, + "loss": 18.3866, + "step": 898 + }, + { + "epoch": 0.016432997605425267, + "grad_norm": 8.404999518682738, + "learning_rate": 5.471439451637471e-06, + "loss": 18.418, + "step": 899 + }, + { + "epoch": 0.01645127680187179, + "grad_norm": 7.937828994479255, + "learning_rate": 5.4775323686214776e-06, + "loss": 18.3949, + "step": 900 + }, + { + "epoch": 0.016469555998318314, + "grad_norm": 8.51573068887959, + "learning_rate": 5.483625285605484e-06, + "loss": 18.1735, + "step": 901 + }, + { + "epoch": 0.01648783519476484, + "grad_norm": 8.529255174675132, + "learning_rate": 5.48971820258949e-06, + "loss": 18.2995, + "step": 902 + }, + { + "epoch": 0.01650611439121136, + "grad_norm": 7.766746131636291, + "learning_rate": 5.4958111195734965e-06, + "loss": 17.3718, + "step": 903 + }, + { + "epoch": 0.016524393587657886, + "grad_norm": 9.746610904544609, + "learning_rate": 5.501904036557502e-06, + "loss": 18.1831, + "step": 904 + }, + { + "epoch": 0.01654267278410441, + "grad_norm": 8.476263194389706, + "learning_rate": 5.507996953541509e-06, + "loss": 17.8804, + "step": 905 + }, + { + "epoch": 0.016560951980550936, + "grad_norm": 7.748230606111784, + "learning_rate": 5.514089870525514e-06, + "loss": 17.8924, + "step": 906 + }, + { + "epoch": 0.016579231176997458, + "grad_norm": 7.432102607667173, + "learning_rate": 5.52018278750952e-06, + "loss": 18.1648, + "step": 907 + }, + { + "epoch": 0.016597510373443983, + "grad_norm": 7.84694618480964, + "learning_rate": 5.526275704493527e-06, + "loss": 17.9913, + "step": 908 + }, + { + "epoch": 0.01661578956989051, + "grad_norm": 7.404929998940032, + "learning_rate": 5.532368621477533e-06, + "loss": 17.8132, + "step": 909 + }, + { + "epoch": 0.01663406876633703, + "grad_norm": 9.29751618630069, + "learning_rate": 5.538461538461539e-06, + "loss": 18.5345, + "step": 910 + }, + { + "epoch": 0.016652347962783556, + "grad_norm": 7.417833178895084, + "learning_rate": 5.544554455445545e-06, + "loss": 17.6646, + "step": 911 + }, + { + "epoch": 0.01667062715923008, + "grad_norm": 9.70772100620693, + "learning_rate": 5.550647372429551e-06, + "loss": 18.7744, + "step": 912 + }, + { + "epoch": 0.016688906355676606, + "grad_norm": 9.072232524086857, + "learning_rate": 5.556740289413557e-06, + "loss": 18.3423, + "step": 913 + }, + { + "epoch": 0.016707185552123128, + 
"grad_norm": 9.07910036742243, + "learning_rate": 5.562833206397563e-06, + "loss": 18.2481, + "step": 914 + }, + { + "epoch": 0.016725464748569653, + "grad_norm": 9.809361373009457, + "learning_rate": 5.56892612338157e-06, + "loss": 18.367, + "step": 915 + }, + { + "epoch": 0.01674374394501618, + "grad_norm": 10.115596121390496, + "learning_rate": 5.575019040365576e-06, + "loss": 18.3545, + "step": 916 + }, + { + "epoch": 0.0167620231414627, + "grad_norm": 6.7708275169011305, + "learning_rate": 5.581111957349582e-06, + "loss": 17.5047, + "step": 917 + }, + { + "epoch": 0.016780302337909225, + "grad_norm": 8.138730437640659, + "learning_rate": 5.587204874333587e-06, + "loss": 18.3553, + "step": 918 + }, + { + "epoch": 0.01679858153435575, + "grad_norm": 9.609285718167515, + "learning_rate": 5.593297791317594e-06, + "loss": 18.3202, + "step": 919 + }, + { + "epoch": 0.016816860730802272, + "grad_norm": 8.384124976958663, + "learning_rate": 5.5993907083015995e-06, + "loss": 18.2082, + "step": 920 + }, + { + "epoch": 0.016835139927248798, + "grad_norm": 9.14439730562192, + "learning_rate": 5.605483625285606e-06, + "loss": 17.9248, + "step": 921 + }, + { + "epoch": 0.016853419123695323, + "grad_norm": 8.996609322297227, + "learning_rate": 5.611576542269612e-06, + "loss": 18.3729, + "step": 922 + }, + { + "epoch": 0.016871698320141848, + "grad_norm": 7.315932421270537, + "learning_rate": 5.6176694592536185e-06, + "loss": 17.8854, + "step": 923 + }, + { + "epoch": 0.01688997751658837, + "grad_norm": 8.122807052797084, + "learning_rate": 5.623762376237625e-06, + "loss": 18.3153, + "step": 924 + }, + { + "epoch": 0.016908256713034895, + "grad_norm": 7.902339895701034, + "learning_rate": 5.62985529322163e-06, + "loss": 18.2543, + "step": 925 + }, + { + "epoch": 0.01692653590948142, + "grad_norm": 9.590556579018623, + "learning_rate": 5.635948210205637e-06, + "loss": 19.1718, + "step": 926 + }, + { + "epoch": 0.016944815105927942, + "grad_norm": 8.795053677679899, + "learning_rate": 5.642041127189642e-06, + "loss": 18.236, + "step": 927 + }, + { + "epoch": 0.016963094302374467, + "grad_norm": 8.946710023619962, + "learning_rate": 5.648134044173649e-06, + "loss": 18.3254, + "step": 928 + }, + { + "epoch": 0.016981373498820992, + "grad_norm": 7.717632589221404, + "learning_rate": 5.654226961157655e-06, + "loss": 17.883, + "step": 929 + }, + { + "epoch": 0.016999652695267518, + "grad_norm": 8.818060009422503, + "learning_rate": 5.660319878141661e-06, + "loss": 18.7201, + "step": 930 + }, + { + "epoch": 0.01701793189171404, + "grad_norm": 7.428304896972738, + "learning_rate": 5.666412795125666e-06, + "loss": 17.6157, + "step": 931 + }, + { + "epoch": 0.017036211088160565, + "grad_norm": 8.63741944219696, + "learning_rate": 5.672505712109673e-06, + "loss": 18.2596, + "step": 932 + }, + { + "epoch": 0.01705449028460709, + "grad_norm": 8.776657541381901, + "learning_rate": 5.678598629093679e-06, + "loss": 18.8281, + "step": 933 + }, + { + "epoch": 0.01707276948105361, + "grad_norm": 8.657479987630891, + "learning_rate": 5.684691546077685e-06, + "loss": 17.9952, + "step": 934 + }, + { + "epoch": 0.017091048677500137, + "grad_norm": 8.165623263296865, + "learning_rate": 5.690784463061692e-06, + "loss": 18.0903, + "step": 935 + }, + { + "epoch": 0.017109327873946662, + "grad_norm": 7.746609654316087, + "learning_rate": 5.6968773800456976e-06, + "loss": 17.9119, + "step": 936 + }, + { + "epoch": 0.017127607070393184, + "grad_norm": 6.5051760343841245, + "learning_rate": 5.702970297029703e-06, + "loss": 17.2384, 
+ "step": 937 + }, + { + "epoch": 0.01714588626683971, + "grad_norm": 10.149156592353432, + "learning_rate": 5.709063214013709e-06, + "loss": 18.8406, + "step": 938 + }, + { + "epoch": 0.017164165463286234, + "grad_norm": 7.539662391577778, + "learning_rate": 5.715156130997716e-06, + "loss": 17.7378, + "step": 939 + }, + { + "epoch": 0.01718244465973276, + "grad_norm": 7.8752178884870565, + "learning_rate": 5.7212490479817215e-06, + "loss": 17.9224, + "step": 940 + }, + { + "epoch": 0.01720072385617928, + "grad_norm": 8.16065607456676, + "learning_rate": 5.727341964965728e-06, + "loss": 18.0614, + "step": 941 + }, + { + "epoch": 0.017219003052625807, + "grad_norm": 9.572267813617717, + "learning_rate": 5.733434881949735e-06, + "loss": 18.6002, + "step": 942 + }, + { + "epoch": 0.017237282249072332, + "grad_norm": 8.646762787246, + "learning_rate": 5.73952779893374e-06, + "loss": 17.9923, + "step": 943 + }, + { + "epoch": 0.017255561445518854, + "grad_norm": 7.53253733262006, + "learning_rate": 5.745620715917745e-06, + "loss": 17.9094, + "step": 944 + }, + { + "epoch": 0.01727384064196538, + "grad_norm": 7.52707356259082, + "learning_rate": 5.751713632901752e-06, + "loss": 17.6118, + "step": 945 + }, + { + "epoch": 0.017292119838411904, + "grad_norm": 8.480907437056311, + "learning_rate": 5.7578065498857585e-06, + "loss": 17.9889, + "step": 946 + }, + { + "epoch": 0.01731039903485843, + "grad_norm": 7.035617828510175, + "learning_rate": 5.763899466869764e-06, + "loss": 17.6165, + "step": 947 + }, + { + "epoch": 0.01732867823130495, + "grad_norm": 8.0693542132295, + "learning_rate": 5.769992383853771e-06, + "loss": 18.1385, + "step": 948 + }, + { + "epoch": 0.017346957427751476, + "grad_norm": 9.662756503100724, + "learning_rate": 5.776085300837777e-06, + "loss": 19.1192, + "step": 949 + }, + { + "epoch": 0.017365236624198, + "grad_norm": 7.757126477800354, + "learning_rate": 5.7821782178217824e-06, + "loss": 17.9017, + "step": 950 + }, + { + "epoch": 0.017383515820644523, + "grad_norm": 9.677005352029196, + "learning_rate": 5.788271134805788e-06, + "loss": 18.4819, + "step": 951 + }, + { + "epoch": 0.01740179501709105, + "grad_norm": 7.30926952903755, + "learning_rate": 5.794364051789795e-06, + "loss": 17.8772, + "step": 952 + }, + { + "epoch": 0.017420074213537574, + "grad_norm": 8.325157678978579, + "learning_rate": 5.800456968773801e-06, + "loss": 18.1306, + "step": 953 + }, + { + "epoch": 0.017438353409984095, + "grad_norm": 7.731164288521678, + "learning_rate": 5.806549885757807e-06, + "loss": 17.9777, + "step": 954 + }, + { + "epoch": 0.01745663260643062, + "grad_norm": 7.6792569282850485, + "learning_rate": 5.812642802741814e-06, + "loss": 18.0973, + "step": 955 + }, + { + "epoch": 0.017474911802877146, + "grad_norm": 8.399265872828295, + "learning_rate": 5.818735719725819e-06, + "loss": 18.2354, + "step": 956 + }, + { + "epoch": 0.01749319099932367, + "grad_norm": 8.10916575336011, + "learning_rate": 5.824828636709825e-06, + "loss": 17.878, + "step": 957 + }, + { + "epoch": 0.017511470195770193, + "grad_norm": 7.027505730094863, + "learning_rate": 5.830921553693831e-06, + "loss": 17.8894, + "step": 958 + }, + { + "epoch": 0.017529749392216718, + "grad_norm": 7.652531931486096, + "learning_rate": 5.837014470677838e-06, + "loss": 18.224, + "step": 959 + }, + { + "epoch": 0.017548028588663243, + "grad_norm": 10.327063870382897, + "learning_rate": 5.843107387661843e-06, + "loss": 17.9355, + "step": 960 + }, + { + "epoch": 0.017566307785109765, + "grad_norm": 8.613566541365316, + 
"learning_rate": 5.84920030464585e-06, + "loss": 18.0343, + "step": 961 + }, + { + "epoch": 0.01758458698155629, + "grad_norm": 8.57057882780188, + "learning_rate": 5.855293221629855e-06, + "loss": 18.196, + "step": 962 + }, + { + "epoch": 0.017602866178002816, + "grad_norm": 7.938972752095507, + "learning_rate": 5.8613861386138615e-06, + "loss": 17.6136, + "step": 963 + }, + { + "epoch": 0.01762114537444934, + "grad_norm": 7.478286976202789, + "learning_rate": 5.867479055597868e-06, + "loss": 17.9303, + "step": 964 + }, + { + "epoch": 0.017639424570895863, + "grad_norm": 8.883723962903053, + "learning_rate": 5.873571972581874e-06, + "loss": 18.2692, + "step": 965 + }, + { + "epoch": 0.017657703767342388, + "grad_norm": 8.651125461568094, + "learning_rate": 5.8796648895658805e-06, + "loss": 18.1206, + "step": 966 + }, + { + "epoch": 0.017675982963788913, + "grad_norm": 11.154753319717942, + "learning_rate": 5.885757806549886e-06, + "loss": 19.2872, + "step": 967 + }, + { + "epoch": 0.017694262160235435, + "grad_norm": 10.085538888665493, + "learning_rate": 5.891850723533893e-06, + "loss": 18.393, + "step": 968 + }, + { + "epoch": 0.01771254135668196, + "grad_norm": 7.843160528454247, + "learning_rate": 5.897943640517898e-06, + "loss": 18.0221, + "step": 969 + }, + { + "epoch": 0.017730820553128485, + "grad_norm": 7.979372852978716, + "learning_rate": 5.904036557501904e-06, + "loss": 17.9246, + "step": 970 + }, + { + "epoch": 0.017749099749575007, + "grad_norm": 9.453295281409622, + "learning_rate": 5.910129474485911e-06, + "loss": 18.7617, + "step": 971 + }, + { + "epoch": 0.017767378946021532, + "grad_norm": 8.043964875887363, + "learning_rate": 5.916222391469917e-06, + "loss": 17.8481, + "step": 972 + }, + { + "epoch": 0.017785658142468058, + "grad_norm": 8.844986788228319, + "learning_rate": 5.922315308453923e-06, + "loss": 18.457, + "step": 973 + }, + { + "epoch": 0.017803937338914583, + "grad_norm": 9.057668393721022, + "learning_rate": 5.928408225437929e-06, + "loss": 18.1555, + "step": 974 + }, + { + "epoch": 0.017822216535361105, + "grad_norm": 8.104869602718992, + "learning_rate": 5.934501142421935e-06, + "loss": 17.8352, + "step": 975 + }, + { + "epoch": 0.01784049573180763, + "grad_norm": 7.66250929390229, + "learning_rate": 5.940594059405941e-06, + "loss": 17.9034, + "step": 976 + }, + { + "epoch": 0.017858774928254155, + "grad_norm": 9.66899303971445, + "learning_rate": 5.946686976389947e-06, + "loss": 18.6847, + "step": 977 + }, + { + "epoch": 0.017877054124700677, + "grad_norm": 8.201507650990836, + "learning_rate": 5.952779893373953e-06, + "loss": 18.3439, + "step": 978 + }, + { + "epoch": 0.017895333321147202, + "grad_norm": 7.123313056914936, + "learning_rate": 5.95887281035796e-06, + "loss": 17.396, + "step": 979 + }, + { + "epoch": 0.017913612517593727, + "grad_norm": 8.536481554834298, + "learning_rate": 5.964965727341966e-06, + "loss": 18.5038, + "step": 980 + }, + { + "epoch": 0.017931891714040252, + "grad_norm": 6.346971469399508, + "learning_rate": 5.971058644325971e-06, + "loss": 17.1418, + "step": 981 + }, + { + "epoch": 0.017950170910486774, + "grad_norm": 7.619923063560709, + "learning_rate": 5.977151561309978e-06, + "loss": 17.7694, + "step": 982 + }, + { + "epoch": 0.0179684501069333, + "grad_norm": 7.432489736143195, + "learning_rate": 5.9832444782939835e-06, + "loss": 17.6501, + "step": 983 + }, + { + "epoch": 0.017986729303379825, + "grad_norm": 8.525453283530835, + "learning_rate": 5.98933739527799e-06, + "loss": 18.0, + "step": 984 + }, + { + "epoch": 
0.018005008499826346, + "grad_norm": 8.319849636997306, + "learning_rate": 5.995430312261996e-06, + "loss": 17.8633, + "step": 985 + }, + { + "epoch": 0.01802328769627287, + "grad_norm": 9.110643654811224, + "learning_rate": 6.0015232292460024e-06, + "loss": 18.3716, + "step": 986 + }, + { + "epoch": 0.018041566892719397, + "grad_norm": 7.473648054911621, + "learning_rate": 6.007616146230009e-06, + "loss": 17.6931, + "step": 987 + }, + { + "epoch": 0.01805984608916592, + "grad_norm": 8.189401317764087, + "learning_rate": 6.013709063214014e-06, + "loss": 17.9509, + "step": 988 + }, + { + "epoch": 0.018078125285612444, + "grad_norm": 8.063947143779858, + "learning_rate": 6.01980198019802e-06, + "loss": 18.3433, + "step": 989 + }, + { + "epoch": 0.01809640448205897, + "grad_norm": 7.397779832323864, + "learning_rate": 6.025894897182026e-06, + "loss": 17.6071, + "step": 990 + }, + { + "epoch": 0.018114683678505494, + "grad_norm": 7.525569710600015, + "learning_rate": 6.031987814166033e-06, + "loss": 17.718, + "step": 991 + }, + { + "epoch": 0.018132962874952016, + "grad_norm": 8.473776849496664, + "learning_rate": 6.038080731150039e-06, + "loss": 18.1782, + "step": 992 + }, + { + "epoch": 0.01815124207139854, + "grad_norm": 8.206811645257254, + "learning_rate": 6.044173648134045e-06, + "loss": 17.9923, + "step": 993 + }, + { + "epoch": 0.018169521267845067, + "grad_norm": 7.4978382746034296, + "learning_rate": 6.05026656511805e-06, + "loss": 18.3228, + "step": 994 + }, + { + "epoch": 0.01818780046429159, + "grad_norm": 8.273357499146584, + "learning_rate": 6.056359482102057e-06, + "loss": 18.4123, + "step": 995 + }, + { + "epoch": 0.018206079660738114, + "grad_norm": 6.892464190887397, + "learning_rate": 6.0624523990860626e-06, + "loss": 17.5935, + "step": 996 + }, + { + "epoch": 0.01822435885718464, + "grad_norm": 7.848376155985841, + "learning_rate": 6.068545316070069e-06, + "loss": 17.9181, + "step": 997 + }, + { + "epoch": 0.018242638053631164, + "grad_norm": 8.05134430330745, + "learning_rate": 6.074638233054076e-06, + "loss": 18.1096, + "step": 998 + }, + { + "epoch": 0.018260917250077686, + "grad_norm": 8.255534490186607, + "learning_rate": 6.0807311500380815e-06, + "loss": 17.9048, + "step": 999 + }, + { + "epoch": 0.01827919644652421, + "grad_norm": 7.252054367954556, + "learning_rate": 6.0868240670220864e-06, + "loss": 17.9062, + "step": 1000 + }, + { + "epoch": 0.018297475642970736, + "grad_norm": 9.036905165252518, + "learning_rate": 6.092916984006093e-06, + "loss": 18.292, + "step": 1001 + }, + { + "epoch": 0.018315754839417258, + "grad_norm": 7.928256324043463, + "learning_rate": 6.0990099009901e-06, + "loss": 17.8227, + "step": 1002 + }, + { + "epoch": 0.018334034035863783, + "grad_norm": 9.459523185652099, + "learning_rate": 6.105102817974105e-06, + "loss": 18.4981, + "step": 1003 + }, + { + "epoch": 0.01835231323231031, + "grad_norm": 9.253943259491349, + "learning_rate": 6.111195734958112e-06, + "loss": 18.2896, + "step": 1004 + }, + { + "epoch": 0.01837059242875683, + "grad_norm": 7.049122820792015, + "learning_rate": 6.117288651942118e-06, + "loss": 17.567, + "step": 1005 + }, + { + "epoch": 0.018388871625203355, + "grad_norm": 7.454126276420805, + "learning_rate": 6.1233815689261235e-06, + "loss": 17.6288, + "step": 1006 + }, + { + "epoch": 0.01840715082164988, + "grad_norm": 7.554500865570359, + "learning_rate": 6.129474485910129e-06, + "loss": 17.5567, + "step": 1007 + }, + { + "epoch": 0.018425430018096406, + "grad_norm": 7.5218192319804595, + "learning_rate": 
6.135567402894136e-06, + "loss": 17.662, + "step": 1008 + }, + { + "epoch": 0.018443709214542928, + "grad_norm": 8.684509200583053, + "learning_rate": 6.1416603198781425e-06, + "loss": 18.411, + "step": 1009 + }, + { + "epoch": 0.018461988410989453, + "grad_norm": 8.410385879818696, + "learning_rate": 6.147753236862148e-06, + "loss": 17.9104, + "step": 1010 + }, + { + "epoch": 0.018480267607435978, + "grad_norm": 7.885404566415697, + "learning_rate": 6.153846153846155e-06, + "loss": 18.0883, + "step": 1011 + }, + { + "epoch": 0.0184985468038825, + "grad_norm": 8.439882645200115, + "learning_rate": 6.159939070830161e-06, + "loss": 18.3346, + "step": 1012 + }, + { + "epoch": 0.018516826000329025, + "grad_norm": 8.40871647134056, + "learning_rate": 6.166031987814166e-06, + "loss": 18.1789, + "step": 1013 + }, + { + "epoch": 0.01853510519677555, + "grad_norm": 8.60571070495959, + "learning_rate": 6.172124904798172e-06, + "loss": 18.5632, + "step": 1014 + }, + { + "epoch": 0.018553384393222076, + "grad_norm": 8.243216980529974, + "learning_rate": 6.178217821782179e-06, + "loss": 18.0231, + "step": 1015 + }, + { + "epoch": 0.018571663589668597, + "grad_norm": 7.86378260881206, + "learning_rate": 6.1843107387661845e-06, + "loss": 17.7789, + "step": 1016 + }, + { + "epoch": 0.018589942786115123, + "grad_norm": 8.820936931499855, + "learning_rate": 6.190403655750191e-06, + "loss": 18.6289, + "step": 1017 + }, + { + "epoch": 0.018608221982561648, + "grad_norm": 8.782521896358636, + "learning_rate": 6.196496572734198e-06, + "loss": 18.4418, + "step": 1018 + }, + { + "epoch": 0.01862650117900817, + "grad_norm": 6.899410432972345, + "learning_rate": 6.202589489718203e-06, + "loss": 17.4621, + "step": 1019 + }, + { + "epoch": 0.018644780375454695, + "grad_norm": 8.786703836897136, + "learning_rate": 6.208682406702209e-06, + "loss": 18.611, + "step": 1020 + }, + { + "epoch": 0.01866305957190122, + "grad_norm": 7.575104462004002, + "learning_rate": 6.214775323686215e-06, + "loss": 17.7327, + "step": 1021 + }, + { + "epoch": 0.018681338768347742, + "grad_norm": 7.787769432834653, + "learning_rate": 6.220868240670222e-06, + "loss": 18.0189, + "step": 1022 + }, + { + "epoch": 0.018699617964794267, + "grad_norm": 8.26130050674653, + "learning_rate": 6.226961157654227e-06, + "loss": 18.0139, + "step": 1023 + }, + { + "epoch": 0.018717897161240792, + "grad_norm": 7.317306905270767, + "learning_rate": 6.233054074638234e-06, + "loss": 17.6614, + "step": 1024 + }, + { + "epoch": 0.018736176357687317, + "grad_norm": 7.253990770261824, + "learning_rate": 6.239146991622239e-06, + "loss": 17.7952, + "step": 1025 + }, + { + "epoch": 0.01875445555413384, + "grad_norm": 7.277989169206523, + "learning_rate": 6.2452399086062455e-06, + "loss": 17.9357, + "step": 1026 + }, + { + "epoch": 0.018772734750580364, + "grad_norm": 8.994043472015644, + "learning_rate": 6.251332825590252e-06, + "loss": 18.5219, + "step": 1027 + }, + { + "epoch": 0.01879101394702689, + "grad_norm": 10.04532201853213, + "learning_rate": 6.257425742574258e-06, + "loss": 18.5184, + "step": 1028 + }, + { + "epoch": 0.01880929314347341, + "grad_norm": 8.069337617902569, + "learning_rate": 6.2635186595582645e-06, + "loss": 18.1767, + "step": 1029 + }, + { + "epoch": 0.018827572339919937, + "grad_norm": 8.78105839296198, + "learning_rate": 6.26961157654227e-06, + "loss": 18.4066, + "step": 1030 + }, + { + "epoch": 0.018845851536366462, + "grad_norm": 7.886476272784995, + "learning_rate": 6.275704493526277e-06, + "loss": 17.7988, + "step": 1031 + }, + { + 
"epoch": 0.018864130732812987, + "grad_norm": 7.714648845995187, + "learning_rate": 6.281797410510282e-06, + "loss": 17.9388, + "step": 1032 + }, + { + "epoch": 0.01888240992925951, + "grad_norm": 9.323420103128699, + "learning_rate": 6.287890327494288e-06, + "loss": 18.2556, + "step": 1033 + }, + { + "epoch": 0.018900689125706034, + "grad_norm": 8.036862821596728, + "learning_rate": 6.293983244478294e-06, + "loss": 17.8791, + "step": 1034 + }, + { + "epoch": 0.01891896832215256, + "grad_norm": 7.757507873745402, + "learning_rate": 6.300076161462301e-06, + "loss": 17.9595, + "step": 1035 + }, + { + "epoch": 0.01893724751859908, + "grad_norm": 7.829337929170519, + "learning_rate": 6.306169078446307e-06, + "loss": 17.8935, + "step": 1036 + }, + { + "epoch": 0.018955526715045606, + "grad_norm": 7.992430925493884, + "learning_rate": 6.312261995430313e-06, + "loss": 17.9734, + "step": 1037 + }, + { + "epoch": 0.01897380591149213, + "grad_norm": 7.085494698683286, + "learning_rate": 6.318354912414319e-06, + "loss": 17.7806, + "step": 1038 + }, + { + "epoch": 0.018992085107938653, + "grad_norm": 7.997564969386241, + "learning_rate": 6.324447829398325e-06, + "loss": 18.1079, + "step": 1039 + }, + { + "epoch": 0.01901036430438518, + "grad_norm": 7.5883916646067755, + "learning_rate": 6.330540746382331e-06, + "loss": 17.9337, + "step": 1040 + }, + { + "epoch": 0.019028643500831704, + "grad_norm": 7.976100637615995, + "learning_rate": 6.336633663366337e-06, + "loss": 17.9246, + "step": 1041 + }, + { + "epoch": 0.01904692269727823, + "grad_norm": 8.071392426167597, + "learning_rate": 6.3427265803503435e-06, + "loss": 18.0945, + "step": 1042 + }, + { + "epoch": 0.01906520189372475, + "grad_norm": 9.530560059122761, + "learning_rate": 6.34881949733435e-06, + "loss": 18.1668, + "step": 1043 + }, + { + "epoch": 0.019083481090171276, + "grad_norm": 8.56340449411634, + "learning_rate": 6.354912414318355e-06, + "loss": 18.4313, + "step": 1044 + }, + { + "epoch": 0.0191017602866178, + "grad_norm": 8.121857464759803, + "learning_rate": 6.361005331302361e-06, + "loss": 17.8877, + "step": 1045 + }, + { + "epoch": 0.019120039483064323, + "grad_norm": 8.417793702742772, + "learning_rate": 6.3670982482863674e-06, + "loss": 18.3695, + "step": 1046 + }, + { + "epoch": 0.01913831867951085, + "grad_norm": 7.6983153438617835, + "learning_rate": 6.373191165270374e-06, + "loss": 18.035, + "step": 1047 + }, + { + "epoch": 0.019156597875957374, + "grad_norm": 8.893385558769452, + "learning_rate": 6.37928408225438e-06, + "loss": 18.3983, + "step": 1048 + }, + { + "epoch": 0.0191748770724039, + "grad_norm": 10.604928656082148, + "learning_rate": 6.385376999238386e-06, + "loss": 19.0163, + "step": 1049 + }, + { + "epoch": 0.01919315626885042, + "grad_norm": 8.015052112864332, + "learning_rate": 6.391469916222392e-06, + "loss": 18.0764, + "step": 1050 + }, + { + "epoch": 0.019211435465296946, + "grad_norm": 7.794808472928623, + "learning_rate": 6.397562833206398e-06, + "loss": 18.0088, + "step": 1051 + }, + { + "epoch": 0.01922971466174347, + "grad_norm": 9.257359827663915, + "learning_rate": 6.403655750190404e-06, + "loss": 18.2095, + "step": 1052 + }, + { + "epoch": 0.019247993858189993, + "grad_norm": 9.54466142041045, + "learning_rate": 6.40974866717441e-06, + "loss": 18.9175, + "step": 1053 + }, + { + "epoch": 0.019266273054636518, + "grad_norm": 8.425516813446993, + "learning_rate": 6.415841584158417e-06, + "loss": 18.5695, + "step": 1054 + }, + { + "epoch": 0.019284552251083043, + "grad_norm": 8.821858127632892, + 
"learning_rate": 6.421934501142423e-06, + "loss": 18.0299, + "step": 1055 + }, + { + "epoch": 0.019302831447529565, + "grad_norm": 8.460954236609926, + "learning_rate": 6.428027418126429e-06, + "loss": 18.195, + "step": 1056 + }, + { + "epoch": 0.01932111064397609, + "grad_norm": 7.721241701220841, + "learning_rate": 6.434120335110434e-06, + "loss": 17.8886, + "step": 1057 + }, + { + "epoch": 0.019339389840422615, + "grad_norm": 8.513571952984433, + "learning_rate": 6.440213252094441e-06, + "loss": 18.3746, + "step": 1058 + }, + { + "epoch": 0.01935766903686914, + "grad_norm": 7.964369243429046, + "learning_rate": 6.4463061690784465e-06, + "loss": 18.0392, + "step": 1059 + }, + { + "epoch": 0.019375948233315662, + "grad_norm": 8.517667077055881, + "learning_rate": 6.452399086062453e-06, + "loss": 18.6715, + "step": 1060 + }, + { + "epoch": 0.019394227429762188, + "grad_norm": 8.02065554970321, + "learning_rate": 6.458492003046459e-06, + "loss": 17.7864, + "step": 1061 + }, + { + "epoch": 0.019412506626208713, + "grad_norm": 8.48795408189634, + "learning_rate": 6.4645849200304655e-06, + "loss": 18.2628, + "step": 1062 + }, + { + "epoch": 0.019430785822655235, + "grad_norm": 8.544082265276332, + "learning_rate": 6.47067783701447e-06, + "loss": 18.0344, + "step": 1063 + }, + { + "epoch": 0.01944906501910176, + "grad_norm": 9.285600661351275, + "learning_rate": 6.476770753998477e-06, + "loss": 18.3072, + "step": 1064 + }, + { + "epoch": 0.019467344215548285, + "grad_norm": 10.555387468574859, + "learning_rate": 6.482863670982484e-06, + "loss": 18.9304, + "step": 1065 + }, + { + "epoch": 0.01948562341199481, + "grad_norm": 7.76696304450934, + "learning_rate": 6.488956587966489e-06, + "loss": 18.0832, + "step": 1066 + }, + { + "epoch": 0.019503902608441332, + "grad_norm": 7.212742111061074, + "learning_rate": 6.495049504950496e-06, + "loss": 17.7941, + "step": 1067 + }, + { + "epoch": 0.019522181804887857, + "grad_norm": 7.273730185764739, + "learning_rate": 6.501142421934502e-06, + "loss": 17.5107, + "step": 1068 + }, + { + "epoch": 0.019540461001334383, + "grad_norm": 8.713701016851418, + "learning_rate": 6.5072353389185075e-06, + "loss": 18.1767, + "step": 1069 + }, + { + "epoch": 0.019558740197780904, + "grad_norm": 7.520575463727811, + "learning_rate": 6.513328255902513e-06, + "loss": 17.9406, + "step": 1070 + }, + { + "epoch": 0.01957701939422743, + "grad_norm": 7.842937496796728, + "learning_rate": 6.51942117288652e-06, + "loss": 18.0233, + "step": 1071 + }, + { + "epoch": 0.019595298590673955, + "grad_norm": 8.061706991842145, + "learning_rate": 6.525514089870526e-06, + "loss": 17.7602, + "step": 1072 + }, + { + "epoch": 0.019613577787120477, + "grad_norm": 7.6223938861675835, + "learning_rate": 6.531607006854532e-06, + "loss": 18.1191, + "step": 1073 + }, + { + "epoch": 0.019631856983567002, + "grad_norm": 8.279127166238007, + "learning_rate": 6.537699923838539e-06, + "loss": 18.3115, + "step": 1074 + }, + { + "epoch": 0.019650136180013527, + "grad_norm": 7.188251509364014, + "learning_rate": 6.543792840822545e-06, + "loss": 17.3865, + "step": 1075 + }, + { + "epoch": 0.019668415376460052, + "grad_norm": 7.216757458949848, + "learning_rate": 6.54988575780655e-06, + "loss": 17.6607, + "step": 1076 + }, + { + "epoch": 0.019686694572906574, + "grad_norm": 8.670515815497264, + "learning_rate": 6.555978674790556e-06, + "loss": 18.1868, + "step": 1077 + }, + { + "epoch": 0.0197049737693531, + "grad_norm": 8.110697301247246, + "learning_rate": 6.562071591774563e-06, + "loss": 18.0288, + 
"step": 1078 + }, + { + "epoch": 0.019723252965799624, + "grad_norm": 8.809505809170233, + "learning_rate": 6.5681645087585685e-06, + "loss": 18.0671, + "step": 1079 + }, + { + "epoch": 0.019741532162246146, + "grad_norm": 9.605865738512373, + "learning_rate": 6.574257425742575e-06, + "loss": 18.4982, + "step": 1080 + }, + { + "epoch": 0.01975981135869267, + "grad_norm": 7.916135171417083, + "learning_rate": 6.580350342726582e-06, + "loss": 18.1144, + "step": 1081 + }, + { + "epoch": 0.019778090555139197, + "grad_norm": 7.217096870695601, + "learning_rate": 6.586443259710587e-06, + "loss": 18.0143, + "step": 1082 + }, + { + "epoch": 0.019796369751585722, + "grad_norm": 7.679991772771017, + "learning_rate": 6.592536176694593e-06, + "loss": 18.0063, + "step": 1083 + }, + { + "epoch": 0.019814648948032244, + "grad_norm": 8.20226140412346, + "learning_rate": 6.598629093678599e-06, + "loss": 18.4665, + "step": 1084 + }, + { + "epoch": 0.01983292814447877, + "grad_norm": 9.078351119787966, + "learning_rate": 6.6047220106626056e-06, + "loss": 18.7463, + "step": 1085 + }, + { + "epoch": 0.019851207340925294, + "grad_norm": 7.516852386672339, + "learning_rate": 6.610814927646611e-06, + "loss": 17.775, + "step": 1086 + }, + { + "epoch": 0.019869486537371816, + "grad_norm": 8.181787029821258, + "learning_rate": 6.616907844630618e-06, + "loss": 18.472, + "step": 1087 + }, + { + "epoch": 0.01988776573381834, + "grad_norm": 6.560494302351963, + "learning_rate": 6.623000761614623e-06, + "loss": 17.5194, + "step": 1088 + }, + { + "epoch": 0.019906044930264866, + "grad_norm": 7.901692922390689, + "learning_rate": 6.6290936785986294e-06, + "loss": 18.0642, + "step": 1089 + }, + { + "epoch": 0.019924324126711388, + "grad_norm": 8.061066613367577, + "learning_rate": 6.635186595582635e-06, + "loss": 18.0876, + "step": 1090 + }, + { + "epoch": 0.019942603323157913, + "grad_norm": 9.454097103350708, + "learning_rate": 6.641279512566642e-06, + "loss": 18.8881, + "step": 1091 + }, + { + "epoch": 0.01996088251960444, + "grad_norm": 8.756284787713158, + "learning_rate": 6.647372429550648e-06, + "loss": 18.2767, + "step": 1092 + }, + { + "epoch": 0.019979161716050964, + "grad_norm": 7.533720389804396, + "learning_rate": 6.653465346534654e-06, + "loss": 18.2497, + "step": 1093 + }, + { + "epoch": 0.019997440912497486, + "grad_norm": 9.298600283610812, + "learning_rate": 6.659558263518661e-06, + "loss": 18.5542, + "step": 1094 + }, + { + "epoch": 0.02001572010894401, + "grad_norm": 7.740774888213946, + "learning_rate": 6.665651180502666e-06, + "loss": 18.2266, + "step": 1095 + }, + { + "epoch": 0.020033999305390536, + "grad_norm": 8.909679220515974, + "learning_rate": 6.671744097486672e-06, + "loss": 18.6344, + "step": 1096 + }, + { + "epoch": 0.020052278501837058, + "grad_norm": 7.63494248931986, + "learning_rate": 6.677837014470678e-06, + "loss": 17.5885, + "step": 1097 + }, + { + "epoch": 0.020070557698283583, + "grad_norm": 8.920698496042283, + "learning_rate": 6.683929931454685e-06, + "loss": 18.087, + "step": 1098 + }, + { + "epoch": 0.020088836894730108, + "grad_norm": 7.6970425992182765, + "learning_rate": 6.690022848438691e-06, + "loss": 17.8351, + "step": 1099 + }, + { + "epoch": 0.020107116091176633, + "grad_norm": 9.550901562462453, + "learning_rate": 6.696115765422697e-06, + "loss": 18.9141, + "step": 1100 + }, + { + "epoch": 0.020125395287623155, + "grad_norm": 7.903962156781939, + "learning_rate": 6.702208682406702e-06, + "loss": 17.9881, + "step": 1101 + }, + { + "epoch": 0.02014367448406968, + 
"grad_norm": 9.009575604456296, + "learning_rate": 6.7083015993907085e-06, + "loss": 18.5816, + "step": 1102 + }, + { + "epoch": 0.020161953680516206, + "grad_norm": 8.063362266384035, + "learning_rate": 6.714394516374715e-06, + "loss": 17.9211, + "step": 1103 + }, + { + "epoch": 0.020180232876962727, + "grad_norm": 7.689532167015797, + "learning_rate": 6.720487433358721e-06, + "loss": 17.8806, + "step": 1104 + }, + { + "epoch": 0.020198512073409253, + "grad_norm": 8.370291445199264, + "learning_rate": 6.7265803503427275e-06, + "loss": 18.36, + "step": 1105 + }, + { + "epoch": 0.020216791269855778, + "grad_norm": 7.660706061027755, + "learning_rate": 6.732673267326733e-06, + "loss": 18.0976, + "step": 1106 + }, + { + "epoch": 0.0202350704663023, + "grad_norm": 7.696634048353973, + "learning_rate": 6.738766184310739e-06, + "loss": 17.7352, + "step": 1107 + }, + { + "epoch": 0.020253349662748825, + "grad_norm": 14.379213048417702, + "learning_rate": 6.744859101294745e-06, + "loss": 17.7067, + "step": 1108 + }, + { + "epoch": 0.02027162885919535, + "grad_norm": 8.662425910853875, + "learning_rate": 6.750952018278751e-06, + "loss": 18.3063, + "step": 1109 + }, + { + "epoch": 0.020289908055641875, + "grad_norm": 7.891443885305967, + "learning_rate": 6.757044935262758e-06, + "loss": 17.7426, + "step": 1110 + }, + { + "epoch": 0.020308187252088397, + "grad_norm": 7.881527662289122, + "learning_rate": 6.763137852246764e-06, + "loss": 18.2592, + "step": 1111 + }, + { + "epoch": 0.020326466448534922, + "grad_norm": 6.824244603799724, + "learning_rate": 6.76923076923077e-06, + "loss": 17.5405, + "step": 1112 + }, + { + "epoch": 0.020344745644981448, + "grad_norm": 6.955748008576041, + "learning_rate": 6.775323686214776e-06, + "loss": 17.4328, + "step": 1113 + }, + { + "epoch": 0.02036302484142797, + "grad_norm": 8.531623209320305, + "learning_rate": 6.781416603198782e-06, + "loss": 18.531, + "step": 1114 + }, + { + "epoch": 0.020381304037874495, + "grad_norm": 7.793085602064876, + "learning_rate": 6.787509520182788e-06, + "loss": 17.8731, + "step": 1115 + }, + { + "epoch": 0.02039958323432102, + "grad_norm": 8.83908926012251, + "learning_rate": 6.793602437166794e-06, + "loss": 17.9839, + "step": 1116 + }, + { + "epoch": 0.020417862430767545, + "grad_norm": 9.225053276907715, + "learning_rate": 6.7996953541508e-06, + "loss": 18.1455, + "step": 1117 + }, + { + "epoch": 0.020436141627214067, + "grad_norm": 8.482282192574337, + "learning_rate": 6.805788271134807e-06, + "loss": 18.2174, + "step": 1118 + }, + { + "epoch": 0.020454420823660592, + "grad_norm": 8.932118579503443, + "learning_rate": 6.811881188118813e-06, + "loss": 18.3675, + "step": 1119 + }, + { + "epoch": 0.020472700020107117, + "grad_norm": 7.505436002781391, + "learning_rate": 6.817974105102818e-06, + "loss": 18.0324, + "step": 1120 + }, + { + "epoch": 0.02049097921655364, + "grad_norm": 6.9349703359748585, + "learning_rate": 6.824067022086825e-06, + "loss": 17.7132, + "step": 1121 + }, + { + "epoch": 0.020509258413000164, + "grad_norm": 8.981324443612397, + "learning_rate": 6.8301599390708305e-06, + "loss": 18.2598, + "step": 1122 + }, + { + "epoch": 0.02052753760944669, + "grad_norm": 7.754558487822148, + "learning_rate": 6.836252856054837e-06, + "loss": 18.0601, + "step": 1123 + }, + { + "epoch": 0.02054581680589321, + "grad_norm": 8.370429440767074, + "learning_rate": 6.842345773038843e-06, + "loss": 17.9774, + "step": 1124 + }, + { + "epoch": 0.020564096002339737, + "grad_norm": 8.02602419251651, + "learning_rate": 
6.8484386900228495e-06, + "loss": 17.9154, + "step": 1125 + }, + { + "epoch": 0.02058237519878626, + "grad_norm": 7.650949652381696, + "learning_rate": 6.854531607006854e-06, + "loss": 17.8702, + "step": 1126 + }, + { + "epoch": 0.020600654395232787, + "grad_norm": 8.204914255177334, + "learning_rate": 6.860624523990861e-06, + "loss": 18.0647, + "step": 1127 + }, + { + "epoch": 0.02061893359167931, + "grad_norm": 6.855912112886448, + "learning_rate": 6.866717440974867e-06, + "loss": 17.48, + "step": 1128 + }, + { + "epoch": 0.020637212788125834, + "grad_norm": 8.105338657042287, + "learning_rate": 6.872810357958873e-06, + "loss": 18.0055, + "step": 1129 + }, + { + "epoch": 0.02065549198457236, + "grad_norm": 16.77405238000618, + "learning_rate": 6.87890327494288e-06, + "loss": 18.3776, + "step": 1130 + }, + { + "epoch": 0.02067377118101888, + "grad_norm": 6.822106523614206, + "learning_rate": 6.884996191926886e-06, + "loss": 17.4504, + "step": 1131 + }, + { + "epoch": 0.020692050377465406, + "grad_norm": 7.702096099615508, + "learning_rate": 6.8910891089108915e-06, + "loss": 17.686, + "step": 1132 + }, + { + "epoch": 0.02071032957391193, + "grad_norm": 8.577549907942483, + "learning_rate": 6.897182025894897e-06, + "loss": 18.5179, + "step": 1133 + }, + { + "epoch": 0.020728608770358457, + "grad_norm": 77.81058128614599, + "learning_rate": 6.903274942878904e-06, + "loss": 18.0846, + "step": 1134 + }, + { + "epoch": 0.02074688796680498, + "grad_norm": 7.777221959468819, + "learning_rate": 6.90936785986291e-06, + "loss": 17.982, + "step": 1135 + }, + { + "epoch": 0.020765167163251504, + "grad_norm": 8.103358397861477, + "learning_rate": 6.915460776846916e-06, + "loss": 18.0749, + "step": 1136 + }, + { + "epoch": 0.02078344635969803, + "grad_norm": 7.954324518176213, + "learning_rate": 6.921553693830923e-06, + "loss": 17.9536, + "step": 1137 + }, + { + "epoch": 0.02080172555614455, + "grad_norm": 7.646304408043269, + "learning_rate": 6.9276466108149285e-06, + "loss": 17.7323, + "step": 1138 + }, + { + "epoch": 0.020820004752591076, + "grad_norm": 8.764899829664277, + "learning_rate": 6.933739527798934e-06, + "loss": 17.9325, + "step": 1139 + }, + { + "epoch": 0.0208382839490376, + "grad_norm": 8.90169081801138, + "learning_rate": 6.93983244478294e-06, + "loss": 18.2958, + "step": 1140 + }, + { + "epoch": 0.020856563145484123, + "grad_norm": 8.411302997086182, + "learning_rate": 6.945925361766947e-06, + "loss": 18.0948, + "step": 1141 + }, + { + "epoch": 0.020874842341930648, + "grad_norm": 8.049056501337239, + "learning_rate": 6.9520182787509524e-06, + "loss": 18.2222, + "step": 1142 + }, + { + "epoch": 0.020893121538377173, + "grad_norm": 7.323621447312056, + "learning_rate": 6.958111195734959e-06, + "loss": 17.8308, + "step": 1143 + }, + { + "epoch": 0.0209114007348237, + "grad_norm": 8.592023496690768, + "learning_rate": 6.964204112718966e-06, + "loss": 18.1117, + "step": 1144 + }, + { + "epoch": 0.02092967993127022, + "grad_norm": 7.995621972747377, + "learning_rate": 6.9702970297029706e-06, + "loss": 17.7442, + "step": 1145 + }, + { + "epoch": 0.020947959127716746, + "grad_norm": 6.8862191468237945, + "learning_rate": 6.976389946686976e-06, + "loss": 17.4325, + "step": 1146 + }, + { + "epoch": 0.02096623832416327, + "grad_norm": 8.536205187859835, + "learning_rate": 6.982482863670983e-06, + "loss": 18.3987, + "step": 1147 + }, + { + "epoch": 0.020984517520609793, + "grad_norm": 7.969283065900971, + "learning_rate": 6.9885757806549895e-06, + "loss": 18.0284, + "step": 1148 + }, + { + 
"epoch": 0.021002796717056318, + "grad_norm": 8.109090041444265, + "learning_rate": 6.994668697638995e-06, + "loss": 17.8368, + "step": 1149 + }, + { + "epoch": 0.021021075913502843, + "grad_norm": 8.094394126698537, + "learning_rate": 7.000761614623002e-06, + "loss": 17.8873, + "step": 1150 + }, + { + "epoch": 0.021039355109949368, + "grad_norm": 7.159650139988644, + "learning_rate": 7.006854531607007e-06, + "loss": 17.7425, + "step": 1151 + }, + { + "epoch": 0.02105763430639589, + "grad_norm": 8.30888924591329, + "learning_rate": 7.012947448591013e-06, + "loss": 17.8527, + "step": 1152 + }, + { + "epoch": 0.021075913502842415, + "grad_norm": 8.820744459819323, + "learning_rate": 7.019040365575019e-06, + "loss": 18.3725, + "step": 1153 + }, + { + "epoch": 0.02109419269928894, + "grad_norm": 8.35480812143568, + "learning_rate": 7.025133282559026e-06, + "loss": 18.1813, + "step": 1154 + }, + { + "epoch": 0.021112471895735462, + "grad_norm": 8.036912715992331, + "learning_rate": 7.031226199543032e-06, + "loss": 17.8396, + "step": 1155 + }, + { + "epoch": 0.021130751092181987, + "grad_norm": 7.277613895898985, + "learning_rate": 7.037319116527038e-06, + "loss": 17.5703, + "step": 1156 + }, + { + "epoch": 0.021149030288628513, + "grad_norm": 8.160707335936069, + "learning_rate": 7.043412033511045e-06, + "loss": 17.9347, + "step": 1157 + }, + { + "epoch": 0.021167309485075034, + "grad_norm": 8.324350963433098, + "learning_rate": 7.04950495049505e-06, + "loss": 17.9741, + "step": 1158 + }, + { + "epoch": 0.02118558868152156, + "grad_norm": 8.313499862915545, + "learning_rate": 7.055597867479056e-06, + "loss": 17.9326, + "step": 1159 + }, + { + "epoch": 0.021203867877968085, + "grad_norm": 8.373934244972215, + "learning_rate": 7.061690784463062e-06, + "loss": 18.0119, + "step": 1160 + }, + { + "epoch": 0.02122214707441461, + "grad_norm": 8.734489214418272, + "learning_rate": 7.067783701447069e-06, + "loss": 18.5876, + "step": 1161 + }, + { + "epoch": 0.021240426270861132, + "grad_norm": 7.940739926024548, + "learning_rate": 7.073876618431074e-06, + "loss": 17.9932, + "step": 1162 + }, + { + "epoch": 0.021258705467307657, + "grad_norm": 7.643890938802826, + "learning_rate": 7.079969535415081e-06, + "loss": 17.6314, + "step": 1163 + }, + { + "epoch": 0.021276984663754182, + "grad_norm": 7.541401411005756, + "learning_rate": 7.086062452399086e-06, + "loss": 17.9616, + "step": 1164 + }, + { + "epoch": 0.021295263860200704, + "grad_norm": 9.213679865116323, + "learning_rate": 7.0921553693830925e-06, + "loss": 18.9202, + "step": 1165 + }, + { + "epoch": 0.02131354305664723, + "grad_norm": 8.511693300693606, + "learning_rate": 7.098248286367099e-06, + "loss": 18.196, + "step": 1166 + }, + { + "epoch": 0.021331822253093755, + "grad_norm": 7.7357627986481985, + "learning_rate": 7.104341203351105e-06, + "loss": 18.066, + "step": 1167 + }, + { + "epoch": 0.02135010144954028, + "grad_norm": 8.057760559908589, + "learning_rate": 7.1104341203351115e-06, + "loss": 18.0976, + "step": 1168 + }, + { + "epoch": 0.0213683806459868, + "grad_norm": 8.785475163168408, + "learning_rate": 7.116527037319117e-06, + "loss": 18.1866, + "step": 1169 + }, + { + "epoch": 0.021386659842433327, + "grad_norm": 7.4913324178511225, + "learning_rate": 7.122619954303123e-06, + "loss": 17.7044, + "step": 1170 + }, + { + "epoch": 0.021404939038879852, + "grad_norm": 7.681956106312898, + "learning_rate": 7.128712871287129e-06, + "loss": 17.8684, + "step": 1171 + }, + { + "epoch": 0.021423218235326374, + "grad_norm": 7.4616185648565905, + 
"learning_rate": 7.134805788271135e-06, + "loss": 17.7606, + "step": 1172 + }, + { + "epoch": 0.0214414974317729, + "grad_norm": 7.119361393828716, + "learning_rate": 7.140898705255141e-06, + "loss": 17.7639, + "step": 1173 + }, + { + "epoch": 0.021459776628219424, + "grad_norm": 9.123788086352594, + "learning_rate": 7.146991622239148e-06, + "loss": 18.2655, + "step": 1174 + }, + { + "epoch": 0.021478055824665946, + "grad_norm": 9.977952496483601, + "learning_rate": 7.153084539223154e-06, + "loss": 18.1025, + "step": 1175 + }, + { + "epoch": 0.02149633502111247, + "grad_norm": 8.010184469210712, + "learning_rate": 7.15917745620716e-06, + "loss": 18.0235, + "step": 1176 + }, + { + "epoch": 0.021514614217558996, + "grad_norm": 7.340378645183165, + "learning_rate": 7.165270373191166e-06, + "loss": 17.7148, + "step": 1177 + }, + { + "epoch": 0.02153289341400552, + "grad_norm": 8.10655777724448, + "learning_rate": 7.171363290175172e-06, + "loss": 18.1131, + "step": 1178 + }, + { + "epoch": 0.021551172610452043, + "grad_norm": 6.828081387443637, + "learning_rate": 7.177456207159178e-06, + "loss": 17.4375, + "step": 1179 + }, + { + "epoch": 0.02156945180689857, + "grad_norm": 8.552928156945542, + "learning_rate": 7.183549124143184e-06, + "loss": 18.1215, + "step": 1180 + }, + { + "epoch": 0.021587731003345094, + "grad_norm": 7.566082042425317, + "learning_rate": 7.1896420411271906e-06, + "loss": 17.7592, + "step": 1181 + }, + { + "epoch": 0.021606010199791616, + "grad_norm": 8.748982056419061, + "learning_rate": 7.195734958111197e-06, + "loss": 18.2236, + "step": 1182 + }, + { + "epoch": 0.02162428939623814, + "grad_norm": 7.85067598085476, + "learning_rate": 7.201827875095202e-06, + "loss": 17.6095, + "step": 1183 + }, + { + "epoch": 0.021642568592684666, + "grad_norm": 7.498161013542759, + "learning_rate": 7.207920792079208e-06, + "loss": 17.9093, + "step": 1184 + }, + { + "epoch": 0.02166084778913119, + "grad_norm": 7.768815471400683, + "learning_rate": 7.2140137090632144e-06, + "loss": 17.9479, + "step": 1185 + }, + { + "epoch": 0.021679126985577713, + "grad_norm": 8.335391937231623, + "learning_rate": 7.220106626047221e-06, + "loss": 18.1221, + "step": 1186 + }, + { + "epoch": 0.02169740618202424, + "grad_norm": 7.984945971046457, + "learning_rate": 7.226199543031227e-06, + "loss": 18.1438, + "step": 1187 + }, + { + "epoch": 0.021715685378470764, + "grad_norm": 9.06770301142076, + "learning_rate": 7.232292460015233e-06, + "loss": 18.6921, + "step": 1188 + }, + { + "epoch": 0.021733964574917285, + "grad_norm": 8.037666318852676, + "learning_rate": 7.238385376999238e-06, + "loss": 18.3687, + "step": 1189 + }, + { + "epoch": 0.02175224377136381, + "grad_norm": 7.792236263999609, + "learning_rate": 7.244478293983245e-06, + "loss": 18.0255, + "step": 1190 + }, + { + "epoch": 0.021770522967810336, + "grad_norm": 8.645863994009908, + "learning_rate": 7.250571210967251e-06, + "loss": 18.3111, + "step": 1191 + }, + { + "epoch": 0.021788802164256858, + "grad_norm": 8.891587503165612, + "learning_rate": 7.256664127951257e-06, + "loss": 18.6329, + "step": 1192 + }, + { + "epoch": 0.021807081360703383, + "grad_norm": 7.7969448972465125, + "learning_rate": 7.262757044935264e-06, + "loss": 17.9866, + "step": 1193 + }, + { + "epoch": 0.021825360557149908, + "grad_norm": 9.036182301496826, + "learning_rate": 7.26884996191927e-06, + "loss": 18.2366, + "step": 1194 + }, + { + "epoch": 0.021843639753596433, + "grad_norm": 7.9134513330621825, + "learning_rate": 7.274942878903275e-06, + "loss": 18.3917, + "step": 
1195 + }, + { + "epoch": 0.021861918950042955, + "grad_norm": 10.12767509358225, + "learning_rate": 7.281035795887281e-06, + "loss": 18.3437, + "step": 1196 + }, + { + "epoch": 0.02188019814648948, + "grad_norm": 9.349416412034195, + "learning_rate": 7.287128712871288e-06, + "loss": 18.2855, + "step": 1197 + }, + { + "epoch": 0.021898477342936006, + "grad_norm": 8.686163963468262, + "learning_rate": 7.2932216298552935e-06, + "loss": 18.1963, + "step": 1198 + }, + { + "epoch": 0.021916756539382527, + "grad_norm": 8.215999122820618, + "learning_rate": 7.2993145468393e-06, + "loss": 18.0912, + "step": 1199 + }, + { + "epoch": 0.021935035735829053, + "grad_norm": 8.909752446449565, + "learning_rate": 7.305407463823307e-06, + "loss": 18.4349, + "step": 1200 + }, + { + "epoch": 0.021953314932275578, + "grad_norm": 7.864052924827168, + "learning_rate": 7.3115003808073125e-06, + "loss": 18.1708, + "step": 1201 + }, + { + "epoch": 0.021971594128722103, + "grad_norm": 7.975719874675216, + "learning_rate": 7.317593297791317e-06, + "loss": 17.9927, + "step": 1202 + }, + { + "epoch": 0.021989873325168625, + "grad_norm": 8.06241978310815, + "learning_rate": 7.323686214775324e-06, + "loss": 18.1951, + "step": 1203 + }, + { + "epoch": 0.02200815252161515, + "grad_norm": 8.61763658655989, + "learning_rate": 7.329779131759331e-06, + "loss": 18.4821, + "step": 1204 + }, + { + "epoch": 0.022026431718061675, + "grad_norm": 8.610045656208115, + "learning_rate": 7.335872048743336e-06, + "loss": 18.4585, + "step": 1205 + }, + { + "epoch": 0.022044710914508197, + "grad_norm": 7.1058283019046815, + "learning_rate": 7.341964965727343e-06, + "loss": 17.9557, + "step": 1206 + }, + { + "epoch": 0.022062990110954722, + "grad_norm": 8.852871625404843, + "learning_rate": 7.348057882711349e-06, + "loss": 18.8266, + "step": 1207 + }, + { + "epoch": 0.022081269307401247, + "grad_norm": 8.641928794327619, + "learning_rate": 7.3541507996953545e-06, + "loss": 18.5967, + "step": 1208 + }, + { + "epoch": 0.02209954850384777, + "grad_norm": 8.118616842382611, + "learning_rate": 7.36024371667936e-06, + "loss": 17.9766, + "step": 1209 + }, + { + "epoch": 0.022117827700294294, + "grad_norm": 7.8062309682541615, + "learning_rate": 7.366336633663367e-06, + "loss": 18.2301, + "step": 1210 + }, + { + "epoch": 0.02213610689674082, + "grad_norm": 8.256561305103666, + "learning_rate": 7.3724295506473735e-06, + "loss": 18.2072, + "step": 1211 + }, + { + "epoch": 0.022154386093187345, + "grad_norm": 7.541355836416142, + "learning_rate": 7.378522467631379e-06, + "loss": 17.8086, + "step": 1212 + }, + { + "epoch": 0.022172665289633867, + "grad_norm": 8.373746914388754, + "learning_rate": 7.384615384615386e-06, + "loss": 18.2852, + "step": 1213 + }, + { + "epoch": 0.022190944486080392, + "grad_norm": 6.529008972161452, + "learning_rate": 7.390708301599391e-06, + "loss": 17.2029, + "step": 1214 + }, + { + "epoch": 0.022209223682526917, + "grad_norm": 8.548302012635622, + "learning_rate": 7.396801218583397e-06, + "loss": 18.3498, + "step": 1215 + }, + { + "epoch": 0.02222750287897344, + "grad_norm": 9.300415300136658, + "learning_rate": 7.402894135567403e-06, + "loss": 18.4759, + "step": 1216 + }, + { + "epoch": 0.022245782075419964, + "grad_norm": 7.982304795140464, + "learning_rate": 7.40898705255141e-06, + "loss": 18.0739, + "step": 1217 + }, + { + "epoch": 0.02226406127186649, + "grad_norm": 6.895074547057548, + "learning_rate": 7.4150799695354155e-06, + "loss": 17.5846, + "step": 1218 + }, + { + "epoch": 0.022282340468313015, + "grad_norm": 
9.15493873323251, + "learning_rate": 7.421172886519422e-06, + "loss": 18.7379, + "step": 1219 + }, + { + "epoch": 0.022300619664759536, + "grad_norm": 7.37272617147564, + "learning_rate": 7.427265803503429e-06, + "loss": 17.6261, + "step": 1220 + }, + { + "epoch": 0.02231889886120606, + "grad_norm": 8.105896696615837, + "learning_rate": 7.433358720487434e-06, + "loss": 18.3197, + "step": 1221 + }, + { + "epoch": 0.022337178057652587, + "grad_norm": 6.971224136969124, + "learning_rate": 7.43945163747144e-06, + "loss": 17.5691, + "step": 1222 + }, + { + "epoch": 0.02235545725409911, + "grad_norm": 7.7229301300845155, + "learning_rate": 7.445544554455446e-06, + "loss": 17.9763, + "step": 1223 + }, + { + "epoch": 0.022373736450545634, + "grad_norm": 8.79029937572499, + "learning_rate": 7.4516374714394526e-06, + "loss": 18.6975, + "step": 1224 + }, + { + "epoch": 0.02239201564699216, + "grad_norm": 7.653051662866822, + "learning_rate": 7.457730388423458e-06, + "loss": 17.8637, + "step": 1225 + }, + { + "epoch": 0.02241029484343868, + "grad_norm": 10.015118294113028, + "learning_rate": 7.463823305407465e-06, + "loss": 18.3986, + "step": 1226 + }, + { + "epoch": 0.022428574039885206, + "grad_norm": 9.135396156077341, + "learning_rate": 7.46991622239147e-06, + "loss": 18.1684, + "step": 1227 + }, + { + "epoch": 0.02244685323633173, + "grad_norm": 8.840419125171318, + "learning_rate": 7.4760091393754765e-06, + "loss": 18.5559, + "step": 1228 + }, + { + "epoch": 0.022465132432778256, + "grad_norm": 8.685221814751454, + "learning_rate": 7.482102056359482e-06, + "loss": 17.9879, + "step": 1229 + }, + { + "epoch": 0.022483411629224778, + "grad_norm": 7.582756705088697, + "learning_rate": 7.488194973343489e-06, + "loss": 17.8319, + "step": 1230 + }, + { + "epoch": 0.022501690825671303, + "grad_norm": 9.55332474929589, + "learning_rate": 7.4942878903274954e-06, + "loss": 18.5795, + "step": 1231 + }, + { + "epoch": 0.02251997002211783, + "grad_norm": 9.12537600125327, + "learning_rate": 7.500380807311501e-06, + "loss": 18.5816, + "step": 1232 + }, + { + "epoch": 0.02253824921856435, + "grad_norm": 8.200056774096385, + "learning_rate": 7.506473724295507e-06, + "loss": 17.9549, + "step": 1233 + }, + { + "epoch": 0.022556528415010876, + "grad_norm": 8.269221292204605, + "learning_rate": 7.512566641279513e-06, + "loss": 17.793, + "step": 1234 + }, + { + "epoch": 0.0225748076114574, + "grad_norm": 6.9529769475855785, + "learning_rate": 7.518659558263519e-06, + "loss": 17.6142, + "step": 1235 + }, + { + "epoch": 0.022593086807903926, + "grad_norm": 9.460123773145922, + "learning_rate": 7.524752475247525e-06, + "loss": 18.5836, + "step": 1236 + }, + { + "epoch": 0.022611366004350448, + "grad_norm": 6.935214348736729, + "learning_rate": 7.530845392231532e-06, + "loss": 17.5914, + "step": 1237 + }, + { + "epoch": 0.022629645200796973, + "grad_norm": 8.38004457254756, + "learning_rate": 7.536938309215538e-06, + "loss": 18.0831, + "step": 1238 + }, + { + "epoch": 0.0226479243972435, + "grad_norm": 7.3943109247066, + "learning_rate": 7.543031226199544e-06, + "loss": 17.8206, + "step": 1239 + }, + { + "epoch": 0.02266620359369002, + "grad_norm": 7.861977798654075, + "learning_rate": 7.549124143183549e-06, + "loss": 17.8095, + "step": 1240 + }, + { + "epoch": 0.022684482790136545, + "grad_norm": 8.081413578470416, + "learning_rate": 7.5552170601675556e-06, + "loss": 18.0777, + "step": 1241 + }, + { + "epoch": 0.02270276198658307, + "grad_norm": 8.227747563013045, + "learning_rate": 7.561309977151562e-06, + "loss": 
18.1766, + "step": 1242 + }, + { + "epoch": 0.022721041183029592, + "grad_norm": 8.080913904692055, + "learning_rate": 7.567402894135568e-06, + "loss": 17.864, + "step": 1243 + }, + { + "epoch": 0.022739320379476118, + "grad_norm": 8.179396165410159, + "learning_rate": 7.5734958111195745e-06, + "loss": 17.9948, + "step": 1244 + }, + { + "epoch": 0.022757599575922643, + "grad_norm": 9.386646441003995, + "learning_rate": 7.579588728103581e-06, + "loss": 18.8239, + "step": 1245 + }, + { + "epoch": 0.022775878772369168, + "grad_norm": 8.975542711309226, + "learning_rate": 7.585681645087586e-06, + "loss": 18.5923, + "step": 1246 + }, + { + "epoch": 0.02279415796881569, + "grad_norm": 8.844141143227652, + "learning_rate": 7.591774562071592e-06, + "loss": 18.1258, + "step": 1247 + }, + { + "epoch": 0.022812437165262215, + "grad_norm": 7.184291908214676, + "learning_rate": 7.597867479055598e-06, + "loss": 17.7494, + "step": 1248 + }, + { + "epoch": 0.02283071636170874, + "grad_norm": 7.973705351159692, + "learning_rate": 7.603960396039605e-06, + "loss": 17.9706, + "step": 1249 + }, + { + "epoch": 0.022848995558155262, + "grad_norm": 7.011117924638664, + "learning_rate": 7.610053313023611e-06, + "loss": 17.6627, + "step": 1250 + }, + { + "epoch": 0.022867274754601787, + "grad_norm": 6.581057991386631, + "learning_rate": 7.616146230007617e-06, + "loss": 17.6904, + "step": 1251 + }, + { + "epoch": 0.022885553951048312, + "grad_norm": 8.604049446250782, + "learning_rate": 7.622239146991622e-06, + "loss": 18.4233, + "step": 1252 + }, + { + "epoch": 0.022903833147494838, + "grad_norm": 6.804372320887771, + "learning_rate": 7.628332063975629e-06, + "loss": 17.3679, + "step": 1253 + }, + { + "epoch": 0.02292211234394136, + "grad_norm": 8.276516712834422, + "learning_rate": 7.634424980959635e-06, + "loss": 18.0717, + "step": 1254 + }, + { + "epoch": 0.022940391540387885, + "grad_norm": 8.808370707379996, + "learning_rate": 7.640517897943641e-06, + "loss": 18.0482, + "step": 1255 + }, + { + "epoch": 0.02295867073683441, + "grad_norm": 7.882389237429789, + "learning_rate": 7.646610814927647e-06, + "loss": 17.8727, + "step": 1256 + }, + { + "epoch": 0.02297694993328093, + "grad_norm": 8.213135962682458, + "learning_rate": 7.652703731911654e-06, + "loss": 18.0153, + "step": 1257 + }, + { + "epoch": 0.022995229129727457, + "grad_norm": 8.466431958181476, + "learning_rate": 7.658796648895659e-06, + "loss": 18.008, + "step": 1258 + }, + { + "epoch": 0.023013508326173982, + "grad_norm": 7.234200452168259, + "learning_rate": 7.664889565879666e-06, + "loss": 17.4817, + "step": 1259 + }, + { + "epoch": 0.023031787522620504, + "grad_norm": 9.59877550737015, + "learning_rate": 7.670982482863672e-06, + "loss": 18.6498, + "step": 1260 + }, + { + "epoch": 0.02305006671906703, + "grad_norm": 8.341860614988887, + "learning_rate": 7.677075399847677e-06, + "loss": 18.2708, + "step": 1261 + }, + { + "epoch": 0.023068345915513554, + "grad_norm": 9.281859020029916, + "learning_rate": 7.683168316831683e-06, + "loss": 18.9397, + "step": 1262 + }, + { + "epoch": 0.02308662511196008, + "grad_norm": 7.710288194975174, + "learning_rate": 7.68926123381569e-06, + "loss": 17.9752, + "step": 1263 + }, + { + "epoch": 0.0231049043084066, + "grad_norm": 7.063195913746644, + "learning_rate": 7.695354150799696e-06, + "loss": 17.487, + "step": 1264 + }, + { + "epoch": 0.023123183504853127, + "grad_norm": 7.9920256770402345, + "learning_rate": 7.701447067783702e-06, + "loss": 17.8697, + "step": 1265 + }, + { + "epoch": 0.023141462701299652, + 
"grad_norm": 8.164032651953145, + "learning_rate": 7.707539984767708e-06, + "loss": 18.4767, + "step": 1266 + }, + { + "epoch": 0.023159741897746174, + "grad_norm": 9.071888596238017, + "learning_rate": 7.713632901751714e-06, + "loss": 18.0909, + "step": 1267 + }, + { + "epoch": 0.0231780210941927, + "grad_norm": 12.59211531240348, + "learning_rate": 7.719725818735721e-06, + "loss": 18.4718, + "step": 1268 + }, + { + "epoch": 0.023196300290639224, + "grad_norm": 7.991050919837849, + "learning_rate": 7.725818735719727e-06, + "loss": 18.1672, + "step": 1269 + }, + { + "epoch": 0.02321457948708575, + "grad_norm": 7.370297221569767, + "learning_rate": 7.731911652703733e-06, + "loss": 17.7813, + "step": 1270 + }, + { + "epoch": 0.02323285868353227, + "grad_norm": 7.807076685784157, + "learning_rate": 7.738004569687738e-06, + "loss": 17.8473, + "step": 1271 + }, + { + "epoch": 0.023251137879978796, + "grad_norm": 7.309326311800068, + "learning_rate": 7.744097486671744e-06, + "loss": 17.7814, + "step": 1272 + }, + { + "epoch": 0.02326941707642532, + "grad_norm": 7.143892484378977, + "learning_rate": 7.75019040365575e-06, + "loss": 17.8614, + "step": 1273 + }, + { + "epoch": 0.023287696272871843, + "grad_norm": 8.334417705202664, + "learning_rate": 7.756283320639757e-06, + "loss": 17.9317, + "step": 1274 + }, + { + "epoch": 0.02330597546931837, + "grad_norm": 8.737528222459407, + "learning_rate": 7.762376237623763e-06, + "loss": 18.4585, + "step": 1275 + }, + { + "epoch": 0.023324254665764894, + "grad_norm": 7.829535864184691, + "learning_rate": 7.768469154607769e-06, + "loss": 18.0296, + "step": 1276 + }, + { + "epoch": 0.023342533862211415, + "grad_norm": 7.179278938725156, + "learning_rate": 7.774562071591775e-06, + "loss": 17.659, + "step": 1277 + }, + { + "epoch": 0.02336081305865794, + "grad_norm": 8.395295598169197, + "learning_rate": 7.78065498857578e-06, + "loss": 18.4474, + "step": 1278 + }, + { + "epoch": 0.023379092255104466, + "grad_norm": 8.107695250173576, + "learning_rate": 7.786747905559788e-06, + "loss": 18.3523, + "step": 1279 + }, + { + "epoch": 0.02339737145155099, + "grad_norm": 8.02572642939791, + "learning_rate": 7.792840822543794e-06, + "loss": 18.0061, + "step": 1280 + }, + { + "epoch": 0.023415650647997513, + "grad_norm": 8.54594362762417, + "learning_rate": 7.7989337395278e-06, + "loss": 18.2185, + "step": 1281 + }, + { + "epoch": 0.023433929844444038, + "grad_norm": 7.036244253198506, + "learning_rate": 7.805026656511805e-06, + "loss": 17.6652, + "step": 1282 + }, + { + "epoch": 0.023452209040890563, + "grad_norm": 8.064019494416113, + "learning_rate": 7.811119573495813e-06, + "loss": 18.0908, + "step": 1283 + }, + { + "epoch": 0.023470488237337085, + "grad_norm": 8.372108528772818, + "learning_rate": 7.817212490479817e-06, + "loss": 18.4715, + "step": 1284 + }, + { + "epoch": 0.02348876743378361, + "grad_norm": 9.683067868413996, + "learning_rate": 7.823305407463824e-06, + "loss": 19.0494, + "step": 1285 + }, + { + "epoch": 0.023507046630230136, + "grad_norm": 8.750064171301153, + "learning_rate": 7.82939832444783e-06, + "loss": 18.3841, + "step": 1286 + }, + { + "epoch": 0.02352532582667666, + "grad_norm": 8.621107473939952, + "learning_rate": 7.835491241431836e-06, + "loss": 18.4852, + "step": 1287 + }, + { + "epoch": 0.023543605023123183, + "grad_norm": 7.6179221400081385, + "learning_rate": 7.841584158415843e-06, + "loss": 17.8382, + "step": 1288 + }, + { + "epoch": 0.023561884219569708, + "grad_norm": 7.162638702613578, + "learning_rate": 7.847677075399849e-06, + 
"loss": 17.9635, + "step": 1289 + }, + { + "epoch": 0.023580163416016233, + "grad_norm": 7.740343557998449, + "learning_rate": 7.853769992383855e-06, + "loss": 18.1045, + "step": 1290 + }, + { + "epoch": 0.023598442612462755, + "grad_norm": 7.856847072877854, + "learning_rate": 7.85986290936786e-06, + "loss": 17.9265, + "step": 1291 + }, + { + "epoch": 0.02361672180890928, + "grad_norm": 7.819128359976211, + "learning_rate": 7.865955826351866e-06, + "loss": 18.0816, + "step": 1292 + }, + { + "epoch": 0.023635001005355805, + "grad_norm": 6.68728879211887, + "learning_rate": 7.872048743335872e-06, + "loss": 17.2439, + "step": 1293 + }, + { + "epoch": 0.023653280201802327, + "grad_norm": 7.637521824211896, + "learning_rate": 7.87814166031988e-06, + "loss": 17.8988, + "step": 1294 + }, + { + "epoch": 0.023671559398248852, + "grad_norm": 9.032365359032385, + "learning_rate": 7.884234577303885e-06, + "loss": 18.6905, + "step": 1295 + }, + { + "epoch": 0.023689838594695378, + "grad_norm": 8.251170325901612, + "learning_rate": 7.890327494287891e-06, + "loss": 18.1274, + "step": 1296 + }, + { + "epoch": 0.023708117791141903, + "grad_norm": 7.744327318199243, + "learning_rate": 7.896420411271897e-06, + "loss": 17.9762, + "step": 1297 + }, + { + "epoch": 0.023726396987588425, + "grad_norm": 7.5883481159425665, + "learning_rate": 7.902513328255902e-06, + "loss": 17.6878, + "step": 1298 + }, + { + "epoch": 0.02374467618403495, + "grad_norm": 7.853608917102702, + "learning_rate": 7.90860624523991e-06, + "loss": 18.0944, + "step": 1299 + }, + { + "epoch": 0.023762955380481475, + "grad_norm": 7.702152236003674, + "learning_rate": 7.914699162223916e-06, + "loss": 17.8327, + "step": 1300 + }, + { + "epoch": 0.023781234576927997, + "grad_norm": 8.376841760535141, + "learning_rate": 7.920792079207921e-06, + "loss": 18.306, + "step": 1301 + }, + { + "epoch": 0.023799513773374522, + "grad_norm": 8.309740430141177, + "learning_rate": 7.926884996191929e-06, + "loss": 17.7201, + "step": 1302 + }, + { + "epoch": 0.023817792969821047, + "grad_norm": 7.3776967431244005, + "learning_rate": 7.932977913175933e-06, + "loss": 17.6082, + "step": 1303 + }, + { + "epoch": 0.023836072166267572, + "grad_norm": 8.479560489220585, + "learning_rate": 7.93907083015994e-06, + "loss": 18.1974, + "step": 1304 + }, + { + "epoch": 0.023854351362714094, + "grad_norm": 7.627994332940635, + "learning_rate": 7.945163747143946e-06, + "loss": 17.9485, + "step": 1305 + }, + { + "epoch": 0.02387263055916062, + "grad_norm": 7.129336933906195, + "learning_rate": 7.951256664127952e-06, + "loss": 17.7024, + "step": 1306 + }, + { + "epoch": 0.023890909755607145, + "grad_norm": 10.21498902994689, + "learning_rate": 7.957349581111958e-06, + "loss": 18.6435, + "step": 1307 + }, + { + "epoch": 0.023909188952053666, + "grad_norm": 8.606824128185288, + "learning_rate": 7.963442498095965e-06, + "loss": 18.3299, + "step": 1308 + }, + { + "epoch": 0.02392746814850019, + "grad_norm": 8.712980733390193, + "learning_rate": 7.969535415079969e-06, + "loss": 18.6535, + "step": 1309 + }, + { + "epoch": 0.023945747344946717, + "grad_norm": 7.552296265392005, + "learning_rate": 7.975628332063977e-06, + "loss": 17.997, + "step": 1310 + }, + { + "epoch": 0.02396402654139324, + "grad_norm": 8.348030354934505, + "learning_rate": 7.981721249047982e-06, + "loss": 18.0706, + "step": 1311 + }, + { + "epoch": 0.023982305737839764, + "grad_norm": 8.030722009723371, + "learning_rate": 7.987814166031988e-06, + "loss": 18.0331, + "step": 1312 + }, + { + "epoch": 
0.02400058493428629, + "grad_norm": 8.09434320280789, + "learning_rate": 7.993907083015996e-06, + "loss": 18.4149, + "step": 1313 + }, + { + "epoch": 0.024018864130732814, + "grad_norm": 8.1395626020549, + "learning_rate": 8.000000000000001e-06, + "loss": 18.3051, + "step": 1314 + }, + { + "epoch": 0.024037143327179336, + "grad_norm": 8.052697051283777, + "learning_rate": 8.006092916984007e-06, + "loss": 18.0066, + "step": 1315 + }, + { + "epoch": 0.02405542252362586, + "grad_norm": 7.117663298312368, + "learning_rate": 8.012185833968013e-06, + "loss": 17.6996, + "step": 1316 + }, + { + "epoch": 0.024073701720072387, + "grad_norm": 7.712078554983164, + "learning_rate": 8.018278750952019e-06, + "loss": 17.8507, + "step": 1317 + }, + { + "epoch": 0.02409198091651891, + "grad_norm": 8.428532674109862, + "learning_rate": 8.024371667936024e-06, + "loss": 18.1838, + "step": 1318 + }, + { + "epoch": 0.024110260112965434, + "grad_norm": 7.979458394015958, + "learning_rate": 8.030464584920032e-06, + "loss": 17.9062, + "step": 1319 + }, + { + "epoch": 0.02412853930941196, + "grad_norm": 8.53801088058669, + "learning_rate": 8.036557501904038e-06, + "loss": 18.3146, + "step": 1320 + }, + { + "epoch": 0.024146818505858484, + "grad_norm": 8.362033362045155, + "learning_rate": 8.042650418888043e-06, + "loss": 18.0613, + "step": 1321 + }, + { + "epoch": 0.024165097702305006, + "grad_norm": 7.526317721252609, + "learning_rate": 8.048743335872049e-06, + "loss": 17.9492, + "step": 1322 + }, + { + "epoch": 0.02418337689875153, + "grad_norm": 7.766073534985361, + "learning_rate": 8.054836252856055e-06, + "loss": 18.0612, + "step": 1323 + }, + { + "epoch": 0.024201656095198056, + "grad_norm": 7.771387693349303, + "learning_rate": 8.060929169840062e-06, + "loss": 17.6484, + "step": 1324 + }, + { + "epoch": 0.024219935291644578, + "grad_norm": 7.451722400646125, + "learning_rate": 8.067022086824068e-06, + "loss": 17.7878, + "step": 1325 + }, + { + "epoch": 0.024238214488091103, + "grad_norm": 7.859467102333334, + "learning_rate": 8.073115003808074e-06, + "loss": 17.9367, + "step": 1326 + }, + { + "epoch": 0.02425649368453763, + "grad_norm": 7.056189195058994, + "learning_rate": 8.07920792079208e-06, + "loss": 17.7121, + "step": 1327 + }, + { + "epoch": 0.02427477288098415, + "grad_norm": 7.959798202564799, + "learning_rate": 8.085300837776085e-06, + "loss": 18.172, + "step": 1328 + }, + { + "epoch": 0.024293052077430675, + "grad_norm": 9.09571371326946, + "learning_rate": 8.091393754760091e-06, + "loss": 18.5352, + "step": 1329 + }, + { + "epoch": 0.0243113312738772, + "grad_norm": 9.120709054336443, + "learning_rate": 8.097486671744099e-06, + "loss": 18.2114, + "step": 1330 + }, + { + "epoch": 0.024329610470323726, + "grad_norm": 9.68771610352554, + "learning_rate": 8.103579588728104e-06, + "loss": 18.6835, + "step": 1331 + }, + { + "epoch": 0.024347889666770248, + "grad_norm": 8.156074313348503, + "learning_rate": 8.10967250571211e-06, + "loss": 18.0789, + "step": 1332 + }, + { + "epoch": 0.024366168863216773, + "grad_norm": 7.477498680504085, + "learning_rate": 8.115765422696118e-06, + "loss": 17.538, + "step": 1333 + }, + { + "epoch": 0.024384448059663298, + "grad_norm": 8.659555772193457, + "learning_rate": 8.121858339680122e-06, + "loss": 18.72, + "step": 1334 + }, + { + "epoch": 0.02440272725610982, + "grad_norm": 7.384290456888504, + "learning_rate": 8.127951256664129e-06, + "loss": 17.8932, + "step": 1335 + }, + { + "epoch": 0.024421006452556345, + "grad_norm": 7.994339782859948, + "learning_rate": 
8.134044173648135e-06, + "loss": 17.8404, + "step": 1336 + }, + { + "epoch": 0.02443928564900287, + "grad_norm": 7.180607066628015, + "learning_rate": 8.14013709063214e-06, + "loss": 17.462, + "step": 1337 + }, + { + "epoch": 0.024457564845449396, + "grad_norm": 9.032330013580443, + "learning_rate": 8.146230007616146e-06, + "loss": 18.5001, + "step": 1338 + }, + { + "epoch": 0.024475844041895917, + "grad_norm": 7.546251182125515, + "learning_rate": 8.152322924600154e-06, + "loss": 18.0689, + "step": 1339 + }, + { + "epoch": 0.024494123238342443, + "grad_norm": 8.256495716099927, + "learning_rate": 8.158415841584158e-06, + "loss": 18.2256, + "step": 1340 + }, + { + "epoch": 0.024512402434788968, + "grad_norm": 8.446449937335991, + "learning_rate": 8.164508758568165e-06, + "loss": 18.2305, + "step": 1341 + }, + { + "epoch": 0.02453068163123549, + "grad_norm": 7.647762757269953, + "learning_rate": 8.170601675552171e-06, + "loss": 17.9599, + "step": 1342 + }, + { + "epoch": 0.024548960827682015, + "grad_norm": 9.336594874256123, + "learning_rate": 8.176694592536177e-06, + "loss": 18.5839, + "step": 1343 + }, + { + "epoch": 0.02456724002412854, + "grad_norm": 8.458582893372924, + "learning_rate": 8.182787509520184e-06, + "loss": 18.3154, + "step": 1344 + }, + { + "epoch": 0.024585519220575062, + "grad_norm": 7.221096422825132, + "learning_rate": 8.18888042650419e-06, + "loss": 17.6017, + "step": 1345 + }, + { + "epoch": 0.024603798417021587, + "grad_norm": 8.195830887342922, + "learning_rate": 8.194973343488196e-06, + "loss": 18.1285, + "step": 1346 + }, + { + "epoch": 0.024622077613468112, + "grad_norm": 7.271801674324512, + "learning_rate": 8.201066260472202e-06, + "loss": 17.6622, + "step": 1347 + }, + { + "epoch": 0.024640356809914638, + "grad_norm": 7.7027468671500054, + "learning_rate": 8.207159177456207e-06, + "loss": 17.8249, + "step": 1348 + }, + { + "epoch": 0.02465863600636116, + "grad_norm": 7.360532133484001, + "learning_rate": 8.213252094440213e-06, + "loss": 17.363, + "step": 1349 + }, + { + "epoch": 0.024676915202807684, + "grad_norm": 7.690719116058331, + "learning_rate": 8.21934501142422e-06, + "loss": 17.9928, + "step": 1350 + }, + { + "epoch": 0.02469519439925421, + "grad_norm": 7.945431554099586, + "learning_rate": 8.225437928408226e-06, + "loss": 18.0503, + "step": 1351 + }, + { + "epoch": 0.02471347359570073, + "grad_norm": 7.106647440398628, + "learning_rate": 8.231530845392232e-06, + "loss": 17.7759, + "step": 1352 + }, + { + "epoch": 0.024731752792147257, + "grad_norm": 7.751453091434756, + "learning_rate": 8.237623762376238e-06, + "loss": 17.9833, + "step": 1353 + }, + { + "epoch": 0.024750031988593782, + "grad_norm": 8.384614172000148, + "learning_rate": 8.243716679360244e-06, + "loss": 17.792, + "step": 1354 + }, + { + "epoch": 0.024768311185040307, + "grad_norm": 7.531315262910314, + "learning_rate": 8.249809596344251e-06, + "loss": 17.7091, + "step": 1355 + }, + { + "epoch": 0.02478659038148683, + "grad_norm": 7.387069015047229, + "learning_rate": 8.255902513328257e-06, + "loss": 17.9286, + "step": 1356 + }, + { + "epoch": 0.024804869577933354, + "grad_norm": 6.941253945063988, + "learning_rate": 8.261995430312262e-06, + "loss": 17.6529, + "step": 1357 + }, + { + "epoch": 0.02482314877437988, + "grad_norm": 7.435027662647396, + "learning_rate": 8.26808834729627e-06, + "loss": 17.9858, + "step": 1358 + }, + { + "epoch": 0.0248414279708264, + "grad_norm": 7.562534840266457, + "learning_rate": 8.274181264280274e-06, + "loss": 17.7777, + "step": 1359 + }, + { + 
"epoch": 0.024859707167272926, + "grad_norm": 7.631480061899964, + "learning_rate": 8.28027418126428e-06, + "loss": 17.7919, + "step": 1360 + }, + { + "epoch": 0.02487798636371945, + "grad_norm": 7.871776632436914, + "learning_rate": 8.286367098248287e-06, + "loss": 17.9316, + "step": 1361 + }, + { + "epoch": 0.024896265560165973, + "grad_norm": 6.821665850570862, + "learning_rate": 8.292460015232293e-06, + "loss": 17.5959, + "step": 1362 + }, + { + "epoch": 0.0249145447566125, + "grad_norm": 8.166453099333848, + "learning_rate": 8.298552932216299e-06, + "loss": 17.8598, + "step": 1363 + }, + { + "epoch": 0.024932823953059024, + "grad_norm": 9.331197609076883, + "learning_rate": 8.304645849200306e-06, + "loss": 18.1722, + "step": 1364 + }, + { + "epoch": 0.02495110314950555, + "grad_norm": 8.276872570089795, + "learning_rate": 8.310738766184312e-06, + "loss": 18.0691, + "step": 1365 + }, + { + "epoch": 0.02496938234595207, + "grad_norm": 7.337851108553668, + "learning_rate": 8.316831683168318e-06, + "loss": 17.4398, + "step": 1366 + }, + { + "epoch": 0.024987661542398596, + "grad_norm": 8.59083660305102, + "learning_rate": 8.322924600152323e-06, + "loss": 18.1222, + "step": 1367 + }, + { + "epoch": 0.02500594073884512, + "grad_norm": 9.118795882833696, + "learning_rate": 8.32901751713633e-06, + "loss": 18.1363, + "step": 1368 + }, + { + "epoch": 0.025024219935291643, + "grad_norm": 8.371941378867396, + "learning_rate": 8.335110434120337e-06, + "loss": 18.2809, + "step": 1369 + }, + { + "epoch": 0.02504249913173817, + "grad_norm": 7.641238868727458, + "learning_rate": 8.341203351104342e-06, + "loss": 18.1704, + "step": 1370 + }, + { + "epoch": 0.025060778328184694, + "grad_norm": 9.042468938009703, + "learning_rate": 8.347296268088348e-06, + "loss": 18.3551, + "step": 1371 + }, + { + "epoch": 0.02507905752463122, + "grad_norm": 9.506376707817328, + "learning_rate": 8.353389185072354e-06, + "loss": 18.368, + "step": 1372 + }, + { + "epoch": 0.02509733672107774, + "grad_norm": 7.770551176630004, + "learning_rate": 8.35948210205636e-06, + "loss": 17.8866, + "step": 1373 + }, + { + "epoch": 0.025115615917524266, + "grad_norm": 8.290447583037134, + "learning_rate": 8.365575019040365e-06, + "loss": 18.5078, + "step": 1374 + }, + { + "epoch": 0.02513389511397079, + "grad_norm": 9.77723850031326, + "learning_rate": 8.371667936024373e-06, + "loss": 18.3157, + "step": 1375 + }, + { + "epoch": 0.025152174310417313, + "grad_norm": 8.296936345506735, + "learning_rate": 8.377760853008379e-06, + "loss": 18.2976, + "step": 1376 + }, + { + "epoch": 0.025170453506863838, + "grad_norm": 8.124263286385034, + "learning_rate": 8.383853769992384e-06, + "loss": 17.8877, + "step": 1377 + }, + { + "epoch": 0.025188732703310363, + "grad_norm": 6.454023710336849, + "learning_rate": 8.38994668697639e-06, + "loss": 17.4242, + "step": 1378 + }, + { + "epoch": 0.025207011899756885, + "grad_norm": 8.9444550386909, + "learning_rate": 8.396039603960396e-06, + "loss": 18.3085, + "step": 1379 + }, + { + "epoch": 0.02522529109620341, + "grad_norm": 9.69406386868736, + "learning_rate": 8.402132520944403e-06, + "loss": 18.3456, + "step": 1380 + }, + { + "epoch": 0.025243570292649935, + "grad_norm": 8.074897555646956, + "learning_rate": 8.40822543792841e-06, + "loss": 17.8998, + "step": 1381 + }, + { + "epoch": 0.02526184948909646, + "grad_norm": 7.596511536029738, + "learning_rate": 8.414318354912415e-06, + "loss": 17.9059, + "step": 1382 + }, + { + "epoch": 0.025280128685542982, + "grad_norm": 8.829738986897409, + 
"learning_rate": 8.42041127189642e-06, + "loss": 18.412, + "step": 1383 + }, + { + "epoch": 0.025298407881989508, + "grad_norm": 8.174371688738475, + "learning_rate": 8.426504188880426e-06, + "loss": 17.8264, + "step": 1384 + }, + { + "epoch": 0.025316687078436033, + "grad_norm": 7.676731543848915, + "learning_rate": 8.432597105864432e-06, + "loss": 17.9388, + "step": 1385 + }, + { + "epoch": 0.025334966274882555, + "grad_norm": 10.247213204871045, + "learning_rate": 8.43869002284844e-06, + "loss": 18.6418, + "step": 1386 + }, + { + "epoch": 0.02535324547132908, + "grad_norm": 8.571825620462517, + "learning_rate": 8.444782939832445e-06, + "loss": 18.4308, + "step": 1387 + }, + { + "epoch": 0.025371524667775605, + "grad_norm": 7.6782801297365095, + "learning_rate": 8.450875856816451e-06, + "loss": 18.0378, + "step": 1388 + }, + { + "epoch": 0.02538980386422213, + "grad_norm": 7.48276588353417, + "learning_rate": 8.456968773800459e-06, + "loss": 17.8743, + "step": 1389 + }, + { + "epoch": 0.025408083060668652, + "grad_norm": 8.583044967740078, + "learning_rate": 8.463061690784464e-06, + "loss": 17.9977, + "step": 1390 + }, + { + "epoch": 0.025426362257115177, + "grad_norm": 8.824243738339353, + "learning_rate": 8.46915460776847e-06, + "loss": 18.3367, + "step": 1391 + }, + { + "epoch": 0.025444641453561703, + "grad_norm": 7.915063711593708, + "learning_rate": 8.475247524752476e-06, + "loss": 18.2498, + "step": 1392 + }, + { + "epoch": 0.025462920650008224, + "grad_norm": 7.177028404247519, + "learning_rate": 8.481340441736482e-06, + "loss": 17.6313, + "step": 1393 + }, + { + "epoch": 0.02548119984645475, + "grad_norm": 7.274675016163252, + "learning_rate": 8.487433358720487e-06, + "loss": 17.9967, + "step": 1394 + }, + { + "epoch": 0.025499479042901275, + "grad_norm": 8.246748890142642, + "learning_rate": 8.493526275704495e-06, + "loss": 18.0346, + "step": 1395 + }, + { + "epoch": 0.025517758239347797, + "grad_norm": 8.132276464199869, + "learning_rate": 8.4996191926885e-06, + "loss": 17.7565, + "step": 1396 + }, + { + "epoch": 0.025536037435794322, + "grad_norm": 9.160219262127326, + "learning_rate": 8.505712109672506e-06, + "loss": 18.8931, + "step": 1397 + }, + { + "epoch": 0.025554316632240847, + "grad_norm": 6.949895499620933, + "learning_rate": 8.511805026656512e-06, + "loss": 17.6727, + "step": 1398 + }, + { + "epoch": 0.025572595828687372, + "grad_norm": 8.297904726022859, + "learning_rate": 8.517897943640518e-06, + "loss": 18.4404, + "step": 1399 + }, + { + "epoch": 0.025590875025133894, + "grad_norm": 8.146645435656021, + "learning_rate": 8.523990860624525e-06, + "loss": 17.9447, + "step": 1400 + }, + { + "epoch": 0.02560915422158042, + "grad_norm": 8.20198363793982, + "learning_rate": 8.530083777608531e-06, + "loss": 17.9231, + "step": 1401 + }, + { + "epoch": 0.025627433418026944, + "grad_norm": 7.367031033065404, + "learning_rate": 8.536176694592537e-06, + "loss": 17.7908, + "step": 1402 + }, + { + "epoch": 0.025645712614473466, + "grad_norm": 7.993296262853948, + "learning_rate": 8.542269611576543e-06, + "loss": 18.4607, + "step": 1403 + }, + { + "epoch": 0.02566399181091999, + "grad_norm": 8.358059257126076, + "learning_rate": 8.548362528560548e-06, + "loss": 18.1583, + "step": 1404 + }, + { + "epoch": 0.025682271007366517, + "grad_norm": 7.8085931466035445, + "learning_rate": 8.554455445544554e-06, + "loss": 17.9273, + "step": 1405 + }, + { + "epoch": 0.025700550203813042, + "grad_norm": 8.795074595148867, + "learning_rate": 8.560548362528562e-06, + "loss": 18.2, + "step": 
1406 + }, + { + "epoch": 0.025718829400259564, + "grad_norm": 7.341380886517458, + "learning_rate": 8.566641279512567e-06, + "loss": 17.7604, + "step": 1407 + }, + { + "epoch": 0.02573710859670609, + "grad_norm": 6.530914080059693, + "learning_rate": 8.572734196496573e-06, + "loss": 17.4624, + "step": 1408 + }, + { + "epoch": 0.025755387793152614, + "grad_norm": 7.879802029965479, + "learning_rate": 8.57882711348058e-06, + "loss": 17.9349, + "step": 1409 + }, + { + "epoch": 0.025773666989599136, + "grad_norm": 8.300464636465835, + "learning_rate": 8.584920030464585e-06, + "loss": 17.7776, + "step": 1410 + }, + { + "epoch": 0.02579194618604566, + "grad_norm": 8.395711099456939, + "learning_rate": 8.591012947448592e-06, + "loss": 18.0982, + "step": 1411 + }, + { + "epoch": 0.025810225382492186, + "grad_norm": 8.474296514569252, + "learning_rate": 8.597105864432598e-06, + "loss": 18.0764, + "step": 1412 + }, + { + "epoch": 0.025828504578938708, + "grad_norm": 8.777423423211545, + "learning_rate": 8.603198781416604e-06, + "loss": 18.3759, + "step": 1413 + }, + { + "epoch": 0.025846783775385233, + "grad_norm": 7.36446733094962, + "learning_rate": 8.609291698400611e-06, + "loss": 17.9045, + "step": 1414 + }, + { + "epoch": 0.02586506297183176, + "grad_norm": 9.504182858462068, + "learning_rate": 8.615384615384617e-06, + "loss": 18.6237, + "step": 1415 + }, + { + "epoch": 0.025883342168278284, + "grad_norm": 9.063770990698762, + "learning_rate": 8.621477532368621e-06, + "loss": 18.4992, + "step": 1416 + }, + { + "epoch": 0.025901621364724806, + "grad_norm": 7.996481775653979, + "learning_rate": 8.627570449352628e-06, + "loss": 18.1038, + "step": 1417 + }, + { + "epoch": 0.02591990056117133, + "grad_norm": 8.596649768941777, + "learning_rate": 8.633663366336634e-06, + "loss": 18.3631, + "step": 1418 + }, + { + "epoch": 0.025938179757617856, + "grad_norm": 7.646225022339392, + "learning_rate": 8.63975628332064e-06, + "loss": 17.8257, + "step": 1419 + }, + { + "epoch": 0.025956458954064378, + "grad_norm": 8.799682429765236, + "learning_rate": 8.645849200304647e-06, + "loss": 18.4929, + "step": 1420 + }, + { + "epoch": 0.025974738150510903, + "grad_norm": 7.632664788232884, + "learning_rate": 8.651942117288653e-06, + "loss": 17.7875, + "step": 1421 + }, + { + "epoch": 0.02599301734695743, + "grad_norm": 6.492463194039088, + "learning_rate": 8.658035034272659e-06, + "loss": 17.3591, + "step": 1422 + }, + { + "epoch": 0.026011296543403954, + "grad_norm": 8.51316418431203, + "learning_rate": 8.664127951256665e-06, + "loss": 18.2339, + "step": 1423 + }, + { + "epoch": 0.026029575739850475, + "grad_norm": 8.855657011481085, + "learning_rate": 8.67022086824067e-06, + "loss": 18.3553, + "step": 1424 + }, + { + "epoch": 0.026047854936297, + "grad_norm": 6.309419290605549, + "learning_rate": 8.676313785224678e-06, + "loss": 17.1397, + "step": 1425 + }, + { + "epoch": 0.026066134132743526, + "grad_norm": 7.753423110867713, + "learning_rate": 8.682406702208684e-06, + "loss": 17.7617, + "step": 1426 + }, + { + "epoch": 0.026084413329190047, + "grad_norm": 7.523806497889765, + "learning_rate": 8.68849961919269e-06, + "loss": 17.7815, + "step": 1427 + }, + { + "epoch": 0.026102692525636573, + "grad_norm": 7.607409503407267, + "learning_rate": 8.694592536176695e-06, + "loss": 17.8936, + "step": 1428 + }, + { + "epoch": 0.026120971722083098, + "grad_norm": 6.895009237836748, + "learning_rate": 8.7006854531607e-06, + "loss": 17.5724, + "step": 1429 + }, + { + "epoch": 0.02613925091852962, + "grad_norm": 
8.873927842757839, + "learning_rate": 8.706778370144707e-06, + "loss": 18.2576, + "step": 1430 + }, + { + "epoch": 0.026157530114976145, + "grad_norm": 7.546760724787882, + "learning_rate": 8.712871287128714e-06, + "loss": 17.628, + "step": 1431 + }, + { + "epoch": 0.02617580931142267, + "grad_norm": 7.9874520930562705, + "learning_rate": 8.71896420411272e-06, + "loss": 17.8476, + "step": 1432 + }, + { + "epoch": 0.026194088507869195, + "grad_norm": 8.739040378905148, + "learning_rate": 8.725057121096726e-06, + "loss": 18.1888, + "step": 1433 + }, + { + "epoch": 0.026212367704315717, + "grad_norm": 8.135508969320531, + "learning_rate": 8.731150038080733e-06, + "loss": 18.3601, + "step": 1434 + }, + { + "epoch": 0.026230646900762242, + "grad_norm": 7.964696704255999, + "learning_rate": 8.737242955064737e-06, + "loss": 18.1727, + "step": 1435 + }, + { + "epoch": 0.026248926097208768, + "grad_norm": 8.904126796062632, + "learning_rate": 8.743335872048745e-06, + "loss": 18.2646, + "step": 1436 + }, + { + "epoch": 0.02626720529365529, + "grad_norm": 8.195402872333565, + "learning_rate": 8.74942878903275e-06, + "loss": 18.0862, + "step": 1437 + }, + { + "epoch": 0.026285484490101815, + "grad_norm": 6.965868215006295, + "learning_rate": 8.755521706016756e-06, + "loss": 17.6407, + "step": 1438 + }, + { + "epoch": 0.02630376368654834, + "grad_norm": 7.587462863152006, + "learning_rate": 8.761614623000762e-06, + "loss": 17.8189, + "step": 1439 + }, + { + "epoch": 0.026322042882994865, + "grad_norm": 9.130781072940874, + "learning_rate": 8.76770753998477e-06, + "loss": 18.4033, + "step": 1440 + }, + { + "epoch": 0.026340322079441387, + "grad_norm": 6.468831354069997, + "learning_rate": 8.773800456968773e-06, + "loss": 17.3407, + "step": 1441 + }, + { + "epoch": 0.026358601275887912, + "grad_norm": 9.156269451296055, + "learning_rate": 8.77989337395278e-06, + "loss": 18.4001, + "step": 1442 + }, + { + "epoch": 0.026376880472334437, + "grad_norm": 7.183314914355377, + "learning_rate": 8.785986290936787e-06, + "loss": 17.9738, + "step": 1443 + }, + { + "epoch": 0.02639515966878096, + "grad_norm": 8.599106529459707, + "learning_rate": 8.792079207920792e-06, + "loss": 18.1737, + "step": 1444 + }, + { + "epoch": 0.026413438865227484, + "grad_norm": 7.942865142973476, + "learning_rate": 8.7981721249048e-06, + "loss": 18.0981, + "step": 1445 + }, + { + "epoch": 0.02643171806167401, + "grad_norm": 8.542916435346985, + "learning_rate": 8.804265041888805e-06, + "loss": 18.4702, + "step": 1446 + }, + { + "epoch": 0.02644999725812053, + "grad_norm": 7.9391169682901515, + "learning_rate": 8.810357958872811e-06, + "loss": 17.8375, + "step": 1447 + }, + { + "epoch": 0.026468276454567057, + "grad_norm": 7.973990058925455, + "learning_rate": 8.816450875856817e-06, + "loss": 18.0898, + "step": 1448 + }, + { + "epoch": 0.026486555651013582, + "grad_norm": 8.14941052802256, + "learning_rate": 8.822543792840823e-06, + "loss": 18.5443, + "step": 1449 + }, + { + "epoch": 0.026504834847460107, + "grad_norm": 8.351592252513374, + "learning_rate": 8.828636709824829e-06, + "loss": 18.0385, + "step": 1450 + }, + { + "epoch": 0.02652311404390663, + "grad_norm": 12.065678884127646, + "learning_rate": 8.834729626808836e-06, + "loss": 17.76, + "step": 1451 + }, + { + "epoch": 0.026541393240353154, + "grad_norm": 7.990968201402576, + "learning_rate": 8.840822543792842e-06, + "loss": 18.057, + "step": 1452 + }, + { + "epoch": 0.02655967243679968, + "grad_norm": 7.870114463708758, + "learning_rate": 8.846915460776847e-06, + "loss": 
17.5919, + "step": 1453 + }, + { + "epoch": 0.0265779516332462, + "grad_norm": 8.37421222401054, + "learning_rate": 8.853008377760853e-06, + "loss": 17.9004, + "step": 1454 + }, + { + "epoch": 0.026596230829692726, + "grad_norm": 7.512862929896719, + "learning_rate": 8.859101294744859e-06, + "loss": 17.7529, + "step": 1455 + }, + { + "epoch": 0.02661451002613925, + "grad_norm": 8.277450520977178, + "learning_rate": 8.865194211728866e-06, + "loss": 18.1617, + "step": 1456 + }, + { + "epoch": 0.026632789222585777, + "grad_norm": 7.8667995056229145, + "learning_rate": 8.871287128712872e-06, + "loss": 17.8809, + "step": 1457 + }, + { + "epoch": 0.0266510684190323, + "grad_norm": 7.613928529760875, + "learning_rate": 8.877380045696878e-06, + "loss": 17.5719, + "step": 1458 + }, + { + "epoch": 0.026669347615478824, + "grad_norm": 8.035004828733218, + "learning_rate": 8.883472962680885e-06, + "loss": 18.0841, + "step": 1459 + }, + { + "epoch": 0.02668762681192535, + "grad_norm": 9.677789943846687, + "learning_rate": 8.88956587966489e-06, + "loss": 18.8802, + "step": 1460 + }, + { + "epoch": 0.02670590600837187, + "grad_norm": 9.55373517160617, + "learning_rate": 8.895658796648895e-06, + "loss": 18.8335, + "step": 1461 + }, + { + "epoch": 0.026724185204818396, + "grad_norm": 7.892072339341589, + "learning_rate": 8.901751713632903e-06, + "loss": 17.7087, + "step": 1462 + }, + { + "epoch": 0.02674246440126492, + "grad_norm": 7.999818442281938, + "learning_rate": 8.907844630616908e-06, + "loss": 18.1386, + "step": 1463 + }, + { + "epoch": 0.026760743597711443, + "grad_norm": 8.75483316003142, + "learning_rate": 8.913937547600914e-06, + "loss": 18.1697, + "step": 1464 + }, + { + "epoch": 0.026779022794157968, + "grad_norm": 7.675223165596685, + "learning_rate": 8.920030464584922e-06, + "loss": 17.6498, + "step": 1465 + }, + { + "epoch": 0.026797301990604493, + "grad_norm": 8.578202310137655, + "learning_rate": 8.926123381568926e-06, + "loss": 18.1871, + "step": 1466 + }, + { + "epoch": 0.02681558118705102, + "grad_norm": 8.097931941837253, + "learning_rate": 8.932216298552933e-06, + "loss": 17.8763, + "step": 1467 + }, + { + "epoch": 0.02683386038349754, + "grad_norm": 7.425964756487236, + "learning_rate": 8.938309215536939e-06, + "loss": 17.6646, + "step": 1468 + }, + { + "epoch": 0.026852139579944066, + "grad_norm": 7.352889034745282, + "learning_rate": 8.944402132520945e-06, + "loss": 17.8098, + "step": 1469 + }, + { + "epoch": 0.02687041877639059, + "grad_norm": 8.094022459132429, + "learning_rate": 8.950495049504952e-06, + "loss": 18.0239, + "step": 1470 + }, + { + "epoch": 0.026888697972837113, + "grad_norm": 7.825426674849305, + "learning_rate": 8.956587966488958e-06, + "loss": 17.9517, + "step": 1471 + }, + { + "epoch": 0.026906977169283638, + "grad_norm": 7.934927280932549, + "learning_rate": 8.962680883472964e-06, + "loss": 18.3729, + "step": 1472 + }, + { + "epoch": 0.026925256365730163, + "grad_norm": 9.164940864472438, + "learning_rate": 8.96877380045697e-06, + "loss": 18.8277, + "step": 1473 + }, + { + "epoch": 0.026943535562176688, + "grad_norm": 8.608148379267885, + "learning_rate": 8.974866717440975e-06, + "loss": 17.7031, + "step": 1474 + }, + { + "epoch": 0.02696181475862321, + "grad_norm": 7.558928448476973, + "learning_rate": 8.980959634424981e-06, + "loss": 17.8896, + "step": 1475 + }, + { + "epoch": 0.026980093955069735, + "grad_norm": 7.3185664527139265, + "learning_rate": 8.987052551408988e-06, + "loss": 17.7327, + "step": 1476 + }, + { + "epoch": 0.02699837315151626, + 
"grad_norm": 9.086940154591609, + "learning_rate": 8.993145468392994e-06, + "loss": 18.6602, + "step": 1477 + }, + { + "epoch": 0.027016652347962782, + "grad_norm": 8.199731072144623, + "learning_rate": 8.999238385377e-06, + "loss": 18.0815, + "step": 1478 + }, + { + "epoch": 0.027034931544409307, + "grad_norm": 8.692421826783356, + "learning_rate": 9.005331302361006e-06, + "loss": 18.5877, + "step": 1479 + }, + { + "epoch": 0.027053210740855833, + "grad_norm": 8.5464561802531, + "learning_rate": 9.011424219345011e-06, + "loss": 18.249, + "step": 1480 + }, + { + "epoch": 0.027071489937302358, + "grad_norm": 8.391435606391324, + "learning_rate": 9.017517136329019e-06, + "loss": 18.234, + "step": 1481 + }, + { + "epoch": 0.02708976913374888, + "grad_norm": 8.28855798861418, + "learning_rate": 9.023610053313025e-06, + "loss": 18.1964, + "step": 1482 + }, + { + "epoch": 0.027108048330195405, + "grad_norm": 7.522206878997334, + "learning_rate": 9.02970297029703e-06, + "loss": 18.1083, + "step": 1483 + }, + { + "epoch": 0.02712632752664193, + "grad_norm": 7.6996569698720165, + "learning_rate": 9.035795887281036e-06, + "loss": 17.7058, + "step": 1484 + }, + { + "epoch": 0.027144606723088452, + "grad_norm": 7.770670567376332, + "learning_rate": 9.041888804265042e-06, + "loss": 17.6966, + "step": 1485 + }, + { + "epoch": 0.027162885919534977, + "grad_norm": 8.512304497126742, + "learning_rate": 9.047981721249048e-06, + "loss": 18.2586, + "step": 1486 + }, + { + "epoch": 0.027181165115981502, + "grad_norm": 7.627840662620894, + "learning_rate": 9.054074638233055e-06, + "loss": 18.1694, + "step": 1487 + }, + { + "epoch": 0.027199444312428024, + "grad_norm": 7.91230120789127, + "learning_rate": 9.060167555217061e-06, + "loss": 17.8939, + "step": 1488 + }, + { + "epoch": 0.02721772350887455, + "grad_norm": 8.156236620458687, + "learning_rate": 9.066260472201067e-06, + "loss": 18.2974, + "step": 1489 + }, + { + "epoch": 0.027236002705321075, + "grad_norm": 7.951072967239382, + "learning_rate": 9.072353389185074e-06, + "loss": 17.9947, + "step": 1490 + }, + { + "epoch": 0.0272542819017676, + "grad_norm": 6.436830455155051, + "learning_rate": 9.07844630616908e-06, + "loss": 17.38, + "step": 1491 + }, + { + "epoch": 0.02727256109821412, + "grad_norm": 8.842741546278877, + "learning_rate": 9.084539223153086e-06, + "loss": 18.5525, + "step": 1492 + }, + { + "epoch": 0.027290840294660647, + "grad_norm": 8.541348994364565, + "learning_rate": 9.090632140137091e-06, + "loss": 18.2744, + "step": 1493 + }, + { + "epoch": 0.027309119491107172, + "grad_norm": 7.677758366809842, + "learning_rate": 9.096725057121097e-06, + "loss": 17.9632, + "step": 1494 + }, + { + "epoch": 0.027327398687553694, + "grad_norm": 11.031595550811504, + "learning_rate": 9.102817974105103e-06, + "loss": 18.1301, + "step": 1495 + }, + { + "epoch": 0.02734567788400022, + "grad_norm": 7.6316264895902615, + "learning_rate": 9.10891089108911e-06, + "loss": 17.5932, + "step": 1496 + }, + { + "epoch": 0.027363957080446744, + "grad_norm": 7.942322461509622, + "learning_rate": 9.115003808073116e-06, + "loss": 17.8623, + "step": 1497 + }, + { + "epoch": 0.02738223627689327, + "grad_norm": 8.987789022179895, + "learning_rate": 9.121096725057122e-06, + "loss": 18.4788, + "step": 1498 + }, + { + "epoch": 0.02740051547333979, + "grad_norm": 7.202629039212628, + "learning_rate": 9.127189642041128e-06, + "loss": 17.7036, + "step": 1499 + }, + { + "epoch": 0.027418794669786316, + "grad_norm": 8.718231529858707, + "learning_rate": 9.133282559025133e-06, + 
"loss": 18.4281, + "step": 1500 + }, + { + "epoch": 0.02743707386623284, + "grad_norm": 8.224690865957287, + "learning_rate": 9.13937547600914e-06, + "loss": 17.9646, + "step": 1501 + }, + { + "epoch": 0.027455353062679363, + "grad_norm": 8.103325679036537, + "learning_rate": 9.145468392993147e-06, + "loss": 17.8065, + "step": 1502 + }, + { + "epoch": 0.02747363225912589, + "grad_norm": 7.595202129353247, + "learning_rate": 9.151561309977152e-06, + "loss": 17.7352, + "step": 1503 + }, + { + "epoch": 0.027491911455572414, + "grad_norm": 9.137774770007482, + "learning_rate": 9.157654226961158e-06, + "loss": 18.267, + "step": 1504 + }, + { + "epoch": 0.027510190652018936, + "grad_norm": 8.226653766856433, + "learning_rate": 9.163747143945164e-06, + "loss": 18.1586, + "step": 1505 + }, + { + "epoch": 0.02752846984846546, + "grad_norm": 8.250869806845207, + "learning_rate": 9.16984006092917e-06, + "loss": 18.1652, + "step": 1506 + }, + { + "epoch": 0.027546749044911986, + "grad_norm": 7.819873303002308, + "learning_rate": 9.175932977913177e-06, + "loss": 17.9536, + "step": 1507 + }, + { + "epoch": 0.02756502824135851, + "grad_norm": 7.971809249675537, + "learning_rate": 9.182025894897183e-06, + "loss": 17.8049, + "step": 1508 + }, + { + "epoch": 0.027583307437805033, + "grad_norm": 7.868639339632311, + "learning_rate": 9.188118811881189e-06, + "loss": 18.0095, + "step": 1509 + }, + { + "epoch": 0.02760158663425156, + "grad_norm": 8.046255160243186, + "learning_rate": 9.194211728865194e-06, + "loss": 18.2376, + "step": 1510 + }, + { + "epoch": 0.027619865830698084, + "grad_norm": 8.19271834161202, + "learning_rate": 9.2003046458492e-06, + "loss": 18.1397, + "step": 1511 + }, + { + "epoch": 0.027638145027144605, + "grad_norm": 7.972401688363723, + "learning_rate": 9.206397562833208e-06, + "loss": 18.1718, + "step": 1512 + }, + { + "epoch": 0.02765642422359113, + "grad_norm": 7.806246299530137, + "learning_rate": 9.212490479817213e-06, + "loss": 17.9699, + "step": 1513 + }, + { + "epoch": 0.027674703420037656, + "grad_norm": 6.966609041650007, + "learning_rate": 9.218583396801219e-06, + "loss": 17.8136, + "step": 1514 + }, + { + "epoch": 0.02769298261648418, + "grad_norm": 7.395969299114421, + "learning_rate": 9.224676313785227e-06, + "loss": 17.9745, + "step": 1515 + }, + { + "epoch": 0.027711261812930703, + "grad_norm": 8.108307050861727, + "learning_rate": 9.230769230769232e-06, + "loss": 18.1577, + "step": 1516 + }, + { + "epoch": 0.027729541009377228, + "grad_norm": 8.441209289907926, + "learning_rate": 9.236862147753236e-06, + "loss": 18.3238, + "step": 1517 + }, + { + "epoch": 0.027747820205823753, + "grad_norm": 7.1675044271113855, + "learning_rate": 9.242955064737244e-06, + "loss": 17.5438, + "step": 1518 + }, + { + "epoch": 0.027766099402270275, + "grad_norm": 7.356867118981828, + "learning_rate": 9.24904798172125e-06, + "loss": 17.5816, + "step": 1519 + }, + { + "epoch": 0.0277843785987168, + "grad_norm": 8.201443433514227, + "learning_rate": 9.255140898705255e-06, + "loss": 18.529, + "step": 1520 + }, + { + "epoch": 0.027802657795163326, + "grad_norm": 8.642047252829247, + "learning_rate": 9.261233815689263e-06, + "loss": 18.0311, + "step": 1521 + }, + { + "epoch": 0.027820936991609847, + "grad_norm": 9.120647593902877, + "learning_rate": 9.267326732673269e-06, + "loss": 18.3459, + "step": 1522 + }, + { + "epoch": 0.027839216188056373, + "grad_norm": 8.22577861240296, + "learning_rate": 9.273419649657274e-06, + "loss": 18.0776, + "step": 1523 + }, + { + "epoch": 0.027857495384502898, + 
"grad_norm": 8.844626158932435, + "learning_rate": 9.27951256664128e-06, + "loss": 18.4426, + "step": 1524 + }, + { + "epoch": 0.027875774580949423, + "grad_norm": 6.779648817912882, + "learning_rate": 9.285605483625286e-06, + "loss": 17.3176, + "step": 1525 + }, + { + "epoch": 0.027894053777395945, + "grad_norm": 8.492963085283831, + "learning_rate": 9.291698400609293e-06, + "loss": 18.0535, + "step": 1526 + }, + { + "epoch": 0.02791233297384247, + "grad_norm": 7.247862231746354, + "learning_rate": 9.297791317593299e-06, + "loss": 17.7189, + "step": 1527 + }, + { + "epoch": 0.027930612170288995, + "grad_norm": 9.165028186905502, + "learning_rate": 9.303884234577305e-06, + "loss": 18.7618, + "step": 1528 + }, + { + "epoch": 0.027948891366735517, + "grad_norm": 7.957554243726399, + "learning_rate": 9.30997715156131e-06, + "loss": 18.3054, + "step": 1529 + }, + { + "epoch": 0.027967170563182042, + "grad_norm": 8.85634231870708, + "learning_rate": 9.316070068545316e-06, + "loss": 18.8453, + "step": 1530 + }, + { + "epoch": 0.027985449759628567, + "grad_norm": 6.072056271219344, + "learning_rate": 9.322162985529322e-06, + "loss": 17.3045, + "step": 1531 + }, + { + "epoch": 0.028003728956075093, + "grad_norm": 7.304157703076281, + "learning_rate": 9.32825590251333e-06, + "loss": 17.8859, + "step": 1532 + }, + { + "epoch": 0.028022008152521614, + "grad_norm": 8.566346981570362, + "learning_rate": 9.334348819497335e-06, + "loss": 18.2739, + "step": 1533 + }, + { + "epoch": 0.02804028734896814, + "grad_norm": 8.863402700936351, + "learning_rate": 9.340441736481341e-06, + "loss": 18.1267, + "step": 1534 + }, + { + "epoch": 0.028058566545414665, + "grad_norm": 7.032337434310726, + "learning_rate": 9.346534653465348e-06, + "loss": 17.5745, + "step": 1535 + }, + { + "epoch": 0.028076845741861187, + "grad_norm": 7.905856887685362, + "learning_rate": 9.352627570449353e-06, + "loss": 18.1291, + "step": 1536 + }, + { + "epoch": 0.028095124938307712, + "grad_norm": 6.83724131858278, + "learning_rate": 9.35872048743336e-06, + "loss": 17.6759, + "step": 1537 + }, + { + "epoch": 0.028113404134754237, + "grad_norm": 7.371139363257559, + "learning_rate": 9.364813404417366e-06, + "loss": 17.6574, + "step": 1538 + }, + { + "epoch": 0.02813168333120076, + "grad_norm": 7.556429626574926, + "learning_rate": 9.370906321401372e-06, + "loss": 17.9372, + "step": 1539 + }, + { + "epoch": 0.028149962527647284, + "grad_norm": 8.229899918476479, + "learning_rate": 9.376999238385377e-06, + "loss": 18.3724, + "step": 1540 + }, + { + "epoch": 0.02816824172409381, + "grad_norm": 6.980564490492054, + "learning_rate": 9.383092155369385e-06, + "loss": 17.4149, + "step": 1541 + }, + { + "epoch": 0.028186520920540335, + "grad_norm": 8.549941551251717, + "learning_rate": 9.389185072353389e-06, + "loss": 18.3236, + "step": 1542 + }, + { + "epoch": 0.028204800116986856, + "grad_norm": 8.117965248196436, + "learning_rate": 9.395277989337396e-06, + "loss": 18.2625, + "step": 1543 + }, + { + "epoch": 0.02822307931343338, + "grad_norm": 8.087251903485502, + "learning_rate": 9.401370906321402e-06, + "loss": 17.9754, + "step": 1544 + }, + { + "epoch": 0.028241358509879907, + "grad_norm": 8.109262754282136, + "learning_rate": 9.407463823305408e-06, + "loss": 17.9735, + "step": 1545 + }, + { + "epoch": 0.02825963770632643, + "grad_norm": 8.449701368675408, + "learning_rate": 9.413556740289415e-06, + "loss": 18.3141, + "step": 1546 + }, + { + "epoch": 0.028277916902772954, + "grad_norm": 8.021681692913548, + "learning_rate": 
9.419649657273421e-06, + "loss": 17.7494, + "step": 1547 + }, + { + "epoch": 0.02829619609921948, + "grad_norm": 8.654610764330709, + "learning_rate": 9.425742574257427e-06, + "loss": 18.3164, + "step": 1548 + }, + { + "epoch": 0.028314475295666004, + "grad_norm": 8.934826474472013, + "learning_rate": 9.431835491241433e-06, + "loss": 18.5265, + "step": 1549 + }, + { + "epoch": 0.028332754492112526, + "grad_norm": 6.615085962692111, + "learning_rate": 9.437928408225438e-06, + "loss": 17.4637, + "step": 1550 + }, + { + "epoch": 0.02835103368855905, + "grad_norm": 7.193084585584398, + "learning_rate": 9.444021325209444e-06, + "loss": 17.8196, + "step": 1551 + }, + { + "epoch": 0.028369312885005576, + "grad_norm": 7.5635242082002705, + "learning_rate": 9.450114242193451e-06, + "loss": 17.8508, + "step": 1552 + }, + { + "epoch": 0.028387592081452098, + "grad_norm": 8.727849038197348, + "learning_rate": 9.456207159177457e-06, + "loss": 17.8505, + "step": 1553 + }, + { + "epoch": 0.028405871277898623, + "grad_norm": 8.546991746139607, + "learning_rate": 9.462300076161463e-06, + "loss": 18.2066, + "step": 1554 + }, + { + "epoch": 0.02842415047434515, + "grad_norm": 8.434184761377928, + "learning_rate": 9.468392993145469e-06, + "loss": 18.3708, + "step": 1555 + }, + { + "epoch": 0.02844242967079167, + "grad_norm": 7.600295088603328, + "learning_rate": 9.474485910129475e-06, + "loss": 17.712, + "step": 1556 + }, + { + "epoch": 0.028460708867238196, + "grad_norm": 8.812810635554333, + "learning_rate": 9.480578827113482e-06, + "loss": 18.6113, + "step": 1557 + }, + { + "epoch": 0.02847898806368472, + "grad_norm": 7.1772233524126765, + "learning_rate": 9.486671744097488e-06, + "loss": 17.3414, + "step": 1558 + }, + { + "epoch": 0.028497267260131246, + "grad_norm": 8.008227503749223, + "learning_rate": 9.492764661081493e-06, + "loss": 17.7675, + "step": 1559 + }, + { + "epoch": 0.028515546456577768, + "grad_norm": 7.615853746598634, + "learning_rate": 9.498857578065501e-06, + "loss": 17.6669, + "step": 1560 + }, + { + "epoch": 0.028533825653024293, + "grad_norm": 9.332984012169115, + "learning_rate": 9.504950495049505e-06, + "loss": 18.7201, + "step": 1561 + }, + { + "epoch": 0.02855210484947082, + "grad_norm": 6.923109174353397, + "learning_rate": 9.51104341203351e-06, + "loss": 17.9097, + "step": 1562 + }, + { + "epoch": 0.02857038404591734, + "grad_norm": 7.931663485509248, + "learning_rate": 9.517136329017518e-06, + "loss": 17.7847, + "step": 1563 + }, + { + "epoch": 0.028588663242363865, + "grad_norm": 7.60567237482634, + "learning_rate": 9.523229246001524e-06, + "loss": 17.9421, + "step": 1564 + }, + { + "epoch": 0.02860694243881039, + "grad_norm": 7.780004874882818, + "learning_rate": 9.52932216298553e-06, + "loss": 17.8349, + "step": 1565 + }, + { + "epoch": 0.028625221635256916, + "grad_norm": 7.848484706525409, + "learning_rate": 9.535415079969537e-06, + "loss": 17.9837, + "step": 1566 + }, + { + "epoch": 0.028643500831703438, + "grad_norm": 9.128305637632943, + "learning_rate": 9.541507996953541e-06, + "loss": 18.6661, + "step": 1567 + }, + { + "epoch": 0.028661780028149963, + "grad_norm": 8.250098244391708, + "learning_rate": 9.547600913937549e-06, + "loss": 18.1463, + "step": 1568 + }, + { + "epoch": 0.028680059224596488, + "grad_norm": 7.282911330342466, + "learning_rate": 9.553693830921554e-06, + "loss": 17.9766, + "step": 1569 + }, + { + "epoch": 0.02869833842104301, + "grad_norm": 8.728107028323366, + "learning_rate": 9.55978674790556e-06, + "loss": 18.4801, + "step": 1570 + }, + { + 
"epoch": 0.028716617617489535, + "grad_norm": 7.322854026891061, + "learning_rate": 9.565879664889568e-06, + "loss": 17.4926, + "step": 1571 + }, + { + "epoch": 0.02873489681393606, + "grad_norm": 9.024688004243888, + "learning_rate": 9.571972581873573e-06, + "loss": 18.507, + "step": 1572 + }, + { + "epoch": 0.028753176010382582, + "grad_norm": 8.72362500062873, + "learning_rate": 9.578065498857577e-06, + "loss": 18.3873, + "step": 1573 + }, + { + "epoch": 0.028771455206829107, + "grad_norm": 7.608476714455864, + "learning_rate": 9.584158415841585e-06, + "loss": 17.8651, + "step": 1574 + }, + { + "epoch": 0.028789734403275632, + "grad_norm": 7.4612096250296105, + "learning_rate": 9.59025133282559e-06, + "loss": 17.8404, + "step": 1575 + }, + { + "epoch": 0.028808013599722158, + "grad_norm": 7.472152034411249, + "learning_rate": 9.596344249809596e-06, + "loss": 17.8312, + "step": 1576 + }, + { + "epoch": 0.02882629279616868, + "grad_norm": 8.903692153904508, + "learning_rate": 9.602437166793604e-06, + "loss": 18.5229, + "step": 1577 + }, + { + "epoch": 0.028844571992615205, + "grad_norm": 7.796879083278068, + "learning_rate": 9.60853008377761e-06, + "loss": 17.9882, + "step": 1578 + }, + { + "epoch": 0.02886285118906173, + "grad_norm": 7.756058097712065, + "learning_rate": 9.614623000761615e-06, + "loss": 17.9424, + "step": 1579 + }, + { + "epoch": 0.02888113038550825, + "grad_norm": 9.72247257932452, + "learning_rate": 9.620715917745621e-06, + "loss": 19.3409, + "step": 1580 + }, + { + "epoch": 0.028899409581954777, + "grad_norm": 7.420575822299361, + "learning_rate": 9.626808834729627e-06, + "loss": 17.5876, + "step": 1581 + }, + { + "epoch": 0.028917688778401302, + "grad_norm": 8.134881444053162, + "learning_rate": 9.632901751713634e-06, + "loss": 17.9443, + "step": 1582 + }, + { + "epoch": 0.028935967974847827, + "grad_norm": 8.170014718000967, + "learning_rate": 9.63899466869764e-06, + "loss": 18.1135, + "step": 1583 + }, + { + "epoch": 0.02895424717129435, + "grad_norm": 7.727004414989496, + "learning_rate": 9.645087585681646e-06, + "loss": 18.0018, + "step": 1584 + }, + { + "epoch": 0.028972526367740874, + "grad_norm": 8.724147523267623, + "learning_rate": 9.651180502665652e-06, + "loss": 18.3142, + "step": 1585 + }, + { + "epoch": 0.0289908055641874, + "grad_norm": 6.320770195610164, + "learning_rate": 9.657273419649657e-06, + "loss": 17.0664, + "step": 1586 + }, + { + "epoch": 0.02900908476063392, + "grad_norm": 7.725570724377613, + "learning_rate": 9.663366336633663e-06, + "loss": 18.0783, + "step": 1587 + }, + { + "epoch": 0.029027363957080447, + "grad_norm": 7.694211438039342, + "learning_rate": 9.66945925361767e-06, + "loss": 17.6469, + "step": 1588 + }, + { + "epoch": 0.029045643153526972, + "grad_norm": 7.70476316150784, + "learning_rate": 9.675552170601676e-06, + "loss": 17.7812, + "step": 1589 + }, + { + "epoch": 0.029063922349973494, + "grad_norm": 8.563173823510006, + "learning_rate": 9.681645087585682e-06, + "loss": 18.2982, + "step": 1590 + }, + { + "epoch": 0.02908220154642002, + "grad_norm": 7.4474122934320635, + "learning_rate": 9.68773800456969e-06, + "loss": 17.896, + "step": 1591 + }, + { + "epoch": 0.029100480742866544, + "grad_norm": 8.433496131474252, + "learning_rate": 9.693830921553694e-06, + "loss": 18.0938, + "step": 1592 + }, + { + "epoch": 0.02911875993931307, + "grad_norm": 7.952162694222141, + "learning_rate": 9.699923838537701e-06, + "loss": 17.8548, + "step": 1593 + }, + { + "epoch": 0.02913703913575959, + "grad_norm": 9.547958041113377, + 
"learning_rate": 9.706016755521707e-06, + "loss": 18.7101, + "step": 1594 + }, + { + "epoch": 0.029155318332206116, + "grad_norm": 7.35777251789467, + "learning_rate": 9.712109672505713e-06, + "loss": 17.6883, + "step": 1595 + }, + { + "epoch": 0.02917359752865264, + "grad_norm": 8.204778394839916, + "learning_rate": 9.718202589489718e-06, + "loss": 18.4319, + "step": 1596 + }, + { + "epoch": 0.029191876725099163, + "grad_norm": 8.288620463582449, + "learning_rate": 9.724295506473726e-06, + "loss": 18.2228, + "step": 1597 + }, + { + "epoch": 0.02921015592154569, + "grad_norm": 7.888180051404007, + "learning_rate": 9.730388423457732e-06, + "loss": 17.9801, + "step": 1598 + }, + { + "epoch": 0.029228435117992214, + "grad_norm": 7.935515095892514, + "learning_rate": 9.736481340441737e-06, + "loss": 18.1441, + "step": 1599 + }, + { + "epoch": 0.02924671431443874, + "grad_norm": 6.5810518907819615, + "learning_rate": 9.742574257425743e-06, + "loss": 17.487, + "step": 1600 + }, + { + "epoch": 0.02926499351088526, + "grad_norm": 8.057191057617796, + "learning_rate": 9.748667174409749e-06, + "loss": 18.5901, + "step": 1601 + }, + { + "epoch": 0.029283272707331786, + "grad_norm": 7.454482283214667, + "learning_rate": 9.754760091393756e-06, + "loss": 17.7255, + "step": 1602 + }, + { + "epoch": 0.02930155190377831, + "grad_norm": 7.1869910327073825, + "learning_rate": 9.760853008377762e-06, + "loss": 18.0372, + "step": 1603 + }, + { + "epoch": 0.029319831100224833, + "grad_norm": 8.727975453617983, + "learning_rate": 9.766945925361768e-06, + "loss": 18.4149, + "step": 1604 + }, + { + "epoch": 0.029338110296671358, + "grad_norm": 7.777877274694171, + "learning_rate": 9.773038842345774e-06, + "loss": 17.7231, + "step": 1605 + }, + { + "epoch": 0.029356389493117883, + "grad_norm": 7.513074478390638, + "learning_rate": 9.77913175932978e-06, + "loss": 17.436, + "step": 1606 + }, + { + "epoch": 0.029374668689564405, + "grad_norm": 7.792920525633787, + "learning_rate": 9.785224676313785e-06, + "loss": 18.1177, + "step": 1607 + }, + { + "epoch": 0.02939294788601093, + "grad_norm": 6.898953111016352, + "learning_rate": 9.791317593297793e-06, + "loss": 17.8014, + "step": 1608 + }, + { + "epoch": 0.029411227082457456, + "grad_norm": 8.330048553409595, + "learning_rate": 9.797410510281798e-06, + "loss": 18.1343, + "step": 1609 + }, + { + "epoch": 0.02942950627890398, + "grad_norm": 8.156161288497497, + "learning_rate": 9.803503427265804e-06, + "loss": 18.1073, + "step": 1610 + }, + { + "epoch": 0.029447785475350503, + "grad_norm": 7.707355244288337, + "learning_rate": 9.80959634424981e-06, + "loss": 17.8602, + "step": 1611 + }, + { + "epoch": 0.029466064671797028, + "grad_norm": 6.982593291457571, + "learning_rate": 9.815689261233816e-06, + "loss": 17.3715, + "step": 1612 + }, + { + "epoch": 0.029484343868243553, + "grad_norm": 7.6136798692364644, + "learning_rate": 9.821782178217823e-06, + "loss": 17.6577, + "step": 1613 + }, + { + "epoch": 0.029502623064690075, + "grad_norm": 8.831629567786118, + "learning_rate": 9.827875095201829e-06, + "loss": 18.5191, + "step": 1614 + }, + { + "epoch": 0.0295209022611366, + "grad_norm": 8.241696213267609, + "learning_rate": 9.833968012185835e-06, + "loss": 17.9193, + "step": 1615 + }, + { + "epoch": 0.029539181457583125, + "grad_norm": 8.513703873129042, + "learning_rate": 9.840060929169842e-06, + "loss": 18.2604, + "step": 1616 + }, + { + "epoch": 0.02955746065402965, + "grad_norm": 8.166623481874485, + "learning_rate": 9.846153846153848e-06, + "loss": 18.3146, + "step": 
1617 + }, + { + "epoch": 0.029575739850476172, + "grad_norm": 9.591123009193785, + "learning_rate": 9.852246763137852e-06, + "loss": 18.0625, + "step": 1618 + }, + { + "epoch": 0.029594019046922698, + "grad_norm": 7.342378307838155, + "learning_rate": 9.85833968012186e-06, + "loss": 17.7567, + "step": 1619 + }, + { + "epoch": 0.029612298243369223, + "grad_norm": 7.711579400603312, + "learning_rate": 9.864432597105865e-06, + "loss": 17.5905, + "step": 1620 + }, + { + "epoch": 0.029630577439815745, + "grad_norm": 8.719640722806627, + "learning_rate": 9.87052551408987e-06, + "loss": 18.6454, + "step": 1621 + }, + { + "epoch": 0.02964885663626227, + "grad_norm": 8.310001366312644, + "learning_rate": 9.876618431073878e-06, + "loss": 18.2321, + "step": 1622 + }, + { + "epoch": 0.029667135832708795, + "grad_norm": 8.298224474526126, + "learning_rate": 9.882711348057884e-06, + "loss": 18.2345, + "step": 1623 + }, + { + "epoch": 0.029685415029155317, + "grad_norm": 7.868201262455768, + "learning_rate": 9.88880426504189e-06, + "loss": 18.2417, + "step": 1624 + }, + { + "epoch": 0.029703694225601842, + "grad_norm": 7.868651271556509, + "learning_rate": 9.894897182025896e-06, + "loss": 18.1694, + "step": 1625 + }, + { + "epoch": 0.029721973422048367, + "grad_norm": 7.213672615749309, + "learning_rate": 9.900990099009901e-06, + "loss": 17.8703, + "step": 1626 + }, + { + "epoch": 0.029740252618494892, + "grad_norm": 9.445225101439867, + "learning_rate": 9.907083015993909e-06, + "loss": 18.9733, + "step": 1627 + }, + { + "epoch": 0.029758531814941414, + "grad_norm": 8.085466053201765, + "learning_rate": 9.913175932977915e-06, + "loss": 18.1514, + "step": 1628 + }, + { + "epoch": 0.02977681101138794, + "grad_norm": 8.670797616389512, + "learning_rate": 9.91926884996192e-06, + "loss": 18.1964, + "step": 1629 + }, + { + "epoch": 0.029795090207834465, + "grad_norm": 9.299323756435573, + "learning_rate": 9.925361766945926e-06, + "loss": 18.3876, + "step": 1630 + }, + { + "epoch": 0.029813369404280986, + "grad_norm": 7.965960012792072, + "learning_rate": 9.931454683929932e-06, + "loss": 17.6732, + "step": 1631 + }, + { + "epoch": 0.02983164860072751, + "grad_norm": 8.34160354865226, + "learning_rate": 9.937547600913938e-06, + "loss": 18.0594, + "step": 1632 + }, + { + "epoch": 0.029849927797174037, + "grad_norm": 8.703633671131135, + "learning_rate": 9.943640517897945e-06, + "loss": 18.2577, + "step": 1633 + }, + { + "epoch": 0.029868206993620562, + "grad_norm": 8.138745042647287, + "learning_rate": 9.94973343488195e-06, + "loss": 17.9059, + "step": 1634 + }, + { + "epoch": 0.029886486190067084, + "grad_norm": 6.956669647829765, + "learning_rate": 9.955826351865957e-06, + "loss": 17.4469, + "step": 1635 + }, + { + "epoch": 0.02990476538651361, + "grad_norm": 7.462595981410311, + "learning_rate": 9.961919268849962e-06, + "loss": 17.7349, + "step": 1636 + }, + { + "epoch": 0.029923044582960134, + "grad_norm": 7.365790430570946, + "learning_rate": 9.968012185833968e-06, + "loss": 17.743, + "step": 1637 + }, + { + "epoch": 0.029941323779406656, + "grad_norm": 7.9050808159133, + "learning_rate": 9.974105102817975e-06, + "loss": 17.8508, + "step": 1638 + }, + { + "epoch": 0.02995960297585318, + "grad_norm": 7.08010023731484, + "learning_rate": 9.980198019801981e-06, + "loss": 17.6474, + "step": 1639 + }, + { + "epoch": 0.029977882172299707, + "grad_norm": 7.241082228675219, + "learning_rate": 9.986290936785987e-06, + "loss": 17.5394, + "step": 1640 + }, + { + "epoch": 0.02999616136874623, + "grad_norm": 
6.70050213401771, + "learning_rate": 9.992383853769993e-06, + "loss": 17.5436, + "step": 1641 + }, + { + "epoch": 0.030014440565192754, + "grad_norm": 8.603058800151555, + "learning_rate": 9.998476770754e-06, + "loss": 18.2784, + "step": 1642 + }, + { + "epoch": 0.03003271976163928, + "grad_norm": 8.16200725958428, + "learning_rate": 9.999999995071216e-06, + "loss": 18.1774, + "step": 1643 + }, + { + "epoch": 0.030050998958085804, + "grad_norm": 8.77092898355948, + "learning_rate": 9.99999997316551e-06, + "loss": 18.3002, + "step": 1644 + }, + { + "epoch": 0.030069278154532326, + "grad_norm": 7.532952323927857, + "learning_rate": 9.999999933735236e-06, + "loss": 17.752, + "step": 1645 + }, + { + "epoch": 0.03008755735097885, + "grad_norm": 7.604105027865724, + "learning_rate": 9.999999876780395e-06, + "loss": 17.7217, + "step": 1646 + }, + { + "epoch": 0.030105836547425376, + "grad_norm": 8.217025734469807, + "learning_rate": 9.99999980230099e-06, + "loss": 18.2265, + "step": 1647 + }, + { + "epoch": 0.030124115743871898, + "grad_norm": 7.19985957568226, + "learning_rate": 9.99999971029702e-06, + "loss": 17.6727, + "step": 1648 + }, + { + "epoch": 0.030142394940318423, + "grad_norm": 7.4626368055052055, + "learning_rate": 9.999999600768484e-06, + "loss": 17.6681, + "step": 1649 + }, + { + "epoch": 0.03016067413676495, + "grad_norm": 8.597444246697309, + "learning_rate": 9.999999473715385e-06, + "loss": 18.2233, + "step": 1650 + }, + { + "epoch": 0.030178953333211474, + "grad_norm": 7.339821522451504, + "learning_rate": 9.99999932913772e-06, + "loss": 18.012, + "step": 1651 + }, + { + "epoch": 0.030197232529657995, + "grad_norm": 9.495837231113391, + "learning_rate": 9.99999916703549e-06, + "loss": 18.7163, + "step": 1652 + }, + { + "epoch": 0.03021551172610452, + "grad_norm": 8.153424924313496, + "learning_rate": 9.999998987408699e-06, + "loss": 18.1528, + "step": 1653 + }, + { + "epoch": 0.030233790922551046, + "grad_norm": 7.4629859005548305, + "learning_rate": 9.999998790257344e-06, + "loss": 17.9068, + "step": 1654 + }, + { + "epoch": 0.030252070118997568, + "grad_norm": 7.5700990178487695, + "learning_rate": 9.99999857558143e-06, + "loss": 17.6228, + "step": 1655 + }, + { + "epoch": 0.030270349315444093, + "grad_norm": 8.497903245407256, + "learning_rate": 9.99999834338095e-06, + "loss": 18.1078, + "step": 1656 + }, + { + "epoch": 0.030288628511890618, + "grad_norm": 8.217118353802384, + "learning_rate": 9.999998093655913e-06, + "loss": 18.2546, + "step": 1657 + }, + { + "epoch": 0.03030690770833714, + "grad_norm": 7.85525980711628, + "learning_rate": 9.999997826406315e-06, + "loss": 17.9371, + "step": 1658 + }, + { + "epoch": 0.030325186904783665, + "grad_norm": 7.506971097466327, + "learning_rate": 9.99999754163216e-06, + "loss": 17.9911, + "step": 1659 + }, + { + "epoch": 0.03034346610123019, + "grad_norm": 7.483869241789261, + "learning_rate": 9.999997239333448e-06, + "loss": 17.6404, + "step": 1660 + }, + { + "epoch": 0.030361745297676716, + "grad_norm": 7.8702369812607955, + "learning_rate": 9.999996919510177e-06, + "loss": 18.014, + "step": 1661 + }, + { + "epoch": 0.030380024494123237, + "grad_norm": 7.478131951125737, + "learning_rate": 9.999996582162353e-06, + "loss": 17.9119, + "step": 1662 + }, + { + "epoch": 0.030398303690569763, + "grad_norm": 6.891453312552426, + "learning_rate": 9.999996227289975e-06, + "loss": 17.5924, + "step": 1663 + }, + { + "epoch": 0.030416582887016288, + "grad_norm": 7.649411616217506, + "learning_rate": 9.999995854893042e-06, + "loss": 17.8496, 
+ "step": 1664 + }, + { + "epoch": 0.03043486208346281, + "grad_norm": 7.687138540451615, + "learning_rate": 9.999995464971559e-06, + "loss": 18.0789, + "step": 1665 + }, + { + "epoch": 0.030453141279909335, + "grad_norm": 6.754059755238239, + "learning_rate": 9.999995057525525e-06, + "loss": 17.5271, + "step": 1666 + }, + { + "epoch": 0.03047142047635586, + "grad_norm": 9.442846909744565, + "learning_rate": 9.999994632554943e-06, + "loss": 18.5949, + "step": 1667 + }, + { + "epoch": 0.030489699672802385, + "grad_norm": 8.563193819594666, + "learning_rate": 9.999994190059814e-06, + "loss": 18.1301, + "step": 1668 + }, + { + "epoch": 0.030507978869248907, + "grad_norm": 8.829640791058583, + "learning_rate": 9.999993730040137e-06, + "loss": 17.6752, + "step": 1669 + }, + { + "epoch": 0.030526258065695432, + "grad_norm": 7.8977755747954035, + "learning_rate": 9.999993252495917e-06, + "loss": 18.0845, + "step": 1670 + }, + { + "epoch": 0.030544537262141958, + "grad_norm": 6.729064744656194, + "learning_rate": 9.999992757427155e-06, + "loss": 17.4574, + "step": 1671 + }, + { + "epoch": 0.03056281645858848, + "grad_norm": 6.678060278276319, + "learning_rate": 9.999992244833852e-06, + "loss": 17.4589, + "step": 1672 + }, + { + "epoch": 0.030581095655035005, + "grad_norm": 7.737125905516272, + "learning_rate": 9.99999171471601e-06, + "loss": 17.8177, + "step": 1673 + }, + { + "epoch": 0.03059937485148153, + "grad_norm": 7.657764374448887, + "learning_rate": 9.999991167073632e-06, + "loss": 18.2387, + "step": 1674 + }, + { + "epoch": 0.03061765404792805, + "grad_norm": 8.346199749612207, + "learning_rate": 9.999990601906717e-06, + "loss": 18.0601, + "step": 1675 + }, + { + "epoch": 0.030635933244374577, + "grad_norm": 6.530886845125346, + "learning_rate": 9.999990019215271e-06, + "loss": 17.3699, + "step": 1676 + }, + { + "epoch": 0.030654212440821102, + "grad_norm": 8.763388542357818, + "learning_rate": 9.999989418999292e-06, + "loss": 18.0928, + "step": 1677 + }, + { + "epoch": 0.030672491637267627, + "grad_norm": 8.045196888267036, + "learning_rate": 9.999988801258785e-06, + "loss": 17.999, + "step": 1678 + }, + { + "epoch": 0.03069077083371415, + "grad_norm": 9.041537569253046, + "learning_rate": 9.999988165993751e-06, + "loss": 18.5063, + "step": 1679 + }, + { + "epoch": 0.030709050030160674, + "grad_norm": 8.317873113243014, + "learning_rate": 9.999987513204192e-06, + "loss": 18.4539, + "step": 1680 + }, + { + "epoch": 0.0307273292266072, + "grad_norm": 7.850982443122049, + "learning_rate": 9.99998684289011e-06, + "loss": 18.1377, + "step": 1681 + }, + { + "epoch": 0.03074560842305372, + "grad_norm": 8.09374934043548, + "learning_rate": 9.999986155051508e-06, + "loss": 17.9622, + "step": 1682 + }, + { + "epoch": 0.030763887619500246, + "grad_norm": 8.2254588213774, + "learning_rate": 9.99998544968839e-06, + "loss": 18.2687, + "step": 1683 + }, + { + "epoch": 0.03078216681594677, + "grad_norm": 7.751994688589054, + "learning_rate": 9.999984726800756e-06, + "loss": 18.1268, + "step": 1684 + }, + { + "epoch": 0.030800446012393297, + "grad_norm": 8.142840854061875, + "learning_rate": 9.99998398638861e-06, + "loss": 18.0481, + "step": 1685 + }, + { + "epoch": 0.03081872520883982, + "grad_norm": 7.412709744013709, + "learning_rate": 9.999983228451953e-06, + "loss": 17.6665, + "step": 1686 + }, + { + "epoch": 0.030837004405286344, + "grad_norm": 8.274057649578133, + "learning_rate": 9.999982452990789e-06, + "loss": 18.1678, + "step": 1687 + }, + { + "epoch": 0.03085528360173287, + "grad_norm": 
7.733422141237051, + "learning_rate": 9.99998166000512e-06, + "loss": 18.0693, + "step": 1688 + }, + { + "epoch": 0.03087356279817939, + "grad_norm": 6.331266037416006, + "learning_rate": 9.99998084949495e-06, + "loss": 17.3541, + "step": 1689 + }, + { + "epoch": 0.030891841994625916, + "grad_norm": 8.981301420778157, + "learning_rate": 9.99998002146028e-06, + "loss": 18.3912, + "step": 1690 + }, + { + "epoch": 0.03091012119107244, + "grad_norm": 8.480155189671931, + "learning_rate": 9.999979175901116e-06, + "loss": 18.6498, + "step": 1691 + }, + { + "epoch": 0.030928400387518963, + "grad_norm": 8.869112873111437, + "learning_rate": 9.999978312817455e-06, + "loss": 18.1799, + "step": 1692 + }, + { + "epoch": 0.03094667958396549, + "grad_norm": 7.102599203870701, + "learning_rate": 9.999977432209306e-06, + "loss": 17.654, + "step": 1693 + }, + { + "epoch": 0.030964958780412014, + "grad_norm": 7.914076704773177, + "learning_rate": 9.999976534076672e-06, + "loss": 17.7237, + "step": 1694 + }, + { + "epoch": 0.03098323797685854, + "grad_norm": 7.402304949605478, + "learning_rate": 9.999975618419553e-06, + "loss": 17.7698, + "step": 1695 + }, + { + "epoch": 0.03100151717330506, + "grad_norm": 7.1955102355883565, + "learning_rate": 9.999974685237951e-06, + "loss": 17.6303, + "step": 1696 + }, + { + "epoch": 0.031019796369751586, + "grad_norm": 7.944128019819306, + "learning_rate": 9.999973734531873e-06, + "loss": 17.8963, + "step": 1697 + }, + { + "epoch": 0.03103807556619811, + "grad_norm": 7.73052692176272, + "learning_rate": 9.999972766301323e-06, + "loss": 18.0474, + "step": 1698 + }, + { + "epoch": 0.031056354762644633, + "grad_norm": 8.060643549825397, + "learning_rate": 9.9999717805463e-06, + "loss": 18.2613, + "step": 1699 + }, + { + "epoch": 0.031074633959091158, + "grad_norm": 6.903036766193613, + "learning_rate": 9.99997077726681e-06, + "loss": 17.5884, + "step": 1700 + }, + { + "epoch": 0.031092913155537683, + "grad_norm": 7.937945436907774, + "learning_rate": 9.999969756462858e-06, + "loss": 18.2496, + "step": 1701 + }, + { + "epoch": 0.03111119235198421, + "grad_norm": 8.85048821164819, + "learning_rate": 9.999968718134443e-06, + "loss": 18.4589, + "step": 1702 + }, + { + "epoch": 0.03112947154843073, + "grad_norm": 6.437826666373901, + "learning_rate": 9.999967662281574e-06, + "loss": 17.6258, + "step": 1703 + }, + { + "epoch": 0.031147750744877255, + "grad_norm": 8.149168186932878, + "learning_rate": 9.99996658890425e-06, + "loss": 18.0005, + "step": 1704 + }, + { + "epoch": 0.03116602994132378, + "grad_norm": 8.46177987736004, + "learning_rate": 9.99996549800248e-06, + "loss": 18.4022, + "step": 1705 + }, + { + "epoch": 0.031184309137770302, + "grad_norm": 7.1207036582000365, + "learning_rate": 9.999964389576262e-06, + "loss": 17.7006, + "step": 1706 + }, + { + "epoch": 0.031202588334216828, + "grad_norm": 8.74135307600333, + "learning_rate": 9.999963263625604e-06, + "loss": 18.1906, + "step": 1707 + }, + { + "epoch": 0.031220867530663353, + "grad_norm": 7.094062218917512, + "learning_rate": 9.999962120150507e-06, + "loss": 17.5309, + "step": 1708 + }, + { + "epoch": 0.031239146727109875, + "grad_norm": 6.13369738189881, + "learning_rate": 9.99996095915098e-06, + "loss": 17.1825, + "step": 1709 + }, + { + "epoch": 0.0312574259235564, + "grad_norm": 8.334653328554893, + "learning_rate": 9.999959780627021e-06, + "loss": 18.2506, + "step": 1710 + }, + { + "epoch": 0.031275705120002925, + "grad_norm": 7.9067533539838815, + "learning_rate": 9.999958584578638e-06, + "loss": 18.3141, + 
"step": 1711 + }, + { + "epoch": 0.03129398431644945, + "grad_norm": 7.669650189051857, + "learning_rate": 9.999957371005833e-06, + "loss": 17.7663, + "step": 1712 + }, + { + "epoch": 0.031312263512895976, + "grad_norm": 6.893918883243496, + "learning_rate": 9.999956139908613e-06, + "loss": 17.5321, + "step": 1713 + }, + { + "epoch": 0.0313305427093425, + "grad_norm": 8.880750021766438, + "learning_rate": 9.999954891286978e-06, + "loss": 18.2603, + "step": 1714 + }, + { + "epoch": 0.03134882190578902, + "grad_norm": 7.745527335498254, + "learning_rate": 9.999953625140938e-06, + "loss": 18.1199, + "step": 1715 + }, + { + "epoch": 0.03136710110223555, + "grad_norm": 8.629927372918612, + "learning_rate": 9.999952341470492e-06, + "loss": 18.442, + "step": 1716 + }, + { + "epoch": 0.03138538029868207, + "grad_norm": 9.315829947449322, + "learning_rate": 9.999951040275648e-06, + "loss": 18.4559, + "step": 1717 + }, + { + "epoch": 0.03140365949512859, + "grad_norm": 8.011886242632372, + "learning_rate": 9.99994972155641e-06, + "loss": 17.8989, + "step": 1718 + }, + { + "epoch": 0.03142193869157512, + "grad_norm": 8.688456567864376, + "learning_rate": 9.99994838531278e-06, + "loss": 17.9268, + "step": 1719 + }, + { + "epoch": 0.03144021788802164, + "grad_norm": 7.863237480162188, + "learning_rate": 9.999947031544768e-06, + "loss": 18.0234, + "step": 1720 + }, + { + "epoch": 0.031458497084468164, + "grad_norm": 9.614738708606128, + "learning_rate": 9.999945660252372e-06, + "loss": 19.0878, + "step": 1721 + }, + { + "epoch": 0.03147677628091469, + "grad_norm": 7.484839742830578, + "learning_rate": 9.999944271435604e-06, + "loss": 17.7975, + "step": 1722 + }, + { + "epoch": 0.031495055477361214, + "grad_norm": 8.302144392083973, + "learning_rate": 9.999942865094463e-06, + "loss": 18.1382, + "step": 1723 + }, + { + "epoch": 0.03151333467380774, + "grad_norm": 8.800750917473952, + "learning_rate": 9.999941441228955e-06, + "loss": 18.5514, + "step": 1724 + }, + { + "epoch": 0.031531613870254264, + "grad_norm": 8.085960134063056, + "learning_rate": 9.999939999839087e-06, + "loss": 18.0244, + "step": 1725 + }, + { + "epoch": 0.031549893066700786, + "grad_norm": 8.970989611370843, + "learning_rate": 9.999938540924865e-06, + "loss": 18.4848, + "step": 1726 + }, + { + "epoch": 0.031568172263147315, + "grad_norm": 13.206975008573506, + "learning_rate": 9.999937064486292e-06, + "loss": 18.5302, + "step": 1727 + }, + { + "epoch": 0.03158645145959384, + "grad_norm": 7.733792813452686, + "learning_rate": 9.999935570523371e-06, + "loss": 17.971, + "step": 1728 + }, + { + "epoch": 0.03160473065604036, + "grad_norm": 8.211191887685656, + "learning_rate": 9.999934059036111e-06, + "loss": 17.9593, + "step": 1729 + }, + { + "epoch": 0.03162300985248689, + "grad_norm": 7.575399705091664, + "learning_rate": 9.999932530024517e-06, + "loss": 18.111, + "step": 1730 + }, + { + "epoch": 0.03164128904893341, + "grad_norm": 7.464121770275439, + "learning_rate": 9.999930983488592e-06, + "loss": 17.9209, + "step": 1731 + }, + { + "epoch": 0.03165956824537993, + "grad_norm": 7.272034710638791, + "learning_rate": 9.999929419428345e-06, + "loss": 17.7786, + "step": 1732 + }, + { + "epoch": 0.03167784744182646, + "grad_norm": 8.457109924726934, + "learning_rate": 9.999927837843778e-06, + "loss": 18.276, + "step": 1733 + }, + { + "epoch": 0.03169612663827298, + "grad_norm": 9.83487092928988, + "learning_rate": 9.999926238734896e-06, + "loss": 18.1567, + "step": 1734 + }, + { + "epoch": 0.0317144058347195, + "grad_norm": 
7.058737498003848, + "learning_rate": 9.999924622101708e-06, + "loss": 17.4511, + "step": 1735 + }, + { + "epoch": 0.03173268503116603, + "grad_norm": 17.665633839089814, + "learning_rate": 9.999922987944218e-06, + "loss": 18.6444, + "step": 1736 + }, + { + "epoch": 0.03175096422761255, + "grad_norm": 7.935431722583716, + "learning_rate": 9.999921336262432e-06, + "loss": 17.7928, + "step": 1737 + }, + { + "epoch": 0.031769243424059075, + "grad_norm": 7.7888607207513205, + "learning_rate": 9.999919667056355e-06, + "loss": 18.1576, + "step": 1738 + }, + { + "epoch": 0.031787522620505604, + "grad_norm": 8.223768512472157, + "learning_rate": 9.999917980325993e-06, + "loss": 18.2164, + "step": 1739 + }, + { + "epoch": 0.031805801816952126, + "grad_norm": 7.552464065738869, + "learning_rate": 9.999916276071352e-06, + "loss": 17.7676, + "step": 1740 + }, + { + "epoch": 0.031824081013398654, + "grad_norm": 7.752283479585389, + "learning_rate": 9.99991455429244e-06, + "loss": 17.7247, + "step": 1741 + }, + { + "epoch": 0.031842360209845176, + "grad_norm": 8.32808808108957, + "learning_rate": 9.99991281498926e-06, + "loss": 18.2492, + "step": 1742 + }, + { + "epoch": 0.0318606394062917, + "grad_norm": 7.311715080136691, + "learning_rate": 9.999911058161821e-06, + "loss": 17.6998, + "step": 1743 + }, + { + "epoch": 0.031878918602738227, + "grad_norm": 7.809550576897194, + "learning_rate": 9.999909283810127e-06, + "loss": 17.8632, + "step": 1744 + }, + { + "epoch": 0.03189719779918475, + "grad_norm": 8.643321746507269, + "learning_rate": 9.999907491934184e-06, + "loss": 18.3235, + "step": 1745 + }, + { + "epoch": 0.03191547699563127, + "grad_norm": 8.69193791200664, + "learning_rate": 9.999905682534002e-06, + "loss": 18.1556, + "step": 1746 + }, + { + "epoch": 0.0319337561920778, + "grad_norm": 7.41714272691651, + "learning_rate": 9.99990385560958e-06, + "loss": 17.4267, + "step": 1747 + }, + { + "epoch": 0.03195203538852432, + "grad_norm": 7.647497052491643, + "learning_rate": 9.99990201116093e-06, + "loss": 17.8335, + "step": 1748 + }, + { + "epoch": 0.03197031458497084, + "grad_norm": 8.250167251223719, + "learning_rate": 9.99990014918806e-06, + "loss": 17.9621, + "step": 1749 + }, + { + "epoch": 0.03198859378141737, + "grad_norm": 7.817423589544294, + "learning_rate": 9.999898269690972e-06, + "loss": 18.0841, + "step": 1750 + }, + { + "epoch": 0.03200687297786389, + "grad_norm": 9.061676820807548, + "learning_rate": 9.999896372669675e-06, + "loss": 18.3327, + "step": 1751 + }, + { + "epoch": 0.032025152174310414, + "grad_norm": 8.171012294543834, + "learning_rate": 9.999894458124176e-06, + "loss": 18.2276, + "step": 1752 + }, + { + "epoch": 0.03204343137075694, + "grad_norm": 8.81443248428237, + "learning_rate": 9.99989252605448e-06, + "loss": 18.2164, + "step": 1753 + }, + { + "epoch": 0.032061710567203465, + "grad_norm": 8.244576239047426, + "learning_rate": 9.999890576460593e-06, + "loss": 18.0588, + "step": 1754 + }, + { + "epoch": 0.03207998976364999, + "grad_norm": 7.500940593794085, + "learning_rate": 9.999888609342523e-06, + "loss": 17.9209, + "step": 1755 + }, + { + "epoch": 0.032098268960096515, + "grad_norm": 8.445595785573039, + "learning_rate": 9.99988662470028e-06, + "loss": 18.0948, + "step": 1756 + }, + { + "epoch": 0.03211654815654304, + "grad_norm": 8.781581898387014, + "learning_rate": 9.999884622533866e-06, + "loss": 18.289, + "step": 1757 + }, + { + "epoch": 0.032134827352989566, + "grad_norm": 8.653088007149135, + "learning_rate": 9.999882602843292e-06, + "loss": 18.0247, + 
"step": 1758 + }, + { + "epoch": 0.03215310654943609, + "grad_norm": 8.900805886681988, + "learning_rate": 9.999880565628564e-06, + "loss": 18.0382, + "step": 1759 + }, + { + "epoch": 0.03217138574588261, + "grad_norm": 6.761814575284438, + "learning_rate": 9.999878510889686e-06, + "loss": 17.549, + "step": 1760 + }, + { + "epoch": 0.03218966494232914, + "grad_norm": 8.382118165256745, + "learning_rate": 9.999876438626669e-06, + "loss": 18.1429, + "step": 1761 + }, + { + "epoch": 0.03220794413877566, + "grad_norm": 8.748807697101075, + "learning_rate": 9.99987434883952e-06, + "loss": 17.8109, + "step": 1762 + }, + { + "epoch": 0.03222622333522218, + "grad_norm": 8.559930526855778, + "learning_rate": 9.999872241528244e-06, + "loss": 18.5522, + "step": 1763 + }, + { + "epoch": 0.03224450253166871, + "grad_norm": 7.741650185924381, + "learning_rate": 9.99987011669285e-06, + "loss": 17.8154, + "step": 1764 + }, + { + "epoch": 0.03226278172811523, + "grad_norm": 6.293094129356059, + "learning_rate": 9.999867974333345e-06, + "loss": 17.4139, + "step": 1765 + }, + { + "epoch": 0.032281060924561754, + "grad_norm": 7.907972598846681, + "learning_rate": 9.999865814449734e-06, + "loss": 18.1276, + "step": 1766 + }, + { + "epoch": 0.03229934012100828, + "grad_norm": 8.237264405705627, + "learning_rate": 9.99986363704203e-06, + "loss": 18.3052, + "step": 1767 + }, + { + "epoch": 0.032317619317454804, + "grad_norm": 9.82774985313453, + "learning_rate": 9.999861442110238e-06, + "loss": 18.9897, + "step": 1768 + }, + { + "epoch": 0.032335898513901326, + "grad_norm": 6.961508335763593, + "learning_rate": 9.999859229654364e-06, + "loss": 17.55, + "step": 1769 + }, + { + "epoch": 0.032354177710347855, + "grad_norm": 7.384652934661802, + "learning_rate": 9.99985699967442e-06, + "loss": 17.5297, + "step": 1770 + }, + { + "epoch": 0.03237245690679438, + "grad_norm": 7.411518265275107, + "learning_rate": 9.999854752170409e-06, + "loss": 17.6561, + "step": 1771 + }, + { + "epoch": 0.0323907361032409, + "grad_norm": 9.123059934482233, + "learning_rate": 9.99985248714234e-06, + "loss": 18.4026, + "step": 1772 + }, + { + "epoch": 0.03240901529968743, + "grad_norm": 8.417191475521836, + "learning_rate": 9.999850204590224e-06, + "loss": 17.8137, + "step": 1773 + }, + { + "epoch": 0.03242729449613395, + "grad_norm": 10.61543485670314, + "learning_rate": 9.999847904514066e-06, + "loss": 18.8098, + "step": 1774 + }, + { + "epoch": 0.03244557369258048, + "grad_norm": 8.532504542166224, + "learning_rate": 9.999845586913876e-06, + "loss": 18.3173, + "step": 1775 + }, + { + "epoch": 0.032463852889027, + "grad_norm": 7.43589045622056, + "learning_rate": 9.999843251789659e-06, + "loss": 17.5071, + "step": 1776 + }, + { + "epoch": 0.03248213208547352, + "grad_norm": 8.617687971638343, + "learning_rate": 9.999840899141426e-06, + "loss": 18.0293, + "step": 1777 + }, + { + "epoch": 0.03250041128192005, + "grad_norm": 6.778603432440775, + "learning_rate": 9.999838528969186e-06, + "loss": 17.6333, + "step": 1778 + }, + { + "epoch": 0.03251869047836657, + "grad_norm": 9.147403305663513, + "learning_rate": 9.999836141272945e-06, + "loss": 18.2796, + "step": 1779 + }, + { + "epoch": 0.03253696967481309, + "grad_norm": 8.132127085631549, + "learning_rate": 9.99983373605271e-06, + "loss": 17.8568, + "step": 1780 + }, + { + "epoch": 0.03255524887125962, + "grad_norm": 7.365472948844682, + "learning_rate": 9.999831313308495e-06, + "loss": 17.6739, + "step": 1781 + }, + { + "epoch": 0.032573528067706144, + "grad_norm": 8.497651957313906, + 
"learning_rate": 9.999828873040303e-06, + "loss": 18.2875, + "step": 1782 + }, + { + "epoch": 0.032591807264152665, + "grad_norm": 8.484429761607142, + "learning_rate": 9.999826415248146e-06, + "loss": 18.1602, + "step": 1783 + }, + { + "epoch": 0.032610086460599194, + "grad_norm": 9.548530803029397, + "learning_rate": 9.999823939932031e-06, + "loss": 19.0218, + "step": 1784 + }, + { + "epoch": 0.032628365657045716, + "grad_norm": 6.485042818984111, + "learning_rate": 9.999821447091967e-06, + "loss": 17.4927, + "step": 1785 + }, + { + "epoch": 0.03264664485349224, + "grad_norm": 8.360808490797504, + "learning_rate": 9.999818936727963e-06, + "loss": 18.4266, + "step": 1786 + }, + { + "epoch": 0.032664924049938766, + "grad_norm": 6.781187397765794, + "learning_rate": 9.999816408840024e-06, + "loss": 17.5942, + "step": 1787 + }, + { + "epoch": 0.03268320324638529, + "grad_norm": 6.312570278027369, + "learning_rate": 9.999813863428167e-06, + "loss": 17.3115, + "step": 1788 + }, + { + "epoch": 0.03270148244283181, + "grad_norm": 8.02380292866149, + "learning_rate": 9.999811300492394e-06, + "loss": 17.8317, + "step": 1789 + }, + { + "epoch": 0.03271976163927834, + "grad_norm": 9.2842251146869, + "learning_rate": 9.999808720032717e-06, + "loss": 18.6453, + "step": 1790 + }, + { + "epoch": 0.03273804083572486, + "grad_norm": 7.710705644639956, + "learning_rate": 9.999806122049144e-06, + "loss": 17.9747, + "step": 1791 + }, + { + "epoch": 0.03275632003217139, + "grad_norm": 7.297062474874555, + "learning_rate": 9.999803506541683e-06, + "loss": 17.7699, + "step": 1792 + }, + { + "epoch": 0.03277459922861791, + "grad_norm": 7.1899855935789505, + "learning_rate": 9.999800873510347e-06, + "loss": 17.5291, + "step": 1793 + }, + { + "epoch": 0.03279287842506443, + "grad_norm": 7.666647090498624, + "learning_rate": 9.999798222955142e-06, + "loss": 18.088, + "step": 1794 + }, + { + "epoch": 0.03281115762151096, + "grad_norm": 8.884884975344518, + "learning_rate": 9.999795554876078e-06, + "loss": 18.2303, + "step": 1795 + }, + { + "epoch": 0.03282943681795748, + "grad_norm": 7.208892860970315, + "learning_rate": 9.999792869273165e-06, + "loss": 17.2642, + "step": 1796 + }, + { + "epoch": 0.032847716014404005, + "grad_norm": 6.873435544437457, + "learning_rate": 9.99979016614641e-06, + "loss": 17.6393, + "step": 1797 + }, + { + "epoch": 0.032865995210850533, + "grad_norm": 8.38149535209502, + "learning_rate": 9.999787445495825e-06, + "loss": 18.1904, + "step": 1798 + }, + { + "epoch": 0.032884274407297055, + "grad_norm": 7.783435755971429, + "learning_rate": 9.999784707321419e-06, + "loss": 18.0207, + "step": 1799 + }, + { + "epoch": 0.03290255360374358, + "grad_norm": 8.311374471647397, + "learning_rate": 9.999781951623202e-06, + "loss": 18.3539, + "step": 1800 + }, + { + "epoch": 0.032920832800190106, + "grad_norm": 8.174072765469017, + "learning_rate": 9.999779178401183e-06, + "loss": 17.9323, + "step": 1801 + }, + { + "epoch": 0.03293911199663663, + "grad_norm": 7.232056843641817, + "learning_rate": 9.999776387655372e-06, + "loss": 17.8261, + "step": 1802 + }, + { + "epoch": 0.03295739119308315, + "grad_norm": 8.218568405365268, + "learning_rate": 9.999773579385779e-06, + "loss": 18.2206, + "step": 1803 + }, + { + "epoch": 0.03297567038952968, + "grad_norm": 8.093855359353427, + "learning_rate": 9.999770753592413e-06, + "loss": 18.1971, + "step": 1804 + }, + { + "epoch": 0.0329939495859762, + "grad_norm": 8.901442169351329, + "learning_rate": 9.999767910275283e-06, + "loss": 18.1861, + "step": 1805 + }, 
+ { + "epoch": 0.03301222878242272, + "grad_norm": 7.670484431715468, + "learning_rate": 9.999765049434403e-06, + "loss": 17.7351, + "step": 1806 + }, + { + "epoch": 0.03303050797886925, + "grad_norm": 8.876821800472102, + "learning_rate": 9.999762171069777e-06, + "loss": 18.2117, + "step": 1807 + }, + { + "epoch": 0.03304878717531577, + "grad_norm": 7.313675489864291, + "learning_rate": 9.999759275181421e-06, + "loss": 17.8017, + "step": 1808 + }, + { + "epoch": 0.0330670663717623, + "grad_norm": 8.396988203693653, + "learning_rate": 9.999756361769342e-06, + "loss": 18.074, + "step": 1809 + }, + { + "epoch": 0.03308534556820882, + "grad_norm": 7.325734865195764, + "learning_rate": 9.99975343083355e-06, + "loss": 17.8234, + "step": 1810 + }, + { + "epoch": 0.033103624764655344, + "grad_norm": 7.32385429488872, + "learning_rate": 9.999750482374057e-06, + "loss": 17.6628, + "step": 1811 + }, + { + "epoch": 0.03312190396110187, + "grad_norm": 7.584415550741578, + "learning_rate": 9.999747516390872e-06, + "loss": 17.8922, + "step": 1812 + }, + { + "epoch": 0.033140183157548395, + "grad_norm": 6.8395529500348164, + "learning_rate": 9.999744532884006e-06, + "loss": 17.7227, + "step": 1813 + }, + { + "epoch": 0.033158462353994916, + "grad_norm": 7.67886942130527, + "learning_rate": 9.999741531853469e-06, + "loss": 17.9991, + "step": 1814 + }, + { + "epoch": 0.033176741550441445, + "grad_norm": 7.686021597942992, + "learning_rate": 9.999738513299273e-06, + "loss": 18.0536, + "step": 1815 + }, + { + "epoch": 0.03319502074688797, + "grad_norm": 7.563094170574433, + "learning_rate": 9.999735477221426e-06, + "loss": 17.8883, + "step": 1816 + }, + { + "epoch": 0.03321329994333449, + "grad_norm": 6.360992366183429, + "learning_rate": 9.999732423619941e-06, + "loss": 17.331, + "step": 1817 + }, + { + "epoch": 0.03323157913978102, + "grad_norm": 6.9729700691565535, + "learning_rate": 9.999729352494827e-06, + "loss": 17.9784, + "step": 1818 + }, + { + "epoch": 0.03324985833622754, + "grad_norm": 8.680611695987398, + "learning_rate": 9.999726263846096e-06, + "loss": 18.2799, + "step": 1819 + }, + { + "epoch": 0.03326813753267406, + "grad_norm": 6.483530997935041, + "learning_rate": 9.999723157673758e-06, + "loss": 17.3454, + "step": 1820 + }, + { + "epoch": 0.03328641672912059, + "grad_norm": 7.496250705900263, + "learning_rate": 9.999720033977824e-06, + "loss": 17.8439, + "step": 1821 + }, + { + "epoch": 0.03330469592556711, + "grad_norm": 7.462704294828384, + "learning_rate": 9.999716892758305e-06, + "loss": 17.7355, + "step": 1822 + }, + { + "epoch": 0.03332297512201363, + "grad_norm": 9.644458381912422, + "learning_rate": 9.999713734015212e-06, + "loss": 18.4893, + "step": 1823 + }, + { + "epoch": 0.03334125431846016, + "grad_norm": 7.666527601067586, + "learning_rate": 9.999710557748557e-06, + "loss": 17.9815, + "step": 1824 + }, + { + "epoch": 0.033359533514906684, + "grad_norm": 7.864692354523234, + "learning_rate": 9.99970736395835e-06, + "loss": 17.9709, + "step": 1825 + }, + { + "epoch": 0.03337781271135321, + "grad_norm": 8.264337407873395, + "learning_rate": 9.999704152644603e-06, + "loss": 17.7038, + "step": 1826 + }, + { + "epoch": 0.033396091907799734, + "grad_norm": 7.969120135020211, + "learning_rate": 9.999700923807326e-06, + "loss": 18.1438, + "step": 1827 + }, + { + "epoch": 0.033414371104246256, + "grad_norm": 8.931013694821827, + "learning_rate": 9.999697677446531e-06, + "loss": 18.0319, + "step": 1828 + }, + { + "epoch": 0.033432650300692784, + "grad_norm": 6.194512930888271, + 
"learning_rate": 9.999694413562231e-06, + "loss": 17.2692, + "step": 1829 + }, + { + "epoch": 0.033450929497139306, + "grad_norm": 7.971082112958304, + "learning_rate": 9.999691132154435e-06, + "loss": 17.8576, + "step": 1830 + }, + { + "epoch": 0.03346920869358583, + "grad_norm": 9.431769907366288, + "learning_rate": 9.999687833223155e-06, + "loss": 18.2726, + "step": 1831 + }, + { + "epoch": 0.03348748789003236, + "grad_norm": 8.729898612350182, + "learning_rate": 9.999684516768402e-06, + "loss": 18.335, + "step": 1832 + }, + { + "epoch": 0.03350576708647888, + "grad_norm": 7.8105456031618505, + "learning_rate": 9.999681182790191e-06, + "loss": 18.1409, + "step": 1833 + }, + { + "epoch": 0.0335240462829254, + "grad_norm": 7.747628228044708, + "learning_rate": 9.99967783128853e-06, + "loss": 17.727, + "step": 1834 + }, + { + "epoch": 0.03354232547937193, + "grad_norm": 6.45043338847289, + "learning_rate": 9.999674462263434e-06, + "loss": 17.2659, + "step": 1835 + }, + { + "epoch": 0.03356060467581845, + "grad_norm": 8.008648065130282, + "learning_rate": 9.999671075714909e-06, + "loss": 17.9419, + "step": 1836 + }, + { + "epoch": 0.03357888387226497, + "grad_norm": 8.439711355653493, + "learning_rate": 9.999667671642975e-06, + "loss": 17.9714, + "step": 1837 + }, + { + "epoch": 0.0335971630687115, + "grad_norm": 8.097927086887031, + "learning_rate": 9.999664250047636e-06, + "loss": 18.2587, + "step": 1838 + }, + { + "epoch": 0.03361544226515802, + "grad_norm": 6.714611037306719, + "learning_rate": 9.99966081092891e-06, + "loss": 17.5767, + "step": 1839 + }, + { + "epoch": 0.033633721461604545, + "grad_norm": 6.919967731204464, + "learning_rate": 9.999657354286806e-06, + "loss": 17.6317, + "step": 1840 + }, + { + "epoch": 0.03365200065805107, + "grad_norm": 8.150508868141921, + "learning_rate": 9.999653880121336e-06, + "loss": 18.2641, + "step": 1841 + }, + { + "epoch": 0.033670279854497595, + "grad_norm": 7.5496710279480155, + "learning_rate": 9.999650388432513e-06, + "loss": 18.2013, + "step": 1842 + }, + { + "epoch": 0.033688559050944124, + "grad_norm": 7.531807395143053, + "learning_rate": 9.99964687922035e-06, + "loss": 17.5703, + "step": 1843 + }, + { + "epoch": 0.033706838247390646, + "grad_norm": 8.08451813867511, + "learning_rate": 9.99964335248486e-06, + "loss": 18.0743, + "step": 1844 + }, + { + "epoch": 0.03372511744383717, + "grad_norm": 7.861686401413494, + "learning_rate": 9.999639808226051e-06, + "loss": 18.0132, + "step": 1845 + }, + { + "epoch": 0.033743396640283696, + "grad_norm": 9.898916711748324, + "learning_rate": 9.999636246443941e-06, + "loss": 18.6431, + "step": 1846 + }, + { + "epoch": 0.03376167583673022, + "grad_norm": 7.855318071091838, + "learning_rate": 9.999632667138539e-06, + "loss": 17.876, + "step": 1847 + }, + { + "epoch": 0.03377995503317674, + "grad_norm": 7.994621814215297, + "learning_rate": 9.999629070309858e-06, + "loss": 18.2559, + "step": 1848 + }, + { + "epoch": 0.03379823422962327, + "grad_norm": 8.328924256453455, + "learning_rate": 9.999625455957912e-06, + "loss": 18.039, + "step": 1849 + }, + { + "epoch": 0.03381651342606979, + "grad_norm": 7.423442934416348, + "learning_rate": 9.99962182408271e-06, + "loss": 17.8073, + "step": 1850 + }, + { + "epoch": 0.03383479262251631, + "grad_norm": 8.670113783939007, + "learning_rate": 9.99961817468427e-06, + "loss": 18.0425, + "step": 1851 + }, + { + "epoch": 0.03385307181896284, + "grad_norm": 8.341152385144946, + "learning_rate": 9.9996145077626e-06, + "loss": 18.3054, + "step": 1852 + }, + { + 
"epoch": 0.03387135101540936, + "grad_norm": 6.040590275864804, + "learning_rate": 9.999610823317716e-06, + "loss": 17.2615, + "step": 1853 + }, + { + "epoch": 0.033889630211855884, + "grad_norm": 6.908992376375361, + "learning_rate": 9.99960712134963e-06, + "loss": 17.6936, + "step": 1854 + }, + { + "epoch": 0.03390790940830241, + "grad_norm": 7.7583741758822695, + "learning_rate": 9.999603401858354e-06, + "loss": 17.8515, + "step": 1855 + }, + { + "epoch": 0.033926188604748934, + "grad_norm": 7.975857615183032, + "learning_rate": 9.999599664843903e-06, + "loss": 18.2947, + "step": 1856 + }, + { + "epoch": 0.033944467801195456, + "grad_norm": 9.107143634269054, + "learning_rate": 9.99959591030629e-06, + "loss": 18.2875, + "step": 1857 + }, + { + "epoch": 0.033962746997641985, + "grad_norm": 8.885538749469587, + "learning_rate": 9.999592138245524e-06, + "loss": 18.4399, + "step": 1858 + }, + { + "epoch": 0.03398102619408851, + "grad_norm": 7.619089325646182, + "learning_rate": 9.999588348661625e-06, + "loss": 18.0025, + "step": 1859 + }, + { + "epoch": 0.033999305390535035, + "grad_norm": 7.923948501979195, + "learning_rate": 9.9995845415546e-06, + "loss": 17.952, + "step": 1860 + }, + { + "epoch": 0.03401758458698156, + "grad_norm": 7.556172395801596, + "learning_rate": 9.999580716924467e-06, + "loss": 17.8152, + "step": 1861 + }, + { + "epoch": 0.03403586378342808, + "grad_norm": 7.908539024199279, + "learning_rate": 9.999576874771236e-06, + "loss": 18.2262, + "step": 1862 + }, + { + "epoch": 0.03405414297987461, + "grad_norm": 7.590176741000986, + "learning_rate": 9.999573015094921e-06, + "loss": 17.5107, + "step": 1863 + }, + { + "epoch": 0.03407242217632113, + "grad_norm": 10.501502046595387, + "learning_rate": 9.99956913789554e-06, + "loss": 19.0452, + "step": 1864 + }, + { + "epoch": 0.03409070137276765, + "grad_norm": 8.724630148307154, + "learning_rate": 9.999565243173099e-06, + "loss": 18.1187, + "step": 1865 + }, + { + "epoch": 0.03410898056921418, + "grad_norm": 6.192883744104812, + "learning_rate": 9.999561330927619e-06, + "loss": 17.2361, + "step": 1866 + }, + { + "epoch": 0.0341272597656607, + "grad_norm": 7.32055343370581, + "learning_rate": 9.999557401159107e-06, + "loss": 18.113, + "step": 1867 + }, + { + "epoch": 0.03414553896210722, + "grad_norm": 7.015490518486996, + "learning_rate": 9.999553453867583e-06, + "loss": 17.7648, + "step": 1868 + }, + { + "epoch": 0.03416381815855375, + "grad_norm": 7.479288524521664, + "learning_rate": 9.999549489053056e-06, + "loss": 17.6716, + "step": 1869 + }, + { + "epoch": 0.034182097355000274, + "grad_norm": 9.573109660956048, + "learning_rate": 9.999545506715544e-06, + "loss": 18.5602, + "step": 1870 + }, + { + "epoch": 0.034200376551446796, + "grad_norm": 7.398144103675194, + "learning_rate": 9.999541506855058e-06, + "loss": 17.7404, + "step": 1871 + }, + { + "epoch": 0.034218655747893324, + "grad_norm": 7.95634507515605, + "learning_rate": 9.999537489471612e-06, + "loss": 18.0822, + "step": 1872 + }, + { + "epoch": 0.034236934944339846, + "grad_norm": 7.556869318283255, + "learning_rate": 9.999533454565222e-06, + "loss": 17.8391, + "step": 1873 + }, + { + "epoch": 0.03425521414078637, + "grad_norm": 7.993039144663855, + "learning_rate": 9.999529402135899e-06, + "loss": 18.0815, + "step": 1874 + }, + { + "epoch": 0.034273493337232896, + "grad_norm": 8.544069018131108, + "learning_rate": 9.999525332183662e-06, + "loss": 18.4803, + "step": 1875 + }, + { + "epoch": 0.03429177253367942, + "grad_norm": 9.479112947375867, + 
"learning_rate": 9.99952124470852e-06, + "loss": 18.4248, + "step": 1876 + }, + { + "epoch": 0.03431005173012595, + "grad_norm": 7.197687460803949, + "learning_rate": 9.999517139710493e-06, + "loss": 17.9663, + "step": 1877 + }, + { + "epoch": 0.03432833092657247, + "grad_norm": 8.248143550009049, + "learning_rate": 9.99951301718959e-06, + "loss": 18.4598, + "step": 1878 + }, + { + "epoch": 0.03434661012301899, + "grad_norm": 8.299650311859507, + "learning_rate": 9.999508877145827e-06, + "loss": 18.1611, + "step": 1879 + }, + { + "epoch": 0.03436488931946552, + "grad_norm": 7.568158080890924, + "learning_rate": 9.999504719579221e-06, + "loss": 17.8868, + "step": 1880 + }, + { + "epoch": 0.03438316851591204, + "grad_norm": 7.415992966366197, + "learning_rate": 9.999500544489785e-06, + "loss": 17.7056, + "step": 1881 + }, + { + "epoch": 0.03440144771235856, + "grad_norm": 8.916900753766164, + "learning_rate": 9.999496351877533e-06, + "loss": 18.3458, + "step": 1882 + }, + { + "epoch": 0.03441972690880509, + "grad_norm": 8.935175191478718, + "learning_rate": 9.99949214174248e-06, + "loss": 18.6321, + "step": 1883 + }, + { + "epoch": 0.03443800610525161, + "grad_norm": 7.722295304191317, + "learning_rate": 9.99948791408464e-06, + "loss": 17.9493, + "step": 1884 + }, + { + "epoch": 0.034456285301698135, + "grad_norm": 8.007541693731353, + "learning_rate": 9.999483668904029e-06, + "loss": 18.2557, + "step": 1885 + }, + { + "epoch": 0.034474564498144664, + "grad_norm": 6.611765754812756, + "learning_rate": 9.999479406200663e-06, + "loss": 17.3192, + "step": 1886 + }, + { + "epoch": 0.034492843694591185, + "grad_norm": 6.3385108608479985, + "learning_rate": 9.999475125974553e-06, + "loss": 17.2715, + "step": 1887 + }, + { + "epoch": 0.03451112289103771, + "grad_norm": 7.520879210002364, + "learning_rate": 9.999470828225718e-06, + "loss": 17.829, + "step": 1888 + }, + { + "epoch": 0.034529402087484236, + "grad_norm": 7.494377127553713, + "learning_rate": 9.999466512954173e-06, + "loss": 17.7672, + "step": 1889 + }, + { + "epoch": 0.03454768128393076, + "grad_norm": 7.763998443291807, + "learning_rate": 9.99946218015993e-06, + "loss": 17.8187, + "step": 1890 + }, + { + "epoch": 0.03456596048037728, + "grad_norm": 7.289771091936282, + "learning_rate": 9.999457829843005e-06, + "loss": 17.796, + "step": 1891 + }, + { + "epoch": 0.03458423967682381, + "grad_norm": 6.623575404079241, + "learning_rate": 9.999453462003417e-06, + "loss": 17.4244, + "step": 1892 + }, + { + "epoch": 0.03460251887327033, + "grad_norm": 10.017125767421753, + "learning_rate": 9.999449076641176e-06, + "loss": 18.8644, + "step": 1893 + }, + { + "epoch": 0.03462079806971686, + "grad_norm": 8.20371403655816, + "learning_rate": 9.9994446737563e-06, + "loss": 18.0538, + "step": 1894 + }, + { + "epoch": 0.03463907726616338, + "grad_norm": 9.802161995721683, + "learning_rate": 9.999440253348805e-06, + "loss": 18.7308, + "step": 1895 + }, + { + "epoch": 0.0346573564626099, + "grad_norm": 8.322224215761992, + "learning_rate": 9.999435815418705e-06, + "loss": 18.3015, + "step": 1896 + }, + { + "epoch": 0.03467563565905643, + "grad_norm": 8.148581700632096, + "learning_rate": 9.999431359966017e-06, + "loss": 17.8139, + "step": 1897 + }, + { + "epoch": 0.03469391485550295, + "grad_norm": 8.92816337345986, + "learning_rate": 9.999426886990758e-06, + "loss": 18.0394, + "step": 1898 + }, + { + "epoch": 0.034712194051949474, + "grad_norm": 8.825948598919227, + "learning_rate": 9.999422396492937e-06, + "loss": 18.4764, + "step": 1899 + }, + { + 
"epoch": 0.034730473248396, + "grad_norm": 8.627968631064658, + "learning_rate": 9.999417888472578e-06, + "loss": 17.8798, + "step": 1900 + }, + { + "epoch": 0.034748752444842525, + "grad_norm": 7.6009155117931675, + "learning_rate": 9.999413362929691e-06, + "loss": 18.207, + "step": 1901 + }, + { + "epoch": 0.034767031641289046, + "grad_norm": 7.870549136132222, + "learning_rate": 9.999408819864296e-06, + "loss": 17.9298, + "step": 1902 + }, + { + "epoch": 0.034785310837735575, + "grad_norm": 6.961455876093974, + "learning_rate": 9.999404259276404e-06, + "loss": 17.6394, + "step": 1903 + }, + { + "epoch": 0.0348035900341821, + "grad_norm": 7.456431814896433, + "learning_rate": 9.999399681166036e-06, + "loss": 18.1099, + "step": 1904 + }, + { + "epoch": 0.03482186923062862, + "grad_norm": 7.428766418167107, + "learning_rate": 9.999395085533205e-06, + "loss": 17.8307, + "step": 1905 + }, + { + "epoch": 0.03484014842707515, + "grad_norm": 8.163133227231434, + "learning_rate": 9.99939047237793e-06, + "loss": 18.1721, + "step": 1906 + }, + { + "epoch": 0.03485842762352167, + "grad_norm": 6.325296098614813, + "learning_rate": 9.999385841700224e-06, + "loss": 17.3985, + "step": 1907 + }, + { + "epoch": 0.03487670681996819, + "grad_norm": 8.425185552349872, + "learning_rate": 9.999381193500104e-06, + "loss": 18.0274, + "step": 1908 + }, + { + "epoch": 0.03489498601641472, + "grad_norm": 8.931619125908387, + "learning_rate": 9.999376527777587e-06, + "loss": 18.5191, + "step": 1909 + }, + { + "epoch": 0.03491326521286124, + "grad_norm": 7.408835504737836, + "learning_rate": 9.999371844532689e-06, + "loss": 17.9273, + "step": 1910 + }, + { + "epoch": 0.03493154440930777, + "grad_norm": 7.623356726961149, + "learning_rate": 9.999367143765428e-06, + "loss": 18.272, + "step": 1911 + }, + { + "epoch": 0.03494982360575429, + "grad_norm": 7.95269138874238, + "learning_rate": 9.999362425475817e-06, + "loss": 17.9312, + "step": 1912 + }, + { + "epoch": 0.034968102802200814, + "grad_norm": 7.462108851980732, + "learning_rate": 9.999357689663875e-06, + "loss": 17.7149, + "step": 1913 + }, + { + "epoch": 0.03498638199864734, + "grad_norm": 7.283136697952984, + "learning_rate": 9.999352936329619e-06, + "loss": 17.6428, + "step": 1914 + }, + { + "epoch": 0.035004661195093864, + "grad_norm": 8.192691586643772, + "learning_rate": 9.999348165473064e-06, + "loss": 18.2675, + "step": 1915 + }, + { + "epoch": 0.035022940391540386, + "grad_norm": 8.498990045712594, + "learning_rate": 9.999343377094227e-06, + "loss": 18.2104, + "step": 1916 + }, + { + "epoch": 0.035041219587986915, + "grad_norm": 8.765368366785685, + "learning_rate": 9.999338571193126e-06, + "loss": 17.692, + "step": 1917 + }, + { + "epoch": 0.035059498784433436, + "grad_norm": 8.098246273684264, + "learning_rate": 9.999333747769777e-06, + "loss": 18.1488, + "step": 1918 + }, + { + "epoch": 0.03507777798087996, + "grad_norm": 6.413145284090817, + "learning_rate": 9.999328906824198e-06, + "loss": 17.4056, + "step": 1919 + }, + { + "epoch": 0.03509605717732649, + "grad_norm": 7.89135265827549, + "learning_rate": 9.999324048356403e-06, + "loss": 17.845, + "step": 1920 + }, + { + "epoch": 0.03511433637377301, + "grad_norm": 7.709013863430543, + "learning_rate": 9.999319172366412e-06, + "loss": 17.934, + "step": 1921 + }, + { + "epoch": 0.03513261557021953, + "grad_norm": 8.564207535733933, + "learning_rate": 9.999314278854242e-06, + "loss": 18.1819, + "step": 1922 + }, + { + "epoch": 0.03515089476666606, + "grad_norm": 7.149263373420875, + "learning_rate": 
9.999309367819907e-06, + "loss": 17.7353, + "step": 1923 + }, + { + "epoch": 0.03516917396311258, + "grad_norm": 8.18832302643092, + "learning_rate": 9.999304439263428e-06, + "loss": 18.0075, + "step": 1924 + }, + { + "epoch": 0.0351874531595591, + "grad_norm": 8.182007369919635, + "learning_rate": 9.999299493184822e-06, + "loss": 18.1542, + "step": 1925 + }, + { + "epoch": 0.03520573235600563, + "grad_norm": 9.944147453652565, + "learning_rate": 9.999294529584102e-06, + "loss": 18.7217, + "step": 1926 + }, + { + "epoch": 0.03522401155245215, + "grad_norm": 7.885567662621238, + "learning_rate": 9.999289548461292e-06, + "loss": 18.1261, + "step": 1927 + }, + { + "epoch": 0.03524229074889868, + "grad_norm": 7.620667758005586, + "learning_rate": 9.999284549816403e-06, + "loss": 17.7779, + "step": 1928 + }, + { + "epoch": 0.0352605699453452, + "grad_norm": 9.852841139533853, + "learning_rate": 9.999279533649458e-06, + "loss": 18.6092, + "step": 1929 + }, + { + "epoch": 0.035278849141791725, + "grad_norm": 7.928579728197884, + "learning_rate": 9.99927449996047e-06, + "loss": 17.9599, + "step": 1930 + }, + { + "epoch": 0.035297128338238254, + "grad_norm": 8.184321980595112, + "learning_rate": 9.999269448749461e-06, + "loss": 18.1889, + "step": 1931 + }, + { + "epoch": 0.035315407534684776, + "grad_norm": 8.402935327668816, + "learning_rate": 9.999264380016444e-06, + "loss": 18.3697, + "step": 1932 + }, + { + "epoch": 0.0353336867311313, + "grad_norm": 9.086470670696299, + "learning_rate": 9.99925929376144e-06, + "loss": 18.5396, + "step": 1933 + }, + { + "epoch": 0.035351965927577826, + "grad_norm": 8.490461255836463, + "learning_rate": 9.999254189984466e-06, + "loss": 18.2341, + "step": 1934 + }, + { + "epoch": 0.03537024512402435, + "grad_norm": 7.094884201354597, + "learning_rate": 9.999249068685539e-06, + "loss": 17.5918, + "step": 1935 + }, + { + "epoch": 0.03538852432047087, + "grad_norm": 8.499136896972471, + "learning_rate": 9.999243929864679e-06, + "loss": 18.1365, + "step": 1936 + }, + { + "epoch": 0.0354068035169174, + "grad_norm": 7.535927532248468, + "learning_rate": 9.999238773521902e-06, + "loss": 17.9605, + "step": 1937 + }, + { + "epoch": 0.03542508271336392, + "grad_norm": 8.389999966836863, + "learning_rate": 9.999233599657228e-06, + "loss": 17.7126, + "step": 1938 + }, + { + "epoch": 0.03544336190981044, + "grad_norm": 8.287982338740152, + "learning_rate": 9.999228408270674e-06, + "loss": 18.2419, + "step": 1939 + }, + { + "epoch": 0.03546164110625697, + "grad_norm": 7.9503520158933165, + "learning_rate": 9.999223199362257e-06, + "loss": 18.0246, + "step": 1940 + }, + { + "epoch": 0.03547992030270349, + "grad_norm": 9.21418739958329, + "learning_rate": 9.999217972931998e-06, + "loss": 18.7629, + "step": 1941 + }, + { + "epoch": 0.035498199499150014, + "grad_norm": 7.782900053670787, + "learning_rate": 9.999212728979912e-06, + "loss": 17.839, + "step": 1942 + }, + { + "epoch": 0.03551647869559654, + "grad_norm": 8.143164639304983, + "learning_rate": 9.999207467506022e-06, + "loss": 18.0542, + "step": 1943 + }, + { + "epoch": 0.035534757892043065, + "grad_norm": 7.389277482561079, + "learning_rate": 9.999202188510341e-06, + "loss": 17.6361, + "step": 1944 + }, + { + "epoch": 0.03555303708848959, + "grad_norm": 10.202846117878034, + "learning_rate": 9.999196891992892e-06, + "loss": 18.7116, + "step": 1945 + }, + { + "epoch": 0.035571316284936115, + "grad_norm": 7.494605340684641, + "learning_rate": 9.999191577953692e-06, + "loss": 18.011, + "step": 1946 + }, + { + "epoch": 
0.03558959548138264, + "grad_norm": 8.38155763029932, + "learning_rate": 9.999186246392756e-06, + "loss": 18.3662, + "step": 1947 + }, + { + "epoch": 0.035607874677829165, + "grad_norm": 7.850468999216842, + "learning_rate": 9.999180897310108e-06, + "loss": 17.9277, + "step": 1948 + }, + { + "epoch": 0.03562615387427569, + "grad_norm": 7.2770297355717615, + "learning_rate": 9.999175530705765e-06, + "loss": 17.8763, + "step": 1949 + }, + { + "epoch": 0.03564443307072221, + "grad_norm": 7.497010366244362, + "learning_rate": 9.999170146579746e-06, + "loss": 17.9092, + "step": 1950 + }, + { + "epoch": 0.03566271226716874, + "grad_norm": 7.147592489045447, + "learning_rate": 9.999164744932069e-06, + "loss": 17.5548, + "step": 1951 + }, + { + "epoch": 0.03568099146361526, + "grad_norm": 7.1198846531257995, + "learning_rate": 9.999159325762753e-06, + "loss": 17.7998, + "step": 1952 + }, + { + "epoch": 0.03569927066006178, + "grad_norm": 8.128197845288225, + "learning_rate": 9.999153889071818e-06, + "loss": 17.9989, + "step": 1953 + }, + { + "epoch": 0.03571754985650831, + "grad_norm": 7.423488993866567, + "learning_rate": 9.999148434859282e-06, + "loss": 17.7657, + "step": 1954 + }, + { + "epoch": 0.03573582905295483, + "grad_norm": 9.400429043325508, + "learning_rate": 9.999142963125164e-06, + "loss": 18.5479, + "step": 1955 + }, + { + "epoch": 0.03575410824940135, + "grad_norm": 10.386644570380422, + "learning_rate": 9.999137473869484e-06, + "loss": 18.5998, + "step": 1956 + }, + { + "epoch": 0.03577238744584788, + "grad_norm": 7.552400237830502, + "learning_rate": 9.999131967092262e-06, + "loss": 17.7759, + "step": 1957 + }, + { + "epoch": 0.035790666642294404, + "grad_norm": 7.230520694917503, + "learning_rate": 9.999126442793515e-06, + "loss": 17.8236, + "step": 1958 + }, + { + "epoch": 0.035808945838740926, + "grad_norm": 9.240852364016215, + "learning_rate": 9.999120900973264e-06, + "loss": 18.3381, + "step": 1959 + }, + { + "epoch": 0.035827225035187454, + "grad_norm": 8.479502318693042, + "learning_rate": 9.999115341631528e-06, + "loss": 18.5659, + "step": 1960 + }, + { + "epoch": 0.035845504231633976, + "grad_norm": 8.255809598397338, + "learning_rate": 9.999109764768328e-06, + "loss": 17.9439, + "step": 1961 + }, + { + "epoch": 0.035863783428080505, + "grad_norm": 7.10311904635282, + "learning_rate": 9.99910417038368e-06, + "loss": 17.5031, + "step": 1962 + }, + { + "epoch": 0.03588206262452703, + "grad_norm": 8.354939519427754, + "learning_rate": 9.999098558477606e-06, + "loss": 18.2835, + "step": 1963 + }, + { + "epoch": 0.03590034182097355, + "grad_norm": 8.169834269388787, + "learning_rate": 9.999092929050126e-06, + "loss": 18.0539, + "step": 1964 + }, + { + "epoch": 0.03591862101742008, + "grad_norm": 7.209724239423868, + "learning_rate": 9.99908728210126e-06, + "loss": 17.5991, + "step": 1965 + }, + { + "epoch": 0.0359369002138666, + "grad_norm": 7.6799187576786245, + "learning_rate": 9.999081617631026e-06, + "loss": 18.0274, + "step": 1966 + }, + { + "epoch": 0.03595517941031312, + "grad_norm": 8.902126792562187, + "learning_rate": 9.999075935639445e-06, + "loss": 18.6911, + "step": 1967 + }, + { + "epoch": 0.03597345860675965, + "grad_norm": 7.673549516548946, + "learning_rate": 9.999070236126536e-06, + "loss": 18.1461, + "step": 1968 + }, + { + "epoch": 0.03599173780320617, + "grad_norm": 7.166515808137726, + "learning_rate": 9.99906451909232e-06, + "loss": 17.4114, + "step": 1969 + }, + { + "epoch": 0.03601001699965269, + "grad_norm": 7.428286380871508, + "learning_rate": 
9.999058784536816e-06, + "loss": 17.7822, + "step": 1970 + }, + { + "epoch": 0.03602829619609922, + "grad_norm": 8.27330724470139, + "learning_rate": 9.999053032460044e-06, + "loss": 17.432, + "step": 1971 + }, + { + "epoch": 0.03604657539254574, + "grad_norm": 7.744580295743159, + "learning_rate": 9.999047262862027e-06, + "loss": 17.8798, + "step": 1972 + }, + { + "epoch": 0.036064854588992265, + "grad_norm": 7.065151922031846, + "learning_rate": 9.999041475742783e-06, + "loss": 18.016, + "step": 1973 + }, + { + "epoch": 0.036083133785438794, + "grad_norm": 7.15757239909588, + "learning_rate": 9.99903567110233e-06, + "loss": 17.8817, + "step": 1974 + }, + { + "epoch": 0.036101412981885316, + "grad_norm": 8.853898794161905, + "learning_rate": 9.999029848940694e-06, + "loss": 17.9513, + "step": 1975 + }, + { + "epoch": 0.03611969217833184, + "grad_norm": 6.892125342950771, + "learning_rate": 9.99902400925789e-06, + "loss": 17.4626, + "step": 1976 + }, + { + "epoch": 0.036137971374778366, + "grad_norm": 8.855795305081163, + "learning_rate": 9.999018152053942e-06, + "loss": 18.453, + "step": 1977 + }, + { + "epoch": 0.03615625057122489, + "grad_norm": 9.553477180571765, + "learning_rate": 9.999012277328868e-06, + "loss": 19.0001, + "step": 1978 + }, + { + "epoch": 0.036174529767671416, + "grad_norm": 7.146861108136913, + "learning_rate": 9.99900638508269e-06, + "loss": 17.5435, + "step": 1979 + }, + { + "epoch": 0.03619280896411794, + "grad_norm": 7.083904017713698, + "learning_rate": 9.999000475315429e-06, + "loss": 17.6269, + "step": 1980 + }, + { + "epoch": 0.03621108816056446, + "grad_norm": 8.181874569690923, + "learning_rate": 9.998994548027106e-06, + "loss": 18.339, + "step": 1981 + }, + { + "epoch": 0.03622936735701099, + "grad_norm": 8.15235850629926, + "learning_rate": 9.998988603217738e-06, + "loss": 18.3941, + "step": 1982 + }, + { + "epoch": 0.03624764655345751, + "grad_norm": 8.495091145565938, + "learning_rate": 9.998982640887352e-06, + "loss": 17.9946, + "step": 1983 + }, + { + "epoch": 0.03626592574990403, + "grad_norm": 6.4835607043705785, + "learning_rate": 9.998976661035964e-06, + "loss": 17.6544, + "step": 1984 + }, + { + "epoch": 0.03628420494635056, + "grad_norm": 8.841987597518289, + "learning_rate": 9.998970663663596e-06, + "loss": 18.0508, + "step": 1985 + }, + { + "epoch": 0.03630248414279708, + "grad_norm": 10.217090662048435, + "learning_rate": 9.998964648770271e-06, + "loss": 18.1618, + "step": 1986 + }, + { + "epoch": 0.036320763339243604, + "grad_norm": 8.019689206397057, + "learning_rate": 9.998958616356006e-06, + "loss": 17.9514, + "step": 1987 + }, + { + "epoch": 0.03633904253569013, + "grad_norm": 6.991445236201103, + "learning_rate": 9.998952566420828e-06, + "loss": 17.8643, + "step": 1988 + }, + { + "epoch": 0.036357321732136655, + "grad_norm": 7.789091397301192, + "learning_rate": 9.998946498964755e-06, + "loss": 18.2038, + "step": 1989 + }, + { + "epoch": 0.03637560092858318, + "grad_norm": 7.141653228475028, + "learning_rate": 9.998940413987805e-06, + "loss": 17.8061, + "step": 1990 + }, + { + "epoch": 0.036393880125029705, + "grad_norm": 8.066331113358645, + "learning_rate": 9.998934311490005e-06, + "loss": 18.1814, + "step": 1991 + }, + { + "epoch": 0.03641215932147623, + "grad_norm": 7.335124048620984, + "learning_rate": 9.998928191471376e-06, + "loss": 17.875, + "step": 1992 + }, + { + "epoch": 0.03643043851792275, + "grad_norm": 9.112505337222876, + "learning_rate": 9.998922053931935e-06, + "loss": 18.6293, + "step": 1993 + }, + { + "epoch": 
0.03644871771436928, + "grad_norm": 7.54777861867234, + "learning_rate": 9.998915898871705e-06, + "loss": 17.8176, + "step": 1994 + }, + { + "epoch": 0.0364669969108158, + "grad_norm": 8.540294308523698, + "learning_rate": 9.998909726290711e-06, + "loss": 18.4626, + "step": 1995 + }, + { + "epoch": 0.03648527610726233, + "grad_norm": 8.571296800654416, + "learning_rate": 9.99890353618897e-06, + "loss": 18.1324, + "step": 1996 + }, + { + "epoch": 0.03650355530370885, + "grad_norm": 8.426795245663996, + "learning_rate": 9.998897328566506e-06, + "loss": 17.8787, + "step": 1997 + }, + { + "epoch": 0.03652183450015537, + "grad_norm": 7.607679739554118, + "learning_rate": 9.998891103423343e-06, + "loss": 17.7262, + "step": 1998 + }, + { + "epoch": 0.0365401136966019, + "grad_norm": 7.353285686355623, + "learning_rate": 9.998884860759499e-06, + "loss": 17.7066, + "step": 1999 + }, + { + "epoch": 0.03655839289304842, + "grad_norm": 8.198968849277952, + "learning_rate": 9.998878600574998e-06, + "loss": 18.0655, + "step": 2000 + }, + { + "epoch": 0.036576672089494944, + "grad_norm": 7.90908222211619, + "learning_rate": 9.998872322869859e-06, + "loss": 18.1217, + "step": 2001 + }, + { + "epoch": 0.03659495128594147, + "grad_norm": 7.434964235402946, + "learning_rate": 9.99886602764411e-06, + "loss": 17.6969, + "step": 2002 + }, + { + "epoch": 0.036613230482387994, + "grad_norm": 8.434196775394652, + "learning_rate": 9.998859714897765e-06, + "loss": 18.1832, + "step": 2003 + }, + { + "epoch": 0.036631509678834516, + "grad_norm": 7.846211573266152, + "learning_rate": 9.998853384630853e-06, + "loss": 17.8832, + "step": 2004 + }, + { + "epoch": 0.036649788875281045, + "grad_norm": 8.75645058921149, + "learning_rate": 9.998847036843394e-06, + "loss": 18.4057, + "step": 2005 + }, + { + "epoch": 0.036668068071727566, + "grad_norm": 7.4793447817184955, + "learning_rate": 9.998840671535411e-06, + "loss": 17.7704, + "step": 2006 + }, + { + "epoch": 0.03668634726817409, + "grad_norm": 8.060297496152803, + "learning_rate": 9.998834288706922e-06, + "loss": 17.9751, + "step": 2007 + }, + { + "epoch": 0.03670462646462062, + "grad_norm": 6.269820340808365, + "learning_rate": 9.998827888357956e-06, + "loss": 17.3203, + "step": 2008 + }, + { + "epoch": 0.03672290566106714, + "grad_norm": 7.616782429628068, + "learning_rate": 9.998821470488529e-06, + "loss": 17.8384, + "step": 2009 + }, + { + "epoch": 0.03674118485751366, + "grad_norm": 8.33085901188751, + "learning_rate": 9.998815035098668e-06, + "loss": 18.0717, + "step": 2010 + }, + { + "epoch": 0.03675946405396019, + "grad_norm": 7.723734714418637, + "learning_rate": 9.998808582188393e-06, + "loss": 17.8507, + "step": 2011 + }, + { + "epoch": 0.03677774325040671, + "grad_norm": 9.018866210300486, + "learning_rate": 9.998802111757729e-06, + "loss": 18.6216, + "step": 2012 + }, + { + "epoch": 0.03679602244685324, + "grad_norm": 6.885161674366217, + "learning_rate": 9.998795623806697e-06, + "loss": 17.5822, + "step": 2013 + }, + { + "epoch": 0.03681430164329976, + "grad_norm": 8.091115820985259, + "learning_rate": 9.99878911833532e-06, + "loss": 17.9485, + "step": 2014 + }, + { + "epoch": 0.03683258083974628, + "grad_norm": 6.987997512528637, + "learning_rate": 9.998782595343621e-06, + "loss": 17.4912, + "step": 2015 + }, + { + "epoch": 0.03685086003619281, + "grad_norm": 8.222991809959506, + "learning_rate": 9.998776054831623e-06, + "loss": 18.1029, + "step": 2016 + }, + { + "epoch": 0.036869139232639334, + "grad_norm": 9.05027801037263, + "learning_rate": 
9.998769496799347e-06, + "loss": 18.2343, + "step": 2017 + }, + { + "epoch": 0.036887418429085855, + "grad_norm": 9.128431730728373, + "learning_rate": 9.99876292124682e-06, + "loss": 18.5984, + "step": 2018 + }, + { + "epoch": 0.036905697625532384, + "grad_norm": 7.954206344639059, + "learning_rate": 9.998756328174062e-06, + "loss": 18.0505, + "step": 2019 + }, + { + "epoch": 0.036923976821978906, + "grad_norm": 8.188015996937526, + "learning_rate": 9.998749717581097e-06, + "loss": 17.894, + "step": 2020 + }, + { + "epoch": 0.03694225601842543, + "grad_norm": 7.75412895491765, + "learning_rate": 9.998743089467949e-06, + "loss": 18.148, + "step": 2021 + }, + { + "epoch": 0.036960535214871956, + "grad_norm": 8.109647665884854, + "learning_rate": 9.998736443834637e-06, + "loss": 18.3236, + "step": 2022 + }, + { + "epoch": 0.03697881441131848, + "grad_norm": 7.780509706408551, + "learning_rate": 9.99872978068119e-06, + "loss": 18.0863, + "step": 2023 + }, + { + "epoch": 0.036997093607765, + "grad_norm": 9.09458460854067, + "learning_rate": 9.998723100007628e-06, + "loss": 18.5133, + "step": 2024 + }, + { + "epoch": 0.03701537280421153, + "grad_norm": 9.863279863855022, + "learning_rate": 9.998716401813975e-06, + "loss": 19.0626, + "step": 2025 + }, + { + "epoch": 0.03703365200065805, + "grad_norm": 7.253133859486355, + "learning_rate": 9.998709686100256e-06, + "loss": 17.9412, + "step": 2026 + }, + { + "epoch": 0.03705193119710457, + "grad_norm": 7.932175176406323, + "learning_rate": 9.998702952866494e-06, + "loss": 17.6815, + "step": 2027 + }, + { + "epoch": 0.0370702103935511, + "grad_norm": 7.339503659937987, + "learning_rate": 9.99869620211271e-06, + "loss": 17.6606, + "step": 2028 + }, + { + "epoch": 0.03708848958999762, + "grad_norm": 7.907738127442454, + "learning_rate": 9.99868943383893e-06, + "loss": 18.2433, + "step": 2029 + }, + { + "epoch": 0.03710676878644415, + "grad_norm": 7.775746964937671, + "learning_rate": 9.998682648045178e-06, + "loss": 17.9101, + "step": 2030 + }, + { + "epoch": 0.03712504798289067, + "grad_norm": 9.04856581963131, + "learning_rate": 9.998675844731475e-06, + "loss": 18.1439, + "step": 2031 + }, + { + "epoch": 0.037143327179337195, + "grad_norm": 7.3359385924263485, + "learning_rate": 9.99866902389785e-06, + "loss": 18.0449, + "step": 2032 + }, + { + "epoch": 0.03716160637578372, + "grad_norm": 7.519600586027241, + "learning_rate": 9.998662185544323e-06, + "loss": 17.8224, + "step": 2033 + }, + { + "epoch": 0.037179885572230245, + "grad_norm": 7.987638704160971, + "learning_rate": 9.998655329670918e-06, + "loss": 18.2923, + "step": 2034 + }, + { + "epoch": 0.03719816476867677, + "grad_norm": 7.681158036608714, + "learning_rate": 9.998648456277659e-06, + "loss": 18.1971, + "step": 2035 + }, + { + "epoch": 0.037216443965123296, + "grad_norm": 8.12090519344087, + "learning_rate": 9.998641565364573e-06, + "loss": 18.4449, + "step": 2036 + }, + { + "epoch": 0.03723472316156982, + "grad_norm": 7.51928492652649, + "learning_rate": 9.99863465693168e-06, + "loss": 17.6491, + "step": 2037 + }, + { + "epoch": 0.03725300235801634, + "grad_norm": 7.913103109118726, + "learning_rate": 9.998627730979008e-06, + "loss": 17.7631, + "step": 2038 + }, + { + "epoch": 0.03727128155446287, + "grad_norm": 7.669560039298758, + "learning_rate": 9.99862078750658e-06, + "loss": 17.8085, + "step": 2039 + }, + { + "epoch": 0.03728956075090939, + "grad_norm": 8.251834017752726, + "learning_rate": 9.998613826514418e-06, + "loss": 18.243, + "step": 2040 + }, + { + "epoch": 
0.03730783994735591, + "grad_norm": 8.873049721162209, + "learning_rate": 9.998606848002548e-06, + "loss": 18.4131, + "step": 2041 + }, + { + "epoch": 0.03732611914380244, + "grad_norm": 7.451580746322862, + "learning_rate": 9.998599851970997e-06, + "loss": 17.9024, + "step": 2042 + }, + { + "epoch": 0.03734439834024896, + "grad_norm": 7.697008875890094, + "learning_rate": 9.998592838419787e-06, + "loss": 18.1986, + "step": 2043 + }, + { + "epoch": 0.037362677536695484, + "grad_norm": 8.120055913869294, + "learning_rate": 9.998585807348942e-06, + "loss": 17.8143, + "step": 2044 + }, + { + "epoch": 0.03738095673314201, + "grad_norm": 8.278267453559662, + "learning_rate": 9.998578758758486e-06, + "loss": 18.3097, + "step": 2045 + }, + { + "epoch": 0.037399235929588534, + "grad_norm": 8.153431653087639, + "learning_rate": 9.998571692648447e-06, + "loss": 18.1901, + "step": 2046 + }, + { + "epoch": 0.03741751512603506, + "grad_norm": 8.006834892764367, + "learning_rate": 9.998564609018848e-06, + "loss": 18.4237, + "step": 2047 + }, + { + "epoch": 0.037435794322481585, + "grad_norm": 8.303060078555337, + "learning_rate": 9.998557507869714e-06, + "loss": 18.3405, + "step": 2048 + }, + { + "epoch": 0.037454073518928106, + "grad_norm": 7.2265990118581955, + "learning_rate": 9.99855038920107e-06, + "loss": 17.6187, + "step": 2049 + }, + { + "epoch": 0.037472352715374635, + "grad_norm": 8.041556758724703, + "learning_rate": 9.998543253012938e-06, + "loss": 17.9432, + "step": 2050 + }, + { + "epoch": 0.03749063191182116, + "grad_norm": 6.991937103045851, + "learning_rate": 9.998536099305348e-06, + "loss": 17.675, + "step": 2051 + }, + { + "epoch": 0.03750891110826768, + "grad_norm": 8.035188795438987, + "learning_rate": 9.998528928078321e-06, + "loss": 18.1265, + "step": 2052 + }, + { + "epoch": 0.03752719030471421, + "grad_norm": 7.620085867284849, + "learning_rate": 9.998521739331886e-06, + "loss": 17.9205, + "step": 2053 + }, + { + "epoch": 0.03754546950116073, + "grad_norm": 9.072221427512684, + "learning_rate": 9.998514533066066e-06, + "loss": 18.5335, + "step": 2054 + }, + { + "epoch": 0.03756374869760725, + "grad_norm": 8.568238815700667, + "learning_rate": 9.998507309280886e-06, + "loss": 18.0401, + "step": 2055 + }, + { + "epoch": 0.03758202789405378, + "grad_norm": 10.039383540553157, + "learning_rate": 9.99850006797637e-06, + "loss": 18.831, + "step": 2056 + }, + { + "epoch": 0.0376003070905003, + "grad_norm": 6.1131761158643245, + "learning_rate": 9.998492809152545e-06, + "loss": 17.1808, + "step": 2057 + }, + { + "epoch": 0.03761858628694682, + "grad_norm": 6.7440363014529074, + "learning_rate": 9.99848553280944e-06, + "loss": 17.7552, + "step": 2058 + }, + { + "epoch": 0.03763686548339335, + "grad_norm": 6.955143337867716, + "learning_rate": 9.998478238947074e-06, + "loss": 17.4316, + "step": 2059 + }, + { + "epoch": 0.03765514467983987, + "grad_norm": 7.28685085214896, + "learning_rate": 9.998470927565476e-06, + "loss": 17.5236, + "step": 2060 + }, + { + "epoch": 0.037673423876286395, + "grad_norm": 9.235502043145019, + "learning_rate": 9.998463598664669e-06, + "loss": 18.3993, + "step": 2061 + }, + { + "epoch": 0.037691703072732924, + "grad_norm": 8.432838318719247, + "learning_rate": 9.998456252244684e-06, + "loss": 18.4229, + "step": 2062 + }, + { + "epoch": 0.037709982269179446, + "grad_norm": 7.339767176483528, + "learning_rate": 9.998448888305543e-06, + "loss": 17.6698, + "step": 2063 + }, + { + "epoch": 0.037728261465625974, + "grad_norm": 6.833365341315317, + "learning_rate": 
9.998441506847271e-06, + "loss": 17.5013, + "step": 2064 + }, + { + "epoch": 0.037746540662072496, + "grad_norm": 6.318341016075187, + "learning_rate": 9.998434107869897e-06, + "loss": 17.2457, + "step": 2065 + }, + { + "epoch": 0.03776481985851902, + "grad_norm": 7.97844973845759, + "learning_rate": 9.998426691373443e-06, + "loss": 18.2731, + "step": 2066 + }, + { + "epoch": 0.03778309905496555, + "grad_norm": 9.022125937561709, + "learning_rate": 9.99841925735794e-06, + "loss": 18.7425, + "step": 2067 + }, + { + "epoch": 0.03780137825141207, + "grad_norm": 8.080430879740227, + "learning_rate": 9.99841180582341e-06, + "loss": 18.5238, + "step": 2068 + }, + { + "epoch": 0.03781965744785859, + "grad_norm": 7.334230547063637, + "learning_rate": 9.99840433676988e-06, + "loss": 17.6445, + "step": 2069 + }, + { + "epoch": 0.03783793664430512, + "grad_norm": 7.8255129735717155, + "learning_rate": 9.998396850197376e-06, + "loss": 18.0013, + "step": 2070 + }, + { + "epoch": 0.03785621584075164, + "grad_norm": 7.146299981961723, + "learning_rate": 9.998389346105925e-06, + "loss": 17.7126, + "step": 2071 + }, + { + "epoch": 0.03787449503719816, + "grad_norm": 7.973704356003748, + "learning_rate": 9.998381824495556e-06, + "loss": 17.6137, + "step": 2072 + }, + { + "epoch": 0.03789277423364469, + "grad_norm": 8.325580556641624, + "learning_rate": 9.998374285366289e-06, + "loss": 18.1783, + "step": 2073 + }, + { + "epoch": 0.03791105343009121, + "grad_norm": 8.501162206117245, + "learning_rate": 9.998366728718155e-06, + "loss": 18.1392, + "step": 2074 + }, + { + "epoch": 0.037929332626537735, + "grad_norm": 6.5954947334230365, + "learning_rate": 9.998359154551178e-06, + "loss": 17.4255, + "step": 2075 + }, + { + "epoch": 0.03794761182298426, + "grad_norm": 9.095095045655555, + "learning_rate": 9.998351562865387e-06, + "loss": 18.9657, + "step": 2076 + }, + { + "epoch": 0.037965891019430785, + "grad_norm": 8.181969869684231, + "learning_rate": 9.99834395366081e-06, + "loss": 18.2521, + "step": 2077 + }, + { + "epoch": 0.03798417021587731, + "grad_norm": 7.557685423507036, + "learning_rate": 9.998336326937468e-06, + "loss": 17.8126, + "step": 2078 + }, + { + "epoch": 0.038002449412323835, + "grad_norm": 8.69340557232809, + "learning_rate": 9.998328682695391e-06, + "loss": 18.5631, + "step": 2079 + }, + { + "epoch": 0.03802072860877036, + "grad_norm": 7.546948153844951, + "learning_rate": 9.998321020934607e-06, + "loss": 18.0963, + "step": 2080 + }, + { + "epoch": 0.038039007805216886, + "grad_norm": 8.297753147884801, + "learning_rate": 9.998313341655142e-06, + "loss": 18.312, + "step": 2081 + }, + { + "epoch": 0.03805728700166341, + "grad_norm": 6.922271759890242, + "learning_rate": 9.99830564485702e-06, + "loss": 17.6805, + "step": 2082 + }, + { + "epoch": 0.03807556619810993, + "grad_norm": 7.008500913491092, + "learning_rate": 9.998297930540273e-06, + "loss": 17.6757, + "step": 2083 + }, + { + "epoch": 0.03809384539455646, + "grad_norm": 9.141224497140042, + "learning_rate": 9.998290198704924e-06, + "loss": 18.5254, + "step": 2084 + }, + { + "epoch": 0.03811212459100298, + "grad_norm": 6.172946546054869, + "learning_rate": 9.998282449351002e-06, + "loss": 17.2599, + "step": 2085 + }, + { + "epoch": 0.0381304037874495, + "grad_norm": 8.891334316381819, + "learning_rate": 9.998274682478535e-06, + "loss": 18.6278, + "step": 2086 + }, + { + "epoch": 0.03814868298389603, + "grad_norm": 8.313104732808613, + "learning_rate": 9.998266898087546e-06, + "loss": 18.0706, + "step": 2087 + }, + { + "epoch": 
0.03816696218034255, + "grad_norm": 7.617135262532365, + "learning_rate": 9.998259096178067e-06, + "loss": 18.0323, + "step": 2088 + }, + { + "epoch": 0.038185241376789074, + "grad_norm": 7.312424259446676, + "learning_rate": 9.998251276750124e-06, + "loss": 17.57, + "step": 2089 + }, + { + "epoch": 0.0382035205732356, + "grad_norm": 7.557625541284032, + "learning_rate": 9.998243439803743e-06, + "loss": 17.8035, + "step": 2090 + }, + { + "epoch": 0.038221799769682124, + "grad_norm": 8.286744336007226, + "learning_rate": 9.998235585338953e-06, + "loss": 18.3543, + "step": 2091 + }, + { + "epoch": 0.038240078966128646, + "grad_norm": 8.51951773168702, + "learning_rate": 9.998227713355782e-06, + "loss": 18.3313, + "step": 2092 + }, + { + "epoch": 0.038258358162575175, + "grad_norm": 7.784579198221027, + "learning_rate": 9.998219823854255e-06, + "loss": 18.3275, + "step": 2093 + }, + { + "epoch": 0.0382766373590217, + "grad_norm": 6.878269528459254, + "learning_rate": 9.998211916834402e-06, + "loss": 17.506, + "step": 2094 + }, + { + "epoch": 0.03829491655546822, + "grad_norm": 6.077809690735215, + "learning_rate": 9.99820399229625e-06, + "loss": 17.1299, + "step": 2095 + }, + { + "epoch": 0.03831319575191475, + "grad_norm": 7.245859913601165, + "learning_rate": 9.998196050239827e-06, + "loss": 17.6635, + "step": 2096 + }, + { + "epoch": 0.03833147494836127, + "grad_norm": 7.879792392452729, + "learning_rate": 9.998188090665159e-06, + "loss": 17.8758, + "step": 2097 + }, + { + "epoch": 0.0383497541448078, + "grad_norm": 6.97668441078997, + "learning_rate": 9.998180113572277e-06, + "loss": 17.6424, + "step": 2098 + }, + { + "epoch": 0.03836803334125432, + "grad_norm": 7.754336743164202, + "learning_rate": 9.998172118961207e-06, + "loss": 18.2514, + "step": 2099 + }, + { + "epoch": 0.03838631253770084, + "grad_norm": 8.158229133475526, + "learning_rate": 9.998164106831978e-06, + "loss": 18.0946, + "step": 2100 + }, + { + "epoch": 0.03840459173414737, + "grad_norm": 7.938238548236804, + "learning_rate": 9.998156077184617e-06, + "loss": 18.078, + "step": 2101 + }, + { + "epoch": 0.03842287093059389, + "grad_norm": 8.48571756895092, + "learning_rate": 9.998148030019152e-06, + "loss": 18.186, + "step": 2102 + }, + { + "epoch": 0.03844115012704041, + "grad_norm": 6.878298713002423, + "learning_rate": 9.998139965335613e-06, + "loss": 17.4737, + "step": 2103 + }, + { + "epoch": 0.03845942932348694, + "grad_norm": 8.569729559872155, + "learning_rate": 9.998131883134028e-06, + "loss": 18.0949, + "step": 2104 + }, + { + "epoch": 0.038477708519933464, + "grad_norm": 7.296476454258555, + "learning_rate": 9.998123783414421e-06, + "loss": 18.0296, + "step": 2105 + }, + { + "epoch": 0.038495987716379985, + "grad_norm": 8.22560600922055, + "learning_rate": 9.998115666176828e-06, + "loss": 17.8063, + "step": 2106 + }, + { + "epoch": 0.038514266912826514, + "grad_norm": 8.49243278887264, + "learning_rate": 9.99810753142127e-06, + "loss": 18.2786, + "step": 2107 + }, + { + "epoch": 0.038532546109273036, + "grad_norm": 7.4911301907547685, + "learning_rate": 9.99809937914778e-06, + "loss": 17.9241, + "step": 2108 + }, + { + "epoch": 0.03855082530571956, + "grad_norm": 8.29966767865699, + "learning_rate": 9.998091209356387e-06, + "loss": 18.4018, + "step": 2109 + }, + { + "epoch": 0.038569104502166086, + "grad_norm": 7.275110598935181, + "learning_rate": 9.998083022047116e-06, + "loss": 17.7244, + "step": 2110 + }, + { + "epoch": 0.03858738369861261, + "grad_norm": 6.894500030672901, + "learning_rate": 
9.998074817219999e-06, + "loss": 17.4782, + "step": 2111 + }, + { + "epoch": 0.03860566289505913, + "grad_norm": 6.973114564655955, + "learning_rate": 9.998066594875063e-06, + "loss": 17.6483, + "step": 2112 + }, + { + "epoch": 0.03862394209150566, + "grad_norm": 7.8712395141148495, + "learning_rate": 9.998058355012337e-06, + "loss": 18.2726, + "step": 2113 + }, + { + "epoch": 0.03864222128795218, + "grad_norm": 7.846501033939585, + "learning_rate": 9.99805009763185e-06, + "loss": 17.8641, + "step": 2114 + }, + { + "epoch": 0.03866050048439871, + "grad_norm": 7.796771949115895, + "learning_rate": 9.99804182273363e-06, + "loss": 18.0188, + "step": 2115 + }, + { + "epoch": 0.03867877968084523, + "grad_norm": 6.346450837225925, + "learning_rate": 9.99803353031771e-06, + "loss": 17.2014, + "step": 2116 + }, + { + "epoch": 0.03869705887729175, + "grad_norm": 9.008885742021926, + "learning_rate": 9.998025220384114e-06, + "loss": 18.5797, + "step": 2117 + }, + { + "epoch": 0.03871533807373828, + "grad_norm": 6.600728835551202, + "learning_rate": 9.998016892932873e-06, + "loss": 17.3858, + "step": 2118 + }, + { + "epoch": 0.0387336172701848, + "grad_norm": 7.9493070426391075, + "learning_rate": 9.998008547964018e-06, + "loss": 17.9694, + "step": 2119 + }, + { + "epoch": 0.038751896466631325, + "grad_norm": 8.29793855732644, + "learning_rate": 9.998000185477576e-06, + "loss": 18.1664, + "step": 2120 + }, + { + "epoch": 0.038770175663077854, + "grad_norm": 8.018611705768237, + "learning_rate": 9.997991805473577e-06, + "loss": 18.1572, + "step": 2121 + }, + { + "epoch": 0.038788454859524375, + "grad_norm": 7.7486960969514245, + "learning_rate": 9.997983407952052e-06, + "loss": 17.7031, + "step": 2122 + }, + { + "epoch": 0.0388067340559709, + "grad_norm": 7.005074737558086, + "learning_rate": 9.997974992913026e-06, + "loss": 17.7374, + "step": 2123 + }, + { + "epoch": 0.038825013252417426, + "grad_norm": 8.463804697132712, + "learning_rate": 9.997966560356534e-06, + "loss": 18.3967, + "step": 2124 + }, + { + "epoch": 0.03884329244886395, + "grad_norm": 8.827072480057215, + "learning_rate": 9.997958110282602e-06, + "loss": 18.3065, + "step": 2125 + }, + { + "epoch": 0.03886157164531047, + "grad_norm": 8.028287371587094, + "learning_rate": 9.99794964269126e-06, + "loss": 18.1977, + "step": 2126 + }, + { + "epoch": 0.038879850841757, + "grad_norm": 7.609837991470296, + "learning_rate": 9.997941157582538e-06, + "loss": 17.8667, + "step": 2127 + }, + { + "epoch": 0.03889813003820352, + "grad_norm": 7.609650111579857, + "learning_rate": 9.997932654956467e-06, + "loss": 17.8524, + "step": 2128 + }, + { + "epoch": 0.03891640923465004, + "grad_norm": 6.3001475843222075, + "learning_rate": 9.997924134813075e-06, + "loss": 17.3262, + "step": 2129 + }, + { + "epoch": 0.03893468843109657, + "grad_norm": 8.218052885471584, + "learning_rate": 9.997915597152394e-06, + "loss": 18.016, + "step": 2130 + }, + { + "epoch": 0.03895296762754309, + "grad_norm": 8.96614079578861, + "learning_rate": 9.99790704197445e-06, + "loss": 18.2741, + "step": 2131 + }, + { + "epoch": 0.03897124682398962, + "grad_norm": 7.338381226610959, + "learning_rate": 9.997898469279278e-06, + "loss": 17.5664, + "step": 2132 + }, + { + "epoch": 0.03898952602043614, + "grad_norm": 7.397884465912606, + "learning_rate": 9.997889879066904e-06, + "loss": 18.05, + "step": 2133 + }, + { + "epoch": 0.039007805216882664, + "grad_norm": 6.351306050693116, + "learning_rate": 9.99788127133736e-06, + "loss": 17.3067, + "step": 2134 + }, + { + "epoch": 
0.03902608441332919, + "grad_norm": 8.342880151387575, + "learning_rate": 9.997872646090675e-06, + "loss": 18.4756, + "step": 2135 + }, + { + "epoch": 0.039044363609775715, + "grad_norm": 8.520078110217645, + "learning_rate": 9.997864003326882e-06, + "loss": 18.5616, + "step": 2136 + }, + { + "epoch": 0.039062642806222236, + "grad_norm": 7.15659043512593, + "learning_rate": 9.997855343046007e-06, + "loss": 17.5117, + "step": 2137 + }, + { + "epoch": 0.039080922002668765, + "grad_norm": 6.854967211447595, + "learning_rate": 9.997846665248086e-06, + "loss": 17.6334, + "step": 2138 + }, + { + "epoch": 0.03909920119911529, + "grad_norm": 7.0789309063840165, + "learning_rate": 9.997837969933144e-06, + "loss": 17.7366, + "step": 2139 + }, + { + "epoch": 0.03911748039556181, + "grad_norm": 9.024590484928225, + "learning_rate": 9.997829257101214e-06, + "loss": 18.69, + "step": 2140 + }, + { + "epoch": 0.03913575959200834, + "grad_norm": 8.416640311669644, + "learning_rate": 9.997820526752327e-06, + "loss": 17.8544, + "step": 2141 + }, + { + "epoch": 0.03915403878845486, + "grad_norm": 8.718851178261065, + "learning_rate": 9.99781177888651e-06, + "loss": 18.3247, + "step": 2142 + }, + { + "epoch": 0.03917231798490138, + "grad_norm": 8.160393105502534, + "learning_rate": 9.9978030135038e-06, + "loss": 17.906, + "step": 2143 + }, + { + "epoch": 0.03919059718134791, + "grad_norm": 7.707170090801509, + "learning_rate": 9.997794230604221e-06, + "loss": 17.9793, + "step": 2144 + }, + { + "epoch": 0.03920887637779443, + "grad_norm": 6.371815042310033, + "learning_rate": 9.997785430187808e-06, + "loss": 17.5896, + "step": 2145 + }, + { + "epoch": 0.03922715557424095, + "grad_norm": 6.710940234620757, + "learning_rate": 9.99777661225459e-06, + "loss": 17.4544, + "step": 2146 + }, + { + "epoch": 0.03924543477068748, + "grad_norm": 7.709280379495519, + "learning_rate": 9.997767776804601e-06, + "loss": 18.0954, + "step": 2147 + }, + { + "epoch": 0.039263713967134004, + "grad_norm": 7.760644279078112, + "learning_rate": 9.997758923837868e-06, + "loss": 18.0527, + "step": 2148 + }, + { + "epoch": 0.03928199316358053, + "grad_norm": 7.049552993140452, + "learning_rate": 9.997750053354425e-06, + "loss": 17.6109, + "step": 2149 + }, + { + "epoch": 0.039300272360027054, + "grad_norm": 7.566624110683898, + "learning_rate": 9.9977411653543e-06, + "loss": 18.1585, + "step": 2150 + }, + { + "epoch": 0.039318551556473576, + "grad_norm": 7.097300511901119, + "learning_rate": 9.997732259837528e-06, + "loss": 17.6664, + "step": 2151 + }, + { + "epoch": 0.039336830752920104, + "grad_norm": 7.893771154913385, + "learning_rate": 9.997723336804134e-06, + "loss": 18.1228, + "step": 2152 + }, + { + "epoch": 0.039355109949366626, + "grad_norm": 7.819552400648903, + "learning_rate": 9.997714396254157e-06, + "loss": 17.9937, + "step": 2153 + }, + { + "epoch": 0.03937338914581315, + "grad_norm": 7.864297066489012, + "learning_rate": 9.997705438187624e-06, + "loss": 17.902, + "step": 2154 + }, + { + "epoch": 0.03939166834225968, + "grad_norm": 7.733725836401352, + "learning_rate": 9.997696462604567e-06, + "loss": 17.6863, + "step": 2155 + }, + { + "epoch": 0.0394099475387062, + "grad_norm": 8.522942184970894, + "learning_rate": 9.997687469505018e-06, + "loss": 18.3173, + "step": 2156 + }, + { + "epoch": 0.03942822673515272, + "grad_norm": 8.175017067967467, + "learning_rate": 9.997678458889006e-06, + "loss": 17.6859, + "step": 2157 + }, + { + "epoch": 0.03944650593159925, + "grad_norm": 8.978411114997986, + "learning_rate": 
9.997669430756567e-06, + "loss": 19.04, + "step": 2158 + }, + { + "epoch": 0.03946478512804577, + "grad_norm": 7.482174928325935, + "learning_rate": 9.99766038510773e-06, + "loss": 17.8164, + "step": 2159 + }, + { + "epoch": 0.03948306432449229, + "grad_norm": 7.921072208230814, + "learning_rate": 9.997651321942526e-06, + "loss": 18.1876, + "step": 2160 + }, + { + "epoch": 0.03950134352093882, + "grad_norm": 7.17372865010213, + "learning_rate": 9.997642241260988e-06, + "loss": 17.7621, + "step": 2161 + }, + { + "epoch": 0.03951962271738534, + "grad_norm": 7.3970388554120605, + "learning_rate": 9.997633143063147e-06, + "loss": 17.8985, + "step": 2162 + }, + { + "epoch": 0.039537901913831865, + "grad_norm": 8.093249250600254, + "learning_rate": 9.997624027349038e-06, + "loss": 18.1239, + "step": 2163 + }, + { + "epoch": 0.03955618111027839, + "grad_norm": 9.891216885761784, + "learning_rate": 9.99761489411869e-06, + "loss": 18.365, + "step": 2164 + }, + { + "epoch": 0.039574460306724915, + "grad_norm": 8.247118588942913, + "learning_rate": 9.997605743372135e-06, + "loss": 17.8682, + "step": 2165 + }, + { + "epoch": 0.039592739503171444, + "grad_norm": 8.006775421381619, + "learning_rate": 9.997596575109403e-06, + "loss": 18.0026, + "step": 2166 + }, + { + "epoch": 0.039611018699617966, + "grad_norm": 8.080014015242162, + "learning_rate": 9.99758738933053e-06, + "loss": 18.2187, + "step": 2167 + }, + { + "epoch": 0.03962929789606449, + "grad_norm": 8.059337299934262, + "learning_rate": 9.997578186035548e-06, + "loss": 18.2853, + "step": 2168 + }, + { + "epoch": 0.039647577092511016, + "grad_norm": 8.587820665517523, + "learning_rate": 9.997568965224489e-06, + "loss": 18.5507, + "step": 2169 + }, + { + "epoch": 0.03966585628895754, + "grad_norm": 7.372263721174855, + "learning_rate": 9.997559726897382e-06, + "loss": 18.0406, + "step": 2170 + }, + { + "epoch": 0.03968413548540406, + "grad_norm": 6.542605135963088, + "learning_rate": 9.997550471054262e-06, + "loss": 17.5526, + "step": 2171 + }, + { + "epoch": 0.03970241468185059, + "grad_norm": 7.189110663432018, + "learning_rate": 9.997541197695165e-06, + "loss": 17.8062, + "step": 2172 + }, + { + "epoch": 0.03972069387829711, + "grad_norm": 6.404821137499881, + "learning_rate": 9.997531906820114e-06, + "loss": 17.2803, + "step": 2173 + }, + { + "epoch": 0.03973897307474363, + "grad_norm": 8.369116003250504, + "learning_rate": 9.997522598429152e-06, + "loss": 18.1112, + "step": 2174 + }, + { + "epoch": 0.03975725227119016, + "grad_norm": 8.105760046900059, + "learning_rate": 9.997513272522306e-06, + "loss": 18.1757, + "step": 2175 + }, + { + "epoch": 0.03977553146763668, + "grad_norm": 6.8914560879095, + "learning_rate": 9.997503929099608e-06, + "loss": 17.6367, + "step": 2176 + }, + { + "epoch": 0.039793810664083204, + "grad_norm": 8.74928482923437, + "learning_rate": 9.997494568161094e-06, + "loss": 18.797, + "step": 2177 + }, + { + "epoch": 0.03981208986052973, + "grad_norm": 7.606032195700107, + "learning_rate": 9.997485189706794e-06, + "loss": 18.0399, + "step": 2178 + }, + { + "epoch": 0.039830369056976254, + "grad_norm": 7.957640315235665, + "learning_rate": 9.997475793736742e-06, + "loss": 18.1217, + "step": 2179 + }, + { + "epoch": 0.039848648253422776, + "grad_norm": 8.172829907955222, + "learning_rate": 9.997466380250972e-06, + "loss": 17.9818, + "step": 2180 + }, + { + "epoch": 0.039866927449869305, + "grad_norm": 8.639348016920609, + "learning_rate": 9.997456949249516e-06, + "loss": 18.3638, + "step": 2181 + }, + { + "epoch": 
0.03988520664631583, + "grad_norm": 8.28297815129272, + "learning_rate": 9.997447500732408e-06, + "loss": 18.0711, + "step": 2182 + }, + { + "epoch": 0.039903485842762355, + "grad_norm": 7.301929917593747, + "learning_rate": 9.997438034699676e-06, + "loss": 17.9614, + "step": 2183 + }, + { + "epoch": 0.03992176503920888, + "grad_norm": 9.2133706103282, + "learning_rate": 9.99742855115136e-06, + "loss": 18.3219, + "step": 2184 + }, + { + "epoch": 0.0399400442356554, + "grad_norm": 7.6662851379575, + "learning_rate": 9.997419050087491e-06, + "loss": 18.0362, + "step": 2185 + }, + { + "epoch": 0.03995832343210193, + "grad_norm": 9.40252182257759, + "learning_rate": 9.997409531508102e-06, + "loss": 18.7361, + "step": 2186 + }, + { + "epoch": 0.03997660262854845, + "grad_norm": 7.715201964930718, + "learning_rate": 9.997399995413225e-06, + "loss": 18.0677, + "step": 2187 + }, + { + "epoch": 0.03999488182499497, + "grad_norm": 8.180109037376967, + "learning_rate": 9.997390441802896e-06, + "loss": 17.8739, + "step": 2188 + }, + { + "epoch": 0.0400131610214415, + "grad_norm": 7.474350082348093, + "learning_rate": 9.997380870677147e-06, + "loss": 17.736, + "step": 2189 + }, + { + "epoch": 0.04003144021788802, + "grad_norm": 7.30539824775302, + "learning_rate": 9.997371282036012e-06, + "loss": 17.7862, + "step": 2190 + }, + { + "epoch": 0.04004971941433454, + "grad_norm": 8.10049297693191, + "learning_rate": 9.997361675879524e-06, + "loss": 18.3183, + "step": 2191 + }, + { + "epoch": 0.04006799861078107, + "grad_norm": 8.148440778879321, + "learning_rate": 9.997352052207717e-06, + "loss": 17.8117, + "step": 2192 + }, + { + "epoch": 0.040086277807227594, + "grad_norm": 7.662804144562566, + "learning_rate": 9.997342411020623e-06, + "loss": 17.9414, + "step": 2193 + }, + { + "epoch": 0.040104557003674116, + "grad_norm": 8.657393083701246, + "learning_rate": 9.99733275231828e-06, + "loss": 18.4035, + "step": 2194 + }, + { + "epoch": 0.040122836200120644, + "grad_norm": 6.363443744243816, + "learning_rate": 9.997323076100718e-06, + "loss": 17.273, + "step": 2195 + }, + { + "epoch": 0.040141115396567166, + "grad_norm": 8.061330105461877, + "learning_rate": 9.997313382367973e-06, + "loss": 18.3943, + "step": 2196 + }, + { + "epoch": 0.04015939459301369, + "grad_norm": 7.726391277553473, + "learning_rate": 9.997303671120077e-06, + "loss": 17.7805, + "step": 2197 + }, + { + "epoch": 0.040177673789460217, + "grad_norm": 8.212107448574407, + "learning_rate": 9.997293942357065e-06, + "loss": 17.96, + "step": 2198 + }, + { + "epoch": 0.04019595298590674, + "grad_norm": 8.290238516875284, + "learning_rate": 9.997284196078974e-06, + "loss": 17.9312, + "step": 2199 + }, + { + "epoch": 0.04021423218235327, + "grad_norm": 7.98989253657042, + "learning_rate": 9.997274432285833e-06, + "loss": 17.9388, + "step": 2200 + }, + { + "epoch": 0.04023251137879979, + "grad_norm": 6.8044758666856815, + "learning_rate": 9.997264650977681e-06, + "loss": 17.4357, + "step": 2201 + }, + { + "epoch": 0.04025079057524631, + "grad_norm": 8.221859777178526, + "learning_rate": 9.997254852154548e-06, + "loss": 18.1218, + "step": 2202 + }, + { + "epoch": 0.04026906977169284, + "grad_norm": 8.345525451984651, + "learning_rate": 9.997245035816471e-06, + "loss": 18.1535, + "step": 2203 + }, + { + "epoch": 0.04028734896813936, + "grad_norm": 7.8169540138706335, + "learning_rate": 9.997235201963484e-06, + "loss": 17.9238, + "step": 2204 + }, + { + "epoch": 0.04030562816458588, + "grad_norm": 7.951482942811827, + "learning_rate": 
9.99722535059562e-06, + "loss": 17.6738, + "step": 2205 + }, + { + "epoch": 0.04032390736103241, + "grad_norm": 7.5674725394502, + "learning_rate": 9.997215481712917e-06, + "loss": 17.9341, + "step": 2206 + }, + { + "epoch": 0.04034218655747893, + "grad_norm": 8.671669698506888, + "learning_rate": 9.997205595315406e-06, + "loss": 18.5597, + "step": 2207 + }, + { + "epoch": 0.040360465753925455, + "grad_norm": 6.815993343653375, + "learning_rate": 9.997195691403123e-06, + "loss": 17.3178, + "step": 2208 + }, + { + "epoch": 0.040378744950371984, + "grad_norm": 8.389172345015542, + "learning_rate": 9.997185769976104e-06, + "loss": 18.2111, + "step": 2209 + }, + { + "epoch": 0.040397024146818505, + "grad_norm": 7.831804934723512, + "learning_rate": 9.997175831034382e-06, + "loss": 18.0893, + "step": 2210 + }, + { + "epoch": 0.04041530334326503, + "grad_norm": 7.819787617633279, + "learning_rate": 9.99716587457799e-06, + "loss": 17.8114, + "step": 2211 + }, + { + "epoch": 0.040433582539711556, + "grad_norm": 7.213440914722836, + "learning_rate": 9.997155900606968e-06, + "loss": 17.5792, + "step": 2212 + }, + { + "epoch": 0.04045186173615808, + "grad_norm": 7.467749826895467, + "learning_rate": 9.99714590912135e-06, + "loss": 18.1147, + "step": 2213 + }, + { + "epoch": 0.0404701409326046, + "grad_norm": 7.3030276478195715, + "learning_rate": 9.997135900121164e-06, + "loss": 17.7149, + "step": 2214 + }, + { + "epoch": 0.04048842012905113, + "grad_norm": 9.368146596367051, + "learning_rate": 9.997125873606452e-06, + "loss": 18.43, + "step": 2215 + }, + { + "epoch": 0.04050669932549765, + "grad_norm": 7.54753489549492, + "learning_rate": 9.99711582957725e-06, + "loss": 17.8491, + "step": 2216 + }, + { + "epoch": 0.04052497852194418, + "grad_norm": 7.018911450745933, + "learning_rate": 9.997105768033588e-06, + "loss": 17.5845, + "step": 2217 + }, + { + "epoch": 0.0405432577183907, + "grad_norm": 8.017074287783036, + "learning_rate": 9.997095688975506e-06, + "loss": 18.0492, + "step": 2218 + }, + { + "epoch": 0.04056153691483722, + "grad_norm": 6.573016273674605, + "learning_rate": 9.997085592403036e-06, + "loss": 17.4416, + "step": 2219 + }, + { + "epoch": 0.04057981611128375, + "grad_norm": 7.165610220362177, + "learning_rate": 9.997075478316213e-06, + "loss": 17.748, + "step": 2220 + }, + { + "epoch": 0.04059809530773027, + "grad_norm": 8.116293660856455, + "learning_rate": 9.997065346715079e-06, + "loss": 18.1911, + "step": 2221 + }, + { + "epoch": 0.040616374504176794, + "grad_norm": 8.492003188224011, + "learning_rate": 9.99705519759966e-06, + "loss": 18.0617, + "step": 2222 + }, + { + "epoch": 0.04063465370062332, + "grad_norm": 6.45728722483106, + "learning_rate": 9.997045030969997e-06, + "loss": 17.3788, + "step": 2223 + }, + { + "epoch": 0.040652932897069845, + "grad_norm": 9.357708050753015, + "learning_rate": 9.997034846826126e-06, + "loss": 18.6276, + "step": 2224 + }, + { + "epoch": 0.040671212093516367, + "grad_norm": 8.108005144604292, + "learning_rate": 9.99702464516808e-06, + "loss": 18.2943, + "step": 2225 + }, + { + "epoch": 0.040689491289962895, + "grad_norm": 7.856412638551117, + "learning_rate": 9.997014425995898e-06, + "loss": 18.0768, + "step": 2226 + }, + { + "epoch": 0.04070777048640942, + "grad_norm": 7.9677426244573795, + "learning_rate": 9.997004189309614e-06, + "loss": 17.9339, + "step": 2227 + }, + { + "epoch": 0.04072604968285594, + "grad_norm": 8.68121235006211, + "learning_rate": 9.996993935109263e-06, + "loss": 18.3509, + "step": 2228 + }, + { + "epoch": 
0.04074432887930247, + "grad_norm": 9.159087383676308, + "learning_rate": 9.99698366339488e-06, + "loss": 18.6396, + "step": 2229 + }, + { + "epoch": 0.04076260807574899, + "grad_norm": 9.129100089272592, + "learning_rate": 9.996973374166505e-06, + "loss": 18.3589, + "step": 2230 + }, + { + "epoch": 0.04078088727219551, + "grad_norm": 7.63200488625213, + "learning_rate": 9.996963067424173e-06, + "loss": 17.8978, + "step": 2231 + }, + { + "epoch": 0.04079916646864204, + "grad_norm": 7.037195597311852, + "learning_rate": 9.996952743167919e-06, + "loss": 17.9303, + "step": 2232 + }, + { + "epoch": 0.04081744566508856, + "grad_norm": 6.697245553985456, + "learning_rate": 9.996942401397776e-06, + "loss": 17.2696, + "step": 2233 + }, + { + "epoch": 0.04083572486153509, + "grad_norm": 8.364622068437564, + "learning_rate": 9.996932042113785e-06, + "loss": 18.2497, + "step": 2234 + }, + { + "epoch": 0.04085400405798161, + "grad_norm": 7.55560813284806, + "learning_rate": 9.996921665315982e-06, + "loss": 17.7116, + "step": 2235 + }, + { + "epoch": 0.040872283254428134, + "grad_norm": 7.189021265997685, + "learning_rate": 9.996911271004403e-06, + "loss": 17.7219, + "step": 2236 + }, + { + "epoch": 0.04089056245087466, + "grad_norm": 8.405771124735566, + "learning_rate": 9.996900859179082e-06, + "loss": 18.4455, + "step": 2237 + }, + { + "epoch": 0.040908841647321184, + "grad_norm": 8.110808715552439, + "learning_rate": 9.996890429840057e-06, + "loss": 18.0719, + "step": 2238 + }, + { + "epoch": 0.040927120843767706, + "grad_norm": 6.7657326654624805, + "learning_rate": 9.996879982987365e-06, + "loss": 17.6897, + "step": 2239 + }, + { + "epoch": 0.040945400040214235, + "grad_norm": 7.902971602385438, + "learning_rate": 9.996869518621043e-06, + "loss": 17.916, + "step": 2240 + }, + { + "epoch": 0.040963679236660756, + "grad_norm": 8.739284291169165, + "learning_rate": 9.996859036741125e-06, + "loss": 18.4968, + "step": 2241 + }, + { + "epoch": 0.04098195843310728, + "grad_norm": 8.863783295034544, + "learning_rate": 9.996848537347651e-06, + "loss": 18.4603, + "step": 2242 + }, + { + "epoch": 0.04100023762955381, + "grad_norm": 8.827975485715196, + "learning_rate": 9.996838020440656e-06, + "loss": 18.1549, + "step": 2243 + }, + { + "epoch": 0.04101851682600033, + "grad_norm": 7.242534711509926, + "learning_rate": 9.996827486020178e-06, + "loss": 18.0313, + "step": 2244 + }, + { + "epoch": 0.04103679602244685, + "grad_norm": 8.24530816384671, + "learning_rate": 9.996816934086253e-06, + "loss": 18.1642, + "step": 2245 + }, + { + "epoch": 0.04105507521889338, + "grad_norm": 8.827207274456681, + "learning_rate": 9.996806364638917e-06, + "loss": 17.7236, + "step": 2246 + }, + { + "epoch": 0.0410733544153399, + "grad_norm": 7.681208695267572, + "learning_rate": 9.996795777678212e-06, + "loss": 18.0287, + "step": 2247 + }, + { + "epoch": 0.04109163361178642, + "grad_norm": 7.7030414415403685, + "learning_rate": 9.996785173204168e-06, + "loss": 17.817, + "step": 2248 + }, + { + "epoch": 0.04110991280823295, + "grad_norm": 7.760771743532918, + "learning_rate": 9.996774551216825e-06, + "loss": 17.6442, + "step": 2249 + }, + { + "epoch": 0.04112819200467947, + "grad_norm": 7.646132278653959, + "learning_rate": 9.996763911716223e-06, + "loss": 17.8221, + "step": 2250 + }, + { + "epoch": 0.041146471201126, + "grad_norm": 7.664331066563644, + "learning_rate": 9.996753254702396e-06, + "loss": 18.082, + "step": 2251 + }, + { + "epoch": 0.04116475039757252, + "grad_norm": 8.051571433608776, + "learning_rate": 
9.996742580175383e-06, + "loss": 17.8551, + "step": 2252 + }, + { + "epoch": 0.041183029594019045, + "grad_norm": 9.186677857311098, + "learning_rate": 9.996731888135221e-06, + "loss": 18.7887, + "step": 2253 + }, + { + "epoch": 0.041201308790465574, + "grad_norm": 9.121127798151235, + "learning_rate": 9.996721178581948e-06, + "loss": 18.4585, + "step": 2254 + }, + { + "epoch": 0.041219587986912096, + "grad_norm": 7.671654595276185, + "learning_rate": 9.9967104515156e-06, + "loss": 17.8728, + "step": 2255 + }, + { + "epoch": 0.04123786718335862, + "grad_norm": 6.970048735806223, + "learning_rate": 9.996699706936214e-06, + "loss": 17.661, + "step": 2256 + }, + { + "epoch": 0.041256146379805146, + "grad_norm": 7.430312463763205, + "learning_rate": 9.99668894484383e-06, + "loss": 18.0237, + "step": 2257 + }, + { + "epoch": 0.04127442557625167, + "grad_norm": 7.511826843925589, + "learning_rate": 9.996678165238486e-06, + "loss": 17.7627, + "step": 2258 + }, + { + "epoch": 0.04129270477269819, + "grad_norm": 8.420303737893573, + "learning_rate": 9.996667368120219e-06, + "loss": 18.2609, + "step": 2259 + }, + { + "epoch": 0.04131098396914472, + "grad_norm": 7.10238546153036, + "learning_rate": 9.996656553489063e-06, + "loss": 17.7902, + "step": 2260 + }, + { + "epoch": 0.04132926316559124, + "grad_norm": 6.945695856385279, + "learning_rate": 9.996645721345064e-06, + "loss": 17.4524, + "step": 2261 + }, + { + "epoch": 0.04134754236203776, + "grad_norm": 6.764161856334369, + "learning_rate": 9.996634871688252e-06, + "loss": 17.5518, + "step": 2262 + }, + { + "epoch": 0.04136582155848429, + "grad_norm": 7.665182816750473, + "learning_rate": 9.99662400451867e-06, + "loss": 18.115, + "step": 2263 + }, + { + "epoch": 0.04138410075493081, + "grad_norm": 8.059909396412177, + "learning_rate": 9.996613119836354e-06, + "loss": 18.2972, + "step": 2264 + }, + { + "epoch": 0.041402379951377334, + "grad_norm": 8.569543981619749, + "learning_rate": 9.996602217641342e-06, + "loss": 18.4112, + "step": 2265 + }, + { + "epoch": 0.04142065914782386, + "grad_norm": 7.7239610781494425, + "learning_rate": 9.996591297933674e-06, + "loss": 17.869, + "step": 2266 + }, + { + "epoch": 0.041438938344270385, + "grad_norm": 9.416283894916795, + "learning_rate": 9.996580360713386e-06, + "loss": 18.6436, + "step": 2267 + }, + { + "epoch": 0.04145721754071691, + "grad_norm": 7.5379396551312885, + "learning_rate": 9.996569405980517e-06, + "loss": 17.797, + "step": 2268 + }, + { + "epoch": 0.041475496737163435, + "grad_norm": 7.955023806419039, + "learning_rate": 9.996558433735106e-06, + "loss": 17.8707, + "step": 2269 + }, + { + "epoch": 0.04149377593360996, + "grad_norm": 8.470373947819228, + "learning_rate": 9.996547443977193e-06, + "loss": 18.0362, + "step": 2270 + }, + { + "epoch": 0.041512055130056486, + "grad_norm": 8.130937681452403, + "learning_rate": 9.996536436706815e-06, + "loss": 17.9607, + "step": 2271 + }, + { + "epoch": 0.04153033432650301, + "grad_norm": 9.185888336239445, + "learning_rate": 9.996525411924008e-06, + "loss": 18.8378, + "step": 2272 + }, + { + "epoch": 0.04154861352294953, + "grad_norm": 7.703635781287982, + "learning_rate": 9.996514369628813e-06, + "loss": 17.9426, + "step": 2273 + }, + { + "epoch": 0.04156689271939606, + "grad_norm": 8.686634293642456, + "learning_rate": 9.99650330982127e-06, + "loss": 18.4029, + "step": 2274 + }, + { + "epoch": 0.04158517191584258, + "grad_norm": 7.505067804924687, + "learning_rate": 9.996492232501416e-06, + "loss": 17.9971, + "step": 2275 + }, + { + "epoch": 
0.0416034511122891, + "grad_norm": 8.266241750127806, + "learning_rate": 9.996481137669291e-06, + "loss": 18.1485, + "step": 2276 + }, + { + "epoch": 0.04162173030873563, + "grad_norm": 6.9270516165961755, + "learning_rate": 9.996470025324933e-06, + "loss": 17.66, + "step": 2277 + }, + { + "epoch": 0.04164000950518215, + "grad_norm": 8.25286348451379, + "learning_rate": 9.99645889546838e-06, + "loss": 18.0654, + "step": 2278 + }, + { + "epoch": 0.041658288701628673, + "grad_norm": 7.159158430658167, + "learning_rate": 9.996447748099673e-06, + "loss": 17.8872, + "step": 2279 + }, + { + "epoch": 0.0416765678980752, + "grad_norm": 6.995221413745061, + "learning_rate": 9.996436583218852e-06, + "loss": 17.4837, + "step": 2280 + }, + { + "epoch": 0.041694847094521724, + "grad_norm": 6.775646813300009, + "learning_rate": 9.996425400825952e-06, + "loss": 17.4218, + "step": 2281 + }, + { + "epoch": 0.041713126290968246, + "grad_norm": 6.985900307532805, + "learning_rate": 9.996414200921014e-06, + "loss": 17.6457, + "step": 2282 + }, + { + "epoch": 0.041731405487414774, + "grad_norm": 6.908522988366118, + "learning_rate": 9.996402983504079e-06, + "loss": 17.5612, + "step": 2283 + }, + { + "epoch": 0.041749684683861296, + "grad_norm": 8.094841746586784, + "learning_rate": 9.996391748575184e-06, + "loss": 18.0505, + "step": 2284 + }, + { + "epoch": 0.041767963880307825, + "grad_norm": 7.070760098061046, + "learning_rate": 9.996380496134372e-06, + "loss": 17.9362, + "step": 2285 + }, + { + "epoch": 0.04178624307675435, + "grad_norm": 8.978344297434703, + "learning_rate": 9.996369226181678e-06, + "loss": 18.6022, + "step": 2286 + }, + { + "epoch": 0.04180452227320087, + "grad_norm": 9.632714862373605, + "learning_rate": 9.996357938717144e-06, + "loss": 18.3985, + "step": 2287 + }, + { + "epoch": 0.0418228014696474, + "grad_norm": 8.054088454669444, + "learning_rate": 9.996346633740809e-06, + "loss": 18.1617, + "step": 2288 + }, + { + "epoch": 0.04184108066609392, + "grad_norm": 8.577846213672203, + "learning_rate": 9.996335311252712e-06, + "loss": 18.3944, + "step": 2289 + }, + { + "epoch": 0.04185935986254044, + "grad_norm": 6.67528905552185, + "learning_rate": 9.996323971252895e-06, + "loss": 17.6992, + "step": 2290 + }, + { + "epoch": 0.04187763905898697, + "grad_norm": 8.042630167840407, + "learning_rate": 9.996312613741394e-06, + "loss": 18.6025, + "step": 2291 + }, + { + "epoch": 0.04189591825543349, + "grad_norm": 7.633142920361117, + "learning_rate": 9.996301238718251e-06, + "loss": 18.0448, + "step": 2292 + }, + { + "epoch": 0.04191419745188001, + "grad_norm": 8.627363254939468, + "learning_rate": 9.996289846183506e-06, + "loss": 18.4076, + "step": 2293 + }, + { + "epoch": 0.04193247664832654, + "grad_norm": 6.953966791148869, + "learning_rate": 9.9962784361372e-06, + "loss": 17.5413, + "step": 2294 + }, + { + "epoch": 0.04195075584477306, + "grad_norm": 8.570535225232478, + "learning_rate": 9.99626700857937e-06, + "loss": 18.5337, + "step": 2295 + }, + { + "epoch": 0.041969035041219585, + "grad_norm": 7.60298214718083, + "learning_rate": 9.996255563510059e-06, + "loss": 17.8707, + "step": 2296 + }, + { + "epoch": 0.041987314237666114, + "grad_norm": 8.817426993262535, + "learning_rate": 9.996244100929305e-06, + "loss": 18.5549, + "step": 2297 + }, + { + "epoch": 0.042005593434112636, + "grad_norm": 8.051074390814165, + "learning_rate": 9.99623262083715e-06, + "loss": 18.0903, + "step": 2298 + }, + { + "epoch": 0.04202387263055916, + "grad_norm": 6.914049677660477, + "learning_rate": 
9.996221123233631e-06, + "loss": 17.652, + "step": 2299 + }, + { + "epoch": 0.042042151827005686, + "grad_norm": 9.300933604045385, + "learning_rate": 9.996209608118792e-06, + "loss": 18.8076, + "step": 2300 + }, + { + "epoch": 0.04206043102345221, + "grad_norm": 8.587086318109883, + "learning_rate": 9.99619807549267e-06, + "loss": 18.1632, + "step": 2301 + }, + { + "epoch": 0.042078710219898736, + "grad_norm": 8.43519282951352, + "learning_rate": 9.996186525355312e-06, + "loss": 17.9335, + "step": 2302 + }, + { + "epoch": 0.04209698941634526, + "grad_norm": 7.099712415454915, + "learning_rate": 9.99617495770675e-06, + "loss": 17.8879, + "step": 2303 + }, + { + "epoch": 0.04211526861279178, + "grad_norm": 7.410752085634336, + "learning_rate": 9.99616337254703e-06, + "loss": 17.6165, + "step": 2304 + }, + { + "epoch": 0.04213354780923831, + "grad_norm": 9.23145486560416, + "learning_rate": 9.99615176987619e-06, + "loss": 18.5385, + "step": 2305 + }, + { + "epoch": 0.04215182700568483, + "grad_norm": 7.804166649928118, + "learning_rate": 9.996140149694271e-06, + "loss": 18.1071, + "step": 2306 + }, + { + "epoch": 0.04217010620213135, + "grad_norm": 7.292123905585162, + "learning_rate": 9.996128512001315e-06, + "loss": 17.7422, + "step": 2307 + }, + { + "epoch": 0.04218838539857788, + "grad_norm": 7.915688154902509, + "learning_rate": 9.996116856797361e-06, + "loss": 17.8606, + "step": 2308 + }, + { + "epoch": 0.0422066645950244, + "grad_norm": 7.326183825319675, + "learning_rate": 9.996105184082451e-06, + "loss": 17.6888, + "step": 2309 + }, + { + "epoch": 0.042224943791470924, + "grad_norm": 7.985166596550133, + "learning_rate": 9.996093493856629e-06, + "loss": 17.962, + "step": 2310 + }, + { + "epoch": 0.04224322298791745, + "grad_norm": 8.03668699206688, + "learning_rate": 9.996081786119932e-06, + "loss": 18.0788, + "step": 2311 + }, + { + "epoch": 0.042261502184363975, + "grad_norm": 7.6983489170030825, + "learning_rate": 9.996070060872397e-06, + "loss": 17.7543, + "step": 2312 + }, + { + "epoch": 0.0422797813808105, + "grad_norm": 8.654245011201022, + "learning_rate": 9.996058318114076e-06, + "loss": 18.1858, + "step": 2313 + }, + { + "epoch": 0.042298060577257025, + "grad_norm": 7.674808547428686, + "learning_rate": 9.996046557845e-06, + "loss": 17.8757, + "step": 2314 + }, + { + "epoch": 0.04231633977370355, + "grad_norm": 7.9920460527959305, + "learning_rate": 9.996034780065218e-06, + "loss": 17.7592, + "step": 2315 + }, + { + "epoch": 0.04233461897015007, + "grad_norm": 8.769256034609777, + "learning_rate": 9.996022984774764e-06, + "loss": 18.0602, + "step": 2316 + }, + { + "epoch": 0.0423528981665966, + "grad_norm": 7.93222165293833, + "learning_rate": 9.996011171973686e-06, + "loss": 18.0744, + "step": 2317 + }, + { + "epoch": 0.04237117736304312, + "grad_norm": 7.781045243938328, + "learning_rate": 9.995999341662021e-06, + "loss": 17.9564, + "step": 2318 + }, + { + "epoch": 0.04238945655948965, + "grad_norm": 7.744615501885769, + "learning_rate": 9.995987493839812e-06, + "loss": 17.7038, + "step": 2319 + }, + { + "epoch": 0.04240773575593617, + "grad_norm": 7.516691960954121, + "learning_rate": 9.995975628507099e-06, + "loss": 17.6575, + "step": 2320 + }, + { + "epoch": 0.04242601495238269, + "grad_norm": 6.115400759637814, + "learning_rate": 9.995963745663928e-06, + "loss": 17.475, + "step": 2321 + }, + { + "epoch": 0.04244429414882922, + "grad_norm": 7.22892817618116, + "learning_rate": 9.995951845310334e-06, + "loss": 17.5793, + "step": 2322 + }, + { + "epoch": 
0.04246257334527574, + "grad_norm": 8.062822196393663, + "learning_rate": 9.995939927446366e-06, + "loss": 18.0531, + "step": 2323 + }, + { + "epoch": 0.042480852541722264, + "grad_norm": 7.660615550768664, + "learning_rate": 9.995927992072058e-06, + "loss": 17.7797, + "step": 2324 + }, + { + "epoch": 0.04249913173816879, + "grad_norm": 7.982847840410579, + "learning_rate": 9.995916039187458e-06, + "loss": 18.2002, + "step": 2325 + }, + { + "epoch": 0.042517410934615314, + "grad_norm": 8.785543921953549, + "learning_rate": 9.995904068792607e-06, + "loss": 18.5416, + "step": 2326 + }, + { + "epoch": 0.042535690131061836, + "grad_norm": 7.097569729541003, + "learning_rate": 9.995892080887545e-06, + "loss": 17.6896, + "step": 2327 + }, + { + "epoch": 0.042553969327508365, + "grad_norm": 8.176532027468705, + "learning_rate": 9.995880075472315e-06, + "loss": 18.2831, + "step": 2328 + }, + { + "epoch": 0.042572248523954886, + "grad_norm": 7.885895150721695, + "learning_rate": 9.995868052546957e-06, + "loss": 17.9436, + "step": 2329 + }, + { + "epoch": 0.04259052772040141, + "grad_norm": 6.775177913906577, + "learning_rate": 9.995856012111517e-06, + "loss": 17.3252, + "step": 2330 + }, + { + "epoch": 0.04260880691684794, + "grad_norm": 7.94354048743229, + "learning_rate": 9.995843954166036e-06, + "loss": 17.9838, + "step": 2331 + }, + { + "epoch": 0.04262708611329446, + "grad_norm": 7.764688523989066, + "learning_rate": 9.995831878710553e-06, + "loss": 17.9115, + "step": 2332 + }, + { + "epoch": 0.04264536530974098, + "grad_norm": 7.409280094124277, + "learning_rate": 9.995819785745113e-06, + "loss": 17.729, + "step": 2333 + }, + { + "epoch": 0.04266364450618751, + "grad_norm": 6.778335589173222, + "learning_rate": 9.995807675269759e-06, + "loss": 17.6421, + "step": 2334 + }, + { + "epoch": 0.04268192370263403, + "grad_norm": 8.37586586816977, + "learning_rate": 9.995795547284533e-06, + "loss": 18.3069, + "step": 2335 + }, + { + "epoch": 0.04270020289908056, + "grad_norm": 8.1383868168471, + "learning_rate": 9.995783401789476e-06, + "loss": 17.9825, + "step": 2336 + }, + { + "epoch": 0.04271848209552708, + "grad_norm": 8.10989994308069, + "learning_rate": 9.995771238784633e-06, + "loss": 18.2562, + "step": 2337 + }, + { + "epoch": 0.0427367612919736, + "grad_norm": 7.72664475872143, + "learning_rate": 9.995759058270046e-06, + "loss": 17.9149, + "step": 2338 + }, + { + "epoch": 0.04275504048842013, + "grad_norm": 8.247015240804508, + "learning_rate": 9.995746860245754e-06, + "loss": 18.2216, + "step": 2339 + }, + { + "epoch": 0.042773319684866654, + "grad_norm": 7.422653431185456, + "learning_rate": 9.995734644711806e-06, + "loss": 17.6408, + "step": 2340 + }, + { + "epoch": 0.042791598881313175, + "grad_norm": 7.072127301139403, + "learning_rate": 9.99572241166824e-06, + "loss": 17.7117, + "step": 2341 + }, + { + "epoch": 0.042809878077759704, + "grad_norm": 7.858912139734376, + "learning_rate": 9.9957101611151e-06, + "loss": 18.0305, + "step": 2342 + }, + { + "epoch": 0.042828157274206226, + "grad_norm": 8.45495617543098, + "learning_rate": 9.99569789305243e-06, + "loss": 18.0969, + "step": 2343 + }, + { + "epoch": 0.04284643647065275, + "grad_norm": 8.59786241900796, + "learning_rate": 9.995685607480272e-06, + "loss": 18.3043, + "step": 2344 + }, + { + "epoch": 0.042864715667099276, + "grad_norm": 7.112209556935003, + "learning_rate": 9.99567330439867e-06, + "loss": 17.716, + "step": 2345 + }, + { + "epoch": 0.0428829948635458, + "grad_norm": 7.4017512032226565, + "learning_rate": 
9.995660983807667e-06, + "loss": 17.7608, + "step": 2346 + }, + { + "epoch": 0.04290127405999232, + "grad_norm": 8.037185708433707, + "learning_rate": 9.995648645707305e-06, + "loss": 18.2411, + "step": 2347 + }, + { + "epoch": 0.04291955325643885, + "grad_norm": 8.216894936542884, + "learning_rate": 9.995636290097627e-06, + "loss": 18.3719, + "step": 2348 + }, + { + "epoch": 0.04293783245288537, + "grad_norm": 7.685407484316789, + "learning_rate": 9.99562391697868e-06, + "loss": 17.9064, + "step": 2349 + }, + { + "epoch": 0.04295611164933189, + "grad_norm": 8.427764315977564, + "learning_rate": 9.995611526350502e-06, + "loss": 18.5452, + "step": 2350 + }, + { + "epoch": 0.04297439084577842, + "grad_norm": 8.095761168162861, + "learning_rate": 9.99559911821314e-06, + "loss": 17.9802, + "step": 2351 + }, + { + "epoch": 0.04299267004222494, + "grad_norm": 7.0316064907918445, + "learning_rate": 9.995586692566636e-06, + "loss": 17.8435, + "step": 2352 + }, + { + "epoch": 0.04301094923867147, + "grad_norm": 7.839253788821011, + "learning_rate": 9.995574249411035e-06, + "loss": 17.8707, + "step": 2353 + }, + { + "epoch": 0.04302922843511799, + "grad_norm": 7.38435360392192, + "learning_rate": 9.99556178874638e-06, + "loss": 17.7948, + "step": 2354 + }, + { + "epoch": 0.043047507631564515, + "grad_norm": 7.911927060871132, + "learning_rate": 9.995549310572714e-06, + "loss": 18.0407, + "step": 2355 + }, + { + "epoch": 0.04306578682801104, + "grad_norm": 7.234185652413484, + "learning_rate": 9.995536814890081e-06, + "loss": 17.8514, + "step": 2356 + }, + { + "epoch": 0.043084066024457565, + "grad_norm": 7.87114672882479, + "learning_rate": 9.995524301698525e-06, + "loss": 17.9316, + "step": 2357 + }, + { + "epoch": 0.04310234522090409, + "grad_norm": 6.701925047119131, + "learning_rate": 9.995511770998089e-06, + "loss": 17.5415, + "step": 2358 + }, + { + "epoch": 0.043120624417350616, + "grad_norm": 7.237738766893789, + "learning_rate": 9.99549922278882e-06, + "loss": 17.8181, + "step": 2359 + }, + { + "epoch": 0.04313890361379714, + "grad_norm": 8.366866841506923, + "learning_rate": 9.995486657070758e-06, + "loss": 18.3393, + "step": 2360 + }, + { + "epoch": 0.04315718281024366, + "grad_norm": 8.159031238202944, + "learning_rate": 9.99547407384395e-06, + "loss": 18.1373, + "step": 2361 + }, + { + "epoch": 0.04317546200669019, + "grad_norm": 9.519533536980633, + "learning_rate": 9.99546147310844e-06, + "loss": 19.0391, + "step": 2362 + }, + { + "epoch": 0.04319374120313671, + "grad_norm": 7.203080784424371, + "learning_rate": 9.995448854864267e-06, + "loss": 17.6502, + "step": 2363 + }, + { + "epoch": 0.04321202039958323, + "grad_norm": 7.785504030875423, + "learning_rate": 9.99543621911148e-06, + "loss": 18.0516, + "step": 2364 + }, + { + "epoch": 0.04323029959602976, + "grad_norm": 7.418746620233205, + "learning_rate": 9.995423565850125e-06, + "loss": 17.7921, + "step": 2365 + }, + { + "epoch": 0.04324857879247628, + "grad_norm": 6.901505175666439, + "learning_rate": 9.995410895080242e-06, + "loss": 17.5693, + "step": 2366 + }, + { + "epoch": 0.043266857988922804, + "grad_norm": 7.735162149311585, + "learning_rate": 9.995398206801878e-06, + "loss": 17.93, + "step": 2367 + }, + { + "epoch": 0.04328513718536933, + "grad_norm": 7.727478640094747, + "learning_rate": 9.995385501015079e-06, + "loss": 17.6187, + "step": 2368 + }, + { + "epoch": 0.043303416381815854, + "grad_norm": 6.75990624060066, + "learning_rate": 9.995372777719885e-06, + "loss": 17.6831, + "step": 2369 + }, + { + "epoch": 
0.04332169557826238, + "grad_norm": 7.396815271837521, + "learning_rate": 9.995360036916342e-06, + "loss": 17.6261, + "step": 2370 + }, + { + "epoch": 0.043339974774708905, + "grad_norm": 8.951021034027882, + "learning_rate": 9.995347278604497e-06, + "loss": 18.623, + "step": 2371 + }, + { + "epoch": 0.043358253971155426, + "grad_norm": 7.478975138883387, + "learning_rate": 9.995334502784392e-06, + "loss": 17.8627, + "step": 2372 + }, + { + "epoch": 0.043376533167601955, + "grad_norm": 7.210516005272581, + "learning_rate": 9.995321709456074e-06, + "loss": 17.6906, + "step": 2373 + }, + { + "epoch": 0.04339481236404848, + "grad_norm": 8.089111675125123, + "learning_rate": 9.995308898619587e-06, + "loss": 17.8964, + "step": 2374 + }, + { + "epoch": 0.043413091560495, + "grad_norm": 7.221725490241234, + "learning_rate": 9.995296070274976e-06, + "loss": 17.7029, + "step": 2375 + }, + { + "epoch": 0.04343137075694153, + "grad_norm": 9.049432813964266, + "learning_rate": 9.995283224422285e-06, + "loss": 18.5432, + "step": 2376 + }, + { + "epoch": 0.04344964995338805, + "grad_norm": 8.101618835608113, + "learning_rate": 9.99527036106156e-06, + "loss": 17.9293, + "step": 2377 + }, + { + "epoch": 0.04346792914983457, + "grad_norm": 7.583140210100685, + "learning_rate": 9.995257480192845e-06, + "loss": 17.9971, + "step": 2378 + }, + { + "epoch": 0.0434862083462811, + "grad_norm": 7.069738679577622, + "learning_rate": 9.995244581816189e-06, + "loss": 17.5991, + "step": 2379 + }, + { + "epoch": 0.04350448754272762, + "grad_norm": 7.385570093641195, + "learning_rate": 9.99523166593163e-06, + "loss": 17.629, + "step": 2380 + }, + { + "epoch": 0.04352276673917414, + "grad_norm": 7.691840493197007, + "learning_rate": 9.99521873253922e-06, + "loss": 17.9091, + "step": 2381 + }, + { + "epoch": 0.04354104593562067, + "grad_norm": 8.304064472508092, + "learning_rate": 9.995205781639001e-06, + "loss": 18.22, + "step": 2382 + }, + { + "epoch": 0.04355932513206719, + "grad_norm": 8.722371398782412, + "learning_rate": 9.99519281323102e-06, + "loss": 17.6181, + "step": 2383 + }, + { + "epoch": 0.043577604328513715, + "grad_norm": 8.297329667723888, + "learning_rate": 9.995179827315321e-06, + "loss": 18.0572, + "step": 2384 + }, + { + "epoch": 0.043595883524960244, + "grad_norm": 6.547568797748973, + "learning_rate": 9.99516682389195e-06, + "loss": 17.7223, + "step": 2385 + }, + { + "epoch": 0.043614162721406766, + "grad_norm": 7.8858528862674016, + "learning_rate": 9.995153802960952e-06, + "loss": 18.2852, + "step": 2386 + }, + { + "epoch": 0.043632441917853294, + "grad_norm": 7.346611184463637, + "learning_rate": 9.995140764522377e-06, + "loss": 17.7132, + "step": 2387 + }, + { + "epoch": 0.043650721114299816, + "grad_norm": 7.232702754468648, + "learning_rate": 9.995127708576265e-06, + "loss": 17.7087, + "step": 2388 + }, + { + "epoch": 0.04366900031074634, + "grad_norm": 7.8902377192310285, + "learning_rate": 9.995114635122663e-06, + "loss": 18.1182, + "step": 2389 + }, + { + "epoch": 0.04368727950719287, + "grad_norm": 7.541585681087609, + "learning_rate": 9.995101544161617e-06, + "loss": 17.9787, + "step": 2390 + }, + { + "epoch": 0.04370555870363939, + "grad_norm": 8.022977044455, + "learning_rate": 9.995088435693174e-06, + "loss": 18.0081, + "step": 2391 + }, + { + "epoch": 0.04372383790008591, + "grad_norm": 9.649686114276376, + "learning_rate": 9.995075309717382e-06, + "loss": 18.4127, + "step": 2392 + }, + { + "epoch": 0.04374211709653244, + "grad_norm": 9.461293606285105, + "learning_rate": 
9.995062166234281e-06, + "loss": 18.5204, + "step": 2393 + }, + { + "epoch": 0.04376039629297896, + "grad_norm": 7.099137168277874, + "learning_rate": 9.995049005243922e-06, + "loss": 18.0249, + "step": 2394 + }, + { + "epoch": 0.04377867548942548, + "grad_norm": 7.888544589686886, + "learning_rate": 9.995035826746351e-06, + "loss": 18.1497, + "step": 2395 + }, + { + "epoch": 0.04379695468587201, + "grad_norm": 8.461906939436192, + "learning_rate": 9.99502263074161e-06, + "loss": 17.9549, + "step": 2396 + }, + { + "epoch": 0.04381523388231853, + "grad_norm": 8.681457125286627, + "learning_rate": 9.99500941722975e-06, + "loss": 18.5861, + "step": 2397 + }, + { + "epoch": 0.043833513078765055, + "grad_norm": 7.982245776764294, + "learning_rate": 9.994996186210818e-06, + "loss": 17.9586, + "step": 2398 + }, + { + "epoch": 0.04385179227521158, + "grad_norm": 6.70762930155129, + "learning_rate": 9.994982937684854e-06, + "loss": 17.6772, + "step": 2399 + }, + { + "epoch": 0.043870071471658105, + "grad_norm": 7.625164098269424, + "learning_rate": 9.994969671651908e-06, + "loss": 18.307, + "step": 2400 + }, + { + "epoch": 0.04388835066810463, + "grad_norm": 8.025152234520164, + "learning_rate": 9.99495638811203e-06, + "loss": 17.929, + "step": 2401 + }, + { + "epoch": 0.043906629864551155, + "grad_norm": 7.188413988586524, + "learning_rate": 9.99494308706526e-06, + "loss": 17.5913, + "step": 2402 + }, + { + "epoch": 0.04392490906099768, + "grad_norm": 7.566729513834296, + "learning_rate": 9.99492976851165e-06, + "loss": 18.0366, + "step": 2403 + }, + { + "epoch": 0.043943188257444206, + "grad_norm": 7.22772701732747, + "learning_rate": 9.994916432451245e-06, + "loss": 17.6594, + "step": 2404 + }, + { + "epoch": 0.04396146745389073, + "grad_norm": 8.347832930396576, + "learning_rate": 9.99490307888409e-06, + "loss": 18.0701, + "step": 2405 + }, + { + "epoch": 0.04397974665033725, + "grad_norm": 7.283857657416851, + "learning_rate": 9.994889707810234e-06, + "loss": 17.6575, + "step": 2406 + }, + { + "epoch": 0.04399802584678378, + "grad_norm": 6.820984119408071, + "learning_rate": 9.99487631922972e-06, + "loss": 17.2445, + "step": 2407 + }, + { + "epoch": 0.0440163050432303, + "grad_norm": 7.403463314533847, + "learning_rate": 9.994862913142602e-06, + "loss": 18.2281, + "step": 2408 + }, + { + "epoch": 0.04403458423967682, + "grad_norm": 7.991456749842262, + "learning_rate": 9.994849489548921e-06, + "loss": 18.057, + "step": 2409 + }, + { + "epoch": 0.04405286343612335, + "grad_norm": 7.159303653286646, + "learning_rate": 9.994836048448726e-06, + "loss": 17.8026, + "step": 2410 + }, + { + "epoch": 0.04407114263256987, + "grad_norm": 7.207852452805044, + "learning_rate": 9.994822589842065e-06, + "loss": 18.0369, + "step": 2411 + }, + { + "epoch": 0.044089421829016394, + "grad_norm": 8.142630712018148, + "learning_rate": 9.994809113728982e-06, + "loss": 18.0258, + "step": 2412 + }, + { + "epoch": 0.04410770102546292, + "grad_norm": 7.681017370419984, + "learning_rate": 9.99479562010953e-06, + "loss": 18.1285, + "step": 2413 + }, + { + "epoch": 0.044125980221909444, + "grad_norm": 6.686508257741154, + "learning_rate": 9.994782108983749e-06, + "loss": 17.2982, + "step": 2414 + }, + { + "epoch": 0.044144259418355966, + "grad_norm": 8.305098334149148, + "learning_rate": 9.994768580351692e-06, + "loss": 18.1322, + "step": 2415 + }, + { + "epoch": 0.044162538614802495, + "grad_norm": 8.117129217177633, + "learning_rate": 9.994755034213405e-06, + "loss": 18.311, + "step": 2416 + }, + { + "epoch": 
0.04418081781124902, + "grad_norm": 7.380525097184161, + "learning_rate": 9.994741470568937e-06, + "loss": 17.8848, + "step": 2417 + }, + { + "epoch": 0.04419909700769554, + "grad_norm": 7.3047276088671005, + "learning_rate": 9.99472788941833e-06, + "loss": 17.8573, + "step": 2418 + }, + { + "epoch": 0.04421737620414207, + "grad_norm": 7.392153203517438, + "learning_rate": 9.994714290761636e-06, + "loss": 17.8191, + "step": 2419 + }, + { + "epoch": 0.04423565540058859, + "grad_norm": 7.1426956309508896, + "learning_rate": 9.994700674598902e-06, + "loss": 17.4996, + "step": 2420 + }, + { + "epoch": 0.04425393459703512, + "grad_norm": 8.566850220352459, + "learning_rate": 9.994687040930176e-06, + "loss": 17.8436, + "step": 2421 + }, + { + "epoch": 0.04427221379348164, + "grad_norm": 9.009723701167259, + "learning_rate": 9.994673389755504e-06, + "loss": 18.4966, + "step": 2422 + }, + { + "epoch": 0.04429049298992816, + "grad_norm": 7.648464419628126, + "learning_rate": 9.994659721074937e-06, + "loss": 17.5574, + "step": 2423 + }, + { + "epoch": 0.04430877218637469, + "grad_norm": 6.899282785325929, + "learning_rate": 9.99464603488852e-06, + "loss": 17.5233, + "step": 2424 + }, + { + "epoch": 0.04432705138282121, + "grad_norm": 7.419090401494869, + "learning_rate": 9.994632331196303e-06, + "loss": 17.6791, + "step": 2425 + }, + { + "epoch": 0.04434533057926773, + "grad_norm": 7.856012202760776, + "learning_rate": 9.994618609998333e-06, + "loss": 17.9967, + "step": 2426 + }, + { + "epoch": 0.04436360977571426, + "grad_norm": 7.899339983429512, + "learning_rate": 9.994604871294658e-06, + "loss": 18.1975, + "step": 2427 + }, + { + "epoch": 0.044381888972160784, + "grad_norm": 7.842972356313221, + "learning_rate": 9.994591115085324e-06, + "loss": 18.1075, + "step": 2428 + }, + { + "epoch": 0.044400168168607305, + "grad_norm": 8.647233203391728, + "learning_rate": 9.994577341370384e-06, + "loss": 18.4699, + "step": 2429 + }, + { + "epoch": 0.044418447365053834, + "grad_norm": 8.191500746778265, + "learning_rate": 9.994563550149884e-06, + "loss": 18.016, + "step": 2430 + }, + { + "epoch": 0.044436726561500356, + "grad_norm": 9.147080973632784, + "learning_rate": 9.994549741423871e-06, + "loss": 18.5194, + "step": 2431 + }, + { + "epoch": 0.04445500575794688, + "grad_norm": 7.428488346981479, + "learning_rate": 9.994535915192396e-06, + "loss": 17.7912, + "step": 2432 + }, + { + "epoch": 0.044473284954393406, + "grad_norm": 7.011024965235697, + "learning_rate": 9.994522071455505e-06, + "loss": 17.6894, + "step": 2433 + }, + { + "epoch": 0.04449156415083993, + "grad_norm": 8.19376074623173, + "learning_rate": 9.994508210213248e-06, + "loss": 17.7326, + "step": 2434 + }, + { + "epoch": 0.04450984334728645, + "grad_norm": 7.26414722027003, + "learning_rate": 9.994494331465672e-06, + "loss": 17.8731, + "step": 2435 + }, + { + "epoch": 0.04452812254373298, + "grad_norm": 7.258459064949432, + "learning_rate": 9.994480435212827e-06, + "loss": 17.8062, + "step": 2436 + }, + { + "epoch": 0.0445464017401795, + "grad_norm": 6.059524784370213, + "learning_rate": 9.994466521454762e-06, + "loss": 17.1452, + "step": 2437 + }, + { + "epoch": 0.04456468093662603, + "grad_norm": 6.39379233735651, + "learning_rate": 9.994452590191525e-06, + "loss": 17.6071, + "step": 2438 + }, + { + "epoch": 0.04458296013307255, + "grad_norm": 7.5348896456735615, + "learning_rate": 9.994438641423166e-06, + "loss": 18.2033, + "step": 2439 + }, + { + "epoch": 0.04460123932951907, + "grad_norm": 6.765819290377159, + "learning_rate": 
9.994424675149733e-06, + "loss": 17.7391, + "step": 2440 + }, + { + "epoch": 0.0446195185259656, + "grad_norm": 8.045367878632184, + "learning_rate": 9.994410691371274e-06, + "loss": 18.264, + "step": 2441 + }, + { + "epoch": 0.04463779772241212, + "grad_norm": 8.450665323988279, + "learning_rate": 9.99439669008784e-06, + "loss": 17.9337, + "step": 2442 + }, + { + "epoch": 0.044656076918858645, + "grad_norm": 7.566173010961893, + "learning_rate": 9.994382671299477e-06, + "loss": 17.8623, + "step": 2443 + }, + { + "epoch": 0.044674356115305174, + "grad_norm": 7.130095907079948, + "learning_rate": 9.994368635006238e-06, + "loss": 17.78, + "step": 2444 + }, + { + "epoch": 0.044692635311751695, + "grad_norm": 7.27100666984323, + "learning_rate": 9.99435458120817e-06, + "loss": 17.8144, + "step": 2445 + }, + { + "epoch": 0.04471091450819822, + "grad_norm": 7.489837115950441, + "learning_rate": 9.994340509905321e-06, + "loss": 18.0407, + "step": 2446 + }, + { + "epoch": 0.044729193704644746, + "grad_norm": 7.612045002891485, + "learning_rate": 9.994326421097744e-06, + "loss": 18.1473, + "step": 2447 + }, + { + "epoch": 0.04474747290109127, + "grad_norm": 7.642973655488191, + "learning_rate": 9.994312314785486e-06, + "loss": 18.1447, + "step": 2448 + }, + { + "epoch": 0.04476575209753779, + "grad_norm": 6.7859223007249705, + "learning_rate": 9.994298190968595e-06, + "loss": 17.3404, + "step": 2449 + }, + { + "epoch": 0.04478403129398432, + "grad_norm": 8.558894441486864, + "learning_rate": 9.994284049647123e-06, + "loss": 18.4606, + "step": 2450 + }, + { + "epoch": 0.04480231049043084, + "grad_norm": 7.807862136108618, + "learning_rate": 9.99426989082112e-06, + "loss": 18.0718, + "step": 2451 + }, + { + "epoch": 0.04482058968687736, + "grad_norm": 7.627670044603417, + "learning_rate": 9.994255714490633e-06, + "loss": 17.7779, + "step": 2452 + }, + { + "epoch": 0.04483886888332389, + "grad_norm": 7.095676428920803, + "learning_rate": 9.994241520655713e-06, + "loss": 17.7749, + "step": 2453 + }, + { + "epoch": 0.04485714807977041, + "grad_norm": 8.826666879389675, + "learning_rate": 9.99422730931641e-06, + "loss": 18.3998, + "step": 2454 + }, + { + "epoch": 0.04487542727621694, + "grad_norm": 7.622749814638532, + "learning_rate": 9.994213080472776e-06, + "loss": 17.8817, + "step": 2455 + }, + { + "epoch": 0.04489370647266346, + "grad_norm": 7.67749884483121, + "learning_rate": 9.994198834124856e-06, + "loss": 18.1708, + "step": 2456 + }, + { + "epoch": 0.044911985669109984, + "grad_norm": 7.882270641978379, + "learning_rate": 9.994184570272704e-06, + "loss": 18.1039, + "step": 2457 + }, + { + "epoch": 0.04493026486555651, + "grad_norm": 7.53739610912295, + "learning_rate": 9.994170288916367e-06, + "loss": 17.9699, + "step": 2458 + }, + { + "epoch": 0.044948544062003035, + "grad_norm": 7.929458391148995, + "learning_rate": 9.994155990055897e-06, + "loss": 18.0522, + "step": 2459 + }, + { + "epoch": 0.044966823258449556, + "grad_norm": 8.150928372404985, + "learning_rate": 9.994141673691345e-06, + "loss": 18.3842, + "step": 2460 + }, + { + "epoch": 0.044985102454896085, + "grad_norm": 8.197250165025617, + "learning_rate": 9.994127339822756e-06, + "loss": 18.397, + "step": 2461 + }, + { + "epoch": 0.04500338165134261, + "grad_norm": 7.0408254789572275, + "learning_rate": 9.99411298845019e-06, + "loss": 17.6103, + "step": 2462 + }, + { + "epoch": 0.04502166084778913, + "grad_norm": 9.306036505095154, + "learning_rate": 9.994098619573687e-06, + "loss": 18.6617, + "step": 2463 + }, + { + "epoch": 
0.04503994004423566, + "grad_norm": 8.719533738544047, + "learning_rate": 9.994084233193303e-06, + "loss": 18.2127, + "step": 2464 + }, + { + "epoch": 0.04505821924068218, + "grad_norm": 7.993337299543475, + "learning_rate": 9.994069829309086e-06, + "loss": 17.7749, + "step": 2465 + }, + { + "epoch": 0.0450764984371287, + "grad_norm": 8.786320469873472, + "learning_rate": 9.994055407921088e-06, + "loss": 18.265, + "step": 2466 + }, + { + "epoch": 0.04509477763357523, + "grad_norm": 8.088655571473474, + "learning_rate": 9.99404096902936e-06, + "loss": 18.0567, + "step": 2467 + }, + { + "epoch": 0.04511305683002175, + "grad_norm": 10.785255980731446, + "learning_rate": 9.99402651263395e-06, + "loss": 18.8973, + "step": 2468 + }, + { + "epoch": 0.04513133602646827, + "grad_norm": 6.947880353235863, + "learning_rate": 9.994012038734912e-06, + "loss": 17.7813, + "step": 2469 + }, + { + "epoch": 0.0451496152229148, + "grad_norm": 8.40840965315626, + "learning_rate": 9.993997547332295e-06, + "loss": 18.0729, + "step": 2470 + }, + { + "epoch": 0.045167894419361324, + "grad_norm": 7.141559747847017, + "learning_rate": 9.993983038426149e-06, + "loss": 17.7627, + "step": 2471 + }, + { + "epoch": 0.04518617361580785, + "grad_norm": 8.083598178975093, + "learning_rate": 9.993968512016528e-06, + "loss": 18.0206, + "step": 2472 + }, + { + "epoch": 0.045204452812254374, + "grad_norm": 7.941864410688481, + "learning_rate": 9.993953968103479e-06, + "loss": 17.7503, + "step": 2473 + }, + { + "epoch": 0.045222732008700896, + "grad_norm": 8.276880607497981, + "learning_rate": 9.993939406687055e-06, + "loss": 18.1408, + "step": 2474 + }, + { + "epoch": 0.045241011205147424, + "grad_norm": 7.494499518134069, + "learning_rate": 9.993924827767306e-06, + "loss": 17.717, + "step": 2475 + }, + { + "epoch": 0.045259290401593946, + "grad_norm": 6.279519489638303, + "learning_rate": 9.993910231344286e-06, + "loss": 17.3121, + "step": 2476 + }, + { + "epoch": 0.04527756959804047, + "grad_norm": 6.10448976291681, + "learning_rate": 9.993895617418042e-06, + "loss": 17.2826, + "step": 2477 + }, + { + "epoch": 0.045295848794487, + "grad_norm": 8.485248276703604, + "learning_rate": 9.993880985988629e-06, + "loss": 18.2969, + "step": 2478 + }, + { + "epoch": 0.04531412799093352, + "grad_norm": 6.956430929645586, + "learning_rate": 9.993866337056095e-06, + "loss": 17.6805, + "step": 2479 + }, + { + "epoch": 0.04533240718738004, + "grad_norm": 7.577875930364536, + "learning_rate": 9.993851670620494e-06, + "loss": 17.957, + "step": 2480 + }, + { + "epoch": 0.04535068638382657, + "grad_norm": 7.293365459040691, + "learning_rate": 9.993836986681876e-06, + "loss": 17.8977, + "step": 2481 + }, + { + "epoch": 0.04536896558027309, + "grad_norm": 10.212845440385083, + "learning_rate": 9.99382228524029e-06, + "loss": 19.0827, + "step": 2482 + }, + { + "epoch": 0.04538724477671961, + "grad_norm": 6.5640105097470505, + "learning_rate": 9.993807566295793e-06, + "loss": 17.4783, + "step": 2483 + }, + { + "epoch": 0.04540552397316614, + "grad_norm": 7.88718427575359, + "learning_rate": 9.993792829848433e-06, + "loss": 17.909, + "step": 2484 + }, + { + "epoch": 0.04542380316961266, + "grad_norm": 7.313171341929883, + "learning_rate": 9.993778075898262e-06, + "loss": 17.6938, + "step": 2485 + }, + { + "epoch": 0.045442082366059185, + "grad_norm": 9.18976380206171, + "learning_rate": 9.993763304445335e-06, + "loss": 18.4729, + "step": 2486 + }, + { + "epoch": 0.04546036156250571, + "grad_norm": 7.671585186971003, + "learning_rate": 
9.993748515489698e-06, + "loss": 17.5632, + "step": 2487 + }, + { + "epoch": 0.045478640758952235, + "grad_norm": 7.396646593191951, + "learning_rate": 9.993733709031406e-06, + "loss": 18.0158, + "step": 2488 + }, + { + "epoch": 0.045496919955398764, + "grad_norm": 7.30338313789073, + "learning_rate": 9.993718885070512e-06, + "loss": 17.8184, + "step": 2489 + }, + { + "epoch": 0.045515199151845286, + "grad_norm": 8.156573329646484, + "learning_rate": 9.993704043607065e-06, + "loss": 18.0749, + "step": 2490 + }, + { + "epoch": 0.04553347834829181, + "grad_norm": 7.444852773785196, + "learning_rate": 9.99368918464112e-06, + "loss": 18.1151, + "step": 2491 + }, + { + "epoch": 0.045551757544738336, + "grad_norm": 8.574863713539944, + "learning_rate": 9.993674308172727e-06, + "loss": 18.6178, + "step": 2492 + }, + { + "epoch": 0.04557003674118486, + "grad_norm": 9.383076273075883, + "learning_rate": 9.993659414201939e-06, + "loss": 18.8024, + "step": 2493 + }, + { + "epoch": 0.04558831593763138, + "grad_norm": 7.58726551821705, + "learning_rate": 9.993644502728809e-06, + "loss": 17.6787, + "step": 2494 + }, + { + "epoch": 0.04560659513407791, + "grad_norm": 6.9906904839724175, + "learning_rate": 9.993629573753387e-06, + "loss": 17.6734, + "step": 2495 + }, + { + "epoch": 0.04562487433052443, + "grad_norm": 7.602778161713747, + "learning_rate": 9.993614627275728e-06, + "loss": 18.0471, + "step": 2496 + }, + { + "epoch": 0.04564315352697095, + "grad_norm": 6.743092129784845, + "learning_rate": 9.993599663295883e-06, + "loss": 17.7301, + "step": 2497 + }, + { + "epoch": 0.04566143272341748, + "grad_norm": 6.698977437278408, + "learning_rate": 9.993584681813904e-06, + "loss": 17.4601, + "step": 2498 + }, + { + "epoch": 0.045679711919864, + "grad_norm": 8.453281591316307, + "learning_rate": 9.993569682829843e-06, + "loss": 18.4345, + "step": 2499 + }, + { + "epoch": 0.045697991116310524, + "grad_norm": 10.112200193544616, + "learning_rate": 9.993554666343755e-06, + "loss": 18.6083, + "step": 2500 + }, + { + "epoch": 0.04571627031275705, + "grad_norm": 7.121410796053459, + "learning_rate": 9.99353963235569e-06, + "loss": 17.675, + "step": 2501 + }, + { + "epoch": 0.045734549509203574, + "grad_norm": 7.625019759673931, + "learning_rate": 9.993524580865704e-06, + "loss": 17.769, + "step": 2502 + }, + { + "epoch": 0.045752828705650096, + "grad_norm": 10.04571223137181, + "learning_rate": 9.993509511873845e-06, + "loss": 18.951, + "step": 2503 + }, + { + "epoch": 0.045771107902096625, + "grad_norm": 8.376891770038641, + "learning_rate": 9.99349442538017e-06, + "loss": 18.0827, + "step": 2504 + }, + { + "epoch": 0.04578938709854315, + "grad_norm": 7.862265599796088, + "learning_rate": 9.99347932138473e-06, + "loss": 17.9922, + "step": 2505 + }, + { + "epoch": 0.045807666294989675, + "grad_norm": 8.124258938135004, + "learning_rate": 9.993464199887578e-06, + "loss": 18.638, + "step": 2506 + }, + { + "epoch": 0.0458259454914362, + "grad_norm": 7.022653855067991, + "learning_rate": 9.993449060888768e-06, + "loss": 17.6937, + "step": 2507 + }, + { + "epoch": 0.04584422468788272, + "grad_norm": 6.996402976687276, + "learning_rate": 9.993433904388352e-06, + "loss": 17.3463, + "step": 2508 + }, + { + "epoch": 0.04586250388432925, + "grad_norm": 7.98813478789894, + "learning_rate": 9.993418730386384e-06, + "loss": 17.7135, + "step": 2509 + }, + { + "epoch": 0.04588078308077577, + "grad_norm": 7.056808940985014, + "learning_rate": 9.993403538882915e-06, + "loss": 17.8142, + "step": 2510 + }, + { + "epoch": 
0.04589906227722229, + "grad_norm": 8.740012775396496, + "learning_rate": 9.993388329878002e-06, + "loss": 18.1227, + "step": 2511 + }, + { + "epoch": 0.04591734147366882, + "grad_norm": 8.882864774096744, + "learning_rate": 9.993373103371695e-06, + "loss": 18.1953, + "step": 2512 + }, + { + "epoch": 0.04593562067011534, + "grad_norm": 7.958453931071923, + "learning_rate": 9.993357859364048e-06, + "loss": 17.9278, + "step": 2513 + }, + { + "epoch": 0.04595389986656186, + "grad_norm": 7.776292641995152, + "learning_rate": 9.993342597855117e-06, + "loss": 18.2712, + "step": 2514 + }, + { + "epoch": 0.04597217906300839, + "grad_norm": 8.292059225346584, + "learning_rate": 9.993327318844952e-06, + "loss": 18.0476, + "step": 2515 + }, + { + "epoch": 0.045990458259454914, + "grad_norm": 8.386415553531005, + "learning_rate": 9.993312022333608e-06, + "loss": 18.35, + "step": 2516 + }, + { + "epoch": 0.046008737455901436, + "grad_norm": 8.045486169098941, + "learning_rate": 9.99329670832114e-06, + "loss": 18.2319, + "step": 2517 + }, + { + "epoch": 0.046027016652347964, + "grad_norm": 9.232407530293376, + "learning_rate": 9.993281376807598e-06, + "loss": 18.4699, + "step": 2518 + }, + { + "epoch": 0.046045295848794486, + "grad_norm": 7.8520732123053225, + "learning_rate": 9.99326602779304e-06, + "loss": 18.191, + "step": 2519 + }, + { + "epoch": 0.04606357504524101, + "grad_norm": 7.16087588755416, + "learning_rate": 9.993250661277519e-06, + "loss": 17.6772, + "step": 2520 + }, + { + "epoch": 0.046081854241687537, + "grad_norm": 8.187990817383524, + "learning_rate": 9.993235277261084e-06, + "loss": 18.0416, + "step": 2521 + }, + { + "epoch": 0.04610013343813406, + "grad_norm": 7.291193162948097, + "learning_rate": 9.993219875743796e-06, + "loss": 17.7921, + "step": 2522 + }, + { + "epoch": 0.04611841263458059, + "grad_norm": 7.269722958984533, + "learning_rate": 9.993204456725703e-06, + "loss": 17.7402, + "step": 2523 + }, + { + "epoch": 0.04613669183102711, + "grad_norm": 9.39702523786115, + "learning_rate": 9.993189020206863e-06, + "loss": 18.7017, + "step": 2524 + }, + { + "epoch": 0.04615497102747363, + "grad_norm": 8.422467668553633, + "learning_rate": 9.99317356618733e-06, + "loss": 18.2036, + "step": 2525 + }, + { + "epoch": 0.04617325022392016, + "grad_norm": 7.692280567456548, + "learning_rate": 9.993158094667155e-06, + "loss": 17.9819, + "step": 2526 + }, + { + "epoch": 0.04619152942036668, + "grad_norm": 8.8049564707133, + "learning_rate": 9.993142605646395e-06, + "loss": 18.1562, + "step": 2527 + }, + { + "epoch": 0.0462098086168132, + "grad_norm": 7.517125465847618, + "learning_rate": 9.993127099125104e-06, + "loss": 17.8768, + "step": 2528 + }, + { + "epoch": 0.04622808781325973, + "grad_norm": 7.10833132981136, + "learning_rate": 9.993111575103334e-06, + "loss": 17.7035, + "step": 2529 + }, + { + "epoch": 0.04624636700970625, + "grad_norm": 8.616302291529108, + "learning_rate": 9.993096033581143e-06, + "loss": 18.3784, + "step": 2530 + }, + { + "epoch": 0.046264646206152775, + "grad_norm": 8.768140196492558, + "learning_rate": 9.993080474558584e-06, + "loss": 18.3982, + "step": 2531 + }, + { + "epoch": 0.046282925402599304, + "grad_norm": 8.069596110815922, + "learning_rate": 9.993064898035709e-06, + "loss": 18.495, + "step": 2532 + }, + { + "epoch": 0.046301204599045825, + "grad_norm": 6.436102127592379, + "learning_rate": 9.993049304012576e-06, + "loss": 17.221, + "step": 2533 + }, + { + "epoch": 0.04631948379549235, + "grad_norm": 9.965611577331051, + "learning_rate": 
9.993033692489239e-06, + "loss": 19.1679, + "step": 2534 + }, + { + "epoch": 0.046337762991938876, + "grad_norm": 8.699977371157395, + "learning_rate": 9.993018063465751e-06, + "loss": 18.0354, + "step": 2535 + }, + { + "epoch": 0.0463560421883854, + "grad_norm": 7.766489544229284, + "learning_rate": 9.993002416942171e-06, + "loss": 18.1615, + "step": 2536 + }, + { + "epoch": 0.04637432138483192, + "grad_norm": 7.966456947663706, + "learning_rate": 9.992986752918548e-06, + "loss": 18.0083, + "step": 2537 + }, + { + "epoch": 0.04639260058127845, + "grad_norm": 7.8614001915835265, + "learning_rate": 9.99297107139494e-06, + "loss": 18.0716, + "step": 2538 + }, + { + "epoch": 0.04641087977772497, + "grad_norm": 7.738265008664061, + "learning_rate": 9.9929553723714e-06, + "loss": 17.7186, + "step": 2539 + }, + { + "epoch": 0.0464291589741715, + "grad_norm": 7.755710041382345, + "learning_rate": 9.992939655847988e-06, + "loss": 17.9862, + "step": 2540 + }, + { + "epoch": 0.04644743817061802, + "grad_norm": 7.007740920005631, + "learning_rate": 9.992923921824754e-06, + "loss": 17.6481, + "step": 2541 + }, + { + "epoch": 0.04646571736706454, + "grad_norm": 7.8676222835982506, + "learning_rate": 9.992908170301755e-06, + "loss": 17.9741, + "step": 2542 + }, + { + "epoch": 0.04648399656351107, + "grad_norm": 8.11514801090296, + "learning_rate": 9.992892401279047e-06, + "loss": 18.0688, + "step": 2543 + }, + { + "epoch": 0.04650227575995759, + "grad_norm": 7.784713886404579, + "learning_rate": 9.992876614756684e-06, + "loss": 18.4181, + "step": 2544 + }, + { + "epoch": 0.046520554956404114, + "grad_norm": 7.55659106472757, + "learning_rate": 9.992860810734722e-06, + "loss": 17.849, + "step": 2545 + }, + { + "epoch": 0.04653883415285064, + "grad_norm": 8.141532762416936, + "learning_rate": 9.992844989213213e-06, + "loss": 18.0762, + "step": 2546 + }, + { + "epoch": 0.046557113349297165, + "grad_norm": 7.9981572386877495, + "learning_rate": 9.99282915019222e-06, + "loss": 18.0064, + "step": 2547 + }, + { + "epoch": 0.04657539254574369, + "grad_norm": 5.807312095679033, + "learning_rate": 9.992813293671792e-06, + "loss": 17.2469, + "step": 2548 + }, + { + "epoch": 0.046593671742190215, + "grad_norm": 8.14120649526761, + "learning_rate": 9.992797419651988e-06, + "loss": 18.2199, + "step": 2549 + }, + { + "epoch": 0.04661195093863674, + "grad_norm": 7.693158337588116, + "learning_rate": 9.99278152813286e-06, + "loss": 17.988, + "step": 2550 + }, + { + "epoch": 0.04663023013508326, + "grad_norm": 8.492286261870182, + "learning_rate": 9.992765619114467e-06, + "loss": 18.3276, + "step": 2551 + }, + { + "epoch": 0.04664850933152979, + "grad_norm": 7.146893703259297, + "learning_rate": 9.992749692596863e-06, + "loss": 17.8245, + "step": 2552 + }, + { + "epoch": 0.04666678852797631, + "grad_norm": 7.384159350830155, + "learning_rate": 9.992733748580108e-06, + "loss": 17.9852, + "step": 2553 + }, + { + "epoch": 0.04668506772442283, + "grad_norm": 8.624144394164366, + "learning_rate": 9.99271778706425e-06, + "loss": 17.6288, + "step": 2554 + }, + { + "epoch": 0.04670334692086936, + "grad_norm": 7.930266695448252, + "learning_rate": 9.992701808049351e-06, + "loss": 18.2687, + "step": 2555 + }, + { + "epoch": 0.04672162611731588, + "grad_norm": 7.435136965216283, + "learning_rate": 9.992685811535467e-06, + "loss": 17.8033, + "step": 2556 + }, + { + "epoch": 0.04673990531376241, + "grad_norm": 7.599543264066056, + "learning_rate": 9.992669797522652e-06, + "loss": 17.7481, + "step": 2557 + }, + { + "epoch": 
0.04675818451020893, + "grad_norm": 8.429798985553415, + "learning_rate": 9.992653766010961e-06, + "loss": 18.4653, + "step": 2558 + }, + { + "epoch": 0.046776463706655454, + "grad_norm": 8.076173987447698, + "learning_rate": 9.992637717000453e-06, + "loss": 18.2065, + "step": 2559 + }, + { + "epoch": 0.04679474290310198, + "grad_norm": 8.781536678777618, + "learning_rate": 9.992621650491184e-06, + "loss": 18.5883, + "step": 2560 + }, + { + "epoch": 0.046813022099548504, + "grad_norm": 7.444958861920365, + "learning_rate": 9.992605566483206e-06, + "loss": 17.9761, + "step": 2561 + }, + { + "epoch": 0.046831301295995026, + "grad_norm": 8.423244032545847, + "learning_rate": 9.992589464976581e-06, + "loss": 18.1584, + "step": 2562 + }, + { + "epoch": 0.046849580492441555, + "grad_norm": 7.3249632914321054, + "learning_rate": 9.992573345971364e-06, + "loss": 18.007, + "step": 2563 + }, + { + "epoch": 0.046867859688888076, + "grad_norm": 8.690883457836394, + "learning_rate": 9.99255720946761e-06, + "loss": 18.4969, + "step": 2564 + }, + { + "epoch": 0.0468861388853346, + "grad_norm": 8.766408683723455, + "learning_rate": 9.992541055465374e-06, + "loss": 18.6329, + "step": 2565 + }, + { + "epoch": 0.04690441808178113, + "grad_norm": 7.732559015449722, + "learning_rate": 9.992524883964717e-06, + "loss": 18.4675, + "step": 2566 + }, + { + "epoch": 0.04692269727822765, + "grad_norm": 11.206808671649624, + "learning_rate": 9.992508694965695e-06, + "loss": 19.42, + "step": 2567 + }, + { + "epoch": 0.04694097647467417, + "grad_norm": 6.774508301080423, + "learning_rate": 9.99249248846836e-06, + "loss": 17.5349, + "step": 2568 + }, + { + "epoch": 0.0469592556711207, + "grad_norm": 7.27514549721159, + "learning_rate": 9.992476264472774e-06, + "loss": 17.8747, + "step": 2569 + }, + { + "epoch": 0.04697753486756722, + "grad_norm": 7.423724748051973, + "learning_rate": 9.992460022978991e-06, + "loss": 17.8545, + "step": 2570 + }, + { + "epoch": 0.04699581406401374, + "grad_norm": 8.096889005274987, + "learning_rate": 9.99244376398707e-06, + "loss": 18.4889, + "step": 2571 + }, + { + "epoch": 0.04701409326046027, + "grad_norm": 7.674387346092054, + "learning_rate": 9.992427487497065e-06, + "loss": 18.1159, + "step": 2572 + }, + { + "epoch": 0.04703237245690679, + "grad_norm": 7.788396641401589, + "learning_rate": 9.992411193509037e-06, + "loss": 17.8568, + "step": 2573 + }, + { + "epoch": 0.04705065165335332, + "grad_norm": 7.8762417225446395, + "learning_rate": 9.99239488202304e-06, + "loss": 18.0088, + "step": 2574 + }, + { + "epoch": 0.047068930849799843, + "grad_norm": 7.238786197429148, + "learning_rate": 9.992378553039132e-06, + "loss": 17.8319, + "step": 2575 + }, + { + "epoch": 0.047087210046246365, + "grad_norm": 7.068213361844402, + "learning_rate": 9.99236220655737e-06, + "loss": 17.73, + "step": 2576 + }, + { + "epoch": 0.047105489242692894, + "grad_norm": 8.780673471941459, + "learning_rate": 9.992345842577814e-06, + "loss": 18.3554, + "step": 2577 + }, + { + "epoch": 0.047123768439139416, + "grad_norm": 8.159953183875249, + "learning_rate": 9.992329461100517e-06, + "loss": 17.8448, + "step": 2578 + }, + { + "epoch": 0.04714204763558594, + "grad_norm": 7.723023562155313, + "learning_rate": 9.992313062125538e-06, + "loss": 17.7103, + "step": 2579 + }, + { + "epoch": 0.047160326832032466, + "grad_norm": 7.616277240305632, + "learning_rate": 9.992296645652934e-06, + "loss": 17.9065, + "step": 2580 + }, + { + "epoch": 0.04717860602847899, + "grad_norm": 7.571498465607509, + "learning_rate": 
9.992280211682766e-06, + "loss": 17.9898, + "step": 2581 + }, + { + "epoch": 0.04719688522492551, + "grad_norm": 9.024191217366022, + "learning_rate": 9.992263760215089e-06, + "loss": 18.402, + "step": 2582 + }, + { + "epoch": 0.04721516442137204, + "grad_norm": 7.7871098173690125, + "learning_rate": 9.992247291249959e-06, + "loss": 18.2878, + "step": 2583 + }, + { + "epoch": 0.04723344361781856, + "grad_norm": 8.830236557374969, + "learning_rate": 9.992230804787438e-06, + "loss": 17.8729, + "step": 2584 + }, + { + "epoch": 0.04725172281426508, + "grad_norm": 7.122543761547274, + "learning_rate": 9.992214300827579e-06, + "loss": 17.8097, + "step": 2585 + }, + { + "epoch": 0.04727000201071161, + "grad_norm": 7.615968526497754, + "learning_rate": 9.992197779370443e-06, + "loss": 17.6749, + "step": 2586 + }, + { + "epoch": 0.04728828120715813, + "grad_norm": 6.58962357922287, + "learning_rate": 9.992181240416086e-06, + "loss": 17.2602, + "step": 2587 + }, + { + "epoch": 0.047306560403604654, + "grad_norm": 7.1704096445101335, + "learning_rate": 9.992164683964567e-06, + "loss": 17.594, + "step": 2588 + }, + { + "epoch": 0.04732483960005118, + "grad_norm": 7.340001869666006, + "learning_rate": 9.992148110015944e-06, + "loss": 17.9317, + "step": 2589 + }, + { + "epoch": 0.047343118796497705, + "grad_norm": 7.094104168318126, + "learning_rate": 9.992131518570276e-06, + "loss": 17.5242, + "step": 2590 + }, + { + "epoch": 0.04736139799294423, + "grad_norm": 8.661227238066143, + "learning_rate": 9.99211490962762e-06, + "loss": 18.1788, + "step": 2591 + }, + { + "epoch": 0.047379677189390755, + "grad_norm": 9.758540631505968, + "learning_rate": 9.992098283188034e-06, + "loss": 18.3244, + "step": 2592 + }, + { + "epoch": 0.04739795638583728, + "grad_norm": 7.213278668106901, + "learning_rate": 9.992081639251578e-06, + "loss": 17.8225, + "step": 2593 + }, + { + "epoch": 0.047416235582283806, + "grad_norm": 7.203090426209105, + "learning_rate": 9.992064977818307e-06, + "loss": 17.7184, + "step": 2594 + }, + { + "epoch": 0.04743451477873033, + "grad_norm": 7.4925183784994065, + "learning_rate": 9.992048298888284e-06, + "loss": 17.9008, + "step": 2595 + }, + { + "epoch": 0.04745279397517685, + "grad_norm": 7.907230747099184, + "learning_rate": 9.992031602461562e-06, + "loss": 18.0021, + "step": 2596 + }, + { + "epoch": 0.04747107317162338, + "grad_norm": 7.467349716623535, + "learning_rate": 9.992014888538204e-06, + "loss": 17.9413, + "step": 2597 + }, + { + "epoch": 0.0474893523680699, + "grad_norm": 7.353131603693356, + "learning_rate": 9.991998157118267e-06, + "loss": 17.9257, + "step": 2598 + }, + { + "epoch": 0.04750763156451642, + "grad_norm": 8.172457199217725, + "learning_rate": 9.99198140820181e-06, + "loss": 18.531, + "step": 2599 + }, + { + "epoch": 0.04752591076096295, + "grad_norm": 7.448640737510543, + "learning_rate": 9.991964641788892e-06, + "loss": 17.7291, + "step": 2600 + }, + { + "epoch": 0.04754418995740947, + "grad_norm": 7.752089224294541, + "learning_rate": 9.991947857879569e-06, + "loss": 18.0091, + "step": 2601 + }, + { + "epoch": 0.047562469153855993, + "grad_norm": 8.286655052760677, + "learning_rate": 9.991931056473902e-06, + "loss": 18.1219, + "step": 2602 + }, + { + "epoch": 0.04758074835030252, + "grad_norm": 7.051573843177877, + "learning_rate": 9.991914237571952e-06, + "loss": 17.877, + "step": 2603 + }, + { + "epoch": 0.047599027546749044, + "grad_norm": 7.29651890949779, + "learning_rate": 9.991897401173774e-06, + "loss": 17.7525, + "step": 2604 + }, + { + "epoch": 
0.047617306743195566, + "grad_norm": 6.765985640866973, + "learning_rate": 9.99188054727943e-06, + "loss": 17.6648, + "step": 2605 + }, + { + "epoch": 0.047635585939642094, + "grad_norm": 8.575109016142505, + "learning_rate": 9.991863675888977e-06, + "loss": 18.3309, + "step": 2606 + }, + { + "epoch": 0.047653865136088616, + "grad_norm": 8.388452131734812, + "learning_rate": 9.991846787002474e-06, + "loss": 18.1312, + "step": 2607 + }, + { + "epoch": 0.047672144332535145, + "grad_norm": 8.464403390656695, + "learning_rate": 9.991829880619983e-06, + "loss": 18.205, + "step": 2608 + }, + { + "epoch": 0.04769042352898167, + "grad_norm": 7.73207336863152, + "learning_rate": 9.99181295674156e-06, + "loss": 18.0825, + "step": 2609 + }, + { + "epoch": 0.04770870272542819, + "grad_norm": 7.234080259893069, + "learning_rate": 9.991796015367267e-06, + "loss": 17.675, + "step": 2610 + }, + { + "epoch": 0.04772698192187472, + "grad_norm": 8.487742452379393, + "learning_rate": 9.991779056497161e-06, + "loss": 18.1511, + "step": 2611 + }, + { + "epoch": 0.04774526111832124, + "grad_norm": 9.64341880092905, + "learning_rate": 9.991762080131305e-06, + "loss": 18.5887, + "step": 2612 + }, + { + "epoch": 0.04776354031476776, + "grad_norm": 6.820414549309318, + "learning_rate": 9.991745086269753e-06, + "loss": 17.8486, + "step": 2613 + }, + { + "epoch": 0.04778181951121429, + "grad_norm": 7.205085987664576, + "learning_rate": 9.99172807491257e-06, + "loss": 18.1047, + "step": 2614 + }, + { + "epoch": 0.04780009870766081, + "grad_norm": 8.163994358227953, + "learning_rate": 9.991711046059812e-06, + "loss": 18.2374, + "step": 2615 + }, + { + "epoch": 0.04781837790410733, + "grad_norm": 8.120598451868254, + "learning_rate": 9.991693999711541e-06, + "loss": 17.9739, + "step": 2616 + }, + { + "epoch": 0.04783665710055386, + "grad_norm": 8.265683503522936, + "learning_rate": 9.991676935867813e-06, + "loss": 18.0476, + "step": 2617 + }, + { + "epoch": 0.04785493629700038, + "grad_norm": 8.903684856768098, + "learning_rate": 9.991659854528693e-06, + "loss": 18.1277, + "step": 2618 + }, + { + "epoch": 0.047873215493446905, + "grad_norm": 7.700767221730183, + "learning_rate": 9.991642755694238e-06, + "loss": 17.9356, + "step": 2619 + }, + { + "epoch": 0.047891494689893434, + "grad_norm": 8.101046234456692, + "learning_rate": 9.991625639364508e-06, + "loss": 18.0207, + "step": 2620 + }, + { + "epoch": 0.047909773886339956, + "grad_norm": 7.70355085855746, + "learning_rate": 9.991608505539563e-06, + "loss": 18.1949, + "step": 2621 + }, + { + "epoch": 0.04792805308278648, + "grad_norm": 7.208674369499795, + "learning_rate": 9.991591354219463e-06, + "loss": 17.9747, + "step": 2622 + }, + { + "epoch": 0.047946332279233006, + "grad_norm": 7.9057625499185615, + "learning_rate": 9.991574185404269e-06, + "loss": 18.1491, + "step": 2623 + }, + { + "epoch": 0.04796461147567953, + "grad_norm": 8.137422512066872, + "learning_rate": 9.99155699909404e-06, + "loss": 17.7673, + "step": 2624 + }, + { + "epoch": 0.047982890672126056, + "grad_norm": 6.801004677122794, + "learning_rate": 9.991539795288839e-06, + "loss": 17.7284, + "step": 2625 + }, + { + "epoch": 0.04800116986857258, + "grad_norm": 7.036059525082331, + "learning_rate": 9.991522573988723e-06, + "loss": 17.792, + "step": 2626 + }, + { + "epoch": 0.0480194490650191, + "grad_norm": 6.357666108609581, + "learning_rate": 9.991505335193752e-06, + "loss": 17.4053, + "step": 2627 + }, + { + "epoch": 0.04803772826146563, + "grad_norm": 6.981203675681132, + "learning_rate": 
9.99148807890399e-06, + "loss": 17.5025, + "step": 2628 + }, + { + "epoch": 0.04805600745791215, + "grad_norm": 7.709372655406259, + "learning_rate": 9.991470805119494e-06, + "loss": 18.0022, + "step": 2629 + }, + { + "epoch": 0.04807428665435867, + "grad_norm": 8.657897652446362, + "learning_rate": 9.991453513840327e-06, + "loss": 18.302, + "step": 2630 + }, + { + "epoch": 0.0480925658508052, + "grad_norm": 8.339666057920908, + "learning_rate": 9.991436205066549e-06, + "loss": 18.4404, + "step": 2631 + }, + { + "epoch": 0.04811084504725172, + "grad_norm": 8.137596756990114, + "learning_rate": 9.991418878798219e-06, + "loss": 18.1143, + "step": 2632 + }, + { + "epoch": 0.048129124243698244, + "grad_norm": 7.829696040410456, + "learning_rate": 9.991401535035398e-06, + "loss": 17.6556, + "step": 2633 + }, + { + "epoch": 0.04814740344014477, + "grad_norm": 7.93700230501803, + "learning_rate": 9.99138417377815e-06, + "loss": 17.9485, + "step": 2634 + }, + { + "epoch": 0.048165682636591295, + "grad_norm": 7.596326532724841, + "learning_rate": 9.991366795026533e-06, + "loss": 17.8489, + "step": 2635 + }, + { + "epoch": 0.04818396183303782, + "grad_norm": 7.7847163418671865, + "learning_rate": 9.991349398780607e-06, + "loss": 18.0203, + "step": 2636 + }, + { + "epoch": 0.048202241029484345, + "grad_norm": 8.30779716323521, + "learning_rate": 9.991331985040437e-06, + "loss": 18.1566, + "step": 2637 + }, + { + "epoch": 0.04822052022593087, + "grad_norm": 7.767510160412045, + "learning_rate": 9.991314553806079e-06, + "loss": 17.8402, + "step": 2638 + }, + { + "epoch": 0.04823879942237739, + "grad_norm": 24.00078297240223, + "learning_rate": 9.9912971050776e-06, + "loss": 19.0444, + "step": 2639 + }, + { + "epoch": 0.04825707861882392, + "grad_norm": 8.314372895709644, + "learning_rate": 9.991279638855056e-06, + "loss": 18.3847, + "step": 2640 + }, + { + "epoch": 0.04827535781527044, + "grad_norm": 6.9919850584153265, + "learning_rate": 9.991262155138507e-06, + "loss": 18.0172, + "step": 2641 + }, + { + "epoch": 0.04829363701171697, + "grad_norm": 7.086213799597908, + "learning_rate": 9.991244653928021e-06, + "loss": 17.582, + "step": 2642 + }, + { + "epoch": 0.04831191620816349, + "grad_norm": 8.058177867690771, + "learning_rate": 9.991227135223656e-06, + "loss": 18.4592, + "step": 2643 + }, + { + "epoch": 0.04833019540461001, + "grad_norm": 7.769721264013488, + "learning_rate": 9.991209599025471e-06, + "loss": 18.0455, + "step": 2644 + }, + { + "epoch": 0.04834847460105654, + "grad_norm": 7.232355571336095, + "learning_rate": 9.991192045333529e-06, + "loss": 17.6612, + "step": 2645 + }, + { + "epoch": 0.04836675379750306, + "grad_norm": 8.41234655633902, + "learning_rate": 9.991174474147892e-06, + "loss": 18.1701, + "step": 2646 + }, + { + "epoch": 0.048385032993949584, + "grad_norm": 8.208951686838457, + "learning_rate": 9.991156885468626e-06, + "loss": 18.4221, + "step": 2647 + }, + { + "epoch": 0.04840331219039611, + "grad_norm": 6.2405343761220315, + "learning_rate": 9.991139279295784e-06, + "loss": 17.4688, + "step": 2648 + }, + { + "epoch": 0.048421591386842634, + "grad_norm": 8.138753409541787, + "learning_rate": 9.991121655629433e-06, + "loss": 18.0926, + "step": 2649 + }, + { + "epoch": 0.048439870583289156, + "grad_norm": 7.509042346002413, + "learning_rate": 9.991104014469633e-06, + "loss": 18.0035, + "step": 2650 + }, + { + "epoch": 0.048458149779735685, + "grad_norm": 9.058818337912584, + "learning_rate": 9.991086355816445e-06, + "loss": 18.5006, + "step": 2651 + }, + { + "epoch": 
0.048476428976182206, + "grad_norm": 7.074231101969972, + "learning_rate": 9.991068679669936e-06, + "loss": 17.4708, + "step": 2652 + }, + { + "epoch": 0.04849470817262873, + "grad_norm": 8.563493350493948, + "learning_rate": 9.991050986030163e-06, + "loss": 18.3651, + "step": 2653 + }, + { + "epoch": 0.04851298736907526, + "grad_norm": 7.195424460601591, + "learning_rate": 9.991033274897189e-06, + "loss": 17.8509, + "step": 2654 + }, + { + "epoch": 0.04853126656552178, + "grad_norm": 7.36165428435057, + "learning_rate": 9.991015546271076e-06, + "loss": 17.838, + "step": 2655 + }, + { + "epoch": 0.0485495457619683, + "grad_norm": 7.307704359588099, + "learning_rate": 9.990997800151888e-06, + "loss": 17.606, + "step": 2656 + }, + { + "epoch": 0.04856782495841483, + "grad_norm": 7.144856676722134, + "learning_rate": 9.990980036539683e-06, + "loss": 17.7495, + "step": 2657 + }, + { + "epoch": 0.04858610415486135, + "grad_norm": 8.362609901603209, + "learning_rate": 9.99096225543453e-06, + "loss": 18.3898, + "step": 2658 + }, + { + "epoch": 0.04860438335130788, + "grad_norm": 7.095504670362594, + "learning_rate": 9.990944456836485e-06, + "loss": 17.6543, + "step": 2659 + }, + { + "epoch": 0.0486226625477544, + "grad_norm": 7.662936816391292, + "learning_rate": 9.990926640745612e-06, + "loss": 17.8874, + "step": 2660 + }, + { + "epoch": 0.04864094174420092, + "grad_norm": 7.528980619883442, + "learning_rate": 9.990908807161976e-06, + "loss": 17.9651, + "step": 2661 + }, + { + "epoch": 0.04865922094064745, + "grad_norm": 7.575553019127141, + "learning_rate": 9.990890956085638e-06, + "loss": 18.044, + "step": 2662 + }, + { + "epoch": 0.048677500137093974, + "grad_norm": 6.732173943432213, + "learning_rate": 9.99087308751666e-06, + "loss": 17.5589, + "step": 2663 + }, + { + "epoch": 0.048695779333540495, + "grad_norm": 7.923031811798792, + "learning_rate": 9.990855201455104e-06, + "loss": 18.3133, + "step": 2664 + }, + { + "epoch": 0.048714058529987024, + "grad_norm": 8.057810619771036, + "learning_rate": 9.990837297901034e-06, + "loss": 18.3014, + "step": 2665 + }, + { + "epoch": 0.048732337726433546, + "grad_norm": 8.077171777686797, + "learning_rate": 9.990819376854512e-06, + "loss": 18.3852, + "step": 2666 + }, + { + "epoch": 0.04875061692288007, + "grad_norm": 6.893345504430657, + "learning_rate": 9.990801438315603e-06, + "loss": 17.6469, + "step": 2667 + }, + { + "epoch": 0.048768896119326596, + "grad_norm": 7.148773305250935, + "learning_rate": 9.990783482284366e-06, + "loss": 17.9787, + "step": 2668 + }, + { + "epoch": 0.04878717531577312, + "grad_norm": 7.814760911182298, + "learning_rate": 9.990765508760867e-06, + "loss": 17.8342, + "step": 2669 + }, + { + "epoch": 0.04880545451221964, + "grad_norm": 7.085645296399639, + "learning_rate": 9.990747517745168e-06, + "loss": 17.7406, + "step": 2670 + }, + { + "epoch": 0.04882373370866617, + "grad_norm": 7.047769382770596, + "learning_rate": 9.99072950923733e-06, + "loss": 17.857, + "step": 2671 + }, + { + "epoch": 0.04884201290511269, + "grad_norm": 8.144642378718615, + "learning_rate": 9.99071148323742e-06, + "loss": 18.29, + "step": 2672 + }, + { + "epoch": 0.04886029210155921, + "grad_norm": 9.409388533866025, + "learning_rate": 9.9906934397455e-06, + "loss": 18.1425, + "step": 2673 + }, + { + "epoch": 0.04887857129800574, + "grad_norm": 7.787783845583783, + "learning_rate": 9.99067537876163e-06, + "loss": 17.5491, + "step": 2674 + }, + { + "epoch": 0.04889685049445226, + "grad_norm": 7.101698957878979, + "learning_rate": 
9.990657300285878e-06, + "loss": 17.5371, + "step": 2675 + }, + { + "epoch": 0.04891512969089879, + "grad_norm": 7.236285902937417, + "learning_rate": 9.990639204318305e-06, + "loss": 17.491, + "step": 2676 + }, + { + "epoch": 0.04893340888734531, + "grad_norm": 6.455738269410737, + "learning_rate": 9.990621090858974e-06, + "loss": 17.3108, + "step": 2677 + }, + { + "epoch": 0.048951688083791835, + "grad_norm": 7.938276799312635, + "learning_rate": 9.99060295990795e-06, + "loss": 18.1582, + "step": 2678 + }, + { + "epoch": 0.04896996728023836, + "grad_norm": 7.514158271321571, + "learning_rate": 9.990584811465295e-06, + "loss": 17.871, + "step": 2679 + }, + { + "epoch": 0.048988246476684885, + "grad_norm": 7.4360056843811755, + "learning_rate": 9.990566645531074e-06, + "loss": 17.8645, + "step": 2680 + }, + { + "epoch": 0.04900652567313141, + "grad_norm": 7.44016188929528, + "learning_rate": 9.99054846210535e-06, + "loss": 17.5871, + "step": 2681 + }, + { + "epoch": 0.049024804869577936, + "grad_norm": 7.422628097035798, + "learning_rate": 9.990530261188185e-06, + "loss": 17.8896, + "step": 2682 + }, + { + "epoch": 0.04904308406602446, + "grad_norm": 8.248708541352205, + "learning_rate": 9.990512042779646e-06, + "loss": 18.092, + "step": 2683 + }, + { + "epoch": 0.04906136326247098, + "grad_norm": 9.317770876223529, + "learning_rate": 9.990493806879795e-06, + "loss": 18.4137, + "step": 2684 + }, + { + "epoch": 0.04907964245891751, + "grad_norm": 8.191308927578604, + "learning_rate": 9.990475553488697e-06, + "loss": 18.0061, + "step": 2685 + }, + { + "epoch": 0.04909792165536403, + "grad_norm": 8.381473981870313, + "learning_rate": 9.990457282606413e-06, + "loss": 18.1359, + "step": 2686 + }, + { + "epoch": 0.04911620085181055, + "grad_norm": 8.144436950856923, + "learning_rate": 9.990438994233012e-06, + "loss": 17.9591, + "step": 2687 + }, + { + "epoch": 0.04913448004825708, + "grad_norm": 9.027632766190614, + "learning_rate": 9.990420688368553e-06, + "loss": 18.4355, + "step": 2688 + }, + { + "epoch": 0.0491527592447036, + "grad_norm": 8.154001017546962, + "learning_rate": 9.990402365013106e-06, + "loss": 18.1974, + "step": 2689 + }, + { + "epoch": 0.049171038441150124, + "grad_norm": 7.254699510203792, + "learning_rate": 9.990384024166729e-06, + "loss": 17.431, + "step": 2690 + }, + { + "epoch": 0.04918931763759665, + "grad_norm": 6.519549556102076, + "learning_rate": 9.990365665829489e-06, + "loss": 17.2872, + "step": 2691 + }, + { + "epoch": 0.049207596834043174, + "grad_norm": 8.202965101946765, + "learning_rate": 9.99034729000145e-06, + "loss": 18.3197, + "step": 2692 + }, + { + "epoch": 0.0492258760304897, + "grad_norm": 6.895803622581931, + "learning_rate": 9.990328896682678e-06, + "loss": 17.5714, + "step": 2693 + }, + { + "epoch": 0.049244155226936225, + "grad_norm": 6.846212451053361, + "learning_rate": 9.990310485873236e-06, + "loss": 17.408, + "step": 2694 + }, + { + "epoch": 0.049262434423382746, + "grad_norm": 8.374596122436799, + "learning_rate": 9.990292057573188e-06, + "loss": 18.2443, + "step": 2695 + }, + { + "epoch": 0.049280713619829275, + "grad_norm": 8.820118880435693, + "learning_rate": 9.990273611782602e-06, + "loss": 18.427, + "step": 2696 + }, + { + "epoch": 0.0492989928162758, + "grad_norm": 7.0649054904768995, + "learning_rate": 9.990255148501537e-06, + "loss": 17.8637, + "step": 2697 + }, + { + "epoch": 0.04931727201272232, + "grad_norm": 7.902482265181302, + "learning_rate": 9.990236667730062e-06, + "loss": 18.1622, + "step": 2698 + }, + { + "epoch": 
0.04933555120916885, + "grad_norm": 7.266759864899283, + "learning_rate": 9.99021816946824e-06, + "loss": 17.6958, + "step": 2699 + }, + { + "epoch": 0.04935383040561537, + "grad_norm": 8.263652197088238, + "learning_rate": 9.990199653716138e-06, + "loss": 18.38, + "step": 2700 + }, + { + "epoch": 0.04937210960206189, + "grad_norm": 8.477531153024145, + "learning_rate": 9.990181120473816e-06, + "loss": 18.3485, + "step": 2701 + }, + { + "epoch": 0.04939038879850842, + "grad_norm": 7.514234356040806, + "learning_rate": 9.990162569741346e-06, + "loss": 17.9061, + "step": 2702 + }, + { + "epoch": 0.04940866799495494, + "grad_norm": 8.407196745090438, + "learning_rate": 9.990144001518787e-06, + "loss": 18.3472, + "step": 2703 + }, + { + "epoch": 0.04942694719140146, + "grad_norm": 7.354320770017759, + "learning_rate": 9.990125415806207e-06, + "loss": 17.6776, + "step": 2704 + }, + { + "epoch": 0.04944522638784799, + "grad_norm": 7.396485600416532, + "learning_rate": 9.990106812603671e-06, + "loss": 17.9695, + "step": 2705 + }, + { + "epoch": 0.04946350558429451, + "grad_norm": 7.814055803305334, + "learning_rate": 9.990088191911242e-06, + "loss": 18.1308, + "step": 2706 + }, + { + "epoch": 0.049481784780741035, + "grad_norm": 7.437319022808082, + "learning_rate": 9.990069553728986e-06, + "loss": 17.9965, + "step": 2707 + }, + { + "epoch": 0.049500063977187564, + "grad_norm": 7.669524625555445, + "learning_rate": 9.990050898056973e-06, + "loss": 18.1744, + "step": 2708 + }, + { + "epoch": 0.049518343173634086, + "grad_norm": 7.536290606373893, + "learning_rate": 9.99003222489526e-06, + "loss": 18.0637, + "step": 2709 + }, + { + "epoch": 0.049536622370080614, + "grad_norm": 8.743718557438587, + "learning_rate": 9.99001353424392e-06, + "loss": 18.8466, + "step": 2710 + }, + { + "epoch": 0.049554901566527136, + "grad_norm": 8.740644797272857, + "learning_rate": 9.989994826103017e-06, + "loss": 18.5599, + "step": 2711 + }, + { + "epoch": 0.04957318076297366, + "grad_norm": 6.463562885126097, + "learning_rate": 9.989976100472612e-06, + "loss": 17.3284, + "step": 2712 + }, + { + "epoch": 0.04959145995942019, + "grad_norm": 7.729746826931038, + "learning_rate": 9.989957357352775e-06, + "loss": 18.1162, + "step": 2713 + }, + { + "epoch": 0.04960973915586671, + "grad_norm": 8.397131288738244, + "learning_rate": 9.989938596743571e-06, + "loss": 18.1535, + "step": 2714 + }, + { + "epoch": 0.04962801835231323, + "grad_norm": 7.299517582885823, + "learning_rate": 9.989919818645066e-06, + "loss": 17.8696, + "step": 2715 + }, + { + "epoch": 0.04964629754875976, + "grad_norm": 6.118793825633256, + "learning_rate": 9.989901023057323e-06, + "loss": 17.3392, + "step": 2716 + }, + { + "epoch": 0.04966457674520628, + "grad_norm": 8.087623894782654, + "learning_rate": 9.989882209980411e-06, + "loss": 18.0371, + "step": 2717 + }, + { + "epoch": 0.0496828559416528, + "grad_norm": 7.182842784130826, + "learning_rate": 9.989863379414395e-06, + "loss": 17.953, + "step": 2718 + }, + { + "epoch": 0.04970113513809933, + "grad_norm": 7.789297634414667, + "learning_rate": 9.98984453135934e-06, + "loss": 18.2924, + "step": 2719 + }, + { + "epoch": 0.04971941433454585, + "grad_norm": 7.484815689910988, + "learning_rate": 9.989825665815314e-06, + "loss": 17.7452, + "step": 2720 + }, + { + "epoch": 0.049737693530992375, + "grad_norm": 8.464123159713012, + "learning_rate": 9.989806782782381e-06, + "loss": 18.1465, + "step": 2721 + }, + { + "epoch": 0.0497559727274389, + "grad_norm": 7.29323077246332, + "learning_rate": 
9.989787882260609e-06, + "loss": 17.7274, + "step": 2722 + }, + { + "epoch": 0.049774251923885425, + "grad_norm": 7.370170116436082, + "learning_rate": 9.989768964250062e-06, + "loss": 17.826, + "step": 2723 + }, + { + "epoch": 0.04979253112033195, + "grad_norm": 8.555492267977986, + "learning_rate": 9.98975002875081e-06, + "loss": 18.3993, + "step": 2724 + }, + { + "epoch": 0.049810810316778475, + "grad_norm": 7.738168820238301, + "learning_rate": 9.989731075762916e-06, + "loss": 18.149, + "step": 2725 + }, + { + "epoch": 0.049829089513225, + "grad_norm": 6.684681111753755, + "learning_rate": 9.989712105286446e-06, + "loss": 17.6446, + "step": 2726 + }, + { + "epoch": 0.049847368709671526, + "grad_norm": 7.423002418597444, + "learning_rate": 9.98969311732147e-06, + "loss": 17.9281, + "step": 2727 + }, + { + "epoch": 0.04986564790611805, + "grad_norm": 7.046937825717382, + "learning_rate": 9.989674111868052e-06, + "loss": 17.6348, + "step": 2728 + }, + { + "epoch": 0.04988392710256457, + "grad_norm": 7.3771534692869, + "learning_rate": 9.98965508892626e-06, + "loss": 17.5365, + "step": 2729 + }, + { + "epoch": 0.0499022062990111, + "grad_norm": 7.3746896958532995, + "learning_rate": 9.98963604849616e-06, + "loss": 17.8783, + "step": 2730 + }, + { + "epoch": 0.04992048549545762, + "grad_norm": 7.743231553670266, + "learning_rate": 9.989616990577817e-06, + "loss": 17.9597, + "step": 2731 + }, + { + "epoch": 0.04993876469190414, + "grad_norm": 9.20320942441712, + "learning_rate": 9.9895979151713e-06, + "loss": 18.9336, + "step": 2732 + }, + { + "epoch": 0.04995704388835067, + "grad_norm": 7.675649772850519, + "learning_rate": 9.989578822276678e-06, + "loss": 18.2667, + "step": 2733 + }, + { + "epoch": 0.04997532308479719, + "grad_norm": 7.708890407167971, + "learning_rate": 9.989559711894012e-06, + "loss": 18.1385, + "step": 2734 + }, + { + "epoch": 0.049993602281243714, + "grad_norm": 6.9503072081652215, + "learning_rate": 9.989540584023374e-06, + "loss": 17.496, + "step": 2735 + }, + { + "epoch": 0.05001188147769024, + "grad_norm": 7.330806638692699, + "learning_rate": 9.989521438664828e-06, + "loss": 17.7176, + "step": 2736 + }, + { + "epoch": 0.050030160674136764, + "grad_norm": 9.208583128486223, + "learning_rate": 9.989502275818443e-06, + "loss": 18.4525, + "step": 2737 + }, + { + "epoch": 0.050048439870583286, + "grad_norm": 6.781366804392853, + "learning_rate": 9.989483095484284e-06, + "loss": 17.7662, + "step": 2738 + }, + { + "epoch": 0.050066719067029815, + "grad_norm": 7.084519030626298, + "learning_rate": 9.989463897662421e-06, + "loss": 17.546, + "step": 2739 + }, + { + "epoch": 0.05008499826347634, + "grad_norm": 6.586986390290971, + "learning_rate": 9.98944468235292e-06, + "loss": 17.7685, + "step": 2740 + }, + { + "epoch": 0.05010327745992286, + "grad_norm": 9.214525055319351, + "learning_rate": 9.989425449555848e-06, + "loss": 18.6747, + "step": 2741 + }, + { + "epoch": 0.05012155665636939, + "grad_norm": 8.345511864208909, + "learning_rate": 9.989406199271273e-06, + "loss": 18.0107, + "step": 2742 + }, + { + "epoch": 0.05013983585281591, + "grad_norm": 6.7572346177421965, + "learning_rate": 9.989386931499264e-06, + "loss": 17.6164, + "step": 2743 + }, + { + "epoch": 0.05015811504926244, + "grad_norm": 8.56372854986229, + "learning_rate": 9.989367646239883e-06, + "loss": 18.5043, + "step": 2744 + }, + { + "epoch": 0.05017639424570896, + "grad_norm": 7.870519450132207, + "learning_rate": 9.989348343493204e-06, + "loss": 18.1654, + "step": 2745 + }, + { + "epoch": 
0.05019467344215548, + "grad_norm": 11.299407608036292, + "learning_rate": 9.989329023259291e-06, + "loss": 17.8543, + "step": 2746 + }, + { + "epoch": 0.05021295263860201, + "grad_norm": 6.967217079294844, + "learning_rate": 9.989309685538214e-06, + "loss": 17.8087, + "step": 2747 + }, + { + "epoch": 0.05023123183504853, + "grad_norm": 7.634909711788494, + "learning_rate": 9.989290330330038e-06, + "loss": 17.8961, + "step": 2748 + }, + { + "epoch": 0.05024951103149505, + "grad_norm": 8.418568458532993, + "learning_rate": 9.989270957634834e-06, + "loss": 18.7163, + "step": 2749 + }, + { + "epoch": 0.05026779022794158, + "grad_norm": 8.256148404351109, + "learning_rate": 9.989251567452668e-06, + "loss": 18.1897, + "step": 2750 + }, + { + "epoch": 0.050286069424388104, + "grad_norm": 7.9624461708300975, + "learning_rate": 9.989232159783606e-06, + "loss": 17.9329, + "step": 2751 + }, + { + "epoch": 0.050304348620834625, + "grad_norm": 8.20349321365802, + "learning_rate": 9.98921273462772e-06, + "loss": 17.9774, + "step": 2752 + }, + { + "epoch": 0.050322627817281154, + "grad_norm": 8.250555558527648, + "learning_rate": 9.989193291985076e-06, + "loss": 18.4838, + "step": 2753 + }, + { + "epoch": 0.050340907013727676, + "grad_norm": 7.451051830766465, + "learning_rate": 9.98917383185574e-06, + "loss": 17.7306, + "step": 2754 + }, + { + "epoch": 0.0503591862101742, + "grad_norm": 7.542378455227766, + "learning_rate": 9.989154354239787e-06, + "loss": 18.0032, + "step": 2755 + }, + { + "epoch": 0.050377465406620726, + "grad_norm": 8.392777781864194, + "learning_rate": 9.989134859137279e-06, + "loss": 18.193, + "step": 2756 + }, + { + "epoch": 0.05039574460306725, + "grad_norm": 7.408888359247001, + "learning_rate": 9.989115346548285e-06, + "loss": 18.0566, + "step": 2757 + }, + { + "epoch": 0.05041402379951377, + "grad_norm": 7.532589654429319, + "learning_rate": 9.989095816472876e-06, + "loss": 17.8037, + "step": 2758 + }, + { + "epoch": 0.0504323029959603, + "grad_norm": 7.664168018715292, + "learning_rate": 9.989076268911119e-06, + "loss": 18.1644, + "step": 2759 + }, + { + "epoch": 0.05045058219240682, + "grad_norm": 7.5215914971492115, + "learning_rate": 9.989056703863083e-06, + "loss": 17.9341, + "step": 2760 + }, + { + "epoch": 0.05046886138885335, + "grad_norm": 7.206447022471866, + "learning_rate": 9.989037121328835e-06, + "loss": 17.9137, + "step": 2761 + }, + { + "epoch": 0.05048714058529987, + "grad_norm": 7.76329719211878, + "learning_rate": 9.989017521308447e-06, + "loss": 18.292, + "step": 2762 + }, + { + "epoch": 0.05050541978174639, + "grad_norm": 7.716220081439177, + "learning_rate": 9.988997903801984e-06, + "loss": 17.5243, + "step": 2763 + }, + { + "epoch": 0.05052369897819292, + "grad_norm": 8.135237247062651, + "learning_rate": 9.988978268809516e-06, + "loss": 18.4224, + "step": 2764 + }, + { + "epoch": 0.05054197817463944, + "grad_norm": 7.340949505261392, + "learning_rate": 9.988958616331112e-06, + "loss": 17.8489, + "step": 2765 + }, + { + "epoch": 0.050560257371085965, + "grad_norm": 8.604957659993723, + "learning_rate": 9.988938946366841e-06, + "loss": 18.1277, + "step": 2766 + }, + { + "epoch": 0.050578536567532494, + "grad_norm": 6.703904060868726, + "learning_rate": 9.988919258916775e-06, + "loss": 17.6016, + "step": 2767 + }, + { + "epoch": 0.050596815763979015, + "grad_norm": 7.615904567530197, + "learning_rate": 9.988899553980975e-06, + "loss": 18.0835, + "step": 2768 + }, + { + "epoch": 0.05061509496042554, + "grad_norm": 7.656346752005079, + "learning_rate": 
9.988879831559516e-06, + "loss": 17.724, + "step": 2769 + }, + { + "epoch": 0.050633374156872066, + "grad_norm": 7.6703645021028946, + "learning_rate": 9.988860091652469e-06, + "loss": 17.9447, + "step": 2770 + }, + { + "epoch": 0.05065165335331859, + "grad_norm": 6.133725110298065, + "learning_rate": 9.988840334259898e-06, + "loss": 17.2218, + "step": 2771 + }, + { + "epoch": 0.05066993254976511, + "grad_norm": 8.109487016435057, + "learning_rate": 9.988820559381876e-06, + "loss": 18.2571, + "step": 2772 + }, + { + "epoch": 0.05068821174621164, + "grad_norm": 6.865556555909455, + "learning_rate": 9.98880076701847e-06, + "loss": 17.6203, + "step": 2773 + }, + { + "epoch": 0.05070649094265816, + "grad_norm": 8.299023898912916, + "learning_rate": 9.98878095716975e-06, + "loss": 17.972, + "step": 2774 + }, + { + "epoch": 0.05072477013910468, + "grad_norm": 7.050039866717872, + "learning_rate": 9.988761129835787e-06, + "loss": 17.6643, + "step": 2775 + }, + { + "epoch": 0.05074304933555121, + "grad_norm": 7.070438246786039, + "learning_rate": 9.988741285016648e-06, + "loss": 17.748, + "step": 2776 + }, + { + "epoch": 0.05076132853199773, + "grad_norm": 7.422909549351179, + "learning_rate": 9.988721422712401e-06, + "loss": 17.976, + "step": 2777 + }, + { + "epoch": 0.05077960772844426, + "grad_norm": 7.5322755767621326, + "learning_rate": 9.988701542923122e-06, + "loss": 17.9389, + "step": 2778 + }, + { + "epoch": 0.05079788692489078, + "grad_norm": 7.267922269520265, + "learning_rate": 9.988681645648876e-06, + "loss": 17.4306, + "step": 2779 + }, + { + "epoch": 0.050816166121337304, + "grad_norm": 10.366644639631332, + "learning_rate": 9.988661730889734e-06, + "loss": 18.8708, + "step": 2780 + }, + { + "epoch": 0.05083444531778383, + "grad_norm": 8.489072949808808, + "learning_rate": 9.988641798645767e-06, + "loss": 18.211, + "step": 2781 + }, + { + "epoch": 0.050852724514230355, + "grad_norm": 7.94867403158134, + "learning_rate": 9.98862184891704e-06, + "loss": 17.809, + "step": 2782 + }, + { + "epoch": 0.050871003710676876, + "grad_norm": 8.308052879184427, + "learning_rate": 9.988601881703628e-06, + "loss": 18.4109, + "step": 2783 + }, + { + "epoch": 0.050889282907123405, + "grad_norm": 6.382505749570598, + "learning_rate": 9.9885818970056e-06, + "loss": 17.5987, + "step": 2784 + }, + { + "epoch": 0.05090756210356993, + "grad_norm": 8.65878624516278, + "learning_rate": 9.988561894823025e-06, + "loss": 18.5557, + "step": 2785 + }, + { + "epoch": 0.05092584130001645, + "grad_norm": 7.704704682550004, + "learning_rate": 9.988541875155972e-06, + "loss": 18.2683, + "step": 2786 + }, + { + "epoch": 0.05094412049646298, + "grad_norm": 7.87920528871043, + "learning_rate": 9.988521838004515e-06, + "loss": 17.8086, + "step": 2787 + }, + { + "epoch": 0.0509623996929095, + "grad_norm": 9.26821564888294, + "learning_rate": 9.98850178336872e-06, + "loss": 18.5718, + "step": 2788 + }, + { + "epoch": 0.05098067888935602, + "grad_norm": 6.667709687642425, + "learning_rate": 9.988481711248659e-06, + "loss": 17.7336, + "step": 2789 + }, + { + "epoch": 0.05099895808580255, + "grad_norm": 7.861762973631642, + "learning_rate": 9.988461621644403e-06, + "loss": 17.9559, + "step": 2790 + }, + { + "epoch": 0.05101723728224907, + "grad_norm": 8.98523827817954, + "learning_rate": 9.988441514556022e-06, + "loss": 18.5481, + "step": 2791 + }, + { + "epoch": 0.05103551647869559, + "grad_norm": 7.411424716591436, + "learning_rate": 9.988421389983588e-06, + "loss": 17.8808, + "step": 2792 + }, + { + "epoch": 
0.05105379567514212, + "grad_norm": 7.7931229957919035, + "learning_rate": 9.988401247927167e-06, + "loss": 17.8705, + "step": 2793 + }, + { + "epoch": 0.051072074871588644, + "grad_norm": 8.198926676791432, + "learning_rate": 9.988381088386833e-06, + "loss": 18.1572, + "step": 2794 + }, + { + "epoch": 0.05109035406803517, + "grad_norm": 7.361559633425119, + "learning_rate": 9.988360911362658e-06, + "loss": 17.7629, + "step": 2795 + }, + { + "epoch": 0.051108633264481694, + "grad_norm": 8.045631817199665, + "learning_rate": 9.98834071685471e-06, + "loss": 18.3438, + "step": 2796 + }, + { + "epoch": 0.051126912460928216, + "grad_norm": 6.775119038069602, + "learning_rate": 9.98832050486306e-06, + "loss": 17.5067, + "step": 2797 + }, + { + "epoch": 0.051145191657374744, + "grad_norm": 7.169523145096916, + "learning_rate": 9.988300275387781e-06, + "loss": 17.9069, + "step": 2798 + }, + { + "epoch": 0.051163470853821266, + "grad_norm": 9.458653998953265, + "learning_rate": 9.98828002842894e-06, + "loss": 18.2526, + "step": 2799 + }, + { + "epoch": 0.05118175005026779, + "grad_norm": 7.221610231316801, + "learning_rate": 9.988259763986613e-06, + "loss": 17.6764, + "step": 2800 + }, + { + "epoch": 0.05120002924671432, + "grad_norm": 8.594630950349286, + "learning_rate": 9.988239482060867e-06, + "loss": 18.0721, + "step": 2801 + }, + { + "epoch": 0.05121830844316084, + "grad_norm": 7.934044726120394, + "learning_rate": 9.988219182651775e-06, + "loss": 18.1069, + "step": 2802 + }, + { + "epoch": 0.05123658763960736, + "grad_norm": 9.296702673248296, + "learning_rate": 9.988198865759406e-06, + "loss": 18.4842, + "step": 2803 + }, + { + "epoch": 0.05125486683605389, + "grad_norm": 7.743424084410392, + "learning_rate": 9.988178531383834e-06, + "loss": 18.0088, + "step": 2804 + }, + { + "epoch": 0.05127314603250041, + "grad_norm": 7.314397756537535, + "learning_rate": 9.98815817952513e-06, + "loss": 17.5609, + "step": 2805 + }, + { + "epoch": 0.05129142522894693, + "grad_norm": 8.346376439142999, + "learning_rate": 9.988137810183363e-06, + "loss": 18.2768, + "step": 2806 + }, + { + "epoch": 0.05130970442539346, + "grad_norm": 7.676496545059605, + "learning_rate": 9.988117423358607e-06, + "loss": 18.0011, + "step": 2807 + }, + { + "epoch": 0.05132798362183998, + "grad_norm": 6.422274098613539, + "learning_rate": 9.988097019050932e-06, + "loss": 17.4351, + "step": 2808 + }, + { + "epoch": 0.051346262818286505, + "grad_norm": 8.137567816071021, + "learning_rate": 9.98807659726041e-06, + "loss": 17.8924, + "step": 2809 + }, + { + "epoch": 0.05136454201473303, + "grad_norm": 8.880395227381214, + "learning_rate": 9.988056157987111e-06, + "loss": 18.3481, + "step": 2810 + }, + { + "epoch": 0.051382821211179555, + "grad_norm": 7.717293646585981, + "learning_rate": 9.988035701231108e-06, + "loss": 18.0143, + "step": 2811 + }, + { + "epoch": 0.051401100407626084, + "grad_norm": 7.959960217519171, + "learning_rate": 9.988015226992476e-06, + "loss": 18.2028, + "step": 2812 + }, + { + "epoch": 0.051419379604072606, + "grad_norm": 7.950395785233589, + "learning_rate": 9.98799473527128e-06, + "loss": 18.3118, + "step": 2813 + }, + { + "epoch": 0.05143765880051913, + "grad_norm": 7.384008026563201, + "learning_rate": 9.987974226067597e-06, + "loss": 17.6602, + "step": 2814 + }, + { + "epoch": 0.051455937996965656, + "grad_norm": 9.229153952983667, + "learning_rate": 9.987953699381496e-06, + "loss": 18.5138, + "step": 2815 + }, + { + "epoch": 0.05147421719341218, + "grad_norm": 8.544361818534913, + "learning_rate": 
9.98793315521305e-06, + "loss": 19.0313, + "step": 2816 + }, + { + "epoch": 0.0514924963898587, + "grad_norm": 8.774064474288327, + "learning_rate": 9.987912593562332e-06, + "loss": 17.9237, + "step": 2817 + }, + { + "epoch": 0.05151077558630523, + "grad_norm": 7.47245073365734, + "learning_rate": 9.987892014429414e-06, + "loss": 17.6392, + "step": 2818 + }, + { + "epoch": 0.05152905478275175, + "grad_norm": 7.947494112070558, + "learning_rate": 9.987871417814365e-06, + "loss": 18.1905, + "step": 2819 + }, + { + "epoch": 0.05154733397919827, + "grad_norm": 7.694083457772169, + "learning_rate": 9.987850803717262e-06, + "loss": 17.829, + "step": 2820 + }, + { + "epoch": 0.0515656131756448, + "grad_norm": 7.212210507516075, + "learning_rate": 9.987830172138174e-06, + "loss": 17.9155, + "step": 2821 + }, + { + "epoch": 0.05158389237209132, + "grad_norm": 7.334533983864379, + "learning_rate": 9.987809523077173e-06, + "loss": 17.5396, + "step": 2822 + }, + { + "epoch": 0.051602171568537844, + "grad_norm": 8.101411659752552, + "learning_rate": 9.987788856534333e-06, + "loss": 18.021, + "step": 2823 + }, + { + "epoch": 0.05162045076498437, + "grad_norm": 6.834266760261926, + "learning_rate": 9.987768172509727e-06, + "loss": 17.5892, + "step": 2824 + }, + { + "epoch": 0.051638729961430894, + "grad_norm": 7.733353573907947, + "learning_rate": 9.987747471003424e-06, + "loss": 18.3115, + "step": 2825 + }, + { + "epoch": 0.051657009157877416, + "grad_norm": 7.736141934885637, + "learning_rate": 9.9877267520155e-06, + "loss": 18.0004, + "step": 2826 + }, + { + "epoch": 0.051675288354323945, + "grad_norm": 7.272801249753164, + "learning_rate": 9.987706015546027e-06, + "loss": 17.8209, + "step": 2827 + }, + { + "epoch": 0.05169356755077047, + "grad_norm": 6.3552622795233145, + "learning_rate": 9.987685261595077e-06, + "loss": 17.6643, + "step": 2828 + }, + { + "epoch": 0.051711846747216995, + "grad_norm": 7.530113854779712, + "learning_rate": 9.987664490162722e-06, + "loss": 18.1216, + "step": 2829 + }, + { + "epoch": 0.05173012594366352, + "grad_norm": 6.3255561749029425, + "learning_rate": 9.987643701249038e-06, + "loss": 17.3771, + "step": 2830 + }, + { + "epoch": 0.05174840514011004, + "grad_norm": 7.601770793981465, + "learning_rate": 9.987622894854094e-06, + "loss": 17.9845, + "step": 2831 + }, + { + "epoch": 0.05176668433655657, + "grad_norm": 7.655184473142678, + "learning_rate": 9.987602070977966e-06, + "loss": 17.804, + "step": 2832 + }, + { + "epoch": 0.05178496353300309, + "grad_norm": 6.22552614974228, + "learning_rate": 9.987581229620724e-06, + "loss": 17.4074, + "step": 2833 + }, + { + "epoch": 0.05180324272944961, + "grad_norm": 8.24580283423378, + "learning_rate": 9.987560370782443e-06, + "loss": 18.3363, + "step": 2834 + }, + { + "epoch": 0.05182152192589614, + "grad_norm": 8.827466730094583, + "learning_rate": 9.987539494463197e-06, + "loss": 18.5687, + "step": 2835 + }, + { + "epoch": 0.05183980112234266, + "grad_norm": 8.22822456444839, + "learning_rate": 9.987518600663055e-06, + "loss": 18.0483, + "step": 2836 + }, + { + "epoch": 0.05185808031878918, + "grad_norm": 7.6572746032820636, + "learning_rate": 9.987497689382095e-06, + "loss": 17.8647, + "step": 2837 + }, + { + "epoch": 0.05187635951523571, + "grad_norm": 7.507526556204351, + "learning_rate": 9.987476760620389e-06, + "loss": 17.7855, + "step": 2838 + }, + { + "epoch": 0.051894638711682234, + "grad_norm": 8.800249578147918, + "learning_rate": 9.987455814378008e-06, + "loss": 18.3258, + "step": 2839 + }, + { + "epoch": 
0.051912917908128756, + "grad_norm": 8.583545120493428, + "learning_rate": 9.98743485065503e-06, + "loss": 18.4643, + "step": 2840 + }, + { + "epoch": 0.051931197104575284, + "grad_norm": 8.589006782220283, + "learning_rate": 9.987413869451522e-06, + "loss": 18.3036, + "step": 2841 + }, + { + "epoch": 0.051949476301021806, + "grad_norm": 8.02098650833697, + "learning_rate": 9.987392870767563e-06, + "loss": 18.0703, + "step": 2842 + }, + { + "epoch": 0.05196775549746833, + "grad_norm": 8.222004790638712, + "learning_rate": 9.987371854603226e-06, + "loss": 18.2864, + "step": 2843 + }, + { + "epoch": 0.05198603469391486, + "grad_norm": 6.197485706489133, + "learning_rate": 9.987350820958581e-06, + "loss": 17.4297, + "step": 2844 + }, + { + "epoch": 0.05200431389036138, + "grad_norm": 7.830326125569888, + "learning_rate": 9.987329769833705e-06, + "loss": 18.1324, + "step": 2845 + }, + { + "epoch": 0.05202259308680791, + "grad_norm": 8.706779415610775, + "learning_rate": 9.987308701228672e-06, + "loss": 18.2751, + "step": 2846 + }, + { + "epoch": 0.05204087228325443, + "grad_norm": 6.159749869858416, + "learning_rate": 9.987287615143554e-06, + "loss": 17.5055, + "step": 2847 + }, + { + "epoch": 0.05205915147970095, + "grad_norm": 8.608441703583498, + "learning_rate": 9.987266511578425e-06, + "loss": 18.2668, + "step": 2848 + }, + { + "epoch": 0.05207743067614748, + "grad_norm": 8.791378746879808, + "learning_rate": 9.98724539053336e-06, + "loss": 18.3234, + "step": 2849 + }, + { + "epoch": 0.052095709872594, + "grad_norm": 7.175952125316248, + "learning_rate": 9.987224252008433e-06, + "loss": 17.918, + "step": 2850 + }, + { + "epoch": 0.05211398906904052, + "grad_norm": 8.17411494576387, + "learning_rate": 9.987203096003719e-06, + "loss": 18.7266, + "step": 2851 + }, + { + "epoch": 0.05213226826548705, + "grad_norm": 7.924278562250412, + "learning_rate": 9.98718192251929e-06, + "loss": 18.2808, + "step": 2852 + }, + { + "epoch": 0.05215054746193357, + "grad_norm": 7.0300478896758225, + "learning_rate": 9.98716073155522e-06, + "loss": 17.9663, + "step": 2853 + }, + { + "epoch": 0.052168826658380095, + "grad_norm": 8.450063164139488, + "learning_rate": 9.987139523111585e-06, + "loss": 18.3254, + "step": 2854 + }, + { + "epoch": 0.052187105854826624, + "grad_norm": 8.215549204227752, + "learning_rate": 9.98711829718846e-06, + "loss": 18.232, + "step": 2855 + }, + { + "epoch": 0.052205385051273145, + "grad_norm": 7.8183822272817505, + "learning_rate": 9.987097053785918e-06, + "loss": 18.2791, + "step": 2856 + }, + { + "epoch": 0.05222366424771967, + "grad_norm": 8.670558646100888, + "learning_rate": 9.987075792904031e-06, + "loss": 18.415, + "step": 2857 + }, + { + "epoch": 0.052241943444166196, + "grad_norm": 8.412970962496319, + "learning_rate": 9.987054514542878e-06, + "loss": 18.25, + "step": 2858 + }, + { + "epoch": 0.05226022264061272, + "grad_norm": 6.930987927455506, + "learning_rate": 9.987033218702531e-06, + "loss": 17.5139, + "step": 2859 + }, + { + "epoch": 0.05227850183705924, + "grad_norm": 7.58864030339164, + "learning_rate": 9.987011905383066e-06, + "loss": 17.9744, + "step": 2860 + }, + { + "epoch": 0.05229678103350577, + "grad_norm": 10.258257176125628, + "learning_rate": 9.986990574584556e-06, + "loss": 18.803, + "step": 2861 + }, + { + "epoch": 0.05231506022995229, + "grad_norm": 7.413678639752427, + "learning_rate": 9.986969226307078e-06, + "loss": 17.7989, + "step": 2862 + }, + { + "epoch": 0.05233333942639882, + "grad_norm": 7.445005414972241, + "learning_rate": 
9.986947860550705e-06, + "loss": 17.9625, + "step": 2863 + }, + { + "epoch": 0.05235161862284534, + "grad_norm": 7.4133344111490915, + "learning_rate": 9.986926477315512e-06, + "loss": 17.7285, + "step": 2864 + }, + { + "epoch": 0.05236989781929186, + "grad_norm": 7.1495490687528624, + "learning_rate": 9.986905076601577e-06, + "loss": 17.9919, + "step": 2865 + }, + { + "epoch": 0.05238817701573839, + "grad_norm": 7.945602880664831, + "learning_rate": 9.98688365840897e-06, + "loss": 18.034, + "step": 2866 + }, + { + "epoch": 0.05240645621218491, + "grad_norm": 9.226799939576006, + "learning_rate": 9.98686222273777e-06, + "loss": 18.2153, + "step": 2867 + }, + { + "epoch": 0.052424735408631434, + "grad_norm": 7.4125133452789544, + "learning_rate": 9.986840769588049e-06, + "loss": 17.8405, + "step": 2868 + }, + { + "epoch": 0.05244301460507796, + "grad_norm": 8.488537991500818, + "learning_rate": 9.986819298959885e-06, + "loss": 18.0606, + "step": 2869 + }, + { + "epoch": 0.052461293801524485, + "grad_norm": 6.820147326899155, + "learning_rate": 9.986797810853352e-06, + "loss": 17.4996, + "step": 2870 + }, + { + "epoch": 0.05247957299797101, + "grad_norm": 5.829696315380732, + "learning_rate": 9.986776305268524e-06, + "loss": 17.2355, + "step": 2871 + }, + { + "epoch": 0.052497852194417535, + "grad_norm": 6.216952987512384, + "learning_rate": 9.986754782205479e-06, + "loss": 17.3076, + "step": 2872 + }, + { + "epoch": 0.05251613139086406, + "grad_norm": 7.184225084460174, + "learning_rate": 9.986733241664291e-06, + "loss": 17.6507, + "step": 2873 + }, + { + "epoch": 0.05253441058731058, + "grad_norm": 7.666798363507448, + "learning_rate": 9.986711683645036e-06, + "loss": 17.8974, + "step": 2874 + }, + { + "epoch": 0.05255268978375711, + "grad_norm": 8.129386680646322, + "learning_rate": 9.98669010814779e-06, + "loss": 18.1159, + "step": 2875 + }, + { + "epoch": 0.05257096898020363, + "grad_norm": 8.878008739092836, + "learning_rate": 9.986668515172626e-06, + "loss": 18.3956, + "step": 2876 + }, + { + "epoch": 0.05258924817665015, + "grad_norm": 8.609838043081584, + "learning_rate": 9.986646904719624e-06, + "loss": 17.837, + "step": 2877 + }, + { + "epoch": 0.05260752737309668, + "grad_norm": 8.710736010767981, + "learning_rate": 9.986625276788855e-06, + "loss": 18.8199, + "step": 2878 + }, + { + "epoch": 0.0526258065695432, + "grad_norm": 7.748052330501188, + "learning_rate": 9.986603631380399e-06, + "loss": 17.93, + "step": 2879 + }, + { + "epoch": 0.05264408576598973, + "grad_norm": 7.966771545840098, + "learning_rate": 9.986581968494328e-06, + "loss": 18.3258, + "step": 2880 + }, + { + "epoch": 0.05266236496243625, + "grad_norm": 7.6868080308718785, + "learning_rate": 9.98656028813072e-06, + "loss": 18.0548, + "step": 2881 + }, + { + "epoch": 0.052680644158882774, + "grad_norm": 7.655713983096808, + "learning_rate": 9.986538590289654e-06, + "loss": 18.0637, + "step": 2882 + }, + { + "epoch": 0.0526989233553293, + "grad_norm": 7.5684529254383595, + "learning_rate": 9.9865168749712e-06, + "loss": 17.9218, + "step": 2883 + }, + { + "epoch": 0.052717202551775824, + "grad_norm": 7.6454991418926905, + "learning_rate": 9.986495142175437e-06, + "loss": 17.786, + "step": 2884 + }, + { + "epoch": 0.052735481748222346, + "grad_norm": 7.733469324401486, + "learning_rate": 9.986473391902442e-06, + "loss": 18.0281, + "step": 2885 + }, + { + "epoch": 0.052753760944668875, + "grad_norm": 7.64452382839804, + "learning_rate": 9.986451624152291e-06, + "loss": 17.9382, + "step": 2886 + }, + { + "epoch": 
0.052772040141115396, + "grad_norm": 8.626678024177481, + "learning_rate": 9.986429838925059e-06, + "loss": 18.3031, + "step": 2887 + }, + { + "epoch": 0.05279031933756192, + "grad_norm": 7.30641898232523, + "learning_rate": 9.986408036220825e-06, + "loss": 17.8578, + "step": 2888 + }, + { + "epoch": 0.05280859853400845, + "grad_norm": 8.222903766705286, + "learning_rate": 9.986386216039661e-06, + "loss": 18.5021, + "step": 2889 + }, + { + "epoch": 0.05282687773045497, + "grad_norm": 6.821592759931198, + "learning_rate": 9.986364378381647e-06, + "loss": 17.5543, + "step": 2890 + }, + { + "epoch": 0.05284515692690149, + "grad_norm": 5.638335855246105, + "learning_rate": 9.986342523246857e-06, + "loss": 17.3, + "step": 2891 + }, + { + "epoch": 0.05286343612334802, + "grad_norm": 6.637079075331286, + "learning_rate": 9.986320650635371e-06, + "loss": 17.6331, + "step": 2892 + }, + { + "epoch": 0.05288171531979454, + "grad_norm": 8.285688386237048, + "learning_rate": 9.986298760547264e-06, + "loss": 18.2569, + "step": 2893 + }, + { + "epoch": 0.05289999451624106, + "grad_norm": 7.658564001180271, + "learning_rate": 9.986276852982612e-06, + "loss": 17.7906, + "step": 2894 + }, + { + "epoch": 0.05291827371268759, + "grad_norm": 9.256670358946646, + "learning_rate": 9.986254927941492e-06, + "loss": 18.7537, + "step": 2895 + }, + { + "epoch": 0.05293655290913411, + "grad_norm": 8.027355404343318, + "learning_rate": 9.986232985423982e-06, + "loss": 18.3691, + "step": 2896 + }, + { + "epoch": 0.05295483210558064, + "grad_norm": 7.654962863757842, + "learning_rate": 9.986211025430156e-06, + "loss": 17.8887, + "step": 2897 + }, + { + "epoch": 0.052973111302027164, + "grad_norm": 8.15685620262823, + "learning_rate": 9.986189047960096e-06, + "loss": 18.5433, + "step": 2898 + }, + { + "epoch": 0.052991390498473685, + "grad_norm": 11.409212729323968, + "learning_rate": 9.986167053013875e-06, + "loss": 18.1699, + "step": 2899 + }, + { + "epoch": 0.053009669694920214, + "grad_norm": 7.302661018768685, + "learning_rate": 9.986145040591571e-06, + "loss": 17.9199, + "step": 2900 + }, + { + "epoch": 0.053027948891366736, + "grad_norm": 8.143621464920205, + "learning_rate": 9.986123010693261e-06, + "loss": 18.4157, + "step": 2901 + }, + { + "epoch": 0.05304622808781326, + "grad_norm": 6.974448154848328, + "learning_rate": 9.986100963319023e-06, + "loss": 17.4241, + "step": 2902 + }, + { + "epoch": 0.053064507284259786, + "grad_norm": 9.076493067839538, + "learning_rate": 9.986078898468934e-06, + "loss": 18.4846, + "step": 2903 + }, + { + "epoch": 0.05308278648070631, + "grad_norm": 8.606778338151203, + "learning_rate": 9.98605681614307e-06, + "loss": 18.4125, + "step": 2904 + }, + { + "epoch": 0.05310106567715283, + "grad_norm": 9.238450490429171, + "learning_rate": 9.98603471634151e-06, + "loss": 18.6629, + "step": 2905 + }, + { + "epoch": 0.05311934487359936, + "grad_norm": 7.476422017962867, + "learning_rate": 9.986012599064332e-06, + "loss": 17.6207, + "step": 2906 + }, + { + "epoch": 0.05313762407004588, + "grad_norm": 10.354709407007224, + "learning_rate": 9.98599046431161e-06, + "loss": 19.1014, + "step": 2907 + }, + { + "epoch": 0.0531559032664924, + "grad_norm": 7.589139360560411, + "learning_rate": 9.985968312083428e-06, + "loss": 17.8751, + "step": 2908 + }, + { + "epoch": 0.05317418246293893, + "grad_norm": 7.32388149339212, + "learning_rate": 9.985946142379856e-06, + "loss": 17.7151, + "step": 2909 + }, + { + "epoch": 0.05319246165938545, + "grad_norm": 9.125049790803184, + "learning_rate": 
9.985923955200977e-06, + "loss": 18.2955, + "step": 2910 + }, + { + "epoch": 0.053210740855831974, + "grad_norm": 7.4579332937791945, + "learning_rate": 9.985901750546867e-06, + "loss": 17.7339, + "step": 2911 + }, + { + "epoch": 0.0532290200522785, + "grad_norm": 8.629397393390434, + "learning_rate": 9.985879528417604e-06, + "loss": 18.2328, + "step": 2912 + }, + { + "epoch": 0.053247299248725025, + "grad_norm": 8.475233804732257, + "learning_rate": 9.985857288813266e-06, + "loss": 17.9082, + "step": 2913 + }, + { + "epoch": 0.05326557844517155, + "grad_norm": 7.094067999462129, + "learning_rate": 9.985835031733931e-06, + "loss": 17.638, + "step": 2914 + }, + { + "epoch": 0.053283857641618075, + "grad_norm": 7.467000267979098, + "learning_rate": 9.985812757179677e-06, + "loss": 17.728, + "step": 2915 + }, + { + "epoch": 0.0533021368380646, + "grad_norm": 8.54598987332618, + "learning_rate": 9.98579046515058e-06, + "loss": 18.105, + "step": 2916 + }, + { + "epoch": 0.053320416034511126, + "grad_norm": 7.344890427722456, + "learning_rate": 9.985768155646721e-06, + "loss": 17.7487, + "step": 2917 + }, + { + "epoch": 0.05333869523095765, + "grad_norm": 6.628978628649656, + "learning_rate": 9.98574582866818e-06, + "loss": 17.7106, + "step": 2918 + }, + { + "epoch": 0.05335697442740417, + "grad_norm": 8.360264382365479, + "learning_rate": 9.98572348421503e-06, + "loss": 17.9406, + "step": 2919 + }, + { + "epoch": 0.0533752536238507, + "grad_norm": 6.455080799676221, + "learning_rate": 9.985701122287352e-06, + "loss": 17.3594, + "step": 2920 + }, + { + "epoch": 0.05339353282029722, + "grad_norm": 9.13581655350652, + "learning_rate": 9.985678742885225e-06, + "loss": 18.6982, + "step": 2921 + }, + { + "epoch": 0.05341181201674374, + "grad_norm": 9.032337151984729, + "learning_rate": 9.985656346008727e-06, + "loss": 18.4182, + "step": 2922 + }, + { + "epoch": 0.05343009121319027, + "grad_norm": 8.318073385313161, + "learning_rate": 9.985633931657934e-06, + "loss": 18.0261, + "step": 2923 + }, + { + "epoch": 0.05344837040963679, + "grad_norm": 7.455914868336505, + "learning_rate": 9.985611499832929e-06, + "loss": 17.9919, + "step": 2924 + }, + { + "epoch": 0.053466649606083314, + "grad_norm": 7.73762423130492, + "learning_rate": 9.985589050533787e-06, + "loss": 17.6016, + "step": 2925 + }, + { + "epoch": 0.05348492880252984, + "grad_norm": 7.469676296186834, + "learning_rate": 9.98556658376059e-06, + "loss": 18.2103, + "step": 2926 + }, + { + "epoch": 0.053503207998976364, + "grad_norm": 9.74712854445016, + "learning_rate": 9.985544099513412e-06, + "loss": 18.6005, + "step": 2927 + }, + { + "epoch": 0.053521487195422886, + "grad_norm": 8.990621077448539, + "learning_rate": 9.985521597792336e-06, + "loss": 18.4498, + "step": 2928 + }, + { + "epoch": 0.053539766391869414, + "grad_norm": 7.56040073408485, + "learning_rate": 9.985499078597438e-06, + "loss": 17.9899, + "step": 2929 + }, + { + "epoch": 0.053558045588315936, + "grad_norm": 7.9332027847347515, + "learning_rate": 9.9854765419288e-06, + "loss": 18.2143, + "step": 2930 + }, + { + "epoch": 0.053576324784762465, + "grad_norm": 8.469629512471046, + "learning_rate": 9.985453987786498e-06, + "loss": 18.0529, + "step": 2931 + }, + { + "epoch": 0.05359460398120899, + "grad_norm": 7.188217590980575, + "learning_rate": 9.985431416170615e-06, + "loss": 17.7905, + "step": 2932 + }, + { + "epoch": 0.05361288317765551, + "grad_norm": 7.1093033521734235, + "learning_rate": 9.985408827081225e-06, + "loss": 17.8705, + "step": 2933 + }, + { + "epoch": 
0.05363116237410204, + "grad_norm": 7.80175291704277, + "learning_rate": 9.98538622051841e-06, + "loss": 18.183, + "step": 2934 + }, + { + "epoch": 0.05364944157054856, + "grad_norm": 6.68863290206066, + "learning_rate": 9.98536359648225e-06, + "loss": 17.4041, + "step": 2935 + }, + { + "epoch": 0.05366772076699508, + "grad_norm": 9.240501255887352, + "learning_rate": 9.985340954972822e-06, + "loss": 18.7026, + "step": 2936 + }, + { + "epoch": 0.05368599996344161, + "grad_norm": 6.466188007149112, + "learning_rate": 9.985318295990207e-06, + "loss": 17.3034, + "step": 2937 + }, + { + "epoch": 0.05370427915988813, + "grad_norm": 7.41776976246194, + "learning_rate": 9.985295619534482e-06, + "loss": 17.8361, + "step": 2938 + }, + { + "epoch": 0.05372255835633465, + "grad_norm": 7.012147727360407, + "learning_rate": 9.985272925605732e-06, + "loss": 17.793, + "step": 2939 + }, + { + "epoch": 0.05374083755278118, + "grad_norm": 7.991114266886512, + "learning_rate": 9.985250214204032e-06, + "loss": 18.0574, + "step": 2940 + }, + { + "epoch": 0.0537591167492277, + "grad_norm": 9.579076946959471, + "learning_rate": 9.985227485329461e-06, + "loss": 18.7311, + "step": 2941 + }, + { + "epoch": 0.053777395945674225, + "grad_norm": 7.725627071464176, + "learning_rate": 9.985204738982102e-06, + "loss": 17.8584, + "step": 2942 + }, + { + "epoch": 0.053795675142120754, + "grad_norm": 8.69365900468432, + "learning_rate": 9.985181975162032e-06, + "loss": 18.2724, + "step": 2943 + }, + { + "epoch": 0.053813954338567276, + "grad_norm": 9.613125557602531, + "learning_rate": 9.985159193869333e-06, + "loss": 19.1958, + "step": 2944 + }, + { + "epoch": 0.053832233535013804, + "grad_norm": 6.2900735210417436, + "learning_rate": 9.985136395104082e-06, + "loss": 17.5073, + "step": 2945 + }, + { + "epoch": 0.053850512731460326, + "grad_norm": 6.6392678417274755, + "learning_rate": 9.98511357886636e-06, + "loss": 17.6563, + "step": 2946 + }, + { + "epoch": 0.05386879192790685, + "grad_norm": 7.664506189258505, + "learning_rate": 9.985090745156249e-06, + "loss": 17.8886, + "step": 2947 + }, + { + "epoch": 0.053887071124353376, + "grad_norm": 9.27521136601304, + "learning_rate": 9.985067893973828e-06, + "loss": 18.4531, + "step": 2948 + }, + { + "epoch": 0.0539053503207999, + "grad_norm": 10.37789530787545, + "learning_rate": 9.985045025319175e-06, + "loss": 18.9894, + "step": 2949 + }, + { + "epoch": 0.05392362951724642, + "grad_norm": 7.78415898265666, + "learning_rate": 9.985022139192372e-06, + "loss": 17.8419, + "step": 2950 + }, + { + "epoch": 0.05394190871369295, + "grad_norm": 7.510278415092891, + "learning_rate": 9.984999235593502e-06, + "loss": 17.9377, + "step": 2951 + }, + { + "epoch": 0.05396018791013947, + "grad_norm": 6.68026717118842, + "learning_rate": 9.984976314522638e-06, + "loss": 17.4353, + "step": 2952 + }, + { + "epoch": 0.05397846710658599, + "grad_norm": 7.087524709653039, + "learning_rate": 9.984953375979868e-06, + "loss": 17.7807, + "step": 2953 + }, + { + "epoch": 0.05399674630303252, + "grad_norm": 8.411742752976526, + "learning_rate": 9.984930419965266e-06, + "loss": 18.2082, + "step": 2954 + }, + { + "epoch": 0.05401502549947904, + "grad_norm": 7.812448934713648, + "learning_rate": 9.984907446478918e-06, + "loss": 18.1363, + "step": 2955 + }, + { + "epoch": 0.054033304695925564, + "grad_norm": 8.044356207894898, + "learning_rate": 9.9848844555209e-06, + "loss": 18.2735, + "step": 2956 + }, + { + "epoch": 0.05405158389237209, + "grad_norm": 7.736509632554892, + "learning_rate": 
9.984861447091296e-06, + "loss": 18.1222, + "step": 2957 + }, + { + "epoch": 0.054069863088818615, + "grad_norm": 5.933792456814279, + "learning_rate": 9.984838421190184e-06, + "loss": 17.1622, + "step": 2958 + }, + { + "epoch": 0.05408814228526514, + "grad_norm": 7.904815188668401, + "learning_rate": 9.984815377817648e-06, + "loss": 17.5206, + "step": 2959 + }, + { + "epoch": 0.054106421481711665, + "grad_norm": 7.295836961563647, + "learning_rate": 9.984792316973765e-06, + "loss": 17.8553, + "step": 2960 + }, + { + "epoch": 0.05412470067815819, + "grad_norm": 8.1689220413142, + "learning_rate": 9.984769238658617e-06, + "loss": 18.0965, + "step": 2961 + }, + { + "epoch": 0.054142979874604716, + "grad_norm": 7.841275180485553, + "learning_rate": 9.984746142872287e-06, + "loss": 18.3405, + "step": 2962 + }, + { + "epoch": 0.05416125907105124, + "grad_norm": 6.116042868616801, + "learning_rate": 9.984723029614853e-06, + "loss": 17.2451, + "step": 2963 + }, + { + "epoch": 0.05417953826749776, + "grad_norm": 7.649454684530826, + "learning_rate": 9.984699898886397e-06, + "loss": 17.9682, + "step": 2964 + }, + { + "epoch": 0.05419781746394429, + "grad_norm": 8.02078081864306, + "learning_rate": 9.984676750687e-06, + "loss": 18.1302, + "step": 2965 + }, + { + "epoch": 0.05421609666039081, + "grad_norm": 6.873682823028608, + "learning_rate": 9.984653585016747e-06, + "loss": 17.6275, + "step": 2966 + }, + { + "epoch": 0.05423437585683733, + "grad_norm": 7.627279055780362, + "learning_rate": 9.984630401875712e-06, + "loss": 17.8492, + "step": 2967 + }, + { + "epoch": 0.05425265505328386, + "grad_norm": 7.2484768612040185, + "learning_rate": 9.984607201263983e-06, + "loss": 17.9409, + "step": 2968 + }, + { + "epoch": 0.05427093424973038, + "grad_norm": 8.53649524945606, + "learning_rate": 9.984583983181634e-06, + "loss": 18.4741, + "step": 2969 + }, + { + "epoch": 0.054289213446176904, + "grad_norm": 7.404241934179543, + "learning_rate": 9.984560747628755e-06, + "loss": 17.9715, + "step": 2970 + }, + { + "epoch": 0.05430749264262343, + "grad_norm": 7.885455730551289, + "learning_rate": 9.984537494605422e-06, + "loss": 18.206, + "step": 2971 + }, + { + "epoch": 0.054325771839069954, + "grad_norm": 7.419783122451107, + "learning_rate": 9.984514224111717e-06, + "loss": 18.0498, + "step": 2972 + }, + { + "epoch": 0.054344051035516476, + "grad_norm": 7.201191579877522, + "learning_rate": 9.984490936147722e-06, + "loss": 17.7067, + "step": 2973 + }, + { + "epoch": 0.054362330231963005, + "grad_norm": 7.263656909714293, + "learning_rate": 9.98446763071352e-06, + "loss": 17.8044, + "step": 2974 + }, + { + "epoch": 0.054380609428409526, + "grad_norm": 7.688869607832282, + "learning_rate": 9.984444307809189e-06, + "loss": 18.0741, + "step": 2975 + }, + { + "epoch": 0.05439888862485605, + "grad_norm": 7.933950074273668, + "learning_rate": 9.984420967434815e-06, + "loss": 18.2206, + "step": 2976 + }, + { + "epoch": 0.05441716782130258, + "grad_norm": 7.689423644462968, + "learning_rate": 9.984397609590478e-06, + "loss": 17.866, + "step": 2977 + }, + { + "epoch": 0.0544354470177491, + "grad_norm": 6.19100681423261, + "learning_rate": 9.98437423427626e-06, + "loss": 17.2061, + "step": 2978 + }, + { + "epoch": 0.05445372621419563, + "grad_norm": 7.220269564326452, + "learning_rate": 9.984350841492243e-06, + "loss": 17.8103, + "step": 2979 + }, + { + "epoch": 0.05447200541064215, + "grad_norm": 7.535332315801858, + "learning_rate": 9.984327431238508e-06, + "loss": 17.9382, + "step": 2980 + }, + { + "epoch": 
0.05449028460708867, + "grad_norm": 8.61754436364827, + "learning_rate": 9.984304003515137e-06, + "loss": 17.9783, + "step": 2981 + }, + { + "epoch": 0.0545085638035352, + "grad_norm": 6.6467237653339, + "learning_rate": 9.984280558322215e-06, + "loss": 17.4914, + "step": 2982 + }, + { + "epoch": 0.05452684299998172, + "grad_norm": 8.041593626098898, + "learning_rate": 9.984257095659821e-06, + "loss": 17.8183, + "step": 2983 + }, + { + "epoch": 0.05454512219642824, + "grad_norm": 8.110138118165157, + "learning_rate": 9.98423361552804e-06, + "loss": 17.7319, + "step": 2984 + }, + { + "epoch": 0.05456340139287477, + "grad_norm": 6.615120623978179, + "learning_rate": 9.984210117926952e-06, + "loss": 17.3877, + "step": 2985 + }, + { + "epoch": 0.054581680589321294, + "grad_norm": 7.320762110935072, + "learning_rate": 9.984186602856639e-06, + "loss": 17.9738, + "step": 2986 + }, + { + "epoch": 0.054599959785767815, + "grad_norm": 6.925733595848153, + "learning_rate": 9.984163070317186e-06, + "loss": 17.7418, + "step": 2987 + }, + { + "epoch": 0.054618238982214344, + "grad_norm": 7.202411637507689, + "learning_rate": 9.984139520308672e-06, + "loss": 18.0995, + "step": 2988 + }, + { + "epoch": 0.054636518178660866, + "grad_norm": 7.164653278773729, + "learning_rate": 9.984115952831182e-06, + "loss": 17.8108, + "step": 2989 + }, + { + "epoch": 0.05465479737510739, + "grad_norm": 8.897122619932148, + "learning_rate": 9.9840923678848e-06, + "loss": 18.1692, + "step": 2990 + }, + { + "epoch": 0.054673076571553916, + "grad_norm": 7.030115125998609, + "learning_rate": 9.984068765469603e-06, + "loss": 17.8911, + "step": 2991 + }, + { + "epoch": 0.05469135576800044, + "grad_norm": 7.308291531501065, + "learning_rate": 9.984045145585681e-06, + "loss": 17.8525, + "step": 2992 + }, + { + "epoch": 0.05470963496444696, + "grad_norm": 9.113822979077941, + "learning_rate": 9.984021508233111e-06, + "loss": 18.4587, + "step": 2993 + }, + { + "epoch": 0.05472791416089349, + "grad_norm": 7.429441521159165, + "learning_rate": 9.98399785341198e-06, + "loss": 17.9046, + "step": 2994 + }, + { + "epoch": 0.05474619335734001, + "grad_norm": 6.609227078282274, + "learning_rate": 9.983974181122368e-06, + "loss": 17.3922, + "step": 2995 + }, + { + "epoch": 0.05476447255378654, + "grad_norm": 7.506487250012753, + "learning_rate": 9.98395049136436e-06, + "loss": 17.6925, + "step": 2996 + }, + { + "epoch": 0.05478275175023306, + "grad_norm": 7.4518922699203864, + "learning_rate": 9.983926784138036e-06, + "loss": 17.8667, + "step": 2997 + }, + { + "epoch": 0.05480103094667958, + "grad_norm": 8.356611829728001, + "learning_rate": 9.983903059443482e-06, + "loss": 18.1327, + "step": 2998 + }, + { + "epoch": 0.05481931014312611, + "grad_norm": 8.82366343581557, + "learning_rate": 9.98387931728078e-06, + "loss": 18.3259, + "step": 2999 + }, + { + "epoch": 0.05483758933957263, + "grad_norm": 8.375593874272335, + "learning_rate": 9.983855557650015e-06, + "loss": 18.6367, + "step": 3000 + }, + { + "epoch": 0.054855868536019155, + "grad_norm": 6.387070611131344, + "learning_rate": 9.983831780551268e-06, + "loss": 17.3594, + "step": 3001 + }, + { + "epoch": 0.05487414773246568, + "grad_norm": 7.948890739914074, + "learning_rate": 9.983807985984621e-06, + "loss": 18.0505, + "step": 3002 + }, + { + "epoch": 0.054892426928912205, + "grad_norm": 7.114125273937795, + "learning_rate": 9.983784173950163e-06, + "loss": 17.6408, + "step": 3003 + }, + { + "epoch": 0.05491070612535873, + "grad_norm": 7.006302790073139, + "learning_rate": 
9.983760344447972e-06, + "loss": 17.6835, + "step": 3004 + }, + { + "epoch": 0.054928985321805256, + "grad_norm": 7.28263684589425, + "learning_rate": 9.983736497478134e-06, + "loss": 17.6965, + "step": 3005 + }, + { + "epoch": 0.05494726451825178, + "grad_norm": 7.751363796915561, + "learning_rate": 9.983712633040732e-06, + "loss": 17.8543, + "step": 3006 + }, + { + "epoch": 0.0549655437146983, + "grad_norm": 8.07461854526666, + "learning_rate": 9.983688751135849e-06, + "loss": 18.1354, + "step": 3007 + }, + { + "epoch": 0.05498382291114483, + "grad_norm": 8.218066766385554, + "learning_rate": 9.983664851763571e-06, + "loss": 18.3705, + "step": 3008 + }, + { + "epoch": 0.05500210210759135, + "grad_norm": 8.192671558395386, + "learning_rate": 9.983640934923977e-06, + "loss": 18.5413, + "step": 3009 + }, + { + "epoch": 0.05502038130403787, + "grad_norm": 7.698426913627192, + "learning_rate": 9.983617000617157e-06, + "loss": 17.7436, + "step": 3010 + }, + { + "epoch": 0.0550386605004844, + "grad_norm": 7.305591498734186, + "learning_rate": 9.98359304884319e-06, + "loss": 17.8984, + "step": 3011 + }, + { + "epoch": 0.05505693969693092, + "grad_norm": 7.462744730812517, + "learning_rate": 9.983569079602163e-06, + "loss": 17.8616, + "step": 3012 + }, + { + "epoch": 0.05507521889337745, + "grad_norm": 9.204157449070113, + "learning_rate": 9.983545092894158e-06, + "loss": 18.6582, + "step": 3013 + }, + { + "epoch": 0.05509349808982397, + "grad_norm": 6.781635136989092, + "learning_rate": 9.983521088719262e-06, + "loss": 17.5635, + "step": 3014 + }, + { + "epoch": 0.055111777286270494, + "grad_norm": 7.529536812907106, + "learning_rate": 9.983497067077554e-06, + "loss": 17.8964, + "step": 3015 + }, + { + "epoch": 0.05513005648271702, + "grad_norm": 8.508994685012931, + "learning_rate": 9.983473027969122e-06, + "loss": 18.6022, + "step": 3016 + }, + { + "epoch": 0.055148335679163545, + "grad_norm": 7.681556406311905, + "learning_rate": 9.983448971394051e-06, + "loss": 17.9823, + "step": 3017 + }, + { + "epoch": 0.055166614875610066, + "grad_norm": 7.653760825345166, + "learning_rate": 9.983424897352422e-06, + "loss": 18.1274, + "step": 3018 + }, + { + "epoch": 0.055184894072056595, + "grad_norm": 7.305335009265789, + "learning_rate": 9.983400805844324e-06, + "loss": 18.0893, + "step": 3019 + }, + { + "epoch": 0.05520317326850312, + "grad_norm": 5.745154385526005, + "learning_rate": 9.983376696869836e-06, + "loss": 17.3378, + "step": 3020 + }, + { + "epoch": 0.05522145246494964, + "grad_norm": 6.646640837725589, + "learning_rate": 9.983352570429046e-06, + "loss": 17.5679, + "step": 3021 + }, + { + "epoch": 0.05523973166139617, + "grad_norm": 7.178155182805772, + "learning_rate": 9.983328426522036e-06, + "loss": 17.7329, + "step": 3022 + }, + { + "epoch": 0.05525801085784269, + "grad_norm": 8.229139627854392, + "learning_rate": 9.983304265148894e-06, + "loss": 18.2517, + "step": 3023 + }, + { + "epoch": 0.05527629005428921, + "grad_norm": 7.105184523923117, + "learning_rate": 9.983280086309703e-06, + "loss": 17.9693, + "step": 3024 + }, + { + "epoch": 0.05529456925073574, + "grad_norm": 7.504820287503861, + "learning_rate": 9.983255890004548e-06, + "loss": 18.0733, + "step": 3025 + }, + { + "epoch": 0.05531284844718226, + "grad_norm": 8.121601282141208, + "learning_rate": 9.983231676233513e-06, + "loss": 18.4775, + "step": 3026 + }, + { + "epoch": 0.05533112764362878, + "grad_norm": 7.522589727206308, + "learning_rate": 9.983207444996682e-06, + "loss": 17.9586, + "step": 3027 + }, + { + "epoch": 
0.05534940684007531, + "grad_norm": 8.58402787228452, + "learning_rate": 9.983183196294144e-06, + "loss": 18.5355, + "step": 3028 + }, + { + "epoch": 0.05536768603652183, + "grad_norm": 6.920000719747553, + "learning_rate": 9.98315893012598e-06, + "loss": 17.6501, + "step": 3029 + }, + { + "epoch": 0.05538596523296836, + "grad_norm": 6.34494087197748, + "learning_rate": 9.983134646492277e-06, + "loss": 17.4743, + "step": 3030 + }, + { + "epoch": 0.055404244429414884, + "grad_norm": 9.061419916247212, + "learning_rate": 9.983110345393119e-06, + "loss": 18.9192, + "step": 3031 + }, + { + "epoch": 0.055422523625861406, + "grad_norm": 7.2914007617061145, + "learning_rate": 9.983086026828592e-06, + "loss": 17.5493, + "step": 3032 + }, + { + "epoch": 0.055440802822307934, + "grad_norm": 7.286057303199407, + "learning_rate": 9.983061690798782e-06, + "loss": 17.8788, + "step": 3033 + }, + { + "epoch": 0.055459082018754456, + "grad_norm": 7.314665473423591, + "learning_rate": 9.98303733730377e-06, + "loss": 18.1814, + "step": 3034 + }, + { + "epoch": 0.05547736121520098, + "grad_norm": 8.666248371560606, + "learning_rate": 9.983012966343645e-06, + "loss": 18.5522, + "step": 3035 + }, + { + "epoch": 0.05549564041164751, + "grad_norm": 6.389428773229833, + "learning_rate": 9.982988577918494e-06, + "loss": 17.7468, + "step": 3036 + }, + { + "epoch": 0.05551391960809403, + "grad_norm": 6.422842320255038, + "learning_rate": 9.9829641720284e-06, + "loss": 17.5976, + "step": 3037 + }, + { + "epoch": 0.05553219880454055, + "grad_norm": 8.907083855297007, + "learning_rate": 9.982939748673447e-06, + "loss": 18.4818, + "step": 3038 + }, + { + "epoch": 0.05555047800098708, + "grad_norm": 6.595998114701068, + "learning_rate": 9.982915307853725e-06, + "loss": 17.5303, + "step": 3039 + }, + { + "epoch": 0.0555687571974336, + "grad_norm": 8.342567757686064, + "learning_rate": 9.982890849569318e-06, + "loss": 18.3321, + "step": 3040 + }, + { + "epoch": 0.05558703639388012, + "grad_norm": 7.3346381271810825, + "learning_rate": 9.982866373820308e-06, + "loss": 17.9998, + "step": 3041 + }, + { + "epoch": 0.05560531559032665, + "grad_norm": 8.02451992770075, + "learning_rate": 9.982841880606786e-06, + "loss": 17.886, + "step": 3042 + }, + { + "epoch": 0.05562359478677317, + "grad_norm": 6.86313206794148, + "learning_rate": 9.982817369928834e-06, + "loss": 17.8138, + "step": 3043 + }, + { + "epoch": 0.055641873983219695, + "grad_norm": 7.792835334586353, + "learning_rate": 9.98279284178654e-06, + "loss": 18.1808, + "step": 3044 + }, + { + "epoch": 0.05566015317966622, + "grad_norm": 8.365750307803037, + "learning_rate": 9.982768296179989e-06, + "loss": 18.1171, + "step": 3045 + }, + { + "epoch": 0.055678432376112745, + "grad_norm": 9.19430205300137, + "learning_rate": 9.98274373310927e-06, + "loss": 18.6058, + "step": 3046 + }, + { + "epoch": 0.055696711572559274, + "grad_norm": 9.578064016682275, + "learning_rate": 9.982719152574465e-06, + "loss": 18.844, + "step": 3047 + }, + { + "epoch": 0.055714990769005796, + "grad_norm": 7.766110325583367, + "learning_rate": 9.982694554575661e-06, + "loss": 18.0555, + "step": 3048 + }, + { + "epoch": 0.05573326996545232, + "grad_norm": 8.788885331065469, + "learning_rate": 9.982669939112946e-06, + "loss": 18.2255, + "step": 3049 + }, + { + "epoch": 0.055751549161898846, + "grad_norm": 6.953589933728604, + "learning_rate": 9.982645306186405e-06, + "loss": 17.6454, + "step": 3050 + }, + { + "epoch": 0.05576982835834537, + "grad_norm": 8.833252919081332, + "learning_rate": 
9.982620655796123e-06, + "loss": 18.5215, + "step": 3051 + }, + { + "epoch": 0.05578810755479189, + "grad_norm": 7.84034395593779, + "learning_rate": 9.98259598794219e-06, + "loss": 18.1439, + "step": 3052 + }, + { + "epoch": 0.05580638675123842, + "grad_norm": 6.8936031465011185, + "learning_rate": 9.982571302624691e-06, + "loss": 17.6117, + "step": 3053 + }, + { + "epoch": 0.05582466594768494, + "grad_norm": 7.101308281568558, + "learning_rate": 9.982546599843709e-06, + "loss": 17.7474, + "step": 3054 + }, + { + "epoch": 0.05584294514413146, + "grad_norm": 9.115871047523548, + "learning_rate": 9.982521879599337e-06, + "loss": 18.3247, + "step": 3055 + }, + { + "epoch": 0.05586122434057799, + "grad_norm": 7.753280953249115, + "learning_rate": 9.982497141891656e-06, + "loss": 17.8628, + "step": 3056 + }, + { + "epoch": 0.05587950353702451, + "grad_norm": 8.13964987987562, + "learning_rate": 9.982472386720754e-06, + "loss": 18.2396, + "step": 3057 + }, + { + "epoch": 0.055897782733471034, + "grad_norm": 7.772282244173961, + "learning_rate": 9.982447614086721e-06, + "loss": 18.3326, + "step": 3058 + }, + { + "epoch": 0.05591606192991756, + "grad_norm": 8.526465003179315, + "learning_rate": 9.98242282398964e-06, + "loss": 18.188, + "step": 3059 + }, + { + "epoch": 0.055934341126364084, + "grad_norm": 6.492358861107072, + "learning_rate": 9.982398016429599e-06, + "loss": 17.7256, + "step": 3060 + }, + { + "epoch": 0.055952620322810606, + "grad_norm": 7.825082106363907, + "learning_rate": 9.982373191406687e-06, + "loss": 17.8356, + "step": 3061 + }, + { + "epoch": 0.055970899519257135, + "grad_norm": 7.567674437585254, + "learning_rate": 9.982348348920988e-06, + "loss": 17.7985, + "step": 3062 + }, + { + "epoch": 0.05598917871570366, + "grad_norm": 7.691814573161281, + "learning_rate": 9.982323488972592e-06, + "loss": 18.0445, + "step": 3063 + }, + { + "epoch": 0.056007457912150185, + "grad_norm": 8.31240527788603, + "learning_rate": 9.982298611561583e-06, + "loss": 18.3168, + "step": 3064 + }, + { + "epoch": 0.05602573710859671, + "grad_norm": 9.123638086422385, + "learning_rate": 9.982273716688048e-06, + "loss": 18.5168, + "step": 3065 + }, + { + "epoch": 0.05604401630504323, + "grad_norm": 7.429816320145203, + "learning_rate": 9.982248804352079e-06, + "loss": 17.961, + "step": 3066 + }, + { + "epoch": 0.05606229550148976, + "grad_norm": 7.028700829005101, + "learning_rate": 9.98222387455376e-06, + "loss": 17.6662, + "step": 3067 + }, + { + "epoch": 0.05608057469793628, + "grad_norm": 7.397826548373838, + "learning_rate": 9.982198927293177e-06, + "loss": 17.9261, + "step": 3068 + }, + { + "epoch": 0.0560988538943828, + "grad_norm": 7.661716198505955, + "learning_rate": 9.98217396257042e-06, + "loss": 17.9355, + "step": 3069 + }, + { + "epoch": 0.05611713309082933, + "grad_norm": 7.724760272102188, + "learning_rate": 9.982148980385576e-06, + "loss": 18.0717, + "step": 3070 + }, + { + "epoch": 0.05613541228727585, + "grad_norm": 6.8711206059020995, + "learning_rate": 9.982123980738731e-06, + "loss": 17.8262, + "step": 3071 + }, + { + "epoch": 0.05615369148372237, + "grad_norm": 8.514880584015112, + "learning_rate": 9.982098963629975e-06, + "loss": 18.3857, + "step": 3072 + }, + { + "epoch": 0.0561719706801689, + "grad_norm": 6.537398221025665, + "learning_rate": 9.982073929059394e-06, + "loss": 17.4544, + "step": 3073 + }, + { + "epoch": 0.056190249876615424, + "grad_norm": 7.224619991155388, + "learning_rate": 9.982048877027077e-06, + "loss": 17.7508, + "step": 3074 + }, + { + "epoch": 
0.056208529073061946, + "grad_norm": 8.93773857365674, + "learning_rate": 9.98202380753311e-06, + "loss": 18.9491, + "step": 3075 + }, + { + "epoch": 0.056226808269508474, + "grad_norm": 7.119907582320052, + "learning_rate": 9.98199872057758e-06, + "loss": 17.6432, + "step": 3076 + }, + { + "epoch": 0.056245087465954996, + "grad_norm": 7.169995182665816, + "learning_rate": 9.98197361616058e-06, + "loss": 17.4187, + "step": 3077 + }, + { + "epoch": 0.05626336666240152, + "grad_norm": 7.795123421903371, + "learning_rate": 9.981948494282195e-06, + "loss": 18.0035, + "step": 3078 + }, + { + "epoch": 0.056281645858848046, + "grad_norm": 7.726347244191609, + "learning_rate": 9.98192335494251e-06, + "loss": 18.0938, + "step": 3079 + }, + { + "epoch": 0.05629992505529457, + "grad_norm": 6.6654710399569606, + "learning_rate": 9.98189819814162e-06, + "loss": 17.3324, + "step": 3080 + }, + { + "epoch": 0.0563182042517411, + "grad_norm": 8.028342189665967, + "learning_rate": 9.981873023879605e-06, + "loss": 17.6997, + "step": 3081 + }, + { + "epoch": 0.05633648344818762, + "grad_norm": 7.351889325695132, + "learning_rate": 9.981847832156559e-06, + "loss": 17.9691, + "step": 3082 + }, + { + "epoch": 0.05635476264463414, + "grad_norm": 6.877054372041403, + "learning_rate": 9.981822622972568e-06, + "loss": 17.6384, + "step": 3083 + }, + { + "epoch": 0.05637304184108067, + "grad_norm": 6.459901030651059, + "learning_rate": 9.981797396327722e-06, + "loss": 17.3577, + "step": 3084 + }, + { + "epoch": 0.05639132103752719, + "grad_norm": 7.909241642474109, + "learning_rate": 9.981772152222109e-06, + "loss": 18.504, + "step": 3085 + }, + { + "epoch": 0.05640960023397371, + "grad_norm": 7.433748334982519, + "learning_rate": 9.981746890655815e-06, + "loss": 17.7713, + "step": 3086 + }, + { + "epoch": 0.05642787943042024, + "grad_norm": 6.659830404372923, + "learning_rate": 9.981721611628932e-06, + "loss": 17.4545, + "step": 3087 + }, + { + "epoch": 0.05644615862686676, + "grad_norm": 7.765799042403962, + "learning_rate": 9.981696315141546e-06, + "loss": 17.8737, + "step": 3088 + }, + { + "epoch": 0.056464437823313285, + "grad_norm": 8.867682423038241, + "learning_rate": 9.981671001193748e-06, + "loss": 18.1657, + "step": 3089 + }, + { + "epoch": 0.056482717019759814, + "grad_norm": 6.237795543665309, + "learning_rate": 9.981645669785624e-06, + "loss": 17.1925, + "step": 3090 + }, + { + "epoch": 0.056500996216206335, + "grad_norm": 6.925673048313482, + "learning_rate": 9.981620320917264e-06, + "loss": 17.7825, + "step": 3091 + }, + { + "epoch": 0.05651927541265286, + "grad_norm": 7.10750480705101, + "learning_rate": 9.981594954588759e-06, + "loss": 17.7329, + "step": 3092 + }, + { + "epoch": 0.056537554609099386, + "grad_norm": 7.388764822206102, + "learning_rate": 9.981569570800194e-06, + "loss": 17.9458, + "step": 3093 + }, + { + "epoch": 0.05655583380554591, + "grad_norm": 7.695452348366865, + "learning_rate": 9.98154416955166e-06, + "loss": 17.8532, + "step": 3094 + }, + { + "epoch": 0.05657411300199243, + "grad_norm": 7.685635568854148, + "learning_rate": 9.981518750843247e-06, + "loss": 17.948, + "step": 3095 + }, + { + "epoch": 0.05659239219843896, + "grad_norm": 8.262705401974944, + "learning_rate": 9.981493314675044e-06, + "loss": 18.4305, + "step": 3096 + }, + { + "epoch": 0.05661067139488548, + "grad_norm": 8.344981147862454, + "learning_rate": 9.981467861047137e-06, + "loss": 18.1224, + "step": 3097 + }, + { + "epoch": 0.05662895059133201, + "grad_norm": 7.611113823112375, + "learning_rate": 
9.981442389959619e-06, + "loss": 17.7372, + "step": 3098 + }, + { + "epoch": 0.05664722978777853, + "grad_norm": 8.82550241492585, + "learning_rate": 9.981416901412577e-06, + "loss": 17.7954, + "step": 3099 + }, + { + "epoch": 0.05666550898422505, + "grad_norm": 6.426091669920725, + "learning_rate": 9.9813913954061e-06, + "loss": 17.4338, + "step": 3100 + }, + { + "epoch": 0.05668378818067158, + "grad_norm": 7.6781511310715445, + "learning_rate": 9.981365871940281e-06, + "loss": 17.8858, + "step": 3101 + }, + { + "epoch": 0.0567020673771181, + "grad_norm": 8.51942401916345, + "learning_rate": 9.981340331015205e-06, + "loss": 18.1104, + "step": 3102 + }, + { + "epoch": 0.056720346573564624, + "grad_norm": 7.303153742323697, + "learning_rate": 9.981314772630963e-06, + "loss": 17.8026, + "step": 3103 + }, + { + "epoch": 0.05673862577001115, + "grad_norm": 7.70273749115573, + "learning_rate": 9.981289196787646e-06, + "loss": 17.8407, + "step": 3104 + }, + { + "epoch": 0.056756904966457675, + "grad_norm": 7.384810357472592, + "learning_rate": 9.981263603485343e-06, + "loss": 17.916, + "step": 3105 + }, + { + "epoch": 0.056775184162904196, + "grad_norm": 6.9427720875237915, + "learning_rate": 9.981237992724142e-06, + "loss": 17.5603, + "step": 3106 + }, + { + "epoch": 0.056793463359350725, + "grad_norm": 7.586135612857699, + "learning_rate": 9.981212364504135e-06, + "loss": 17.96, + "step": 3107 + }, + { + "epoch": 0.05681174255579725, + "grad_norm": 6.941673895097665, + "learning_rate": 9.98118671882541e-06, + "loss": 17.7614, + "step": 3108 + }, + { + "epoch": 0.05683002175224377, + "grad_norm": 7.669828434678138, + "learning_rate": 9.98116105568806e-06, + "loss": 18.0786, + "step": 3109 + }, + { + "epoch": 0.0568483009486903, + "grad_norm": 8.214138452932188, + "learning_rate": 9.98113537509217e-06, + "loss": 18.5006, + "step": 3110 + }, + { + "epoch": 0.05686658014513682, + "grad_norm": 8.769302543947646, + "learning_rate": 9.981109677037834e-06, + "loss": 18.7026, + "step": 3111 + }, + { + "epoch": 0.05688485934158334, + "grad_norm": 6.736288333953572, + "learning_rate": 9.981083961525142e-06, + "loss": 17.7235, + "step": 3112 + }, + { + "epoch": 0.05690313853802987, + "grad_norm": 7.525238302445102, + "learning_rate": 9.981058228554182e-06, + "loss": 17.7236, + "step": 3113 + }, + { + "epoch": 0.05692141773447639, + "grad_norm": 8.304692348974578, + "learning_rate": 9.981032478125044e-06, + "loss": 18.3424, + "step": 3114 + }, + { + "epoch": 0.05693969693092292, + "grad_norm": 7.416249290681871, + "learning_rate": 9.981006710237822e-06, + "loss": 17.8649, + "step": 3115 + }, + { + "epoch": 0.05695797612736944, + "grad_norm": 7.713644412755266, + "learning_rate": 9.9809809248926e-06, + "loss": 18.1338, + "step": 3116 + }, + { + "epoch": 0.056976255323815964, + "grad_norm": 8.090571385668342, + "learning_rate": 9.980955122089476e-06, + "loss": 17.7447, + "step": 3117 + }, + { + "epoch": 0.05699453452026249, + "grad_norm": 6.579862160274033, + "learning_rate": 9.980929301828533e-06, + "loss": 17.3605, + "step": 3118 + }, + { + "epoch": 0.057012813716709014, + "grad_norm": 6.459900712847999, + "learning_rate": 9.980903464109868e-06, + "loss": 17.5769, + "step": 3119 + }, + { + "epoch": 0.057031092913155536, + "grad_norm": 8.30606420475151, + "learning_rate": 9.980877608933566e-06, + "loss": 18.0798, + "step": 3120 + }, + { + "epoch": 0.057049372109602065, + "grad_norm": 7.7536226712048295, + "learning_rate": 9.98085173629972e-06, + "loss": 18.4216, + "step": 3121 + }, + { + "epoch": 
0.057067651306048586, + "grad_norm": 6.864119640927641, + "learning_rate": 9.980825846208424e-06, + "loss": 17.6863, + "step": 3122 + }, + { + "epoch": 0.05708593050249511, + "grad_norm": 8.094039835281302, + "learning_rate": 9.980799938659764e-06, + "loss": 18.0637, + "step": 3123 + }, + { + "epoch": 0.05710420969894164, + "grad_norm": 9.114359137978889, + "learning_rate": 9.980774013653834e-06, + "loss": 18.836, + "step": 3124 + }, + { + "epoch": 0.05712248889538816, + "grad_norm": 6.761714260561507, + "learning_rate": 9.980748071190721e-06, + "loss": 17.8163, + "step": 3125 + }, + { + "epoch": 0.05714076809183468, + "grad_norm": 7.915229109343791, + "learning_rate": 9.980722111270518e-06, + "loss": 18.1644, + "step": 3126 + }, + { + "epoch": 0.05715904728828121, + "grad_norm": 7.696451437354202, + "learning_rate": 9.980696133893317e-06, + "loss": 17.7345, + "step": 3127 + }, + { + "epoch": 0.05717732648472773, + "grad_norm": 8.703426742138486, + "learning_rate": 9.98067013905921e-06, + "loss": 18.429, + "step": 3128 + }, + { + "epoch": 0.05719560568117425, + "grad_norm": 8.743940883294867, + "learning_rate": 9.980644126768283e-06, + "loss": 18.8959, + "step": 3129 + }, + { + "epoch": 0.05721388487762078, + "grad_norm": 8.240905753578073, + "learning_rate": 9.980618097020634e-06, + "loss": 18.2815, + "step": 3130 + }, + { + "epoch": 0.0572321640740673, + "grad_norm": 7.306110914231443, + "learning_rate": 9.98059204981635e-06, + "loss": 17.9447, + "step": 3131 + }, + { + "epoch": 0.05725044327051383, + "grad_norm": 8.189961498568708, + "learning_rate": 9.98056598515552e-06, + "loss": 18.007, + "step": 3132 + }, + { + "epoch": 0.05726872246696035, + "grad_norm": 6.871746975009577, + "learning_rate": 9.980539903038241e-06, + "loss": 17.5626, + "step": 3133 + }, + { + "epoch": 0.057287001663406875, + "grad_norm": 7.01148624787496, + "learning_rate": 9.980513803464602e-06, + "loss": 17.7858, + "step": 3134 + }, + { + "epoch": 0.057305280859853404, + "grad_norm": 7.346460217629107, + "learning_rate": 9.980487686434694e-06, + "loss": 17.4628, + "step": 3135 + }, + { + "epoch": 0.057323560056299926, + "grad_norm": 7.5282863277135865, + "learning_rate": 9.980461551948609e-06, + "loss": 18.0323, + "step": 3136 + }, + { + "epoch": 0.05734183925274645, + "grad_norm": 8.097465864323018, + "learning_rate": 9.980435400006436e-06, + "loss": 18.2426, + "step": 3137 + }, + { + "epoch": 0.057360118449192976, + "grad_norm": 7.932203910254175, + "learning_rate": 9.980409230608272e-06, + "loss": 17.9906, + "step": 3138 + }, + { + "epoch": 0.0573783976456395, + "grad_norm": 6.793077500662868, + "learning_rate": 9.980383043754206e-06, + "loss": 17.822, + "step": 3139 + }, + { + "epoch": 0.05739667684208602, + "grad_norm": 7.4046220582163, + "learning_rate": 9.980356839444328e-06, + "loss": 18.009, + "step": 3140 + }, + { + "epoch": 0.05741495603853255, + "grad_norm": 7.34207226631687, + "learning_rate": 9.980330617678731e-06, + "loss": 17.8602, + "step": 3141 + }, + { + "epoch": 0.05743323523497907, + "grad_norm": 8.544845945713869, + "learning_rate": 9.980304378457508e-06, + "loss": 18.5323, + "step": 3142 + }, + { + "epoch": 0.05745151443142559, + "grad_norm": 7.644146605020451, + "learning_rate": 9.98027812178075e-06, + "loss": 17.9759, + "step": 3143 + }, + { + "epoch": 0.05746979362787212, + "grad_norm": 7.028547691424313, + "learning_rate": 9.980251847648551e-06, + "loss": 17.8357, + "step": 3144 + }, + { + "epoch": 0.05748807282431864, + "grad_norm": 7.782382990392942, + "learning_rate": 
9.980225556061e-06, + "loss": 18.0765, + "step": 3145 + }, + { + "epoch": 0.057506352020765164, + "grad_norm": 7.8146244170079955, + "learning_rate": 9.980199247018193e-06, + "loss": 18.1027, + "step": 3146 + }, + { + "epoch": 0.05752463121721169, + "grad_norm": 7.558449598618881, + "learning_rate": 9.980172920520216e-06, + "loss": 18.4348, + "step": 3147 + }, + { + "epoch": 0.057542910413658215, + "grad_norm": 8.008248929861436, + "learning_rate": 9.980146576567167e-06, + "loss": 18.2474, + "step": 3148 + }, + { + "epoch": 0.05756118961010474, + "grad_norm": 6.320683275914581, + "learning_rate": 9.980120215159138e-06, + "loss": 17.4358, + "step": 3149 + }, + { + "epoch": 0.057579468806551265, + "grad_norm": 7.500021336597586, + "learning_rate": 9.980093836296216e-06, + "loss": 18.0094, + "step": 3150 + }, + { + "epoch": 0.05759774800299779, + "grad_norm": 6.889861023698829, + "learning_rate": 9.9800674399785e-06, + "loss": 17.8159, + "step": 3151 + }, + { + "epoch": 0.057616027199444315, + "grad_norm": 8.129009473177554, + "learning_rate": 9.98004102620608e-06, + "loss": 18.1608, + "step": 3152 + }, + { + "epoch": 0.05763430639589084, + "grad_norm": 8.48684312755742, + "learning_rate": 9.980014594979047e-06, + "loss": 18.0661, + "step": 3153 + }, + { + "epoch": 0.05765258559233736, + "grad_norm": 7.411642195765292, + "learning_rate": 9.979988146297494e-06, + "loss": 17.8226, + "step": 3154 + }, + { + "epoch": 0.05767086478878389, + "grad_norm": 8.940883368045336, + "learning_rate": 9.979961680161517e-06, + "loss": 18.8211, + "step": 3155 + }, + { + "epoch": 0.05768914398523041, + "grad_norm": 7.436304395529918, + "learning_rate": 9.979935196571207e-06, + "loss": 17.8067, + "step": 3156 + }, + { + "epoch": 0.05770742318167693, + "grad_norm": 7.192700013865126, + "learning_rate": 9.979908695526655e-06, + "loss": 17.8254, + "step": 3157 + }, + { + "epoch": 0.05772570237812346, + "grad_norm": 7.848188521253715, + "learning_rate": 9.979882177027955e-06, + "loss": 17.769, + "step": 3158 + }, + { + "epoch": 0.05774398157456998, + "grad_norm": 7.25186655023415, + "learning_rate": 9.9798556410752e-06, + "loss": 17.7228, + "step": 3159 + }, + { + "epoch": 0.0577622607710165, + "grad_norm": 9.492483714987832, + "learning_rate": 9.979829087668483e-06, + "loss": 18.5494, + "step": 3160 + }, + { + "epoch": 0.05778053996746303, + "grad_norm": 8.523093300760328, + "learning_rate": 9.979802516807897e-06, + "loss": 18.058, + "step": 3161 + }, + { + "epoch": 0.057798819163909554, + "grad_norm": 9.087293568614113, + "learning_rate": 9.979775928493536e-06, + "loss": 18.4453, + "step": 3162 + }, + { + "epoch": 0.057817098360356076, + "grad_norm": 7.340983964773885, + "learning_rate": 9.979749322725492e-06, + "loss": 17.9543, + "step": 3163 + }, + { + "epoch": 0.057835377556802604, + "grad_norm": 8.117419409975199, + "learning_rate": 9.979722699503859e-06, + "loss": 18.4618, + "step": 3164 + }, + { + "epoch": 0.057853656753249126, + "grad_norm": 8.060086976673936, + "learning_rate": 9.97969605882873e-06, + "loss": 18.0255, + "step": 3165 + }, + { + "epoch": 0.057871935949695655, + "grad_norm": 6.691453776028693, + "learning_rate": 9.979669400700198e-06, + "loss": 17.5696, + "step": 3166 + }, + { + "epoch": 0.05789021514614218, + "grad_norm": 7.723493050490721, + "learning_rate": 9.979642725118358e-06, + "loss": 18.1638, + "step": 3167 + }, + { + "epoch": 0.0579084943425887, + "grad_norm": 7.910569492674526, + "learning_rate": 9.979616032083301e-06, + "loss": 18.0093, + "step": 3168 + }, + { + "epoch": 
0.05792677353903523, + "grad_norm": 7.362247485355764, + "learning_rate": 9.979589321595123e-06, + "loss": 17.5434, + "step": 3169 + }, + { + "epoch": 0.05794505273548175, + "grad_norm": 7.540447907965872, + "learning_rate": 9.979562593653916e-06, + "loss": 18.1084, + "step": 3170 + }, + { + "epoch": 0.05796333193192827, + "grad_norm": 7.413722488365144, + "learning_rate": 9.979535848259775e-06, + "loss": 17.7816, + "step": 3171 + }, + { + "epoch": 0.0579816111283748, + "grad_norm": 8.349769595748214, + "learning_rate": 9.979509085412793e-06, + "loss": 18.3193, + "step": 3172 + }, + { + "epoch": 0.05799989032482132, + "grad_norm": 7.2552155432041365, + "learning_rate": 9.979482305113062e-06, + "loss": 17.6679, + "step": 3173 + }, + { + "epoch": 0.05801816952126784, + "grad_norm": 9.039718750437268, + "learning_rate": 9.979455507360679e-06, + "loss": 18.6886, + "step": 3174 + }, + { + "epoch": 0.05803644871771437, + "grad_norm": 6.273050204214056, + "learning_rate": 9.979428692155737e-06, + "loss": 17.3761, + "step": 3175 + }, + { + "epoch": 0.05805472791416089, + "grad_norm": 6.715594056949398, + "learning_rate": 9.979401859498327e-06, + "loss": 17.3928, + "step": 3176 + }, + { + "epoch": 0.058073007110607415, + "grad_norm": 6.650037070977743, + "learning_rate": 9.979375009388548e-06, + "loss": 17.7325, + "step": 3177 + }, + { + "epoch": 0.058091286307053944, + "grad_norm": 6.673201876651628, + "learning_rate": 9.979348141826491e-06, + "loss": 17.5246, + "step": 3178 + }, + { + "epoch": 0.058109565503500465, + "grad_norm": 7.527923065096524, + "learning_rate": 9.979321256812252e-06, + "loss": 18.2962, + "step": 3179 + }, + { + "epoch": 0.05812784469994699, + "grad_norm": 9.008414939763789, + "learning_rate": 9.979294354345923e-06, + "loss": 18.6181, + "step": 3180 + }, + { + "epoch": 0.058146123896393516, + "grad_norm": 7.737827894858357, + "learning_rate": 9.979267434427599e-06, + "loss": 17.8865, + "step": 3181 + }, + { + "epoch": 0.05816440309284004, + "grad_norm": 7.435350935716762, + "learning_rate": 9.979240497057374e-06, + "loss": 18.1134, + "step": 3182 + }, + { + "epoch": 0.058182682289286566, + "grad_norm": 6.392868758180979, + "learning_rate": 9.979213542235346e-06, + "loss": 17.4657, + "step": 3183 + }, + { + "epoch": 0.05820096148573309, + "grad_norm": 8.061051245157545, + "learning_rate": 9.979186569961603e-06, + "loss": 18.4981, + "step": 3184 + }, + { + "epoch": 0.05821924068217961, + "grad_norm": 7.574884996174679, + "learning_rate": 9.979159580236246e-06, + "loss": 17.8623, + "step": 3185 + }, + { + "epoch": 0.05823751987862614, + "grad_norm": 6.972585575102587, + "learning_rate": 9.979132573059366e-06, + "loss": 17.7043, + "step": 3186 + }, + { + "epoch": 0.05825579907507266, + "grad_norm": 8.01277178504557, + "learning_rate": 9.979105548431058e-06, + "loss": 18.2272, + "step": 3187 + }, + { + "epoch": 0.05827407827151918, + "grad_norm": 6.883657298753507, + "learning_rate": 9.979078506351418e-06, + "loss": 17.5574, + "step": 3188 + }, + { + "epoch": 0.05829235746796571, + "grad_norm": 8.252378597440396, + "learning_rate": 9.979051446820539e-06, + "loss": 18.0717, + "step": 3189 + }, + { + "epoch": 0.05831063666441223, + "grad_norm": 9.18754927192144, + "learning_rate": 9.979024369838516e-06, + "loss": 18.5159, + "step": 3190 + }, + { + "epoch": 0.058328915860858754, + "grad_norm": 7.617789065932563, + "learning_rate": 9.978997275405447e-06, + "loss": 18.0219, + "step": 3191 + }, + { + "epoch": 0.05834719505730528, + "grad_norm": 5.51650755113365, + "learning_rate": 
9.978970163521422e-06, + "loss": 17.0512, + "step": 3192 + }, + { + "epoch": 0.058365474253751805, + "grad_norm": 7.808301822700816, + "learning_rate": 9.978943034186539e-06, + "loss": 17.8967, + "step": 3193 + }, + { + "epoch": 0.05838375345019833, + "grad_norm": 8.157376624839548, + "learning_rate": 9.978915887400894e-06, + "loss": 18.2762, + "step": 3194 + }, + { + "epoch": 0.058402032646644855, + "grad_norm": 7.530590281296273, + "learning_rate": 9.978888723164581e-06, + "loss": 17.754, + "step": 3195 + }, + { + "epoch": 0.05842031184309138, + "grad_norm": 8.107889478601699, + "learning_rate": 9.978861541477694e-06, + "loss": 18.3321, + "step": 3196 + }, + { + "epoch": 0.0584385910395379, + "grad_norm": 7.672274118425343, + "learning_rate": 9.978834342340329e-06, + "loss": 17.8744, + "step": 3197 + }, + { + "epoch": 0.05845687023598443, + "grad_norm": 7.50720109128021, + "learning_rate": 9.978807125752582e-06, + "loss": 18.3519, + "step": 3198 + }, + { + "epoch": 0.05847514943243095, + "grad_norm": 8.526593217492945, + "learning_rate": 9.97877989171455e-06, + "loss": 18.2489, + "step": 3199 + }, + { + "epoch": 0.05849342862887748, + "grad_norm": 9.441229667642398, + "learning_rate": 9.978752640226325e-06, + "loss": 19.2197, + "step": 3200 + }, + { + "epoch": 0.058511707825324, + "grad_norm": 8.319443367985162, + "learning_rate": 9.978725371288004e-06, + "loss": 18.3359, + "step": 3201 + }, + { + "epoch": 0.05852998702177052, + "grad_norm": 8.011564051634045, + "learning_rate": 9.978698084899682e-06, + "loss": 18.2131, + "step": 3202 + }, + { + "epoch": 0.05854826621821705, + "grad_norm": 7.764131198002185, + "learning_rate": 9.978670781061457e-06, + "loss": 18.3141, + "step": 3203 + }, + { + "epoch": 0.05856654541466357, + "grad_norm": 7.6742758845209735, + "learning_rate": 9.97864345977342e-06, + "loss": 18.1446, + "step": 3204 + }, + { + "epoch": 0.058584824611110094, + "grad_norm": 7.481482628458801, + "learning_rate": 9.978616121035672e-06, + "loss": 18.2536, + "step": 3205 + }, + { + "epoch": 0.05860310380755662, + "grad_norm": 8.053813882400595, + "learning_rate": 9.978588764848307e-06, + "loss": 17.7258, + "step": 3206 + }, + { + "epoch": 0.058621383004003144, + "grad_norm": 9.181886304858015, + "learning_rate": 9.97856139121142e-06, + "loss": 18.8872, + "step": 3207 + }, + { + "epoch": 0.058639662200449666, + "grad_norm": 6.952728968364468, + "learning_rate": 9.978534000125106e-06, + "loss": 17.8382, + "step": 3208 + }, + { + "epoch": 0.058657941396896195, + "grad_norm": 6.639926778610925, + "learning_rate": 9.978506591589463e-06, + "loss": 17.7191, + "step": 3209 + }, + { + "epoch": 0.058676220593342716, + "grad_norm": 7.467723950323885, + "learning_rate": 9.978479165604586e-06, + "loss": 18.1541, + "step": 3210 + }, + { + "epoch": 0.05869449978978924, + "grad_norm": 8.352101402192078, + "learning_rate": 9.978451722170572e-06, + "loss": 17.8843, + "step": 3211 + }, + { + "epoch": 0.05871277898623577, + "grad_norm": 6.614723848081901, + "learning_rate": 9.978424261287518e-06, + "loss": 17.4353, + "step": 3212 + }, + { + "epoch": 0.05873105818268229, + "grad_norm": 9.070620645938524, + "learning_rate": 9.978396782955518e-06, + "loss": 19.1536, + "step": 3213 + }, + { + "epoch": 0.05874933737912881, + "grad_norm": 8.718314512695132, + "learning_rate": 9.978369287174668e-06, + "loss": 18.5638, + "step": 3214 + }, + { + "epoch": 0.05876761657557534, + "grad_norm": 6.716483752096194, + "learning_rate": 9.978341773945067e-06, + "loss": 17.8021, + "step": 3215 + }, + { + "epoch": 
0.05878589577202186, + "grad_norm": 8.165609762942184, + "learning_rate": 9.97831424326681e-06, + "loss": 18.0288, + "step": 3216 + }, + { + "epoch": 0.05880417496846839, + "grad_norm": 6.437159544469477, + "learning_rate": 9.978286695139993e-06, + "loss": 17.4915, + "step": 3217 + }, + { + "epoch": 0.05882245416491491, + "grad_norm": 7.366466706580026, + "learning_rate": 9.978259129564713e-06, + "loss": 17.7899, + "step": 3218 + }, + { + "epoch": 0.05884073336136143, + "grad_norm": 9.018297973451752, + "learning_rate": 9.978231546541069e-06, + "loss": 18.2572, + "step": 3219 + }, + { + "epoch": 0.05885901255780796, + "grad_norm": 8.295663361723875, + "learning_rate": 9.978203946069154e-06, + "loss": 18.2259, + "step": 3220 + }, + { + "epoch": 0.058877291754254484, + "grad_norm": 6.665727972785896, + "learning_rate": 9.978176328149064e-06, + "loss": 17.5892, + "step": 3221 + }, + { + "epoch": 0.058895570950701005, + "grad_norm": 9.108174279374897, + "learning_rate": 9.9781486927809e-06, + "loss": 18.667, + "step": 3222 + }, + { + "epoch": 0.058913850147147534, + "grad_norm": 7.2191477273365425, + "learning_rate": 9.978121039964757e-06, + "loss": 17.9186, + "step": 3223 + }, + { + "epoch": 0.058932129343594056, + "grad_norm": 6.952581207518228, + "learning_rate": 9.978093369700733e-06, + "loss": 18.0369, + "step": 3224 + }, + { + "epoch": 0.05895040854004058, + "grad_norm": 9.454293435602219, + "learning_rate": 9.978065681988921e-06, + "loss": 19.1317, + "step": 3225 + }, + { + "epoch": 0.058968687736487106, + "grad_norm": 7.661129555256189, + "learning_rate": 9.978037976829423e-06, + "loss": 18.2276, + "step": 3226 + }, + { + "epoch": 0.05898696693293363, + "grad_norm": 7.190124056734599, + "learning_rate": 9.978010254222332e-06, + "loss": 17.7638, + "step": 3227 + }, + { + "epoch": 0.05900524612938015, + "grad_norm": 6.885324932029891, + "learning_rate": 9.977982514167748e-06, + "loss": 17.6156, + "step": 3228 + }, + { + "epoch": 0.05902352532582668, + "grad_norm": 8.836508839201812, + "learning_rate": 9.977954756665766e-06, + "loss": 18.5187, + "step": 3229 + }, + { + "epoch": 0.0590418045222732, + "grad_norm": 7.042024853413884, + "learning_rate": 9.977926981716486e-06, + "loss": 17.7895, + "step": 3230 + }, + { + "epoch": 0.05906008371871972, + "grad_norm": 7.222072324753219, + "learning_rate": 9.977899189320002e-06, + "loss": 18.0641, + "step": 3231 + }, + { + "epoch": 0.05907836291516625, + "grad_norm": 7.789156150808883, + "learning_rate": 9.977871379476416e-06, + "loss": 18.1854, + "step": 3232 + }, + { + "epoch": 0.05909664211161277, + "grad_norm": 7.019874445437046, + "learning_rate": 9.977843552185822e-06, + "loss": 17.5921, + "step": 3233 + }, + { + "epoch": 0.0591149213080593, + "grad_norm": 6.961227256593004, + "learning_rate": 9.977815707448317e-06, + "loss": 17.638, + "step": 3234 + }, + { + "epoch": 0.05913320050450582, + "grad_norm": 7.404831439084697, + "learning_rate": 9.977787845264001e-06, + "loss": 18.1359, + "step": 3235 + }, + { + "epoch": 0.059151479700952345, + "grad_norm": 7.6081190895585085, + "learning_rate": 9.97775996563297e-06, + "loss": 17.9784, + "step": 3236 + }, + { + "epoch": 0.05916975889739887, + "grad_norm": 8.189798289907229, + "learning_rate": 9.977732068555323e-06, + "loss": 18.1947, + "step": 3237 + }, + { + "epoch": 0.059188038093845395, + "grad_norm": 9.603313968659359, + "learning_rate": 9.977704154031156e-06, + "loss": 18.4913, + "step": 3238 + }, + { + "epoch": 0.05920631729029192, + "grad_norm": 7.24942481827696, + "learning_rate": 
9.977676222060568e-06, + "loss": 17.6742, + "step": 3239 + }, + { + "epoch": 0.059224596486738446, + "grad_norm": 7.837441304765053, + "learning_rate": 9.977648272643658e-06, + "loss": 18.347, + "step": 3240 + }, + { + "epoch": 0.05924287568318497, + "grad_norm": 7.488596814459485, + "learning_rate": 9.977620305780522e-06, + "loss": 17.9552, + "step": 3241 + }, + { + "epoch": 0.05926115487963149, + "grad_norm": 25.05949073017427, + "learning_rate": 9.977592321471259e-06, + "loss": 18.189, + "step": 3242 + }, + { + "epoch": 0.05927943407607802, + "grad_norm": 8.342189405871983, + "learning_rate": 9.977564319715966e-06, + "loss": 17.5618, + "step": 3243 + }, + { + "epoch": 0.05929771327252454, + "grad_norm": 9.221504010474346, + "learning_rate": 9.977536300514742e-06, + "loss": 17.8729, + "step": 3244 + }, + { + "epoch": 0.05931599246897106, + "grad_norm": 10.218891821200936, + "learning_rate": 9.977508263867688e-06, + "loss": 18.9667, + "step": 3245 + }, + { + "epoch": 0.05933427166541759, + "grad_norm": 8.044011082674539, + "learning_rate": 9.977480209774897e-06, + "loss": 18.1998, + "step": 3246 + }, + { + "epoch": 0.05935255086186411, + "grad_norm": 8.786931638511275, + "learning_rate": 9.977452138236469e-06, + "loss": 17.7495, + "step": 3247 + }, + { + "epoch": 0.059370830058310634, + "grad_norm": 7.66967034121712, + "learning_rate": 9.977424049252504e-06, + "loss": 17.4421, + "step": 3248 + }, + { + "epoch": 0.05938910925475716, + "grad_norm": 7.727496643488349, + "learning_rate": 9.9773959428231e-06, + "loss": 18.2082, + "step": 3249 + }, + { + "epoch": 0.059407388451203684, + "grad_norm": 8.994041901925366, + "learning_rate": 9.977367818948355e-06, + "loss": 18.2771, + "step": 3250 + }, + { + "epoch": 0.05942566764765021, + "grad_norm": 8.026884494537635, + "learning_rate": 9.977339677628369e-06, + "loss": 17.595, + "step": 3251 + }, + { + "epoch": 0.059443946844096734, + "grad_norm": 7.570263993485957, + "learning_rate": 9.977311518863237e-06, + "loss": 17.6949, + "step": 3252 + }, + { + "epoch": 0.059462226040543256, + "grad_norm": 7.846573858742153, + "learning_rate": 9.97728334265306e-06, + "loss": 18.1315, + "step": 3253 + }, + { + "epoch": 0.059480505236989785, + "grad_norm": 9.346891470386613, + "learning_rate": 9.97725514899794e-06, + "loss": 18.8245, + "step": 3254 + }, + { + "epoch": 0.05949878443343631, + "grad_norm": 8.173897293518895, + "learning_rate": 9.97722693789797e-06, + "loss": 17.6173, + "step": 3255 + }, + { + "epoch": 0.05951706362988283, + "grad_norm": 6.700094234008094, + "learning_rate": 9.97719870935325e-06, + "loss": 17.3285, + "step": 3256 + }, + { + "epoch": 0.05953534282632936, + "grad_norm": 7.4321584915430785, + "learning_rate": 9.977170463363883e-06, + "loss": 17.7529, + "step": 3257 + }, + { + "epoch": 0.05955362202277588, + "grad_norm": 8.382330015612283, + "learning_rate": 9.977142199929965e-06, + "loss": 17.927, + "step": 3258 + }, + { + "epoch": 0.0595719012192224, + "grad_norm": 7.894075544574096, + "learning_rate": 9.977113919051595e-06, + "loss": 17.815, + "step": 3259 + }, + { + "epoch": 0.05959018041566893, + "grad_norm": 8.098918306058742, + "learning_rate": 9.977085620728875e-06, + "loss": 18.2385, + "step": 3260 + }, + { + "epoch": 0.05960845961211545, + "grad_norm": 8.068916376934771, + "learning_rate": 9.977057304961899e-06, + "loss": 17.9026, + "step": 3261 + }, + { + "epoch": 0.05962673880856197, + "grad_norm": 8.10641133511313, + "learning_rate": 9.977028971750769e-06, + "loss": 18.2146, + "step": 3262 + }, + { + "epoch": 
0.0596450180050085, + "grad_norm": 6.512236878155436, + "learning_rate": 9.977000621095585e-06, + "loss": 17.4373, + "step": 3263 + }, + { + "epoch": 0.05966329720145502, + "grad_norm": 8.71608684189703, + "learning_rate": 9.976972252996447e-06, + "loss": 18.4464, + "step": 3264 + }, + { + "epoch": 0.059681576397901545, + "grad_norm": 7.281434667691815, + "learning_rate": 9.976943867453452e-06, + "loss": 17.6039, + "step": 3265 + }, + { + "epoch": 0.059699855594348074, + "grad_norm": 8.526349343559344, + "learning_rate": 9.9769154644667e-06, + "loss": 18.5155, + "step": 3266 + }, + { + "epoch": 0.059718134790794596, + "grad_norm": 7.266321725012322, + "learning_rate": 9.976887044036291e-06, + "loss": 17.7049, + "step": 3267 + }, + { + "epoch": 0.059736413987241124, + "grad_norm": 8.96036466200305, + "learning_rate": 9.976858606162326e-06, + "loss": 18.1321, + "step": 3268 + }, + { + "epoch": 0.059754693183687646, + "grad_norm": 9.878808320607032, + "learning_rate": 9.976830150844902e-06, + "loss": 18.5663, + "step": 3269 + }, + { + "epoch": 0.05977297238013417, + "grad_norm": 8.15912469902459, + "learning_rate": 9.976801678084123e-06, + "loss": 17.9054, + "step": 3270 + }, + { + "epoch": 0.059791251576580697, + "grad_norm": 6.4488928278431255, + "learning_rate": 9.976773187880083e-06, + "loss": 17.4122, + "step": 3271 + }, + { + "epoch": 0.05980953077302722, + "grad_norm": 8.785649582603085, + "learning_rate": 9.976744680232886e-06, + "loss": 18.1126, + "step": 3272 + }, + { + "epoch": 0.05982780996947374, + "grad_norm": 8.9666925267377, + "learning_rate": 9.976716155142632e-06, + "loss": 18.475, + "step": 3273 + }, + { + "epoch": 0.05984608916592027, + "grad_norm": 8.085738504039458, + "learning_rate": 9.976687612609417e-06, + "loss": 18.0753, + "step": 3274 + }, + { + "epoch": 0.05986436836236679, + "grad_norm": 8.834988133785304, + "learning_rate": 9.976659052633347e-06, + "loss": 18.6388, + "step": 3275 + }, + { + "epoch": 0.05988264755881331, + "grad_norm": 7.149216722572933, + "learning_rate": 9.976630475214515e-06, + "loss": 17.5655, + "step": 3276 + }, + { + "epoch": 0.05990092675525984, + "grad_norm": 7.248163919124378, + "learning_rate": 9.976601880353028e-06, + "loss": 17.8018, + "step": 3277 + }, + { + "epoch": 0.05991920595170636, + "grad_norm": 7.903396617238845, + "learning_rate": 9.976573268048983e-06, + "loss": 18.1317, + "step": 3278 + }, + { + "epoch": 0.059937485148152884, + "grad_norm": 7.2367672796718105, + "learning_rate": 9.97654463830248e-06, + "loss": 17.8175, + "step": 3279 + }, + { + "epoch": 0.05995576434459941, + "grad_norm": 7.61388358163043, + "learning_rate": 9.97651599111362e-06, + "loss": 17.9395, + "step": 3280 + }, + { + "epoch": 0.059974043541045935, + "grad_norm": 6.776332874611314, + "learning_rate": 9.976487326482503e-06, + "loss": 17.7383, + "step": 3281 + }, + { + "epoch": 0.05999232273749246, + "grad_norm": 8.476125130232775, + "learning_rate": 9.976458644409231e-06, + "loss": 18.3575, + "step": 3282 + }, + { + "epoch": 0.060010601933938985, + "grad_norm": 7.487495575125886, + "learning_rate": 9.976429944893902e-06, + "loss": 17.8411, + "step": 3283 + }, + { + "epoch": 0.06002888113038551, + "grad_norm": 7.629057785867848, + "learning_rate": 9.976401227936616e-06, + "loss": 17.9797, + "step": 3284 + }, + { + "epoch": 0.060047160326832036, + "grad_norm": 7.9242789683691015, + "learning_rate": 9.976372493537479e-06, + "loss": 17.9919, + "step": 3285 + }, + { + "epoch": 0.06006543952327856, + "grad_norm": 7.6290860103886695, + "learning_rate": 
9.976343741696586e-06, + "loss": 17.9728, + "step": 3286 + }, + { + "epoch": 0.06008371871972508, + "grad_norm": 7.552164863018478, + "learning_rate": 9.97631497241404e-06, + "loss": 17.7703, + "step": 3287 + }, + { + "epoch": 0.06010199791617161, + "grad_norm": 7.17350157489045, + "learning_rate": 9.976286185689944e-06, + "loss": 17.8132, + "step": 3288 + }, + { + "epoch": 0.06012027711261813, + "grad_norm": 7.168377024849365, + "learning_rate": 9.976257381524396e-06, + "loss": 17.771, + "step": 3289 + }, + { + "epoch": 0.06013855630906465, + "grad_norm": 9.442343423631371, + "learning_rate": 9.976228559917497e-06, + "loss": 18.45, + "step": 3290 + }, + { + "epoch": 0.06015683550551118, + "grad_norm": 7.685395625931217, + "learning_rate": 9.976199720869348e-06, + "loss": 18.0959, + "step": 3291 + }, + { + "epoch": 0.0601751147019577, + "grad_norm": 7.789790910507743, + "learning_rate": 9.976170864380052e-06, + "loss": 18.2507, + "step": 3292 + }, + { + "epoch": 0.060193393898404224, + "grad_norm": 6.91393548772296, + "learning_rate": 9.976141990449708e-06, + "loss": 17.4989, + "step": 3293 + }, + { + "epoch": 0.06021167309485075, + "grad_norm": 7.900983241934687, + "learning_rate": 9.97611309907842e-06, + "loss": 18.2346, + "step": 3294 + }, + { + "epoch": 0.060229952291297274, + "grad_norm": 10.003760275284977, + "learning_rate": 9.976084190266286e-06, + "loss": 19.1263, + "step": 3295 + }, + { + "epoch": 0.060248231487743796, + "grad_norm": 8.209602792150497, + "learning_rate": 9.976055264013408e-06, + "loss": 18.0399, + "step": 3296 + }, + { + "epoch": 0.060266510684190325, + "grad_norm": 8.706734941265879, + "learning_rate": 9.97602632031989e-06, + "loss": 18.4225, + "step": 3297 + }, + { + "epoch": 0.060284789880636847, + "grad_norm": 8.016640715379337, + "learning_rate": 9.97599735918583e-06, + "loss": 18.3653, + "step": 3298 + }, + { + "epoch": 0.06030306907708337, + "grad_norm": 6.334820181175187, + "learning_rate": 9.975968380611332e-06, + "loss": 17.2041, + "step": 3299 + }, + { + "epoch": 0.0603213482735299, + "grad_norm": 7.578792617036335, + "learning_rate": 9.975939384596496e-06, + "loss": 17.9529, + "step": 3300 + }, + { + "epoch": 0.06033962746997642, + "grad_norm": 8.093553982308935, + "learning_rate": 9.975910371141424e-06, + "loss": 18.4033, + "step": 3301 + }, + { + "epoch": 0.06035790666642295, + "grad_norm": 8.083567144807205, + "learning_rate": 9.975881340246218e-06, + "loss": 18.0603, + "step": 3302 + }, + { + "epoch": 0.06037618586286947, + "grad_norm": 9.014212016579211, + "learning_rate": 9.975852291910982e-06, + "loss": 17.8603, + "step": 3303 + }, + { + "epoch": 0.06039446505931599, + "grad_norm": 7.197672327541092, + "learning_rate": 9.975823226135813e-06, + "loss": 17.8271, + "step": 3304 + }, + { + "epoch": 0.06041274425576252, + "grad_norm": 6.384490700127685, + "learning_rate": 9.975794142920815e-06, + "loss": 17.4841, + "step": 3305 + }, + { + "epoch": 0.06043102345220904, + "grad_norm": 7.368130733758599, + "learning_rate": 9.975765042266091e-06, + "loss": 18.0889, + "step": 3306 + }, + { + "epoch": 0.06044930264865556, + "grad_norm": 6.495304240788988, + "learning_rate": 9.975735924171744e-06, + "loss": 17.2142, + "step": 3307 + }, + { + "epoch": 0.06046758184510209, + "grad_norm": 6.956934837553296, + "learning_rate": 9.97570678863787e-06, + "loss": 17.6698, + "step": 3308 + }, + { + "epoch": 0.060485861041548614, + "grad_norm": 8.587591323241254, + "learning_rate": 9.97567763566458e-06, + "loss": 18.2999, + "step": 3309 + }, + { + "epoch": 
0.060504140237995135, + "grad_norm": 8.128712306915489, + "learning_rate": 9.97564846525197e-06, + "loss": 18.3157, + "step": 3310 + }, + { + "epoch": 0.060522419434441664, + "grad_norm": 6.778574663367813, + "learning_rate": 9.975619277400144e-06, + "loss": 17.5754, + "step": 3311 + }, + { + "epoch": 0.060540698630888186, + "grad_norm": 7.144848975595105, + "learning_rate": 9.975590072109205e-06, + "loss": 17.6057, + "step": 3312 + }, + { + "epoch": 0.06055897782733471, + "grad_norm": 6.863204769693211, + "learning_rate": 9.975560849379253e-06, + "loss": 17.6446, + "step": 3313 + }, + { + "epoch": 0.060577257023781236, + "grad_norm": 6.954857254229112, + "learning_rate": 9.975531609210393e-06, + "loss": 17.3515, + "step": 3314 + }, + { + "epoch": 0.06059553622022776, + "grad_norm": 7.387071064673921, + "learning_rate": 9.975502351602726e-06, + "loss": 17.9307, + "step": 3315 + }, + { + "epoch": 0.06061381541667428, + "grad_norm": 7.204790219760822, + "learning_rate": 9.975473076556355e-06, + "loss": 17.552, + "step": 3316 + }, + { + "epoch": 0.06063209461312081, + "grad_norm": 7.034817915669464, + "learning_rate": 9.975443784071383e-06, + "loss": 17.6357, + "step": 3317 + }, + { + "epoch": 0.06065037380956733, + "grad_norm": 7.178123674791742, + "learning_rate": 9.975414474147911e-06, + "loss": 17.7269, + "step": 3318 + }, + { + "epoch": 0.06066865300601386, + "grad_norm": 7.519903671620943, + "learning_rate": 9.975385146786044e-06, + "loss": 17.9158, + "step": 3319 + }, + { + "epoch": 0.06068693220246038, + "grad_norm": 7.304931624101692, + "learning_rate": 9.975355801985885e-06, + "loss": 17.7184, + "step": 3320 + }, + { + "epoch": 0.0607052113989069, + "grad_norm": 6.743292187568647, + "learning_rate": 9.975326439747534e-06, + "loss": 17.5652, + "step": 3321 + }, + { + "epoch": 0.06072349059535343, + "grad_norm": 9.112577517205871, + "learning_rate": 9.975297060071097e-06, + "loss": 18.6824, + "step": 3322 + }, + { + "epoch": 0.06074176979179995, + "grad_norm": 7.689219605823097, + "learning_rate": 9.975267662956674e-06, + "loss": 17.776, + "step": 3323 + }, + { + "epoch": 0.060760048988246475, + "grad_norm": 8.972564384176065, + "learning_rate": 9.97523824840437e-06, + "loss": 18.6782, + "step": 3324 + }, + { + "epoch": 0.060778328184693, + "grad_norm": 7.75198143338471, + "learning_rate": 9.975208816414288e-06, + "loss": 17.868, + "step": 3325 + }, + { + "epoch": 0.060796607381139525, + "grad_norm": 6.605808021525001, + "learning_rate": 9.97517936698653e-06, + "loss": 17.6094, + "step": 3326 + }, + { + "epoch": 0.06081488657758605, + "grad_norm": 6.495524699742615, + "learning_rate": 9.975149900121201e-06, + "loss": 17.3402, + "step": 3327 + }, + { + "epoch": 0.060833165774032576, + "grad_norm": 8.519181664837557, + "learning_rate": 9.975120415818403e-06, + "loss": 18.2129, + "step": 3328 + }, + { + "epoch": 0.0608514449704791, + "grad_norm": 6.830774350900405, + "learning_rate": 9.97509091407824e-06, + "loss": 17.6935, + "step": 3329 + }, + { + "epoch": 0.06086972416692562, + "grad_norm": 6.619043740946671, + "learning_rate": 9.975061394900814e-06, + "loss": 17.4408, + "step": 3330 + }, + { + "epoch": 0.06088800336337215, + "grad_norm": 7.538531552487765, + "learning_rate": 9.97503185828623e-06, + "loss": 18.0834, + "step": 3331 + }, + { + "epoch": 0.06090628255981867, + "grad_norm": 8.20921101532402, + "learning_rate": 9.975002304234593e-06, + "loss": 18.2413, + "step": 3332 + }, + { + "epoch": 0.06092456175626519, + "grad_norm": 9.01460801019503, + "learning_rate": 
9.974972732746002e-06, + "loss": 18.0666, + "step": 3333 + }, + { + "epoch": 0.06094284095271172, + "grad_norm": 7.724620821664519, + "learning_rate": 9.974943143820564e-06, + "loss": 17.9919, + "step": 3334 + }, + { + "epoch": 0.06096112014915824, + "grad_norm": 7.1984340650835215, + "learning_rate": 9.974913537458384e-06, + "loss": 17.9003, + "step": 3335 + }, + { + "epoch": 0.06097939934560477, + "grad_norm": 6.1580223199414, + "learning_rate": 9.974883913659561e-06, + "loss": 17.3667, + "step": 3336 + }, + { + "epoch": 0.06099767854205129, + "grad_norm": 7.75991150855965, + "learning_rate": 9.974854272424203e-06, + "loss": 17.8572, + "step": 3337 + }, + { + "epoch": 0.061015957738497814, + "grad_norm": 7.0164285715245605, + "learning_rate": 9.974824613752412e-06, + "loss": 17.5536, + "step": 3338 + }, + { + "epoch": 0.06103423693494434, + "grad_norm": 7.859173707418357, + "learning_rate": 9.974794937644292e-06, + "loss": 17.8944, + "step": 3339 + }, + { + "epoch": 0.061052516131390865, + "grad_norm": 7.5441465924666735, + "learning_rate": 9.97476524409995e-06, + "loss": 17.7754, + "step": 3340 + }, + { + "epoch": 0.061070795327837386, + "grad_norm": 6.853068682320698, + "learning_rate": 9.974735533119485e-06, + "loss": 17.7593, + "step": 3341 + }, + { + "epoch": 0.061089074524283915, + "grad_norm": 7.482705010266999, + "learning_rate": 9.974705804703002e-06, + "loss": 17.8316, + "step": 3342 + }, + { + "epoch": 0.06110735372073044, + "grad_norm": 8.469190246912817, + "learning_rate": 9.97467605885061e-06, + "loss": 18.1225, + "step": 3343 + }, + { + "epoch": 0.06112563291717696, + "grad_norm": 6.541346402218264, + "learning_rate": 9.97464629556241e-06, + "loss": 17.1778, + "step": 3344 + }, + { + "epoch": 0.06114391211362349, + "grad_norm": 8.319928154406021, + "learning_rate": 9.974616514838504e-06, + "loss": 18.5495, + "step": 3345 + }, + { + "epoch": 0.06116219131007001, + "grad_norm": 8.379862459573424, + "learning_rate": 9.974586716679e-06, + "loss": 18.2181, + "step": 3346 + }, + { + "epoch": 0.06118047050651653, + "grad_norm": 8.57272529587977, + "learning_rate": 9.974556901084002e-06, + "loss": 18.0759, + "step": 3347 + }, + { + "epoch": 0.06119874970296306, + "grad_norm": 9.490933256616948, + "learning_rate": 9.974527068053613e-06, + "loss": 18.5071, + "step": 3348 + }, + { + "epoch": 0.06121702889940958, + "grad_norm": 8.791459883568644, + "learning_rate": 9.97449721758794e-06, + "loss": 18.239, + "step": 3349 + }, + { + "epoch": 0.0612353080958561, + "grad_norm": 7.428366883134674, + "learning_rate": 9.974467349687082e-06, + "loss": 17.9357, + "step": 3350 + }, + { + "epoch": 0.06125358729230263, + "grad_norm": 9.111407809345186, + "learning_rate": 9.974437464351151e-06, + "loss": 18.6755, + "step": 3351 + }, + { + "epoch": 0.061271866488749153, + "grad_norm": 8.635498076159992, + "learning_rate": 9.974407561580248e-06, + "loss": 18.2551, + "step": 3352 + }, + { + "epoch": 0.06129014568519568, + "grad_norm": 6.650086668964261, + "learning_rate": 9.974377641374477e-06, + "loss": 17.3573, + "step": 3353 + }, + { + "epoch": 0.061308424881642204, + "grad_norm": 7.135388716063664, + "learning_rate": 9.974347703733945e-06, + "loss": 17.7386, + "step": 3354 + }, + { + "epoch": 0.061326704078088726, + "grad_norm": 7.224218446437377, + "learning_rate": 9.974317748658754e-06, + "loss": 17.7415, + "step": 3355 + }, + { + "epoch": 0.061344983274535254, + "grad_norm": 6.7564680913700315, + "learning_rate": 9.974287776149013e-06, + "loss": 17.8325, + "step": 3356 + }, + { + "epoch": 
0.061363262470981776, + "grad_norm": 6.540021535054071, + "learning_rate": 9.974257786204826e-06, + "loss": 17.4231, + "step": 3357 + }, + { + "epoch": 0.0613815416674283, + "grad_norm": 6.815830767117462, + "learning_rate": 9.974227778826296e-06, + "loss": 17.4986, + "step": 3358 + }, + { + "epoch": 0.06139982086387483, + "grad_norm": 7.129445299972657, + "learning_rate": 9.974197754013527e-06, + "loss": 17.8041, + "step": 3359 + }, + { + "epoch": 0.06141810006032135, + "grad_norm": 7.153791344324305, + "learning_rate": 9.974167711766629e-06, + "loss": 17.6922, + "step": 3360 + }, + { + "epoch": 0.06143637925676787, + "grad_norm": 8.150271325666516, + "learning_rate": 9.974137652085705e-06, + "loss": 18.0894, + "step": 3361 + }, + { + "epoch": 0.0614546584532144, + "grad_norm": 6.76458176038737, + "learning_rate": 9.974107574970858e-06, + "loss": 17.9596, + "step": 3362 + }, + { + "epoch": 0.06147293764966092, + "grad_norm": 6.562435612957236, + "learning_rate": 9.974077480422197e-06, + "loss": 17.5366, + "step": 3363 + }, + { + "epoch": 0.06149121684610744, + "grad_norm": 8.310894250512407, + "learning_rate": 9.974047368439827e-06, + "loss": 17.8685, + "step": 3364 + }, + { + "epoch": 0.06150949604255397, + "grad_norm": 6.424976980092459, + "learning_rate": 9.974017239023851e-06, + "loss": 17.3777, + "step": 3365 + }, + { + "epoch": 0.06152777523900049, + "grad_norm": 9.159875494056582, + "learning_rate": 9.973987092174377e-06, + "loss": 18.3677, + "step": 3366 + }, + { + "epoch": 0.061546054435447015, + "grad_norm": 9.207276557072449, + "learning_rate": 9.97395692789151e-06, + "loss": 19.1496, + "step": 3367 + }, + { + "epoch": 0.06156433363189354, + "grad_norm": 8.280780761104374, + "learning_rate": 9.973926746175354e-06, + "loss": 18.1527, + "step": 3368 + }, + { + "epoch": 0.061582612828340065, + "grad_norm": 9.105685183822006, + "learning_rate": 9.973896547026019e-06, + "loss": 18.2787, + "step": 3369 + }, + { + "epoch": 0.061600892024786594, + "grad_norm": 7.568047989769413, + "learning_rate": 9.973866330443606e-06, + "loss": 17.6217, + "step": 3370 + }, + { + "epoch": 0.061619171221233116, + "grad_norm": 8.525715853579365, + "learning_rate": 9.973836096428224e-06, + "loss": 17.9766, + "step": 3371 + }, + { + "epoch": 0.06163745041767964, + "grad_norm": 8.70039255709809, + "learning_rate": 9.973805844979978e-06, + "loss": 18.2261, + "step": 3372 + }, + { + "epoch": 0.061655729614126166, + "grad_norm": 7.227871794954165, + "learning_rate": 9.973775576098974e-06, + "loss": 17.7765, + "step": 3373 + }, + { + "epoch": 0.06167400881057269, + "grad_norm": 6.400738094315887, + "learning_rate": 9.973745289785318e-06, + "loss": 17.3244, + "step": 3374 + }, + { + "epoch": 0.06169228800701921, + "grad_norm": 5.9325840074765885, + "learning_rate": 9.973714986039117e-06, + "loss": 17.1561, + "step": 3375 + }, + { + "epoch": 0.06171056720346574, + "grad_norm": 7.481540854368121, + "learning_rate": 9.973684664860477e-06, + "loss": 17.8805, + "step": 3376 + }, + { + "epoch": 0.06172884639991226, + "grad_norm": 8.170644267216476, + "learning_rate": 9.973654326249502e-06, + "loss": 18.4836, + "step": 3377 + }, + { + "epoch": 0.06174712559635878, + "grad_norm": 6.232161029270701, + "learning_rate": 9.973623970206302e-06, + "loss": 17.2242, + "step": 3378 + }, + { + "epoch": 0.06176540479280531, + "grad_norm": 8.752431819138764, + "learning_rate": 9.97359359673098e-06, + "loss": 18.2505, + "step": 3379 + }, + { + "epoch": 0.06178368398925183, + "grad_norm": 8.109599262971056, + "learning_rate": 
9.973563205823645e-06, + "loss": 17.9687, + "step": 3380 + }, + { + "epoch": 0.061801963185698354, + "grad_norm": 8.134697727474459, + "learning_rate": 9.973532797484403e-06, + "loss": 18.2258, + "step": 3381 + }, + { + "epoch": 0.06182024238214488, + "grad_norm": 7.605585006406203, + "learning_rate": 9.973502371713359e-06, + "loss": 18.0899, + "step": 3382 + }, + { + "epoch": 0.061838521578591404, + "grad_norm": 7.353451548277089, + "learning_rate": 9.973471928510621e-06, + "loss": 17.7314, + "step": 3383 + }, + { + "epoch": 0.061856800775037926, + "grad_norm": 7.641356288460134, + "learning_rate": 9.973441467876298e-06, + "loss": 17.9315, + "step": 3384 + }, + { + "epoch": 0.061875079971484455, + "grad_norm": 7.034745775133391, + "learning_rate": 9.97341098981049e-06, + "loss": 17.6319, + "step": 3385 + }, + { + "epoch": 0.06189335916793098, + "grad_norm": 8.052019579235315, + "learning_rate": 9.973380494313312e-06, + "loss": 18.3079, + "step": 3386 + }, + { + "epoch": 0.061911638364377505, + "grad_norm": 7.919250964595605, + "learning_rate": 9.973349981384864e-06, + "loss": 18.1278, + "step": 3387 + }, + { + "epoch": 0.06192991756082403, + "grad_norm": 7.239696372922274, + "learning_rate": 9.973319451025256e-06, + "loss": 17.9743, + "step": 3388 + }, + { + "epoch": 0.06194819675727055, + "grad_norm": 6.624433741140478, + "learning_rate": 9.973288903234597e-06, + "loss": 17.6247, + "step": 3389 + }, + { + "epoch": 0.06196647595371708, + "grad_norm": 7.931092143699261, + "learning_rate": 9.97325833801299e-06, + "loss": 18.0357, + "step": 3390 + }, + { + "epoch": 0.0619847551501636, + "grad_norm": 7.642311280538421, + "learning_rate": 9.973227755360547e-06, + "loss": 17.9035, + "step": 3391 + }, + { + "epoch": 0.06200303434661012, + "grad_norm": 7.292890780594306, + "learning_rate": 9.973197155277368e-06, + "loss": 17.7307, + "step": 3392 + }, + { + "epoch": 0.06202131354305665, + "grad_norm": 7.668085599362952, + "learning_rate": 9.973166537763568e-06, + "loss": 17.962, + "step": 3393 + }, + { + "epoch": 0.06203959273950317, + "grad_norm": 7.360767722070579, + "learning_rate": 9.973135902819249e-06, + "loss": 17.6829, + "step": 3394 + }, + { + "epoch": 0.06205787193594969, + "grad_norm": 8.017153441352935, + "learning_rate": 9.973105250444522e-06, + "loss": 18.1401, + "step": 3395 + }, + { + "epoch": 0.06207615113239622, + "grad_norm": 7.94897649970752, + "learning_rate": 9.97307458063949e-06, + "loss": 17.9615, + "step": 3396 + }, + { + "epoch": 0.062094430328842744, + "grad_norm": 6.8966507827622445, + "learning_rate": 9.973043893404264e-06, + "loss": 17.6592, + "step": 3397 + }, + { + "epoch": 0.062112709525289266, + "grad_norm": 7.875780027846666, + "learning_rate": 9.97301318873895e-06, + "loss": 18.0234, + "step": 3398 + }, + { + "epoch": 0.062130988721735794, + "grad_norm": 8.625055502829102, + "learning_rate": 9.97298246664366e-06, + "loss": 18.2048, + "step": 3399 + }, + { + "epoch": 0.062149267918182316, + "grad_norm": 9.224413111984235, + "learning_rate": 9.972951727118493e-06, + "loss": 18.4906, + "step": 3400 + }, + { + "epoch": 0.06216754711462884, + "grad_norm": 8.224259994090788, + "learning_rate": 9.972920970163566e-06, + "loss": 17.6905, + "step": 3401 + }, + { + "epoch": 0.062185826311075366, + "grad_norm": 8.59232117911046, + "learning_rate": 9.972890195778982e-06, + "loss": 18.0538, + "step": 3402 + }, + { + "epoch": 0.06220410550752189, + "grad_norm": 6.533585167274885, + "learning_rate": 9.972859403964848e-06, + "loss": 17.3578, + "step": 3403 + }, + { + "epoch": 
0.06222238470396842, + "grad_norm": 8.334520291081128, + "learning_rate": 9.972828594721272e-06, + "loss": 18.3773, + "step": 3404 + }, + { + "epoch": 0.06224066390041494, + "grad_norm": 8.596086287415487, + "learning_rate": 9.972797768048366e-06, + "loss": 18.4815, + "step": 3405 + }, + { + "epoch": 0.06225894309686146, + "grad_norm": 6.803339182577718, + "learning_rate": 9.972766923946233e-06, + "loss": 17.5434, + "step": 3406 + }, + { + "epoch": 0.06227722229330799, + "grad_norm": 8.767755354595359, + "learning_rate": 9.972736062414985e-06, + "loss": 18.2653, + "step": 3407 + }, + { + "epoch": 0.06229550148975451, + "grad_norm": 8.634460171921882, + "learning_rate": 9.972705183454728e-06, + "loss": 17.8145, + "step": 3408 + }, + { + "epoch": 0.06231378068620103, + "grad_norm": 8.138618267251033, + "learning_rate": 9.972674287065572e-06, + "loss": 17.9175, + "step": 3409 + }, + { + "epoch": 0.06233205988264756, + "grad_norm": 8.218118739045496, + "learning_rate": 9.972643373247622e-06, + "loss": 18.3895, + "step": 3410 + }, + { + "epoch": 0.06235033907909408, + "grad_norm": 7.523550041610053, + "learning_rate": 9.97261244200099e-06, + "loss": 17.7614, + "step": 3411 + }, + { + "epoch": 0.062368618275540605, + "grad_norm": 6.8500248444360405, + "learning_rate": 9.972581493325781e-06, + "loss": 17.5702, + "step": 3412 + }, + { + "epoch": 0.062386897471987134, + "grad_norm": 7.924404364025476, + "learning_rate": 9.972550527222107e-06, + "loss": 18.0084, + "step": 3413 + }, + { + "epoch": 0.062405176668433655, + "grad_norm": 7.130359191753166, + "learning_rate": 9.972519543690076e-06, + "loss": 17.6316, + "step": 3414 + }, + { + "epoch": 0.06242345586488018, + "grad_norm": 6.881607236454959, + "learning_rate": 9.972488542729795e-06, + "loss": 17.5451, + "step": 3415 + }, + { + "epoch": 0.062441735061326706, + "grad_norm": 6.049960297311258, + "learning_rate": 9.972457524341372e-06, + "loss": 17.432, + "step": 3416 + }, + { + "epoch": 0.06246001425777323, + "grad_norm": 5.9836785326342214, + "learning_rate": 9.972426488524916e-06, + "loss": 17.3387, + "step": 3417 + }, + { + "epoch": 0.06247829345421975, + "grad_norm": 7.300580050033733, + "learning_rate": 9.972395435280539e-06, + "loss": 17.7574, + "step": 3418 + }, + { + "epoch": 0.06249657265066628, + "grad_norm": 7.969434870318104, + "learning_rate": 9.972364364608347e-06, + "loss": 18.3107, + "step": 3419 + }, + { + "epoch": 0.0625148518471128, + "grad_norm": 9.14055506522522, + "learning_rate": 9.972333276508449e-06, + "loss": 18.5925, + "step": 3420 + }, + { + "epoch": 0.06253313104355933, + "grad_norm": 8.606727375443928, + "learning_rate": 9.972302170980953e-06, + "loss": 18.408, + "step": 3421 + }, + { + "epoch": 0.06255141024000585, + "grad_norm": 7.729437250438963, + "learning_rate": 9.97227104802597e-06, + "loss": 17.821, + "step": 3422 + }, + { + "epoch": 0.06256968943645237, + "grad_norm": 7.356468589302877, + "learning_rate": 9.97223990764361e-06, + "loss": 18.1116, + "step": 3423 + }, + { + "epoch": 0.0625879686328989, + "grad_norm": 6.923663093282652, + "learning_rate": 9.97220874983398e-06, + "loss": 17.5378, + "step": 3424 + }, + { + "epoch": 0.06260624782934542, + "grad_norm": 7.690729163444116, + "learning_rate": 9.972177574597188e-06, + "loss": 17.9849, + "step": 3425 + }, + { + "epoch": 0.06262452702579195, + "grad_norm": 7.548743588291021, + "learning_rate": 9.972146381933348e-06, + "loss": 17.9003, + "step": 3426 + }, + { + "epoch": 0.06264280622223847, + "grad_norm": 9.100394284286327, + "learning_rate": 
9.972115171842565e-06, + "loss": 18.5092, + "step": 3427 + }, + { + "epoch": 0.062661085418685, + "grad_norm": 8.163577626288586, + "learning_rate": 9.972083944324948e-06, + "loss": 18.0888, + "step": 3428 + }, + { + "epoch": 0.06267936461513152, + "grad_norm": 6.535376515979392, + "learning_rate": 9.97205269938061e-06, + "loss": 17.5992, + "step": 3429 + }, + { + "epoch": 0.06269764381157804, + "grad_norm": 8.565121635484441, + "learning_rate": 9.972021437009659e-06, + "loss": 18.2891, + "step": 3430 + }, + { + "epoch": 0.06271592300802457, + "grad_norm": 7.981921959868312, + "learning_rate": 9.971990157212203e-06, + "loss": 17.9579, + "step": 3431 + }, + { + "epoch": 0.0627342022044711, + "grad_norm": 8.589852885032029, + "learning_rate": 9.971958859988356e-06, + "loss": 18.361, + "step": 3432 + }, + { + "epoch": 0.06275248140091762, + "grad_norm": 7.383544479894661, + "learning_rate": 9.971927545338222e-06, + "loss": 17.6967, + "step": 3433 + }, + { + "epoch": 0.06277076059736414, + "grad_norm": 8.73758303850346, + "learning_rate": 9.971896213261913e-06, + "loss": 18.295, + "step": 3434 + }, + { + "epoch": 0.06278903979381066, + "grad_norm": 7.249676291582927, + "learning_rate": 9.971864863759539e-06, + "loss": 17.9134, + "step": 3435 + }, + { + "epoch": 0.06280731899025718, + "grad_norm": 6.715105176458013, + "learning_rate": 9.971833496831212e-06, + "loss": 17.5182, + "step": 3436 + }, + { + "epoch": 0.06282559818670372, + "grad_norm": 7.243179566201054, + "learning_rate": 9.97180211247704e-06, + "loss": 17.8722, + "step": 3437 + }, + { + "epoch": 0.06284387738315024, + "grad_norm": 9.627863930721766, + "learning_rate": 9.971770710697132e-06, + "loss": 18.5971, + "step": 3438 + }, + { + "epoch": 0.06286215657959676, + "grad_norm": 7.2162061993454545, + "learning_rate": 9.9717392914916e-06, + "loss": 17.6061, + "step": 3439 + }, + { + "epoch": 0.06288043577604328, + "grad_norm": 7.9558665182817165, + "learning_rate": 9.971707854860552e-06, + "loss": 17.984, + "step": 3440 + }, + { + "epoch": 0.0628987149724898, + "grad_norm": 7.062158461069705, + "learning_rate": 9.9716764008041e-06, + "loss": 17.6622, + "step": 3441 + }, + { + "epoch": 0.06291699416893633, + "grad_norm": 8.35263761669116, + "learning_rate": 9.971644929322352e-06, + "loss": 18.0986, + "step": 3442 + }, + { + "epoch": 0.06293527336538286, + "grad_norm": 6.708141528712058, + "learning_rate": 9.971613440415423e-06, + "loss": 17.6747, + "step": 3443 + }, + { + "epoch": 0.06295355256182938, + "grad_norm": 8.084755472624233, + "learning_rate": 9.971581934083419e-06, + "loss": 18.0422, + "step": 3444 + }, + { + "epoch": 0.0629718317582759, + "grad_norm": 6.789099319257001, + "learning_rate": 9.971550410326452e-06, + "loss": 17.6722, + "step": 3445 + }, + { + "epoch": 0.06299011095472243, + "grad_norm": 8.053466806811533, + "learning_rate": 9.971518869144632e-06, + "loss": 18.0015, + "step": 3446 + }, + { + "epoch": 0.06300839015116895, + "grad_norm": 7.713966160652883, + "learning_rate": 9.971487310538068e-06, + "loss": 17.8359, + "step": 3447 + }, + { + "epoch": 0.06302666934761549, + "grad_norm": 6.868844053402038, + "learning_rate": 9.971455734506875e-06, + "loss": 17.6808, + "step": 3448 + }, + { + "epoch": 0.06304494854406201, + "grad_norm": 7.828485730439966, + "learning_rate": 9.97142414105116e-06, + "loss": 18.2127, + "step": 3449 + }, + { + "epoch": 0.06306322774050853, + "grad_norm": 7.415642665524057, + "learning_rate": 9.971392530171034e-06, + "loss": 17.7132, + "step": 3450 + }, + { + "epoch": 
0.06308150693695505, + "grad_norm": 8.8612358790264, + "learning_rate": 9.971360901866609e-06, + "loss": 18.7788, + "step": 3451 + }, + { + "epoch": 0.06309978613340157, + "grad_norm": 7.967788386559646, + "learning_rate": 9.971329256137996e-06, + "loss": 17.5848, + "step": 3452 + }, + { + "epoch": 0.0631180653298481, + "grad_norm": 7.6248665283854535, + "learning_rate": 9.971297592985305e-06, + "loss": 17.7204, + "step": 3453 + }, + { + "epoch": 0.06313634452629463, + "grad_norm": 7.280807643531678, + "learning_rate": 9.971265912408647e-06, + "loss": 17.9504, + "step": 3454 + }, + { + "epoch": 0.06315462372274115, + "grad_norm": 9.04288844435571, + "learning_rate": 9.971234214408135e-06, + "loss": 18.5277, + "step": 3455 + }, + { + "epoch": 0.06317290291918767, + "grad_norm": 6.844101706341605, + "learning_rate": 9.971202498983878e-06, + "loss": 17.5276, + "step": 3456 + }, + { + "epoch": 0.0631911821156342, + "grad_norm": 6.796439532736444, + "learning_rate": 9.971170766135986e-06, + "loss": 17.5491, + "step": 3457 + }, + { + "epoch": 0.06320946131208072, + "grad_norm": 8.0182914134866, + "learning_rate": 9.971139015864573e-06, + "loss": 18.0436, + "step": 3458 + }, + { + "epoch": 0.06322774050852724, + "grad_norm": 7.014998252217009, + "learning_rate": 9.97110724816975e-06, + "loss": 17.7785, + "step": 3459 + }, + { + "epoch": 0.06324601970497377, + "grad_norm": 7.01435714493046, + "learning_rate": 9.971075463051625e-06, + "loss": 17.6574, + "step": 3460 + }, + { + "epoch": 0.0632642989014203, + "grad_norm": 7.469032959375107, + "learning_rate": 9.971043660510313e-06, + "loss": 17.9465, + "step": 3461 + }, + { + "epoch": 0.06328257809786682, + "grad_norm": 7.121677354722678, + "learning_rate": 9.971011840545925e-06, + "loss": 17.7377, + "step": 3462 + }, + { + "epoch": 0.06330085729431334, + "grad_norm": 6.821687248846872, + "learning_rate": 9.970980003158573e-06, + "loss": 17.4453, + "step": 3463 + }, + { + "epoch": 0.06331913649075986, + "grad_norm": 8.478505236474815, + "learning_rate": 9.970948148348365e-06, + "loss": 17.7499, + "step": 3464 + }, + { + "epoch": 0.0633374156872064, + "grad_norm": 7.661870204257442, + "learning_rate": 9.970916276115416e-06, + "loss": 17.9667, + "step": 3465 + }, + { + "epoch": 0.06335569488365292, + "grad_norm": 9.228132847856529, + "learning_rate": 9.970884386459835e-06, + "loss": 18.3447, + "step": 3466 + }, + { + "epoch": 0.06337397408009944, + "grad_norm": 7.207955661898897, + "learning_rate": 9.970852479381739e-06, + "loss": 17.6804, + "step": 3467 + }, + { + "epoch": 0.06339225327654596, + "grad_norm": 7.677179675585283, + "learning_rate": 9.970820554881235e-06, + "loss": 17.9184, + "step": 3468 + }, + { + "epoch": 0.06341053247299248, + "grad_norm": 7.036440996411475, + "learning_rate": 9.970788612958435e-06, + "loss": 17.6884, + "step": 3469 + }, + { + "epoch": 0.063428811669439, + "grad_norm": 6.1990578642071785, + "learning_rate": 9.970756653613454e-06, + "loss": 17.3292, + "step": 3470 + }, + { + "epoch": 0.06344709086588554, + "grad_norm": 6.978394586225296, + "learning_rate": 9.970724676846401e-06, + "loss": 17.8265, + "step": 3471 + }, + { + "epoch": 0.06346537006233206, + "grad_norm": 7.80215406458022, + "learning_rate": 9.97069268265739e-06, + "loss": 17.8772, + "step": 3472 + }, + { + "epoch": 0.06348364925877859, + "grad_norm": 6.220208366230502, + "learning_rate": 9.970660671046533e-06, + "loss": 17.3685, + "step": 3473 + }, + { + "epoch": 0.0635019284552251, + "grad_norm": 7.043690752149071, + "learning_rate": 9.97062864201394e-06, 
+ "loss": 17.2635, + "step": 3474 + }, + { + "epoch": 0.06352020765167163, + "grad_norm": 7.7375271177917435, + "learning_rate": 9.970596595559727e-06, + "loss": 17.8664, + "step": 3475 + }, + { + "epoch": 0.06353848684811815, + "grad_norm": 6.803555219732546, + "learning_rate": 9.970564531684005e-06, + "loss": 17.731, + "step": 3476 + }, + { + "epoch": 0.06355676604456469, + "grad_norm": 9.905881943875691, + "learning_rate": 9.970532450386883e-06, + "loss": 18.3341, + "step": 3477 + }, + { + "epoch": 0.06357504524101121, + "grad_norm": 7.17266199032207, + "learning_rate": 9.970500351668476e-06, + "loss": 17.8205, + "step": 3478 + }, + { + "epoch": 0.06359332443745773, + "grad_norm": 8.859449122632668, + "learning_rate": 9.970468235528898e-06, + "loss": 18.7001, + "step": 3479 + }, + { + "epoch": 0.06361160363390425, + "grad_norm": 7.684843891034342, + "learning_rate": 9.97043610196826e-06, + "loss": 17.6728, + "step": 3480 + }, + { + "epoch": 0.06362988283035077, + "grad_norm": 7.3613624030745965, + "learning_rate": 9.970403950986675e-06, + "loss": 17.8042, + "step": 3481 + }, + { + "epoch": 0.06364816202679731, + "grad_norm": 7.933813741236401, + "learning_rate": 9.970371782584254e-06, + "loss": 17.8566, + "step": 3482 + }, + { + "epoch": 0.06366644122324383, + "grad_norm": 7.20114050981594, + "learning_rate": 9.970339596761113e-06, + "loss": 17.7873, + "step": 3483 + }, + { + "epoch": 0.06368472041969035, + "grad_norm": 6.539420780296843, + "learning_rate": 9.970307393517363e-06, + "loss": 17.6053, + "step": 3484 + }, + { + "epoch": 0.06370299961613687, + "grad_norm": 7.198210037702214, + "learning_rate": 9.970275172853116e-06, + "loss": 17.8825, + "step": 3485 + }, + { + "epoch": 0.0637212788125834, + "grad_norm": 6.423065344478874, + "learning_rate": 9.970242934768486e-06, + "loss": 17.1835, + "step": 3486 + }, + { + "epoch": 0.06373955800902992, + "grad_norm": 9.592006472164796, + "learning_rate": 9.970210679263585e-06, + "loss": 18.8019, + "step": 3487 + }, + { + "epoch": 0.06375783720547645, + "grad_norm": 8.381054912742002, + "learning_rate": 9.970178406338528e-06, + "loss": 17.8104, + "step": 3488 + }, + { + "epoch": 0.06377611640192297, + "grad_norm": 6.2245779054760995, + "learning_rate": 9.970146115993426e-06, + "loss": 17.3555, + "step": 3489 + }, + { + "epoch": 0.0637943955983695, + "grad_norm": 8.6187944119128, + "learning_rate": 9.970113808228395e-06, + "loss": 18.4059, + "step": 3490 + }, + { + "epoch": 0.06381267479481602, + "grad_norm": 6.55115468548905, + "learning_rate": 9.970081483043545e-06, + "loss": 17.3778, + "step": 3491 + }, + { + "epoch": 0.06383095399126254, + "grad_norm": 7.6721607603954, + "learning_rate": 9.970049140438991e-06, + "loss": 18.1595, + "step": 3492 + }, + { + "epoch": 0.06384923318770906, + "grad_norm": 8.173171947955758, + "learning_rate": 9.970016780414844e-06, + "loss": 18.1178, + "step": 3493 + }, + { + "epoch": 0.0638675123841556, + "grad_norm": 8.001892357122205, + "learning_rate": 9.969984402971223e-06, + "loss": 17.9552, + "step": 3494 + }, + { + "epoch": 0.06388579158060212, + "grad_norm": 8.106211428069072, + "learning_rate": 9.969952008108236e-06, + "loss": 17.9411, + "step": 3495 + }, + { + "epoch": 0.06390407077704864, + "grad_norm": 6.928555843489604, + "learning_rate": 9.969919595825999e-06, + "loss": 17.5931, + "step": 3496 + }, + { + "epoch": 0.06392234997349516, + "grad_norm": 9.80360507054187, + "learning_rate": 9.969887166124625e-06, + "loss": 18.8677, + "step": 3497 + }, + { + "epoch": 0.06394062916994168, + "grad_norm": 
6.728350926935754, + "learning_rate": 9.969854719004227e-06, + "loss": 17.6105, + "step": 3498 + }, + { + "epoch": 0.06395890836638822, + "grad_norm": 7.422988096770044, + "learning_rate": 9.96982225446492e-06, + "loss": 17.9216, + "step": 3499 + }, + { + "epoch": 0.06397718756283474, + "grad_norm": 8.024234797780116, + "learning_rate": 9.969789772506817e-06, + "loss": 18.2083, + "step": 3500 + }, + { + "epoch": 0.06399546675928126, + "grad_norm": 8.045249821909978, + "learning_rate": 9.969757273130032e-06, + "loss": 18.256, + "step": 3501 + }, + { + "epoch": 0.06401374595572779, + "grad_norm": 7.986518270825566, + "learning_rate": 9.96972475633468e-06, + "loss": 18.283, + "step": 3502 + }, + { + "epoch": 0.06403202515217431, + "grad_norm": 7.573442982924717, + "learning_rate": 9.969692222120875e-06, + "loss": 18.0723, + "step": 3503 + }, + { + "epoch": 0.06405030434862083, + "grad_norm": 7.784550718432774, + "learning_rate": 9.969659670488728e-06, + "loss": 17.801, + "step": 3504 + }, + { + "epoch": 0.06406858354506736, + "grad_norm": 8.831415996964335, + "learning_rate": 9.969627101438356e-06, + "loss": 17.9731, + "step": 3505 + }, + { + "epoch": 0.06408686274151389, + "grad_norm": 7.652962288303948, + "learning_rate": 9.969594514969871e-06, + "loss": 17.7879, + "step": 3506 + }, + { + "epoch": 0.06410514193796041, + "grad_norm": 7.566719122832205, + "learning_rate": 9.96956191108339e-06, + "loss": 17.9537, + "step": 3507 + }, + { + "epoch": 0.06412342113440693, + "grad_norm": 6.850498608584895, + "learning_rate": 9.969529289779024e-06, + "loss": 17.5989, + "step": 3508 + }, + { + "epoch": 0.06414170033085345, + "grad_norm": 7.642963557059988, + "learning_rate": 9.96949665105689e-06, + "loss": 18.1601, + "step": 3509 + }, + { + "epoch": 0.06415997952729997, + "grad_norm": 6.92207305517525, + "learning_rate": 9.9694639949171e-06, + "loss": 17.8639, + "step": 3510 + }, + { + "epoch": 0.06417825872374651, + "grad_norm": 6.9900197831105135, + "learning_rate": 9.969431321359773e-06, + "loss": 18.0146, + "step": 3511 + }, + { + "epoch": 0.06419653792019303, + "grad_norm": 7.962592126136193, + "learning_rate": 9.969398630385019e-06, + "loss": 18.0781, + "step": 3512 + }, + { + "epoch": 0.06421481711663955, + "grad_norm": 8.439722553655928, + "learning_rate": 9.969365921992955e-06, + "loss": 17.9793, + "step": 3513 + }, + { + "epoch": 0.06423309631308607, + "grad_norm": 7.4717803840423835, + "learning_rate": 9.969333196183693e-06, + "loss": 17.9807, + "step": 3514 + }, + { + "epoch": 0.0642513755095326, + "grad_norm": 7.821943658249452, + "learning_rate": 9.96930045295735e-06, + "loss": 18.2078, + "step": 3515 + }, + { + "epoch": 0.06426965470597913, + "grad_norm": 7.15850318514372, + "learning_rate": 9.969267692314039e-06, + "loss": 17.624, + "step": 3516 + }, + { + "epoch": 0.06428793390242565, + "grad_norm": 7.166171418224422, + "learning_rate": 9.969234914253877e-06, + "loss": 17.6763, + "step": 3517 + }, + { + "epoch": 0.06430621309887218, + "grad_norm": 7.645142143798253, + "learning_rate": 9.969202118776979e-06, + "loss": 18.1204, + "step": 3518 + }, + { + "epoch": 0.0643244922953187, + "grad_norm": 7.800160231317408, + "learning_rate": 9.969169305883458e-06, + "loss": 17.9998, + "step": 3519 + }, + { + "epoch": 0.06434277149176522, + "grad_norm": 6.9924045145022955, + "learning_rate": 9.969136475573429e-06, + "loss": 17.7088, + "step": 3520 + }, + { + "epoch": 0.06436105068821174, + "grad_norm": 7.876068104956508, + "learning_rate": 9.969103627847008e-06, + "loss": 17.8554, + "step": 
3521 + }, + { + "epoch": 0.06437932988465828, + "grad_norm": 7.203601413722107, + "learning_rate": 9.969070762704311e-06, + "loss": 17.8915, + "step": 3522 + }, + { + "epoch": 0.0643976090811048, + "grad_norm": 7.71843516495048, + "learning_rate": 9.96903788014545e-06, + "loss": 18.3282, + "step": 3523 + }, + { + "epoch": 0.06441588827755132, + "grad_norm": 7.691478339940075, + "learning_rate": 9.969004980170546e-06, + "loss": 18.2412, + "step": 3524 + }, + { + "epoch": 0.06443416747399784, + "grad_norm": 6.741784960109834, + "learning_rate": 9.968972062779708e-06, + "loss": 17.612, + "step": 3525 + }, + { + "epoch": 0.06445244667044436, + "grad_norm": 9.279574291796338, + "learning_rate": 9.968939127973055e-06, + "loss": 17.8691, + "step": 3526 + }, + { + "epoch": 0.06447072586689089, + "grad_norm": 8.218887362358476, + "learning_rate": 9.9689061757507e-06, + "loss": 18.3941, + "step": 3527 + }, + { + "epoch": 0.06448900506333742, + "grad_norm": 9.030415553386225, + "learning_rate": 9.968873206112764e-06, + "loss": 18.4235, + "step": 3528 + }, + { + "epoch": 0.06450728425978394, + "grad_norm": 8.968290073544493, + "learning_rate": 9.968840219059355e-06, + "loss": 18.5045, + "step": 3529 + }, + { + "epoch": 0.06452556345623046, + "grad_norm": 6.169962621475131, + "learning_rate": 9.968807214590592e-06, + "loss": 17.5488, + "step": 3530 + }, + { + "epoch": 0.06454384265267699, + "grad_norm": 8.821595914720909, + "learning_rate": 9.968774192706593e-06, + "loss": 18.3084, + "step": 3531 + }, + { + "epoch": 0.06456212184912351, + "grad_norm": 6.222977406102921, + "learning_rate": 9.96874115340747e-06, + "loss": 17.3397, + "step": 3532 + }, + { + "epoch": 0.06458040104557004, + "grad_norm": 7.376944655592562, + "learning_rate": 9.968708096693343e-06, + "loss": 17.4941, + "step": 3533 + }, + { + "epoch": 0.06459868024201657, + "grad_norm": 7.297119054188936, + "learning_rate": 9.968675022564322e-06, + "loss": 17.7489, + "step": 3534 + }, + { + "epoch": 0.06461695943846309, + "grad_norm": 6.189290074236147, + "learning_rate": 9.968641931020528e-06, + "loss": 17.1373, + "step": 3535 + }, + { + "epoch": 0.06463523863490961, + "grad_norm": 7.243176252691162, + "learning_rate": 9.968608822062075e-06, + "loss": 17.8784, + "step": 3536 + }, + { + "epoch": 0.06465351783135613, + "grad_norm": 7.9600401586531495, + "learning_rate": 9.968575695689078e-06, + "loss": 18.1522, + "step": 3537 + }, + { + "epoch": 0.06467179702780265, + "grad_norm": 8.094219192503523, + "learning_rate": 9.968542551901657e-06, + "loss": 18.0917, + "step": 3538 + }, + { + "epoch": 0.06469007622424919, + "grad_norm": 6.900213326533915, + "learning_rate": 9.968509390699923e-06, + "loss": 17.6893, + "step": 3539 + }, + { + "epoch": 0.06470835542069571, + "grad_norm": 7.771757318070501, + "learning_rate": 9.968476212083994e-06, + "loss": 17.6794, + "step": 3540 + }, + { + "epoch": 0.06472663461714223, + "grad_norm": 7.796128646024294, + "learning_rate": 9.96844301605399e-06, + "loss": 18.0542, + "step": 3541 + }, + { + "epoch": 0.06474491381358875, + "grad_norm": 6.333926310879374, + "learning_rate": 9.968409802610024e-06, + "loss": 17.492, + "step": 3542 + }, + { + "epoch": 0.06476319301003527, + "grad_norm": 7.893493169383418, + "learning_rate": 9.96837657175221e-06, + "loss": 17.8904, + "step": 3543 + }, + { + "epoch": 0.0647814722064818, + "grad_norm": 7.682323622852044, + "learning_rate": 9.96834332348067e-06, + "loss": 18.0747, + "step": 3544 + }, + { + "epoch": 0.06479975140292833, + "grad_norm": 7.6343847131599825, + 
"learning_rate": 9.968310057795516e-06, + "loss": 17.9874, + "step": 3545 + }, + { + "epoch": 0.06481803059937485, + "grad_norm": 7.70931668635223, + "learning_rate": 9.968276774696867e-06, + "loss": 18.1432, + "step": 3546 + }, + { + "epoch": 0.06483630979582138, + "grad_norm": 7.042406319373183, + "learning_rate": 9.96824347418484e-06, + "loss": 17.7371, + "step": 3547 + }, + { + "epoch": 0.0648545889922679, + "grad_norm": 7.032820555371767, + "learning_rate": 9.96821015625955e-06, + "loss": 17.6376, + "step": 3548 + }, + { + "epoch": 0.06487286818871442, + "grad_norm": 8.465692286677994, + "learning_rate": 9.968176820921113e-06, + "loss": 18.5406, + "step": 3549 + }, + { + "epoch": 0.06489114738516095, + "grad_norm": 8.325907663854666, + "learning_rate": 9.968143468169651e-06, + "loss": 17.8953, + "step": 3550 + }, + { + "epoch": 0.06490942658160748, + "grad_norm": 6.391093030845671, + "learning_rate": 9.968110098005274e-06, + "loss": 17.4928, + "step": 3551 + }, + { + "epoch": 0.064927705778054, + "grad_norm": 8.014652913343959, + "learning_rate": 9.968076710428103e-06, + "loss": 18.3309, + "step": 3552 + }, + { + "epoch": 0.06494598497450052, + "grad_norm": 7.211976385444955, + "learning_rate": 9.968043305438256e-06, + "loss": 17.7902, + "step": 3553 + }, + { + "epoch": 0.06496426417094704, + "grad_norm": 8.320694553004552, + "learning_rate": 9.968009883035847e-06, + "loss": 18.1843, + "step": 3554 + }, + { + "epoch": 0.06498254336739356, + "grad_norm": 7.695379480450924, + "learning_rate": 9.967976443220994e-06, + "loss": 18.1156, + "step": 3555 + }, + { + "epoch": 0.0650008225638401, + "grad_norm": 7.5360398867864555, + "learning_rate": 9.967942985993815e-06, + "loss": 17.8755, + "step": 3556 + }, + { + "epoch": 0.06501910176028662, + "grad_norm": 7.394783515888027, + "learning_rate": 9.967909511354427e-06, + "loss": 17.8335, + "step": 3557 + }, + { + "epoch": 0.06503738095673314, + "grad_norm": 6.878805886311911, + "learning_rate": 9.967876019302947e-06, + "loss": 17.758, + "step": 3558 + }, + { + "epoch": 0.06505566015317966, + "grad_norm": 7.069124085978708, + "learning_rate": 9.967842509839493e-06, + "loss": 17.4327, + "step": 3559 + }, + { + "epoch": 0.06507393934962619, + "grad_norm": 7.8435432108511085, + "learning_rate": 9.967808982964183e-06, + "loss": 17.4035, + "step": 3560 + }, + { + "epoch": 0.06509221854607271, + "grad_norm": 8.897590660997398, + "learning_rate": 9.967775438677131e-06, + "loss": 18.4507, + "step": 3561 + }, + { + "epoch": 0.06511049774251924, + "grad_norm": 7.665305953256472, + "learning_rate": 9.967741876978459e-06, + "loss": 17.8019, + "step": 3562 + }, + { + "epoch": 0.06512877693896577, + "grad_norm": 7.7893858413132655, + "learning_rate": 9.967708297868282e-06, + "loss": 17.9123, + "step": 3563 + }, + { + "epoch": 0.06514705613541229, + "grad_norm": 7.407263317066761, + "learning_rate": 9.96767470134672e-06, + "loss": 17.5609, + "step": 3564 + }, + { + "epoch": 0.06516533533185881, + "grad_norm": 8.131104674091887, + "learning_rate": 9.967641087413888e-06, + "loss": 18.0436, + "step": 3565 + }, + { + "epoch": 0.06518361452830533, + "grad_norm": 7.596401755151315, + "learning_rate": 9.967607456069905e-06, + "loss": 17.6687, + "step": 3566 + }, + { + "epoch": 0.06520189372475187, + "grad_norm": 7.74646869203929, + "learning_rate": 9.96757380731489e-06, + "loss": 17.9164, + "step": 3567 + }, + { + "epoch": 0.06522017292119839, + "grad_norm": 7.045659198787928, + "learning_rate": 9.967540141148959e-06, + "loss": 17.8673, + "step": 3568 + }, + { + 
"epoch": 0.06523845211764491, + "grad_norm": 8.146337097205732, + "learning_rate": 9.96750645757223e-06, + "loss": 17.9246, + "step": 3569 + }, + { + "epoch": 0.06525673131409143, + "grad_norm": 8.405984154371131, + "learning_rate": 9.967472756584823e-06, + "loss": 18.0451, + "step": 3570 + }, + { + "epoch": 0.06527501051053795, + "grad_norm": 6.775109016848172, + "learning_rate": 9.967439038186855e-06, + "loss": 17.445, + "step": 3571 + }, + { + "epoch": 0.06529328970698448, + "grad_norm": 7.400385393023171, + "learning_rate": 9.967405302378444e-06, + "loss": 17.738, + "step": 3572 + }, + { + "epoch": 0.06531156890343101, + "grad_norm": 7.833313423241914, + "learning_rate": 9.96737154915971e-06, + "loss": 18.1323, + "step": 3573 + }, + { + "epoch": 0.06532984809987753, + "grad_norm": 7.106764032621514, + "learning_rate": 9.967337778530769e-06, + "loss": 17.7625, + "step": 3574 + }, + { + "epoch": 0.06534812729632405, + "grad_norm": 10.22666235137378, + "learning_rate": 9.967303990491738e-06, + "loss": 18.1572, + "step": 3575 + }, + { + "epoch": 0.06536640649277058, + "grad_norm": 8.277992830655526, + "learning_rate": 9.96727018504274e-06, + "loss": 18.3004, + "step": 3576 + }, + { + "epoch": 0.0653846856892171, + "grad_norm": 7.567160386383156, + "learning_rate": 9.96723636218389e-06, + "loss": 17.5543, + "step": 3577 + }, + { + "epoch": 0.06540296488566362, + "grad_norm": 7.00449520450629, + "learning_rate": 9.967202521915307e-06, + "loss": 17.7054, + "step": 3578 + }, + { + "epoch": 0.06542124408211016, + "grad_norm": 7.73123805219894, + "learning_rate": 9.96716866423711e-06, + "loss": 17.8919, + "step": 3579 + }, + { + "epoch": 0.06543952327855668, + "grad_norm": 6.885900831776829, + "learning_rate": 9.967134789149419e-06, + "loss": 17.3299, + "step": 3580 + }, + { + "epoch": 0.0654578024750032, + "grad_norm": 8.112229552955466, + "learning_rate": 9.967100896652352e-06, + "loss": 17.5914, + "step": 3581 + }, + { + "epoch": 0.06547608167144972, + "grad_norm": 6.4199801399130765, + "learning_rate": 9.967066986746026e-06, + "loss": 17.2268, + "step": 3582 + }, + { + "epoch": 0.06549436086789624, + "grad_norm": 7.675054421496131, + "learning_rate": 9.967033059430562e-06, + "loss": 17.9806, + "step": 3583 + }, + { + "epoch": 0.06551264006434278, + "grad_norm": 7.478719143186732, + "learning_rate": 9.96699911470608e-06, + "loss": 17.7654, + "step": 3584 + }, + { + "epoch": 0.0655309192607893, + "grad_norm": 8.405844893480488, + "learning_rate": 9.966965152572694e-06, + "loss": 18.1927, + "step": 3585 + }, + { + "epoch": 0.06554919845723582, + "grad_norm": 7.736951161334362, + "learning_rate": 9.966931173030528e-06, + "loss": 18.1067, + "step": 3586 + }, + { + "epoch": 0.06556747765368234, + "grad_norm": 7.903216192741018, + "learning_rate": 9.9668971760797e-06, + "loss": 17.7961, + "step": 3587 + }, + { + "epoch": 0.06558575685012887, + "grad_norm": 8.324500951199788, + "learning_rate": 9.966863161720326e-06, + "loss": 18.0361, + "step": 3588 + }, + { + "epoch": 0.06560403604657539, + "grad_norm": 6.013914722533633, + "learning_rate": 9.966829129952528e-06, + "loss": 17.18, + "step": 3589 + }, + { + "epoch": 0.06562231524302192, + "grad_norm": 8.545617171751642, + "learning_rate": 9.966795080776425e-06, + "loss": 18.2204, + "step": 3590 + }, + { + "epoch": 0.06564059443946844, + "grad_norm": 8.526968088011222, + "learning_rate": 9.966761014192138e-06, + "loss": 18.2618, + "step": 3591 + }, + { + "epoch": 0.06565887363591497, + "grad_norm": 6.06931638243933, + "learning_rate": 
9.966726930199784e-06, + "loss": 17.3711, + "step": 3592 + }, + { + "epoch": 0.06567715283236149, + "grad_norm": 9.795067314208612, + "learning_rate": 9.966692828799483e-06, + "loss": 18.8647, + "step": 3593 + }, + { + "epoch": 0.06569543202880801, + "grad_norm": 7.331816713163978, + "learning_rate": 9.966658709991352e-06, + "loss": 17.7088, + "step": 3594 + }, + { + "epoch": 0.06571371122525453, + "grad_norm": 7.639954185195284, + "learning_rate": 9.966624573775517e-06, + "loss": 18.0959, + "step": 3595 + }, + { + "epoch": 0.06573199042170107, + "grad_norm": 7.700168932923506, + "learning_rate": 9.96659042015209e-06, + "loss": 18.3082, + "step": 3596 + }, + { + "epoch": 0.06575026961814759, + "grad_norm": 9.13690457500145, + "learning_rate": 9.966556249121199e-06, + "loss": 18.2918, + "step": 3597 + }, + { + "epoch": 0.06576854881459411, + "grad_norm": 7.598414664686019, + "learning_rate": 9.966522060682957e-06, + "loss": 18.0308, + "step": 3598 + }, + { + "epoch": 0.06578682801104063, + "grad_norm": 7.102651558654934, + "learning_rate": 9.966487854837485e-06, + "loss": 17.8665, + "step": 3599 + }, + { + "epoch": 0.06580510720748715, + "grad_norm": 6.876939283695511, + "learning_rate": 9.966453631584906e-06, + "loss": 17.6633, + "step": 3600 + }, + { + "epoch": 0.06582338640393369, + "grad_norm": 8.127901195429326, + "learning_rate": 9.966419390925336e-06, + "loss": 18.1879, + "step": 3601 + }, + { + "epoch": 0.06584166560038021, + "grad_norm": 8.314206067549518, + "learning_rate": 9.9663851328589e-06, + "loss": 18.4616, + "step": 3602 + }, + { + "epoch": 0.06585994479682673, + "grad_norm": 7.930742515215014, + "learning_rate": 9.966350857385714e-06, + "loss": 18.1745, + "step": 3603 + }, + { + "epoch": 0.06587822399327325, + "grad_norm": 7.938715755344231, + "learning_rate": 9.966316564505897e-06, + "loss": 18.0051, + "step": 3604 + }, + { + "epoch": 0.06589650318971978, + "grad_norm": 7.412150240046248, + "learning_rate": 9.966282254219575e-06, + "loss": 17.49, + "step": 3605 + }, + { + "epoch": 0.0659147823861663, + "grad_norm": 8.901639329323402, + "learning_rate": 9.966247926526862e-06, + "loss": 18.5454, + "step": 3606 + }, + { + "epoch": 0.06593306158261283, + "grad_norm": 7.632601527695011, + "learning_rate": 9.96621358142788e-06, + "loss": 18.0513, + "step": 3607 + }, + { + "epoch": 0.06595134077905936, + "grad_norm": 7.813855987024707, + "learning_rate": 9.966179218922754e-06, + "loss": 17.5087, + "step": 3608 + }, + { + "epoch": 0.06596961997550588, + "grad_norm": 7.983668250431304, + "learning_rate": 9.966144839011597e-06, + "loss": 18.0178, + "step": 3609 + }, + { + "epoch": 0.0659878991719524, + "grad_norm": 7.489202970740367, + "learning_rate": 9.966110441694536e-06, + "loss": 17.7378, + "step": 3610 + }, + { + "epoch": 0.06600617836839892, + "grad_norm": 6.415050654753606, + "learning_rate": 9.966076026971688e-06, + "loss": 17.3449, + "step": 3611 + }, + { + "epoch": 0.06602445756484544, + "grad_norm": 7.864728428559402, + "learning_rate": 9.966041594843175e-06, + "loss": 18.1784, + "step": 3612 + }, + { + "epoch": 0.06604273676129198, + "grad_norm": 7.576611071022064, + "learning_rate": 9.966007145309115e-06, + "loss": 17.6168, + "step": 3613 + }, + { + "epoch": 0.0660610159577385, + "grad_norm": 7.670906182898218, + "learning_rate": 9.965972678369633e-06, + "loss": 18.0569, + "step": 3614 + }, + { + "epoch": 0.06607929515418502, + "grad_norm": 7.881212668285415, + "learning_rate": 9.965938194024846e-06, + "loss": 18.0989, + "step": 3615 + }, + { + "epoch": 
0.06609757435063154, + "grad_norm": 7.617339270567452, + "learning_rate": 9.965903692274878e-06, + "loss": 18.0277, + "step": 3616 + }, + { + "epoch": 0.06611585354707807, + "grad_norm": 7.220682379213172, + "learning_rate": 9.965869173119849e-06, + "loss": 17.8478, + "step": 3617 + }, + { + "epoch": 0.0661341327435246, + "grad_norm": 7.9974297402963614, + "learning_rate": 9.96583463655988e-06, + "loss": 18.1157, + "step": 3618 + }, + { + "epoch": 0.06615241193997112, + "grad_norm": 8.25383357327032, + "learning_rate": 9.96580008259509e-06, + "loss": 18.1823, + "step": 3619 + }, + { + "epoch": 0.06617069113641764, + "grad_norm": 9.085583064693404, + "learning_rate": 9.9657655112256e-06, + "loss": 18.2514, + "step": 3620 + }, + { + "epoch": 0.06618897033286417, + "grad_norm": 8.827278744364282, + "learning_rate": 9.965730922451535e-06, + "loss": 18.6595, + "step": 3621 + }, + { + "epoch": 0.06620724952931069, + "grad_norm": 6.40800024285416, + "learning_rate": 9.965696316273013e-06, + "loss": 17.4722, + "step": 3622 + }, + { + "epoch": 0.06622552872575721, + "grad_norm": 7.322330542321388, + "learning_rate": 9.965661692690158e-06, + "loss": 17.9229, + "step": 3623 + }, + { + "epoch": 0.06624380792220375, + "grad_norm": 8.63134294616613, + "learning_rate": 9.965627051703088e-06, + "loss": 18.2308, + "step": 3624 + }, + { + "epoch": 0.06626208711865027, + "grad_norm": 8.221315315450456, + "learning_rate": 9.965592393311927e-06, + "loss": 18.0214, + "step": 3625 + }, + { + "epoch": 0.06628036631509679, + "grad_norm": 7.948911081925447, + "learning_rate": 9.965557717516794e-06, + "loss": 17.9538, + "step": 3626 + }, + { + "epoch": 0.06629864551154331, + "grad_norm": 7.98964185910891, + "learning_rate": 9.965523024317814e-06, + "loss": 18.4565, + "step": 3627 + }, + { + "epoch": 0.06631692470798983, + "grad_norm": 6.571445289451289, + "learning_rate": 9.965488313715107e-06, + "loss": 17.5406, + "step": 3628 + }, + { + "epoch": 0.06633520390443635, + "grad_norm": 7.668087049288176, + "learning_rate": 9.965453585708791e-06, + "loss": 17.8423, + "step": 3629 + }, + { + "epoch": 0.06635348310088289, + "grad_norm": 9.127646705308155, + "learning_rate": 9.965418840298995e-06, + "loss": 18.2477, + "step": 3630 + }, + { + "epoch": 0.06637176229732941, + "grad_norm": 7.320556338144031, + "learning_rate": 9.965384077485834e-06, + "loss": 17.6702, + "step": 3631 + }, + { + "epoch": 0.06639004149377593, + "grad_norm": 8.18728420801698, + "learning_rate": 9.965349297269435e-06, + "loss": 18.4822, + "step": 3632 + }, + { + "epoch": 0.06640832069022246, + "grad_norm": 8.238194702977536, + "learning_rate": 9.965314499649917e-06, + "loss": 17.9435, + "step": 3633 + }, + { + "epoch": 0.06642659988666898, + "grad_norm": 7.770663751371553, + "learning_rate": 9.9652796846274e-06, + "loss": 17.8512, + "step": 3634 + }, + { + "epoch": 0.06644487908311551, + "grad_norm": 6.931472580092162, + "learning_rate": 9.965244852202011e-06, + "loss": 17.6323, + "step": 3635 + }, + { + "epoch": 0.06646315827956203, + "grad_norm": 7.978645203749096, + "learning_rate": 9.965210002373869e-06, + "loss": 18.0182, + "step": 3636 + }, + { + "epoch": 0.06648143747600856, + "grad_norm": 7.060176683857077, + "learning_rate": 9.965175135143098e-06, + "loss": 17.7228, + "step": 3637 + }, + { + "epoch": 0.06649971667245508, + "grad_norm": 6.91300065249346, + "learning_rate": 9.965140250509818e-06, + "loss": 18.0372, + "step": 3638 + }, + { + "epoch": 0.0665179958689016, + "grad_norm": 7.198185264977327, + "learning_rate": 
9.965105348474153e-06, + "loss": 17.6783, + "step": 3639 + }, + { + "epoch": 0.06653627506534812, + "grad_norm": 7.877918246822304, + "learning_rate": 9.965070429036223e-06, + "loss": 18.0394, + "step": 3640 + }, + { + "epoch": 0.06655455426179466, + "grad_norm": 7.018533476143148, + "learning_rate": 9.965035492196154e-06, + "loss": 17.5641, + "step": 3641 + }, + { + "epoch": 0.06657283345824118, + "grad_norm": 7.8852153569181525, + "learning_rate": 9.965000537954064e-06, + "loss": 17.7846, + "step": 3642 + }, + { + "epoch": 0.0665911126546877, + "grad_norm": 7.746320258295745, + "learning_rate": 9.96496556631008e-06, + "loss": 17.9326, + "step": 3643 + }, + { + "epoch": 0.06660939185113422, + "grad_norm": 7.22849019265795, + "learning_rate": 9.964930577264323e-06, + "loss": 17.7381, + "step": 3644 + }, + { + "epoch": 0.06662767104758074, + "grad_norm": 7.310177395722805, + "learning_rate": 9.964895570816912e-06, + "loss": 17.9833, + "step": 3645 + }, + { + "epoch": 0.06664595024402727, + "grad_norm": 6.936029700317219, + "learning_rate": 9.964860546967976e-06, + "loss": 17.7155, + "step": 3646 + }, + { + "epoch": 0.0666642294404738, + "grad_norm": 7.711533693081667, + "learning_rate": 9.964825505717633e-06, + "loss": 18.1309, + "step": 3647 + }, + { + "epoch": 0.06668250863692032, + "grad_norm": 7.779978389906678, + "learning_rate": 9.964790447066008e-06, + "loss": 17.9141, + "step": 3648 + }, + { + "epoch": 0.06670078783336685, + "grad_norm": 7.150644276614255, + "learning_rate": 9.964755371013225e-06, + "loss": 17.8392, + "step": 3649 + }, + { + "epoch": 0.06671906702981337, + "grad_norm": 8.12025679040642, + "learning_rate": 9.964720277559401e-06, + "loss": 18.2699, + "step": 3650 + }, + { + "epoch": 0.06673734622625989, + "grad_norm": 7.177619422772814, + "learning_rate": 9.964685166704666e-06, + "loss": 17.6499, + "step": 3651 + }, + { + "epoch": 0.06675562542270642, + "grad_norm": 6.934823447479643, + "learning_rate": 9.964650038449139e-06, + "loss": 17.798, + "step": 3652 + }, + { + "epoch": 0.06677390461915295, + "grad_norm": 6.407166485372936, + "learning_rate": 9.964614892792948e-06, + "loss": 17.2369, + "step": 3653 + }, + { + "epoch": 0.06679218381559947, + "grad_norm": 8.633064407304595, + "learning_rate": 9.96457972973621e-06, + "loss": 18.3923, + "step": 3654 + }, + { + "epoch": 0.06681046301204599, + "grad_norm": 7.686353050424286, + "learning_rate": 9.96454454927905e-06, + "loss": 17.9912, + "step": 3655 + }, + { + "epoch": 0.06682874220849251, + "grad_norm": 7.543979592879128, + "learning_rate": 9.964509351421593e-06, + "loss": 17.9041, + "step": 3656 + }, + { + "epoch": 0.06684702140493903, + "grad_norm": 7.682844748231194, + "learning_rate": 9.964474136163961e-06, + "loss": 18.1909, + "step": 3657 + }, + { + "epoch": 0.06686530060138557, + "grad_norm": 7.542047685677389, + "learning_rate": 9.96443890350628e-06, + "loss": 17.5756, + "step": 3658 + }, + { + "epoch": 0.06688357979783209, + "grad_norm": 7.144669916419882, + "learning_rate": 9.964403653448669e-06, + "loss": 17.7512, + "step": 3659 + }, + { + "epoch": 0.06690185899427861, + "grad_norm": 7.516784141610715, + "learning_rate": 9.964368385991254e-06, + "loss": 17.7277, + "step": 3660 + }, + { + "epoch": 0.06692013819072513, + "grad_norm": 6.809284532807876, + "learning_rate": 9.964333101134161e-06, + "loss": 17.6542, + "step": 3661 + }, + { + "epoch": 0.06693841738717166, + "grad_norm": 8.016323300616692, + "learning_rate": 9.964297798877509e-06, + "loss": 17.992, + "step": 3662 + }, + { + "epoch": 
0.06695669658361818, + "grad_norm": 7.236700239267418, + "learning_rate": 9.964262479221424e-06, + "loss": 17.6687, + "step": 3663 + }, + { + "epoch": 0.06697497578006471, + "grad_norm": 8.445816640265996, + "learning_rate": 9.964227142166033e-06, + "loss": 18.187, + "step": 3664 + }, + { + "epoch": 0.06699325497651124, + "grad_norm": 6.898403266475036, + "learning_rate": 9.964191787711453e-06, + "loss": 17.3689, + "step": 3665 + }, + { + "epoch": 0.06701153417295776, + "grad_norm": 7.603489925944102, + "learning_rate": 9.964156415857813e-06, + "loss": 18.1952, + "step": 3666 + }, + { + "epoch": 0.06702981336940428, + "grad_norm": 7.130434251094245, + "learning_rate": 9.964121026605236e-06, + "loss": 17.8387, + "step": 3667 + }, + { + "epoch": 0.0670480925658508, + "grad_norm": 7.173812988187238, + "learning_rate": 9.964085619953845e-06, + "loss": 17.7426, + "step": 3668 + }, + { + "epoch": 0.06706637176229734, + "grad_norm": 7.663182796601119, + "learning_rate": 9.964050195903764e-06, + "loss": 17.9353, + "step": 3669 + }, + { + "epoch": 0.06708465095874386, + "grad_norm": 7.781673745246008, + "learning_rate": 9.964014754455117e-06, + "loss": 17.958, + "step": 3670 + }, + { + "epoch": 0.06710293015519038, + "grad_norm": 6.557163244961542, + "learning_rate": 9.963979295608031e-06, + "loss": 17.3812, + "step": 3671 + }, + { + "epoch": 0.0671212093516369, + "grad_norm": 8.194036113397624, + "learning_rate": 9.963943819362628e-06, + "loss": 18.3379, + "step": 3672 + }, + { + "epoch": 0.06713948854808342, + "grad_norm": 7.0195021885865065, + "learning_rate": 9.963908325719034e-06, + "loss": 17.6027, + "step": 3673 + }, + { + "epoch": 0.06715776774452994, + "grad_norm": 6.562676858630681, + "learning_rate": 9.963872814677369e-06, + "loss": 17.4976, + "step": 3674 + }, + { + "epoch": 0.06717604694097648, + "grad_norm": 7.478989990416041, + "learning_rate": 9.963837286237763e-06, + "loss": 17.799, + "step": 3675 + }, + { + "epoch": 0.067194326137423, + "grad_norm": 8.057254508520662, + "learning_rate": 9.963801740400336e-06, + "loss": 18.1576, + "step": 3676 + }, + { + "epoch": 0.06721260533386952, + "grad_norm": 7.333838327509158, + "learning_rate": 9.963766177165216e-06, + "loss": 17.8813, + "step": 3677 + }, + { + "epoch": 0.06723088453031605, + "grad_norm": 7.007514023860895, + "learning_rate": 9.963730596532526e-06, + "loss": 17.8473, + "step": 3678 + }, + { + "epoch": 0.06724916372676257, + "grad_norm": 8.248034560183765, + "learning_rate": 9.963694998502392e-06, + "loss": 17.9892, + "step": 3679 + }, + { + "epoch": 0.06726744292320909, + "grad_norm": 7.664262096770969, + "learning_rate": 9.963659383074937e-06, + "loss": 17.9312, + "step": 3680 + }, + { + "epoch": 0.06728572211965562, + "grad_norm": 7.487524861207348, + "learning_rate": 9.963623750250285e-06, + "loss": 17.9139, + "step": 3681 + }, + { + "epoch": 0.06730400131610215, + "grad_norm": 7.707866711653239, + "learning_rate": 9.963588100028565e-06, + "loss": 17.8018, + "step": 3682 + }, + { + "epoch": 0.06732228051254867, + "grad_norm": 7.78447837652025, + "learning_rate": 9.963552432409897e-06, + "loss": 17.9661, + "step": 3683 + }, + { + "epoch": 0.06734055970899519, + "grad_norm": 8.626494614214739, + "learning_rate": 9.963516747394411e-06, + "loss": 18.0817, + "step": 3684 + }, + { + "epoch": 0.06735883890544171, + "grad_norm": 7.607945441358762, + "learning_rate": 9.963481044982227e-06, + "loss": 18.0393, + "step": 3685 + }, + { + "epoch": 0.06737711810188825, + "grad_norm": 6.902426308636471, + "learning_rate": 
9.963445325173473e-06, + "loss": 17.4743, + "step": 3686 + }, + { + "epoch": 0.06739539729833477, + "grad_norm": 8.311685125637775, + "learning_rate": 9.963409587968276e-06, + "loss": 17.8539, + "step": 3687 + }, + { + "epoch": 0.06741367649478129, + "grad_norm": 7.048286794631574, + "learning_rate": 9.963373833366758e-06, + "loss": 18.0255, + "step": 3688 + }, + { + "epoch": 0.06743195569122781, + "grad_norm": 7.1412325068410105, + "learning_rate": 9.963338061369045e-06, + "loss": 18.0242, + "step": 3689 + }, + { + "epoch": 0.06745023488767433, + "grad_norm": 7.348625419574826, + "learning_rate": 9.963302271975263e-06, + "loss": 17.469, + "step": 3690 + }, + { + "epoch": 0.06746851408412086, + "grad_norm": 16.995275294386243, + "learning_rate": 9.963266465185536e-06, + "loss": 18.8894, + "step": 3691 + }, + { + "epoch": 0.06748679328056739, + "grad_norm": 7.533170174551889, + "learning_rate": 9.963230640999993e-06, + "loss": 17.7024, + "step": 3692 + }, + { + "epoch": 0.06750507247701391, + "grad_norm": 7.248560779489402, + "learning_rate": 9.963194799418755e-06, + "loss": 17.7748, + "step": 3693 + }, + { + "epoch": 0.06752335167346044, + "grad_norm": 8.587210185826397, + "learning_rate": 9.963158940441953e-06, + "loss": 18.3314, + "step": 3694 + }, + { + "epoch": 0.06754163086990696, + "grad_norm": 8.66856194071746, + "learning_rate": 9.963123064069707e-06, + "loss": 18.6316, + "step": 3695 + }, + { + "epoch": 0.06755991006635348, + "grad_norm": 6.8005485687612275, + "learning_rate": 9.963087170302146e-06, + "loss": 17.366, + "step": 3696 + }, + { + "epoch": 0.0675781892628, + "grad_norm": 7.590585740008545, + "learning_rate": 9.963051259139395e-06, + "loss": 17.9532, + "step": 3697 + }, + { + "epoch": 0.06759646845924654, + "grad_norm": 7.415068222981645, + "learning_rate": 9.963015330581581e-06, + "loss": 17.8159, + "step": 3698 + }, + { + "epoch": 0.06761474765569306, + "grad_norm": 7.445670101225243, + "learning_rate": 9.962979384628829e-06, + "loss": 17.7052, + "step": 3699 + }, + { + "epoch": 0.06763302685213958, + "grad_norm": 6.826785358678163, + "learning_rate": 9.962943421281264e-06, + "loss": 17.737, + "step": 3700 + }, + { + "epoch": 0.0676513060485861, + "grad_norm": 7.432857338679918, + "learning_rate": 9.962907440539012e-06, + "loss": 18.2263, + "step": 3701 + }, + { + "epoch": 0.06766958524503262, + "grad_norm": 6.261111150705873, + "learning_rate": 9.962871442402203e-06, + "loss": 17.2048, + "step": 3702 + }, + { + "epoch": 0.06768786444147916, + "grad_norm": 7.454775341619104, + "learning_rate": 9.962835426870958e-06, + "loss": 17.999, + "step": 3703 + }, + { + "epoch": 0.06770614363792568, + "grad_norm": 7.56542599926941, + "learning_rate": 9.962799393945406e-06, + "loss": 18.0887, + "step": 3704 + }, + { + "epoch": 0.0677244228343722, + "grad_norm": 7.967020769632536, + "learning_rate": 9.962763343625672e-06, + "loss": 17.9031, + "step": 3705 + }, + { + "epoch": 0.06774270203081872, + "grad_norm": 7.667758830754452, + "learning_rate": 9.962727275911883e-06, + "loss": 17.941, + "step": 3706 + }, + { + "epoch": 0.06776098122726525, + "grad_norm": 7.1873448141762335, + "learning_rate": 9.962691190804167e-06, + "loss": 17.7057, + "step": 3707 + }, + { + "epoch": 0.06777926042371177, + "grad_norm": 7.984551617788175, + "learning_rate": 9.962655088302648e-06, + "loss": 18.1008, + "step": 3708 + }, + { + "epoch": 0.0677975396201583, + "grad_norm": 7.350720256152015, + "learning_rate": 9.962618968407455e-06, + "loss": 17.7906, + "step": 3709 + }, + { + "epoch": 
0.06781581881660483, + "grad_norm": 8.235914722238629, + "learning_rate": 9.962582831118712e-06, + "loss": 18.0387, + "step": 3710 + }, + { + "epoch": 0.06783409801305135, + "grad_norm": 7.3465049296915215, + "learning_rate": 9.962546676436547e-06, + "loss": 18.0271, + "step": 3711 + }, + { + "epoch": 0.06785237720949787, + "grad_norm": 9.091010809250884, + "learning_rate": 9.962510504361087e-06, + "loss": 18.4861, + "step": 3712 + }, + { + "epoch": 0.06787065640594439, + "grad_norm": 6.606151229758446, + "learning_rate": 9.962474314892456e-06, + "loss": 17.4758, + "step": 3713 + }, + { + "epoch": 0.06788893560239091, + "grad_norm": 8.364753569679415, + "learning_rate": 9.962438108030786e-06, + "loss": 18.0549, + "step": 3714 + }, + { + "epoch": 0.06790721479883745, + "grad_norm": 5.8204707977722965, + "learning_rate": 9.9624018837762e-06, + "loss": 17.1909, + "step": 3715 + }, + { + "epoch": 0.06792549399528397, + "grad_norm": 8.532616712133498, + "learning_rate": 9.962365642128825e-06, + "loss": 18.3254, + "step": 3716 + }, + { + "epoch": 0.06794377319173049, + "grad_norm": 7.6865609996747155, + "learning_rate": 9.962329383088791e-06, + "loss": 18.0385, + "step": 3717 + }, + { + "epoch": 0.06796205238817701, + "grad_norm": 8.483761109157722, + "learning_rate": 9.962293106656222e-06, + "loss": 18.2805, + "step": 3718 + }, + { + "epoch": 0.06798033158462354, + "grad_norm": 7.960116500492674, + "learning_rate": 9.962256812831245e-06, + "loss": 18.2548, + "step": 3719 + }, + { + "epoch": 0.06799861078107007, + "grad_norm": 7.351339077773364, + "learning_rate": 9.962220501613991e-06, + "loss": 17.7659, + "step": 3720 + }, + { + "epoch": 0.06801688997751659, + "grad_norm": 7.306086424667269, + "learning_rate": 9.962184173004583e-06, + "loss": 17.9074, + "step": 3721 + }, + { + "epoch": 0.06803516917396311, + "grad_norm": 7.366303798764439, + "learning_rate": 9.962147827003152e-06, + "loss": 17.5614, + "step": 3722 + }, + { + "epoch": 0.06805344837040964, + "grad_norm": 7.477827781803198, + "learning_rate": 9.962111463609822e-06, + "loss": 17.7797, + "step": 3723 + }, + { + "epoch": 0.06807172756685616, + "grad_norm": 8.984259687353106, + "learning_rate": 9.962075082824722e-06, + "loss": 18.0538, + "step": 3724 + }, + { + "epoch": 0.06809000676330268, + "grad_norm": 6.958962392731023, + "learning_rate": 9.96203868464798e-06, + "loss": 17.5696, + "step": 3725 + }, + { + "epoch": 0.06810828595974922, + "grad_norm": 6.8283761609229074, + "learning_rate": 9.962002269079722e-06, + "loss": 17.5826, + "step": 3726 + }, + { + "epoch": 0.06812656515619574, + "grad_norm": 7.9269979599067355, + "learning_rate": 9.961965836120076e-06, + "loss": 17.9868, + "step": 3727 + }, + { + "epoch": 0.06814484435264226, + "grad_norm": 7.339234417282792, + "learning_rate": 9.961929385769172e-06, + "loss": 17.7234, + "step": 3728 + }, + { + "epoch": 0.06816312354908878, + "grad_norm": 7.82796076854723, + "learning_rate": 9.961892918027136e-06, + "loss": 18.6493, + "step": 3729 + }, + { + "epoch": 0.0681814027455353, + "grad_norm": 7.671431642947557, + "learning_rate": 9.961856432894096e-06, + "loss": 17.8904, + "step": 3730 + }, + { + "epoch": 0.06819968194198182, + "grad_norm": 8.118278044873199, + "learning_rate": 9.961819930370178e-06, + "loss": 18.332, + "step": 3731 + }, + { + "epoch": 0.06821796113842836, + "grad_norm": 7.163604470953865, + "learning_rate": 9.961783410455514e-06, + "loss": 17.7846, + "step": 3732 + }, + { + "epoch": 0.06823624033487488, + "grad_norm": 7.088079444102592, + "learning_rate": 
9.961746873150228e-06, + "loss": 17.6912, + "step": 3733 + }, + { + "epoch": 0.0682545195313214, + "grad_norm": 8.259109711126358, + "learning_rate": 9.96171031845445e-06, + "loss": 18.3023, + "step": 3734 + }, + { + "epoch": 0.06827279872776792, + "grad_norm": 6.390717546983588, + "learning_rate": 9.961673746368307e-06, + "loss": 17.4842, + "step": 3735 + }, + { + "epoch": 0.06829107792421445, + "grad_norm": 8.54189722197786, + "learning_rate": 9.961637156891931e-06, + "loss": 18.6318, + "step": 3736 + }, + { + "epoch": 0.06830935712066098, + "grad_norm": 8.003033235527171, + "learning_rate": 9.961600550025445e-06, + "loss": 18.1477, + "step": 3737 + }, + { + "epoch": 0.0683276363171075, + "grad_norm": 6.786485587457048, + "learning_rate": 9.961563925768981e-06, + "loss": 17.5007, + "step": 3738 + }, + { + "epoch": 0.06834591551355403, + "grad_norm": 9.1781443473539, + "learning_rate": 9.961527284122665e-06, + "loss": 18.4901, + "step": 3739 + }, + { + "epoch": 0.06836419471000055, + "grad_norm": 6.747170850842066, + "learning_rate": 9.961490625086627e-06, + "loss": 17.6464, + "step": 3740 + }, + { + "epoch": 0.06838247390644707, + "grad_norm": 9.24932579666635, + "learning_rate": 9.961453948660995e-06, + "loss": 18.4559, + "step": 3741 + }, + { + "epoch": 0.06840075310289359, + "grad_norm": 8.075774718250974, + "learning_rate": 9.961417254845897e-06, + "loss": 18.0175, + "step": 3742 + }, + { + "epoch": 0.06841903229934013, + "grad_norm": 7.389772889445342, + "learning_rate": 9.961380543641461e-06, + "loss": 17.7848, + "step": 3743 + }, + { + "epoch": 0.06843731149578665, + "grad_norm": 7.2004802013132885, + "learning_rate": 9.961343815047818e-06, + "loss": 17.598, + "step": 3744 + }, + { + "epoch": 0.06845559069223317, + "grad_norm": 8.669813104855471, + "learning_rate": 9.961307069065096e-06, + "loss": 18.007, + "step": 3745 + }, + { + "epoch": 0.06847386988867969, + "grad_norm": 7.563564649745429, + "learning_rate": 9.961270305693422e-06, + "loss": 17.961, + "step": 3746 + }, + { + "epoch": 0.06849214908512621, + "grad_norm": 7.855342442049126, + "learning_rate": 9.961233524932927e-06, + "loss": 18.0859, + "step": 3747 + }, + { + "epoch": 0.06851042828157274, + "grad_norm": 6.835256251563989, + "learning_rate": 9.961196726783738e-06, + "loss": 17.5802, + "step": 3748 + }, + { + "epoch": 0.06852870747801927, + "grad_norm": 9.09076091118749, + "learning_rate": 9.961159911245986e-06, + "loss": 18.1888, + "step": 3749 + }, + { + "epoch": 0.06854698667446579, + "grad_norm": 8.082008770565626, + "learning_rate": 9.961123078319798e-06, + "loss": 18.0489, + "step": 3750 + }, + { + "epoch": 0.06856526587091231, + "grad_norm": 7.637977939444455, + "learning_rate": 9.961086228005305e-06, + "loss": 17.9384, + "step": 3751 + }, + { + "epoch": 0.06858354506735884, + "grad_norm": 8.208068112705568, + "learning_rate": 9.961049360302634e-06, + "loss": 18.1634, + "step": 3752 + }, + { + "epoch": 0.06860182426380536, + "grad_norm": 7.719268232487972, + "learning_rate": 9.961012475211917e-06, + "loss": 18.1333, + "step": 3753 + }, + { + "epoch": 0.0686201034602519, + "grad_norm": 7.367828581028043, + "learning_rate": 9.960975572733282e-06, + "loss": 17.7244, + "step": 3754 + }, + { + "epoch": 0.06863838265669842, + "grad_norm": 7.456793050592895, + "learning_rate": 9.960938652866857e-06, + "loss": 17.7094, + "step": 3755 + }, + { + "epoch": 0.06865666185314494, + "grad_norm": 7.800264409200894, + "learning_rate": 9.960901715612774e-06, + "loss": 18.1531, + "step": 3756 + }, + { + "epoch": 
0.06867494104959146, + "grad_norm": 6.7314809857282665, + "learning_rate": 9.960864760971159e-06, + "loss": 17.6805, + "step": 3757 + }, + { + "epoch": 0.06869322024603798, + "grad_norm": 7.6788020934336405, + "learning_rate": 9.960827788942144e-06, + "loss": 17.6742, + "step": 3758 + }, + { + "epoch": 0.0687114994424845, + "grad_norm": 8.146205801876379, + "learning_rate": 9.96079079952586e-06, + "loss": 18.6369, + "step": 3759 + }, + { + "epoch": 0.06872977863893104, + "grad_norm": 8.26115784078957, + "learning_rate": 9.960753792722432e-06, + "loss": 18.5603, + "step": 3760 + }, + { + "epoch": 0.06874805783537756, + "grad_norm": 9.019812898584123, + "learning_rate": 9.960716768531993e-06, + "loss": 18.0269, + "step": 3761 + }, + { + "epoch": 0.06876633703182408, + "grad_norm": 6.465686684918821, + "learning_rate": 9.960679726954674e-06, + "loss": 17.5717, + "step": 3762 + }, + { + "epoch": 0.0687846162282706, + "grad_norm": 8.125270644039192, + "learning_rate": 9.960642667990601e-06, + "loss": 18.1933, + "step": 3763 + }, + { + "epoch": 0.06880289542471713, + "grad_norm": 8.771975936087479, + "learning_rate": 9.960605591639908e-06, + "loss": 18.4901, + "step": 3764 + }, + { + "epoch": 0.06882117462116365, + "grad_norm": 6.339127645571222, + "learning_rate": 9.96056849790272e-06, + "loss": 17.5171, + "step": 3765 + }, + { + "epoch": 0.06883945381761018, + "grad_norm": 6.245842990050287, + "learning_rate": 9.960531386779173e-06, + "loss": 17.5162, + "step": 3766 + }, + { + "epoch": 0.0688577330140567, + "grad_norm": 7.009487915994345, + "learning_rate": 9.960494258269391e-06, + "loss": 17.6565, + "step": 3767 + }, + { + "epoch": 0.06887601221050323, + "grad_norm": 8.15864628189637, + "learning_rate": 9.96045711237351e-06, + "loss": 17.8422, + "step": 3768 + }, + { + "epoch": 0.06889429140694975, + "grad_norm": 8.073448513013414, + "learning_rate": 9.960419949091655e-06, + "loss": 18.0036, + "step": 3769 + }, + { + "epoch": 0.06891257060339627, + "grad_norm": 7.674304767211397, + "learning_rate": 9.96038276842396e-06, + "loss": 18.26, + "step": 3770 + }, + { + "epoch": 0.0689308497998428, + "grad_norm": 7.740456753546985, + "learning_rate": 9.960345570370552e-06, + "loss": 18.2854, + "step": 3771 + }, + { + "epoch": 0.06894912899628933, + "grad_norm": 7.139536987911921, + "learning_rate": 9.960308354931564e-06, + "loss": 17.8847, + "step": 3772 + }, + { + "epoch": 0.06896740819273585, + "grad_norm": 6.721470384251413, + "learning_rate": 9.960271122107127e-06, + "loss": 17.5681, + "step": 3773 + }, + { + "epoch": 0.06898568738918237, + "grad_norm": 7.326324475271985, + "learning_rate": 9.96023387189737e-06, + "loss": 17.8968, + "step": 3774 + }, + { + "epoch": 0.06900396658562889, + "grad_norm": 7.8901041168503045, + "learning_rate": 9.960196604302421e-06, + "loss": 17.9237, + "step": 3775 + }, + { + "epoch": 0.06902224578207541, + "grad_norm": 9.181082980181333, + "learning_rate": 9.960159319322415e-06, + "loss": 18.29, + "step": 3776 + }, + { + "epoch": 0.06904052497852195, + "grad_norm": 7.311520840686915, + "learning_rate": 9.960122016957483e-06, + "loss": 17.7929, + "step": 3777 + }, + { + "epoch": 0.06905880417496847, + "grad_norm": 6.928751391376276, + "learning_rate": 9.96008469720775e-06, + "loss": 17.477, + "step": 3778 + }, + { + "epoch": 0.069077083371415, + "grad_norm": 8.172636126545248, + "learning_rate": 9.960047360073353e-06, + "loss": 18.1091, + "step": 3779 + }, + { + "epoch": 0.06909536256786152, + "grad_norm": 8.057727424639237, + "learning_rate": 9.96001000555442e-06, + 
"loss": 17.8632, + "step": 3780 + }, + { + "epoch": 0.06911364176430804, + "grad_norm": 7.684672935232596, + "learning_rate": 9.959972633651083e-06, + "loss": 18.0447, + "step": 3781 + }, + { + "epoch": 0.06913192096075456, + "grad_norm": 7.216855034027728, + "learning_rate": 9.95993524436347e-06, + "loss": 17.7419, + "step": 3782 + }, + { + "epoch": 0.0691502001572011, + "grad_norm": 7.853531177231895, + "learning_rate": 9.959897837691717e-06, + "loss": 18.1424, + "step": 3783 + }, + { + "epoch": 0.06916847935364762, + "grad_norm": 7.374683926499741, + "learning_rate": 9.959860413635951e-06, + "loss": 17.782, + "step": 3784 + }, + { + "epoch": 0.06918675855009414, + "grad_norm": 6.963507141610772, + "learning_rate": 9.959822972196306e-06, + "loss": 17.3137, + "step": 3785 + }, + { + "epoch": 0.06920503774654066, + "grad_norm": 7.795562780346318, + "learning_rate": 9.95978551337291e-06, + "loss": 17.886, + "step": 3786 + }, + { + "epoch": 0.06922331694298718, + "grad_norm": 8.110133946860046, + "learning_rate": 9.959748037165897e-06, + "loss": 18.2596, + "step": 3787 + }, + { + "epoch": 0.06924159613943372, + "grad_norm": 7.645011338232504, + "learning_rate": 9.959710543575397e-06, + "loss": 17.8402, + "step": 3788 + }, + { + "epoch": 0.06925987533588024, + "grad_norm": 6.859118651318195, + "learning_rate": 9.959673032601544e-06, + "loss": 17.7896, + "step": 3789 + }, + { + "epoch": 0.06927815453232676, + "grad_norm": 7.007663177435595, + "learning_rate": 9.959635504244466e-06, + "loss": 17.732, + "step": 3790 + }, + { + "epoch": 0.06929643372877328, + "grad_norm": 7.4868070815628815, + "learning_rate": 9.959597958504296e-06, + "loss": 17.9148, + "step": 3791 + }, + { + "epoch": 0.0693147129252198, + "grad_norm": 6.576444156646345, + "learning_rate": 9.959560395381166e-06, + "loss": 17.5448, + "step": 3792 + }, + { + "epoch": 0.06933299212166633, + "grad_norm": 6.707517850683992, + "learning_rate": 9.959522814875206e-06, + "loss": 17.8627, + "step": 3793 + }, + { + "epoch": 0.06935127131811286, + "grad_norm": 6.707322458438496, + "learning_rate": 9.959485216986548e-06, + "loss": 17.4698, + "step": 3794 + }, + { + "epoch": 0.06936955051455938, + "grad_norm": 7.485522659872726, + "learning_rate": 9.959447601715328e-06, + "loss": 17.8513, + "step": 3795 + }, + { + "epoch": 0.0693878297110059, + "grad_norm": 7.275228060747942, + "learning_rate": 9.959409969061673e-06, + "loss": 17.8599, + "step": 3796 + }, + { + "epoch": 0.06940610890745243, + "grad_norm": 7.809365144659854, + "learning_rate": 9.959372319025715e-06, + "loss": 17.8961, + "step": 3797 + }, + { + "epoch": 0.06942438810389895, + "grad_norm": 6.528049873087958, + "learning_rate": 9.959334651607589e-06, + "loss": 17.2796, + "step": 3798 + }, + { + "epoch": 0.06944266730034547, + "grad_norm": 6.793507717491632, + "learning_rate": 9.959296966807425e-06, + "loss": 17.607, + "step": 3799 + }, + { + "epoch": 0.069460946496792, + "grad_norm": 5.979708116887712, + "learning_rate": 9.959259264625355e-06, + "loss": 17.1363, + "step": 3800 + }, + { + "epoch": 0.06947922569323853, + "grad_norm": 7.522838490297659, + "learning_rate": 9.959221545061512e-06, + "loss": 17.9469, + "step": 3801 + }, + { + "epoch": 0.06949750488968505, + "grad_norm": 7.249327032656113, + "learning_rate": 9.959183808116028e-06, + "loss": 17.6304, + "step": 3802 + }, + { + "epoch": 0.06951578408613157, + "grad_norm": 8.86934517787543, + "learning_rate": 9.959146053789035e-06, + "loss": 18.454, + "step": 3803 + }, + { + "epoch": 0.06953406328257809, + "grad_norm": 
8.022226081210857, + "learning_rate": 9.959108282080665e-06, + "loss": 18.1153, + "step": 3804 + }, + { + "epoch": 0.06955234247902463, + "grad_norm": 6.8823684446573665, + "learning_rate": 9.959070492991052e-06, + "loss": 17.4093, + "step": 3805 + }, + { + "epoch": 0.06957062167547115, + "grad_norm": 7.2076966204602, + "learning_rate": 9.959032686520326e-06, + "loss": 17.7794, + "step": 3806 + }, + { + "epoch": 0.06958890087191767, + "grad_norm": 7.8839595532464415, + "learning_rate": 9.95899486266862e-06, + "loss": 18.0942, + "step": 3807 + }, + { + "epoch": 0.0696071800683642, + "grad_norm": 8.653682956089938, + "learning_rate": 9.95895702143607e-06, + "loss": 18.2604, + "step": 3808 + }, + { + "epoch": 0.06962545926481072, + "grad_norm": 9.14225694344878, + "learning_rate": 9.958919162822805e-06, + "loss": 18.6332, + "step": 3809 + }, + { + "epoch": 0.06964373846125724, + "grad_norm": 7.246660931457984, + "learning_rate": 9.958881286828956e-06, + "loss": 17.7692, + "step": 3810 + }, + { + "epoch": 0.06966201765770377, + "grad_norm": 7.45850903341891, + "learning_rate": 9.95884339345466e-06, + "loss": 18.0857, + "step": 3811 + }, + { + "epoch": 0.0696802968541503, + "grad_norm": 8.270664122826734, + "learning_rate": 9.95880548270005e-06, + "loss": 18.2097, + "step": 3812 + }, + { + "epoch": 0.06969857605059682, + "grad_norm": 7.304124010185371, + "learning_rate": 9.958767554565256e-06, + "loss": 17.8819, + "step": 3813 + }, + { + "epoch": 0.06971685524704334, + "grad_norm": 6.074939583078227, + "learning_rate": 9.958729609050412e-06, + "loss": 17.3484, + "step": 3814 + }, + { + "epoch": 0.06973513444348986, + "grad_norm": 7.497330253821664, + "learning_rate": 9.95869164615565e-06, + "loss": 18.0494, + "step": 3815 + }, + { + "epoch": 0.06975341363993638, + "grad_norm": 7.511123475752274, + "learning_rate": 9.958653665881106e-06, + "loss": 17.6961, + "step": 3816 + }, + { + "epoch": 0.06977169283638292, + "grad_norm": 7.020476709462503, + "learning_rate": 9.95861566822691e-06, + "loss": 17.5925, + "step": 3817 + }, + { + "epoch": 0.06978997203282944, + "grad_norm": 7.613458171651699, + "learning_rate": 9.958577653193195e-06, + "loss": 18.1222, + "step": 3818 + }, + { + "epoch": 0.06980825122927596, + "grad_norm": 6.7776175462570425, + "learning_rate": 9.9585396207801e-06, + "loss": 17.5001, + "step": 3819 + }, + { + "epoch": 0.06982653042572248, + "grad_norm": 7.048944765465509, + "learning_rate": 9.958501570987748e-06, + "loss": 17.6315, + "step": 3820 + }, + { + "epoch": 0.069844809622169, + "grad_norm": 6.996495382862664, + "learning_rate": 9.958463503816283e-06, + "loss": 17.7513, + "step": 3821 + }, + { + "epoch": 0.06986308881861554, + "grad_norm": 6.207166277887507, + "learning_rate": 9.958425419265831e-06, + "loss": 17.2869, + "step": 3822 + }, + { + "epoch": 0.06988136801506206, + "grad_norm": 7.289063728499782, + "learning_rate": 9.958387317336529e-06, + "loss": 18.0618, + "step": 3823 + }, + { + "epoch": 0.06989964721150858, + "grad_norm": 6.313155286089765, + "learning_rate": 9.95834919802851e-06, + "loss": 17.2904, + "step": 3824 + }, + { + "epoch": 0.0699179264079551, + "grad_norm": 7.962502631421321, + "learning_rate": 9.958311061341908e-06, + "loss": 17.9601, + "step": 3825 + }, + { + "epoch": 0.06993620560440163, + "grad_norm": 8.332043311595505, + "learning_rate": 9.958272907276855e-06, + "loss": 18.1206, + "step": 3826 + }, + { + "epoch": 0.06995448480084815, + "grad_norm": 7.400477058760804, + "learning_rate": 9.958234735833485e-06, + "loss": 17.89, + "step": 3827 + 
}, + { + "epoch": 0.06997276399729468, + "grad_norm": 8.613183284713479, + "learning_rate": 9.958196547011934e-06, + "loss": 18.0946, + "step": 3828 + }, + { + "epoch": 0.0699910431937412, + "grad_norm": 7.362058331686377, + "learning_rate": 9.958158340812333e-06, + "loss": 17.6704, + "step": 3829 + }, + { + "epoch": 0.07000932239018773, + "grad_norm": 8.011238170474346, + "learning_rate": 9.958120117234818e-06, + "loss": 18.2923, + "step": 3830 + }, + { + "epoch": 0.07002760158663425, + "grad_norm": 7.26819504568335, + "learning_rate": 9.958081876279523e-06, + "loss": 18.0602, + "step": 3831 + }, + { + "epoch": 0.07004588078308077, + "grad_norm": 6.505793670949363, + "learning_rate": 9.95804361794658e-06, + "loss": 17.2733, + "step": 3832 + }, + { + "epoch": 0.0700641599795273, + "grad_norm": 8.085243306574576, + "learning_rate": 9.958005342236126e-06, + "loss": 18.3954, + "step": 3833 + }, + { + "epoch": 0.07008243917597383, + "grad_norm": 8.094394056718064, + "learning_rate": 9.957967049148292e-06, + "loss": 18.2679, + "step": 3834 + }, + { + "epoch": 0.07010071837242035, + "grad_norm": 8.984286766048601, + "learning_rate": 9.957928738683213e-06, + "loss": 18.5315, + "step": 3835 + }, + { + "epoch": 0.07011899756886687, + "grad_norm": 7.361178826776351, + "learning_rate": 9.957890410841026e-06, + "loss": 17.8741, + "step": 3836 + }, + { + "epoch": 0.0701372767653134, + "grad_norm": 8.090209621958795, + "learning_rate": 9.957852065621862e-06, + "loss": 18.0183, + "step": 3837 + }, + { + "epoch": 0.07015555596175992, + "grad_norm": 7.679469249415488, + "learning_rate": 9.957813703025857e-06, + "loss": 17.987, + "step": 3838 + }, + { + "epoch": 0.07017383515820645, + "grad_norm": 7.349116689247304, + "learning_rate": 9.957775323053146e-06, + "loss": 17.8111, + "step": 3839 + }, + { + "epoch": 0.07019211435465297, + "grad_norm": 9.154605941566015, + "learning_rate": 9.957736925703863e-06, + "loss": 18.5949, + "step": 3840 + }, + { + "epoch": 0.0702103935510995, + "grad_norm": 8.187506651271368, + "learning_rate": 9.95769851097814e-06, + "loss": 18.2681, + "step": 3841 + }, + { + "epoch": 0.07022867274754602, + "grad_norm": 5.924782325222919, + "learning_rate": 9.957660078876116e-06, + "loss": 17.3512, + "step": 3842 + }, + { + "epoch": 0.07024695194399254, + "grad_norm": 6.55882708458667, + "learning_rate": 9.957621629397921e-06, + "loss": 17.4009, + "step": 3843 + }, + { + "epoch": 0.07026523114043906, + "grad_norm": 7.487796551520118, + "learning_rate": 9.957583162543696e-06, + "loss": 17.9639, + "step": 3844 + }, + { + "epoch": 0.0702835103368856, + "grad_norm": 7.275007051117932, + "learning_rate": 9.95754467831357e-06, + "loss": 17.5755, + "step": 3845 + }, + { + "epoch": 0.07030178953333212, + "grad_norm": 12.569893684604146, + "learning_rate": 9.95750617670768e-06, + "loss": 17.2282, + "step": 3846 + }, + { + "epoch": 0.07032006872977864, + "grad_norm": 8.240543272249862, + "learning_rate": 9.957467657726163e-06, + "loss": 17.7524, + "step": 3847 + }, + { + "epoch": 0.07033834792622516, + "grad_norm": 7.302420800858163, + "learning_rate": 9.95742912136915e-06, + "loss": 17.7059, + "step": 3848 + }, + { + "epoch": 0.07035662712267168, + "grad_norm": 7.840809494218581, + "learning_rate": 9.95739056763678e-06, + "loss": 17.8699, + "step": 3849 + }, + { + "epoch": 0.0703749063191182, + "grad_norm": 7.174100674831677, + "learning_rate": 9.957351996529185e-06, + "loss": 17.8066, + "step": 3850 + }, + { + "epoch": 0.07039318551556474, + "grad_norm": 8.467747837492595, + "learning_rate": 
9.9573134080465e-06, + "loss": 18.4042, + "step": 3851 + }, + { + "epoch": 0.07041146471201126, + "grad_norm": 5.958143703909017, + "learning_rate": 9.957274802188864e-06, + "loss": 17.1516, + "step": 3852 + }, + { + "epoch": 0.07042974390845778, + "grad_norm": 6.857922505161306, + "learning_rate": 9.95723617895641e-06, + "loss": 17.5791, + "step": 3853 + }, + { + "epoch": 0.0704480231049043, + "grad_norm": 8.567716149969353, + "learning_rate": 9.957197538349273e-06, + "loss": 18.0442, + "step": 3854 + }, + { + "epoch": 0.07046630230135083, + "grad_norm": 8.980549231024167, + "learning_rate": 9.957158880367589e-06, + "loss": 17.886, + "step": 3855 + }, + { + "epoch": 0.07048458149779736, + "grad_norm": 6.891651257083474, + "learning_rate": 9.957120205011492e-06, + "loss": 17.8852, + "step": 3856 + }, + { + "epoch": 0.07050286069424389, + "grad_norm": 8.484493154188359, + "learning_rate": 9.95708151228112e-06, + "loss": 18.456, + "step": 3857 + }, + { + "epoch": 0.0705211398906904, + "grad_norm": 6.898141184519592, + "learning_rate": 9.957042802176606e-06, + "loss": 17.7343, + "step": 3858 + }, + { + "epoch": 0.07053941908713693, + "grad_norm": 6.88328401354471, + "learning_rate": 9.957004074698088e-06, + "loss": 17.7004, + "step": 3859 + }, + { + "epoch": 0.07055769828358345, + "grad_norm": 6.211656798973414, + "learning_rate": 9.956965329845701e-06, + "loss": 17.2023, + "step": 3860 + }, + { + "epoch": 0.07057597748002997, + "grad_norm": 8.845461155674911, + "learning_rate": 9.95692656761958e-06, + "loss": 18.26, + "step": 3861 + }, + { + "epoch": 0.07059425667647651, + "grad_norm": 7.889961654619663, + "learning_rate": 9.956887788019863e-06, + "loss": 17.9673, + "step": 3862 + }, + { + "epoch": 0.07061253587292303, + "grad_norm": 7.44714448615704, + "learning_rate": 9.956848991046682e-06, + "loss": 18.1349, + "step": 3863 + }, + { + "epoch": 0.07063081506936955, + "grad_norm": 6.90674757093021, + "learning_rate": 9.956810176700178e-06, + "loss": 17.8154, + "step": 3864 + }, + { + "epoch": 0.07064909426581607, + "grad_norm": 8.046083685464394, + "learning_rate": 9.956771344980483e-06, + "loss": 17.9673, + "step": 3865 + }, + { + "epoch": 0.0706673734622626, + "grad_norm": 7.859387615224365, + "learning_rate": 9.956732495887736e-06, + "loss": 18.0598, + "step": 3866 + }, + { + "epoch": 0.07068565265870912, + "grad_norm": 7.641056445194432, + "learning_rate": 9.95669362942207e-06, + "loss": 18.1409, + "step": 3867 + }, + { + "epoch": 0.07070393185515565, + "grad_norm": 8.94055665340746, + "learning_rate": 9.956654745583623e-06, + "loss": 18.8027, + "step": 3868 + }, + { + "epoch": 0.07072221105160217, + "grad_norm": 6.691974164414335, + "learning_rate": 9.956615844372532e-06, + "loss": 17.6638, + "step": 3869 + }, + { + "epoch": 0.0707404902480487, + "grad_norm": 6.057754083197547, + "learning_rate": 9.956576925788934e-06, + "loss": 17.2145, + "step": 3870 + }, + { + "epoch": 0.07075876944449522, + "grad_norm": 7.604425340976951, + "learning_rate": 9.956537989832961e-06, + "loss": 17.7097, + "step": 3871 + }, + { + "epoch": 0.07077704864094174, + "grad_norm": 6.472158783529807, + "learning_rate": 9.956499036504754e-06, + "loss": 17.372, + "step": 3872 + }, + { + "epoch": 0.07079532783738827, + "grad_norm": 6.465712103051698, + "learning_rate": 9.956460065804449e-06, + "loss": 17.5418, + "step": 3873 + }, + { + "epoch": 0.0708136070338348, + "grad_norm": 6.280648814493716, + "learning_rate": 9.95642107773218e-06, + "loss": 17.2867, + "step": 3874 + }, + { + "epoch": 0.07083188623028132, + 
"grad_norm": 7.0407610117133945, + "learning_rate": 9.956382072288086e-06, + "loss": 17.6109, + "step": 3875 + }, + { + "epoch": 0.07085016542672784, + "grad_norm": 7.994480393016992, + "learning_rate": 9.956343049472302e-06, + "loss": 17.8335, + "step": 3876 + }, + { + "epoch": 0.07086844462317436, + "grad_norm": 7.107490375781242, + "learning_rate": 9.956304009284968e-06, + "loss": 17.9981, + "step": 3877 + }, + { + "epoch": 0.07088672381962088, + "grad_norm": 6.842196758503717, + "learning_rate": 9.956264951726216e-06, + "loss": 17.5187, + "step": 3878 + }, + { + "epoch": 0.07090500301606742, + "grad_norm": 8.76140509406288, + "learning_rate": 9.956225876796185e-06, + "loss": 18.4938, + "step": 3879 + }, + { + "epoch": 0.07092328221251394, + "grad_norm": 8.632549706370371, + "learning_rate": 9.956186784495014e-06, + "loss": 18.1769, + "step": 3880 + }, + { + "epoch": 0.07094156140896046, + "grad_norm": 6.453701664390098, + "learning_rate": 9.956147674822838e-06, + "loss": 17.4795, + "step": 3881 + }, + { + "epoch": 0.07095984060540698, + "grad_norm": 6.693906139208951, + "learning_rate": 9.956108547779795e-06, + "loss": 17.4784, + "step": 3882 + }, + { + "epoch": 0.0709781198018535, + "grad_norm": 8.410888034663625, + "learning_rate": 9.956069403366022e-06, + "loss": 18.2885, + "step": 3883 + }, + { + "epoch": 0.07099639899830003, + "grad_norm": 7.609992702552747, + "learning_rate": 9.956030241581655e-06, + "loss": 18.289, + "step": 3884 + }, + { + "epoch": 0.07101467819474656, + "grad_norm": 7.361900219256501, + "learning_rate": 9.955991062426831e-06, + "loss": 18.0587, + "step": 3885 + }, + { + "epoch": 0.07103295739119309, + "grad_norm": 7.517307551589767, + "learning_rate": 9.955951865901689e-06, + "loss": 17.9746, + "step": 3886 + }, + { + "epoch": 0.07105123658763961, + "grad_norm": 7.414760374856745, + "learning_rate": 9.955912652006368e-06, + "loss": 17.9533, + "step": 3887 + }, + { + "epoch": 0.07106951578408613, + "grad_norm": 6.804474401638567, + "learning_rate": 9.955873420741001e-06, + "loss": 17.9495, + "step": 3888 + }, + { + "epoch": 0.07108779498053265, + "grad_norm": 8.028523634313913, + "learning_rate": 9.955834172105728e-06, + "loss": 17.9176, + "step": 3889 + }, + { + "epoch": 0.07110607417697919, + "grad_norm": 7.412426346205773, + "learning_rate": 9.955794906100687e-06, + "loss": 17.7117, + "step": 3890 + }, + { + "epoch": 0.07112435337342571, + "grad_norm": 8.429497997814615, + "learning_rate": 9.955755622726013e-06, + "loss": 18.1698, + "step": 3891 + }, + { + "epoch": 0.07114263256987223, + "grad_norm": 7.746424875395029, + "learning_rate": 9.955716321981847e-06, + "loss": 18.2551, + "step": 3892 + }, + { + "epoch": 0.07116091176631875, + "grad_norm": 6.949464628106947, + "learning_rate": 9.955677003868326e-06, + "loss": 17.7316, + "step": 3893 + }, + { + "epoch": 0.07117919096276527, + "grad_norm": 6.86136952791035, + "learning_rate": 9.955637668385585e-06, + "loss": 17.5325, + "step": 3894 + }, + { + "epoch": 0.0711974701592118, + "grad_norm": 7.210314086051575, + "learning_rate": 9.955598315533767e-06, + "loss": 17.4802, + "step": 3895 + }, + { + "epoch": 0.07121574935565833, + "grad_norm": 8.799671632444277, + "learning_rate": 9.955558945313005e-06, + "loss": 18.1703, + "step": 3896 + }, + { + "epoch": 0.07123402855210485, + "grad_norm": 9.348757079785866, + "learning_rate": 9.955519557723438e-06, + "loss": 18.8315, + "step": 3897 + }, + { + "epoch": 0.07125230774855137, + "grad_norm": 7.394544857227317, + "learning_rate": 9.955480152765206e-06, + "loss": 
17.5749, + "step": 3898 + }, + { + "epoch": 0.0712705869449979, + "grad_norm": 8.004974836420036, + "learning_rate": 9.955440730438448e-06, + "loss": 18.1448, + "step": 3899 + }, + { + "epoch": 0.07128886614144442, + "grad_norm": 6.7621017788199325, + "learning_rate": 9.9554012907433e-06, + "loss": 17.7479, + "step": 3900 + }, + { + "epoch": 0.07130714533789094, + "grad_norm": 6.965136473977121, + "learning_rate": 9.955361833679898e-06, + "loss": 17.7969, + "step": 3901 + }, + { + "epoch": 0.07132542453433748, + "grad_norm": 6.460864792601465, + "learning_rate": 9.955322359248382e-06, + "loss": 17.5655, + "step": 3902 + }, + { + "epoch": 0.071343703730784, + "grad_norm": 7.078763293573598, + "learning_rate": 9.955282867448893e-06, + "loss": 17.7961, + "step": 3903 + }, + { + "epoch": 0.07136198292723052, + "grad_norm": 7.453499886376171, + "learning_rate": 9.955243358281567e-06, + "loss": 17.8465, + "step": 3904 + }, + { + "epoch": 0.07138026212367704, + "grad_norm": 7.344132117766187, + "learning_rate": 9.955203831746544e-06, + "loss": 17.7976, + "step": 3905 + }, + { + "epoch": 0.07139854132012356, + "grad_norm": 6.28162514941662, + "learning_rate": 9.955164287843961e-06, + "loss": 17.5085, + "step": 3906 + }, + { + "epoch": 0.0714168205165701, + "grad_norm": 6.301605044284223, + "learning_rate": 9.955124726573957e-06, + "loss": 17.2057, + "step": 3907 + }, + { + "epoch": 0.07143509971301662, + "grad_norm": 8.324558992093518, + "learning_rate": 9.955085147936672e-06, + "loss": 18.319, + "step": 3908 + }, + { + "epoch": 0.07145337890946314, + "grad_norm": 39.34073829515376, + "learning_rate": 9.955045551932244e-06, + "loss": 18.6678, + "step": 3909 + }, + { + "epoch": 0.07147165810590966, + "grad_norm": 7.0815990079285065, + "learning_rate": 9.95500593856081e-06, + "loss": 17.5787, + "step": 3910 + }, + { + "epoch": 0.07148993730235619, + "grad_norm": 7.758661495696111, + "learning_rate": 9.954966307822509e-06, + "loss": 18.1176, + "step": 3911 + }, + { + "epoch": 0.0715082164988027, + "grad_norm": 5.4359752949865365, + "learning_rate": 9.95492665971748e-06, + "loss": 16.9614, + "step": 3912 + }, + { + "epoch": 0.07152649569524924, + "grad_norm": 6.701826729506655, + "learning_rate": 9.954886994245868e-06, + "loss": 17.541, + "step": 3913 + }, + { + "epoch": 0.07154477489169576, + "grad_norm": 7.496048174752599, + "learning_rate": 9.954847311407802e-06, + "loss": 17.9465, + "step": 3914 + }, + { + "epoch": 0.07156305408814229, + "grad_norm": 7.645185163814274, + "learning_rate": 9.954807611203429e-06, + "loss": 17.8904, + "step": 3915 + }, + { + "epoch": 0.07158133328458881, + "grad_norm": 8.495001972004014, + "learning_rate": 9.954767893632885e-06, + "loss": 18.0269, + "step": 3916 + }, + { + "epoch": 0.07159961248103533, + "grad_norm": 7.298855466968175, + "learning_rate": 9.95472815869631e-06, + "loss": 17.8787, + "step": 3917 + }, + { + "epoch": 0.07161789167748185, + "grad_norm": 8.149951520511012, + "learning_rate": 9.954688406393842e-06, + "loss": 18.1366, + "step": 3918 + }, + { + "epoch": 0.07163617087392839, + "grad_norm": 8.855197248094138, + "learning_rate": 9.95464863672562e-06, + "loss": 18.1028, + "step": 3919 + }, + { + "epoch": 0.07165445007037491, + "grad_norm": 8.326114062175687, + "learning_rate": 9.954608849691785e-06, + "loss": 18.048, + "step": 3920 + }, + { + "epoch": 0.07167272926682143, + "grad_norm": 8.429519275282749, + "learning_rate": 9.954569045292478e-06, + "loss": 18.2035, + "step": 3921 + }, + { + "epoch": 0.07169100846326795, + "grad_norm": 
6.783247772055827, + "learning_rate": 9.954529223527833e-06, + "loss": 17.6596, + "step": 3922 + }, + { + "epoch": 0.07170928765971447, + "grad_norm": 9.927533382657202, + "learning_rate": 9.954489384397994e-06, + "loss": 18.119, + "step": 3923 + }, + { + "epoch": 0.07172756685616101, + "grad_norm": 8.204292956300954, + "learning_rate": 9.954449527903101e-06, + "loss": 17.7851, + "step": 3924 + }, + { + "epoch": 0.07174584605260753, + "grad_norm": 7.3709617881107485, + "learning_rate": 9.954409654043292e-06, + "loss": 17.9231, + "step": 3925 + }, + { + "epoch": 0.07176412524905405, + "grad_norm": 6.613022753483986, + "learning_rate": 9.954369762818708e-06, + "loss": 17.7306, + "step": 3926 + }, + { + "epoch": 0.07178240444550057, + "grad_norm": 7.856914378194838, + "learning_rate": 9.954329854229485e-06, + "loss": 18.1396, + "step": 3927 + }, + { + "epoch": 0.0718006836419471, + "grad_norm": 7.518274259323055, + "learning_rate": 9.954289928275769e-06, + "loss": 17.5279, + "step": 3928 + }, + { + "epoch": 0.07181896283839362, + "grad_norm": 8.176998030566098, + "learning_rate": 9.954249984957695e-06, + "loss": 17.9249, + "step": 3929 + }, + { + "epoch": 0.07183724203484015, + "grad_norm": 7.313919425219405, + "learning_rate": 9.954210024275403e-06, + "loss": 17.8802, + "step": 3930 + }, + { + "epoch": 0.07185552123128668, + "grad_norm": 6.774606189206345, + "learning_rate": 9.954170046229039e-06, + "loss": 17.6312, + "step": 3931 + }, + { + "epoch": 0.0718738004277332, + "grad_norm": 8.35589329756528, + "learning_rate": 9.954130050818735e-06, + "loss": 18.0618, + "step": 3932 + }, + { + "epoch": 0.07189207962417972, + "grad_norm": 7.354747245044039, + "learning_rate": 9.954090038044637e-06, + "loss": 17.6745, + "step": 3933 + }, + { + "epoch": 0.07191035882062624, + "grad_norm": 6.369932691161392, + "learning_rate": 9.954050007906883e-06, + "loss": 17.4517, + "step": 3934 + }, + { + "epoch": 0.07192863801707276, + "grad_norm": 6.04383158675737, + "learning_rate": 9.954009960405613e-06, + "loss": 17.2224, + "step": 3935 + }, + { + "epoch": 0.0719469172135193, + "grad_norm": 8.684643178179634, + "learning_rate": 9.953969895540969e-06, + "loss": 18.2651, + "step": 3936 + }, + { + "epoch": 0.07196519640996582, + "grad_norm": 8.006094844175474, + "learning_rate": 9.95392981331309e-06, + "loss": 18.0621, + "step": 3937 + }, + { + "epoch": 0.07198347560641234, + "grad_norm": 7.1188474842092635, + "learning_rate": 9.953889713722118e-06, + "loss": 17.6638, + "step": 3938 + }, + { + "epoch": 0.07200175480285886, + "grad_norm": 7.686894365864788, + "learning_rate": 9.95384959676819e-06, + "loss": 17.7463, + "step": 3939 + }, + { + "epoch": 0.07202003399930539, + "grad_norm": 7.501717431496252, + "learning_rate": 9.953809462451452e-06, + "loss": 17.8232, + "step": 3940 + }, + { + "epoch": 0.07203831319575192, + "grad_norm": 8.094419695444945, + "learning_rate": 9.95376931077204e-06, + "loss": 18.1749, + "step": 3941 + }, + { + "epoch": 0.07205659239219844, + "grad_norm": 7.936655585513716, + "learning_rate": 9.953729141730095e-06, + "loss": 17.7579, + "step": 3942 + }, + { + "epoch": 0.07207487158864496, + "grad_norm": 6.066799466763743, + "learning_rate": 9.95368895532576e-06, + "loss": 17.4134, + "step": 3943 + }, + { + "epoch": 0.07209315078509149, + "grad_norm": 7.904978332949921, + "learning_rate": 9.953648751559177e-06, + "loss": 17.931, + "step": 3944 + }, + { + "epoch": 0.07211142998153801, + "grad_norm": 7.5199540264729325, + "learning_rate": 9.953608530430483e-06, + "loss": 18.0643, + "step": 
3945 + }, + { + "epoch": 0.07212970917798453, + "grad_norm": 7.682338263648552, + "learning_rate": 9.953568291939822e-06, + "loss": 17.7252, + "step": 3946 + }, + { + "epoch": 0.07214798837443107, + "grad_norm": 8.975695313385652, + "learning_rate": 9.953528036087334e-06, + "loss": 18.3403, + "step": 3947 + }, + { + "epoch": 0.07216626757087759, + "grad_norm": 7.9441040428145655, + "learning_rate": 9.95348776287316e-06, + "loss": 17.718, + "step": 3948 + }, + { + "epoch": 0.07218454676732411, + "grad_norm": 7.4782322260295855, + "learning_rate": 9.953447472297442e-06, + "loss": 18.0214, + "step": 3949 + }, + { + "epoch": 0.07220282596377063, + "grad_norm": 7.993625387885855, + "learning_rate": 9.953407164360317e-06, + "loss": 17.5924, + "step": 3950 + }, + { + "epoch": 0.07222110516021715, + "grad_norm": 6.53021490621029, + "learning_rate": 9.953366839061932e-06, + "loss": 17.3348, + "step": 3951 + }, + { + "epoch": 0.07223938435666367, + "grad_norm": 7.648185006668104, + "learning_rate": 9.953326496402428e-06, + "loss": 17.8196, + "step": 3952 + }, + { + "epoch": 0.07225766355311021, + "grad_norm": 8.067465309558568, + "learning_rate": 9.953286136381944e-06, + "loss": 18.3634, + "step": 3953 + }, + { + "epoch": 0.07227594274955673, + "grad_norm": 7.379918960516362, + "learning_rate": 9.95324575900062e-06, + "loss": 17.9421, + "step": 3954 + }, + { + "epoch": 0.07229422194600325, + "grad_norm": 7.497042354897406, + "learning_rate": 9.9532053642586e-06, + "loss": 17.9736, + "step": 3955 + }, + { + "epoch": 0.07231250114244978, + "grad_norm": 9.806995871791335, + "learning_rate": 9.953164952156024e-06, + "loss": 18.8548, + "step": 3956 + }, + { + "epoch": 0.0723307803388963, + "grad_norm": 6.247902363816244, + "learning_rate": 9.953124522693036e-06, + "loss": 17.4974, + "step": 3957 + }, + { + "epoch": 0.07234905953534283, + "grad_norm": 7.512323418953835, + "learning_rate": 9.953084075869777e-06, + "loss": 17.755, + "step": 3958 + }, + { + "epoch": 0.07236733873178935, + "grad_norm": 8.35778571923837, + "learning_rate": 9.953043611686387e-06, + "loss": 18.5947, + "step": 3959 + }, + { + "epoch": 0.07238561792823588, + "grad_norm": 7.266903566371978, + "learning_rate": 9.953003130143008e-06, + "loss": 17.7397, + "step": 3960 + }, + { + "epoch": 0.0724038971246824, + "grad_norm": 8.868681862384477, + "learning_rate": 9.952962631239785e-06, + "loss": 18.2875, + "step": 3961 + }, + { + "epoch": 0.07242217632112892, + "grad_norm": 7.137759581150072, + "learning_rate": 9.952922114976856e-06, + "loss": 17.76, + "step": 3962 + }, + { + "epoch": 0.07244045551757544, + "grad_norm": 6.571967067078624, + "learning_rate": 9.952881581354366e-06, + "loss": 17.7174, + "step": 3963 + }, + { + "epoch": 0.07245873471402198, + "grad_norm": 8.084110521015178, + "learning_rate": 9.952841030372455e-06, + "loss": 18.1417, + "step": 3964 + }, + { + "epoch": 0.0724770139104685, + "grad_norm": 6.855205404867292, + "learning_rate": 9.952800462031267e-06, + "loss": 17.7353, + "step": 3965 + }, + { + "epoch": 0.07249529310691502, + "grad_norm": 7.362992009751969, + "learning_rate": 9.952759876330941e-06, + "loss": 18.034, + "step": 3966 + }, + { + "epoch": 0.07251357230336154, + "grad_norm": 7.209436599592481, + "learning_rate": 9.952719273271625e-06, + "loss": 17.7053, + "step": 3967 + }, + { + "epoch": 0.07253185149980806, + "grad_norm": 10.08601339002671, + "learning_rate": 9.952678652853457e-06, + "loss": 18.1241, + "step": 3968 + }, + { + "epoch": 0.07255013069625459, + "grad_norm": 7.466547884926921, + 
"learning_rate": 9.952638015076577e-06, + "loss": 18.0381, + "step": 3969 + }, + { + "epoch": 0.07256840989270112, + "grad_norm": 7.990963338576961, + "learning_rate": 9.952597359941132e-06, + "loss": 18.2016, + "step": 3970 + }, + { + "epoch": 0.07258668908914764, + "grad_norm": 7.256105294918094, + "learning_rate": 9.952556687447264e-06, + "loss": 17.9083, + "step": 3971 + }, + { + "epoch": 0.07260496828559417, + "grad_norm": 9.089438335438436, + "learning_rate": 9.952515997595113e-06, + "loss": 18.5281, + "step": 3972 + }, + { + "epoch": 0.07262324748204069, + "grad_norm": 8.329081459579776, + "learning_rate": 9.952475290384824e-06, + "loss": 18.3693, + "step": 3973 + }, + { + "epoch": 0.07264152667848721, + "grad_norm": 6.519775676102489, + "learning_rate": 9.95243456581654e-06, + "loss": 17.5964, + "step": 3974 + }, + { + "epoch": 0.07265980587493374, + "grad_norm": 8.470011468438747, + "learning_rate": 9.9523938238904e-06, + "loss": 18.4528, + "step": 3975 + }, + { + "epoch": 0.07267808507138027, + "grad_norm": 6.759815054276921, + "learning_rate": 9.952353064606553e-06, + "loss": 17.523, + "step": 3976 + }, + { + "epoch": 0.07269636426782679, + "grad_norm": 6.796952167284505, + "learning_rate": 9.952312287965136e-06, + "loss": 17.618, + "step": 3977 + }, + { + "epoch": 0.07271464346427331, + "grad_norm": 6.743480792586531, + "learning_rate": 9.952271493966293e-06, + "loss": 17.6924, + "step": 3978 + }, + { + "epoch": 0.07273292266071983, + "grad_norm": 7.624318428914813, + "learning_rate": 9.95223068261017e-06, + "loss": 17.9983, + "step": 3979 + }, + { + "epoch": 0.07275120185716635, + "grad_norm": 8.376676897364284, + "learning_rate": 9.952189853896907e-06, + "loss": 18.4505, + "step": 3980 + }, + { + "epoch": 0.07276948105361289, + "grad_norm": 6.885788415651327, + "learning_rate": 9.95214900782665e-06, + "loss": 17.576, + "step": 3981 + }, + { + "epoch": 0.07278776025005941, + "grad_norm": 6.942396515459639, + "learning_rate": 9.952108144399539e-06, + "loss": 17.5863, + "step": 3982 + }, + { + "epoch": 0.07280603944650593, + "grad_norm": 7.3150546757752855, + "learning_rate": 9.952067263615719e-06, + "loss": 18.1692, + "step": 3983 + }, + { + "epoch": 0.07282431864295245, + "grad_norm": 7.031306967707324, + "learning_rate": 9.952026365475333e-06, + "loss": 17.7734, + "step": 3984 + }, + { + "epoch": 0.07284259783939898, + "grad_norm": 9.289935613803255, + "learning_rate": 9.951985449978524e-06, + "loss": 18.4523, + "step": 3985 + }, + { + "epoch": 0.0728608770358455, + "grad_norm": 7.277500424707788, + "learning_rate": 9.951944517125436e-06, + "loss": 17.8173, + "step": 3986 + }, + { + "epoch": 0.07287915623229203, + "grad_norm": 7.301225178483241, + "learning_rate": 9.951903566916213e-06, + "loss": 17.8435, + "step": 3987 + }, + { + "epoch": 0.07289743542873856, + "grad_norm": 8.825591628752957, + "learning_rate": 9.951862599350998e-06, + "loss": 18.2613, + "step": 3988 + }, + { + "epoch": 0.07291571462518508, + "grad_norm": 8.208799393223494, + "learning_rate": 9.951821614429934e-06, + "loss": 17.8988, + "step": 3989 + }, + { + "epoch": 0.0729339938216316, + "grad_norm": 8.139743030720322, + "learning_rate": 9.951780612153163e-06, + "loss": 18.1046, + "step": 3990 + }, + { + "epoch": 0.07295227301807812, + "grad_norm": 6.784330373655954, + "learning_rate": 9.951739592520833e-06, + "loss": 17.2882, + "step": 3991 + }, + { + "epoch": 0.07297055221452466, + "grad_norm": 7.61520459988752, + "learning_rate": 9.951698555533082e-06, + "loss": 18.0849, + "step": 3992 + }, + { + 
"epoch": 0.07298883141097118, + "grad_norm": 7.072648312272576, + "learning_rate": 9.951657501190061e-06, + "loss": 17.6032, + "step": 3993 + }, + { + "epoch": 0.0730071106074177, + "grad_norm": 7.917906172330156, + "learning_rate": 9.951616429491908e-06, + "loss": 18.2392, + "step": 3994 + }, + { + "epoch": 0.07302538980386422, + "grad_norm": 6.925692444592428, + "learning_rate": 9.95157534043877e-06, + "loss": 17.6648, + "step": 3995 + }, + { + "epoch": 0.07304366900031074, + "grad_norm": 7.54444893535727, + "learning_rate": 9.951534234030791e-06, + "loss": 17.9823, + "step": 3996 + }, + { + "epoch": 0.07306194819675726, + "grad_norm": 6.612402859175103, + "learning_rate": 9.951493110268111e-06, + "loss": 17.8484, + "step": 3997 + }, + { + "epoch": 0.0730802273932038, + "grad_norm": 8.501304712562513, + "learning_rate": 9.95145196915088e-06, + "loss": 18.386, + "step": 3998 + }, + { + "epoch": 0.07309850658965032, + "grad_norm": 7.528057117856636, + "learning_rate": 9.95141081067924e-06, + "loss": 17.9842, + "step": 3999 + }, + { + "epoch": 0.07311678578609684, + "grad_norm": 6.8067888312337725, + "learning_rate": 9.951369634853335e-06, + "loss": 17.2365, + "step": 4000 + }, + { + "epoch": 0.07313506498254337, + "grad_norm": 6.125900010871575, + "learning_rate": 9.951328441673307e-06, + "loss": 17.3047, + "step": 4001 + }, + { + "epoch": 0.07315334417898989, + "grad_norm": 6.362462663547174, + "learning_rate": 9.951287231139303e-06, + "loss": 17.2798, + "step": 4002 + }, + { + "epoch": 0.07317162337543641, + "grad_norm": 7.327786497261743, + "learning_rate": 9.951246003251466e-06, + "loss": 17.7502, + "step": 4003 + }, + { + "epoch": 0.07318990257188294, + "grad_norm": 7.816029605754106, + "learning_rate": 9.951204758009944e-06, + "loss": 18.0539, + "step": 4004 + }, + { + "epoch": 0.07320818176832947, + "grad_norm": 6.597918062036601, + "learning_rate": 9.951163495414878e-06, + "loss": 17.4532, + "step": 4005 + }, + { + "epoch": 0.07322646096477599, + "grad_norm": 6.82620713681258, + "learning_rate": 9.951122215466412e-06, + "loss": 17.7182, + "step": 4006 + }, + { + "epoch": 0.07324474016122251, + "grad_norm": 7.169722973585692, + "learning_rate": 9.951080918164693e-06, + "loss": 17.589, + "step": 4007 + }, + { + "epoch": 0.07326301935766903, + "grad_norm": 8.637690090782502, + "learning_rate": 9.951039603509867e-06, + "loss": 18.7023, + "step": 4008 + }, + { + "epoch": 0.07328129855411557, + "grad_norm": 7.390510482823445, + "learning_rate": 9.950998271502074e-06, + "loss": 17.6186, + "step": 4009 + }, + { + "epoch": 0.07329957775056209, + "grad_norm": 8.516459019691691, + "learning_rate": 9.950956922141464e-06, + "loss": 18.2599, + "step": 4010 + }, + { + "epoch": 0.07331785694700861, + "grad_norm": 8.216007324001174, + "learning_rate": 9.950915555428178e-06, + "loss": 18.036, + "step": 4011 + }, + { + "epoch": 0.07333613614345513, + "grad_norm": 7.413571630216674, + "learning_rate": 9.950874171362364e-06, + "loss": 17.9904, + "step": 4012 + }, + { + "epoch": 0.07335441533990165, + "grad_norm": 6.988794377704065, + "learning_rate": 9.950832769944162e-06, + "loss": 17.4064, + "step": 4013 + }, + { + "epoch": 0.07337269453634818, + "grad_norm": 7.667425168599029, + "learning_rate": 9.950791351173726e-06, + "loss": 18.2205, + "step": 4014 + }, + { + "epoch": 0.07339097373279471, + "grad_norm": 8.019972368439227, + "learning_rate": 9.950749915051192e-06, + "loss": 18.0422, + "step": 4015 + }, + { + "epoch": 0.07340925292924123, + "grad_norm": 7.834211179579535, + "learning_rate": 
9.950708461576711e-06, + "loss": 18.1822, + "step": 4016 + }, + { + "epoch": 0.07342753212568776, + "grad_norm": 6.469877667609167, + "learning_rate": 9.950666990750426e-06, + "loss": 17.3202, + "step": 4017 + }, + { + "epoch": 0.07344581132213428, + "grad_norm": 7.870032854928659, + "learning_rate": 9.950625502572484e-06, + "loss": 18.0954, + "step": 4018 + }, + { + "epoch": 0.0734640905185808, + "grad_norm": 7.566350062095447, + "learning_rate": 9.950583997043027e-06, + "loss": 17.8075, + "step": 4019 + }, + { + "epoch": 0.07348236971502732, + "grad_norm": 7.780315938865126, + "learning_rate": 9.950542474162204e-06, + "loss": 17.9308, + "step": 4020 + }, + { + "epoch": 0.07350064891147386, + "grad_norm": 7.922106515004746, + "learning_rate": 9.950500933930158e-06, + "loss": 17.9235, + "step": 4021 + }, + { + "epoch": 0.07351892810792038, + "grad_norm": 7.45719124248031, + "learning_rate": 9.950459376347039e-06, + "loss": 18.1789, + "step": 4022 + }, + { + "epoch": 0.0735372073043669, + "grad_norm": 7.22212766677821, + "learning_rate": 9.950417801412986e-06, + "loss": 17.7704, + "step": 4023 + }, + { + "epoch": 0.07355548650081342, + "grad_norm": 7.677127034832653, + "learning_rate": 9.95037620912815e-06, + "loss": 17.8526, + "step": 4024 + }, + { + "epoch": 0.07357376569725994, + "grad_norm": 8.816380996715482, + "learning_rate": 9.950334599492674e-06, + "loss": 18.3883, + "step": 4025 + }, + { + "epoch": 0.07359204489370648, + "grad_norm": 8.390724021524845, + "learning_rate": 9.950292972506706e-06, + "loss": 18.2191, + "step": 4026 + }, + { + "epoch": 0.073610324090153, + "grad_norm": 7.368897272812623, + "learning_rate": 9.950251328170389e-06, + "loss": 17.4886, + "step": 4027 + }, + { + "epoch": 0.07362860328659952, + "grad_norm": 7.692438911894966, + "learning_rate": 9.950209666483873e-06, + "loss": 17.9969, + "step": 4028 + }, + { + "epoch": 0.07364688248304604, + "grad_norm": 7.742024905837131, + "learning_rate": 9.9501679874473e-06, + "loss": 18.0472, + "step": 4029 + }, + { + "epoch": 0.07366516167949257, + "grad_norm": 6.126921754427087, + "learning_rate": 9.950126291060818e-06, + "loss": 17.1164, + "step": 4030 + }, + { + "epoch": 0.07368344087593909, + "grad_norm": 8.124959370701161, + "learning_rate": 9.950084577324573e-06, + "loss": 18.0901, + "step": 4031 + }, + { + "epoch": 0.07370172007238562, + "grad_norm": 7.026397606492624, + "learning_rate": 9.95004284623871e-06, + "loss": 17.8502, + "step": 4032 + }, + { + "epoch": 0.07371999926883215, + "grad_norm": 7.895297019900992, + "learning_rate": 9.950001097803377e-06, + "loss": 17.9183, + "step": 4033 + }, + { + "epoch": 0.07373827846527867, + "grad_norm": 6.902516650119397, + "learning_rate": 9.94995933201872e-06, + "loss": 17.728, + "step": 4034 + }, + { + "epoch": 0.07375655766172519, + "grad_norm": 6.500875070735881, + "learning_rate": 9.949917548884886e-06, + "loss": 17.5944, + "step": 4035 + }, + { + "epoch": 0.07377483685817171, + "grad_norm": 7.61340253150107, + "learning_rate": 9.94987574840202e-06, + "loss": 17.8991, + "step": 4036 + }, + { + "epoch": 0.07379311605461823, + "grad_norm": 7.249979086155434, + "learning_rate": 9.949833930570267e-06, + "loss": 17.8095, + "step": 4037 + }, + { + "epoch": 0.07381139525106477, + "grad_norm": 6.905778959602601, + "learning_rate": 9.949792095389778e-06, + "loss": 17.5323, + "step": 4038 + }, + { + "epoch": 0.07382967444751129, + "grad_norm": 7.230588307395547, + "learning_rate": 9.949750242860696e-06, + "loss": 17.9275, + "step": 4039 + }, + { + "epoch": 
0.07384795364395781, + "grad_norm": 8.082270819590986, + "learning_rate": 9.94970837298317e-06, + "loss": 18.1718, + "step": 4040 + }, + { + "epoch": 0.07386623284040433, + "grad_norm": 7.657347363032943, + "learning_rate": 9.949666485757345e-06, + "loss": 17.7518, + "step": 4041 + }, + { + "epoch": 0.07388451203685086, + "grad_norm": 6.796068906671142, + "learning_rate": 9.949624581183369e-06, + "loss": 17.4804, + "step": 4042 + }, + { + "epoch": 0.07390279123329739, + "grad_norm": 7.122222622706773, + "learning_rate": 9.949582659261387e-06, + "loss": 17.6666, + "step": 4043 + }, + { + "epoch": 0.07392107042974391, + "grad_norm": 8.144535069668137, + "learning_rate": 9.949540719991549e-06, + "loss": 17.8327, + "step": 4044 + }, + { + "epoch": 0.07393934962619043, + "grad_norm": 7.21638860677488, + "learning_rate": 9.949498763374e-06, + "loss": 17.7529, + "step": 4045 + }, + { + "epoch": 0.07395762882263696, + "grad_norm": 6.333474243227332, + "learning_rate": 9.949456789408887e-06, + "loss": 17.2955, + "step": 4046 + }, + { + "epoch": 0.07397590801908348, + "grad_norm": 6.911443461586524, + "learning_rate": 9.949414798096357e-06, + "loss": 17.5879, + "step": 4047 + }, + { + "epoch": 0.07399418721553, + "grad_norm": 6.792686640770893, + "learning_rate": 9.949372789436559e-06, + "loss": 17.7279, + "step": 4048 + }, + { + "epoch": 0.07401246641197654, + "grad_norm": 7.241880275905046, + "learning_rate": 9.949330763429637e-06, + "loss": 18.0251, + "step": 4049 + }, + { + "epoch": 0.07403074560842306, + "grad_norm": 8.604560593156664, + "learning_rate": 9.949288720075741e-06, + "loss": 18.1001, + "step": 4050 + }, + { + "epoch": 0.07404902480486958, + "grad_norm": 6.813629230605672, + "learning_rate": 9.949246659375017e-06, + "loss": 17.4705, + "step": 4051 + }, + { + "epoch": 0.0740673040013161, + "grad_norm": 8.35377728722315, + "learning_rate": 9.949204581327615e-06, + "loss": 18.4, + "step": 4052 + }, + { + "epoch": 0.07408558319776262, + "grad_norm": 7.689640765761798, + "learning_rate": 9.949162485933677e-06, + "loss": 18.1278, + "step": 4053 + }, + { + "epoch": 0.07410386239420914, + "grad_norm": 6.838277020464443, + "learning_rate": 9.949120373193354e-06, + "loss": 17.4119, + "step": 4054 + }, + { + "epoch": 0.07412214159065568, + "grad_norm": 6.290167469321867, + "learning_rate": 9.949078243106797e-06, + "loss": 17.1962, + "step": 4055 + }, + { + "epoch": 0.0741404207871022, + "grad_norm": 7.008453919533337, + "learning_rate": 9.949036095674148e-06, + "loss": 17.5201, + "step": 4056 + }, + { + "epoch": 0.07415869998354872, + "grad_norm": 7.558657647542814, + "learning_rate": 9.948993930895558e-06, + "loss": 17.9684, + "step": 4057 + }, + { + "epoch": 0.07417697917999524, + "grad_norm": 7.365638752495691, + "learning_rate": 9.948951748771174e-06, + "loss": 17.7711, + "step": 4058 + }, + { + "epoch": 0.07419525837644177, + "grad_norm": 8.646260946036872, + "learning_rate": 9.948909549301143e-06, + "loss": 18.1282, + "step": 4059 + }, + { + "epoch": 0.0742135375728883, + "grad_norm": 8.438851141537185, + "learning_rate": 9.948867332485613e-06, + "loss": 18.2028, + "step": 4060 + }, + { + "epoch": 0.07423181676933482, + "grad_norm": 8.511358088612571, + "learning_rate": 9.948825098324732e-06, + "loss": 18.4583, + "step": 4061 + }, + { + "epoch": 0.07425009596578135, + "grad_norm": 6.702850206941317, + "learning_rate": 9.948782846818649e-06, + "loss": 17.29, + "step": 4062 + }, + { + "epoch": 0.07426837516222787, + "grad_norm": 9.318593419797512, + "learning_rate": 9.948740577967512e-06, + 
"loss": 18.9032, + "step": 4063 + }, + { + "epoch": 0.07428665435867439, + "grad_norm": 8.15920685077243, + "learning_rate": 9.948698291771467e-06, + "loss": 17.9272, + "step": 4064 + }, + { + "epoch": 0.07430493355512091, + "grad_norm": 7.552555566038091, + "learning_rate": 9.948655988230667e-06, + "loss": 17.9185, + "step": 4065 + }, + { + "epoch": 0.07432321275156745, + "grad_norm": 7.421642743888335, + "learning_rate": 9.948613667345255e-06, + "loss": 17.9962, + "step": 4066 + }, + { + "epoch": 0.07434149194801397, + "grad_norm": 7.589915691254926, + "learning_rate": 9.948571329115382e-06, + "loss": 17.811, + "step": 4067 + }, + { + "epoch": 0.07435977114446049, + "grad_norm": 6.773882338508222, + "learning_rate": 9.948528973541195e-06, + "loss": 17.598, + "step": 4068 + }, + { + "epoch": 0.07437805034090701, + "grad_norm": 7.744106412655492, + "learning_rate": 9.948486600622845e-06, + "loss": 17.9758, + "step": 4069 + }, + { + "epoch": 0.07439632953735353, + "grad_norm": 6.691671558962903, + "learning_rate": 9.948444210360478e-06, + "loss": 17.4987, + "step": 4070 + }, + { + "epoch": 0.07441460873380006, + "grad_norm": 7.116877447696861, + "learning_rate": 9.948401802754245e-06, + "loss": 17.7262, + "step": 4071 + }, + { + "epoch": 0.07443288793024659, + "grad_norm": 6.537008838427648, + "learning_rate": 9.948359377804291e-06, + "loss": 17.6175, + "step": 4072 + }, + { + "epoch": 0.07445116712669311, + "grad_norm": 9.792917696642098, + "learning_rate": 9.948316935510766e-06, + "loss": 18.7539, + "step": 4073 + }, + { + "epoch": 0.07446944632313963, + "grad_norm": 7.123510681040195, + "learning_rate": 9.948274475873823e-06, + "loss": 17.885, + "step": 4074 + }, + { + "epoch": 0.07448772551958616, + "grad_norm": 7.761363184437969, + "learning_rate": 9.948231998893605e-06, + "loss": 18.178, + "step": 4075 + }, + { + "epoch": 0.07450600471603268, + "grad_norm": 7.701810438309446, + "learning_rate": 9.948189504570263e-06, + "loss": 18.1316, + "step": 4076 + }, + { + "epoch": 0.07452428391247921, + "grad_norm": 8.334436625697984, + "learning_rate": 9.948146992903947e-06, + "loss": 17.8821, + "step": 4077 + }, + { + "epoch": 0.07454256310892574, + "grad_norm": 7.408338899300994, + "learning_rate": 9.948104463894805e-06, + "loss": 17.8913, + "step": 4078 + }, + { + "epoch": 0.07456084230537226, + "grad_norm": 6.739679926354867, + "learning_rate": 9.948061917542986e-06, + "loss": 17.7448, + "step": 4079 + }, + { + "epoch": 0.07457912150181878, + "grad_norm": 7.439166133164964, + "learning_rate": 9.94801935384864e-06, + "loss": 18.0901, + "step": 4080 + }, + { + "epoch": 0.0745974006982653, + "grad_norm": 7.322434827980138, + "learning_rate": 9.947976772811915e-06, + "loss": 17.8107, + "step": 4081 + }, + { + "epoch": 0.07461567989471182, + "grad_norm": 7.209222093140329, + "learning_rate": 9.94793417443296e-06, + "loss": 17.9077, + "step": 4082 + }, + { + "epoch": 0.07463395909115836, + "grad_norm": 7.28439817785033, + "learning_rate": 9.947891558711926e-06, + "loss": 17.854, + "step": 4083 + }, + { + "epoch": 0.07465223828760488, + "grad_norm": 6.633480726270058, + "learning_rate": 9.947848925648961e-06, + "loss": 17.5347, + "step": 4084 + }, + { + "epoch": 0.0746705174840514, + "grad_norm": 8.384485505266616, + "learning_rate": 9.947806275244216e-06, + "loss": 18.024, + "step": 4085 + }, + { + "epoch": 0.07468879668049792, + "grad_norm": 6.944517935602008, + "learning_rate": 9.947763607497839e-06, + "loss": 17.8275, + "step": 4086 + }, + { + "epoch": 0.07470707587694445, + "grad_norm": 
7.061560685453435, + "learning_rate": 9.94772092240998e-06, + "loss": 17.4181, + "step": 4087 + }, + { + "epoch": 0.07472535507339097, + "grad_norm": 9.971019832050642, + "learning_rate": 9.947678219980788e-06, + "loss": 19.1205, + "step": 4088 + }, + { + "epoch": 0.0747436342698375, + "grad_norm": 6.843703280412826, + "learning_rate": 9.947635500210413e-06, + "loss": 17.4305, + "step": 4089 + }, + { + "epoch": 0.07476191346628402, + "grad_norm": 6.489921106555448, + "learning_rate": 9.947592763099005e-06, + "loss": 17.2924, + "step": 4090 + }, + { + "epoch": 0.07478019266273055, + "grad_norm": 6.624219617143493, + "learning_rate": 9.947550008646713e-06, + "loss": 17.3873, + "step": 4091 + }, + { + "epoch": 0.07479847185917707, + "grad_norm": 7.034030188330044, + "learning_rate": 9.947507236853689e-06, + "loss": 17.6899, + "step": 4092 + }, + { + "epoch": 0.07481675105562359, + "grad_norm": 8.931076521754736, + "learning_rate": 9.947464447720082e-06, + "loss": 18.4088, + "step": 4093 + }, + { + "epoch": 0.07483503025207013, + "grad_norm": 7.8038980993245515, + "learning_rate": 9.947421641246038e-06, + "loss": 18.0855, + "step": 4094 + }, + { + "epoch": 0.07485330944851665, + "grad_norm": 7.184042813121832, + "learning_rate": 9.947378817431714e-06, + "loss": 17.754, + "step": 4095 + }, + { + "epoch": 0.07487158864496317, + "grad_norm": 6.9364919302493835, + "learning_rate": 9.947335976277256e-06, + "loss": 17.6305, + "step": 4096 + }, + { + "epoch": 0.07488986784140969, + "grad_norm": 7.332355591307151, + "learning_rate": 9.947293117782813e-06, + "loss": 17.736, + "step": 4097 + }, + { + "epoch": 0.07490814703785621, + "grad_norm": 6.37794444152234, + "learning_rate": 9.947250241948538e-06, + "loss": 17.3367, + "step": 4098 + }, + { + "epoch": 0.07492642623430273, + "grad_norm": 7.422075968050891, + "learning_rate": 9.94720734877458e-06, + "loss": 17.7243, + "step": 4099 + }, + { + "epoch": 0.07494470543074927, + "grad_norm": 6.284693446169713, + "learning_rate": 9.94716443826109e-06, + "loss": 17.6514, + "step": 4100 + }, + { + "epoch": 0.07496298462719579, + "grad_norm": 6.969561100740545, + "learning_rate": 9.947121510408218e-06, + "loss": 17.3915, + "step": 4101 + }, + { + "epoch": 0.07498126382364231, + "grad_norm": 7.057265356575579, + "learning_rate": 9.947078565216112e-06, + "loss": 17.8751, + "step": 4102 + }, + { + "epoch": 0.07499954302008884, + "grad_norm": 8.521052023661452, + "learning_rate": 9.947035602684929e-06, + "loss": 18.1689, + "step": 4103 + }, + { + "epoch": 0.07501782221653536, + "grad_norm": 8.966329658182065, + "learning_rate": 9.946992622814813e-06, + "loss": 18.0758, + "step": 4104 + }, + { + "epoch": 0.07503610141298188, + "grad_norm": 6.917196134949586, + "learning_rate": 9.946949625605918e-06, + "loss": 17.7005, + "step": 4105 + }, + { + "epoch": 0.07505438060942841, + "grad_norm": 7.431754061761407, + "learning_rate": 9.946906611058394e-06, + "loss": 17.5164, + "step": 4106 + }, + { + "epoch": 0.07507265980587494, + "grad_norm": 7.824983042402745, + "learning_rate": 9.94686357917239e-06, + "loss": 18.0068, + "step": 4107 + }, + { + "epoch": 0.07509093900232146, + "grad_norm": 6.246252175853513, + "learning_rate": 9.94682052994806e-06, + "loss": 17.4046, + "step": 4108 + }, + { + "epoch": 0.07510921819876798, + "grad_norm": 8.081686411815316, + "learning_rate": 9.946777463385552e-06, + "loss": 18.3135, + "step": 4109 + }, + { + "epoch": 0.0751274973952145, + "grad_norm": 7.337183123648379, + "learning_rate": 9.94673437948502e-06, + "loss": 17.6567, + "step": 
4110 + }, + { + "epoch": 0.07514577659166104, + "grad_norm": 7.780530171818472, + "learning_rate": 9.946691278246611e-06, + "loss": 17.9705, + "step": 4111 + }, + { + "epoch": 0.07516405578810756, + "grad_norm": 6.994340854220237, + "learning_rate": 9.94664815967048e-06, + "loss": 17.965, + "step": 4112 + }, + { + "epoch": 0.07518233498455408, + "grad_norm": 9.142171164436062, + "learning_rate": 9.946605023756776e-06, + "loss": 18.7063, + "step": 4113 + }, + { + "epoch": 0.0752006141810006, + "grad_norm": 8.526814159440248, + "learning_rate": 9.94656187050565e-06, + "loss": 18.3471, + "step": 4114 + }, + { + "epoch": 0.07521889337744712, + "grad_norm": 8.498938940911465, + "learning_rate": 9.946518699917254e-06, + "loss": 18.1441, + "step": 4115 + }, + { + "epoch": 0.07523717257389365, + "grad_norm": 8.097700734472312, + "learning_rate": 9.94647551199174e-06, + "loss": 18.1501, + "step": 4116 + }, + { + "epoch": 0.07525545177034018, + "grad_norm": 7.225025441865226, + "learning_rate": 9.94643230672926e-06, + "loss": 17.9419, + "step": 4117 + }, + { + "epoch": 0.0752737309667867, + "grad_norm": 6.190358687026648, + "learning_rate": 9.946389084129962e-06, + "loss": 17.3257, + "step": 4118 + }, + { + "epoch": 0.07529201016323323, + "grad_norm": 7.727382977574147, + "learning_rate": 9.946345844194e-06, + "loss": 18.0806, + "step": 4119 + }, + { + "epoch": 0.07531028935967975, + "grad_norm": 8.843536075563375, + "learning_rate": 9.946302586921523e-06, + "loss": 18.3376, + "step": 4120 + }, + { + "epoch": 0.07532856855612627, + "grad_norm": 7.095710743979115, + "learning_rate": 9.946259312312687e-06, + "loss": 17.499, + "step": 4121 + }, + { + "epoch": 0.07534684775257279, + "grad_norm": 9.882176262423766, + "learning_rate": 9.946216020367641e-06, + "loss": 18.5481, + "step": 4122 + }, + { + "epoch": 0.07536512694901933, + "grad_norm": 6.449542517877799, + "learning_rate": 9.946172711086538e-06, + "loss": 17.4244, + "step": 4123 + }, + { + "epoch": 0.07538340614546585, + "grad_norm": 6.409241883041255, + "learning_rate": 9.946129384469526e-06, + "loss": 17.2406, + "step": 4124 + }, + { + "epoch": 0.07540168534191237, + "grad_norm": 7.892889481909539, + "learning_rate": 9.946086040516762e-06, + "loss": 18.6329, + "step": 4125 + }, + { + "epoch": 0.07541996453835889, + "grad_norm": 7.326463313717324, + "learning_rate": 9.946042679228394e-06, + "loss": 17.5176, + "step": 4126 + }, + { + "epoch": 0.07543824373480541, + "grad_norm": 7.5983136570427785, + "learning_rate": 9.945999300604578e-06, + "loss": 18.0372, + "step": 4127 + }, + { + "epoch": 0.07545652293125195, + "grad_norm": 8.566667387731414, + "learning_rate": 9.945955904645461e-06, + "loss": 18.5739, + "step": 4128 + }, + { + "epoch": 0.07547480212769847, + "grad_norm": 6.308984762709798, + "learning_rate": 9.945912491351199e-06, + "loss": 17.4579, + "step": 4129 + }, + { + "epoch": 0.07549308132414499, + "grad_norm": 7.636671245026625, + "learning_rate": 9.945869060721945e-06, + "loss": 17.8382, + "step": 4130 + }, + { + "epoch": 0.07551136052059151, + "grad_norm": 7.152409547096006, + "learning_rate": 9.945825612757846e-06, + "loss": 17.6699, + "step": 4131 + }, + { + "epoch": 0.07552963971703804, + "grad_norm": 6.251052994274618, + "learning_rate": 9.945782147459057e-06, + "loss": 17.6965, + "step": 4132 + }, + { + "epoch": 0.07554791891348456, + "grad_norm": 6.497513929491633, + "learning_rate": 9.945738664825734e-06, + "loss": 17.6654, + "step": 4133 + }, + { + "epoch": 0.0755661981099311, + "grad_norm": 7.20433608214677, + 
"learning_rate": 9.945695164858024e-06, + "loss": 17.6154, + "step": 4134 + }, + { + "epoch": 0.07558447730637761, + "grad_norm": 8.678988533293408, + "learning_rate": 9.945651647556083e-06, + "loss": 18.2711, + "step": 4135 + }, + { + "epoch": 0.07560275650282414, + "grad_norm": 7.675365740961694, + "learning_rate": 9.94560811292006e-06, + "loss": 17.7857, + "step": 4136 + }, + { + "epoch": 0.07562103569927066, + "grad_norm": 6.623604759756009, + "learning_rate": 9.945564560950112e-06, + "loss": 17.5991, + "step": 4137 + }, + { + "epoch": 0.07563931489571718, + "grad_norm": 6.792192435163245, + "learning_rate": 9.945520991646387e-06, + "loss": 17.7569, + "step": 4138 + }, + { + "epoch": 0.0756575940921637, + "grad_norm": 6.2358037854952535, + "learning_rate": 9.945477405009041e-06, + "loss": 17.4349, + "step": 4139 + }, + { + "epoch": 0.07567587328861024, + "grad_norm": 6.7391726750467225, + "learning_rate": 9.945433801038226e-06, + "loss": 17.6439, + "step": 4140 + }, + { + "epoch": 0.07569415248505676, + "grad_norm": 6.574972610136312, + "learning_rate": 9.945390179734095e-06, + "loss": 17.4439, + "step": 4141 + }, + { + "epoch": 0.07571243168150328, + "grad_norm": 6.728158734735512, + "learning_rate": 9.9453465410968e-06, + "loss": 17.617, + "step": 4142 + }, + { + "epoch": 0.0757307108779498, + "grad_norm": 7.279794345302788, + "learning_rate": 9.945302885126496e-06, + "loss": 18.1515, + "step": 4143 + }, + { + "epoch": 0.07574899007439632, + "grad_norm": 7.039529505380779, + "learning_rate": 9.945259211823332e-06, + "loss": 17.6819, + "step": 4144 + }, + { + "epoch": 0.07576726927084286, + "grad_norm": 6.995006558034378, + "learning_rate": 9.945215521187465e-06, + "loss": 17.8893, + "step": 4145 + }, + { + "epoch": 0.07578554846728938, + "grad_norm": 7.013601369894375, + "learning_rate": 9.945171813219048e-06, + "loss": 17.7406, + "step": 4146 + }, + { + "epoch": 0.0758038276637359, + "grad_norm": 7.644985109226018, + "learning_rate": 9.945128087918233e-06, + "loss": 17.93, + "step": 4147 + }, + { + "epoch": 0.07582210686018243, + "grad_norm": 8.513745268468803, + "learning_rate": 9.945084345285172e-06, + "loss": 18.1143, + "step": 4148 + }, + { + "epoch": 0.07584038605662895, + "grad_norm": 8.338563306688147, + "learning_rate": 9.945040585320019e-06, + "loss": 18.039, + "step": 4149 + }, + { + "epoch": 0.07585866525307547, + "grad_norm": 8.422151833158328, + "learning_rate": 9.944996808022926e-06, + "loss": 17.9575, + "step": 4150 + }, + { + "epoch": 0.075876944449522, + "grad_norm": 7.36414342599897, + "learning_rate": 9.944953013394053e-06, + "loss": 17.6962, + "step": 4151 + }, + { + "epoch": 0.07589522364596853, + "grad_norm": 6.611161073638273, + "learning_rate": 9.944909201433545e-06, + "loss": 17.4743, + "step": 4152 + }, + { + "epoch": 0.07591350284241505, + "grad_norm": 8.11113165044071, + "learning_rate": 9.944865372141562e-06, + "loss": 17.8998, + "step": 4153 + }, + { + "epoch": 0.07593178203886157, + "grad_norm": 8.147540883815966, + "learning_rate": 9.94482152551825e-06, + "loss": 17.7785, + "step": 4154 + }, + { + "epoch": 0.07595006123530809, + "grad_norm": 6.037791007125641, + "learning_rate": 9.944777661563774e-06, + "loss": 17.1556, + "step": 4155 + }, + { + "epoch": 0.07596834043175461, + "grad_norm": 8.005885219212052, + "learning_rate": 9.944733780278277e-06, + "loss": 18.1231, + "step": 4156 + }, + { + "epoch": 0.07598661962820115, + "grad_norm": 7.258942344087547, + "learning_rate": 9.94468988166192e-06, + "loss": 17.6645, + "step": 4157 + }, + { + "epoch": 
0.07600489882464767, + "grad_norm": 6.8830099394215285, + "learning_rate": 9.944645965714851e-06, + "loss": 17.6559, + "step": 4158 + }, + { + "epoch": 0.07602317802109419, + "grad_norm": 6.907603205447108, + "learning_rate": 9.94460203243723e-06, + "loss": 17.5304, + "step": 4159 + }, + { + "epoch": 0.07604145721754071, + "grad_norm": 7.609951382296154, + "learning_rate": 9.944558081829208e-06, + "loss": 17.7897, + "step": 4160 + }, + { + "epoch": 0.07605973641398724, + "grad_norm": 8.234379233176638, + "learning_rate": 9.944514113890938e-06, + "loss": 18.4789, + "step": 4161 + }, + { + "epoch": 0.07607801561043377, + "grad_norm": 8.28646045976863, + "learning_rate": 9.944470128622574e-06, + "loss": 17.8717, + "step": 4162 + }, + { + "epoch": 0.0760962948068803, + "grad_norm": 7.497066969234497, + "learning_rate": 9.944426126024271e-06, + "loss": 17.9437, + "step": 4163 + }, + { + "epoch": 0.07611457400332682, + "grad_norm": 7.739957927204389, + "learning_rate": 9.944382106096186e-06, + "loss": 17.8309, + "step": 4164 + }, + { + "epoch": 0.07613285319977334, + "grad_norm": 8.3163273818476, + "learning_rate": 9.944338068838469e-06, + "loss": 18.029, + "step": 4165 + }, + { + "epoch": 0.07615113239621986, + "grad_norm": 7.273911546813022, + "learning_rate": 9.944294014251276e-06, + "loss": 17.7673, + "step": 4166 + }, + { + "epoch": 0.07616941159266638, + "grad_norm": 7.337268981521612, + "learning_rate": 9.944249942334763e-06, + "loss": 17.8419, + "step": 4167 + }, + { + "epoch": 0.07618769078911292, + "grad_norm": 8.363235672027164, + "learning_rate": 9.944205853089081e-06, + "loss": 18.3541, + "step": 4168 + }, + { + "epoch": 0.07620596998555944, + "grad_norm": 8.452200252193368, + "learning_rate": 9.944161746514387e-06, + "loss": 17.9309, + "step": 4169 + }, + { + "epoch": 0.07622424918200596, + "grad_norm": 8.218646460160082, + "learning_rate": 9.944117622610836e-06, + "loss": 18.2633, + "step": 4170 + }, + { + "epoch": 0.07624252837845248, + "grad_norm": 6.357008423240061, + "learning_rate": 9.944073481378581e-06, + "loss": 17.5283, + "step": 4171 + }, + { + "epoch": 0.076260807574899, + "grad_norm": 7.7439428807246715, + "learning_rate": 9.94402932281778e-06, + "loss": 17.9626, + "step": 4172 + }, + { + "epoch": 0.07627908677134553, + "grad_norm": 7.774958259158812, + "learning_rate": 9.94398514692858e-06, + "loss": 18.0529, + "step": 4173 + }, + { + "epoch": 0.07629736596779206, + "grad_norm": 7.238700095656024, + "learning_rate": 9.943940953711147e-06, + "loss": 17.8406, + "step": 4174 + }, + { + "epoch": 0.07631564516423858, + "grad_norm": 6.849452434091139, + "learning_rate": 9.943896743165626e-06, + "loss": 17.5181, + "step": 4175 + }, + { + "epoch": 0.0763339243606851, + "grad_norm": 6.757854383035227, + "learning_rate": 9.943852515292177e-06, + "loss": 17.6835, + "step": 4176 + }, + { + "epoch": 0.07635220355713163, + "grad_norm": 7.455393760134097, + "learning_rate": 9.943808270090953e-06, + "loss": 17.6921, + "step": 4177 + }, + { + "epoch": 0.07637048275357815, + "grad_norm": 5.992826452646012, + "learning_rate": 9.943764007562113e-06, + "loss": 17.6179, + "step": 4178 + }, + { + "epoch": 0.07638876195002468, + "grad_norm": 6.886465080017079, + "learning_rate": 9.943719727705807e-06, + "loss": 17.6493, + "step": 4179 + }, + { + "epoch": 0.0764070411464712, + "grad_norm": 8.684834787939241, + "learning_rate": 9.943675430522191e-06, + "loss": 18.7955, + "step": 4180 + }, + { + "epoch": 0.07642532034291773, + "grad_norm": 7.323220417378751, + "learning_rate": 
9.943631116011424e-06, + "loss": 17.9174, + "step": 4181 + }, + { + "epoch": 0.07644359953936425, + "grad_norm": 10.592096380410188, + "learning_rate": 9.943586784173657e-06, + "loss": 18.4581, + "step": 4182 + }, + { + "epoch": 0.07646187873581077, + "grad_norm": 7.469103899829183, + "learning_rate": 9.94354243500905e-06, + "loss": 17.918, + "step": 4183 + }, + { + "epoch": 0.07648015793225729, + "grad_norm": 8.429324116324565, + "learning_rate": 9.943498068517754e-06, + "loss": 18.197, + "step": 4184 + }, + { + "epoch": 0.07649843712870383, + "grad_norm": 7.630150917105338, + "learning_rate": 9.943453684699927e-06, + "loss": 18.0674, + "step": 4185 + }, + { + "epoch": 0.07651671632515035, + "grad_norm": 7.5848018531771855, + "learning_rate": 9.943409283555722e-06, + "loss": 17.9029, + "step": 4186 + }, + { + "epoch": 0.07653499552159687, + "grad_norm": 6.024753089526956, + "learning_rate": 9.943364865085298e-06, + "loss": 17.2247, + "step": 4187 + }, + { + "epoch": 0.0765532747180434, + "grad_norm": 7.86967665440768, + "learning_rate": 9.943320429288808e-06, + "loss": 17.8321, + "step": 4188 + }, + { + "epoch": 0.07657155391448991, + "grad_norm": 6.302985308683977, + "learning_rate": 9.943275976166412e-06, + "loss": 17.1027, + "step": 4189 + }, + { + "epoch": 0.07658983311093644, + "grad_norm": 6.833109397879678, + "learning_rate": 9.943231505718258e-06, + "loss": 17.6025, + "step": 4190 + }, + { + "epoch": 0.07660811230738297, + "grad_norm": 8.365916398266814, + "learning_rate": 9.94318701794451e-06, + "loss": 18.2189, + "step": 4191 + }, + { + "epoch": 0.0766263915038295, + "grad_norm": 8.477496443844009, + "learning_rate": 9.943142512845318e-06, + "loss": 18.3351, + "step": 4192 + }, + { + "epoch": 0.07664467070027602, + "grad_norm": 6.698769837563342, + "learning_rate": 9.943097990420843e-06, + "loss": 17.3659, + "step": 4193 + }, + { + "epoch": 0.07666294989672254, + "grad_norm": 7.8840085628076935, + "learning_rate": 9.943053450671236e-06, + "loss": 18.0192, + "step": 4194 + }, + { + "epoch": 0.07668122909316906, + "grad_norm": 6.602380845223933, + "learning_rate": 9.943008893596657e-06, + "loss": 17.426, + "step": 4195 + }, + { + "epoch": 0.0766995082896156, + "grad_norm": 6.888029864878589, + "learning_rate": 9.94296431919726e-06, + "loss": 17.6183, + "step": 4196 + }, + { + "epoch": 0.07671778748606212, + "grad_norm": 8.251367427605121, + "learning_rate": 9.942919727473204e-06, + "loss": 18.3592, + "step": 4197 + }, + { + "epoch": 0.07673606668250864, + "grad_norm": 7.964296308652757, + "learning_rate": 9.942875118424641e-06, + "loss": 18.2928, + "step": 4198 + }, + { + "epoch": 0.07675434587895516, + "grad_norm": 8.075699505373022, + "learning_rate": 9.94283049205173e-06, + "loss": 18.2313, + "step": 4199 + }, + { + "epoch": 0.07677262507540168, + "grad_norm": 7.184827022794858, + "learning_rate": 9.942785848354628e-06, + "loss": 17.7186, + "step": 4200 + }, + { + "epoch": 0.0767909042718482, + "grad_norm": 7.626331857415009, + "learning_rate": 9.94274118733349e-06, + "loss": 17.826, + "step": 4201 + }, + { + "epoch": 0.07680918346829474, + "grad_norm": 6.239000698427575, + "learning_rate": 9.942696508988472e-06, + "loss": 17.4026, + "step": 4202 + }, + { + "epoch": 0.07682746266474126, + "grad_norm": 6.543866858237177, + "learning_rate": 9.942651813319733e-06, + "loss": 17.5936, + "step": 4203 + }, + { + "epoch": 0.07684574186118778, + "grad_norm": 8.238881612293955, + "learning_rate": 9.942607100327428e-06, + "loss": 18.183, + "step": 4204 + }, + { + "epoch": 
0.0768640210576343, + "grad_norm": 7.722407507706026, + "learning_rate": 9.942562370011714e-06, + "loss": 18.4659, + "step": 4205 + }, + { + "epoch": 0.07688230025408083, + "grad_norm": 7.565818940073065, + "learning_rate": 9.942517622372749e-06, + "loss": 17.9724, + "step": 4206 + }, + { + "epoch": 0.07690057945052735, + "grad_norm": 7.277322276406721, + "learning_rate": 9.942472857410687e-06, + "loss": 17.6006, + "step": 4207 + }, + { + "epoch": 0.07691885864697388, + "grad_norm": 6.97125103493985, + "learning_rate": 9.942428075125688e-06, + "loss": 17.4252, + "step": 4208 + }, + { + "epoch": 0.0769371378434204, + "grad_norm": 8.40918433822153, + "learning_rate": 9.942383275517905e-06, + "loss": 18.2623, + "step": 4209 + }, + { + "epoch": 0.07695541703986693, + "grad_norm": 7.667828948326944, + "learning_rate": 9.942338458587499e-06, + "loss": 17.804, + "step": 4210 + }, + { + "epoch": 0.07697369623631345, + "grad_norm": 8.03912646465718, + "learning_rate": 9.942293624334627e-06, + "loss": 18.2884, + "step": 4211 + }, + { + "epoch": 0.07699197543275997, + "grad_norm": 7.589292928257367, + "learning_rate": 9.942248772759443e-06, + "loss": 17.6766, + "step": 4212 + }, + { + "epoch": 0.0770102546292065, + "grad_norm": 7.67375910134019, + "learning_rate": 9.942203903862105e-06, + "loss": 17.9565, + "step": 4213 + }, + { + "epoch": 0.07702853382565303, + "grad_norm": 8.881236727814375, + "learning_rate": 9.942159017642772e-06, + "loss": 18.1565, + "step": 4214 + }, + { + "epoch": 0.07704681302209955, + "grad_norm": 7.164802997735672, + "learning_rate": 9.9421141141016e-06, + "loss": 17.4539, + "step": 4215 + }, + { + "epoch": 0.07706509221854607, + "grad_norm": 7.440014489282296, + "learning_rate": 9.942069193238749e-06, + "loss": 17.8582, + "step": 4216 + }, + { + "epoch": 0.0770833714149926, + "grad_norm": 7.292631624423447, + "learning_rate": 9.942024255054373e-06, + "loss": 17.6257, + "step": 4217 + }, + { + "epoch": 0.07710165061143912, + "grad_norm": 7.431778358488795, + "learning_rate": 9.941979299548631e-06, + "loss": 17.9583, + "step": 4218 + }, + { + "epoch": 0.07711992980788565, + "grad_norm": 8.686717295254155, + "learning_rate": 9.94193432672168e-06, + "loss": 18.0693, + "step": 4219 + }, + { + "epoch": 0.07713820900433217, + "grad_norm": 7.077506617393422, + "learning_rate": 9.941889336573678e-06, + "loss": 17.575, + "step": 4220 + }, + { + "epoch": 0.0771564882007787, + "grad_norm": 7.241133479233001, + "learning_rate": 9.941844329104784e-06, + "loss": 17.4906, + "step": 4221 + }, + { + "epoch": 0.07717476739722522, + "grad_norm": 8.624029205933166, + "learning_rate": 9.941799304315152e-06, + "loss": 18.6572, + "step": 4222 + }, + { + "epoch": 0.07719304659367174, + "grad_norm": 6.033995693434952, + "learning_rate": 9.941754262204945e-06, + "loss": 17.4807, + "step": 4223 + }, + { + "epoch": 0.07721132579011826, + "grad_norm": 6.659671442220002, + "learning_rate": 9.941709202774317e-06, + "loss": 17.5266, + "step": 4224 + }, + { + "epoch": 0.0772296049865648, + "grad_norm": 7.65291781378316, + "learning_rate": 9.941664126023427e-06, + "loss": 17.9475, + "step": 4225 + }, + { + "epoch": 0.07724788418301132, + "grad_norm": 7.73272908049448, + "learning_rate": 9.941619031952433e-06, + "loss": 18.2557, + "step": 4226 + }, + { + "epoch": 0.07726616337945784, + "grad_norm": 7.436490386916161, + "learning_rate": 9.941573920561493e-06, + "loss": 17.7364, + "step": 4227 + }, + { + "epoch": 0.07728444257590436, + "grad_norm": 7.1492946285383985, + "learning_rate": 9.941528791850765e-06, 
+ "loss": 17.4325, + "step": 4228 + }, + { + "epoch": 0.07730272177235088, + "grad_norm": 7.145356946317946, + "learning_rate": 9.941483645820406e-06, + "loss": 17.8871, + "step": 4229 + }, + { + "epoch": 0.07732100096879742, + "grad_norm": 7.033499107558293, + "learning_rate": 9.941438482470578e-06, + "loss": 17.6935, + "step": 4230 + }, + { + "epoch": 0.07733928016524394, + "grad_norm": 9.827010628938945, + "learning_rate": 9.941393301801436e-06, + "loss": 18.3654, + "step": 4231 + }, + { + "epoch": 0.07735755936169046, + "grad_norm": 7.0975941131186735, + "learning_rate": 9.94134810381314e-06, + "loss": 18.0554, + "step": 4232 + }, + { + "epoch": 0.07737583855813698, + "grad_norm": 5.92454778215829, + "learning_rate": 9.941302888505848e-06, + "loss": 17.1777, + "step": 4233 + }, + { + "epoch": 0.0773941177545835, + "grad_norm": 6.959103247304966, + "learning_rate": 9.941257655879717e-06, + "loss": 17.8269, + "step": 4234 + }, + { + "epoch": 0.07741239695103003, + "grad_norm": 7.022073491872154, + "learning_rate": 9.941212405934907e-06, + "loss": 17.5797, + "step": 4235 + }, + { + "epoch": 0.07743067614747656, + "grad_norm": 7.1063690036947715, + "learning_rate": 9.941167138671577e-06, + "loss": 17.7828, + "step": 4236 + }, + { + "epoch": 0.07744895534392308, + "grad_norm": 7.954584175919247, + "learning_rate": 9.941121854089883e-06, + "loss": 17.995, + "step": 4237 + }, + { + "epoch": 0.0774672345403696, + "grad_norm": 6.729010062244928, + "learning_rate": 9.941076552189987e-06, + "loss": 17.4944, + "step": 4238 + }, + { + "epoch": 0.07748551373681613, + "grad_norm": 6.587494040315997, + "learning_rate": 9.941031232972047e-06, + "loss": 17.5591, + "step": 4239 + }, + { + "epoch": 0.07750379293326265, + "grad_norm": 5.837888384091902, + "learning_rate": 9.940985896436221e-06, + "loss": 17.161, + "step": 4240 + }, + { + "epoch": 0.07752207212970917, + "grad_norm": 7.4309429538383425, + "learning_rate": 9.940940542582668e-06, + "loss": 17.7261, + "step": 4241 + }, + { + "epoch": 0.07754035132615571, + "grad_norm": 7.308490142334014, + "learning_rate": 9.940895171411547e-06, + "loss": 18.0319, + "step": 4242 + }, + { + "epoch": 0.07755863052260223, + "grad_norm": 7.27763941755643, + "learning_rate": 9.940849782923016e-06, + "loss": 17.8745, + "step": 4243 + }, + { + "epoch": 0.07757690971904875, + "grad_norm": 7.929012297932565, + "learning_rate": 9.940804377117238e-06, + "loss": 17.8578, + "step": 4244 + }, + { + "epoch": 0.07759518891549527, + "grad_norm": 8.097441680087801, + "learning_rate": 9.940758953994367e-06, + "loss": 18.4698, + "step": 4245 + }, + { + "epoch": 0.0776134681119418, + "grad_norm": 8.606117810919702, + "learning_rate": 9.940713513554567e-06, + "loss": 18.5981, + "step": 4246 + }, + { + "epoch": 0.07763174730838833, + "grad_norm": 7.9802614013269615, + "learning_rate": 9.940668055797993e-06, + "loss": 17.9609, + "step": 4247 + }, + { + "epoch": 0.07765002650483485, + "grad_norm": 6.401310771525295, + "learning_rate": 9.940622580724806e-06, + "loss": 17.5281, + "step": 4248 + }, + { + "epoch": 0.07766830570128137, + "grad_norm": 7.765152466156165, + "learning_rate": 9.940577088335167e-06, + "loss": 18.4588, + "step": 4249 + }, + { + "epoch": 0.0776865848977279, + "grad_norm": 11.812201013267776, + "learning_rate": 9.940531578629234e-06, + "loss": 18.3213, + "step": 4250 + }, + { + "epoch": 0.07770486409417442, + "grad_norm": 8.107179031342193, + "learning_rate": 9.940486051607164e-06, + "loss": 18.1375, + "step": 4251 + }, + { + "epoch": 0.07772314329062094, + 
"grad_norm": 7.390909636393926, + "learning_rate": 9.940440507269121e-06, + "loss": 18.0795, + "step": 4252 + }, + { + "epoch": 0.07774142248706747, + "grad_norm": 6.608999100204548, + "learning_rate": 9.940394945615262e-06, + "loss": 17.4263, + "step": 4253 + }, + { + "epoch": 0.077759701683514, + "grad_norm": 7.24455911996213, + "learning_rate": 9.94034936664575e-06, + "loss": 17.7903, + "step": 4254 + }, + { + "epoch": 0.07777798087996052, + "grad_norm": 6.510455486033311, + "learning_rate": 9.94030377036074e-06, + "loss": 17.4657, + "step": 4255 + }, + { + "epoch": 0.07779626007640704, + "grad_norm": 6.702799210893266, + "learning_rate": 9.940258156760393e-06, + "loss": 17.7637, + "step": 4256 + }, + { + "epoch": 0.07781453927285356, + "grad_norm": 6.25184406311624, + "learning_rate": 9.940212525844872e-06, + "loss": 17.3767, + "step": 4257 + }, + { + "epoch": 0.07783281846930008, + "grad_norm": 6.063002518150071, + "learning_rate": 9.940166877614332e-06, + "loss": 17.1084, + "step": 4258 + }, + { + "epoch": 0.07785109766574662, + "grad_norm": 6.809055593118283, + "learning_rate": 9.940121212068938e-06, + "loss": 17.5896, + "step": 4259 + }, + { + "epoch": 0.07786937686219314, + "grad_norm": 7.500670999454073, + "learning_rate": 9.940075529208848e-06, + "loss": 18.0046, + "step": 4260 + }, + { + "epoch": 0.07788765605863966, + "grad_norm": 7.555990688721894, + "learning_rate": 9.940029829034221e-06, + "loss": 17.9302, + "step": 4261 + }, + { + "epoch": 0.07790593525508618, + "grad_norm": 7.888456038024813, + "learning_rate": 9.93998411154522e-06, + "loss": 18.5802, + "step": 4262 + }, + { + "epoch": 0.0779242144515327, + "grad_norm": 7.8178178993913745, + "learning_rate": 9.939938376742e-06, + "loss": 18.2034, + "step": 4263 + }, + { + "epoch": 0.07794249364797924, + "grad_norm": 7.175630237951054, + "learning_rate": 9.939892624624727e-06, + "loss": 17.7728, + "step": 4264 + }, + { + "epoch": 0.07796077284442576, + "grad_norm": 7.269642141011237, + "learning_rate": 9.939846855193558e-06, + "loss": 17.5007, + "step": 4265 + }, + { + "epoch": 0.07797905204087228, + "grad_norm": 8.343995757845725, + "learning_rate": 9.939801068448655e-06, + "loss": 18.0206, + "step": 4266 + }, + { + "epoch": 0.0779973312373188, + "grad_norm": 8.104335902213569, + "learning_rate": 9.939755264390179e-06, + "loss": 17.8248, + "step": 4267 + }, + { + "epoch": 0.07801561043376533, + "grad_norm": 6.52825714404, + "learning_rate": 9.939709443018288e-06, + "loss": 17.5734, + "step": 4268 + }, + { + "epoch": 0.07803388963021185, + "grad_norm": 7.635623842563316, + "learning_rate": 9.939663604333144e-06, + "loss": 18.1016, + "step": 4269 + }, + { + "epoch": 0.07805216882665839, + "grad_norm": 7.086943496847928, + "learning_rate": 9.939617748334908e-06, + "loss": 17.8211, + "step": 4270 + }, + { + "epoch": 0.07807044802310491, + "grad_norm": 6.366837535999178, + "learning_rate": 9.939571875023742e-06, + "loss": 17.3253, + "step": 4271 + }, + { + "epoch": 0.07808872721955143, + "grad_norm": 10.72186509949408, + "learning_rate": 9.939525984399803e-06, + "loss": 18.0906, + "step": 4272 + }, + { + "epoch": 0.07810700641599795, + "grad_norm": 8.041045026155597, + "learning_rate": 9.939480076463255e-06, + "loss": 17.9549, + "step": 4273 + }, + { + "epoch": 0.07812528561244447, + "grad_norm": 8.001466904872732, + "learning_rate": 9.939434151214258e-06, + "loss": 18.0185, + "step": 4274 + }, + { + "epoch": 0.078143564808891, + "grad_norm": 8.31181029522459, + "learning_rate": 9.93938820865297e-06, + "loss": 17.6518, + 
"step": 4275 + }, + { + "epoch": 0.07816184400533753, + "grad_norm": 7.490995115319857, + "learning_rate": 9.93934224877956e-06, + "loss": 17.8578, + "step": 4276 + }, + { + "epoch": 0.07818012320178405, + "grad_norm": 6.490426920828199, + "learning_rate": 9.939296271594181e-06, + "loss": 17.338, + "step": 4277 + }, + { + "epoch": 0.07819840239823057, + "grad_norm": 7.262720232209258, + "learning_rate": 9.939250277096998e-06, + "loss": 17.7793, + "step": 4278 + }, + { + "epoch": 0.0782166815946771, + "grad_norm": 8.556554607249234, + "learning_rate": 9.939204265288172e-06, + "loss": 18.791, + "step": 4279 + }, + { + "epoch": 0.07823496079112362, + "grad_norm": 6.868949045187712, + "learning_rate": 9.939158236167863e-06, + "loss": 17.5968, + "step": 4280 + }, + { + "epoch": 0.07825323998757015, + "grad_norm": 6.888572727906375, + "learning_rate": 9.939112189736231e-06, + "loss": 17.615, + "step": 4281 + }, + { + "epoch": 0.07827151918401667, + "grad_norm": 8.452397434426997, + "learning_rate": 9.939066125993442e-06, + "loss": 18.4356, + "step": 4282 + }, + { + "epoch": 0.0782897983804632, + "grad_norm": 7.715575386065501, + "learning_rate": 9.939020044939654e-06, + "loss": 18.2356, + "step": 4283 + }, + { + "epoch": 0.07830807757690972, + "grad_norm": 7.778073512619908, + "learning_rate": 9.93897394657503e-06, + "loss": 18.0708, + "step": 4284 + }, + { + "epoch": 0.07832635677335624, + "grad_norm": 6.902819822531594, + "learning_rate": 9.938927830899729e-06, + "loss": 17.7574, + "step": 4285 + }, + { + "epoch": 0.07834463596980276, + "grad_norm": 6.658068507941403, + "learning_rate": 9.938881697913917e-06, + "loss": 17.5977, + "step": 4286 + }, + { + "epoch": 0.0783629151662493, + "grad_norm": 7.359021623098024, + "learning_rate": 9.93883554761775e-06, + "loss": 17.7763, + "step": 4287 + }, + { + "epoch": 0.07838119436269582, + "grad_norm": 7.15910064266547, + "learning_rate": 9.938789380011396e-06, + "loss": 17.4, + "step": 4288 + }, + { + "epoch": 0.07839947355914234, + "grad_norm": 7.149554481888531, + "learning_rate": 9.938743195095013e-06, + "loss": 17.796, + "step": 4289 + }, + { + "epoch": 0.07841775275558886, + "grad_norm": 6.898549445763817, + "learning_rate": 9.938696992868764e-06, + "loss": 17.9461, + "step": 4290 + }, + { + "epoch": 0.07843603195203538, + "grad_norm": 7.216662003346921, + "learning_rate": 9.93865077333281e-06, + "loss": 17.7372, + "step": 4291 + }, + { + "epoch": 0.0784543111484819, + "grad_norm": 8.810328140667025, + "learning_rate": 9.938604536487314e-06, + "loss": 17.9731, + "step": 4292 + }, + { + "epoch": 0.07847259034492844, + "grad_norm": 8.580926615759612, + "learning_rate": 9.938558282332438e-06, + "loss": 17.9521, + "step": 4293 + }, + { + "epoch": 0.07849086954137496, + "grad_norm": 7.240334581817721, + "learning_rate": 9.938512010868342e-06, + "loss": 17.9799, + "step": 4294 + }, + { + "epoch": 0.07850914873782149, + "grad_norm": 7.753443804416742, + "learning_rate": 9.938465722095192e-06, + "loss": 18.105, + "step": 4295 + }, + { + "epoch": 0.07852742793426801, + "grad_norm": 7.328982040101361, + "learning_rate": 9.938419416013146e-06, + "loss": 17.6112, + "step": 4296 + }, + { + "epoch": 0.07854570713071453, + "grad_norm": 8.148839296910063, + "learning_rate": 9.93837309262237e-06, + "loss": 18.1112, + "step": 4297 + }, + { + "epoch": 0.07856398632716106, + "grad_norm": 7.801790674173154, + "learning_rate": 9.938326751923025e-06, + "loss": 17.9724, + "step": 4298 + }, + { + "epoch": 0.07858226552360759, + "grad_norm": 7.505775094420256, + 
"learning_rate": 9.938280393915272e-06, + "loss": 17.8813, + "step": 4299 + }, + { + "epoch": 0.07860054472005411, + "grad_norm": 7.702707200415012, + "learning_rate": 9.938234018599275e-06, + "loss": 17.8986, + "step": 4300 + }, + { + "epoch": 0.07861882391650063, + "grad_norm": 7.049950975054393, + "learning_rate": 9.938187625975197e-06, + "loss": 17.6547, + "step": 4301 + }, + { + "epoch": 0.07863710311294715, + "grad_norm": 8.44393217676752, + "learning_rate": 9.9381412160432e-06, + "loss": 17.6841, + "step": 4302 + }, + { + "epoch": 0.07865538230939367, + "grad_norm": 7.337931856883426, + "learning_rate": 9.938094788803448e-06, + "loss": 17.7211, + "step": 4303 + }, + { + "epoch": 0.07867366150584021, + "grad_norm": 8.163342510925498, + "learning_rate": 9.938048344256099e-06, + "loss": 18.1403, + "step": 4304 + }, + { + "epoch": 0.07869194070228673, + "grad_norm": 6.903131863262117, + "learning_rate": 9.938001882401321e-06, + "loss": 17.3792, + "step": 4305 + }, + { + "epoch": 0.07871021989873325, + "grad_norm": 6.557663602936739, + "learning_rate": 9.937955403239275e-06, + "loss": 17.4879, + "step": 4306 + }, + { + "epoch": 0.07872849909517977, + "grad_norm": 8.7068292546031, + "learning_rate": 9.937908906770122e-06, + "loss": 18.3442, + "step": 4307 + }, + { + "epoch": 0.0787467782916263, + "grad_norm": 7.533992396360987, + "learning_rate": 9.937862392994029e-06, + "loss": 18.1053, + "step": 4308 + }, + { + "epoch": 0.07876505748807282, + "grad_norm": 7.633646651233243, + "learning_rate": 9.937815861911155e-06, + "loss": 18.0692, + "step": 4309 + }, + { + "epoch": 0.07878333668451935, + "grad_norm": 8.756093481318945, + "learning_rate": 9.937769313521666e-06, + "loss": 18.8583, + "step": 4310 + }, + { + "epoch": 0.07880161588096588, + "grad_norm": 7.024380608001842, + "learning_rate": 9.937722747825724e-06, + "loss": 18.0651, + "step": 4311 + }, + { + "epoch": 0.0788198950774124, + "grad_norm": 6.308581936823587, + "learning_rate": 9.937676164823493e-06, + "loss": 17.2601, + "step": 4312 + }, + { + "epoch": 0.07883817427385892, + "grad_norm": 8.201208200163915, + "learning_rate": 9.937629564515134e-06, + "loss": 18.1754, + "step": 4313 + }, + { + "epoch": 0.07885645347030544, + "grad_norm": 6.615981830198527, + "learning_rate": 9.937582946900812e-06, + "loss": 17.5142, + "step": 4314 + }, + { + "epoch": 0.07887473266675198, + "grad_norm": 6.925562656026488, + "learning_rate": 9.93753631198069e-06, + "loss": 17.6064, + "step": 4315 + }, + { + "epoch": 0.0788930118631985, + "grad_norm": 7.071526950789263, + "learning_rate": 9.937489659754933e-06, + "loss": 17.6915, + "step": 4316 + }, + { + "epoch": 0.07891129105964502, + "grad_norm": 6.93320424115655, + "learning_rate": 9.937442990223703e-06, + "loss": 17.8624, + "step": 4317 + }, + { + "epoch": 0.07892957025609154, + "grad_norm": 7.17766099048035, + "learning_rate": 9.937396303387164e-06, + "loss": 17.812, + "step": 4318 + }, + { + "epoch": 0.07894784945253806, + "grad_norm": 6.987283203511792, + "learning_rate": 9.937349599245478e-06, + "loss": 17.4972, + "step": 4319 + }, + { + "epoch": 0.07896612864898458, + "grad_norm": 7.177250193446535, + "learning_rate": 9.937302877798811e-06, + "loss": 17.7235, + "step": 4320 + }, + { + "epoch": 0.07898440784543112, + "grad_norm": 5.464667511273553, + "learning_rate": 9.937256139047326e-06, + "loss": 17.2885, + "step": 4321 + }, + { + "epoch": 0.07900268704187764, + "grad_norm": 7.365873459882448, + "learning_rate": 9.937209382991187e-06, + "loss": 17.5206, + "step": 4322 + }, + { + "epoch": 
0.07902096623832416, + "grad_norm": 7.726819290001141, + "learning_rate": 9.937162609630556e-06, + "loss": 18.4063, + "step": 4323 + }, + { + "epoch": 0.07903924543477069, + "grad_norm": 9.092986251084202, + "learning_rate": 9.937115818965599e-06, + "loss": 18.6479, + "step": 4324 + }, + { + "epoch": 0.07905752463121721, + "grad_norm": 8.531475571100337, + "learning_rate": 9.93706901099648e-06, + "loss": 18.4475, + "step": 4325 + }, + { + "epoch": 0.07907580382766373, + "grad_norm": 7.452186063785753, + "learning_rate": 9.937022185723363e-06, + "loss": 18.0895, + "step": 4326 + }, + { + "epoch": 0.07909408302411026, + "grad_norm": 7.9536205749635345, + "learning_rate": 9.936975343146411e-06, + "loss": 18.2044, + "step": 4327 + }, + { + "epoch": 0.07911236222055679, + "grad_norm": 9.890426772928912, + "learning_rate": 9.93692848326579e-06, + "loss": 18.667, + "step": 4328 + }, + { + "epoch": 0.07913064141700331, + "grad_norm": 6.224300772800686, + "learning_rate": 9.936881606081662e-06, + "loss": 17.4493, + "step": 4329 + }, + { + "epoch": 0.07914892061344983, + "grad_norm": 7.671174616757518, + "learning_rate": 9.93683471159419e-06, + "loss": 17.6893, + "step": 4330 + }, + { + "epoch": 0.07916719980989635, + "grad_norm": 7.377338604220668, + "learning_rate": 9.936787799803544e-06, + "loss": 17.8501, + "step": 4331 + }, + { + "epoch": 0.07918547900634289, + "grad_norm": 6.423033613389302, + "learning_rate": 9.936740870709885e-06, + "loss": 17.4775, + "step": 4332 + }, + { + "epoch": 0.07920375820278941, + "grad_norm": 9.213909832510181, + "learning_rate": 9.936693924313377e-06, + "loss": 18.5094, + "step": 4333 + }, + { + "epoch": 0.07922203739923593, + "grad_norm": 7.030187660722593, + "learning_rate": 9.936646960614185e-06, + "loss": 17.8259, + "step": 4334 + }, + { + "epoch": 0.07924031659568245, + "grad_norm": 6.524362985021903, + "learning_rate": 9.936599979612473e-06, + "loss": 17.3938, + "step": 4335 + }, + { + "epoch": 0.07925859579212897, + "grad_norm": 6.345926930675069, + "learning_rate": 9.936552981308408e-06, + "loss": 17.2927, + "step": 4336 + }, + { + "epoch": 0.0792768749885755, + "grad_norm": 8.18555463990253, + "learning_rate": 9.93650596570215e-06, + "loss": 17.9193, + "step": 4337 + }, + { + "epoch": 0.07929515418502203, + "grad_norm": 7.172318325546454, + "learning_rate": 9.936458932793872e-06, + "loss": 17.9054, + "step": 4338 + }, + { + "epoch": 0.07931343338146855, + "grad_norm": 7.01926924122479, + "learning_rate": 9.93641188258373e-06, + "loss": 17.7644, + "step": 4339 + }, + { + "epoch": 0.07933171257791508, + "grad_norm": 7.223352662426126, + "learning_rate": 9.936364815071894e-06, + "loss": 17.6415, + "step": 4340 + }, + { + "epoch": 0.0793499917743616, + "grad_norm": 6.99194164800054, + "learning_rate": 9.936317730258527e-06, + "loss": 17.6133, + "step": 4341 + }, + { + "epoch": 0.07936827097080812, + "grad_norm": 7.2545172317223034, + "learning_rate": 9.936270628143796e-06, + "loss": 17.6298, + "step": 4342 + }, + { + "epoch": 0.07938655016725464, + "grad_norm": 7.276928721931867, + "learning_rate": 9.936223508727864e-06, + "loss": 17.9755, + "step": 4343 + }, + { + "epoch": 0.07940482936370118, + "grad_norm": 6.7277421681572, + "learning_rate": 9.936176372010898e-06, + "loss": 17.3964, + "step": 4344 + }, + { + "epoch": 0.0794231085601477, + "grad_norm": 7.525938658288535, + "learning_rate": 9.93612921799306e-06, + "loss": 17.7782, + "step": 4345 + }, + { + "epoch": 0.07944138775659422, + "grad_norm": 6.957726541270671, + "learning_rate": 
9.93608204667452e-06, + "loss": 17.4248, + "step": 4346 + }, + { + "epoch": 0.07945966695304074, + "grad_norm": 7.6856434204431885, + "learning_rate": 9.936034858055439e-06, + "loss": 18.1327, + "step": 4347 + }, + { + "epoch": 0.07947794614948726, + "grad_norm": 6.898323366882175, + "learning_rate": 9.935987652135983e-06, + "loss": 17.616, + "step": 4348 + }, + { + "epoch": 0.0794962253459338, + "grad_norm": 8.20052684478149, + "learning_rate": 9.93594042891632e-06, + "loss": 17.9773, + "step": 4349 + }, + { + "epoch": 0.07951450454238032, + "grad_norm": 7.194753526930869, + "learning_rate": 9.935893188396614e-06, + "loss": 17.8848, + "step": 4350 + }, + { + "epoch": 0.07953278373882684, + "grad_norm": 10.113227968945113, + "learning_rate": 9.935845930577032e-06, + "loss": 19.0136, + "step": 4351 + }, + { + "epoch": 0.07955106293527336, + "grad_norm": 8.986283506326176, + "learning_rate": 9.935798655457737e-06, + "loss": 18.8284, + "step": 4352 + }, + { + "epoch": 0.07956934213171989, + "grad_norm": 7.133922398959805, + "learning_rate": 9.935751363038897e-06, + "loss": 17.5253, + "step": 4353 + }, + { + "epoch": 0.07958762132816641, + "grad_norm": 8.27673277270031, + "learning_rate": 9.935704053320676e-06, + "loss": 18.2957, + "step": 4354 + }, + { + "epoch": 0.07960590052461294, + "grad_norm": 7.676831202603974, + "learning_rate": 9.93565672630324e-06, + "loss": 17.9769, + "step": 4355 + }, + { + "epoch": 0.07962417972105947, + "grad_norm": 6.54709400823552, + "learning_rate": 9.935609381986756e-06, + "loss": 17.472, + "step": 4356 + }, + { + "epoch": 0.07964245891750599, + "grad_norm": 7.033668989331142, + "learning_rate": 9.93556202037139e-06, + "loss": 17.5056, + "step": 4357 + }, + { + "epoch": 0.07966073811395251, + "grad_norm": 5.438645737245922, + "learning_rate": 9.935514641457307e-06, + "loss": 16.8826, + "step": 4358 + }, + { + "epoch": 0.07967901731039903, + "grad_norm": 7.4635302373287224, + "learning_rate": 9.935467245244674e-06, + "loss": 18.0071, + "step": 4359 + }, + { + "epoch": 0.07969729650684555, + "grad_norm": 8.001907384704584, + "learning_rate": 9.935419831733655e-06, + "loss": 18.1448, + "step": 4360 + }, + { + "epoch": 0.07971557570329209, + "grad_norm": 5.58895525179972, + "learning_rate": 9.93537240092442e-06, + "loss": 16.9925, + "step": 4361 + }, + { + "epoch": 0.07973385489973861, + "grad_norm": 8.394101913852397, + "learning_rate": 9.935324952817132e-06, + "loss": 18.2056, + "step": 4362 + }, + { + "epoch": 0.07975213409618513, + "grad_norm": 6.185491922238464, + "learning_rate": 9.935277487411957e-06, + "loss": 17.406, + "step": 4363 + }, + { + "epoch": 0.07977041329263165, + "grad_norm": 7.82182368160327, + "learning_rate": 9.935230004709064e-06, + "loss": 18.0795, + "step": 4364 + }, + { + "epoch": 0.07978869248907818, + "grad_norm": 7.832177300994306, + "learning_rate": 9.935182504708619e-06, + "loss": 17.9343, + "step": 4365 + }, + { + "epoch": 0.07980697168552471, + "grad_norm": 8.041377318900343, + "learning_rate": 9.935134987410786e-06, + "loss": 17.9911, + "step": 4366 + }, + { + "epoch": 0.07982525088197123, + "grad_norm": 8.440419909385117, + "learning_rate": 9.935087452815733e-06, + "loss": 18.4141, + "step": 4367 + }, + { + "epoch": 0.07984353007841775, + "grad_norm": 7.705909091524087, + "learning_rate": 9.935039900923627e-06, + "loss": 18.129, + "step": 4368 + }, + { + "epoch": 0.07986180927486428, + "grad_norm": 7.256927719180046, + "learning_rate": 9.934992331734635e-06, + "loss": 17.8265, + "step": 4369 + }, + { + "epoch": 
0.0798800884713108, + "grad_norm": 6.727981028320891, + "learning_rate": 9.934944745248922e-06, + "loss": 17.3921, + "step": 4370 + }, + { + "epoch": 0.07989836766775732, + "grad_norm": 8.888879986522245, + "learning_rate": 9.934897141466658e-06, + "loss": 18.2255, + "step": 4371 + }, + { + "epoch": 0.07991664686420386, + "grad_norm": 7.5446180440391295, + "learning_rate": 9.934849520388006e-06, + "loss": 17.8565, + "step": 4372 + }, + { + "epoch": 0.07993492606065038, + "grad_norm": 7.762442570123377, + "learning_rate": 9.934801882013136e-06, + "loss": 17.9927, + "step": 4373 + }, + { + "epoch": 0.0799532052570969, + "grad_norm": 7.63817698154924, + "learning_rate": 9.934754226342212e-06, + "loss": 17.7823, + "step": 4374 + }, + { + "epoch": 0.07997148445354342, + "grad_norm": 7.623312193856963, + "learning_rate": 9.934706553375404e-06, + "loss": 17.9167, + "step": 4375 + }, + { + "epoch": 0.07998976364998994, + "grad_norm": 7.403930180012222, + "learning_rate": 9.934658863112877e-06, + "loss": 17.728, + "step": 4376 + }, + { + "epoch": 0.08000804284643646, + "grad_norm": 7.913128623022587, + "learning_rate": 9.934611155554799e-06, + "loss": 17.9917, + "step": 4377 + }, + { + "epoch": 0.080026322042883, + "grad_norm": 7.8683142771557, + "learning_rate": 9.934563430701336e-06, + "loss": 17.7655, + "step": 4378 + }, + { + "epoch": 0.08004460123932952, + "grad_norm": 8.258553859426897, + "learning_rate": 9.934515688552656e-06, + "loss": 18.4457, + "step": 4379 + }, + { + "epoch": 0.08006288043577604, + "grad_norm": 6.9581842199049735, + "learning_rate": 9.934467929108931e-06, + "loss": 17.6692, + "step": 4380 + }, + { + "epoch": 0.08008115963222256, + "grad_norm": 6.791523737572199, + "learning_rate": 9.934420152370321e-06, + "loss": 17.255, + "step": 4381 + }, + { + "epoch": 0.08009943882866909, + "grad_norm": 8.517797819825347, + "learning_rate": 9.934372358336996e-06, + "loss": 18.1414, + "step": 4382 + }, + { + "epoch": 0.08011771802511562, + "grad_norm": 7.555933529906602, + "learning_rate": 9.934324547009125e-06, + "loss": 17.8544, + "step": 4383 + }, + { + "epoch": 0.08013599722156214, + "grad_norm": 7.502407572634842, + "learning_rate": 9.934276718386874e-06, + "loss": 17.6756, + "step": 4384 + }, + { + "epoch": 0.08015427641800867, + "grad_norm": 8.479011498340665, + "learning_rate": 9.934228872470413e-06, + "loss": 18.5501, + "step": 4385 + }, + { + "epoch": 0.08017255561445519, + "grad_norm": 7.359950844823468, + "learning_rate": 9.934181009259907e-06, + "loss": 17.9449, + "step": 4386 + }, + { + "epoch": 0.08019083481090171, + "grad_norm": 8.057010421147657, + "learning_rate": 9.934133128755524e-06, + "loss": 17.7369, + "step": 4387 + }, + { + "epoch": 0.08020911400734823, + "grad_norm": 7.298814909341879, + "learning_rate": 9.934085230957434e-06, + "loss": 17.8067, + "step": 4388 + }, + { + "epoch": 0.08022739320379477, + "grad_norm": 6.168865086056675, + "learning_rate": 9.934037315865802e-06, + "loss": 17.2977, + "step": 4389 + }, + { + "epoch": 0.08024567240024129, + "grad_norm": 8.712264724286051, + "learning_rate": 9.933989383480797e-06, + "loss": 18.6987, + "step": 4390 + }, + { + "epoch": 0.08026395159668781, + "grad_norm": 8.715454270171263, + "learning_rate": 9.93394143380259e-06, + "loss": 18.5641, + "step": 4391 + }, + { + "epoch": 0.08028223079313433, + "grad_norm": 6.871268595151202, + "learning_rate": 9.933893466831344e-06, + "loss": 17.7549, + "step": 4392 + }, + { + "epoch": 0.08030050998958085, + "grad_norm": 6.50455131844434, + "learning_rate": 
9.933845482567231e-06, + "loss": 17.4704, + "step": 4393 + }, + { + "epoch": 0.08031878918602738, + "grad_norm": 7.495903605026569, + "learning_rate": 9.933797481010417e-06, + "loss": 17.7965, + "step": 4394 + }, + { + "epoch": 0.08033706838247391, + "grad_norm": 7.308092299369829, + "learning_rate": 9.93374946216107e-06, + "loss": 17.8812, + "step": 4395 + }, + { + "epoch": 0.08035534757892043, + "grad_norm": 8.936969907894749, + "learning_rate": 9.93370142601936e-06, + "loss": 18.3376, + "step": 4396 + }, + { + "epoch": 0.08037362677536695, + "grad_norm": 7.012556279337693, + "learning_rate": 9.933653372585454e-06, + "loss": 17.6605, + "step": 4397 + }, + { + "epoch": 0.08039190597181348, + "grad_norm": 8.659173418122123, + "learning_rate": 9.933605301859523e-06, + "loss": 18.6341, + "step": 4398 + }, + { + "epoch": 0.08041018516826, + "grad_norm": 5.968503921611904, + "learning_rate": 9.933557213841733e-06, + "loss": 17.2194, + "step": 4399 + }, + { + "epoch": 0.08042846436470653, + "grad_norm": 7.426403281013527, + "learning_rate": 9.933509108532251e-06, + "loss": 17.8206, + "step": 4400 + }, + { + "epoch": 0.08044674356115306, + "grad_norm": 8.197047539262414, + "learning_rate": 9.93346098593125e-06, + "loss": 18.3993, + "step": 4401 + }, + { + "epoch": 0.08046502275759958, + "grad_norm": 7.065256372234396, + "learning_rate": 9.933412846038896e-06, + "loss": 17.9417, + "step": 4402 + }, + { + "epoch": 0.0804833019540461, + "grad_norm": 6.907884075342692, + "learning_rate": 9.933364688855357e-06, + "loss": 17.8059, + "step": 4403 + }, + { + "epoch": 0.08050158115049262, + "grad_norm": 7.340969901260953, + "learning_rate": 9.933316514380804e-06, + "loss": 17.6631, + "step": 4404 + }, + { + "epoch": 0.08051986034693914, + "grad_norm": 7.9374478532920465, + "learning_rate": 9.933268322615403e-06, + "loss": 18.5271, + "step": 4405 + }, + { + "epoch": 0.08053813954338568, + "grad_norm": 7.4468071554846444, + "learning_rate": 9.933220113559327e-06, + "loss": 17.822, + "step": 4406 + }, + { + "epoch": 0.0805564187398322, + "grad_norm": 6.920858580541004, + "learning_rate": 9.93317188721274e-06, + "loss": 17.5771, + "step": 4407 + }, + { + "epoch": 0.08057469793627872, + "grad_norm": 7.114072198051114, + "learning_rate": 9.933123643575816e-06, + "loss": 17.8293, + "step": 4408 + }, + { + "epoch": 0.08059297713272524, + "grad_norm": 6.841327818725455, + "learning_rate": 9.933075382648721e-06, + "loss": 17.4228, + "step": 4409 + }, + { + "epoch": 0.08061125632917177, + "grad_norm": 7.100727522736129, + "learning_rate": 9.933027104431624e-06, + "loss": 17.6061, + "step": 4410 + }, + { + "epoch": 0.08062953552561829, + "grad_norm": 9.30397315079259, + "learning_rate": 9.932978808924696e-06, + "loss": 18.1698, + "step": 4411 + }, + { + "epoch": 0.08064781472206482, + "grad_norm": 6.896796777753139, + "learning_rate": 9.932930496128105e-06, + "loss": 17.8227, + "step": 4412 + }, + { + "epoch": 0.08066609391851134, + "grad_norm": 7.270965623141832, + "learning_rate": 9.93288216604202e-06, + "loss": 17.7673, + "step": 4413 + }, + { + "epoch": 0.08068437311495787, + "grad_norm": 8.296988554740144, + "learning_rate": 9.932833818666611e-06, + "loss": 18.2694, + "step": 4414 + }, + { + "epoch": 0.08070265231140439, + "grad_norm": 8.778225644823923, + "learning_rate": 9.93278545400205e-06, + "loss": 18.4442, + "step": 4415 + }, + { + "epoch": 0.08072093150785091, + "grad_norm": 7.208020527454145, + "learning_rate": 9.932737072048502e-06, + "loss": 17.9922, + "step": 4416 + }, + { + "epoch": 
0.08073921070429745, + "grad_norm": 7.010547771327252, + "learning_rate": 9.932688672806138e-06, + "loss": 17.7569, + "step": 4417 + }, + { + "epoch": 0.08075748990074397, + "grad_norm": 7.83570712937895, + "learning_rate": 9.93264025627513e-06, + "loss": 17.9074, + "step": 4418 + }, + { + "epoch": 0.08077576909719049, + "grad_norm": 8.072153380337696, + "learning_rate": 9.932591822455643e-06, + "loss": 18.5003, + "step": 4419 + }, + { + "epoch": 0.08079404829363701, + "grad_norm": 7.838063096096014, + "learning_rate": 9.932543371347853e-06, + "loss": 18.0529, + "step": 4420 + }, + { + "epoch": 0.08081232749008353, + "grad_norm": 7.6052270885843045, + "learning_rate": 9.932494902951925e-06, + "loss": 18.0015, + "step": 4421 + }, + { + "epoch": 0.08083060668653005, + "grad_norm": 7.646716818032306, + "learning_rate": 9.932446417268031e-06, + "loss": 17.9335, + "step": 4422 + }, + { + "epoch": 0.08084888588297659, + "grad_norm": 6.74951305218201, + "learning_rate": 9.93239791429634e-06, + "loss": 17.895, + "step": 4423 + }, + { + "epoch": 0.08086716507942311, + "grad_norm": 7.638016311406168, + "learning_rate": 9.932349394037022e-06, + "loss": 17.9338, + "step": 4424 + }, + { + "epoch": 0.08088544427586963, + "grad_norm": 6.775714397132939, + "learning_rate": 9.932300856490248e-06, + "loss": 17.6984, + "step": 4425 + }, + { + "epoch": 0.08090372347231616, + "grad_norm": 6.715265997146286, + "learning_rate": 9.932252301656187e-06, + "loss": 17.8797, + "step": 4426 + }, + { + "epoch": 0.08092200266876268, + "grad_norm": 6.708322945726105, + "learning_rate": 9.932203729535011e-06, + "loss": 17.7322, + "step": 4427 + }, + { + "epoch": 0.0809402818652092, + "grad_norm": 7.674689554676557, + "learning_rate": 9.932155140126886e-06, + "loss": 18.0179, + "step": 4428 + }, + { + "epoch": 0.08095856106165573, + "grad_norm": 6.0953774890285946, + "learning_rate": 9.932106533431988e-06, + "loss": 17.266, + "step": 4429 + }, + { + "epoch": 0.08097684025810226, + "grad_norm": 8.123767572496034, + "learning_rate": 9.932057909450483e-06, + "loss": 17.9947, + "step": 4430 + }, + { + "epoch": 0.08099511945454878, + "grad_norm": 8.007848703055455, + "learning_rate": 9.932009268182542e-06, + "loss": 18.0589, + "step": 4431 + }, + { + "epoch": 0.0810133986509953, + "grad_norm": 7.728613347671448, + "learning_rate": 9.93196060962834e-06, + "loss": 18.0519, + "step": 4432 + }, + { + "epoch": 0.08103167784744182, + "grad_norm": 6.914287168216555, + "learning_rate": 9.931911933788043e-06, + "loss": 17.6688, + "step": 4433 + }, + { + "epoch": 0.08104995704388836, + "grad_norm": 6.594437207858085, + "learning_rate": 9.931863240661821e-06, + "loss": 17.6638, + "step": 4434 + }, + { + "epoch": 0.08106823624033488, + "grad_norm": 6.6198252529318395, + "learning_rate": 9.931814530249847e-06, + "loss": 17.5418, + "step": 4435 + }, + { + "epoch": 0.0810865154367814, + "grad_norm": 11.144267892914211, + "learning_rate": 9.93176580255229e-06, + "loss": 18.2372, + "step": 4436 + }, + { + "epoch": 0.08110479463322792, + "grad_norm": 7.014288301350778, + "learning_rate": 9.931717057569323e-06, + "loss": 17.6551, + "step": 4437 + }, + { + "epoch": 0.08112307382967444, + "grad_norm": 7.318544846682362, + "learning_rate": 9.931668295301116e-06, + "loss": 17.9492, + "step": 4438 + }, + { + "epoch": 0.08114135302612097, + "grad_norm": 10.872672173263489, + "learning_rate": 9.931619515747838e-06, + "loss": 17.9141, + "step": 4439 + }, + { + "epoch": 0.0811596322225675, + "grad_norm": 6.240998164763673, + "learning_rate": 
9.931570718909664e-06, + "loss": 17.1257, + "step": 4440 + }, + { + "epoch": 0.08117791141901402, + "grad_norm": 7.99350319505303, + "learning_rate": 9.931521904786761e-06, + "loss": 17.9102, + "step": 4441 + }, + { + "epoch": 0.08119619061546055, + "grad_norm": 6.712015008086295, + "learning_rate": 9.931473073379304e-06, + "loss": 17.6588, + "step": 4442 + }, + { + "epoch": 0.08121446981190707, + "grad_norm": 7.902877742430194, + "learning_rate": 9.931424224687458e-06, + "loss": 18.1366, + "step": 4443 + }, + { + "epoch": 0.08123274900835359, + "grad_norm": 7.78949894709229, + "learning_rate": 9.9313753587114e-06, + "loss": 18.3117, + "step": 4444 + }, + { + "epoch": 0.08125102820480011, + "grad_norm": 7.1016311906620855, + "learning_rate": 9.9313264754513e-06, + "loss": 17.7372, + "step": 4445 + }, + { + "epoch": 0.08126930740124665, + "grad_norm": 6.478915491179522, + "learning_rate": 9.931277574907328e-06, + "loss": 17.4951, + "step": 4446 + }, + { + "epoch": 0.08128758659769317, + "grad_norm": 8.509384946617994, + "learning_rate": 9.931228657079655e-06, + "loss": 18.6333, + "step": 4447 + }, + { + "epoch": 0.08130586579413969, + "grad_norm": 7.650938902341948, + "learning_rate": 9.931179721968453e-06, + "loss": 17.7259, + "step": 4448 + }, + { + "epoch": 0.08132414499058621, + "grad_norm": 7.7217687624076765, + "learning_rate": 9.931130769573896e-06, + "loss": 18.0419, + "step": 4449 + }, + { + "epoch": 0.08134242418703273, + "grad_norm": 8.289211316693782, + "learning_rate": 9.931081799896153e-06, + "loss": 18.0502, + "step": 4450 + }, + { + "epoch": 0.08136070338347927, + "grad_norm": 7.714887931684799, + "learning_rate": 9.931032812935397e-06, + "loss": 17.8473, + "step": 4451 + }, + { + "epoch": 0.08137898257992579, + "grad_norm": 7.736070745534125, + "learning_rate": 9.930983808691796e-06, + "loss": 18.1318, + "step": 4452 + }, + { + "epoch": 0.08139726177637231, + "grad_norm": 6.1809140483148965, + "learning_rate": 9.930934787165527e-06, + "loss": 17.2089, + "step": 4453 + }, + { + "epoch": 0.08141554097281883, + "grad_norm": 6.949007285441479, + "learning_rate": 9.930885748356759e-06, + "loss": 17.5245, + "step": 4454 + }, + { + "epoch": 0.08143382016926536, + "grad_norm": 6.645321717408077, + "learning_rate": 9.930836692265664e-06, + "loss": 17.6439, + "step": 4455 + }, + { + "epoch": 0.08145209936571188, + "grad_norm": 6.932555337897875, + "learning_rate": 9.930787618892415e-06, + "loss": 17.7075, + "step": 4456 + }, + { + "epoch": 0.08147037856215841, + "grad_norm": 7.62905891946172, + "learning_rate": 9.930738528237183e-06, + "loss": 17.7215, + "step": 4457 + }, + { + "epoch": 0.08148865775860493, + "grad_norm": 6.926128597480996, + "learning_rate": 9.93068942030014e-06, + "loss": 17.4867, + "step": 4458 + }, + { + "epoch": 0.08150693695505146, + "grad_norm": 6.5618999252141545, + "learning_rate": 9.930640295081458e-06, + "loss": 17.3902, + "step": 4459 + }, + { + "epoch": 0.08152521615149798, + "grad_norm": 6.971387561714625, + "learning_rate": 9.93059115258131e-06, + "loss": 17.6631, + "step": 4460 + }, + { + "epoch": 0.0815434953479445, + "grad_norm": 7.164499672355387, + "learning_rate": 9.930541992799868e-06, + "loss": 17.5324, + "step": 4461 + }, + { + "epoch": 0.08156177454439102, + "grad_norm": 8.462225980434976, + "learning_rate": 9.930492815737305e-06, + "loss": 18.4413, + "step": 4462 + }, + { + "epoch": 0.08158005374083756, + "grad_norm": 7.348675859790189, + "learning_rate": 9.930443621393792e-06, + "loss": 17.9459, + "step": 4463 + }, + { + "epoch": 
0.08159833293728408, + "grad_norm": 5.875306457033801, + "learning_rate": 9.9303944097695e-06, + "loss": 17.3848, + "step": 4464 + }, + { + "epoch": 0.0816166121337306, + "grad_norm": 6.196416695570449, + "learning_rate": 9.930345180864606e-06, + "loss": 17.3206, + "step": 4465 + }, + { + "epoch": 0.08163489133017712, + "grad_norm": 6.784910250618494, + "learning_rate": 9.93029593467928e-06, + "loss": 17.3085, + "step": 4466 + }, + { + "epoch": 0.08165317052662364, + "grad_norm": 7.625413552891611, + "learning_rate": 9.930246671213693e-06, + "loss": 18.3514, + "step": 4467 + }, + { + "epoch": 0.08167144972307018, + "grad_norm": 5.547533240305542, + "learning_rate": 9.93019739046802e-06, + "loss": 17.0815, + "step": 4468 + }, + { + "epoch": 0.0816897289195167, + "grad_norm": 7.210661771640025, + "learning_rate": 9.930148092442433e-06, + "loss": 17.8987, + "step": 4469 + }, + { + "epoch": 0.08170800811596322, + "grad_norm": 8.65273559111609, + "learning_rate": 9.930098777137104e-06, + "loss": 18.4925, + "step": 4470 + }, + { + "epoch": 0.08172628731240975, + "grad_norm": 8.22570860272434, + "learning_rate": 9.930049444552207e-06, + "loss": 18.1606, + "step": 4471 + }, + { + "epoch": 0.08174456650885627, + "grad_norm": 7.6859657155698065, + "learning_rate": 9.930000094687916e-06, + "loss": 17.8538, + "step": 4472 + }, + { + "epoch": 0.08176284570530279, + "grad_norm": 7.92076233119735, + "learning_rate": 9.929950727544401e-06, + "loss": 18.4207, + "step": 4473 + }, + { + "epoch": 0.08178112490174932, + "grad_norm": 7.3580475160391945, + "learning_rate": 9.929901343121838e-06, + "loss": 17.9415, + "step": 4474 + }, + { + "epoch": 0.08179940409819585, + "grad_norm": 9.13179251316065, + "learning_rate": 9.929851941420396e-06, + "loss": 18.4544, + "step": 4475 + }, + { + "epoch": 0.08181768329464237, + "grad_norm": 7.910382887037255, + "learning_rate": 9.929802522440252e-06, + "loss": 18.353, + "step": 4476 + }, + { + "epoch": 0.08183596249108889, + "grad_norm": 6.534012703103533, + "learning_rate": 9.929753086181578e-06, + "loss": 17.5061, + "step": 4477 + }, + { + "epoch": 0.08185424168753541, + "grad_norm": 7.269820604266411, + "learning_rate": 9.929703632644547e-06, + "loss": 17.5173, + "step": 4478 + }, + { + "epoch": 0.08187252088398193, + "grad_norm": 6.758277820192978, + "learning_rate": 9.929654161829333e-06, + "loss": 17.5249, + "step": 4479 + }, + { + "epoch": 0.08189080008042847, + "grad_norm": 6.214356646569967, + "learning_rate": 9.929604673736108e-06, + "loss": 17.3733, + "step": 4480 + }, + { + "epoch": 0.08190907927687499, + "grad_norm": 6.651755790691796, + "learning_rate": 9.929555168365048e-06, + "loss": 17.4953, + "step": 4481 + }, + { + "epoch": 0.08192735847332151, + "grad_norm": 5.65275080863377, + "learning_rate": 9.929505645716323e-06, + "loss": 17.0668, + "step": 4482 + }, + { + "epoch": 0.08194563766976803, + "grad_norm": 7.818691995099381, + "learning_rate": 9.92945610579011e-06, + "loss": 17.8689, + "step": 4483 + }, + { + "epoch": 0.08196391686621456, + "grad_norm": 6.846748702401642, + "learning_rate": 9.92940654858658e-06, + "loss": 17.832, + "step": 4484 + }, + { + "epoch": 0.08198219606266109, + "grad_norm": 6.774227618839608, + "learning_rate": 9.929356974105909e-06, + "loss": 17.4402, + "step": 4485 + }, + { + "epoch": 0.08200047525910761, + "grad_norm": 7.18282719284545, + "learning_rate": 9.92930738234827e-06, + "loss": 17.7953, + "step": 4486 + }, + { + "epoch": 0.08201875445555414, + "grad_norm": 7.10034438045775, + "learning_rate": 9.929257773313834e-06, 
+ "loss": 17.7903, + "step": 4487 + }, + { + "epoch": 0.08203703365200066, + "grad_norm": 8.918489618016627, + "learning_rate": 9.92920814700278e-06, + "loss": 18.1086, + "step": 4488 + }, + { + "epoch": 0.08205531284844718, + "grad_norm": 7.117493121501945, + "learning_rate": 9.929158503415277e-06, + "loss": 17.6571, + "step": 4489 + }, + { + "epoch": 0.0820735920448937, + "grad_norm": 8.701853632068818, + "learning_rate": 9.929108842551502e-06, + "loss": 18.5493, + "step": 4490 + }, + { + "epoch": 0.08209187124134024, + "grad_norm": 7.033729075683853, + "learning_rate": 9.929059164411627e-06, + "loss": 17.6714, + "step": 4491 + }, + { + "epoch": 0.08211015043778676, + "grad_norm": 7.01506236368206, + "learning_rate": 9.92900946899583e-06, + "loss": 17.5937, + "step": 4492 + }, + { + "epoch": 0.08212842963423328, + "grad_norm": 7.028807544280329, + "learning_rate": 9.92895975630428e-06, + "loss": 17.6652, + "step": 4493 + }, + { + "epoch": 0.0821467088306798, + "grad_norm": 6.729522681157575, + "learning_rate": 9.928910026337154e-06, + "loss": 17.4521, + "step": 4494 + }, + { + "epoch": 0.08216498802712632, + "grad_norm": 8.344437666062118, + "learning_rate": 9.928860279094628e-06, + "loss": 17.9272, + "step": 4495 + }, + { + "epoch": 0.08218326722357285, + "grad_norm": 6.37483648731223, + "learning_rate": 9.928810514576874e-06, + "loss": 17.2194, + "step": 4496 + }, + { + "epoch": 0.08220154642001938, + "grad_norm": 6.102383968868863, + "learning_rate": 9.928760732784067e-06, + "loss": 17.4369, + "step": 4497 + }, + { + "epoch": 0.0822198256164659, + "grad_norm": 6.770251894115572, + "learning_rate": 9.928710933716379e-06, + "loss": 17.6148, + "step": 4498 + }, + { + "epoch": 0.08223810481291242, + "grad_norm": 7.365270009701503, + "learning_rate": 9.92866111737399e-06, + "loss": 17.896, + "step": 4499 + }, + { + "epoch": 0.08225638400935895, + "grad_norm": 9.041320640724681, + "learning_rate": 9.928611283757068e-06, + "loss": 18.3787, + "step": 4500 + }, + { + "epoch": 0.08227466320580547, + "grad_norm": 7.341942300548844, + "learning_rate": 9.928561432865793e-06, + "loss": 17.7244, + "step": 4501 + }, + { + "epoch": 0.082292942402252, + "grad_norm": 9.128635582132091, + "learning_rate": 9.928511564700339e-06, + "loss": 18.6112, + "step": 4502 + }, + { + "epoch": 0.08231122159869853, + "grad_norm": 8.78146881450675, + "learning_rate": 9.928461679260877e-06, + "loss": 18.7338, + "step": 4503 + }, + { + "epoch": 0.08232950079514505, + "grad_norm": 8.036743843156453, + "learning_rate": 9.928411776547587e-06, + "loss": 17.936, + "step": 4504 + }, + { + "epoch": 0.08234777999159157, + "grad_norm": 7.310140149812051, + "learning_rate": 9.92836185656064e-06, + "loss": 17.5116, + "step": 4505 + }, + { + "epoch": 0.08236605918803809, + "grad_norm": 7.1757266381119, + "learning_rate": 9.928311919300214e-06, + "loss": 17.4799, + "step": 4506 + }, + { + "epoch": 0.08238433838448461, + "grad_norm": 7.604737458119529, + "learning_rate": 9.928261964766481e-06, + "loss": 18.138, + "step": 4507 + }, + { + "epoch": 0.08240261758093115, + "grad_norm": 8.247450984783418, + "learning_rate": 9.928211992959617e-06, + "loss": 18.3907, + "step": 4508 + }, + { + "epoch": 0.08242089677737767, + "grad_norm": 7.957011173756499, + "learning_rate": 9.928162003879797e-06, + "loss": 18.2793, + "step": 4509 + }, + { + "epoch": 0.08243917597382419, + "grad_norm": 6.8367147643001385, + "learning_rate": 9.9281119975272e-06, + "loss": 17.653, + "step": 4510 + }, + { + "epoch": 0.08245745517027071, + "grad_norm": 
7.324051785613831, + "learning_rate": 9.928061973901995e-06, + "loss": 17.7182, + "step": 4511 + }, + { + "epoch": 0.08247573436671723, + "grad_norm": 6.355023708620587, + "learning_rate": 9.928011933004363e-06, + "loss": 17.3312, + "step": 4512 + }, + { + "epoch": 0.08249401356316376, + "grad_norm": 7.022166479823654, + "learning_rate": 9.927961874834473e-06, + "loss": 17.8033, + "step": 4513 + }, + { + "epoch": 0.08251229275961029, + "grad_norm": 6.956236792848833, + "learning_rate": 9.927911799392506e-06, + "loss": 17.7529, + "step": 4514 + }, + { + "epoch": 0.08253057195605681, + "grad_norm": 8.524558072843963, + "learning_rate": 9.927861706678638e-06, + "loss": 18.4426, + "step": 4515 + }, + { + "epoch": 0.08254885115250334, + "grad_norm": 7.456906784203291, + "learning_rate": 9.92781159669304e-06, + "loss": 18.0318, + "step": 4516 + }, + { + "epoch": 0.08256713034894986, + "grad_norm": 6.328814492522303, + "learning_rate": 9.92776146943589e-06, + "loss": 17.3499, + "step": 4517 + }, + { + "epoch": 0.08258540954539638, + "grad_norm": 6.642606672615187, + "learning_rate": 9.927711324907366e-06, + "loss": 17.5684, + "step": 4518 + }, + { + "epoch": 0.08260368874184291, + "grad_norm": 7.2954811864296865, + "learning_rate": 9.927661163107639e-06, + "loss": 18.0425, + "step": 4519 + }, + { + "epoch": 0.08262196793828944, + "grad_norm": 7.460025589824433, + "learning_rate": 9.927610984036888e-06, + "loss": 17.6947, + "step": 4520 + }, + { + "epoch": 0.08264024713473596, + "grad_norm": 8.381247810702762, + "learning_rate": 9.927560787695288e-06, + "loss": 18.1759, + "step": 4521 + }, + { + "epoch": 0.08265852633118248, + "grad_norm": 5.7085072531084355, + "learning_rate": 9.927510574083014e-06, + "loss": 17.1358, + "step": 4522 + }, + { + "epoch": 0.082676805527629, + "grad_norm": 7.967265722628898, + "learning_rate": 9.927460343200245e-06, + "loss": 18.0888, + "step": 4523 + }, + { + "epoch": 0.08269508472407552, + "grad_norm": 6.815552621381163, + "learning_rate": 9.927410095047154e-06, + "loss": 17.6987, + "step": 4524 + }, + { + "epoch": 0.08271336392052206, + "grad_norm": 7.394571511538061, + "learning_rate": 9.92735982962392e-06, + "loss": 17.7943, + "step": 4525 + }, + { + "epoch": 0.08273164311696858, + "grad_norm": 9.67717658562339, + "learning_rate": 9.927309546930714e-06, + "loss": 18.2716, + "step": 4526 + }, + { + "epoch": 0.0827499223134151, + "grad_norm": 8.130599036793969, + "learning_rate": 9.927259246967718e-06, + "loss": 18.3201, + "step": 4527 + }, + { + "epoch": 0.08276820150986162, + "grad_norm": 7.964373664485016, + "learning_rate": 9.927208929735105e-06, + "loss": 18.0592, + "step": 4528 + }, + { + "epoch": 0.08278648070630815, + "grad_norm": 7.2245216886891495, + "learning_rate": 9.927158595233053e-06, + "loss": 17.7279, + "step": 4529 + }, + { + "epoch": 0.08280475990275467, + "grad_norm": 7.0316490672981695, + "learning_rate": 9.927108243461737e-06, + "loss": 17.7399, + "step": 4530 + }, + { + "epoch": 0.0828230390992012, + "grad_norm": 7.761029426910646, + "learning_rate": 9.927057874421335e-06, + "loss": 17.9445, + "step": 4531 + }, + { + "epoch": 0.08284131829564773, + "grad_norm": 5.900261226061749, + "learning_rate": 9.927007488112022e-06, + "loss": 17.4243, + "step": 4532 + }, + { + "epoch": 0.08285959749209425, + "grad_norm": 8.414529587707785, + "learning_rate": 9.926957084533975e-06, + "loss": 17.9991, + "step": 4533 + }, + { + "epoch": 0.08287787668854077, + "grad_norm": 7.605849402215815, + "learning_rate": 9.926906663687371e-06, + "loss": 17.9538, + 
"step": 4534 + }, + { + "epoch": 0.08289615588498729, + "grad_norm": 6.576826027627958, + "learning_rate": 9.926856225572388e-06, + "loss": 17.4712, + "step": 4535 + }, + { + "epoch": 0.08291443508143383, + "grad_norm": 6.501336675316235, + "learning_rate": 9.9268057701892e-06, + "loss": 17.5895, + "step": 4536 + }, + { + "epoch": 0.08293271427788035, + "grad_norm": 6.672729074948429, + "learning_rate": 9.926755297537985e-06, + "loss": 17.6235, + "step": 4537 + }, + { + "epoch": 0.08295099347432687, + "grad_norm": 7.931759467513436, + "learning_rate": 9.92670480761892e-06, + "loss": 17.9637, + "step": 4538 + }, + { + "epoch": 0.08296927267077339, + "grad_norm": 6.402769235168133, + "learning_rate": 9.926654300432185e-06, + "loss": 17.4368, + "step": 4539 + }, + { + "epoch": 0.08298755186721991, + "grad_norm": 7.409572920926637, + "learning_rate": 9.926603775977953e-06, + "loss": 17.9268, + "step": 4540 + }, + { + "epoch": 0.08300583106366644, + "grad_norm": 7.917990749667958, + "learning_rate": 9.926553234256401e-06, + "loss": 18.1149, + "step": 4541 + }, + { + "epoch": 0.08302411026011297, + "grad_norm": 8.082940647734613, + "learning_rate": 9.92650267526771e-06, + "loss": 18.0548, + "step": 4542 + }, + { + "epoch": 0.08304238945655949, + "grad_norm": 7.277137111479075, + "learning_rate": 9.92645209901205e-06, + "loss": 17.7861, + "step": 4543 + }, + { + "epoch": 0.08306066865300601, + "grad_norm": 7.4733012873198685, + "learning_rate": 9.926401505489606e-06, + "loss": 18.0153, + "step": 4544 + }, + { + "epoch": 0.08307894784945254, + "grad_norm": 7.9215653230932395, + "learning_rate": 9.926350894700552e-06, + "loss": 17.9856, + "step": 4545 + }, + { + "epoch": 0.08309722704589906, + "grad_norm": 7.791247708572704, + "learning_rate": 9.926300266645066e-06, + "loss": 18.0828, + "step": 4546 + }, + { + "epoch": 0.08311550624234558, + "grad_norm": 5.959070382662814, + "learning_rate": 9.926249621323325e-06, + "loss": 17.1554, + "step": 4547 + }, + { + "epoch": 0.08313378543879212, + "grad_norm": 8.26889967906215, + "learning_rate": 9.926198958735505e-06, + "loss": 18.0522, + "step": 4548 + }, + { + "epoch": 0.08315206463523864, + "grad_norm": 7.152983459665711, + "learning_rate": 9.926148278881787e-06, + "loss": 17.669, + "step": 4549 + }, + { + "epoch": 0.08317034383168516, + "grad_norm": 8.371490963724002, + "learning_rate": 9.926097581762345e-06, + "loss": 18.0951, + "step": 4550 + }, + { + "epoch": 0.08318862302813168, + "grad_norm": 6.357697406914699, + "learning_rate": 9.92604686737736e-06, + "loss": 17.2828, + "step": 4551 + }, + { + "epoch": 0.0832069022245782, + "grad_norm": 7.768501300521136, + "learning_rate": 9.925996135727008e-06, + "loss": 17.9727, + "step": 4552 + }, + { + "epoch": 0.08322518142102474, + "grad_norm": 9.06060336298556, + "learning_rate": 9.925945386811465e-06, + "loss": 18.6941, + "step": 4553 + }, + { + "epoch": 0.08324346061747126, + "grad_norm": 7.674919283870002, + "learning_rate": 9.925894620630913e-06, + "loss": 17.8699, + "step": 4554 + }, + { + "epoch": 0.08326173981391778, + "grad_norm": 7.289462358372745, + "learning_rate": 9.925843837185527e-06, + "loss": 17.9227, + "step": 4555 + }, + { + "epoch": 0.0832800190103643, + "grad_norm": 7.090399647235003, + "learning_rate": 9.925793036475487e-06, + "loss": 17.5483, + "step": 4556 + }, + { + "epoch": 0.08329829820681083, + "grad_norm": 7.85372437426703, + "learning_rate": 9.925742218500968e-06, + "loss": 18.0304, + "step": 4557 + }, + { + "epoch": 0.08331657740325735, + "grad_norm": 7.387644049768619, + 
"learning_rate": 9.92569138326215e-06, + "loss": 17.8261, + "step": 4558 + }, + { + "epoch": 0.08333485659970388, + "grad_norm": 8.319402741482605, + "learning_rate": 9.925640530759213e-06, + "loss": 18.2823, + "step": 4559 + }, + { + "epoch": 0.0833531357961504, + "grad_norm": 7.572417281552823, + "learning_rate": 9.925589660992332e-06, + "loss": 17.8767, + "step": 4560 + }, + { + "epoch": 0.08337141499259693, + "grad_norm": 8.27022005659377, + "learning_rate": 9.925538773961687e-06, + "loss": 18.3284, + "step": 4561 + }, + { + "epoch": 0.08338969418904345, + "grad_norm": 7.35226779602324, + "learning_rate": 9.925487869667456e-06, + "loss": 17.6777, + "step": 4562 + }, + { + "epoch": 0.08340797338548997, + "grad_norm": 8.234002551773756, + "learning_rate": 9.925436948109817e-06, + "loss": 17.6783, + "step": 4563 + }, + { + "epoch": 0.08342625258193649, + "grad_norm": 8.303573018991921, + "learning_rate": 9.92538600928895e-06, + "loss": 18.1547, + "step": 4564 + }, + { + "epoch": 0.08344453177838303, + "grad_norm": 7.785953917779978, + "learning_rate": 9.925335053205032e-06, + "loss": 18.1239, + "step": 4565 + }, + { + "epoch": 0.08346281097482955, + "grad_norm": 6.477639244898419, + "learning_rate": 9.925284079858242e-06, + "loss": 17.4685, + "step": 4566 + }, + { + "epoch": 0.08348109017127607, + "grad_norm": 7.880983959901846, + "learning_rate": 9.925233089248758e-06, + "loss": 17.9942, + "step": 4567 + }, + { + "epoch": 0.08349936936772259, + "grad_norm": 7.570587483189525, + "learning_rate": 9.92518208137676e-06, + "loss": 17.9259, + "step": 4568 + }, + { + "epoch": 0.08351764856416911, + "grad_norm": 6.457233257151615, + "learning_rate": 9.925131056242426e-06, + "loss": 17.7171, + "step": 4569 + }, + { + "epoch": 0.08353592776061565, + "grad_norm": 5.575333496408933, + "learning_rate": 9.925080013845935e-06, + "loss": 17.086, + "step": 4570 + }, + { + "epoch": 0.08355420695706217, + "grad_norm": 6.370758261416243, + "learning_rate": 9.925028954187466e-06, + "loss": 17.4101, + "step": 4571 + }, + { + "epoch": 0.0835724861535087, + "grad_norm": 7.532208336609889, + "learning_rate": 9.924977877267198e-06, + "loss": 17.9726, + "step": 4572 + }, + { + "epoch": 0.08359076534995522, + "grad_norm": 7.79379453646087, + "learning_rate": 9.924926783085309e-06, + "loss": 17.7622, + "step": 4573 + }, + { + "epoch": 0.08360904454640174, + "grad_norm": 7.349686634113788, + "learning_rate": 9.92487567164198e-06, + "loss": 17.5063, + "step": 4574 + }, + { + "epoch": 0.08362732374284826, + "grad_norm": 7.145670600779787, + "learning_rate": 9.92482454293739e-06, + "loss": 17.713, + "step": 4575 + }, + { + "epoch": 0.0836456029392948, + "grad_norm": 7.5937590159500505, + "learning_rate": 9.924773396971716e-06, + "loss": 18.2291, + "step": 4576 + }, + { + "epoch": 0.08366388213574132, + "grad_norm": 6.435686012792753, + "learning_rate": 9.924722233745139e-06, + "loss": 17.3644, + "step": 4577 + }, + { + "epoch": 0.08368216133218784, + "grad_norm": 6.725851490081413, + "learning_rate": 9.924671053257838e-06, + "loss": 17.9066, + "step": 4578 + }, + { + "epoch": 0.08370044052863436, + "grad_norm": 8.07687328527373, + "learning_rate": 9.924619855509992e-06, + "loss": 18.215, + "step": 4579 + }, + { + "epoch": 0.08371871972508088, + "grad_norm": 7.684491657654788, + "learning_rate": 9.92456864050178e-06, + "loss": 18.1529, + "step": 4580 + }, + { + "epoch": 0.0837369989215274, + "grad_norm": 6.807024820501327, + "learning_rate": 9.924517408233385e-06, + "loss": 17.4883, + "step": 4581 + }, + { + "epoch": 
0.08375527811797394, + "grad_norm": 7.203634154142472, + "learning_rate": 9.92446615870498e-06, + "loss": 18.0135, + "step": 4582 + }, + { + "epoch": 0.08377355731442046, + "grad_norm": 6.9425479112905855, + "learning_rate": 9.924414891916752e-06, + "loss": 17.6753, + "step": 4583 + }, + { + "epoch": 0.08379183651086698, + "grad_norm": 10.335705400356025, + "learning_rate": 9.924363607868875e-06, + "loss": 18.7902, + "step": 4584 + }, + { + "epoch": 0.0838101157073135, + "grad_norm": 6.551381299515295, + "learning_rate": 9.92431230656153e-06, + "loss": 17.2856, + "step": 4585 + }, + { + "epoch": 0.08382839490376003, + "grad_norm": 6.193330792540202, + "learning_rate": 9.9242609879949e-06, + "loss": 17.3872, + "step": 4586 + }, + { + "epoch": 0.08384667410020656, + "grad_norm": 7.093456749267061, + "learning_rate": 9.92420965216916e-06, + "loss": 17.6227, + "step": 4587 + }, + { + "epoch": 0.08386495329665308, + "grad_norm": 6.386760426569066, + "learning_rate": 9.924158299084497e-06, + "loss": 17.5053, + "step": 4588 + }, + { + "epoch": 0.0838832324930996, + "grad_norm": 7.250432946564252, + "learning_rate": 9.924106928741081e-06, + "loss": 17.8459, + "step": 4589 + }, + { + "epoch": 0.08390151168954613, + "grad_norm": 8.420710002988253, + "learning_rate": 9.924055541139101e-06, + "loss": 18.6423, + "step": 4590 + }, + { + "epoch": 0.08391979088599265, + "grad_norm": 7.091370597423831, + "learning_rate": 9.924004136278734e-06, + "loss": 17.7346, + "step": 4591 + }, + { + "epoch": 0.08393807008243917, + "grad_norm": 7.407183416785795, + "learning_rate": 9.923952714160158e-06, + "loss": 17.9074, + "step": 4592 + }, + { + "epoch": 0.0839563492788857, + "grad_norm": 6.229878431683514, + "learning_rate": 9.923901274783556e-06, + "loss": 17.4906, + "step": 4593 + }, + { + "epoch": 0.08397462847533223, + "grad_norm": 6.24420542102906, + "learning_rate": 9.923849818149106e-06, + "loss": 17.3914, + "step": 4594 + }, + { + "epoch": 0.08399290767177875, + "grad_norm": 7.694442797149234, + "learning_rate": 9.92379834425699e-06, + "loss": 17.9646, + "step": 4595 + }, + { + "epoch": 0.08401118686822527, + "grad_norm": 7.534197946991661, + "learning_rate": 9.923746853107389e-06, + "loss": 17.7066, + "step": 4596 + }, + { + "epoch": 0.08402946606467179, + "grad_norm": 7.5512725979457676, + "learning_rate": 9.92369534470048e-06, + "loss": 17.7111, + "step": 4597 + }, + { + "epoch": 0.08404774526111831, + "grad_norm": 6.850704097721616, + "learning_rate": 9.92364381903645e-06, + "loss": 17.7127, + "step": 4598 + }, + { + "epoch": 0.08406602445756485, + "grad_norm": 7.458877663742378, + "learning_rate": 9.923592276115474e-06, + "loss": 17.9754, + "step": 4599 + }, + { + "epoch": 0.08408430365401137, + "grad_norm": 7.719899131278976, + "learning_rate": 9.923540715937734e-06, + "loss": 17.8802, + "step": 4600 + }, + { + "epoch": 0.0841025828504579, + "grad_norm": 7.38570962640599, + "learning_rate": 9.92348913850341e-06, + "loss": 17.7526, + "step": 4601 + }, + { + "epoch": 0.08412086204690442, + "grad_norm": 7.310302922438322, + "learning_rate": 9.923437543812686e-06, + "loss": 18.011, + "step": 4602 + }, + { + "epoch": 0.08413914124335094, + "grad_norm": 7.199501970253766, + "learning_rate": 9.92338593186574e-06, + "loss": 17.6991, + "step": 4603 + }, + { + "epoch": 0.08415742043979747, + "grad_norm": 7.660553506333359, + "learning_rate": 9.923334302662753e-06, + "loss": 18.1197, + "step": 4604 + }, + { + "epoch": 0.084175699636244, + "grad_norm": 7.4994971748351285, + "learning_rate": 9.923282656203906e-06, 
+ "loss": 17.9303, + "step": 4605 + }, + { + "epoch": 0.08419397883269052, + "grad_norm": 7.022139786012747, + "learning_rate": 9.92323099248938e-06, + "loss": 17.6392, + "step": 4606 + }, + { + "epoch": 0.08421225802913704, + "grad_norm": 6.842919258651133, + "learning_rate": 9.92317931151936e-06, + "loss": 17.9257, + "step": 4607 + }, + { + "epoch": 0.08423053722558356, + "grad_norm": 6.455226973441174, + "learning_rate": 9.923127613294021e-06, + "loss": 17.3945, + "step": 4608 + }, + { + "epoch": 0.08424881642203008, + "grad_norm": 7.221120648143459, + "learning_rate": 9.923075897813548e-06, + "loss": 17.7731, + "step": 4609 + }, + { + "epoch": 0.08426709561847662, + "grad_norm": 7.568262938606192, + "learning_rate": 9.92302416507812e-06, + "loss": 18.06, + "step": 4610 + }, + { + "epoch": 0.08428537481492314, + "grad_norm": 6.058389900149614, + "learning_rate": 9.92297241508792e-06, + "loss": 17.4748, + "step": 4611 + }, + { + "epoch": 0.08430365401136966, + "grad_norm": 7.583642015596835, + "learning_rate": 9.922920647843128e-06, + "loss": 17.8556, + "step": 4612 + }, + { + "epoch": 0.08432193320781618, + "grad_norm": 6.698944587393052, + "learning_rate": 9.922868863343929e-06, + "loss": 17.72, + "step": 4613 + }, + { + "epoch": 0.0843402124042627, + "grad_norm": 7.125979338649392, + "learning_rate": 9.922817061590499e-06, + "loss": 17.6758, + "step": 4614 + }, + { + "epoch": 0.08435849160070923, + "grad_norm": 8.228399202299315, + "learning_rate": 9.922765242583023e-06, + "loss": 18.3639, + "step": 4615 + }, + { + "epoch": 0.08437677079715576, + "grad_norm": 9.651616808629765, + "learning_rate": 9.922713406321684e-06, + "loss": 18.5995, + "step": 4616 + }, + { + "epoch": 0.08439504999360228, + "grad_norm": 6.877989458037605, + "learning_rate": 9.92266155280666e-06, + "loss": 17.4893, + "step": 4617 + }, + { + "epoch": 0.0844133291900488, + "grad_norm": 6.939083593763451, + "learning_rate": 9.922609682038134e-06, + "loss": 17.2929, + "step": 4618 + }, + { + "epoch": 0.08443160838649533, + "grad_norm": 8.447030077035594, + "learning_rate": 9.92255779401629e-06, + "loss": 18.0655, + "step": 4619 + }, + { + "epoch": 0.08444988758294185, + "grad_norm": 7.385349730703015, + "learning_rate": 9.922505888741306e-06, + "loss": 17.8132, + "step": 4620 + }, + { + "epoch": 0.08446816677938838, + "grad_norm": 7.611129849504548, + "learning_rate": 9.922453966213366e-06, + "loss": 17.9683, + "step": 4621 + }, + { + "epoch": 0.0844864459758349, + "grad_norm": 6.948264508071709, + "learning_rate": 9.922402026432653e-06, + "loss": 17.9001, + "step": 4622 + }, + { + "epoch": 0.08450472517228143, + "grad_norm": 7.269876331297846, + "learning_rate": 9.922350069399349e-06, + "loss": 17.7451, + "step": 4623 + }, + { + "epoch": 0.08452300436872795, + "grad_norm": 8.361984673894524, + "learning_rate": 9.922298095113634e-06, + "loss": 17.8335, + "step": 4624 + }, + { + "epoch": 0.08454128356517447, + "grad_norm": 6.842570747235827, + "learning_rate": 9.922246103575692e-06, + "loss": 17.7474, + "step": 4625 + }, + { + "epoch": 0.084559562761621, + "grad_norm": 7.79425463051433, + "learning_rate": 9.922194094785704e-06, + "loss": 18.3362, + "step": 4626 + }, + { + "epoch": 0.08457784195806753, + "grad_norm": 7.305390359806515, + "learning_rate": 9.922142068743852e-06, + "loss": 17.7493, + "step": 4627 + }, + { + "epoch": 0.08459612115451405, + "grad_norm": 7.481217680342079, + "learning_rate": 9.92209002545032e-06, + "loss": 17.9787, + "step": 4628 + }, + { + "epoch": 0.08461440035096057, + "grad_norm": 
7.037488075031782, + "learning_rate": 9.92203796490529e-06, + "loss": 17.7031, + "step": 4629 + }, + { + "epoch": 0.0846326795474071, + "grad_norm": 7.394315921256108, + "learning_rate": 9.921985887108944e-06, + "loss": 18.1961, + "step": 4630 + }, + { + "epoch": 0.08465095874385362, + "grad_norm": 7.456021612583716, + "learning_rate": 9.921933792061464e-06, + "loss": 17.7568, + "step": 4631 + }, + { + "epoch": 0.08466923794030014, + "grad_norm": 6.620630947578043, + "learning_rate": 9.921881679763033e-06, + "loss": 17.6499, + "step": 4632 + }, + { + "epoch": 0.08468751713674667, + "grad_norm": 6.172522661410363, + "learning_rate": 9.921829550213834e-06, + "loss": 17.5983, + "step": 4633 + }, + { + "epoch": 0.0847057963331932, + "grad_norm": 7.4829460278418996, + "learning_rate": 9.92177740341405e-06, + "loss": 18.0093, + "step": 4634 + }, + { + "epoch": 0.08472407552963972, + "grad_norm": 6.460635217610971, + "learning_rate": 9.921725239363862e-06, + "loss": 17.4151, + "step": 4635 + }, + { + "epoch": 0.08474235472608624, + "grad_norm": 7.598256342533212, + "learning_rate": 9.921673058063456e-06, + "loss": 17.6448, + "step": 4636 + }, + { + "epoch": 0.08476063392253276, + "grad_norm": 7.915591943281325, + "learning_rate": 9.921620859513012e-06, + "loss": 17.9776, + "step": 4637 + }, + { + "epoch": 0.0847789131189793, + "grad_norm": 6.605683142047098, + "learning_rate": 9.921568643712715e-06, + "loss": 17.6836, + "step": 4638 + }, + { + "epoch": 0.08479719231542582, + "grad_norm": 7.004548467830464, + "learning_rate": 9.921516410662745e-06, + "loss": 17.6931, + "step": 4639 + }, + { + "epoch": 0.08481547151187234, + "grad_norm": 7.748531232232952, + "learning_rate": 9.921464160363288e-06, + "loss": 18.0534, + "step": 4640 + }, + { + "epoch": 0.08483375070831886, + "grad_norm": 6.109900033572359, + "learning_rate": 9.921411892814527e-06, + "loss": 17.3033, + "step": 4641 + }, + { + "epoch": 0.08485202990476538, + "grad_norm": 6.663640655959265, + "learning_rate": 9.921359608016644e-06, + "loss": 17.5133, + "step": 4642 + }, + { + "epoch": 0.0848703091012119, + "grad_norm": 8.913487294427815, + "learning_rate": 9.921307305969822e-06, + "loss": 18.4524, + "step": 4643 + }, + { + "epoch": 0.08488858829765844, + "grad_norm": 8.592237863143653, + "learning_rate": 9.921254986674245e-06, + "loss": 18.2266, + "step": 4644 + }, + { + "epoch": 0.08490686749410496, + "grad_norm": 7.136635747172818, + "learning_rate": 9.921202650130098e-06, + "loss": 17.7482, + "step": 4645 + }, + { + "epoch": 0.08492514669055148, + "grad_norm": 9.632606101164654, + "learning_rate": 9.92115029633756e-06, + "loss": 19.086, + "step": 4646 + }, + { + "epoch": 0.084943425886998, + "grad_norm": 7.265895092754076, + "learning_rate": 9.921097925296819e-06, + "loss": 17.9276, + "step": 4647 + }, + { + "epoch": 0.08496170508344453, + "grad_norm": 7.297272946366155, + "learning_rate": 9.921045537008057e-06, + "loss": 17.635, + "step": 4648 + }, + { + "epoch": 0.08497998427989105, + "grad_norm": 6.623038852274819, + "learning_rate": 9.920993131471456e-06, + "loss": 17.5517, + "step": 4649 + }, + { + "epoch": 0.08499826347633758, + "grad_norm": 7.517246150877132, + "learning_rate": 9.920940708687201e-06, + "loss": 17.8812, + "step": 4650 + }, + { + "epoch": 0.0850165426727841, + "grad_norm": 7.6787574631385676, + "learning_rate": 9.920888268655477e-06, + "loss": 18.1996, + "step": 4651 + }, + { + "epoch": 0.08503482186923063, + "grad_norm": 9.662814240443105, + "learning_rate": 9.920835811376468e-06, + "loss": 18.6466, + "step": 
4652 + }, + { + "epoch": 0.08505310106567715, + "grad_norm": 7.901212720931815, + "learning_rate": 9.920783336850353e-06, + "loss": 18.3602, + "step": 4653 + }, + { + "epoch": 0.08507138026212367, + "grad_norm": 6.018024588978739, + "learning_rate": 9.920730845077323e-06, + "loss": 17.4173, + "step": 4654 + }, + { + "epoch": 0.08508965945857021, + "grad_norm": 6.6539443475311835, + "learning_rate": 9.920678336057555e-06, + "loss": 17.5248, + "step": 4655 + }, + { + "epoch": 0.08510793865501673, + "grad_norm": 7.04117038173876, + "learning_rate": 9.920625809791237e-06, + "loss": 17.7653, + "step": 4656 + }, + { + "epoch": 0.08512621785146325, + "grad_norm": 6.725694182599704, + "learning_rate": 9.920573266278555e-06, + "loss": 17.6774, + "step": 4657 + }, + { + "epoch": 0.08514449704790977, + "grad_norm": 7.5109578583494585, + "learning_rate": 9.920520705519689e-06, + "loss": 18.0893, + "step": 4658 + }, + { + "epoch": 0.0851627762443563, + "grad_norm": 8.935053293856278, + "learning_rate": 9.920468127514827e-06, + "loss": 18.1475, + "step": 4659 + }, + { + "epoch": 0.08518105544080282, + "grad_norm": 6.195186641273887, + "learning_rate": 9.920415532264149e-06, + "loss": 17.4453, + "step": 4660 + }, + { + "epoch": 0.08519933463724935, + "grad_norm": 7.545760255505536, + "learning_rate": 9.920362919767841e-06, + "loss": 18.1785, + "step": 4661 + }, + { + "epoch": 0.08521761383369587, + "grad_norm": 6.711386547356334, + "learning_rate": 9.92031029002609e-06, + "loss": 17.3788, + "step": 4662 + }, + { + "epoch": 0.0852358930301424, + "grad_norm": 7.660182236696144, + "learning_rate": 9.920257643039076e-06, + "loss": 18.1192, + "step": 4663 + }, + { + "epoch": 0.08525417222658892, + "grad_norm": 7.581371210557854, + "learning_rate": 9.920204978806987e-06, + "loss": 18.0288, + "step": 4664 + }, + { + "epoch": 0.08527245142303544, + "grad_norm": 6.448097302083825, + "learning_rate": 9.920152297330009e-06, + "loss": 17.3281, + "step": 4665 + }, + { + "epoch": 0.08529073061948196, + "grad_norm": 8.203186645850211, + "learning_rate": 9.920099598608318e-06, + "loss": 18.4436, + "step": 4666 + }, + { + "epoch": 0.0853090098159285, + "grad_norm": 6.542271610026919, + "learning_rate": 9.920046882642111e-06, + "loss": 17.41, + "step": 4667 + }, + { + "epoch": 0.08532728901237502, + "grad_norm": 8.341157410091265, + "learning_rate": 9.919994149431564e-06, + "loss": 18.218, + "step": 4668 + }, + { + "epoch": 0.08534556820882154, + "grad_norm": 7.659982388627232, + "learning_rate": 9.919941398976864e-06, + "loss": 18.1263, + "step": 4669 + }, + { + "epoch": 0.08536384740526806, + "grad_norm": 7.88822957890354, + "learning_rate": 9.919888631278199e-06, + "loss": 18.1774, + "step": 4670 + }, + { + "epoch": 0.08538212660171458, + "grad_norm": 7.882926322531679, + "learning_rate": 9.919835846335748e-06, + "loss": 18.1639, + "step": 4671 + }, + { + "epoch": 0.08540040579816112, + "grad_norm": 6.636317951245712, + "learning_rate": 9.919783044149701e-06, + "loss": 17.3923, + "step": 4672 + }, + { + "epoch": 0.08541868499460764, + "grad_norm": 6.930030128289028, + "learning_rate": 9.919730224720241e-06, + "loss": 17.5655, + "step": 4673 + }, + { + "epoch": 0.08543696419105416, + "grad_norm": 7.387145584405131, + "learning_rate": 9.919677388047553e-06, + "loss": 18.1185, + "step": 4674 + }, + { + "epoch": 0.08545524338750068, + "grad_norm": 7.546342043076622, + "learning_rate": 9.919624534131824e-06, + "loss": 17.9868, + "step": 4675 + }, + { + "epoch": 0.0854735225839472, + "grad_norm": 6.770470122143234, + 
"learning_rate": 9.919571662973238e-06, + "loss": 17.5021, + "step": 4676 + }, + { + "epoch": 0.08549180178039373, + "grad_norm": 6.804411522980926, + "learning_rate": 9.919518774571979e-06, + "loss": 17.7389, + "step": 4677 + }, + { + "epoch": 0.08551008097684026, + "grad_norm": 8.267455277026194, + "learning_rate": 9.919465868928234e-06, + "loss": 18.3289, + "step": 4678 + }, + { + "epoch": 0.08552836017328679, + "grad_norm": 8.291586811682983, + "learning_rate": 9.919412946042186e-06, + "loss": 18.2858, + "step": 4679 + }, + { + "epoch": 0.08554663936973331, + "grad_norm": 7.4112105963364945, + "learning_rate": 9.919360005914026e-06, + "loss": 17.9025, + "step": 4680 + }, + { + "epoch": 0.08556491856617983, + "grad_norm": 6.3738661114120285, + "learning_rate": 9.919307048543933e-06, + "loss": 17.4316, + "step": 4681 + }, + { + "epoch": 0.08558319776262635, + "grad_norm": 6.371580144636659, + "learning_rate": 9.9192540739321e-06, + "loss": 17.38, + "step": 4682 + }, + { + "epoch": 0.08560147695907287, + "grad_norm": 7.31919105880218, + "learning_rate": 9.919201082078703e-06, + "loss": 17.9746, + "step": 4683 + }, + { + "epoch": 0.08561975615551941, + "grad_norm": 8.388661511336576, + "learning_rate": 9.919148072983936e-06, + "loss": 18.3804, + "step": 4684 + }, + { + "epoch": 0.08563803535196593, + "grad_norm": 8.085792428675228, + "learning_rate": 9.919095046647982e-06, + "loss": 18.1638, + "step": 4685 + }, + { + "epoch": 0.08565631454841245, + "grad_norm": 5.777711832221259, + "learning_rate": 9.919042003071026e-06, + "loss": 16.9436, + "step": 4686 + }, + { + "epoch": 0.08567459374485897, + "grad_norm": 6.629114929315756, + "learning_rate": 9.918988942253254e-06, + "loss": 17.4534, + "step": 4687 + }, + { + "epoch": 0.0856928729413055, + "grad_norm": 6.559108221099402, + "learning_rate": 9.918935864194855e-06, + "loss": 17.2541, + "step": 4688 + }, + { + "epoch": 0.08571115213775203, + "grad_norm": 6.288491885844113, + "learning_rate": 9.91888276889601e-06, + "loss": 17.3603, + "step": 4689 + }, + { + "epoch": 0.08572943133419855, + "grad_norm": 7.1051717030267465, + "learning_rate": 9.918829656356908e-06, + "loss": 17.7719, + "step": 4690 + }, + { + "epoch": 0.08574771053064507, + "grad_norm": 8.21598696323983, + "learning_rate": 9.918776526577735e-06, + "loss": 18.1435, + "step": 4691 + }, + { + "epoch": 0.0857659897270916, + "grad_norm": 6.7548119803895865, + "learning_rate": 9.918723379558679e-06, + "loss": 17.4835, + "step": 4692 + }, + { + "epoch": 0.08578426892353812, + "grad_norm": 10.160157657831514, + "learning_rate": 9.918670215299924e-06, + "loss": 18.7581, + "step": 4693 + }, + { + "epoch": 0.08580254811998464, + "grad_norm": 6.750081472867717, + "learning_rate": 9.918617033801654e-06, + "loss": 17.4776, + "step": 4694 + }, + { + "epoch": 0.08582082731643118, + "grad_norm": 6.571634199227151, + "learning_rate": 9.91856383506406e-06, + "loss": 17.4038, + "step": 4695 + }, + { + "epoch": 0.0858391065128777, + "grad_norm": 6.569445575180033, + "learning_rate": 9.918510619087327e-06, + "loss": 17.3845, + "step": 4696 + }, + { + "epoch": 0.08585738570932422, + "grad_norm": 8.874425126433062, + "learning_rate": 9.91845738587164e-06, + "loss": 18.4588, + "step": 4697 + }, + { + "epoch": 0.08587566490577074, + "grad_norm": 7.072914521496579, + "learning_rate": 9.918404135417187e-06, + "loss": 17.528, + "step": 4698 + }, + { + "epoch": 0.08589394410221726, + "grad_norm": 8.364495270293883, + "learning_rate": 9.918350867724156e-06, + "loss": 18.5498, + "step": 4699 + }, + { + 
"epoch": 0.08591222329866378, + "grad_norm": 7.461258301437727, + "learning_rate": 9.91829758279273e-06, + "loss": 17.431, + "step": 4700 + }, + { + "epoch": 0.08593050249511032, + "grad_norm": 7.5937863251442375, + "learning_rate": 9.9182442806231e-06, + "loss": 17.7866, + "step": 4701 + }, + { + "epoch": 0.08594878169155684, + "grad_norm": 8.314622980150814, + "learning_rate": 9.918190961215449e-06, + "loss": 18.5394, + "step": 4702 + }, + { + "epoch": 0.08596706088800336, + "grad_norm": 7.14383782736478, + "learning_rate": 9.918137624569964e-06, + "loss": 17.626, + "step": 4703 + }, + { + "epoch": 0.08598534008444988, + "grad_norm": 8.670159331909955, + "learning_rate": 9.918084270686836e-06, + "loss": 18.3442, + "step": 4704 + }, + { + "epoch": 0.0860036192808964, + "grad_norm": 6.101264289550609, + "learning_rate": 9.918030899566247e-06, + "loss": 17.3995, + "step": 4705 + }, + { + "epoch": 0.08602189847734294, + "grad_norm": 6.313456545793547, + "learning_rate": 9.917977511208388e-06, + "loss": 17.3159, + "step": 4706 + }, + { + "epoch": 0.08604017767378946, + "grad_norm": 6.812465386859783, + "learning_rate": 9.917924105613444e-06, + "loss": 17.5959, + "step": 4707 + }, + { + "epoch": 0.08605845687023599, + "grad_norm": 7.838779619179673, + "learning_rate": 9.917870682781604e-06, + "loss": 17.8477, + "step": 4708 + }, + { + "epoch": 0.08607673606668251, + "grad_norm": 6.076232743193538, + "learning_rate": 9.917817242713052e-06, + "loss": 17.3116, + "step": 4709 + }, + { + "epoch": 0.08609501526312903, + "grad_norm": 7.855771540727316, + "learning_rate": 9.917763785407979e-06, + "loss": 17.781, + "step": 4710 + }, + { + "epoch": 0.08611329445957555, + "grad_norm": 8.143194125428792, + "learning_rate": 9.917710310866571e-06, + "loss": 18.1333, + "step": 4711 + }, + { + "epoch": 0.08613157365602209, + "grad_norm": 7.099837426909703, + "learning_rate": 9.917656819089013e-06, + "loss": 17.6271, + "step": 4712 + }, + { + "epoch": 0.08614985285246861, + "grad_norm": 8.042551395431243, + "learning_rate": 9.917603310075497e-06, + "loss": 18.2618, + "step": 4713 + }, + { + "epoch": 0.08616813204891513, + "grad_norm": 8.082796459586074, + "learning_rate": 9.917549783826205e-06, + "loss": 17.8096, + "step": 4714 + }, + { + "epoch": 0.08618641124536165, + "grad_norm": 6.055973030126549, + "learning_rate": 9.917496240341332e-06, + "loss": 17.377, + "step": 4715 + }, + { + "epoch": 0.08620469044180817, + "grad_norm": 7.261066096383178, + "learning_rate": 9.917442679621057e-06, + "loss": 17.9038, + "step": 4716 + }, + { + "epoch": 0.0862229696382547, + "grad_norm": 7.210584710222747, + "learning_rate": 9.917389101665574e-06, + "loss": 17.4521, + "step": 4717 + }, + { + "epoch": 0.08624124883470123, + "grad_norm": 7.603368845283619, + "learning_rate": 9.91733550647507e-06, + "loss": 17.5343, + "step": 4718 + }, + { + "epoch": 0.08625952803114775, + "grad_norm": 6.967903521521943, + "learning_rate": 9.91728189404973e-06, + "loss": 17.827, + "step": 4719 + }, + { + "epoch": 0.08627780722759427, + "grad_norm": 7.427687752088161, + "learning_rate": 9.917228264389744e-06, + "loss": 18.0324, + "step": 4720 + }, + { + "epoch": 0.0862960864240408, + "grad_norm": 6.799035386057385, + "learning_rate": 9.9171746174953e-06, + "loss": 17.6637, + "step": 4721 + }, + { + "epoch": 0.08631436562048732, + "grad_norm": 8.780168374615268, + "learning_rate": 9.917120953366585e-06, + "loss": 18.5605, + "step": 4722 + }, + { + "epoch": 0.08633264481693385, + "grad_norm": 7.326438217879566, + "learning_rate": 
9.917067272003789e-06, + "loss": 17.785, + "step": 4723 + }, + { + "epoch": 0.08635092401338038, + "grad_norm": 6.52323903292793, + "learning_rate": 9.9170135734071e-06, + "loss": 17.4051, + "step": 4724 + }, + { + "epoch": 0.0863692032098269, + "grad_norm": 11.463202013983967, + "learning_rate": 9.916959857576703e-06, + "loss": 19.7018, + "step": 4725 + }, + { + "epoch": 0.08638748240627342, + "grad_norm": 8.491255105504997, + "learning_rate": 9.916906124512789e-06, + "loss": 18.0327, + "step": 4726 + }, + { + "epoch": 0.08640576160271994, + "grad_norm": 6.7005263588979265, + "learning_rate": 9.916852374215545e-06, + "loss": 17.5082, + "step": 4727 + }, + { + "epoch": 0.08642404079916646, + "grad_norm": 7.021664812018635, + "learning_rate": 9.91679860668516e-06, + "loss": 17.7016, + "step": 4728 + }, + { + "epoch": 0.086442319995613, + "grad_norm": 7.7617505641899225, + "learning_rate": 9.916744821921824e-06, + "loss": 18.0579, + "step": 4729 + }, + { + "epoch": 0.08646059919205952, + "grad_norm": 6.873010943974916, + "learning_rate": 9.916691019925723e-06, + "loss": 17.5972, + "step": 4730 + }, + { + "epoch": 0.08647887838850604, + "grad_norm": 6.5318766190601645, + "learning_rate": 9.916637200697047e-06, + "loss": 17.35, + "step": 4731 + }, + { + "epoch": 0.08649715758495256, + "grad_norm": 6.739330700608552, + "learning_rate": 9.916583364235985e-06, + "loss": 17.579, + "step": 4732 + }, + { + "epoch": 0.08651543678139909, + "grad_norm": 7.6456354628620975, + "learning_rate": 9.916529510542722e-06, + "loss": 18.0121, + "step": 4733 + }, + { + "epoch": 0.08653371597784561, + "grad_norm": 7.915953905378684, + "learning_rate": 9.916475639617454e-06, + "loss": 18.1782, + "step": 4734 + }, + { + "epoch": 0.08655199517429214, + "grad_norm": 7.95960802520631, + "learning_rate": 9.916421751460363e-06, + "loss": 17.9121, + "step": 4735 + }, + { + "epoch": 0.08657027437073866, + "grad_norm": 7.2095480650830615, + "learning_rate": 9.91636784607164e-06, + "loss": 17.8019, + "step": 4736 + }, + { + "epoch": 0.08658855356718519, + "grad_norm": 7.394382143574761, + "learning_rate": 9.916313923451475e-06, + "loss": 17.7964, + "step": 4737 + }, + { + "epoch": 0.08660683276363171, + "grad_norm": 8.569244455072077, + "learning_rate": 9.916259983600056e-06, + "loss": 18.3305, + "step": 4738 + }, + { + "epoch": 0.08662511196007823, + "grad_norm": 7.378206640272703, + "learning_rate": 9.916206026517572e-06, + "loss": 17.888, + "step": 4739 + }, + { + "epoch": 0.08664339115652477, + "grad_norm": 8.314015635870948, + "learning_rate": 9.916152052204215e-06, + "loss": 18.3373, + "step": 4740 + }, + { + "epoch": 0.08666167035297129, + "grad_norm": 5.5408637013582505, + "learning_rate": 9.916098060660169e-06, + "loss": 17.0137, + "step": 4741 + }, + { + "epoch": 0.08667994954941781, + "grad_norm": 6.79331002480117, + "learning_rate": 9.916044051885627e-06, + "loss": 17.6932, + "step": 4742 + }, + { + "epoch": 0.08669822874586433, + "grad_norm": 7.744255218459719, + "learning_rate": 9.915990025880777e-06, + "loss": 17.5147, + "step": 4743 + }, + { + "epoch": 0.08671650794231085, + "grad_norm": 6.530015917386727, + "learning_rate": 9.915935982645807e-06, + "loss": 17.4595, + "step": 4744 + }, + { + "epoch": 0.08673478713875737, + "grad_norm": 10.463815575400252, + "learning_rate": 9.915881922180911e-06, + "loss": 18.8998, + "step": 4745 + }, + { + "epoch": 0.08675306633520391, + "grad_norm": 7.452022149867731, + "learning_rate": 9.915827844486275e-06, + "loss": 18.0894, + "step": 4746 + }, + { + "epoch": 
0.08677134553165043, + "grad_norm": 6.207915039399545, + "learning_rate": 9.915773749562086e-06, + "loss": 17.3329, + "step": 4747 + }, + { + "epoch": 0.08678962472809695, + "grad_norm": 7.126148411356433, + "learning_rate": 9.915719637408538e-06, + "loss": 17.8531, + "step": 4748 + }, + { + "epoch": 0.08680790392454348, + "grad_norm": 8.512808138400677, + "learning_rate": 9.91566550802582e-06, + "loss": 18.3305, + "step": 4749 + }, + { + "epoch": 0.08682618312099, + "grad_norm": 7.792977232722275, + "learning_rate": 9.91561136141412e-06, + "loss": 18.0873, + "step": 4750 + }, + { + "epoch": 0.08684446231743652, + "grad_norm": 6.565287707543799, + "learning_rate": 9.915557197573631e-06, + "loss": 17.6469, + "step": 4751 + }, + { + "epoch": 0.08686274151388305, + "grad_norm": 8.4981894700173, + "learning_rate": 9.915503016504539e-06, + "loss": 18.4064, + "step": 4752 + }, + { + "epoch": 0.08688102071032958, + "grad_norm": 6.643736434631383, + "learning_rate": 9.915448818207035e-06, + "loss": 17.5126, + "step": 4753 + }, + { + "epoch": 0.0868992999067761, + "grad_norm": 6.366278565981884, + "learning_rate": 9.91539460268131e-06, + "loss": 17.3545, + "step": 4754 + }, + { + "epoch": 0.08691757910322262, + "grad_norm": 6.333523566708861, + "learning_rate": 9.915340369927553e-06, + "loss": 17.4884, + "step": 4755 + }, + { + "epoch": 0.08693585829966914, + "grad_norm": 7.293604589598897, + "learning_rate": 9.915286119945955e-06, + "loss": 17.9825, + "step": 4756 + }, + { + "epoch": 0.08695413749611568, + "grad_norm": 6.0694639902843495, + "learning_rate": 9.915231852736707e-06, + "loss": 17.5076, + "step": 4757 + }, + { + "epoch": 0.0869724166925622, + "grad_norm": 6.599357207289291, + "learning_rate": 9.915177568299995e-06, + "loss": 17.5141, + "step": 4758 + }, + { + "epoch": 0.08699069588900872, + "grad_norm": 6.2672771878372595, + "learning_rate": 9.915123266636013e-06, + "loss": 17.5482, + "step": 4759 + }, + { + "epoch": 0.08700897508545524, + "grad_norm": 7.239215769355867, + "learning_rate": 9.915068947744953e-06, + "loss": 17.615, + "step": 4760 + }, + { + "epoch": 0.08702725428190176, + "grad_norm": 7.145135515623931, + "learning_rate": 9.915014611627e-06, + "loss": 17.8667, + "step": 4761 + }, + { + "epoch": 0.08704553347834829, + "grad_norm": 7.1987700167538975, + "learning_rate": 9.914960258282348e-06, + "loss": 17.9123, + "step": 4762 + }, + { + "epoch": 0.08706381267479482, + "grad_norm": 6.962138340360233, + "learning_rate": 9.914905887711187e-06, + "loss": 17.4851, + "step": 4763 + }, + { + "epoch": 0.08708209187124134, + "grad_norm": 6.647227321497944, + "learning_rate": 9.914851499913707e-06, + "loss": 17.4062, + "step": 4764 + }, + { + "epoch": 0.08710037106768787, + "grad_norm": 6.563736064089683, + "learning_rate": 9.9147970948901e-06, + "loss": 17.3302, + "step": 4765 + }, + { + "epoch": 0.08711865026413439, + "grad_norm": 8.042800811705872, + "learning_rate": 9.914742672640554e-06, + "loss": 18.0595, + "step": 4766 + }, + { + "epoch": 0.08713692946058091, + "grad_norm": 7.16015375190771, + "learning_rate": 9.914688233165262e-06, + "loss": 17.533, + "step": 4767 + }, + { + "epoch": 0.08715520865702743, + "grad_norm": 8.225380882641392, + "learning_rate": 9.914633776464415e-06, + "loss": 17.943, + "step": 4768 + }, + { + "epoch": 0.08717348785347397, + "grad_norm": 7.895018666215797, + "learning_rate": 9.914579302538203e-06, + "loss": 18.1833, + "step": 4769 + }, + { + "epoch": 0.08719176704992049, + "grad_norm": 8.523766580365836, + "learning_rate": 9.914524811386816e-06, 
+ "loss": 18.2362, + "step": 4770 + }, + { + "epoch": 0.08721004624636701, + "grad_norm": 8.483258717599346, + "learning_rate": 9.914470303010447e-06, + "loss": 17.8643, + "step": 4771 + }, + { + "epoch": 0.08722832544281353, + "grad_norm": 7.704944567254081, + "learning_rate": 9.914415777409286e-06, + "loss": 18.081, + "step": 4772 + }, + { + "epoch": 0.08724660463926005, + "grad_norm": 7.574746124933421, + "learning_rate": 9.914361234583524e-06, + "loss": 17.983, + "step": 4773 + }, + { + "epoch": 0.08726488383570659, + "grad_norm": 6.792623067204275, + "learning_rate": 9.914306674533352e-06, + "loss": 17.5434, + "step": 4774 + }, + { + "epoch": 0.08728316303215311, + "grad_norm": 8.007057542549422, + "learning_rate": 9.914252097258964e-06, + "loss": 18.0533, + "step": 4775 + }, + { + "epoch": 0.08730144222859963, + "grad_norm": 8.021933047667138, + "learning_rate": 9.914197502760545e-06, + "loss": 18.1056, + "step": 4776 + }, + { + "epoch": 0.08731972142504615, + "grad_norm": 7.911876140707684, + "learning_rate": 9.914142891038291e-06, + "loss": 18.0806, + "step": 4777 + }, + { + "epoch": 0.08733800062149268, + "grad_norm": 7.287706255735107, + "learning_rate": 9.914088262092393e-06, + "loss": 17.8173, + "step": 4778 + }, + { + "epoch": 0.0873562798179392, + "grad_norm": 8.259683811458032, + "learning_rate": 9.914033615923044e-06, + "loss": 18.1553, + "step": 4779 + }, + { + "epoch": 0.08737455901438573, + "grad_norm": 8.524263562062561, + "learning_rate": 9.913978952530432e-06, + "loss": 18.0442, + "step": 4780 + }, + { + "epoch": 0.08739283821083225, + "grad_norm": 6.98562030134132, + "learning_rate": 9.91392427191475e-06, + "loss": 17.7846, + "step": 4781 + }, + { + "epoch": 0.08741111740727878, + "grad_norm": 7.762663006939629, + "learning_rate": 9.913869574076189e-06, + "loss": 18.0809, + "step": 4782 + }, + { + "epoch": 0.0874293966037253, + "grad_norm": 7.434293300734314, + "learning_rate": 9.913814859014943e-06, + "loss": 18.0468, + "step": 4783 + }, + { + "epoch": 0.08744767580017182, + "grad_norm": 8.033458339692666, + "learning_rate": 9.913760126731201e-06, + "loss": 18.3616, + "step": 4784 + }, + { + "epoch": 0.08746595499661834, + "grad_norm": 6.832057399654449, + "learning_rate": 9.913705377225157e-06, + "loss": 17.6117, + "step": 4785 + }, + { + "epoch": 0.08748423419306488, + "grad_norm": 6.430135623362784, + "learning_rate": 9.913650610497002e-06, + "loss": 17.6004, + "step": 4786 + }, + { + "epoch": 0.0875025133895114, + "grad_norm": 7.710422672894228, + "learning_rate": 9.91359582654693e-06, + "loss": 17.7769, + "step": 4787 + }, + { + "epoch": 0.08752079258595792, + "grad_norm": 6.469440017204641, + "learning_rate": 9.913541025375128e-06, + "loss": 17.4185, + "step": 4788 + }, + { + "epoch": 0.08753907178240444, + "grad_norm": 6.6826980747643105, + "learning_rate": 9.913486206981794e-06, + "loss": 17.6902, + "step": 4789 + }, + { + "epoch": 0.08755735097885096, + "grad_norm": 6.941843842252527, + "learning_rate": 9.913431371367115e-06, + "loss": 17.7312, + "step": 4790 + }, + { + "epoch": 0.0875756301752975, + "grad_norm": 7.046209410946652, + "learning_rate": 9.913376518531287e-06, + "loss": 17.7428, + "step": 4791 + }, + { + "epoch": 0.08759390937174402, + "grad_norm": 7.9225176278770055, + "learning_rate": 9.913321648474499e-06, + "loss": 18.0596, + "step": 4792 + }, + { + "epoch": 0.08761218856819054, + "grad_norm": 8.505779030910574, + "learning_rate": 9.913266761196945e-06, + "loss": 18.3003, + "step": 4793 + }, + { + "epoch": 0.08763046776463707, + 
"grad_norm": 7.72827539043185, + "learning_rate": 9.913211856698817e-06, + "loss": 18.2406, + "step": 4794 + }, + { + "epoch": 0.08764874696108359, + "grad_norm": 7.219869508615633, + "learning_rate": 9.913156934980309e-06, + "loss": 17.7627, + "step": 4795 + }, + { + "epoch": 0.08766702615753011, + "grad_norm": 8.101370575717883, + "learning_rate": 9.913101996041612e-06, + "loss": 18.3698, + "step": 4796 + }, + { + "epoch": 0.08768530535397664, + "grad_norm": 6.325286891409339, + "learning_rate": 9.913047039882919e-06, + "loss": 17.4737, + "step": 4797 + }, + { + "epoch": 0.08770358455042317, + "grad_norm": 7.524763115589298, + "learning_rate": 9.912992066504422e-06, + "loss": 17.5426, + "step": 4798 + }, + { + "epoch": 0.08772186374686969, + "grad_norm": 8.507126810654984, + "learning_rate": 9.912937075906315e-06, + "loss": 17.987, + "step": 4799 + }, + { + "epoch": 0.08774014294331621, + "grad_norm": 9.281446893128436, + "learning_rate": 9.91288206808879e-06, + "loss": 18.6362, + "step": 4800 + }, + { + "epoch": 0.08775842213976273, + "grad_norm": 8.169945727922903, + "learning_rate": 9.912827043052038e-06, + "loss": 18.0307, + "step": 4801 + }, + { + "epoch": 0.08777670133620925, + "grad_norm": 6.166911678655189, + "learning_rate": 9.912772000796253e-06, + "loss": 17.4021, + "step": 4802 + }, + { + "epoch": 0.08779498053265579, + "grad_norm": 7.306370136133016, + "learning_rate": 9.912716941321632e-06, + "loss": 18.0219, + "step": 4803 + }, + { + "epoch": 0.08781325972910231, + "grad_norm": 7.204213842910007, + "learning_rate": 9.912661864628362e-06, + "loss": 17.8936, + "step": 4804 + }, + { + "epoch": 0.08783153892554883, + "grad_norm": 6.472655632465372, + "learning_rate": 9.912606770716638e-06, + "loss": 17.6235, + "step": 4805 + }, + { + "epoch": 0.08784981812199535, + "grad_norm": 8.738770305828906, + "learning_rate": 9.912551659586655e-06, + "loss": 18.3371, + "step": 4806 + }, + { + "epoch": 0.08786809731844188, + "grad_norm": 9.395092312423438, + "learning_rate": 9.912496531238605e-06, + "loss": 18.6876, + "step": 4807 + }, + { + "epoch": 0.08788637651488841, + "grad_norm": 7.667475452363174, + "learning_rate": 9.912441385672679e-06, + "loss": 17.7632, + "step": 4808 + }, + { + "epoch": 0.08790465571133493, + "grad_norm": 6.495863960634322, + "learning_rate": 9.912386222889073e-06, + "loss": 17.563, + "step": 4809 + }, + { + "epoch": 0.08792293490778146, + "grad_norm": 7.017018985938606, + "learning_rate": 9.91233104288798e-06, + "loss": 17.7194, + "step": 4810 + }, + { + "epoch": 0.08794121410422798, + "grad_norm": 8.260612330153704, + "learning_rate": 9.912275845669592e-06, + "loss": 17.9905, + "step": 4811 + }, + { + "epoch": 0.0879594933006745, + "grad_norm": 6.019433249129388, + "learning_rate": 9.912220631234105e-06, + "loss": 17.2822, + "step": 4812 + }, + { + "epoch": 0.08797777249712102, + "grad_norm": 7.593604052343888, + "learning_rate": 9.91216539958171e-06, + "loss": 18.2624, + "step": 4813 + }, + { + "epoch": 0.08799605169356756, + "grad_norm": 7.076792624049634, + "learning_rate": 9.912110150712601e-06, + "loss": 17.86, + "step": 4814 + }, + { + "epoch": 0.08801433089001408, + "grad_norm": 8.710339145917724, + "learning_rate": 9.912054884626974e-06, + "loss": 17.9205, + "step": 4815 + }, + { + "epoch": 0.0880326100864606, + "grad_norm": 9.585008062178307, + "learning_rate": 9.91199960132502e-06, + "loss": 19.4597, + "step": 4816 + }, + { + "epoch": 0.08805088928290712, + "grad_norm": 5.69442629687945, + "learning_rate": 9.911944300806932e-06, + "loss": 17.1384, 
+ "step": 4817 + }, + { + "epoch": 0.08806916847935364, + "grad_norm": 7.750747831978089, + "learning_rate": 9.911888983072908e-06, + "loss": 18.1424, + "step": 4818 + }, + { + "epoch": 0.08808744767580017, + "grad_norm": 8.011201559127326, + "learning_rate": 9.911833648123139e-06, + "loss": 18.2933, + "step": 4819 + }, + { + "epoch": 0.0881057268722467, + "grad_norm": 8.256293581137744, + "learning_rate": 9.911778295957817e-06, + "loss": 18.3694, + "step": 4820 + }, + { + "epoch": 0.08812400606869322, + "grad_norm": 6.96854580648387, + "learning_rate": 9.911722926577141e-06, + "loss": 17.6918, + "step": 4821 + }, + { + "epoch": 0.08814228526513974, + "grad_norm": 7.4813761705247, + "learning_rate": 9.9116675399813e-06, + "loss": 18.2377, + "step": 4822 + }, + { + "epoch": 0.08816056446158627, + "grad_norm": 6.499835442153083, + "learning_rate": 9.911612136170492e-06, + "loss": 17.6314, + "step": 4823 + }, + { + "epoch": 0.08817884365803279, + "grad_norm": 9.329402462013125, + "learning_rate": 9.91155671514491e-06, + "loss": 17.9491, + "step": 4824 + }, + { + "epoch": 0.08819712285447932, + "grad_norm": 7.134590714179622, + "learning_rate": 9.911501276904746e-06, + "loss": 17.8917, + "step": 4825 + }, + { + "epoch": 0.08821540205092585, + "grad_norm": 6.014950998411903, + "learning_rate": 9.911445821450199e-06, + "loss": 17.2828, + "step": 4826 + }, + { + "epoch": 0.08823368124737237, + "grad_norm": 8.36064771060975, + "learning_rate": 9.911390348781458e-06, + "loss": 18.8198, + "step": 4827 + }, + { + "epoch": 0.08825196044381889, + "grad_norm": 6.492310583631008, + "learning_rate": 9.911334858898721e-06, + "loss": 17.488, + "step": 4828 + }, + { + "epoch": 0.08827023964026541, + "grad_norm": 7.191727143888856, + "learning_rate": 9.911279351802182e-06, + "loss": 17.7162, + "step": 4829 + }, + { + "epoch": 0.08828851883671193, + "grad_norm": 7.072271640359286, + "learning_rate": 9.911223827492035e-06, + "loss": 17.6843, + "step": 4830 + }, + { + "epoch": 0.08830679803315847, + "grad_norm": 6.8461577876455655, + "learning_rate": 9.911168285968474e-06, + "loss": 17.5735, + "step": 4831 + }, + { + "epoch": 0.08832507722960499, + "grad_norm": 8.769748706606327, + "learning_rate": 9.911112727231694e-06, + "loss": 18.546, + "step": 4832 + }, + { + "epoch": 0.08834335642605151, + "grad_norm": 6.88971957797013, + "learning_rate": 9.911057151281892e-06, + "loss": 17.524, + "step": 4833 + }, + { + "epoch": 0.08836163562249803, + "grad_norm": 8.044215283794703, + "learning_rate": 9.911001558119258e-06, + "loss": 18.0864, + "step": 4834 + }, + { + "epoch": 0.08837991481894455, + "grad_norm": 7.21672603923443, + "learning_rate": 9.910945947743992e-06, + "loss": 17.9637, + "step": 4835 + }, + { + "epoch": 0.08839819401539108, + "grad_norm": 8.055828634294114, + "learning_rate": 9.910890320156285e-06, + "loss": 18.0511, + "step": 4836 + }, + { + "epoch": 0.08841647321183761, + "grad_norm": 7.4949272257948865, + "learning_rate": 9.910834675356336e-06, + "loss": 18.0165, + "step": 4837 + }, + { + "epoch": 0.08843475240828413, + "grad_norm": 9.754306941590027, + "learning_rate": 9.910779013344336e-06, + "loss": 18.4512, + "step": 4838 + }, + { + "epoch": 0.08845303160473066, + "grad_norm": 7.5429698147869235, + "learning_rate": 9.910723334120482e-06, + "loss": 17.7982, + "step": 4839 + }, + { + "epoch": 0.08847131080117718, + "grad_norm": 6.962705737769747, + "learning_rate": 9.910667637684968e-06, + "loss": 17.6604, + "step": 4840 + }, + { + "epoch": 0.0884895899976237, + "grad_norm": 6.602786660120906, + 
"learning_rate": 9.91061192403799e-06, + "loss": 17.5104, + "step": 4841 + }, + { + "epoch": 0.08850786919407024, + "grad_norm": 8.19722256545566, + "learning_rate": 9.910556193179744e-06, + "loss": 18.0938, + "step": 4842 + }, + { + "epoch": 0.08852614839051676, + "grad_norm": 6.613293074237941, + "learning_rate": 9.910500445110424e-06, + "loss": 17.3452, + "step": 4843 + }, + { + "epoch": 0.08854442758696328, + "grad_norm": 7.168804242379337, + "learning_rate": 9.910444679830227e-06, + "loss": 17.9111, + "step": 4844 + }, + { + "epoch": 0.0885627067834098, + "grad_norm": 5.794603819679539, + "learning_rate": 9.910388897339347e-06, + "loss": 17.1512, + "step": 4845 + }, + { + "epoch": 0.08858098597985632, + "grad_norm": 7.296220367182794, + "learning_rate": 9.91033309763798e-06, + "loss": 17.3953, + "step": 4846 + }, + { + "epoch": 0.08859926517630284, + "grad_norm": 6.924173425291186, + "learning_rate": 9.910277280726322e-06, + "loss": 17.7091, + "step": 4847 + }, + { + "epoch": 0.08861754437274938, + "grad_norm": 7.9899815344020135, + "learning_rate": 9.910221446604569e-06, + "loss": 18.2742, + "step": 4848 + }, + { + "epoch": 0.0886358235691959, + "grad_norm": 7.09281784394237, + "learning_rate": 9.910165595272913e-06, + "loss": 17.5841, + "step": 4849 + }, + { + "epoch": 0.08865410276564242, + "grad_norm": 6.455267811002289, + "learning_rate": 9.910109726731556e-06, + "loss": 17.4705, + "step": 4850 + }, + { + "epoch": 0.08867238196208894, + "grad_norm": 6.142848707058533, + "learning_rate": 9.910053840980688e-06, + "loss": 17.2928, + "step": 4851 + }, + { + "epoch": 0.08869066115853547, + "grad_norm": 7.6149171844813655, + "learning_rate": 9.90999793802051e-06, + "loss": 17.9281, + "step": 4852 + }, + { + "epoch": 0.08870894035498199, + "grad_norm": 6.384730092886496, + "learning_rate": 9.909942017851212e-06, + "loss": 17.5453, + "step": 4853 + }, + { + "epoch": 0.08872721955142852, + "grad_norm": 6.718959207291093, + "learning_rate": 9.909886080472997e-06, + "loss": 17.5757, + "step": 4854 + }, + { + "epoch": 0.08874549874787505, + "grad_norm": 5.89251746724107, + "learning_rate": 9.909830125886055e-06, + "loss": 17.2406, + "step": 4855 + }, + { + "epoch": 0.08876377794432157, + "grad_norm": 8.454824001567015, + "learning_rate": 9.909774154090584e-06, + "loss": 17.9786, + "step": 4856 + }, + { + "epoch": 0.08878205714076809, + "grad_norm": 6.098006609564664, + "learning_rate": 9.909718165086781e-06, + "loss": 17.1602, + "step": 4857 + }, + { + "epoch": 0.08880033633721461, + "grad_norm": 7.064665276083377, + "learning_rate": 9.909662158874845e-06, + "loss": 17.7347, + "step": 4858 + }, + { + "epoch": 0.08881861553366115, + "grad_norm": 6.158050025592081, + "learning_rate": 9.909606135454965e-06, + "loss": 17.4858, + "step": 4859 + }, + { + "epoch": 0.08883689473010767, + "grad_norm": 7.752886612954595, + "learning_rate": 9.909550094827343e-06, + "loss": 18.1125, + "step": 4860 + }, + { + "epoch": 0.08885517392655419, + "grad_norm": 8.195774319177469, + "learning_rate": 9.909494036992174e-06, + "loss": 18.2586, + "step": 4861 + }, + { + "epoch": 0.08887345312300071, + "grad_norm": 7.892604259341922, + "learning_rate": 9.909437961949655e-06, + "loss": 18.0855, + "step": 4862 + }, + { + "epoch": 0.08889173231944723, + "grad_norm": 7.542061459784922, + "learning_rate": 9.909381869699981e-06, + "loss": 18.0079, + "step": 4863 + }, + { + "epoch": 0.08891001151589376, + "grad_norm": 6.491660444918835, + "learning_rate": 9.90932576024335e-06, + "loss": 17.4933, + "step": 4864 + }, + { + 
"epoch": 0.08892829071234029, + "grad_norm": 8.473319499724814, + "learning_rate": 9.909269633579959e-06, + "loss": 18.123, + "step": 4865 + }, + { + "epoch": 0.08894656990878681, + "grad_norm": 6.984022328522888, + "learning_rate": 9.909213489710002e-06, + "loss": 17.9093, + "step": 4866 + }, + { + "epoch": 0.08896484910523333, + "grad_norm": 7.477287314187048, + "learning_rate": 9.909157328633678e-06, + "loss": 18.1638, + "step": 4867 + }, + { + "epoch": 0.08898312830167986, + "grad_norm": 6.4279946010692495, + "learning_rate": 9.909101150351186e-06, + "loss": 17.2532, + "step": 4868 + }, + { + "epoch": 0.08900140749812638, + "grad_norm": 6.248805498100486, + "learning_rate": 9.909044954862718e-06, + "loss": 17.4604, + "step": 4869 + }, + { + "epoch": 0.0890196866945729, + "grad_norm": 6.761360549226345, + "learning_rate": 9.908988742168474e-06, + "loss": 17.7246, + "step": 4870 + }, + { + "epoch": 0.08903796589101944, + "grad_norm": 7.103710072593917, + "learning_rate": 9.908932512268652e-06, + "loss": 17.7659, + "step": 4871 + }, + { + "epoch": 0.08905624508746596, + "grad_norm": 8.684011467300552, + "learning_rate": 9.908876265163446e-06, + "loss": 18.0777, + "step": 4872 + }, + { + "epoch": 0.08907452428391248, + "grad_norm": 6.801153374397417, + "learning_rate": 9.908820000853054e-06, + "loss": 17.4871, + "step": 4873 + }, + { + "epoch": 0.089092803480359, + "grad_norm": 6.996604349004501, + "learning_rate": 9.908763719337675e-06, + "loss": 17.7669, + "step": 4874 + }, + { + "epoch": 0.08911108267680552, + "grad_norm": 6.95254617222263, + "learning_rate": 9.908707420617505e-06, + "loss": 17.7054, + "step": 4875 + }, + { + "epoch": 0.08912936187325206, + "grad_norm": 5.833851020191169, + "learning_rate": 9.908651104692742e-06, + "loss": 17.3961, + "step": 4876 + }, + { + "epoch": 0.08914764106969858, + "grad_norm": 7.028065768944944, + "learning_rate": 9.908594771563583e-06, + "loss": 17.7726, + "step": 4877 + }, + { + "epoch": 0.0891659202661451, + "grad_norm": 5.877460860277679, + "learning_rate": 9.908538421230224e-06, + "loss": 17.1981, + "step": 4878 + }, + { + "epoch": 0.08918419946259162, + "grad_norm": 8.044787459744423, + "learning_rate": 9.908482053692864e-06, + "loss": 18.3129, + "step": 4879 + }, + { + "epoch": 0.08920247865903815, + "grad_norm": 5.537464004149115, + "learning_rate": 9.9084256689517e-06, + "loss": 16.954, + "step": 4880 + }, + { + "epoch": 0.08922075785548467, + "grad_norm": 6.421239681199296, + "learning_rate": 9.908369267006932e-06, + "loss": 17.4468, + "step": 4881 + }, + { + "epoch": 0.0892390370519312, + "grad_norm": 7.060424910594412, + "learning_rate": 9.908312847858753e-06, + "loss": 17.726, + "step": 4882 + }, + { + "epoch": 0.08925731624837772, + "grad_norm": 6.713220527294039, + "learning_rate": 9.908256411507363e-06, + "loss": 17.8644, + "step": 4883 + }, + { + "epoch": 0.08927559544482425, + "grad_norm": 7.28506267164789, + "learning_rate": 9.908199957952964e-06, + "loss": 17.9029, + "step": 4884 + }, + { + "epoch": 0.08929387464127077, + "grad_norm": 7.159823541978301, + "learning_rate": 9.908143487195747e-06, + "loss": 17.6019, + "step": 4885 + }, + { + "epoch": 0.08931215383771729, + "grad_norm": 7.448306011287324, + "learning_rate": 9.908086999235914e-06, + "loss": 17.7881, + "step": 4886 + }, + { + "epoch": 0.08933043303416381, + "grad_norm": 7.490854975147979, + "learning_rate": 9.908030494073662e-06, + "loss": 17.9684, + "step": 4887 + }, + { + "epoch": 0.08934871223061035, + "grad_norm": 7.010397319852804, + "learning_rate": 
9.907973971709189e-06, + "loss": 17.8058, + "step": 4888 + }, + { + "epoch": 0.08936699142705687, + "grad_norm": 8.304541211986436, + "learning_rate": 9.907917432142693e-06, + "loss": 18.5242, + "step": 4889 + }, + { + "epoch": 0.08938527062350339, + "grad_norm": 6.9716640880108764, + "learning_rate": 9.907860875374373e-06, + "loss": 17.8519, + "step": 4890 + }, + { + "epoch": 0.08940354981994991, + "grad_norm": 7.075654539312293, + "learning_rate": 9.907804301404424e-06, + "loss": 17.5409, + "step": 4891 + }, + { + "epoch": 0.08942182901639643, + "grad_norm": 7.748924403105278, + "learning_rate": 9.907747710233049e-06, + "loss": 18.0154, + "step": 4892 + }, + { + "epoch": 0.08944010821284297, + "grad_norm": 8.908085631203509, + "learning_rate": 9.907691101860444e-06, + "loss": 18.3611, + "step": 4893 + }, + { + "epoch": 0.08945838740928949, + "grad_norm": 6.3659490031572465, + "learning_rate": 9.907634476286807e-06, + "loss": 17.4746, + "step": 4894 + }, + { + "epoch": 0.08947666660573601, + "grad_norm": 6.763138824222327, + "learning_rate": 9.907577833512338e-06, + "loss": 17.5551, + "step": 4895 + }, + { + "epoch": 0.08949494580218254, + "grad_norm": 6.652661034895993, + "learning_rate": 9.907521173537234e-06, + "loss": 17.5258, + "step": 4896 + }, + { + "epoch": 0.08951322499862906, + "grad_norm": 6.924303028863853, + "learning_rate": 9.907464496361694e-06, + "loss": 17.5145, + "step": 4897 + }, + { + "epoch": 0.08953150419507558, + "grad_norm": 7.8742076124462805, + "learning_rate": 9.907407801985916e-06, + "loss": 17.8574, + "step": 4898 + }, + { + "epoch": 0.08954978339152211, + "grad_norm": 7.02700500083737, + "learning_rate": 9.9073510904101e-06, + "loss": 17.9066, + "step": 4899 + }, + { + "epoch": 0.08956806258796864, + "grad_norm": 8.325333538927772, + "learning_rate": 9.907294361634445e-06, + "loss": 18.1865, + "step": 4900 + }, + { + "epoch": 0.08958634178441516, + "grad_norm": 7.366991577966159, + "learning_rate": 9.907237615659149e-06, + "loss": 17.8358, + "step": 4901 + }, + { + "epoch": 0.08960462098086168, + "grad_norm": 7.4898641121745815, + "learning_rate": 9.90718085248441e-06, + "loss": 18.0713, + "step": 4902 + }, + { + "epoch": 0.0896229001773082, + "grad_norm": 8.208024418378578, + "learning_rate": 9.907124072110428e-06, + "loss": 18.3265, + "step": 4903 + }, + { + "epoch": 0.08964117937375472, + "grad_norm": 8.606868574897145, + "learning_rate": 9.907067274537404e-06, + "loss": 18.0227, + "step": 4904 + }, + { + "epoch": 0.08965945857020126, + "grad_norm": 6.479156336338076, + "learning_rate": 9.907010459765534e-06, + "loss": 17.5345, + "step": 4905 + }, + { + "epoch": 0.08967773776664778, + "grad_norm": 7.722528397467274, + "learning_rate": 9.906953627795018e-06, + "loss": 17.8959, + "step": 4906 + }, + { + "epoch": 0.0896960169630943, + "grad_norm": 7.752109692661014, + "learning_rate": 9.906896778626054e-06, + "loss": 17.8554, + "step": 4907 + }, + { + "epoch": 0.08971429615954082, + "grad_norm": 6.47635829372542, + "learning_rate": 9.906839912258843e-06, + "loss": 17.3513, + "step": 4908 + }, + { + "epoch": 0.08973257535598735, + "grad_norm": 7.908803958084195, + "learning_rate": 9.906783028693585e-06, + "loss": 18.1248, + "step": 4909 + }, + { + "epoch": 0.08975085455243388, + "grad_norm": 7.819372055678718, + "learning_rate": 9.906726127930477e-06, + "loss": 18.1958, + "step": 4910 + }, + { + "epoch": 0.0897691337488804, + "grad_norm": 7.131489980549072, + "learning_rate": 9.906669209969721e-06, + "loss": 17.9138, + "step": 4911 + }, + { + "epoch": 
0.08978741294532692, + "grad_norm": 9.451767695330918, + "learning_rate": 9.906612274811516e-06, + "loss": 18.75, + "step": 4912 + }, + { + "epoch": 0.08980569214177345, + "grad_norm": 8.053565778914212, + "learning_rate": 9.906555322456059e-06, + "loss": 18.0745, + "step": 4913 + }, + { + "epoch": 0.08982397133821997, + "grad_norm": 5.741438465348301, + "learning_rate": 9.90649835290355e-06, + "loss": 17.3908, + "step": 4914 + }, + { + "epoch": 0.08984225053466649, + "grad_norm": 7.753535854981617, + "learning_rate": 9.906441366154194e-06, + "loss": 18.1406, + "step": 4915 + }, + { + "epoch": 0.08986052973111303, + "grad_norm": 7.358112511580242, + "learning_rate": 9.906384362208183e-06, + "loss": 17.8397, + "step": 4916 + }, + { + "epoch": 0.08987880892755955, + "grad_norm": 7.459473289924685, + "learning_rate": 9.906327341065722e-06, + "loss": 18.116, + "step": 4917 + }, + { + "epoch": 0.08989708812400607, + "grad_norm": 5.350762216150077, + "learning_rate": 9.90627030272701e-06, + "loss": 17.2346, + "step": 4918 + }, + { + "epoch": 0.08991536732045259, + "grad_norm": 6.29069708317689, + "learning_rate": 9.906213247192246e-06, + "loss": 17.4428, + "step": 4919 + }, + { + "epoch": 0.08993364651689911, + "grad_norm": 7.490105325337597, + "learning_rate": 9.906156174461631e-06, + "loss": 18.0593, + "step": 4920 + }, + { + "epoch": 0.08995192571334563, + "grad_norm": 7.813982124712491, + "learning_rate": 9.906099084535362e-06, + "loss": 17.9516, + "step": 4921 + }, + { + "epoch": 0.08997020490979217, + "grad_norm": 5.685757307791417, + "learning_rate": 9.906041977413646e-06, + "loss": 17.2931, + "step": 4922 + }, + { + "epoch": 0.08998848410623869, + "grad_norm": 8.182059085116599, + "learning_rate": 9.905984853096676e-06, + "loss": 18.2598, + "step": 4923 + }, + { + "epoch": 0.09000676330268521, + "grad_norm": 7.562270683203775, + "learning_rate": 9.905927711584652e-06, + "loss": 17.8086, + "step": 4924 + }, + { + "epoch": 0.09002504249913174, + "grad_norm": 7.620203216056942, + "learning_rate": 9.90587055287778e-06, + "loss": 17.9578, + "step": 4925 + }, + { + "epoch": 0.09004332169557826, + "grad_norm": 7.301869554789333, + "learning_rate": 9.905813376976257e-06, + "loss": 17.7887, + "step": 4926 + }, + { + "epoch": 0.09006160089202479, + "grad_norm": 5.655472240721557, + "learning_rate": 9.905756183880284e-06, + "loss": 17.0582, + "step": 4927 + }, + { + "epoch": 0.09007988008847131, + "grad_norm": 6.698368435760824, + "learning_rate": 9.905698973590061e-06, + "loss": 17.7889, + "step": 4928 + }, + { + "epoch": 0.09009815928491784, + "grad_norm": 9.527879894553903, + "learning_rate": 9.90564174610579e-06, + "loss": 18.563, + "step": 4929 + }, + { + "epoch": 0.09011643848136436, + "grad_norm": 7.603525625587197, + "learning_rate": 9.905584501427669e-06, + "loss": 17.9311, + "step": 4930 + }, + { + "epoch": 0.09013471767781088, + "grad_norm": 8.398402960489525, + "learning_rate": 9.905527239555899e-06, + "loss": 18.5133, + "step": 4931 + }, + { + "epoch": 0.0901529968742574, + "grad_norm": 7.928934032367986, + "learning_rate": 9.905469960490683e-06, + "loss": 18.0871, + "step": 4932 + }, + { + "epoch": 0.09017127607070394, + "grad_norm": 8.12174092784868, + "learning_rate": 9.905412664232222e-06, + "loss": 18.1964, + "step": 4933 + }, + { + "epoch": 0.09018955526715046, + "grad_norm": 6.392108989691651, + "learning_rate": 9.905355350780713e-06, + "loss": 17.1349, + "step": 4934 + }, + { + "epoch": 0.09020783446359698, + "grad_norm": 6.851478822719325, + "learning_rate": 
9.90529802013636e-06, + "loss": 17.6792, + "step": 4935 + }, + { + "epoch": 0.0902261136600435, + "grad_norm": 6.760071373340467, + "learning_rate": 9.905240672299363e-06, + "loss": 17.478, + "step": 4936 + }, + { + "epoch": 0.09024439285649002, + "grad_norm": 8.275567365192778, + "learning_rate": 9.905183307269922e-06, + "loss": 18.2972, + "step": 4937 + }, + { + "epoch": 0.09026267205293655, + "grad_norm": 6.7658162244016, + "learning_rate": 9.90512592504824e-06, + "loss": 17.516, + "step": 4938 + }, + { + "epoch": 0.09028095124938308, + "grad_norm": 6.968360606268438, + "learning_rate": 9.905068525634519e-06, + "loss": 17.7538, + "step": 4939 + }, + { + "epoch": 0.0902992304458296, + "grad_norm": 6.082148817294284, + "learning_rate": 9.905011109028957e-06, + "loss": 17.4172, + "step": 4940 + }, + { + "epoch": 0.09031750964227613, + "grad_norm": 7.345070877970718, + "learning_rate": 9.904953675231757e-06, + "loss": 17.7168, + "step": 4941 + }, + { + "epoch": 0.09033578883872265, + "grad_norm": 7.134130876369556, + "learning_rate": 9.90489622424312e-06, + "loss": 17.6517, + "step": 4942 + }, + { + "epoch": 0.09035406803516917, + "grad_norm": 7.403768346109211, + "learning_rate": 9.904838756063246e-06, + "loss": 18.0293, + "step": 4943 + }, + { + "epoch": 0.0903723472316157, + "grad_norm": 7.579206572533839, + "learning_rate": 9.904781270692338e-06, + "loss": 17.9277, + "step": 4944 + }, + { + "epoch": 0.09039062642806223, + "grad_norm": 7.01991912954351, + "learning_rate": 9.904723768130598e-06, + "loss": 17.7241, + "step": 4945 + }, + { + "epoch": 0.09040890562450875, + "grad_norm": 8.3797845230764, + "learning_rate": 9.904666248378228e-06, + "loss": 18.167, + "step": 4946 + }, + { + "epoch": 0.09042718482095527, + "grad_norm": 9.131412748192803, + "learning_rate": 9.904608711435426e-06, + "loss": 18.1724, + "step": 4947 + }, + { + "epoch": 0.09044546401740179, + "grad_norm": 7.08883566417059, + "learning_rate": 9.904551157302398e-06, + "loss": 17.7404, + "step": 4948 + }, + { + "epoch": 0.09046374321384831, + "grad_norm": 7.861559932400002, + "learning_rate": 9.904493585979343e-06, + "loss": 17.9231, + "step": 4949 + }, + { + "epoch": 0.09048202241029485, + "grad_norm": 6.353152324499593, + "learning_rate": 9.904435997466463e-06, + "loss": 17.2822, + "step": 4950 + }, + { + "epoch": 0.09050030160674137, + "grad_norm": 7.584299532570088, + "learning_rate": 9.904378391763962e-06, + "loss": 17.9455, + "step": 4951 + }, + { + "epoch": 0.09051858080318789, + "grad_norm": 6.9418694944928445, + "learning_rate": 9.90432076887204e-06, + "loss": 17.6901, + "step": 4952 + }, + { + "epoch": 0.09053685999963441, + "grad_norm": 8.779481141723359, + "learning_rate": 9.904263128790899e-06, + "loss": 18.6719, + "step": 4953 + }, + { + "epoch": 0.09055513919608094, + "grad_norm": 7.6466083260157145, + "learning_rate": 9.90420547152074e-06, + "loss": 17.713, + "step": 4954 + }, + { + "epoch": 0.09057341839252746, + "grad_norm": 6.944497802117786, + "learning_rate": 9.904147797061767e-06, + "loss": 17.5404, + "step": 4955 + }, + { + "epoch": 0.090591697588974, + "grad_norm": 6.788706594050242, + "learning_rate": 9.904090105414184e-06, + "loss": 17.6658, + "step": 4956 + }, + { + "epoch": 0.09060997678542052, + "grad_norm": 6.270923613989111, + "learning_rate": 9.904032396578188e-06, + "loss": 17.4603, + "step": 4957 + }, + { + "epoch": 0.09062825598186704, + "grad_norm": 6.404011945641866, + "learning_rate": 9.903974670553984e-06, + "loss": 17.3798, + "step": 4958 + }, + { + "epoch": 0.09064653517831356, + 
"grad_norm": 7.196345513098332, + "learning_rate": 9.903916927341776e-06, + "loss": 17.7213, + "step": 4959 + }, + { + "epoch": 0.09066481437476008, + "grad_norm": 8.873870347748749, + "learning_rate": 9.903859166941762e-06, + "loss": 18.2185, + "step": 4960 + }, + { + "epoch": 0.09068309357120662, + "grad_norm": 7.8470185747211065, + "learning_rate": 9.90380138935415e-06, + "loss": 17.9964, + "step": 4961 + }, + { + "epoch": 0.09070137276765314, + "grad_norm": 8.355801206994641, + "learning_rate": 9.903743594579139e-06, + "loss": 18.1677, + "step": 4962 + }, + { + "epoch": 0.09071965196409966, + "grad_norm": 6.500662505566888, + "learning_rate": 9.903685782616932e-06, + "loss": 17.3051, + "step": 4963 + }, + { + "epoch": 0.09073793116054618, + "grad_norm": 7.8126043232462985, + "learning_rate": 9.903627953467731e-06, + "loss": 18.1204, + "step": 4964 + }, + { + "epoch": 0.0907562103569927, + "grad_norm": 7.284690556818036, + "learning_rate": 9.90357010713174e-06, + "loss": 18.1257, + "step": 4965 + }, + { + "epoch": 0.09077448955343922, + "grad_norm": 8.89816870388913, + "learning_rate": 9.903512243609161e-06, + "loss": 18.6274, + "step": 4966 + }, + { + "epoch": 0.09079276874988576, + "grad_norm": 9.319042947402458, + "learning_rate": 9.903454362900197e-06, + "loss": 18.1239, + "step": 4967 + }, + { + "epoch": 0.09081104794633228, + "grad_norm": 8.13528093699929, + "learning_rate": 9.903396465005054e-06, + "loss": 18.4412, + "step": 4968 + }, + { + "epoch": 0.0908293271427788, + "grad_norm": 8.30774638527725, + "learning_rate": 9.903338549923928e-06, + "loss": 18.2453, + "step": 4969 + }, + { + "epoch": 0.09084760633922533, + "grad_norm": 7.597627094489424, + "learning_rate": 9.903280617657027e-06, + "loss": 18.0494, + "step": 4970 + }, + { + "epoch": 0.09086588553567185, + "grad_norm": 6.0707407297946645, + "learning_rate": 9.903222668204553e-06, + "loss": 17.3874, + "step": 4971 + }, + { + "epoch": 0.09088416473211837, + "grad_norm": 6.115617491789437, + "learning_rate": 9.90316470156671e-06, + "loss": 17.3557, + "step": 4972 + }, + { + "epoch": 0.0909024439285649, + "grad_norm": 8.01193067033986, + "learning_rate": 9.9031067177437e-06, + "loss": 18.2971, + "step": 4973 + }, + { + "epoch": 0.09092072312501143, + "grad_norm": 8.204679748834103, + "learning_rate": 9.903048716735725e-06, + "loss": 17.9572, + "step": 4974 + }, + { + "epoch": 0.09093900232145795, + "grad_norm": 7.497913081293649, + "learning_rate": 9.902990698542992e-06, + "loss": 17.9283, + "step": 4975 + }, + { + "epoch": 0.09095728151790447, + "grad_norm": 7.121495298556463, + "learning_rate": 9.9029326631657e-06, + "loss": 17.752, + "step": 4976 + }, + { + "epoch": 0.09097556071435099, + "grad_norm": 7.31486842562587, + "learning_rate": 9.902874610604054e-06, + "loss": 17.6785, + "step": 4977 + }, + { + "epoch": 0.09099383991079753, + "grad_norm": 9.291313903532426, + "learning_rate": 9.90281654085826e-06, + "loss": 18.8064, + "step": 4978 + }, + { + "epoch": 0.09101211910724405, + "grad_norm": 9.22651887095488, + "learning_rate": 9.902758453928519e-06, + "loss": 18.6168, + "step": 4979 + }, + { + "epoch": 0.09103039830369057, + "grad_norm": 7.6036357163146695, + "learning_rate": 9.902700349815035e-06, + "loss": 17.8308, + "step": 4980 + }, + { + "epoch": 0.09104867750013709, + "grad_norm": 8.618788747025125, + "learning_rate": 9.90264222851801e-06, + "loss": 18.4711, + "step": 4981 + }, + { + "epoch": 0.09106695669658361, + "grad_norm": 6.951221154408608, + "learning_rate": 9.902584090037651e-06, + "loss": 17.8209, + 
"step": 4982 + }, + { + "epoch": 0.09108523589303014, + "grad_norm": 7.433568318100594, + "learning_rate": 9.90252593437416e-06, + "loss": 17.7275, + "step": 4983 + }, + { + "epoch": 0.09110351508947667, + "grad_norm": 8.384901533021308, + "learning_rate": 9.902467761527741e-06, + "loss": 18.0449, + "step": 4984 + }, + { + "epoch": 0.0911217942859232, + "grad_norm": 9.209068538222029, + "learning_rate": 9.902409571498598e-06, + "loss": 18.3365, + "step": 4985 + }, + { + "epoch": 0.09114007348236972, + "grad_norm": 8.04366712785056, + "learning_rate": 9.902351364286935e-06, + "loss": 18.1736, + "step": 4986 + }, + { + "epoch": 0.09115835267881624, + "grad_norm": 7.321603522303677, + "learning_rate": 9.902293139892956e-06, + "loss": 17.9776, + "step": 4987 + }, + { + "epoch": 0.09117663187526276, + "grad_norm": 8.305108390363309, + "learning_rate": 9.902234898316863e-06, + "loss": 18.4951, + "step": 4988 + }, + { + "epoch": 0.09119491107170928, + "grad_norm": 8.580880097757682, + "learning_rate": 9.902176639558865e-06, + "loss": 18.4488, + "step": 4989 + }, + { + "epoch": 0.09121319026815582, + "grad_norm": 7.17768760344289, + "learning_rate": 9.902118363619163e-06, + "loss": 17.8056, + "step": 4990 + }, + { + "epoch": 0.09123146946460234, + "grad_norm": 7.56579698012164, + "learning_rate": 9.902060070497958e-06, + "loss": 17.8564, + "step": 4991 + }, + { + "epoch": 0.09124974866104886, + "grad_norm": 6.686694426761798, + "learning_rate": 9.90200176019546e-06, + "loss": 17.4764, + "step": 4992 + }, + { + "epoch": 0.09126802785749538, + "grad_norm": 7.136554403411247, + "learning_rate": 9.901943432711872e-06, + "loss": 17.6427, + "step": 4993 + }, + { + "epoch": 0.0912863070539419, + "grad_norm": 6.136446274441091, + "learning_rate": 9.901885088047398e-06, + "loss": 17.3511, + "step": 4994 + }, + { + "epoch": 0.09130458625038844, + "grad_norm": 6.57630498772527, + "learning_rate": 9.901826726202242e-06, + "loss": 17.8012, + "step": 4995 + }, + { + "epoch": 0.09132286544683496, + "grad_norm": 7.830314180937311, + "learning_rate": 9.901768347176607e-06, + "loss": 17.9419, + "step": 4996 + }, + { + "epoch": 0.09134114464328148, + "grad_norm": 6.2905804893550705, + "learning_rate": 9.9017099509707e-06, + "loss": 17.2592, + "step": 4997 + }, + { + "epoch": 0.091359423839728, + "grad_norm": 7.436038988566306, + "learning_rate": 9.901651537584725e-06, + "loss": 17.9945, + "step": 4998 + }, + { + "epoch": 0.09137770303617453, + "grad_norm": 7.705229230160873, + "learning_rate": 9.901593107018887e-06, + "loss": 18.1507, + "step": 4999 + }, + { + "epoch": 0.09139598223262105, + "grad_norm": 6.445126525326775, + "learning_rate": 9.901534659273391e-06, + "loss": 17.2738, + "step": 5000 + }, + { + "epoch": 0.09141426142906758, + "grad_norm": 8.461811534463543, + "learning_rate": 9.901476194348441e-06, + "loss": 18.2201, + "step": 5001 + }, + { + "epoch": 0.0914325406255141, + "grad_norm": 6.924860783049049, + "learning_rate": 9.901417712244242e-06, + "loss": 17.7411, + "step": 5002 + }, + { + "epoch": 0.09145081982196063, + "grad_norm": 5.649922596212064, + "learning_rate": 9.901359212960999e-06, + "loss": 17.3142, + "step": 5003 + }, + { + "epoch": 0.09146909901840715, + "grad_norm": 6.6697417774244485, + "learning_rate": 9.901300696498917e-06, + "loss": 17.4254, + "step": 5004 + }, + { + "epoch": 0.09148737821485367, + "grad_norm": 6.3284775144210075, + "learning_rate": 9.901242162858202e-06, + "loss": 17.2525, + "step": 5005 + }, + { + "epoch": 0.09150565741130019, + "grad_norm": 7.828468842486411, + 
"learning_rate": 9.901183612039058e-06, + "loss": 17.6558, + "step": 5006 + }, + { + "epoch": 0.09152393660774673, + "grad_norm": 6.945227607333281, + "learning_rate": 9.901125044041692e-06, + "loss": 17.6458, + "step": 5007 + }, + { + "epoch": 0.09154221580419325, + "grad_norm": 8.33917572690878, + "learning_rate": 9.901066458866306e-06, + "loss": 18.0863, + "step": 5008 + }, + { + "epoch": 0.09156049500063977, + "grad_norm": 8.694129911562369, + "learning_rate": 9.901007856513109e-06, + "loss": 18.4328, + "step": 5009 + }, + { + "epoch": 0.0915787741970863, + "grad_norm": 7.191391130756593, + "learning_rate": 9.900949236982305e-06, + "loss": 17.7385, + "step": 5010 + }, + { + "epoch": 0.09159705339353282, + "grad_norm": 7.075946211949113, + "learning_rate": 9.900890600274097e-06, + "loss": 17.6208, + "step": 5011 + }, + { + "epoch": 0.09161533258997935, + "grad_norm": 7.265462122920251, + "learning_rate": 9.900831946388696e-06, + "loss": 17.6656, + "step": 5012 + }, + { + "epoch": 0.09163361178642587, + "grad_norm": 7.713863902687523, + "learning_rate": 9.900773275326302e-06, + "loss": 17.8481, + "step": 5013 + }, + { + "epoch": 0.0916518909828724, + "grad_norm": 7.118312489912055, + "learning_rate": 9.900714587087123e-06, + "loss": 17.8214, + "step": 5014 + }, + { + "epoch": 0.09167017017931892, + "grad_norm": 8.160408961123743, + "learning_rate": 9.900655881671365e-06, + "loss": 18.3873, + "step": 5015 + }, + { + "epoch": 0.09168844937576544, + "grad_norm": 7.1922571340580905, + "learning_rate": 9.900597159079235e-06, + "loss": 17.6674, + "step": 5016 + }, + { + "epoch": 0.09170672857221196, + "grad_norm": 6.552480056514527, + "learning_rate": 9.900538419310935e-06, + "loss": 17.5884, + "step": 5017 + }, + { + "epoch": 0.0917250077686585, + "grad_norm": 7.554329320381714, + "learning_rate": 9.900479662366673e-06, + "loss": 18.0221, + "step": 5018 + }, + { + "epoch": 0.09174328696510502, + "grad_norm": 7.2563650475563515, + "learning_rate": 9.900420888246657e-06, + "loss": 17.7067, + "step": 5019 + }, + { + "epoch": 0.09176156616155154, + "grad_norm": 7.587595855844159, + "learning_rate": 9.90036209695109e-06, + "loss": 17.8569, + "step": 5020 + }, + { + "epoch": 0.09177984535799806, + "grad_norm": 7.588997794433961, + "learning_rate": 9.900303288480178e-06, + "loss": 17.9387, + "step": 5021 + }, + { + "epoch": 0.09179812455444458, + "grad_norm": 7.94984395924185, + "learning_rate": 9.90024446283413e-06, + "loss": 18.1619, + "step": 5022 + }, + { + "epoch": 0.0918164037508911, + "grad_norm": 7.614288176046099, + "learning_rate": 9.90018562001315e-06, + "loss": 17.8588, + "step": 5023 + }, + { + "epoch": 0.09183468294733764, + "grad_norm": 7.117966262212396, + "learning_rate": 9.900126760017444e-06, + "loss": 17.8255, + "step": 5024 + }, + { + "epoch": 0.09185296214378416, + "grad_norm": 7.546946708664759, + "learning_rate": 9.900067882847218e-06, + "loss": 17.7908, + "step": 5025 + }, + { + "epoch": 0.09187124134023068, + "grad_norm": 6.669129871761577, + "learning_rate": 9.900008988502681e-06, + "loss": 17.6401, + "step": 5026 + }, + { + "epoch": 0.0918895205366772, + "grad_norm": 7.45925356671338, + "learning_rate": 9.899950076984038e-06, + "loss": 17.9925, + "step": 5027 + }, + { + "epoch": 0.09190779973312373, + "grad_norm": 7.081006160114588, + "learning_rate": 9.899891148291493e-06, + "loss": 17.8191, + "step": 5028 + }, + { + "epoch": 0.09192607892957026, + "grad_norm": 8.581660837824266, + "learning_rate": 9.899832202425256e-06, + "loss": 18.2224, + "step": 5029 + }, + { + 
"epoch": 0.09194435812601678, + "grad_norm": 7.0700686828812405, + "learning_rate": 9.899773239385533e-06, + "loss": 17.6403, + "step": 5030 + }, + { + "epoch": 0.0919626373224633, + "grad_norm": 8.272419729877685, + "learning_rate": 9.899714259172528e-06, + "loss": 18.4661, + "step": 5031 + }, + { + "epoch": 0.09198091651890983, + "grad_norm": 6.328804256362053, + "learning_rate": 9.899655261786452e-06, + "loss": 17.404, + "step": 5032 + }, + { + "epoch": 0.09199919571535635, + "grad_norm": 6.251582485630017, + "learning_rate": 9.899596247227508e-06, + "loss": 16.9598, + "step": 5033 + }, + { + "epoch": 0.09201747491180287, + "grad_norm": 7.446638770898681, + "learning_rate": 9.899537215495905e-06, + "loss": 18.0199, + "step": 5034 + }, + { + "epoch": 0.0920357541082494, + "grad_norm": 6.4283726762518025, + "learning_rate": 9.899478166591849e-06, + "loss": 17.5608, + "step": 5035 + }, + { + "epoch": 0.09205403330469593, + "grad_norm": 6.0300426778732374, + "learning_rate": 9.899419100515547e-06, + "loss": 17.3951, + "step": 5036 + }, + { + "epoch": 0.09207231250114245, + "grad_norm": 8.184646622407854, + "learning_rate": 9.899360017267205e-06, + "loss": 17.9212, + "step": 5037 + }, + { + "epoch": 0.09209059169758897, + "grad_norm": 6.638060436107272, + "learning_rate": 9.899300916847034e-06, + "loss": 17.4981, + "step": 5038 + }, + { + "epoch": 0.0921088708940355, + "grad_norm": 5.517966476871986, + "learning_rate": 9.899241799255236e-06, + "loss": 17.0434, + "step": 5039 + }, + { + "epoch": 0.09212715009048202, + "grad_norm": 6.673691804201888, + "learning_rate": 9.899182664492022e-06, + "loss": 17.409, + "step": 5040 + }, + { + "epoch": 0.09214542928692855, + "grad_norm": 6.796683992349393, + "learning_rate": 9.899123512557598e-06, + "loss": 17.5109, + "step": 5041 + }, + { + "epoch": 0.09216370848337507, + "grad_norm": 8.356684608781904, + "learning_rate": 9.899064343452171e-06, + "loss": 18.1871, + "step": 5042 + }, + { + "epoch": 0.0921819876798216, + "grad_norm": 6.329480264267073, + "learning_rate": 9.899005157175949e-06, + "loss": 17.3431, + "step": 5043 + }, + { + "epoch": 0.09220026687626812, + "grad_norm": 6.529968145350565, + "learning_rate": 9.898945953729138e-06, + "loss": 17.5718, + "step": 5044 + }, + { + "epoch": 0.09221854607271464, + "grad_norm": 6.9738124871435465, + "learning_rate": 9.898886733111948e-06, + "loss": 17.7357, + "step": 5045 + }, + { + "epoch": 0.09223682526916117, + "grad_norm": 7.533163221333678, + "learning_rate": 9.898827495324582e-06, + "loss": 17.8633, + "step": 5046 + }, + { + "epoch": 0.0922551044656077, + "grad_norm": 7.957121859668378, + "learning_rate": 9.898768240367254e-06, + "loss": 17.8633, + "step": 5047 + }, + { + "epoch": 0.09227338366205422, + "grad_norm": 7.630567599799659, + "learning_rate": 9.898708968240168e-06, + "loss": 17.6692, + "step": 5048 + }, + { + "epoch": 0.09229166285850074, + "grad_norm": 7.732342182942792, + "learning_rate": 9.89864967894353e-06, + "loss": 17.956, + "step": 5049 + }, + { + "epoch": 0.09230994205494726, + "grad_norm": 9.629260157681703, + "learning_rate": 9.898590372477553e-06, + "loss": 18.385, + "step": 5050 + }, + { + "epoch": 0.09232822125139378, + "grad_norm": 6.843438829330014, + "learning_rate": 9.898531048842439e-06, + "loss": 17.8654, + "step": 5051 + }, + { + "epoch": 0.09234650044784032, + "grad_norm": 6.593065429758437, + "learning_rate": 9.898471708038399e-06, + "loss": 17.6917, + "step": 5052 + }, + { + "epoch": 0.09236477964428684, + "grad_norm": 7.098351330255455, + "learning_rate": 
9.898412350065643e-06, + "loss": 17.6001, + "step": 5053 + }, + { + "epoch": 0.09238305884073336, + "grad_norm": 6.341244681315394, + "learning_rate": 9.898352974924375e-06, + "loss": 17.5316, + "step": 5054 + }, + { + "epoch": 0.09240133803717988, + "grad_norm": 7.729340030369656, + "learning_rate": 9.898293582614807e-06, + "loss": 18.3122, + "step": 5055 + }, + { + "epoch": 0.0924196172336264, + "grad_norm": 7.956024790318432, + "learning_rate": 9.898234173137142e-06, + "loss": 17.9008, + "step": 5056 + }, + { + "epoch": 0.09243789643007293, + "grad_norm": 5.546061194674989, + "learning_rate": 9.898174746491593e-06, + "loss": 17.1677, + "step": 5057 + }, + { + "epoch": 0.09245617562651946, + "grad_norm": 6.732295760779667, + "learning_rate": 9.898115302678366e-06, + "loss": 17.6001, + "step": 5058 + }, + { + "epoch": 0.09247445482296598, + "grad_norm": 6.84673457871143, + "learning_rate": 9.898055841697671e-06, + "loss": 17.5328, + "step": 5059 + }, + { + "epoch": 0.0924927340194125, + "grad_norm": 7.606882429479325, + "learning_rate": 9.897996363549714e-06, + "loss": 18.1756, + "step": 5060 + }, + { + "epoch": 0.09251101321585903, + "grad_norm": 6.977198794202178, + "learning_rate": 9.897936868234706e-06, + "loss": 18.0095, + "step": 5061 + }, + { + "epoch": 0.09252929241230555, + "grad_norm": 9.701642926743286, + "learning_rate": 9.897877355752851e-06, + "loss": 18.3897, + "step": 5062 + }, + { + "epoch": 0.09254757160875209, + "grad_norm": 6.549979390468102, + "learning_rate": 9.897817826104364e-06, + "loss": 17.5967, + "step": 5063 + }, + { + "epoch": 0.09256585080519861, + "grad_norm": 8.463070939744693, + "learning_rate": 9.897758279289452e-06, + "loss": 18.212, + "step": 5064 + }, + { + "epoch": 0.09258413000164513, + "grad_norm": 10.159200115442955, + "learning_rate": 9.897698715308319e-06, + "loss": 18.6003, + "step": 5065 + }, + { + "epoch": 0.09260240919809165, + "grad_norm": 7.8889470110630215, + "learning_rate": 9.897639134161177e-06, + "loss": 18.4059, + "step": 5066 + }, + { + "epoch": 0.09262068839453817, + "grad_norm": 5.849307190283272, + "learning_rate": 9.897579535848236e-06, + "loss": 17.1294, + "step": 5067 + }, + { + "epoch": 0.0926389675909847, + "grad_norm": 7.288718858669303, + "learning_rate": 9.897519920369705e-06, + "loss": 17.8934, + "step": 5068 + }, + { + "epoch": 0.09265724678743123, + "grad_norm": 6.839732780830549, + "learning_rate": 9.89746028772579e-06, + "loss": 17.7358, + "step": 5069 + }, + { + "epoch": 0.09267552598387775, + "grad_norm": 7.413594438277912, + "learning_rate": 9.897400637916702e-06, + "loss": 18.1762, + "step": 5070 + }, + { + "epoch": 0.09269380518032427, + "grad_norm": 7.062197968061983, + "learning_rate": 9.89734097094265e-06, + "loss": 17.5506, + "step": 5071 + }, + { + "epoch": 0.0927120843767708, + "grad_norm": 7.603135725863668, + "learning_rate": 9.897281286803842e-06, + "loss": 17.9349, + "step": 5072 + }, + { + "epoch": 0.09273036357321732, + "grad_norm": 8.952409962326763, + "learning_rate": 9.89722158550049e-06, + "loss": 18.6004, + "step": 5073 + }, + { + "epoch": 0.09274864276966384, + "grad_norm": 6.36242751579794, + "learning_rate": 9.897161867032799e-06, + "loss": 17.4027, + "step": 5074 + }, + { + "epoch": 0.09276692196611037, + "grad_norm": 7.772562341085767, + "learning_rate": 9.897102131400981e-06, + "loss": 18.0767, + "step": 5075 + }, + { + "epoch": 0.0927852011625569, + "grad_norm": 6.650751263867157, + "learning_rate": 9.897042378605245e-06, + "loss": 17.1936, + "step": 5076 + }, + { + "epoch": 
0.09280348035900342, + "grad_norm": 7.866651835631159, + "learning_rate": 9.896982608645802e-06, + "loss": 18.0655, + "step": 5077 + }, + { + "epoch": 0.09282175955544994, + "grad_norm": 7.146689741848216, + "learning_rate": 9.896922821522858e-06, + "loss": 17.6362, + "step": 5078 + }, + { + "epoch": 0.09284003875189646, + "grad_norm": 7.2105555521385725, + "learning_rate": 9.896863017236626e-06, + "loss": 17.8813, + "step": 5079 + }, + { + "epoch": 0.092858317948343, + "grad_norm": 8.091388948800052, + "learning_rate": 9.896803195787315e-06, + "loss": 18.272, + "step": 5080 + }, + { + "epoch": 0.09287659714478952, + "grad_norm": 6.17918857902524, + "learning_rate": 9.896743357175131e-06, + "loss": 17.3417, + "step": 5081 + }, + { + "epoch": 0.09289487634123604, + "grad_norm": 6.513701464355702, + "learning_rate": 9.896683501400289e-06, + "loss": 17.4773, + "step": 5082 + }, + { + "epoch": 0.09291315553768256, + "grad_norm": 8.14203944656455, + "learning_rate": 9.896623628462994e-06, + "loss": 18.1933, + "step": 5083 + }, + { + "epoch": 0.09293143473412908, + "grad_norm": 7.145422400847804, + "learning_rate": 9.896563738363458e-06, + "loss": 18.1071, + "step": 5084 + }, + { + "epoch": 0.0929497139305756, + "grad_norm": 9.565680857666239, + "learning_rate": 9.896503831101893e-06, + "loss": 18.2842, + "step": 5085 + }, + { + "epoch": 0.09296799312702214, + "grad_norm": 6.958968934837667, + "learning_rate": 9.896443906678505e-06, + "loss": 17.7952, + "step": 5086 + }, + { + "epoch": 0.09298627232346866, + "grad_norm": 6.937333260918055, + "learning_rate": 9.896383965093508e-06, + "loss": 17.833, + "step": 5087 + }, + { + "epoch": 0.09300455151991519, + "grad_norm": 6.517897336294009, + "learning_rate": 9.896324006347109e-06, + "loss": 17.704, + "step": 5088 + }, + { + "epoch": 0.0930228307163617, + "grad_norm": 8.297352501735967, + "learning_rate": 9.896264030439518e-06, + "loss": 18.3063, + "step": 5089 + }, + { + "epoch": 0.09304110991280823, + "grad_norm": 7.175602043362299, + "learning_rate": 9.896204037370949e-06, + "loss": 18.0502, + "step": 5090 + }, + { + "epoch": 0.09305938910925475, + "grad_norm": 6.07682584332193, + "learning_rate": 9.896144027141608e-06, + "loss": 17.2171, + "step": 5091 + }, + { + "epoch": 0.09307766830570129, + "grad_norm": 7.418730929559951, + "learning_rate": 9.896083999751707e-06, + "loss": 18.0613, + "step": 5092 + }, + { + "epoch": 0.09309594750214781, + "grad_norm": 7.917331776771053, + "learning_rate": 9.896023955201456e-06, + "loss": 17.8008, + "step": 5093 + }, + { + "epoch": 0.09311422669859433, + "grad_norm": 7.2096344389067895, + "learning_rate": 9.895963893491068e-06, + "loss": 17.872, + "step": 5094 + }, + { + "epoch": 0.09313250589504085, + "grad_norm": 6.838032571383418, + "learning_rate": 9.895903814620752e-06, + "loss": 17.6463, + "step": 5095 + }, + { + "epoch": 0.09315078509148737, + "grad_norm": 7.1996244387813135, + "learning_rate": 9.895843718590715e-06, + "loss": 17.8283, + "step": 5096 + }, + { + "epoch": 0.09316906428793391, + "grad_norm": 6.547244638406464, + "learning_rate": 9.89578360540117e-06, + "loss": 17.31, + "step": 5097 + }, + { + "epoch": 0.09318734348438043, + "grad_norm": 8.132690254906443, + "learning_rate": 9.895723475052332e-06, + "loss": 18.2499, + "step": 5098 + }, + { + "epoch": 0.09320562268082695, + "grad_norm": 5.954262308126594, + "learning_rate": 9.895663327544405e-06, + "loss": 17.1972, + "step": 5099 + }, + { + "epoch": 0.09322390187727347, + "grad_norm": 7.4598041309153995, + "learning_rate": 
9.895603162877606e-06, + "loss": 17.6992, + "step": 5100 + }, + { + "epoch": 0.09324218107372, + "grad_norm": 7.397237648454, + "learning_rate": 9.89554298105214e-06, + "loss": 17.9287, + "step": 5101 + }, + { + "epoch": 0.09326046027016652, + "grad_norm": 6.987429839174725, + "learning_rate": 9.89548278206822e-06, + "loss": 17.5882, + "step": 5102 + }, + { + "epoch": 0.09327873946661305, + "grad_norm": 8.65559946879881, + "learning_rate": 9.89542256592606e-06, + "loss": 18.4917, + "step": 5103 + }, + { + "epoch": 0.09329701866305957, + "grad_norm": 7.200226542093818, + "learning_rate": 9.895362332625867e-06, + "loss": 17.8861, + "step": 5104 + }, + { + "epoch": 0.0933152978595061, + "grad_norm": 6.4526778792290225, + "learning_rate": 9.895302082167854e-06, + "loss": 17.6469, + "step": 5105 + }, + { + "epoch": 0.09333357705595262, + "grad_norm": 7.965510530399193, + "learning_rate": 9.895241814552234e-06, + "loss": 18.2435, + "step": 5106 + }, + { + "epoch": 0.09335185625239914, + "grad_norm": 8.022839044254871, + "learning_rate": 9.895181529779214e-06, + "loss": 18.2214, + "step": 5107 + }, + { + "epoch": 0.09337013544884566, + "grad_norm": 7.814000778915349, + "learning_rate": 9.895121227849009e-06, + "loss": 18.5997, + "step": 5108 + }, + { + "epoch": 0.0933884146452922, + "grad_norm": 7.707830413959619, + "learning_rate": 9.895060908761829e-06, + "loss": 18.0669, + "step": 5109 + }, + { + "epoch": 0.09340669384173872, + "grad_norm": 5.822862398431597, + "learning_rate": 9.895000572517883e-06, + "loss": 17.2046, + "step": 5110 + }, + { + "epoch": 0.09342497303818524, + "grad_norm": 7.583345046065925, + "learning_rate": 9.894940219117386e-06, + "loss": 17.8696, + "step": 5111 + }, + { + "epoch": 0.09344325223463176, + "grad_norm": 7.070215747881176, + "learning_rate": 9.89487984856055e-06, + "loss": 17.8606, + "step": 5112 + }, + { + "epoch": 0.09346153143107828, + "grad_norm": 8.945290950233952, + "learning_rate": 9.894819460847583e-06, + "loss": 18.428, + "step": 5113 + }, + { + "epoch": 0.09347981062752482, + "grad_norm": 6.672497587160126, + "learning_rate": 9.894759055978698e-06, + "loss": 17.673, + "step": 5114 + }, + { + "epoch": 0.09349808982397134, + "grad_norm": 7.727826447480226, + "learning_rate": 9.89469863395411e-06, + "loss": 17.9795, + "step": 5115 + }, + { + "epoch": 0.09351636902041786, + "grad_norm": 7.988666697174414, + "learning_rate": 9.894638194774026e-06, + "loss": 17.7235, + "step": 5116 + }, + { + "epoch": 0.09353464821686439, + "grad_norm": 7.320396905888832, + "learning_rate": 9.89457773843866e-06, + "loss": 18.0135, + "step": 5117 + }, + { + "epoch": 0.09355292741331091, + "grad_norm": 8.546963096960763, + "learning_rate": 9.894517264948223e-06, + "loss": 17.8598, + "step": 5118 + }, + { + "epoch": 0.09357120660975743, + "grad_norm": 9.650559515995681, + "learning_rate": 9.89445677430293e-06, + "loss": 18.5405, + "step": 5119 + }, + { + "epoch": 0.09358948580620396, + "grad_norm": 6.276272669282278, + "learning_rate": 9.894396266502988e-06, + "loss": 17.2553, + "step": 5120 + }, + { + "epoch": 0.09360776500265049, + "grad_norm": 8.242730452685143, + "learning_rate": 9.894335741548612e-06, + "loss": 18.464, + "step": 5121 + }, + { + "epoch": 0.09362604419909701, + "grad_norm": 7.62750328126906, + "learning_rate": 9.894275199440017e-06, + "loss": 17.6168, + "step": 5122 + }, + { + "epoch": 0.09364432339554353, + "grad_norm": 8.633977352790344, + "learning_rate": 9.894214640177411e-06, + "loss": 18.4184, + "step": 5123 + }, + { + "epoch": 0.09366260259199005, + 
"grad_norm": 6.397828277696224, + "learning_rate": 9.894154063761005e-06, + "loss": 17.3383, + "step": 5124 + }, + { + "epoch": 0.09368088178843657, + "grad_norm": 7.15244964329956, + "learning_rate": 9.894093470191016e-06, + "loss": 17.741, + "step": 5125 + }, + { + "epoch": 0.09369916098488311, + "grad_norm": 7.8472696501581325, + "learning_rate": 9.894032859467653e-06, + "loss": 17.6663, + "step": 5126 + }, + { + "epoch": 0.09371744018132963, + "grad_norm": 5.85358871564374, + "learning_rate": 9.893972231591131e-06, + "loss": 17.1793, + "step": 5127 + }, + { + "epoch": 0.09373571937777615, + "grad_norm": 6.8866450513713495, + "learning_rate": 9.89391158656166e-06, + "loss": 17.7025, + "step": 5128 + }, + { + "epoch": 0.09375399857422267, + "grad_norm": 8.32418053354413, + "learning_rate": 9.893850924379453e-06, + "loss": 17.9356, + "step": 5129 + }, + { + "epoch": 0.0937722777706692, + "grad_norm": 6.706146911778752, + "learning_rate": 9.893790245044723e-06, + "loss": 17.4315, + "step": 5130 + }, + { + "epoch": 0.09379055696711573, + "grad_norm": 9.072350200702102, + "learning_rate": 9.893729548557684e-06, + "loss": 18.5131, + "step": 5131 + }, + { + "epoch": 0.09380883616356225, + "grad_norm": 7.738821989454404, + "learning_rate": 9.893668834918547e-06, + "loss": 17.5097, + "step": 5132 + }, + { + "epoch": 0.09382711536000878, + "grad_norm": 6.246479875134676, + "learning_rate": 9.893608104127525e-06, + "loss": 17.5148, + "step": 5133 + }, + { + "epoch": 0.0938453945564553, + "grad_norm": 7.412553148077846, + "learning_rate": 9.893547356184832e-06, + "loss": 17.6774, + "step": 5134 + }, + { + "epoch": 0.09386367375290182, + "grad_norm": 6.0480645864262055, + "learning_rate": 9.89348659109068e-06, + "loss": 17.2246, + "step": 5135 + }, + { + "epoch": 0.09388195294934834, + "grad_norm": 6.586074664871277, + "learning_rate": 9.893425808845283e-06, + "loss": 17.566, + "step": 5136 + }, + { + "epoch": 0.09390023214579488, + "grad_norm": 7.415747677424086, + "learning_rate": 9.893365009448853e-06, + "loss": 17.7678, + "step": 5137 + }, + { + "epoch": 0.0939185113422414, + "grad_norm": 6.840226891812664, + "learning_rate": 9.893304192901601e-06, + "loss": 17.4448, + "step": 5138 + }, + { + "epoch": 0.09393679053868792, + "grad_norm": 7.7067826992775945, + "learning_rate": 9.893243359203743e-06, + "loss": 18.0775, + "step": 5139 + }, + { + "epoch": 0.09395506973513444, + "grad_norm": 7.110277590355808, + "learning_rate": 9.893182508355493e-06, + "loss": 17.7244, + "step": 5140 + }, + { + "epoch": 0.09397334893158096, + "grad_norm": 7.988985961336596, + "learning_rate": 9.893121640357063e-06, + "loss": 18.1297, + "step": 5141 + }, + { + "epoch": 0.09399162812802749, + "grad_norm": 6.612055979101377, + "learning_rate": 9.893060755208666e-06, + "loss": 17.2624, + "step": 5142 + }, + { + "epoch": 0.09400990732447402, + "grad_norm": 6.668732972463614, + "learning_rate": 9.892999852910515e-06, + "loss": 17.4488, + "step": 5143 + }, + { + "epoch": 0.09402818652092054, + "grad_norm": 7.944290272730593, + "learning_rate": 9.892938933462824e-06, + "loss": 18.0692, + "step": 5144 + }, + { + "epoch": 0.09404646571736706, + "grad_norm": 8.289932641940927, + "learning_rate": 9.892877996865807e-06, + "loss": 18.2843, + "step": 5145 + }, + { + "epoch": 0.09406474491381359, + "grad_norm": 6.047733902583171, + "learning_rate": 9.892817043119679e-06, + "loss": 17.3479, + "step": 5146 + }, + { + "epoch": 0.09408302411026011, + "grad_norm": 7.676574651211155, + "learning_rate": 9.89275607222465e-06, + "loss": 
17.9895, + "step": 5147 + }, + { + "epoch": 0.09410130330670664, + "grad_norm": 6.572160583036424, + "learning_rate": 9.892695084180934e-06, + "loss": 17.495, + "step": 5148 + }, + { + "epoch": 0.09411958250315317, + "grad_norm": 7.760488971814796, + "learning_rate": 9.892634078988748e-06, + "loss": 17.8998, + "step": 5149 + }, + { + "epoch": 0.09413786169959969, + "grad_norm": 6.831034321957358, + "learning_rate": 9.892573056648305e-06, + "loss": 17.8413, + "step": 5150 + }, + { + "epoch": 0.09415614089604621, + "grad_norm": 6.667496624357254, + "learning_rate": 9.892512017159817e-06, + "loss": 17.3853, + "step": 5151 + }, + { + "epoch": 0.09417442009249273, + "grad_norm": 7.427303461028795, + "learning_rate": 9.892450960523499e-06, + "loss": 17.6116, + "step": 5152 + }, + { + "epoch": 0.09419269928893925, + "grad_norm": 7.9004683741857535, + "learning_rate": 9.892389886739563e-06, + "loss": 18.2583, + "step": 5153 + }, + { + "epoch": 0.09421097848538579, + "grad_norm": 9.096128004383484, + "learning_rate": 9.892328795808228e-06, + "loss": 18.4646, + "step": 5154 + }, + { + "epoch": 0.09422925768183231, + "grad_norm": 7.952199273037197, + "learning_rate": 9.892267687729704e-06, + "loss": 18.2197, + "step": 5155 + }, + { + "epoch": 0.09424753687827883, + "grad_norm": 6.217855809239918, + "learning_rate": 9.892206562504207e-06, + "loss": 17.389, + "step": 5156 + }, + { + "epoch": 0.09426581607472535, + "grad_norm": 6.659049781394738, + "learning_rate": 9.89214542013195e-06, + "loss": 17.3625, + "step": 5157 + }, + { + "epoch": 0.09428409527117187, + "grad_norm": 7.4188287578494565, + "learning_rate": 9.892084260613148e-06, + "loss": 17.8858, + "step": 5158 + }, + { + "epoch": 0.0943023744676184, + "grad_norm": 8.965030987127742, + "learning_rate": 9.892023083948015e-06, + "loss": 18.5783, + "step": 5159 + }, + { + "epoch": 0.09432065366406493, + "grad_norm": 7.567770700328357, + "learning_rate": 9.891961890136766e-06, + "loss": 17.9085, + "step": 5160 + }, + { + "epoch": 0.09433893286051145, + "grad_norm": 6.064575987108541, + "learning_rate": 9.891900679179614e-06, + "loss": 17.2038, + "step": 5161 + }, + { + "epoch": 0.09435721205695798, + "grad_norm": 7.434547395908676, + "learning_rate": 9.891839451076775e-06, + "loss": 17.9291, + "step": 5162 + }, + { + "epoch": 0.0943754912534045, + "grad_norm": 7.773537562494793, + "learning_rate": 9.891778205828465e-06, + "loss": 18.3101, + "step": 5163 + }, + { + "epoch": 0.09439377044985102, + "grad_norm": 6.476420851524025, + "learning_rate": 9.891716943434893e-06, + "loss": 17.4162, + "step": 5164 + }, + { + "epoch": 0.09441204964629756, + "grad_norm": 7.067572833939982, + "learning_rate": 9.89165566389628e-06, + "loss": 17.9595, + "step": 5165 + }, + { + "epoch": 0.09443032884274408, + "grad_norm": 6.601796886973542, + "learning_rate": 9.891594367212837e-06, + "loss": 17.614, + "step": 5166 + }, + { + "epoch": 0.0944486080391906, + "grad_norm": 8.40956824993968, + "learning_rate": 9.891533053384784e-06, + "loss": 18.378, + "step": 5167 + }, + { + "epoch": 0.09446688723563712, + "grad_norm": 7.105079782615868, + "learning_rate": 9.89147172241233e-06, + "loss": 17.7088, + "step": 5168 + }, + { + "epoch": 0.09448516643208364, + "grad_norm": 9.065696790013208, + "learning_rate": 9.89141037429569e-06, + "loss": 18.3997, + "step": 5169 + }, + { + "epoch": 0.09450344562853016, + "grad_norm": 7.408161416479056, + "learning_rate": 9.891349009035082e-06, + "loss": 17.6111, + "step": 5170 + }, + { + "epoch": 0.0945217248249767, + "grad_norm": 
6.676298820376263, + "learning_rate": 9.891287626630721e-06, + "loss": 17.4411, + "step": 5171 + }, + { + "epoch": 0.09454000402142322, + "grad_norm": 7.4254242397519254, + "learning_rate": 9.891226227082822e-06, + "loss": 17.8507, + "step": 5172 + }, + { + "epoch": 0.09455828321786974, + "grad_norm": 6.595901024711817, + "learning_rate": 9.891164810391599e-06, + "loss": 17.7702, + "step": 5173 + }, + { + "epoch": 0.09457656241431626, + "grad_norm": 6.2201256864739625, + "learning_rate": 9.891103376557268e-06, + "loss": 17.4838, + "step": 5174 + }, + { + "epoch": 0.09459484161076279, + "grad_norm": 8.051874223536906, + "learning_rate": 9.891041925580043e-06, + "loss": 18.1326, + "step": 5175 + }, + { + "epoch": 0.09461312080720931, + "grad_norm": 6.810582802101014, + "learning_rate": 9.890980457460143e-06, + "loss": 17.8296, + "step": 5176 + }, + { + "epoch": 0.09463140000365584, + "grad_norm": 8.420908316405063, + "learning_rate": 9.890918972197777e-06, + "loss": 18.1608, + "step": 5177 + }, + { + "epoch": 0.09464967920010237, + "grad_norm": 6.736216916777759, + "learning_rate": 9.890857469793168e-06, + "loss": 17.6606, + "step": 5178 + }, + { + "epoch": 0.09466795839654889, + "grad_norm": 6.118770812982109, + "learning_rate": 9.890795950246526e-06, + "loss": 17.2436, + "step": 5179 + }, + { + "epoch": 0.09468623759299541, + "grad_norm": 6.537623612244409, + "learning_rate": 9.890734413558068e-06, + "loss": 17.6136, + "step": 5180 + }, + { + "epoch": 0.09470451678944193, + "grad_norm": 6.320666536498495, + "learning_rate": 9.890672859728013e-06, + "loss": 17.4728, + "step": 5181 + }, + { + "epoch": 0.09472279598588847, + "grad_norm": 7.744391672160248, + "learning_rate": 9.89061128875657e-06, + "loss": 17.9127, + "step": 5182 + }, + { + "epoch": 0.09474107518233499, + "grad_norm": 6.3098633894761065, + "learning_rate": 9.890549700643962e-06, + "loss": 17.379, + "step": 5183 + }, + { + "epoch": 0.09475935437878151, + "grad_norm": 7.4749505601951345, + "learning_rate": 9.890488095390401e-06, + "loss": 18.1063, + "step": 5184 + }, + { + "epoch": 0.09477763357522803, + "grad_norm": 7.196265894310334, + "learning_rate": 9.890426472996104e-06, + "loss": 18.1147, + "step": 5185 + }, + { + "epoch": 0.09479591277167455, + "grad_norm": 7.06196872795695, + "learning_rate": 9.890364833461285e-06, + "loss": 17.3641, + "step": 5186 + }, + { + "epoch": 0.09481419196812108, + "grad_norm": 8.512656954267568, + "learning_rate": 9.890303176786163e-06, + "loss": 18.4658, + "step": 5187 + }, + { + "epoch": 0.09483247116456761, + "grad_norm": 7.498556706900513, + "learning_rate": 9.890241502970952e-06, + "loss": 17.4943, + "step": 5188 + }, + { + "epoch": 0.09485075036101413, + "grad_norm": 6.214599113020599, + "learning_rate": 9.89017981201587e-06, + "loss": 17.2711, + "step": 5189 + }, + { + "epoch": 0.09486902955746065, + "grad_norm": 6.733266265875151, + "learning_rate": 9.890118103921132e-06, + "loss": 17.4952, + "step": 5190 + }, + { + "epoch": 0.09488730875390718, + "grad_norm": 6.724170248327515, + "learning_rate": 9.890056378686952e-06, + "loss": 17.1852, + "step": 5191 + }, + { + "epoch": 0.0949055879503537, + "grad_norm": 7.085453016784695, + "learning_rate": 9.889994636313551e-06, + "loss": 17.8099, + "step": 5192 + }, + { + "epoch": 0.09492386714680022, + "grad_norm": 8.154429760869085, + "learning_rate": 9.889932876801143e-06, + "loss": 18.3366, + "step": 5193 + }, + { + "epoch": 0.09494214634324676, + "grad_norm": 6.980399329141078, + "learning_rate": 9.889871100149944e-06, + "loss": 17.8377, + 
"step": 5194 + }, + { + "epoch": 0.09496042553969328, + "grad_norm": 8.254654376262852, + "learning_rate": 9.889809306360171e-06, + "loss": 17.9285, + "step": 5195 + }, + { + "epoch": 0.0949787047361398, + "grad_norm": 7.801505937494357, + "learning_rate": 9.88974749543204e-06, + "loss": 17.6987, + "step": 5196 + }, + { + "epoch": 0.09499698393258632, + "grad_norm": 7.095195996412267, + "learning_rate": 9.88968566736577e-06, + "loss": 17.8568, + "step": 5197 + }, + { + "epoch": 0.09501526312903284, + "grad_norm": 7.474276996536663, + "learning_rate": 9.889623822161575e-06, + "loss": 18.023, + "step": 5198 + }, + { + "epoch": 0.09503354232547938, + "grad_norm": 6.559234082417345, + "learning_rate": 9.889561959819673e-06, + "loss": 17.7538, + "step": 5199 + }, + { + "epoch": 0.0950518215219259, + "grad_norm": 8.300625114491275, + "learning_rate": 9.889500080340281e-06, + "loss": 17.8038, + "step": 5200 + }, + { + "epoch": 0.09507010071837242, + "grad_norm": 8.68399699423883, + "learning_rate": 9.889438183723616e-06, + "loss": 18.3276, + "step": 5201 + }, + { + "epoch": 0.09508837991481894, + "grad_norm": 5.858054324353686, + "learning_rate": 9.889376269969895e-06, + "loss": 17.1554, + "step": 5202 + }, + { + "epoch": 0.09510665911126547, + "grad_norm": 6.728176275885843, + "learning_rate": 9.889314339079332e-06, + "loss": 17.4803, + "step": 5203 + }, + { + "epoch": 0.09512493830771199, + "grad_norm": 6.44781365917043, + "learning_rate": 9.88925239105215e-06, + "loss": 17.5822, + "step": 5204 + }, + { + "epoch": 0.09514321750415852, + "grad_norm": 6.417523267951457, + "learning_rate": 9.889190425888558e-06, + "loss": 17.4344, + "step": 5205 + }, + { + "epoch": 0.09516149670060504, + "grad_norm": 7.042317903426137, + "learning_rate": 9.889128443588781e-06, + "loss": 17.7418, + "step": 5206 + }, + { + "epoch": 0.09517977589705157, + "grad_norm": 7.397451006051058, + "learning_rate": 9.889066444153032e-06, + "loss": 17.7729, + "step": 5207 + }, + { + "epoch": 0.09519805509349809, + "grad_norm": 7.143273439551852, + "learning_rate": 9.88900442758153e-06, + "loss": 18.024, + "step": 5208 + }, + { + "epoch": 0.09521633428994461, + "grad_norm": 8.615455102068974, + "learning_rate": 9.88894239387449e-06, + "loss": 18.2291, + "step": 5209 + }, + { + "epoch": 0.09523461348639113, + "grad_norm": 7.529587476696245, + "learning_rate": 9.888880343032133e-06, + "loss": 17.7377, + "step": 5210 + }, + { + "epoch": 0.09525289268283767, + "grad_norm": 6.63088867569911, + "learning_rate": 9.888818275054675e-06, + "loss": 17.6644, + "step": 5211 + }, + { + "epoch": 0.09527117187928419, + "grad_norm": 7.519668617792681, + "learning_rate": 9.888756189942333e-06, + "loss": 18.1103, + "step": 5212 + }, + { + "epoch": 0.09528945107573071, + "grad_norm": 6.972288231076231, + "learning_rate": 9.888694087695323e-06, + "loss": 17.536, + "step": 5213 + }, + { + "epoch": 0.09530773027217723, + "grad_norm": 7.115601967477167, + "learning_rate": 9.888631968313866e-06, + "loss": 17.6653, + "step": 5214 + }, + { + "epoch": 0.09532600946862375, + "grad_norm": 6.747204981196159, + "learning_rate": 9.888569831798178e-06, + "loss": 17.5997, + "step": 5215 + }, + { + "epoch": 0.09534428866507029, + "grad_norm": 7.6468415772145555, + "learning_rate": 9.888507678148475e-06, + "loss": 18.1247, + "step": 5216 + }, + { + "epoch": 0.09536256786151681, + "grad_norm": 7.070793850550537, + "learning_rate": 9.88844550736498e-06, + "loss": 17.4151, + "step": 5217 + }, + { + "epoch": 0.09538084705796333, + "grad_norm": 6.474401361103499, + 
"learning_rate": 9.888383319447905e-06, + "loss": 17.2561, + "step": 5218 + }, + { + "epoch": 0.09539912625440986, + "grad_norm": 7.58341485224294, + "learning_rate": 9.888321114397472e-06, + "loss": 18.0286, + "step": 5219 + }, + { + "epoch": 0.09541740545085638, + "grad_norm": 6.972150944961971, + "learning_rate": 9.888258892213898e-06, + "loss": 17.8591, + "step": 5220 + }, + { + "epoch": 0.0954356846473029, + "grad_norm": 6.744493413140763, + "learning_rate": 9.8881966528974e-06, + "loss": 17.5452, + "step": 5221 + }, + { + "epoch": 0.09545396384374943, + "grad_norm": 7.873660672430703, + "learning_rate": 9.888134396448198e-06, + "loss": 17.9007, + "step": 5222 + }, + { + "epoch": 0.09547224304019596, + "grad_norm": 7.298583415260131, + "learning_rate": 9.888072122866508e-06, + "loss": 17.6763, + "step": 5223 + }, + { + "epoch": 0.09549052223664248, + "grad_norm": 6.003618951036251, + "learning_rate": 9.888009832152549e-06, + "loss": 17.2141, + "step": 5224 + }, + { + "epoch": 0.095508801433089, + "grad_norm": 6.2177497010827105, + "learning_rate": 9.887947524306541e-06, + "loss": 17.4351, + "step": 5225 + }, + { + "epoch": 0.09552708062953552, + "grad_norm": 6.666056134453053, + "learning_rate": 9.8878851993287e-06, + "loss": 17.6842, + "step": 5226 + }, + { + "epoch": 0.09554535982598204, + "grad_norm": 6.516298719067309, + "learning_rate": 9.887822857219248e-06, + "loss": 17.4614, + "step": 5227 + }, + { + "epoch": 0.09556363902242858, + "grad_norm": 7.024816101465736, + "learning_rate": 9.887760497978397e-06, + "loss": 17.7201, + "step": 5228 + }, + { + "epoch": 0.0955819182188751, + "grad_norm": 7.200756136312985, + "learning_rate": 9.887698121606374e-06, + "loss": 17.7464, + "step": 5229 + }, + { + "epoch": 0.09560019741532162, + "grad_norm": 7.3783861132193564, + "learning_rate": 9.887635728103388e-06, + "loss": 17.7704, + "step": 5230 + }, + { + "epoch": 0.09561847661176814, + "grad_norm": 7.2206811986317945, + "learning_rate": 9.887573317469668e-06, + "loss": 17.6605, + "step": 5231 + }, + { + "epoch": 0.09563675580821467, + "grad_norm": 8.70079828108925, + "learning_rate": 9.887510889705426e-06, + "loss": 18.4645, + "step": 5232 + }, + { + "epoch": 0.0956550350046612, + "grad_norm": 7.636005799765782, + "learning_rate": 9.887448444810881e-06, + "loss": 17.893, + "step": 5233 + }, + { + "epoch": 0.09567331420110772, + "grad_norm": 6.993684305512817, + "learning_rate": 9.887385982786252e-06, + "loss": 17.7158, + "step": 5234 + }, + { + "epoch": 0.09569159339755424, + "grad_norm": 6.608801131132104, + "learning_rate": 9.887323503631762e-06, + "loss": 17.3909, + "step": 5235 + }, + { + "epoch": 0.09570987259400077, + "grad_norm": 6.771682563084261, + "learning_rate": 9.887261007347627e-06, + "loss": 17.3143, + "step": 5236 + }, + { + "epoch": 0.09572815179044729, + "grad_norm": 7.950569785671081, + "learning_rate": 9.887198493934067e-06, + "loss": 18.1489, + "step": 5237 + }, + { + "epoch": 0.09574643098689381, + "grad_norm": 7.3982490836298656, + "learning_rate": 9.8871359633913e-06, + "loss": 17.8159, + "step": 5238 + }, + { + "epoch": 0.09576471018334035, + "grad_norm": 6.990996331506278, + "learning_rate": 9.887073415719544e-06, + "loss": 17.5424, + "step": 5239 + }, + { + "epoch": 0.09578298937978687, + "grad_norm": 7.710399343547748, + "learning_rate": 9.88701085091902e-06, + "loss": 17.9353, + "step": 5240 + }, + { + "epoch": 0.09580126857623339, + "grad_norm": 8.072937592461615, + "learning_rate": 9.886948268989947e-06, + "loss": 18.0326, + "step": 5241 + }, + { + 
"epoch": 0.09581954777267991, + "grad_norm": 7.40238496151935, + "learning_rate": 9.886885669932545e-06, + "loss": 17.9291, + "step": 5242 + }, + { + "epoch": 0.09583782696912643, + "grad_norm": 7.78551859722493, + "learning_rate": 9.886823053747034e-06, + "loss": 18.1532, + "step": 5243 + }, + { + "epoch": 0.09585610616557295, + "grad_norm": 7.8382493474777, + "learning_rate": 9.88676042043363e-06, + "loss": 18.0345, + "step": 5244 + }, + { + "epoch": 0.09587438536201949, + "grad_norm": 7.25646190192345, + "learning_rate": 9.886697769992556e-06, + "loss": 17.8306, + "step": 5245 + }, + { + "epoch": 0.09589266455846601, + "grad_norm": 10.804791626584706, + "learning_rate": 9.88663510242403e-06, + "loss": 18.711, + "step": 5246 + }, + { + "epoch": 0.09591094375491253, + "grad_norm": 6.201093743279867, + "learning_rate": 9.886572417728274e-06, + "loss": 17.3102, + "step": 5247 + }, + { + "epoch": 0.09592922295135906, + "grad_norm": 8.3642670823898, + "learning_rate": 9.886509715905502e-06, + "loss": 17.9239, + "step": 5248 + }, + { + "epoch": 0.09594750214780558, + "grad_norm": 7.276983929820145, + "learning_rate": 9.886446996955939e-06, + "loss": 17.8457, + "step": 5249 + }, + { + "epoch": 0.09596578134425211, + "grad_norm": 6.708835063338171, + "learning_rate": 9.886384260879804e-06, + "loss": 17.6739, + "step": 5250 + }, + { + "epoch": 0.09598406054069863, + "grad_norm": 7.093126797744419, + "learning_rate": 9.886321507677316e-06, + "loss": 17.8085, + "step": 5251 + }, + { + "epoch": 0.09600233973714516, + "grad_norm": 6.497532496574756, + "learning_rate": 9.886258737348695e-06, + "loss": 17.4002, + "step": 5252 + }, + { + "epoch": 0.09602061893359168, + "grad_norm": 6.969377082513094, + "learning_rate": 9.886195949894162e-06, + "loss": 17.582, + "step": 5253 + }, + { + "epoch": 0.0960388981300382, + "grad_norm": 5.624883262985429, + "learning_rate": 9.886133145313934e-06, + "loss": 17.3625, + "step": 5254 + }, + { + "epoch": 0.09605717732648472, + "grad_norm": 7.3925160558442915, + "learning_rate": 9.886070323608236e-06, + "loss": 17.7853, + "step": 5255 + }, + { + "epoch": 0.09607545652293126, + "grad_norm": 6.872808240356434, + "learning_rate": 9.886007484777284e-06, + "loss": 17.6445, + "step": 5256 + }, + { + "epoch": 0.09609373571937778, + "grad_norm": 6.041244016989075, + "learning_rate": 9.8859446288213e-06, + "loss": 17.5063, + "step": 5257 + }, + { + "epoch": 0.0961120149158243, + "grad_norm": 6.506616084585259, + "learning_rate": 9.885881755740503e-06, + "loss": 17.3917, + "step": 5258 + }, + { + "epoch": 0.09613029411227082, + "grad_norm": 5.72404838769049, + "learning_rate": 9.885818865535115e-06, + "loss": 17.0782, + "step": 5259 + }, + { + "epoch": 0.09614857330871734, + "grad_norm": 5.600220206652367, + "learning_rate": 9.885755958205357e-06, + "loss": 17.2309, + "step": 5260 + }, + { + "epoch": 0.09616685250516387, + "grad_norm": 8.256578669744444, + "learning_rate": 9.885693033751447e-06, + "loss": 18.4663, + "step": 5261 + }, + { + "epoch": 0.0961851317016104, + "grad_norm": 7.469744356520143, + "learning_rate": 9.885630092173608e-06, + "loss": 17.8741, + "step": 5262 + }, + { + "epoch": 0.09620341089805692, + "grad_norm": 9.012604336564129, + "learning_rate": 9.885567133472059e-06, + "loss": 18.3334, + "step": 5263 + }, + { + "epoch": 0.09622169009450345, + "grad_norm": 6.558039475714017, + "learning_rate": 9.88550415764702e-06, + "loss": 17.8242, + "step": 5264 + }, + { + "epoch": 0.09623996929094997, + "grad_norm": 9.286626068016695, + "learning_rate": 
9.885441164698715e-06, + "loss": 18.3687, + "step": 5265 + }, + { + "epoch": 0.09625824848739649, + "grad_norm": 6.79502705833962, + "learning_rate": 9.885378154627362e-06, + "loss": 17.3884, + "step": 5266 + }, + { + "epoch": 0.09627652768384302, + "grad_norm": 7.629505796668096, + "learning_rate": 9.885315127433182e-06, + "loss": 17.9658, + "step": 5267 + }, + { + "epoch": 0.09629480688028955, + "grad_norm": 7.347670861701307, + "learning_rate": 9.885252083116398e-06, + "loss": 17.5675, + "step": 5268 + }, + { + "epoch": 0.09631308607673607, + "grad_norm": 7.17750685681595, + "learning_rate": 9.885189021677226e-06, + "loss": 17.7342, + "step": 5269 + }, + { + "epoch": 0.09633136527318259, + "grad_norm": 7.1640515597335, + "learning_rate": 9.885125943115892e-06, + "loss": 17.7269, + "step": 5270 + }, + { + "epoch": 0.09634964446962911, + "grad_norm": 7.506204128755075, + "learning_rate": 9.885062847432615e-06, + "loss": 17.8263, + "step": 5271 + }, + { + "epoch": 0.09636792366607563, + "grad_norm": 8.211680854441479, + "learning_rate": 9.884999734627618e-06, + "loss": 18.2696, + "step": 5272 + }, + { + "epoch": 0.09638620286252217, + "grad_norm": 7.649439093091455, + "learning_rate": 9.884936604701122e-06, + "loss": 17.6967, + "step": 5273 + }, + { + "epoch": 0.09640448205896869, + "grad_norm": 7.083866308988564, + "learning_rate": 9.884873457653345e-06, + "loss": 17.6615, + "step": 5274 + }, + { + "epoch": 0.09642276125541521, + "grad_norm": 8.367405155710328, + "learning_rate": 9.88481029348451e-06, + "loss": 18.1005, + "step": 5275 + }, + { + "epoch": 0.09644104045186173, + "grad_norm": 7.022332031462355, + "learning_rate": 9.884747112194839e-06, + "loss": 17.7434, + "step": 5276 + }, + { + "epoch": 0.09645931964830826, + "grad_norm": 7.394241098382519, + "learning_rate": 9.884683913784553e-06, + "loss": 17.8212, + "step": 5277 + }, + { + "epoch": 0.09647759884475478, + "grad_norm": 7.119784720405873, + "learning_rate": 9.884620698253877e-06, + "loss": 17.0399, + "step": 5278 + }, + { + "epoch": 0.09649587804120131, + "grad_norm": 7.982731933158025, + "learning_rate": 9.884557465603026e-06, + "loss": 17.7377, + "step": 5279 + }, + { + "epoch": 0.09651415723764784, + "grad_norm": 7.178376538639677, + "learning_rate": 9.884494215832225e-06, + "loss": 17.8772, + "step": 5280 + }, + { + "epoch": 0.09653243643409436, + "grad_norm": 7.123912373964505, + "learning_rate": 9.884430948941697e-06, + "loss": 18.037, + "step": 5281 + }, + { + "epoch": 0.09655071563054088, + "grad_norm": 7.554159375338772, + "learning_rate": 9.884367664931663e-06, + "loss": 17.9028, + "step": 5282 + }, + { + "epoch": 0.0965689948269874, + "grad_norm": 6.008364724596991, + "learning_rate": 9.884304363802342e-06, + "loss": 17.0345, + "step": 5283 + }, + { + "epoch": 0.09658727402343394, + "grad_norm": 5.624135781155062, + "learning_rate": 9.88424104555396e-06, + "loss": 16.9863, + "step": 5284 + }, + { + "epoch": 0.09660555321988046, + "grad_norm": 6.8371078213504255, + "learning_rate": 9.884177710186736e-06, + "loss": 17.6411, + "step": 5285 + }, + { + "epoch": 0.09662383241632698, + "grad_norm": 6.528436990810273, + "learning_rate": 9.884114357700893e-06, + "loss": 17.2872, + "step": 5286 + }, + { + "epoch": 0.0966421116127735, + "grad_norm": 7.469541449794446, + "learning_rate": 9.884050988096651e-06, + "loss": 17.8118, + "step": 5287 + }, + { + "epoch": 0.09666039080922002, + "grad_norm": 10.212809506208576, + "learning_rate": 9.883987601374238e-06, + "loss": 18.2187, + "step": 5288 + }, + { + "epoch": 
0.09667867000566654, + "grad_norm": 7.366905909371753, + "learning_rate": 9.88392419753387e-06, + "loss": 18.2504, + "step": 5289 + }, + { + "epoch": 0.09669694920211308, + "grad_norm": 7.3333108584614495, + "learning_rate": 9.883860776575772e-06, + "loss": 17.779, + "step": 5290 + }, + { + "epoch": 0.0967152283985596, + "grad_norm": 7.976466138224247, + "learning_rate": 9.883797338500165e-06, + "loss": 18.1815, + "step": 5291 + }, + { + "epoch": 0.09673350759500612, + "grad_norm": 7.365136220235471, + "learning_rate": 9.883733883307272e-06, + "loss": 17.7525, + "step": 5292 + }, + { + "epoch": 0.09675178679145265, + "grad_norm": 8.039554757285382, + "learning_rate": 9.883670410997318e-06, + "loss": 18.4228, + "step": 5293 + }, + { + "epoch": 0.09677006598789917, + "grad_norm": 7.53287053112969, + "learning_rate": 9.883606921570521e-06, + "loss": 18.0332, + "step": 5294 + }, + { + "epoch": 0.09678834518434569, + "grad_norm": 7.081858402661466, + "learning_rate": 9.883543415027107e-06, + "loss": 17.6161, + "step": 5295 + }, + { + "epoch": 0.09680662438079222, + "grad_norm": 6.785646720458371, + "learning_rate": 9.883479891367294e-06, + "loss": 17.467, + "step": 5296 + }, + { + "epoch": 0.09682490357723875, + "grad_norm": 7.477258930459312, + "learning_rate": 9.88341635059131e-06, + "loss": 18.217, + "step": 5297 + }, + { + "epoch": 0.09684318277368527, + "grad_norm": 6.869902920200466, + "learning_rate": 9.883352792699375e-06, + "loss": 17.8621, + "step": 5298 + }, + { + "epoch": 0.09686146197013179, + "grad_norm": 7.789336928668197, + "learning_rate": 9.883289217691712e-06, + "loss": 18.2331, + "step": 5299 + }, + { + "epoch": 0.09687974116657831, + "grad_norm": 8.526389110172985, + "learning_rate": 9.883225625568544e-06, + "loss": 18.3998, + "step": 5300 + }, + { + "epoch": 0.09689802036302485, + "grad_norm": 6.130281431362759, + "learning_rate": 9.883162016330094e-06, + "loss": 17.2481, + "step": 5301 + }, + { + "epoch": 0.09691629955947137, + "grad_norm": 6.948104012679294, + "learning_rate": 9.883098389976586e-06, + "loss": 17.8148, + "step": 5302 + }, + { + "epoch": 0.09693457875591789, + "grad_norm": 6.542422082061749, + "learning_rate": 9.88303474650824e-06, + "loss": 17.3871, + "step": 5303 + }, + { + "epoch": 0.09695285795236441, + "grad_norm": 7.134880808521567, + "learning_rate": 9.882971085925283e-06, + "loss": 17.8606, + "step": 5304 + }, + { + "epoch": 0.09697113714881093, + "grad_norm": 8.605364075819775, + "learning_rate": 9.882907408227934e-06, + "loss": 18.4823, + "step": 5305 + }, + { + "epoch": 0.09698941634525746, + "grad_norm": 7.582757133706279, + "learning_rate": 9.882843713416421e-06, + "loss": 17.8743, + "step": 5306 + }, + { + "epoch": 0.09700769554170399, + "grad_norm": 6.79791230027872, + "learning_rate": 9.882780001490963e-06, + "loss": 17.4309, + "step": 5307 + }, + { + "epoch": 0.09702597473815051, + "grad_norm": 7.506229977541841, + "learning_rate": 9.882716272451785e-06, + "loss": 17.7997, + "step": 5308 + }, + { + "epoch": 0.09704425393459704, + "grad_norm": 6.909299202803373, + "learning_rate": 9.882652526299109e-06, + "loss": 17.583, + "step": 5309 + }, + { + "epoch": 0.09706253313104356, + "grad_norm": 8.08860684609603, + "learning_rate": 9.88258876303316e-06, + "loss": 17.9465, + "step": 5310 + }, + { + "epoch": 0.09708081232749008, + "grad_norm": 6.7712705978128405, + "learning_rate": 9.882524982654162e-06, + "loss": 17.6611, + "step": 5311 + }, + { + "epoch": 0.0970990915239366, + "grad_norm": 7.700096808984828, + "learning_rate": 
9.882461185162338e-06, + "loss": 18.1099, + "step": 5312 + }, + { + "epoch": 0.09711737072038314, + "grad_norm": 7.1087451538688, + "learning_rate": 9.88239737055791e-06, + "loss": 18.051, + "step": 5313 + }, + { + "epoch": 0.09713564991682966, + "grad_norm": 6.897441875558762, + "learning_rate": 9.882333538841103e-06, + "loss": 17.5645, + "step": 5314 + }, + { + "epoch": 0.09715392911327618, + "grad_norm": 6.684874766663511, + "learning_rate": 9.88226969001214e-06, + "loss": 17.5739, + "step": 5315 + }, + { + "epoch": 0.0971722083097227, + "grad_norm": 6.078619167484666, + "learning_rate": 9.882205824071246e-06, + "loss": 17.1586, + "step": 5316 + }, + { + "epoch": 0.09719048750616922, + "grad_norm": 6.92733611336219, + "learning_rate": 9.882141941018644e-06, + "loss": 17.6455, + "step": 5317 + }, + { + "epoch": 0.09720876670261576, + "grad_norm": 7.5784526259969995, + "learning_rate": 9.882078040854559e-06, + "loss": 17.7522, + "step": 5318 + }, + { + "epoch": 0.09722704589906228, + "grad_norm": 7.36961789501853, + "learning_rate": 9.882014123579215e-06, + "loss": 17.5854, + "step": 5319 + }, + { + "epoch": 0.0972453250955088, + "grad_norm": 7.317098772859468, + "learning_rate": 9.881950189192833e-06, + "loss": 17.8084, + "step": 5320 + }, + { + "epoch": 0.09726360429195532, + "grad_norm": 5.787194122942332, + "learning_rate": 9.88188623769564e-06, + "loss": 17.2275, + "step": 5321 + }, + { + "epoch": 0.09728188348840185, + "grad_norm": 7.168416748681587, + "learning_rate": 9.88182226908786e-06, + "loss": 17.7341, + "step": 5322 + }, + { + "epoch": 0.09730016268484837, + "grad_norm": 7.407609911779401, + "learning_rate": 9.881758283369715e-06, + "loss": 17.9142, + "step": 5323 + }, + { + "epoch": 0.0973184418812949, + "grad_norm": 6.927321379262219, + "learning_rate": 9.881694280541431e-06, + "loss": 17.6755, + "step": 5324 + }, + { + "epoch": 0.09733672107774143, + "grad_norm": 7.267632829932315, + "learning_rate": 9.881630260603234e-06, + "loss": 17.7475, + "step": 5325 + }, + { + "epoch": 0.09735500027418795, + "grad_norm": 7.04217802331472, + "learning_rate": 9.881566223555345e-06, + "loss": 17.3577, + "step": 5326 + }, + { + "epoch": 0.09737327947063447, + "grad_norm": 6.746752510385598, + "learning_rate": 9.881502169397991e-06, + "loss": 17.5972, + "step": 5327 + }, + { + "epoch": 0.09739155866708099, + "grad_norm": 5.2912247121295435, + "learning_rate": 9.881438098131395e-06, + "loss": 17.1553, + "step": 5328 + }, + { + "epoch": 0.09740983786352751, + "grad_norm": 7.325975113070174, + "learning_rate": 9.881374009755784e-06, + "loss": 18.1632, + "step": 5329 + }, + { + "epoch": 0.09742811705997405, + "grad_norm": 6.080116422589618, + "learning_rate": 9.881309904271378e-06, + "loss": 17.3491, + "step": 5330 + }, + { + "epoch": 0.09744639625642057, + "grad_norm": 6.3866380418005635, + "learning_rate": 9.881245781678405e-06, + "loss": 17.1599, + "step": 5331 + }, + { + "epoch": 0.09746467545286709, + "grad_norm": 6.035010066941613, + "learning_rate": 9.881181641977088e-06, + "loss": 17.3709, + "step": 5332 + }, + { + "epoch": 0.09748295464931361, + "grad_norm": 7.703056986080128, + "learning_rate": 9.881117485167656e-06, + "loss": 17.9805, + "step": 5333 + }, + { + "epoch": 0.09750123384576014, + "grad_norm": 8.709080770062386, + "learning_rate": 9.881053311250328e-06, + "loss": 18.6102, + "step": 5334 + }, + { + "epoch": 0.09751951304220667, + "grad_norm": 6.192408709014584, + "learning_rate": 9.880989120225333e-06, + "loss": 17.413, + "step": 5335 + }, + { + "epoch": 
0.09753779223865319, + "grad_norm": 8.105822771494752, + "learning_rate": 9.880924912092894e-06, + "loss": 18.212, + "step": 5336 + }, + { + "epoch": 0.09755607143509971, + "grad_norm": 6.7656047760564775, + "learning_rate": 9.880860686853238e-06, + "loss": 17.779, + "step": 5337 + }, + { + "epoch": 0.09757435063154624, + "grad_norm": 6.374458346957026, + "learning_rate": 9.880796444506588e-06, + "loss": 17.3893, + "step": 5338 + }, + { + "epoch": 0.09759262982799276, + "grad_norm": 6.271245313507004, + "learning_rate": 9.88073218505317e-06, + "loss": 17.1529, + "step": 5339 + }, + { + "epoch": 0.09761090902443928, + "grad_norm": 7.322123584958095, + "learning_rate": 9.880667908493209e-06, + "loss": 17.7049, + "step": 5340 + }, + { + "epoch": 0.09762918822088582, + "grad_norm": 7.979414944996543, + "learning_rate": 9.88060361482693e-06, + "loss": 18.3133, + "step": 5341 + }, + { + "epoch": 0.09764746741733234, + "grad_norm": 8.61766632091909, + "learning_rate": 9.88053930405456e-06, + "loss": 18.5276, + "step": 5342 + }, + { + "epoch": 0.09766574661377886, + "grad_norm": 9.690909411811695, + "learning_rate": 9.880474976176322e-06, + "loss": 19.0181, + "step": 5343 + }, + { + "epoch": 0.09768402581022538, + "grad_norm": 7.445909909009077, + "learning_rate": 9.880410631192444e-06, + "loss": 17.7228, + "step": 5344 + }, + { + "epoch": 0.0977023050066719, + "grad_norm": 6.5210768427174095, + "learning_rate": 9.88034626910315e-06, + "loss": 17.4109, + "step": 5345 + }, + { + "epoch": 0.09772058420311842, + "grad_norm": 7.888989937175442, + "learning_rate": 9.880281889908665e-06, + "loss": 18.2857, + "step": 5346 + }, + { + "epoch": 0.09773886339956496, + "grad_norm": 7.253556566254523, + "learning_rate": 9.880217493609216e-06, + "loss": 17.6233, + "step": 5347 + }, + { + "epoch": 0.09775714259601148, + "grad_norm": 7.493275082127145, + "learning_rate": 9.880153080205028e-06, + "loss": 17.9103, + "step": 5348 + }, + { + "epoch": 0.097775421792458, + "grad_norm": 6.687304133962577, + "learning_rate": 9.880088649696327e-06, + "loss": 17.7951, + "step": 5349 + }, + { + "epoch": 0.09779370098890453, + "grad_norm": 6.6487009943221835, + "learning_rate": 9.88002420208334e-06, + "loss": 17.6598, + "step": 5350 + }, + { + "epoch": 0.09781198018535105, + "grad_norm": 6.052180943355565, + "learning_rate": 9.879959737366289e-06, + "loss": 16.9574, + "step": 5351 + }, + { + "epoch": 0.09783025938179758, + "grad_norm": 7.663162660828646, + "learning_rate": 9.879895255545405e-06, + "loss": 18.334, + "step": 5352 + }, + { + "epoch": 0.0978485385782441, + "grad_norm": 7.163208212979588, + "learning_rate": 9.87983075662091e-06, + "loss": 17.9333, + "step": 5353 + }, + { + "epoch": 0.09786681777469063, + "grad_norm": 7.711414455564948, + "learning_rate": 9.879766240593033e-06, + "loss": 17.826, + "step": 5354 + }, + { + "epoch": 0.09788509697113715, + "grad_norm": 9.910252461266577, + "learning_rate": 9.879701707461998e-06, + "loss": 18.5209, + "step": 5355 + }, + { + "epoch": 0.09790337616758367, + "grad_norm": 7.544857749664136, + "learning_rate": 9.879637157228032e-06, + "loss": 17.5333, + "step": 5356 + }, + { + "epoch": 0.09792165536403019, + "grad_norm": 7.099394526580665, + "learning_rate": 9.87957258989136e-06, + "loss": 17.7289, + "step": 5357 + }, + { + "epoch": 0.09793993456047673, + "grad_norm": 8.002312473025757, + "learning_rate": 9.879508005452212e-06, + "loss": 18.2373, + "step": 5358 + }, + { + "epoch": 0.09795821375692325, + "grad_norm": 6.8712882139932185, + "learning_rate": 
9.879443403910812e-06, + "loss": 17.4482, + "step": 5359 + }, + { + "epoch": 0.09797649295336977, + "grad_norm": 6.542254007103261, + "learning_rate": 9.879378785267383e-06, + "loss": 17.6043, + "step": 5360 + }, + { + "epoch": 0.09799477214981629, + "grad_norm": 8.079079767229963, + "learning_rate": 9.879314149522156e-06, + "loss": 18.41, + "step": 5361 + }, + { + "epoch": 0.09801305134626281, + "grad_norm": 7.230662786934817, + "learning_rate": 9.879249496675359e-06, + "loss": 17.8467, + "step": 5362 + }, + { + "epoch": 0.09803133054270934, + "grad_norm": 8.237757438238472, + "learning_rate": 9.879184826727213e-06, + "loss": 18.0755, + "step": 5363 + }, + { + "epoch": 0.09804960973915587, + "grad_norm": 6.64001757792681, + "learning_rate": 9.879120139677949e-06, + "loss": 17.7599, + "step": 5364 + }, + { + "epoch": 0.0980678889356024, + "grad_norm": 9.00030856218242, + "learning_rate": 9.87905543552779e-06, + "loss": 18.5465, + "step": 5365 + }, + { + "epoch": 0.09808616813204891, + "grad_norm": 5.9319514025512206, + "learning_rate": 9.878990714276968e-06, + "loss": 17.1628, + "step": 5366 + }, + { + "epoch": 0.09810444732849544, + "grad_norm": 6.982178255073946, + "learning_rate": 9.878925975925707e-06, + "loss": 17.8482, + "step": 5367 + }, + { + "epoch": 0.09812272652494196, + "grad_norm": 7.016553498668367, + "learning_rate": 9.878861220474232e-06, + "loss": 17.7667, + "step": 5368 + }, + { + "epoch": 0.0981410057213885, + "grad_norm": 7.201736280021446, + "learning_rate": 9.878796447922771e-06, + "loss": 17.5916, + "step": 5369 + }, + { + "epoch": 0.09815928491783502, + "grad_norm": 7.532481678533857, + "learning_rate": 9.878731658271554e-06, + "loss": 17.8754, + "step": 5370 + }, + { + "epoch": 0.09817756411428154, + "grad_norm": 6.311230681650899, + "learning_rate": 9.878666851520805e-06, + "loss": 17.4986, + "step": 5371 + }, + { + "epoch": 0.09819584331072806, + "grad_norm": 6.475750631197108, + "learning_rate": 9.878602027670751e-06, + "loss": 17.5613, + "step": 5372 + }, + { + "epoch": 0.09821412250717458, + "grad_norm": 7.224910645473922, + "learning_rate": 9.878537186721623e-06, + "loss": 17.752, + "step": 5373 + }, + { + "epoch": 0.0982324017036211, + "grad_norm": 8.045719608506186, + "learning_rate": 9.878472328673643e-06, + "loss": 18.1327, + "step": 5374 + }, + { + "epoch": 0.09825068090006764, + "grad_norm": 7.44718921926065, + "learning_rate": 9.878407453527042e-06, + "loss": 18.0817, + "step": 5375 + }, + { + "epoch": 0.09826896009651416, + "grad_norm": 8.031113172574356, + "learning_rate": 9.878342561282046e-06, + "loss": 17.434, + "step": 5376 + }, + { + "epoch": 0.09828723929296068, + "grad_norm": 7.582931894439497, + "learning_rate": 9.878277651938883e-06, + "loss": 17.8379, + "step": 5377 + }, + { + "epoch": 0.0983055184894072, + "grad_norm": 6.2691037363538324, + "learning_rate": 9.87821272549778e-06, + "loss": 17.4363, + "step": 5378 + }, + { + "epoch": 0.09832379768585373, + "grad_norm": 7.910581747723305, + "learning_rate": 9.878147781958965e-06, + "loss": 18.1192, + "step": 5379 + }, + { + "epoch": 0.09834207688230025, + "grad_norm": 7.379569009044979, + "learning_rate": 9.878082821322663e-06, + "loss": 18.0581, + "step": 5380 + }, + { + "epoch": 0.09836035607874678, + "grad_norm": 6.527518056624541, + "learning_rate": 9.878017843589107e-06, + "loss": 17.4445, + "step": 5381 + }, + { + "epoch": 0.0983786352751933, + "grad_norm": 6.78755253095713, + "learning_rate": 9.877952848758519e-06, + "loss": 17.6175, + "step": 5382 + }, + { + "epoch": 
0.09839691447163983, + "grad_norm": 6.977920251600107, + "learning_rate": 9.877887836831132e-06, + "loss": 17.6141, + "step": 5383 + }, + { + "epoch": 0.09841519366808635, + "grad_norm": 8.290696312597492, + "learning_rate": 9.87782280780717e-06, + "loss": 18.2516, + "step": 5384 + }, + { + "epoch": 0.09843347286453287, + "grad_norm": 6.44443039994811, + "learning_rate": 9.877757761686864e-06, + "loss": 17.2411, + "step": 5385 + }, + { + "epoch": 0.0984517520609794, + "grad_norm": 8.039395493731158, + "learning_rate": 9.877692698470438e-06, + "loss": 18.1254, + "step": 5386 + }, + { + "epoch": 0.09847003125742593, + "grad_norm": 6.174780650552495, + "learning_rate": 9.877627618158123e-06, + "loss": 17.3012, + "step": 5387 + }, + { + "epoch": 0.09848831045387245, + "grad_norm": 8.564452027284869, + "learning_rate": 9.877562520750148e-06, + "loss": 18.3757, + "step": 5388 + }, + { + "epoch": 0.09850658965031897, + "grad_norm": 12.723700125736002, + "learning_rate": 9.877497406246739e-06, + "loss": 18.9342, + "step": 5389 + }, + { + "epoch": 0.09852486884676549, + "grad_norm": 6.570689834194891, + "learning_rate": 9.877432274648125e-06, + "loss": 17.6527, + "step": 5390 + }, + { + "epoch": 0.09854314804321201, + "grad_norm": 7.212075969847427, + "learning_rate": 9.877367125954532e-06, + "loss": 17.6892, + "step": 5391 + }, + { + "epoch": 0.09856142723965855, + "grad_norm": 7.162008135860617, + "learning_rate": 9.877301960166192e-06, + "loss": 17.8613, + "step": 5392 + }, + { + "epoch": 0.09857970643610507, + "grad_norm": 7.799232092978829, + "learning_rate": 9.877236777283332e-06, + "loss": 17.9345, + "step": 5393 + }, + { + "epoch": 0.0985979856325516, + "grad_norm": 6.31091864296637, + "learning_rate": 9.877171577306181e-06, + "loss": 17.4031, + "step": 5394 + }, + { + "epoch": 0.09861626482899812, + "grad_norm": 6.791065851383708, + "learning_rate": 9.877106360234964e-06, + "loss": 17.6314, + "step": 5395 + }, + { + "epoch": 0.09863454402544464, + "grad_norm": 7.314248203014236, + "learning_rate": 9.877041126069917e-06, + "loss": 17.7338, + "step": 5396 + }, + { + "epoch": 0.09865282322189116, + "grad_norm": 8.803413494157793, + "learning_rate": 9.876975874811261e-06, + "loss": 18.8308, + "step": 5397 + }, + { + "epoch": 0.0986711024183377, + "grad_norm": 11.129809467457516, + "learning_rate": 9.876910606459228e-06, + "loss": 18.3517, + "step": 5398 + }, + { + "epoch": 0.09868938161478422, + "grad_norm": 6.960028940931708, + "learning_rate": 9.876845321014047e-06, + "loss": 17.7416, + "step": 5399 + }, + { + "epoch": 0.09870766081123074, + "grad_norm": 7.766890205745459, + "learning_rate": 9.876780018475947e-06, + "loss": 18.0501, + "step": 5400 + }, + { + "epoch": 0.09872594000767726, + "grad_norm": 6.609650701226153, + "learning_rate": 9.876714698845153e-06, + "loss": 17.2986, + "step": 5401 + }, + { + "epoch": 0.09874421920412378, + "grad_norm": 7.285538815241017, + "learning_rate": 9.876649362121901e-06, + "loss": 17.8622, + "step": 5402 + }, + { + "epoch": 0.09876249840057032, + "grad_norm": 7.016219968300518, + "learning_rate": 9.876584008306414e-06, + "loss": 17.8093, + "step": 5403 + }, + { + "epoch": 0.09878077759701684, + "grad_norm": 7.575328071727429, + "learning_rate": 9.876518637398924e-06, + "loss": 17.7971, + "step": 5404 + }, + { + "epoch": 0.09879905679346336, + "grad_norm": 5.507656527965496, + "learning_rate": 9.87645324939966e-06, + "loss": 17.1415, + "step": 5405 + }, + { + "epoch": 0.09881733598990988, + "grad_norm": 7.515076034232811, + "learning_rate": 
9.87638784430885e-06, + "loss": 17.5956, + "step": 5406 + }, + { + "epoch": 0.0988356151863564, + "grad_norm": 6.698772758642591, + "learning_rate": 9.876322422126722e-06, + "loss": 17.5607, + "step": 5407 + }, + { + "epoch": 0.09885389438280293, + "grad_norm": 7.084818200127224, + "learning_rate": 9.87625698285351e-06, + "loss": 17.8535, + "step": 5408 + }, + { + "epoch": 0.09887217357924946, + "grad_norm": 7.361523689917995, + "learning_rate": 9.876191526489438e-06, + "loss": 17.7445, + "step": 5409 + }, + { + "epoch": 0.09889045277569598, + "grad_norm": 7.5897634403978955, + "learning_rate": 9.87612605303474e-06, + "loss": 17.8592, + "step": 5410 + }, + { + "epoch": 0.0989087319721425, + "grad_norm": 8.596622229806462, + "learning_rate": 9.876060562489643e-06, + "loss": 18.291, + "step": 5411 + }, + { + "epoch": 0.09892701116858903, + "grad_norm": 7.167603474624208, + "learning_rate": 9.875995054854375e-06, + "loss": 17.9727, + "step": 5412 + }, + { + "epoch": 0.09894529036503555, + "grad_norm": 7.666953240832883, + "learning_rate": 9.875929530129167e-06, + "loss": 18.0282, + "step": 5413 + }, + { + "epoch": 0.09896356956148207, + "grad_norm": 7.604734768374753, + "learning_rate": 9.875863988314252e-06, + "loss": 18.2639, + "step": 5414 + }, + { + "epoch": 0.0989818487579286, + "grad_norm": 7.861239661676397, + "learning_rate": 9.875798429409855e-06, + "loss": 18.2204, + "step": 5415 + }, + { + "epoch": 0.09900012795437513, + "grad_norm": 7.2050207094450895, + "learning_rate": 9.875732853416208e-06, + "loss": 17.7534, + "step": 5416 + }, + { + "epoch": 0.09901840715082165, + "grad_norm": 8.512749916335364, + "learning_rate": 9.87566726033354e-06, + "loss": 17.7452, + "step": 5417 + }, + { + "epoch": 0.09903668634726817, + "grad_norm": 7.1990424107187305, + "learning_rate": 9.875601650162082e-06, + "loss": 17.7461, + "step": 5418 + }, + { + "epoch": 0.0990549655437147, + "grad_norm": 7.561257344374474, + "learning_rate": 9.875536022902064e-06, + "loss": 17.911, + "step": 5419 + }, + { + "epoch": 0.09907324474016123, + "grad_norm": 7.608397345256539, + "learning_rate": 9.875470378553714e-06, + "loss": 17.753, + "step": 5420 + }, + { + "epoch": 0.09909152393660775, + "grad_norm": 6.4574803895087705, + "learning_rate": 9.875404717117263e-06, + "loss": 17.4264, + "step": 5421 + }, + { + "epoch": 0.09910980313305427, + "grad_norm": 6.691709365344509, + "learning_rate": 9.875339038592944e-06, + "loss": 17.6796, + "step": 5422 + }, + { + "epoch": 0.0991280823295008, + "grad_norm": 7.47764722151518, + "learning_rate": 9.875273342980982e-06, + "loss": 18.1239, + "step": 5423 + }, + { + "epoch": 0.09914636152594732, + "grad_norm": 6.205851196218604, + "learning_rate": 9.875207630281611e-06, + "loss": 17.4564, + "step": 5424 + }, + { + "epoch": 0.09916464072239384, + "grad_norm": 7.055443921268711, + "learning_rate": 9.87514190049506e-06, + "loss": 17.8814, + "step": 5425 + }, + { + "epoch": 0.09918291991884037, + "grad_norm": 6.347899534679206, + "learning_rate": 9.87507615362156e-06, + "loss": 17.3062, + "step": 5426 + }, + { + "epoch": 0.0992011991152869, + "grad_norm": 6.219401244317228, + "learning_rate": 9.875010389661341e-06, + "loss": 17.2959, + "step": 5427 + }, + { + "epoch": 0.09921947831173342, + "grad_norm": 5.659634986824411, + "learning_rate": 9.874944608614634e-06, + "loss": 17.1571, + "step": 5428 + }, + { + "epoch": 0.09923775750817994, + "grad_norm": 9.489737878088745, + "learning_rate": 9.874878810481669e-06, + "loss": 18.5848, + "step": 5429 + }, + { + "epoch": 
0.09925603670462646, + "grad_norm": 6.934344610746548, + "learning_rate": 9.874812995262676e-06, + "loss": 17.7542, + "step": 5430 + }, + { + "epoch": 0.09927431590107298, + "grad_norm": 7.698012600565545, + "learning_rate": 9.87474716295789e-06, + "loss": 18.2417, + "step": 5431 + }, + { + "epoch": 0.09929259509751952, + "grad_norm": 8.826267470753006, + "learning_rate": 9.874681313567533e-06, + "loss": 18.6452, + "step": 5432 + }, + { + "epoch": 0.09931087429396604, + "grad_norm": 8.153216940800014, + "learning_rate": 9.874615447091845e-06, + "loss": 17.8559, + "step": 5433 + }, + { + "epoch": 0.09932915349041256, + "grad_norm": 5.80915749006908, + "learning_rate": 9.874549563531051e-06, + "loss": 17.4492, + "step": 5434 + }, + { + "epoch": 0.09934743268685908, + "grad_norm": 7.003977421272474, + "learning_rate": 9.874483662885383e-06, + "loss": 17.6575, + "step": 5435 + }, + { + "epoch": 0.0993657118833056, + "grad_norm": 7.256371239184099, + "learning_rate": 9.874417745155075e-06, + "loss": 17.9347, + "step": 5436 + }, + { + "epoch": 0.09938399107975214, + "grad_norm": 6.660592230882827, + "learning_rate": 9.874351810340355e-06, + "loss": 17.7508, + "step": 5437 + }, + { + "epoch": 0.09940227027619866, + "grad_norm": 6.208497065931254, + "learning_rate": 9.874285858441455e-06, + "loss": 17.3796, + "step": 5438 + }, + { + "epoch": 0.09942054947264518, + "grad_norm": 7.348407501302437, + "learning_rate": 9.874219889458605e-06, + "loss": 17.5801, + "step": 5439 + }, + { + "epoch": 0.0994388286690917, + "grad_norm": 8.023627408244378, + "learning_rate": 9.874153903392037e-06, + "loss": 18.1776, + "step": 5440 + }, + { + "epoch": 0.09945710786553823, + "grad_norm": 7.112190887858089, + "learning_rate": 9.874087900241984e-06, + "loss": 17.5814, + "step": 5441 + }, + { + "epoch": 0.09947538706198475, + "grad_norm": 7.154703241715606, + "learning_rate": 9.874021880008675e-06, + "loss": 17.4067, + "step": 5442 + }, + { + "epoch": 0.09949366625843128, + "grad_norm": 6.544009814900835, + "learning_rate": 9.873955842692341e-06, + "loss": 17.3562, + "step": 5443 + }, + { + "epoch": 0.0995119454548778, + "grad_norm": 8.454864865370041, + "learning_rate": 9.873889788293217e-06, + "loss": 17.7853, + "step": 5444 + }, + { + "epoch": 0.09953022465132433, + "grad_norm": 7.003491466502003, + "learning_rate": 9.873823716811533e-06, + "loss": 17.6397, + "step": 5445 + }, + { + "epoch": 0.09954850384777085, + "grad_norm": 7.77339412187411, + "learning_rate": 9.873757628247516e-06, + "loss": 17.9778, + "step": 5446 + }, + { + "epoch": 0.09956678304421737, + "grad_norm": 6.698825218030373, + "learning_rate": 9.873691522601406e-06, + "loss": 17.5973, + "step": 5447 + }, + { + "epoch": 0.0995850622406639, + "grad_norm": 7.93327135814933, + "learning_rate": 9.873625399873426e-06, + "loss": 18.0847, + "step": 5448 + }, + { + "epoch": 0.09960334143711043, + "grad_norm": 7.123565048979408, + "learning_rate": 9.873559260063814e-06, + "loss": 17.9277, + "step": 5449 + }, + { + "epoch": 0.09962162063355695, + "grad_norm": 8.034482781471866, + "learning_rate": 9.8734931031728e-06, + "loss": 18.2641, + "step": 5450 + }, + { + "epoch": 0.09963989983000347, + "grad_norm": 7.698896824331821, + "learning_rate": 9.873426929200614e-06, + "loss": 18.0796, + "step": 5451 + }, + { + "epoch": 0.09965817902645, + "grad_norm": 6.570824200178621, + "learning_rate": 9.87336073814749e-06, + "loss": 17.3793, + "step": 5452 + }, + { + "epoch": 0.09967645822289652, + "grad_norm": 6.639729025706504, + "learning_rate": 
9.873294530013661e-06, + "loss": 17.6847, + "step": 5453 + }, + { + "epoch": 0.09969473741934305, + "grad_norm": 7.220849341731857, + "learning_rate": 9.873228304799357e-06, + "loss": 17.5644, + "step": 5454 + }, + { + "epoch": 0.09971301661578957, + "grad_norm": 7.309164793387815, + "learning_rate": 9.87316206250481e-06, + "loss": 18.1547, + "step": 5455 + }, + { + "epoch": 0.0997312958122361, + "grad_norm": 7.2867175641362145, + "learning_rate": 9.873095803130252e-06, + "loss": 17.7135, + "step": 5456 + }, + { + "epoch": 0.09974957500868262, + "grad_norm": 8.09917890677358, + "learning_rate": 9.873029526675916e-06, + "loss": 17.8355, + "step": 5457 + }, + { + "epoch": 0.09976785420512914, + "grad_norm": 7.5774145356789955, + "learning_rate": 9.872963233142037e-06, + "loss": 17.8691, + "step": 5458 + }, + { + "epoch": 0.09978613340157566, + "grad_norm": 7.1023109715224, + "learning_rate": 9.872896922528842e-06, + "loss": 17.4237, + "step": 5459 + }, + { + "epoch": 0.0998044125980222, + "grad_norm": 7.081672996540094, + "learning_rate": 9.872830594836568e-06, + "loss": 17.5306, + "step": 5460 + }, + { + "epoch": 0.09982269179446872, + "grad_norm": 7.587101306162368, + "learning_rate": 9.872764250065445e-06, + "loss": 17.9003, + "step": 5461 + }, + { + "epoch": 0.09984097099091524, + "grad_norm": 7.449325638916134, + "learning_rate": 9.872697888215706e-06, + "loss": 17.6771, + "step": 5462 + }, + { + "epoch": 0.09985925018736176, + "grad_norm": 7.989129904760626, + "learning_rate": 9.872631509287585e-06, + "loss": 18.3158, + "step": 5463 + }, + { + "epoch": 0.09987752938380828, + "grad_norm": 7.944580874655752, + "learning_rate": 9.872565113281312e-06, + "loss": 17.7879, + "step": 5464 + }, + { + "epoch": 0.0998958085802548, + "grad_norm": 8.284103749673918, + "learning_rate": 9.872498700197121e-06, + "loss": 17.8936, + "step": 5465 + }, + { + "epoch": 0.09991408777670134, + "grad_norm": 8.227151047533471, + "learning_rate": 9.872432270035245e-06, + "loss": 17.9144, + "step": 5466 + }, + { + "epoch": 0.09993236697314786, + "grad_norm": 6.448364487132646, + "learning_rate": 9.872365822795917e-06, + "loss": 17.2946, + "step": 5467 + }, + { + "epoch": 0.09995064616959438, + "grad_norm": 8.653589408219817, + "learning_rate": 9.872299358479369e-06, + "loss": 18.6194, + "step": 5468 + }, + { + "epoch": 0.0999689253660409, + "grad_norm": 8.562719262758154, + "learning_rate": 9.872232877085835e-06, + "loss": 18.3316, + "step": 5469 + }, + { + "epoch": 0.09998720456248743, + "grad_norm": 8.422446447160278, + "learning_rate": 9.872166378615547e-06, + "loss": 18.5237, + "step": 5470 + }, + { + "epoch": 0.10000548375893396, + "grad_norm": 7.674541089888339, + "learning_rate": 9.87209986306874e-06, + "loss": 17.6449, + "step": 5471 + }, + { + "epoch": 0.10002376295538049, + "grad_norm": 6.9285136410043044, + "learning_rate": 9.872033330445645e-06, + "loss": 17.7269, + "step": 5472 + }, + { + "epoch": 0.10004204215182701, + "grad_norm": 7.052208472841874, + "learning_rate": 9.871966780746495e-06, + "loss": 17.7994, + "step": 5473 + }, + { + "epoch": 0.10006032134827353, + "grad_norm": 6.902007178782599, + "learning_rate": 9.871900213971527e-06, + "loss": 17.5682, + "step": 5474 + }, + { + "epoch": 0.10007860054472005, + "grad_norm": 6.6627383854906626, + "learning_rate": 9.871833630120968e-06, + "loss": 17.4002, + "step": 5475 + }, + { + "epoch": 0.10009687974116657, + "grad_norm": 7.244903023072964, + "learning_rate": 9.871767029195058e-06, + "loss": 17.9044, + "step": 5476 + }, + { + "epoch": 
0.10011515893761311, + "grad_norm": 7.31228976482119, + "learning_rate": 9.871700411194025e-06, + "loss": 17.7802, + "step": 5477 + }, + { + "epoch": 0.10013343813405963, + "grad_norm": 7.614321893886776, + "learning_rate": 9.871633776118106e-06, + "loss": 17.8275, + "step": 5478 + }, + { + "epoch": 0.10015171733050615, + "grad_norm": 8.593718313171816, + "learning_rate": 9.871567123967533e-06, + "loss": 18.3694, + "step": 5479 + }, + { + "epoch": 0.10016999652695267, + "grad_norm": 7.081722185202732, + "learning_rate": 9.87150045474254e-06, + "loss": 17.7706, + "step": 5480 + }, + { + "epoch": 0.1001882757233992, + "grad_norm": 7.434183754851058, + "learning_rate": 9.87143376844336e-06, + "loss": 17.7374, + "step": 5481 + }, + { + "epoch": 0.10020655491984572, + "grad_norm": 8.14436549996752, + "learning_rate": 9.871367065070228e-06, + "loss": 18.0334, + "step": 5482 + }, + { + "epoch": 0.10022483411629225, + "grad_norm": 6.6415783121783205, + "learning_rate": 9.871300344623378e-06, + "loss": 17.6521, + "step": 5483 + }, + { + "epoch": 0.10024311331273877, + "grad_norm": 8.553573215241945, + "learning_rate": 9.871233607103042e-06, + "loss": 18.6418, + "step": 5484 + }, + { + "epoch": 0.1002613925091853, + "grad_norm": 7.890120053246864, + "learning_rate": 9.871166852509456e-06, + "loss": 18.1467, + "step": 5485 + }, + { + "epoch": 0.10027967170563182, + "grad_norm": 6.657974996457115, + "learning_rate": 9.87110008084285e-06, + "loss": 17.6226, + "step": 5486 + }, + { + "epoch": 0.10029795090207834, + "grad_norm": 8.216213128635172, + "learning_rate": 9.871033292103462e-06, + "loss": 18.5853, + "step": 5487 + }, + { + "epoch": 0.10031623009852488, + "grad_norm": 6.911988755346808, + "learning_rate": 9.870966486291527e-06, + "loss": 17.5637, + "step": 5488 + }, + { + "epoch": 0.1003345092949714, + "grad_norm": 8.964046214265261, + "learning_rate": 9.870899663407276e-06, + "loss": 18.7191, + "step": 5489 + }, + { + "epoch": 0.10035278849141792, + "grad_norm": 6.00516386692326, + "learning_rate": 9.870832823450945e-06, + "loss": 17.3014, + "step": 5490 + }, + { + "epoch": 0.10037106768786444, + "grad_norm": 7.277179726087112, + "learning_rate": 9.870765966422766e-06, + "loss": 17.6824, + "step": 5491 + }, + { + "epoch": 0.10038934688431096, + "grad_norm": 7.756126483330481, + "learning_rate": 9.870699092322977e-06, + "loss": 18.0805, + "step": 5492 + }, + { + "epoch": 0.10040762608075748, + "grad_norm": 8.112557487227987, + "learning_rate": 9.870632201151808e-06, + "loss": 17.9869, + "step": 5493 + }, + { + "epoch": 0.10042590527720402, + "grad_norm": 6.362690345339067, + "learning_rate": 9.870565292909498e-06, + "loss": 17.3655, + "step": 5494 + }, + { + "epoch": 0.10044418447365054, + "grad_norm": 6.883410614087181, + "learning_rate": 9.870498367596278e-06, + "loss": 17.4808, + "step": 5495 + }, + { + "epoch": 0.10046246367009706, + "grad_norm": 7.798277815013971, + "learning_rate": 9.870431425212384e-06, + "loss": 17.8913, + "step": 5496 + }, + { + "epoch": 0.10048074286654358, + "grad_norm": 7.343465589360618, + "learning_rate": 9.870364465758052e-06, + "loss": 17.8053, + "step": 5497 + }, + { + "epoch": 0.1004990220629901, + "grad_norm": 7.755293902232316, + "learning_rate": 9.870297489233512e-06, + "loss": 18.117, + "step": 5498 + }, + { + "epoch": 0.10051730125943663, + "grad_norm": 6.895189993995462, + "learning_rate": 9.870230495639004e-06, + "loss": 17.591, + "step": 5499 + }, + { + "epoch": 0.10053558045588316, + "grad_norm": 9.636986893842327, + "learning_rate": 
9.870163484974761e-06, + "loss": 17.9947, + "step": 5500 + }, + { + "epoch": 0.10055385965232969, + "grad_norm": 7.017073909550062, + "learning_rate": 9.870096457241016e-06, + "loss": 17.8571, + "step": 5501 + }, + { + "epoch": 0.10057213884877621, + "grad_norm": 7.872760716791785, + "learning_rate": 9.870029412438007e-06, + "loss": 18.1904, + "step": 5502 + }, + { + "epoch": 0.10059041804522273, + "grad_norm": 6.872200858062733, + "learning_rate": 9.869962350565967e-06, + "loss": 17.6585, + "step": 5503 + }, + { + "epoch": 0.10060869724166925, + "grad_norm": 7.059145424434454, + "learning_rate": 9.869895271625131e-06, + "loss": 17.6791, + "step": 5504 + }, + { + "epoch": 0.10062697643811579, + "grad_norm": 6.207781969423849, + "learning_rate": 9.869828175615737e-06, + "loss": 17.2415, + "step": 5505 + }, + { + "epoch": 0.10064525563456231, + "grad_norm": 5.979674323002116, + "learning_rate": 9.869761062538016e-06, + "loss": 17.2384, + "step": 5506 + }, + { + "epoch": 0.10066353483100883, + "grad_norm": 6.240413722493483, + "learning_rate": 9.869693932392205e-06, + "loss": 17.3594, + "step": 5507 + }, + { + "epoch": 0.10068181402745535, + "grad_norm": 7.930051945652052, + "learning_rate": 9.86962678517854e-06, + "loss": 18.0924, + "step": 5508 + }, + { + "epoch": 0.10070009322390187, + "grad_norm": 8.069429040495264, + "learning_rate": 9.869559620897255e-06, + "loss": 18.4979, + "step": 5509 + }, + { + "epoch": 0.1007183724203484, + "grad_norm": 6.733855373581168, + "learning_rate": 9.869492439548587e-06, + "loss": 17.3737, + "step": 5510 + }, + { + "epoch": 0.10073665161679493, + "grad_norm": 7.640919666323922, + "learning_rate": 9.86942524113277e-06, + "loss": 17.77, + "step": 5511 + }, + { + "epoch": 0.10075493081324145, + "grad_norm": 6.810456747217254, + "learning_rate": 9.86935802565004e-06, + "loss": 17.5015, + "step": 5512 + }, + { + "epoch": 0.10077321000968797, + "grad_norm": 7.358783064217443, + "learning_rate": 9.869290793100631e-06, + "loss": 17.9874, + "step": 5513 + }, + { + "epoch": 0.1007914892061345, + "grad_norm": 7.266051700607326, + "learning_rate": 9.869223543484782e-06, + "loss": 17.5573, + "step": 5514 + }, + { + "epoch": 0.10080976840258102, + "grad_norm": 7.477396230815225, + "learning_rate": 9.869156276802729e-06, + "loss": 17.7798, + "step": 5515 + }, + { + "epoch": 0.10082804759902754, + "grad_norm": 8.008841306883555, + "learning_rate": 9.869088993054703e-06, + "loss": 18.2135, + "step": 5516 + }, + { + "epoch": 0.10084632679547408, + "grad_norm": 7.280361453713527, + "learning_rate": 9.869021692240943e-06, + "loss": 17.9519, + "step": 5517 + }, + { + "epoch": 0.1008646059919206, + "grad_norm": 6.833982330880781, + "learning_rate": 9.868954374361685e-06, + "loss": 17.7122, + "step": 5518 + }, + { + "epoch": 0.10088288518836712, + "grad_norm": 8.068587522092342, + "learning_rate": 9.868887039417163e-06, + "loss": 18.1906, + "step": 5519 + }, + { + "epoch": 0.10090116438481364, + "grad_norm": 7.51306169463813, + "learning_rate": 9.868819687407616e-06, + "loss": 17.8529, + "step": 5520 + }, + { + "epoch": 0.10091944358126016, + "grad_norm": 6.7116284526960435, + "learning_rate": 9.868752318333279e-06, + "loss": 17.6203, + "step": 5521 + }, + { + "epoch": 0.1009377227777067, + "grad_norm": 7.722773093298634, + "learning_rate": 9.868684932194387e-06, + "loss": 18.0917, + "step": 5522 + }, + { + "epoch": 0.10095600197415322, + "grad_norm": 6.92277413530129, + "learning_rate": 9.868617528991177e-06, + "loss": 17.6773, + "step": 5523 + }, + { + "epoch": 
0.10097428117059974, + "grad_norm": 7.725717045192423, + "learning_rate": 9.868550108723884e-06, + "loss": 18.1749, + "step": 5524 + }, + { + "epoch": 0.10099256036704626, + "grad_norm": 7.57412875694219, + "learning_rate": 9.868482671392747e-06, + "loss": 17.7129, + "step": 5525 + }, + { + "epoch": 0.10101083956349279, + "grad_norm": 8.740221519151344, + "learning_rate": 9.868415216998e-06, + "loss": 18.6339, + "step": 5526 + }, + { + "epoch": 0.10102911875993931, + "grad_norm": 7.5562932901323165, + "learning_rate": 9.86834774553988e-06, + "loss": 18.1447, + "step": 5527 + }, + { + "epoch": 0.10104739795638584, + "grad_norm": 6.976972762478665, + "learning_rate": 9.868280257018623e-06, + "loss": 17.7476, + "step": 5528 + }, + { + "epoch": 0.10106567715283236, + "grad_norm": 6.569964094907622, + "learning_rate": 9.868212751434467e-06, + "loss": 17.5981, + "step": 5529 + }, + { + "epoch": 0.10108395634927889, + "grad_norm": 7.021708305851113, + "learning_rate": 9.868145228787647e-06, + "loss": 17.5095, + "step": 5530 + }, + { + "epoch": 0.10110223554572541, + "grad_norm": 5.597428921716891, + "learning_rate": 9.8680776890784e-06, + "loss": 17.2427, + "step": 5531 + }, + { + "epoch": 0.10112051474217193, + "grad_norm": 6.9568685905472805, + "learning_rate": 9.868010132306965e-06, + "loss": 17.5931, + "step": 5532 + }, + { + "epoch": 0.10113879393861845, + "grad_norm": 8.334661748358617, + "learning_rate": 9.867942558473575e-06, + "loss": 18.3263, + "step": 5533 + }, + { + "epoch": 0.10115707313506499, + "grad_norm": 8.3366835042184, + "learning_rate": 9.86787496757847e-06, + "loss": 18.0709, + "step": 5534 + }, + { + "epoch": 0.10117535233151151, + "grad_norm": 7.667699375805215, + "learning_rate": 9.867807359621885e-06, + "loss": 17.7879, + "step": 5535 + }, + { + "epoch": 0.10119363152795803, + "grad_norm": 8.824124664629514, + "learning_rate": 9.867739734604059e-06, + "loss": 18.2866, + "step": 5536 + }, + { + "epoch": 0.10121191072440455, + "grad_norm": 7.017944125814402, + "learning_rate": 9.867672092525224e-06, + "loss": 17.7497, + "step": 5537 + }, + { + "epoch": 0.10123018992085107, + "grad_norm": 8.18842017569041, + "learning_rate": 9.867604433385625e-06, + "loss": 18.0759, + "step": 5538 + }, + { + "epoch": 0.10124846911729761, + "grad_norm": 7.997107795168364, + "learning_rate": 9.867536757185491e-06, + "loss": 18.5344, + "step": 5539 + }, + { + "epoch": 0.10126674831374413, + "grad_norm": 7.065617845960572, + "learning_rate": 9.867469063925065e-06, + "loss": 17.7069, + "step": 5540 + }, + { + "epoch": 0.10128502751019065, + "grad_norm": 8.911140431793497, + "learning_rate": 9.867401353604582e-06, + "loss": 18.6952, + "step": 5541 + }, + { + "epoch": 0.10130330670663718, + "grad_norm": 7.068744400916903, + "learning_rate": 9.867333626224282e-06, + "loss": 17.6487, + "step": 5542 + }, + { + "epoch": 0.1013215859030837, + "grad_norm": 6.838812273696042, + "learning_rate": 9.867265881784399e-06, + "loss": 17.5511, + "step": 5543 + }, + { + "epoch": 0.10133986509953022, + "grad_norm": 7.812591132063666, + "learning_rate": 9.867198120285169e-06, + "loss": 18.1763, + "step": 5544 + }, + { + "epoch": 0.10135814429597675, + "grad_norm": 6.59172368394794, + "learning_rate": 9.867130341726835e-06, + "loss": 17.3611, + "step": 5545 + }, + { + "epoch": 0.10137642349242328, + "grad_norm": 5.833388458503554, + "learning_rate": 9.867062546109627e-06, + "loss": 17.216, + "step": 5546 + }, + { + "epoch": 0.1013947026888698, + "grad_norm": 7.554618888036021, + "learning_rate": 
9.866994733433792e-06, + "loss": 17.9252, + "step": 5547 + }, + { + "epoch": 0.10141298188531632, + "grad_norm": 6.992305674904265, + "learning_rate": 9.866926903699561e-06, + "loss": 17.5893, + "step": 5548 + }, + { + "epoch": 0.10143126108176284, + "grad_norm": 7.343410832162275, + "learning_rate": 9.866859056907171e-06, + "loss": 17.8714, + "step": 5549 + }, + { + "epoch": 0.10144954027820936, + "grad_norm": 6.688629495531107, + "learning_rate": 9.866791193056866e-06, + "loss": 17.5182, + "step": 5550 + }, + { + "epoch": 0.1014678194746559, + "grad_norm": 8.989511728832127, + "learning_rate": 9.86672331214888e-06, + "loss": 18.3834, + "step": 5551 + }, + { + "epoch": 0.10148609867110242, + "grad_norm": 7.0848265652247235, + "learning_rate": 9.86665541418345e-06, + "loss": 17.724, + "step": 5552 + }, + { + "epoch": 0.10150437786754894, + "grad_norm": 6.465072724557792, + "learning_rate": 9.866587499160813e-06, + "loss": 17.5554, + "step": 5553 + }, + { + "epoch": 0.10152265706399546, + "grad_norm": 8.03355687173735, + "learning_rate": 9.866519567081213e-06, + "loss": 18.4101, + "step": 5554 + }, + { + "epoch": 0.10154093626044199, + "grad_norm": 7.843843302158278, + "learning_rate": 9.866451617944881e-06, + "loss": 17.962, + "step": 5555 + }, + { + "epoch": 0.10155921545688852, + "grad_norm": 8.061815569234346, + "learning_rate": 9.86638365175206e-06, + "loss": 18.1134, + "step": 5556 + }, + { + "epoch": 0.10157749465333504, + "grad_norm": 6.913228063862076, + "learning_rate": 9.866315668502986e-06, + "loss": 17.8261, + "step": 5557 + }, + { + "epoch": 0.10159577384978156, + "grad_norm": 7.602059052770785, + "learning_rate": 9.8662476681979e-06, + "loss": 17.9167, + "step": 5558 + }, + { + "epoch": 0.10161405304622809, + "grad_norm": 7.284534676397395, + "learning_rate": 9.866179650837035e-06, + "loss": 17.9744, + "step": 5559 + }, + { + "epoch": 0.10163233224267461, + "grad_norm": 8.078679404599374, + "learning_rate": 9.866111616420635e-06, + "loss": 17.7994, + "step": 5560 + }, + { + "epoch": 0.10165061143912113, + "grad_norm": 7.880967909463967, + "learning_rate": 9.866043564948935e-06, + "loss": 17.8808, + "step": 5561 + }, + { + "epoch": 0.10166889063556767, + "grad_norm": 7.9904514906741415, + "learning_rate": 9.865975496422175e-06, + "loss": 18.4223, + "step": 5562 + }, + { + "epoch": 0.10168716983201419, + "grad_norm": 7.984160213272381, + "learning_rate": 9.865907410840592e-06, + "loss": 18.4557, + "step": 5563 + }, + { + "epoch": 0.10170544902846071, + "grad_norm": 7.195063687482623, + "learning_rate": 9.865839308204425e-06, + "loss": 17.646, + "step": 5564 + }, + { + "epoch": 0.10172372822490723, + "grad_norm": 8.913339901124274, + "learning_rate": 9.865771188513917e-06, + "loss": 18.6782, + "step": 5565 + }, + { + "epoch": 0.10174200742135375, + "grad_norm": 6.952717261414831, + "learning_rate": 9.8657030517693e-06, + "loss": 17.8719, + "step": 5566 + }, + { + "epoch": 0.10176028661780027, + "grad_norm": 7.409917137842797, + "learning_rate": 9.865634897970817e-06, + "loss": 18.1012, + "step": 5567 + }, + { + "epoch": 0.10177856581424681, + "grad_norm": 6.704729071553811, + "learning_rate": 9.865566727118708e-06, + "loss": 17.6171, + "step": 5568 + }, + { + "epoch": 0.10179684501069333, + "grad_norm": 6.115780679568604, + "learning_rate": 9.865498539213207e-06, + "loss": 17.1877, + "step": 5569 + }, + { + "epoch": 0.10181512420713985, + "grad_norm": 7.119916928732996, + "learning_rate": 9.865430334254557e-06, + "loss": 17.8221, + "step": 5570 + }, + { + "epoch": 
0.10183340340358638, + "grad_norm": 6.134259618783842, + "learning_rate": 9.865362112242995e-06, + "loss": 17.0704, + "step": 5571 + }, + { + "epoch": 0.1018516826000329, + "grad_norm": 7.978678414155514, + "learning_rate": 9.865293873178762e-06, + "loss": 18.0233, + "step": 5572 + }, + { + "epoch": 0.10186996179647943, + "grad_norm": 7.215070387118926, + "learning_rate": 9.865225617062096e-06, + "loss": 17.801, + "step": 5573 + }, + { + "epoch": 0.10188824099292595, + "grad_norm": 7.31919282192941, + "learning_rate": 9.865157343893238e-06, + "loss": 17.9607, + "step": 5574 + }, + { + "epoch": 0.10190652018937248, + "grad_norm": 7.913734248080607, + "learning_rate": 9.865089053672422e-06, + "loss": 18.0854, + "step": 5575 + }, + { + "epoch": 0.101924799385819, + "grad_norm": 6.924365059570353, + "learning_rate": 9.865020746399894e-06, + "loss": 17.7547, + "step": 5576 + }, + { + "epoch": 0.10194307858226552, + "grad_norm": 8.725882960381123, + "learning_rate": 9.864952422075889e-06, + "loss": 18.7398, + "step": 5577 + }, + { + "epoch": 0.10196135777871204, + "grad_norm": 7.353269797435892, + "learning_rate": 9.864884080700648e-06, + "loss": 18.0692, + "step": 5578 + }, + { + "epoch": 0.10197963697515858, + "grad_norm": 6.545132518592989, + "learning_rate": 9.86481572227441e-06, + "loss": 17.5665, + "step": 5579 + }, + { + "epoch": 0.1019979161716051, + "grad_norm": 7.193654720088067, + "learning_rate": 9.864747346797416e-06, + "loss": 17.8518, + "step": 5580 + }, + { + "epoch": 0.10201619536805162, + "grad_norm": 6.715525651364767, + "learning_rate": 9.864678954269904e-06, + "loss": 17.4871, + "step": 5581 + }, + { + "epoch": 0.10203447456449814, + "grad_norm": 6.364410358375826, + "learning_rate": 9.864610544692115e-06, + "loss": 17.2479, + "step": 5582 + }, + { + "epoch": 0.10205275376094466, + "grad_norm": 7.787402952253585, + "learning_rate": 9.864542118064289e-06, + "loss": 18.2042, + "step": 5583 + }, + { + "epoch": 0.10207103295739119, + "grad_norm": 8.656929520314254, + "learning_rate": 9.864473674386663e-06, + "loss": 18.2817, + "step": 5584 + }, + { + "epoch": 0.10208931215383772, + "grad_norm": 7.1349927267277815, + "learning_rate": 9.86440521365948e-06, + "loss": 17.9565, + "step": 5585 + }, + { + "epoch": 0.10210759135028424, + "grad_norm": 8.451405774295017, + "learning_rate": 9.86433673588298e-06, + "loss": 18.2044, + "step": 5586 + }, + { + "epoch": 0.10212587054673077, + "grad_norm": 6.227900189800435, + "learning_rate": 9.8642682410574e-06, + "loss": 17.0415, + "step": 5587 + }, + { + "epoch": 0.10214414974317729, + "grad_norm": 7.342311981082835, + "learning_rate": 9.864199729182983e-06, + "loss": 17.8797, + "step": 5588 + }, + { + "epoch": 0.10216242893962381, + "grad_norm": 7.0039651354517805, + "learning_rate": 9.864131200259967e-06, + "loss": 17.7251, + "step": 5589 + }, + { + "epoch": 0.10218070813607034, + "grad_norm": 6.62975218233901, + "learning_rate": 9.864062654288595e-06, + "loss": 17.4632, + "step": 5590 + }, + { + "epoch": 0.10219898733251687, + "grad_norm": 7.889146122312645, + "learning_rate": 9.863994091269104e-06, + "loss": 18.0917, + "step": 5591 + }, + { + "epoch": 0.10221726652896339, + "grad_norm": 7.2736825293141285, + "learning_rate": 9.863925511201737e-06, + "loss": 17.9487, + "step": 5592 + }, + { + "epoch": 0.10223554572540991, + "grad_norm": 6.440638029377337, + "learning_rate": 9.863856914086732e-06, + "loss": 17.2682, + "step": 5593 + }, + { + "epoch": 0.10225382492185643, + "grad_norm": 7.658628036651079, + "learning_rate": 
9.86378829992433e-06, + "loss": 17.8727, + "step": 5594 + }, + { + "epoch": 0.10227210411830295, + "grad_norm": 6.086356623607372, + "learning_rate": 9.863719668714774e-06, + "loss": 17.2037, + "step": 5595 + }, + { + "epoch": 0.10229038331474949, + "grad_norm": 6.786964768966258, + "learning_rate": 9.8636510204583e-06, + "loss": 17.5606, + "step": 5596 + }, + { + "epoch": 0.10230866251119601, + "grad_norm": 7.003310323853189, + "learning_rate": 9.863582355155154e-06, + "loss": 17.6939, + "step": 5597 + }, + { + "epoch": 0.10232694170764253, + "grad_norm": 7.884423014166429, + "learning_rate": 9.863513672805572e-06, + "loss": 18.2019, + "step": 5598 + }, + { + "epoch": 0.10234522090408905, + "grad_norm": 7.115546747698863, + "learning_rate": 9.863444973409797e-06, + "loss": 17.6195, + "step": 5599 + }, + { + "epoch": 0.10236350010053558, + "grad_norm": 7.908531301386409, + "learning_rate": 9.86337625696807e-06, + "loss": 17.6112, + "step": 5600 + }, + { + "epoch": 0.1023817792969821, + "grad_norm": 9.359206717718962, + "learning_rate": 9.86330752348063e-06, + "loss": 18.813, + "step": 5601 + }, + { + "epoch": 0.10240005849342863, + "grad_norm": 7.512060769664707, + "learning_rate": 9.86323877294772e-06, + "loss": 18.0456, + "step": 5602 + }, + { + "epoch": 0.10241833768987516, + "grad_norm": 7.484972278754374, + "learning_rate": 9.863170005369581e-06, + "loss": 17.7385, + "step": 5603 + }, + { + "epoch": 0.10243661688632168, + "grad_norm": 7.909413744077415, + "learning_rate": 9.863101220746452e-06, + "loss": 18.1526, + "step": 5604 + }, + { + "epoch": 0.1024548960827682, + "grad_norm": 6.501674884686735, + "learning_rate": 9.863032419078576e-06, + "loss": 17.4334, + "step": 5605 + }, + { + "epoch": 0.10247317527921472, + "grad_norm": 7.725305001947346, + "learning_rate": 9.862963600366193e-06, + "loss": 17.8977, + "step": 5606 + }, + { + "epoch": 0.10249145447566126, + "grad_norm": 7.262891534764362, + "learning_rate": 9.862894764609545e-06, + "loss": 17.6885, + "step": 5607 + }, + { + "epoch": 0.10250973367210778, + "grad_norm": 5.806943049019033, + "learning_rate": 9.862825911808872e-06, + "loss": 17.182, + "step": 5608 + }, + { + "epoch": 0.1025280128685543, + "grad_norm": 6.841754023206308, + "learning_rate": 9.862757041964417e-06, + "loss": 17.8432, + "step": 5609 + }, + { + "epoch": 0.10254629206500082, + "grad_norm": 8.892583362257891, + "learning_rate": 9.862688155076418e-06, + "loss": 18.766, + "step": 5610 + }, + { + "epoch": 0.10256457126144734, + "grad_norm": 6.444854193252805, + "learning_rate": 9.862619251145123e-06, + "loss": 17.5268, + "step": 5611 + }, + { + "epoch": 0.10258285045789386, + "grad_norm": 7.675333679960771, + "learning_rate": 9.862550330170767e-06, + "loss": 17.7674, + "step": 5612 + }, + { + "epoch": 0.1026011296543404, + "grad_norm": 6.055736683653771, + "learning_rate": 9.862481392153595e-06, + "loss": 17.2301, + "step": 5613 + }, + { + "epoch": 0.10261940885078692, + "grad_norm": 7.384604145089058, + "learning_rate": 9.862412437093846e-06, + "loss": 17.5344, + "step": 5614 + }, + { + "epoch": 0.10263768804723344, + "grad_norm": 7.949191918095978, + "learning_rate": 9.862343464991765e-06, + "loss": 17.8813, + "step": 5615 + }, + { + "epoch": 0.10265596724367997, + "grad_norm": 8.488756267526043, + "learning_rate": 9.862274475847591e-06, + "loss": 18.6542, + "step": 5616 + }, + { + "epoch": 0.10267424644012649, + "grad_norm": 8.487859338016493, + "learning_rate": 9.862205469661567e-06, + "loss": 18.5466, + "step": 5617 + }, + { + "epoch": 
0.10269252563657301, + "grad_norm": 7.933801626997772, + "learning_rate": 9.862136446433936e-06, + "loss": 18.0013, + "step": 5618 + }, + { + "epoch": 0.10271080483301955, + "grad_norm": 7.866191316386422, + "learning_rate": 9.862067406164939e-06, + "loss": 17.9831, + "step": 5619 + }, + { + "epoch": 0.10272908402946607, + "grad_norm": 6.736840911650169, + "learning_rate": 9.861998348854815e-06, + "loss": 17.8773, + "step": 5620 + }, + { + "epoch": 0.10274736322591259, + "grad_norm": 8.099231571762177, + "learning_rate": 9.861929274503812e-06, + "loss": 18.3194, + "step": 5621 + }, + { + "epoch": 0.10276564242235911, + "grad_norm": 8.313637183638072, + "learning_rate": 9.861860183112167e-06, + "loss": 18.4127, + "step": 5622 + }, + { + "epoch": 0.10278392161880563, + "grad_norm": 8.375544465998125, + "learning_rate": 9.861791074680123e-06, + "loss": 18.2892, + "step": 5623 + }, + { + "epoch": 0.10280220081525217, + "grad_norm": 7.308802441418126, + "learning_rate": 9.861721949207924e-06, + "loss": 17.8897, + "step": 5624 + }, + { + "epoch": 0.10282048001169869, + "grad_norm": 7.4435089226258935, + "learning_rate": 9.861652806695811e-06, + "loss": 18.1091, + "step": 5625 + }, + { + "epoch": 0.10283875920814521, + "grad_norm": 7.672842510905069, + "learning_rate": 9.861583647144028e-06, + "loss": 18.1591, + "step": 5626 + }, + { + "epoch": 0.10285703840459173, + "grad_norm": 6.804388092466486, + "learning_rate": 9.861514470552817e-06, + "loss": 17.5523, + "step": 5627 + }, + { + "epoch": 0.10287531760103825, + "grad_norm": 7.947931883356575, + "learning_rate": 9.861445276922416e-06, + "loss": 17.6266, + "step": 5628 + }, + { + "epoch": 0.10289359679748478, + "grad_norm": 6.9495225887857135, + "learning_rate": 9.861376066253075e-06, + "loss": 17.6151, + "step": 5629 + }, + { + "epoch": 0.10291187599393131, + "grad_norm": 7.0574007178605696, + "learning_rate": 9.861306838545031e-06, + "loss": 17.5255, + "step": 5630 + }, + { + "epoch": 0.10293015519037783, + "grad_norm": 7.42909000481963, + "learning_rate": 9.86123759379853e-06, + "loss": 18.3477, + "step": 5631 + }, + { + "epoch": 0.10294843438682436, + "grad_norm": 7.400343424466621, + "learning_rate": 9.86116833201381e-06, + "loss": 17.5692, + "step": 5632 + }, + { + "epoch": 0.10296671358327088, + "grad_norm": 6.656659719531691, + "learning_rate": 9.86109905319112e-06, + "loss": 17.5363, + "step": 5633 + }, + { + "epoch": 0.1029849927797174, + "grad_norm": 8.869011790536229, + "learning_rate": 9.861029757330696e-06, + "loss": 18.233, + "step": 5634 + }, + { + "epoch": 0.10300327197616392, + "grad_norm": 10.78138637955777, + "learning_rate": 9.860960444432788e-06, + "loss": 19.547, + "step": 5635 + }, + { + "epoch": 0.10302155117261046, + "grad_norm": 8.69105743680655, + "learning_rate": 9.860891114497632e-06, + "loss": 18.1938, + "step": 5636 + }, + { + "epoch": 0.10303983036905698, + "grad_norm": 7.406666591015115, + "learning_rate": 9.860821767525478e-06, + "loss": 17.8067, + "step": 5637 + }, + { + "epoch": 0.1030581095655035, + "grad_norm": 7.236512474195321, + "learning_rate": 9.860752403516565e-06, + "loss": 17.8224, + "step": 5638 + }, + { + "epoch": 0.10307638876195002, + "grad_norm": 6.740368023501267, + "learning_rate": 9.860683022471134e-06, + "loss": 17.7572, + "step": 5639 + }, + { + "epoch": 0.10309466795839654, + "grad_norm": 6.801684355389477, + "learning_rate": 9.860613624389433e-06, + "loss": 17.1892, + "step": 5640 + }, + { + "epoch": 0.10311294715484308, + "grad_norm": 6.728856808681663, + "learning_rate": 
9.860544209271702e-06, + "loss": 17.2071, + "step": 5641 + }, + { + "epoch": 0.1031312263512896, + "grad_norm": 6.5262368131426785, + "learning_rate": 9.860474777118186e-06, + "loss": 17.3267, + "step": 5642 + }, + { + "epoch": 0.10314950554773612, + "grad_norm": 6.389057697504511, + "learning_rate": 9.860405327929127e-06, + "loss": 17.3653, + "step": 5643 + }, + { + "epoch": 0.10316778474418264, + "grad_norm": 8.191552019361835, + "learning_rate": 9.860335861704771e-06, + "loss": 18.0956, + "step": 5644 + }, + { + "epoch": 0.10318606394062917, + "grad_norm": 6.7991328360954, + "learning_rate": 9.860266378445357e-06, + "loss": 17.534, + "step": 5645 + }, + { + "epoch": 0.10320434313707569, + "grad_norm": 7.887544108450004, + "learning_rate": 9.860196878151132e-06, + "loss": 17.6669, + "step": 5646 + }, + { + "epoch": 0.10322262233352222, + "grad_norm": 7.62854995909802, + "learning_rate": 9.86012736082234e-06, + "loss": 18.1849, + "step": 5647 + }, + { + "epoch": 0.10324090152996875, + "grad_norm": 8.87544728712017, + "learning_rate": 9.860057826459221e-06, + "loss": 18.2109, + "step": 5648 + }, + { + "epoch": 0.10325918072641527, + "grad_norm": 7.432743426683649, + "learning_rate": 9.859988275062023e-06, + "loss": 18.0735, + "step": 5649 + }, + { + "epoch": 0.10327745992286179, + "grad_norm": 7.19049960693097, + "learning_rate": 9.859918706630988e-06, + "loss": 17.8247, + "step": 5650 + }, + { + "epoch": 0.10329573911930831, + "grad_norm": 8.581469903701562, + "learning_rate": 9.859849121166358e-06, + "loss": 18.3868, + "step": 5651 + }, + { + "epoch": 0.10331401831575483, + "grad_norm": 7.167860600574844, + "learning_rate": 9.85977951866838e-06, + "loss": 17.7402, + "step": 5652 + }, + { + "epoch": 0.10333229751220137, + "grad_norm": 7.317461419644228, + "learning_rate": 9.859709899137296e-06, + "loss": 17.716, + "step": 5653 + }, + { + "epoch": 0.10335057670864789, + "grad_norm": 8.126845056444337, + "learning_rate": 9.85964026257335e-06, + "loss": 17.937, + "step": 5654 + }, + { + "epoch": 0.10336885590509441, + "grad_norm": 6.775535340147825, + "learning_rate": 9.859570608976788e-06, + "loss": 17.7823, + "step": 5655 + }, + { + "epoch": 0.10338713510154093, + "grad_norm": 8.02478382232567, + "learning_rate": 9.85950093834785e-06, + "loss": 18.0325, + "step": 5656 + }, + { + "epoch": 0.10340541429798746, + "grad_norm": 7.84770673825979, + "learning_rate": 9.859431250686786e-06, + "loss": 18.0718, + "step": 5657 + }, + { + "epoch": 0.10342369349443399, + "grad_norm": 7.928305293440953, + "learning_rate": 9.859361545993835e-06, + "loss": 18.3205, + "step": 5658 + }, + { + "epoch": 0.10344197269088051, + "grad_norm": 6.308012622949614, + "learning_rate": 9.859291824269244e-06, + "loss": 17.5742, + "step": 5659 + }, + { + "epoch": 0.10346025188732703, + "grad_norm": 6.553638426485782, + "learning_rate": 9.859222085513257e-06, + "loss": 17.3746, + "step": 5660 + }, + { + "epoch": 0.10347853108377356, + "grad_norm": 7.8237799859017985, + "learning_rate": 9.859152329726119e-06, + "loss": 18.0363, + "step": 5661 + }, + { + "epoch": 0.10349681028022008, + "grad_norm": 8.70988408675445, + "learning_rate": 9.859082556908074e-06, + "loss": 18.5131, + "step": 5662 + }, + { + "epoch": 0.1035150894766666, + "grad_norm": 5.983760418825584, + "learning_rate": 9.859012767059364e-06, + "loss": 17.1997, + "step": 5663 + }, + { + "epoch": 0.10353336867311314, + "grad_norm": 6.314545435734154, + "learning_rate": 9.858942960180236e-06, + "loss": 17.3162, + "step": 5664 + }, + { + "epoch": 
0.10355164786955966, + "grad_norm": 7.783637790785526, + "learning_rate": 9.858873136270936e-06, + "loss": 18.0471, + "step": 5665 + }, + { + "epoch": 0.10356992706600618, + "grad_norm": 8.911846521787316, + "learning_rate": 9.858803295331708e-06, + "loss": 18.5961, + "step": 5666 + }, + { + "epoch": 0.1035882062624527, + "grad_norm": 7.104290413771939, + "learning_rate": 9.858733437362794e-06, + "loss": 17.9537, + "step": 5667 + }, + { + "epoch": 0.10360648545889922, + "grad_norm": 7.441246196882701, + "learning_rate": 9.858663562364442e-06, + "loss": 17.6768, + "step": 5668 + }, + { + "epoch": 0.10362476465534574, + "grad_norm": 6.230320373537345, + "learning_rate": 9.858593670336896e-06, + "loss": 17.5156, + "step": 5669 + }, + { + "epoch": 0.10364304385179228, + "grad_norm": 7.054016654154026, + "learning_rate": 9.8585237612804e-06, + "loss": 18.0473, + "step": 5670 + }, + { + "epoch": 0.1036613230482388, + "grad_norm": 6.80265935611233, + "learning_rate": 9.858453835195201e-06, + "loss": 17.611, + "step": 5671 + }, + { + "epoch": 0.10367960224468532, + "grad_norm": 8.505054168058228, + "learning_rate": 9.858383892081543e-06, + "loss": 18.0694, + "step": 5672 + }, + { + "epoch": 0.10369788144113185, + "grad_norm": 7.974423983390342, + "learning_rate": 9.858313931939671e-06, + "loss": 17.7729, + "step": 5673 + }, + { + "epoch": 0.10371616063757837, + "grad_norm": 7.314855359130303, + "learning_rate": 9.858243954769828e-06, + "loss": 17.6772, + "step": 5674 + }, + { + "epoch": 0.1037344398340249, + "grad_norm": 6.987436129085345, + "learning_rate": 9.858173960572263e-06, + "loss": 17.7026, + "step": 5675 + }, + { + "epoch": 0.10375271903047142, + "grad_norm": 6.998286275181286, + "learning_rate": 9.85810394934722e-06, + "loss": 17.6099, + "step": 5676 + }, + { + "epoch": 0.10377099822691795, + "grad_norm": 7.006892086593356, + "learning_rate": 9.858033921094945e-06, + "loss": 17.7675, + "step": 5677 + }, + { + "epoch": 0.10378927742336447, + "grad_norm": 6.879778033254961, + "learning_rate": 9.85796387581568e-06, + "loss": 17.5298, + "step": 5678 + }, + { + "epoch": 0.10380755661981099, + "grad_norm": 6.76985971738366, + "learning_rate": 9.857893813509679e-06, + "loss": 17.6621, + "step": 5679 + }, + { + "epoch": 0.10382583581625751, + "grad_norm": 6.72691845560067, + "learning_rate": 9.857823734177176e-06, + "loss": 17.5424, + "step": 5680 + }, + { + "epoch": 0.10384411501270405, + "grad_norm": 8.53200920093947, + "learning_rate": 9.857753637818424e-06, + "loss": 18.1764, + "step": 5681 + }, + { + "epoch": 0.10386239420915057, + "grad_norm": 7.741867404671852, + "learning_rate": 9.857683524433667e-06, + "loss": 17.9676, + "step": 5682 + }, + { + "epoch": 0.10388067340559709, + "grad_norm": 7.869945756666038, + "learning_rate": 9.857613394023151e-06, + "loss": 18.0102, + "step": 5683 + }, + { + "epoch": 0.10389895260204361, + "grad_norm": 8.34587849748405, + "learning_rate": 9.857543246587123e-06, + "loss": 18.0476, + "step": 5684 + }, + { + "epoch": 0.10391723179849013, + "grad_norm": 7.460410896072189, + "learning_rate": 9.857473082125826e-06, + "loss": 17.6671, + "step": 5685 + }, + { + "epoch": 0.10393551099493666, + "grad_norm": 5.469985484096757, + "learning_rate": 9.857402900639506e-06, + "loss": 17.071, + "step": 5686 + }, + { + "epoch": 0.10395379019138319, + "grad_norm": 7.54252787671895, + "learning_rate": 9.857332702128413e-06, + "loss": 18.142, + "step": 5687 + }, + { + "epoch": 0.10397206938782971, + "grad_norm": 7.176446966677361, + "learning_rate": 9.857262486592789e-06, 
+ "loss": 17.9131, + "step": 5688 + }, + { + "epoch": 0.10399034858427623, + "grad_norm": 7.231311940389295, + "learning_rate": 9.857192254032881e-06, + "loss": 17.7265, + "step": 5689 + }, + { + "epoch": 0.10400862778072276, + "grad_norm": 5.430374545880149, + "learning_rate": 9.857122004448937e-06, + "loss": 16.9641, + "step": 5690 + }, + { + "epoch": 0.10402690697716928, + "grad_norm": 6.5152725917889835, + "learning_rate": 9.8570517378412e-06, + "loss": 17.713, + "step": 5691 + }, + { + "epoch": 0.10404518617361581, + "grad_norm": 6.808305671955995, + "learning_rate": 9.856981454209919e-06, + "loss": 17.5756, + "step": 5692 + }, + { + "epoch": 0.10406346537006234, + "grad_norm": 7.811642796693837, + "learning_rate": 9.856911153555339e-06, + "loss": 18.1096, + "step": 5693 + }, + { + "epoch": 0.10408174456650886, + "grad_norm": 7.481230820798903, + "learning_rate": 9.856840835877708e-06, + "loss": 17.877, + "step": 5694 + }, + { + "epoch": 0.10410002376295538, + "grad_norm": 8.124339711772812, + "learning_rate": 9.85677050117727e-06, + "loss": 18.1425, + "step": 5695 + }, + { + "epoch": 0.1041183029594019, + "grad_norm": 7.089440559584176, + "learning_rate": 9.856700149454274e-06, + "loss": 18.2303, + "step": 5696 + }, + { + "epoch": 0.10413658215584842, + "grad_norm": 6.9627328745022545, + "learning_rate": 9.856629780708966e-06, + "loss": 17.7311, + "step": 5697 + }, + { + "epoch": 0.10415486135229496, + "grad_norm": 7.262370127311515, + "learning_rate": 9.856559394941591e-06, + "loss": 17.6369, + "step": 5698 + }, + { + "epoch": 0.10417314054874148, + "grad_norm": 6.9301616574918805, + "learning_rate": 9.856488992152398e-06, + "loss": 17.872, + "step": 5699 + }, + { + "epoch": 0.104191419745188, + "grad_norm": 8.421575270284432, + "learning_rate": 9.85641857234163e-06, + "loss": 18.5551, + "step": 5700 + }, + { + "epoch": 0.10420969894163452, + "grad_norm": 8.354074911118072, + "learning_rate": 9.85634813550954e-06, + "loss": 18.44, + "step": 5701 + }, + { + "epoch": 0.10422797813808105, + "grad_norm": 7.699607647268898, + "learning_rate": 9.856277681656367e-06, + "loss": 18.0969, + "step": 5702 + }, + { + "epoch": 0.10424625733452757, + "grad_norm": 6.76819580678942, + "learning_rate": 9.856207210782365e-06, + "loss": 17.6372, + "step": 5703 + }, + { + "epoch": 0.1042645365309741, + "grad_norm": 8.428388101099076, + "learning_rate": 9.856136722887777e-06, + "loss": 18.3778, + "step": 5704 + }, + { + "epoch": 0.10428281572742062, + "grad_norm": 6.669753854352136, + "learning_rate": 9.85606621797285e-06, + "loss": 17.6416, + "step": 5705 + }, + { + "epoch": 0.10430109492386715, + "grad_norm": 6.133803018299268, + "learning_rate": 9.855995696037835e-06, + "loss": 17.3228, + "step": 5706 + }, + { + "epoch": 0.10431937412031367, + "grad_norm": 7.26969586228127, + "learning_rate": 9.855925157082975e-06, + "loss": 17.8272, + "step": 5707 + }, + { + "epoch": 0.10433765331676019, + "grad_norm": 6.995612745175263, + "learning_rate": 9.85585460110852e-06, + "loss": 17.711, + "step": 5708 + }, + { + "epoch": 0.10435593251320673, + "grad_norm": 6.963593528382055, + "learning_rate": 9.855784028114715e-06, + "loss": 17.802, + "step": 5709 + }, + { + "epoch": 0.10437421170965325, + "grad_norm": 7.495000273619035, + "learning_rate": 9.855713438101807e-06, + "loss": 17.9456, + "step": 5710 + }, + { + "epoch": 0.10439249090609977, + "grad_norm": 6.8303246224641585, + "learning_rate": 9.855642831070046e-06, + "loss": 17.7261, + "step": 5711 + }, + { + "epoch": 0.10441077010254629, + "grad_norm": 
8.105013767699267, + "learning_rate": 9.85557220701968e-06, + "loss": 18.0448, + "step": 5712 + }, + { + "epoch": 0.10442904929899281, + "grad_norm": 7.403994990361161, + "learning_rate": 9.85550156595095e-06, + "loss": 17.8851, + "step": 5713 + }, + { + "epoch": 0.10444732849543933, + "grad_norm": 7.770646153599739, + "learning_rate": 9.855430907864113e-06, + "loss": 18.2886, + "step": 5714 + }, + { + "epoch": 0.10446560769188587, + "grad_norm": 7.474763922450885, + "learning_rate": 9.85536023275941e-06, + "loss": 17.843, + "step": 5715 + }, + { + "epoch": 0.10448388688833239, + "grad_norm": 7.064507913735838, + "learning_rate": 9.855289540637092e-06, + "loss": 17.8636, + "step": 5716 + }, + { + "epoch": 0.10450216608477891, + "grad_norm": 6.756623432684942, + "learning_rate": 9.855218831497403e-06, + "loss": 17.421, + "step": 5717 + }, + { + "epoch": 0.10452044528122544, + "grad_norm": 7.261625310230588, + "learning_rate": 9.855148105340595e-06, + "loss": 17.5943, + "step": 5718 + }, + { + "epoch": 0.10453872447767196, + "grad_norm": 7.1522997287521, + "learning_rate": 9.855077362166914e-06, + "loss": 17.6506, + "step": 5719 + }, + { + "epoch": 0.10455700367411848, + "grad_norm": 7.21845105333718, + "learning_rate": 9.855006601976608e-06, + "loss": 17.5548, + "step": 5720 + }, + { + "epoch": 0.10457528287056501, + "grad_norm": 5.992877911997206, + "learning_rate": 9.854935824769926e-06, + "loss": 17.5561, + "step": 5721 + }, + { + "epoch": 0.10459356206701154, + "grad_norm": 8.51209455874131, + "learning_rate": 9.854865030547115e-06, + "loss": 18.3696, + "step": 5722 + }, + { + "epoch": 0.10461184126345806, + "grad_norm": 5.996228208506489, + "learning_rate": 9.854794219308424e-06, + "loss": 17.2701, + "step": 5723 + }, + { + "epoch": 0.10463012045990458, + "grad_norm": 6.118078868879499, + "learning_rate": 9.854723391054097e-06, + "loss": 17.5607, + "step": 5724 + }, + { + "epoch": 0.1046483996563511, + "grad_norm": 7.117069409509465, + "learning_rate": 9.854652545784388e-06, + "loss": 17.538, + "step": 5725 + }, + { + "epoch": 0.10466667885279764, + "grad_norm": 6.266101120638053, + "learning_rate": 9.854581683499544e-06, + "loss": 17.4521, + "step": 5726 + }, + { + "epoch": 0.10468495804924416, + "grad_norm": 6.146426237738708, + "learning_rate": 9.854510804199813e-06, + "loss": 17.2019, + "step": 5727 + }, + { + "epoch": 0.10470323724569068, + "grad_norm": 6.1851417483044235, + "learning_rate": 9.854439907885441e-06, + "loss": 17.437, + "step": 5728 + }, + { + "epoch": 0.1047215164421372, + "grad_norm": 6.894948126716912, + "learning_rate": 9.85436899455668e-06, + "loss": 17.8609, + "step": 5729 + }, + { + "epoch": 0.10473979563858372, + "grad_norm": 6.171758143473966, + "learning_rate": 9.854298064213775e-06, + "loss": 17.4036, + "step": 5730 + }, + { + "epoch": 0.10475807483503025, + "grad_norm": 8.770487531018796, + "learning_rate": 9.854227116856977e-06, + "loss": 18.0638, + "step": 5731 + }, + { + "epoch": 0.10477635403147678, + "grad_norm": 6.636523335196349, + "learning_rate": 9.854156152486536e-06, + "loss": 17.4718, + "step": 5732 + }, + { + "epoch": 0.1047946332279233, + "grad_norm": 7.392082438690228, + "learning_rate": 9.854085171102697e-06, + "loss": 17.9873, + "step": 5733 + }, + { + "epoch": 0.10481291242436983, + "grad_norm": 7.909454206722237, + "learning_rate": 9.85401417270571e-06, + "loss": 18.2473, + "step": 5734 + }, + { + "epoch": 0.10483119162081635, + "grad_norm": 7.006888620312817, + "learning_rate": 9.853943157295826e-06, + "loss": 17.7185, + "step": 5735 + 
}, + { + "epoch": 0.10484947081726287, + "grad_norm": 7.782189659230426, + "learning_rate": 9.853872124873294e-06, + "loss": 18.2295, + "step": 5736 + }, + { + "epoch": 0.10486775001370939, + "grad_norm": 5.875710856288465, + "learning_rate": 9.85380107543836e-06, + "loss": 17.2333, + "step": 5737 + }, + { + "epoch": 0.10488602921015593, + "grad_norm": 7.165194222622087, + "learning_rate": 9.853730008991274e-06, + "loss": 18.0052, + "step": 5738 + }, + { + "epoch": 0.10490430840660245, + "grad_norm": 7.001827406262003, + "learning_rate": 9.853658925532285e-06, + "loss": 17.6866, + "step": 5739 + }, + { + "epoch": 0.10492258760304897, + "grad_norm": 6.194436513322145, + "learning_rate": 9.853587825061644e-06, + "loss": 17.4752, + "step": 5740 + }, + { + "epoch": 0.10494086679949549, + "grad_norm": 7.2999102677724155, + "learning_rate": 9.853516707579598e-06, + "loss": 17.6792, + "step": 5741 + }, + { + "epoch": 0.10495914599594201, + "grad_norm": 11.076180042171714, + "learning_rate": 9.853445573086396e-06, + "loss": 18.2204, + "step": 5742 + }, + { + "epoch": 0.10497742519238855, + "grad_norm": 6.769448177150094, + "learning_rate": 9.853374421582291e-06, + "loss": 17.4852, + "step": 5743 + }, + { + "epoch": 0.10499570438883507, + "grad_norm": 7.339161168489227, + "learning_rate": 9.853303253067528e-06, + "loss": 17.9064, + "step": 5744 + }, + { + "epoch": 0.10501398358528159, + "grad_norm": 8.842002938007269, + "learning_rate": 9.853232067542358e-06, + "loss": 18.4983, + "step": 5745 + }, + { + "epoch": 0.10503226278172811, + "grad_norm": 7.149292050165079, + "learning_rate": 9.853160865007032e-06, + "loss": 17.9278, + "step": 5746 + }, + { + "epoch": 0.10505054197817464, + "grad_norm": 7.63771705425234, + "learning_rate": 9.853089645461798e-06, + "loss": 18.2209, + "step": 5747 + }, + { + "epoch": 0.10506882117462116, + "grad_norm": 7.203846304017191, + "learning_rate": 9.853018408906905e-06, + "loss": 17.4716, + "step": 5748 + }, + { + "epoch": 0.1050871003710677, + "grad_norm": 7.158352887577222, + "learning_rate": 9.852947155342606e-06, + "loss": 17.6188, + "step": 5749 + }, + { + "epoch": 0.10510537956751421, + "grad_norm": 8.11297657185183, + "learning_rate": 9.852875884769146e-06, + "loss": 18.0543, + "step": 5750 + }, + { + "epoch": 0.10512365876396074, + "grad_norm": 7.727460131316621, + "learning_rate": 9.852804597186777e-06, + "loss": 17.9759, + "step": 5751 + }, + { + "epoch": 0.10514193796040726, + "grad_norm": 7.057238262452905, + "learning_rate": 9.85273329259575e-06, + "loss": 17.6428, + "step": 5752 + }, + { + "epoch": 0.10516021715685378, + "grad_norm": 6.714331539931519, + "learning_rate": 9.852661970996314e-06, + "loss": 17.4928, + "step": 5753 + }, + { + "epoch": 0.1051784963533003, + "grad_norm": 7.166353058410686, + "learning_rate": 9.852590632388719e-06, + "loss": 17.9264, + "step": 5754 + }, + { + "epoch": 0.10519677554974684, + "grad_norm": 7.137643891664648, + "learning_rate": 9.852519276773214e-06, + "loss": 17.9394, + "step": 5755 + }, + { + "epoch": 0.10521505474619336, + "grad_norm": 7.616904490886513, + "learning_rate": 9.85244790415005e-06, + "loss": 18.1653, + "step": 5756 + }, + { + "epoch": 0.10523333394263988, + "grad_norm": 7.455770390212299, + "learning_rate": 9.852376514519478e-06, + "loss": 18.1438, + "step": 5757 + }, + { + "epoch": 0.1052516131390864, + "grad_norm": 9.109545469929985, + "learning_rate": 9.852305107881747e-06, + "loss": 18.4404, + "step": 5758 + }, + { + "epoch": 0.10526989233553292, + "grad_norm": 7.181036814075525, + 
"learning_rate": 9.852233684237107e-06, + "loss": 17.8544, + "step": 5759 + }, + { + "epoch": 0.10528817153197946, + "grad_norm": 6.999738257101895, + "learning_rate": 9.85216224358581e-06, + "loss": 17.8273, + "step": 5760 + }, + { + "epoch": 0.10530645072842598, + "grad_norm": 8.094214736012356, + "learning_rate": 9.852090785928105e-06, + "loss": 18.148, + "step": 5761 + }, + { + "epoch": 0.1053247299248725, + "grad_norm": 7.561774993680934, + "learning_rate": 9.852019311264242e-06, + "loss": 18.1062, + "step": 5762 + }, + { + "epoch": 0.10534300912131903, + "grad_norm": 6.070270020687608, + "learning_rate": 9.851947819594474e-06, + "loss": 17.2214, + "step": 5763 + }, + { + "epoch": 0.10536128831776555, + "grad_norm": 6.835985532335688, + "learning_rate": 9.85187631091905e-06, + "loss": 17.7722, + "step": 5764 + }, + { + "epoch": 0.10537956751421207, + "grad_norm": 7.871504429435472, + "learning_rate": 9.85180478523822e-06, + "loss": 18.1517, + "step": 5765 + }, + { + "epoch": 0.1053978467106586, + "grad_norm": 7.179465741023101, + "learning_rate": 9.851733242552234e-06, + "loss": 17.8843, + "step": 5766 + }, + { + "epoch": 0.10541612590710513, + "grad_norm": 8.111602194577884, + "learning_rate": 9.851661682861346e-06, + "loss": 18.4305, + "step": 5767 + }, + { + "epoch": 0.10543440510355165, + "grad_norm": 7.511389701700761, + "learning_rate": 9.851590106165802e-06, + "loss": 18.0181, + "step": 5768 + }, + { + "epoch": 0.10545268429999817, + "grad_norm": 7.887199715998375, + "learning_rate": 9.851518512465858e-06, + "loss": 17.847, + "step": 5769 + }, + { + "epoch": 0.10547096349644469, + "grad_norm": 7.949776827589897, + "learning_rate": 9.851446901761763e-06, + "loss": 18.3067, + "step": 5770 + }, + { + "epoch": 0.10548924269289121, + "grad_norm": 7.935419213525369, + "learning_rate": 9.851375274053766e-06, + "loss": 18.0919, + "step": 5771 + }, + { + "epoch": 0.10550752188933775, + "grad_norm": 8.700440051234581, + "learning_rate": 9.851303629342122e-06, + "loss": 18.42, + "step": 5772 + }, + { + "epoch": 0.10552580108578427, + "grad_norm": 7.515220487706339, + "learning_rate": 9.851231967627077e-06, + "loss": 17.6783, + "step": 5773 + }, + { + "epoch": 0.10554408028223079, + "grad_norm": 7.473438351789878, + "learning_rate": 9.851160288908885e-06, + "loss": 17.8782, + "step": 5774 + }, + { + "epoch": 0.10556235947867731, + "grad_norm": 7.902432249555323, + "learning_rate": 9.851088593187798e-06, + "loss": 17.3811, + "step": 5775 + }, + { + "epoch": 0.10558063867512384, + "grad_norm": 7.851866298826325, + "learning_rate": 9.851016880464069e-06, + "loss": 18.4099, + "step": 5776 + }, + { + "epoch": 0.10559891787157037, + "grad_norm": 7.944718451043134, + "learning_rate": 9.850945150737943e-06, + "loss": 18.0104, + "step": 5777 + }, + { + "epoch": 0.1056171970680169, + "grad_norm": 6.751090297928792, + "learning_rate": 9.850873404009678e-06, + "loss": 17.6698, + "step": 5778 + }, + { + "epoch": 0.10563547626446342, + "grad_norm": 6.202448126840248, + "learning_rate": 9.85080164027952e-06, + "loss": 17.3501, + "step": 5779 + }, + { + "epoch": 0.10565375546090994, + "grad_norm": 9.054764255352312, + "learning_rate": 9.850729859547725e-06, + "loss": 18.7583, + "step": 5780 + }, + { + "epoch": 0.10567203465735646, + "grad_norm": 8.027026165249456, + "learning_rate": 9.850658061814542e-06, + "loss": 18.2361, + "step": 5781 + }, + { + "epoch": 0.10569031385380298, + "grad_norm": 6.088920807800374, + "learning_rate": 9.850586247080222e-06, + "loss": 17.3435, + "step": 5782 + }, + { + 
"epoch": 0.10570859305024952, + "grad_norm": 8.14422214983587, + "learning_rate": 9.850514415345021e-06, + "loss": 17.9607, + "step": 5783 + }, + { + "epoch": 0.10572687224669604, + "grad_norm": 13.53775301378071, + "learning_rate": 9.850442566609186e-06, + "loss": 18.0239, + "step": 5784 + }, + { + "epoch": 0.10574515144314256, + "grad_norm": 7.6884599728396985, + "learning_rate": 9.850370700872971e-06, + "loss": 18.2373, + "step": 5785 + }, + { + "epoch": 0.10576343063958908, + "grad_norm": 6.910462429635305, + "learning_rate": 9.85029881813663e-06, + "loss": 17.3886, + "step": 5786 + }, + { + "epoch": 0.1057817098360356, + "grad_norm": 6.389760222001589, + "learning_rate": 9.85022691840041e-06, + "loss": 17.8356, + "step": 5787 + }, + { + "epoch": 0.10579998903248213, + "grad_norm": 7.11371599600536, + "learning_rate": 9.850155001664565e-06, + "loss": 18.0679, + "step": 5788 + }, + { + "epoch": 0.10581826822892866, + "grad_norm": 8.723889814178113, + "learning_rate": 9.85008306792935e-06, + "loss": 17.9585, + "step": 5789 + }, + { + "epoch": 0.10583654742537518, + "grad_norm": 8.407616307477696, + "learning_rate": 9.850011117195013e-06, + "loss": 18.2149, + "step": 5790 + }, + { + "epoch": 0.1058548266218217, + "grad_norm": 7.7439217350300025, + "learning_rate": 9.849939149461807e-06, + "loss": 18.1134, + "step": 5791 + }, + { + "epoch": 0.10587310581826823, + "grad_norm": 7.181175190486328, + "learning_rate": 9.849867164729987e-06, + "loss": 17.6784, + "step": 5792 + }, + { + "epoch": 0.10589138501471475, + "grad_norm": 7.445632998724882, + "learning_rate": 9.849795162999803e-06, + "loss": 17.9333, + "step": 5793 + }, + { + "epoch": 0.10590966421116128, + "grad_norm": 18.655089934948883, + "learning_rate": 9.849723144271508e-06, + "loss": 18.4331, + "step": 5794 + }, + { + "epoch": 0.1059279434076078, + "grad_norm": 8.358368378192516, + "learning_rate": 9.849651108545352e-06, + "loss": 18.5816, + "step": 5795 + }, + { + "epoch": 0.10594622260405433, + "grad_norm": 7.184188658641208, + "learning_rate": 9.849579055821593e-06, + "loss": 17.4722, + "step": 5796 + }, + { + "epoch": 0.10596450180050085, + "grad_norm": 6.01263250028207, + "learning_rate": 9.84950698610048e-06, + "loss": 17.2748, + "step": 5797 + }, + { + "epoch": 0.10598278099694737, + "grad_norm": 6.957652918901836, + "learning_rate": 9.849434899382262e-06, + "loss": 17.7175, + "step": 5798 + }, + { + "epoch": 0.10600106019339389, + "grad_norm": 7.834313757092882, + "learning_rate": 9.8493627956672e-06, + "loss": 17.8026, + "step": 5799 + }, + { + "epoch": 0.10601933938984043, + "grad_norm": 6.410711562250967, + "learning_rate": 9.84929067495554e-06, + "loss": 17.3805, + "step": 5800 + }, + { + "epoch": 0.10603761858628695, + "grad_norm": 7.576663577969768, + "learning_rate": 9.849218537247536e-06, + "loss": 17.9673, + "step": 5801 + }, + { + "epoch": 0.10605589778273347, + "grad_norm": 7.310560435954196, + "learning_rate": 9.849146382543445e-06, + "loss": 18.0013, + "step": 5802 + }, + { + "epoch": 0.10607417697918, + "grad_norm": 9.029088680320438, + "learning_rate": 9.849074210843513e-06, + "loss": 18.54, + "step": 5803 + }, + { + "epoch": 0.10609245617562651, + "grad_norm": 7.417258750524977, + "learning_rate": 9.849002022148e-06, + "loss": 18.0685, + "step": 5804 + }, + { + "epoch": 0.10611073537207304, + "grad_norm": 8.217334091701655, + "learning_rate": 9.848929816457153e-06, + "loss": 18.4651, + "step": 5805 + }, + { + "epoch": 0.10612901456851957, + "grad_norm": 8.143445762336139, + "learning_rate": 
9.848857593771228e-06, + "loss": 18.1567, + "step": 5806 + }, + { + "epoch": 0.1061472937649661, + "grad_norm": 7.917228134414341, + "learning_rate": 9.84878535409048e-06, + "loss": 18.3476, + "step": 5807 + }, + { + "epoch": 0.10616557296141262, + "grad_norm": 7.883135615970315, + "learning_rate": 9.848713097415159e-06, + "loss": 18.3613, + "step": 5808 + }, + { + "epoch": 0.10618385215785914, + "grad_norm": 6.170311486863312, + "learning_rate": 9.848640823745518e-06, + "loss": 17.4302, + "step": 5809 + }, + { + "epoch": 0.10620213135430566, + "grad_norm": 7.893547079062363, + "learning_rate": 9.848568533081815e-06, + "loss": 18.4974, + "step": 5810 + }, + { + "epoch": 0.1062204105507522, + "grad_norm": 5.988982428265839, + "learning_rate": 9.848496225424298e-06, + "loss": 17.2988, + "step": 5811 + }, + { + "epoch": 0.10623868974719872, + "grad_norm": 7.861364822837715, + "learning_rate": 9.84842390077322e-06, + "loss": 18.2354, + "step": 5812 + }, + { + "epoch": 0.10625696894364524, + "grad_norm": 6.942108917709645, + "learning_rate": 9.848351559128842e-06, + "loss": 17.7698, + "step": 5813 + }, + { + "epoch": 0.10627524814009176, + "grad_norm": 6.922394060836148, + "learning_rate": 9.848279200491409e-06, + "loss": 17.5157, + "step": 5814 + }, + { + "epoch": 0.10629352733653828, + "grad_norm": 6.213133657148186, + "learning_rate": 9.848206824861179e-06, + "loss": 17.3671, + "step": 5815 + }, + { + "epoch": 0.1063118065329848, + "grad_norm": 7.1841358065442265, + "learning_rate": 9.848134432238404e-06, + "loss": 17.5998, + "step": 5816 + }, + { + "epoch": 0.10633008572943134, + "grad_norm": 6.10260872789878, + "learning_rate": 9.84806202262334e-06, + "loss": 17.3957, + "step": 5817 + }, + { + "epoch": 0.10634836492587786, + "grad_norm": 8.596100792519103, + "learning_rate": 9.847989596016239e-06, + "loss": 18.6582, + "step": 5818 + }, + { + "epoch": 0.10636664412232438, + "grad_norm": 7.174812522740201, + "learning_rate": 9.847917152417354e-06, + "loss": 18.0218, + "step": 5819 + }, + { + "epoch": 0.1063849233187709, + "grad_norm": 7.463897320160827, + "learning_rate": 9.847844691826941e-06, + "loss": 17.8591, + "step": 5820 + }, + { + "epoch": 0.10640320251521743, + "grad_norm": 6.616699430849265, + "learning_rate": 9.847772214245252e-06, + "loss": 17.6413, + "step": 5821 + }, + { + "epoch": 0.10642148171166395, + "grad_norm": 7.484435459009545, + "learning_rate": 9.847699719672543e-06, + "loss": 17.8048, + "step": 5822 + }, + { + "epoch": 0.10643976090811048, + "grad_norm": 7.785509005815104, + "learning_rate": 9.847627208109067e-06, + "loss": 18.2513, + "step": 5823 + }, + { + "epoch": 0.106458040104557, + "grad_norm": 7.029673768842909, + "learning_rate": 9.847554679555078e-06, + "loss": 17.6034, + "step": 5824 + }, + { + "epoch": 0.10647631930100353, + "grad_norm": 7.060688571159415, + "learning_rate": 9.847482134010833e-06, + "loss": 17.9917, + "step": 5825 + }, + { + "epoch": 0.10649459849745005, + "grad_norm": 7.408573515883231, + "learning_rate": 9.84740957147658e-06, + "loss": 18.0168, + "step": 5826 + }, + { + "epoch": 0.10651287769389657, + "grad_norm": 6.007931771023013, + "learning_rate": 9.84733699195258e-06, + "loss": 17.0875, + "step": 5827 + }, + { + "epoch": 0.1065311568903431, + "grad_norm": 6.920791435860862, + "learning_rate": 9.847264395439083e-06, + "loss": 17.8657, + "step": 5828 + }, + { + "epoch": 0.10654943608678963, + "grad_norm": 7.043542400746885, + "learning_rate": 9.847191781936344e-06, + "loss": 17.6116, + "step": 5829 + }, + { + "epoch": 
0.10656771528323615, + "grad_norm": 8.035065072369735, + "learning_rate": 9.84711915144462e-06, + "loss": 17.736, + "step": 5830 + }, + { + "epoch": 0.10658599447968267, + "grad_norm": 6.635212426764212, + "learning_rate": 9.847046503964165e-06, + "loss": 17.4266, + "step": 5831 + }, + { + "epoch": 0.1066042736761292, + "grad_norm": 7.135348780517533, + "learning_rate": 9.84697383949523e-06, + "loss": 17.7999, + "step": 5832 + }, + { + "epoch": 0.10662255287257572, + "grad_norm": 7.415046581608795, + "learning_rate": 9.846901158038074e-06, + "loss": 17.75, + "step": 5833 + }, + { + "epoch": 0.10664083206902225, + "grad_norm": 7.4214861048291, + "learning_rate": 9.846828459592949e-06, + "loss": 17.939, + "step": 5834 + }, + { + "epoch": 0.10665911126546877, + "grad_norm": 6.618460113089114, + "learning_rate": 9.84675574416011e-06, + "loss": 17.8388, + "step": 5835 + }, + { + "epoch": 0.1066773904619153, + "grad_norm": 7.193582755325592, + "learning_rate": 9.846683011739814e-06, + "loss": 17.7923, + "step": 5836 + }, + { + "epoch": 0.10669566965836182, + "grad_norm": 6.276348611044246, + "learning_rate": 9.846610262332316e-06, + "loss": 17.6146, + "step": 5837 + }, + { + "epoch": 0.10671394885480834, + "grad_norm": 6.945116714956209, + "learning_rate": 9.846537495937868e-06, + "loss": 17.5195, + "step": 5838 + }, + { + "epoch": 0.10673222805125486, + "grad_norm": 7.042928447394009, + "learning_rate": 9.846464712556727e-06, + "loss": 17.6989, + "step": 5839 + }, + { + "epoch": 0.1067505072477014, + "grad_norm": 8.330215381609719, + "learning_rate": 9.846391912189147e-06, + "loss": 18.2885, + "step": 5840 + }, + { + "epoch": 0.10676878644414792, + "grad_norm": 8.290067979149216, + "learning_rate": 9.846319094835385e-06, + "loss": 17.9417, + "step": 5841 + }, + { + "epoch": 0.10678706564059444, + "grad_norm": 7.494712764272132, + "learning_rate": 9.846246260495694e-06, + "loss": 17.9946, + "step": 5842 + }, + { + "epoch": 0.10680534483704096, + "grad_norm": 7.461643328013089, + "learning_rate": 9.846173409170333e-06, + "loss": 17.9484, + "step": 5843 + }, + { + "epoch": 0.10682362403348748, + "grad_norm": 6.459782126537119, + "learning_rate": 9.846100540859552e-06, + "loss": 17.4873, + "step": 5844 + }, + { + "epoch": 0.10684190322993402, + "grad_norm": 7.096034223286772, + "learning_rate": 9.846027655563608e-06, + "loss": 17.851, + "step": 5845 + }, + { + "epoch": 0.10686018242638054, + "grad_norm": 6.840368061161655, + "learning_rate": 9.84595475328276e-06, + "loss": 17.6252, + "step": 5846 + }, + { + "epoch": 0.10687846162282706, + "grad_norm": 8.080219112226988, + "learning_rate": 9.84588183401726e-06, + "loss": 18.2538, + "step": 5847 + }, + { + "epoch": 0.10689674081927358, + "grad_norm": 7.212462871658821, + "learning_rate": 9.845808897767366e-06, + "loss": 17.5176, + "step": 5848 + }, + { + "epoch": 0.1069150200157201, + "grad_norm": 6.984673547302289, + "learning_rate": 9.845735944533333e-06, + "loss": 17.6274, + "step": 5849 + }, + { + "epoch": 0.10693329921216663, + "grad_norm": 7.583678677218762, + "learning_rate": 9.845662974315413e-06, + "loss": 17.9241, + "step": 5850 + }, + { + "epoch": 0.10695157840861316, + "grad_norm": 6.838934157794366, + "learning_rate": 9.845589987113866e-06, + "loss": 17.6212, + "step": 5851 + }, + { + "epoch": 0.10696985760505968, + "grad_norm": 8.029517620405162, + "learning_rate": 9.845516982928948e-06, + "loss": 17.9396, + "step": 5852 + }, + { + "epoch": 0.1069881368015062, + "grad_norm": 7.383262938437746, + "learning_rate": 9.845443961760912e-06, + 
"loss": 17.9403, + "step": 5853 + }, + { + "epoch": 0.10700641599795273, + "grad_norm": 7.23154303085679, + "learning_rate": 9.845370923610016e-06, + "loss": 18.1187, + "step": 5854 + }, + { + "epoch": 0.10702469519439925, + "grad_norm": 9.145868149614028, + "learning_rate": 9.845297868476515e-06, + "loss": 18.387, + "step": 5855 + }, + { + "epoch": 0.10704297439084577, + "grad_norm": 8.650895626845806, + "learning_rate": 9.845224796360666e-06, + "loss": 18.1168, + "step": 5856 + }, + { + "epoch": 0.10706125358729231, + "grad_norm": 6.243190581430269, + "learning_rate": 9.845151707262724e-06, + "loss": 17.367, + "step": 5857 + }, + { + "epoch": 0.10707953278373883, + "grad_norm": 8.252484291615621, + "learning_rate": 9.845078601182945e-06, + "loss": 18.4255, + "step": 5858 + }, + { + "epoch": 0.10709781198018535, + "grad_norm": 6.875617965557466, + "learning_rate": 9.845005478121588e-06, + "loss": 17.7237, + "step": 5859 + }, + { + "epoch": 0.10711609117663187, + "grad_norm": 11.891019983136049, + "learning_rate": 9.844932338078905e-06, + "loss": 18.2797, + "step": 5860 + }, + { + "epoch": 0.1071343703730784, + "grad_norm": 8.115981877473052, + "learning_rate": 9.844859181055155e-06, + "loss": 18.3821, + "step": 5861 + }, + { + "epoch": 0.10715264956952493, + "grad_norm": 8.74529679768113, + "learning_rate": 9.844786007050595e-06, + "loss": 18.3951, + "step": 5862 + }, + { + "epoch": 0.10717092876597145, + "grad_norm": 7.823005414652111, + "learning_rate": 9.84471281606548e-06, + "loss": 17.94, + "step": 5863 + }, + { + "epoch": 0.10718920796241797, + "grad_norm": 8.660947518752678, + "learning_rate": 9.844639608100066e-06, + "loss": 18.5589, + "step": 5864 + }, + { + "epoch": 0.1072074871588645, + "grad_norm": 7.86980803121859, + "learning_rate": 9.844566383154613e-06, + "loss": 18.2445, + "step": 5865 + }, + { + "epoch": 0.10722576635531102, + "grad_norm": 7.739345685288175, + "learning_rate": 9.844493141229374e-06, + "loss": 17.8264, + "step": 5866 + }, + { + "epoch": 0.10724404555175754, + "grad_norm": 7.16751922221443, + "learning_rate": 9.844419882324606e-06, + "loss": 17.4311, + "step": 5867 + }, + { + "epoch": 0.10726232474820407, + "grad_norm": 6.227577904831335, + "learning_rate": 9.844346606440566e-06, + "loss": 17.5006, + "step": 5868 + }, + { + "epoch": 0.1072806039446506, + "grad_norm": 8.450923211490991, + "learning_rate": 9.844273313577516e-06, + "loss": 18.5062, + "step": 5869 + }, + { + "epoch": 0.10729888314109712, + "grad_norm": 6.767150093959317, + "learning_rate": 9.844200003735703e-06, + "loss": 17.5192, + "step": 5870 + }, + { + "epoch": 0.10731716233754364, + "grad_norm": 7.859555479293641, + "learning_rate": 9.844126676915393e-06, + "loss": 17.8175, + "step": 5871 + }, + { + "epoch": 0.10733544153399016, + "grad_norm": 7.985006211089855, + "learning_rate": 9.844053333116836e-06, + "loss": 18.0713, + "step": 5872 + }, + { + "epoch": 0.1073537207304367, + "grad_norm": 8.03526743666188, + "learning_rate": 9.843979972340295e-06, + "loss": 17.9783, + "step": 5873 + }, + { + "epoch": 0.10737199992688322, + "grad_norm": 7.193838453325093, + "learning_rate": 9.843906594586025e-06, + "loss": 17.7052, + "step": 5874 + }, + { + "epoch": 0.10739027912332974, + "grad_norm": 7.0876536261099305, + "learning_rate": 9.84383319985428e-06, + "loss": 17.8391, + "step": 5875 + }, + { + "epoch": 0.10740855831977626, + "grad_norm": 8.60170433420796, + "learning_rate": 9.843759788145323e-06, + "loss": 18.1661, + "step": 5876 + }, + { + "epoch": 0.10742683751622278, + "grad_norm": 
8.318778188731175, + "learning_rate": 9.843686359459406e-06, + "loss": 18.1817, + "step": 5877 + }, + { + "epoch": 0.1074451167126693, + "grad_norm": 6.712672907800277, + "learning_rate": 9.84361291379679e-06, + "loss": 17.5527, + "step": 5878 + }, + { + "epoch": 0.10746339590911584, + "grad_norm": 8.150534231108727, + "learning_rate": 9.84353945115773e-06, + "loss": 18.1581, + "step": 5879 + }, + { + "epoch": 0.10748167510556236, + "grad_norm": 6.359388740830054, + "learning_rate": 9.843465971542485e-06, + "loss": 17.3468, + "step": 5880 + }, + { + "epoch": 0.10749995430200888, + "grad_norm": 6.293291538227234, + "learning_rate": 9.843392474951312e-06, + "loss": 17.4126, + "step": 5881 + }, + { + "epoch": 0.1075182334984554, + "grad_norm": 7.168148977752168, + "learning_rate": 9.843318961384469e-06, + "loss": 17.7767, + "step": 5882 + }, + { + "epoch": 0.10753651269490193, + "grad_norm": 7.088881635224715, + "learning_rate": 9.843245430842215e-06, + "loss": 17.7949, + "step": 5883 + }, + { + "epoch": 0.10755479189134845, + "grad_norm": 6.50650777081013, + "learning_rate": 9.843171883324802e-06, + "loss": 17.6451, + "step": 5884 + }, + { + "epoch": 0.10757307108779499, + "grad_norm": 6.569283480801569, + "learning_rate": 9.843098318832495e-06, + "loss": 17.816, + "step": 5885 + }, + { + "epoch": 0.10759135028424151, + "grad_norm": 7.836204033462986, + "learning_rate": 9.843024737365548e-06, + "loss": 17.3799, + "step": 5886 + }, + { + "epoch": 0.10760962948068803, + "grad_norm": 8.043309457711423, + "learning_rate": 9.84295113892422e-06, + "loss": 18.2676, + "step": 5887 + }, + { + "epoch": 0.10762790867713455, + "grad_norm": 7.7764713348071695, + "learning_rate": 9.842877523508766e-06, + "loss": 18.2114, + "step": 5888 + }, + { + "epoch": 0.10764618787358107, + "grad_norm": 8.185116511570026, + "learning_rate": 9.842803891119448e-06, + "loss": 17.9973, + "step": 5889 + }, + { + "epoch": 0.10766446707002761, + "grad_norm": 7.8794932825997295, + "learning_rate": 9.842730241756524e-06, + "loss": 18.078, + "step": 5890 + }, + { + "epoch": 0.10768274626647413, + "grad_norm": 6.539034919480205, + "learning_rate": 9.842656575420248e-06, + "loss": 17.4952, + "step": 5891 + }, + { + "epoch": 0.10770102546292065, + "grad_norm": 6.66230941878411, + "learning_rate": 9.842582892110884e-06, + "loss": 17.3589, + "step": 5892 + }, + { + "epoch": 0.10771930465936717, + "grad_norm": 7.067289793977617, + "learning_rate": 9.842509191828686e-06, + "loss": 17.7478, + "step": 5893 + }, + { + "epoch": 0.1077375838558137, + "grad_norm": 7.951195424808459, + "learning_rate": 9.842435474573912e-06, + "loss": 17.9751, + "step": 5894 + }, + { + "epoch": 0.10775586305226022, + "grad_norm": 6.353737087620511, + "learning_rate": 9.842361740346824e-06, + "loss": 17.5808, + "step": 5895 + }, + { + "epoch": 0.10777414224870675, + "grad_norm": 6.010220626394896, + "learning_rate": 9.842287989147676e-06, + "loss": 17.3642, + "step": 5896 + }, + { + "epoch": 0.10779242144515327, + "grad_norm": 7.558035373074513, + "learning_rate": 9.84221422097673e-06, + "loss": 17.7662, + "step": 5897 + }, + { + "epoch": 0.1078107006415998, + "grad_norm": 7.404483922619043, + "learning_rate": 9.842140435834245e-06, + "loss": 17.7805, + "step": 5898 + }, + { + "epoch": 0.10782897983804632, + "grad_norm": 7.124061337922959, + "learning_rate": 9.842066633720477e-06, + "loss": 17.914, + "step": 5899 + }, + { + "epoch": 0.10784725903449284, + "grad_norm": 6.873473487171974, + "learning_rate": 9.841992814635683e-06, + "loss": 17.6703, + "step": 
5900 + }, + { + "epoch": 0.10786553823093936, + "grad_norm": 8.722661879244907, + "learning_rate": 9.841918978580128e-06, + "loss": 18.4554, + "step": 5901 + }, + { + "epoch": 0.1078838174273859, + "grad_norm": 6.38762554509518, + "learning_rate": 9.841845125554067e-06, + "loss": 17.3544, + "step": 5902 + }, + { + "epoch": 0.10790209662383242, + "grad_norm": 6.1568219449726405, + "learning_rate": 9.841771255557757e-06, + "loss": 17.3558, + "step": 5903 + }, + { + "epoch": 0.10792037582027894, + "grad_norm": 6.749550000349546, + "learning_rate": 9.841697368591458e-06, + "loss": 17.4397, + "step": 5904 + }, + { + "epoch": 0.10793865501672546, + "grad_norm": 5.768599118609642, + "learning_rate": 9.841623464655433e-06, + "loss": 17.223, + "step": 5905 + }, + { + "epoch": 0.10795693421317198, + "grad_norm": 6.995410955249487, + "learning_rate": 9.841549543749935e-06, + "loss": 17.6919, + "step": 5906 + }, + { + "epoch": 0.10797521340961852, + "grad_norm": 6.957556811186082, + "learning_rate": 9.841475605875227e-06, + "loss": 17.8107, + "step": 5907 + }, + { + "epoch": 0.10799349260606504, + "grad_norm": 12.347647844029785, + "learning_rate": 9.841401651031568e-06, + "loss": 18.7845, + "step": 5908 + }, + { + "epoch": 0.10801177180251156, + "grad_norm": 6.269954101478955, + "learning_rate": 9.841327679219214e-06, + "loss": 17.7051, + "step": 5909 + }, + { + "epoch": 0.10803005099895809, + "grad_norm": 6.948777492521897, + "learning_rate": 9.841253690438429e-06, + "loss": 17.6624, + "step": 5910 + }, + { + "epoch": 0.10804833019540461, + "grad_norm": 5.970114760504495, + "learning_rate": 9.84117968468947e-06, + "loss": 17.1389, + "step": 5911 + }, + { + "epoch": 0.10806660939185113, + "grad_norm": 7.510658700805964, + "learning_rate": 9.841105661972594e-06, + "loss": 17.6788, + "step": 5912 + }, + { + "epoch": 0.10808488858829766, + "grad_norm": 7.203161598491203, + "learning_rate": 9.841031622288065e-06, + "loss": 17.8298, + "step": 5913 + }, + { + "epoch": 0.10810316778474419, + "grad_norm": 6.859963289080416, + "learning_rate": 9.84095756563614e-06, + "loss": 17.3716, + "step": 5914 + }, + { + "epoch": 0.10812144698119071, + "grad_norm": 7.47334448271494, + "learning_rate": 9.840883492017078e-06, + "loss": 17.9008, + "step": 5915 + }, + { + "epoch": 0.10813972617763723, + "grad_norm": 6.113853926642932, + "learning_rate": 9.84080940143114e-06, + "loss": 17.2308, + "step": 5916 + }, + { + "epoch": 0.10815800537408375, + "grad_norm": 6.4299119208739866, + "learning_rate": 9.840735293878585e-06, + "loss": 17.5363, + "step": 5917 + }, + { + "epoch": 0.10817628457053027, + "grad_norm": 7.4029030745756215, + "learning_rate": 9.840661169359673e-06, + "loss": 18.1808, + "step": 5918 + }, + { + "epoch": 0.10819456376697681, + "grad_norm": 7.13773841709815, + "learning_rate": 9.840587027874661e-06, + "loss": 17.9393, + "step": 5919 + }, + { + "epoch": 0.10821284296342333, + "grad_norm": 7.589838535219721, + "learning_rate": 9.840512869423816e-06, + "loss": 17.9286, + "step": 5920 + }, + { + "epoch": 0.10823112215986985, + "grad_norm": 6.882416624256553, + "learning_rate": 9.840438694007391e-06, + "loss": 17.5512, + "step": 5921 + }, + { + "epoch": 0.10824940135631637, + "grad_norm": 6.743110939806677, + "learning_rate": 9.840364501625647e-06, + "loss": 17.6088, + "step": 5922 + }, + { + "epoch": 0.1082676805527629, + "grad_norm": 8.729816397999063, + "learning_rate": 9.84029029227885e-06, + "loss": 18.6794, + "step": 5923 + }, + { + "epoch": 0.10828595974920943, + "grad_norm": 7.751836073429958, + 
"learning_rate": 9.840216065967251e-06, + "loss": 18.2195, + "step": 5924 + }, + { + "epoch": 0.10830423894565595, + "grad_norm": 7.417194641590402, + "learning_rate": 9.840141822691116e-06, + "loss": 17.6567, + "step": 5925 + }, + { + "epoch": 0.10832251814210248, + "grad_norm": 7.6162404810380036, + "learning_rate": 9.840067562450704e-06, + "loss": 17.8032, + "step": 5926 + }, + { + "epoch": 0.108340797338549, + "grad_norm": 6.9424441577606695, + "learning_rate": 9.839993285246276e-06, + "loss": 17.9385, + "step": 5927 + }, + { + "epoch": 0.10835907653499552, + "grad_norm": 7.8256812426243645, + "learning_rate": 9.839918991078091e-06, + "loss": 17.7594, + "step": 5928 + }, + { + "epoch": 0.10837735573144204, + "grad_norm": 8.19408397305482, + "learning_rate": 9.83984467994641e-06, + "loss": 18.3648, + "step": 5929 + }, + { + "epoch": 0.10839563492788858, + "grad_norm": 7.69688900212192, + "learning_rate": 9.839770351851494e-06, + "loss": 18.1308, + "step": 5930 + }, + { + "epoch": 0.1084139141243351, + "grad_norm": 7.403797404073557, + "learning_rate": 9.839696006793601e-06, + "loss": 17.5829, + "step": 5931 + }, + { + "epoch": 0.10843219332078162, + "grad_norm": 7.783961116571769, + "learning_rate": 9.839621644772996e-06, + "loss": 18.2717, + "step": 5932 + }, + { + "epoch": 0.10845047251722814, + "grad_norm": 8.471935534814689, + "learning_rate": 9.839547265789935e-06, + "loss": 18.0535, + "step": 5933 + }, + { + "epoch": 0.10846875171367466, + "grad_norm": 6.6672925736520625, + "learning_rate": 9.839472869844683e-06, + "loss": 17.2993, + "step": 5934 + }, + { + "epoch": 0.10848703091012118, + "grad_norm": 8.592884386398634, + "learning_rate": 9.839398456937497e-06, + "loss": 18.2076, + "step": 5935 + }, + { + "epoch": 0.10850531010656772, + "grad_norm": 7.319125332836831, + "learning_rate": 9.839324027068638e-06, + "loss": 17.8035, + "step": 5936 + }, + { + "epoch": 0.10852358930301424, + "grad_norm": 7.908470337743244, + "learning_rate": 9.83924958023837e-06, + "loss": 18.4819, + "step": 5937 + }, + { + "epoch": 0.10854186849946076, + "grad_norm": 8.936807942210304, + "learning_rate": 9.839175116446953e-06, + "loss": 18.6143, + "step": 5938 + }, + { + "epoch": 0.10856014769590729, + "grad_norm": 6.446386405095052, + "learning_rate": 9.839100635694644e-06, + "loss": 17.5366, + "step": 5939 + }, + { + "epoch": 0.10857842689235381, + "grad_norm": 7.69040520412441, + "learning_rate": 9.83902613798171e-06, + "loss": 17.7365, + "step": 5940 + }, + { + "epoch": 0.10859670608880034, + "grad_norm": 6.101400336742993, + "learning_rate": 9.83895162330841e-06, + "loss": 17.2802, + "step": 5941 + }, + { + "epoch": 0.10861498528524687, + "grad_norm": 6.626590924164906, + "learning_rate": 9.838877091675001e-06, + "loss": 17.5935, + "step": 5942 + }, + { + "epoch": 0.10863326448169339, + "grad_norm": 9.680701863675633, + "learning_rate": 9.83880254308175e-06, + "loss": 18.6882, + "step": 5943 + }, + { + "epoch": 0.10865154367813991, + "grad_norm": 7.19408308711397, + "learning_rate": 9.838727977528917e-06, + "loss": 17.7942, + "step": 5944 + }, + { + "epoch": 0.10866982287458643, + "grad_norm": 6.788870980507977, + "learning_rate": 9.83865339501676e-06, + "loss": 17.4205, + "step": 5945 + }, + { + "epoch": 0.10868810207103295, + "grad_norm": 7.200683231587349, + "learning_rate": 9.838578795545544e-06, + "loss": 17.8494, + "step": 5946 + }, + { + "epoch": 0.10870638126747949, + "grad_norm": 6.88088555284488, + "learning_rate": 9.838504179115528e-06, + "loss": 17.8073, + "step": 5947 + }, + { + 
"epoch": 0.10872466046392601, + "grad_norm": 9.24156242485691, + "learning_rate": 9.838429545726977e-06, + "loss": 18.2847, + "step": 5948 + }, + { + "epoch": 0.10874293966037253, + "grad_norm": 7.4897225574815405, + "learning_rate": 9.83835489538015e-06, + "loss": 17.7234, + "step": 5949 + }, + { + "epoch": 0.10876121885681905, + "grad_norm": 7.260971801654793, + "learning_rate": 9.838280228075306e-06, + "loss": 18.0967, + "step": 5950 + }, + { + "epoch": 0.10877949805326557, + "grad_norm": 7.938202720302389, + "learning_rate": 9.838205543812712e-06, + "loss": 17.9975, + "step": 5951 + }, + { + "epoch": 0.1087977772497121, + "grad_norm": 7.715660297079626, + "learning_rate": 9.838130842592626e-06, + "loss": 17.9109, + "step": 5952 + }, + { + "epoch": 0.10881605644615863, + "grad_norm": 7.0959843202199675, + "learning_rate": 9.838056124415312e-06, + "loss": 17.5292, + "step": 5953 + }, + { + "epoch": 0.10883433564260515, + "grad_norm": 7.051511554435222, + "learning_rate": 9.837981389281031e-06, + "loss": 17.4281, + "step": 5954 + }, + { + "epoch": 0.10885261483905168, + "grad_norm": 7.186044339479653, + "learning_rate": 9.837906637190046e-06, + "loss": 17.6825, + "step": 5955 + }, + { + "epoch": 0.1088708940354982, + "grad_norm": 6.802459216189041, + "learning_rate": 9.837831868142618e-06, + "loss": 17.7053, + "step": 5956 + }, + { + "epoch": 0.10888917323194472, + "grad_norm": 6.841393741814351, + "learning_rate": 9.837757082139007e-06, + "loss": 17.5404, + "step": 5957 + }, + { + "epoch": 0.10890745242839125, + "grad_norm": 6.915070035197545, + "learning_rate": 9.837682279179479e-06, + "loss": 17.7828, + "step": 5958 + }, + { + "epoch": 0.10892573162483778, + "grad_norm": 6.676637498053301, + "learning_rate": 9.837607459264294e-06, + "loss": 17.5676, + "step": 5959 + }, + { + "epoch": 0.1089440108212843, + "grad_norm": 8.202980699646995, + "learning_rate": 9.837532622393716e-06, + "loss": 17.9422, + "step": 5960 + }, + { + "epoch": 0.10896229001773082, + "grad_norm": 7.886380009870115, + "learning_rate": 9.837457768568004e-06, + "loss": 18.4824, + "step": 5961 + }, + { + "epoch": 0.10898056921417734, + "grad_norm": 7.758729587760364, + "learning_rate": 9.837382897787423e-06, + "loss": 17.5843, + "step": 5962 + }, + { + "epoch": 0.10899884841062386, + "grad_norm": 7.420610363317163, + "learning_rate": 9.837308010052236e-06, + "loss": 18.0441, + "step": 5963 + }, + { + "epoch": 0.1090171276070704, + "grad_norm": 8.33113732511878, + "learning_rate": 9.837233105362703e-06, + "loss": 18.0701, + "step": 5964 + }, + { + "epoch": 0.10903540680351692, + "grad_norm": 7.841425209863305, + "learning_rate": 9.837158183719086e-06, + "loss": 17.9798, + "step": 5965 + }, + { + "epoch": 0.10905368599996344, + "grad_norm": 8.093031035859577, + "learning_rate": 9.837083245121651e-06, + "loss": 18.37, + "step": 5966 + }, + { + "epoch": 0.10907196519640996, + "grad_norm": 8.81414523026481, + "learning_rate": 9.83700828957066e-06, + "loss": 18.2651, + "step": 5967 + }, + { + "epoch": 0.10909024439285649, + "grad_norm": 6.699458389845013, + "learning_rate": 9.836933317066373e-06, + "loss": 17.5269, + "step": 5968 + }, + { + "epoch": 0.10910852358930301, + "grad_norm": 9.599192042839118, + "learning_rate": 9.836858327609055e-06, + "loss": 18.7339, + "step": 5969 + }, + { + "epoch": 0.10912680278574954, + "grad_norm": 7.296966831416328, + "learning_rate": 9.836783321198968e-06, + "loss": 18.1201, + "step": 5970 + }, + { + "epoch": 0.10914508198219607, + "grad_norm": 7.493497210818273, + "learning_rate": 
9.836708297836375e-06, + "loss": 17.7271, + "step": 5971 + }, + { + "epoch": 0.10916336117864259, + "grad_norm": 6.62248749502922, + "learning_rate": 9.83663325752154e-06, + "loss": 17.6155, + "step": 5972 + }, + { + "epoch": 0.10918164037508911, + "grad_norm": 7.586502094244091, + "learning_rate": 9.836558200254725e-06, + "loss": 17.8567, + "step": 5973 + }, + { + "epoch": 0.10919991957153563, + "grad_norm": 6.763971976362987, + "learning_rate": 9.836483126036192e-06, + "loss": 17.2696, + "step": 5974 + }, + { + "epoch": 0.10921819876798217, + "grad_norm": 6.633354735131761, + "learning_rate": 9.836408034866207e-06, + "loss": 17.3721, + "step": 5975 + }, + { + "epoch": 0.10923647796442869, + "grad_norm": 6.853504839258941, + "learning_rate": 9.836332926745031e-06, + "loss": 17.5828, + "step": 5976 + }, + { + "epoch": 0.10925475716087521, + "grad_norm": 7.780416455535054, + "learning_rate": 9.836257801672927e-06, + "loss": 18.1243, + "step": 5977 + }, + { + "epoch": 0.10927303635732173, + "grad_norm": 6.5123631034928495, + "learning_rate": 9.83618265965016e-06, + "loss": 17.4059, + "step": 5978 + }, + { + "epoch": 0.10929131555376825, + "grad_norm": 8.787405722245287, + "learning_rate": 9.836107500676992e-06, + "loss": 18.1918, + "step": 5979 + }, + { + "epoch": 0.10930959475021478, + "grad_norm": 6.9205916043598235, + "learning_rate": 9.836032324753687e-06, + "loss": 17.8655, + "step": 5980 + }, + { + "epoch": 0.10932787394666131, + "grad_norm": 7.818444687855867, + "learning_rate": 9.835957131880508e-06, + "loss": 18.3451, + "step": 5981 + }, + { + "epoch": 0.10934615314310783, + "grad_norm": 7.955793940216197, + "learning_rate": 9.83588192205772e-06, + "loss": 18.0892, + "step": 5982 + }, + { + "epoch": 0.10936443233955435, + "grad_norm": 6.522733972424496, + "learning_rate": 9.835806695285583e-06, + "loss": 17.4412, + "step": 5983 + }, + { + "epoch": 0.10938271153600088, + "grad_norm": 5.546367331964139, + "learning_rate": 9.835731451564365e-06, + "loss": 17.0042, + "step": 5984 + }, + { + "epoch": 0.1094009907324474, + "grad_norm": 8.254720366045445, + "learning_rate": 9.835656190894329e-06, + "loss": 18.5262, + "step": 5985 + }, + { + "epoch": 0.10941926992889392, + "grad_norm": 8.905036048111233, + "learning_rate": 9.835580913275736e-06, + "loss": 18.3572, + "step": 5986 + }, + { + "epoch": 0.10943754912534046, + "grad_norm": 5.716347342494404, + "learning_rate": 9.835505618708851e-06, + "loss": 17.3397, + "step": 5987 + }, + { + "epoch": 0.10945582832178698, + "grad_norm": 7.297773266161931, + "learning_rate": 9.83543030719394e-06, + "loss": 17.7705, + "step": 5988 + }, + { + "epoch": 0.1094741075182335, + "grad_norm": 7.542163751455365, + "learning_rate": 9.835354978731265e-06, + "loss": 17.8389, + "step": 5989 + }, + { + "epoch": 0.10949238671468002, + "grad_norm": 7.400703798602654, + "learning_rate": 9.835279633321091e-06, + "loss": 17.7062, + "step": 5990 + }, + { + "epoch": 0.10951066591112654, + "grad_norm": 6.609218880417126, + "learning_rate": 9.83520427096368e-06, + "loss": 17.4464, + "step": 5991 + }, + { + "epoch": 0.10952894510757308, + "grad_norm": 8.27129966257969, + "learning_rate": 9.835128891659298e-06, + "loss": 18.5295, + "step": 5992 + }, + { + "epoch": 0.1095472243040196, + "grad_norm": 7.842780011688152, + "learning_rate": 9.835053495408209e-06, + "loss": 17.9532, + "step": 5993 + }, + { + "epoch": 0.10956550350046612, + "grad_norm": 7.941038020737538, + "learning_rate": 9.834978082210678e-06, + "loss": 18.1383, + "step": 5994 + }, + { + "epoch": 
0.10958378269691264, + "grad_norm": 7.385043901041865, + "learning_rate": 9.834902652066966e-06, + "loss": 18.058, + "step": 5995 + }, + { + "epoch": 0.10960206189335917, + "grad_norm": 6.432381254149862, + "learning_rate": 9.834827204977342e-06, + "loss": 17.1324, + "step": 5996 + }, + { + "epoch": 0.10962034108980569, + "grad_norm": 7.004934340731052, + "learning_rate": 9.834751740942068e-06, + "loss": 17.6494, + "step": 5997 + }, + { + "epoch": 0.10963862028625222, + "grad_norm": 6.879043813875452, + "learning_rate": 9.834676259961407e-06, + "loss": 17.6899, + "step": 5998 + }, + { + "epoch": 0.10965689948269874, + "grad_norm": 8.461280155671439, + "learning_rate": 9.834600762035626e-06, + "loss": 18.5268, + "step": 5999 + }, + { + "epoch": 0.10967517867914527, + "grad_norm": 7.585188465613532, + "learning_rate": 9.83452524716499e-06, + "loss": 17.8837, + "step": 6000 + }, + { + "epoch": 0.10969345787559179, + "grad_norm": 7.404804223634506, + "learning_rate": 9.83444971534976e-06, + "loss": 18.0314, + "step": 6001 + }, + { + "epoch": 0.10971173707203831, + "grad_norm": 8.341881637201688, + "learning_rate": 9.834374166590206e-06, + "loss": 17.9829, + "step": 6002 + }, + { + "epoch": 0.10973001626848483, + "grad_norm": 6.846917649382119, + "learning_rate": 9.834298600886589e-06, + "loss": 17.9417, + "step": 6003 + }, + { + "epoch": 0.10974829546493137, + "grad_norm": 6.829582477175553, + "learning_rate": 9.834223018239175e-06, + "loss": 17.6357, + "step": 6004 + }, + { + "epoch": 0.10976657466137789, + "grad_norm": 6.599843832524429, + "learning_rate": 9.83414741864823e-06, + "loss": 17.4774, + "step": 6005 + }, + { + "epoch": 0.10978485385782441, + "grad_norm": 6.424816102653562, + "learning_rate": 9.834071802114016e-06, + "loss": 17.4823, + "step": 6006 + }, + { + "epoch": 0.10980313305427093, + "grad_norm": 6.48361962247988, + "learning_rate": 9.833996168636801e-06, + "loss": 17.4452, + "step": 6007 + }, + { + "epoch": 0.10982141225071745, + "grad_norm": 6.130064538530384, + "learning_rate": 9.833920518216848e-06, + "loss": 17.2327, + "step": 6008 + }, + { + "epoch": 0.10983969144716399, + "grad_norm": 7.414484958983045, + "learning_rate": 9.833844850854422e-06, + "loss": 18.0387, + "step": 6009 + }, + { + "epoch": 0.10985797064361051, + "grad_norm": 6.878425341927839, + "learning_rate": 9.833769166549792e-06, + "loss": 17.406, + "step": 6010 + }, + { + "epoch": 0.10987624984005703, + "grad_norm": 7.473652822725783, + "learning_rate": 9.833693465303217e-06, + "loss": 17.9417, + "step": 6011 + }, + { + "epoch": 0.10989452903650355, + "grad_norm": 5.7167561414418095, + "learning_rate": 9.833617747114969e-06, + "loss": 17.4244, + "step": 6012 + }, + { + "epoch": 0.10991280823295008, + "grad_norm": 7.021581532495483, + "learning_rate": 9.83354201198531e-06, + "loss": 17.5103, + "step": 6013 + }, + { + "epoch": 0.1099310874293966, + "grad_norm": 6.459942931667375, + "learning_rate": 9.833466259914503e-06, + "loss": 17.5817, + "step": 6014 + }, + { + "epoch": 0.10994936662584313, + "grad_norm": 7.1900032351537515, + "learning_rate": 9.833390490902819e-06, + "loss": 17.847, + "step": 6015 + }, + { + "epoch": 0.10996764582228966, + "grad_norm": 7.904453231222815, + "learning_rate": 9.83331470495052e-06, + "loss": 17.956, + "step": 6016 + }, + { + "epoch": 0.10998592501873618, + "grad_norm": 7.427178197504679, + "learning_rate": 9.833238902057873e-06, + "loss": 17.8389, + "step": 6017 + }, + { + "epoch": 0.1100042042151827, + "grad_norm": 6.208686767567184, + "learning_rate": 
9.83316308222514e-06, + "loss": 17.3005, + "step": 6018 + }, + { + "epoch": 0.11002248341162922, + "grad_norm": 7.847786718059512, + "learning_rate": 9.833087245452594e-06, + "loss": 17.9887, + "step": 6019 + }, + { + "epoch": 0.11004076260807574, + "grad_norm": 6.654436993277937, + "learning_rate": 9.833011391740494e-06, + "loss": 17.688, + "step": 6020 + }, + { + "epoch": 0.11005904180452228, + "grad_norm": 7.973594040381017, + "learning_rate": 9.832935521089109e-06, + "loss": 18.3191, + "step": 6021 + }, + { + "epoch": 0.1100773210009688, + "grad_norm": 8.951738839041905, + "learning_rate": 9.832859633498704e-06, + "loss": 18.1994, + "step": 6022 + }, + { + "epoch": 0.11009560019741532, + "grad_norm": 5.833255689683318, + "learning_rate": 9.832783728969546e-06, + "loss": 17.0594, + "step": 6023 + }, + { + "epoch": 0.11011387939386184, + "grad_norm": 6.54454264521597, + "learning_rate": 9.832707807501902e-06, + "loss": 17.3724, + "step": 6024 + }, + { + "epoch": 0.11013215859030837, + "grad_norm": 7.690288277658039, + "learning_rate": 9.832631869096034e-06, + "loss": 18.2661, + "step": 6025 + }, + { + "epoch": 0.1101504377867549, + "grad_norm": 7.299021017295833, + "learning_rate": 9.832555913752211e-06, + "loss": 17.555, + "step": 6026 + }, + { + "epoch": 0.11016871698320142, + "grad_norm": 6.828135335092559, + "learning_rate": 9.832479941470699e-06, + "loss": 17.5603, + "step": 6027 + }, + { + "epoch": 0.11018699617964794, + "grad_norm": 7.864703887956888, + "learning_rate": 9.832403952251765e-06, + "loss": 18.39, + "step": 6028 + }, + { + "epoch": 0.11020527537609447, + "grad_norm": 6.4129004093808515, + "learning_rate": 9.832327946095674e-06, + "loss": 17.4481, + "step": 6029 + }, + { + "epoch": 0.11022355457254099, + "grad_norm": 10.061475671645136, + "learning_rate": 9.832251923002692e-06, + "loss": 19.0137, + "step": 6030 + }, + { + "epoch": 0.11024183376898751, + "grad_norm": 6.325548363634723, + "learning_rate": 9.832175882973088e-06, + "loss": 17.4535, + "step": 6031 + }, + { + "epoch": 0.11026011296543405, + "grad_norm": 7.493245225301195, + "learning_rate": 9.832099826007126e-06, + "loss": 17.9325, + "step": 6032 + }, + { + "epoch": 0.11027839216188057, + "grad_norm": 7.8488940376631255, + "learning_rate": 9.832023752105073e-06, + "loss": 17.7612, + "step": 6033 + }, + { + "epoch": 0.11029667135832709, + "grad_norm": 7.88173752708001, + "learning_rate": 9.831947661267196e-06, + "loss": 18.057, + "step": 6034 + }, + { + "epoch": 0.11031495055477361, + "grad_norm": 6.316696587296195, + "learning_rate": 9.831871553493763e-06, + "loss": 17.3385, + "step": 6035 + }, + { + "epoch": 0.11033322975122013, + "grad_norm": 6.573038168903045, + "learning_rate": 9.831795428785038e-06, + "loss": 17.3735, + "step": 6036 + }, + { + "epoch": 0.11035150894766665, + "grad_norm": 7.502583056333343, + "learning_rate": 9.83171928714129e-06, + "loss": 18.2043, + "step": 6037 + }, + { + "epoch": 0.11036978814411319, + "grad_norm": 6.639241512757685, + "learning_rate": 9.831643128562786e-06, + "loss": 17.56, + "step": 6038 + }, + { + "epoch": 0.11038806734055971, + "grad_norm": 6.481501088163189, + "learning_rate": 9.831566953049791e-06, + "loss": 17.4618, + "step": 6039 + }, + { + "epoch": 0.11040634653700623, + "grad_norm": 7.354046600163907, + "learning_rate": 9.831490760602573e-06, + "loss": 18.3112, + "step": 6040 + }, + { + "epoch": 0.11042462573345276, + "grad_norm": 7.255419059578754, + "learning_rate": 9.8314145512214e-06, + "loss": 17.9431, + "step": 6041 + }, + { + "epoch": 
0.11044290492989928, + "grad_norm": 6.92008124755534, + "learning_rate": 9.831338324906537e-06, + "loss": 17.8271, + "step": 6042 + }, + { + "epoch": 0.11046118412634581, + "grad_norm": 6.3004293240695235, + "learning_rate": 9.831262081658253e-06, + "loss": 17.1417, + "step": 6043 + }, + { + "epoch": 0.11047946332279233, + "grad_norm": 6.620006993373256, + "learning_rate": 9.831185821476815e-06, + "loss": 17.4947, + "step": 6044 + }, + { + "epoch": 0.11049774251923886, + "grad_norm": 6.307453629542876, + "learning_rate": 9.831109544362489e-06, + "loss": 17.689, + "step": 6045 + }, + { + "epoch": 0.11051602171568538, + "grad_norm": 7.194822447342063, + "learning_rate": 9.831033250315544e-06, + "loss": 17.7781, + "step": 6046 + }, + { + "epoch": 0.1105343009121319, + "grad_norm": 8.625339065623256, + "learning_rate": 9.830956939336248e-06, + "loss": 18.3123, + "step": 6047 + }, + { + "epoch": 0.11055258010857842, + "grad_norm": 6.474551579719272, + "learning_rate": 9.830880611424866e-06, + "loss": 17.5376, + "step": 6048 + }, + { + "epoch": 0.11057085930502496, + "grad_norm": 7.036493049200884, + "learning_rate": 9.830804266581667e-06, + "loss": 17.7203, + "step": 6049 + }, + { + "epoch": 0.11058913850147148, + "grad_norm": 8.329100061696156, + "learning_rate": 9.830727904806918e-06, + "loss": 18.3742, + "step": 6050 + }, + { + "epoch": 0.110607417697918, + "grad_norm": 7.052059043051446, + "learning_rate": 9.830651526100884e-06, + "loss": 17.7059, + "step": 6051 + }, + { + "epoch": 0.11062569689436452, + "grad_norm": 6.58886304139191, + "learning_rate": 9.83057513046384e-06, + "loss": 17.5383, + "step": 6052 + }, + { + "epoch": 0.11064397609081104, + "grad_norm": 6.97142208965401, + "learning_rate": 9.830498717896047e-06, + "loss": 17.6627, + "step": 6053 + }, + { + "epoch": 0.11066225528725757, + "grad_norm": 6.601100442211125, + "learning_rate": 9.830422288397776e-06, + "loss": 17.5335, + "step": 6054 + }, + { + "epoch": 0.1106805344837041, + "grad_norm": 6.573928381752774, + "learning_rate": 9.830345841969294e-06, + "loss": 17.7606, + "step": 6055 + }, + { + "epoch": 0.11069881368015062, + "grad_norm": 6.799830932989246, + "learning_rate": 9.830269378610868e-06, + "loss": 17.5729, + "step": 6056 + }, + { + "epoch": 0.11071709287659715, + "grad_norm": 7.876836877064901, + "learning_rate": 9.830192898322768e-06, + "loss": 17.851, + "step": 6057 + }, + { + "epoch": 0.11073537207304367, + "grad_norm": 7.555648849237257, + "learning_rate": 9.83011640110526e-06, + "loss": 17.9312, + "step": 6058 + }, + { + "epoch": 0.11075365126949019, + "grad_norm": 7.203739739028518, + "learning_rate": 9.830039886958615e-06, + "loss": 17.7517, + "step": 6059 + }, + { + "epoch": 0.11077193046593672, + "grad_norm": 7.177766089570443, + "learning_rate": 9.829963355883098e-06, + "loss": 17.9962, + "step": 6060 + }, + { + "epoch": 0.11079020966238325, + "grad_norm": 6.5571110304539, + "learning_rate": 9.829886807878979e-06, + "loss": 17.6729, + "step": 6061 + }, + { + "epoch": 0.11080848885882977, + "grad_norm": 6.139209121599606, + "learning_rate": 9.829810242946525e-06, + "loss": 17.3619, + "step": 6062 + }, + { + "epoch": 0.11082676805527629, + "grad_norm": 7.792781352189638, + "learning_rate": 9.829733661086005e-06, + "loss": 17.941, + "step": 6063 + }, + { + "epoch": 0.11084504725172281, + "grad_norm": 7.615161081426064, + "learning_rate": 9.82965706229769e-06, + "loss": 17.8296, + "step": 6064 + }, + { + "epoch": 0.11086332644816933, + "grad_norm": 8.869587136548185, + "learning_rate": 
9.829580446581843e-06, + "loss": 18.6859, + "step": 6065 + }, + { + "epoch": 0.11088160564461587, + "grad_norm": 7.0451364874647595, + "learning_rate": 9.82950381393874e-06, + "loss": 17.4014, + "step": 6066 + }, + { + "epoch": 0.11089988484106239, + "grad_norm": 6.641124738432329, + "learning_rate": 9.82942716436864e-06, + "loss": 17.6367, + "step": 6067 + }, + { + "epoch": 0.11091816403750891, + "grad_norm": 6.614254508646868, + "learning_rate": 9.82935049787182e-06, + "loss": 17.6394, + "step": 6068 + }, + { + "epoch": 0.11093644323395543, + "grad_norm": 6.62975390893817, + "learning_rate": 9.829273814448546e-06, + "loss": 17.7465, + "step": 6069 + }, + { + "epoch": 0.11095472243040196, + "grad_norm": 7.512657342412706, + "learning_rate": 9.829197114099084e-06, + "loss": 17.6712, + "step": 6070 + }, + { + "epoch": 0.11097300162684848, + "grad_norm": 6.07668994729175, + "learning_rate": 9.829120396823706e-06, + "loss": 17.2348, + "step": 6071 + }, + { + "epoch": 0.11099128082329501, + "grad_norm": 8.122956038676117, + "learning_rate": 9.829043662622681e-06, + "loss": 18.3812, + "step": 6072 + }, + { + "epoch": 0.11100956001974153, + "grad_norm": 7.771634793500091, + "learning_rate": 9.828966911496277e-06, + "loss": 17.8931, + "step": 6073 + }, + { + "epoch": 0.11102783921618806, + "grad_norm": 7.414760524464213, + "learning_rate": 9.828890143444763e-06, + "loss": 18.34, + "step": 6074 + }, + { + "epoch": 0.11104611841263458, + "grad_norm": 6.855095671572821, + "learning_rate": 9.828813358468406e-06, + "loss": 17.9983, + "step": 6075 + }, + { + "epoch": 0.1110643976090811, + "grad_norm": 6.077699519279136, + "learning_rate": 9.82873655656748e-06, + "loss": 17.3106, + "step": 6076 + }, + { + "epoch": 0.11108267680552764, + "grad_norm": 7.0648388877619075, + "learning_rate": 9.82865973774225e-06, + "loss": 17.9716, + "step": 6077 + }, + { + "epoch": 0.11110095600197416, + "grad_norm": 6.766820816789677, + "learning_rate": 9.828582901992987e-06, + "loss": 17.4199, + "step": 6078 + }, + { + "epoch": 0.11111923519842068, + "grad_norm": 7.267164185676141, + "learning_rate": 9.82850604931996e-06, + "loss": 17.7048, + "step": 6079 + }, + { + "epoch": 0.1111375143948672, + "grad_norm": 6.3016461713514484, + "learning_rate": 9.828429179723437e-06, + "loss": 17.3628, + "step": 6080 + }, + { + "epoch": 0.11115579359131372, + "grad_norm": 8.301755776026813, + "learning_rate": 9.82835229320369e-06, + "loss": 18.1444, + "step": 6081 + }, + { + "epoch": 0.11117407278776024, + "grad_norm": 7.392556694316968, + "learning_rate": 9.828275389760986e-06, + "loss": 17.7959, + "step": 6082 + }, + { + "epoch": 0.11119235198420678, + "grad_norm": 6.276445797133483, + "learning_rate": 9.828198469395598e-06, + "loss": 17.5543, + "step": 6083 + }, + { + "epoch": 0.1112106311806533, + "grad_norm": 7.318058488520969, + "learning_rate": 9.82812153210779e-06, + "loss": 17.8725, + "step": 6084 + }, + { + "epoch": 0.11122891037709982, + "grad_norm": 7.157296095234143, + "learning_rate": 9.828044577897837e-06, + "loss": 18.1602, + "step": 6085 + }, + { + "epoch": 0.11124718957354635, + "grad_norm": 7.323118370588629, + "learning_rate": 9.827967606766009e-06, + "loss": 17.5684, + "step": 6086 + }, + { + "epoch": 0.11126546876999287, + "grad_norm": 6.531660504221052, + "learning_rate": 9.82789061871257e-06, + "loss": 17.4564, + "step": 6087 + }, + { + "epoch": 0.11128374796643939, + "grad_norm": 6.230555906074557, + "learning_rate": 9.827813613737795e-06, + "loss": 17.376, + "step": 6088 + }, + { + "epoch": 
0.11130202716288592, + "grad_norm": 6.835899987099556, + "learning_rate": 9.82773659184195e-06, + "loss": 18.0869, + "step": 6089 + }, + { + "epoch": 0.11132030635933245, + "grad_norm": 6.9819025137206285, + "learning_rate": 9.827659553025311e-06, + "loss": 17.5553, + "step": 6090 + }, + { + "epoch": 0.11133858555577897, + "grad_norm": 6.9195461465580825, + "learning_rate": 9.827582497288142e-06, + "loss": 17.494, + "step": 6091 + }, + { + "epoch": 0.11135686475222549, + "grad_norm": 7.16870145138108, + "learning_rate": 9.827505424630714e-06, + "loss": 18.086, + "step": 6092 + }, + { + "epoch": 0.11137514394867201, + "grad_norm": 7.75470910521547, + "learning_rate": 9.827428335053301e-06, + "loss": 17.9726, + "step": 6093 + }, + { + "epoch": 0.11139342314511855, + "grad_norm": 6.356708930907695, + "learning_rate": 9.827351228556168e-06, + "loss": 17.4048, + "step": 6094 + }, + { + "epoch": 0.11141170234156507, + "grad_norm": 8.195102079708166, + "learning_rate": 9.82727410513959e-06, + "loss": 17.998, + "step": 6095 + }, + { + "epoch": 0.11142998153801159, + "grad_norm": 6.096094184633512, + "learning_rate": 9.827196964803833e-06, + "loss": 17.219, + "step": 6096 + }, + { + "epoch": 0.11144826073445811, + "grad_norm": 6.789787708293502, + "learning_rate": 9.827119807549172e-06, + "loss": 17.4854, + "step": 6097 + }, + { + "epoch": 0.11146653993090463, + "grad_norm": 7.9023625358560015, + "learning_rate": 9.827042633375873e-06, + "loss": 17.9394, + "step": 6098 + }, + { + "epoch": 0.11148481912735116, + "grad_norm": 6.9377014505029715, + "learning_rate": 9.826965442284212e-06, + "loss": 17.1389, + "step": 6099 + }, + { + "epoch": 0.11150309832379769, + "grad_norm": 8.037621294410371, + "learning_rate": 9.826888234274452e-06, + "loss": 18.2612, + "step": 6100 + }, + { + "epoch": 0.11152137752024421, + "grad_norm": 7.304666527307093, + "learning_rate": 9.826811009346869e-06, + "loss": 17.7392, + "step": 6101 + }, + { + "epoch": 0.11153965671669074, + "grad_norm": 6.3745404473633425, + "learning_rate": 9.826733767501731e-06, + "loss": 17.3288, + "step": 6102 + }, + { + "epoch": 0.11155793591313726, + "grad_norm": 10.343701118274206, + "learning_rate": 9.826656508739311e-06, + "loss": 18.9521, + "step": 6103 + }, + { + "epoch": 0.11157621510958378, + "grad_norm": 8.011024726129985, + "learning_rate": 9.826579233059878e-06, + "loss": 18.0134, + "step": 6104 + }, + { + "epoch": 0.1115944943060303, + "grad_norm": 6.7895829546955175, + "learning_rate": 9.826501940463706e-06, + "loss": 17.5846, + "step": 6105 + }, + { + "epoch": 0.11161277350247684, + "grad_norm": 7.49594915628895, + "learning_rate": 9.826424630951062e-06, + "loss": 17.7317, + "step": 6106 + }, + { + "epoch": 0.11163105269892336, + "grad_norm": 7.049010994020828, + "learning_rate": 9.826347304522219e-06, + "loss": 17.5041, + "step": 6107 + }, + { + "epoch": 0.11164933189536988, + "grad_norm": 6.625397145218589, + "learning_rate": 9.826269961177447e-06, + "loss": 17.3501, + "step": 6108 + }, + { + "epoch": 0.1116676110918164, + "grad_norm": 6.79675307076588, + "learning_rate": 9.826192600917017e-06, + "loss": 17.8181, + "step": 6109 + }, + { + "epoch": 0.11168589028826292, + "grad_norm": 6.889365418259622, + "learning_rate": 9.826115223741202e-06, + "loss": 17.5375, + "step": 6110 + }, + { + "epoch": 0.11170416948470946, + "grad_norm": 7.6520421109127295, + "learning_rate": 9.826037829650271e-06, + "loss": 18.1953, + "step": 6111 + }, + { + "epoch": 0.11172244868115598, + "grad_norm": 6.8452885499139215, + "learning_rate": 
9.825960418644495e-06, + "loss": 17.5787, + "step": 6112 + }, + { + "epoch": 0.1117407278776025, + "grad_norm": 6.151882389545902, + "learning_rate": 9.82588299072415e-06, + "loss": 17.4467, + "step": 6113 + }, + { + "epoch": 0.11175900707404902, + "grad_norm": 6.83752001813512, + "learning_rate": 9.8258055458895e-06, + "loss": 17.3972, + "step": 6114 + }, + { + "epoch": 0.11177728627049555, + "grad_norm": 7.396162960704652, + "learning_rate": 9.825728084140824e-06, + "loss": 17.8534, + "step": 6115 + }, + { + "epoch": 0.11179556546694207, + "grad_norm": 8.660874668974742, + "learning_rate": 9.825650605478386e-06, + "loss": 18.5825, + "step": 6116 + }, + { + "epoch": 0.1118138446633886, + "grad_norm": 7.5702717335581715, + "learning_rate": 9.825573109902465e-06, + "loss": 18.273, + "step": 6117 + }, + { + "epoch": 0.11183212385983513, + "grad_norm": 7.7147435530331245, + "learning_rate": 9.825495597413328e-06, + "loss": 18.2381, + "step": 6118 + }, + { + "epoch": 0.11185040305628165, + "grad_norm": 7.152143115685819, + "learning_rate": 9.825418068011245e-06, + "loss": 17.8437, + "step": 6119 + }, + { + "epoch": 0.11186868225272817, + "grad_norm": 6.567868181370384, + "learning_rate": 9.825340521696493e-06, + "loss": 17.3631, + "step": 6120 + }, + { + "epoch": 0.11188696144917469, + "grad_norm": 7.48768227300808, + "learning_rate": 9.825262958469342e-06, + "loss": 17.8795, + "step": 6121 + }, + { + "epoch": 0.11190524064562121, + "grad_norm": 5.681827530665296, + "learning_rate": 9.82518537833006e-06, + "loss": 16.9753, + "step": 6122 + }, + { + "epoch": 0.11192351984206775, + "grad_norm": 7.042510126919909, + "learning_rate": 9.825107781278924e-06, + "loss": 17.9713, + "step": 6123 + }, + { + "epoch": 0.11194179903851427, + "grad_norm": 6.066116133662346, + "learning_rate": 9.825030167316204e-06, + "loss": 17.4079, + "step": 6124 + }, + { + "epoch": 0.11196007823496079, + "grad_norm": 8.930087487018563, + "learning_rate": 9.824952536442171e-06, + "loss": 18.8651, + "step": 6125 + }, + { + "epoch": 0.11197835743140731, + "grad_norm": 8.021392553948088, + "learning_rate": 9.824874888657099e-06, + "loss": 18.0091, + "step": 6126 + }, + { + "epoch": 0.11199663662785383, + "grad_norm": 6.0462716777884795, + "learning_rate": 9.824797223961259e-06, + "loss": 17.4654, + "step": 6127 + }, + { + "epoch": 0.11201491582430037, + "grad_norm": 7.870660144820459, + "learning_rate": 9.824719542354923e-06, + "loss": 18.1762, + "step": 6128 + }, + { + "epoch": 0.11203319502074689, + "grad_norm": 7.304537260867494, + "learning_rate": 9.824641843838364e-06, + "loss": 17.9305, + "step": 6129 + }, + { + "epoch": 0.11205147421719341, + "grad_norm": 6.882291977888932, + "learning_rate": 9.824564128411854e-06, + "loss": 17.6084, + "step": 6130 + }, + { + "epoch": 0.11206975341363994, + "grad_norm": 8.883903749463636, + "learning_rate": 9.824486396075665e-06, + "loss": 18.6727, + "step": 6131 + }, + { + "epoch": 0.11208803261008646, + "grad_norm": 7.763208221495181, + "learning_rate": 9.82440864683007e-06, + "loss": 18.1982, + "step": 6132 + }, + { + "epoch": 0.11210631180653298, + "grad_norm": 7.494317510006399, + "learning_rate": 9.824330880675341e-06, + "loss": 17.5689, + "step": 6133 + }, + { + "epoch": 0.11212459100297952, + "grad_norm": 6.734347902921629, + "learning_rate": 9.824253097611751e-06, + "loss": 17.6065, + "step": 6134 + }, + { + "epoch": 0.11214287019942604, + "grad_norm": 6.903138003440501, + "learning_rate": 9.824175297639573e-06, + "loss": 17.7784, + "step": 6135 + }, + { + "epoch": 
0.11216114939587256, + "grad_norm": 8.975992327832438, + "learning_rate": 9.82409748075908e-06, + "loss": 18.3752, + "step": 6136 + }, + { + "epoch": 0.11217942859231908, + "grad_norm": 6.789633529830011, + "learning_rate": 9.824019646970543e-06, + "loss": 17.6402, + "step": 6137 + }, + { + "epoch": 0.1121977077887656, + "grad_norm": 6.409081435221622, + "learning_rate": 9.823941796274235e-06, + "loss": 17.5977, + "step": 6138 + }, + { + "epoch": 0.11221598698521212, + "grad_norm": 6.116320825092029, + "learning_rate": 9.823863928670431e-06, + "loss": 17.6209, + "step": 6139 + }, + { + "epoch": 0.11223426618165866, + "grad_norm": 7.867902646461152, + "learning_rate": 9.823786044159403e-06, + "loss": 18.1709, + "step": 6140 + }, + { + "epoch": 0.11225254537810518, + "grad_norm": 5.947024102002984, + "learning_rate": 9.823708142741422e-06, + "loss": 17.2042, + "step": 6141 + }, + { + "epoch": 0.1122708245745517, + "grad_norm": 6.851079550186919, + "learning_rate": 9.823630224416762e-06, + "loss": 17.4814, + "step": 6142 + }, + { + "epoch": 0.11228910377099822, + "grad_norm": 7.498051350606899, + "learning_rate": 9.823552289185699e-06, + "loss": 18.1426, + "step": 6143 + }, + { + "epoch": 0.11230738296744475, + "grad_norm": 6.564015601323493, + "learning_rate": 9.823474337048502e-06, + "loss": 17.9036, + "step": 6144 + }, + { + "epoch": 0.11232566216389128, + "grad_norm": 7.85380257407971, + "learning_rate": 9.823396368005445e-06, + "loss": 17.9739, + "step": 6145 + }, + { + "epoch": 0.1123439413603378, + "grad_norm": 7.5435428933914475, + "learning_rate": 9.823318382056803e-06, + "loss": 17.4072, + "step": 6146 + }, + { + "epoch": 0.11236222055678433, + "grad_norm": 7.9263872401537645, + "learning_rate": 9.823240379202851e-06, + "loss": 18.1183, + "step": 6147 + }, + { + "epoch": 0.11238049975323085, + "grad_norm": 6.175677186935788, + "learning_rate": 9.823162359443858e-06, + "loss": 17.1602, + "step": 6148 + }, + { + "epoch": 0.11239877894967737, + "grad_norm": 8.336031690932213, + "learning_rate": 9.8230843227801e-06, + "loss": 18.207, + "step": 6149 + }, + { + "epoch": 0.11241705814612389, + "grad_norm": 7.549068160390573, + "learning_rate": 9.82300626921185e-06, + "loss": 17.9894, + "step": 6150 + }, + { + "epoch": 0.11243533734257043, + "grad_norm": 5.67824785647607, + "learning_rate": 9.822928198739381e-06, + "loss": 17.1215, + "step": 6151 + }, + { + "epoch": 0.11245361653901695, + "grad_norm": 6.724954031901365, + "learning_rate": 9.822850111362968e-06, + "loss": 17.609, + "step": 6152 + }, + { + "epoch": 0.11247189573546347, + "grad_norm": 5.731028461003114, + "learning_rate": 9.822772007082883e-06, + "loss": 17.1988, + "step": 6153 + }, + { + "epoch": 0.11249017493190999, + "grad_norm": 7.958600282388625, + "learning_rate": 9.8226938858994e-06, + "loss": 18.0315, + "step": 6154 + }, + { + "epoch": 0.11250845412835651, + "grad_norm": 5.418456092849641, + "learning_rate": 9.822615747812794e-06, + "loss": 17.0944, + "step": 6155 + }, + { + "epoch": 0.11252673332480304, + "grad_norm": 6.250556961926755, + "learning_rate": 9.82253759282334e-06, + "loss": 17.3175, + "step": 6156 + }, + { + "epoch": 0.11254501252124957, + "grad_norm": 7.435879048175508, + "learning_rate": 9.822459420931308e-06, + "loss": 17.8584, + "step": 6157 + }, + { + "epoch": 0.11256329171769609, + "grad_norm": 7.8249285387608305, + "learning_rate": 9.822381232136974e-06, + "loss": 17.9644, + "step": 6158 + }, + { + "epoch": 0.11258157091414261, + "grad_norm": 6.8331697717654984, + "learning_rate": 
9.822303026440614e-06, + "loss": 17.6083, + "step": 6159 + }, + { + "epoch": 0.11259985011058914, + "grad_norm": 6.290695489058567, + "learning_rate": 9.822224803842501e-06, + "loss": 17.3292, + "step": 6160 + }, + { + "epoch": 0.11261812930703566, + "grad_norm": 6.905225875045079, + "learning_rate": 9.822146564342907e-06, + "loss": 17.5662, + "step": 6161 + }, + { + "epoch": 0.1126364085034822, + "grad_norm": 7.702543026983446, + "learning_rate": 9.822068307942107e-06, + "loss": 18.0518, + "step": 6162 + }, + { + "epoch": 0.11265468769992872, + "grad_norm": 9.555595004282456, + "learning_rate": 9.821990034640377e-06, + "loss": 18.3942, + "step": 6163 + }, + { + "epoch": 0.11267296689637524, + "grad_norm": 7.863144492117609, + "learning_rate": 9.82191174443799e-06, + "loss": 18.304, + "step": 6164 + }, + { + "epoch": 0.11269124609282176, + "grad_norm": 6.460215556460458, + "learning_rate": 9.821833437335222e-06, + "loss": 17.5291, + "step": 6165 + }, + { + "epoch": 0.11270952528926828, + "grad_norm": 8.68694114150609, + "learning_rate": 9.821755113332346e-06, + "loss": 18.0716, + "step": 6166 + }, + { + "epoch": 0.1127278044857148, + "grad_norm": 6.3340952582156484, + "learning_rate": 9.821676772429635e-06, + "loss": 17.5139, + "step": 6167 + }, + { + "epoch": 0.11274608368216134, + "grad_norm": 8.46967776440184, + "learning_rate": 9.821598414627366e-06, + "loss": 18.1872, + "step": 6168 + }, + { + "epoch": 0.11276436287860786, + "grad_norm": 6.4976312789064865, + "learning_rate": 9.821520039925813e-06, + "loss": 17.483, + "step": 6169 + }, + { + "epoch": 0.11278264207505438, + "grad_norm": 7.102957846646031, + "learning_rate": 9.821441648325251e-06, + "loss": 17.8098, + "step": 6170 + }, + { + "epoch": 0.1128009212715009, + "grad_norm": 8.305200924217363, + "learning_rate": 9.821363239825955e-06, + "loss": 18.351, + "step": 6171 + }, + { + "epoch": 0.11281920046794743, + "grad_norm": 6.250879529314112, + "learning_rate": 9.821284814428198e-06, + "loss": 17.3108, + "step": 6172 + }, + { + "epoch": 0.11283747966439395, + "grad_norm": 7.34027541996215, + "learning_rate": 9.821206372132256e-06, + "loss": 17.4873, + "step": 6173 + }, + { + "epoch": 0.11285575886084048, + "grad_norm": 6.9034516263348475, + "learning_rate": 9.821127912938406e-06, + "loss": 17.8491, + "step": 6174 + }, + { + "epoch": 0.112874038057287, + "grad_norm": 6.760558702840803, + "learning_rate": 9.82104943684692e-06, + "loss": 17.6755, + "step": 6175 + }, + { + "epoch": 0.11289231725373353, + "grad_norm": 7.475051783595172, + "learning_rate": 9.820970943858074e-06, + "loss": 17.6946, + "step": 6176 + }, + { + "epoch": 0.11291059645018005, + "grad_norm": 8.605565217503512, + "learning_rate": 9.820892433972143e-06, + "loss": 18.4892, + "step": 6177 + }, + { + "epoch": 0.11292887564662657, + "grad_norm": 7.463849238422301, + "learning_rate": 9.820813907189401e-06, + "loss": 17.7594, + "step": 6178 + }, + { + "epoch": 0.1129471548430731, + "grad_norm": 6.493951292443605, + "learning_rate": 9.820735363510128e-06, + "loss": 17.5372, + "step": 6179 + }, + { + "epoch": 0.11296543403951963, + "grad_norm": 6.778429363469027, + "learning_rate": 9.820656802934593e-06, + "loss": 17.8115, + "step": 6180 + }, + { + "epoch": 0.11298371323596615, + "grad_norm": 5.498516120360731, + "learning_rate": 9.820578225463076e-06, + "loss": 17.1009, + "step": 6181 + }, + { + "epoch": 0.11300199243241267, + "grad_norm": 6.7522866139760644, + "learning_rate": 9.820499631095847e-06, + "loss": 17.7332, + "step": 6182 + }, + { + "epoch": 
0.11302027162885919, + "grad_norm": 7.128485909347858, + "learning_rate": 9.82042101983319e-06, + "loss": 17.7156, + "step": 6183 + }, + { + "epoch": 0.11303855082530571, + "grad_norm": 6.56856444299014, + "learning_rate": 9.820342391675373e-06, + "loss": 17.7751, + "step": 6184 + }, + { + "epoch": 0.11305683002175225, + "grad_norm": 6.936302922317391, + "learning_rate": 9.820263746622674e-06, + "loss": 17.6575, + "step": 6185 + }, + { + "epoch": 0.11307510921819877, + "grad_norm": 7.840124363272216, + "learning_rate": 9.82018508467537e-06, + "loss": 18.32, + "step": 6186 + }, + { + "epoch": 0.1130933884146453, + "grad_norm": 6.888341902062848, + "learning_rate": 9.820106405833735e-06, + "loss": 17.6309, + "step": 6187 + }, + { + "epoch": 0.11311166761109182, + "grad_norm": 7.271992003510105, + "learning_rate": 9.820027710098045e-06, + "loss": 17.9614, + "step": 6188 + }, + { + "epoch": 0.11312994680753834, + "grad_norm": 8.010739408017542, + "learning_rate": 9.819948997468577e-06, + "loss": 18.4273, + "step": 6189 + }, + { + "epoch": 0.11314822600398486, + "grad_norm": 8.042282447923073, + "learning_rate": 9.819870267945605e-06, + "loss": 18.3378, + "step": 6190 + }, + { + "epoch": 0.1131665052004314, + "grad_norm": 7.8011236944362325, + "learning_rate": 9.819791521529408e-06, + "loss": 18.1001, + "step": 6191 + }, + { + "epoch": 0.11318478439687792, + "grad_norm": 7.728160979327711, + "learning_rate": 9.819712758220257e-06, + "loss": 17.9143, + "step": 6192 + }, + { + "epoch": 0.11320306359332444, + "grad_norm": 6.325758801504882, + "learning_rate": 9.819633978018432e-06, + "loss": 17.5259, + "step": 6193 + }, + { + "epoch": 0.11322134278977096, + "grad_norm": 5.32983254100174, + "learning_rate": 9.81955518092421e-06, + "loss": 17.1029, + "step": 6194 + }, + { + "epoch": 0.11323962198621748, + "grad_norm": 6.033918154854671, + "learning_rate": 9.819476366937863e-06, + "loss": 17.5463, + "step": 6195 + }, + { + "epoch": 0.11325790118266402, + "grad_norm": 8.331254585623563, + "learning_rate": 9.81939753605967e-06, + "loss": 18.2479, + "step": 6196 + }, + { + "epoch": 0.11327618037911054, + "grad_norm": 8.115214118595839, + "learning_rate": 9.819318688289907e-06, + "loss": 18.2687, + "step": 6197 + }, + { + "epoch": 0.11329445957555706, + "grad_norm": 7.051646875887396, + "learning_rate": 9.819239823628852e-06, + "loss": 17.8834, + "step": 6198 + }, + { + "epoch": 0.11331273877200358, + "grad_norm": 9.367605477448407, + "learning_rate": 9.819160942076778e-06, + "loss": 18.6733, + "step": 6199 + }, + { + "epoch": 0.1133310179684501, + "grad_norm": 9.731200169257715, + "learning_rate": 9.819082043633963e-06, + "loss": 18.1608, + "step": 6200 + }, + { + "epoch": 0.11334929716489663, + "grad_norm": 7.864027832001035, + "learning_rate": 9.819003128300684e-06, + "loss": 18.2864, + "step": 6201 + }, + { + "epoch": 0.11336757636134316, + "grad_norm": 7.5567191452454345, + "learning_rate": 9.818924196077215e-06, + "loss": 18.0862, + "step": 6202 + }, + { + "epoch": 0.11338585555778968, + "grad_norm": 6.385528665166163, + "learning_rate": 9.818845246963838e-06, + "loss": 17.6081, + "step": 6203 + }, + { + "epoch": 0.1134041347542362, + "grad_norm": 8.142955437200175, + "learning_rate": 9.818766280960825e-06, + "loss": 17.981, + "step": 6204 + }, + { + "epoch": 0.11342241395068273, + "grad_norm": 6.624707244814315, + "learning_rate": 9.818687298068454e-06, + "loss": 17.707, + "step": 6205 + }, + { + "epoch": 0.11344069314712925, + "grad_norm": 9.578494114196964, + "learning_rate": 
9.818608298287004e-06, + "loss": 17.9611, + "step": 6206 + }, + { + "epoch": 0.11345897234357577, + "grad_norm": 8.46016392350176, + "learning_rate": 9.81852928161675e-06, + "loss": 18.2633, + "step": 6207 + }, + { + "epoch": 0.1134772515400223, + "grad_norm": 6.596661582164722, + "learning_rate": 9.818450248057967e-06, + "loss": 17.3136, + "step": 6208 + }, + { + "epoch": 0.11349553073646883, + "grad_norm": 7.295401665588104, + "learning_rate": 9.818371197610935e-06, + "loss": 18.0474, + "step": 6209 + }, + { + "epoch": 0.11351380993291535, + "grad_norm": 7.639919598354987, + "learning_rate": 9.81829213027593e-06, + "loss": 17.8791, + "step": 6210 + }, + { + "epoch": 0.11353208912936187, + "grad_norm": 6.652570304730243, + "learning_rate": 9.818213046053228e-06, + "loss": 17.9262, + "step": 6211 + }, + { + "epoch": 0.11355036832580839, + "grad_norm": 6.35751685274225, + "learning_rate": 9.81813394494311e-06, + "loss": 17.6467, + "step": 6212 + }, + { + "epoch": 0.11356864752225493, + "grad_norm": 6.7906346431503195, + "learning_rate": 9.818054826945848e-06, + "loss": 17.6755, + "step": 6213 + }, + { + "epoch": 0.11358692671870145, + "grad_norm": 6.576625503696223, + "learning_rate": 9.817975692061723e-06, + "loss": 17.6876, + "step": 6214 + }, + { + "epoch": 0.11360520591514797, + "grad_norm": 7.090944854282477, + "learning_rate": 9.817896540291013e-06, + "loss": 17.7948, + "step": 6215 + }, + { + "epoch": 0.1136234851115945, + "grad_norm": 6.508156125368024, + "learning_rate": 9.817817371633992e-06, + "loss": 17.5299, + "step": 6216 + }, + { + "epoch": 0.11364176430804102, + "grad_norm": 7.270076954305761, + "learning_rate": 9.81773818609094e-06, + "loss": 17.8332, + "step": 6217 + }, + { + "epoch": 0.11366004350448754, + "grad_norm": 6.599159646684854, + "learning_rate": 9.817658983662132e-06, + "loss": 17.6176, + "step": 6218 + }, + { + "epoch": 0.11367832270093407, + "grad_norm": 7.421039349125596, + "learning_rate": 9.817579764347849e-06, + "loss": 17.7094, + "step": 6219 + }, + { + "epoch": 0.1136966018973806, + "grad_norm": 7.875083752384193, + "learning_rate": 9.817500528148365e-06, + "loss": 18.0289, + "step": 6220 + }, + { + "epoch": 0.11371488109382712, + "grad_norm": 7.348782010366022, + "learning_rate": 9.817421275063962e-06, + "loss": 17.9597, + "step": 6221 + }, + { + "epoch": 0.11373316029027364, + "grad_norm": 6.3017741227720165, + "learning_rate": 9.817342005094915e-06, + "loss": 17.4832, + "step": 6222 + }, + { + "epoch": 0.11375143948672016, + "grad_norm": 7.030648821542163, + "learning_rate": 9.8172627182415e-06, + "loss": 17.8352, + "step": 6223 + }, + { + "epoch": 0.11376971868316668, + "grad_norm": 7.744853782477074, + "learning_rate": 9.817183414504e-06, + "loss": 18.175, + "step": 6224 + }, + { + "epoch": 0.11378799787961322, + "grad_norm": 6.717189735431535, + "learning_rate": 9.81710409388269e-06, + "loss": 17.5284, + "step": 6225 + }, + { + "epoch": 0.11380627707605974, + "grad_norm": 6.592936080943766, + "learning_rate": 9.817024756377847e-06, + "loss": 17.6753, + "step": 6226 + }, + { + "epoch": 0.11382455627250626, + "grad_norm": 7.49766059181947, + "learning_rate": 9.81694540198975e-06, + "loss": 17.4403, + "step": 6227 + }, + { + "epoch": 0.11384283546895278, + "grad_norm": 8.463039411504896, + "learning_rate": 9.81686603071868e-06, + "loss": 17.8722, + "step": 6228 + }, + { + "epoch": 0.1138611146653993, + "grad_norm": 5.78079434548121, + "learning_rate": 9.816786642564909e-06, + "loss": 17.2611, + "step": 6229 + }, + { + "epoch": 0.11387939386184584, + 
"grad_norm": 6.61962245135833, + "learning_rate": 9.816707237528719e-06, + "loss": 17.7266, + "step": 6230 + }, + { + "epoch": 0.11389767305829236, + "grad_norm": 7.793304355250502, + "learning_rate": 9.81662781561039e-06, + "loss": 17.872, + "step": 6231 + }, + { + "epoch": 0.11391595225473888, + "grad_norm": 6.104698234880663, + "learning_rate": 9.816548376810199e-06, + "loss": 17.2371, + "step": 6232 + }, + { + "epoch": 0.1139342314511854, + "grad_norm": 7.53738109773018, + "learning_rate": 9.816468921128422e-06, + "loss": 17.5866, + "step": 6233 + }, + { + "epoch": 0.11395251064763193, + "grad_norm": 7.47770185312477, + "learning_rate": 9.81638944856534e-06, + "loss": 18.0712, + "step": 6234 + }, + { + "epoch": 0.11397078984407845, + "grad_norm": 9.253215609884657, + "learning_rate": 9.816309959121231e-06, + "loss": 18.1412, + "step": 6235 + }, + { + "epoch": 0.11398906904052498, + "grad_norm": 6.569894728303271, + "learning_rate": 9.816230452796373e-06, + "loss": 17.4748, + "step": 6236 + }, + { + "epoch": 0.1140073482369715, + "grad_norm": 9.180895486113846, + "learning_rate": 9.816150929591046e-06, + "loss": 17.4326, + "step": 6237 + }, + { + "epoch": 0.11402562743341803, + "grad_norm": 7.557170943012073, + "learning_rate": 9.816071389505529e-06, + "loss": 17.9139, + "step": 6238 + }, + { + "epoch": 0.11404390662986455, + "grad_norm": 7.487304070854714, + "learning_rate": 9.815991832540098e-06, + "loss": 17.8918, + "step": 6239 + }, + { + "epoch": 0.11406218582631107, + "grad_norm": 7.379009782163196, + "learning_rate": 9.815912258695034e-06, + "loss": 17.9594, + "step": 6240 + }, + { + "epoch": 0.1140804650227576, + "grad_norm": 6.995329675365152, + "learning_rate": 9.815832667970615e-06, + "loss": 17.6184, + "step": 6241 + }, + { + "epoch": 0.11409874421920413, + "grad_norm": 10.00764842834887, + "learning_rate": 9.815753060367122e-06, + "loss": 18.0156, + "step": 6242 + }, + { + "epoch": 0.11411702341565065, + "grad_norm": 6.87247120564153, + "learning_rate": 9.815673435884831e-06, + "loss": 17.6839, + "step": 6243 + }, + { + "epoch": 0.11413530261209717, + "grad_norm": 6.830404337435096, + "learning_rate": 9.815593794524022e-06, + "loss": 18.0374, + "step": 6244 + }, + { + "epoch": 0.1141535818085437, + "grad_norm": 7.639694761758586, + "learning_rate": 9.815514136284977e-06, + "loss": 17.8666, + "step": 6245 + }, + { + "epoch": 0.11417186100499022, + "grad_norm": 6.708206899982387, + "learning_rate": 9.81543446116797e-06, + "loss": 17.6746, + "step": 6246 + }, + { + "epoch": 0.11419014020143675, + "grad_norm": 6.541288388294831, + "learning_rate": 9.815354769173284e-06, + "loss": 17.2896, + "step": 6247 + }, + { + "epoch": 0.11420841939788327, + "grad_norm": 6.76058205704926, + "learning_rate": 9.815275060301198e-06, + "loss": 17.5915, + "step": 6248 + }, + { + "epoch": 0.1142266985943298, + "grad_norm": 7.19369988720146, + "learning_rate": 9.81519533455199e-06, + "loss": 17.7179, + "step": 6249 + }, + { + "epoch": 0.11424497779077632, + "grad_norm": 7.1423211120451615, + "learning_rate": 9.81511559192594e-06, + "loss": 17.7816, + "step": 6250 + }, + { + "epoch": 0.11426325698722284, + "grad_norm": 7.522866178135697, + "learning_rate": 9.815035832423329e-06, + "loss": 18.0128, + "step": 6251 + }, + { + "epoch": 0.11428153618366936, + "grad_norm": 7.608876775665035, + "learning_rate": 9.814956056044433e-06, + "loss": 17.9453, + "step": 6252 + }, + { + "epoch": 0.1142998153801159, + "grad_norm": 7.774338330810066, + "learning_rate": 9.814876262789537e-06, + "loss": 18.0482, + 
"step": 6253 + }, + { + "epoch": 0.11431809457656242, + "grad_norm": 8.76759266751559, + "learning_rate": 9.814796452658915e-06, + "loss": 18.689, + "step": 6254 + }, + { + "epoch": 0.11433637377300894, + "grad_norm": 6.949687430786227, + "learning_rate": 9.81471662565285e-06, + "loss": 17.6662, + "step": 6255 + }, + { + "epoch": 0.11435465296945546, + "grad_norm": 7.502716402019781, + "learning_rate": 9.814636781771621e-06, + "loss": 17.7238, + "step": 6256 + }, + { + "epoch": 0.11437293216590198, + "grad_norm": 7.893843327867272, + "learning_rate": 9.814556921015509e-06, + "loss": 18.1044, + "step": 6257 + }, + { + "epoch": 0.1143912113623485, + "grad_norm": 8.017401807950714, + "learning_rate": 9.814477043384791e-06, + "loss": 17.8748, + "step": 6258 + }, + { + "epoch": 0.11440949055879504, + "grad_norm": 6.854928440046626, + "learning_rate": 9.814397148879751e-06, + "loss": 17.5991, + "step": 6259 + }, + { + "epoch": 0.11442776975524156, + "grad_norm": 10.515771082598214, + "learning_rate": 9.814317237500664e-06, + "loss": 18.2981, + "step": 6260 + }, + { + "epoch": 0.11444604895168808, + "grad_norm": 6.037075326124429, + "learning_rate": 9.814237309247814e-06, + "loss": 17.3793, + "step": 6261 + }, + { + "epoch": 0.1144643281481346, + "grad_norm": 6.462855231352157, + "learning_rate": 9.81415736412148e-06, + "loss": 17.5027, + "step": 6262 + }, + { + "epoch": 0.11448260734458113, + "grad_norm": 6.400781963784588, + "learning_rate": 9.814077402121943e-06, + "loss": 17.5447, + "step": 6263 + }, + { + "epoch": 0.11450088654102766, + "grad_norm": 7.292791950012517, + "learning_rate": 9.813997423249482e-06, + "loss": 17.9095, + "step": 6264 + }, + { + "epoch": 0.11451916573747419, + "grad_norm": 7.009803397848102, + "learning_rate": 9.813917427504378e-06, + "loss": 17.8501, + "step": 6265 + }, + { + "epoch": 0.1145374449339207, + "grad_norm": 6.570670431757007, + "learning_rate": 9.813837414886909e-06, + "loss": 17.7744, + "step": 6266 + }, + { + "epoch": 0.11455572413036723, + "grad_norm": 8.574922859793212, + "learning_rate": 9.81375738539736e-06, + "loss": 17.9815, + "step": 6267 + }, + { + "epoch": 0.11457400332681375, + "grad_norm": 6.7876422049688285, + "learning_rate": 9.813677339036009e-06, + "loss": 17.5424, + "step": 6268 + }, + { + "epoch": 0.11459228252326027, + "grad_norm": 8.11468122302412, + "learning_rate": 9.813597275803135e-06, + "loss": 18.5315, + "step": 6269 + }, + { + "epoch": 0.11461056171970681, + "grad_norm": 6.6613413290634895, + "learning_rate": 9.813517195699022e-06, + "loss": 17.5249, + "step": 6270 + }, + { + "epoch": 0.11462884091615333, + "grad_norm": 6.895460372431868, + "learning_rate": 9.813437098723948e-06, + "loss": 17.5528, + "step": 6271 + }, + { + "epoch": 0.11464712011259985, + "grad_norm": 6.422300246062773, + "learning_rate": 9.813356984878196e-06, + "loss": 17.1556, + "step": 6272 + }, + { + "epoch": 0.11466539930904637, + "grad_norm": 7.886666955754807, + "learning_rate": 9.813276854162043e-06, + "loss": 18.1806, + "step": 6273 + }, + { + "epoch": 0.1146836785054929, + "grad_norm": 7.4543683702574794, + "learning_rate": 9.813196706575774e-06, + "loss": 17.779, + "step": 6274 + }, + { + "epoch": 0.11470195770193942, + "grad_norm": 7.156585439808601, + "learning_rate": 9.813116542119666e-06, + "loss": 17.9412, + "step": 6275 + }, + { + "epoch": 0.11472023689838595, + "grad_norm": 9.138870548783235, + "learning_rate": 9.813036360794007e-06, + "loss": 18.5025, + "step": 6276 + }, + { + "epoch": 0.11473851609483247, + "grad_norm": 6.259275145245058, 
+ "learning_rate": 9.81295616259907e-06, + "loss": 17.3672, + "step": 6277 + }, + { + "epoch": 0.114756795291279, + "grad_norm": 7.411431181384231, + "learning_rate": 9.812875947535138e-06, + "loss": 17.8309, + "step": 6278 + }, + { + "epoch": 0.11477507448772552, + "grad_norm": 7.031345429104616, + "learning_rate": 9.812795715602495e-06, + "loss": 17.736, + "step": 6279 + }, + { + "epoch": 0.11479335368417204, + "grad_norm": 6.921873970480513, + "learning_rate": 9.812715466801422e-06, + "loss": 17.3918, + "step": 6280 + }, + { + "epoch": 0.11481163288061857, + "grad_norm": 7.009409342805002, + "learning_rate": 9.812635201132197e-06, + "loss": 17.6489, + "step": 6281 + }, + { + "epoch": 0.1148299120770651, + "grad_norm": 6.468373114451255, + "learning_rate": 9.812554918595103e-06, + "loss": 17.5001, + "step": 6282 + }, + { + "epoch": 0.11484819127351162, + "grad_norm": 6.844055770329313, + "learning_rate": 9.812474619190422e-06, + "loss": 17.5318, + "step": 6283 + }, + { + "epoch": 0.11486647046995814, + "grad_norm": 5.93876635730014, + "learning_rate": 9.812394302918436e-06, + "loss": 17.2376, + "step": 6284 + }, + { + "epoch": 0.11488474966640466, + "grad_norm": 7.332919688746114, + "learning_rate": 9.812313969779426e-06, + "loss": 17.7357, + "step": 6285 + }, + { + "epoch": 0.11490302886285118, + "grad_norm": 7.43211164073885, + "learning_rate": 9.812233619773673e-06, + "loss": 17.6347, + "step": 6286 + }, + { + "epoch": 0.11492130805929772, + "grad_norm": 6.855720150266296, + "learning_rate": 9.812153252901457e-06, + "loss": 17.539, + "step": 6287 + }, + { + "epoch": 0.11493958725574424, + "grad_norm": 7.8882473645624565, + "learning_rate": 9.812072869163063e-06, + "loss": 17.6707, + "step": 6288 + }, + { + "epoch": 0.11495786645219076, + "grad_norm": 9.086128872398263, + "learning_rate": 9.811992468558769e-06, + "loss": 18.2784, + "step": 6289 + }, + { + "epoch": 0.11497614564863728, + "grad_norm": 8.208285616076992, + "learning_rate": 9.811912051088861e-06, + "loss": 18.4073, + "step": 6290 + }, + { + "epoch": 0.1149944248450838, + "grad_norm": 9.03003262984523, + "learning_rate": 9.811831616753618e-06, + "loss": 18.3147, + "step": 6291 + }, + { + "epoch": 0.11501270404153033, + "grad_norm": 7.348070894525367, + "learning_rate": 9.811751165553322e-06, + "loss": 17.6348, + "step": 6292 + }, + { + "epoch": 0.11503098323797686, + "grad_norm": 6.825424625846023, + "learning_rate": 9.811670697488258e-06, + "loss": 17.5614, + "step": 6293 + }, + { + "epoch": 0.11504926243442339, + "grad_norm": 7.763856822305459, + "learning_rate": 9.811590212558704e-06, + "loss": 17.9533, + "step": 6294 + }, + { + "epoch": 0.11506754163086991, + "grad_norm": 6.819436018457505, + "learning_rate": 9.811509710764945e-06, + "loss": 17.4559, + "step": 6295 + }, + { + "epoch": 0.11508582082731643, + "grad_norm": 6.698750109017797, + "learning_rate": 9.81142919210726e-06, + "loss": 17.5214, + "step": 6296 + }, + { + "epoch": 0.11510410002376295, + "grad_norm": 7.438442475434122, + "learning_rate": 9.811348656585936e-06, + "loss": 18.0474, + "step": 6297 + }, + { + "epoch": 0.11512237922020949, + "grad_norm": 7.1820486597397055, + "learning_rate": 9.81126810420125e-06, + "loss": 17.9241, + "step": 6298 + }, + { + "epoch": 0.11514065841665601, + "grad_norm": 6.977489322993325, + "learning_rate": 9.811187534953488e-06, + "loss": 17.8234, + "step": 6299 + }, + { + "epoch": 0.11515893761310253, + "grad_norm": 6.914916530668801, + "learning_rate": 9.811106948842931e-06, + "loss": 17.5373, + "step": 6300 + }, + { + 
"epoch": 0.11517721680954905, + "grad_norm": 7.063726615733678, + "learning_rate": 9.811026345869862e-06, + "loss": 17.5611, + "step": 6301 + }, + { + "epoch": 0.11519549600599557, + "grad_norm": 7.1751863227292905, + "learning_rate": 9.810945726034563e-06, + "loss": 17.7032, + "step": 6302 + }, + { + "epoch": 0.1152137752024421, + "grad_norm": 6.682785236121788, + "learning_rate": 9.810865089337316e-06, + "loss": 17.662, + "step": 6303 + }, + { + "epoch": 0.11523205439888863, + "grad_norm": 8.435816868510173, + "learning_rate": 9.810784435778404e-06, + "loss": 18.1695, + "step": 6304 + }, + { + "epoch": 0.11525033359533515, + "grad_norm": 6.802208917897139, + "learning_rate": 9.810703765358111e-06, + "loss": 17.7794, + "step": 6305 + }, + { + "epoch": 0.11526861279178167, + "grad_norm": 8.180548363814108, + "learning_rate": 9.810623078076719e-06, + "loss": 18.2576, + "step": 6306 + }, + { + "epoch": 0.1152868919882282, + "grad_norm": 7.930981478452358, + "learning_rate": 9.810542373934511e-06, + "loss": 18.0729, + "step": 6307 + }, + { + "epoch": 0.11530517118467472, + "grad_norm": 6.838572780336669, + "learning_rate": 9.810461652931768e-06, + "loss": 17.4724, + "step": 6308 + }, + { + "epoch": 0.11532345038112124, + "grad_norm": 8.068676228106554, + "learning_rate": 9.810380915068775e-06, + "loss": 17.8672, + "step": 6309 + }, + { + "epoch": 0.11534172957756778, + "grad_norm": 7.196540776256688, + "learning_rate": 9.810300160345814e-06, + "loss": 17.9999, + "step": 6310 + }, + { + "epoch": 0.1153600087740143, + "grad_norm": 12.344777323849373, + "learning_rate": 9.810219388763168e-06, + "loss": 18.4425, + "step": 6311 + }, + { + "epoch": 0.11537828797046082, + "grad_norm": 6.833460845059853, + "learning_rate": 9.810138600321122e-06, + "loss": 17.389, + "step": 6312 + }, + { + "epoch": 0.11539656716690734, + "grad_norm": 7.2852103617293, + "learning_rate": 9.810057795019956e-06, + "loss": 17.7295, + "step": 6313 + }, + { + "epoch": 0.11541484636335386, + "grad_norm": 7.220185693867019, + "learning_rate": 9.809976972859956e-06, + "loss": 18.0016, + "step": 6314 + }, + { + "epoch": 0.1154331255598004, + "grad_norm": 7.8150757138486115, + "learning_rate": 9.809896133841404e-06, + "loss": 17.8048, + "step": 6315 + }, + { + "epoch": 0.11545140475624692, + "grad_norm": 5.557317761478397, + "learning_rate": 9.809815277964582e-06, + "loss": 17.2288, + "step": 6316 + }, + { + "epoch": 0.11546968395269344, + "grad_norm": 7.061668131990722, + "learning_rate": 9.809734405229776e-06, + "loss": 17.369, + "step": 6317 + }, + { + "epoch": 0.11548796314913996, + "grad_norm": 7.244504281890309, + "learning_rate": 9.809653515637268e-06, + "loss": 18.0361, + "step": 6318 + }, + { + "epoch": 0.11550624234558649, + "grad_norm": 8.180238124252234, + "learning_rate": 9.809572609187341e-06, + "loss": 18.1929, + "step": 6319 + }, + { + "epoch": 0.115524521542033, + "grad_norm": 7.531069188380508, + "learning_rate": 9.80949168588028e-06, + "loss": 17.9694, + "step": 6320 + }, + { + "epoch": 0.11554280073847954, + "grad_norm": 6.1840911491435095, + "learning_rate": 9.809410745716367e-06, + "loss": 17.4163, + "step": 6321 + }, + { + "epoch": 0.11556107993492606, + "grad_norm": 5.799217051722377, + "learning_rate": 9.80932978869589e-06, + "loss": 17.3084, + "step": 6322 + }, + { + "epoch": 0.11557935913137259, + "grad_norm": 6.4627063889616165, + "learning_rate": 9.809248814819126e-06, + "loss": 17.5635, + "step": 6323 + }, + { + "epoch": 0.11559763832781911, + "grad_norm": 7.339022253426556, + "learning_rate": 
9.809167824086365e-06, + "loss": 18.1535, + "step": 6324 + }, + { + "epoch": 0.11561591752426563, + "grad_norm": 8.484544218942316, + "learning_rate": 9.809086816497886e-06, + "loss": 18.4038, + "step": 6325 + }, + { + "epoch": 0.11563419672071215, + "grad_norm": 7.2863558889769875, + "learning_rate": 9.809005792053976e-06, + "loss": 17.6745, + "step": 6326 + }, + { + "epoch": 0.11565247591715869, + "grad_norm": 8.3274098521248, + "learning_rate": 9.808924750754918e-06, + "loss": 17.3246, + "step": 6327 + }, + { + "epoch": 0.11567075511360521, + "grad_norm": 6.559594966952365, + "learning_rate": 9.808843692600995e-06, + "loss": 17.5227, + "step": 6328 + }, + { + "epoch": 0.11568903431005173, + "grad_norm": 6.639002728992262, + "learning_rate": 9.808762617592494e-06, + "loss": 17.4937, + "step": 6329 + }, + { + "epoch": 0.11570731350649825, + "grad_norm": 6.373177086925029, + "learning_rate": 9.808681525729696e-06, + "loss": 17.3868, + "step": 6330 + }, + { + "epoch": 0.11572559270294477, + "grad_norm": 9.208906527418707, + "learning_rate": 9.808600417012886e-06, + "loss": 19.1502, + "step": 6331 + }, + { + "epoch": 0.11574387189939131, + "grad_norm": 6.392016892776988, + "learning_rate": 9.80851929144235e-06, + "loss": 17.2708, + "step": 6332 + }, + { + "epoch": 0.11576215109583783, + "grad_norm": 9.832547492016513, + "learning_rate": 9.80843814901837e-06, + "loss": 18.5383, + "step": 6333 + }, + { + "epoch": 0.11578043029228435, + "grad_norm": 7.692122147960202, + "learning_rate": 9.808356989741231e-06, + "loss": 18.2824, + "step": 6334 + }, + { + "epoch": 0.11579870948873087, + "grad_norm": 7.55166145720147, + "learning_rate": 9.80827581361122e-06, + "loss": 17.9764, + "step": 6335 + }, + { + "epoch": 0.1158169886851774, + "grad_norm": 6.364236199278281, + "learning_rate": 9.808194620628619e-06, + "loss": 17.2458, + "step": 6336 + }, + { + "epoch": 0.11583526788162392, + "grad_norm": 6.98256771151916, + "learning_rate": 9.808113410793713e-06, + "loss": 17.7631, + "step": 6337 + }, + { + "epoch": 0.11585354707807045, + "grad_norm": 8.25863248027287, + "learning_rate": 9.808032184106786e-06, + "loss": 18.33, + "step": 6338 + }, + { + "epoch": 0.11587182627451698, + "grad_norm": 6.55601754729298, + "learning_rate": 9.807950940568124e-06, + "loss": 17.4528, + "step": 6339 + }, + { + "epoch": 0.1158901054709635, + "grad_norm": 6.976318082824107, + "learning_rate": 9.80786968017801e-06, + "loss": 17.8263, + "step": 6340 + }, + { + "epoch": 0.11590838466741002, + "grad_norm": 6.232620318070562, + "learning_rate": 9.807788402936732e-06, + "loss": 17.338, + "step": 6341 + }, + { + "epoch": 0.11592666386385654, + "grad_norm": 6.2455593197647925, + "learning_rate": 9.807707108844572e-06, + "loss": 17.5911, + "step": 6342 + }, + { + "epoch": 0.11594494306030306, + "grad_norm": 6.7502933548553745, + "learning_rate": 9.807625797901817e-06, + "loss": 17.7904, + "step": 6343 + }, + { + "epoch": 0.1159632222567496, + "grad_norm": 7.082728421548852, + "learning_rate": 9.807544470108748e-06, + "loss": 17.7445, + "step": 6344 + }, + { + "epoch": 0.11598150145319612, + "grad_norm": 6.19434001023653, + "learning_rate": 9.807463125465655e-06, + "loss": 17.1742, + "step": 6345 + }, + { + "epoch": 0.11599978064964264, + "grad_norm": 6.948315780633028, + "learning_rate": 9.80738176397282e-06, + "loss": 17.83, + "step": 6346 + }, + { + "epoch": 0.11601805984608916, + "grad_norm": 7.265773545397512, + "learning_rate": 9.80730038563053e-06, + "loss": 17.7742, + "step": 6347 + }, + { + "epoch": 0.11603633904253569, + 
"grad_norm": 6.466850105613359, + "learning_rate": 9.807218990439068e-06, + "loss": 17.6626, + "step": 6348 + }, + { + "epoch": 0.11605461823898222, + "grad_norm": 6.787913398772441, + "learning_rate": 9.80713757839872e-06, + "loss": 17.5217, + "step": 6349 + }, + { + "epoch": 0.11607289743542874, + "grad_norm": 7.117216201535235, + "learning_rate": 9.807056149509775e-06, + "loss": 17.6632, + "step": 6350 + }, + { + "epoch": 0.11609117663187526, + "grad_norm": 5.943020646342875, + "learning_rate": 9.806974703772513e-06, + "loss": 17.3552, + "step": 6351 + }, + { + "epoch": 0.11610945582832179, + "grad_norm": 6.792120512788166, + "learning_rate": 9.806893241187223e-06, + "loss": 17.508, + "step": 6352 + }, + { + "epoch": 0.11612773502476831, + "grad_norm": 7.289555594562179, + "learning_rate": 9.806811761754188e-06, + "loss": 17.7113, + "step": 6353 + }, + { + "epoch": 0.11614601422121483, + "grad_norm": 7.061349160039012, + "learning_rate": 9.806730265473694e-06, + "loss": 17.7871, + "step": 6354 + }, + { + "epoch": 0.11616429341766137, + "grad_norm": 7.775418162119797, + "learning_rate": 9.806648752346029e-06, + "loss": 18.329, + "step": 6355 + }, + { + "epoch": 0.11618257261410789, + "grad_norm": 5.823594541185173, + "learning_rate": 9.806567222371478e-06, + "loss": 17.2493, + "step": 6356 + }, + { + "epoch": 0.11620085181055441, + "grad_norm": 9.340370280309296, + "learning_rate": 9.806485675550326e-06, + "loss": 18.6586, + "step": 6357 + }, + { + "epoch": 0.11621913100700093, + "grad_norm": 6.957343250007208, + "learning_rate": 9.806404111882857e-06, + "loss": 17.7079, + "step": 6358 + }, + { + "epoch": 0.11623741020344745, + "grad_norm": 7.179960364024654, + "learning_rate": 9.80632253136936e-06, + "loss": 17.9265, + "step": 6359 + }, + { + "epoch": 0.11625568939989397, + "grad_norm": 7.021714206245028, + "learning_rate": 9.806240934010118e-06, + "loss": 17.7393, + "step": 6360 + }, + { + "epoch": 0.11627396859634051, + "grad_norm": 6.334167341151742, + "learning_rate": 9.806159319805421e-06, + "loss": 17.5247, + "step": 6361 + }, + { + "epoch": 0.11629224779278703, + "grad_norm": 7.539256044216627, + "learning_rate": 9.80607768875555e-06, + "loss": 17.9543, + "step": 6362 + }, + { + "epoch": 0.11631052698923355, + "grad_norm": 7.686195639813259, + "learning_rate": 9.805996040860796e-06, + "loss": 18.1302, + "step": 6363 + }, + { + "epoch": 0.11632880618568008, + "grad_norm": 6.545812377155543, + "learning_rate": 9.805914376121443e-06, + "loss": 17.5107, + "step": 6364 + }, + { + "epoch": 0.1163470853821266, + "grad_norm": 7.668924002686527, + "learning_rate": 9.805832694537777e-06, + "loss": 18.2149, + "step": 6365 + }, + { + "epoch": 0.11636536457857313, + "grad_norm": 7.675284574438888, + "learning_rate": 9.805750996110082e-06, + "loss": 18.0789, + "step": 6366 + }, + { + "epoch": 0.11638364377501965, + "grad_norm": 6.274208863681385, + "learning_rate": 9.80566928083865e-06, + "loss": 17.5136, + "step": 6367 + }, + { + "epoch": 0.11640192297146618, + "grad_norm": 6.975382543720655, + "learning_rate": 9.805587548723763e-06, + "loss": 17.9814, + "step": 6368 + }, + { + "epoch": 0.1164202021679127, + "grad_norm": 5.754036728902357, + "learning_rate": 9.805505799765708e-06, + "loss": 17.1506, + "step": 6369 + }, + { + "epoch": 0.11643848136435922, + "grad_norm": 8.085855456180017, + "learning_rate": 9.805424033964773e-06, + "loss": 18.0149, + "step": 6370 + }, + { + "epoch": 0.11645676056080574, + "grad_norm": 6.392223105722064, + "learning_rate": 9.805342251321242e-06, + "loss": 
17.6478, + "step": 6371 + }, + { + "epoch": 0.11647503975725228, + "grad_norm": 7.042051932028251, + "learning_rate": 9.805260451835405e-06, + "loss": 17.6313, + "step": 6372 + }, + { + "epoch": 0.1164933189536988, + "grad_norm": 6.753130957874067, + "learning_rate": 9.805178635507547e-06, + "loss": 17.733, + "step": 6373 + }, + { + "epoch": 0.11651159815014532, + "grad_norm": 6.277624375642367, + "learning_rate": 9.805096802337954e-06, + "loss": 17.4527, + "step": 6374 + }, + { + "epoch": 0.11652987734659184, + "grad_norm": 8.559251219177527, + "learning_rate": 9.805014952326915e-06, + "loss": 18.4475, + "step": 6375 + }, + { + "epoch": 0.11654815654303836, + "grad_norm": 6.427180880428312, + "learning_rate": 9.804933085474715e-06, + "loss": 17.4267, + "step": 6376 + }, + { + "epoch": 0.11656643573948489, + "grad_norm": 6.625726581180322, + "learning_rate": 9.804851201781641e-06, + "loss": 17.6082, + "step": 6377 + }, + { + "epoch": 0.11658471493593142, + "grad_norm": 6.868704675957353, + "learning_rate": 9.80476930124798e-06, + "loss": 17.6177, + "step": 6378 + }, + { + "epoch": 0.11660299413237794, + "grad_norm": 7.023585021843053, + "learning_rate": 9.804687383874021e-06, + "loss": 17.7069, + "step": 6379 + }, + { + "epoch": 0.11662127332882447, + "grad_norm": 8.013740996817095, + "learning_rate": 9.80460544966005e-06, + "loss": 18.3447, + "step": 6380 + }, + { + "epoch": 0.11663955252527099, + "grad_norm": 7.301934200534333, + "learning_rate": 9.804523498606351e-06, + "loss": 18.204, + "step": 6381 + }, + { + "epoch": 0.11665783172171751, + "grad_norm": 8.691675307504308, + "learning_rate": 9.804441530713217e-06, + "loss": 18.1091, + "step": 6382 + }, + { + "epoch": 0.11667611091816404, + "grad_norm": 5.254428444675815, + "learning_rate": 9.804359545980931e-06, + "loss": 16.9172, + "step": 6383 + }, + { + "epoch": 0.11669439011461057, + "grad_norm": 6.155877543797338, + "learning_rate": 9.804277544409782e-06, + "loss": 17.4798, + "step": 6384 + }, + { + "epoch": 0.11671266931105709, + "grad_norm": 6.564385697941725, + "learning_rate": 9.804195526000057e-06, + "loss": 17.5002, + "step": 6385 + }, + { + "epoch": 0.11673094850750361, + "grad_norm": 8.185972406187952, + "learning_rate": 9.804113490752044e-06, + "loss": 18.2783, + "step": 6386 + }, + { + "epoch": 0.11674922770395013, + "grad_norm": 8.422075775427512, + "learning_rate": 9.80403143866603e-06, + "loss": 18.0337, + "step": 6387 + }, + { + "epoch": 0.11676750690039665, + "grad_norm": 7.796217893917706, + "learning_rate": 9.803949369742303e-06, + "loss": 18.2966, + "step": 6388 + }, + { + "epoch": 0.11678578609684319, + "grad_norm": 7.3098209433518875, + "learning_rate": 9.803867283981149e-06, + "loss": 17.8664, + "step": 6389 + }, + { + "epoch": 0.11680406529328971, + "grad_norm": 7.785805337739391, + "learning_rate": 9.803785181382858e-06, + "loss": 17.8803, + "step": 6390 + }, + { + "epoch": 0.11682234448973623, + "grad_norm": 7.111380704612183, + "learning_rate": 9.803703061947716e-06, + "loss": 17.5372, + "step": 6391 + }, + { + "epoch": 0.11684062368618275, + "grad_norm": 6.453292618911451, + "learning_rate": 9.803620925676011e-06, + "loss": 17.5098, + "step": 6392 + }, + { + "epoch": 0.11685890288262928, + "grad_norm": 8.43288871285481, + "learning_rate": 9.803538772568034e-06, + "loss": 18.4735, + "step": 6393 + }, + { + "epoch": 0.1168771820790758, + "grad_norm": 7.025785259728636, + "learning_rate": 9.803456602624069e-06, + "loss": 17.8423, + "step": 6394 + }, + { + "epoch": 0.11689546127552233, + "grad_norm": 
7.078785596352402, + "learning_rate": 9.803374415844406e-06, + "loss": 17.6048, + "step": 6395 + }, + { + "epoch": 0.11691374047196885, + "grad_norm": 7.191447585990892, + "learning_rate": 9.803292212229332e-06, + "loss": 17.6487, + "step": 6396 + }, + { + "epoch": 0.11693201966841538, + "grad_norm": 6.977973314987887, + "learning_rate": 9.803209991779134e-06, + "loss": 17.8112, + "step": 6397 + }, + { + "epoch": 0.1169502988648619, + "grad_norm": 6.160629282524603, + "learning_rate": 9.803127754494105e-06, + "loss": 17.5099, + "step": 6398 + }, + { + "epoch": 0.11696857806130842, + "grad_norm": 6.456529053780158, + "learning_rate": 9.803045500374528e-06, + "loss": 17.5749, + "step": 6399 + }, + { + "epoch": 0.11698685725775496, + "grad_norm": 6.625938420317166, + "learning_rate": 9.802963229420694e-06, + "loss": 17.5932, + "step": 6400 + }, + { + "epoch": 0.11700513645420148, + "grad_norm": 8.576589550161236, + "learning_rate": 9.802880941632891e-06, + "loss": 18.4689, + "step": 6401 + }, + { + "epoch": 0.117023415650648, + "grad_norm": 6.944979006020021, + "learning_rate": 9.802798637011406e-06, + "loss": 17.1432, + "step": 6402 + }, + { + "epoch": 0.11704169484709452, + "grad_norm": 6.443120159768358, + "learning_rate": 9.802716315556528e-06, + "loss": 17.5015, + "step": 6403 + }, + { + "epoch": 0.11705997404354104, + "grad_norm": 6.125188488621004, + "learning_rate": 9.802633977268547e-06, + "loss": 17.443, + "step": 6404 + }, + { + "epoch": 0.11707825323998756, + "grad_norm": 9.200141359939387, + "learning_rate": 9.80255162214775e-06, + "loss": 18.4959, + "step": 6405 + }, + { + "epoch": 0.1170965324364341, + "grad_norm": 8.204597681266769, + "learning_rate": 9.802469250194429e-06, + "loss": 18.3473, + "step": 6406 + }, + { + "epoch": 0.11711481163288062, + "grad_norm": 6.374708048046521, + "learning_rate": 9.802386861408868e-06, + "loss": 17.453, + "step": 6407 + }, + { + "epoch": 0.11713309082932714, + "grad_norm": 8.756956476021765, + "learning_rate": 9.802304455791358e-06, + "loss": 18.4878, + "step": 6408 + }, + { + "epoch": 0.11715137002577367, + "grad_norm": 7.145826288671658, + "learning_rate": 9.802222033342187e-06, + "loss": 17.8972, + "step": 6409 + }, + { + "epoch": 0.11716964922222019, + "grad_norm": 7.308038267239658, + "learning_rate": 9.802139594061645e-06, + "loss": 17.4195, + "step": 6410 + }, + { + "epoch": 0.11718792841866671, + "grad_norm": 6.691694777386175, + "learning_rate": 9.80205713795002e-06, + "loss": 17.5466, + "step": 6411 + }, + { + "epoch": 0.11720620761511324, + "grad_norm": 10.977301694384694, + "learning_rate": 9.801974665007602e-06, + "loss": 18.3623, + "step": 6412 + }, + { + "epoch": 0.11722448681155977, + "grad_norm": 7.136042291072314, + "learning_rate": 9.80189217523468e-06, + "loss": 18.1256, + "step": 6413 + }, + { + "epoch": 0.11724276600800629, + "grad_norm": 7.732945738852875, + "learning_rate": 9.801809668631542e-06, + "loss": 18.0761, + "step": 6414 + }, + { + "epoch": 0.11726104520445281, + "grad_norm": 6.625258155185103, + "learning_rate": 9.801727145198478e-06, + "loss": 17.6355, + "step": 6415 + }, + { + "epoch": 0.11727932440089933, + "grad_norm": 7.129781251303618, + "learning_rate": 9.801644604935776e-06, + "loss": 17.8709, + "step": 6416 + }, + { + "epoch": 0.11729760359734587, + "grad_norm": 6.751901967124584, + "learning_rate": 9.801562047843727e-06, + "loss": 17.5563, + "step": 6417 + }, + { + "epoch": 0.11731588279379239, + "grad_norm": 7.896663379369354, + "learning_rate": 9.80147947392262e-06, + "loss": 18.2147, + "step": 
6418 + }, + { + "epoch": 0.11733416199023891, + "grad_norm": 6.829852354286273, + "learning_rate": 9.801396883172744e-06, + "loss": 17.8405, + "step": 6419 + }, + { + "epoch": 0.11735244118668543, + "grad_norm": 8.027455516082357, + "learning_rate": 9.801314275594389e-06, + "loss": 17.7207, + "step": 6420 + }, + { + "epoch": 0.11737072038313195, + "grad_norm": 5.83849562073646, + "learning_rate": 9.801231651187844e-06, + "loss": 17.2513, + "step": 6421 + }, + { + "epoch": 0.11738899957957848, + "grad_norm": 6.498057274825543, + "learning_rate": 9.801149009953397e-06, + "loss": 17.5261, + "step": 6422 + }, + { + "epoch": 0.11740727877602501, + "grad_norm": 6.481551183585061, + "learning_rate": 9.801066351891341e-06, + "loss": 17.5379, + "step": 6423 + }, + { + "epoch": 0.11742555797247153, + "grad_norm": 8.404955598684786, + "learning_rate": 9.800983677001962e-06, + "loss": 17.6598, + "step": 6424 + }, + { + "epoch": 0.11744383716891806, + "grad_norm": 8.42212805213432, + "learning_rate": 9.800900985285554e-06, + "loss": 18.046, + "step": 6425 + }, + { + "epoch": 0.11746211636536458, + "grad_norm": 7.176008702550471, + "learning_rate": 9.800818276742405e-06, + "loss": 17.7202, + "step": 6426 + }, + { + "epoch": 0.1174803955618111, + "grad_norm": 8.417184757751102, + "learning_rate": 9.800735551372804e-06, + "loss": 18.2777, + "step": 6427 + }, + { + "epoch": 0.11749867475825762, + "grad_norm": 6.548088527082842, + "learning_rate": 9.80065280917704e-06, + "loss": 17.5299, + "step": 6428 + }, + { + "epoch": 0.11751695395470416, + "grad_norm": 10.397837457025595, + "learning_rate": 9.800570050155406e-06, + "loss": 18.4056, + "step": 6429 + }, + { + "epoch": 0.11753523315115068, + "grad_norm": 6.12273016604697, + "learning_rate": 9.800487274308191e-06, + "loss": 17.3399, + "step": 6430 + }, + { + "epoch": 0.1175535123475972, + "grad_norm": 7.357488772562387, + "learning_rate": 9.800404481635683e-06, + "loss": 18.1048, + "step": 6431 + }, + { + "epoch": 0.11757179154404372, + "grad_norm": 7.470281827265112, + "learning_rate": 9.800321672138176e-06, + "loss": 17.644, + "step": 6432 + }, + { + "epoch": 0.11759007074049024, + "grad_norm": 7.999336679854122, + "learning_rate": 9.800238845815956e-06, + "loss": 18.5439, + "step": 6433 + }, + { + "epoch": 0.11760834993693678, + "grad_norm": 8.806640978534022, + "learning_rate": 9.800156002669317e-06, + "loss": 18.4895, + "step": 6434 + }, + { + "epoch": 0.1176266291333833, + "grad_norm": 7.52573875475446, + "learning_rate": 9.800073142698545e-06, + "loss": 18.1755, + "step": 6435 + }, + { + "epoch": 0.11764490832982982, + "grad_norm": 6.675257678545468, + "learning_rate": 9.799990265903936e-06, + "loss": 17.564, + "step": 6436 + }, + { + "epoch": 0.11766318752627634, + "grad_norm": 7.144905545526197, + "learning_rate": 9.799907372285778e-06, + "loss": 17.9521, + "step": 6437 + }, + { + "epoch": 0.11768146672272287, + "grad_norm": 6.362649613459145, + "learning_rate": 9.799824461844358e-06, + "loss": 17.2563, + "step": 6438 + }, + { + "epoch": 0.11769974591916939, + "grad_norm": 6.904921285489309, + "learning_rate": 9.799741534579972e-06, + "loss": 17.6227, + "step": 6439 + }, + { + "epoch": 0.11771802511561592, + "grad_norm": 7.443050796729453, + "learning_rate": 9.799658590492909e-06, + "loss": 17.8672, + "step": 6440 + }, + { + "epoch": 0.11773630431206245, + "grad_norm": 6.956501862183829, + "learning_rate": 9.799575629583457e-06, + "loss": 17.602, + "step": 6441 + }, + { + "epoch": 0.11775458350850897, + "grad_norm": 7.315264576892508, + 
"learning_rate": 9.79949265185191e-06, + "loss": 17.796, + "step": 6442 + }, + { + "epoch": 0.11777286270495549, + "grad_norm": 7.571036138757203, + "learning_rate": 9.799409657298559e-06, + "loss": 18.1444, + "step": 6443 + }, + { + "epoch": 0.11779114190140201, + "grad_norm": 6.906011070661719, + "learning_rate": 9.799326645923692e-06, + "loss": 17.7722, + "step": 6444 + }, + { + "epoch": 0.11780942109784853, + "grad_norm": 6.901236194432724, + "learning_rate": 9.799243617727603e-06, + "loss": 17.7963, + "step": 6445 + }, + { + "epoch": 0.11782770029429507, + "grad_norm": 8.612221112737895, + "learning_rate": 9.79916057271058e-06, + "loss": 18.0595, + "step": 6446 + }, + { + "epoch": 0.11784597949074159, + "grad_norm": 8.554722330225086, + "learning_rate": 9.799077510872916e-06, + "loss": 18.0822, + "step": 6447 + }, + { + "epoch": 0.11786425868718811, + "grad_norm": 6.5271612616553, + "learning_rate": 9.798994432214901e-06, + "loss": 17.4995, + "step": 6448 + }, + { + "epoch": 0.11788253788363463, + "grad_norm": 6.553220588831177, + "learning_rate": 9.798911336736829e-06, + "loss": 17.3902, + "step": 6449 + }, + { + "epoch": 0.11790081708008116, + "grad_norm": 7.953352621405939, + "learning_rate": 9.79882822443899e-06, + "loss": 18.1591, + "step": 6450 + }, + { + "epoch": 0.11791909627652769, + "grad_norm": 7.450150154473341, + "learning_rate": 9.79874509532167e-06, + "loss": 17.8105, + "step": 6451 + }, + { + "epoch": 0.11793737547297421, + "grad_norm": 7.344572475847884, + "learning_rate": 9.79866194938517e-06, + "loss": 17.9767, + "step": 6452 + }, + { + "epoch": 0.11795565466942073, + "grad_norm": 8.533770454549908, + "learning_rate": 9.798578786629774e-06, + "loss": 18.6686, + "step": 6453 + }, + { + "epoch": 0.11797393386586726, + "grad_norm": 6.325517074232184, + "learning_rate": 9.798495607055773e-06, + "loss": 17.2783, + "step": 6454 + }, + { + "epoch": 0.11799221306231378, + "grad_norm": 8.253855300487562, + "learning_rate": 9.798412410663466e-06, + "loss": 18.3235, + "step": 6455 + }, + { + "epoch": 0.1180104922587603, + "grad_norm": 8.596405157604142, + "learning_rate": 9.798329197453136e-06, + "loss": 18.1615, + "step": 6456 + }, + { + "epoch": 0.11802877145520684, + "grad_norm": 5.917307156883019, + "learning_rate": 9.798245967425081e-06, + "loss": 17.3634, + "step": 6457 + }, + { + "epoch": 0.11804705065165336, + "grad_norm": 8.188917822781782, + "learning_rate": 9.79816272057959e-06, + "loss": 18.2016, + "step": 6458 + }, + { + "epoch": 0.11806532984809988, + "grad_norm": 6.486414674149526, + "learning_rate": 9.798079456916954e-06, + "loss": 17.3954, + "step": 6459 + }, + { + "epoch": 0.1180836090445464, + "grad_norm": 7.555090203044301, + "learning_rate": 9.797996176437467e-06, + "loss": 17.7253, + "step": 6460 + }, + { + "epoch": 0.11810188824099292, + "grad_norm": 8.175442463062188, + "learning_rate": 9.79791287914142e-06, + "loss": 17.8376, + "step": 6461 + }, + { + "epoch": 0.11812016743743944, + "grad_norm": 6.995608011331642, + "learning_rate": 9.797829565029103e-06, + "loss": 17.6152, + "step": 6462 + }, + { + "epoch": 0.11813844663388598, + "grad_norm": 6.1874870425570165, + "learning_rate": 9.797746234100811e-06, + "loss": 17.4161, + "step": 6463 + }, + { + "epoch": 0.1181567258303325, + "grad_norm": 7.507360658716251, + "learning_rate": 9.797662886356833e-06, + "loss": 17.953, + "step": 6464 + }, + { + "epoch": 0.11817500502677902, + "grad_norm": 6.90773495193432, + "learning_rate": 9.797579521797466e-06, + "loss": 17.7612, + "step": 6465 + }, + { + "epoch": 
0.11819328422322554, + "grad_norm": 6.636870108478468, + "learning_rate": 9.797496140422997e-06, + "loss": 17.5357, + "step": 6466 + }, + { + "epoch": 0.11821156341967207, + "grad_norm": 8.033725427160187, + "learning_rate": 9.797412742233721e-06, + "loss": 17.9367, + "step": 6467 + }, + { + "epoch": 0.1182298426161186, + "grad_norm": 7.471387238579187, + "learning_rate": 9.79732932722993e-06, + "loss": 18.1184, + "step": 6468 + }, + { + "epoch": 0.11824812181256512, + "grad_norm": 6.63298260552759, + "learning_rate": 9.797245895411915e-06, + "loss": 17.6509, + "step": 6469 + }, + { + "epoch": 0.11826640100901165, + "grad_norm": 7.265387902671119, + "learning_rate": 9.797162446779969e-06, + "loss": 17.6117, + "step": 6470 + }, + { + "epoch": 0.11828468020545817, + "grad_norm": 7.474828687299429, + "learning_rate": 9.797078981334386e-06, + "loss": 18.049, + "step": 6471 + }, + { + "epoch": 0.11830295940190469, + "grad_norm": 6.423152761167049, + "learning_rate": 9.796995499075457e-06, + "loss": 17.5033, + "step": 6472 + }, + { + "epoch": 0.11832123859835121, + "grad_norm": 6.747138888401617, + "learning_rate": 9.796912000003475e-06, + "loss": 17.7517, + "step": 6473 + }, + { + "epoch": 0.11833951779479775, + "grad_norm": 7.160861374191156, + "learning_rate": 9.796828484118734e-06, + "loss": 17.9269, + "step": 6474 + }, + { + "epoch": 0.11835779699124427, + "grad_norm": 6.613910339644413, + "learning_rate": 9.796744951421524e-06, + "loss": 17.4928, + "step": 6475 + }, + { + "epoch": 0.11837607618769079, + "grad_norm": 8.225071522638741, + "learning_rate": 9.796661401912138e-06, + "loss": 18.0207, + "step": 6476 + }, + { + "epoch": 0.11839435538413731, + "grad_norm": 7.226856116001023, + "learning_rate": 9.796577835590873e-06, + "loss": 17.8802, + "step": 6477 + }, + { + "epoch": 0.11841263458058383, + "grad_norm": 6.713185030496041, + "learning_rate": 9.796494252458018e-06, + "loss": 17.7731, + "step": 6478 + }, + { + "epoch": 0.11843091377703036, + "grad_norm": 7.02622367697721, + "learning_rate": 9.796410652513866e-06, + "loss": 18.0381, + "step": 6479 + }, + { + "epoch": 0.11844919297347689, + "grad_norm": 7.305191679821685, + "learning_rate": 9.79632703575871e-06, + "loss": 17.9136, + "step": 6480 + }, + { + "epoch": 0.11846747216992341, + "grad_norm": 7.283521586820067, + "learning_rate": 9.796243402192845e-06, + "loss": 17.5094, + "step": 6481 + }, + { + "epoch": 0.11848575136636993, + "grad_norm": 7.068732044920499, + "learning_rate": 9.796159751816563e-06, + "loss": 18.1282, + "step": 6482 + }, + { + "epoch": 0.11850403056281646, + "grad_norm": 8.194073750688434, + "learning_rate": 9.796076084630157e-06, + "loss": 17.8692, + "step": 6483 + }, + { + "epoch": 0.11852230975926298, + "grad_norm": 6.827089857509455, + "learning_rate": 9.795992400633923e-06, + "loss": 17.8544, + "step": 6484 + }, + { + "epoch": 0.11854058895570951, + "grad_norm": 7.3389057459812, + "learning_rate": 9.79590869982815e-06, + "loss": 17.486, + "step": 6485 + }, + { + "epoch": 0.11855886815215604, + "grad_norm": 6.334095457705487, + "learning_rate": 9.795824982213132e-06, + "loss": 17.6059, + "step": 6486 + }, + { + "epoch": 0.11857714734860256, + "grad_norm": 5.770866119526604, + "learning_rate": 9.795741247789164e-06, + "loss": 17.2927, + "step": 6487 + }, + { + "epoch": 0.11859542654504908, + "grad_norm": 6.7825318697015105, + "learning_rate": 9.79565749655654e-06, + "loss": 17.5687, + "step": 6488 + }, + { + "epoch": 0.1186137057414956, + "grad_norm": 7.812693039624006, + "learning_rate": 
9.795573728515553e-06, + "loss": 18.2304, + "step": 6489 + }, + { + "epoch": 0.11863198493794212, + "grad_norm": 6.52843103040547, + "learning_rate": 9.795489943666494e-06, + "loss": 17.6061, + "step": 6490 + }, + { + "epoch": 0.11865026413438866, + "grad_norm": 6.42790272523629, + "learning_rate": 9.79540614200966e-06, + "loss": 17.6545, + "step": 6491 + }, + { + "epoch": 0.11866854333083518, + "grad_norm": 7.890960366904658, + "learning_rate": 9.795322323545345e-06, + "loss": 17.8171, + "step": 6492 + }, + { + "epoch": 0.1186868225272817, + "grad_norm": 8.097096802324884, + "learning_rate": 9.795238488273841e-06, + "loss": 18.2555, + "step": 6493 + }, + { + "epoch": 0.11870510172372822, + "grad_norm": 8.405031935416428, + "learning_rate": 9.79515463619544e-06, + "loss": 18.142, + "step": 6494 + }, + { + "epoch": 0.11872338092017475, + "grad_norm": 6.724443918721763, + "learning_rate": 9.79507076731044e-06, + "loss": 17.6722, + "step": 6495 + }, + { + "epoch": 0.11874166011662127, + "grad_norm": 7.80273706210259, + "learning_rate": 9.794986881619132e-06, + "loss": 18.2217, + "step": 6496 + }, + { + "epoch": 0.1187599393130678, + "grad_norm": 6.818244420135292, + "learning_rate": 9.794902979121813e-06, + "loss": 17.4039, + "step": 6497 + }, + { + "epoch": 0.11877821850951432, + "grad_norm": 7.656089440816266, + "learning_rate": 9.794819059818775e-06, + "loss": 17.6009, + "step": 6498 + }, + { + "epoch": 0.11879649770596085, + "grad_norm": 7.1953928996191046, + "learning_rate": 9.794735123710311e-06, + "loss": 17.8115, + "step": 6499 + }, + { + "epoch": 0.11881477690240737, + "grad_norm": 7.191384965477584, + "learning_rate": 9.794651170796717e-06, + "loss": 17.7287, + "step": 6500 + }, + { + "epoch": 0.11883305609885389, + "grad_norm": 5.405317409410683, + "learning_rate": 9.794567201078284e-06, + "loss": 16.9769, + "step": 6501 + }, + { + "epoch": 0.11885133529530043, + "grad_norm": 5.916881991659906, + "learning_rate": 9.794483214555313e-06, + "loss": 17.3112, + "step": 6502 + }, + { + "epoch": 0.11886961449174695, + "grad_norm": 8.955135080248823, + "learning_rate": 9.794399211228092e-06, + "loss": 18.3316, + "step": 6503 + }, + { + "epoch": 0.11888789368819347, + "grad_norm": 7.922817280931787, + "learning_rate": 9.79431519109692e-06, + "loss": 18.2057, + "step": 6504 + }, + { + "epoch": 0.11890617288463999, + "grad_norm": 7.3757294383020975, + "learning_rate": 9.794231154162087e-06, + "loss": 17.7756, + "step": 6505 + }, + { + "epoch": 0.11892445208108651, + "grad_norm": 6.700722597819988, + "learning_rate": 9.79414710042389e-06, + "loss": 17.5947, + "step": 6506 + }, + { + "epoch": 0.11894273127753303, + "grad_norm": 8.167591367137772, + "learning_rate": 9.794063029882625e-06, + "loss": 18.071, + "step": 6507 + }, + { + "epoch": 0.11896101047397957, + "grad_norm": 7.338127437379368, + "learning_rate": 9.793978942538583e-06, + "loss": 17.724, + "step": 6508 + }, + { + "epoch": 0.11897928967042609, + "grad_norm": 7.703296730988331, + "learning_rate": 9.793894838392062e-06, + "loss": 17.9528, + "step": 6509 + }, + { + "epoch": 0.11899756886687261, + "grad_norm": 6.490671664928666, + "learning_rate": 9.793810717443356e-06, + "loss": 17.4084, + "step": 6510 + }, + { + "epoch": 0.11901584806331914, + "grad_norm": 10.372900084340094, + "learning_rate": 9.79372657969276e-06, + "loss": 19.1718, + "step": 6511 + }, + { + "epoch": 0.11903412725976566, + "grad_norm": 9.095616702660106, + "learning_rate": 9.793642425140567e-06, + "loss": 18.216, + "step": 6512 + }, + { + "epoch": 
0.11905240645621218, + "grad_norm": 9.791638205380364, + "learning_rate": 9.793558253787072e-06, + "loss": 19.3034, + "step": 6513 + }, + { + "epoch": 0.11907068565265871, + "grad_norm": 6.938984583399672, + "learning_rate": 9.793474065632574e-06, + "loss": 17.6442, + "step": 6514 + }, + { + "epoch": 0.11908896484910524, + "grad_norm": 6.617292142701157, + "learning_rate": 9.793389860677364e-06, + "loss": 17.734, + "step": 6515 + }, + { + "epoch": 0.11910724404555176, + "grad_norm": 8.419519258940735, + "learning_rate": 9.793305638921738e-06, + "loss": 18.0048, + "step": 6516 + }, + { + "epoch": 0.11912552324199828, + "grad_norm": 7.748242283614979, + "learning_rate": 9.793221400365993e-06, + "loss": 17.7052, + "step": 6517 + }, + { + "epoch": 0.1191438024384448, + "grad_norm": 8.192685354082709, + "learning_rate": 9.793137145010423e-06, + "loss": 18.4234, + "step": 6518 + }, + { + "epoch": 0.11916208163489134, + "grad_norm": 6.9733892634945995, + "learning_rate": 9.793052872855322e-06, + "loss": 17.5065, + "step": 6519 + }, + { + "epoch": 0.11918036083133786, + "grad_norm": 7.444446036851159, + "learning_rate": 9.792968583900988e-06, + "loss": 17.8992, + "step": 6520 + }, + { + "epoch": 0.11919864002778438, + "grad_norm": 6.807499423338599, + "learning_rate": 9.792884278147714e-06, + "loss": 17.6398, + "step": 6521 + }, + { + "epoch": 0.1192169192242309, + "grad_norm": 6.096419198087533, + "learning_rate": 9.792799955595796e-06, + "loss": 17.264, + "step": 6522 + }, + { + "epoch": 0.11923519842067742, + "grad_norm": 6.42210350027216, + "learning_rate": 9.792715616245532e-06, + "loss": 17.5809, + "step": 6523 + }, + { + "epoch": 0.11925347761712395, + "grad_norm": 7.436633208657352, + "learning_rate": 9.792631260097214e-06, + "loss": 17.7595, + "step": 6524 + }, + { + "epoch": 0.11927175681357048, + "grad_norm": 9.280548299826297, + "learning_rate": 9.79254688715114e-06, + "loss": 18.5013, + "step": 6525 + }, + { + "epoch": 0.119290036010017, + "grad_norm": 8.279025851654675, + "learning_rate": 9.792462497407604e-06, + "loss": 17.7648, + "step": 6526 + }, + { + "epoch": 0.11930831520646352, + "grad_norm": 8.371288874130814, + "learning_rate": 9.792378090866904e-06, + "loss": 18.0224, + "step": 6527 + }, + { + "epoch": 0.11932659440291005, + "grad_norm": 6.9459894384664755, + "learning_rate": 9.792293667529334e-06, + "loss": 17.8487, + "step": 6528 + }, + { + "epoch": 0.11934487359935657, + "grad_norm": 5.985626661647108, + "learning_rate": 9.79220922739519e-06, + "loss": 17.2801, + "step": 6529 + }, + { + "epoch": 0.11936315279580309, + "grad_norm": 7.520338406965312, + "learning_rate": 9.79212477046477e-06, + "loss": 17.7614, + "step": 6530 + }, + { + "epoch": 0.11938143199224963, + "grad_norm": 8.01768301805792, + "learning_rate": 9.792040296738367e-06, + "loss": 18.0922, + "step": 6531 + }, + { + "epoch": 0.11939971118869615, + "grad_norm": 6.270714170837754, + "learning_rate": 9.79195580621628e-06, + "loss": 17.7132, + "step": 6532 + }, + { + "epoch": 0.11941799038514267, + "grad_norm": 7.937258621373097, + "learning_rate": 9.791871298898804e-06, + "loss": 18.1272, + "step": 6533 + }, + { + "epoch": 0.11943626958158919, + "grad_norm": 8.66782154213655, + "learning_rate": 9.791786774786234e-06, + "loss": 18.029, + "step": 6534 + }, + { + "epoch": 0.11945454877803571, + "grad_norm": 6.578285988582351, + "learning_rate": 9.791702233878867e-06, + "loss": 17.6913, + "step": 6535 + }, + { + "epoch": 0.11947282797448225, + "grad_norm": 6.336874696532593, + "learning_rate": 
9.791617676176999e-06, + "loss": 17.4965, + "step": 6536 + }, + { + "epoch": 0.11949110717092877, + "grad_norm": 7.599347049717939, + "learning_rate": 9.791533101680928e-06, + "loss": 18.0085, + "step": 6537 + }, + { + "epoch": 0.11950938636737529, + "grad_norm": 5.948878988910011, + "learning_rate": 9.791448510390948e-06, + "loss": 17.3512, + "step": 6538 + }, + { + "epoch": 0.11952766556382181, + "grad_norm": 6.822620871610262, + "learning_rate": 9.791363902307357e-06, + "loss": 17.5646, + "step": 6539 + }, + { + "epoch": 0.11954594476026834, + "grad_norm": 8.442392421658504, + "learning_rate": 9.791279277430453e-06, + "loss": 18.2527, + "step": 6540 + }, + { + "epoch": 0.11956422395671486, + "grad_norm": 7.432941474388194, + "learning_rate": 9.79119463576053e-06, + "loss": 17.8236, + "step": 6541 + }, + { + "epoch": 0.11958250315316139, + "grad_norm": 8.100810925325277, + "learning_rate": 9.791109977297886e-06, + "loss": 18.1798, + "step": 6542 + }, + { + "epoch": 0.11960078234960791, + "grad_norm": 6.695797003240366, + "learning_rate": 9.791025302042816e-06, + "loss": 17.7148, + "step": 6543 + }, + { + "epoch": 0.11961906154605444, + "grad_norm": 7.120505481509592, + "learning_rate": 9.790940609995618e-06, + "loss": 17.9138, + "step": 6544 + }, + { + "epoch": 0.11963734074250096, + "grad_norm": 7.296967713881201, + "learning_rate": 9.79085590115659e-06, + "loss": 17.8762, + "step": 6545 + }, + { + "epoch": 0.11965561993894748, + "grad_norm": 7.328045571999824, + "learning_rate": 9.790771175526028e-06, + "loss": 17.5075, + "step": 6546 + }, + { + "epoch": 0.119673899135394, + "grad_norm": 6.998914971455784, + "learning_rate": 9.790686433104229e-06, + "loss": 17.4713, + "step": 6547 + }, + { + "epoch": 0.11969217833184054, + "grad_norm": 8.160249802553942, + "learning_rate": 9.790601673891488e-06, + "loss": 18.2393, + "step": 6548 + }, + { + "epoch": 0.11971045752828706, + "grad_norm": 6.258968282972264, + "learning_rate": 9.790516897888105e-06, + "loss": 17.227, + "step": 6549 + }, + { + "epoch": 0.11972873672473358, + "grad_norm": 6.358986838184495, + "learning_rate": 9.790432105094376e-06, + "loss": 17.3255, + "step": 6550 + }, + { + "epoch": 0.1197470159211801, + "grad_norm": 6.693172918968336, + "learning_rate": 9.790347295510597e-06, + "loss": 17.4555, + "step": 6551 + }, + { + "epoch": 0.11976529511762662, + "grad_norm": 6.735622330137925, + "learning_rate": 9.790262469137068e-06, + "loss": 17.8505, + "step": 6552 + }, + { + "epoch": 0.11978357431407316, + "grad_norm": 7.432447853508037, + "learning_rate": 9.790177625974084e-06, + "loss": 17.8742, + "step": 6553 + }, + { + "epoch": 0.11980185351051968, + "grad_norm": 7.732242969332778, + "learning_rate": 9.790092766021943e-06, + "loss": 18.0215, + "step": 6554 + }, + { + "epoch": 0.1198201327069662, + "grad_norm": 7.625990731517096, + "learning_rate": 9.790007889280942e-06, + "loss": 18.2303, + "step": 6555 + }, + { + "epoch": 0.11983841190341273, + "grad_norm": 7.136837631975101, + "learning_rate": 9.78992299575138e-06, + "loss": 18.2816, + "step": 6556 + }, + { + "epoch": 0.11985669109985925, + "grad_norm": 8.04377617285793, + "learning_rate": 9.789838085433554e-06, + "loss": 18.6148, + "step": 6557 + }, + { + "epoch": 0.11987497029630577, + "grad_norm": 7.731295894701705, + "learning_rate": 9.78975315832776e-06, + "loss": 18.208, + "step": 6558 + }, + { + "epoch": 0.1198932494927523, + "grad_norm": 7.052510981025374, + "learning_rate": 9.789668214434296e-06, + "loss": 17.6294, + "step": 6559 + }, + { + "epoch": 
0.11991152868919883, + "grad_norm": 8.321861239998073, + "learning_rate": 9.789583253753463e-06, + "loss": 17.7319, + "step": 6560 + }, + { + "epoch": 0.11992980788564535, + "grad_norm": 6.498838604629319, + "learning_rate": 9.789498276285554e-06, + "loss": 17.6915, + "step": 6561 + }, + { + "epoch": 0.11994808708209187, + "grad_norm": 7.443228388632784, + "learning_rate": 9.78941328203087e-06, + "loss": 17.7216, + "step": 6562 + }, + { + "epoch": 0.11996636627853839, + "grad_norm": 6.529059873526048, + "learning_rate": 9.789328270989709e-06, + "loss": 17.5777, + "step": 6563 + }, + { + "epoch": 0.11998464547498491, + "grad_norm": 6.723451701396521, + "learning_rate": 9.789243243162368e-06, + "loss": 17.6907, + "step": 6564 + }, + { + "epoch": 0.12000292467143145, + "grad_norm": 6.8791003187608855, + "learning_rate": 9.789158198549142e-06, + "loss": 17.7716, + "step": 6565 + }, + { + "epoch": 0.12002120386787797, + "grad_norm": 7.197813593107489, + "learning_rate": 9.789073137150335e-06, + "loss": 18.0089, + "step": 6566 + }, + { + "epoch": 0.12003948306432449, + "grad_norm": 7.657156468405905, + "learning_rate": 9.788988058966242e-06, + "loss": 17.79, + "step": 6567 + }, + { + "epoch": 0.12005776226077101, + "grad_norm": 7.010305958903088, + "learning_rate": 9.788902963997161e-06, + "loss": 17.4947, + "step": 6568 + }, + { + "epoch": 0.12007604145721754, + "grad_norm": 7.652939229137941, + "learning_rate": 9.78881785224339e-06, + "loss": 18.1834, + "step": 6569 + }, + { + "epoch": 0.12009432065366407, + "grad_norm": 7.3832717808589905, + "learning_rate": 9.78873272370523e-06, + "loss": 17.7098, + "step": 6570 + }, + { + "epoch": 0.1201125998501106, + "grad_norm": 6.797808442800341, + "learning_rate": 9.788647578382975e-06, + "loss": 17.3152, + "step": 6571 + }, + { + "epoch": 0.12013087904655712, + "grad_norm": 9.297017300811333, + "learning_rate": 9.788562416276928e-06, + "loss": 18.6175, + "step": 6572 + }, + { + "epoch": 0.12014915824300364, + "grad_norm": 7.311421378016831, + "learning_rate": 9.788477237387384e-06, + "loss": 17.615, + "step": 6573 + }, + { + "epoch": 0.12016743743945016, + "grad_norm": 6.93790451572851, + "learning_rate": 9.788392041714642e-06, + "loss": 17.9713, + "step": 6574 + }, + { + "epoch": 0.12018571663589668, + "grad_norm": 7.290709844297043, + "learning_rate": 9.788306829259002e-06, + "loss": 17.7142, + "step": 6575 + }, + { + "epoch": 0.12020399583234322, + "grad_norm": 6.99043375359672, + "learning_rate": 9.788221600020763e-06, + "loss": 17.4934, + "step": 6576 + }, + { + "epoch": 0.12022227502878974, + "grad_norm": 8.057731335287574, + "learning_rate": 9.788136354000221e-06, + "loss": 18.308, + "step": 6577 + }, + { + "epoch": 0.12024055422523626, + "grad_norm": 9.220128603302165, + "learning_rate": 9.788051091197679e-06, + "loss": 18.803, + "step": 6578 + }, + { + "epoch": 0.12025883342168278, + "grad_norm": 7.180361320473192, + "learning_rate": 9.78796581161343e-06, + "loss": 17.7055, + "step": 6579 + }, + { + "epoch": 0.1202771126181293, + "grad_norm": 7.202360795240422, + "learning_rate": 9.78788051524778e-06, + "loss": 17.6053, + "step": 6580 + }, + { + "epoch": 0.12029539181457582, + "grad_norm": 7.949602049517095, + "learning_rate": 9.787795202101022e-06, + "loss": 18.6818, + "step": 6581 + }, + { + "epoch": 0.12031367101102236, + "grad_norm": 7.290520032674815, + "learning_rate": 9.787709872173459e-06, + "loss": 17.8462, + "step": 6582 + }, + { + "epoch": 0.12033195020746888, + "grad_norm": 7.486266145220681, + "learning_rate": 
9.787624525465386e-06, + "loss": 17.8674, + "step": 6583 + }, + { + "epoch": 0.1203502294039154, + "grad_norm": 8.300157535115929, + "learning_rate": 9.787539161977107e-06, + "loss": 18.4809, + "step": 6584 + }, + { + "epoch": 0.12036850860036193, + "grad_norm": 7.249217641057837, + "learning_rate": 9.787453781708918e-06, + "loss": 17.7896, + "step": 6585 + }, + { + "epoch": 0.12038678779680845, + "grad_norm": 8.136674193153222, + "learning_rate": 9.787368384661117e-06, + "loss": 18.0296, + "step": 6586 + }, + { + "epoch": 0.12040506699325498, + "grad_norm": 7.170638263949791, + "learning_rate": 9.787282970834008e-06, + "loss": 17.782, + "step": 6587 + }, + { + "epoch": 0.1204233461897015, + "grad_norm": 7.385747835699894, + "learning_rate": 9.787197540227887e-06, + "loss": 17.8215, + "step": 6588 + }, + { + "epoch": 0.12044162538614803, + "grad_norm": 7.774225347474626, + "learning_rate": 9.787112092843052e-06, + "loss": 18.4174, + "step": 6589 + }, + { + "epoch": 0.12045990458259455, + "grad_norm": 6.440299019895617, + "learning_rate": 9.787026628679806e-06, + "loss": 17.5166, + "step": 6590 + }, + { + "epoch": 0.12047818377904107, + "grad_norm": 6.0228606643020495, + "learning_rate": 9.786941147738446e-06, + "loss": 17.2161, + "step": 6591 + }, + { + "epoch": 0.12049646297548759, + "grad_norm": 8.424455245304133, + "learning_rate": 9.786855650019275e-06, + "loss": 18.4105, + "step": 6592 + }, + { + "epoch": 0.12051474217193413, + "grad_norm": 6.116248050842922, + "learning_rate": 9.78677013552259e-06, + "loss": 17.1726, + "step": 6593 + }, + { + "epoch": 0.12053302136838065, + "grad_norm": 7.7242918289890925, + "learning_rate": 9.786684604248688e-06, + "loss": 18.0852, + "step": 6594 + }, + { + "epoch": 0.12055130056482717, + "grad_norm": 6.654921197561543, + "learning_rate": 9.786599056197874e-06, + "loss": 17.6023, + "step": 6595 + }, + { + "epoch": 0.12056957976127369, + "grad_norm": 6.641258300875004, + "learning_rate": 9.786513491370446e-06, + "loss": 17.6764, + "step": 6596 + }, + { + "epoch": 0.12058785895772021, + "grad_norm": 7.115677003135941, + "learning_rate": 9.786427909766703e-06, + "loss": 17.7292, + "step": 6597 + }, + { + "epoch": 0.12060613815416674, + "grad_norm": 7.760198261135722, + "learning_rate": 9.786342311386946e-06, + "loss": 18.1984, + "step": 6598 + }, + { + "epoch": 0.12062441735061327, + "grad_norm": 6.9906326340819955, + "learning_rate": 9.786256696231473e-06, + "loss": 17.7639, + "step": 6599 + }, + { + "epoch": 0.1206426965470598, + "grad_norm": 7.08097195907497, + "learning_rate": 9.786171064300587e-06, + "loss": 17.4673, + "step": 6600 + }, + { + "epoch": 0.12066097574350632, + "grad_norm": 7.867628079964912, + "learning_rate": 9.786085415594588e-06, + "loss": 17.9685, + "step": 6601 + }, + { + "epoch": 0.12067925493995284, + "grad_norm": 8.08039809075577, + "learning_rate": 9.785999750113772e-06, + "loss": 18.3153, + "step": 6602 + }, + { + "epoch": 0.12069753413639936, + "grad_norm": 6.4718450271338375, + "learning_rate": 9.785914067858444e-06, + "loss": 17.5679, + "step": 6603 + }, + { + "epoch": 0.1207158133328459, + "grad_norm": 7.613223665863531, + "learning_rate": 9.785828368828903e-06, + "loss": 18.1562, + "step": 6604 + }, + { + "epoch": 0.12073409252929242, + "grad_norm": 6.654676585758407, + "learning_rate": 9.785742653025448e-06, + "loss": 17.5961, + "step": 6605 + }, + { + "epoch": 0.12075237172573894, + "grad_norm": 7.751378363143706, + "learning_rate": 9.78565692044838e-06, + "loss": 18.3553, + "step": 6606 + }, + { + "epoch": 
0.12077065092218546, + "grad_norm": 7.724338371565218, + "learning_rate": 9.785571171098e-06, + "loss": 17.8555, + "step": 6607 + }, + { + "epoch": 0.12078893011863198, + "grad_norm": 7.7391000573388515, + "learning_rate": 9.785485404974608e-06, + "loss": 18.285, + "step": 6608 + }, + { + "epoch": 0.1208072093150785, + "grad_norm": 6.503173067269977, + "learning_rate": 9.785399622078505e-06, + "loss": 17.5102, + "step": 6609 + }, + { + "epoch": 0.12082548851152504, + "grad_norm": 9.305663182717261, + "learning_rate": 9.785313822409992e-06, + "loss": 17.6926, + "step": 6610 + }, + { + "epoch": 0.12084376770797156, + "grad_norm": 8.544767880225772, + "learning_rate": 9.785228005969369e-06, + "loss": 18.3077, + "step": 6611 + }, + { + "epoch": 0.12086204690441808, + "grad_norm": 7.273019054278335, + "learning_rate": 9.785142172756937e-06, + "loss": 17.7531, + "step": 6612 + }, + { + "epoch": 0.1208803261008646, + "grad_norm": 9.788212676017627, + "learning_rate": 9.785056322772997e-06, + "loss": 18.5349, + "step": 6613 + }, + { + "epoch": 0.12089860529731113, + "grad_norm": 6.958116567792477, + "learning_rate": 9.784970456017851e-06, + "loss": 17.5714, + "step": 6614 + }, + { + "epoch": 0.12091688449375765, + "grad_norm": 8.095007728727529, + "learning_rate": 9.784884572491798e-06, + "loss": 18.2647, + "step": 6615 + }, + { + "epoch": 0.12093516369020418, + "grad_norm": 6.642636193639059, + "learning_rate": 9.784798672195138e-06, + "loss": 17.8691, + "step": 6616 + }, + { + "epoch": 0.1209534428866507, + "grad_norm": 6.723446292301932, + "learning_rate": 9.784712755128176e-06, + "loss": 17.5251, + "step": 6617 + }, + { + "epoch": 0.12097172208309723, + "grad_norm": 6.4877476563560075, + "learning_rate": 9.78462682129121e-06, + "loss": 17.5709, + "step": 6618 + }, + { + "epoch": 0.12099000127954375, + "grad_norm": 5.652538915549849, + "learning_rate": 9.784540870684542e-06, + "loss": 17.1876, + "step": 6619 + }, + { + "epoch": 0.12100828047599027, + "grad_norm": 8.07854844816267, + "learning_rate": 9.784454903308475e-06, + "loss": 18.2179, + "step": 6620 + }, + { + "epoch": 0.1210265596724368, + "grad_norm": 7.189175614139578, + "learning_rate": 9.784368919163307e-06, + "loss": 18.0607, + "step": 6621 + }, + { + "epoch": 0.12104483886888333, + "grad_norm": 6.85131669886279, + "learning_rate": 9.78428291824934e-06, + "loss": 17.696, + "step": 6622 + }, + { + "epoch": 0.12106311806532985, + "grad_norm": 6.980487847130155, + "learning_rate": 9.78419690056688e-06, + "loss": 17.5203, + "step": 6623 + }, + { + "epoch": 0.12108139726177637, + "grad_norm": 7.103323988532092, + "learning_rate": 9.784110866116223e-06, + "loss": 17.7473, + "step": 6624 + }, + { + "epoch": 0.1210996764582229, + "grad_norm": 7.107746857581005, + "learning_rate": 9.784024814897675e-06, + "loss": 17.7612, + "step": 6625 + }, + { + "epoch": 0.12111795565466942, + "grad_norm": 7.899770965041136, + "learning_rate": 9.783938746911532e-06, + "loss": 18.4489, + "step": 6626 + }, + { + "epoch": 0.12113623485111595, + "grad_norm": 7.480258079191613, + "learning_rate": 9.7838526621581e-06, + "loss": 18.023, + "step": 6627 + }, + { + "epoch": 0.12115451404756247, + "grad_norm": 5.904831813224204, + "learning_rate": 9.78376656063768e-06, + "loss": 17.1418, + "step": 6628 + }, + { + "epoch": 0.121172793244009, + "grad_norm": 7.742130858072729, + "learning_rate": 9.783680442350571e-06, + "loss": 18.3106, + "step": 6629 + }, + { + "epoch": 0.12119107244045552, + "grad_norm": 6.462180802164989, + "learning_rate": 9.78359430729708e-06, + 
"loss": 17.504, + "step": 6630 + }, + { + "epoch": 0.12120935163690204, + "grad_norm": 5.869227175449395, + "learning_rate": 9.783508155477506e-06, + "loss": 17.2101, + "step": 6631 + }, + { + "epoch": 0.12122763083334856, + "grad_norm": 5.5460802093685455, + "learning_rate": 9.78342198689215e-06, + "loss": 17.1091, + "step": 6632 + }, + { + "epoch": 0.1212459100297951, + "grad_norm": 7.030778273171266, + "learning_rate": 9.783335801541314e-06, + "loss": 17.6532, + "step": 6633 + }, + { + "epoch": 0.12126418922624162, + "grad_norm": 7.3654630835564125, + "learning_rate": 9.783249599425302e-06, + "loss": 17.6943, + "step": 6634 + }, + { + "epoch": 0.12128246842268814, + "grad_norm": 8.340096371958918, + "learning_rate": 9.783163380544416e-06, + "loss": 18.7983, + "step": 6635 + }, + { + "epoch": 0.12130074761913466, + "grad_norm": 6.741932066440961, + "learning_rate": 9.783077144898957e-06, + "loss": 17.7768, + "step": 6636 + }, + { + "epoch": 0.12131902681558118, + "grad_norm": 7.2685583397958835, + "learning_rate": 9.782990892489227e-06, + "loss": 17.8446, + "step": 6637 + }, + { + "epoch": 0.12133730601202772, + "grad_norm": 7.525327853935263, + "learning_rate": 9.78290462331553e-06, + "loss": 18.0467, + "step": 6638 + }, + { + "epoch": 0.12135558520847424, + "grad_norm": 7.054087612526331, + "learning_rate": 9.782818337378166e-06, + "loss": 17.8198, + "step": 6639 + }, + { + "epoch": 0.12137386440492076, + "grad_norm": 6.507013946601593, + "learning_rate": 9.78273203467744e-06, + "loss": 17.4221, + "step": 6640 + }, + { + "epoch": 0.12139214360136728, + "grad_norm": 8.665104638381612, + "learning_rate": 9.782645715213651e-06, + "loss": 18.4877, + "step": 6641 + }, + { + "epoch": 0.1214104227978138, + "grad_norm": 6.2512124400221944, + "learning_rate": 9.782559378987106e-06, + "loss": 17.3166, + "step": 6642 + }, + { + "epoch": 0.12142870199426033, + "grad_norm": 6.566995098817002, + "learning_rate": 9.782473025998105e-06, + "loss": 17.5743, + "step": 6643 + }, + { + "epoch": 0.12144698119070686, + "grad_norm": 8.687345618797119, + "learning_rate": 9.782386656246951e-06, + "loss": 17.5453, + "step": 6644 + }, + { + "epoch": 0.12146526038715338, + "grad_norm": 6.810078433225566, + "learning_rate": 9.782300269733947e-06, + "loss": 17.5568, + "step": 6645 + }, + { + "epoch": 0.1214835395835999, + "grad_norm": 5.867345526931524, + "learning_rate": 9.782213866459395e-06, + "loss": 17.2831, + "step": 6646 + }, + { + "epoch": 0.12150181878004643, + "grad_norm": 6.927825558368225, + "learning_rate": 9.7821274464236e-06, + "loss": 17.8387, + "step": 6647 + }, + { + "epoch": 0.12152009797649295, + "grad_norm": 8.042051243900016, + "learning_rate": 9.78204100962686e-06, + "loss": 18.4047, + "step": 6648 + }, + { + "epoch": 0.12153837717293947, + "grad_norm": 7.815543835435438, + "learning_rate": 9.781954556069484e-06, + "loss": 18.4583, + "step": 6649 + }, + { + "epoch": 0.121556656369386, + "grad_norm": 6.365267516889937, + "learning_rate": 9.781868085751772e-06, + "loss": 17.7982, + "step": 6650 + }, + { + "epoch": 0.12157493556583253, + "grad_norm": 7.928597987709175, + "learning_rate": 9.781781598674027e-06, + "loss": 18.4748, + "step": 6651 + }, + { + "epoch": 0.12159321476227905, + "grad_norm": 6.770908180356978, + "learning_rate": 9.781695094836553e-06, + "loss": 17.5607, + "step": 6652 + }, + { + "epoch": 0.12161149395872557, + "grad_norm": 8.076061530040407, + "learning_rate": 9.78160857423965e-06, + "loss": 18.4446, + "step": 6653 + }, + { + "epoch": 0.1216297731551721, + "grad_norm": 
6.989353573018683, + "learning_rate": 9.781522036883626e-06, + "loss": 18.0134, + "step": 6654 + }, + { + "epoch": 0.12164805235161863, + "grad_norm": 7.163769031715184, + "learning_rate": 9.781435482768781e-06, + "loss": 17.879, + "step": 6655 + }, + { + "epoch": 0.12166633154806515, + "grad_norm": 5.748367459077812, + "learning_rate": 9.78134891189542e-06, + "loss": 17.3384, + "step": 6656 + }, + { + "epoch": 0.12168461074451167, + "grad_norm": 7.788122691341108, + "learning_rate": 9.781262324263846e-06, + "loss": 17.8478, + "step": 6657 + }, + { + "epoch": 0.1217028899409582, + "grad_norm": 6.219641421602568, + "learning_rate": 9.781175719874364e-06, + "loss": 17.3303, + "step": 6658 + }, + { + "epoch": 0.12172116913740472, + "grad_norm": 6.823288884294493, + "learning_rate": 9.781089098727274e-06, + "loss": 17.6334, + "step": 6659 + }, + { + "epoch": 0.12173944833385124, + "grad_norm": 6.968891928695853, + "learning_rate": 9.781002460822883e-06, + "loss": 17.7669, + "step": 6660 + }, + { + "epoch": 0.12175772753029777, + "grad_norm": 6.87242956629981, + "learning_rate": 9.780915806161493e-06, + "loss": 17.5215, + "step": 6661 + }, + { + "epoch": 0.1217760067267443, + "grad_norm": 6.415485023430618, + "learning_rate": 9.780829134743408e-06, + "loss": 17.7493, + "step": 6662 + }, + { + "epoch": 0.12179428592319082, + "grad_norm": 6.771392867085264, + "learning_rate": 9.780742446568932e-06, + "loss": 17.6961, + "step": 6663 + }, + { + "epoch": 0.12181256511963734, + "grad_norm": 6.6995121966166185, + "learning_rate": 9.780655741638367e-06, + "loss": 17.6009, + "step": 6664 + }, + { + "epoch": 0.12183084431608386, + "grad_norm": 6.648799284857779, + "learning_rate": 9.78056901995202e-06, + "loss": 17.2554, + "step": 6665 + }, + { + "epoch": 0.12184912351253038, + "grad_norm": 5.713634795954836, + "learning_rate": 9.780482281510194e-06, + "loss": 17.3779, + "step": 6666 + }, + { + "epoch": 0.12186740270897692, + "grad_norm": 7.321783359721326, + "learning_rate": 9.780395526313188e-06, + "loss": 18.013, + "step": 6667 + }, + { + "epoch": 0.12188568190542344, + "grad_norm": 6.368595998721913, + "learning_rate": 9.780308754361316e-06, + "loss": 17.3284, + "step": 6668 + }, + { + "epoch": 0.12190396110186996, + "grad_norm": 7.940013034290859, + "learning_rate": 9.780221965654874e-06, + "loss": 18.2431, + "step": 6669 + }, + { + "epoch": 0.12192224029831648, + "grad_norm": 8.2597910346473, + "learning_rate": 9.780135160194168e-06, + "loss": 18.3547, + "step": 6670 + }, + { + "epoch": 0.121940519494763, + "grad_norm": 8.342409103263948, + "learning_rate": 9.780048337979505e-06, + "loss": 18.4465, + "step": 6671 + }, + { + "epoch": 0.12195879869120954, + "grad_norm": 7.896434680159246, + "learning_rate": 9.779961499011187e-06, + "loss": 18.0811, + "step": 6672 + }, + { + "epoch": 0.12197707788765606, + "grad_norm": 7.1427655665856955, + "learning_rate": 9.779874643289517e-06, + "loss": 17.8061, + "step": 6673 + }, + { + "epoch": 0.12199535708410258, + "grad_norm": 7.285996116609734, + "learning_rate": 9.779787770814804e-06, + "loss": 17.7508, + "step": 6674 + }, + { + "epoch": 0.1220136362805491, + "grad_norm": 7.961217904499843, + "learning_rate": 9.779700881587349e-06, + "loss": 17.8702, + "step": 6675 + }, + { + "epoch": 0.12203191547699563, + "grad_norm": 9.678940575997192, + "learning_rate": 9.779613975607456e-06, + "loss": 18.5119, + "step": 6676 + }, + { + "epoch": 0.12205019467344215, + "grad_norm": 7.305182452069914, + "learning_rate": 9.779527052875431e-06, + "loss": 18.0059, + "step": 
6677 + }, + { + "epoch": 0.12206847386988869, + "grad_norm": 8.011995951602925, + "learning_rate": 9.779440113391578e-06, + "loss": 18.1858, + "step": 6678 + }, + { + "epoch": 0.12208675306633521, + "grad_norm": 7.799713488923933, + "learning_rate": 9.779353157156202e-06, + "loss": 17.9749, + "step": 6679 + }, + { + "epoch": 0.12210503226278173, + "grad_norm": 7.960979082936661, + "learning_rate": 9.77926618416961e-06, + "loss": 18.3202, + "step": 6680 + }, + { + "epoch": 0.12212331145922825, + "grad_norm": 6.171541488608712, + "learning_rate": 9.779179194432102e-06, + "loss": 17.2526, + "step": 6681 + }, + { + "epoch": 0.12214159065567477, + "grad_norm": 6.397595410388515, + "learning_rate": 9.779092187943988e-06, + "loss": 17.7538, + "step": 6682 + }, + { + "epoch": 0.1221598698521213, + "grad_norm": 7.348032278924981, + "learning_rate": 9.779005164705568e-06, + "loss": 17.7268, + "step": 6683 + }, + { + "epoch": 0.12217814904856783, + "grad_norm": 6.751145450140728, + "learning_rate": 9.778918124717151e-06, + "loss": 17.9829, + "step": 6684 + }, + { + "epoch": 0.12219642824501435, + "grad_norm": 6.997427910648309, + "learning_rate": 9.778831067979043e-06, + "loss": 17.7092, + "step": 6685 + }, + { + "epoch": 0.12221470744146087, + "grad_norm": 6.570589473132914, + "learning_rate": 9.778743994491544e-06, + "loss": 17.425, + "step": 6686 + }, + { + "epoch": 0.1222329866379074, + "grad_norm": 8.874818025027167, + "learning_rate": 9.778656904254962e-06, + "loss": 17.9388, + "step": 6687 + }, + { + "epoch": 0.12225126583435392, + "grad_norm": 6.758030485303756, + "learning_rate": 9.778569797269604e-06, + "loss": 17.8268, + "step": 6688 + }, + { + "epoch": 0.12226954503080045, + "grad_norm": 6.9498680833266455, + "learning_rate": 9.778482673535772e-06, + "loss": 17.806, + "step": 6689 + }, + { + "epoch": 0.12228782422724697, + "grad_norm": 7.238401612147517, + "learning_rate": 9.778395533053772e-06, + "loss": 17.9575, + "step": 6690 + }, + { + "epoch": 0.1223061034236935, + "grad_norm": 7.79205501513286, + "learning_rate": 9.778308375823912e-06, + "loss": 18.188, + "step": 6691 + }, + { + "epoch": 0.12232438262014002, + "grad_norm": 8.232803048564039, + "learning_rate": 9.778221201846496e-06, + "loss": 17.9286, + "step": 6692 + }, + { + "epoch": 0.12234266181658654, + "grad_norm": 7.463984094963095, + "learning_rate": 9.778134011121829e-06, + "loss": 17.9496, + "step": 6693 + }, + { + "epoch": 0.12236094101303306, + "grad_norm": 6.277175201204437, + "learning_rate": 9.778046803650216e-06, + "loss": 17.3479, + "step": 6694 + }, + { + "epoch": 0.1223792202094796, + "grad_norm": 7.628884492213488, + "learning_rate": 9.777959579431964e-06, + "loss": 17.9509, + "step": 6695 + }, + { + "epoch": 0.12239749940592612, + "grad_norm": 6.142749128251153, + "learning_rate": 9.77787233846738e-06, + "loss": 17.1414, + "step": 6696 + }, + { + "epoch": 0.12241577860237264, + "grad_norm": 7.916450165820055, + "learning_rate": 9.777785080756765e-06, + "loss": 18.203, + "step": 6697 + }, + { + "epoch": 0.12243405779881916, + "grad_norm": 7.338705383324853, + "learning_rate": 9.77769780630043e-06, + "loss": 17.9081, + "step": 6698 + }, + { + "epoch": 0.12245233699526568, + "grad_norm": 8.047757701247363, + "learning_rate": 9.777610515098677e-06, + "loss": 18.1902, + "step": 6699 + }, + { + "epoch": 0.1224706161917122, + "grad_norm": 7.780277635133266, + "learning_rate": 9.777523207151816e-06, + "loss": 17.9597, + "step": 6700 + }, + { + "epoch": 0.12248889538815874, + "grad_norm": 5.451371512759478, + 
"learning_rate": 9.777435882460149e-06, + "loss": 17.0746, + "step": 6701 + }, + { + "epoch": 0.12250717458460526, + "grad_norm": 5.919984741863563, + "learning_rate": 9.777348541023986e-06, + "loss": 17.3077, + "step": 6702 + }, + { + "epoch": 0.12252545378105179, + "grad_norm": 6.335150978536211, + "learning_rate": 9.777261182843627e-06, + "loss": 17.4041, + "step": 6703 + }, + { + "epoch": 0.12254373297749831, + "grad_norm": 7.911778775410028, + "learning_rate": 9.777173807919386e-06, + "loss": 17.6603, + "step": 6704 + }, + { + "epoch": 0.12256201217394483, + "grad_norm": 6.039306447646183, + "learning_rate": 9.777086416251564e-06, + "loss": 17.2483, + "step": 6705 + }, + { + "epoch": 0.12258029137039136, + "grad_norm": 6.443788852913615, + "learning_rate": 9.77699900784047e-06, + "loss": 17.5087, + "step": 6706 + }, + { + "epoch": 0.12259857056683789, + "grad_norm": 7.261665013006005, + "learning_rate": 9.776911582686405e-06, + "loss": 17.8151, + "step": 6707 + }, + { + "epoch": 0.12261684976328441, + "grad_norm": 6.636859775767234, + "learning_rate": 9.776824140789683e-06, + "loss": 17.4764, + "step": 6708 + }, + { + "epoch": 0.12263512895973093, + "grad_norm": 7.254475992626495, + "learning_rate": 9.776736682150606e-06, + "loss": 17.8066, + "step": 6709 + }, + { + "epoch": 0.12265340815617745, + "grad_norm": 7.229741150410805, + "learning_rate": 9.77664920676948e-06, + "loss": 18.0929, + "step": 6710 + }, + { + "epoch": 0.12267168735262397, + "grad_norm": 6.5449328205287225, + "learning_rate": 9.776561714646616e-06, + "loss": 17.3675, + "step": 6711 + }, + { + "epoch": 0.12268996654907051, + "grad_norm": 6.250228378134764, + "learning_rate": 9.776474205782315e-06, + "loss": 17.5722, + "step": 6712 + }, + { + "epoch": 0.12270824574551703, + "grad_norm": 6.9380784567055995, + "learning_rate": 9.776386680176888e-06, + "loss": 17.8353, + "step": 6713 + }, + { + "epoch": 0.12272652494196355, + "grad_norm": 5.478467383133045, + "learning_rate": 9.776299137830638e-06, + "loss": 17.147, + "step": 6714 + }, + { + "epoch": 0.12274480413841007, + "grad_norm": 8.9457506259024, + "learning_rate": 9.776211578743875e-06, + "loss": 18.5953, + "step": 6715 + }, + { + "epoch": 0.1227630833348566, + "grad_norm": 7.88474119114791, + "learning_rate": 9.776124002916907e-06, + "loss": 18.0582, + "step": 6716 + }, + { + "epoch": 0.12278136253130312, + "grad_norm": 7.879462358077435, + "learning_rate": 9.776036410350035e-06, + "loss": 17.8196, + "step": 6717 + }, + { + "epoch": 0.12279964172774965, + "grad_norm": 7.4188074690427985, + "learning_rate": 9.775948801043573e-06, + "loss": 17.9607, + "step": 6718 + }, + { + "epoch": 0.12281792092419618, + "grad_norm": 10.335397547280172, + "learning_rate": 9.775861174997824e-06, + "loss": 17.7169, + "step": 6719 + }, + { + "epoch": 0.1228362001206427, + "grad_norm": 6.594194572109577, + "learning_rate": 9.775773532213096e-06, + "loss": 17.6715, + "step": 6720 + }, + { + "epoch": 0.12285447931708922, + "grad_norm": 6.208944364137212, + "learning_rate": 9.775685872689696e-06, + "loss": 17.3092, + "step": 6721 + }, + { + "epoch": 0.12287275851353574, + "grad_norm": 5.93769402082507, + "learning_rate": 9.775598196427931e-06, + "loss": 17.423, + "step": 6722 + }, + { + "epoch": 0.12289103770998228, + "grad_norm": 10.046276137696408, + "learning_rate": 9.77551050342811e-06, + "loss": 18.779, + "step": 6723 + }, + { + "epoch": 0.1229093169064288, + "grad_norm": 7.706093370283888, + "learning_rate": 9.775422793690539e-06, + "loss": 17.8201, + "step": 6724 + }, + { + 
"epoch": 0.12292759610287532, + "grad_norm": 6.828056449910487, + "learning_rate": 9.775335067215524e-06, + "loss": 17.6539, + "step": 6725 + }, + { + "epoch": 0.12294587529932184, + "grad_norm": 7.260831132405209, + "learning_rate": 9.775247324003375e-06, + "loss": 17.9743, + "step": 6726 + }, + { + "epoch": 0.12296415449576836, + "grad_norm": 7.853148072594991, + "learning_rate": 9.775159564054398e-06, + "loss": 18.0551, + "step": 6727 + }, + { + "epoch": 0.12298243369221488, + "grad_norm": 6.893374319447, + "learning_rate": 9.775071787368902e-06, + "loss": 17.907, + "step": 6728 + }, + { + "epoch": 0.12300071288866142, + "grad_norm": 6.796229898152696, + "learning_rate": 9.774983993947194e-06, + "loss": 17.591, + "step": 6729 + }, + { + "epoch": 0.12301899208510794, + "grad_norm": 7.352417629549728, + "learning_rate": 9.774896183789579e-06, + "loss": 18.0169, + "step": 6730 + }, + { + "epoch": 0.12303727128155446, + "grad_norm": 7.420712217607259, + "learning_rate": 9.77480835689637e-06, + "loss": 17.8592, + "step": 6731 + }, + { + "epoch": 0.12305555047800099, + "grad_norm": 7.79655590706962, + "learning_rate": 9.77472051326787e-06, + "loss": 18.2393, + "step": 6732 + }, + { + "epoch": 0.12307382967444751, + "grad_norm": 8.562040061722492, + "learning_rate": 9.77463265290439e-06, + "loss": 18.5743, + "step": 6733 + }, + { + "epoch": 0.12309210887089403, + "grad_norm": 7.419169310113422, + "learning_rate": 9.774544775806238e-06, + "loss": 17.645, + "step": 6734 + }, + { + "epoch": 0.12311038806734056, + "grad_norm": 7.62493487074596, + "learning_rate": 9.774456881973718e-06, + "loss": 18.0855, + "step": 6735 + }, + { + "epoch": 0.12312866726378709, + "grad_norm": 5.854997719179686, + "learning_rate": 9.774368971407143e-06, + "loss": 17.136, + "step": 6736 + }, + { + "epoch": 0.12314694646023361, + "grad_norm": 8.432702579713537, + "learning_rate": 9.774281044106818e-06, + "loss": 18.8493, + "step": 6737 + }, + { + "epoch": 0.12316522565668013, + "grad_norm": 7.084231465389021, + "learning_rate": 9.774193100073054e-06, + "loss": 17.6815, + "step": 6738 + }, + { + "epoch": 0.12318350485312665, + "grad_norm": 7.2206962214256105, + "learning_rate": 9.774105139306156e-06, + "loss": 17.8568, + "step": 6739 + }, + { + "epoch": 0.12320178404957319, + "grad_norm": 8.011314791988577, + "learning_rate": 9.774017161806434e-06, + "loss": 18.0389, + "step": 6740 + }, + { + "epoch": 0.12322006324601971, + "grad_norm": 6.809095578692524, + "learning_rate": 9.773929167574197e-06, + "loss": 17.3972, + "step": 6741 + }, + { + "epoch": 0.12323834244246623, + "grad_norm": 6.631416005685428, + "learning_rate": 9.773841156609751e-06, + "loss": 17.4618, + "step": 6742 + }, + { + "epoch": 0.12325662163891275, + "grad_norm": 6.491790080792121, + "learning_rate": 9.773753128913406e-06, + "loss": 17.4074, + "step": 6743 + }, + { + "epoch": 0.12327490083535927, + "grad_norm": 7.8694925596265906, + "learning_rate": 9.773665084485472e-06, + "loss": 18.1842, + "step": 6744 + }, + { + "epoch": 0.1232931800318058, + "grad_norm": 7.138056421575226, + "learning_rate": 9.773577023326255e-06, + "loss": 17.8597, + "step": 6745 + }, + { + "epoch": 0.12331145922825233, + "grad_norm": 7.1014946326973405, + "learning_rate": 9.773488945436064e-06, + "loss": 17.6909, + "step": 6746 + }, + { + "epoch": 0.12332973842469885, + "grad_norm": 8.25137421905207, + "learning_rate": 9.77340085081521e-06, + "loss": 17.7142, + "step": 6747 + }, + { + "epoch": 0.12334801762114538, + "grad_norm": 7.22933512730961, + "learning_rate": 
9.773312739464001e-06, + "loss": 17.8855, + "step": 6748 + }, + { + "epoch": 0.1233662968175919, + "grad_norm": 6.406227023502943, + "learning_rate": 9.773224611382744e-06, + "loss": 17.4731, + "step": 6749 + }, + { + "epoch": 0.12338457601403842, + "grad_norm": 6.29912276582381, + "learning_rate": 9.77313646657175e-06, + "loss": 17.3361, + "step": 6750 + }, + { + "epoch": 0.12340285521048494, + "grad_norm": 7.163796354375712, + "learning_rate": 9.773048305031324e-06, + "loss": 17.9806, + "step": 6751 + }, + { + "epoch": 0.12342113440693148, + "grad_norm": 8.083667017627093, + "learning_rate": 9.772960126761779e-06, + "loss": 17.6488, + "step": 6752 + }, + { + "epoch": 0.123439413603378, + "grad_norm": 9.335094240895968, + "learning_rate": 9.772871931763423e-06, + "loss": 18.4421, + "step": 6753 + }, + { + "epoch": 0.12345769279982452, + "grad_norm": 8.650517180629848, + "learning_rate": 9.772783720036566e-06, + "loss": 18.2238, + "step": 6754 + }, + { + "epoch": 0.12347597199627104, + "grad_norm": 7.408655689107285, + "learning_rate": 9.772695491581517e-06, + "loss": 18.178, + "step": 6755 + }, + { + "epoch": 0.12349425119271756, + "grad_norm": 6.272185991151557, + "learning_rate": 9.772607246398582e-06, + "loss": 17.3344, + "step": 6756 + }, + { + "epoch": 0.1235125303891641, + "grad_norm": 8.625669454668147, + "learning_rate": 9.772518984488076e-06, + "loss": 18.3302, + "step": 6757 + }, + { + "epoch": 0.12353080958561062, + "grad_norm": 8.106328974221528, + "learning_rate": 9.772430705850302e-06, + "loss": 18.0292, + "step": 6758 + }, + { + "epoch": 0.12354908878205714, + "grad_norm": 7.396995596141605, + "learning_rate": 9.772342410485574e-06, + "loss": 18.1042, + "step": 6759 + }, + { + "epoch": 0.12356736797850366, + "grad_norm": 8.424871113247226, + "learning_rate": 9.772254098394199e-06, + "loss": 18.2745, + "step": 6760 + }, + { + "epoch": 0.12358564717495019, + "grad_norm": 7.550498496474941, + "learning_rate": 9.772165769576487e-06, + "loss": 17.8764, + "step": 6761 + }, + { + "epoch": 0.12360392637139671, + "grad_norm": 6.283517836040569, + "learning_rate": 9.77207742403275e-06, + "loss": 17.5406, + "step": 6762 + }, + { + "epoch": 0.12362220556784324, + "grad_norm": 7.32837620388845, + "learning_rate": 9.771989061763295e-06, + "loss": 17.9969, + "step": 6763 + }, + { + "epoch": 0.12364048476428977, + "grad_norm": 6.999174504920259, + "learning_rate": 9.771900682768431e-06, + "loss": 17.6985, + "step": 6764 + }, + { + "epoch": 0.12365876396073629, + "grad_norm": 7.353166074856369, + "learning_rate": 9.771812287048473e-06, + "loss": 17.9816, + "step": 6765 + }, + { + "epoch": 0.12367704315718281, + "grad_norm": 6.872916666751801, + "learning_rate": 9.771723874603722e-06, + "loss": 17.7053, + "step": 6766 + }, + { + "epoch": 0.12369532235362933, + "grad_norm": 10.832766572986543, + "learning_rate": 9.771635445434497e-06, + "loss": 17.7978, + "step": 6767 + }, + { + "epoch": 0.12371360155007585, + "grad_norm": 6.904832355542965, + "learning_rate": 9.771546999541101e-06, + "loss": 18.0874, + "step": 6768 + }, + { + "epoch": 0.12373188074652239, + "grad_norm": 8.029770332971117, + "learning_rate": 9.77145853692385e-06, + "loss": 17.9579, + "step": 6769 + }, + { + "epoch": 0.12375015994296891, + "grad_norm": 7.006988683324624, + "learning_rate": 9.771370057583047e-06, + "loss": 17.7966, + "step": 6770 + }, + { + "epoch": 0.12376843913941543, + "grad_norm": 7.2888792726150955, + "learning_rate": 9.771281561519009e-06, + "loss": 17.9035, + "step": 6771 + }, + { + "epoch": 
0.12378671833586195, + "grad_norm": 7.46306368992549, + "learning_rate": 9.771193048732041e-06, + "loss": 17.8343, + "step": 6772 + }, + { + "epoch": 0.12380499753230848, + "grad_norm": 6.408100073477963, + "learning_rate": 9.771104519222457e-06, + "loss": 17.3325, + "step": 6773 + }, + { + "epoch": 0.12382327672875501, + "grad_norm": 8.208083765211683, + "learning_rate": 9.771015972990564e-06, + "loss": 18.5437, + "step": 6774 + }, + { + "epoch": 0.12384155592520153, + "grad_norm": 6.773044224298968, + "learning_rate": 9.770927410036677e-06, + "loss": 17.7102, + "step": 6775 + }, + { + "epoch": 0.12385983512164805, + "grad_norm": 7.451165839563993, + "learning_rate": 9.770838830361101e-06, + "loss": 17.8902, + "step": 6776 + }, + { + "epoch": 0.12387811431809458, + "grad_norm": 7.838133310158093, + "learning_rate": 9.77075023396415e-06, + "loss": 17.9973, + "step": 6777 + }, + { + "epoch": 0.1238963935145411, + "grad_norm": 7.657081364273012, + "learning_rate": 9.770661620846132e-06, + "loss": 17.6833, + "step": 6778 + }, + { + "epoch": 0.12391467271098762, + "grad_norm": 6.482389867965016, + "learning_rate": 9.770572991007362e-06, + "loss": 17.4262, + "step": 6779 + }, + { + "epoch": 0.12393295190743416, + "grad_norm": 6.927975602071881, + "learning_rate": 9.770484344448144e-06, + "loss": 17.9453, + "step": 6780 + }, + { + "epoch": 0.12395123110388068, + "grad_norm": 7.236795933527305, + "learning_rate": 9.770395681168794e-06, + "loss": 17.639, + "step": 6781 + }, + { + "epoch": 0.1239695103003272, + "grad_norm": 6.495468232258661, + "learning_rate": 9.770307001169621e-06, + "loss": 17.3891, + "step": 6782 + }, + { + "epoch": 0.12398778949677372, + "grad_norm": 8.383316197209089, + "learning_rate": 9.770218304450935e-06, + "loss": 18.4597, + "step": 6783 + }, + { + "epoch": 0.12400606869322024, + "grad_norm": 5.72011174779133, + "learning_rate": 9.770129591013049e-06, + "loss": 17.4688, + "step": 6784 + }, + { + "epoch": 0.12402434788966676, + "grad_norm": 6.9603655146625325, + "learning_rate": 9.770040860856273e-06, + "loss": 17.9703, + "step": 6785 + }, + { + "epoch": 0.1240426270861133, + "grad_norm": 8.70264578219701, + "learning_rate": 9.769952113980917e-06, + "loss": 18.574, + "step": 6786 + }, + { + "epoch": 0.12406090628255982, + "grad_norm": 7.579062368171308, + "learning_rate": 9.769863350387293e-06, + "loss": 18.1362, + "step": 6787 + }, + { + "epoch": 0.12407918547900634, + "grad_norm": 6.936197691883063, + "learning_rate": 9.769774570075711e-06, + "loss": 17.8093, + "step": 6788 + }, + { + "epoch": 0.12409746467545286, + "grad_norm": 7.832351463230458, + "learning_rate": 9.769685773046484e-06, + "loss": 18.2735, + "step": 6789 + }, + { + "epoch": 0.12411574387189939, + "grad_norm": 8.877510471961573, + "learning_rate": 9.769596959299923e-06, + "loss": 18.4219, + "step": 6790 + }, + { + "epoch": 0.12413402306834592, + "grad_norm": 7.385366838914088, + "learning_rate": 9.769508128836338e-06, + "loss": 17.6268, + "step": 6791 + }, + { + "epoch": 0.12415230226479244, + "grad_norm": 8.63879098043357, + "learning_rate": 9.76941928165604e-06, + "loss": 18.8794, + "step": 6792 + }, + { + "epoch": 0.12417058146123897, + "grad_norm": 7.084520759446865, + "learning_rate": 9.769330417759342e-06, + "loss": 17.846, + "step": 6793 + }, + { + "epoch": 0.12418886065768549, + "grad_norm": 6.977025543661845, + "learning_rate": 9.769241537146555e-06, + "loss": 17.5868, + "step": 6794 + }, + { + "epoch": 0.12420713985413201, + "grad_norm": 8.314269462705171, + "learning_rate": 
9.769152639817988e-06, + "loss": 18.8014, + "step": 6795 + }, + { + "epoch": 0.12422541905057853, + "grad_norm": 7.26374498994195, + "learning_rate": 9.769063725773957e-06, + "loss": 18.0784, + "step": 6796 + }, + { + "epoch": 0.12424369824702507, + "grad_norm": 7.750043978676372, + "learning_rate": 9.768974795014772e-06, + "loss": 17.9159, + "step": 6797 + }, + { + "epoch": 0.12426197744347159, + "grad_norm": 7.184436727363733, + "learning_rate": 9.768885847540743e-06, + "loss": 17.7996, + "step": 6798 + }, + { + "epoch": 0.12428025663991811, + "grad_norm": 6.598999271682578, + "learning_rate": 9.768796883352183e-06, + "loss": 17.9671, + "step": 6799 + }, + { + "epoch": 0.12429853583636463, + "grad_norm": 6.794746580189616, + "learning_rate": 9.768707902449403e-06, + "loss": 17.5868, + "step": 6800 + }, + { + "epoch": 0.12431681503281115, + "grad_norm": 8.294978666463557, + "learning_rate": 9.768618904832718e-06, + "loss": 17.8646, + "step": 6801 + }, + { + "epoch": 0.12433509422925768, + "grad_norm": 6.481829932916731, + "learning_rate": 9.768529890502435e-06, + "loss": 17.5915, + "step": 6802 + }, + { + "epoch": 0.12435337342570421, + "grad_norm": 8.799674341553661, + "learning_rate": 9.76844085945887e-06, + "loss": 17.8731, + "step": 6803 + }, + { + "epoch": 0.12437165262215073, + "grad_norm": 6.441341548209323, + "learning_rate": 9.768351811702333e-06, + "loss": 17.5145, + "step": 6804 + }, + { + "epoch": 0.12438993181859725, + "grad_norm": 7.724334044183744, + "learning_rate": 9.768262747233137e-06, + "loss": 17.9513, + "step": 6805 + }, + { + "epoch": 0.12440821101504378, + "grad_norm": 8.228683522833085, + "learning_rate": 9.768173666051594e-06, + "loss": 17.9028, + "step": 6806 + }, + { + "epoch": 0.1244264902114903, + "grad_norm": 7.413906193257426, + "learning_rate": 9.768084568158015e-06, + "loss": 17.8082, + "step": 6807 + }, + { + "epoch": 0.12444476940793683, + "grad_norm": 8.965826870301163, + "learning_rate": 9.767995453552714e-06, + "loss": 18.545, + "step": 6808 + }, + { + "epoch": 0.12446304860438336, + "grad_norm": 7.178717823470608, + "learning_rate": 9.767906322236002e-06, + "loss": 17.8343, + "step": 6809 + }, + { + "epoch": 0.12448132780082988, + "grad_norm": 8.822238423199295, + "learning_rate": 9.767817174208194e-06, + "loss": 18.6875, + "step": 6810 + }, + { + "epoch": 0.1244996069972764, + "grad_norm": 6.395336496052081, + "learning_rate": 9.7677280094696e-06, + "loss": 17.4369, + "step": 6811 + }, + { + "epoch": 0.12451788619372292, + "grad_norm": 6.550944616976022, + "learning_rate": 9.767638828020532e-06, + "loss": 17.5714, + "step": 6812 + }, + { + "epoch": 0.12453616539016944, + "grad_norm": 7.3943820592035525, + "learning_rate": 9.767549629861304e-06, + "loss": 17.8888, + "step": 6813 + }, + { + "epoch": 0.12455444458661598, + "grad_norm": 6.955204796907054, + "learning_rate": 9.767460414992229e-06, + "loss": 17.688, + "step": 6814 + }, + { + "epoch": 0.1245727237830625, + "grad_norm": 9.371640516387902, + "learning_rate": 9.767371183413619e-06, + "loss": 17.7048, + "step": 6815 + }, + { + "epoch": 0.12459100297950902, + "grad_norm": 6.945829805828666, + "learning_rate": 9.767281935125785e-06, + "loss": 17.9757, + "step": 6816 + }, + { + "epoch": 0.12460928217595554, + "grad_norm": 8.224404276896458, + "learning_rate": 9.767192670129042e-06, + "loss": 18.285, + "step": 6817 + }, + { + "epoch": 0.12462756137240207, + "grad_norm": 6.9793598341825165, + "learning_rate": 9.767103388423704e-06, + "loss": 17.9039, + "step": 6818 + }, + { + "epoch": 
0.12464584056884859, + "grad_norm": 6.452361576578343, + "learning_rate": 9.767014090010081e-06, + "loss": 17.5745, + "step": 6819 + }, + { + "epoch": 0.12466411976529512, + "grad_norm": 7.7401983160125445, + "learning_rate": 9.766924774888487e-06, + "loss": 17.6759, + "step": 6820 + }, + { + "epoch": 0.12468239896174164, + "grad_norm": 6.461913283266975, + "learning_rate": 9.766835443059235e-06, + "loss": 17.2202, + "step": 6821 + }, + { + "epoch": 0.12470067815818817, + "grad_norm": 6.408831285939423, + "learning_rate": 9.76674609452264e-06, + "loss": 17.4377, + "step": 6822 + }, + { + "epoch": 0.12471895735463469, + "grad_norm": 6.4698280102972285, + "learning_rate": 9.766656729279012e-06, + "loss": 17.3235, + "step": 6823 + }, + { + "epoch": 0.12473723655108121, + "grad_norm": 7.456689630962659, + "learning_rate": 9.766567347328667e-06, + "loss": 18.078, + "step": 6824 + }, + { + "epoch": 0.12475551574752775, + "grad_norm": 6.2871557378464615, + "learning_rate": 9.766477948671918e-06, + "loss": 17.3276, + "step": 6825 + }, + { + "epoch": 0.12477379494397427, + "grad_norm": 6.4628018911268805, + "learning_rate": 9.766388533309075e-06, + "loss": 17.4461, + "step": 6826 + }, + { + "epoch": 0.12479207414042079, + "grad_norm": 8.798310200953944, + "learning_rate": 9.766299101240455e-06, + "loss": 18.3388, + "step": 6827 + }, + { + "epoch": 0.12481035333686731, + "grad_norm": 9.01801958880886, + "learning_rate": 9.76620965246637e-06, + "loss": 18.0676, + "step": 6828 + }, + { + "epoch": 0.12482863253331383, + "grad_norm": 7.637732117289141, + "learning_rate": 9.766120186987134e-06, + "loss": 17.857, + "step": 6829 + }, + { + "epoch": 0.12484691172976035, + "grad_norm": 7.3498358064409155, + "learning_rate": 9.76603070480306e-06, + "loss": 17.7859, + "step": 6830 + }, + { + "epoch": 0.12486519092620689, + "grad_norm": 7.96401297844795, + "learning_rate": 9.765941205914461e-06, + "loss": 17.8464, + "step": 6831 + }, + { + "epoch": 0.12488347012265341, + "grad_norm": 7.491558225034503, + "learning_rate": 9.765851690321652e-06, + "loss": 18.0553, + "step": 6832 + }, + { + "epoch": 0.12490174931909993, + "grad_norm": 6.684682411726688, + "learning_rate": 9.765762158024948e-06, + "loss": 17.9563, + "step": 6833 + }, + { + "epoch": 0.12492002851554646, + "grad_norm": 7.678492114914815, + "learning_rate": 9.765672609024662e-06, + "loss": 18.0928, + "step": 6834 + }, + { + "epoch": 0.12493830771199298, + "grad_norm": 6.75221266810973, + "learning_rate": 9.765583043321104e-06, + "loss": 17.642, + "step": 6835 + }, + { + "epoch": 0.1249565869084395, + "grad_norm": 5.652893022972405, + "learning_rate": 9.765493460914592e-06, + "loss": 17.2443, + "step": 6836 + }, + { + "epoch": 0.12497486610488603, + "grad_norm": 7.211616485499025, + "learning_rate": 9.76540386180544e-06, + "loss": 17.5644, + "step": 6837 + }, + { + "epoch": 0.12499314530133256, + "grad_norm": 7.491578036573935, + "learning_rate": 9.76531424599396e-06, + "loss": 18.2474, + "step": 6838 + }, + { + "epoch": 0.12501142449777908, + "grad_norm": 8.106648613675649, + "learning_rate": 9.765224613480468e-06, + "loss": 18.2582, + "step": 6839 + }, + { + "epoch": 0.1250297036942256, + "grad_norm": 7.301726902083591, + "learning_rate": 9.765134964265277e-06, + "loss": 17.7908, + "step": 6840 + }, + { + "epoch": 0.12504798289067212, + "grad_norm": 5.857463158845806, + "learning_rate": 9.765045298348701e-06, + "loss": 17.3778, + "step": 6841 + }, + { + "epoch": 0.12506626208711866, + "grad_norm": 6.6013829363255, + "learning_rate": 
9.764955615731054e-06, + "loss": 17.5809, + "step": 6842 + }, + { + "epoch": 0.12508454128356516, + "grad_norm": 7.349650909699515, + "learning_rate": 9.764865916412651e-06, + "loss": 18.0713, + "step": 6843 + }, + { + "epoch": 0.1251028204800117, + "grad_norm": 7.1705446033353555, + "learning_rate": 9.764776200393809e-06, + "loss": 17.7023, + "step": 6844 + }, + { + "epoch": 0.12512109967645824, + "grad_norm": 9.594565997134804, + "learning_rate": 9.764686467674837e-06, + "loss": 18.6706, + "step": 6845 + }, + { + "epoch": 0.12513937887290474, + "grad_norm": 6.26969894834299, + "learning_rate": 9.764596718256054e-06, + "loss": 17.4378, + "step": 6846 + }, + { + "epoch": 0.12515765806935128, + "grad_norm": 6.419926332413454, + "learning_rate": 9.764506952137772e-06, + "loss": 17.5315, + "step": 6847 + }, + { + "epoch": 0.1251759372657978, + "grad_norm": 7.022424959197518, + "learning_rate": 9.764417169320308e-06, + "loss": 17.9142, + "step": 6848 + }, + { + "epoch": 0.12519421646224432, + "grad_norm": 8.789123413034922, + "learning_rate": 9.764327369803974e-06, + "loss": 18.9441, + "step": 6849 + }, + { + "epoch": 0.12521249565869083, + "grad_norm": 8.052600772208601, + "learning_rate": 9.764237553589086e-06, + "loss": 18.049, + "step": 6850 + }, + { + "epoch": 0.12523077485513737, + "grad_norm": 7.815449913994883, + "learning_rate": 9.764147720675959e-06, + "loss": 17.9423, + "step": 6851 + }, + { + "epoch": 0.1252490540515839, + "grad_norm": 7.716998644654304, + "learning_rate": 9.764057871064908e-06, + "loss": 18.2239, + "step": 6852 + }, + { + "epoch": 0.1252673332480304, + "grad_norm": 7.985093938302353, + "learning_rate": 9.763968004756248e-06, + "loss": 17.9283, + "step": 6853 + }, + { + "epoch": 0.12528561244447695, + "grad_norm": 5.863768231151736, + "learning_rate": 9.763878121750293e-06, + "loss": 17.1499, + "step": 6854 + }, + { + "epoch": 0.12530389164092345, + "grad_norm": 7.512199977568955, + "learning_rate": 9.763788222047358e-06, + "loss": 17.7701, + "step": 6855 + }, + { + "epoch": 0.12532217083737, + "grad_norm": 7.015726812898855, + "learning_rate": 9.76369830564776e-06, + "loss": 17.7275, + "step": 6856 + }, + { + "epoch": 0.12534045003381653, + "grad_norm": 8.576731202571079, + "learning_rate": 9.763608372551812e-06, + "loss": 18.4529, + "step": 6857 + }, + { + "epoch": 0.12535872923026303, + "grad_norm": 9.091430597613348, + "learning_rate": 9.763518422759829e-06, + "loss": 18.7546, + "step": 6858 + }, + { + "epoch": 0.12537700842670957, + "grad_norm": 6.60377192208082, + "learning_rate": 9.763428456272127e-06, + "loss": 17.4729, + "step": 6859 + }, + { + "epoch": 0.12539528762315608, + "grad_norm": 6.8105219824174545, + "learning_rate": 9.763338473089023e-06, + "loss": 17.6938, + "step": 6860 + }, + { + "epoch": 0.1254135668196026, + "grad_norm": 6.301755296093012, + "learning_rate": 9.76324847321083e-06, + "loss": 17.3673, + "step": 6861 + }, + { + "epoch": 0.12543184601604915, + "grad_norm": 9.262335557793726, + "learning_rate": 9.763158456637868e-06, + "loss": 18.244, + "step": 6862 + }, + { + "epoch": 0.12545012521249566, + "grad_norm": 6.558293955485627, + "learning_rate": 9.763068423370446e-06, + "loss": 17.7602, + "step": 6863 + }, + { + "epoch": 0.1254684044089422, + "grad_norm": 6.851336277162645, + "learning_rate": 9.762978373408882e-06, + "loss": 17.8758, + "step": 6864 + }, + { + "epoch": 0.1254866836053887, + "grad_norm": 6.178099064026581, + "learning_rate": 9.762888306753493e-06, + "loss": 17.2753, + "step": 6865 + }, + { + "epoch": 
0.12550496280183523, + "grad_norm": 7.971854847632996, + "learning_rate": 9.762798223404595e-06, + "loss": 18.2635, + "step": 6866 + }, + { + "epoch": 0.12552324199828174, + "grad_norm": 7.461915683024421, + "learning_rate": 9.7627081233625e-06, + "loss": 17.9847, + "step": 6867 + }, + { + "epoch": 0.12554152119472828, + "grad_norm": 6.167959628386896, + "learning_rate": 9.762618006627526e-06, + "loss": 17.1975, + "step": 6868 + }, + { + "epoch": 0.12555980039117481, + "grad_norm": 5.904957131953893, + "learning_rate": 9.762527873199991e-06, + "loss": 17.3502, + "step": 6869 + }, + { + "epoch": 0.12557807958762132, + "grad_norm": 7.319643468981985, + "learning_rate": 9.762437723080209e-06, + "loss": 17.8596, + "step": 6870 + }, + { + "epoch": 0.12559635878406786, + "grad_norm": 6.776461760249466, + "learning_rate": 9.762347556268497e-06, + "loss": 17.5943, + "step": 6871 + }, + { + "epoch": 0.12561463798051437, + "grad_norm": 6.945555187723439, + "learning_rate": 9.762257372765169e-06, + "loss": 17.6469, + "step": 6872 + }, + { + "epoch": 0.1256329171769609, + "grad_norm": 7.5795008587308175, + "learning_rate": 9.762167172570541e-06, + "loss": 18.1053, + "step": 6873 + }, + { + "epoch": 0.12565119637340744, + "grad_norm": 7.625446892507235, + "learning_rate": 9.762076955684932e-06, + "loss": 18.0321, + "step": 6874 + }, + { + "epoch": 0.12566947556985394, + "grad_norm": 7.550851598821339, + "learning_rate": 9.761986722108656e-06, + "loss": 18.0622, + "step": 6875 + }, + { + "epoch": 0.12568775476630048, + "grad_norm": 5.343112907295592, + "learning_rate": 9.761896471842029e-06, + "loss": 17.2661, + "step": 6876 + }, + { + "epoch": 0.125706033962747, + "grad_norm": 6.896208545413777, + "learning_rate": 9.76180620488537e-06, + "loss": 17.6864, + "step": 6877 + }, + { + "epoch": 0.12572431315919352, + "grad_norm": 7.185623538717204, + "learning_rate": 9.76171592123899e-06, + "loss": 17.2856, + "step": 6878 + }, + { + "epoch": 0.12574259235564006, + "grad_norm": 7.686167353778615, + "learning_rate": 9.761625620903212e-06, + "loss": 18.3195, + "step": 6879 + }, + { + "epoch": 0.12576087155208657, + "grad_norm": 8.296321085350883, + "learning_rate": 9.761535303878349e-06, + "loss": 18.4403, + "step": 6880 + }, + { + "epoch": 0.1257791507485331, + "grad_norm": 6.768592561279704, + "learning_rate": 9.761444970164717e-06, + "loss": 17.6828, + "step": 6881 + }, + { + "epoch": 0.1257974299449796, + "grad_norm": 7.435027612748613, + "learning_rate": 9.761354619762634e-06, + "loss": 17.8256, + "step": 6882 + }, + { + "epoch": 0.12581570914142615, + "grad_norm": 7.849360277751968, + "learning_rate": 9.761264252672416e-06, + "loss": 18.2053, + "step": 6883 + }, + { + "epoch": 0.12583398833787265, + "grad_norm": 6.429397492132217, + "learning_rate": 9.76117386889438e-06, + "loss": 17.4916, + "step": 6884 + }, + { + "epoch": 0.1258522675343192, + "grad_norm": 6.067239574248806, + "learning_rate": 9.761083468428843e-06, + "loss": 17.3733, + "step": 6885 + }, + { + "epoch": 0.12587054673076573, + "grad_norm": 8.315848729518695, + "learning_rate": 9.760993051276121e-06, + "loss": 18.2736, + "step": 6886 + }, + { + "epoch": 0.12588882592721223, + "grad_norm": 7.32837061349687, + "learning_rate": 9.760902617436532e-06, + "loss": 17.9103, + "step": 6887 + }, + { + "epoch": 0.12590710512365877, + "grad_norm": 7.383107886144405, + "learning_rate": 9.760812166910391e-06, + "loss": 17.6546, + "step": 6888 + }, + { + "epoch": 0.12592538432010528, + "grad_norm": 7.476709793828603, + "learning_rate": 
9.760721699698019e-06, + "loss": 18.0114, + "step": 6889 + }, + { + "epoch": 0.1259436635165518, + "grad_norm": 9.167709907246126, + "learning_rate": 9.76063121579973e-06, + "loss": 18.7377, + "step": 6890 + }, + { + "epoch": 0.12596194271299835, + "grad_norm": 7.271487503461663, + "learning_rate": 9.76054071521584e-06, + "loss": 17.9853, + "step": 6891 + }, + { + "epoch": 0.12598022190944486, + "grad_norm": 6.085035177977042, + "learning_rate": 9.760450197946669e-06, + "loss": 17.4638, + "step": 6892 + }, + { + "epoch": 0.1259985011058914, + "grad_norm": 6.552638913672762, + "learning_rate": 9.760359663992534e-06, + "loss": 17.5143, + "step": 6893 + }, + { + "epoch": 0.1260167803023379, + "grad_norm": 7.28866953297157, + "learning_rate": 9.760269113353751e-06, + "loss": 17.7996, + "step": 6894 + }, + { + "epoch": 0.12603505949878444, + "grad_norm": 7.506247584791319, + "learning_rate": 9.760178546030638e-06, + "loss": 18.177, + "step": 6895 + }, + { + "epoch": 0.12605333869523097, + "grad_norm": 7.568777077230304, + "learning_rate": 9.76008796202351e-06, + "loss": 17.8969, + "step": 6896 + }, + { + "epoch": 0.12607161789167748, + "grad_norm": 6.8513200679177775, + "learning_rate": 9.75999736133269e-06, + "loss": 17.728, + "step": 6897 + }, + { + "epoch": 0.12608989708812401, + "grad_norm": 6.321393301559122, + "learning_rate": 9.75990674395849e-06, + "loss": 17.4091, + "step": 6898 + }, + { + "epoch": 0.12610817628457052, + "grad_norm": 5.671400566827995, + "learning_rate": 9.75981610990123e-06, + "loss": 17.1359, + "step": 6899 + }, + { + "epoch": 0.12612645548101706, + "grad_norm": 7.333047507187936, + "learning_rate": 9.759725459161229e-06, + "loss": 17.8416, + "step": 6900 + }, + { + "epoch": 0.12614473467746357, + "grad_norm": 7.014759835628037, + "learning_rate": 9.759634791738803e-06, + "loss": 17.8615, + "step": 6901 + }, + { + "epoch": 0.1261630138739101, + "grad_norm": 7.572353559355617, + "learning_rate": 9.75954410763427e-06, + "loss": 18.027, + "step": 6902 + }, + { + "epoch": 0.12618129307035664, + "grad_norm": 7.869481255483959, + "learning_rate": 9.759453406847948e-06, + "loss": 18.0191, + "step": 6903 + }, + { + "epoch": 0.12619957226680314, + "grad_norm": 6.993283310444016, + "learning_rate": 9.759362689380154e-06, + "loss": 17.9355, + "step": 6904 + }, + { + "epoch": 0.12621785146324968, + "grad_norm": 7.073599533523402, + "learning_rate": 9.759271955231207e-06, + "loss": 17.9398, + "step": 6905 + }, + { + "epoch": 0.1262361306596962, + "grad_norm": 6.7543327331622915, + "learning_rate": 9.759181204401425e-06, + "loss": 17.6489, + "step": 6906 + }, + { + "epoch": 0.12625440985614272, + "grad_norm": 7.396854189496331, + "learning_rate": 9.759090436891126e-06, + "loss": 18.0247, + "step": 6907 + }, + { + "epoch": 0.12627268905258926, + "grad_norm": 7.473337310410864, + "learning_rate": 9.758999652700628e-06, + "loss": 17.9618, + "step": 6908 + }, + { + "epoch": 0.12629096824903577, + "grad_norm": 6.54808082938672, + "learning_rate": 9.758908851830248e-06, + "loss": 17.6502, + "step": 6909 + }, + { + "epoch": 0.1263092474454823, + "grad_norm": 7.962637624156711, + "learning_rate": 9.758818034280306e-06, + "loss": 18.1697, + "step": 6910 + }, + { + "epoch": 0.1263275266419288, + "grad_norm": 8.316194122517786, + "learning_rate": 9.75872720005112e-06, + "loss": 18.2445, + "step": 6911 + }, + { + "epoch": 0.12634580583837535, + "grad_norm": 9.076794257119204, + "learning_rate": 9.758636349143008e-06, + "loss": 18.2431, + "step": 6912 + }, + { + "epoch": 0.12636408503482188, + 
"grad_norm": 8.080425757154485, + "learning_rate": 9.758545481556289e-06, + "loss": 17.9978, + "step": 6913 + }, + { + "epoch": 0.1263823642312684, + "grad_norm": 7.6418440516441475, + "learning_rate": 9.758454597291282e-06, + "loss": 17.9627, + "step": 6914 + }, + { + "epoch": 0.12640064342771493, + "grad_norm": 6.644158785752914, + "learning_rate": 9.758363696348303e-06, + "loss": 17.3803, + "step": 6915 + }, + { + "epoch": 0.12641892262416143, + "grad_norm": 6.916556386304829, + "learning_rate": 9.758272778727673e-06, + "loss": 17.8038, + "step": 6916 + }, + { + "epoch": 0.12643720182060797, + "grad_norm": 6.866739546264169, + "learning_rate": 9.758181844429709e-06, + "loss": 17.7915, + "step": 6917 + }, + { + "epoch": 0.12645548101705448, + "grad_norm": 6.518695509133655, + "learning_rate": 9.75809089345473e-06, + "loss": 17.4465, + "step": 6918 + }, + { + "epoch": 0.126473760213501, + "grad_norm": 7.140291668607941, + "learning_rate": 9.757999925803057e-06, + "loss": 17.9112, + "step": 6919 + }, + { + "epoch": 0.12649203940994755, + "grad_norm": 7.05297639968776, + "learning_rate": 9.757908941475005e-06, + "loss": 17.7311, + "step": 6920 + }, + { + "epoch": 0.12651031860639406, + "grad_norm": 8.412370846508411, + "learning_rate": 9.757817940470898e-06, + "loss": 18.1769, + "step": 6921 + }, + { + "epoch": 0.1265285978028406, + "grad_norm": 7.493224194573999, + "learning_rate": 9.75772692279105e-06, + "loss": 17.8737, + "step": 6922 + }, + { + "epoch": 0.1265468769992871, + "grad_norm": 7.492658416453076, + "learning_rate": 9.75763588843578e-06, + "loss": 17.6434, + "step": 6923 + }, + { + "epoch": 0.12656515619573364, + "grad_norm": 8.217101745761529, + "learning_rate": 9.757544837405413e-06, + "loss": 17.9622, + "step": 6924 + }, + { + "epoch": 0.12658343539218017, + "grad_norm": 6.094890483992856, + "learning_rate": 9.757453769700263e-06, + "loss": 17.5712, + "step": 6925 + }, + { + "epoch": 0.12660171458862668, + "grad_norm": 8.286935384573368, + "learning_rate": 9.757362685320651e-06, + "loss": 17.9444, + "step": 6926 + }, + { + "epoch": 0.12661999378507321, + "grad_norm": 7.465689725152292, + "learning_rate": 9.757271584266894e-06, + "loss": 17.8804, + "step": 6927 + }, + { + "epoch": 0.12663827298151972, + "grad_norm": 6.505481223851254, + "learning_rate": 9.757180466539314e-06, + "loss": 17.4906, + "step": 6928 + }, + { + "epoch": 0.12665655217796626, + "grad_norm": 7.675165837939194, + "learning_rate": 9.757089332138227e-06, + "loss": 18.3166, + "step": 6929 + }, + { + "epoch": 0.1266748313744128, + "grad_norm": 8.04806473890603, + "learning_rate": 9.756998181063956e-06, + "loss": 17.9215, + "step": 6930 + }, + { + "epoch": 0.1266931105708593, + "grad_norm": 7.944870275372156, + "learning_rate": 9.75690701331682e-06, + "loss": 18.3751, + "step": 6931 + }, + { + "epoch": 0.12671138976730584, + "grad_norm": 8.007043568618746, + "learning_rate": 9.756815828897139e-06, + "loss": 18.252, + "step": 6932 + }, + { + "epoch": 0.12672966896375235, + "grad_norm": 6.366963345032425, + "learning_rate": 9.756724627805228e-06, + "loss": 17.3992, + "step": 6933 + }, + { + "epoch": 0.12674794816019888, + "grad_norm": 6.433359645818439, + "learning_rate": 9.756633410041412e-06, + "loss": 17.5832, + "step": 6934 + }, + { + "epoch": 0.1267662273566454, + "grad_norm": 8.018598139254433, + "learning_rate": 9.756542175606009e-06, + "loss": 18.1824, + "step": 6935 + }, + { + "epoch": 0.12678450655309192, + "grad_norm": 6.591706798911266, + "learning_rate": 9.756450924499337e-06, + "loss": 17.482, + 
"step": 6936 + }, + { + "epoch": 0.12680278574953846, + "grad_norm": 6.670475773897604, + "learning_rate": 9.756359656721718e-06, + "loss": 17.4916, + "step": 6937 + }, + { + "epoch": 0.12682106494598497, + "grad_norm": 7.389590100695097, + "learning_rate": 9.756268372273471e-06, + "loss": 18.1617, + "step": 6938 + }, + { + "epoch": 0.1268393441424315, + "grad_norm": 5.68649928573887, + "learning_rate": 9.756177071154917e-06, + "loss": 17.4034, + "step": 6939 + }, + { + "epoch": 0.126857623338878, + "grad_norm": 7.291106003259725, + "learning_rate": 9.756085753366374e-06, + "loss": 17.7263, + "step": 6940 + }, + { + "epoch": 0.12687590253532455, + "grad_norm": 7.8527776067626345, + "learning_rate": 9.755994418908163e-06, + "loss": 18.3024, + "step": 6941 + }, + { + "epoch": 0.12689418173177108, + "grad_norm": 8.270895321656669, + "learning_rate": 9.755903067780604e-06, + "loss": 18.3105, + "step": 6942 + }, + { + "epoch": 0.1269124609282176, + "grad_norm": 6.777498831085499, + "learning_rate": 9.755811699984019e-06, + "loss": 17.7027, + "step": 6943 + }, + { + "epoch": 0.12693074012466413, + "grad_norm": 8.05246083098168, + "learning_rate": 9.755720315518724e-06, + "loss": 18.021, + "step": 6944 + }, + { + "epoch": 0.12694901932111063, + "grad_norm": 7.863604419253536, + "learning_rate": 9.755628914385045e-06, + "loss": 17.9604, + "step": 6945 + }, + { + "epoch": 0.12696729851755717, + "grad_norm": 7.362790015904113, + "learning_rate": 9.755537496583299e-06, + "loss": 17.7251, + "step": 6946 + }, + { + "epoch": 0.1269855777140037, + "grad_norm": 6.790814436100857, + "learning_rate": 9.755446062113804e-06, + "loss": 17.7339, + "step": 6947 + }, + { + "epoch": 0.1270038569104502, + "grad_norm": 6.046904756191634, + "learning_rate": 9.755354610976887e-06, + "loss": 17.222, + "step": 6948 + }, + { + "epoch": 0.12702213610689675, + "grad_norm": 7.016191801706925, + "learning_rate": 9.755263143172861e-06, + "loss": 17.7207, + "step": 6949 + }, + { + "epoch": 0.12704041530334326, + "grad_norm": 6.905456591821923, + "learning_rate": 9.755171658702053e-06, + "loss": 17.7975, + "step": 6950 + }, + { + "epoch": 0.1270586944997898, + "grad_norm": 6.787073661730627, + "learning_rate": 9.75508015756478e-06, + "loss": 17.6786, + "step": 6951 + }, + { + "epoch": 0.1270769736962363, + "grad_norm": 7.194243921241274, + "learning_rate": 9.754988639761364e-06, + "loss": 17.9999, + "step": 6952 + }, + { + "epoch": 0.12709525289268284, + "grad_norm": 7.201920287961771, + "learning_rate": 9.754897105292125e-06, + "loss": 18.3577, + "step": 6953 + }, + { + "epoch": 0.12711353208912937, + "grad_norm": 6.270448311111634, + "learning_rate": 9.754805554157384e-06, + "loss": 17.4376, + "step": 6954 + }, + { + "epoch": 0.12713181128557588, + "grad_norm": 7.552657929665543, + "learning_rate": 9.754713986357462e-06, + "loss": 17.8489, + "step": 6955 + }, + { + "epoch": 0.12715009048202242, + "grad_norm": 7.505230455309283, + "learning_rate": 9.754622401892681e-06, + "loss": 18.0299, + "step": 6956 + }, + { + "epoch": 0.12716836967846892, + "grad_norm": 8.194620048769831, + "learning_rate": 9.75453080076336e-06, + "loss": 18.0597, + "step": 6957 + }, + { + "epoch": 0.12718664887491546, + "grad_norm": 6.852443864005118, + "learning_rate": 9.754439182969822e-06, + "loss": 17.8304, + "step": 6958 + }, + { + "epoch": 0.127204928071362, + "grad_norm": 8.07085489741905, + "learning_rate": 9.754347548512388e-06, + "loss": 18.2323, + "step": 6959 + }, + { + "epoch": 0.1272232072678085, + "grad_norm": 7.321412998953154, + 
"learning_rate": 9.754255897391378e-06, + "loss": 17.8115, + "step": 6960 + }, + { + "epoch": 0.12724148646425504, + "grad_norm": 6.655116694758986, + "learning_rate": 9.754164229607112e-06, + "loss": 17.566, + "step": 6961 + }, + { + "epoch": 0.12725976566070155, + "grad_norm": 7.635913731930229, + "learning_rate": 9.754072545159914e-06, + "loss": 17.9166, + "step": 6962 + }, + { + "epoch": 0.12727804485714808, + "grad_norm": 7.4899502176585635, + "learning_rate": 9.753980844050104e-06, + "loss": 18.1651, + "step": 6963 + }, + { + "epoch": 0.12729632405359462, + "grad_norm": 7.474395602806392, + "learning_rate": 9.753889126278004e-06, + "loss": 18.2668, + "step": 6964 + }, + { + "epoch": 0.12731460325004113, + "grad_norm": 6.253527536548421, + "learning_rate": 9.753797391843936e-06, + "loss": 17.3812, + "step": 6965 + }, + { + "epoch": 0.12733288244648766, + "grad_norm": 6.303688954789781, + "learning_rate": 9.753705640748219e-06, + "loss": 17.4008, + "step": 6966 + }, + { + "epoch": 0.12735116164293417, + "grad_norm": 7.603106855419932, + "learning_rate": 9.753613872991176e-06, + "loss": 18.1482, + "step": 6967 + }, + { + "epoch": 0.1273694408393807, + "grad_norm": 6.2429332485433395, + "learning_rate": 9.75352208857313e-06, + "loss": 17.5214, + "step": 6968 + }, + { + "epoch": 0.1273877200358272, + "grad_norm": 8.375732323577289, + "learning_rate": 9.753430287494403e-06, + "loss": 17.8937, + "step": 6969 + }, + { + "epoch": 0.12740599923227375, + "grad_norm": 7.625430775782968, + "learning_rate": 9.753338469755314e-06, + "loss": 18.2324, + "step": 6970 + }, + { + "epoch": 0.12742427842872028, + "grad_norm": 8.041214519123109, + "learning_rate": 9.753246635356186e-06, + "loss": 18.3126, + "step": 6971 + }, + { + "epoch": 0.1274425576251668, + "grad_norm": 6.434700696600225, + "learning_rate": 9.753154784297341e-06, + "loss": 17.7066, + "step": 6972 + }, + { + "epoch": 0.12746083682161333, + "grad_norm": 7.253644918125441, + "learning_rate": 9.7530629165791e-06, + "loss": 17.7911, + "step": 6973 + }, + { + "epoch": 0.12747911601805983, + "grad_norm": 7.015871110635574, + "learning_rate": 9.752971032201787e-06, + "loss": 17.5315, + "step": 6974 + }, + { + "epoch": 0.12749739521450637, + "grad_norm": 6.720732521649265, + "learning_rate": 9.752879131165722e-06, + "loss": 17.8203, + "step": 6975 + }, + { + "epoch": 0.1275156744109529, + "grad_norm": 7.3385673433295535, + "learning_rate": 9.752787213471229e-06, + "loss": 17.9138, + "step": 6976 + }, + { + "epoch": 0.12753395360739941, + "grad_norm": 8.646071528793843, + "learning_rate": 9.752695279118629e-06, + "loss": 18.4491, + "step": 6977 + }, + { + "epoch": 0.12755223280384595, + "grad_norm": 7.223292956533253, + "learning_rate": 9.752603328108245e-06, + "loss": 18.0877, + "step": 6978 + }, + { + "epoch": 0.12757051200029246, + "grad_norm": 10.667741010269506, + "learning_rate": 9.752511360440397e-06, + "loss": 18.8063, + "step": 6979 + }, + { + "epoch": 0.127588791196739, + "grad_norm": 6.884591377310287, + "learning_rate": 9.752419376115412e-06, + "loss": 17.8854, + "step": 6980 + }, + { + "epoch": 0.12760707039318553, + "grad_norm": 6.616257206443414, + "learning_rate": 9.752327375133608e-06, + "loss": 17.8793, + "step": 6981 + }, + { + "epoch": 0.12762534958963204, + "grad_norm": 8.354632150166884, + "learning_rate": 9.752235357495307e-06, + "loss": 18.566, + "step": 6982 + }, + { + "epoch": 0.12764362878607857, + "grad_norm": 5.616048601322087, + "learning_rate": 9.752143323200837e-06, + "loss": 17.2041, + "step": 6983 + }, + { + 
"epoch": 0.12766190798252508, + "grad_norm": 7.053696647062669, + "learning_rate": 9.752051272250516e-06, + "loss": 17.7545, + "step": 6984 + }, + { + "epoch": 0.12768018717897162, + "grad_norm": 7.646423507021111, + "learning_rate": 9.751959204644665e-06, + "loss": 18.1997, + "step": 6985 + }, + { + "epoch": 0.12769846637541812, + "grad_norm": 7.470353436516299, + "learning_rate": 9.751867120383611e-06, + "loss": 18.1901, + "step": 6986 + }, + { + "epoch": 0.12771674557186466, + "grad_norm": 5.582291147049489, + "learning_rate": 9.751775019467677e-06, + "loss": 17.353, + "step": 6987 + }, + { + "epoch": 0.1277350247683112, + "grad_norm": 7.072988608141418, + "learning_rate": 9.751682901897181e-06, + "loss": 17.7201, + "step": 6988 + }, + { + "epoch": 0.1277533039647577, + "grad_norm": 7.051201083526724, + "learning_rate": 9.751590767672451e-06, + "loss": 17.6554, + "step": 6989 + }, + { + "epoch": 0.12777158316120424, + "grad_norm": 8.399054978627413, + "learning_rate": 9.751498616793806e-06, + "loss": 18.7966, + "step": 6990 + }, + { + "epoch": 0.12778986235765075, + "grad_norm": 7.191940606262273, + "learning_rate": 9.751406449261572e-06, + "loss": 18.2619, + "step": 6991 + }, + { + "epoch": 0.12780814155409728, + "grad_norm": 7.391879516430415, + "learning_rate": 9.75131426507607e-06, + "loss": 17.8321, + "step": 6992 + }, + { + "epoch": 0.12782642075054382, + "grad_norm": 6.499690438607266, + "learning_rate": 9.751222064237624e-06, + "loss": 17.6385, + "step": 6993 + }, + { + "epoch": 0.12784469994699033, + "grad_norm": 7.034250916249036, + "learning_rate": 9.751129846746557e-06, + "loss": 17.7252, + "step": 6994 + }, + { + "epoch": 0.12786297914343686, + "grad_norm": 6.4572172651760065, + "learning_rate": 9.75103761260319e-06, + "loss": 17.5323, + "step": 6995 + }, + { + "epoch": 0.12788125833988337, + "grad_norm": 6.04905851054078, + "learning_rate": 9.750945361807852e-06, + "loss": 17.4104, + "step": 6996 + }, + { + "epoch": 0.1278995375363299, + "grad_norm": 6.133914125359956, + "learning_rate": 9.750853094360861e-06, + "loss": 17.4268, + "step": 6997 + }, + { + "epoch": 0.12791781673277644, + "grad_norm": 5.781869773694671, + "learning_rate": 9.750760810262543e-06, + "loss": 17.2417, + "step": 6998 + }, + { + "epoch": 0.12793609592922295, + "grad_norm": 6.7826479384989, + "learning_rate": 9.750668509513219e-06, + "loss": 17.4406, + "step": 6999 + }, + { + "epoch": 0.12795437512566948, + "grad_norm": 8.786267572688896, + "learning_rate": 9.750576192113216e-06, + "loss": 18.801, + "step": 7000 + }, + { + "epoch": 0.127972654322116, + "grad_norm": 5.910149796637377, + "learning_rate": 9.750483858062856e-06, + "loss": 17.5747, + "step": 7001 + }, + { + "epoch": 0.12799093351856253, + "grad_norm": 8.012497036080479, + "learning_rate": 9.75039150736246e-06, + "loss": 18.2773, + "step": 7002 + }, + { + "epoch": 0.12800921271500904, + "grad_norm": 6.451909566644818, + "learning_rate": 9.750299140012355e-06, + "loss": 17.5821, + "step": 7003 + }, + { + "epoch": 0.12802749191145557, + "grad_norm": 7.423518602264178, + "learning_rate": 9.750206756012864e-06, + "loss": 17.8494, + "step": 7004 + }, + { + "epoch": 0.1280457711079021, + "grad_norm": 8.361455105057958, + "learning_rate": 9.750114355364311e-06, + "loss": 18.4579, + "step": 7005 + }, + { + "epoch": 0.12806405030434861, + "grad_norm": 7.878516491188714, + "learning_rate": 9.750021938067018e-06, + "loss": 18.3428, + "step": 7006 + }, + { + "epoch": 0.12808232950079515, + "grad_norm": 7.737349443881559, + "learning_rate": 
9.74992950412131e-06, + "loss": 17.9246, + "step": 7007 + }, + { + "epoch": 0.12810060869724166, + "grad_norm": 6.69412927981068, + "learning_rate": 9.749837053527512e-06, + "loss": 17.9627, + "step": 7008 + }, + { + "epoch": 0.1281188878936882, + "grad_norm": 8.423308730240684, + "learning_rate": 9.749744586285948e-06, + "loss": 18.1873, + "step": 7009 + }, + { + "epoch": 0.12813716709013473, + "grad_norm": 5.8435805157842475, + "learning_rate": 9.749652102396942e-06, + "loss": 17.3101, + "step": 7010 + }, + { + "epoch": 0.12815544628658124, + "grad_norm": 6.086695525064017, + "learning_rate": 9.749559601860816e-06, + "loss": 17.3515, + "step": 7011 + }, + { + "epoch": 0.12817372548302777, + "grad_norm": 8.287445424367927, + "learning_rate": 9.749467084677896e-06, + "loss": 18.5489, + "step": 7012 + }, + { + "epoch": 0.12819200467947428, + "grad_norm": 6.899548081555345, + "learning_rate": 9.749374550848506e-06, + "loss": 17.7959, + "step": 7013 + }, + { + "epoch": 0.12821028387592082, + "grad_norm": 6.732953731533117, + "learning_rate": 9.74928200037297e-06, + "loss": 17.4766, + "step": 7014 + }, + { + "epoch": 0.12822856307236735, + "grad_norm": 6.580377041773626, + "learning_rate": 9.749189433251614e-06, + "loss": 17.704, + "step": 7015 + }, + { + "epoch": 0.12824684226881386, + "grad_norm": 9.179270160637689, + "learning_rate": 9.74909684948476e-06, + "loss": 18.4665, + "step": 7016 + }, + { + "epoch": 0.1282651214652604, + "grad_norm": 7.6005357688836455, + "learning_rate": 9.749004249072735e-06, + "loss": 17.8569, + "step": 7017 + }, + { + "epoch": 0.1282834006617069, + "grad_norm": 6.72430777417885, + "learning_rate": 9.74891163201586e-06, + "loss": 17.7693, + "step": 7018 + }, + { + "epoch": 0.12830167985815344, + "grad_norm": 7.187779294795083, + "learning_rate": 9.748818998314465e-06, + "loss": 17.799, + "step": 7019 + }, + { + "epoch": 0.12831995905459995, + "grad_norm": 8.76545881250089, + "learning_rate": 9.748726347968868e-06, + "loss": 17.7423, + "step": 7020 + }, + { + "epoch": 0.12833823825104648, + "grad_norm": 6.881822173934101, + "learning_rate": 9.7486336809794e-06, + "loss": 17.5504, + "step": 7021 + }, + { + "epoch": 0.12835651744749302, + "grad_norm": 6.769410542320792, + "learning_rate": 9.748540997346382e-06, + "loss": 17.7062, + "step": 7022 + }, + { + "epoch": 0.12837479664393953, + "grad_norm": 8.1003885379815, + "learning_rate": 9.74844829707014e-06, + "loss": 18.3501, + "step": 7023 + }, + { + "epoch": 0.12839307584038606, + "grad_norm": 7.0562596310985874, + "learning_rate": 9.748355580150999e-06, + "loss": 18.0205, + "step": 7024 + }, + { + "epoch": 0.12841135503683257, + "grad_norm": 6.034469589388568, + "learning_rate": 9.748262846589282e-06, + "loss": 17.2974, + "step": 7025 + }, + { + "epoch": 0.1284296342332791, + "grad_norm": 7.393384845502981, + "learning_rate": 9.748170096385316e-06, + "loss": 17.7875, + "step": 7026 + }, + { + "epoch": 0.12844791342972564, + "grad_norm": 7.7241030594842695, + "learning_rate": 9.748077329539428e-06, + "loss": 18.3198, + "step": 7027 + }, + { + "epoch": 0.12846619262617215, + "grad_norm": 7.704276417514002, + "learning_rate": 9.747984546051941e-06, + "loss": 17.7201, + "step": 7028 + }, + { + "epoch": 0.12848447182261868, + "grad_norm": 7.267082095675214, + "learning_rate": 9.747891745923177e-06, + "loss": 17.913, + "step": 7029 + }, + { + "epoch": 0.1285027510190652, + "grad_norm": 6.5439131308136735, + "learning_rate": 9.747798929153467e-06, + "loss": 17.6853, + "step": 7030 + }, + { + "epoch": 
0.12852103021551173, + "grad_norm": 7.366102539188293, + "learning_rate": 9.747706095743134e-06, + "loss": 17.9143, + "step": 7031 + }, + { + "epoch": 0.12853930941195826, + "grad_norm": 7.780048607840062, + "learning_rate": 9.747613245692503e-06, + "loss": 17.9475, + "step": 7032 + }, + { + "epoch": 0.12855758860840477, + "grad_norm": 9.331706422813205, + "learning_rate": 9.747520379001898e-06, + "loss": 18.4692, + "step": 7033 + }, + { + "epoch": 0.1285758678048513, + "grad_norm": 6.554863952948978, + "learning_rate": 9.747427495671646e-06, + "loss": 17.8817, + "step": 7034 + }, + { + "epoch": 0.12859414700129781, + "grad_norm": 6.619621923388074, + "learning_rate": 9.747334595702073e-06, + "loss": 17.5481, + "step": 7035 + }, + { + "epoch": 0.12861242619774435, + "grad_norm": 7.2867554019539496, + "learning_rate": 9.747241679093506e-06, + "loss": 17.7352, + "step": 7036 + }, + { + "epoch": 0.12863070539419086, + "grad_norm": 6.168823558666478, + "learning_rate": 9.747148745846266e-06, + "loss": 17.3254, + "step": 7037 + }, + { + "epoch": 0.1286489845906374, + "grad_norm": 7.343672397809512, + "learning_rate": 9.747055795960685e-06, + "loss": 17.8599, + "step": 7038 + }, + { + "epoch": 0.12866726378708393, + "grad_norm": 7.788202047403616, + "learning_rate": 9.746962829437084e-06, + "loss": 17.5477, + "step": 7039 + }, + { + "epoch": 0.12868554298353044, + "grad_norm": 7.753770133924901, + "learning_rate": 9.746869846275788e-06, + "loss": 18.0494, + "step": 7040 + }, + { + "epoch": 0.12870382217997697, + "grad_norm": 7.254213768272908, + "learning_rate": 9.746776846477127e-06, + "loss": 18.1565, + "step": 7041 + }, + { + "epoch": 0.12872210137642348, + "grad_norm": 5.482515321584168, + "learning_rate": 9.746683830041425e-06, + "loss": 16.9708, + "step": 7042 + }, + { + "epoch": 0.12874038057287002, + "grad_norm": 8.073657743389086, + "learning_rate": 9.746590796969009e-06, + "loss": 18.122, + "step": 7043 + }, + { + "epoch": 0.12875865976931655, + "grad_norm": 6.337742609991647, + "learning_rate": 9.746497747260202e-06, + "loss": 17.7471, + "step": 7044 + }, + { + "epoch": 0.12877693896576306, + "grad_norm": 6.861792871142012, + "learning_rate": 9.746404680915334e-06, + "loss": 17.674, + "step": 7045 + }, + { + "epoch": 0.1287952181622096, + "grad_norm": 6.073323009333164, + "learning_rate": 9.746311597934729e-06, + "loss": 17.5672, + "step": 7046 + }, + { + "epoch": 0.1288134973586561, + "grad_norm": 7.737803858797643, + "learning_rate": 9.746218498318713e-06, + "loss": 17.9827, + "step": 7047 + }, + { + "epoch": 0.12883177655510264, + "grad_norm": 6.595915249149195, + "learning_rate": 9.746125382067614e-06, + "loss": 17.655, + "step": 7048 + }, + { + "epoch": 0.12885005575154918, + "grad_norm": 7.94492656241729, + "learning_rate": 9.746032249181755e-06, + "loss": 17.9237, + "step": 7049 + }, + { + "epoch": 0.12886833494799568, + "grad_norm": 7.785146945906904, + "learning_rate": 9.745939099661467e-06, + "loss": 18.0476, + "step": 7050 + }, + { + "epoch": 0.12888661414444222, + "grad_norm": 7.299852152564878, + "learning_rate": 9.745845933507075e-06, + "loss": 18.0855, + "step": 7051 + }, + { + "epoch": 0.12890489334088873, + "grad_norm": 6.080794940359898, + "learning_rate": 9.745752750718904e-06, + "loss": 17.081, + "step": 7052 + }, + { + "epoch": 0.12892317253733526, + "grad_norm": 9.400370378916696, + "learning_rate": 9.745659551297282e-06, + "loss": 18.9966, + "step": 7053 + }, + { + "epoch": 0.12894145173378177, + "grad_norm": 6.607722833662794, + "learning_rate": 
9.745566335242534e-06, + "loss": 17.7522, + "step": 7054 + }, + { + "epoch": 0.1289597309302283, + "grad_norm": 5.881197931608681, + "learning_rate": 9.745473102554988e-06, + "loss": 17.0914, + "step": 7055 + }, + { + "epoch": 0.12897801012667484, + "grad_norm": 8.8913460947489, + "learning_rate": 9.74537985323497e-06, + "loss": 18.1216, + "step": 7056 + }, + { + "epoch": 0.12899628932312135, + "grad_norm": 7.533926893007955, + "learning_rate": 9.74528658728281e-06, + "loss": 18.1686, + "step": 7057 + }, + { + "epoch": 0.12901456851956788, + "grad_norm": 7.346160191812552, + "learning_rate": 9.74519330469883e-06, + "loss": 17.9362, + "step": 7058 + }, + { + "epoch": 0.1290328477160144, + "grad_norm": 6.833229114822008, + "learning_rate": 9.745100005483359e-06, + "loss": 17.6809, + "step": 7059 + }, + { + "epoch": 0.12905112691246093, + "grad_norm": 9.286908375591532, + "learning_rate": 9.745006689636725e-06, + "loss": 18.9791, + "step": 7060 + }, + { + "epoch": 0.12906940610890746, + "grad_norm": 7.635266165167685, + "learning_rate": 9.744913357159253e-06, + "loss": 17.998, + "step": 7061 + }, + { + "epoch": 0.12908768530535397, + "grad_norm": 7.121177886747499, + "learning_rate": 9.744820008051275e-06, + "loss": 17.8321, + "step": 7062 + }, + { + "epoch": 0.1291059645018005, + "grad_norm": 5.4608475466962965, + "learning_rate": 9.744726642313112e-06, + "loss": 17.3149, + "step": 7063 + }, + { + "epoch": 0.12912424369824702, + "grad_norm": 7.152818697394675, + "learning_rate": 9.744633259945093e-06, + "loss": 17.515, + "step": 7064 + }, + { + "epoch": 0.12914252289469355, + "grad_norm": 6.100990233000748, + "learning_rate": 9.744539860947548e-06, + "loss": 17.2829, + "step": 7065 + }, + { + "epoch": 0.1291608020911401, + "grad_norm": 6.021616617687753, + "learning_rate": 9.744446445320801e-06, + "loss": 17.3312, + "step": 7066 + }, + { + "epoch": 0.1291790812875866, + "grad_norm": 6.535681209185815, + "learning_rate": 9.744353013065183e-06, + "loss": 17.4638, + "step": 7067 + }, + { + "epoch": 0.12919736048403313, + "grad_norm": 7.784917741765711, + "learning_rate": 9.744259564181016e-06, + "loss": 18.0221, + "step": 7068 + }, + { + "epoch": 0.12921563968047964, + "grad_norm": 7.617343393835883, + "learning_rate": 9.744166098668635e-06, + "loss": 17.9527, + "step": 7069 + }, + { + "epoch": 0.12923391887692617, + "grad_norm": 7.90601165287111, + "learning_rate": 9.74407261652836e-06, + "loss": 18.1686, + "step": 7070 + }, + { + "epoch": 0.12925219807337268, + "grad_norm": 7.008037531889261, + "learning_rate": 9.743979117760525e-06, + "loss": 17.8141, + "step": 7071 + }, + { + "epoch": 0.12927047726981922, + "grad_norm": 8.37047423130954, + "learning_rate": 9.743885602365453e-06, + "loss": 18.0465, + "step": 7072 + }, + { + "epoch": 0.12928875646626575, + "grad_norm": 6.9934475487925525, + "learning_rate": 9.743792070343474e-06, + "loss": 17.8282, + "step": 7073 + }, + { + "epoch": 0.12930703566271226, + "grad_norm": 7.237583451545216, + "learning_rate": 9.743698521694915e-06, + "loss": 17.7588, + "step": 7074 + }, + { + "epoch": 0.1293253148591588, + "grad_norm": 7.070649931872262, + "learning_rate": 9.743604956420105e-06, + "loss": 17.8541, + "step": 7075 + }, + { + "epoch": 0.1293435940556053, + "grad_norm": 7.078642539692684, + "learning_rate": 9.743511374519371e-06, + "loss": 17.8196, + "step": 7076 + }, + { + "epoch": 0.12936187325205184, + "grad_norm": 7.02475733593393, + "learning_rate": 9.743417775993041e-06, + "loss": 17.5844, + "step": 7077 + }, + { + "epoch": 0.12938015244849838, 
+ "grad_norm": 9.37372271448173, + "learning_rate": 9.743324160841444e-06, + "loss": 18.5396, + "step": 7078 + }, + { + "epoch": 0.12939843164494488, + "grad_norm": 8.236034768064512, + "learning_rate": 9.743230529064906e-06, + "loss": 18.2293, + "step": 7079 + }, + { + "epoch": 0.12941671084139142, + "grad_norm": 8.536656548092834, + "learning_rate": 9.743136880663759e-06, + "loss": 17.7928, + "step": 7080 + }, + { + "epoch": 0.12943499003783793, + "grad_norm": 6.649512660286544, + "learning_rate": 9.743043215638328e-06, + "loss": 17.7391, + "step": 7081 + }, + { + "epoch": 0.12945326923428446, + "grad_norm": 6.926789336046934, + "learning_rate": 9.742949533988942e-06, + "loss": 17.7954, + "step": 7082 + }, + { + "epoch": 0.129471548430731, + "grad_norm": 7.691534260735286, + "learning_rate": 9.742855835715928e-06, + "loss": 18.1081, + "step": 7083 + }, + { + "epoch": 0.1294898276271775, + "grad_norm": 7.150770709615593, + "learning_rate": 9.742762120819618e-06, + "loss": 17.947, + "step": 7084 + }, + { + "epoch": 0.12950810682362404, + "grad_norm": 6.809459253562558, + "learning_rate": 9.742668389300335e-06, + "loss": 17.5234, + "step": 7085 + }, + { + "epoch": 0.12952638602007055, + "grad_norm": 6.585411731518869, + "learning_rate": 9.742574641158414e-06, + "loss": 17.5973, + "step": 7086 + }, + { + "epoch": 0.12954466521651709, + "grad_norm": 7.445803412206817, + "learning_rate": 9.74248087639418e-06, + "loss": 17.6825, + "step": 7087 + }, + { + "epoch": 0.1295629444129636, + "grad_norm": 7.96816585816933, + "learning_rate": 9.742387095007962e-06, + "loss": 18.1658, + "step": 7088 + }, + { + "epoch": 0.12958122360941013, + "grad_norm": 8.622071128910187, + "learning_rate": 9.742293297000088e-06, + "loss": 18.2712, + "step": 7089 + }, + { + "epoch": 0.12959950280585666, + "grad_norm": 6.31027197922364, + "learning_rate": 9.74219948237089e-06, + "loss": 17.6311, + "step": 7090 + }, + { + "epoch": 0.12961778200230317, + "grad_norm": 6.698700480938034, + "learning_rate": 9.742105651120691e-06, + "loss": 17.1604, + "step": 7091 + }, + { + "epoch": 0.1296360611987497, + "grad_norm": 8.636028796784073, + "learning_rate": 9.742011803249824e-06, + "loss": 18.434, + "step": 7092 + }, + { + "epoch": 0.12965434039519622, + "grad_norm": 7.473634804013628, + "learning_rate": 9.741917938758617e-06, + "loss": 17.9458, + "step": 7093 + }, + { + "epoch": 0.12967261959164275, + "grad_norm": 8.03960125708875, + "learning_rate": 9.7418240576474e-06, + "loss": 18.0053, + "step": 7094 + }, + { + "epoch": 0.1296908987880893, + "grad_norm": 6.118158265189932, + "learning_rate": 9.7417301599165e-06, + "loss": 17.3436, + "step": 7095 + }, + { + "epoch": 0.1297091779845358, + "grad_norm": 7.823548294120425, + "learning_rate": 9.741636245566248e-06, + "loss": 18.0909, + "step": 7096 + }, + { + "epoch": 0.12972745718098233, + "grad_norm": 8.919167180196236, + "learning_rate": 9.741542314596973e-06, + "loss": 18.4706, + "step": 7097 + }, + { + "epoch": 0.12974573637742884, + "grad_norm": 6.797446443118667, + "learning_rate": 9.741448367009003e-06, + "loss": 17.5313, + "step": 7098 + }, + { + "epoch": 0.12976401557387537, + "grad_norm": 6.891792027736653, + "learning_rate": 9.741354402802668e-06, + "loss": 17.7328, + "step": 7099 + }, + { + "epoch": 0.1297822947703219, + "grad_norm": 6.24232287085528, + "learning_rate": 9.741260421978297e-06, + "loss": 17.4414, + "step": 7100 + }, + { + "epoch": 0.12980057396676842, + "grad_norm": 8.769541465073212, + "learning_rate": 9.74116642453622e-06, + "loss": 18.6885, + 
"step": 7101 + }, + { + "epoch": 0.12981885316321495, + "grad_norm": 7.405110796041737, + "learning_rate": 9.741072410476766e-06, + "loss": 17.6396, + "step": 7102 + }, + { + "epoch": 0.12983713235966146, + "grad_norm": 6.332131906245619, + "learning_rate": 9.740978379800265e-06, + "loss": 17.5006, + "step": 7103 + }, + { + "epoch": 0.129855411556108, + "grad_norm": 8.541532411360873, + "learning_rate": 9.740884332507045e-06, + "loss": 18.0133, + "step": 7104 + }, + { + "epoch": 0.1298736907525545, + "grad_norm": 6.965117238326117, + "learning_rate": 9.740790268597438e-06, + "loss": 17.6445, + "step": 7105 + }, + { + "epoch": 0.12989196994900104, + "grad_norm": 7.739700403058189, + "learning_rate": 9.740696188071772e-06, + "loss": 17.6502, + "step": 7106 + }, + { + "epoch": 0.12991024914544758, + "grad_norm": 10.651807663877248, + "learning_rate": 9.740602090930378e-06, + "loss": 18.537, + "step": 7107 + }, + { + "epoch": 0.12992852834189408, + "grad_norm": 7.011919563083695, + "learning_rate": 9.740507977173585e-06, + "loss": 17.766, + "step": 7108 + }, + { + "epoch": 0.12994680753834062, + "grad_norm": 6.805176369720356, + "learning_rate": 9.740413846801722e-06, + "loss": 17.5963, + "step": 7109 + }, + { + "epoch": 0.12996508673478713, + "grad_norm": 6.300468268748072, + "learning_rate": 9.74031969981512e-06, + "loss": 17.3778, + "step": 7110 + }, + { + "epoch": 0.12998336593123366, + "grad_norm": 6.625256291228368, + "learning_rate": 9.740225536214108e-06, + "loss": 17.4592, + "step": 7111 + }, + { + "epoch": 0.1300016451276802, + "grad_norm": 6.53112538703904, + "learning_rate": 9.740131355999018e-06, + "loss": 17.4195, + "step": 7112 + }, + { + "epoch": 0.1300199243241267, + "grad_norm": 7.174879385056336, + "learning_rate": 9.740037159170179e-06, + "loss": 17.6717, + "step": 7113 + }, + { + "epoch": 0.13003820352057324, + "grad_norm": 7.357221230274843, + "learning_rate": 9.73994294572792e-06, + "loss": 17.7064, + "step": 7114 + }, + { + "epoch": 0.13005648271701975, + "grad_norm": 7.222772844113478, + "learning_rate": 9.739848715672573e-06, + "loss": 17.7126, + "step": 7115 + }, + { + "epoch": 0.13007476191346629, + "grad_norm": 8.832836088421047, + "learning_rate": 9.739754469004467e-06, + "loss": 18.5505, + "step": 7116 + }, + { + "epoch": 0.13009304110991282, + "grad_norm": 7.159356110822697, + "learning_rate": 9.739660205723935e-06, + "loss": 18.1078, + "step": 7117 + }, + { + "epoch": 0.13011132030635933, + "grad_norm": 7.163747630000917, + "learning_rate": 9.739565925831304e-06, + "loss": 17.7092, + "step": 7118 + }, + { + "epoch": 0.13012959950280586, + "grad_norm": 8.932830491978832, + "learning_rate": 9.739471629326904e-06, + "loss": 18.3991, + "step": 7119 + }, + { + "epoch": 0.13014787869925237, + "grad_norm": 7.314547240989594, + "learning_rate": 9.73937731621107e-06, + "loss": 17.7927, + "step": 7120 + }, + { + "epoch": 0.1301661578956989, + "grad_norm": 7.43226134822261, + "learning_rate": 9.73928298648413e-06, + "loss": 18.1889, + "step": 7121 + }, + { + "epoch": 0.13018443709214542, + "grad_norm": 6.246887550119452, + "learning_rate": 9.73918864014641e-06, + "loss": 17.4334, + "step": 7122 + }, + { + "epoch": 0.13020271628859195, + "grad_norm": 6.4398750407548295, + "learning_rate": 9.739094277198249e-06, + "loss": 17.699, + "step": 7123 + }, + { + "epoch": 0.1302209954850385, + "grad_norm": 6.860107349364731, + "learning_rate": 9.738999897639973e-06, + "loss": 17.8638, + "step": 7124 + }, + { + "epoch": 0.130239274681485, + "grad_norm": 9.113527874042134, + 
"learning_rate": 9.738905501471914e-06, + "loss": 18.2573, + "step": 7125 + }, + { + "epoch": 0.13025755387793153, + "grad_norm": 7.166982317668824, + "learning_rate": 9.738811088694401e-06, + "loss": 17.4839, + "step": 7126 + }, + { + "epoch": 0.13027583307437804, + "grad_norm": 5.822801345459667, + "learning_rate": 9.738716659307767e-06, + "loss": 17.2673, + "step": 7127 + }, + { + "epoch": 0.13029411227082457, + "grad_norm": 7.079865700002657, + "learning_rate": 9.738622213312343e-06, + "loss": 18.0549, + "step": 7128 + }, + { + "epoch": 0.1303123914672711, + "grad_norm": 6.880181479310935, + "learning_rate": 9.738527750708458e-06, + "loss": 17.8285, + "step": 7129 + }, + { + "epoch": 0.13033067066371762, + "grad_norm": 6.270793460597133, + "learning_rate": 9.738433271496445e-06, + "loss": 17.3228, + "step": 7130 + }, + { + "epoch": 0.13034894986016415, + "grad_norm": 6.520184219414416, + "learning_rate": 9.738338775676634e-06, + "loss": 17.5676, + "step": 7131 + }, + { + "epoch": 0.13036722905661066, + "grad_norm": 6.757513125586967, + "learning_rate": 9.73824426324936e-06, + "loss": 17.7184, + "step": 7132 + }, + { + "epoch": 0.1303855082530572, + "grad_norm": 8.613481182446472, + "learning_rate": 9.738149734214947e-06, + "loss": 18.1121, + "step": 7133 + }, + { + "epoch": 0.13040378744950373, + "grad_norm": 7.9340290228511705, + "learning_rate": 9.738055188573731e-06, + "loss": 18.2459, + "step": 7134 + }, + { + "epoch": 0.13042206664595024, + "grad_norm": 7.213911441171221, + "learning_rate": 9.737960626326044e-06, + "loss": 17.7702, + "step": 7135 + }, + { + "epoch": 0.13044034584239678, + "grad_norm": 7.1004579036797955, + "learning_rate": 9.737866047472215e-06, + "loss": 17.7682, + "step": 7136 + }, + { + "epoch": 0.13045862503884328, + "grad_norm": 6.839243807506448, + "learning_rate": 9.737771452012579e-06, + "loss": 17.8289, + "step": 7137 + }, + { + "epoch": 0.13047690423528982, + "grad_norm": 8.331470974201615, + "learning_rate": 9.737676839947463e-06, + "loss": 18.0984, + "step": 7138 + }, + { + "epoch": 0.13049518343173633, + "grad_norm": 6.714694618846473, + "learning_rate": 9.7375822112772e-06, + "loss": 17.7931, + "step": 7139 + }, + { + "epoch": 0.13051346262818286, + "grad_norm": 7.5737923207037205, + "learning_rate": 9.737487566002126e-06, + "loss": 17.827, + "step": 7140 + }, + { + "epoch": 0.1305317418246294, + "grad_norm": 7.105649428705973, + "learning_rate": 9.737392904122565e-06, + "loss": 17.9188, + "step": 7141 + }, + { + "epoch": 0.1305500210210759, + "grad_norm": 7.47606356144569, + "learning_rate": 9.737298225638856e-06, + "loss": 17.7877, + "step": 7142 + }, + { + "epoch": 0.13056830021752244, + "grad_norm": 7.131386039255281, + "learning_rate": 9.737203530551327e-06, + "loss": 17.7167, + "step": 7143 + }, + { + "epoch": 0.13058657941396895, + "grad_norm": 7.9805889492847255, + "learning_rate": 9.73710881886031e-06, + "loss": 18.4009, + "step": 7144 + }, + { + "epoch": 0.1306048586104155, + "grad_norm": 7.448580722022492, + "learning_rate": 9.737014090566138e-06, + "loss": 17.9732, + "step": 7145 + }, + { + "epoch": 0.13062313780686202, + "grad_norm": 7.4679063591013275, + "learning_rate": 9.736919345669142e-06, + "loss": 17.9908, + "step": 7146 + }, + { + "epoch": 0.13064141700330853, + "grad_norm": 7.384603113967751, + "learning_rate": 9.736824584169656e-06, + "loss": 17.4913, + "step": 7147 + }, + { + "epoch": 0.13065969619975507, + "grad_norm": 7.806417424820133, + "learning_rate": 9.73672980606801e-06, + "loss": 18.0279, + "step": 7148 + }, + { + 
"epoch": 0.13067797539620157, + "grad_norm": 6.997974581913617, + "learning_rate": 9.736635011364538e-06, + "loss": 17.92, + "step": 7149 + }, + { + "epoch": 0.1306962545926481, + "grad_norm": 7.478948956993409, + "learning_rate": 9.736540200059572e-06, + "loss": 17.796, + "step": 7150 + }, + { + "epoch": 0.13071453378909464, + "grad_norm": 6.024958741105644, + "learning_rate": 9.736445372153441e-06, + "loss": 17.4914, + "step": 7151 + }, + { + "epoch": 0.13073281298554115, + "grad_norm": 7.520078447002026, + "learning_rate": 9.736350527646481e-06, + "loss": 18.1718, + "step": 7152 + }, + { + "epoch": 0.1307510921819877, + "grad_norm": 7.159633598905982, + "learning_rate": 9.736255666539026e-06, + "loss": 17.9506, + "step": 7153 + }, + { + "epoch": 0.1307693713784342, + "grad_norm": 7.515554897738069, + "learning_rate": 9.736160788831401e-06, + "loss": 18.3908, + "step": 7154 + }, + { + "epoch": 0.13078765057488073, + "grad_norm": 8.618024991764836, + "learning_rate": 9.736065894523947e-06, + "loss": 18.1717, + "step": 7155 + }, + { + "epoch": 0.13080592977132724, + "grad_norm": 6.254382251240645, + "learning_rate": 9.735970983616992e-06, + "loss": 17.5342, + "step": 7156 + }, + { + "epoch": 0.13082420896777378, + "grad_norm": 7.67952894743319, + "learning_rate": 9.73587605611087e-06, + "loss": 18.0999, + "step": 7157 + }, + { + "epoch": 0.1308424881642203, + "grad_norm": 7.759082523709545, + "learning_rate": 9.735781112005913e-06, + "loss": 17.9696, + "step": 7158 + }, + { + "epoch": 0.13086076736066682, + "grad_norm": 6.85593738247165, + "learning_rate": 9.735686151302455e-06, + "loss": 17.8862, + "step": 7159 + }, + { + "epoch": 0.13087904655711335, + "grad_norm": 6.5318907715428685, + "learning_rate": 9.735591174000828e-06, + "loss": 17.4204, + "step": 7160 + }, + { + "epoch": 0.13089732575355986, + "grad_norm": 7.0040496867619115, + "learning_rate": 9.735496180101362e-06, + "loss": 17.637, + "step": 7161 + }, + { + "epoch": 0.1309156049500064, + "grad_norm": 5.7779722178513016, + "learning_rate": 9.735401169604396e-06, + "loss": 17.3363, + "step": 7162 + }, + { + "epoch": 0.13093388414645293, + "grad_norm": 8.303287070527276, + "learning_rate": 9.735306142510259e-06, + "loss": 18.3085, + "step": 7163 + }, + { + "epoch": 0.13095216334289944, + "grad_norm": 7.110065308053239, + "learning_rate": 9.735211098819283e-06, + "loss": 17.6274, + "step": 7164 + }, + { + "epoch": 0.13097044253934598, + "grad_norm": 6.262788790202189, + "learning_rate": 9.735116038531806e-06, + "loss": 17.3397, + "step": 7165 + }, + { + "epoch": 0.13098872173579248, + "grad_norm": 8.564939431946543, + "learning_rate": 9.735020961648156e-06, + "loss": 17.9833, + "step": 7166 + }, + { + "epoch": 0.13100700093223902, + "grad_norm": 6.293016637755449, + "learning_rate": 9.73492586816867e-06, + "loss": 17.4136, + "step": 7167 + }, + { + "epoch": 0.13102528012868556, + "grad_norm": 6.822811223665426, + "learning_rate": 9.734830758093679e-06, + "loss": 17.5242, + "step": 7168 + }, + { + "epoch": 0.13104355932513206, + "grad_norm": 7.397815281411342, + "learning_rate": 9.734735631423517e-06, + "loss": 17.8625, + "step": 7169 + }, + { + "epoch": 0.1310618385215786, + "grad_norm": 6.853785375824345, + "learning_rate": 9.734640488158517e-06, + "loss": 17.7668, + "step": 7170 + }, + { + "epoch": 0.1310801177180251, + "grad_norm": 8.2618386970712, + "learning_rate": 9.734545328299014e-06, + "loss": 18.3031, + "step": 7171 + }, + { + "epoch": 0.13109839691447164, + "grad_norm": 6.532337976922704, + "learning_rate": 
9.734450151845341e-06, + "loss": 17.4171, + "step": 7172 + }, + { + "epoch": 0.13111667611091815, + "grad_norm": 7.061745401124181, + "learning_rate": 9.734354958797829e-06, + "loss": 17.7711, + "step": 7173 + }, + { + "epoch": 0.1311349553073647, + "grad_norm": 8.452499012569488, + "learning_rate": 9.734259749156815e-06, + "loss": 18.0384, + "step": 7174 + }, + { + "epoch": 0.13115323450381122, + "grad_norm": 6.123261611912143, + "learning_rate": 9.734164522922631e-06, + "loss": 17.498, + "step": 7175 + }, + { + "epoch": 0.13117151370025773, + "grad_norm": 7.517392231596765, + "learning_rate": 9.73406928009561e-06, + "loss": 17.7518, + "step": 7176 + }, + { + "epoch": 0.13118979289670427, + "grad_norm": 7.742621226274448, + "learning_rate": 9.733974020676089e-06, + "loss": 18.1766, + "step": 7177 + }, + { + "epoch": 0.13120807209315077, + "grad_norm": 7.387095435548084, + "learning_rate": 9.7338787446644e-06, + "loss": 17.6182, + "step": 7178 + }, + { + "epoch": 0.1312263512895973, + "grad_norm": 7.446250508000315, + "learning_rate": 9.733783452060874e-06, + "loss": 18.1282, + "step": 7179 + }, + { + "epoch": 0.13124463048604385, + "grad_norm": 7.5390636994354505, + "learning_rate": 9.73368814286585e-06, + "loss": 18.2006, + "step": 7180 + }, + { + "epoch": 0.13126290968249035, + "grad_norm": 7.599898660645298, + "learning_rate": 9.733592817079661e-06, + "loss": 17.602, + "step": 7181 + }, + { + "epoch": 0.1312811888789369, + "grad_norm": 6.262963191082417, + "learning_rate": 9.733497474702638e-06, + "loss": 17.0976, + "step": 7182 + }, + { + "epoch": 0.1312994680753834, + "grad_norm": 6.14727093181255, + "learning_rate": 9.733402115735117e-06, + "loss": 17.4255, + "step": 7183 + }, + { + "epoch": 0.13131774727182993, + "grad_norm": 5.7133178099579665, + "learning_rate": 9.733306740177432e-06, + "loss": 16.9744, + "step": 7184 + }, + { + "epoch": 0.13133602646827647, + "grad_norm": 8.507051019811717, + "learning_rate": 9.73321134802992e-06, + "loss": 18.5356, + "step": 7185 + }, + { + "epoch": 0.13135430566472298, + "grad_norm": 9.555240157396033, + "learning_rate": 9.73311593929291e-06, + "loss": 18.7287, + "step": 7186 + }, + { + "epoch": 0.1313725848611695, + "grad_norm": 7.73195458476903, + "learning_rate": 9.73302051396674e-06, + "loss": 17.776, + "step": 7187 + }, + { + "epoch": 0.13139086405761602, + "grad_norm": 6.3066851006271785, + "learning_rate": 9.732925072051746e-06, + "loss": 17.4364, + "step": 7188 + }, + { + "epoch": 0.13140914325406255, + "grad_norm": 6.530512281586624, + "learning_rate": 9.732829613548258e-06, + "loss": 17.4735, + "step": 7189 + }, + { + "epoch": 0.13142742245050906, + "grad_norm": 7.670285926957407, + "learning_rate": 9.732734138456614e-06, + "loss": 18.0228, + "step": 7190 + }, + { + "epoch": 0.1314457016469556, + "grad_norm": 6.881575360490226, + "learning_rate": 9.732638646777148e-06, + "loss": 17.6759, + "step": 7191 + }, + { + "epoch": 0.13146398084340213, + "grad_norm": 9.05563547716694, + "learning_rate": 9.732543138510193e-06, + "loss": 18.2275, + "step": 7192 + }, + { + "epoch": 0.13148226003984864, + "grad_norm": 7.306381515657076, + "learning_rate": 9.732447613656087e-06, + "loss": 17.9175, + "step": 7193 + }, + { + "epoch": 0.13150053923629518, + "grad_norm": 9.280282371695481, + "learning_rate": 9.732352072215162e-06, + "loss": 17.8701, + "step": 7194 + }, + { + "epoch": 0.13151881843274169, + "grad_norm": 7.755610675155056, + "learning_rate": 9.73225651418775e-06, + "loss": 17.9723, + "step": 7195 + }, + { + "epoch": 0.13153709762918822, 
+ "grad_norm": 5.94216702467463, + "learning_rate": 9.732160939574194e-06, + "loss": 17.276, + "step": 7196 + }, + { + "epoch": 0.13155537682563476, + "grad_norm": 5.8325769245965935, + "learning_rate": 9.732065348374821e-06, + "loss": 17.3216, + "step": 7197 + }, + { + "epoch": 0.13157365602208126, + "grad_norm": 7.8918461462941405, + "learning_rate": 9.731969740589972e-06, + "loss": 17.9062, + "step": 7198 + }, + { + "epoch": 0.1315919352185278, + "grad_norm": 5.9828504034800085, + "learning_rate": 9.731874116219981e-06, + "loss": 17.3003, + "step": 7199 + }, + { + "epoch": 0.1316102144149743, + "grad_norm": 6.271695218333179, + "learning_rate": 9.73177847526518e-06, + "loss": 17.408, + "step": 7200 + }, + { + "epoch": 0.13162849361142084, + "grad_norm": 6.884913084335879, + "learning_rate": 9.731682817725907e-06, + "loss": 17.5766, + "step": 7201 + }, + { + "epoch": 0.13164677280786738, + "grad_norm": 6.887401816460929, + "learning_rate": 9.731587143602494e-06, + "loss": 17.8024, + "step": 7202 + }, + { + "epoch": 0.1316650520043139, + "grad_norm": 6.825550600345836, + "learning_rate": 9.731491452895281e-06, + "loss": 17.7197, + "step": 7203 + }, + { + "epoch": 0.13168333120076042, + "grad_norm": 8.208850888222933, + "learning_rate": 9.7313957456046e-06, + "loss": 18.3821, + "step": 7204 + }, + { + "epoch": 0.13170161039720693, + "grad_norm": 8.074895936854027, + "learning_rate": 9.731300021730787e-06, + "loss": 18.7384, + "step": 7205 + }, + { + "epoch": 0.13171988959365347, + "grad_norm": 7.821903652140027, + "learning_rate": 9.731204281274178e-06, + "loss": 18.4072, + "step": 7206 + }, + { + "epoch": 0.13173816879009997, + "grad_norm": 7.682609503179762, + "learning_rate": 9.73110852423511e-06, + "loss": 17.8128, + "step": 7207 + }, + { + "epoch": 0.1317564479865465, + "grad_norm": 6.782940292836674, + "learning_rate": 9.731012750613918e-06, + "loss": 17.7275, + "step": 7208 + }, + { + "epoch": 0.13177472718299305, + "grad_norm": 6.679171848756854, + "learning_rate": 9.730916960410934e-06, + "loss": 17.4771, + "step": 7209 + }, + { + "epoch": 0.13179300637943955, + "grad_norm": 10.636301648491877, + "learning_rate": 9.730821153626497e-06, + "loss": 17.6422, + "step": 7210 + }, + { + "epoch": 0.1318112855758861, + "grad_norm": 7.2774302201233985, + "learning_rate": 9.730725330260945e-06, + "loss": 17.6334, + "step": 7211 + }, + { + "epoch": 0.1318295647723326, + "grad_norm": 9.41703242932954, + "learning_rate": 9.730629490314609e-06, + "loss": 18.422, + "step": 7212 + }, + { + "epoch": 0.13184784396877913, + "grad_norm": 6.869794181421306, + "learning_rate": 9.730533633787827e-06, + "loss": 17.5357, + "step": 7213 + }, + { + "epoch": 0.13186612316522567, + "grad_norm": 8.155327972728386, + "learning_rate": 9.730437760680936e-06, + "loss": 18.3217, + "step": 7214 + }, + { + "epoch": 0.13188440236167218, + "grad_norm": 7.624147658658768, + "learning_rate": 9.73034187099427e-06, + "loss": 17.7722, + "step": 7215 + }, + { + "epoch": 0.1319026815581187, + "grad_norm": 7.420377534640088, + "learning_rate": 9.730245964728167e-06, + "loss": 17.9153, + "step": 7216 + }, + { + "epoch": 0.13192096075456522, + "grad_norm": 8.872178765800202, + "learning_rate": 9.730150041882962e-06, + "loss": 18.5382, + "step": 7217 + }, + { + "epoch": 0.13193923995101176, + "grad_norm": 7.320922652635634, + "learning_rate": 9.730054102458992e-06, + "loss": 18.0761, + "step": 7218 + }, + { + "epoch": 0.1319575191474583, + "grad_norm": 7.280140268895857, + "learning_rate": 9.729958146456593e-06, + "loss": 17.4843, 
+ "step": 7219 + }, + { + "epoch": 0.1319757983439048, + "grad_norm": 7.547182011375449, + "learning_rate": 9.729862173876102e-06, + "loss": 17.7176, + "step": 7220 + }, + { + "epoch": 0.13199407754035133, + "grad_norm": 7.300794574100374, + "learning_rate": 9.729766184717853e-06, + "loss": 17.5465, + "step": 7221 + }, + { + "epoch": 0.13201235673679784, + "grad_norm": 7.611837634531434, + "learning_rate": 9.729670178982184e-06, + "loss": 17.9243, + "step": 7222 + }, + { + "epoch": 0.13203063593324438, + "grad_norm": 7.081527995399049, + "learning_rate": 9.729574156669433e-06, + "loss": 17.7627, + "step": 7223 + }, + { + "epoch": 0.13204891512969089, + "grad_norm": 8.915259132218761, + "learning_rate": 9.729478117779933e-06, + "loss": 18.1401, + "step": 7224 + }, + { + "epoch": 0.13206719432613742, + "grad_norm": 8.292487473290121, + "learning_rate": 9.729382062314023e-06, + "loss": 18.0996, + "step": 7225 + }, + { + "epoch": 0.13208547352258396, + "grad_norm": 6.296746454594278, + "learning_rate": 9.72928599027204e-06, + "loss": 17.5431, + "step": 7226 + }, + { + "epoch": 0.13210375271903047, + "grad_norm": 7.2671164091935285, + "learning_rate": 9.72918990165432e-06, + "loss": 17.8938, + "step": 7227 + }, + { + "epoch": 0.132122031915477, + "grad_norm": 6.9395733615132835, + "learning_rate": 9.7290937964612e-06, + "loss": 17.7086, + "step": 7228 + }, + { + "epoch": 0.1321403111119235, + "grad_norm": 7.3315954949805295, + "learning_rate": 9.728997674693015e-06, + "loss": 17.3778, + "step": 7229 + }, + { + "epoch": 0.13215859030837004, + "grad_norm": 7.9153149440768855, + "learning_rate": 9.728901536350106e-06, + "loss": 18.2457, + "step": 7230 + }, + { + "epoch": 0.13217686950481658, + "grad_norm": 6.8914822685486925, + "learning_rate": 9.728805381432805e-06, + "loss": 17.5621, + "step": 7231 + }, + { + "epoch": 0.1321951487012631, + "grad_norm": 8.83534302114429, + "learning_rate": 9.728709209941453e-06, + "loss": 18.7026, + "step": 7232 + }, + { + "epoch": 0.13221342789770962, + "grad_norm": 6.966925026227877, + "learning_rate": 9.728613021876385e-06, + "loss": 17.4717, + "step": 7233 + }, + { + "epoch": 0.13223170709415613, + "grad_norm": 6.289173813053085, + "learning_rate": 9.728516817237939e-06, + "loss": 17.4835, + "step": 7234 + }, + { + "epoch": 0.13224998629060267, + "grad_norm": 6.135824237560181, + "learning_rate": 9.72842059602645e-06, + "loss": 17.3179, + "step": 7235 + }, + { + "epoch": 0.1322682654870492, + "grad_norm": 6.6226126286860865, + "learning_rate": 9.72832435824226e-06, + "loss": 17.4737, + "step": 7236 + }, + { + "epoch": 0.1322865446834957, + "grad_norm": 6.62079242021924, + "learning_rate": 9.728228103885702e-06, + "loss": 17.5764, + "step": 7237 + }, + { + "epoch": 0.13230482387994225, + "grad_norm": 10.527562765818056, + "learning_rate": 9.728131832957115e-06, + "loss": 17.4243, + "step": 7238 + }, + { + "epoch": 0.13232310307638875, + "grad_norm": 7.049840722526844, + "learning_rate": 9.728035545456837e-06, + "loss": 17.6683, + "step": 7239 + }, + { + "epoch": 0.1323413822728353, + "grad_norm": 5.980215077656075, + "learning_rate": 9.727939241385203e-06, + "loss": 17.0616, + "step": 7240 + }, + { + "epoch": 0.1323596614692818, + "grad_norm": 7.133091112035594, + "learning_rate": 9.727842920742554e-06, + "loss": 17.6764, + "step": 7241 + }, + { + "epoch": 0.13237794066572833, + "grad_norm": 7.454956226368991, + "learning_rate": 9.727746583529225e-06, + "loss": 18.0583, + "step": 7242 + }, + { + "epoch": 0.13239621986217487, + "grad_norm": 8.062267426384313, + 
"learning_rate": 9.727650229745554e-06, + "loss": 18.0237, + "step": 7243 + }, + { + "epoch": 0.13241449905862138, + "grad_norm": 7.5373814766321, + "learning_rate": 9.727553859391881e-06, + "loss": 17.7991, + "step": 7244 + }, + { + "epoch": 0.1324327782550679, + "grad_norm": 7.597588160329692, + "learning_rate": 9.72745747246854e-06, + "loss": 18.081, + "step": 7245 + }, + { + "epoch": 0.13245105745151442, + "grad_norm": 6.602527071310561, + "learning_rate": 9.727361068975871e-06, + "loss": 17.7052, + "step": 7246 + }, + { + "epoch": 0.13246933664796096, + "grad_norm": 6.3018292516121575, + "learning_rate": 9.727264648914212e-06, + "loss": 17.4789, + "step": 7247 + }, + { + "epoch": 0.1324876158444075, + "grad_norm": 6.348523634478017, + "learning_rate": 9.727168212283902e-06, + "loss": 17.2301, + "step": 7248 + }, + { + "epoch": 0.132505895040854, + "grad_norm": 7.582114574713264, + "learning_rate": 9.727071759085275e-06, + "loss": 18.2832, + "step": 7249 + }, + { + "epoch": 0.13252417423730053, + "grad_norm": 6.408570717052727, + "learning_rate": 9.726975289318674e-06, + "loss": 17.2156, + "step": 7250 + }, + { + "epoch": 0.13254245343374704, + "grad_norm": 6.670369978913939, + "learning_rate": 9.726878802984434e-06, + "loss": 17.6147, + "step": 7251 + }, + { + "epoch": 0.13256073263019358, + "grad_norm": 8.150834883500712, + "learning_rate": 9.726782300082893e-06, + "loss": 17.8667, + "step": 7252 + }, + { + "epoch": 0.13257901182664011, + "grad_norm": 8.54348521469152, + "learning_rate": 9.72668578061439e-06, + "loss": 19.1827, + "step": 7253 + }, + { + "epoch": 0.13259729102308662, + "grad_norm": 7.161062456562297, + "learning_rate": 9.726589244579265e-06, + "loss": 17.9603, + "step": 7254 + }, + { + "epoch": 0.13261557021953316, + "grad_norm": 6.987094257255618, + "learning_rate": 9.726492691977856e-06, + "loss": 17.6345, + "step": 7255 + }, + { + "epoch": 0.13263384941597967, + "grad_norm": 7.33513881872116, + "learning_rate": 9.726396122810497e-06, + "loss": 17.6273, + "step": 7256 + }, + { + "epoch": 0.1326521286124262, + "grad_norm": 5.771420058036647, + "learning_rate": 9.726299537077533e-06, + "loss": 17.1635, + "step": 7257 + }, + { + "epoch": 0.1326704078088727, + "grad_norm": 5.634709424961658, + "learning_rate": 9.726202934779297e-06, + "loss": 16.9322, + "step": 7258 + }, + { + "epoch": 0.13268868700531924, + "grad_norm": 6.113352690103021, + "learning_rate": 9.726106315916131e-06, + "loss": 17.2111, + "step": 7259 + }, + { + "epoch": 0.13270696620176578, + "grad_norm": 7.089025293622658, + "learning_rate": 9.726009680488371e-06, + "loss": 17.8984, + "step": 7260 + }, + { + "epoch": 0.1327252453982123, + "grad_norm": 6.380160783041163, + "learning_rate": 9.725913028496359e-06, + "loss": 17.4098, + "step": 7261 + }, + { + "epoch": 0.13274352459465882, + "grad_norm": 7.805516414406185, + "learning_rate": 9.72581635994043e-06, + "loss": 18.1524, + "step": 7262 + }, + { + "epoch": 0.13276180379110533, + "grad_norm": 6.972621356864681, + "learning_rate": 9.725719674820926e-06, + "loss": 17.4139, + "step": 7263 + }, + { + "epoch": 0.13278008298755187, + "grad_norm": 6.571282474587021, + "learning_rate": 9.725622973138185e-06, + "loss": 17.2932, + "step": 7264 + }, + { + "epoch": 0.1327983621839984, + "grad_norm": 7.598398266168324, + "learning_rate": 9.725526254892544e-06, + "loss": 18.0883, + "step": 7265 + }, + { + "epoch": 0.1328166413804449, + "grad_norm": 8.096978763128138, + "learning_rate": 9.725429520084345e-06, + "loss": 18.2204, + "step": 7266 + }, + { + "epoch": 
0.13283492057689145, + "grad_norm": 7.3218974170460225, + "learning_rate": 9.725332768713924e-06, + "loss": 18.1063, + "step": 7267 + }, + { + "epoch": 0.13285319977333795, + "grad_norm": 6.885678643390233, + "learning_rate": 9.725236000781623e-06, + "loss": 17.9394, + "step": 7268 + }, + { + "epoch": 0.1328714789697845, + "grad_norm": 6.873700299264502, + "learning_rate": 9.72513921628778e-06, + "loss": 17.9246, + "step": 7269 + }, + { + "epoch": 0.13288975816623103, + "grad_norm": 6.7128758381037725, + "learning_rate": 9.725042415232734e-06, + "loss": 17.5954, + "step": 7270 + }, + { + "epoch": 0.13290803736267753, + "grad_norm": 12.04240883480532, + "learning_rate": 9.724945597616824e-06, + "loss": 17.5589, + "step": 7271 + }, + { + "epoch": 0.13292631655912407, + "grad_norm": 5.888560453507167, + "learning_rate": 9.724848763440389e-06, + "loss": 17.2392, + "step": 7272 + }, + { + "epoch": 0.13294459575557058, + "grad_norm": 5.959386286115697, + "learning_rate": 9.72475191270377e-06, + "loss": 17.0283, + "step": 7273 + }, + { + "epoch": 0.1329628749520171, + "grad_norm": 7.266865757445522, + "learning_rate": 9.724655045407306e-06, + "loss": 18.0163, + "step": 7274 + }, + { + "epoch": 0.13298115414846362, + "grad_norm": 8.959452733145644, + "learning_rate": 9.724558161551335e-06, + "loss": 18.9249, + "step": 7275 + }, + { + "epoch": 0.13299943334491016, + "grad_norm": 7.953370336785721, + "learning_rate": 9.724461261136198e-06, + "loss": 18.1367, + "step": 7276 + }, + { + "epoch": 0.1330177125413567, + "grad_norm": 6.98834233648399, + "learning_rate": 9.724364344162234e-06, + "loss": 18.0126, + "step": 7277 + }, + { + "epoch": 0.1330359917378032, + "grad_norm": 8.68664005119283, + "learning_rate": 9.724267410629785e-06, + "loss": 18.7515, + "step": 7278 + }, + { + "epoch": 0.13305427093424974, + "grad_norm": 7.220871982516756, + "learning_rate": 9.724170460539185e-06, + "loss": 17.8004, + "step": 7279 + }, + { + "epoch": 0.13307255013069624, + "grad_norm": 7.41225811833119, + "learning_rate": 9.72407349389078e-06, + "loss": 17.9176, + "step": 7280 + }, + { + "epoch": 0.13309082932714278, + "grad_norm": 9.264816320895811, + "learning_rate": 9.723976510684907e-06, + "loss": 18.2521, + "step": 7281 + }, + { + "epoch": 0.13310910852358931, + "grad_norm": 6.796793887604724, + "learning_rate": 9.723879510921904e-06, + "loss": 17.8195, + "step": 7282 + }, + { + "epoch": 0.13312738772003582, + "grad_norm": 6.742387205753071, + "learning_rate": 9.723782494602117e-06, + "loss": 17.3863, + "step": 7283 + }, + { + "epoch": 0.13314566691648236, + "grad_norm": 7.413465376909987, + "learning_rate": 9.72368546172588e-06, + "loss": 17.8029, + "step": 7284 + }, + { + "epoch": 0.13316394611292887, + "grad_norm": 7.306590371199734, + "learning_rate": 9.723588412293536e-06, + "loss": 17.8976, + "step": 7285 + }, + { + "epoch": 0.1331822253093754, + "grad_norm": 7.805619937583306, + "learning_rate": 9.723491346305426e-06, + "loss": 18.1022, + "step": 7286 + }, + { + "epoch": 0.13320050450582194, + "grad_norm": 9.380610587391823, + "learning_rate": 9.723394263761885e-06, + "loss": 18.6342, + "step": 7287 + }, + { + "epoch": 0.13321878370226845, + "grad_norm": 7.2669165607591655, + "learning_rate": 9.72329716466326e-06, + "loss": 17.7078, + "step": 7288 + }, + { + "epoch": 0.13323706289871498, + "grad_norm": 6.1314312177878065, + "learning_rate": 9.723200049009886e-06, + "loss": 17.2683, + "step": 7289 + }, + { + "epoch": 0.1332553420951615, + "grad_norm": 7.459467060270579, + "learning_rate": 
9.723102916802108e-06, + "loss": 17.8149, + "step": 7290 + }, + { + "epoch": 0.13327362129160802, + "grad_norm": 5.240316852090448, + "learning_rate": 9.723005768040264e-06, + "loss": 16.8855, + "step": 7291 + }, + { + "epoch": 0.13329190048805453, + "grad_norm": 6.234999252504696, + "learning_rate": 9.722908602724693e-06, + "loss": 17.2466, + "step": 7292 + }, + { + "epoch": 0.13331017968450107, + "grad_norm": 7.8177911789367, + "learning_rate": 9.722811420855738e-06, + "loss": 18.1054, + "step": 7293 + }, + { + "epoch": 0.1333284588809476, + "grad_norm": 7.301076125871522, + "learning_rate": 9.722714222433738e-06, + "loss": 17.9058, + "step": 7294 + }, + { + "epoch": 0.1333467380773941, + "grad_norm": 7.592960509600471, + "learning_rate": 9.722617007459037e-06, + "loss": 18.154, + "step": 7295 + }, + { + "epoch": 0.13336501727384065, + "grad_norm": 6.44851950623422, + "learning_rate": 9.72251977593197e-06, + "loss": 17.6372, + "step": 7296 + }, + { + "epoch": 0.13338329647028715, + "grad_norm": 6.131893496868285, + "learning_rate": 9.722422527852883e-06, + "loss": 17.4133, + "step": 7297 + }, + { + "epoch": 0.1334015756667337, + "grad_norm": 7.4913639236958, + "learning_rate": 9.722325263222114e-06, + "loss": 17.7869, + "step": 7298 + }, + { + "epoch": 0.13341985486318023, + "grad_norm": 8.281805478945252, + "learning_rate": 9.722227982040004e-06, + "loss": 18.1848, + "step": 7299 + }, + { + "epoch": 0.13343813405962673, + "grad_norm": 8.013137361924885, + "learning_rate": 9.722130684306897e-06, + "loss": 18.0568, + "step": 7300 + }, + { + "epoch": 0.13345641325607327, + "grad_norm": 6.442051320790764, + "learning_rate": 9.722033370023129e-06, + "loss": 17.4183, + "step": 7301 + }, + { + "epoch": 0.13347469245251978, + "grad_norm": 7.030411239913883, + "learning_rate": 9.721936039189046e-06, + "loss": 17.4873, + "step": 7302 + }, + { + "epoch": 0.1334929716489663, + "grad_norm": 8.713878967086993, + "learning_rate": 9.721838691804986e-06, + "loss": 18.4822, + "step": 7303 + }, + { + "epoch": 0.13351125084541285, + "grad_norm": 7.3964301208580014, + "learning_rate": 9.721741327871291e-06, + "loss": 17.7409, + "step": 7304 + }, + { + "epoch": 0.13352953004185936, + "grad_norm": 6.409802596344187, + "learning_rate": 9.721643947388304e-06, + "loss": 17.4356, + "step": 7305 + }, + { + "epoch": 0.1335478092383059, + "grad_norm": 6.553199538093425, + "learning_rate": 9.721546550356362e-06, + "loss": 17.635, + "step": 7306 + }, + { + "epoch": 0.1335660884347524, + "grad_norm": 7.855535169751103, + "learning_rate": 9.721449136775811e-06, + "loss": 17.5149, + "step": 7307 + }, + { + "epoch": 0.13358436763119894, + "grad_norm": 7.926555715016358, + "learning_rate": 9.72135170664699e-06, + "loss": 17.9294, + "step": 7308 + }, + { + "epoch": 0.13360264682764544, + "grad_norm": 8.247210705388246, + "learning_rate": 9.721254259970241e-06, + "loss": 17.9499, + "step": 7309 + }, + { + "epoch": 0.13362092602409198, + "grad_norm": 6.916463039840437, + "learning_rate": 9.721156796745905e-06, + "loss": 17.5853, + "step": 7310 + }, + { + "epoch": 0.13363920522053852, + "grad_norm": 8.408292063266318, + "learning_rate": 9.721059316974324e-06, + "loss": 17.7235, + "step": 7311 + }, + { + "epoch": 0.13365748441698502, + "grad_norm": 6.352660543096414, + "learning_rate": 9.72096182065584e-06, + "loss": 17.5232, + "step": 7312 + }, + { + "epoch": 0.13367576361343156, + "grad_norm": 7.349600166175085, + "learning_rate": 9.720864307790796e-06, + "loss": 18.1619, + "step": 7313 + }, + { + "epoch": 
0.13369404280987807, + "grad_norm": 7.874987001024496, + "learning_rate": 9.720766778379531e-06, + "loss": 18.2057, + "step": 7314 + }, + { + "epoch": 0.1337123220063246, + "grad_norm": 7.334889029040655, + "learning_rate": 9.720669232422388e-06, + "loss": 17.6721, + "step": 7315 + }, + { + "epoch": 0.13373060120277114, + "grad_norm": 6.819419836540446, + "learning_rate": 9.72057166991971e-06, + "loss": 17.7121, + "step": 7316 + }, + { + "epoch": 0.13374888039921765, + "grad_norm": 6.804240942531425, + "learning_rate": 9.720474090871836e-06, + "loss": 17.8763, + "step": 7317 + }, + { + "epoch": 0.13376715959566418, + "grad_norm": 5.970110473439147, + "learning_rate": 9.720376495279111e-06, + "loss": 17.3412, + "step": 7318 + }, + { + "epoch": 0.1337854387921107, + "grad_norm": 7.229349135900173, + "learning_rate": 9.720278883141876e-06, + "loss": 17.9148, + "step": 7319 + }, + { + "epoch": 0.13380371798855722, + "grad_norm": 8.226883619932043, + "learning_rate": 9.720181254460473e-06, + "loss": 18.4782, + "step": 7320 + }, + { + "epoch": 0.13382199718500376, + "grad_norm": 7.727062975450835, + "learning_rate": 9.720083609235244e-06, + "loss": 18.0103, + "step": 7321 + }, + { + "epoch": 0.13384027638145027, + "grad_norm": 7.473018375793038, + "learning_rate": 9.719985947466532e-06, + "loss": 17.8981, + "step": 7322 + }, + { + "epoch": 0.1338585555778968, + "grad_norm": 6.779991513233267, + "learning_rate": 9.719888269154679e-06, + "loss": 17.7578, + "step": 7323 + }, + { + "epoch": 0.1338768347743433, + "grad_norm": 8.484725122790097, + "learning_rate": 9.719790574300026e-06, + "loss": 18.3271, + "step": 7324 + }, + { + "epoch": 0.13389511397078985, + "grad_norm": 6.646616396806981, + "learning_rate": 9.719692862902919e-06, + "loss": 17.3037, + "step": 7325 + }, + { + "epoch": 0.13391339316723636, + "grad_norm": 7.235995780937685, + "learning_rate": 9.719595134963694e-06, + "loss": 17.7542, + "step": 7326 + }, + { + "epoch": 0.1339316723636829, + "grad_norm": 7.072527221787121, + "learning_rate": 9.719497390482701e-06, + "loss": 17.8879, + "step": 7327 + }, + { + "epoch": 0.13394995156012943, + "grad_norm": 6.8811664976307645, + "learning_rate": 9.71939962946028e-06, + "loss": 17.525, + "step": 7328 + }, + { + "epoch": 0.13396823075657593, + "grad_norm": 8.695005723193574, + "learning_rate": 9.71930185189677e-06, + "loss": 18.2196, + "step": 7329 + }, + { + "epoch": 0.13398650995302247, + "grad_norm": 6.1970851068351935, + "learning_rate": 9.719204057792517e-06, + "loss": 17.4303, + "step": 7330 + }, + { + "epoch": 0.13400478914946898, + "grad_norm": 7.213331679947561, + "learning_rate": 9.719106247147864e-06, + "loss": 17.6202, + "step": 7331 + }, + { + "epoch": 0.1340230683459155, + "grad_norm": 5.970498912276426, + "learning_rate": 9.719008419963153e-06, + "loss": 17.2458, + "step": 7332 + }, + { + "epoch": 0.13404134754236205, + "grad_norm": 7.097097701811893, + "learning_rate": 9.718910576238728e-06, + "loss": 17.7279, + "step": 7333 + }, + { + "epoch": 0.13405962673880856, + "grad_norm": 7.759600890157557, + "learning_rate": 9.71881271597493e-06, + "loss": 17.9428, + "step": 7334 + }, + { + "epoch": 0.1340779059352551, + "grad_norm": 6.609218161336705, + "learning_rate": 9.718714839172103e-06, + "loss": 17.6602, + "step": 7335 + }, + { + "epoch": 0.1340961851317016, + "grad_norm": 7.715512359303132, + "learning_rate": 9.71861694583059e-06, + "loss": 18.1546, + "step": 7336 + }, + { + "epoch": 0.13411446432814814, + "grad_norm": 7.113875413837293, + "learning_rate": 
9.718519035950733e-06, + "loss": 17.6451, + "step": 7337 + }, + { + "epoch": 0.13413274352459467, + "grad_norm": 6.7710599191576515, + "learning_rate": 9.718421109532879e-06, + "loss": 17.474, + "step": 7338 + }, + { + "epoch": 0.13415102272104118, + "grad_norm": 7.337391508280774, + "learning_rate": 9.718323166577367e-06, + "loss": 17.5065, + "step": 7339 + }, + { + "epoch": 0.13416930191748772, + "grad_norm": 6.347988914894903, + "learning_rate": 9.718225207084539e-06, + "loss": 17.6424, + "step": 7340 + }, + { + "epoch": 0.13418758111393422, + "grad_norm": 7.442010441490582, + "learning_rate": 9.718127231054745e-06, + "loss": 18.2821, + "step": 7341 + }, + { + "epoch": 0.13420586031038076, + "grad_norm": 7.816204751329472, + "learning_rate": 9.718029238488322e-06, + "loss": 18.1054, + "step": 7342 + }, + { + "epoch": 0.13422413950682727, + "grad_norm": 6.817636326102619, + "learning_rate": 9.717931229385618e-06, + "loss": 17.7242, + "step": 7343 + }, + { + "epoch": 0.1342424187032738, + "grad_norm": 7.613804038438869, + "learning_rate": 9.717833203746974e-06, + "loss": 17.922, + "step": 7344 + }, + { + "epoch": 0.13426069789972034, + "grad_norm": 6.112081337830192, + "learning_rate": 9.717735161572732e-06, + "loss": 17.2962, + "step": 7345 + }, + { + "epoch": 0.13427897709616685, + "grad_norm": 6.61055652895237, + "learning_rate": 9.71763710286324e-06, + "loss": 17.5574, + "step": 7346 + }, + { + "epoch": 0.13429725629261338, + "grad_norm": 7.126469762062783, + "learning_rate": 9.717539027618837e-06, + "loss": 17.8764, + "step": 7347 + }, + { + "epoch": 0.1343155354890599, + "grad_norm": 6.825928999519179, + "learning_rate": 9.71744093583987e-06, + "loss": 17.5086, + "step": 7348 + }, + { + "epoch": 0.13433381468550643, + "grad_norm": 6.6660706796394, + "learning_rate": 9.717342827526684e-06, + "loss": 17.8061, + "step": 7349 + }, + { + "epoch": 0.13435209388195296, + "grad_norm": 9.300381817120178, + "learning_rate": 9.717244702679618e-06, + "loss": 17.9765, + "step": 7350 + }, + { + "epoch": 0.13437037307839947, + "grad_norm": 6.728624399804422, + "learning_rate": 9.71714656129902e-06, + "loss": 17.5401, + "step": 7351 + }, + { + "epoch": 0.134388652274846, + "grad_norm": 6.380027503238772, + "learning_rate": 9.717048403385231e-06, + "loss": 17.2511, + "step": 7352 + }, + { + "epoch": 0.1344069314712925, + "grad_norm": 7.939909625296418, + "learning_rate": 9.7169502289386e-06, + "loss": 18.3813, + "step": 7353 + }, + { + "epoch": 0.13442521066773905, + "grad_norm": 5.937995054653439, + "learning_rate": 9.716852037959465e-06, + "loss": 17.3262, + "step": 7354 + }, + { + "epoch": 0.13444348986418558, + "grad_norm": 6.869378397025138, + "learning_rate": 9.716753830448174e-06, + "loss": 17.6853, + "step": 7355 + }, + { + "epoch": 0.1344617690606321, + "grad_norm": 6.967266107514495, + "learning_rate": 9.71665560640507e-06, + "loss": 17.6118, + "step": 7356 + }, + { + "epoch": 0.13448004825707863, + "grad_norm": 6.354453718530233, + "learning_rate": 9.716557365830496e-06, + "loss": 17.3243, + "step": 7357 + }, + { + "epoch": 0.13449832745352513, + "grad_norm": 7.8805286975140545, + "learning_rate": 9.716459108724799e-06, + "loss": 18.221, + "step": 7358 + }, + { + "epoch": 0.13451660664997167, + "grad_norm": 7.128020043174866, + "learning_rate": 9.716360835088324e-06, + "loss": 17.9753, + "step": 7359 + }, + { + "epoch": 0.13453488584641818, + "grad_norm": 6.383961886304486, + "learning_rate": 9.716262544921411e-06, + "loss": 17.5263, + "step": 7360 + }, + { + "epoch": 0.13455316504286471, 
+ "grad_norm": 7.271209331479864, + "learning_rate": 9.716164238224406e-06, + "loss": 18.0944, + "step": 7361 + }, + { + "epoch": 0.13457144423931125, + "grad_norm": 6.937910866916811, + "learning_rate": 9.716065914997657e-06, + "loss": 17.8308, + "step": 7362 + }, + { + "epoch": 0.13458972343575776, + "grad_norm": 6.288875662832325, + "learning_rate": 9.715967575241506e-06, + "loss": 17.503, + "step": 7363 + }, + { + "epoch": 0.1346080026322043, + "grad_norm": 7.058704016267175, + "learning_rate": 9.715869218956297e-06, + "loss": 17.6936, + "step": 7364 + }, + { + "epoch": 0.1346262818286508, + "grad_norm": 6.839820264143985, + "learning_rate": 9.715770846142376e-06, + "loss": 17.9178, + "step": 7365 + }, + { + "epoch": 0.13464456102509734, + "grad_norm": 11.633991662685553, + "learning_rate": 9.715672456800087e-06, + "loss": 19.238, + "step": 7366 + }, + { + "epoch": 0.13466284022154387, + "grad_norm": 7.89246996117323, + "learning_rate": 9.715574050929775e-06, + "loss": 18.4244, + "step": 7367 + }, + { + "epoch": 0.13468111941799038, + "grad_norm": 8.79725255643353, + "learning_rate": 9.715475628531785e-06, + "loss": 18.4606, + "step": 7368 + }, + { + "epoch": 0.13469939861443692, + "grad_norm": 7.29585189884504, + "learning_rate": 9.715377189606462e-06, + "loss": 17.928, + "step": 7369 + }, + { + "epoch": 0.13471767781088342, + "grad_norm": 7.865076076181476, + "learning_rate": 9.715278734154155e-06, + "loss": 17.7899, + "step": 7370 + }, + { + "epoch": 0.13473595700732996, + "grad_norm": 6.006823076315517, + "learning_rate": 9.715180262175202e-06, + "loss": 17.1828, + "step": 7371 + }, + { + "epoch": 0.1347542362037765, + "grad_norm": 6.490649690145237, + "learning_rate": 9.715081773669949e-06, + "loss": 17.432, + "step": 7372 + }, + { + "epoch": 0.134772515400223, + "grad_norm": 8.093132151465447, + "learning_rate": 9.714983268638747e-06, + "loss": 18.1537, + "step": 7373 + }, + { + "epoch": 0.13479079459666954, + "grad_norm": 7.385779412925701, + "learning_rate": 9.714884747081937e-06, + "loss": 18.2123, + "step": 7374 + }, + { + "epoch": 0.13480907379311605, + "grad_norm": 6.923081296403451, + "learning_rate": 9.714786208999864e-06, + "loss": 17.7324, + "step": 7375 + }, + { + "epoch": 0.13482735298956258, + "grad_norm": 8.347399195705641, + "learning_rate": 9.714687654392876e-06, + "loss": 18.5434, + "step": 7376 + }, + { + "epoch": 0.1348456321860091, + "grad_norm": 6.52387836191315, + "learning_rate": 9.714589083261316e-06, + "loss": 17.4424, + "step": 7377 + }, + { + "epoch": 0.13486391138245563, + "grad_norm": 7.227908509359091, + "learning_rate": 9.714490495605531e-06, + "loss": 17.6268, + "step": 7378 + }, + { + "epoch": 0.13488219057890216, + "grad_norm": 7.676719661969092, + "learning_rate": 9.714391891425866e-06, + "loss": 17.5231, + "step": 7379 + }, + { + "epoch": 0.13490046977534867, + "grad_norm": 7.507731889119987, + "learning_rate": 9.714293270722665e-06, + "loss": 17.7889, + "step": 7380 + }, + { + "epoch": 0.1349187489717952, + "grad_norm": 7.489507476864272, + "learning_rate": 9.714194633496276e-06, + "loss": 17.8013, + "step": 7381 + }, + { + "epoch": 0.1349370281682417, + "grad_norm": 7.1894071929284715, + "learning_rate": 9.714095979747044e-06, + "loss": 17.9802, + "step": 7382 + }, + { + "epoch": 0.13495530736468825, + "grad_norm": 11.852791486406433, + "learning_rate": 9.713997309475316e-06, + "loss": 17.1034, + "step": 7383 + }, + { + "epoch": 0.13497358656113478, + "grad_norm": 8.161722880080935, + "learning_rate": 9.713898622681436e-06, + "loss": 18.4728, 
+ "step": 7384 + }, + { + "epoch": 0.1349918657575813, + "grad_norm": 7.143327922896236, + "learning_rate": 9.71379991936575e-06, + "loss": 17.7502, + "step": 7385 + }, + { + "epoch": 0.13501014495402783, + "grad_norm": 5.735791390923804, + "learning_rate": 9.713701199528602e-06, + "loss": 17.2922, + "step": 7386 + }, + { + "epoch": 0.13502842415047434, + "grad_norm": 6.952909023256605, + "learning_rate": 9.713602463170345e-06, + "loss": 17.6708, + "step": 7387 + }, + { + "epoch": 0.13504670334692087, + "grad_norm": 7.659718136904871, + "learning_rate": 9.713503710291317e-06, + "loss": 17.9768, + "step": 7388 + }, + { + "epoch": 0.1350649825433674, + "grad_norm": 7.496735274558631, + "learning_rate": 9.713404940891867e-06, + "loss": 17.7919, + "step": 7389 + }, + { + "epoch": 0.13508326173981391, + "grad_norm": 7.251590006579304, + "learning_rate": 9.713306154972344e-06, + "loss": 17.8879, + "step": 7390 + }, + { + "epoch": 0.13510154093626045, + "grad_norm": 8.674042320462984, + "learning_rate": 9.71320735253309e-06, + "loss": 18.4063, + "step": 7391 + }, + { + "epoch": 0.13511982013270696, + "grad_norm": 8.016817300470889, + "learning_rate": 9.713108533574455e-06, + "loss": 18.3258, + "step": 7392 + }, + { + "epoch": 0.1351380993291535, + "grad_norm": 8.244659610467385, + "learning_rate": 9.713009698096782e-06, + "loss": 18.2969, + "step": 7393 + }, + { + "epoch": 0.1351563785256, + "grad_norm": 6.014535679292738, + "learning_rate": 9.71291084610042e-06, + "loss": 17.2641, + "step": 7394 + }, + { + "epoch": 0.13517465772204654, + "grad_norm": 6.153890770683312, + "learning_rate": 9.712811977585715e-06, + "loss": 17.3391, + "step": 7395 + }, + { + "epoch": 0.13519293691849307, + "grad_norm": 7.288188074912993, + "learning_rate": 9.712713092553012e-06, + "loss": 18.0131, + "step": 7396 + }, + { + "epoch": 0.13521121611493958, + "grad_norm": 6.553780700162622, + "learning_rate": 9.712614191002657e-06, + "loss": 17.7899, + "step": 7397 + }, + { + "epoch": 0.13522949531138612, + "grad_norm": 6.867287067987206, + "learning_rate": 9.712515272935e-06, + "loss": 17.8165, + "step": 7398 + }, + { + "epoch": 0.13524777450783262, + "grad_norm": 6.399640444184357, + "learning_rate": 9.712416338350386e-06, + "loss": 17.4553, + "step": 7399 + }, + { + "epoch": 0.13526605370427916, + "grad_norm": 8.089816224610908, + "learning_rate": 9.712317387249162e-06, + "loss": 17.7951, + "step": 7400 + }, + { + "epoch": 0.1352843329007257, + "grad_norm": 7.140421054078072, + "learning_rate": 9.712218419631673e-06, + "loss": 17.9161, + "step": 7401 + }, + { + "epoch": 0.1353026120971722, + "grad_norm": 6.5962874189656855, + "learning_rate": 9.712119435498268e-06, + "loss": 17.5488, + "step": 7402 + }, + { + "epoch": 0.13532089129361874, + "grad_norm": 6.718803772161681, + "learning_rate": 9.712020434849294e-06, + "loss": 17.8015, + "step": 7403 + }, + { + "epoch": 0.13533917049006525, + "grad_norm": 7.2415029216503575, + "learning_rate": 9.711921417685097e-06, + "loss": 18.0733, + "step": 7404 + }, + { + "epoch": 0.13535744968651178, + "grad_norm": 7.816229348832906, + "learning_rate": 9.711822384006025e-06, + "loss": 17.9967, + "step": 7405 + }, + { + "epoch": 0.13537572888295832, + "grad_norm": 7.116408758357093, + "learning_rate": 9.711723333812422e-06, + "loss": 17.8816, + "step": 7406 + }, + { + "epoch": 0.13539400807940483, + "grad_norm": 5.932070533501284, + "learning_rate": 9.71162426710464e-06, + "loss": 17.273, + "step": 7407 + }, + { + "epoch": 0.13541228727585136, + "grad_norm": 6.6459074293993, + 
"learning_rate": 9.711525183883021e-06, + "loss": 17.7613, + "step": 7408 + }, + { + "epoch": 0.13543056647229787, + "grad_norm": 6.422762667907978, + "learning_rate": 9.711426084147918e-06, + "loss": 17.4116, + "step": 7409 + }, + { + "epoch": 0.1354488456687444, + "grad_norm": 6.265882057739489, + "learning_rate": 9.711326967899674e-06, + "loss": 17.65, + "step": 7410 + }, + { + "epoch": 0.1354671248651909, + "grad_norm": 8.118062402360124, + "learning_rate": 9.71122783513864e-06, + "loss": 18.3274, + "step": 7411 + }, + { + "epoch": 0.13548540406163745, + "grad_norm": 6.59997902432031, + "learning_rate": 9.711128685865158e-06, + "loss": 17.4206, + "step": 7412 + }, + { + "epoch": 0.13550368325808398, + "grad_norm": 6.698676748752259, + "learning_rate": 9.71102952007958e-06, + "loss": 17.6831, + "step": 7413 + }, + { + "epoch": 0.1355219624545305, + "grad_norm": 7.462208115105021, + "learning_rate": 9.710930337782254e-06, + "loss": 17.9643, + "step": 7414 + }, + { + "epoch": 0.13554024165097703, + "grad_norm": 6.881641144148816, + "learning_rate": 9.710831138973524e-06, + "loss": 17.6377, + "step": 7415 + }, + { + "epoch": 0.13555852084742354, + "grad_norm": 6.74850301229193, + "learning_rate": 9.71073192365374e-06, + "loss": 17.9104, + "step": 7416 + }, + { + "epoch": 0.13557680004387007, + "grad_norm": 7.372550253170825, + "learning_rate": 9.710632691823249e-06, + "loss": 17.9916, + "step": 7417 + }, + { + "epoch": 0.1355950792403166, + "grad_norm": 8.937901853678206, + "learning_rate": 9.710533443482399e-06, + "loss": 18.3555, + "step": 7418 + }, + { + "epoch": 0.13561335843676312, + "grad_norm": 6.817766288220093, + "learning_rate": 9.71043417863154e-06, + "loss": 17.8772, + "step": 7419 + }, + { + "epoch": 0.13563163763320965, + "grad_norm": 7.757249117581919, + "learning_rate": 9.710334897271016e-06, + "loss": 18.1575, + "step": 7420 + }, + { + "epoch": 0.13564991682965616, + "grad_norm": 6.063796383370273, + "learning_rate": 9.71023559940118e-06, + "loss": 17.3542, + "step": 7421 + }, + { + "epoch": 0.1356681960261027, + "grad_norm": 7.686602160021942, + "learning_rate": 9.710136285022374e-06, + "loss": 18.175, + "step": 7422 + }, + { + "epoch": 0.13568647522254923, + "grad_norm": 8.243732315581704, + "learning_rate": 9.710036954134948e-06, + "loss": 18.5862, + "step": 7423 + }, + { + "epoch": 0.13570475441899574, + "grad_norm": 7.6194402466320055, + "learning_rate": 9.709937606739252e-06, + "loss": 18.1703, + "step": 7424 + }, + { + "epoch": 0.13572303361544227, + "grad_norm": 6.469208520653512, + "learning_rate": 9.709838242835635e-06, + "loss": 17.5793, + "step": 7425 + }, + { + "epoch": 0.13574131281188878, + "grad_norm": 6.219589586439911, + "learning_rate": 9.709738862424442e-06, + "loss": 17.587, + "step": 7426 + }, + { + "epoch": 0.13575959200833532, + "grad_norm": 8.22034380512448, + "learning_rate": 9.709639465506026e-06, + "loss": 17.7433, + "step": 7427 + }, + { + "epoch": 0.13577787120478182, + "grad_norm": 6.614378456818435, + "learning_rate": 9.70954005208073e-06, + "loss": 17.4989, + "step": 7428 + }, + { + "epoch": 0.13579615040122836, + "grad_norm": 6.906101381445755, + "learning_rate": 9.709440622148905e-06, + "loss": 17.7696, + "step": 7429 + }, + { + "epoch": 0.1358144295976749, + "grad_norm": 6.914231304372284, + "learning_rate": 9.709341175710899e-06, + "loss": 17.7633, + "step": 7430 + }, + { + "epoch": 0.1358327087941214, + "grad_norm": 7.924595493657493, + "learning_rate": 9.709241712767062e-06, + "loss": 18.3542, + "step": 7431 + }, + { + "epoch": 
0.13585098799056794, + "grad_norm": 7.733756354225041, + "learning_rate": 9.709142233317739e-06, + "loss": 17.9405, + "step": 7432 + }, + { + "epoch": 0.13586926718701445, + "grad_norm": 8.678824703147397, + "learning_rate": 9.709042737363283e-06, + "loss": 18.6903, + "step": 7433 + }, + { + "epoch": 0.13588754638346098, + "grad_norm": 8.161165981960721, + "learning_rate": 9.708943224904041e-06, + "loss": 18.1929, + "step": 7434 + }, + { + "epoch": 0.13590582557990752, + "grad_norm": 5.921200411436585, + "learning_rate": 9.70884369594036e-06, + "loss": 17.2303, + "step": 7435 + }, + { + "epoch": 0.13592410477635403, + "grad_norm": 7.3493418588230375, + "learning_rate": 9.708744150472594e-06, + "loss": 17.6725, + "step": 7436 + }, + { + "epoch": 0.13594238397280056, + "grad_norm": 8.713112108397668, + "learning_rate": 9.708644588501084e-06, + "loss": 18.2403, + "step": 7437 + }, + { + "epoch": 0.13596066316924707, + "grad_norm": 8.260071257944007, + "learning_rate": 9.708545010026187e-06, + "loss": 18.6511, + "step": 7438 + }, + { + "epoch": 0.1359789423656936, + "grad_norm": 7.311836953155976, + "learning_rate": 9.708445415048245e-06, + "loss": 17.7767, + "step": 7439 + }, + { + "epoch": 0.13599722156214014, + "grad_norm": 8.075841208882839, + "learning_rate": 9.708345803567612e-06, + "loss": 18.1745, + "step": 7440 + }, + { + "epoch": 0.13601550075858665, + "grad_norm": 6.461272470575603, + "learning_rate": 9.708246175584637e-06, + "loss": 17.6392, + "step": 7441 + }, + { + "epoch": 0.13603377995503318, + "grad_norm": 6.3777355538496545, + "learning_rate": 9.708146531099665e-06, + "loss": 17.4888, + "step": 7442 + }, + { + "epoch": 0.1360520591514797, + "grad_norm": 6.305521191061348, + "learning_rate": 9.70804687011305e-06, + "loss": 17.4775, + "step": 7443 + }, + { + "epoch": 0.13607033834792623, + "grad_norm": 9.468671047870306, + "learning_rate": 9.707947192625137e-06, + "loss": 18.7227, + "step": 7444 + }, + { + "epoch": 0.13608861754437274, + "grad_norm": 7.909827586776542, + "learning_rate": 9.70784749863628e-06, + "loss": 18.0857, + "step": 7445 + }, + { + "epoch": 0.13610689674081927, + "grad_norm": 6.3380149814229085, + "learning_rate": 9.707747788146826e-06, + "loss": 17.3367, + "step": 7446 + }, + { + "epoch": 0.1361251759372658, + "grad_norm": 7.207068106258375, + "learning_rate": 9.707648061157124e-06, + "loss": 18.0903, + "step": 7447 + }, + { + "epoch": 0.13614345513371232, + "grad_norm": 9.115628758208079, + "learning_rate": 9.707548317667523e-06, + "loss": 18.3233, + "step": 7448 + }, + { + "epoch": 0.13616173433015885, + "grad_norm": 7.762250566589427, + "learning_rate": 9.707448557678374e-06, + "loss": 17.9831, + "step": 7449 + }, + { + "epoch": 0.13618001352660536, + "grad_norm": 7.74005275425354, + "learning_rate": 9.707348781190028e-06, + "loss": 18.019, + "step": 7450 + }, + { + "epoch": 0.1361982927230519, + "grad_norm": 6.637406948041515, + "learning_rate": 9.707248988202832e-06, + "loss": 17.4043, + "step": 7451 + }, + { + "epoch": 0.13621657191949843, + "grad_norm": 6.835654410013408, + "learning_rate": 9.707149178717136e-06, + "loss": 17.9024, + "step": 7452 + }, + { + "epoch": 0.13623485111594494, + "grad_norm": 6.473983229279323, + "learning_rate": 9.70704935273329e-06, + "loss": 17.3907, + "step": 7453 + }, + { + "epoch": 0.13625313031239147, + "grad_norm": 7.516646582083344, + "learning_rate": 9.706949510251647e-06, + "loss": 18.1707, + "step": 7454 + }, + { + "epoch": 0.13627140950883798, + "grad_norm": 9.462754966995899, + "learning_rate": 
9.706849651272551e-06, + "loss": 18.5969, + "step": 7455 + }, + { + "epoch": 0.13628968870528452, + "grad_norm": 7.452679335354314, + "learning_rate": 9.706749775796359e-06, + "loss": 18.1997, + "step": 7456 + }, + { + "epoch": 0.13630796790173105, + "grad_norm": 8.791480284434773, + "learning_rate": 9.706649883823415e-06, + "loss": 18.5841, + "step": 7457 + }, + { + "epoch": 0.13632624709817756, + "grad_norm": 7.897907052793343, + "learning_rate": 9.706549975354073e-06, + "loss": 18.1403, + "step": 7458 + }, + { + "epoch": 0.1363445262946241, + "grad_norm": 8.894721361673117, + "learning_rate": 9.70645005038868e-06, + "loss": 18.3676, + "step": 7459 + }, + { + "epoch": 0.1363628054910706, + "grad_norm": 6.520883671534357, + "learning_rate": 9.70635010892759e-06, + "loss": 17.631, + "step": 7460 + }, + { + "epoch": 0.13638108468751714, + "grad_norm": 7.071421314080508, + "learning_rate": 9.70625015097115e-06, + "loss": 17.8233, + "step": 7461 + }, + { + "epoch": 0.13639936388396365, + "grad_norm": 5.6300301550366925, + "learning_rate": 9.706150176519713e-06, + "loss": 17.1715, + "step": 7462 + }, + { + "epoch": 0.13641764308041018, + "grad_norm": 7.852800304353618, + "learning_rate": 9.706050185573626e-06, + "loss": 17.9435, + "step": 7463 + }, + { + "epoch": 0.13643592227685672, + "grad_norm": 6.699257770943432, + "learning_rate": 9.705950178133243e-06, + "loss": 17.821, + "step": 7464 + }, + { + "epoch": 0.13645420147330323, + "grad_norm": 6.588299143131586, + "learning_rate": 9.705850154198912e-06, + "loss": 17.5653, + "step": 7465 + }, + { + "epoch": 0.13647248066974976, + "grad_norm": 6.66726651315699, + "learning_rate": 9.705750113770986e-06, + "loss": 17.6059, + "step": 7466 + }, + { + "epoch": 0.13649075986619627, + "grad_norm": 6.233600401310327, + "learning_rate": 9.705650056849813e-06, + "loss": 17.589, + "step": 7467 + }, + { + "epoch": 0.1365090390626428, + "grad_norm": 6.968923947025399, + "learning_rate": 9.705549983435744e-06, + "loss": 17.9795, + "step": 7468 + }, + { + "epoch": 0.13652731825908934, + "grad_norm": 7.938093167827697, + "learning_rate": 9.705449893529133e-06, + "loss": 17.9217, + "step": 7469 + }, + { + "epoch": 0.13654559745553585, + "grad_norm": 6.998871223151411, + "learning_rate": 9.705349787130327e-06, + "loss": 17.6893, + "step": 7470 + }, + { + "epoch": 0.13656387665198239, + "grad_norm": 7.820470612951028, + "learning_rate": 9.70524966423968e-06, + "loss": 18.362, + "step": 7471 + }, + { + "epoch": 0.1365821558484289, + "grad_norm": 7.775147417972436, + "learning_rate": 9.705149524857539e-06, + "loss": 18.1892, + "step": 7472 + }, + { + "epoch": 0.13660043504487543, + "grad_norm": 6.673182990397212, + "learning_rate": 9.705049368984259e-06, + "loss": 17.817, + "step": 7473 + }, + { + "epoch": 0.13661871424132196, + "grad_norm": 5.406971223600896, + "learning_rate": 9.704949196620188e-06, + "loss": 17.0513, + "step": 7474 + }, + { + "epoch": 0.13663699343776847, + "grad_norm": 6.563913203413318, + "learning_rate": 9.704849007765677e-06, + "loss": 17.3962, + "step": 7475 + }, + { + "epoch": 0.136655272634215, + "grad_norm": 7.338919945878583, + "learning_rate": 9.70474880242108e-06, + "loss": 17.6328, + "step": 7476 + }, + { + "epoch": 0.13667355183066152, + "grad_norm": 7.261563972332712, + "learning_rate": 9.704648580586748e-06, + "loss": 17.7875, + "step": 7477 + }, + { + "epoch": 0.13669183102710805, + "grad_norm": 8.396518761867243, + "learning_rate": 9.704548342263029e-06, + "loss": 18.0135, + "step": 7478 + }, + { + "epoch": 0.13671011022355456, 
+ "grad_norm": 6.422952971004934, + "learning_rate": 9.704448087450278e-06, + "loss": 17.7736, + "step": 7479 + }, + { + "epoch": 0.1367283894200011, + "grad_norm": 6.094479934565342, + "learning_rate": 9.704347816148842e-06, + "loss": 17.4413, + "step": 7480 + }, + { + "epoch": 0.13674666861644763, + "grad_norm": 8.832583222945484, + "learning_rate": 9.704247528359079e-06, + "loss": 18.0457, + "step": 7481 + }, + { + "epoch": 0.13676494781289414, + "grad_norm": 6.953333687060656, + "learning_rate": 9.704147224081332e-06, + "loss": 17.827, + "step": 7482 + }, + { + "epoch": 0.13678322700934067, + "grad_norm": 6.900389147758031, + "learning_rate": 9.70404690331596e-06, + "loss": 17.7573, + "step": 7483 + }, + { + "epoch": 0.13680150620578718, + "grad_norm": 6.581673943697085, + "learning_rate": 9.703946566063314e-06, + "loss": 17.6126, + "step": 7484 + }, + { + "epoch": 0.13681978540223372, + "grad_norm": 6.340699010437135, + "learning_rate": 9.703846212323739e-06, + "loss": 17.4173, + "step": 7485 + }, + { + "epoch": 0.13683806459868025, + "grad_norm": 6.756441239325302, + "learning_rate": 9.703745842097594e-06, + "loss": 17.5447, + "step": 7486 + }, + { + "epoch": 0.13685634379512676, + "grad_norm": 7.340464170563029, + "learning_rate": 9.703645455385227e-06, + "loss": 18.3228, + "step": 7487 + }, + { + "epoch": 0.1368746229915733, + "grad_norm": 6.415627907213983, + "learning_rate": 9.703545052186992e-06, + "loss": 17.6938, + "step": 7488 + }, + { + "epoch": 0.1368929021880198, + "grad_norm": 7.493575468964602, + "learning_rate": 9.703444632503239e-06, + "loss": 17.7694, + "step": 7489 + }, + { + "epoch": 0.13691118138446634, + "grad_norm": 5.726018751368653, + "learning_rate": 9.703344196334319e-06, + "loss": 17.1624, + "step": 7490 + }, + { + "epoch": 0.13692946058091288, + "grad_norm": 6.382580616173418, + "learning_rate": 9.703243743680589e-06, + "loss": 17.5575, + "step": 7491 + }, + { + "epoch": 0.13694773977735938, + "grad_norm": 8.02022727836731, + "learning_rate": 9.703143274542395e-06, + "loss": 18.0148, + "step": 7492 + }, + { + "epoch": 0.13696601897380592, + "grad_norm": 7.796984691625947, + "learning_rate": 9.703042788920094e-06, + "loss": 18.2498, + "step": 7493 + }, + { + "epoch": 0.13698429817025243, + "grad_norm": 6.847058154079785, + "learning_rate": 9.702942286814034e-06, + "loss": 17.5306, + "step": 7494 + }, + { + "epoch": 0.13700257736669896, + "grad_norm": 6.693193351252166, + "learning_rate": 9.70284176822457e-06, + "loss": 17.6616, + "step": 7495 + }, + { + "epoch": 0.13702085656314547, + "grad_norm": 6.468462283579735, + "learning_rate": 9.702741233152055e-06, + "loss": 17.6046, + "step": 7496 + }, + { + "epoch": 0.137039135759592, + "grad_norm": 8.447953586046685, + "learning_rate": 9.702640681596839e-06, + "loss": 18.14, + "step": 7497 + }, + { + "epoch": 0.13705741495603854, + "grad_norm": 6.7410512609657784, + "learning_rate": 9.702540113559276e-06, + "loss": 17.4294, + "step": 7498 + }, + { + "epoch": 0.13707569415248505, + "grad_norm": 7.260428605576361, + "learning_rate": 9.702439529039718e-06, + "loss": 18.0614, + "step": 7499 + }, + { + "epoch": 0.13709397334893159, + "grad_norm": 7.358399236749688, + "learning_rate": 9.702338928038517e-06, + "loss": 18.1374, + "step": 7500 + }, + { + "epoch": 0.1371122525453781, + "grad_norm": 6.405876792649341, + "learning_rate": 9.702238310556027e-06, + "loss": 17.6907, + "step": 7501 + }, + { + "epoch": 0.13713053174182463, + "grad_norm": 7.4062837846026985, + "learning_rate": 9.702137676592598e-06, + "loss": 
17.8288, + "step": 7502 + }, + { + "epoch": 0.13714881093827117, + "grad_norm": 6.830382935003411, + "learning_rate": 9.702037026148586e-06, + "loss": 17.947, + "step": 7503 + }, + { + "epoch": 0.13716709013471767, + "grad_norm": 7.770829752486227, + "learning_rate": 9.701936359224341e-06, + "loss": 18.0785, + "step": 7504 + }, + { + "epoch": 0.1371853693311642, + "grad_norm": 7.660208673592639, + "learning_rate": 9.701835675820218e-06, + "loss": 18.3223, + "step": 7505 + }, + { + "epoch": 0.13720364852761072, + "grad_norm": 7.583835662282823, + "learning_rate": 9.701734975936568e-06, + "loss": 18.0849, + "step": 7506 + }, + { + "epoch": 0.13722192772405725, + "grad_norm": 6.956978730707371, + "learning_rate": 9.701634259573747e-06, + "loss": 17.9131, + "step": 7507 + }, + { + "epoch": 0.1372402069205038, + "grad_norm": 7.649369274987982, + "learning_rate": 9.701533526732104e-06, + "loss": 17.9172, + "step": 7508 + }, + { + "epoch": 0.1372584861169503, + "grad_norm": 6.578214212991516, + "learning_rate": 9.701432777411995e-06, + "loss": 17.5643, + "step": 7509 + }, + { + "epoch": 0.13727676531339683, + "grad_norm": 5.534817711350081, + "learning_rate": 9.701332011613771e-06, + "loss": 17.0359, + "step": 7510 + }, + { + "epoch": 0.13729504450984334, + "grad_norm": 6.667518505261539, + "learning_rate": 9.701231229337788e-06, + "loss": 17.6601, + "step": 7511 + }, + { + "epoch": 0.13731332370628987, + "grad_norm": 8.121299665199137, + "learning_rate": 9.701130430584396e-06, + "loss": 17.7347, + "step": 7512 + }, + { + "epoch": 0.13733160290273638, + "grad_norm": 7.05480122411721, + "learning_rate": 9.701029615353949e-06, + "loss": 17.9691, + "step": 7513 + }, + { + "epoch": 0.13734988209918292, + "grad_norm": 6.894109265636699, + "learning_rate": 9.700928783646804e-06, + "loss": 17.7868, + "step": 7514 + }, + { + "epoch": 0.13736816129562945, + "grad_norm": 5.633288824085069, + "learning_rate": 9.70082793546331e-06, + "loss": 17.3854, + "step": 7515 + }, + { + "epoch": 0.13738644049207596, + "grad_norm": 6.582019190776378, + "learning_rate": 9.700727070803822e-06, + "loss": 17.5675, + "step": 7516 + }, + { + "epoch": 0.1374047196885225, + "grad_norm": 7.911740293895738, + "learning_rate": 9.700626189668694e-06, + "loss": 17.9536, + "step": 7517 + }, + { + "epoch": 0.137422998884969, + "grad_norm": 7.304427641569884, + "learning_rate": 9.700525292058278e-06, + "loss": 18.1302, + "step": 7518 + }, + { + "epoch": 0.13744127808141554, + "grad_norm": 6.930641441163955, + "learning_rate": 9.700424377972928e-06, + "loss": 17.8615, + "step": 7519 + }, + { + "epoch": 0.13745955727786208, + "grad_norm": 7.1536806801317825, + "learning_rate": 9.700323447413e-06, + "loss": 18.0929, + "step": 7520 + }, + { + "epoch": 0.13747783647430858, + "grad_norm": 6.965101634971381, + "learning_rate": 9.700222500378846e-06, + "loss": 17.8401, + "step": 7521 + }, + { + "epoch": 0.13749611567075512, + "grad_norm": 8.294417327476186, + "learning_rate": 9.700121536870822e-06, + "loss": 18.6985, + "step": 7522 + }, + { + "epoch": 0.13751439486720163, + "grad_norm": 6.73186924609752, + "learning_rate": 9.700020556889275e-06, + "loss": 17.4993, + "step": 7523 + }, + { + "epoch": 0.13753267406364816, + "grad_norm": 6.773047454341339, + "learning_rate": 9.699919560434568e-06, + "loss": 17.4125, + "step": 7524 + }, + { + "epoch": 0.1375509532600947, + "grad_norm": 7.670128620195902, + "learning_rate": 9.69981854750705e-06, + "loss": 18.2021, + "step": 7525 + }, + { + "epoch": 0.1375692324565412, + "grad_norm": 
6.6562117075155856, + "learning_rate": 9.699717518107075e-06, + "loss": 17.542, + "step": 7526 + }, + { + "epoch": 0.13758751165298774, + "grad_norm": 7.134487262285221, + "learning_rate": 9.699616472234998e-06, + "loss": 17.796, + "step": 7527 + }, + { + "epoch": 0.13760579084943425, + "grad_norm": 8.67954375693659, + "learning_rate": 9.699515409891173e-06, + "loss": 19.0484, + "step": 7528 + }, + { + "epoch": 0.1376240700458808, + "grad_norm": 6.161607217740553, + "learning_rate": 9.699414331075955e-06, + "loss": 17.453, + "step": 7529 + }, + { + "epoch": 0.1376423492423273, + "grad_norm": 6.842734209054109, + "learning_rate": 9.699313235789698e-06, + "loss": 17.7173, + "step": 7530 + }, + { + "epoch": 0.13766062843877383, + "grad_norm": 6.833805176580314, + "learning_rate": 9.699212124032754e-06, + "loss": 17.4478, + "step": 7531 + }, + { + "epoch": 0.13767890763522037, + "grad_norm": 7.496615619801857, + "learning_rate": 9.699110995805481e-06, + "loss": 18.0135, + "step": 7532 + }, + { + "epoch": 0.13769718683166687, + "grad_norm": 7.622864562921994, + "learning_rate": 9.69900985110823e-06, + "loss": 18.4149, + "step": 7533 + }, + { + "epoch": 0.1377154660281134, + "grad_norm": 7.754625756622168, + "learning_rate": 9.698908689941358e-06, + "loss": 18.1251, + "step": 7534 + }, + { + "epoch": 0.13773374522455992, + "grad_norm": 6.3318055787919, + "learning_rate": 9.69880751230522e-06, + "loss": 17.5096, + "step": 7535 + }, + { + "epoch": 0.13775202442100645, + "grad_norm": 6.6294924082068, + "learning_rate": 9.698706318200169e-06, + "loss": 17.6423, + "step": 7536 + }, + { + "epoch": 0.137770303617453, + "grad_norm": 6.916513568451425, + "learning_rate": 9.698605107626559e-06, + "loss": 17.878, + "step": 7537 + }, + { + "epoch": 0.1377885828138995, + "grad_norm": 6.199471801380165, + "learning_rate": 9.698503880584746e-06, + "loss": 17.4741, + "step": 7538 + }, + { + "epoch": 0.13780686201034603, + "grad_norm": 7.6927948140504325, + "learning_rate": 9.698402637075085e-06, + "loss": 18.1918, + "step": 7539 + }, + { + "epoch": 0.13782514120679254, + "grad_norm": 7.90499299859101, + "learning_rate": 9.698301377097929e-06, + "loss": 17.8412, + "step": 7540 + }, + { + "epoch": 0.13784342040323908, + "grad_norm": 6.846420909602271, + "learning_rate": 9.698200100653636e-06, + "loss": 17.5772, + "step": 7541 + }, + { + "epoch": 0.1378616995996856, + "grad_norm": 6.531231633745436, + "learning_rate": 9.698098807742559e-06, + "loss": 17.613, + "step": 7542 + }, + { + "epoch": 0.13787997879613212, + "grad_norm": 6.329550184621328, + "learning_rate": 9.697997498365054e-06, + "loss": 17.4516, + "step": 7543 + }, + { + "epoch": 0.13789825799257865, + "grad_norm": 6.818289281580703, + "learning_rate": 9.697896172521475e-06, + "loss": 17.9329, + "step": 7544 + }, + { + "epoch": 0.13791653718902516, + "grad_norm": 6.314645548487325, + "learning_rate": 9.697794830212178e-06, + "loss": 17.4605, + "step": 7545 + }, + { + "epoch": 0.1379348163854717, + "grad_norm": 5.9485092370941395, + "learning_rate": 9.697693471437516e-06, + "loss": 17.2601, + "step": 7546 + }, + { + "epoch": 0.1379530955819182, + "grad_norm": 6.979378822346856, + "learning_rate": 9.697592096197849e-06, + "loss": 17.9736, + "step": 7547 + }, + { + "epoch": 0.13797137477836474, + "grad_norm": 6.219796983006217, + "learning_rate": 9.697490704493527e-06, + "loss": 17.4588, + "step": 7548 + }, + { + "epoch": 0.13798965397481128, + "grad_norm": 7.0799748577732355, + "learning_rate": 9.697389296324908e-06, + "loss": 18.1643, + "step": 7549 + }, 
+ { + "epoch": 0.13800793317125779, + "grad_norm": 7.552326328445858, + "learning_rate": 9.697287871692349e-06, + "loss": 17.6755, + "step": 7550 + }, + { + "epoch": 0.13802621236770432, + "grad_norm": 6.084133107890047, + "learning_rate": 9.697186430596201e-06, + "loss": 17.4025, + "step": 7551 + }, + { + "epoch": 0.13804449156415083, + "grad_norm": 7.4388834971540225, + "learning_rate": 9.697084973036823e-06, + "loss": 17.9366, + "step": 7552 + }, + { + "epoch": 0.13806277076059736, + "grad_norm": 5.488343762675065, + "learning_rate": 9.696983499014572e-06, + "loss": 17.1962, + "step": 7553 + }, + { + "epoch": 0.1380810499570439, + "grad_norm": 6.007124088766023, + "learning_rate": 9.696882008529797e-06, + "loss": 17.4313, + "step": 7554 + }, + { + "epoch": 0.1380993291534904, + "grad_norm": 8.488478544872322, + "learning_rate": 9.696780501582862e-06, + "loss": 18.1383, + "step": 7555 + }, + { + "epoch": 0.13811760834993694, + "grad_norm": 5.140191295738998, + "learning_rate": 9.696678978174118e-06, + "loss": 17.0095, + "step": 7556 + }, + { + "epoch": 0.13813588754638345, + "grad_norm": 7.613492131171581, + "learning_rate": 9.69657743830392e-06, + "loss": 18.0298, + "step": 7557 + }, + { + "epoch": 0.13815416674283, + "grad_norm": 9.479618418224536, + "learning_rate": 9.696475881972627e-06, + "loss": 18.2041, + "step": 7558 + }, + { + "epoch": 0.13817244593927652, + "grad_norm": 7.058720172453753, + "learning_rate": 9.696374309180593e-06, + "loss": 17.9852, + "step": 7559 + }, + { + "epoch": 0.13819072513572303, + "grad_norm": 6.269282928623162, + "learning_rate": 9.696272719928177e-06, + "loss": 17.4109, + "step": 7560 + }, + { + "epoch": 0.13820900433216957, + "grad_norm": 7.357859970941546, + "learning_rate": 9.69617111421573e-06, + "loss": 18.0383, + "step": 7561 + }, + { + "epoch": 0.13822728352861607, + "grad_norm": 7.68302082306943, + "learning_rate": 9.696069492043611e-06, + "loss": 18.0667, + "step": 7562 + }, + { + "epoch": 0.1382455627250626, + "grad_norm": 6.386505217125653, + "learning_rate": 9.695967853412177e-06, + "loss": 17.5746, + "step": 7563 + }, + { + "epoch": 0.13826384192150912, + "grad_norm": 6.0812781948558365, + "learning_rate": 9.695866198321782e-06, + "loss": 17.4049, + "step": 7564 + }, + { + "epoch": 0.13828212111795565, + "grad_norm": 7.885285814407381, + "learning_rate": 9.695764526772784e-06, + "loss": 18.2242, + "step": 7565 + }, + { + "epoch": 0.1383004003144022, + "grad_norm": 8.503405783523597, + "learning_rate": 9.69566283876554e-06, + "loss": 18.4756, + "step": 7566 + }, + { + "epoch": 0.1383186795108487, + "grad_norm": 7.325141136986131, + "learning_rate": 9.695561134300403e-06, + "loss": 17.6815, + "step": 7567 + }, + { + "epoch": 0.13833695870729523, + "grad_norm": 6.819358139332634, + "learning_rate": 9.695459413377732e-06, + "loss": 17.6192, + "step": 7568 + }, + { + "epoch": 0.13835523790374174, + "grad_norm": 7.196714241431053, + "learning_rate": 9.695357675997886e-06, + "loss": 17.8749, + "step": 7569 + }, + { + "epoch": 0.13837351710018828, + "grad_norm": 7.124930042216272, + "learning_rate": 9.695255922161216e-06, + "loss": 17.8248, + "step": 7570 + }, + { + "epoch": 0.1383917962966348, + "grad_norm": 7.577439664001599, + "learning_rate": 9.695154151868082e-06, + "loss": 18.048, + "step": 7571 + }, + { + "epoch": 0.13841007549308132, + "grad_norm": 5.946709674463609, + "learning_rate": 9.69505236511884e-06, + "loss": 17.3948, + "step": 7572 + }, + { + "epoch": 0.13842835468952785, + "grad_norm": 6.849109850690806, + "learning_rate": 
9.694950561913847e-06, + "loss": 17.8114, + "step": 7573 + }, + { + "epoch": 0.13844663388597436, + "grad_norm": 6.254112894892067, + "learning_rate": 9.69484874225346e-06, + "loss": 17.3582, + "step": 7574 + }, + { + "epoch": 0.1384649130824209, + "grad_norm": 6.83043995617411, + "learning_rate": 9.694746906138037e-06, + "loss": 17.7437, + "step": 7575 + }, + { + "epoch": 0.13848319227886743, + "grad_norm": 6.8425114925436485, + "learning_rate": 9.69464505356793e-06, + "loss": 17.5462, + "step": 7576 + }, + { + "epoch": 0.13850147147531394, + "grad_norm": 8.338983089259113, + "learning_rate": 9.694543184543503e-06, + "loss": 17.8099, + "step": 7577 + }, + { + "epoch": 0.13851975067176048, + "grad_norm": 8.441731655404608, + "learning_rate": 9.694441299065108e-06, + "loss": 18.0928, + "step": 7578 + }, + { + "epoch": 0.13853802986820699, + "grad_norm": 7.39996527709027, + "learning_rate": 9.694339397133103e-06, + "loss": 18.0476, + "step": 7579 + }, + { + "epoch": 0.13855630906465352, + "grad_norm": 6.481565025506474, + "learning_rate": 9.694237478747845e-06, + "loss": 17.4842, + "step": 7580 + }, + { + "epoch": 0.13857458826110003, + "grad_norm": 7.5816203621605744, + "learning_rate": 9.694135543909695e-06, + "loss": 18.1815, + "step": 7581 + }, + { + "epoch": 0.13859286745754656, + "grad_norm": 6.556496899939846, + "learning_rate": 9.694033592619005e-06, + "loss": 17.7246, + "step": 7582 + }, + { + "epoch": 0.1386111466539931, + "grad_norm": 9.199344677937912, + "learning_rate": 9.693931624876134e-06, + "loss": 19.1485, + "step": 7583 + }, + { + "epoch": 0.1386294258504396, + "grad_norm": 8.306551459532058, + "learning_rate": 9.693829640681443e-06, + "loss": 17.9562, + "step": 7584 + }, + { + "epoch": 0.13864770504688614, + "grad_norm": 7.317573061319512, + "learning_rate": 9.693727640035284e-06, + "loss": 18.0196, + "step": 7585 + }, + { + "epoch": 0.13866598424333265, + "grad_norm": 6.088249381911165, + "learning_rate": 9.693625622938016e-06, + "loss": 17.2675, + "step": 7586 + }, + { + "epoch": 0.1386842634397792, + "grad_norm": 8.32568238400912, + "learning_rate": 9.69352358939e-06, + "loss": 18.5118, + "step": 7587 + }, + { + "epoch": 0.13870254263622572, + "grad_norm": 6.881622182942428, + "learning_rate": 9.69342153939159e-06, + "loss": 17.7963, + "step": 7588 + }, + { + "epoch": 0.13872082183267223, + "grad_norm": 7.233722115198136, + "learning_rate": 9.693319472943144e-06, + "loss": 18.0334, + "step": 7589 + }, + { + "epoch": 0.13873910102911877, + "grad_norm": 6.933665393243024, + "learning_rate": 9.693217390045022e-06, + "loss": 17.3835, + "step": 7590 + }, + { + "epoch": 0.13875738022556527, + "grad_norm": 6.877804478341359, + "learning_rate": 9.693115290697579e-06, + "loss": 17.7326, + "step": 7591 + }, + { + "epoch": 0.1387756594220118, + "grad_norm": 6.088325576611347, + "learning_rate": 9.693013174901176e-06, + "loss": 17.442, + "step": 7592 + }, + { + "epoch": 0.13879393861845835, + "grad_norm": 5.491087823667131, + "learning_rate": 9.692911042656168e-06, + "loss": 17.1538, + "step": 7593 + }, + { + "epoch": 0.13881221781490485, + "grad_norm": 6.739863673958903, + "learning_rate": 9.692808893962913e-06, + "loss": 17.9538, + "step": 7594 + }, + { + "epoch": 0.1388304970113514, + "grad_norm": 7.724576329318004, + "learning_rate": 9.69270672882177e-06, + "loss": 18.2345, + "step": 7595 + }, + { + "epoch": 0.1388487762077979, + "grad_norm": 8.162198807594477, + "learning_rate": 9.6926045472331e-06, + "loss": 18.0554, + "step": 7596 + }, + { + "epoch": 0.13886705540424443, + 
"grad_norm": 6.825174864807807, + "learning_rate": 9.692502349197255e-06, + "loss": 17.8946, + "step": 7597 + }, + { + "epoch": 0.13888533460069094, + "grad_norm": 6.685053782309902, + "learning_rate": 9.692400134714597e-06, + "loss": 17.8092, + "step": 7598 + }, + { + "epoch": 0.13890361379713748, + "grad_norm": 7.50105563200056, + "learning_rate": 9.692297903785485e-06, + "loss": 17.4689, + "step": 7599 + }, + { + "epoch": 0.138921892993584, + "grad_norm": 8.213849230114867, + "learning_rate": 9.692195656410276e-06, + "loss": 18.4874, + "step": 7600 + }, + { + "epoch": 0.13894017219003052, + "grad_norm": 7.210729939117689, + "learning_rate": 9.692093392589328e-06, + "loss": 17.9628, + "step": 7601 + }, + { + "epoch": 0.13895845138647706, + "grad_norm": 5.780872468116353, + "learning_rate": 9.691991112323e-06, + "loss": 17.5676, + "step": 7602 + }, + { + "epoch": 0.13897673058292356, + "grad_norm": 7.7448713027600355, + "learning_rate": 9.69188881561165e-06, + "loss": 18.3937, + "step": 7603 + }, + { + "epoch": 0.1389950097793701, + "grad_norm": 6.451954438687236, + "learning_rate": 9.691786502455637e-06, + "loss": 17.5783, + "step": 7604 + }, + { + "epoch": 0.13901328897581663, + "grad_norm": 7.375328926641005, + "learning_rate": 9.691684172855318e-06, + "loss": 18.0649, + "step": 7605 + }, + { + "epoch": 0.13903156817226314, + "grad_norm": 7.42328733850513, + "learning_rate": 9.691581826811056e-06, + "loss": 18.0613, + "step": 7606 + }, + { + "epoch": 0.13904984736870968, + "grad_norm": 7.27599208052216, + "learning_rate": 9.691479464323205e-06, + "loss": 17.9711, + "step": 7607 + }, + { + "epoch": 0.13906812656515619, + "grad_norm": 6.195330420273026, + "learning_rate": 9.691377085392126e-06, + "loss": 17.3167, + "step": 7608 + }, + { + "epoch": 0.13908640576160272, + "grad_norm": 7.422560638770763, + "learning_rate": 9.691274690018177e-06, + "loss": 17.9469, + "step": 7609 + }, + { + "epoch": 0.13910468495804926, + "grad_norm": 7.982168416130319, + "learning_rate": 9.691172278201717e-06, + "loss": 18.3047, + "step": 7610 + }, + { + "epoch": 0.13912296415449577, + "grad_norm": 6.823450716965158, + "learning_rate": 9.691069849943106e-06, + "loss": 17.7405, + "step": 7611 + }, + { + "epoch": 0.1391412433509423, + "grad_norm": 6.8991255450433995, + "learning_rate": 9.690967405242702e-06, + "loss": 17.9281, + "step": 7612 + }, + { + "epoch": 0.1391595225473888, + "grad_norm": 7.549502658587642, + "learning_rate": 9.690864944100864e-06, + "loss": 18.1085, + "step": 7613 + }, + { + "epoch": 0.13917780174383534, + "grad_norm": 7.010243806112929, + "learning_rate": 9.690762466517953e-06, + "loss": 17.9053, + "step": 7614 + }, + { + "epoch": 0.13919608094028185, + "grad_norm": 6.2672935541821335, + "learning_rate": 9.690659972494325e-06, + "loss": 17.5679, + "step": 7615 + }, + { + "epoch": 0.1392143601367284, + "grad_norm": 6.51920355904111, + "learning_rate": 9.69055746203034e-06, + "loss": 17.7973, + "step": 7616 + }, + { + "epoch": 0.13923263933317492, + "grad_norm": 4.972536691426739, + "learning_rate": 9.690454935126362e-06, + "loss": 17.0055, + "step": 7617 + }, + { + "epoch": 0.13925091852962143, + "grad_norm": 9.16414804210936, + "learning_rate": 9.690352391782742e-06, + "loss": 18.683, + "step": 7618 + }, + { + "epoch": 0.13926919772606797, + "grad_norm": 6.353458986930466, + "learning_rate": 9.690249831999845e-06, + "loss": 17.6393, + "step": 7619 + }, + { + "epoch": 0.13928747692251447, + "grad_norm": 7.549727257799057, + "learning_rate": 9.69014725577803e-06, + "loss": 17.8279, + 
"step": 7620 + }, + { + "epoch": 0.139305756118961, + "grad_norm": 6.459371878675763, + "learning_rate": 9.690044663117657e-06, + "loss": 17.7354, + "step": 7621 + }, + { + "epoch": 0.13932403531540755, + "grad_norm": 7.096093754094462, + "learning_rate": 9.689942054019084e-06, + "loss": 17.8056, + "step": 7622 + }, + { + "epoch": 0.13934231451185405, + "grad_norm": 5.646822138702501, + "learning_rate": 9.689839428482668e-06, + "loss": 17.2833, + "step": 7623 + }, + { + "epoch": 0.1393605937083006, + "grad_norm": 6.392818847550784, + "learning_rate": 9.689736786508775e-06, + "loss": 17.5295, + "step": 7624 + }, + { + "epoch": 0.1393788729047471, + "grad_norm": 6.543122674993934, + "learning_rate": 9.68963412809776e-06, + "loss": 17.5072, + "step": 7625 + }, + { + "epoch": 0.13939715210119363, + "grad_norm": 6.044145106145599, + "learning_rate": 9.689531453249985e-06, + "loss": 17.4496, + "step": 7626 + }, + { + "epoch": 0.13941543129764017, + "grad_norm": 6.320195166276094, + "learning_rate": 9.689428761965812e-06, + "loss": 17.6063, + "step": 7627 + }, + { + "epoch": 0.13943371049408668, + "grad_norm": 5.93275108209908, + "learning_rate": 9.689326054245594e-06, + "loss": 17.4617, + "step": 7628 + }, + { + "epoch": 0.1394519896905332, + "grad_norm": 7.48701383614776, + "learning_rate": 9.689223330089697e-06, + "loss": 18.0072, + "step": 7629 + }, + { + "epoch": 0.13947026888697972, + "grad_norm": 7.892054430291195, + "learning_rate": 9.689120589498478e-06, + "loss": 18.23, + "step": 7630 + }, + { + "epoch": 0.13948854808342626, + "grad_norm": 8.388621093175491, + "learning_rate": 9.689017832472298e-06, + "loss": 18.298, + "step": 7631 + }, + { + "epoch": 0.13950682727987276, + "grad_norm": 7.940115082999377, + "learning_rate": 9.688915059011519e-06, + "loss": 17.8922, + "step": 7632 + }, + { + "epoch": 0.1395251064763193, + "grad_norm": 6.6214486318031405, + "learning_rate": 9.688812269116498e-06, + "loss": 17.3944, + "step": 7633 + }, + { + "epoch": 0.13954338567276584, + "grad_norm": 8.291700876812332, + "learning_rate": 9.688709462787598e-06, + "loss": 18.3481, + "step": 7634 + }, + { + "epoch": 0.13956166486921234, + "grad_norm": 8.239108156558169, + "learning_rate": 9.688606640025178e-06, + "loss": 18.5325, + "step": 7635 + }, + { + "epoch": 0.13957994406565888, + "grad_norm": 7.025021516402914, + "learning_rate": 9.6885038008296e-06, + "loss": 17.9765, + "step": 7636 + }, + { + "epoch": 0.1395982232621054, + "grad_norm": 6.098258722584013, + "learning_rate": 9.68840094520122e-06, + "loss": 17.4001, + "step": 7637 + }, + { + "epoch": 0.13961650245855192, + "grad_norm": 7.6009707832906725, + "learning_rate": 9.688298073140403e-06, + "loss": 18.0495, + "step": 7638 + }, + { + "epoch": 0.13963478165499846, + "grad_norm": 7.40103737490222, + "learning_rate": 9.688195184647509e-06, + "loss": 17.8624, + "step": 7639 + }, + { + "epoch": 0.13965306085144497, + "grad_norm": 8.4314965735498, + "learning_rate": 9.688092279722896e-06, + "loss": 18.8786, + "step": 7640 + }, + { + "epoch": 0.1396713400478915, + "grad_norm": 6.055062072160815, + "learning_rate": 9.687989358366927e-06, + "loss": 17.6618, + "step": 7641 + }, + { + "epoch": 0.139689619244338, + "grad_norm": 6.05575625200334, + "learning_rate": 9.687886420579962e-06, + "loss": 17.3134, + "step": 7642 + }, + { + "epoch": 0.13970789844078454, + "grad_norm": 8.622498771976538, + "learning_rate": 9.687783466362362e-06, + "loss": 18.3974, + "step": 7643 + }, + { + "epoch": 0.13972617763723108, + "grad_norm": 5.865694263033465, + 
"learning_rate": 9.687680495714488e-06, + "loss": 17.2247, + "step": 7644 + }, + { + "epoch": 0.1397444568336776, + "grad_norm": 6.811234057849274, + "learning_rate": 9.6875775086367e-06, + "loss": 17.7938, + "step": 7645 + }, + { + "epoch": 0.13976273603012412, + "grad_norm": 7.106931351353856, + "learning_rate": 9.687474505129362e-06, + "loss": 18.1443, + "step": 7646 + }, + { + "epoch": 0.13978101522657063, + "grad_norm": 6.482267760104404, + "learning_rate": 9.687371485192831e-06, + "loss": 17.4304, + "step": 7647 + }, + { + "epoch": 0.13979929442301717, + "grad_norm": 7.974894210989417, + "learning_rate": 9.687268448827468e-06, + "loss": 18.3447, + "step": 7648 + }, + { + "epoch": 0.13981757361946368, + "grad_norm": 7.5935022550416456, + "learning_rate": 9.687165396033638e-06, + "loss": 18.0296, + "step": 7649 + }, + { + "epoch": 0.1398358528159102, + "grad_norm": 6.468510798971305, + "learning_rate": 9.6870623268117e-06, + "loss": 17.866, + "step": 7650 + }, + { + "epoch": 0.13985413201235675, + "grad_norm": 7.5046531587992735, + "learning_rate": 9.686959241162013e-06, + "loss": 17.7633, + "step": 7651 + }, + { + "epoch": 0.13987241120880325, + "grad_norm": 7.670355311867818, + "learning_rate": 9.686856139084943e-06, + "loss": 18.0413, + "step": 7652 + }, + { + "epoch": 0.1398906904052498, + "grad_norm": 8.67865132995876, + "learning_rate": 9.686753020580847e-06, + "loss": 18.3526, + "step": 7653 + }, + { + "epoch": 0.1399089696016963, + "grad_norm": 9.344161512670489, + "learning_rate": 9.68664988565009e-06, + "loss": 18.4226, + "step": 7654 + }, + { + "epoch": 0.13992724879814283, + "grad_norm": 7.732963302029577, + "learning_rate": 9.686546734293032e-06, + "loss": 17.8059, + "step": 7655 + }, + { + "epoch": 0.13994552799458937, + "grad_norm": 5.753291438789974, + "learning_rate": 9.686443566510033e-06, + "loss": 17.315, + "step": 7656 + }, + { + "epoch": 0.13996380719103588, + "grad_norm": 6.825847797501684, + "learning_rate": 9.686340382301457e-06, + "loss": 17.8254, + "step": 7657 + }, + { + "epoch": 0.1399820863874824, + "grad_norm": 9.393549575400462, + "learning_rate": 9.686237181667664e-06, + "loss": 18.7241, + "step": 7658 + }, + { + "epoch": 0.14000036558392892, + "grad_norm": 5.97960450460996, + "learning_rate": 9.686133964609017e-06, + "loss": 17.4859, + "step": 7659 + }, + { + "epoch": 0.14001864478037546, + "grad_norm": 8.19156414760745, + "learning_rate": 9.686030731125877e-06, + "loss": 18.161, + "step": 7660 + }, + { + "epoch": 0.140036923976822, + "grad_norm": 7.13560642700512, + "learning_rate": 9.685927481218605e-06, + "loss": 17.7538, + "step": 7661 + }, + { + "epoch": 0.1400552031732685, + "grad_norm": 7.955656266645502, + "learning_rate": 9.685824214887565e-06, + "loss": 18.4976, + "step": 7662 + }, + { + "epoch": 0.14007348236971504, + "grad_norm": 10.783803224347071, + "learning_rate": 9.685720932133117e-06, + "loss": 18.1724, + "step": 7663 + }, + { + "epoch": 0.14009176156616154, + "grad_norm": 7.623917149763039, + "learning_rate": 9.685617632955625e-06, + "loss": 18.119, + "step": 7664 + }, + { + "epoch": 0.14011004076260808, + "grad_norm": 6.780303406814478, + "learning_rate": 9.685514317355446e-06, + "loss": 17.2607, + "step": 7665 + }, + { + "epoch": 0.1401283199590546, + "grad_norm": 6.972500743781565, + "learning_rate": 9.685410985332951e-06, + "loss": 17.7824, + "step": 7666 + }, + { + "epoch": 0.14014659915550112, + "grad_norm": 6.729420002075346, + "learning_rate": 9.685307636888494e-06, + "loss": 17.4817, + "step": 7667 + }, + { + "epoch": 
0.14016487835194766, + "grad_norm": 7.774515179824839, + "learning_rate": 9.685204272022442e-06, + "loss": 18.115, + "step": 7668 + }, + { + "epoch": 0.14018315754839417, + "grad_norm": 6.472736167199539, + "learning_rate": 9.685100890735153e-06, + "loss": 17.8969, + "step": 7669 + }, + { + "epoch": 0.1402014367448407, + "grad_norm": 7.681000135664825, + "learning_rate": 9.684997493026994e-06, + "loss": 18.0205, + "step": 7670 + }, + { + "epoch": 0.1402197159412872, + "grad_norm": 8.360965006565488, + "learning_rate": 9.684894078898325e-06, + "loss": 18.1404, + "step": 7671 + }, + { + "epoch": 0.14023799513773375, + "grad_norm": 6.861502415358219, + "learning_rate": 9.68479064834951e-06, + "loss": 17.7546, + "step": 7672 + }, + { + "epoch": 0.14025627433418028, + "grad_norm": 7.025707723981484, + "learning_rate": 9.684687201380908e-06, + "loss": 17.9243, + "step": 7673 + }, + { + "epoch": 0.1402745535306268, + "grad_norm": 6.833530124182017, + "learning_rate": 9.684583737992884e-06, + "loss": 17.7647, + "step": 7674 + }, + { + "epoch": 0.14029283272707332, + "grad_norm": 6.082676889038833, + "learning_rate": 9.684480258185802e-06, + "loss": 17.3952, + "step": 7675 + }, + { + "epoch": 0.14031111192351983, + "grad_norm": 8.40792409343925, + "learning_rate": 9.684376761960022e-06, + "loss": 18.2987, + "step": 7676 + }, + { + "epoch": 0.14032939111996637, + "grad_norm": 7.225649769389998, + "learning_rate": 9.684273249315909e-06, + "loss": 18.0845, + "step": 7677 + }, + { + "epoch": 0.1403476703164129, + "grad_norm": 6.613362749393355, + "learning_rate": 9.684169720253824e-06, + "loss": 17.6433, + "step": 7678 + }, + { + "epoch": 0.1403659495128594, + "grad_norm": 6.17401684762471, + "learning_rate": 9.68406617477413e-06, + "loss": 17.6461, + "step": 7679 + }, + { + "epoch": 0.14038422870930595, + "grad_norm": 5.907300268861695, + "learning_rate": 9.683962612877191e-06, + "loss": 17.4147, + "step": 7680 + }, + { + "epoch": 0.14040250790575245, + "grad_norm": 6.438438388115875, + "learning_rate": 9.68385903456337e-06, + "loss": 17.7628, + "step": 7681 + }, + { + "epoch": 0.140420787102199, + "grad_norm": 7.15609337283895, + "learning_rate": 9.683755439833029e-06, + "loss": 17.9813, + "step": 7682 + }, + { + "epoch": 0.1404390662986455, + "grad_norm": 8.407826266878, + "learning_rate": 9.683651828686533e-06, + "loss": 18.6392, + "step": 7683 + }, + { + "epoch": 0.14045734549509203, + "grad_norm": 7.438446230283374, + "learning_rate": 9.683548201124242e-06, + "loss": 18.1408, + "step": 7684 + }, + { + "epoch": 0.14047562469153857, + "grad_norm": 5.922113971888481, + "learning_rate": 9.683444557146522e-06, + "loss": 17.4242, + "step": 7685 + }, + { + "epoch": 0.14049390388798508, + "grad_norm": 7.767049220233212, + "learning_rate": 9.683340896753736e-06, + "loss": 18.243, + "step": 7686 + }, + { + "epoch": 0.1405121830844316, + "grad_norm": 7.7351425289605285, + "learning_rate": 9.683237219946244e-06, + "loss": 17.8322, + "step": 7687 + }, + { + "epoch": 0.14053046228087812, + "grad_norm": 5.766262580266691, + "learning_rate": 9.683133526724413e-06, + "loss": 17.4055, + "step": 7688 + }, + { + "epoch": 0.14054874147732466, + "grad_norm": 5.425520660615894, + "learning_rate": 9.683029817088608e-06, + "loss": 17.2274, + "step": 7689 + }, + { + "epoch": 0.1405670206737712, + "grad_norm": 7.575820684465325, + "learning_rate": 9.682926091039187e-06, + "loss": 18.0752, + "step": 7690 + }, + { + "epoch": 0.1405852998702177, + "grad_norm": 8.092227554164923, + "learning_rate": 9.682822348576518e-06, + 
"loss": 17.9636, + "step": 7691 + }, + { + "epoch": 0.14060357906666424, + "grad_norm": 7.253130689899035, + "learning_rate": 9.68271858970096e-06, + "loss": 17.9492, + "step": 7692 + }, + { + "epoch": 0.14062185826311074, + "grad_norm": 8.145629293534444, + "learning_rate": 9.682614814412883e-06, + "loss": 18.4608, + "step": 7693 + }, + { + "epoch": 0.14064013745955728, + "grad_norm": 7.160254710914674, + "learning_rate": 9.682511022712646e-06, + "loss": 17.9893, + "step": 7694 + }, + { + "epoch": 0.14065841665600382, + "grad_norm": 7.357299262847844, + "learning_rate": 9.682407214600615e-06, + "loss": 18.0813, + "step": 7695 + }, + { + "epoch": 0.14067669585245032, + "grad_norm": 8.08579147665916, + "learning_rate": 9.682303390077153e-06, + "loss": 17.9742, + "step": 7696 + }, + { + "epoch": 0.14069497504889686, + "grad_norm": 6.973170726147441, + "learning_rate": 9.682199549142623e-06, + "loss": 17.6363, + "step": 7697 + }, + { + "epoch": 0.14071325424534337, + "grad_norm": 6.969016413291826, + "learning_rate": 9.682095691797391e-06, + "loss": 18.1067, + "step": 7698 + }, + { + "epoch": 0.1407315334417899, + "grad_norm": 7.506545156191458, + "learning_rate": 9.681991818041818e-06, + "loss": 18.1744, + "step": 7699 + }, + { + "epoch": 0.1407498126382364, + "grad_norm": 7.9121187519485705, + "learning_rate": 9.681887927876271e-06, + "loss": 17.9692, + "step": 7700 + }, + { + "epoch": 0.14076809183468295, + "grad_norm": 8.679743774834556, + "learning_rate": 9.681784021301112e-06, + "loss": 17.9166, + "step": 7701 + }, + { + "epoch": 0.14078637103112948, + "grad_norm": 7.024434714046227, + "learning_rate": 9.68168009831671e-06, + "loss": 17.8942, + "step": 7702 + }, + { + "epoch": 0.140804650227576, + "grad_norm": 7.349989985891914, + "learning_rate": 9.681576158923423e-06, + "loss": 18.1657, + "step": 7703 + }, + { + "epoch": 0.14082292942402252, + "grad_norm": 8.459378945845003, + "learning_rate": 9.681472203121617e-06, + "loss": 18.7275, + "step": 7704 + }, + { + "epoch": 0.14084120862046903, + "grad_norm": 9.695222479773127, + "learning_rate": 9.681368230911659e-06, + "loss": 18.4332, + "step": 7705 + }, + { + "epoch": 0.14085948781691557, + "grad_norm": 7.186765750034922, + "learning_rate": 9.68126424229391e-06, + "loss": 18.0776, + "step": 7706 + }, + { + "epoch": 0.1408777670133621, + "grad_norm": 6.854102062564552, + "learning_rate": 9.681160237268737e-06, + "loss": 17.9172, + "step": 7707 + }, + { + "epoch": 0.1408960462098086, + "grad_norm": 7.3632229794090005, + "learning_rate": 9.681056215836501e-06, + "loss": 18.0963, + "step": 7708 + }, + { + "epoch": 0.14091432540625515, + "grad_norm": 7.7221081410587455, + "learning_rate": 9.680952177997572e-06, + "loss": 18.1901, + "step": 7709 + }, + { + "epoch": 0.14093260460270166, + "grad_norm": 5.924403423489709, + "learning_rate": 9.680848123752312e-06, + "loss": 17.3988, + "step": 7710 + }, + { + "epoch": 0.1409508837991482, + "grad_norm": 6.993460661737724, + "learning_rate": 9.680744053101084e-06, + "loss": 17.8039, + "step": 7711 + }, + { + "epoch": 0.14096916299559473, + "grad_norm": 8.421952771436054, + "learning_rate": 9.680639966044256e-06, + "loss": 18.4236, + "step": 7712 + }, + { + "epoch": 0.14098744219204123, + "grad_norm": 6.789647333264264, + "learning_rate": 9.68053586258219e-06, + "loss": 17.6687, + "step": 7713 + }, + { + "epoch": 0.14100572138848777, + "grad_norm": 7.722165563852823, + "learning_rate": 9.680431742715252e-06, + "loss": 17.7669, + "step": 7714 + }, + { + "epoch": 0.14102400058493428, + "grad_norm": 
7.719011591664162, + "learning_rate": 9.680327606443806e-06, + "loss": 18.1179, + "step": 7715 + }, + { + "epoch": 0.1410422797813808, + "grad_norm": 7.767842326935953, + "learning_rate": 9.680223453768219e-06, + "loss": 18.1272, + "step": 7716 + }, + { + "epoch": 0.14106055897782732, + "grad_norm": 7.5680030448530955, + "learning_rate": 9.680119284688855e-06, + "loss": 17.723, + "step": 7717 + }, + { + "epoch": 0.14107883817427386, + "grad_norm": 7.7507445436356015, + "learning_rate": 9.68001509920608e-06, + "loss": 18.0347, + "step": 7718 + }, + { + "epoch": 0.1410971173707204, + "grad_norm": 7.307328906945655, + "learning_rate": 9.679910897320254e-06, + "loss": 17.8308, + "step": 7719 + }, + { + "epoch": 0.1411153965671669, + "grad_norm": 7.951724740756475, + "learning_rate": 9.679806679031751e-06, + "loss": 18.0057, + "step": 7720 + }, + { + "epoch": 0.14113367576361344, + "grad_norm": 6.778070343960303, + "learning_rate": 9.67970244434093e-06, + "loss": 17.4964, + "step": 7721 + }, + { + "epoch": 0.14115195496005994, + "grad_norm": 8.07497389710269, + "learning_rate": 9.679598193248159e-06, + "loss": 18.3359, + "step": 7722 + }, + { + "epoch": 0.14117023415650648, + "grad_norm": 6.6248376575767605, + "learning_rate": 9.6794939257538e-06, + "loss": 17.7897, + "step": 7723 + }, + { + "epoch": 0.14118851335295302, + "grad_norm": 5.905110351666292, + "learning_rate": 9.679389641858224e-06, + "loss": 17.2798, + "step": 7724 + }, + { + "epoch": 0.14120679254939952, + "grad_norm": 7.142681101724337, + "learning_rate": 9.67928534156179e-06, + "loss": 17.8447, + "step": 7725 + }, + { + "epoch": 0.14122507174584606, + "grad_norm": 6.413274281557343, + "learning_rate": 9.679181024864869e-06, + "loss": 17.6408, + "step": 7726 + }, + { + "epoch": 0.14124335094229257, + "grad_norm": 8.088982050404411, + "learning_rate": 9.679076691767823e-06, + "loss": 18.3308, + "step": 7727 + }, + { + "epoch": 0.1412616301387391, + "grad_norm": 7.639545561389265, + "learning_rate": 9.678972342271023e-06, + "loss": 17.6528, + "step": 7728 + }, + { + "epoch": 0.14127990933518564, + "grad_norm": 8.758924487537499, + "learning_rate": 9.678867976374827e-06, + "loss": 18.6422, + "step": 7729 + }, + { + "epoch": 0.14129818853163215, + "grad_norm": 7.8443891245671065, + "learning_rate": 9.678763594079605e-06, + "loss": 18.2581, + "step": 7730 + }, + { + "epoch": 0.14131646772807868, + "grad_norm": 8.158889575581766, + "learning_rate": 9.678659195385724e-06, + "loss": 17.9907, + "step": 7731 + }, + { + "epoch": 0.1413347469245252, + "grad_norm": 7.128034971801864, + "learning_rate": 9.67855478029355e-06, + "loss": 18.0491, + "step": 7732 + }, + { + "epoch": 0.14135302612097173, + "grad_norm": 7.524698700334921, + "learning_rate": 9.678450348803445e-06, + "loss": 18.0207, + "step": 7733 + }, + { + "epoch": 0.14137130531741823, + "grad_norm": 7.413578969658936, + "learning_rate": 9.678345900915778e-06, + "loss": 17.9179, + "step": 7734 + }, + { + "epoch": 0.14138958451386477, + "grad_norm": 6.944010819432446, + "learning_rate": 9.678241436630916e-06, + "loss": 17.7795, + "step": 7735 + }, + { + "epoch": 0.1414078637103113, + "grad_norm": 9.25296851022195, + "learning_rate": 9.67813695594922e-06, + "loss": 18.8079, + "step": 7736 + }, + { + "epoch": 0.1414261429067578, + "grad_norm": 7.284723548912895, + "learning_rate": 9.678032458871063e-06, + "loss": 18.1341, + "step": 7737 + }, + { + "epoch": 0.14144442210320435, + "grad_norm": 8.14959581041399, + "learning_rate": 9.677927945396808e-06, + "loss": 18.3563, + "step": 7738 
+ }, + { + "epoch": 0.14146270129965086, + "grad_norm": 7.108870755550117, + "learning_rate": 9.677823415526822e-06, + "loss": 17.8498, + "step": 7739 + }, + { + "epoch": 0.1414809804960974, + "grad_norm": 6.407079401501023, + "learning_rate": 9.67771886926147e-06, + "loss": 17.5913, + "step": 7740 + }, + { + "epoch": 0.14149925969254393, + "grad_norm": 7.224881529647223, + "learning_rate": 9.67761430660112e-06, + "loss": 17.9471, + "step": 7741 + }, + { + "epoch": 0.14151753888899044, + "grad_norm": 6.837148647998776, + "learning_rate": 9.677509727546134e-06, + "loss": 17.4927, + "step": 7742 + }, + { + "epoch": 0.14153581808543697, + "grad_norm": 7.846606169916832, + "learning_rate": 9.677405132096887e-06, + "loss": 18.4396, + "step": 7743 + }, + { + "epoch": 0.14155409728188348, + "grad_norm": 7.524877752546834, + "learning_rate": 9.677300520253738e-06, + "loss": 18.0791, + "step": 7744 + }, + { + "epoch": 0.14157237647833001, + "grad_norm": 7.487832029774735, + "learning_rate": 9.677195892017059e-06, + "loss": 17.7253, + "step": 7745 + }, + { + "epoch": 0.14159065567477655, + "grad_norm": 6.419594968610413, + "learning_rate": 9.677091247387214e-06, + "loss": 17.5961, + "step": 7746 + }, + { + "epoch": 0.14160893487122306, + "grad_norm": 7.8628128876797945, + "learning_rate": 9.676986586364567e-06, + "loss": 18.4315, + "step": 7747 + }, + { + "epoch": 0.1416272140676696, + "grad_norm": 7.299063522511162, + "learning_rate": 9.676881908949492e-06, + "loss": 17.8903, + "step": 7748 + }, + { + "epoch": 0.1416454932641161, + "grad_norm": 5.806594807064861, + "learning_rate": 9.676777215142348e-06, + "loss": 17.2699, + "step": 7749 + }, + { + "epoch": 0.14166377246056264, + "grad_norm": 5.770711123757083, + "learning_rate": 9.676672504943508e-06, + "loss": 17.4297, + "step": 7750 + }, + { + "epoch": 0.14168205165700914, + "grad_norm": 7.502838539343097, + "learning_rate": 9.676567778353337e-06, + "loss": 17.9715, + "step": 7751 + }, + { + "epoch": 0.14170033085345568, + "grad_norm": 6.662998191797699, + "learning_rate": 9.6764630353722e-06, + "loss": 17.5581, + "step": 7752 + }, + { + "epoch": 0.14171861004990222, + "grad_norm": 7.416255808314275, + "learning_rate": 9.676358276000466e-06, + "loss": 17.9441, + "step": 7753 + }, + { + "epoch": 0.14173688924634872, + "grad_norm": 7.192597832118229, + "learning_rate": 9.676253500238503e-06, + "loss": 17.8188, + "step": 7754 + }, + { + "epoch": 0.14175516844279526, + "grad_norm": 6.303949808613113, + "learning_rate": 9.676148708086677e-06, + "loss": 17.5782, + "step": 7755 + }, + { + "epoch": 0.14177344763924177, + "grad_norm": 7.537919742112997, + "learning_rate": 9.676043899545356e-06, + "loss": 18.1434, + "step": 7756 + }, + { + "epoch": 0.1417917268356883, + "grad_norm": 6.324639285936871, + "learning_rate": 9.675939074614907e-06, + "loss": 17.4917, + "step": 7757 + }, + { + "epoch": 0.14181000603213484, + "grad_norm": 6.705794880450858, + "learning_rate": 9.675834233295696e-06, + "loss": 17.5962, + "step": 7758 + }, + { + "epoch": 0.14182828522858135, + "grad_norm": 6.831759017623418, + "learning_rate": 9.675729375588092e-06, + "loss": 17.6193, + "step": 7759 + }, + { + "epoch": 0.14184656442502788, + "grad_norm": 6.973219084686587, + "learning_rate": 9.675624501492462e-06, + "loss": 17.7955, + "step": 7760 + }, + { + "epoch": 0.1418648436214744, + "grad_norm": 6.677520348869061, + "learning_rate": 9.675519611009176e-06, + "loss": 17.5277, + "step": 7761 + }, + { + "epoch": 0.14188312281792093, + "grad_norm": 6.567747401858718, + 
"learning_rate": 9.675414704138596e-06, + "loss": 17.499, + "step": 7762 + }, + { + "epoch": 0.14190140201436746, + "grad_norm": 6.144341090505756, + "learning_rate": 9.675309780881097e-06, + "loss": 17.484, + "step": 7763 + }, + { + "epoch": 0.14191968121081397, + "grad_norm": 6.445653628061477, + "learning_rate": 9.67520484123704e-06, + "loss": 17.36, + "step": 7764 + }, + { + "epoch": 0.1419379604072605, + "grad_norm": 8.560003692490604, + "learning_rate": 9.675099885206798e-06, + "loss": 18.2682, + "step": 7765 + }, + { + "epoch": 0.141956239603707, + "grad_norm": 7.859397461991227, + "learning_rate": 9.674994912790736e-06, + "loss": 18.1824, + "step": 7766 + }, + { + "epoch": 0.14197451880015355, + "grad_norm": 7.106172855319866, + "learning_rate": 9.674889923989222e-06, + "loss": 17.9058, + "step": 7767 + }, + { + "epoch": 0.14199279799660006, + "grad_norm": 7.00829803230192, + "learning_rate": 9.674784918802624e-06, + "loss": 17.3542, + "step": 7768 + }, + { + "epoch": 0.1420110771930466, + "grad_norm": 7.173244707430433, + "learning_rate": 9.674679897231311e-06, + "loss": 17.8225, + "step": 7769 + }, + { + "epoch": 0.14202935638949313, + "grad_norm": 7.232542256664458, + "learning_rate": 9.67457485927565e-06, + "loss": 17.7652, + "step": 7770 + }, + { + "epoch": 0.14204763558593964, + "grad_norm": 7.01716133149273, + "learning_rate": 9.674469804936012e-06, + "loss": 17.7022, + "step": 7771 + }, + { + "epoch": 0.14206591478238617, + "grad_norm": 5.810302252713791, + "learning_rate": 9.67436473421276e-06, + "loss": 17.1869, + "step": 7772 + }, + { + "epoch": 0.14208419397883268, + "grad_norm": 7.134096152854323, + "learning_rate": 9.674259647106268e-06, + "loss": 17.8671, + "step": 7773 + }, + { + "epoch": 0.14210247317527921, + "grad_norm": 6.786937004516573, + "learning_rate": 9.6741545436169e-06, + "loss": 17.7471, + "step": 7774 + }, + { + "epoch": 0.14212075237172575, + "grad_norm": 6.500229037330626, + "learning_rate": 9.674049423745025e-06, + "loss": 17.8578, + "step": 7775 + }, + { + "epoch": 0.14213903156817226, + "grad_norm": 6.220294935342864, + "learning_rate": 9.673944287491013e-06, + "loss": 17.4893, + "step": 7776 + }, + { + "epoch": 0.1421573107646188, + "grad_norm": 6.9788471623255175, + "learning_rate": 9.673839134855233e-06, + "loss": 17.8216, + "step": 7777 + }, + { + "epoch": 0.1421755899610653, + "grad_norm": 6.73552468574272, + "learning_rate": 9.673733965838053e-06, + "loss": 17.7899, + "step": 7778 + }, + { + "epoch": 0.14219386915751184, + "grad_norm": 7.2627811451102335, + "learning_rate": 9.673628780439839e-06, + "loss": 17.8894, + "step": 7779 + }, + { + "epoch": 0.14221214835395837, + "grad_norm": 7.622253576899311, + "learning_rate": 9.673523578660962e-06, + "loss": 18.0757, + "step": 7780 + }, + { + "epoch": 0.14223042755040488, + "grad_norm": 6.749261020223755, + "learning_rate": 9.67341836050179e-06, + "loss": 17.6294, + "step": 7781 + }, + { + "epoch": 0.14224870674685142, + "grad_norm": 5.852666143775732, + "learning_rate": 9.673313125962693e-06, + "loss": 17.6006, + "step": 7782 + }, + { + "epoch": 0.14226698594329792, + "grad_norm": 5.88140172454095, + "learning_rate": 9.673207875044039e-06, + "loss": 17.4948, + "step": 7783 + }, + { + "epoch": 0.14228526513974446, + "grad_norm": 7.210294985040148, + "learning_rate": 9.673102607746198e-06, + "loss": 17.5802, + "step": 7784 + }, + { + "epoch": 0.14230354433619097, + "grad_norm": 6.494521279404934, + "learning_rate": 9.672997324069536e-06, + "loss": 17.5406, + "step": 7785 + }, + { + "epoch": 
0.1423218235326375, + "grad_norm": 8.046854484935118, + "learning_rate": 9.672892024014426e-06, + "loss": 18.7805, + "step": 7786 + }, + { + "epoch": 0.14234010272908404, + "grad_norm": 7.789728345892877, + "learning_rate": 9.672786707581232e-06, + "loss": 18.1358, + "step": 7787 + }, + { + "epoch": 0.14235838192553055, + "grad_norm": 7.264851122492589, + "learning_rate": 9.672681374770328e-06, + "loss": 17.9878, + "step": 7788 + }, + { + "epoch": 0.14237666112197708, + "grad_norm": 6.076518274182032, + "learning_rate": 9.672576025582081e-06, + "loss": 17.3517, + "step": 7789 + }, + { + "epoch": 0.1423949403184236, + "grad_norm": 6.342632611380603, + "learning_rate": 9.672470660016862e-06, + "loss": 17.4663, + "step": 7790 + }, + { + "epoch": 0.14241321951487013, + "grad_norm": 6.879084098925314, + "learning_rate": 9.672365278075035e-06, + "loss": 17.7861, + "step": 7791 + }, + { + "epoch": 0.14243149871131666, + "grad_norm": 7.5159207183567025, + "learning_rate": 9.672259879756976e-06, + "loss": 17.668, + "step": 7792 + }, + { + "epoch": 0.14244977790776317, + "grad_norm": 8.093294225857699, + "learning_rate": 9.672154465063051e-06, + "loss": 18.1166, + "step": 7793 + }, + { + "epoch": 0.1424680571042097, + "grad_norm": 6.536931922435906, + "learning_rate": 9.672049033993632e-06, + "loss": 17.5174, + "step": 7794 + }, + { + "epoch": 0.1424863363006562, + "grad_norm": 5.961237974227077, + "learning_rate": 9.671943586549085e-06, + "loss": 17.2613, + "step": 7795 + }, + { + "epoch": 0.14250461549710275, + "grad_norm": 7.349228206352338, + "learning_rate": 9.67183812272978e-06, + "loss": 17.9133, + "step": 7796 + }, + { + "epoch": 0.14252289469354928, + "grad_norm": 7.074451173895017, + "learning_rate": 9.671732642536087e-06, + "loss": 17.6657, + "step": 7797 + }, + { + "epoch": 0.1425411738899958, + "grad_norm": 6.834222829579723, + "learning_rate": 9.67162714596838e-06, + "loss": 17.8446, + "step": 7798 + }, + { + "epoch": 0.14255945308644233, + "grad_norm": 6.867635214185919, + "learning_rate": 9.671521633027022e-06, + "loss": 17.5231, + "step": 7799 + }, + { + "epoch": 0.14257773228288884, + "grad_norm": 7.517409601017157, + "learning_rate": 9.671416103712389e-06, + "loss": 18.0342, + "step": 7800 + }, + { + "epoch": 0.14259601147933537, + "grad_norm": 7.247493154377069, + "learning_rate": 9.671310558024844e-06, + "loss": 17.6106, + "step": 7801 + }, + { + "epoch": 0.14261429067578188, + "grad_norm": 8.626717076846722, + "learning_rate": 9.671204995964762e-06, + "loss": 18.3426, + "step": 7802 + }, + { + "epoch": 0.14263256987222842, + "grad_norm": 6.661527484937428, + "learning_rate": 9.671099417532515e-06, + "loss": 17.5053, + "step": 7803 + }, + { + "epoch": 0.14265084906867495, + "grad_norm": 8.057202515842757, + "learning_rate": 9.670993822728467e-06, + "loss": 17.7858, + "step": 7804 + }, + { + "epoch": 0.14266912826512146, + "grad_norm": 7.05030211660043, + "learning_rate": 9.670888211552992e-06, + "loss": 17.6583, + "step": 7805 + }, + { + "epoch": 0.142687407461568, + "grad_norm": 5.706726345107122, + "learning_rate": 9.670782584006459e-06, + "loss": 17.141, + "step": 7806 + }, + { + "epoch": 0.1427056866580145, + "grad_norm": 7.344611184842882, + "learning_rate": 9.670676940089239e-06, + "loss": 17.8914, + "step": 7807 + }, + { + "epoch": 0.14272396585446104, + "grad_norm": 7.022220843795626, + "learning_rate": 9.670571279801699e-06, + "loss": 17.6126, + "step": 7808 + }, + { + "epoch": 0.14274224505090757, + "grad_norm": 7.564178950445632, + "learning_rate": 
9.670465603144214e-06, + "loss": 18.0142, + "step": 7809 + }, + { + "epoch": 0.14276052424735408, + "grad_norm": 5.9335851077868735, + "learning_rate": 9.670359910117153e-06, + "loss": 17.1887, + "step": 7810 + }, + { + "epoch": 0.14277880344380062, + "grad_norm": 6.846377150268151, + "learning_rate": 9.670254200720886e-06, + "loss": 17.8817, + "step": 7811 + }, + { + "epoch": 0.14279708264024712, + "grad_norm": 6.747451226197997, + "learning_rate": 9.67014847495578e-06, + "loss": 17.6365, + "step": 7812 + }, + { + "epoch": 0.14281536183669366, + "grad_norm": 8.90710576128569, + "learning_rate": 9.670042732822212e-06, + "loss": 18.3125, + "step": 7813 + }, + { + "epoch": 0.1428336410331402, + "grad_norm": 5.612174147043535, + "learning_rate": 9.669936974320548e-06, + "loss": 17.2074, + "step": 7814 + }, + { + "epoch": 0.1428519202295867, + "grad_norm": 6.7189199468273495, + "learning_rate": 9.669831199451161e-06, + "loss": 17.4914, + "step": 7815 + }, + { + "epoch": 0.14287019942603324, + "grad_norm": 6.57693825095924, + "learning_rate": 9.66972540821442e-06, + "loss": 17.2984, + "step": 7816 + }, + { + "epoch": 0.14288847862247975, + "grad_norm": 7.099477125925888, + "learning_rate": 9.669619600610699e-06, + "loss": 17.7003, + "step": 7817 + }, + { + "epoch": 0.14290675781892628, + "grad_norm": 5.980220718531627, + "learning_rate": 9.669513776640364e-06, + "loss": 17.449, + "step": 7818 + }, + { + "epoch": 0.1429250370153728, + "grad_norm": 6.659901544676826, + "learning_rate": 9.66940793630379e-06, + "loss": 17.4634, + "step": 7819 + }, + { + "epoch": 0.14294331621181933, + "grad_norm": 7.064695813405876, + "learning_rate": 9.669302079601345e-06, + "loss": 17.9158, + "step": 7820 + }, + { + "epoch": 0.14296159540826586, + "grad_norm": 9.535819291754025, + "learning_rate": 9.669196206533402e-06, + "loss": 18.5917, + "step": 7821 + }, + { + "epoch": 0.14297987460471237, + "grad_norm": 6.882356198216137, + "learning_rate": 9.669090317100331e-06, + "loss": 17.802, + "step": 7822 + }, + { + "epoch": 0.1429981538011589, + "grad_norm": 8.259487175303988, + "learning_rate": 9.668984411302504e-06, + "loss": 18.0371, + "step": 7823 + }, + { + "epoch": 0.1430164329976054, + "grad_norm": 7.107527463967547, + "learning_rate": 9.668878489140292e-06, + "loss": 17.5423, + "step": 7824 + }, + { + "epoch": 0.14303471219405195, + "grad_norm": 6.455719803755345, + "learning_rate": 9.668772550614067e-06, + "loss": 17.4033, + "step": 7825 + }, + { + "epoch": 0.14305299139049849, + "grad_norm": 6.653754036169663, + "learning_rate": 9.668666595724196e-06, + "loss": 17.7759, + "step": 7826 + }, + { + "epoch": 0.143071270586945, + "grad_norm": 7.153991568865042, + "learning_rate": 9.668560624471057e-06, + "loss": 17.9118, + "step": 7827 + }, + { + "epoch": 0.14308954978339153, + "grad_norm": 6.569018776209766, + "learning_rate": 9.668454636855018e-06, + "loss": 17.8073, + "step": 7828 + }, + { + "epoch": 0.14310782897983804, + "grad_norm": 6.221145113425062, + "learning_rate": 9.668348632876448e-06, + "loss": 17.3776, + "step": 7829 + }, + { + "epoch": 0.14312610817628457, + "grad_norm": 6.1130743532573595, + "learning_rate": 9.668242612535723e-06, + "loss": 17.3465, + "step": 7830 + }, + { + "epoch": 0.1431443873727311, + "grad_norm": 8.126195268243293, + "learning_rate": 9.668136575833213e-06, + "loss": 18.4532, + "step": 7831 + }, + { + "epoch": 0.14316266656917762, + "grad_norm": 6.292462249411014, + "learning_rate": 9.668030522769289e-06, + "loss": 17.4285, + "step": 7832 + }, + { + "epoch": 
0.14318094576562415, + "grad_norm": 6.387665515722563, + "learning_rate": 9.667924453344324e-06, + "loss": 17.534, + "step": 7833 + }, + { + "epoch": 0.14319922496207066, + "grad_norm": 5.734749013084801, + "learning_rate": 9.667818367558687e-06, + "loss": 17.2402, + "step": 7834 + }, + { + "epoch": 0.1432175041585172, + "grad_norm": 6.849385585121317, + "learning_rate": 9.667712265412751e-06, + "loss": 18.0146, + "step": 7835 + }, + { + "epoch": 0.1432357833549637, + "grad_norm": 7.558442824431708, + "learning_rate": 9.667606146906892e-06, + "loss": 17.546, + "step": 7836 + }, + { + "epoch": 0.14325406255141024, + "grad_norm": 7.460597625231734, + "learning_rate": 9.667500012041476e-06, + "loss": 17.6715, + "step": 7837 + }, + { + "epoch": 0.14327234174785677, + "grad_norm": 7.270287286908869, + "learning_rate": 9.667393860816878e-06, + "loss": 17.7303, + "step": 7838 + }, + { + "epoch": 0.14329062094430328, + "grad_norm": 6.437807876874178, + "learning_rate": 9.667287693233471e-06, + "loss": 17.6004, + "step": 7839 + }, + { + "epoch": 0.14330890014074982, + "grad_norm": 7.281635820821214, + "learning_rate": 9.667181509291623e-06, + "loss": 17.8611, + "step": 7840 + }, + { + "epoch": 0.14332717933719633, + "grad_norm": 9.048553612734262, + "learning_rate": 9.66707530899171e-06, + "loss": 18.4027, + "step": 7841 + }, + { + "epoch": 0.14334545853364286, + "grad_norm": 8.407622272493642, + "learning_rate": 9.666969092334104e-06, + "loss": 18.317, + "step": 7842 + }, + { + "epoch": 0.1433637377300894, + "grad_norm": 6.741525791108761, + "learning_rate": 9.666862859319175e-06, + "loss": 17.5255, + "step": 7843 + }, + { + "epoch": 0.1433820169265359, + "grad_norm": 7.334684755636911, + "learning_rate": 9.666756609947297e-06, + "loss": 17.7949, + "step": 7844 + }, + { + "epoch": 0.14340029612298244, + "grad_norm": 6.222155132693972, + "learning_rate": 9.666650344218842e-06, + "loss": 17.4305, + "step": 7845 + }, + { + "epoch": 0.14341857531942895, + "grad_norm": 7.249972428703729, + "learning_rate": 9.666544062134182e-06, + "loss": 17.6051, + "step": 7846 + }, + { + "epoch": 0.14343685451587548, + "grad_norm": 6.377084097167007, + "learning_rate": 9.666437763693691e-06, + "loss": 17.5562, + "step": 7847 + }, + { + "epoch": 0.14345513371232202, + "grad_norm": 7.704324039458236, + "learning_rate": 9.66633144889774e-06, + "loss": 18.0369, + "step": 7848 + }, + { + "epoch": 0.14347341290876853, + "grad_norm": 8.830517083057435, + "learning_rate": 9.666225117746703e-06, + "loss": 18.7811, + "step": 7849 + }, + { + "epoch": 0.14349169210521506, + "grad_norm": 7.704071862496725, + "learning_rate": 9.66611877024095e-06, + "loss": 17.5736, + "step": 7850 + }, + { + "epoch": 0.14350997130166157, + "grad_norm": 6.587411827645895, + "learning_rate": 9.666012406380858e-06, + "loss": 17.5842, + "step": 7851 + }, + { + "epoch": 0.1435282504981081, + "grad_norm": 10.814748016890894, + "learning_rate": 9.665906026166796e-06, + "loss": 19.9485, + "step": 7852 + }, + { + "epoch": 0.14354652969455461, + "grad_norm": 6.428524945881435, + "learning_rate": 9.66579962959914e-06, + "loss": 17.6798, + "step": 7853 + }, + { + "epoch": 0.14356480889100115, + "grad_norm": 7.238464464725674, + "learning_rate": 9.665693216678259e-06, + "loss": 17.9204, + "step": 7854 + }, + { + "epoch": 0.14358308808744769, + "grad_norm": 7.525942968671081, + "learning_rate": 9.665586787404528e-06, + "loss": 18.2486, + "step": 7855 + }, + { + "epoch": 0.1436013672838942, + "grad_norm": 6.829776281861548, + "learning_rate": 
9.665480341778322e-06, + "loss": 17.51, + "step": 7856 + }, + { + "epoch": 0.14361964648034073, + "grad_norm": 7.571890437381861, + "learning_rate": 9.66537387980001e-06, + "loss": 18.1075, + "step": 7857 + }, + { + "epoch": 0.14363792567678724, + "grad_norm": 7.484927683844003, + "learning_rate": 9.66526740146997e-06, + "loss": 17.8707, + "step": 7858 + }, + { + "epoch": 0.14365620487323377, + "grad_norm": 8.00425417309483, + "learning_rate": 9.665160906788571e-06, + "loss": 18.1998, + "step": 7859 + }, + { + "epoch": 0.1436744840696803, + "grad_norm": 7.277403688336308, + "learning_rate": 9.665054395756188e-06, + "loss": 17.9342, + "step": 7860 + }, + { + "epoch": 0.14369276326612682, + "grad_norm": 7.344833164501549, + "learning_rate": 9.664947868373195e-06, + "loss": 17.8342, + "step": 7861 + }, + { + "epoch": 0.14371104246257335, + "grad_norm": 7.627254165312603, + "learning_rate": 9.664841324639963e-06, + "loss": 18.0097, + "step": 7862 + }, + { + "epoch": 0.14372932165901986, + "grad_norm": 6.655124794610509, + "learning_rate": 9.664734764556869e-06, + "loss": 17.5146, + "step": 7863 + }, + { + "epoch": 0.1437476008554664, + "grad_norm": 7.226636848068177, + "learning_rate": 9.664628188124282e-06, + "loss": 17.9697, + "step": 7864 + }, + { + "epoch": 0.14376588005191293, + "grad_norm": 7.629781323015025, + "learning_rate": 9.66452159534258e-06, + "loss": 18.1848, + "step": 7865 + }, + { + "epoch": 0.14378415924835944, + "grad_norm": 6.377562023438023, + "learning_rate": 9.664414986212134e-06, + "loss": 17.3943, + "step": 7866 + }, + { + "epoch": 0.14380243844480597, + "grad_norm": 7.888258891692893, + "learning_rate": 9.664308360733316e-06, + "loss": 18.0158, + "step": 7867 + }, + { + "epoch": 0.14382071764125248, + "grad_norm": 6.701332911179065, + "learning_rate": 9.664201718906506e-06, + "loss": 17.4241, + "step": 7868 + }, + { + "epoch": 0.14383899683769902, + "grad_norm": 6.929538947451322, + "learning_rate": 9.66409506073207e-06, + "loss": 17.7239, + "step": 7869 + }, + { + "epoch": 0.14385727603414553, + "grad_norm": 7.044341210749867, + "learning_rate": 9.663988386210388e-06, + "loss": 17.8299, + "step": 7870 + }, + { + "epoch": 0.14387555523059206, + "grad_norm": 6.927395988763612, + "learning_rate": 9.66388169534183e-06, + "loss": 17.7399, + "step": 7871 + }, + { + "epoch": 0.1438938344270386, + "grad_norm": 6.678235413763966, + "learning_rate": 9.663774988126772e-06, + "loss": 17.2341, + "step": 7872 + }, + { + "epoch": 0.1439121136234851, + "grad_norm": 6.617014339663367, + "learning_rate": 9.663668264565589e-06, + "loss": 17.5994, + "step": 7873 + }, + { + "epoch": 0.14393039281993164, + "grad_norm": 6.135451945627378, + "learning_rate": 9.663561524658652e-06, + "loss": 17.3528, + "step": 7874 + }, + { + "epoch": 0.14394867201637815, + "grad_norm": 10.054335633136397, + "learning_rate": 9.663454768406335e-06, + "loss": 17.8716, + "step": 7875 + }, + { + "epoch": 0.14396695121282468, + "grad_norm": 7.821853537649704, + "learning_rate": 9.663347995809016e-06, + "loss": 17.5585, + "step": 7876 + }, + { + "epoch": 0.14398523040927122, + "grad_norm": 6.6592194858021765, + "learning_rate": 9.663241206867065e-06, + "loss": 17.5602, + "step": 7877 + }, + { + "epoch": 0.14400350960571773, + "grad_norm": 7.406957218568989, + "learning_rate": 9.66313440158086e-06, + "loss": 17.6681, + "step": 7878 + }, + { + "epoch": 0.14402178880216426, + "grad_norm": 7.183931433865366, + "learning_rate": 9.663027579950771e-06, + "loss": 17.731, + "step": 7879 + }, + { + "epoch": 
0.14404006799861077, + "grad_norm": 6.795145994337344, + "learning_rate": 9.662920741977177e-06, + "loss": 17.5502, + "step": 7880 + }, + { + "epoch": 0.1440583471950573, + "grad_norm": 7.425098615702723, + "learning_rate": 9.662813887660451e-06, + "loss": 17.3886, + "step": 7881 + }, + { + "epoch": 0.14407662639150384, + "grad_norm": 7.467302204427384, + "learning_rate": 9.662707017000967e-06, + "loss": 18.0887, + "step": 7882 + }, + { + "epoch": 0.14409490558795035, + "grad_norm": 8.129398788716378, + "learning_rate": 9.662600129999098e-06, + "loss": 18.5446, + "step": 7883 + }, + { + "epoch": 0.14411318478439689, + "grad_norm": 6.196146066413878, + "learning_rate": 9.66249322665522e-06, + "loss": 17.2915, + "step": 7884 + }, + { + "epoch": 0.1441314639808434, + "grad_norm": 6.0383151707049025, + "learning_rate": 9.662386306969708e-06, + "loss": 17.2602, + "step": 7885 + }, + { + "epoch": 0.14414974317728993, + "grad_norm": 6.422942706740273, + "learning_rate": 9.66227937094294e-06, + "loss": 17.3709, + "step": 7886 + }, + { + "epoch": 0.14416802237373644, + "grad_norm": 7.858451025377491, + "learning_rate": 9.662172418575284e-06, + "loss": 18.1276, + "step": 7887 + }, + { + "epoch": 0.14418630157018297, + "grad_norm": 7.249390263111178, + "learning_rate": 9.662065449867117e-06, + "loss": 17.5421, + "step": 7888 + }, + { + "epoch": 0.1442045807666295, + "grad_norm": 7.125012968987952, + "learning_rate": 9.661958464818818e-06, + "loss": 17.6351, + "step": 7889 + }, + { + "epoch": 0.14422285996307602, + "grad_norm": 6.6766843944047745, + "learning_rate": 9.661851463430757e-06, + "loss": 17.5045, + "step": 7890 + }, + { + "epoch": 0.14424113915952255, + "grad_norm": 8.734256701885561, + "learning_rate": 9.661744445703314e-06, + "loss": 18.4634, + "step": 7891 + }, + { + "epoch": 0.14425941835596906, + "grad_norm": 6.883130009873722, + "learning_rate": 9.661637411636859e-06, + "loss": 17.9115, + "step": 7892 + }, + { + "epoch": 0.1442776975524156, + "grad_norm": 7.642350001935957, + "learning_rate": 9.66153036123177e-06, + "loss": 18.0374, + "step": 7893 + }, + { + "epoch": 0.14429597674886213, + "grad_norm": 7.02852331794176, + "learning_rate": 9.66142329448842e-06, + "loss": 17.8951, + "step": 7894 + }, + { + "epoch": 0.14431425594530864, + "grad_norm": 8.516363209070965, + "learning_rate": 9.66131621140719e-06, + "loss": 18.2965, + "step": 7895 + }, + { + "epoch": 0.14433253514175517, + "grad_norm": 6.603701088154492, + "learning_rate": 9.661209111988448e-06, + "loss": 17.6132, + "step": 7896 + }, + { + "epoch": 0.14435081433820168, + "grad_norm": 6.540115720505726, + "learning_rate": 9.661101996232572e-06, + "loss": 17.4681, + "step": 7897 + }, + { + "epoch": 0.14436909353464822, + "grad_norm": 6.274607422961523, + "learning_rate": 9.66099486413994e-06, + "loss": 17.4182, + "step": 7898 + }, + { + "epoch": 0.14438737273109475, + "grad_norm": 6.738978375188624, + "learning_rate": 9.660887715710923e-06, + "loss": 17.4482, + "step": 7899 + }, + { + "epoch": 0.14440565192754126, + "grad_norm": 7.9156443709594555, + "learning_rate": 9.6607805509459e-06, + "loss": 18.0224, + "step": 7900 + }, + { + "epoch": 0.1444239311239878, + "grad_norm": 6.811895160844932, + "learning_rate": 9.660673369845247e-06, + "loss": 17.6024, + "step": 7901 + }, + { + "epoch": 0.1444422103204343, + "grad_norm": 6.721687375262563, + "learning_rate": 9.660566172409339e-06, + "loss": 17.6821, + "step": 7902 + }, + { + "epoch": 0.14446048951688084, + "grad_norm": 7.7652829502734955, + "learning_rate": 
9.660458958638547e-06, + "loss": 17.9376, + "step": 7903 + }, + { + "epoch": 0.14447876871332735, + "grad_norm": 8.002762225554621, + "learning_rate": 9.660351728533256e-06, + "loss": 18.1968, + "step": 7904 + }, + { + "epoch": 0.14449704790977388, + "grad_norm": 6.918270285826691, + "learning_rate": 9.660244482093833e-06, + "loss": 17.6539, + "step": 7905 + }, + { + "epoch": 0.14451532710622042, + "grad_norm": 8.26402368486267, + "learning_rate": 9.660137219320658e-06, + "loss": 18.4072, + "step": 7906 + }, + { + "epoch": 0.14453360630266693, + "grad_norm": 7.355595966344457, + "learning_rate": 9.660029940214107e-06, + "loss": 18.0655, + "step": 7907 + }, + { + "epoch": 0.14455188549911346, + "grad_norm": 6.551187241907956, + "learning_rate": 9.659922644774555e-06, + "loss": 17.6814, + "step": 7908 + }, + { + "epoch": 0.14457016469555997, + "grad_norm": 6.360395957826397, + "learning_rate": 9.659815333002378e-06, + "loss": 17.6062, + "step": 7909 + }, + { + "epoch": 0.1445884438920065, + "grad_norm": 6.073180624066442, + "learning_rate": 9.659708004897953e-06, + "loss": 17.4986, + "step": 7910 + }, + { + "epoch": 0.14460672308845304, + "grad_norm": 7.76183195591155, + "learning_rate": 9.659600660461657e-06, + "loss": 18.102, + "step": 7911 + }, + { + "epoch": 0.14462500228489955, + "grad_norm": 8.64672075810826, + "learning_rate": 9.659493299693862e-06, + "loss": 17.8132, + "step": 7912 + }, + { + "epoch": 0.1446432814813461, + "grad_norm": 6.318985302818086, + "learning_rate": 9.65938592259495e-06, + "loss": 17.5563, + "step": 7913 + }, + { + "epoch": 0.1446615606777926, + "grad_norm": 6.467317018488494, + "learning_rate": 9.659278529165295e-06, + "loss": 17.3215, + "step": 7914 + }, + { + "epoch": 0.14467983987423913, + "grad_norm": 6.232911434779717, + "learning_rate": 9.659171119405272e-06, + "loss": 17.7199, + "step": 7915 + }, + { + "epoch": 0.14469811907068567, + "grad_norm": 6.687040938801778, + "learning_rate": 9.659063693315259e-06, + "loss": 17.5385, + "step": 7916 + }, + { + "epoch": 0.14471639826713217, + "grad_norm": 7.415660908423637, + "learning_rate": 9.658956250895631e-06, + "loss": 17.9783, + "step": 7917 + }, + { + "epoch": 0.1447346774635787, + "grad_norm": 7.98814069842345, + "learning_rate": 9.658848792146767e-06, + "loss": 18.0066, + "step": 7918 + }, + { + "epoch": 0.14475295666002522, + "grad_norm": 7.844484202682719, + "learning_rate": 9.658741317069042e-06, + "loss": 17.8192, + "step": 7919 + }, + { + "epoch": 0.14477123585647175, + "grad_norm": 6.42135409314389, + "learning_rate": 9.65863382566283e-06, + "loss": 17.5073, + "step": 7920 + }, + { + "epoch": 0.14478951505291826, + "grad_norm": 7.155972754662482, + "learning_rate": 9.658526317928515e-06, + "loss": 17.7637, + "step": 7921 + }, + { + "epoch": 0.1448077942493648, + "grad_norm": 7.226949915611771, + "learning_rate": 9.658418793866468e-06, + "loss": 17.7123, + "step": 7922 + }, + { + "epoch": 0.14482607344581133, + "grad_norm": 6.366337750877327, + "learning_rate": 9.658311253477066e-06, + "loss": 17.652, + "step": 7923 + }, + { + "epoch": 0.14484435264225784, + "grad_norm": 7.225688591426677, + "learning_rate": 9.658203696760688e-06, + "loss": 17.9749, + "step": 7924 + }, + { + "epoch": 0.14486263183870438, + "grad_norm": 7.059189808583485, + "learning_rate": 9.658096123717713e-06, + "loss": 17.8764, + "step": 7925 + }, + { + "epoch": 0.14488091103515088, + "grad_norm": 6.54725420805, + "learning_rate": 9.65798853434851e-06, + "loss": 17.533, + "step": 7926 + }, + { + "epoch": 0.14489919023159742, + 
"grad_norm": 8.026008665268165, + "learning_rate": 9.657880928653465e-06, + "loss": 18.3109, + "step": 7927 + }, + { + "epoch": 0.14491746942804395, + "grad_norm": 9.46751004244742, + "learning_rate": 9.657773306632951e-06, + "loss": 18.9406, + "step": 7928 + }, + { + "epoch": 0.14493574862449046, + "grad_norm": 6.59519055352195, + "learning_rate": 9.657665668287345e-06, + "loss": 17.6014, + "step": 7929 + }, + { + "epoch": 0.144954027820937, + "grad_norm": 7.99155854136064, + "learning_rate": 9.657558013617028e-06, + "loss": 18.1103, + "step": 7930 + }, + { + "epoch": 0.1449723070173835, + "grad_norm": 6.067982080334245, + "learning_rate": 9.657450342622371e-06, + "loss": 17.3093, + "step": 7931 + }, + { + "epoch": 0.14499058621383004, + "grad_norm": 6.81035303639027, + "learning_rate": 9.657342655303756e-06, + "loss": 17.6884, + "step": 7932 + }, + { + "epoch": 0.14500886541027658, + "grad_norm": 7.270363925155759, + "learning_rate": 9.657234951661558e-06, + "loss": 17.8257, + "step": 7933 + }, + { + "epoch": 0.14502714460672309, + "grad_norm": 5.945637240185427, + "learning_rate": 9.657127231696157e-06, + "loss": 17.4792, + "step": 7934 + }, + { + "epoch": 0.14504542380316962, + "grad_norm": 6.75617973397292, + "learning_rate": 9.657019495407929e-06, + "loss": 17.554, + "step": 7935 + }, + { + "epoch": 0.14506370299961613, + "grad_norm": 6.7116586245694405, + "learning_rate": 9.65691174279725e-06, + "loss": 17.6093, + "step": 7936 + }, + { + "epoch": 0.14508198219606266, + "grad_norm": 6.064911541197413, + "learning_rate": 9.656803973864502e-06, + "loss": 17.1471, + "step": 7937 + }, + { + "epoch": 0.14510026139250917, + "grad_norm": 8.827844568819508, + "learning_rate": 9.656696188610059e-06, + "loss": 18.6249, + "step": 7938 + }, + { + "epoch": 0.1451185405889557, + "grad_norm": 6.6565575863675965, + "learning_rate": 9.656588387034301e-06, + "loss": 17.5621, + "step": 7939 + }, + { + "epoch": 0.14513681978540224, + "grad_norm": 6.424663743895477, + "learning_rate": 9.656480569137602e-06, + "loss": 17.4067, + "step": 7940 + }, + { + "epoch": 0.14515509898184875, + "grad_norm": 7.788567478831424, + "learning_rate": 9.656372734920345e-06, + "loss": 18.0129, + "step": 7941 + }, + { + "epoch": 0.1451733781782953, + "grad_norm": 7.143982819053273, + "learning_rate": 9.656264884382905e-06, + "loss": 17.9295, + "step": 7942 + }, + { + "epoch": 0.1451916573747418, + "grad_norm": 9.235043443693883, + "learning_rate": 9.65615701752566e-06, + "loss": 17.9051, + "step": 7943 + }, + { + "epoch": 0.14520993657118833, + "grad_norm": 6.128521553350904, + "learning_rate": 9.65604913434899e-06, + "loss": 17.4178, + "step": 7944 + }, + { + "epoch": 0.14522821576763487, + "grad_norm": 6.962582465150469, + "learning_rate": 9.655941234853272e-06, + "loss": 17.841, + "step": 7945 + }, + { + "epoch": 0.14524649496408137, + "grad_norm": 6.385896950318575, + "learning_rate": 9.655833319038883e-06, + "loss": 17.4645, + "step": 7946 + }, + { + "epoch": 0.1452647741605279, + "grad_norm": 7.562271523019747, + "learning_rate": 9.655725386906202e-06, + "loss": 18.0817, + "step": 7947 + }, + { + "epoch": 0.14528305335697442, + "grad_norm": 7.199204285470184, + "learning_rate": 9.655617438455608e-06, + "loss": 17.69, + "step": 7948 + }, + { + "epoch": 0.14530133255342095, + "grad_norm": 6.840929518434979, + "learning_rate": 9.655509473687479e-06, + "loss": 17.4528, + "step": 7949 + }, + { + "epoch": 0.1453196117498675, + "grad_norm": 6.711427011156019, + "learning_rate": 9.655401492602192e-06, + "loss": 17.4159, + 
"step": 7950 + }, + { + "epoch": 0.145337890946314, + "grad_norm": 5.965039206783731, + "learning_rate": 9.655293495200128e-06, + "loss": 17.2832, + "step": 7951 + }, + { + "epoch": 0.14535617014276053, + "grad_norm": 7.310858054849152, + "learning_rate": 9.655185481481663e-06, + "loss": 17.6517, + "step": 7952 + }, + { + "epoch": 0.14537444933920704, + "grad_norm": 6.977513673077216, + "learning_rate": 9.655077451447179e-06, + "loss": 17.6552, + "step": 7953 + }, + { + "epoch": 0.14539272853565358, + "grad_norm": 5.821779274738296, + "learning_rate": 9.654969405097053e-06, + "loss": 17.3532, + "step": 7954 + }, + { + "epoch": 0.14541100773210008, + "grad_norm": 6.758713108971858, + "learning_rate": 9.654861342431661e-06, + "loss": 17.5775, + "step": 7955 + }, + { + "epoch": 0.14542928692854662, + "grad_norm": 7.863685222010629, + "learning_rate": 9.654753263451385e-06, + "loss": 17.8642, + "step": 7956 + }, + { + "epoch": 0.14544756612499316, + "grad_norm": 9.153913203782722, + "learning_rate": 9.654645168156601e-06, + "loss": 18.5971, + "step": 7957 + }, + { + "epoch": 0.14546584532143966, + "grad_norm": 6.745258601713347, + "learning_rate": 9.654537056547691e-06, + "loss": 17.6042, + "step": 7958 + }, + { + "epoch": 0.1454841245178862, + "grad_norm": 5.9247000548100734, + "learning_rate": 9.654428928625033e-06, + "loss": 17.271, + "step": 7959 + }, + { + "epoch": 0.1455024037143327, + "grad_norm": 7.441009387067109, + "learning_rate": 9.654320784389004e-06, + "loss": 17.9059, + "step": 7960 + }, + { + "epoch": 0.14552068291077924, + "grad_norm": 6.891244859298569, + "learning_rate": 9.654212623839985e-06, + "loss": 18.0463, + "step": 7961 + }, + { + "epoch": 0.14553896210722578, + "grad_norm": 6.297664198439215, + "learning_rate": 9.654104446978357e-06, + "loss": 17.7346, + "step": 7962 + }, + { + "epoch": 0.14555724130367229, + "grad_norm": 8.043669055116208, + "learning_rate": 9.653996253804493e-06, + "loss": 17.9576, + "step": 7963 + }, + { + "epoch": 0.14557552050011882, + "grad_norm": 7.57704490359053, + "learning_rate": 9.653888044318778e-06, + "loss": 18.1225, + "step": 7964 + }, + { + "epoch": 0.14559379969656533, + "grad_norm": 7.788256163236495, + "learning_rate": 9.65377981852159e-06, + "loss": 17.9134, + "step": 7965 + }, + { + "epoch": 0.14561207889301186, + "grad_norm": 6.710542169959897, + "learning_rate": 9.653671576413306e-06, + "loss": 17.732, + "step": 7966 + }, + { + "epoch": 0.1456303580894584, + "grad_norm": 6.024146392515028, + "learning_rate": 9.653563317994307e-06, + "loss": 17.4358, + "step": 7967 + }, + { + "epoch": 0.1456486372859049, + "grad_norm": 6.633504266152885, + "learning_rate": 9.653455043264974e-06, + "loss": 17.4505, + "step": 7968 + }, + { + "epoch": 0.14566691648235144, + "grad_norm": 6.211227687837813, + "learning_rate": 9.653346752225683e-06, + "loss": 17.4499, + "step": 7969 + }, + { + "epoch": 0.14568519567879795, + "grad_norm": 8.400148275059234, + "learning_rate": 9.653238444876817e-06, + "loss": 18.3171, + "step": 7970 + }, + { + "epoch": 0.1457034748752445, + "grad_norm": 7.739331881310937, + "learning_rate": 9.653130121218754e-06, + "loss": 18.3042, + "step": 7971 + }, + { + "epoch": 0.145721754071691, + "grad_norm": 7.955012682984678, + "learning_rate": 9.653021781251872e-06, + "loss": 17.9062, + "step": 7972 + }, + { + "epoch": 0.14574003326813753, + "grad_norm": 6.259595286305838, + "learning_rate": 9.652913424976553e-06, + "loss": 17.4814, + "step": 7973 + }, + { + "epoch": 0.14575831246458407, + "grad_norm": 6.078297879047728, + 
"learning_rate": 9.652805052393178e-06, + "loss": 17.4524, + "step": 7974 + }, + { + "epoch": 0.14577659166103057, + "grad_norm": 9.375932177975441, + "learning_rate": 9.652696663502123e-06, + "loss": 18.5682, + "step": 7975 + }, + { + "epoch": 0.1457948708574771, + "grad_norm": 6.257526122207114, + "learning_rate": 9.65258825830377e-06, + "loss": 17.4044, + "step": 7976 + }, + { + "epoch": 0.14581315005392362, + "grad_norm": 7.945139211966594, + "learning_rate": 9.652479836798501e-06, + "loss": 18.2119, + "step": 7977 + }, + { + "epoch": 0.14583142925037015, + "grad_norm": 7.520533804922809, + "learning_rate": 9.65237139898669e-06, + "loss": 17.8324, + "step": 7978 + }, + { + "epoch": 0.1458497084468167, + "grad_norm": 7.5445427347964, + "learning_rate": 9.652262944868724e-06, + "loss": 18.0339, + "step": 7979 + }, + { + "epoch": 0.1458679876432632, + "grad_norm": 5.744613699632315, + "learning_rate": 9.65215447444498e-06, + "loss": 17.1529, + "step": 7980 + }, + { + "epoch": 0.14588626683970973, + "grad_norm": 7.102725612497753, + "learning_rate": 9.652045987715838e-06, + "loss": 17.7757, + "step": 7981 + }, + { + "epoch": 0.14590454603615624, + "grad_norm": 9.217413539931432, + "learning_rate": 9.651937484681678e-06, + "loss": 18.5535, + "step": 7982 + }, + { + "epoch": 0.14592282523260278, + "grad_norm": 7.146597866539307, + "learning_rate": 9.651828965342882e-06, + "loss": 17.6874, + "step": 7983 + }, + { + "epoch": 0.1459411044290493, + "grad_norm": 5.622544955129633, + "learning_rate": 9.651720429699827e-06, + "loss": 17.1046, + "step": 7984 + }, + { + "epoch": 0.14595938362549582, + "grad_norm": 7.543782899563225, + "learning_rate": 9.651611877752897e-06, + "loss": 17.7099, + "step": 7985 + }, + { + "epoch": 0.14597766282194236, + "grad_norm": 7.342211067682582, + "learning_rate": 9.65150330950247e-06, + "loss": 17.8767, + "step": 7986 + }, + { + "epoch": 0.14599594201838886, + "grad_norm": 7.435391159299244, + "learning_rate": 9.651394724948929e-06, + "loss": 17.9265, + "step": 7987 + }, + { + "epoch": 0.1460142212148354, + "grad_norm": 6.28794682429356, + "learning_rate": 9.651286124092653e-06, + "loss": 17.5435, + "step": 7988 + }, + { + "epoch": 0.1460325004112819, + "grad_norm": 6.252197394803422, + "learning_rate": 9.651177506934022e-06, + "loss": 17.2206, + "step": 7989 + }, + { + "epoch": 0.14605077960772844, + "grad_norm": 7.09374348651782, + "learning_rate": 9.651068873473417e-06, + "loss": 17.8473, + "step": 7990 + }, + { + "epoch": 0.14606905880417498, + "grad_norm": 7.013619760961217, + "learning_rate": 9.65096022371122e-06, + "loss": 17.5454, + "step": 7991 + }, + { + "epoch": 0.14608733800062149, + "grad_norm": 7.179235417174337, + "learning_rate": 9.65085155764781e-06, + "loss": 17.66, + "step": 7992 + }, + { + "epoch": 0.14610561719706802, + "grad_norm": 5.4883918065536434, + "learning_rate": 9.65074287528357e-06, + "loss": 16.9709, + "step": 7993 + }, + { + "epoch": 0.14612389639351453, + "grad_norm": 8.699244438136956, + "learning_rate": 9.65063417661888e-06, + "loss": 18.3041, + "step": 7994 + }, + { + "epoch": 0.14614217558996107, + "grad_norm": 7.01955896623706, + "learning_rate": 9.65052546165412e-06, + "loss": 17.5045, + "step": 7995 + }, + { + "epoch": 0.1461604547864076, + "grad_norm": 7.715833247104848, + "learning_rate": 9.650416730389672e-06, + "loss": 18.2539, + "step": 7996 + }, + { + "epoch": 0.1461787339828541, + "grad_norm": 7.261767826837706, + "learning_rate": 9.650307982825917e-06, + "loss": 17.5952, + "step": 7997 + }, + { + "epoch": 
0.14619701317930064, + "grad_norm": 6.385983497206502, + "learning_rate": 9.650199218963236e-06, + "loss": 17.6332, + "step": 7998 + }, + { + "epoch": 0.14621529237574715, + "grad_norm": 8.344318378430048, + "learning_rate": 9.650090438802012e-06, + "loss": 18.0755, + "step": 7999 + }, + { + "epoch": 0.1462335715721937, + "grad_norm": 6.472024326689775, + "learning_rate": 9.649981642342621e-06, + "loss": 17.4189, + "step": 8000 + }, + { + "epoch": 0.14625185076864022, + "grad_norm": 6.715048457193912, + "learning_rate": 9.64987282958545e-06, + "loss": 17.6987, + "step": 8001 + }, + { + "epoch": 0.14627012996508673, + "grad_norm": 6.13079996475156, + "learning_rate": 9.649764000530878e-06, + "loss": 17.2704, + "step": 8002 + }, + { + "epoch": 0.14628840916153327, + "grad_norm": 6.671879485010782, + "learning_rate": 9.649655155179287e-06, + "loss": 17.36, + "step": 8003 + }, + { + "epoch": 0.14630668835797978, + "grad_norm": 7.370770516624002, + "learning_rate": 9.649546293531057e-06, + "loss": 17.542, + "step": 8004 + }, + { + "epoch": 0.1463249675544263, + "grad_norm": 8.079342234334176, + "learning_rate": 9.64943741558657e-06, + "loss": 18.2899, + "step": 8005 + }, + { + "epoch": 0.14634324675087282, + "grad_norm": 6.903482231699645, + "learning_rate": 9.64932852134621e-06, + "loss": 17.8285, + "step": 8006 + }, + { + "epoch": 0.14636152594731935, + "grad_norm": 10.070179892659032, + "learning_rate": 9.649219610810359e-06, + "loss": 18.9447, + "step": 8007 + }, + { + "epoch": 0.1463798051437659, + "grad_norm": 7.730014505390262, + "learning_rate": 9.649110683979394e-06, + "loss": 18.2886, + "step": 8008 + }, + { + "epoch": 0.1463980843402124, + "grad_norm": 8.30281918365547, + "learning_rate": 9.6490017408537e-06, + "loss": 18.4275, + "step": 8009 + }, + { + "epoch": 0.14641636353665893, + "grad_norm": 7.1197873864420895, + "learning_rate": 9.648892781433657e-06, + "loss": 17.7806, + "step": 8010 + }, + { + "epoch": 0.14643464273310544, + "grad_norm": 8.427168269867161, + "learning_rate": 9.64878380571965e-06, + "loss": 18.1895, + "step": 8011 + }, + { + "epoch": 0.14645292192955198, + "grad_norm": 6.783781928597438, + "learning_rate": 9.648674813712059e-06, + "loss": 17.9056, + "step": 8012 + }, + { + "epoch": 0.1464712011259985, + "grad_norm": 6.678274522360001, + "learning_rate": 9.648565805411265e-06, + "loss": 17.4284, + "step": 8013 + }, + { + "epoch": 0.14648948032244502, + "grad_norm": 7.8352790288474585, + "learning_rate": 9.648456780817651e-06, + "loss": 18.2195, + "step": 8014 + }, + { + "epoch": 0.14650775951889156, + "grad_norm": 8.09375938615011, + "learning_rate": 9.648347739931603e-06, + "loss": 17.9966, + "step": 8015 + }, + { + "epoch": 0.14652603871533806, + "grad_norm": 7.659916058842947, + "learning_rate": 9.648238682753497e-06, + "loss": 17.8873, + "step": 8016 + }, + { + "epoch": 0.1465443179117846, + "grad_norm": 5.915365313583533, + "learning_rate": 9.648129609283716e-06, + "loss": 17.3144, + "step": 8017 + }, + { + "epoch": 0.14656259710823114, + "grad_norm": 7.193445790671711, + "learning_rate": 9.648020519522647e-06, + "loss": 17.7632, + "step": 8018 + }, + { + "epoch": 0.14658087630467764, + "grad_norm": 7.0641124489645, + "learning_rate": 9.647911413470668e-06, + "loss": 17.9136, + "step": 8019 + }, + { + "epoch": 0.14659915550112418, + "grad_norm": 5.959791630226095, + "learning_rate": 9.647802291128163e-06, + "loss": 17.2778, + "step": 8020 + }, + { + "epoch": 0.1466174346975707, + "grad_norm": 6.2009743065833485, + "learning_rate": 9.647693152495514e-06, + 
"loss": 17.4677, + "step": 8021 + }, + { + "epoch": 0.14663571389401722, + "grad_norm": 7.8164225640752045, + "learning_rate": 9.647583997573105e-06, + "loss": 18.0936, + "step": 8022 + }, + { + "epoch": 0.14665399309046373, + "grad_norm": 5.6426609914409065, + "learning_rate": 9.647474826361316e-06, + "loss": 17.1358, + "step": 8023 + }, + { + "epoch": 0.14667227228691027, + "grad_norm": 7.281189933670641, + "learning_rate": 9.64736563886053e-06, + "loss": 18.0002, + "step": 8024 + }, + { + "epoch": 0.1466905514833568, + "grad_norm": 7.156884754375371, + "learning_rate": 9.647256435071133e-06, + "loss": 17.9063, + "step": 8025 + }, + { + "epoch": 0.1467088306798033, + "grad_norm": 5.953496097128827, + "learning_rate": 9.647147214993504e-06, + "loss": 17.1232, + "step": 8026 + }, + { + "epoch": 0.14672710987624984, + "grad_norm": 7.920627054244241, + "learning_rate": 9.647037978628029e-06, + "loss": 17.8285, + "step": 8027 + }, + { + "epoch": 0.14674538907269635, + "grad_norm": 10.69892625842698, + "learning_rate": 9.646928725975087e-06, + "loss": 18.6059, + "step": 8028 + }, + { + "epoch": 0.1467636682691429, + "grad_norm": 6.3435637466396715, + "learning_rate": 9.646819457035064e-06, + "loss": 17.3268, + "step": 8029 + }, + { + "epoch": 0.14678194746558942, + "grad_norm": 7.240777143096906, + "learning_rate": 9.646710171808342e-06, + "loss": 18.0841, + "step": 8030 + }, + { + "epoch": 0.14680022666203593, + "grad_norm": 8.367751790399302, + "learning_rate": 9.646600870295305e-06, + "loss": 18.0374, + "step": 8031 + }, + { + "epoch": 0.14681850585848247, + "grad_norm": 6.4560850582753275, + "learning_rate": 9.646491552496336e-06, + "loss": 17.5513, + "step": 8032 + }, + { + "epoch": 0.14683678505492898, + "grad_norm": 7.321443836457617, + "learning_rate": 9.646382218411813e-06, + "loss": 18.0615, + "step": 8033 + }, + { + "epoch": 0.1468550642513755, + "grad_norm": 6.3652537476119555, + "learning_rate": 9.646272868042129e-06, + "loss": 17.4165, + "step": 8034 + }, + { + "epoch": 0.14687334344782205, + "grad_norm": 7.594431029046291, + "learning_rate": 9.646163501387658e-06, + "loss": 17.8259, + "step": 8035 + }, + { + "epoch": 0.14689162264426855, + "grad_norm": 7.19503817395823, + "learning_rate": 9.646054118448787e-06, + "loss": 17.8467, + "step": 8036 + }, + { + "epoch": 0.1469099018407151, + "grad_norm": 10.580051274042303, + "learning_rate": 9.645944719225902e-06, + "loss": 18.1914, + "step": 8037 + }, + { + "epoch": 0.1469281810371616, + "grad_norm": 8.852314628875943, + "learning_rate": 9.645835303719382e-06, + "loss": 18.2564, + "step": 8038 + }, + { + "epoch": 0.14694646023360813, + "grad_norm": 6.427733925668023, + "learning_rate": 9.645725871929614e-06, + "loss": 17.3538, + "step": 8039 + }, + { + "epoch": 0.14696473943005464, + "grad_norm": 7.311157127937562, + "learning_rate": 9.645616423856978e-06, + "loss": 17.7997, + "step": 8040 + }, + { + "epoch": 0.14698301862650118, + "grad_norm": 6.070143885303128, + "learning_rate": 9.64550695950186e-06, + "loss": 17.2713, + "step": 8041 + }, + { + "epoch": 0.1470012978229477, + "grad_norm": 5.817498232664236, + "learning_rate": 9.645397478864645e-06, + "loss": 17.4812, + "step": 8042 + }, + { + "epoch": 0.14701957701939422, + "grad_norm": 6.67491314100426, + "learning_rate": 9.645287981945712e-06, + "loss": 17.652, + "step": 8043 + }, + { + "epoch": 0.14703785621584076, + "grad_norm": 7.288494360246087, + "learning_rate": 9.64517846874545e-06, + "loss": 18.0519, + "step": 8044 + }, + { + "epoch": 0.14705613541228726, + "grad_norm": 
7.709262361894076, + "learning_rate": 9.64506893926424e-06, + "loss": 17.821, + "step": 8045 + }, + { + "epoch": 0.1470744146087338, + "grad_norm": 7.2491534881899415, + "learning_rate": 9.644959393502467e-06, + "loss": 17.957, + "step": 8046 + }, + { + "epoch": 0.14709269380518034, + "grad_norm": 8.27859495332456, + "learning_rate": 9.644849831460513e-06, + "loss": 18.3226, + "step": 8047 + }, + { + "epoch": 0.14711097300162684, + "grad_norm": 6.229448535308603, + "learning_rate": 9.644740253138765e-06, + "loss": 17.3666, + "step": 8048 + }, + { + "epoch": 0.14712925219807338, + "grad_norm": 8.281485590991695, + "learning_rate": 9.644630658537604e-06, + "loss": 18.089, + "step": 8049 + }, + { + "epoch": 0.1471475313945199, + "grad_norm": 7.260511924938508, + "learning_rate": 9.644521047657416e-06, + "loss": 17.8782, + "step": 8050 + }, + { + "epoch": 0.14716581059096642, + "grad_norm": 8.075624424086456, + "learning_rate": 9.644411420498585e-06, + "loss": 17.9388, + "step": 8051 + }, + { + "epoch": 0.14718408978741296, + "grad_norm": 7.205715603077248, + "learning_rate": 9.644301777061495e-06, + "loss": 18.0204, + "step": 8052 + }, + { + "epoch": 0.14720236898385947, + "grad_norm": 7.077535799197512, + "learning_rate": 9.64419211734653e-06, + "loss": 17.7748, + "step": 8053 + }, + { + "epoch": 0.147220648180306, + "grad_norm": 9.997143239685304, + "learning_rate": 9.644082441354075e-06, + "loss": 17.8055, + "step": 8054 + }, + { + "epoch": 0.1472389273767525, + "grad_norm": 8.951299796489199, + "learning_rate": 9.643972749084513e-06, + "loss": 18.4639, + "step": 8055 + }, + { + "epoch": 0.14725720657319905, + "grad_norm": 6.512609840491759, + "learning_rate": 9.643863040538231e-06, + "loss": 17.4196, + "step": 8056 + }, + { + "epoch": 0.14727548576964555, + "grad_norm": 9.13164669058537, + "learning_rate": 9.64375331571561e-06, + "loss": 17.8644, + "step": 8057 + }, + { + "epoch": 0.1472937649660921, + "grad_norm": 6.442756046873929, + "learning_rate": 9.643643574617039e-06, + "loss": 17.5869, + "step": 8058 + }, + { + "epoch": 0.14731204416253862, + "grad_norm": 6.495695349937012, + "learning_rate": 9.6435338172429e-06, + "loss": 17.464, + "step": 8059 + }, + { + "epoch": 0.14733032335898513, + "grad_norm": 7.259579718403372, + "learning_rate": 9.643424043593576e-06, + "loss": 17.7688, + "step": 8060 + }, + { + "epoch": 0.14734860255543167, + "grad_norm": 7.712910100182315, + "learning_rate": 9.643314253669455e-06, + "loss": 17.8725, + "step": 8061 + }, + { + "epoch": 0.14736688175187818, + "grad_norm": 7.739286195484188, + "learning_rate": 9.643204447470922e-06, + "loss": 17.7906, + "step": 8062 + }, + { + "epoch": 0.1473851609483247, + "grad_norm": 7.248123211068903, + "learning_rate": 9.643094624998357e-06, + "loss": 17.7043, + "step": 8063 + }, + { + "epoch": 0.14740344014477125, + "grad_norm": 7.188458907414833, + "learning_rate": 9.64298478625215e-06, + "loss": 17.6625, + "step": 8064 + }, + { + "epoch": 0.14742171934121776, + "grad_norm": 7.148484029794683, + "learning_rate": 9.642874931232684e-06, + "loss": 17.8504, + "step": 8065 + }, + { + "epoch": 0.1474399985376643, + "grad_norm": 7.387959283440316, + "learning_rate": 9.642765059940344e-06, + "loss": 18.3822, + "step": 8066 + }, + { + "epoch": 0.1474582777341108, + "grad_norm": 8.856985975380033, + "learning_rate": 9.642655172375516e-06, + "loss": 18.7405, + "step": 8067 + }, + { + "epoch": 0.14747655693055733, + "grad_norm": 7.766467161611203, + "learning_rate": 9.642545268538585e-06, + "loss": 17.7352, + "step": 8068 + }, + 
{ + "epoch": 0.14749483612700387, + "grad_norm": 6.911400134219339, + "learning_rate": 9.642435348429935e-06, + "loss": 17.7442, + "step": 8069 + }, + { + "epoch": 0.14751311532345038, + "grad_norm": 8.535640558915466, + "learning_rate": 9.642325412049952e-06, + "loss": 18.199, + "step": 8070 + }, + { + "epoch": 0.1475313945198969, + "grad_norm": 8.537335326600212, + "learning_rate": 9.64221545939902e-06, + "loss": 17.82, + "step": 8071 + }, + { + "epoch": 0.14754967371634342, + "grad_norm": 6.633873060369528, + "learning_rate": 9.642105490477527e-06, + "loss": 17.7843, + "step": 8072 + }, + { + "epoch": 0.14756795291278996, + "grad_norm": 6.4278340117422905, + "learning_rate": 9.641995505285858e-06, + "loss": 17.3694, + "step": 8073 + }, + { + "epoch": 0.14758623210923646, + "grad_norm": 7.069412562342773, + "learning_rate": 9.641885503824395e-06, + "loss": 18.1548, + "step": 8074 + }, + { + "epoch": 0.147604511305683, + "grad_norm": 6.767035618080706, + "learning_rate": 9.64177548609353e-06, + "loss": 17.9882, + "step": 8075 + }, + { + "epoch": 0.14762279050212954, + "grad_norm": 7.903839015408739, + "learning_rate": 9.641665452093641e-06, + "loss": 18.1572, + "step": 8076 + }, + { + "epoch": 0.14764106969857604, + "grad_norm": 7.2825054707225405, + "learning_rate": 9.641555401825118e-06, + "loss": 17.8129, + "step": 8077 + }, + { + "epoch": 0.14765934889502258, + "grad_norm": 6.761561047503617, + "learning_rate": 9.641445335288346e-06, + "loss": 17.7295, + "step": 8078 + }, + { + "epoch": 0.1476776280914691, + "grad_norm": 6.618816072053667, + "learning_rate": 9.641335252483712e-06, + "loss": 17.4507, + "step": 8079 + }, + { + "epoch": 0.14769590728791562, + "grad_norm": 6.487847127290106, + "learning_rate": 9.6412251534116e-06, + "loss": 17.3601, + "step": 8080 + }, + { + "epoch": 0.14771418648436216, + "grad_norm": 6.9250736911951725, + "learning_rate": 9.641115038072397e-06, + "loss": 17.8905, + "step": 8081 + }, + { + "epoch": 0.14773246568080867, + "grad_norm": 8.476983225022746, + "learning_rate": 9.641004906466488e-06, + "loss": 18.3762, + "step": 8082 + }, + { + "epoch": 0.1477507448772552, + "grad_norm": 8.761072999506217, + "learning_rate": 9.64089475859426e-06, + "loss": 18.624, + "step": 8083 + }, + { + "epoch": 0.1477690240737017, + "grad_norm": 6.328830632630538, + "learning_rate": 9.6407845944561e-06, + "loss": 17.3398, + "step": 8084 + }, + { + "epoch": 0.14778730327014825, + "grad_norm": 8.9338263657113, + "learning_rate": 9.640674414052391e-06, + "loss": 18.9664, + "step": 8085 + }, + { + "epoch": 0.14780558246659478, + "grad_norm": 7.661138891664235, + "learning_rate": 9.640564217383522e-06, + "loss": 18.3349, + "step": 8086 + }, + { + "epoch": 0.1478238616630413, + "grad_norm": 7.187970700208174, + "learning_rate": 9.640454004449877e-06, + "loss": 17.8618, + "step": 8087 + }, + { + "epoch": 0.14784214085948783, + "grad_norm": 5.871236125636451, + "learning_rate": 9.640343775251844e-06, + "loss": 17.2336, + "step": 8088 + }, + { + "epoch": 0.14786042005593433, + "grad_norm": 7.393317643417872, + "learning_rate": 9.640233529789806e-06, + "loss": 17.8336, + "step": 8089 + }, + { + "epoch": 0.14787869925238087, + "grad_norm": 9.451071523509363, + "learning_rate": 9.640123268064156e-06, + "loss": 18.0403, + "step": 8090 + }, + { + "epoch": 0.14789697844882738, + "grad_norm": 6.5374182020368, + "learning_rate": 9.640012990075274e-06, + "loss": 17.6703, + "step": 8091 + }, + { + "epoch": 0.1479152576452739, + "grad_norm": 8.233324766017134, + "learning_rate": 
9.63990269582355e-06, + "loss": 17.9709, + "step": 8092 + }, + { + "epoch": 0.14793353684172045, + "grad_norm": 5.731050775772712, + "learning_rate": 9.63979238530937e-06, + "loss": 17.242, + "step": 8093 + }, + { + "epoch": 0.14795181603816696, + "grad_norm": 6.218482515209717, + "learning_rate": 9.63968205853312e-06, + "loss": 17.3829, + "step": 8094 + }, + { + "epoch": 0.1479700952346135, + "grad_norm": 6.736553892820769, + "learning_rate": 9.639571715495189e-06, + "loss": 17.6474, + "step": 8095 + }, + { + "epoch": 0.14798837443106, + "grad_norm": 7.513901910688545, + "learning_rate": 9.639461356195958e-06, + "loss": 18.1575, + "step": 8096 + }, + { + "epoch": 0.14800665362750653, + "grad_norm": 10.488684500822124, + "learning_rate": 9.63935098063582e-06, + "loss": 17.6842, + "step": 8097 + }, + { + "epoch": 0.14802493282395307, + "grad_norm": 8.081786160838991, + "learning_rate": 9.63924058881516e-06, + "loss": 18.2097, + "step": 8098 + }, + { + "epoch": 0.14804321202039958, + "grad_norm": 8.68145308215769, + "learning_rate": 9.639130180734362e-06, + "loss": 18.1371, + "step": 8099 + }, + { + "epoch": 0.14806149121684611, + "grad_norm": 8.067734968278177, + "learning_rate": 9.639019756393817e-06, + "loss": 17.8399, + "step": 8100 + }, + { + "epoch": 0.14807977041329262, + "grad_norm": 6.382101361188045, + "learning_rate": 9.63890931579391e-06, + "loss": 17.3079, + "step": 8101 + }, + { + "epoch": 0.14809804960973916, + "grad_norm": 5.183982865642566, + "learning_rate": 9.638798858935028e-06, + "loss": 16.8802, + "step": 8102 + }, + { + "epoch": 0.1481163288061857, + "grad_norm": 6.6951633909441535, + "learning_rate": 9.638688385817558e-06, + "loss": 17.7609, + "step": 8103 + }, + { + "epoch": 0.1481346080026322, + "grad_norm": 6.194876638611268, + "learning_rate": 9.63857789644189e-06, + "loss": 17.3454, + "step": 8104 + }, + { + "epoch": 0.14815288719907874, + "grad_norm": 5.317406037764646, + "learning_rate": 9.638467390808405e-06, + "loss": 17.1418, + "step": 8105 + }, + { + "epoch": 0.14817116639552524, + "grad_norm": 6.895413692636472, + "learning_rate": 9.638356868917497e-06, + "loss": 17.577, + "step": 8106 + }, + { + "epoch": 0.14818944559197178, + "grad_norm": 8.008443410440348, + "learning_rate": 9.638246330769552e-06, + "loss": 18.035, + "step": 8107 + }, + { + "epoch": 0.1482077247884183, + "grad_norm": 5.74120716951446, + "learning_rate": 9.638135776364954e-06, + "loss": 17.203, + "step": 8108 + }, + { + "epoch": 0.14822600398486482, + "grad_norm": 8.54913912805209, + "learning_rate": 9.638025205704094e-06, + "loss": 18.1013, + "step": 8109 + }, + { + "epoch": 0.14824428318131136, + "grad_norm": 8.312580974732796, + "learning_rate": 9.637914618787356e-06, + "loss": 18.1672, + "step": 8110 + }, + { + "epoch": 0.14826256237775787, + "grad_norm": 6.738350444607828, + "learning_rate": 9.63780401561513e-06, + "loss": 17.7801, + "step": 8111 + }, + { + "epoch": 0.1482808415742044, + "grad_norm": 7.886413767494075, + "learning_rate": 9.637693396187806e-06, + "loss": 17.9368, + "step": 8112 + }, + { + "epoch": 0.1482991207706509, + "grad_norm": 7.875815936487451, + "learning_rate": 9.637582760505767e-06, + "loss": 17.3937, + "step": 8113 + }, + { + "epoch": 0.14831739996709745, + "grad_norm": 6.596152726567714, + "learning_rate": 9.637472108569404e-06, + "loss": 17.5027, + "step": 8114 + }, + { + "epoch": 0.14833567916354398, + "grad_norm": 6.710516305595572, + "learning_rate": 9.637361440379102e-06, + "loss": 17.8169, + "step": 8115 + }, + { + "epoch": 0.1483539583599905, + 
"grad_norm": 6.928265849809754, + "learning_rate": 9.637250755935252e-06, + "loss": 17.3836, + "step": 8116 + }, + { + "epoch": 0.14837223755643703, + "grad_norm": 6.500901711677311, + "learning_rate": 9.63714005523824e-06, + "loss": 17.4885, + "step": 8117 + }, + { + "epoch": 0.14839051675288353, + "grad_norm": 7.405538827349513, + "learning_rate": 9.637029338288454e-06, + "loss": 17.837, + "step": 8118 + }, + { + "epoch": 0.14840879594933007, + "grad_norm": 7.252059624928057, + "learning_rate": 9.636918605086283e-06, + "loss": 17.5855, + "step": 8119 + }, + { + "epoch": 0.1484270751457766, + "grad_norm": 6.926791768096578, + "learning_rate": 9.636807855632115e-06, + "loss": 17.6411, + "step": 8120 + }, + { + "epoch": 0.1484453543422231, + "grad_norm": 6.5719604807225895, + "learning_rate": 9.636697089926338e-06, + "loss": 17.5774, + "step": 8121 + }, + { + "epoch": 0.14846363353866965, + "grad_norm": 6.662271402853505, + "learning_rate": 9.636586307969338e-06, + "loss": 17.7661, + "step": 8122 + }, + { + "epoch": 0.14848191273511616, + "grad_norm": 7.132505202028377, + "learning_rate": 9.636475509761507e-06, + "loss": 17.6119, + "step": 8123 + }, + { + "epoch": 0.1485001919315627, + "grad_norm": 8.094454678992484, + "learning_rate": 9.636364695303234e-06, + "loss": 17.5317, + "step": 8124 + }, + { + "epoch": 0.1485184711280092, + "grad_norm": 7.831661732240495, + "learning_rate": 9.6362538645949e-06, + "loss": 17.7834, + "step": 8125 + }, + { + "epoch": 0.14853675032445574, + "grad_norm": 6.011204911661293, + "learning_rate": 9.636143017636901e-06, + "loss": 17.1371, + "step": 8126 + }, + { + "epoch": 0.14855502952090227, + "grad_norm": 7.7689813298236645, + "learning_rate": 9.636032154429624e-06, + "loss": 18.2324, + "step": 8127 + }, + { + "epoch": 0.14857330871734878, + "grad_norm": 6.4227869864023805, + "learning_rate": 9.635921274973457e-06, + "loss": 17.782, + "step": 8128 + }, + { + "epoch": 0.14859158791379531, + "grad_norm": 7.21488812752111, + "learning_rate": 9.635810379268786e-06, + "loss": 17.543, + "step": 8129 + }, + { + "epoch": 0.14860986711024182, + "grad_norm": 7.942197203982288, + "learning_rate": 9.635699467316002e-06, + "loss": 17.9881, + "step": 8130 + }, + { + "epoch": 0.14862814630668836, + "grad_norm": 8.764202749204488, + "learning_rate": 9.635588539115495e-06, + "loss": 18.4816, + "step": 8131 + }, + { + "epoch": 0.1486464255031349, + "grad_norm": 7.777306134782962, + "learning_rate": 9.635477594667653e-06, + "loss": 18.1013, + "step": 8132 + }, + { + "epoch": 0.1486647046995814, + "grad_norm": 7.83180863384052, + "learning_rate": 9.635366633972863e-06, + "loss": 17.3537, + "step": 8133 + }, + { + "epoch": 0.14868298389602794, + "grad_norm": 6.262583354547982, + "learning_rate": 9.635255657031515e-06, + "loss": 17.5054, + "step": 8134 + }, + { + "epoch": 0.14870126309247444, + "grad_norm": 7.658041012486796, + "learning_rate": 9.635144663843999e-06, + "loss": 17.9531, + "step": 8135 + }, + { + "epoch": 0.14871954228892098, + "grad_norm": 6.808612918804337, + "learning_rate": 9.635033654410703e-06, + "loss": 17.7325, + "step": 8136 + }, + { + "epoch": 0.14873782148536752, + "grad_norm": 7.258150622509804, + "learning_rate": 9.634922628732015e-06, + "loss": 17.7368, + "step": 8137 + }, + { + "epoch": 0.14875610068181402, + "grad_norm": 6.9982574431914575, + "learning_rate": 9.634811586808327e-06, + "loss": 17.6144, + "step": 8138 + }, + { + "epoch": 0.14877437987826056, + "grad_norm": 7.857884053169739, + "learning_rate": 9.634700528640026e-06, + "loss": 
17.9164, + "step": 8139 + }, + { + "epoch": 0.14879265907470707, + "grad_norm": 7.452730871277512, + "learning_rate": 9.634589454227502e-06, + "loss": 17.6904, + "step": 8140 + }, + { + "epoch": 0.1488109382711536, + "grad_norm": 6.9768223589685, + "learning_rate": 9.634478363571144e-06, + "loss": 17.6711, + "step": 8141 + }, + { + "epoch": 0.1488292174676001, + "grad_norm": 6.947093534664517, + "learning_rate": 9.634367256671342e-06, + "loss": 17.8603, + "step": 8142 + }, + { + "epoch": 0.14884749666404665, + "grad_norm": 8.065395022833629, + "learning_rate": 9.634256133528483e-06, + "loss": 17.9711, + "step": 8143 + }, + { + "epoch": 0.14886577586049318, + "grad_norm": 5.587161523282454, + "learning_rate": 9.63414499414296e-06, + "loss": 17.0566, + "step": 8144 + }, + { + "epoch": 0.1488840550569397, + "grad_norm": 6.629157279648962, + "learning_rate": 9.634033838515162e-06, + "loss": 17.5394, + "step": 8145 + }, + { + "epoch": 0.14890233425338623, + "grad_norm": 9.91512398762502, + "learning_rate": 9.633922666645475e-06, + "loss": 17.7891, + "step": 8146 + }, + { + "epoch": 0.14892061344983273, + "grad_norm": 6.5990876856195655, + "learning_rate": 9.633811478534293e-06, + "loss": 17.5892, + "step": 8147 + }, + { + "epoch": 0.14893889264627927, + "grad_norm": 7.022881311193155, + "learning_rate": 9.633700274182003e-06, + "loss": 17.5351, + "step": 8148 + }, + { + "epoch": 0.1489571718427258, + "grad_norm": 5.6144195274142055, + "learning_rate": 9.633589053588997e-06, + "loss": 16.9259, + "step": 8149 + }, + { + "epoch": 0.1489754510391723, + "grad_norm": 7.738487347820152, + "learning_rate": 9.63347781675566e-06, + "loss": 18.1853, + "step": 8150 + }, + { + "epoch": 0.14899373023561885, + "grad_norm": 6.193180519595679, + "learning_rate": 9.63336656368239e-06, + "loss": 17.5029, + "step": 8151 + }, + { + "epoch": 0.14901200943206536, + "grad_norm": 7.092227173166322, + "learning_rate": 9.633255294369569e-06, + "loss": 17.9659, + "step": 8152 + }, + { + "epoch": 0.1490302886285119, + "grad_norm": 6.745416215426979, + "learning_rate": 9.63314400881759e-06, + "loss": 17.657, + "step": 8153 + }, + { + "epoch": 0.14904856782495843, + "grad_norm": 8.15380868776113, + "learning_rate": 9.633032707026846e-06, + "loss": 18.2057, + "step": 8154 + }, + { + "epoch": 0.14906684702140494, + "grad_norm": 6.331248922332758, + "learning_rate": 9.632921388997722e-06, + "loss": 17.1826, + "step": 8155 + }, + { + "epoch": 0.14908512621785147, + "grad_norm": 6.805012976161833, + "learning_rate": 9.632810054730611e-06, + "loss": 17.6045, + "step": 8156 + }, + { + "epoch": 0.14910340541429798, + "grad_norm": 6.046630693939717, + "learning_rate": 9.632698704225904e-06, + "loss": 17.2938, + "step": 8157 + }, + { + "epoch": 0.14912168461074451, + "grad_norm": 6.757383522376019, + "learning_rate": 9.632587337483989e-06, + "loss": 17.5262, + "step": 8158 + }, + { + "epoch": 0.14913996380719102, + "grad_norm": 8.323600026726774, + "learning_rate": 9.632475954505258e-06, + "loss": 18.1282, + "step": 8159 + }, + { + "epoch": 0.14915824300363756, + "grad_norm": 6.499193928467957, + "learning_rate": 9.6323645552901e-06, + "loss": 17.0955, + "step": 8160 + }, + { + "epoch": 0.1491765222000841, + "grad_norm": 6.966078916831902, + "learning_rate": 9.632253139838906e-06, + "loss": 17.8794, + "step": 8161 + }, + { + "epoch": 0.1491948013965306, + "grad_norm": 6.599636555603426, + "learning_rate": 9.632141708152068e-06, + "loss": 17.5405, + "step": 8162 + }, + { + "epoch": 0.14921308059297714, + "grad_norm": 7.28404182257285, 
+ "learning_rate": 9.632030260229974e-06, + "loss": 17.5913, + "step": 8163 + }, + { + "epoch": 0.14923135978942365, + "grad_norm": 7.026371351499305, + "learning_rate": 9.631918796073017e-06, + "loss": 17.6872, + "step": 8164 + }, + { + "epoch": 0.14924963898587018, + "grad_norm": 6.586875317455057, + "learning_rate": 9.631807315681586e-06, + "loss": 17.4985, + "step": 8165 + }, + { + "epoch": 0.14926791818231672, + "grad_norm": 6.5463039923326, + "learning_rate": 9.631695819056073e-06, + "loss": 17.7064, + "step": 8166 + }, + { + "epoch": 0.14928619737876322, + "grad_norm": 9.692044910483968, + "learning_rate": 9.631584306196866e-06, + "loss": 18.3559, + "step": 8167 + }, + { + "epoch": 0.14930447657520976, + "grad_norm": 7.022885606869071, + "learning_rate": 9.631472777104361e-06, + "loss": 17.4101, + "step": 8168 + }, + { + "epoch": 0.14932275577165627, + "grad_norm": 8.913693817095838, + "learning_rate": 9.631361231778944e-06, + "loss": 18.2285, + "step": 8169 + }, + { + "epoch": 0.1493410349681028, + "grad_norm": 6.636074960057482, + "learning_rate": 9.631249670221007e-06, + "loss": 17.6323, + "step": 8170 + }, + { + "epoch": 0.14935931416454934, + "grad_norm": 6.843279083385719, + "learning_rate": 9.631138092430943e-06, + "loss": 17.6723, + "step": 8171 + }, + { + "epoch": 0.14937759336099585, + "grad_norm": 6.873244248974223, + "learning_rate": 9.631026498409142e-06, + "loss": 17.5206, + "step": 8172 + }, + { + "epoch": 0.14939587255744238, + "grad_norm": 6.66895985156897, + "learning_rate": 9.630914888155993e-06, + "loss": 17.5482, + "step": 8173 + }, + { + "epoch": 0.1494141517538889, + "grad_norm": 6.853114025017008, + "learning_rate": 9.630803261671892e-06, + "loss": 17.7752, + "step": 8174 + }, + { + "epoch": 0.14943243095033543, + "grad_norm": 6.133412828735075, + "learning_rate": 9.630691618957225e-06, + "loss": 17.5157, + "step": 8175 + }, + { + "epoch": 0.14945071014678193, + "grad_norm": 7.621202411677604, + "learning_rate": 9.630579960012387e-06, + "loss": 18.3171, + "step": 8176 + }, + { + "epoch": 0.14946898934322847, + "grad_norm": 7.582676238264134, + "learning_rate": 9.630468284837769e-06, + "loss": 17.986, + "step": 8177 + }, + { + "epoch": 0.149487268539675, + "grad_norm": 6.258409215946996, + "learning_rate": 9.63035659343376e-06, + "loss": 17.3532, + "step": 8178 + }, + { + "epoch": 0.1495055477361215, + "grad_norm": 6.355855197434192, + "learning_rate": 9.630244885800753e-06, + "loss": 17.6792, + "step": 8179 + }, + { + "epoch": 0.14952382693256805, + "grad_norm": 8.220841132045704, + "learning_rate": 9.63013316193914e-06, + "loss": 18.196, + "step": 8180 + }, + { + "epoch": 0.14954210612901456, + "grad_norm": 6.636291930323637, + "learning_rate": 9.630021421849311e-06, + "loss": 17.5067, + "step": 8181 + }, + { + "epoch": 0.1495603853254611, + "grad_norm": 7.5266465981644854, + "learning_rate": 9.629909665531661e-06, + "loss": 18.0338, + "step": 8182 + }, + { + "epoch": 0.14957866452190763, + "grad_norm": 7.035010101666306, + "learning_rate": 9.629797892986576e-06, + "loss": 17.7137, + "step": 8183 + }, + { + "epoch": 0.14959694371835414, + "grad_norm": 7.163355240729544, + "learning_rate": 9.629686104214453e-06, + "loss": 17.8504, + "step": 8184 + }, + { + "epoch": 0.14961522291480067, + "grad_norm": 7.1227862610442925, + "learning_rate": 9.629574299215682e-06, + "loss": 17.8507, + "step": 8185 + }, + { + "epoch": 0.14963350211124718, + "grad_norm": 7.485342666547704, + "learning_rate": 9.629462477990656e-06, + "loss": 18.1443, + "step": 8186 + }, + { + 
"epoch": 0.14965178130769372, + "grad_norm": 8.198642072686255, + "learning_rate": 9.629350640539763e-06, + "loss": 18.1238, + "step": 8187 + }, + { + "epoch": 0.14967006050414025, + "grad_norm": 6.974602313031887, + "learning_rate": 9.629238786863401e-06, + "loss": 17.547, + "step": 8188 + }, + { + "epoch": 0.14968833970058676, + "grad_norm": 7.12877973991162, + "learning_rate": 9.629126916961958e-06, + "loss": 17.8723, + "step": 8189 + }, + { + "epoch": 0.1497066188970333, + "grad_norm": 7.462106769459328, + "learning_rate": 9.629015030835824e-06, + "loss": 17.7146, + "step": 8190 + }, + { + "epoch": 0.1497248980934798, + "grad_norm": 6.839041078834273, + "learning_rate": 9.628903128485396e-06, + "loss": 17.96, + "step": 8191 + }, + { + "epoch": 0.14974317728992634, + "grad_norm": 6.82052383103971, + "learning_rate": 9.628791209911063e-06, + "loss": 17.986, + "step": 8192 + }, + { + "epoch": 0.14976145648637285, + "grad_norm": 7.663386127315567, + "learning_rate": 9.62867927511322e-06, + "loss": 17.935, + "step": 8193 + }, + { + "epoch": 0.14977973568281938, + "grad_norm": 8.631183227611471, + "learning_rate": 9.628567324092259e-06, + "loss": 18.2473, + "step": 8194 + }, + { + "epoch": 0.14979801487926592, + "grad_norm": 7.283198318557219, + "learning_rate": 9.62845535684857e-06, + "loss": 17.8827, + "step": 8195 + }, + { + "epoch": 0.14981629407571243, + "grad_norm": 6.898003121324752, + "learning_rate": 9.628343373382545e-06, + "loss": 17.7923, + "step": 8196 + }, + { + "epoch": 0.14983457327215896, + "grad_norm": 7.205962008746917, + "learning_rate": 9.628231373694579e-06, + "loss": 17.7067, + "step": 8197 + }, + { + "epoch": 0.14985285246860547, + "grad_norm": 6.369033875223682, + "learning_rate": 9.628119357785064e-06, + "loss": 17.4865, + "step": 8198 + }, + { + "epoch": 0.149871131665052, + "grad_norm": 7.086959868206967, + "learning_rate": 9.628007325654392e-06, + "loss": 17.7848, + "step": 8199 + }, + { + "epoch": 0.14988941086149854, + "grad_norm": 7.554476693798808, + "learning_rate": 9.627895277302957e-06, + "loss": 17.9422, + "step": 8200 + }, + { + "epoch": 0.14990769005794505, + "grad_norm": 7.975346708415603, + "learning_rate": 9.62778321273115e-06, + "loss": 18.1469, + "step": 8201 + }, + { + "epoch": 0.14992596925439158, + "grad_norm": 7.016908788442051, + "learning_rate": 9.627671131939363e-06, + "loss": 17.8922, + "step": 8202 + }, + { + "epoch": 0.1499442484508381, + "grad_norm": 8.85927995340893, + "learning_rate": 9.627559034927992e-06, + "loss": 18.2604, + "step": 8203 + }, + { + "epoch": 0.14996252764728463, + "grad_norm": 6.774773064259678, + "learning_rate": 9.627446921697427e-06, + "loss": 17.3408, + "step": 8204 + }, + { + "epoch": 0.14998080684373116, + "grad_norm": 6.294227610418616, + "learning_rate": 9.627334792248064e-06, + "loss": 17.2782, + "step": 8205 + }, + { + "epoch": 0.14999908604017767, + "grad_norm": 7.369330199875309, + "learning_rate": 9.627222646580291e-06, + "loss": 17.9668, + "step": 8206 + }, + { + "epoch": 0.1500173652366242, + "grad_norm": 8.27917234719453, + "learning_rate": 9.627110484694506e-06, + "loss": 18.1977, + "step": 8207 + }, + { + "epoch": 0.15003564443307071, + "grad_norm": 7.8009489813351856, + "learning_rate": 9.626998306591101e-06, + "loss": 17.868, + "step": 8208 + }, + { + "epoch": 0.15005392362951725, + "grad_norm": 7.77946634764763, + "learning_rate": 9.626886112270467e-06, + "loss": 17.7802, + "step": 8209 + }, + { + "epoch": 0.15007220282596376, + "grad_norm": 6.884261644919327, + "learning_rate": 
9.626773901733e-06, + "loss": 17.7854, + "step": 8210 + }, + { + "epoch": 0.1500904820224103, + "grad_norm": 8.006721368370812, + "learning_rate": 9.62666167497909e-06, + "loss": 17.9747, + "step": 8211 + }, + { + "epoch": 0.15010876121885683, + "grad_norm": 5.848516308608391, + "learning_rate": 9.626549432009135e-06, + "loss": 17.2307, + "step": 8212 + }, + { + "epoch": 0.15012704041530334, + "grad_norm": 9.007683572794024, + "learning_rate": 9.626437172823523e-06, + "loss": 18.6756, + "step": 8213 + }, + { + "epoch": 0.15014531961174987, + "grad_norm": 6.84217977983457, + "learning_rate": 9.626324897422651e-06, + "loss": 17.8017, + "step": 8214 + }, + { + "epoch": 0.15016359880819638, + "grad_norm": 6.124497949168756, + "learning_rate": 9.626212605806914e-06, + "loss": 17.3608, + "step": 8215 + }, + { + "epoch": 0.15018187800464292, + "grad_norm": 6.226158237857329, + "learning_rate": 9.626100297976702e-06, + "loss": 17.1185, + "step": 8216 + }, + { + "epoch": 0.15020015720108945, + "grad_norm": 7.583217676650587, + "learning_rate": 9.62598797393241e-06, + "loss": 17.8746, + "step": 8217 + }, + { + "epoch": 0.15021843639753596, + "grad_norm": 7.05815119157305, + "learning_rate": 9.625875633674428e-06, + "loss": 17.5794, + "step": 8218 + }, + { + "epoch": 0.1502367155939825, + "grad_norm": 6.131312477463043, + "learning_rate": 9.625763277203157e-06, + "loss": 17.2176, + "step": 8219 + }, + { + "epoch": 0.150254994790429, + "grad_norm": 9.33147680707214, + "learning_rate": 9.625650904518986e-06, + "loss": 18.486, + "step": 8220 + }, + { + "epoch": 0.15027327398687554, + "grad_norm": 7.028451742104949, + "learning_rate": 9.625538515622311e-06, + "loss": 17.3888, + "step": 8221 + }, + { + "epoch": 0.15029155318332207, + "grad_norm": 6.105778569504314, + "learning_rate": 9.625426110513524e-06, + "loss": 17.4368, + "step": 8222 + }, + { + "epoch": 0.15030983237976858, + "grad_norm": 6.231559648553829, + "learning_rate": 9.625313689193021e-06, + "loss": 17.5064, + "step": 8223 + }, + { + "epoch": 0.15032811157621512, + "grad_norm": 6.6105422614414096, + "learning_rate": 9.625201251661193e-06, + "loss": 17.4768, + "step": 8224 + }, + { + "epoch": 0.15034639077266163, + "grad_norm": 7.787476106469747, + "learning_rate": 9.625088797918437e-06, + "loss": 18.2933, + "step": 8225 + }, + { + "epoch": 0.15036466996910816, + "grad_norm": 6.97417096298724, + "learning_rate": 9.624976327965146e-06, + "loss": 17.5706, + "step": 8226 + }, + { + "epoch": 0.15038294916555467, + "grad_norm": 5.758546880269337, + "learning_rate": 9.624863841801715e-06, + "loss": 17.1828, + "step": 8227 + }, + { + "epoch": 0.1504012283620012, + "grad_norm": 6.765842795066055, + "learning_rate": 9.624751339428537e-06, + "loss": 17.3873, + "step": 8228 + }, + { + "epoch": 0.15041950755844774, + "grad_norm": 6.8451688463139675, + "learning_rate": 9.624638820846005e-06, + "loss": 17.8134, + "step": 8229 + }, + { + "epoch": 0.15043778675489425, + "grad_norm": 7.433415462032307, + "learning_rate": 9.624526286054519e-06, + "loss": 17.9911, + "step": 8230 + }, + { + "epoch": 0.15045606595134078, + "grad_norm": 6.736119752188272, + "learning_rate": 9.624413735054468e-06, + "loss": 17.5756, + "step": 8231 + }, + { + "epoch": 0.1504743451477873, + "grad_norm": 6.432736461057537, + "learning_rate": 9.624301167846246e-06, + "loss": 17.4461, + "step": 8232 + }, + { + "epoch": 0.15049262434423383, + "grad_norm": 6.918900635903595, + "learning_rate": 9.624188584430252e-06, + "loss": 17.4558, + "step": 8233 + }, + { + "epoch": 
0.15051090354068036, + "grad_norm": 7.75983457150723, + "learning_rate": 9.624075984806878e-06, + "loss": 17.8908, + "step": 8234 + }, + { + "epoch": 0.15052918273712687, + "grad_norm": 8.282034442417402, + "learning_rate": 9.623963368976519e-06, + "loss": 18.27, + "step": 8235 + }, + { + "epoch": 0.1505474619335734, + "grad_norm": 7.158220967820405, + "learning_rate": 9.623850736939568e-06, + "loss": 17.8442, + "step": 8236 + }, + { + "epoch": 0.15056574113001991, + "grad_norm": 7.071703795375992, + "learning_rate": 9.623738088696425e-06, + "loss": 17.6172, + "step": 8237 + }, + { + "epoch": 0.15058402032646645, + "grad_norm": 6.619604300037013, + "learning_rate": 9.623625424247479e-06, + "loss": 17.538, + "step": 8238 + }, + { + "epoch": 0.15060229952291299, + "grad_norm": 8.159289456968516, + "learning_rate": 9.623512743593126e-06, + "loss": 18.3043, + "step": 8239 + }, + { + "epoch": 0.1506205787193595, + "grad_norm": 7.891801653755344, + "learning_rate": 9.623400046733762e-06, + "loss": 18.214, + "step": 8240 + }, + { + "epoch": 0.15063885791580603, + "grad_norm": 7.315861513886885, + "learning_rate": 9.623287333669784e-06, + "loss": 17.6339, + "step": 8241 + }, + { + "epoch": 0.15065713711225254, + "grad_norm": 6.842762964775592, + "learning_rate": 9.623174604401584e-06, + "loss": 17.8821, + "step": 8242 + }, + { + "epoch": 0.15067541630869907, + "grad_norm": 7.45071955468202, + "learning_rate": 9.623061858929558e-06, + "loss": 17.8013, + "step": 8243 + }, + { + "epoch": 0.15069369550514558, + "grad_norm": 7.278205613368842, + "learning_rate": 9.622949097254103e-06, + "loss": 17.5657, + "step": 8244 + }, + { + "epoch": 0.15071197470159212, + "grad_norm": 6.689719787695791, + "learning_rate": 9.62283631937561e-06, + "loss": 17.3731, + "step": 8245 + }, + { + "epoch": 0.15073025389803865, + "grad_norm": 5.89983201704077, + "learning_rate": 9.62272352529448e-06, + "loss": 17.1696, + "step": 8246 + }, + { + "epoch": 0.15074853309448516, + "grad_norm": 7.432211932522837, + "learning_rate": 9.622610715011103e-06, + "loss": 17.8963, + "step": 8247 + }, + { + "epoch": 0.1507668122909317, + "grad_norm": 7.287658201953378, + "learning_rate": 9.622497888525878e-06, + "loss": 18.0643, + "step": 8248 + }, + { + "epoch": 0.1507850914873782, + "grad_norm": 7.3204055710807, + "learning_rate": 9.622385045839197e-06, + "loss": 17.8771, + "step": 8249 + }, + { + "epoch": 0.15080337068382474, + "grad_norm": 7.268925788270548, + "learning_rate": 9.622272186951458e-06, + "loss": 18.2706, + "step": 8250 + }, + { + "epoch": 0.15082164988027127, + "grad_norm": 7.474373604212861, + "learning_rate": 9.622159311863057e-06, + "loss": 18.1105, + "step": 8251 + }, + { + "epoch": 0.15083992907671778, + "grad_norm": 6.301086570045507, + "learning_rate": 9.622046420574389e-06, + "loss": 17.472, + "step": 8252 + }, + { + "epoch": 0.15085820827316432, + "grad_norm": 6.698138661279464, + "learning_rate": 9.621933513085848e-06, + "loss": 17.7003, + "step": 8253 + }, + { + "epoch": 0.15087648746961083, + "grad_norm": 7.081841556616977, + "learning_rate": 9.621820589397832e-06, + "loss": 17.6653, + "step": 8254 + }, + { + "epoch": 0.15089476666605736, + "grad_norm": 7.3238418748277745, + "learning_rate": 9.621707649510736e-06, + "loss": 17.9191, + "step": 8255 + }, + { + "epoch": 0.1509130458625039, + "grad_norm": 6.742651680720778, + "learning_rate": 9.621594693424955e-06, + "loss": 17.6599, + "step": 8256 + }, + { + "epoch": 0.1509313250589504, + "grad_norm": 7.58101079672959, + "learning_rate": 9.621481721140885e-06, + 
"loss": 18.2079, + "step": 8257 + }, + { + "epoch": 0.15094960425539694, + "grad_norm": 6.863753551861713, + "learning_rate": 9.621368732658925e-06, + "loss": 17.8148, + "step": 8258 + }, + { + "epoch": 0.15096788345184345, + "grad_norm": 6.47264056577828, + "learning_rate": 9.621255727979467e-06, + "loss": 17.4016, + "step": 8259 + }, + { + "epoch": 0.15098616264828998, + "grad_norm": 7.603533666439708, + "learning_rate": 9.621142707102908e-06, + "loss": 18.2694, + "step": 8260 + }, + { + "epoch": 0.1510044418447365, + "grad_norm": 6.540029090559662, + "learning_rate": 9.621029670029647e-06, + "loss": 17.6542, + "step": 8261 + }, + { + "epoch": 0.15102272104118303, + "grad_norm": 7.316452676273734, + "learning_rate": 9.620916616760076e-06, + "loss": 17.7939, + "step": 8262 + }, + { + "epoch": 0.15104100023762956, + "grad_norm": 7.181725949839706, + "learning_rate": 9.620803547294595e-06, + "loss": 17.8618, + "step": 8263 + }, + { + "epoch": 0.15105927943407607, + "grad_norm": 8.046996352147591, + "learning_rate": 9.620690461633597e-06, + "loss": 18.103, + "step": 8264 + }, + { + "epoch": 0.1510775586305226, + "grad_norm": 6.452773693713817, + "learning_rate": 9.620577359777481e-06, + "loss": 17.5604, + "step": 8265 + }, + { + "epoch": 0.15109583782696911, + "grad_norm": 6.74310278963078, + "learning_rate": 9.62046424172664e-06, + "loss": 17.6312, + "step": 8266 + }, + { + "epoch": 0.15111411702341565, + "grad_norm": 8.863382977064171, + "learning_rate": 9.620351107481476e-06, + "loss": 18.6413, + "step": 8267 + }, + { + "epoch": 0.1511323962198622, + "grad_norm": 6.426130488184393, + "learning_rate": 9.620237957042382e-06, + "loss": 17.4138, + "step": 8268 + }, + { + "epoch": 0.1511506754163087, + "grad_norm": 7.165066842644639, + "learning_rate": 9.620124790409752e-06, + "loss": 17.4794, + "step": 8269 + }, + { + "epoch": 0.15116895461275523, + "grad_norm": 5.783885020588841, + "learning_rate": 9.620011607583988e-06, + "loss": 17.186, + "step": 8270 + }, + { + "epoch": 0.15118723380920174, + "grad_norm": 6.653501274399079, + "learning_rate": 9.619898408565485e-06, + "loss": 17.8724, + "step": 8271 + }, + { + "epoch": 0.15120551300564827, + "grad_norm": 9.47444081173225, + "learning_rate": 9.619785193354636e-06, + "loss": 17.9671, + "step": 8272 + }, + { + "epoch": 0.1512237922020948, + "grad_norm": 8.314644938169216, + "learning_rate": 9.619671961951843e-06, + "loss": 18.4144, + "step": 8273 + }, + { + "epoch": 0.15124207139854132, + "grad_norm": 7.804214639424101, + "learning_rate": 9.6195587143575e-06, + "loss": 17.815, + "step": 8274 + }, + { + "epoch": 0.15126035059498785, + "grad_norm": 6.494363859522821, + "learning_rate": 9.619445450572005e-06, + "loss": 17.7352, + "step": 8275 + }, + { + "epoch": 0.15127862979143436, + "grad_norm": 6.75121851459919, + "learning_rate": 9.619332170595753e-06, + "loss": 17.5873, + "step": 8276 + }, + { + "epoch": 0.1512969089878809, + "grad_norm": 8.221373152111685, + "learning_rate": 9.619218874429144e-06, + "loss": 18.549, + "step": 8277 + }, + { + "epoch": 0.1513151881843274, + "grad_norm": 7.065653786126481, + "learning_rate": 9.619105562072573e-06, + "loss": 17.5719, + "step": 8278 + }, + { + "epoch": 0.15133346738077394, + "grad_norm": 8.495427470673599, + "learning_rate": 9.618992233526438e-06, + "loss": 18.1892, + "step": 8279 + }, + { + "epoch": 0.15135174657722048, + "grad_norm": 6.594544755034357, + "learning_rate": 9.618878888791136e-06, + "loss": 17.3768, + "step": 8280 + }, + { + "epoch": 0.15137002577366698, + "grad_norm": 
7.174457454636815, + "learning_rate": 9.618765527867065e-06, + "loss": 17.9663, + "step": 8281 + }, + { + "epoch": 0.15138830497011352, + "grad_norm": 6.486051817146391, + "learning_rate": 9.618652150754621e-06, + "loss": 17.5412, + "step": 8282 + }, + { + "epoch": 0.15140658416656003, + "grad_norm": 6.660156852894946, + "learning_rate": 9.618538757454202e-06, + "loss": 17.6116, + "step": 8283 + }, + { + "epoch": 0.15142486336300656, + "grad_norm": 6.652304984094611, + "learning_rate": 9.618425347966206e-06, + "loss": 17.7333, + "step": 8284 + }, + { + "epoch": 0.1514431425594531, + "grad_norm": 9.697218587626619, + "learning_rate": 9.61831192229103e-06, + "loss": 18.755, + "step": 8285 + }, + { + "epoch": 0.1514614217558996, + "grad_norm": 5.983378811259161, + "learning_rate": 9.618198480429071e-06, + "loss": 17.2025, + "step": 8286 + }, + { + "epoch": 0.15147970095234614, + "grad_norm": 6.9403136869832, + "learning_rate": 9.618085022380727e-06, + "loss": 17.7489, + "step": 8287 + }, + { + "epoch": 0.15149798014879265, + "grad_norm": 6.289242074092284, + "learning_rate": 9.617971548146395e-06, + "loss": 17.511, + "step": 8288 + }, + { + "epoch": 0.15151625934523918, + "grad_norm": 10.072604609154403, + "learning_rate": 9.617858057726474e-06, + "loss": 17.6072, + "step": 8289 + }, + { + "epoch": 0.15153453854168572, + "grad_norm": 7.738213544209298, + "learning_rate": 9.617744551121362e-06, + "loss": 18.0687, + "step": 8290 + }, + { + "epoch": 0.15155281773813223, + "grad_norm": 8.88955160968627, + "learning_rate": 9.617631028331455e-06, + "loss": 18.2794, + "step": 8291 + }, + { + "epoch": 0.15157109693457876, + "grad_norm": 7.129723897496373, + "learning_rate": 9.617517489357153e-06, + "loss": 18.0201, + "step": 8292 + }, + { + "epoch": 0.15158937613102527, + "grad_norm": 6.187657092673538, + "learning_rate": 9.617403934198852e-06, + "loss": 17.2293, + "step": 8293 + }, + { + "epoch": 0.1516076553274718, + "grad_norm": 7.77523702408604, + "learning_rate": 9.61729036285695e-06, + "loss": 18.4022, + "step": 8294 + }, + { + "epoch": 0.15162593452391832, + "grad_norm": 7.051666309443853, + "learning_rate": 9.617176775331848e-06, + "loss": 18.0027, + "step": 8295 + }, + { + "epoch": 0.15164421372036485, + "grad_norm": 7.648719821643449, + "learning_rate": 9.61706317162394e-06, + "loss": 17.4062, + "step": 8296 + }, + { + "epoch": 0.1516624929168114, + "grad_norm": 6.553220877152369, + "learning_rate": 9.61694955173363e-06, + "loss": 17.4329, + "step": 8297 + }, + { + "epoch": 0.1516807721132579, + "grad_norm": 7.048283344257818, + "learning_rate": 9.616835915661308e-06, + "loss": 17.8347, + "step": 8298 + }, + { + "epoch": 0.15169905130970443, + "grad_norm": 8.622176775355504, + "learning_rate": 9.616722263407381e-06, + "loss": 18.3363, + "step": 8299 + }, + { + "epoch": 0.15171733050615094, + "grad_norm": 7.3725743209216645, + "learning_rate": 9.61660859497224e-06, + "loss": 17.8952, + "step": 8300 + }, + { + "epoch": 0.15173560970259747, + "grad_norm": 7.667911868647525, + "learning_rate": 9.616494910356287e-06, + "loss": 18.1777, + "step": 8301 + }, + { + "epoch": 0.151753888899044, + "grad_norm": 5.6619680493594675, + "learning_rate": 9.61638120955992e-06, + "loss": 17.278, + "step": 8302 + }, + { + "epoch": 0.15177216809549052, + "grad_norm": 7.518921455476554, + "learning_rate": 9.616267492583538e-06, + "loss": 17.9855, + "step": 8303 + }, + { + "epoch": 0.15179044729193705, + "grad_norm": 7.362345100622486, + "learning_rate": 9.616153759427539e-06, + "loss": 17.8035, + "step": 8304 + 
}, + { + "epoch": 0.15180872648838356, + "grad_norm": 6.771119541757221, + "learning_rate": 9.616040010092322e-06, + "loss": 17.447, + "step": 8305 + }, + { + "epoch": 0.1518270056848301, + "grad_norm": 11.090780843549402, + "learning_rate": 9.615926244578283e-06, + "loss": 19.3201, + "step": 8306 + }, + { + "epoch": 0.15184528488127663, + "grad_norm": 7.69397219050792, + "learning_rate": 9.615812462885825e-06, + "loss": 18.143, + "step": 8307 + }, + { + "epoch": 0.15186356407772314, + "grad_norm": 6.581930909105399, + "learning_rate": 9.615698665015345e-06, + "loss": 17.4815, + "step": 8308 + }, + { + "epoch": 0.15188184327416968, + "grad_norm": 7.343480040899406, + "learning_rate": 9.615584850967242e-06, + "loss": 18.1581, + "step": 8309 + }, + { + "epoch": 0.15190012247061618, + "grad_norm": 7.281686300188413, + "learning_rate": 9.615471020741913e-06, + "loss": 17.8876, + "step": 8310 + }, + { + "epoch": 0.15191840166706272, + "grad_norm": 6.023210772722194, + "learning_rate": 9.615357174339759e-06, + "loss": 17.1831, + "step": 8311 + }, + { + "epoch": 0.15193668086350923, + "grad_norm": 8.554098411455712, + "learning_rate": 9.61524331176118e-06, + "loss": 18.1684, + "step": 8312 + }, + { + "epoch": 0.15195496005995576, + "grad_norm": 6.123535324372592, + "learning_rate": 9.615129433006573e-06, + "loss": 17.3887, + "step": 8313 + }, + { + "epoch": 0.1519732392564023, + "grad_norm": 5.73365668953176, + "learning_rate": 9.615015538076338e-06, + "loss": 16.9956, + "step": 8314 + }, + { + "epoch": 0.1519915184528488, + "grad_norm": 7.799627749598492, + "learning_rate": 9.614901626970873e-06, + "loss": 18.2006, + "step": 8315 + }, + { + "epoch": 0.15200979764929534, + "grad_norm": 6.543433915866202, + "learning_rate": 9.61478769969058e-06, + "loss": 17.4647, + "step": 8316 + }, + { + "epoch": 0.15202807684574185, + "grad_norm": 8.78596846594541, + "learning_rate": 9.614673756235854e-06, + "loss": 18.5613, + "step": 8317 + }, + { + "epoch": 0.15204635604218839, + "grad_norm": 7.318576023539961, + "learning_rate": 9.6145597966071e-06, + "loss": 17.8715, + "step": 8318 + }, + { + "epoch": 0.15206463523863492, + "grad_norm": 7.380504903544179, + "learning_rate": 9.614445820804711e-06, + "loss": 17.8962, + "step": 8319 + }, + { + "epoch": 0.15208291443508143, + "grad_norm": 7.335299172284095, + "learning_rate": 9.614331828829091e-06, + "loss": 18.0066, + "step": 8320 + }, + { + "epoch": 0.15210119363152796, + "grad_norm": 6.601506403760204, + "learning_rate": 9.614217820680641e-06, + "loss": 17.2911, + "step": 8321 + }, + { + "epoch": 0.15211947282797447, + "grad_norm": 8.246965261381105, + "learning_rate": 9.614103796359755e-06, + "loss": 18.3982, + "step": 8322 + }, + { + "epoch": 0.152137752024421, + "grad_norm": 5.151529787885932, + "learning_rate": 9.613989755866835e-06, + "loss": 16.8491, + "step": 8323 + }, + { + "epoch": 0.15215603122086754, + "grad_norm": 11.081636020111297, + "learning_rate": 9.613875699202284e-06, + "loss": 18.446, + "step": 8324 + }, + { + "epoch": 0.15217431041731405, + "grad_norm": 6.756162709600048, + "learning_rate": 9.613761626366498e-06, + "loss": 17.7069, + "step": 8325 + }, + { + "epoch": 0.1521925896137606, + "grad_norm": 6.15423922061458, + "learning_rate": 9.613647537359878e-06, + "loss": 17.3873, + "step": 8326 + }, + { + "epoch": 0.1522108688102071, + "grad_norm": 6.603928084287123, + "learning_rate": 9.613533432182822e-06, + "loss": 17.5039, + "step": 8327 + }, + { + "epoch": 0.15222914800665363, + "grad_norm": 5.958683100556394, + "learning_rate": 
9.613419310835734e-06, + "loss": 17.2975, + "step": 8328 + }, + { + "epoch": 0.15224742720310014, + "grad_norm": 7.536025657207551, + "learning_rate": 9.61330517331901e-06, + "loss": 17.6498, + "step": 8329 + }, + { + "epoch": 0.15226570639954667, + "grad_norm": 7.580601309602559, + "learning_rate": 9.613191019633053e-06, + "loss": 18.1946, + "step": 8330 + }, + { + "epoch": 0.1522839855959932, + "grad_norm": 6.5379268220550575, + "learning_rate": 9.61307684977826e-06, + "loss": 17.5752, + "step": 8331 + }, + { + "epoch": 0.15230226479243972, + "grad_norm": 7.891329417755743, + "learning_rate": 9.612962663755035e-06, + "loss": 18.1304, + "step": 8332 + }, + { + "epoch": 0.15232054398888625, + "grad_norm": 5.96099476374749, + "learning_rate": 9.612848461563776e-06, + "loss": 17.3644, + "step": 8333 + }, + { + "epoch": 0.15233882318533276, + "grad_norm": 7.558239477538888, + "learning_rate": 9.612734243204882e-06, + "loss": 18.055, + "step": 8334 + }, + { + "epoch": 0.1523571023817793, + "grad_norm": 7.722599467178709, + "learning_rate": 9.612620008678755e-06, + "loss": 17.9425, + "step": 8335 + }, + { + "epoch": 0.15237538157822583, + "grad_norm": 7.270872958368524, + "learning_rate": 9.612505757985795e-06, + "loss": 17.782, + "step": 8336 + }, + { + "epoch": 0.15239366077467234, + "grad_norm": 7.504788950693975, + "learning_rate": 9.612391491126403e-06, + "loss": 18.0103, + "step": 8337 + }, + { + "epoch": 0.15241193997111888, + "grad_norm": 7.005902932978128, + "learning_rate": 9.612277208100979e-06, + "loss": 17.8027, + "step": 8338 + }, + { + "epoch": 0.15243021916756538, + "grad_norm": 8.191436162031327, + "learning_rate": 9.612162908909924e-06, + "loss": 18.4753, + "step": 8339 + }, + { + "epoch": 0.15244849836401192, + "grad_norm": 7.765269070423985, + "learning_rate": 9.612048593553639e-06, + "loss": 18.4064, + "step": 8340 + }, + { + "epoch": 0.15246677756045846, + "grad_norm": 6.722960780984315, + "learning_rate": 9.611934262032522e-06, + "loss": 17.2695, + "step": 8341 + }, + { + "epoch": 0.15248505675690496, + "grad_norm": 7.270974839599677, + "learning_rate": 9.611819914346978e-06, + "loss": 17.8775, + "step": 8342 + }, + { + "epoch": 0.1525033359533515, + "grad_norm": 6.80859111730569, + "learning_rate": 9.611705550497404e-06, + "loss": 17.4734, + "step": 8343 + }, + { + "epoch": 0.152521615149798, + "grad_norm": 6.633348536040492, + "learning_rate": 9.611591170484202e-06, + "loss": 17.5199, + "step": 8344 + }, + { + "epoch": 0.15253989434624454, + "grad_norm": 6.301785897670512, + "learning_rate": 9.611476774307773e-06, + "loss": 17.3887, + "step": 8345 + }, + { + "epoch": 0.15255817354269105, + "grad_norm": 7.8771904174235035, + "learning_rate": 9.611362361968519e-06, + "loss": 18.2089, + "step": 8346 + }, + { + "epoch": 0.15257645273913759, + "grad_norm": 6.054082585091909, + "learning_rate": 9.611247933466838e-06, + "loss": 17.1358, + "step": 8347 + }, + { + "epoch": 0.15259473193558412, + "grad_norm": 6.025642937983709, + "learning_rate": 9.611133488803134e-06, + "loss": 17.3968, + "step": 8348 + }, + { + "epoch": 0.15261301113203063, + "grad_norm": 6.454400638632909, + "learning_rate": 9.611019027977809e-06, + "loss": 17.4408, + "step": 8349 + }, + { + "epoch": 0.15263129032847716, + "grad_norm": 8.576715940344029, + "learning_rate": 9.610904550991262e-06, + "loss": 18.3775, + "step": 8350 + }, + { + "epoch": 0.15264956952492367, + "grad_norm": 6.125045802018629, + "learning_rate": 9.610790057843892e-06, + "loss": 17.254, + "step": 8351 + }, + { + "epoch": 
0.1526678487213702, + "grad_norm": 6.4307968288897275, + "learning_rate": 9.610675548536107e-06, + "loss": 17.4323, + "step": 8352 + }, + { + "epoch": 0.15268612791781674, + "grad_norm": 7.637818889551061, + "learning_rate": 9.610561023068301e-06, + "loss": 17.7409, + "step": 8353 + }, + { + "epoch": 0.15270440711426325, + "grad_norm": 8.152296239217721, + "learning_rate": 9.61044648144088e-06, + "loss": 17.9951, + "step": 8354 + }, + { + "epoch": 0.1527226863107098, + "grad_norm": 7.319634671798852, + "learning_rate": 9.610331923654243e-06, + "loss": 18.2639, + "step": 8355 + }, + { + "epoch": 0.1527409655071563, + "grad_norm": 7.4942349441715175, + "learning_rate": 9.610217349708796e-06, + "loss": 18.0278, + "step": 8356 + }, + { + "epoch": 0.15275924470360283, + "grad_norm": 8.099602138005944, + "learning_rate": 9.610102759604934e-06, + "loss": 18.1418, + "step": 8357 + }, + { + "epoch": 0.15277752390004937, + "grad_norm": 6.82012127276604, + "learning_rate": 9.609988153343064e-06, + "loss": 17.4784, + "step": 8358 + }, + { + "epoch": 0.15279580309649587, + "grad_norm": 7.054166587710082, + "learning_rate": 9.609873530923584e-06, + "loss": 17.6846, + "step": 8359 + }, + { + "epoch": 0.1528140822929424, + "grad_norm": 7.558182667582104, + "learning_rate": 9.609758892346897e-06, + "loss": 17.7426, + "step": 8360 + }, + { + "epoch": 0.15283236148938892, + "grad_norm": 8.073310691773505, + "learning_rate": 9.609644237613407e-06, + "loss": 17.7529, + "step": 8361 + }, + { + "epoch": 0.15285064068583545, + "grad_norm": 7.4418851112152185, + "learning_rate": 9.609529566723512e-06, + "loss": 18.0476, + "step": 8362 + }, + { + "epoch": 0.15286891988228196, + "grad_norm": 6.894493856924612, + "learning_rate": 9.609414879677617e-06, + "loss": 17.5686, + "step": 8363 + }, + { + "epoch": 0.1528871990787285, + "grad_norm": 7.459148446865004, + "learning_rate": 9.609300176476123e-06, + "loss": 18.014, + "step": 8364 + }, + { + "epoch": 0.15290547827517503, + "grad_norm": 6.4473080737450115, + "learning_rate": 9.60918545711943e-06, + "loss": 17.5612, + "step": 8365 + }, + { + "epoch": 0.15292375747162154, + "grad_norm": 5.680097380401487, + "learning_rate": 9.609070721607943e-06, + "loss": 17.1274, + "step": 8366 + }, + { + "epoch": 0.15294203666806808, + "grad_norm": 6.237437755248435, + "learning_rate": 9.608955969942064e-06, + "loss": 17.4624, + "step": 8367 + }, + { + "epoch": 0.15296031586451458, + "grad_norm": 8.264595237228436, + "learning_rate": 9.608841202122193e-06, + "loss": 18.344, + "step": 8368 + }, + { + "epoch": 0.15297859506096112, + "grad_norm": 8.77713457863881, + "learning_rate": 9.608726418148736e-06, + "loss": 17.7051, + "step": 8369 + }, + { + "epoch": 0.15299687425740766, + "grad_norm": 5.972412339171528, + "learning_rate": 9.60861161802209e-06, + "loss": 17.4506, + "step": 8370 + }, + { + "epoch": 0.15301515345385416, + "grad_norm": 7.518788273320577, + "learning_rate": 9.60849680174266e-06, + "loss": 17.8397, + "step": 8371 + }, + { + "epoch": 0.1530334326503007, + "grad_norm": 6.5896332317018285, + "learning_rate": 9.608381969310851e-06, + "loss": 17.5284, + "step": 8372 + }, + { + "epoch": 0.1530517118467472, + "grad_norm": 7.233193816201468, + "learning_rate": 9.608267120727061e-06, + "loss": 17.8026, + "step": 8373 + }, + { + "epoch": 0.15306999104319374, + "grad_norm": 7.96773633243533, + "learning_rate": 9.608152255991696e-06, + "loss": 17.6385, + "step": 8374 + }, + { + "epoch": 0.15308827023964028, + "grad_norm": 6.059440494427977, + "learning_rate": 
9.608037375105157e-06, + "loss": 17.675, + "step": 8375 + }, + { + "epoch": 0.1531065494360868, + "grad_norm": 7.290037755876943, + "learning_rate": 9.607922478067845e-06, + "loss": 18.0835, + "step": 8376 + }, + { + "epoch": 0.15312482863253332, + "grad_norm": 7.756126214542827, + "learning_rate": 9.607807564880168e-06, + "loss": 17.9592, + "step": 8377 + }, + { + "epoch": 0.15314310782897983, + "grad_norm": 8.972905092985298, + "learning_rate": 9.607692635542523e-06, + "loss": 18.5581, + "step": 8378 + }, + { + "epoch": 0.15316138702542637, + "grad_norm": 8.293569395540622, + "learning_rate": 9.607577690055316e-06, + "loss": 18.278, + "step": 8379 + }, + { + "epoch": 0.15317966622187287, + "grad_norm": 7.054632744946273, + "learning_rate": 9.607462728418948e-06, + "loss": 18.0781, + "step": 8380 + }, + { + "epoch": 0.1531979454183194, + "grad_norm": 7.178135813819243, + "learning_rate": 9.607347750633824e-06, + "loss": 17.4505, + "step": 8381 + }, + { + "epoch": 0.15321622461476594, + "grad_norm": 6.747552815412986, + "learning_rate": 9.607232756700345e-06, + "loss": 17.7336, + "step": 8382 + }, + { + "epoch": 0.15323450381121245, + "grad_norm": 6.898898806189686, + "learning_rate": 9.607117746618916e-06, + "loss": 17.9267, + "step": 8383 + }, + { + "epoch": 0.153252783007659, + "grad_norm": 6.3177247531269884, + "learning_rate": 9.607002720389938e-06, + "loss": 17.5096, + "step": 8384 + }, + { + "epoch": 0.1532710622041055, + "grad_norm": 7.065871314133091, + "learning_rate": 9.606887678013817e-06, + "loss": 17.6314, + "step": 8385 + }, + { + "epoch": 0.15328934140055203, + "grad_norm": 6.845906887067965, + "learning_rate": 9.606772619490952e-06, + "loss": 17.5358, + "step": 8386 + }, + { + "epoch": 0.15330762059699857, + "grad_norm": 5.547321384041577, + "learning_rate": 9.60665754482175e-06, + "loss": 17.0189, + "step": 8387 + }, + { + "epoch": 0.15332589979344508, + "grad_norm": 7.777365897968196, + "learning_rate": 9.606542454006614e-06, + "loss": 17.3437, + "step": 8388 + }, + { + "epoch": 0.1533441789898916, + "grad_norm": 6.35152422892199, + "learning_rate": 9.606427347045945e-06, + "loss": 17.3832, + "step": 8389 + }, + { + "epoch": 0.15336245818633812, + "grad_norm": 7.921700262257623, + "learning_rate": 9.606312223940149e-06, + "loss": 18.2209, + "step": 8390 + }, + { + "epoch": 0.15338073738278465, + "grad_norm": 8.568656423301833, + "learning_rate": 9.606197084689628e-06, + "loss": 17.256, + "step": 8391 + }, + { + "epoch": 0.1533990165792312, + "grad_norm": 7.973003413749782, + "learning_rate": 9.606081929294785e-06, + "loss": 18.0763, + "step": 8392 + }, + { + "epoch": 0.1534172957756777, + "grad_norm": 6.084094784940632, + "learning_rate": 9.605966757756025e-06, + "loss": 17.402, + "step": 8393 + }, + { + "epoch": 0.15343557497212423, + "grad_norm": 7.196630568735634, + "learning_rate": 9.605851570073751e-06, + "loss": 17.8722, + "step": 8394 + }, + { + "epoch": 0.15345385416857074, + "grad_norm": 7.565946134509732, + "learning_rate": 9.605736366248368e-06, + "loss": 17.703, + "step": 8395 + }, + { + "epoch": 0.15347213336501728, + "grad_norm": 7.121086636966807, + "learning_rate": 9.605621146280278e-06, + "loss": 17.5359, + "step": 8396 + }, + { + "epoch": 0.15349041256146378, + "grad_norm": 8.071905115767212, + "learning_rate": 9.605505910169885e-06, + "loss": 18.0466, + "step": 8397 + }, + { + "epoch": 0.15350869175791032, + "grad_norm": 7.1637062157975135, + "learning_rate": 9.605390657917594e-06, + "loss": 17.9921, + "step": 8398 + }, + { + "epoch": 
0.15352697095435686, + "grad_norm": 6.79267118405408, + "learning_rate": 9.605275389523809e-06, + "loss": 17.437, + "step": 8399 + }, + { + "epoch": 0.15354525015080336, + "grad_norm": 6.152885576362294, + "learning_rate": 9.605160104988934e-06, + "loss": 17.1758, + "step": 8400 + }, + { + "epoch": 0.1535635293472499, + "grad_norm": 7.600485582114135, + "learning_rate": 9.60504480431337e-06, + "loss": 18.0655, + "step": 8401 + }, + { + "epoch": 0.1535818085436964, + "grad_norm": 8.148246820658834, + "learning_rate": 9.604929487497525e-06, + "loss": 18.264, + "step": 8402 + }, + { + "epoch": 0.15360008774014294, + "grad_norm": 7.703202854237343, + "learning_rate": 9.604814154541801e-06, + "loss": 18.1432, + "step": 8403 + }, + { + "epoch": 0.15361836693658948, + "grad_norm": 6.731615825436619, + "learning_rate": 9.604698805446604e-06, + "loss": 17.361, + "step": 8404 + }, + { + "epoch": 0.153636646133036, + "grad_norm": 7.287228303237335, + "learning_rate": 9.604583440212338e-06, + "loss": 17.8045, + "step": 8405 + }, + { + "epoch": 0.15365492532948252, + "grad_norm": 6.349368019047275, + "learning_rate": 9.604468058839405e-06, + "loss": 17.2851, + "step": 8406 + }, + { + "epoch": 0.15367320452592903, + "grad_norm": 6.740836440994955, + "learning_rate": 9.604352661328212e-06, + "loss": 17.4433, + "step": 8407 + }, + { + "epoch": 0.15369148372237557, + "grad_norm": 6.536896433117541, + "learning_rate": 9.604237247679162e-06, + "loss": 17.553, + "step": 8408 + }, + { + "epoch": 0.1537097629188221, + "grad_norm": 7.988312390636531, + "learning_rate": 9.60412181789266e-06, + "loss": 18.0865, + "step": 8409 + }, + { + "epoch": 0.1537280421152686, + "grad_norm": 7.7923249945074025, + "learning_rate": 9.604006371969111e-06, + "loss": 17.7819, + "step": 8410 + }, + { + "epoch": 0.15374632131171515, + "grad_norm": 7.338200333907157, + "learning_rate": 9.603890909908917e-06, + "loss": 17.9571, + "step": 8411 + }, + { + "epoch": 0.15376460050816165, + "grad_norm": 7.761879206895382, + "learning_rate": 9.603775431712487e-06, + "loss": 17.6326, + "step": 8412 + }, + { + "epoch": 0.1537828797046082, + "grad_norm": 6.48170202759222, + "learning_rate": 9.603659937380223e-06, + "loss": 17.4447, + "step": 8413 + }, + { + "epoch": 0.1538011589010547, + "grad_norm": 7.836939441436887, + "learning_rate": 9.60354442691253e-06, + "loss": 17.8237, + "step": 8414 + }, + { + "epoch": 0.15381943809750123, + "grad_norm": 6.719946252733968, + "learning_rate": 9.603428900309815e-06, + "loss": 17.6225, + "step": 8415 + }, + { + "epoch": 0.15383771729394777, + "grad_norm": 7.542350517608685, + "learning_rate": 9.60331335757248e-06, + "loss": 17.9143, + "step": 8416 + }, + { + "epoch": 0.15385599649039428, + "grad_norm": 6.557273227773028, + "learning_rate": 9.60319779870093e-06, + "loss": 17.6563, + "step": 8417 + }, + { + "epoch": 0.1538742756868408, + "grad_norm": 7.687704941739925, + "learning_rate": 9.603082223695572e-06, + "loss": 17.8355, + "step": 8418 + }, + { + "epoch": 0.15389255488328732, + "grad_norm": 5.9273300557097, + "learning_rate": 9.602966632556812e-06, + "loss": 17.266, + "step": 8419 + }, + { + "epoch": 0.15391083407973385, + "grad_norm": 7.122823065006941, + "learning_rate": 9.602851025285052e-06, + "loss": 17.7552, + "step": 8420 + }, + { + "epoch": 0.1539291132761804, + "grad_norm": 6.624075396223145, + "learning_rate": 9.602735401880699e-06, + "loss": 17.7015, + "step": 8421 + }, + { + "epoch": 0.1539473924726269, + "grad_norm": 6.480000062232727, + "learning_rate": 9.602619762344156e-06, + 
"loss": 17.4433, + "step": 8422 + }, + { + "epoch": 0.15396567166907343, + "grad_norm": 7.189402278528076, + "learning_rate": 9.602504106675832e-06, + "loss": 17.7305, + "step": 8423 + }, + { + "epoch": 0.15398395086551994, + "grad_norm": 8.228315896538703, + "learning_rate": 9.60238843487613e-06, + "loss": 18.2283, + "step": 8424 + }, + { + "epoch": 0.15400223006196648, + "grad_norm": 6.209607023376287, + "learning_rate": 9.602272746945455e-06, + "loss": 17.4764, + "step": 8425 + }, + { + "epoch": 0.154020509258413, + "grad_norm": 7.474945940669149, + "learning_rate": 9.602157042884214e-06, + "loss": 18.2579, + "step": 8426 + }, + { + "epoch": 0.15403878845485952, + "grad_norm": 8.210926673852006, + "learning_rate": 9.602041322692811e-06, + "loss": 17.5434, + "step": 8427 + }, + { + "epoch": 0.15405706765130606, + "grad_norm": 6.769935715137518, + "learning_rate": 9.601925586371655e-06, + "loss": 17.7453, + "step": 8428 + }, + { + "epoch": 0.15407534684775256, + "grad_norm": 6.101879424645034, + "learning_rate": 9.601809833921148e-06, + "loss": 17.1897, + "step": 8429 + }, + { + "epoch": 0.1540936260441991, + "grad_norm": 7.916061720624111, + "learning_rate": 9.601694065341697e-06, + "loss": 18.0381, + "step": 8430 + }, + { + "epoch": 0.1541119052406456, + "grad_norm": 8.024275521095763, + "learning_rate": 9.601578280633707e-06, + "loss": 18.1038, + "step": 8431 + }, + { + "epoch": 0.15413018443709214, + "grad_norm": 6.076529789367406, + "learning_rate": 9.601462479797585e-06, + "loss": 17.3861, + "step": 8432 + }, + { + "epoch": 0.15414846363353868, + "grad_norm": 6.127567418286736, + "learning_rate": 9.601346662833735e-06, + "loss": 17.5513, + "step": 8433 + }, + { + "epoch": 0.1541667428299852, + "grad_norm": 7.639056821110453, + "learning_rate": 9.601230829742566e-06, + "loss": 17.6508, + "step": 8434 + }, + { + "epoch": 0.15418502202643172, + "grad_norm": 5.856297758944837, + "learning_rate": 9.601114980524481e-06, + "loss": 17.1349, + "step": 8435 + }, + { + "epoch": 0.15420330122287823, + "grad_norm": 7.046360103050307, + "learning_rate": 9.600999115179888e-06, + "loss": 17.7374, + "step": 8436 + }, + { + "epoch": 0.15422158041932477, + "grad_norm": 8.808883027907436, + "learning_rate": 9.600883233709192e-06, + "loss": 18.8813, + "step": 8437 + }, + { + "epoch": 0.1542398596157713, + "grad_norm": 6.65821248202838, + "learning_rate": 9.6007673361128e-06, + "loss": 17.6938, + "step": 8438 + }, + { + "epoch": 0.1542581388122178, + "grad_norm": 8.040611572705162, + "learning_rate": 9.600651422391116e-06, + "loss": 17.9305, + "step": 8439 + }, + { + "epoch": 0.15427641800866435, + "grad_norm": 6.922120606496327, + "learning_rate": 9.600535492544551e-06, + "loss": 17.6077, + "step": 8440 + }, + { + "epoch": 0.15429469720511085, + "grad_norm": 6.183653454314233, + "learning_rate": 9.600419546573506e-06, + "loss": 17.3557, + "step": 8441 + }, + { + "epoch": 0.1543129764015574, + "grad_norm": 7.595367575093661, + "learning_rate": 9.60030358447839e-06, + "loss": 18.0367, + "step": 8442 + }, + { + "epoch": 0.15433125559800392, + "grad_norm": 5.957698249849723, + "learning_rate": 9.60018760625961e-06, + "loss": 17.3916, + "step": 8443 + }, + { + "epoch": 0.15434953479445043, + "grad_norm": 7.12068244520129, + "learning_rate": 9.60007161191757e-06, + "loss": 17.7851, + "step": 8444 + }, + { + "epoch": 0.15436781399089697, + "grad_norm": 6.939791736338509, + "learning_rate": 9.59995560145268e-06, + "loss": 17.8164, + "step": 8445 + }, + { + "epoch": 0.15438609318734348, + "grad_norm": 
6.039847122761099, + "learning_rate": 9.599839574865345e-06, + "loss": 17.2839, + "step": 8446 + }, + { + "epoch": 0.15440437238379, + "grad_norm": 7.360061800195574, + "learning_rate": 9.59972353215597e-06, + "loss": 17.5683, + "step": 8447 + }, + { + "epoch": 0.15442265158023652, + "grad_norm": 7.856934463667765, + "learning_rate": 9.599607473324963e-06, + "loss": 17.6077, + "step": 8448 + }, + { + "epoch": 0.15444093077668306, + "grad_norm": 5.795155529479495, + "learning_rate": 9.599491398372731e-06, + "loss": 17.2855, + "step": 8449 + }, + { + "epoch": 0.1544592099731296, + "grad_norm": 6.385509459357291, + "learning_rate": 9.599375307299682e-06, + "loss": 17.6995, + "step": 8450 + }, + { + "epoch": 0.1544774891695761, + "grad_norm": 7.730295345935798, + "learning_rate": 9.59925920010622e-06, + "loss": 17.8271, + "step": 8451 + }, + { + "epoch": 0.15449576836602263, + "grad_norm": 6.656709036539596, + "learning_rate": 9.599143076792756e-06, + "loss": 17.5784, + "step": 8452 + }, + { + "epoch": 0.15451404756246914, + "grad_norm": 6.494362497958026, + "learning_rate": 9.599026937359694e-06, + "loss": 17.5043, + "step": 8453 + }, + { + "epoch": 0.15453232675891568, + "grad_norm": 7.926585516267862, + "learning_rate": 9.59891078180744e-06, + "loss": 17.8609, + "step": 8454 + }, + { + "epoch": 0.1545506059553622, + "grad_norm": 7.2283692594229745, + "learning_rate": 9.598794610136405e-06, + "loss": 17.9943, + "step": 8455 + }, + { + "epoch": 0.15456888515180872, + "grad_norm": 8.839924010422484, + "learning_rate": 9.598678422346992e-06, + "loss": 18.5454, + "step": 8456 + }, + { + "epoch": 0.15458716434825526, + "grad_norm": 6.560765416062664, + "learning_rate": 9.598562218439612e-06, + "loss": 17.6048, + "step": 8457 + }, + { + "epoch": 0.15460544354470176, + "grad_norm": 5.979901552116303, + "learning_rate": 9.598445998414668e-06, + "loss": 17.2934, + "step": 8458 + }, + { + "epoch": 0.1546237227411483, + "grad_norm": 7.905131329432131, + "learning_rate": 9.598329762272571e-06, + "loss": 18.2834, + "step": 8459 + }, + { + "epoch": 0.15464200193759484, + "grad_norm": 7.238376643054584, + "learning_rate": 9.598213510013728e-06, + "loss": 17.7917, + "step": 8460 + }, + { + "epoch": 0.15466028113404134, + "grad_norm": 8.377683188548191, + "learning_rate": 9.598097241638544e-06, + "loss": 18.7146, + "step": 8461 + }, + { + "epoch": 0.15467856033048788, + "grad_norm": 8.320108141643493, + "learning_rate": 9.59798095714743e-06, + "loss": 18.2979, + "step": 8462 + }, + { + "epoch": 0.1546968395269344, + "grad_norm": 5.638796431764524, + "learning_rate": 9.597864656540789e-06, + "loss": 17.1185, + "step": 8463 + }, + { + "epoch": 0.15471511872338092, + "grad_norm": 7.106385638757528, + "learning_rate": 9.597748339819035e-06, + "loss": 17.7804, + "step": 8464 + }, + { + "epoch": 0.15473339791982743, + "grad_norm": 6.978364375188652, + "learning_rate": 9.597632006982569e-06, + "loss": 17.5941, + "step": 8465 + }, + { + "epoch": 0.15475167711627397, + "grad_norm": 5.698546235618529, + "learning_rate": 9.597515658031804e-06, + "loss": 17.3585, + "step": 8466 + }, + { + "epoch": 0.1547699563127205, + "grad_norm": 8.112020435876998, + "learning_rate": 9.597399292967144e-06, + "loss": 18.0897, + "step": 8467 + }, + { + "epoch": 0.154788235509167, + "grad_norm": 7.282222268329545, + "learning_rate": 9.597282911789e-06, + "loss": 18.0584, + "step": 8468 + }, + { + "epoch": 0.15480651470561355, + "grad_norm": 7.765696488224575, + "learning_rate": 9.597166514497777e-06, + "loss": 17.7791, + "step": 8469 + 
}, + { + "epoch": 0.15482479390206005, + "grad_norm": 6.515923023581861, + "learning_rate": 9.597050101093885e-06, + "loss": 17.3934, + "step": 8470 + }, + { + "epoch": 0.1548430730985066, + "grad_norm": 7.251663888841731, + "learning_rate": 9.596933671577731e-06, + "loss": 17.911, + "step": 8471 + }, + { + "epoch": 0.15486135229495313, + "grad_norm": 6.547575766660995, + "learning_rate": 9.596817225949722e-06, + "loss": 17.2965, + "step": 8472 + }, + { + "epoch": 0.15487963149139963, + "grad_norm": 7.60869051294314, + "learning_rate": 9.59670076421027e-06, + "loss": 17.9197, + "step": 8473 + }, + { + "epoch": 0.15489791068784617, + "grad_norm": 6.14338822215821, + "learning_rate": 9.59658428635978e-06, + "loss": 17.379, + "step": 8474 + }, + { + "epoch": 0.15491618988429268, + "grad_norm": 6.5197838892209194, + "learning_rate": 9.59646779239866e-06, + "loss": 17.4835, + "step": 8475 + }, + { + "epoch": 0.1549344690807392, + "grad_norm": 6.654093881851663, + "learning_rate": 9.59635128232732e-06, + "loss": 17.7212, + "step": 8476 + }, + { + "epoch": 0.15495274827718575, + "grad_norm": 7.3700092470496195, + "learning_rate": 9.596234756146167e-06, + "loss": 17.4965, + "step": 8477 + }, + { + "epoch": 0.15497102747363226, + "grad_norm": 6.659004832144627, + "learning_rate": 9.596118213855611e-06, + "loss": 17.415, + "step": 8478 + }, + { + "epoch": 0.1549893066700788, + "grad_norm": 6.1720652632126685, + "learning_rate": 9.596001655456059e-06, + "loss": 17.6741, + "step": 8479 + }, + { + "epoch": 0.1550075858665253, + "grad_norm": 7.032000921413487, + "learning_rate": 9.59588508094792e-06, + "loss": 18.3352, + "step": 8480 + }, + { + "epoch": 0.15502586506297183, + "grad_norm": 6.243013315635809, + "learning_rate": 9.595768490331603e-06, + "loss": 17.2966, + "step": 8481 + }, + { + "epoch": 0.15504414425941834, + "grad_norm": 7.426378919149949, + "learning_rate": 9.595651883607514e-06, + "loss": 17.4242, + "step": 8482 + }, + { + "epoch": 0.15506242345586488, + "grad_norm": 7.1075562450734235, + "learning_rate": 9.595535260776066e-06, + "loss": 17.8095, + "step": 8483 + }, + { + "epoch": 0.15508070265231141, + "grad_norm": 5.365278674569126, + "learning_rate": 9.595418621837667e-06, + "loss": 17.0794, + "step": 8484 + }, + { + "epoch": 0.15509898184875792, + "grad_norm": 6.349419628952927, + "learning_rate": 9.595301966792722e-06, + "loss": 17.5486, + "step": 8485 + }, + { + "epoch": 0.15511726104520446, + "grad_norm": 7.738224246855898, + "learning_rate": 9.595185295641644e-06, + "loss": 18.1471, + "step": 8486 + }, + { + "epoch": 0.15513554024165097, + "grad_norm": 7.300456034374533, + "learning_rate": 9.59506860838484e-06, + "loss": 17.767, + "step": 8487 + }, + { + "epoch": 0.1551538194380975, + "grad_norm": 6.674806091221157, + "learning_rate": 9.594951905022718e-06, + "loss": 17.3993, + "step": 8488 + }, + { + "epoch": 0.15517209863454404, + "grad_norm": 5.689164721165032, + "learning_rate": 9.594835185555688e-06, + "loss": 17.0957, + "step": 8489 + }, + { + "epoch": 0.15519037783099054, + "grad_norm": 7.221146427191833, + "learning_rate": 9.594718449984162e-06, + "loss": 17.7497, + "step": 8490 + }, + { + "epoch": 0.15520865702743708, + "grad_norm": 6.531405122433829, + "learning_rate": 9.594601698308545e-06, + "loss": 17.5837, + "step": 8491 + }, + { + "epoch": 0.1552269362238836, + "grad_norm": 7.893076460747014, + "learning_rate": 9.594484930529248e-06, + "loss": 18.0133, + "step": 8492 + }, + { + "epoch": 0.15524521542033012, + "grad_norm": 6.635984489532212, + "learning_rate": 
9.59436814664668e-06, + "loss": 17.7828, + "step": 8493 + }, + { + "epoch": 0.15526349461677666, + "grad_norm": 7.302024162710131, + "learning_rate": 9.594251346661249e-06, + "loss": 17.8628, + "step": 8494 + }, + { + "epoch": 0.15528177381322317, + "grad_norm": 6.549778065930616, + "learning_rate": 9.594134530573367e-06, + "loss": 17.3769, + "step": 8495 + }, + { + "epoch": 0.1553000530096697, + "grad_norm": 7.195584202554672, + "learning_rate": 9.594017698383442e-06, + "loss": 17.7915, + "step": 8496 + }, + { + "epoch": 0.1553183322061162, + "grad_norm": 9.34243831281511, + "learning_rate": 9.593900850091885e-06, + "loss": 18.2791, + "step": 8497 + }, + { + "epoch": 0.15533661140256275, + "grad_norm": 5.988360475293336, + "learning_rate": 9.593783985699101e-06, + "loss": 17.5919, + "step": 8498 + }, + { + "epoch": 0.15535489059900925, + "grad_norm": 6.715506159691648, + "learning_rate": 9.593667105205506e-06, + "loss": 17.8342, + "step": 8499 + }, + { + "epoch": 0.1553731697954558, + "grad_norm": 6.318911904885308, + "learning_rate": 9.593550208611505e-06, + "loss": 17.4947, + "step": 8500 + }, + { + "epoch": 0.15539144899190233, + "grad_norm": 8.154867457940789, + "learning_rate": 9.59343329591751e-06, + "loss": 18.4076, + "step": 8501 + }, + { + "epoch": 0.15540972818834883, + "grad_norm": 6.3570347394540265, + "learning_rate": 9.593316367123928e-06, + "loss": 17.2772, + "step": 8502 + }, + { + "epoch": 0.15542800738479537, + "grad_norm": 8.662408364427396, + "learning_rate": 9.593199422231173e-06, + "loss": 18.6454, + "step": 8503 + }, + { + "epoch": 0.15544628658124188, + "grad_norm": 6.591927918941316, + "learning_rate": 9.59308246123965e-06, + "loss": 17.6183, + "step": 8504 + }, + { + "epoch": 0.1554645657776884, + "grad_norm": 7.350871149854708, + "learning_rate": 9.592965484149772e-06, + "loss": 17.6501, + "step": 8505 + }, + { + "epoch": 0.15548284497413495, + "grad_norm": 6.2228465194042615, + "learning_rate": 9.59284849096195e-06, + "loss": 17.3951, + "step": 8506 + }, + { + "epoch": 0.15550112417058146, + "grad_norm": 7.340207689427119, + "learning_rate": 9.592731481676592e-06, + "loss": 17.8985, + "step": 8507 + }, + { + "epoch": 0.155519403367028, + "grad_norm": 5.860694243527434, + "learning_rate": 9.59261445629411e-06, + "loss": 17.2409, + "step": 8508 + }, + { + "epoch": 0.1555376825634745, + "grad_norm": 8.852024998275656, + "learning_rate": 9.592497414814911e-06, + "loss": 17.4417, + "step": 8509 + }, + { + "epoch": 0.15555596175992104, + "grad_norm": 8.071592123190793, + "learning_rate": 9.592380357239408e-06, + "loss": 17.9372, + "step": 8510 + }, + { + "epoch": 0.15557424095636757, + "grad_norm": 5.926252834765691, + "learning_rate": 9.592263283568008e-06, + "loss": 17.1788, + "step": 8511 + }, + { + "epoch": 0.15559252015281408, + "grad_norm": 6.7011376664875915, + "learning_rate": 9.592146193801127e-06, + "loss": 17.6574, + "step": 8512 + }, + { + "epoch": 0.15561079934926061, + "grad_norm": 6.303016037431096, + "learning_rate": 9.59202908793917e-06, + "loss": 17.4543, + "step": 8513 + }, + { + "epoch": 0.15562907854570712, + "grad_norm": 7.412547093100808, + "learning_rate": 9.59191196598255e-06, + "loss": 18.2372, + "step": 8514 + }, + { + "epoch": 0.15564735774215366, + "grad_norm": 6.53962961001332, + "learning_rate": 9.591794827931679e-06, + "loss": 17.4454, + "step": 8515 + }, + { + "epoch": 0.15566563693860017, + "grad_norm": 5.892445713863878, + "learning_rate": 9.591677673786963e-06, + "loss": 17.3634, + "step": 8516 + }, + { + "epoch": 
0.1556839161350467, + "grad_norm": 6.433111180533362, + "learning_rate": 9.591560503548816e-06, + "loss": 17.5807, + "step": 8517 + }, + { + "epoch": 0.15570219533149324, + "grad_norm": 7.891231384464062, + "learning_rate": 9.591443317217647e-06, + "loss": 18.1661, + "step": 8518 + }, + { + "epoch": 0.15572047452793975, + "grad_norm": 6.588605284400302, + "learning_rate": 9.591326114793871e-06, + "loss": 17.68, + "step": 8519 + }, + { + "epoch": 0.15573875372438628, + "grad_norm": 6.726872242898803, + "learning_rate": 9.591208896277892e-06, + "loss": 17.7196, + "step": 8520 + }, + { + "epoch": 0.1557570329208328, + "grad_norm": 6.676150957695583, + "learning_rate": 9.591091661670125e-06, + "loss": 17.7211, + "step": 8521 + }, + { + "epoch": 0.15577531211727932, + "grad_norm": 6.354196660907414, + "learning_rate": 9.590974410970981e-06, + "loss": 17.5494, + "step": 8522 + }, + { + "epoch": 0.15579359131372586, + "grad_norm": 8.81674414046929, + "learning_rate": 9.59085714418087e-06, + "loss": 18.3573, + "step": 8523 + }, + { + "epoch": 0.15581187051017237, + "grad_norm": 6.622294384275694, + "learning_rate": 9.590739861300202e-06, + "loss": 17.6632, + "step": 8524 + }, + { + "epoch": 0.1558301497066189, + "grad_norm": 5.559659622657357, + "learning_rate": 9.59062256232939e-06, + "loss": 17.2316, + "step": 8525 + }, + { + "epoch": 0.1558484289030654, + "grad_norm": 7.307350725079425, + "learning_rate": 9.590505247268842e-06, + "loss": 18.2184, + "step": 8526 + }, + { + "epoch": 0.15586670809951195, + "grad_norm": 6.817131197650809, + "learning_rate": 9.590387916118975e-06, + "loss": 17.5981, + "step": 8527 + }, + { + "epoch": 0.15588498729595848, + "grad_norm": 7.096014670023533, + "learning_rate": 9.590270568880194e-06, + "loss": 18.1983, + "step": 8528 + }, + { + "epoch": 0.155903266492405, + "grad_norm": 7.992282933840935, + "learning_rate": 9.590153205552914e-06, + "loss": 18.0549, + "step": 8529 + }, + { + "epoch": 0.15592154568885153, + "grad_norm": 6.723702128700275, + "learning_rate": 9.590035826137546e-06, + "loss": 17.6164, + "step": 8530 + }, + { + "epoch": 0.15593982488529803, + "grad_norm": 6.69807799183464, + "learning_rate": 9.5899184306345e-06, + "loss": 17.5677, + "step": 8531 + }, + { + "epoch": 0.15595810408174457, + "grad_norm": 7.331194963743321, + "learning_rate": 9.589801019044188e-06, + "loss": 17.8097, + "step": 8532 + }, + { + "epoch": 0.15597638327819108, + "grad_norm": 6.734347726978225, + "learning_rate": 9.589683591367022e-06, + "loss": 17.5873, + "step": 8533 + }, + { + "epoch": 0.1559946624746376, + "grad_norm": 11.856050505777045, + "learning_rate": 9.589566147603413e-06, + "loss": 17.5046, + "step": 8534 + }, + { + "epoch": 0.15601294167108415, + "grad_norm": 7.222427471083795, + "learning_rate": 9.589448687753773e-06, + "loss": 17.6746, + "step": 8535 + }, + { + "epoch": 0.15603122086753066, + "grad_norm": 7.3315001487201705, + "learning_rate": 9.589331211818515e-06, + "loss": 17.8191, + "step": 8536 + }, + { + "epoch": 0.1560495000639772, + "grad_norm": 7.039387644346871, + "learning_rate": 9.589213719798048e-06, + "loss": 17.7906, + "step": 8537 + }, + { + "epoch": 0.1560677792604237, + "grad_norm": 6.956771012404695, + "learning_rate": 9.589096211692785e-06, + "loss": 17.8115, + "step": 8538 + }, + { + "epoch": 0.15608605845687024, + "grad_norm": 7.537196469773764, + "learning_rate": 9.588978687503139e-06, + "loss": 18.0115, + "step": 8539 + }, + { + "epoch": 0.15610433765331677, + "grad_norm": 7.644245909337144, + "learning_rate": 9.588861147229521e-06, 
+ "loss": 17.8212, + "step": 8540 + }, + { + "epoch": 0.15612261684976328, + "grad_norm": 8.022592212437466, + "learning_rate": 9.58874359087234e-06, + "loss": 18.2235, + "step": 8541 + }, + { + "epoch": 0.15614089604620981, + "grad_norm": 7.173768077180046, + "learning_rate": 9.588626018432014e-06, + "loss": 17.6495, + "step": 8542 + }, + { + "epoch": 0.15615917524265632, + "grad_norm": 6.66266495172659, + "learning_rate": 9.58850842990895e-06, + "loss": 17.455, + "step": 8543 + }, + { + "epoch": 0.15617745443910286, + "grad_norm": 5.842986755428583, + "learning_rate": 9.588390825303564e-06, + "loss": 17.1366, + "step": 8544 + }, + { + "epoch": 0.1561957336355494, + "grad_norm": 7.2621323602948715, + "learning_rate": 9.588273204616266e-06, + "loss": 17.7071, + "step": 8545 + }, + { + "epoch": 0.1562140128319959, + "grad_norm": 5.88392891932802, + "learning_rate": 9.588155567847469e-06, + "loss": 17.0827, + "step": 8546 + }, + { + "epoch": 0.15623229202844244, + "grad_norm": 7.344325399835583, + "learning_rate": 9.588037914997582e-06, + "loss": 17.7232, + "step": 8547 + }, + { + "epoch": 0.15625057122488895, + "grad_norm": 5.859663652010187, + "learning_rate": 9.587920246067022e-06, + "loss": 17.3981, + "step": 8548 + }, + { + "epoch": 0.15626885042133548, + "grad_norm": 6.56078940492395, + "learning_rate": 9.5878025610562e-06, + "loss": 17.5337, + "step": 8549 + }, + { + "epoch": 0.156287129617782, + "grad_norm": 7.681235712099995, + "learning_rate": 9.587684859965529e-06, + "loss": 17.8948, + "step": 8550 + }, + { + "epoch": 0.15630540881422852, + "grad_norm": 7.524388588177178, + "learning_rate": 9.587567142795419e-06, + "loss": 17.806, + "step": 8551 + }, + { + "epoch": 0.15632368801067506, + "grad_norm": 8.154780063802253, + "learning_rate": 9.587449409546284e-06, + "loss": 18.2139, + "step": 8552 + }, + { + "epoch": 0.15634196720712157, + "grad_norm": 8.234716189725752, + "learning_rate": 9.587331660218537e-06, + "loss": 17.9248, + "step": 8553 + }, + { + "epoch": 0.1563602464035681, + "grad_norm": 6.719842263902505, + "learning_rate": 9.587213894812593e-06, + "loss": 17.6152, + "step": 8554 + }, + { + "epoch": 0.1563785256000146, + "grad_norm": 6.783205588697717, + "learning_rate": 9.58709611332886e-06, + "loss": 17.7465, + "step": 8555 + }, + { + "epoch": 0.15639680479646115, + "grad_norm": 7.795942526111688, + "learning_rate": 9.586978315767755e-06, + "loss": 18.381, + "step": 8556 + }, + { + "epoch": 0.15641508399290768, + "grad_norm": 7.623458633933016, + "learning_rate": 9.586860502129686e-06, + "loss": 17.8816, + "step": 8557 + }, + { + "epoch": 0.1564333631893542, + "grad_norm": 6.789083733935318, + "learning_rate": 9.586742672415073e-06, + "loss": 17.8753, + "step": 8558 + }, + { + "epoch": 0.15645164238580073, + "grad_norm": 7.157241078959216, + "learning_rate": 9.586624826624322e-06, + "loss": 17.9102, + "step": 8559 + }, + { + "epoch": 0.15646992158224723, + "grad_norm": 5.642902572370437, + "learning_rate": 9.586506964757849e-06, + "loss": 17.266, + "step": 8560 + }, + { + "epoch": 0.15648820077869377, + "grad_norm": 7.0361229462686525, + "learning_rate": 9.586389086816068e-06, + "loss": 17.6732, + "step": 8561 + }, + { + "epoch": 0.1565064799751403, + "grad_norm": 8.400883349565074, + "learning_rate": 9.586271192799392e-06, + "loss": 18.2939, + "step": 8562 + }, + { + "epoch": 0.1565247591715868, + "grad_norm": 7.9279995310918, + "learning_rate": 9.586153282708233e-06, + "loss": 18.1139, + "step": 8563 + }, + { + "epoch": 0.15654303836803335, + "grad_norm": 
5.758986063647926, + "learning_rate": 9.586035356543005e-06, + "loss": 17.2393, + "step": 8564 + }, + { + "epoch": 0.15656131756447986, + "grad_norm": 6.874973190523697, + "learning_rate": 9.585917414304119e-06, + "loss": 17.6617, + "step": 8565 + }, + { + "epoch": 0.1565795967609264, + "grad_norm": 8.324872630273362, + "learning_rate": 9.58579945599199e-06, + "loss": 18.464, + "step": 8566 + }, + { + "epoch": 0.1565978759573729, + "grad_norm": 7.410527394539033, + "learning_rate": 9.585681481607035e-06, + "loss": 18.2697, + "step": 8567 + }, + { + "epoch": 0.15661615515381944, + "grad_norm": 7.343080238999604, + "learning_rate": 9.585563491149663e-06, + "loss": 18.1351, + "step": 8568 + }, + { + "epoch": 0.15663443435026597, + "grad_norm": 7.575183517132464, + "learning_rate": 9.585445484620288e-06, + "loss": 17.939, + "step": 8569 + }, + { + "epoch": 0.15665271354671248, + "grad_norm": 8.1357613990169, + "learning_rate": 9.585327462019327e-06, + "loss": 18.5413, + "step": 8570 + }, + { + "epoch": 0.15667099274315902, + "grad_norm": 6.779577490105369, + "learning_rate": 9.585209423347188e-06, + "loss": 17.4974, + "step": 8571 + }, + { + "epoch": 0.15668927193960552, + "grad_norm": 8.07894326475309, + "learning_rate": 9.58509136860429e-06, + "loss": 17.9928, + "step": 8572 + }, + { + "epoch": 0.15670755113605206, + "grad_norm": 7.0722506209810945, + "learning_rate": 9.584973297791045e-06, + "loss": 17.9897, + "step": 8573 + }, + { + "epoch": 0.1567258303324986, + "grad_norm": 6.438500881550452, + "learning_rate": 9.584855210907864e-06, + "loss": 17.5617, + "step": 8574 + }, + { + "epoch": 0.1567441095289451, + "grad_norm": 6.270999339681938, + "learning_rate": 9.584737107955165e-06, + "loss": 17.4895, + "step": 8575 + }, + { + "epoch": 0.15676238872539164, + "grad_norm": 7.517518615524435, + "learning_rate": 9.58461898893336e-06, + "loss": 17.9209, + "step": 8576 + }, + { + "epoch": 0.15678066792183815, + "grad_norm": 6.540675387314155, + "learning_rate": 9.584500853842865e-06, + "loss": 17.7416, + "step": 8577 + }, + { + "epoch": 0.15679894711828468, + "grad_norm": 6.896809487961548, + "learning_rate": 9.58438270268409e-06, + "loss": 18.0654, + "step": 8578 + }, + { + "epoch": 0.15681722631473122, + "grad_norm": 6.9106355894783436, + "learning_rate": 9.58426453545745e-06, + "loss": 17.3348, + "step": 8579 + }, + { + "epoch": 0.15683550551117773, + "grad_norm": 6.73826832223587, + "learning_rate": 9.584146352163365e-06, + "loss": 17.4553, + "step": 8580 + }, + { + "epoch": 0.15685378470762426, + "grad_norm": 7.9892625504157015, + "learning_rate": 9.58402815280224e-06, + "loss": 18.3169, + "step": 8581 + }, + { + "epoch": 0.15687206390407077, + "grad_norm": 6.190261729230549, + "learning_rate": 9.583909937374498e-06, + "loss": 17.3757, + "step": 8582 + }, + { + "epoch": 0.1568903431005173, + "grad_norm": 6.502355971704945, + "learning_rate": 9.583791705880548e-06, + "loss": 17.386, + "step": 8583 + }, + { + "epoch": 0.1569086222969638, + "grad_norm": 5.691456524327374, + "learning_rate": 9.583673458320806e-06, + "loss": 17.205, + "step": 8584 + }, + { + "epoch": 0.15692690149341035, + "grad_norm": 8.101221139386617, + "learning_rate": 9.583555194695686e-06, + "loss": 18.3278, + "step": 8585 + }, + { + "epoch": 0.15694518068985688, + "grad_norm": 7.13921274085771, + "learning_rate": 9.583436915005602e-06, + "loss": 17.7824, + "step": 8586 + }, + { + "epoch": 0.1569634598863034, + "grad_norm": 7.508935075788792, + "learning_rate": 9.583318619250973e-06, + "loss": 17.8597, + "step": 8587 + }, 
+ { + "epoch": 0.15698173908274993, + "grad_norm": 6.290705774851627, + "learning_rate": 9.583200307432206e-06, + "loss": 17.4956, + "step": 8588 + }, + { + "epoch": 0.15700001827919643, + "grad_norm": 7.18031074118022, + "learning_rate": 9.58308197954972e-06, + "loss": 17.493, + "step": 8589 + }, + { + "epoch": 0.15701829747564297, + "grad_norm": 7.055950999653975, + "learning_rate": 9.58296363560393e-06, + "loss": 17.7738, + "step": 8590 + }, + { + "epoch": 0.1570365766720895, + "grad_norm": 6.898125200720675, + "learning_rate": 9.582845275595252e-06, + "loss": 17.6118, + "step": 8591 + }, + { + "epoch": 0.15705485586853601, + "grad_norm": 6.822839256959291, + "learning_rate": 9.582726899524096e-06, + "loss": 17.5974, + "step": 8592 + }, + { + "epoch": 0.15707313506498255, + "grad_norm": 7.498924502404051, + "learning_rate": 9.582608507390883e-06, + "loss": 17.9733, + "step": 8593 + }, + { + "epoch": 0.15709141426142906, + "grad_norm": 6.109250911940779, + "learning_rate": 9.582490099196023e-06, + "loss": 17.4529, + "step": 8594 + }, + { + "epoch": 0.1571096934578756, + "grad_norm": 7.184334856210555, + "learning_rate": 9.582371674939932e-06, + "loss": 17.7924, + "step": 8595 + }, + { + "epoch": 0.15712797265432213, + "grad_norm": 6.6232562938971755, + "learning_rate": 9.582253234623027e-06, + "loss": 17.388, + "step": 8596 + }, + { + "epoch": 0.15714625185076864, + "grad_norm": 7.165670758294181, + "learning_rate": 9.582134778245722e-06, + "loss": 17.7685, + "step": 8597 + }, + { + "epoch": 0.15716453104721517, + "grad_norm": 6.425743050384466, + "learning_rate": 9.582016305808433e-06, + "loss": 17.3309, + "step": 8598 + }, + { + "epoch": 0.15718281024366168, + "grad_norm": 6.040470941431053, + "learning_rate": 9.581897817311571e-06, + "loss": 17.4582, + "step": 8599 + }, + { + "epoch": 0.15720108944010822, + "grad_norm": 6.525849640296614, + "learning_rate": 9.58177931275556e-06, + "loss": 17.5882, + "step": 8600 + }, + { + "epoch": 0.15721936863655472, + "grad_norm": 6.830962980849063, + "learning_rate": 9.581660792140807e-06, + "loss": 17.5274, + "step": 8601 + }, + { + "epoch": 0.15723764783300126, + "grad_norm": 5.97855634507643, + "learning_rate": 9.58154225546773e-06, + "loss": 17.4051, + "step": 8602 + }, + { + "epoch": 0.1572559270294478, + "grad_norm": 8.005231048138352, + "learning_rate": 9.581423702736747e-06, + "loss": 18.2805, + "step": 8603 + }, + { + "epoch": 0.1572742062258943, + "grad_norm": 6.965833565128532, + "learning_rate": 9.581305133948269e-06, + "loss": 17.9589, + "step": 8604 + }, + { + "epoch": 0.15729248542234084, + "grad_norm": 8.307702529661622, + "learning_rate": 9.581186549102717e-06, + "loss": 18.2579, + "step": 8605 + }, + { + "epoch": 0.15731076461878735, + "grad_norm": 6.439174824316372, + "learning_rate": 9.581067948200503e-06, + "loss": 17.3737, + "step": 8606 + }, + { + "epoch": 0.15732904381523388, + "grad_norm": 7.123205864341623, + "learning_rate": 9.580949331242042e-06, + "loss": 17.5323, + "step": 8607 + }, + { + "epoch": 0.15734732301168042, + "grad_norm": 7.049663918196313, + "learning_rate": 9.58083069822775e-06, + "loss": 17.8473, + "step": 8608 + }, + { + "epoch": 0.15736560220812693, + "grad_norm": 6.139725136135639, + "learning_rate": 9.580712049158046e-06, + "loss": 17.2127, + "step": 8609 + }, + { + "epoch": 0.15738388140457346, + "grad_norm": 8.677418539267073, + "learning_rate": 9.580593384033343e-06, + "loss": 18.3846, + "step": 8610 + }, + { + "epoch": 0.15740216060101997, + "grad_norm": 7.20631993395279, + "learning_rate": 
9.580474702854058e-06, + "loss": 18.1721, + "step": 8611 + }, + { + "epoch": 0.1574204397974665, + "grad_norm": 6.5505428804457795, + "learning_rate": 9.580356005620608e-06, + "loss": 17.4097, + "step": 8612 + }, + { + "epoch": 0.15743871899391304, + "grad_norm": 7.123841848182562, + "learning_rate": 9.580237292333406e-06, + "loss": 17.8687, + "step": 8613 + }, + { + "epoch": 0.15745699819035955, + "grad_norm": 6.39697252165992, + "learning_rate": 9.580118562992868e-06, + "loss": 17.3785, + "step": 8614 + }, + { + "epoch": 0.15747527738680608, + "grad_norm": 7.070870038480495, + "learning_rate": 9.579999817599415e-06, + "loss": 17.692, + "step": 8615 + }, + { + "epoch": 0.1574935565832526, + "grad_norm": 6.26053032908961, + "learning_rate": 9.579881056153459e-06, + "loss": 17.2914, + "step": 8616 + }, + { + "epoch": 0.15751183577969913, + "grad_norm": 6.697145280584922, + "learning_rate": 9.579762278655417e-06, + "loss": 17.5871, + "step": 8617 + }, + { + "epoch": 0.15753011497614564, + "grad_norm": 7.715165824108414, + "learning_rate": 9.579643485105706e-06, + "loss": 18.0104, + "step": 8618 + }, + { + "epoch": 0.15754839417259217, + "grad_norm": 6.722127511425423, + "learning_rate": 9.579524675504743e-06, + "loss": 17.7836, + "step": 8619 + }, + { + "epoch": 0.1575666733690387, + "grad_norm": 8.465236155753898, + "learning_rate": 9.579405849852942e-06, + "loss": 18.3863, + "step": 8620 + }, + { + "epoch": 0.15758495256548521, + "grad_norm": 7.765050473720229, + "learning_rate": 9.579287008150721e-06, + "loss": 18.0953, + "step": 8621 + }, + { + "epoch": 0.15760323176193175, + "grad_norm": 7.757385280429804, + "learning_rate": 9.579168150398496e-06, + "loss": 18.1099, + "step": 8622 + }, + { + "epoch": 0.15762151095837826, + "grad_norm": 6.575447720202369, + "learning_rate": 9.579049276596684e-06, + "loss": 17.8145, + "step": 8623 + }, + { + "epoch": 0.1576397901548248, + "grad_norm": 7.214456986634153, + "learning_rate": 9.578930386745704e-06, + "loss": 17.5937, + "step": 8624 + }, + { + "epoch": 0.15765806935127133, + "grad_norm": 6.66633688657013, + "learning_rate": 9.578811480845968e-06, + "loss": 17.6349, + "step": 8625 + }, + { + "epoch": 0.15767634854771784, + "grad_norm": 8.18054623746064, + "learning_rate": 9.578692558897895e-06, + "loss": 18.2933, + "step": 8626 + }, + { + "epoch": 0.15769462774416437, + "grad_norm": 5.721921894328776, + "learning_rate": 9.578573620901903e-06, + "loss": 17.0205, + "step": 8627 + }, + { + "epoch": 0.15771290694061088, + "grad_norm": 6.523186426909358, + "learning_rate": 9.578454666858408e-06, + "loss": 17.5072, + "step": 8628 + }, + { + "epoch": 0.15773118613705742, + "grad_norm": 6.5150941377836205, + "learning_rate": 9.578335696767825e-06, + "loss": 17.5661, + "step": 8629 + }, + { + "epoch": 0.15774946533350395, + "grad_norm": 6.904503266798888, + "learning_rate": 9.578216710630574e-06, + "loss": 17.4964, + "step": 8630 + }, + { + "epoch": 0.15776774452995046, + "grad_norm": 6.901044540590434, + "learning_rate": 9.57809770844707e-06, + "loss": 17.551, + "step": 8631 + }, + { + "epoch": 0.157786023726397, + "grad_norm": 6.8625124003165885, + "learning_rate": 9.577978690217732e-06, + "loss": 17.514, + "step": 8632 + }, + { + "epoch": 0.1578043029228435, + "grad_norm": 6.79711657068216, + "learning_rate": 9.577859655942975e-06, + "loss": 17.5851, + "step": 8633 + }, + { + "epoch": 0.15782258211929004, + "grad_norm": 7.428381409221911, + "learning_rate": 9.577740605623218e-06, + "loss": 18.121, + "step": 8634 + }, + { + "epoch": 
0.15784086131573655, + "grad_norm": 9.082209566902788, + "learning_rate": 9.577621539258876e-06, + "loss": 18.3479, + "step": 8635 + }, + { + "epoch": 0.15785914051218308, + "grad_norm": 6.739861152037539, + "learning_rate": 9.577502456850368e-06, + "loss": 17.7048, + "step": 8636 + }, + { + "epoch": 0.15787741970862962, + "grad_norm": 6.52170095545089, + "learning_rate": 9.577383358398111e-06, + "loss": 17.3415, + "step": 8637 + }, + { + "epoch": 0.15789569890507613, + "grad_norm": 6.253937057566765, + "learning_rate": 9.577264243902524e-06, + "loss": 17.3816, + "step": 8638 + }, + { + "epoch": 0.15791397810152266, + "grad_norm": 6.5586503017309035, + "learning_rate": 9.577145113364022e-06, + "loss": 17.7927, + "step": 8639 + }, + { + "epoch": 0.15793225729796917, + "grad_norm": 6.822586486919763, + "learning_rate": 9.577025966783025e-06, + "loss": 17.5244, + "step": 8640 + }, + { + "epoch": 0.1579505364944157, + "grad_norm": 6.156764339679498, + "learning_rate": 9.576906804159947e-06, + "loss": 17.5124, + "step": 8641 + }, + { + "epoch": 0.15796881569086224, + "grad_norm": 7.525565901915947, + "learning_rate": 9.57678762549521e-06, + "loss": 17.9959, + "step": 8642 + }, + { + "epoch": 0.15798709488730875, + "grad_norm": 7.573590715549222, + "learning_rate": 9.576668430789227e-06, + "loss": 17.8283, + "step": 8643 + }, + { + "epoch": 0.15800537408375528, + "grad_norm": 5.465960370810305, + "learning_rate": 9.576549220042419e-06, + "loss": 17.0043, + "step": 8644 + }, + { + "epoch": 0.1580236532802018, + "grad_norm": 7.507270295043513, + "learning_rate": 9.576429993255203e-06, + "loss": 18.4469, + "step": 8645 + }, + { + "epoch": 0.15804193247664833, + "grad_norm": 8.381314359700884, + "learning_rate": 9.576310750427998e-06, + "loss": 18.4688, + "step": 8646 + }, + { + "epoch": 0.15806021167309486, + "grad_norm": 5.195004795725945, + "learning_rate": 9.57619149156122e-06, + "loss": 17.1027, + "step": 8647 + }, + { + "epoch": 0.15807849086954137, + "grad_norm": 6.5466336494995305, + "learning_rate": 9.57607221665529e-06, + "loss": 17.6005, + "step": 8648 + }, + { + "epoch": 0.1580967700659879, + "grad_norm": 7.650693031787093, + "learning_rate": 9.57595292571062e-06, + "loss": 17.7174, + "step": 8649 + }, + { + "epoch": 0.15811504926243442, + "grad_norm": 7.524522513133388, + "learning_rate": 9.575833618727637e-06, + "loss": 18.1352, + "step": 8650 + }, + { + "epoch": 0.15813332845888095, + "grad_norm": 7.076198658972048, + "learning_rate": 9.575714295706751e-06, + "loss": 17.7296, + "step": 8651 + }, + { + "epoch": 0.15815160765532746, + "grad_norm": 9.334436224397965, + "learning_rate": 9.575594956648384e-06, + "loss": 18.0572, + "step": 8652 + }, + { + "epoch": 0.158169886851774, + "grad_norm": 6.865725757427967, + "learning_rate": 9.575475601552955e-06, + "loss": 17.5508, + "step": 8653 + }, + { + "epoch": 0.15818816604822053, + "grad_norm": 5.968781954249258, + "learning_rate": 9.57535623042088e-06, + "loss": 17.2271, + "step": 8654 + }, + { + "epoch": 0.15820644524466704, + "grad_norm": 7.927357273268601, + "learning_rate": 9.575236843252578e-06, + "loss": 17.9615, + "step": 8655 + }, + { + "epoch": 0.15822472444111357, + "grad_norm": 6.147631388732034, + "learning_rate": 9.575117440048469e-06, + "loss": 17.5935, + "step": 8656 + }, + { + "epoch": 0.15824300363756008, + "grad_norm": 6.664098577906064, + "learning_rate": 9.574998020808969e-06, + "loss": 17.3875, + "step": 8657 + }, + { + "epoch": 0.15826128283400662, + "grad_norm": 7.039980596920565, + "learning_rate": 
9.574878585534498e-06, + "loss": 17.7298, + "step": 8658 + }, + { + "epoch": 0.15827956203045315, + "grad_norm": 8.43221093342362, + "learning_rate": 9.574759134225476e-06, + "loss": 18.1742, + "step": 8659 + }, + { + "epoch": 0.15829784122689966, + "grad_norm": 7.32183485146767, + "learning_rate": 9.574639666882319e-06, + "loss": 17.7696, + "step": 8660 + }, + { + "epoch": 0.1583161204233462, + "grad_norm": 8.225441049432984, + "learning_rate": 9.574520183505447e-06, + "loss": 18.4105, + "step": 8661 + }, + { + "epoch": 0.1583343996197927, + "grad_norm": 7.767409983133676, + "learning_rate": 9.57440068409528e-06, + "loss": 18.0198, + "step": 8662 + }, + { + "epoch": 0.15835267881623924, + "grad_norm": 7.541541219196348, + "learning_rate": 9.574281168652234e-06, + "loss": 17.8972, + "step": 8663 + }, + { + "epoch": 0.15837095801268578, + "grad_norm": 7.343574019287776, + "learning_rate": 9.57416163717673e-06, + "loss": 17.9099, + "step": 8664 + }, + { + "epoch": 0.15838923720913228, + "grad_norm": 6.989636427625274, + "learning_rate": 9.574042089669186e-06, + "loss": 17.8249, + "step": 8665 + }, + { + "epoch": 0.15840751640557882, + "grad_norm": 7.189037334290815, + "learning_rate": 9.573922526130021e-06, + "loss": 18.0304, + "step": 8666 + }, + { + "epoch": 0.15842579560202533, + "grad_norm": 7.552814407373591, + "learning_rate": 9.573802946559656e-06, + "loss": 18.1647, + "step": 8667 + }, + { + "epoch": 0.15844407479847186, + "grad_norm": 7.719845968675812, + "learning_rate": 9.57368335095851e-06, + "loss": 18.1486, + "step": 8668 + }, + { + "epoch": 0.15846235399491837, + "grad_norm": 7.165440768937146, + "learning_rate": 9.573563739326997e-06, + "loss": 18.0737, + "step": 8669 + }, + { + "epoch": 0.1584806331913649, + "grad_norm": 7.197366971362831, + "learning_rate": 9.573444111665542e-06, + "loss": 17.9356, + "step": 8670 + }, + { + "epoch": 0.15849891238781144, + "grad_norm": 6.445771316820499, + "learning_rate": 9.573324467974562e-06, + "loss": 17.5002, + "step": 8671 + }, + { + "epoch": 0.15851719158425795, + "grad_norm": 7.326785099838132, + "learning_rate": 9.573204808254476e-06, + "loss": 17.7761, + "step": 8672 + }, + { + "epoch": 0.15853547078070448, + "grad_norm": 6.1440248773733375, + "learning_rate": 9.573085132505705e-06, + "loss": 17.5076, + "step": 8673 + }, + { + "epoch": 0.158553749977151, + "grad_norm": 7.317194130755906, + "learning_rate": 9.572965440728667e-06, + "loss": 17.8295, + "step": 8674 + }, + { + "epoch": 0.15857202917359753, + "grad_norm": 6.759062435788617, + "learning_rate": 9.572845732923781e-06, + "loss": 17.4328, + "step": 8675 + }, + { + "epoch": 0.15859030837004406, + "grad_norm": 6.7954543106881715, + "learning_rate": 9.572726009091469e-06, + "loss": 17.6332, + "step": 8676 + }, + { + "epoch": 0.15860858756649057, + "grad_norm": 8.776539660855445, + "learning_rate": 9.572606269232148e-06, + "loss": 18.7997, + "step": 8677 + }, + { + "epoch": 0.1586268667629371, + "grad_norm": 5.6294904211526005, + "learning_rate": 9.572486513346239e-06, + "loss": 17.1821, + "step": 8678 + }, + { + "epoch": 0.15864514595938362, + "grad_norm": 6.258365595852227, + "learning_rate": 9.572366741434163e-06, + "loss": 17.564, + "step": 8679 + }, + { + "epoch": 0.15866342515583015, + "grad_norm": 11.479545019600947, + "learning_rate": 9.572246953496336e-06, + "loss": 18.2172, + "step": 8680 + }, + { + "epoch": 0.1586817043522767, + "grad_norm": 6.403658755230618, + "learning_rate": 9.572127149533182e-06, + "loss": 17.5483, + "step": 8681 + }, + { + "epoch": 
0.1586999835487232, + "grad_norm": 5.101281347483226, + "learning_rate": 9.572007329545119e-06, + "loss": 16.9884, + "step": 8682 + }, + { + "epoch": 0.15871826274516973, + "grad_norm": 9.981835161632372, + "learning_rate": 9.571887493532566e-06, + "loss": 17.8174, + "step": 8683 + }, + { + "epoch": 0.15873654194161624, + "grad_norm": 6.548260835919465, + "learning_rate": 9.571767641495944e-06, + "loss": 17.3436, + "step": 8684 + }, + { + "epoch": 0.15875482113806277, + "grad_norm": 6.8461827153199275, + "learning_rate": 9.571647773435674e-06, + "loss": 17.8834, + "step": 8685 + }, + { + "epoch": 0.15877310033450928, + "grad_norm": 8.502495700321242, + "learning_rate": 9.571527889352174e-06, + "loss": 18.5276, + "step": 8686 + }, + { + "epoch": 0.15879137953095582, + "grad_norm": 6.176439201496272, + "learning_rate": 9.571407989245866e-06, + "loss": 17.4895, + "step": 8687 + }, + { + "epoch": 0.15880965872740235, + "grad_norm": 6.022477784888412, + "learning_rate": 9.571288073117171e-06, + "loss": 17.6312, + "step": 8688 + }, + { + "epoch": 0.15882793792384886, + "grad_norm": 7.118900886235516, + "learning_rate": 9.571168140966506e-06, + "loss": 17.8077, + "step": 8689 + }, + { + "epoch": 0.1588462171202954, + "grad_norm": 7.404382215336522, + "learning_rate": 9.571048192794297e-06, + "loss": 17.7534, + "step": 8690 + }, + { + "epoch": 0.1588644963167419, + "grad_norm": 7.673872954903309, + "learning_rate": 9.570928228600957e-06, + "loss": 18.2886, + "step": 8691 + }, + { + "epoch": 0.15888277551318844, + "grad_norm": 8.213756221609747, + "learning_rate": 9.570808248386911e-06, + "loss": 18.2046, + "step": 8692 + }, + { + "epoch": 0.15890105470963498, + "grad_norm": 6.004541473602897, + "learning_rate": 9.57068825215258e-06, + "loss": 17.3993, + "step": 8693 + }, + { + "epoch": 0.15891933390608148, + "grad_norm": 7.67699382473935, + "learning_rate": 9.570568239898383e-06, + "loss": 17.8866, + "step": 8694 + }, + { + "epoch": 0.15893761310252802, + "grad_norm": 7.801825759335127, + "learning_rate": 9.570448211624738e-06, + "loss": 17.812, + "step": 8695 + }, + { + "epoch": 0.15895589229897453, + "grad_norm": 6.661143090418699, + "learning_rate": 9.570328167332072e-06, + "loss": 17.5459, + "step": 8696 + }, + { + "epoch": 0.15897417149542106, + "grad_norm": 6.379851837271899, + "learning_rate": 9.570208107020802e-06, + "loss": 17.7867, + "step": 8697 + }, + { + "epoch": 0.1589924506918676, + "grad_norm": 7.174447656296335, + "learning_rate": 9.570088030691348e-06, + "loss": 17.6017, + "step": 8698 + }, + { + "epoch": 0.1590107298883141, + "grad_norm": 7.3451704570964695, + "learning_rate": 9.569967938344134e-06, + "loss": 18.1274, + "step": 8699 + }, + { + "epoch": 0.15902900908476064, + "grad_norm": 7.1401695622395485, + "learning_rate": 9.569847829979577e-06, + "loss": 17.5868, + "step": 8700 + }, + { + "epoch": 0.15904728828120715, + "grad_norm": 7.1894016334097985, + "learning_rate": 9.5697277055981e-06, + "loss": 17.7075, + "step": 8701 + }, + { + "epoch": 0.15906556747765369, + "grad_norm": 6.458617217159287, + "learning_rate": 9.569607565200123e-06, + "loss": 17.5532, + "step": 8702 + }, + { + "epoch": 0.1590838466741002, + "grad_norm": 6.825022394793046, + "learning_rate": 9.56948740878607e-06, + "loss": 17.9135, + "step": 8703 + }, + { + "epoch": 0.15910212587054673, + "grad_norm": 6.28830097810962, + "learning_rate": 9.56936723635636e-06, + "loss": 17.3174, + "step": 8704 + }, + { + "epoch": 0.15912040506699326, + "grad_norm": 7.956355899333767, + "learning_rate": 
9.569247047911414e-06, + "loss": 17.7205, + "step": 8705 + }, + { + "epoch": 0.15913868426343977, + "grad_norm": 6.332582650391563, + "learning_rate": 9.569126843451652e-06, + "loss": 17.463, + "step": 8706 + }, + { + "epoch": 0.1591569634598863, + "grad_norm": 7.004815411625668, + "learning_rate": 9.569006622977499e-06, + "loss": 17.6234, + "step": 8707 + }, + { + "epoch": 0.15917524265633282, + "grad_norm": 8.335114969107142, + "learning_rate": 9.568886386489373e-06, + "loss": 18.0666, + "step": 8708 + }, + { + "epoch": 0.15919352185277935, + "grad_norm": 5.926383063830562, + "learning_rate": 9.568766133987698e-06, + "loss": 17.3391, + "step": 8709 + }, + { + "epoch": 0.1592118010492259, + "grad_norm": 6.491640226766058, + "learning_rate": 9.568645865472893e-06, + "loss": 17.6675, + "step": 8710 + }, + { + "epoch": 0.1592300802456724, + "grad_norm": 6.932616478421611, + "learning_rate": 9.568525580945382e-06, + "loss": 17.6239, + "step": 8711 + }, + { + "epoch": 0.15924835944211893, + "grad_norm": 7.634918806624296, + "learning_rate": 9.568405280405583e-06, + "loss": 18.0179, + "step": 8712 + }, + { + "epoch": 0.15926663863856544, + "grad_norm": 7.197427622249253, + "learning_rate": 9.568284963853923e-06, + "loss": 17.6312, + "step": 8713 + }, + { + "epoch": 0.15928491783501197, + "grad_norm": 8.088800271659048, + "learning_rate": 9.568164631290819e-06, + "loss": 18.0679, + "step": 8714 + }, + { + "epoch": 0.1593031970314585, + "grad_norm": 6.445077687085442, + "learning_rate": 9.568044282716695e-06, + "loss": 17.4712, + "step": 8715 + }, + { + "epoch": 0.15932147622790502, + "grad_norm": 6.698055899410576, + "learning_rate": 9.567923918131971e-06, + "loss": 17.9143, + "step": 8716 + }, + { + "epoch": 0.15933975542435155, + "grad_norm": 7.635732485280628, + "learning_rate": 9.567803537537071e-06, + "loss": 18.2345, + "step": 8717 + }, + { + "epoch": 0.15935803462079806, + "grad_norm": 6.9098385174088675, + "learning_rate": 9.567683140932415e-06, + "loss": 17.914, + "step": 8718 + }, + { + "epoch": 0.1593763138172446, + "grad_norm": 7.689218781560204, + "learning_rate": 9.567562728318426e-06, + "loss": 18.0113, + "step": 8719 + }, + { + "epoch": 0.1593945930136911, + "grad_norm": 5.908448434642699, + "learning_rate": 9.567442299695526e-06, + "loss": 17.334, + "step": 8720 + }, + { + "epoch": 0.15941287221013764, + "grad_norm": 7.383001977435694, + "learning_rate": 9.567321855064137e-06, + "loss": 18.0264, + "step": 8721 + }, + { + "epoch": 0.15943115140658418, + "grad_norm": 6.0762845717344485, + "learning_rate": 9.567201394424683e-06, + "loss": 17.2396, + "step": 8722 + }, + { + "epoch": 0.15944943060303068, + "grad_norm": 7.249071343599725, + "learning_rate": 9.567080917777582e-06, + "loss": 18.3273, + "step": 8723 + }, + { + "epoch": 0.15946770979947722, + "grad_norm": 6.79927527377071, + "learning_rate": 9.566960425123262e-06, + "loss": 17.464, + "step": 8724 + }, + { + "epoch": 0.15948598899592373, + "grad_norm": 6.032798301046214, + "learning_rate": 9.566839916462139e-06, + "loss": 17.1785, + "step": 8725 + }, + { + "epoch": 0.15950426819237026, + "grad_norm": 7.2277709946029205, + "learning_rate": 9.566719391794639e-06, + "loss": 17.6834, + "step": 8726 + }, + { + "epoch": 0.1595225473888168, + "grad_norm": 7.274076078711354, + "learning_rate": 9.566598851121184e-06, + "loss": 17.9066, + "step": 8727 + }, + { + "epoch": 0.1595408265852633, + "grad_norm": 7.194632883119047, + "learning_rate": 9.566478294442197e-06, + "loss": 17.8001, + "step": 8728 + }, + { + "epoch": 
0.15955910578170984, + "grad_norm": 7.523618385263561, + "learning_rate": 9.566357721758099e-06, + "loss": 17.9979, + "step": 8729 + }, + { + "epoch": 0.15957738497815635, + "grad_norm": 5.817974321190137, + "learning_rate": 9.566237133069314e-06, + "loss": 17.3732, + "step": 8730 + }, + { + "epoch": 0.15959566417460289, + "grad_norm": 7.13909383047071, + "learning_rate": 9.566116528376264e-06, + "loss": 17.8938, + "step": 8731 + }, + { + "epoch": 0.15961394337104942, + "grad_norm": 8.348706340397504, + "learning_rate": 9.56599590767937e-06, + "loss": 18.6279, + "step": 8732 + }, + { + "epoch": 0.15963222256749593, + "grad_norm": 5.503249224940718, + "learning_rate": 9.56587527097906e-06, + "loss": 17.1792, + "step": 8733 + }, + { + "epoch": 0.15965050176394247, + "grad_norm": 6.56382302545479, + "learning_rate": 9.56575461827575e-06, + "loss": 17.7867, + "step": 8734 + }, + { + "epoch": 0.15966878096038897, + "grad_norm": 7.307967686522048, + "learning_rate": 9.565633949569869e-06, + "loss": 17.6034, + "step": 8735 + }, + { + "epoch": 0.1596870601568355, + "grad_norm": 6.929338459157738, + "learning_rate": 9.565513264861837e-06, + "loss": 17.5525, + "step": 8736 + }, + { + "epoch": 0.15970533935328202, + "grad_norm": 6.443022151956936, + "learning_rate": 9.565392564152074e-06, + "loss": 17.477, + "step": 8737 + }, + { + "epoch": 0.15972361854972855, + "grad_norm": 7.241973816833101, + "learning_rate": 9.56527184744101e-06, + "loss": 17.5325, + "step": 8738 + }, + { + "epoch": 0.1597418977461751, + "grad_norm": 6.563851454103735, + "learning_rate": 9.565151114729063e-06, + "loss": 17.4029, + "step": 8739 + }, + { + "epoch": 0.1597601769426216, + "grad_norm": 6.508498858443748, + "learning_rate": 9.565030366016656e-06, + "loss": 17.6719, + "step": 8740 + }, + { + "epoch": 0.15977845613906813, + "grad_norm": 6.456972815731345, + "learning_rate": 9.564909601304215e-06, + "loss": 17.4541, + "step": 8741 + }, + { + "epoch": 0.15979673533551464, + "grad_norm": 6.163034436791755, + "learning_rate": 9.564788820592162e-06, + "loss": 17.3692, + "step": 8742 + }, + { + "epoch": 0.15981501453196117, + "grad_norm": 5.575126929964949, + "learning_rate": 9.564668023880921e-06, + "loss": 17.1237, + "step": 8743 + }, + { + "epoch": 0.1598332937284077, + "grad_norm": 7.64526785634227, + "learning_rate": 9.564547211170914e-06, + "loss": 18.0375, + "step": 8744 + }, + { + "epoch": 0.15985157292485422, + "grad_norm": 7.417285485026281, + "learning_rate": 9.564426382462564e-06, + "loss": 17.5785, + "step": 8745 + }, + { + "epoch": 0.15986985212130075, + "grad_norm": 6.4554490728201195, + "learning_rate": 9.564305537756298e-06, + "loss": 17.3587, + "step": 8746 + }, + { + "epoch": 0.15988813131774726, + "grad_norm": 7.298964984532181, + "learning_rate": 9.564184677052536e-06, + "loss": 18.0229, + "step": 8747 + }, + { + "epoch": 0.1599064105141938, + "grad_norm": 6.613225166292407, + "learning_rate": 9.564063800351702e-06, + "loss": 17.5211, + "step": 8748 + }, + { + "epoch": 0.15992468971064033, + "grad_norm": 9.573566077771629, + "learning_rate": 9.56394290765422e-06, + "loss": 18.4496, + "step": 8749 + }, + { + "epoch": 0.15994296890708684, + "grad_norm": 6.406176375922915, + "learning_rate": 9.563821998960516e-06, + "loss": 17.5678, + "step": 8750 + }, + { + "epoch": 0.15996124810353338, + "grad_norm": 6.260061914930123, + "learning_rate": 9.56370107427101e-06, + "loss": 17.409, + "step": 8751 + }, + { + "epoch": 0.15997952729997988, + "grad_norm": 6.754535119047297, + "learning_rate": 9.56358013358613e-06, 
+ "loss": 17.56, + "step": 8752 + }, + { + "epoch": 0.15999780649642642, + "grad_norm": 6.432051614070615, + "learning_rate": 9.563459176906296e-06, + "loss": 17.7027, + "step": 8753 + }, + { + "epoch": 0.16001608569287293, + "grad_norm": 7.594418566449968, + "learning_rate": 9.563338204231933e-06, + "loss": 17.9458, + "step": 8754 + }, + { + "epoch": 0.16003436488931946, + "grad_norm": 7.371403461236743, + "learning_rate": 9.563217215563468e-06, + "loss": 17.6388, + "step": 8755 + }, + { + "epoch": 0.160052644085766, + "grad_norm": 8.453908317655701, + "learning_rate": 9.563096210901321e-06, + "loss": 18.4992, + "step": 8756 + }, + { + "epoch": 0.1600709232822125, + "grad_norm": 7.239148752398614, + "learning_rate": 9.562975190245917e-06, + "loss": 17.2711, + "step": 8757 + }, + { + "epoch": 0.16008920247865904, + "grad_norm": 7.535249967549871, + "learning_rate": 9.562854153597682e-06, + "loss": 18.0152, + "step": 8758 + }, + { + "epoch": 0.16010748167510555, + "grad_norm": 6.377766363405279, + "learning_rate": 9.56273310095704e-06, + "loss": 17.6365, + "step": 8759 + }, + { + "epoch": 0.1601257608715521, + "grad_norm": 5.729475222732704, + "learning_rate": 9.562612032324414e-06, + "loss": 17.0194, + "step": 8760 + }, + { + "epoch": 0.16014404006799862, + "grad_norm": 6.904956145505646, + "learning_rate": 9.562490947700228e-06, + "loss": 17.6042, + "step": 8761 + }, + { + "epoch": 0.16016231926444513, + "grad_norm": 5.786519415904625, + "learning_rate": 9.562369847084906e-06, + "loss": 17.294, + "step": 8762 + }, + { + "epoch": 0.16018059846089167, + "grad_norm": 6.966187542125642, + "learning_rate": 9.562248730478875e-06, + "loss": 17.7972, + "step": 8763 + }, + { + "epoch": 0.16019887765733817, + "grad_norm": 6.957106629142053, + "learning_rate": 9.56212759788256e-06, + "loss": 17.7768, + "step": 8764 + }, + { + "epoch": 0.1602171568537847, + "grad_norm": 7.653301452414369, + "learning_rate": 9.562006449296381e-06, + "loss": 17.6291, + "step": 8765 + }, + { + "epoch": 0.16023543605023124, + "grad_norm": 7.294945839487996, + "learning_rate": 9.561885284720767e-06, + "loss": 17.8276, + "step": 8766 + }, + { + "epoch": 0.16025371524667775, + "grad_norm": 7.134948801613209, + "learning_rate": 9.561764104156139e-06, + "loss": 17.7329, + "step": 8767 + }, + { + "epoch": 0.1602719944431243, + "grad_norm": 6.642006525302264, + "learning_rate": 9.561642907602923e-06, + "loss": 17.2391, + "step": 8768 + }, + { + "epoch": 0.1602902736395708, + "grad_norm": 6.475013961772217, + "learning_rate": 9.561521695061547e-06, + "loss": 17.4464, + "step": 8769 + }, + { + "epoch": 0.16030855283601733, + "grad_norm": 7.212239729576027, + "learning_rate": 9.561400466532433e-06, + "loss": 17.1043, + "step": 8770 + }, + { + "epoch": 0.16032683203246384, + "grad_norm": 7.095701461594068, + "learning_rate": 9.561279222016004e-06, + "loss": 17.7539, + "step": 8771 + }, + { + "epoch": 0.16034511122891038, + "grad_norm": 6.593081321509201, + "learning_rate": 9.56115796151269e-06, + "loss": 17.5958, + "step": 8772 + }, + { + "epoch": 0.1603633904253569, + "grad_norm": 8.66286614955099, + "learning_rate": 9.561036685022911e-06, + "loss": 18.3973, + "step": 8773 + }, + { + "epoch": 0.16038166962180342, + "grad_norm": 7.398229057452632, + "learning_rate": 9.560915392547095e-06, + "loss": 17.97, + "step": 8774 + }, + { + "epoch": 0.16039994881824995, + "grad_norm": 7.52899839892259, + "learning_rate": 9.560794084085667e-06, + "loss": 18.0149, + "step": 8775 + }, + { + "epoch": 0.16041822801469646, + "grad_norm": 
7.0884022559299735, + "learning_rate": 9.560672759639052e-06, + "loss": 17.6417, + "step": 8776 + }, + { + "epoch": 0.160436507211143, + "grad_norm": 7.6345269325657314, + "learning_rate": 9.560551419207673e-06, + "loss": 18.2399, + "step": 8777 + }, + { + "epoch": 0.16045478640758953, + "grad_norm": 6.786548576572578, + "learning_rate": 9.560430062791956e-06, + "loss": 17.7836, + "step": 8778 + }, + { + "epoch": 0.16047306560403604, + "grad_norm": 7.630147309540174, + "learning_rate": 9.560308690392331e-06, + "loss": 17.9565, + "step": 8779 + }, + { + "epoch": 0.16049134480048258, + "grad_norm": 9.243567669339185, + "learning_rate": 9.560187302009216e-06, + "loss": 18.5087, + "step": 8780 + }, + { + "epoch": 0.16050962399692908, + "grad_norm": 6.68852694381934, + "learning_rate": 9.560065897643043e-06, + "loss": 17.4963, + "step": 8781 + }, + { + "epoch": 0.16052790319337562, + "grad_norm": 7.723848633136946, + "learning_rate": 9.559944477294235e-06, + "loss": 17.9461, + "step": 8782 + }, + { + "epoch": 0.16054618238982216, + "grad_norm": 6.027865934325465, + "learning_rate": 9.559823040963214e-06, + "loss": 17.4491, + "step": 8783 + }, + { + "epoch": 0.16056446158626866, + "grad_norm": 6.525364454120052, + "learning_rate": 9.55970158865041e-06, + "loss": 17.6133, + "step": 8784 + }, + { + "epoch": 0.1605827407827152, + "grad_norm": 6.234268240730879, + "learning_rate": 9.55958012035625e-06, + "loss": 17.5524, + "step": 8785 + }, + { + "epoch": 0.1606010199791617, + "grad_norm": 7.6452263245109515, + "learning_rate": 9.559458636081156e-06, + "loss": 17.8828, + "step": 8786 + }, + { + "epoch": 0.16061929917560824, + "grad_norm": 6.805593151866449, + "learning_rate": 9.559337135825555e-06, + "loss": 17.8694, + "step": 8787 + }, + { + "epoch": 0.16063757837205475, + "grad_norm": 7.376660941074235, + "learning_rate": 9.559215619589872e-06, + "loss": 17.9796, + "step": 8788 + }, + { + "epoch": 0.1606558575685013, + "grad_norm": 7.114462555908165, + "learning_rate": 9.559094087374535e-06, + "loss": 17.6144, + "step": 8789 + }, + { + "epoch": 0.16067413676494782, + "grad_norm": 6.750929925152189, + "learning_rate": 9.558972539179969e-06, + "loss": 17.5734, + "step": 8790 + }, + { + "epoch": 0.16069241596139433, + "grad_norm": 6.7640328775666125, + "learning_rate": 9.558850975006599e-06, + "loss": 17.6017, + "step": 8791 + }, + { + "epoch": 0.16071069515784087, + "grad_norm": 6.540873147196798, + "learning_rate": 9.558729394854854e-06, + "loss": 17.5003, + "step": 8792 + }, + { + "epoch": 0.16072897435428737, + "grad_norm": 5.795009420922162, + "learning_rate": 9.558607798725155e-06, + "loss": 17.0964, + "step": 8793 + }, + { + "epoch": 0.1607472535507339, + "grad_norm": 7.8149691739728, + "learning_rate": 9.558486186617933e-06, + "loss": 17.9489, + "step": 8794 + }, + { + "epoch": 0.16076553274718045, + "grad_norm": 5.961826323415727, + "learning_rate": 9.558364558533613e-06, + "loss": 17.3436, + "step": 8795 + }, + { + "epoch": 0.16078381194362695, + "grad_norm": 7.980379462536579, + "learning_rate": 9.558242914472619e-06, + "loss": 18.2505, + "step": 8796 + }, + { + "epoch": 0.1608020911400735, + "grad_norm": 8.799802937393846, + "learning_rate": 9.55812125443538e-06, + "loss": 18.4094, + "step": 8797 + }, + { + "epoch": 0.16082037033652, + "grad_norm": 7.875497214055206, + "learning_rate": 9.557999578422323e-06, + "loss": 17.8522, + "step": 8798 + }, + { + "epoch": 0.16083864953296653, + "grad_norm": 6.825628477609388, + "learning_rate": 9.55787788643387e-06, + "loss": 17.6042, + "step": 8799 
+ }, + { + "epoch": 0.16085692872941307, + "grad_norm": 6.656232687908398, + "learning_rate": 9.557756178470453e-06, + "loss": 17.4498, + "step": 8800 + }, + { + "epoch": 0.16087520792585958, + "grad_norm": 7.452719354689855, + "learning_rate": 9.557634454532495e-06, + "loss": 17.791, + "step": 8801 + }, + { + "epoch": 0.1608934871223061, + "grad_norm": 7.075444203805298, + "learning_rate": 9.557512714620424e-06, + "loss": 18.0032, + "step": 8802 + }, + { + "epoch": 0.16091176631875262, + "grad_norm": 7.753044557459597, + "learning_rate": 9.557390958734667e-06, + "loss": 18.0721, + "step": 8803 + }, + { + "epoch": 0.16093004551519915, + "grad_norm": 6.633972339969448, + "learning_rate": 9.557269186875649e-06, + "loss": 17.4466, + "step": 8804 + }, + { + "epoch": 0.16094832471164566, + "grad_norm": 6.876666467275851, + "learning_rate": 9.5571473990438e-06, + "loss": 17.6804, + "step": 8805 + }, + { + "epoch": 0.1609666039080922, + "grad_norm": 8.2795245562326, + "learning_rate": 9.557025595239543e-06, + "loss": 17.9313, + "step": 8806 + }, + { + "epoch": 0.16098488310453873, + "grad_norm": 6.303225996141523, + "learning_rate": 9.556903775463306e-06, + "loss": 17.5084, + "step": 8807 + }, + { + "epoch": 0.16100316230098524, + "grad_norm": 6.5407205829138, + "learning_rate": 9.556781939715519e-06, + "loss": 17.3902, + "step": 8808 + }, + { + "epoch": 0.16102144149743178, + "grad_norm": 8.460848418617777, + "learning_rate": 9.556660087996605e-06, + "loss": 18.3657, + "step": 8809 + }, + { + "epoch": 0.16103972069387829, + "grad_norm": 6.897862897657835, + "learning_rate": 9.556538220306994e-06, + "loss": 17.5954, + "step": 8810 + }, + { + "epoch": 0.16105799989032482, + "grad_norm": 7.164626874468124, + "learning_rate": 9.55641633664711e-06, + "loss": 17.7571, + "step": 8811 + }, + { + "epoch": 0.16107627908677136, + "grad_norm": 6.833970612165855, + "learning_rate": 9.556294437017383e-06, + "loss": 17.6259, + "step": 8812 + }, + { + "epoch": 0.16109455828321786, + "grad_norm": 6.4596259350114025, + "learning_rate": 9.556172521418241e-06, + "loss": 17.577, + "step": 8813 + }, + { + "epoch": 0.1611128374796644, + "grad_norm": 7.498344388477673, + "learning_rate": 9.556050589850109e-06, + "loss": 17.7124, + "step": 8814 + }, + { + "epoch": 0.1611311166761109, + "grad_norm": 7.971377103769383, + "learning_rate": 9.555928642313415e-06, + "loss": 18.1817, + "step": 8815 + }, + { + "epoch": 0.16114939587255744, + "grad_norm": 6.699257541796852, + "learning_rate": 9.555806678808586e-06, + "loss": 17.6014, + "step": 8816 + }, + { + "epoch": 0.16116767506900398, + "grad_norm": 7.14970775353149, + "learning_rate": 9.55568469933605e-06, + "loss": 17.8008, + "step": 8817 + }, + { + "epoch": 0.1611859542654505, + "grad_norm": 6.899336621313897, + "learning_rate": 9.555562703896232e-06, + "loss": 17.7719, + "step": 8818 + }, + { + "epoch": 0.16120423346189702, + "grad_norm": 7.5932976917227295, + "learning_rate": 9.555440692489566e-06, + "loss": 17.9854, + "step": 8819 + }, + { + "epoch": 0.16122251265834353, + "grad_norm": 7.497653414850729, + "learning_rate": 9.555318665116475e-06, + "loss": 17.5895, + "step": 8820 + }, + { + "epoch": 0.16124079185479007, + "grad_norm": 6.501683877973553, + "learning_rate": 9.555196621777385e-06, + "loss": 17.3843, + "step": 8821 + }, + { + "epoch": 0.16125907105123657, + "grad_norm": 6.046399938171552, + "learning_rate": 9.555074562472728e-06, + "loss": 17.4456, + "step": 8822 + }, + { + "epoch": 0.1612773502476831, + "grad_norm": 8.333521331479742, + "learning_rate": 
9.554952487202929e-06, + "loss": 17.9339, + "step": 8823 + }, + { + "epoch": 0.16129562944412965, + "grad_norm": 7.188884164898245, + "learning_rate": 9.554830395968417e-06, + "loss": 17.9014, + "step": 8824 + }, + { + "epoch": 0.16131390864057615, + "grad_norm": 8.20227410730392, + "learning_rate": 9.55470828876962e-06, + "loss": 18.1764, + "step": 8825 + }, + { + "epoch": 0.1613321878370227, + "grad_norm": 7.485773635555615, + "learning_rate": 9.554586165606967e-06, + "loss": 17.878, + "step": 8826 + }, + { + "epoch": 0.1613504670334692, + "grad_norm": 8.393810519342246, + "learning_rate": 9.554464026480884e-06, + "loss": 18.1257, + "step": 8827 + }, + { + "epoch": 0.16136874622991573, + "grad_norm": 7.989067804682207, + "learning_rate": 9.554341871391799e-06, + "loss": 17.798, + "step": 8828 + }, + { + "epoch": 0.16138702542636227, + "grad_norm": 7.123354701496578, + "learning_rate": 9.55421970034014e-06, + "loss": 18.0067, + "step": 8829 + }, + { + "epoch": 0.16140530462280878, + "grad_norm": 7.798993085522868, + "learning_rate": 9.554097513326338e-06, + "loss": 18.1148, + "step": 8830 + }, + { + "epoch": 0.1614235838192553, + "grad_norm": 4.76814135126192, + "learning_rate": 9.553975310350819e-06, + "loss": 16.7965, + "step": 8831 + }, + { + "epoch": 0.16144186301570182, + "grad_norm": 6.4682768640768336, + "learning_rate": 9.55385309141401e-06, + "loss": 17.2909, + "step": 8832 + }, + { + "epoch": 0.16146014221214836, + "grad_norm": 6.797191004024916, + "learning_rate": 9.553730856516343e-06, + "loss": 17.7865, + "step": 8833 + }, + { + "epoch": 0.1614784214085949, + "grad_norm": 7.591471038689188, + "learning_rate": 9.553608605658244e-06, + "loss": 17.6864, + "step": 8834 + }, + { + "epoch": 0.1614967006050414, + "grad_norm": 6.842949793100091, + "learning_rate": 9.553486338840143e-06, + "loss": 17.3557, + "step": 8835 + }, + { + "epoch": 0.16151497980148793, + "grad_norm": 8.929409275541913, + "learning_rate": 9.553364056062467e-06, + "loss": 18.7151, + "step": 8836 + }, + { + "epoch": 0.16153325899793444, + "grad_norm": 7.785349282093318, + "learning_rate": 9.553241757325644e-06, + "loss": 17.9587, + "step": 8837 + }, + { + "epoch": 0.16155153819438098, + "grad_norm": 8.211137903475462, + "learning_rate": 9.553119442630103e-06, + "loss": 17.9899, + "step": 8838 + }, + { + "epoch": 0.16156981739082749, + "grad_norm": 6.7164478822489375, + "learning_rate": 9.552997111976275e-06, + "loss": 17.529, + "step": 8839 + }, + { + "epoch": 0.16158809658727402, + "grad_norm": 7.546942335017686, + "learning_rate": 9.552874765364587e-06, + "loss": 17.9005, + "step": 8840 + }, + { + "epoch": 0.16160637578372056, + "grad_norm": 8.153689286797787, + "learning_rate": 9.552752402795469e-06, + "loss": 18.4123, + "step": 8841 + }, + { + "epoch": 0.16162465498016707, + "grad_norm": 7.152845343535854, + "learning_rate": 9.552630024269347e-06, + "loss": 17.7524, + "step": 8842 + }, + { + "epoch": 0.1616429341766136, + "grad_norm": 7.337910363949701, + "learning_rate": 9.552507629786653e-06, + "loss": 17.8114, + "step": 8843 + }, + { + "epoch": 0.1616612133730601, + "grad_norm": 7.350235652986651, + "learning_rate": 9.552385219347816e-06, + "loss": 17.8237, + "step": 8844 + }, + { + "epoch": 0.16167949256950664, + "grad_norm": 6.14665711848872, + "learning_rate": 9.552262792953262e-06, + "loss": 17.3868, + "step": 8845 + }, + { + "epoch": 0.16169777176595318, + "grad_norm": 6.272718841791613, + "learning_rate": 9.55214035060342e-06, + "loss": 17.4701, + "step": 8846 + }, + { + "epoch": 0.1617160509623997, 
+ "grad_norm": 6.863264719435674, + "learning_rate": 9.552017892298724e-06, + "loss": 17.508, + "step": 8847 + }, + { + "epoch": 0.16173433015884622, + "grad_norm": 7.136719533652093, + "learning_rate": 9.551895418039601e-06, + "loss": 17.4004, + "step": 8848 + }, + { + "epoch": 0.16175260935529273, + "grad_norm": 7.283507678344315, + "learning_rate": 9.551772927826477e-06, + "loss": 17.9247, + "step": 8849 + }, + { + "epoch": 0.16177088855173927, + "grad_norm": 6.05289082646221, + "learning_rate": 9.551650421659786e-06, + "loss": 17.0785, + "step": 8850 + }, + { + "epoch": 0.1617891677481858, + "grad_norm": 8.625028740166986, + "learning_rate": 9.551527899539954e-06, + "loss": 17.9702, + "step": 8851 + }, + { + "epoch": 0.1618074469446323, + "grad_norm": 6.3450623363035294, + "learning_rate": 9.551405361467412e-06, + "loss": 17.3423, + "step": 8852 + }, + { + "epoch": 0.16182572614107885, + "grad_norm": 6.320029822690952, + "learning_rate": 9.55128280744259e-06, + "loss": 17.4594, + "step": 8853 + }, + { + "epoch": 0.16184400533752535, + "grad_norm": 8.117196672477366, + "learning_rate": 9.551160237465915e-06, + "loss": 18.0902, + "step": 8854 + }, + { + "epoch": 0.1618622845339719, + "grad_norm": 11.078199725555908, + "learning_rate": 9.55103765153782e-06, + "loss": 18.8538, + "step": 8855 + }, + { + "epoch": 0.1618805637304184, + "grad_norm": 8.079093213144644, + "learning_rate": 9.550915049658733e-06, + "loss": 18.4629, + "step": 8856 + }, + { + "epoch": 0.16189884292686493, + "grad_norm": 6.484534105389004, + "learning_rate": 9.550792431829082e-06, + "loss": 17.5436, + "step": 8857 + }, + { + "epoch": 0.16191712212331147, + "grad_norm": 7.49800621285524, + "learning_rate": 9.5506697980493e-06, + "loss": 18.0245, + "step": 8858 + }, + { + "epoch": 0.16193540131975798, + "grad_norm": 6.5500496032887545, + "learning_rate": 9.550547148319814e-06, + "loss": 17.3675, + "step": 8859 + }, + { + "epoch": 0.1619536805162045, + "grad_norm": 9.450945561850927, + "learning_rate": 9.550424482641057e-06, + "loss": 18.4343, + "step": 8860 + }, + { + "epoch": 0.16197195971265102, + "grad_norm": 5.6372846773157015, + "learning_rate": 9.550301801013456e-06, + "loss": 17.2191, + "step": 8861 + }, + { + "epoch": 0.16199023890909756, + "grad_norm": 8.082414695165987, + "learning_rate": 9.55017910343744e-06, + "loss": 18.0178, + "step": 8862 + }, + { + "epoch": 0.1620085181055441, + "grad_norm": 6.189643379768784, + "learning_rate": 9.550056389913443e-06, + "loss": 17.2269, + "step": 8863 + }, + { + "epoch": 0.1620267973019906, + "grad_norm": 7.229971847798873, + "learning_rate": 9.549933660441892e-06, + "loss": 17.4883, + "step": 8864 + }, + { + "epoch": 0.16204507649843714, + "grad_norm": 6.811545341412197, + "learning_rate": 9.549810915023222e-06, + "loss": 17.3983, + "step": 8865 + }, + { + "epoch": 0.16206335569488364, + "grad_norm": 6.727419034143885, + "learning_rate": 9.549688153657855e-06, + "loss": 17.4161, + "step": 8866 + }, + { + "epoch": 0.16208163489133018, + "grad_norm": 7.330171877370214, + "learning_rate": 9.549565376346229e-06, + "loss": 17.8667, + "step": 8867 + }, + { + "epoch": 0.16209991408777671, + "grad_norm": 8.272764506260637, + "learning_rate": 9.549442583088769e-06, + "loss": 18.1347, + "step": 8868 + }, + { + "epoch": 0.16211819328422322, + "grad_norm": 6.147822248842741, + "learning_rate": 9.549319773885908e-06, + "loss": 17.4762, + "step": 8869 + }, + { + "epoch": 0.16213647248066976, + "grad_norm": 6.189533530420878, + "learning_rate": 9.549196948738078e-06, + "loss": 
17.1608, + "step": 8870 + }, + { + "epoch": 0.16215475167711627, + "grad_norm": 6.38942492187238, + "learning_rate": 9.549074107645704e-06, + "loss": 17.533, + "step": 8871 + }, + { + "epoch": 0.1621730308735628, + "grad_norm": 5.844114971106341, + "learning_rate": 9.548951250609223e-06, + "loss": 17.0304, + "step": 8872 + }, + { + "epoch": 0.1621913100700093, + "grad_norm": 6.919844670071196, + "learning_rate": 9.54882837762906e-06, + "loss": 17.6341, + "step": 8873 + }, + { + "epoch": 0.16220958926645584, + "grad_norm": 6.1769616846319675, + "learning_rate": 9.548705488705651e-06, + "loss": 17.6437, + "step": 8874 + }, + { + "epoch": 0.16222786846290238, + "grad_norm": 7.123526203979538, + "learning_rate": 9.548582583839424e-06, + "loss": 17.6275, + "step": 8875 + }, + { + "epoch": 0.1622461476593489, + "grad_norm": 7.8622977543729, + "learning_rate": 9.548459663030807e-06, + "loss": 18.1613, + "step": 8876 + }, + { + "epoch": 0.16226442685579542, + "grad_norm": 6.988235033949581, + "learning_rate": 9.548336726280235e-06, + "loss": 17.8994, + "step": 8877 + }, + { + "epoch": 0.16228270605224193, + "grad_norm": 8.668390020559814, + "learning_rate": 9.548213773588137e-06, + "loss": 18.4199, + "step": 8878 + }, + { + "epoch": 0.16230098524868847, + "grad_norm": 7.592850934071577, + "learning_rate": 9.548090804954946e-06, + "loss": 18.0574, + "step": 8879 + }, + { + "epoch": 0.162319264445135, + "grad_norm": 6.243808839154162, + "learning_rate": 9.54796782038109e-06, + "loss": 17.2268, + "step": 8880 + }, + { + "epoch": 0.1623375436415815, + "grad_norm": 6.92471135372571, + "learning_rate": 9.547844819867002e-06, + "loss": 17.7505, + "step": 8881 + }, + { + "epoch": 0.16235582283802805, + "grad_norm": 8.250122694694106, + "learning_rate": 9.547721803413113e-06, + "loss": 18.4426, + "step": 8882 + }, + { + "epoch": 0.16237410203447455, + "grad_norm": 7.90110995616912, + "learning_rate": 9.547598771019853e-06, + "loss": 17.9777, + "step": 8883 + }, + { + "epoch": 0.1623923812309211, + "grad_norm": 7.675239492202096, + "learning_rate": 9.547475722687653e-06, + "loss": 18.0677, + "step": 8884 + }, + { + "epoch": 0.16241066042736763, + "grad_norm": 7.2494996796705635, + "learning_rate": 9.547352658416946e-06, + "loss": 17.9652, + "step": 8885 + }, + { + "epoch": 0.16242893962381413, + "grad_norm": 7.074103628188423, + "learning_rate": 9.547229578208164e-06, + "loss": 17.7379, + "step": 8886 + }, + { + "epoch": 0.16244721882026067, + "grad_norm": 6.9007334560151685, + "learning_rate": 9.547106482061734e-06, + "loss": 17.7754, + "step": 8887 + }, + { + "epoch": 0.16246549801670718, + "grad_norm": 6.010855450347364, + "learning_rate": 9.546983369978093e-06, + "loss": 17.1205, + "step": 8888 + }, + { + "epoch": 0.1624837772131537, + "grad_norm": 7.231444756815707, + "learning_rate": 9.546860241957669e-06, + "loss": 17.7948, + "step": 8889 + }, + { + "epoch": 0.16250205640960022, + "grad_norm": 7.131651381970469, + "learning_rate": 9.546737098000893e-06, + "loss": 17.9893, + "step": 8890 + }, + { + "epoch": 0.16252033560604676, + "grad_norm": 7.70387932922149, + "learning_rate": 9.5466139381082e-06, + "loss": 18.0789, + "step": 8891 + }, + { + "epoch": 0.1625386148024933, + "grad_norm": 6.632072531801575, + "learning_rate": 9.546490762280018e-06, + "loss": 17.6724, + "step": 8892 + }, + { + "epoch": 0.1625568939989398, + "grad_norm": 6.936192890123043, + "learning_rate": 9.546367570516782e-06, + "loss": 17.6417, + "step": 8893 + }, + { + "epoch": 0.16257517319538634, + "grad_norm": 6.956442992720794, 
+ "learning_rate": 9.546244362818922e-06, + "loss": 17.681, + "step": 8894 + }, + { + "epoch": 0.16259345239183284, + "grad_norm": 7.446021023217806, + "learning_rate": 9.546121139186869e-06, + "loss": 18.232, + "step": 8895 + }, + { + "epoch": 0.16261173158827938, + "grad_norm": 7.878532797276115, + "learning_rate": 9.545997899621057e-06, + "loss": 17.8911, + "step": 8896 + }, + { + "epoch": 0.16263001078472591, + "grad_norm": 6.543557569485853, + "learning_rate": 9.545874644121915e-06, + "loss": 17.519, + "step": 8897 + }, + { + "epoch": 0.16264828998117242, + "grad_norm": 7.015477953441177, + "learning_rate": 9.545751372689879e-06, + "loss": 17.8328, + "step": 8898 + }, + { + "epoch": 0.16266656917761896, + "grad_norm": 6.3181880335265665, + "learning_rate": 9.545628085325378e-06, + "loss": 17.2829, + "step": 8899 + }, + { + "epoch": 0.16268484837406547, + "grad_norm": 6.697700055575188, + "learning_rate": 9.545504782028845e-06, + "loss": 17.3818, + "step": 8900 + }, + { + "epoch": 0.162703127570512, + "grad_norm": 5.856440390979823, + "learning_rate": 9.545381462800713e-06, + "loss": 17.1333, + "step": 8901 + }, + { + "epoch": 0.16272140676695854, + "grad_norm": 7.761739161167816, + "learning_rate": 9.545258127641412e-06, + "loss": 17.8042, + "step": 8902 + }, + { + "epoch": 0.16273968596340505, + "grad_norm": 5.975151780291863, + "learning_rate": 9.545134776551377e-06, + "loss": 17.4304, + "step": 8903 + }, + { + "epoch": 0.16275796515985158, + "grad_norm": 6.5665951275741845, + "learning_rate": 9.545011409531037e-06, + "loss": 17.7145, + "step": 8904 + }, + { + "epoch": 0.1627762443562981, + "grad_norm": 6.733855110402917, + "learning_rate": 9.544888026580827e-06, + "loss": 17.099, + "step": 8905 + }, + { + "epoch": 0.16279452355274462, + "grad_norm": 5.905538250052269, + "learning_rate": 9.54476462770118e-06, + "loss": 17.0874, + "step": 8906 + }, + { + "epoch": 0.16281280274919113, + "grad_norm": 6.573048580583706, + "learning_rate": 9.544641212892526e-06, + "loss": 17.5165, + "step": 8907 + }, + { + "epoch": 0.16283108194563767, + "grad_norm": 9.295126203116364, + "learning_rate": 9.544517782155302e-06, + "loss": 18.2697, + "step": 8908 + }, + { + "epoch": 0.1628493611420842, + "grad_norm": 8.26365264438235, + "learning_rate": 9.544394335489935e-06, + "loss": 17.8954, + "step": 8909 + }, + { + "epoch": 0.1628676403385307, + "grad_norm": 7.084231977177663, + "learning_rate": 9.54427087289686e-06, + "loss": 17.4922, + "step": 8910 + }, + { + "epoch": 0.16288591953497725, + "grad_norm": 6.964164989018065, + "learning_rate": 9.54414739437651e-06, + "loss": 17.7176, + "step": 8911 + }, + { + "epoch": 0.16290419873142375, + "grad_norm": 6.997736867494417, + "learning_rate": 9.54402389992932e-06, + "loss": 17.4331, + "step": 8912 + }, + { + "epoch": 0.1629224779278703, + "grad_norm": 8.043540829147126, + "learning_rate": 9.543900389555718e-06, + "loss": 18.0513, + "step": 8913 + }, + { + "epoch": 0.16294075712431683, + "grad_norm": 7.16888807126414, + "learning_rate": 9.54377686325614e-06, + "loss": 17.7014, + "step": 8914 + }, + { + "epoch": 0.16295903632076333, + "grad_norm": 8.30871827815993, + "learning_rate": 9.54365332103102e-06, + "loss": 18.0837, + "step": 8915 + }, + { + "epoch": 0.16297731551720987, + "grad_norm": 8.503896845917318, + "learning_rate": 9.543529762880787e-06, + "loss": 18.0608, + "step": 8916 + }, + { + "epoch": 0.16299559471365638, + "grad_norm": 6.712750605148803, + "learning_rate": 9.543406188805877e-06, + "loss": 17.3239, + "step": 8917 + }, + { + "epoch": 
0.1630138739101029, + "grad_norm": 6.401990566639425, + "learning_rate": 9.543282598806723e-06, + "loss": 17.4695, + "step": 8918 + }, + { + "epoch": 0.16303215310654945, + "grad_norm": 6.642985318262605, + "learning_rate": 9.543158992883758e-06, + "loss": 17.6847, + "step": 8919 + }, + { + "epoch": 0.16305043230299596, + "grad_norm": 7.542441204187607, + "learning_rate": 9.543035371037415e-06, + "loss": 18.0636, + "step": 8920 + }, + { + "epoch": 0.1630687114994425, + "grad_norm": 6.2766294396528375, + "learning_rate": 9.542911733268126e-06, + "loss": 17.5489, + "step": 8921 + }, + { + "epoch": 0.163086990695889, + "grad_norm": 7.384420981284008, + "learning_rate": 9.542788079576326e-06, + "loss": 17.7955, + "step": 8922 + }, + { + "epoch": 0.16310526989233554, + "grad_norm": 5.8974784763862935, + "learning_rate": 9.54266440996245e-06, + "loss": 17.3754, + "step": 8923 + }, + { + "epoch": 0.16312354908878204, + "grad_norm": 6.578968888106481, + "learning_rate": 9.542540724426927e-06, + "loss": 17.6573, + "step": 8924 + }, + { + "epoch": 0.16314182828522858, + "grad_norm": 6.860280161621239, + "learning_rate": 9.542417022970194e-06, + "loss": 17.6178, + "step": 8925 + }, + { + "epoch": 0.16316010748167512, + "grad_norm": 6.979930012077105, + "learning_rate": 9.542293305592683e-06, + "loss": 17.6281, + "step": 8926 + }, + { + "epoch": 0.16317838667812162, + "grad_norm": 7.599394431780828, + "learning_rate": 9.54216957229483e-06, + "loss": 18.0106, + "step": 8927 + }, + { + "epoch": 0.16319666587456816, + "grad_norm": 7.238928157250578, + "learning_rate": 9.542045823077064e-06, + "loss": 18.2261, + "step": 8928 + }, + { + "epoch": 0.16321494507101467, + "grad_norm": 7.766677650452347, + "learning_rate": 9.541922057939823e-06, + "loss": 18.0347, + "step": 8929 + }, + { + "epoch": 0.1632332242674612, + "grad_norm": 7.662017576676054, + "learning_rate": 9.54179827688354e-06, + "loss": 17.8838, + "step": 8930 + }, + { + "epoch": 0.16325150346390774, + "grad_norm": 7.232795696096888, + "learning_rate": 9.541674479908647e-06, + "loss": 17.866, + "step": 8931 + }, + { + "epoch": 0.16326978266035425, + "grad_norm": 7.886448752917773, + "learning_rate": 9.54155066701558e-06, + "loss": 17.6537, + "step": 8932 + }, + { + "epoch": 0.16328806185680078, + "grad_norm": 7.881010893635565, + "learning_rate": 9.541426838204771e-06, + "loss": 18.0129, + "step": 8933 + }, + { + "epoch": 0.1633063410532473, + "grad_norm": 7.606065616458181, + "learning_rate": 9.541302993476655e-06, + "loss": 17.8275, + "step": 8934 + }, + { + "epoch": 0.16332462024969382, + "grad_norm": 7.123188545637438, + "learning_rate": 9.541179132831666e-06, + "loss": 17.6886, + "step": 8935 + }, + { + "epoch": 0.16334289944614036, + "grad_norm": 7.226467542992779, + "learning_rate": 9.54105525627024e-06, + "loss": 17.7167, + "step": 8936 + }, + { + "epoch": 0.16336117864258687, + "grad_norm": 5.937890595858325, + "learning_rate": 9.540931363792808e-06, + "loss": 17.3241, + "step": 8937 + }, + { + "epoch": 0.1633794578390334, + "grad_norm": 6.032465860912279, + "learning_rate": 9.540807455399806e-06, + "loss": 17.4818, + "step": 8938 + }, + { + "epoch": 0.1633977370354799, + "grad_norm": 7.150257416553276, + "learning_rate": 9.540683531091667e-06, + "loss": 17.7593, + "step": 8939 + }, + { + "epoch": 0.16341601623192645, + "grad_norm": 6.682123741954857, + "learning_rate": 9.540559590868826e-06, + "loss": 17.4994, + "step": 8940 + }, + { + "epoch": 0.16343429542837296, + "grad_norm": 7.771814579829153, + "learning_rate": 
9.54043563473172e-06, + "loss": 18.012, + "step": 8941 + }, + { + "epoch": 0.1634525746248195, + "grad_norm": 7.097596182243404, + "learning_rate": 9.540311662680779e-06, + "loss": 17.7753, + "step": 8942 + }, + { + "epoch": 0.16347085382126603, + "grad_norm": 7.096495466577756, + "learning_rate": 9.540187674716439e-06, + "loss": 17.9051, + "step": 8943 + }, + { + "epoch": 0.16348913301771253, + "grad_norm": 6.006982250832487, + "learning_rate": 9.540063670839138e-06, + "loss": 17.148, + "step": 8944 + }, + { + "epoch": 0.16350741221415907, + "grad_norm": 11.197540538981825, + "learning_rate": 9.539939651049306e-06, + "loss": 17.011, + "step": 8945 + }, + { + "epoch": 0.16352569141060558, + "grad_norm": 7.675777563157956, + "learning_rate": 9.539815615347378e-06, + "loss": 18.1948, + "step": 8946 + }, + { + "epoch": 0.1635439706070521, + "grad_norm": 9.277697757170023, + "learning_rate": 9.539691563733793e-06, + "loss": 18.748, + "step": 8947 + }, + { + "epoch": 0.16356224980349865, + "grad_norm": 6.754838801733986, + "learning_rate": 9.53956749620898e-06, + "loss": 17.4221, + "step": 8948 + }, + { + "epoch": 0.16358052899994516, + "grad_norm": 7.415932795215124, + "learning_rate": 9.53944341277338e-06, + "loss": 17.9688, + "step": 8949 + }, + { + "epoch": 0.1635988081963917, + "grad_norm": 6.2238818314264, + "learning_rate": 9.539319313427424e-06, + "loss": 17.4398, + "step": 8950 + }, + { + "epoch": 0.1636170873928382, + "grad_norm": 9.021443626345528, + "learning_rate": 9.539195198171547e-06, + "loss": 18.203, + "step": 8951 + }, + { + "epoch": 0.16363536658928474, + "grad_norm": 9.027950393619902, + "learning_rate": 9.539071067006185e-06, + "loss": 18.3249, + "step": 8952 + }, + { + "epoch": 0.16365364578573127, + "grad_norm": 7.381196928707877, + "learning_rate": 9.538946919931773e-06, + "loss": 18.3222, + "step": 8953 + }, + { + "epoch": 0.16367192498217778, + "grad_norm": 7.174947326296616, + "learning_rate": 9.538822756948746e-06, + "loss": 17.8498, + "step": 8954 + }, + { + "epoch": 0.16369020417862432, + "grad_norm": 6.901302070754817, + "learning_rate": 9.538698578057538e-06, + "loss": 17.7299, + "step": 8955 + }, + { + "epoch": 0.16370848337507082, + "grad_norm": 7.761207539143965, + "learning_rate": 9.538574383258586e-06, + "loss": 18.3238, + "step": 8956 + }, + { + "epoch": 0.16372676257151736, + "grad_norm": 7.44744965291629, + "learning_rate": 9.538450172552324e-06, + "loss": 17.5281, + "step": 8957 + }, + { + "epoch": 0.16374504176796387, + "grad_norm": 6.987075909306739, + "learning_rate": 9.53832594593919e-06, + "loss": 17.7534, + "step": 8958 + }, + { + "epoch": 0.1637633209644104, + "grad_norm": 6.624246707335277, + "learning_rate": 9.538201703419616e-06, + "loss": 17.6023, + "step": 8959 + }, + { + "epoch": 0.16378160016085694, + "grad_norm": 7.097707090816197, + "learning_rate": 9.538077444994039e-06, + "loss": 18.0419, + "step": 8960 + }, + { + "epoch": 0.16379987935730345, + "grad_norm": 7.760668279565784, + "learning_rate": 9.537953170662894e-06, + "loss": 17.9141, + "step": 8961 + }, + { + "epoch": 0.16381815855374998, + "grad_norm": 8.699347616373558, + "learning_rate": 9.537828880426617e-06, + "loss": 18.2053, + "step": 8962 + }, + { + "epoch": 0.1638364377501965, + "grad_norm": 7.326815693030651, + "learning_rate": 9.537704574285644e-06, + "loss": 17.7434, + "step": 8963 + }, + { + "epoch": 0.16385471694664303, + "grad_norm": 8.0552337540858, + "learning_rate": 9.53758025224041e-06, + "loss": 18.2094, + "step": 8964 + }, + { + "epoch": 0.16387299614308956, + 
"grad_norm": 8.021030654479663, + "learning_rate": 9.537455914291351e-06, + "loss": 17.5572, + "step": 8965 + }, + { + "epoch": 0.16389127533953607, + "grad_norm": 9.40369593502039, + "learning_rate": 9.537331560438903e-06, + "loss": 18.1477, + "step": 8966 + }, + { + "epoch": 0.1639095545359826, + "grad_norm": 7.449777743755273, + "learning_rate": 9.537207190683501e-06, + "loss": 18.0022, + "step": 8967 + }, + { + "epoch": 0.1639278337324291, + "grad_norm": 6.755855208741576, + "learning_rate": 9.537082805025581e-06, + "loss": 17.6478, + "step": 8968 + }, + { + "epoch": 0.16394611292887565, + "grad_norm": 7.829269949213463, + "learning_rate": 9.536958403465581e-06, + "loss": 18.2046, + "step": 8969 + }, + { + "epoch": 0.16396439212532218, + "grad_norm": 7.561169726588245, + "learning_rate": 9.536833986003935e-06, + "loss": 17.9798, + "step": 8970 + }, + { + "epoch": 0.1639826713217687, + "grad_norm": 7.979160217539934, + "learning_rate": 9.536709552641079e-06, + "loss": 18.1425, + "step": 8971 + }, + { + "epoch": 0.16400095051821523, + "grad_norm": 7.4120818373445925, + "learning_rate": 9.53658510337745e-06, + "loss": 18.0333, + "step": 8972 + }, + { + "epoch": 0.16401922971466174, + "grad_norm": 7.110277402834711, + "learning_rate": 9.536460638213484e-06, + "loss": 17.8487, + "step": 8973 + }, + { + "epoch": 0.16403750891110827, + "grad_norm": 6.450188051453727, + "learning_rate": 9.536336157149617e-06, + "loss": 17.5824, + "step": 8974 + }, + { + "epoch": 0.16405578810755478, + "grad_norm": 6.619651163009498, + "learning_rate": 9.536211660186285e-06, + "loss": 17.6198, + "step": 8975 + }, + { + "epoch": 0.16407406730400131, + "grad_norm": 6.628904688632431, + "learning_rate": 9.536087147323925e-06, + "loss": 17.3563, + "step": 8976 + }, + { + "epoch": 0.16409234650044785, + "grad_norm": 6.390833912123965, + "learning_rate": 9.535962618562973e-06, + "loss": 17.6438, + "step": 8977 + }, + { + "epoch": 0.16411062569689436, + "grad_norm": 8.84388746773094, + "learning_rate": 9.535838073903867e-06, + "loss": 18.0541, + "step": 8978 + }, + { + "epoch": 0.1641289048933409, + "grad_norm": 6.898897406262503, + "learning_rate": 9.535713513347041e-06, + "loss": 17.5198, + "step": 8979 + }, + { + "epoch": 0.1641471840897874, + "grad_norm": 6.938844238649979, + "learning_rate": 9.535588936892934e-06, + "loss": 17.473, + "step": 8980 + }, + { + "epoch": 0.16416546328623394, + "grad_norm": 10.265314021194731, + "learning_rate": 9.53546434454198e-06, + "loss": 17.8857, + "step": 8981 + }, + { + "epoch": 0.16418374248268047, + "grad_norm": 5.973072517552841, + "learning_rate": 9.535339736294618e-06, + "loss": 17.269, + "step": 8982 + }, + { + "epoch": 0.16420202167912698, + "grad_norm": 6.949757546277983, + "learning_rate": 9.535215112151281e-06, + "loss": 17.7386, + "step": 8983 + }, + { + "epoch": 0.16422030087557352, + "grad_norm": 6.326312509864464, + "learning_rate": 9.535090472112411e-06, + "loss": 17.2417, + "step": 8984 + }, + { + "epoch": 0.16423858007202002, + "grad_norm": 9.264120030924737, + "learning_rate": 9.534965816178443e-06, + "loss": 18.3123, + "step": 8985 + }, + { + "epoch": 0.16425685926846656, + "grad_norm": 6.772672020924337, + "learning_rate": 9.534841144349813e-06, + "loss": 17.7576, + "step": 8986 + }, + { + "epoch": 0.1642751384649131, + "grad_norm": 8.329126811228969, + "learning_rate": 9.534716456626957e-06, + "loss": 18.0118, + "step": 8987 + }, + { + "epoch": 0.1642934176613596, + "grad_norm": 7.400742624479253, + "learning_rate": 9.534591753010314e-06, + "loss": 17.9224, 
+ "step": 8988 + }, + { + "epoch": 0.16431169685780614, + "grad_norm": 6.007836449564718, + "learning_rate": 9.53446703350032e-06, + "loss": 17.0726, + "step": 8989 + }, + { + "epoch": 0.16432997605425265, + "grad_norm": 7.562766480730223, + "learning_rate": 9.534342298097412e-06, + "loss": 17.8489, + "step": 8990 + }, + { + "epoch": 0.16434825525069918, + "grad_norm": 6.98880304185308, + "learning_rate": 9.53421754680203e-06, + "loss": 17.4952, + "step": 8991 + }, + { + "epoch": 0.1643665344471457, + "grad_norm": 9.724134196821625, + "learning_rate": 9.534092779614607e-06, + "loss": 18.9074, + "step": 8992 + }, + { + "epoch": 0.16438481364359223, + "grad_norm": 7.251382567170701, + "learning_rate": 9.533967996535584e-06, + "loss": 17.9577, + "step": 8993 + }, + { + "epoch": 0.16440309284003876, + "grad_norm": 7.498903365392758, + "learning_rate": 9.533843197565396e-06, + "loss": 18.052, + "step": 8994 + }, + { + "epoch": 0.16442137203648527, + "grad_norm": 7.262856847283712, + "learning_rate": 9.53371838270448e-06, + "loss": 17.8097, + "step": 8995 + }, + { + "epoch": 0.1644396512329318, + "grad_norm": 7.681262369820483, + "learning_rate": 9.533593551953276e-06, + "loss": 18.0161, + "step": 8996 + }, + { + "epoch": 0.1644579304293783, + "grad_norm": 6.880805878430297, + "learning_rate": 9.533468705312218e-06, + "loss": 17.5347, + "step": 8997 + }, + { + "epoch": 0.16447620962582485, + "grad_norm": 7.242434003077306, + "learning_rate": 9.533343842781746e-06, + "loss": 17.8824, + "step": 8998 + }, + { + "epoch": 0.16449448882227138, + "grad_norm": 9.904514161620357, + "learning_rate": 9.533218964362299e-06, + "loss": 18.8511, + "step": 8999 + }, + { + "epoch": 0.1645127680187179, + "grad_norm": 7.483815263177726, + "learning_rate": 9.533094070054311e-06, + "loss": 17.3156, + "step": 9000 + }, + { + "epoch": 0.16453104721516443, + "grad_norm": 8.05181023024189, + "learning_rate": 9.532969159858223e-06, + "loss": 18.2519, + "step": 9001 + }, + { + "epoch": 0.16454932641161094, + "grad_norm": 7.113555245957429, + "learning_rate": 9.53284423377447e-06, + "loss": 17.8087, + "step": 9002 + }, + { + "epoch": 0.16456760560805747, + "grad_norm": 7.012927381244689, + "learning_rate": 9.532719291803492e-06, + "loss": 17.6893, + "step": 9003 + }, + { + "epoch": 0.164585884804504, + "grad_norm": 6.958872956133071, + "learning_rate": 9.532594333945727e-06, + "loss": 17.6789, + "step": 9004 + }, + { + "epoch": 0.16460416400095051, + "grad_norm": 7.436941121855428, + "learning_rate": 9.532469360201612e-06, + "loss": 18.0439, + "step": 9005 + }, + { + "epoch": 0.16462244319739705, + "grad_norm": 8.062205985339064, + "learning_rate": 9.532344370571584e-06, + "loss": 18.1162, + "step": 9006 + }, + { + "epoch": 0.16464072239384356, + "grad_norm": 7.177649603497674, + "learning_rate": 9.532219365056083e-06, + "loss": 17.6347, + "step": 9007 + }, + { + "epoch": 0.1646590015902901, + "grad_norm": 6.7695673105289025, + "learning_rate": 9.532094343655548e-06, + "loss": 17.4804, + "step": 9008 + }, + { + "epoch": 0.1646772807867366, + "grad_norm": 8.496341139684638, + "learning_rate": 9.531969306370412e-06, + "loss": 18.0347, + "step": 9009 + }, + { + "epoch": 0.16469555998318314, + "grad_norm": 7.318266467326885, + "learning_rate": 9.531844253201119e-06, + "loss": 18.001, + "step": 9010 + }, + { + "epoch": 0.16471383917962967, + "grad_norm": 7.999530387100114, + "learning_rate": 9.531719184148106e-06, + "loss": 18.4319, + "step": 9011 + }, + { + "epoch": 0.16473211837607618, + "grad_norm": 7.3789199094497535, + 
"learning_rate": 9.53159409921181e-06, + "loss": 17.6925, + "step": 9012 + }, + { + "epoch": 0.16475039757252272, + "grad_norm": 7.426827848723716, + "learning_rate": 9.531468998392669e-06, + "loss": 17.5441, + "step": 9013 + }, + { + "epoch": 0.16476867676896922, + "grad_norm": 7.415358347673079, + "learning_rate": 9.531343881691122e-06, + "loss": 17.6196, + "step": 9014 + }, + { + "epoch": 0.16478695596541576, + "grad_norm": 8.714016979434655, + "learning_rate": 9.53121874910761e-06, + "loss": 18.4228, + "step": 9015 + }, + { + "epoch": 0.1648052351618623, + "grad_norm": 7.923681657299792, + "learning_rate": 9.531093600642567e-06, + "loss": 17.8882, + "step": 9016 + }, + { + "epoch": 0.1648235143583088, + "grad_norm": 7.345767412881944, + "learning_rate": 9.530968436296435e-06, + "loss": 17.8592, + "step": 9017 + }, + { + "epoch": 0.16484179355475534, + "grad_norm": 6.56783751919579, + "learning_rate": 9.530843256069654e-06, + "loss": 17.6087, + "step": 9018 + }, + { + "epoch": 0.16486007275120185, + "grad_norm": 7.22552776237849, + "learning_rate": 9.530718059962658e-06, + "loss": 17.7078, + "step": 9019 + }, + { + "epoch": 0.16487835194764838, + "grad_norm": 5.982942068514054, + "learning_rate": 9.53059284797589e-06, + "loss": 17.0983, + "step": 9020 + }, + { + "epoch": 0.16489663114409492, + "grad_norm": 7.0002543746521795, + "learning_rate": 9.530467620109786e-06, + "loss": 17.7523, + "step": 9021 + }, + { + "epoch": 0.16491491034054143, + "grad_norm": 8.649614178075742, + "learning_rate": 9.530342376364786e-06, + "loss": 17.756, + "step": 9022 + }, + { + "epoch": 0.16493318953698796, + "grad_norm": 7.16265798516129, + "learning_rate": 9.530217116741329e-06, + "loss": 17.9631, + "step": 9023 + }, + { + "epoch": 0.16495146873343447, + "grad_norm": 5.529878030601523, + "learning_rate": 9.530091841239854e-06, + "loss": 17.0858, + "step": 9024 + }, + { + "epoch": 0.164969747929881, + "grad_norm": 5.93450893703127, + "learning_rate": 9.529966549860801e-06, + "loss": 17.1779, + "step": 9025 + }, + { + "epoch": 0.1649880271263275, + "grad_norm": 5.920941626064105, + "learning_rate": 9.529841242604609e-06, + "loss": 17.3672, + "step": 9026 + }, + { + "epoch": 0.16500630632277405, + "grad_norm": 7.387747158692213, + "learning_rate": 9.529715919471715e-06, + "loss": 17.7749, + "step": 9027 + }, + { + "epoch": 0.16502458551922058, + "grad_norm": 8.98797917732331, + "learning_rate": 9.529590580462562e-06, + "loss": 18.6328, + "step": 9028 + }, + { + "epoch": 0.1650428647156671, + "grad_norm": 6.590188461567282, + "learning_rate": 9.529465225577586e-06, + "loss": 17.5267, + "step": 9029 + }, + { + "epoch": 0.16506114391211363, + "grad_norm": 8.336657364415158, + "learning_rate": 9.529339854817226e-06, + "loss": 17.4487, + "step": 9030 + }, + { + "epoch": 0.16507942310856014, + "grad_norm": 7.971714010329702, + "learning_rate": 9.529214468181924e-06, + "loss": 18.2908, + "step": 9031 + }, + { + "epoch": 0.16509770230500667, + "grad_norm": 6.820560068494281, + "learning_rate": 9.529089065672118e-06, + "loss": 17.4721, + "step": 9032 + }, + { + "epoch": 0.1651159815014532, + "grad_norm": 7.444814055406021, + "learning_rate": 9.528963647288247e-06, + "loss": 17.5634, + "step": 9033 + }, + { + "epoch": 0.16513426069789972, + "grad_norm": 8.52045768338414, + "learning_rate": 9.528838213030753e-06, + "loss": 18.699, + "step": 9034 + }, + { + "epoch": 0.16515253989434625, + "grad_norm": 5.784679984227972, + "learning_rate": 9.528712762900074e-06, + "loss": 17.4384, + "step": 9035 + }, + { + "epoch": 
0.16517081909079276, + "grad_norm": 7.653037685529301, + "learning_rate": 9.528587296896649e-06, + "loss": 18.2578, + "step": 9036 + }, + { + "epoch": 0.1651890982872393, + "grad_norm": 6.799617147984463, + "learning_rate": 9.528461815020918e-06, + "loss": 17.8007, + "step": 9037 + }, + { + "epoch": 0.16520737748368583, + "grad_norm": 6.968598825158566, + "learning_rate": 9.528336317273324e-06, + "loss": 17.888, + "step": 9038 + }, + { + "epoch": 0.16522565668013234, + "grad_norm": 7.165305393866474, + "learning_rate": 9.528210803654302e-06, + "loss": 17.699, + "step": 9039 + }, + { + "epoch": 0.16524393587657887, + "grad_norm": 5.371448743197811, + "learning_rate": 9.528085274164294e-06, + "loss": 17.0871, + "step": 9040 + }, + { + "epoch": 0.16526221507302538, + "grad_norm": 7.1524163925995685, + "learning_rate": 9.52795972880374e-06, + "loss": 18.0725, + "step": 9041 + }, + { + "epoch": 0.16528049426947192, + "grad_norm": 6.932547375366222, + "learning_rate": 9.52783416757308e-06, + "loss": 17.4842, + "step": 9042 + }, + { + "epoch": 0.16529877346591842, + "grad_norm": 6.666372747082885, + "learning_rate": 9.527708590472755e-06, + "loss": 17.5194, + "step": 9043 + }, + { + "epoch": 0.16531705266236496, + "grad_norm": 6.116693421795748, + "learning_rate": 9.527582997503203e-06, + "loss": 17.6409, + "step": 9044 + }, + { + "epoch": 0.1653353318588115, + "grad_norm": 6.27081926121905, + "learning_rate": 9.527457388664866e-06, + "loss": 17.5528, + "step": 9045 + }, + { + "epoch": 0.165353611055258, + "grad_norm": 6.842569182429887, + "learning_rate": 9.527331763958186e-06, + "loss": 17.6139, + "step": 9046 + }, + { + "epoch": 0.16537189025170454, + "grad_norm": 8.74274288483976, + "learning_rate": 9.527206123383597e-06, + "loss": 18.2157, + "step": 9047 + }, + { + "epoch": 0.16539016944815105, + "grad_norm": 6.264777251475679, + "learning_rate": 9.527080466941546e-06, + "loss": 17.285, + "step": 9048 + }, + { + "epoch": 0.16540844864459758, + "grad_norm": 7.537827085353265, + "learning_rate": 9.52695479463247e-06, + "loss": 18.1821, + "step": 9049 + }, + { + "epoch": 0.16542672784104412, + "grad_norm": 7.565179764824264, + "learning_rate": 9.526829106456811e-06, + "loss": 17.8749, + "step": 9050 + }, + { + "epoch": 0.16544500703749063, + "grad_norm": 6.884819067861885, + "learning_rate": 9.526703402415007e-06, + "loss": 17.7371, + "step": 9051 + }, + { + "epoch": 0.16546328623393716, + "grad_norm": 6.510990120480692, + "learning_rate": 9.526577682507504e-06, + "loss": 17.6405, + "step": 9052 + }, + { + "epoch": 0.16548156543038367, + "grad_norm": 6.513347772149104, + "learning_rate": 9.526451946734736e-06, + "loss": 17.5503, + "step": 9053 + }, + { + "epoch": 0.1654998446268302, + "grad_norm": 5.428298287850259, + "learning_rate": 9.526326195097146e-06, + "loss": 17.1256, + "step": 9054 + }, + { + "epoch": 0.16551812382327674, + "grad_norm": 6.582988013596661, + "learning_rate": 9.526200427595178e-06, + "loss": 17.6936, + "step": 9055 + }, + { + "epoch": 0.16553640301972325, + "grad_norm": 7.370843557480547, + "learning_rate": 9.526074644229269e-06, + "loss": 17.517, + "step": 9056 + }, + { + "epoch": 0.16555468221616979, + "grad_norm": 6.923825195792302, + "learning_rate": 9.525948844999861e-06, + "loss": 17.7574, + "step": 9057 + }, + { + "epoch": 0.1655729614126163, + "grad_norm": 6.8302860552307445, + "learning_rate": 9.525823029907396e-06, + "loss": 17.2951, + "step": 9058 + }, + { + "epoch": 0.16559124060906283, + "grad_norm": 6.958696460058351, + "learning_rate": 
9.525697198952313e-06, + "loss": 17.741, + "step": 9059 + }, + { + "epoch": 0.16560951980550934, + "grad_norm": 6.996366141266741, + "learning_rate": 9.525571352135055e-06, + "loss": 17.6815, + "step": 9060 + }, + { + "epoch": 0.16562779900195587, + "grad_norm": 6.157407783203882, + "learning_rate": 9.52544548945606e-06, + "loss": 17.4198, + "step": 9061 + }, + { + "epoch": 0.1656460781984024, + "grad_norm": 6.440894364279506, + "learning_rate": 9.525319610915773e-06, + "loss": 17.3242, + "step": 9062 + }, + { + "epoch": 0.16566435739484892, + "grad_norm": 6.854729327882623, + "learning_rate": 9.525193716514634e-06, + "loss": 17.5983, + "step": 9063 + }, + { + "epoch": 0.16568263659129545, + "grad_norm": 6.171897296806404, + "learning_rate": 9.525067806253082e-06, + "loss": 17.2897, + "step": 9064 + }, + { + "epoch": 0.16570091578774196, + "grad_norm": 8.008405996013842, + "learning_rate": 9.524941880131562e-06, + "loss": 18.3425, + "step": 9065 + }, + { + "epoch": 0.1657191949841885, + "grad_norm": 6.725834049524768, + "learning_rate": 9.52481593815051e-06, + "loss": 17.3255, + "step": 9066 + }, + { + "epoch": 0.16573747418063503, + "grad_norm": 7.92148911913241, + "learning_rate": 9.524689980310375e-06, + "loss": 17.8377, + "step": 9067 + }, + { + "epoch": 0.16575575337708154, + "grad_norm": 7.0223075023185775, + "learning_rate": 9.524564006611592e-06, + "loss": 17.6414, + "step": 9068 + }, + { + "epoch": 0.16577403257352807, + "grad_norm": 6.3789484707849295, + "learning_rate": 9.524438017054604e-06, + "loss": 17.4408, + "step": 9069 + }, + { + "epoch": 0.16579231176997458, + "grad_norm": 5.897977748498922, + "learning_rate": 9.524312011639856e-06, + "loss": 17.3339, + "step": 9070 + }, + { + "epoch": 0.16581059096642112, + "grad_norm": 6.207281410268841, + "learning_rate": 9.524185990367785e-06, + "loss": 17.2, + "step": 9071 + }, + { + "epoch": 0.16582887016286765, + "grad_norm": 8.342653154892563, + "learning_rate": 9.524059953238836e-06, + "loss": 18.3694, + "step": 9072 + }, + { + "epoch": 0.16584714935931416, + "grad_norm": 6.598668454206093, + "learning_rate": 9.523933900253448e-06, + "loss": 17.6158, + "step": 9073 + }, + { + "epoch": 0.1658654285557607, + "grad_norm": 6.59926984065775, + "learning_rate": 9.523807831412065e-06, + "loss": 17.7051, + "step": 9074 + }, + { + "epoch": 0.1658837077522072, + "grad_norm": 7.1647343560089505, + "learning_rate": 9.523681746715128e-06, + "loss": 17.5268, + "step": 9075 + }, + { + "epoch": 0.16590198694865374, + "grad_norm": 7.627179920498086, + "learning_rate": 9.523555646163078e-06, + "loss": 17.6897, + "step": 9076 + }, + { + "epoch": 0.16592026614510025, + "grad_norm": 7.637983839679278, + "learning_rate": 9.52342952975636e-06, + "loss": 18.0547, + "step": 9077 + }, + { + "epoch": 0.16593854534154678, + "grad_norm": 8.041272758097744, + "learning_rate": 9.523303397495414e-06, + "loss": 18.2532, + "step": 9078 + }, + { + "epoch": 0.16595682453799332, + "grad_norm": 6.30967522088222, + "learning_rate": 9.52317724938068e-06, + "loss": 17.4144, + "step": 9079 + }, + { + "epoch": 0.16597510373443983, + "grad_norm": 6.995478330767793, + "learning_rate": 9.523051085412603e-06, + "loss": 17.6971, + "step": 9080 + }, + { + "epoch": 0.16599338293088636, + "grad_norm": 6.919525674027049, + "learning_rate": 9.522924905591625e-06, + "loss": 17.7011, + "step": 9081 + }, + { + "epoch": 0.16601166212733287, + "grad_norm": 7.863781012799176, + "learning_rate": 9.522798709918189e-06, + "loss": 18.3145, + "step": 9082 + }, + { + "epoch": 
0.1660299413237794, + "grad_norm": 6.244417281280778, + "learning_rate": 9.522672498392734e-06, + "loss": 17.4247, + "step": 9083 + }, + { + "epoch": 0.16604822052022594, + "grad_norm": 7.8538816717185895, + "learning_rate": 9.522546271015705e-06, + "loss": 17.696, + "step": 9084 + }, + { + "epoch": 0.16606649971667245, + "grad_norm": 9.300180918835974, + "learning_rate": 9.522420027787543e-06, + "loss": 18.6505, + "step": 9085 + }, + { + "epoch": 0.16608477891311899, + "grad_norm": 5.7769481419581155, + "learning_rate": 9.522293768708691e-06, + "loss": 17.0681, + "step": 9086 + }, + { + "epoch": 0.1661030581095655, + "grad_norm": 7.199260336614925, + "learning_rate": 9.522167493779593e-06, + "loss": 17.9693, + "step": 9087 + }, + { + "epoch": 0.16612133730601203, + "grad_norm": 6.379245550741849, + "learning_rate": 9.52204120300069e-06, + "loss": 17.7245, + "step": 9088 + }, + { + "epoch": 0.16613961650245856, + "grad_norm": 8.844373939107124, + "learning_rate": 9.521914896372424e-06, + "loss": 18.4572, + "step": 9089 + }, + { + "epoch": 0.16615789569890507, + "grad_norm": 8.247430546903153, + "learning_rate": 9.52178857389524e-06, + "loss": 18.4524, + "step": 9090 + }, + { + "epoch": 0.1661761748953516, + "grad_norm": 6.444161082567926, + "learning_rate": 9.52166223556958e-06, + "loss": 17.3894, + "step": 9091 + }, + { + "epoch": 0.16619445409179812, + "grad_norm": 5.442652376247098, + "learning_rate": 9.521535881395884e-06, + "loss": 17.0941, + "step": 9092 + }, + { + "epoch": 0.16621273328824465, + "grad_norm": 7.279601640261967, + "learning_rate": 9.5214095113746e-06, + "loss": 17.6853, + "step": 9093 + }, + { + "epoch": 0.16623101248469116, + "grad_norm": 6.189097401956089, + "learning_rate": 9.521283125506166e-06, + "loss": 17.417, + "step": 9094 + }, + { + "epoch": 0.1662492916811377, + "grad_norm": 5.91625718018307, + "learning_rate": 9.521156723791028e-06, + "loss": 17.0529, + "step": 9095 + }, + { + "epoch": 0.16626757087758423, + "grad_norm": 6.6431258708244565, + "learning_rate": 9.521030306229627e-06, + "loss": 17.6213, + "step": 9096 + }, + { + "epoch": 0.16628585007403074, + "grad_norm": 7.330145933182181, + "learning_rate": 9.520903872822407e-06, + "loss": 17.8557, + "step": 9097 + }, + { + "epoch": 0.16630412927047727, + "grad_norm": 6.671444664843496, + "learning_rate": 9.520777423569812e-06, + "loss": 17.6392, + "step": 9098 + }, + { + "epoch": 0.16632240846692378, + "grad_norm": 7.772729339617621, + "learning_rate": 9.520650958472285e-06, + "loss": 17.9338, + "step": 9099 + }, + { + "epoch": 0.16634068766337032, + "grad_norm": 7.466196889005785, + "learning_rate": 9.520524477530266e-06, + "loss": 17.9547, + "step": 9100 + }, + { + "epoch": 0.16635896685981685, + "grad_norm": 7.539791802325543, + "learning_rate": 9.520397980744204e-06, + "loss": 17.8858, + "step": 9101 + }, + { + "epoch": 0.16637724605626336, + "grad_norm": 7.129833234767096, + "learning_rate": 9.520271468114539e-06, + "loss": 17.8684, + "step": 9102 + }, + { + "epoch": 0.1663955252527099, + "grad_norm": 5.323013439138336, + "learning_rate": 9.520144939641713e-06, + "loss": 16.938, + "step": 9103 + }, + { + "epoch": 0.1664138044491564, + "grad_norm": 7.393868613524314, + "learning_rate": 9.520018395326171e-06, + "loss": 18.0491, + "step": 9104 + }, + { + "epoch": 0.16643208364560294, + "grad_norm": 7.244360215684982, + "learning_rate": 9.519891835168359e-06, + "loss": 17.443, + "step": 9105 + }, + { + "epoch": 0.16645036284204948, + "grad_norm": 6.633329129216576, + "learning_rate": 
9.519765259168716e-06, + "loss": 17.2289, + "step": 9106 + }, + { + "epoch": 0.16646864203849598, + "grad_norm": 6.955839024230898, + "learning_rate": 9.519638667327691e-06, + "loss": 17.6643, + "step": 9107 + }, + { + "epoch": 0.16648692123494252, + "grad_norm": 5.691450733880839, + "learning_rate": 9.519512059645723e-06, + "loss": 17.0938, + "step": 9108 + }, + { + "epoch": 0.16650520043138903, + "grad_norm": 7.454345822977432, + "learning_rate": 9.519385436123256e-06, + "loss": 17.7329, + "step": 9109 + }, + { + "epoch": 0.16652347962783556, + "grad_norm": 8.266789452889764, + "learning_rate": 9.519258796760738e-06, + "loss": 17.6905, + "step": 9110 + }, + { + "epoch": 0.16654175882428207, + "grad_norm": 9.33813301366781, + "learning_rate": 9.519132141558607e-06, + "loss": 18.6763, + "step": 9111 + }, + { + "epoch": 0.1665600380207286, + "grad_norm": 8.822979246389119, + "learning_rate": 9.519005470517312e-06, + "loss": 18.5242, + "step": 9112 + }, + { + "epoch": 0.16657831721717514, + "grad_norm": 8.916193767795104, + "learning_rate": 9.518878783637296e-06, + "loss": 18.3267, + "step": 9113 + }, + { + "epoch": 0.16659659641362165, + "grad_norm": 8.055989505768691, + "learning_rate": 9.518752080918999e-06, + "loss": 18.1398, + "step": 9114 + }, + { + "epoch": 0.16661487561006819, + "grad_norm": 12.760266756194506, + "learning_rate": 9.51862536236287e-06, + "loss": 18.4892, + "step": 9115 + }, + { + "epoch": 0.1666331548065147, + "grad_norm": 7.3553093464905235, + "learning_rate": 9.518498627969351e-06, + "loss": 18.0986, + "step": 9116 + }, + { + "epoch": 0.16665143400296123, + "grad_norm": 7.113153293673835, + "learning_rate": 9.518371877738885e-06, + "loss": 17.8732, + "step": 9117 + }, + { + "epoch": 0.16666971319940777, + "grad_norm": 7.311254523474977, + "learning_rate": 9.51824511167192e-06, + "loss": 17.6422, + "step": 9118 + }, + { + "epoch": 0.16668799239585427, + "grad_norm": 7.902962118314464, + "learning_rate": 9.518118329768897e-06, + "loss": 18.2016, + "step": 9119 + }, + { + "epoch": 0.1667062715923008, + "grad_norm": 6.982253601525164, + "learning_rate": 9.51799153203026e-06, + "loss": 17.5656, + "step": 9120 + }, + { + "epoch": 0.16672455078874732, + "grad_norm": 9.64107093255908, + "learning_rate": 9.517864718456457e-06, + "loss": 18.3941, + "step": 9121 + }, + { + "epoch": 0.16674282998519385, + "grad_norm": 6.648779777416434, + "learning_rate": 9.51773788904793e-06, + "loss": 17.7655, + "step": 9122 + }, + { + "epoch": 0.1667611091816404, + "grad_norm": 7.52096006546244, + "learning_rate": 9.517611043805122e-06, + "loss": 18.342, + "step": 9123 + }, + { + "epoch": 0.1667793883780869, + "grad_norm": 6.967318309335177, + "learning_rate": 9.517484182728481e-06, + "loss": 17.4008, + "step": 9124 + }, + { + "epoch": 0.16679766757453343, + "grad_norm": 5.507922484346446, + "learning_rate": 9.517357305818447e-06, + "loss": 16.934, + "step": 9125 + }, + { + "epoch": 0.16681594677097994, + "grad_norm": 8.284403361115952, + "learning_rate": 9.517230413075471e-06, + "loss": 18.146, + "step": 9126 + }, + { + "epoch": 0.16683422596742647, + "grad_norm": 6.162027763574121, + "learning_rate": 9.517103504499993e-06, + "loss": 17.4649, + "step": 9127 + }, + { + "epoch": 0.16685250516387298, + "grad_norm": 5.993646032662872, + "learning_rate": 9.516976580092459e-06, + "loss": 17.2926, + "step": 9128 + }, + { + "epoch": 0.16687078436031952, + "grad_norm": 7.503613357613627, + "learning_rate": 9.516849639853314e-06, + "loss": 18.0304, + "step": 9129 + }, + { + "epoch": 
0.16688906355676605, + "grad_norm": 6.890723043094061, + "learning_rate": 9.516722683783003e-06, + "loss": 18.1006, + "step": 9130 + }, + { + "epoch": 0.16690734275321256, + "grad_norm": 6.5303244166727525, + "learning_rate": 9.516595711881972e-06, + "loss": 17.6837, + "step": 9131 + }, + { + "epoch": 0.1669256219496591, + "grad_norm": 7.579422848183306, + "learning_rate": 9.516468724150664e-06, + "loss": 18.1218, + "step": 9132 + }, + { + "epoch": 0.1669439011461056, + "grad_norm": 7.730681670507118, + "learning_rate": 9.516341720589525e-06, + "loss": 17.8222, + "step": 9133 + }, + { + "epoch": 0.16696218034255214, + "grad_norm": 5.854139019858464, + "learning_rate": 9.516214701199e-06, + "loss": 17.2757, + "step": 9134 + }, + { + "epoch": 0.16698045953899868, + "grad_norm": 9.443290375382677, + "learning_rate": 9.516087665979536e-06, + "loss": 17.5585, + "step": 9135 + }, + { + "epoch": 0.16699873873544518, + "grad_norm": 7.466096064656885, + "learning_rate": 9.515960614931575e-06, + "loss": 18.0145, + "step": 9136 + }, + { + "epoch": 0.16701701793189172, + "grad_norm": 6.513489335465951, + "learning_rate": 9.515833548055563e-06, + "loss": 17.6289, + "step": 9137 + }, + { + "epoch": 0.16703529712833823, + "grad_norm": 7.957763787735906, + "learning_rate": 9.51570646535195e-06, + "loss": 18.057, + "step": 9138 + }, + { + "epoch": 0.16705357632478476, + "grad_norm": 7.047812347213954, + "learning_rate": 9.515579366821175e-06, + "loss": 17.9389, + "step": 9139 + }, + { + "epoch": 0.1670718555212313, + "grad_norm": 6.718738424242314, + "learning_rate": 9.515452252463687e-06, + "loss": 17.5463, + "step": 9140 + }, + { + "epoch": 0.1670901347176778, + "grad_norm": 6.0169625408681835, + "learning_rate": 9.51532512227993e-06, + "loss": 17.4768, + "step": 9141 + }, + { + "epoch": 0.16710841391412434, + "grad_norm": 6.4821786910550365, + "learning_rate": 9.51519797627035e-06, + "loss": 17.5259, + "step": 9142 + }, + { + "epoch": 0.16712669311057085, + "grad_norm": 6.517838393509723, + "learning_rate": 9.515070814435394e-06, + "loss": 17.4701, + "step": 9143 + }, + { + "epoch": 0.1671449723070174, + "grad_norm": 8.17111782001087, + "learning_rate": 9.514943636775504e-06, + "loss": 18.0557, + "step": 9144 + }, + { + "epoch": 0.1671632515034639, + "grad_norm": 7.30316783584968, + "learning_rate": 9.51481644329113e-06, + "loss": 17.4473, + "step": 9145 + }, + { + "epoch": 0.16718153069991043, + "grad_norm": 7.990399924648765, + "learning_rate": 9.514689233982718e-06, + "loss": 17.9287, + "step": 9146 + }, + { + "epoch": 0.16719980989635697, + "grad_norm": 7.193933282375789, + "learning_rate": 9.51456200885071e-06, + "loss": 17.6412, + "step": 9147 + }, + { + "epoch": 0.16721808909280347, + "grad_norm": 8.033713818889083, + "learning_rate": 9.514434767895555e-06, + "loss": 18.1817, + "step": 9148 + }, + { + "epoch": 0.16723636828925, + "grad_norm": 6.876274301198719, + "learning_rate": 9.514307511117697e-06, + "loss": 17.4852, + "step": 9149 + }, + { + "epoch": 0.16725464748569652, + "grad_norm": 9.832202016975332, + "learning_rate": 9.514180238517583e-06, + "loss": 18.4228, + "step": 9150 + }, + { + "epoch": 0.16727292668214305, + "grad_norm": 7.205506056626777, + "learning_rate": 9.514052950095659e-06, + "loss": 17.7711, + "step": 9151 + }, + { + "epoch": 0.1672912058785896, + "grad_norm": 7.148710541106331, + "learning_rate": 9.51392564585237e-06, + "loss": 17.6796, + "step": 9152 + }, + { + "epoch": 0.1673094850750361, + "grad_norm": 7.348555779791997, + "learning_rate": 9.513798325788167e-06, + 
"loss": 17.7698, + "step": 9153 + }, + { + "epoch": 0.16732776427148263, + "grad_norm": 7.677600866506125, + "learning_rate": 9.51367098990349e-06, + "loss": 17.7246, + "step": 9154 + }, + { + "epoch": 0.16734604346792914, + "grad_norm": 7.2066128084205285, + "learning_rate": 9.513543638198787e-06, + "loss": 17.3363, + "step": 9155 + }, + { + "epoch": 0.16736432266437568, + "grad_norm": 7.187256040340105, + "learning_rate": 9.513416270674506e-06, + "loss": 17.952, + "step": 9156 + }, + { + "epoch": 0.1673826018608222, + "grad_norm": 6.73423065779139, + "learning_rate": 9.513288887331093e-06, + "loss": 17.8486, + "step": 9157 + }, + { + "epoch": 0.16740088105726872, + "grad_norm": 7.845017050587436, + "learning_rate": 9.513161488168993e-06, + "loss": 18.1444, + "step": 9158 + }, + { + "epoch": 0.16741916025371525, + "grad_norm": 7.398952054211818, + "learning_rate": 9.513034073188656e-06, + "loss": 18.2226, + "step": 9159 + }, + { + "epoch": 0.16743743945016176, + "grad_norm": 6.692649336390622, + "learning_rate": 9.512906642390526e-06, + "loss": 17.5083, + "step": 9160 + }, + { + "epoch": 0.1674557186466083, + "grad_norm": 6.974035709920217, + "learning_rate": 9.512779195775048e-06, + "loss": 17.8033, + "step": 9161 + }, + { + "epoch": 0.1674739978430548, + "grad_norm": 6.330347140989411, + "learning_rate": 9.512651733342671e-06, + "loss": 17.5932, + "step": 9162 + }, + { + "epoch": 0.16749227703950134, + "grad_norm": 7.61732463302869, + "learning_rate": 9.51252425509384e-06, + "loss": 17.8264, + "step": 9163 + }, + { + "epoch": 0.16751055623594788, + "grad_norm": 7.355653977371296, + "learning_rate": 9.512396761029004e-06, + "loss": 17.8449, + "step": 9164 + }, + { + "epoch": 0.16752883543239439, + "grad_norm": 6.141978303978035, + "learning_rate": 9.51226925114861e-06, + "loss": 17.3168, + "step": 9165 + }, + { + "epoch": 0.16754711462884092, + "grad_norm": 6.877778001090658, + "learning_rate": 9.512141725453103e-06, + "loss": 17.7299, + "step": 9166 + }, + { + "epoch": 0.16756539382528743, + "grad_norm": 6.62830012161467, + "learning_rate": 9.512014183942932e-06, + "loss": 17.5122, + "step": 9167 + }, + { + "epoch": 0.16758367302173396, + "grad_norm": 7.861931039407114, + "learning_rate": 9.51188662661854e-06, + "loss": 18.0033, + "step": 9168 + }, + { + "epoch": 0.1676019522181805, + "grad_norm": 6.158488854561656, + "learning_rate": 9.51175905348038e-06, + "loss": 17.3119, + "step": 9169 + }, + { + "epoch": 0.167620231414627, + "grad_norm": 7.31101099269187, + "learning_rate": 9.511631464528894e-06, + "loss": 17.6251, + "step": 9170 + }, + { + "epoch": 0.16763851061107354, + "grad_norm": 6.069969422578263, + "learning_rate": 9.511503859764532e-06, + "loss": 17.281, + "step": 9171 + }, + { + "epoch": 0.16765678980752005, + "grad_norm": 7.98784248537527, + "learning_rate": 9.511376239187741e-06, + "loss": 18.3653, + "step": 9172 + }, + { + "epoch": 0.1676750690039666, + "grad_norm": 7.4389387664164515, + "learning_rate": 9.511248602798967e-06, + "loss": 17.8475, + "step": 9173 + }, + { + "epoch": 0.16769334820041312, + "grad_norm": 8.398094097487682, + "learning_rate": 9.511120950598659e-06, + "loss": 17.9461, + "step": 9174 + }, + { + "epoch": 0.16771162739685963, + "grad_norm": 5.612258948719771, + "learning_rate": 9.510993282587262e-06, + "loss": 17.1776, + "step": 9175 + }, + { + "epoch": 0.16772990659330617, + "grad_norm": 7.1766094368171505, + "learning_rate": 9.510865598765227e-06, + "loss": 17.7494, + "step": 9176 + }, + { + "epoch": 0.16774818578975267, + "grad_norm": 
7.2923563662027195, + "learning_rate": 9.510737899132998e-06, + "loss": 18.1259, + "step": 9177 + }, + { + "epoch": 0.1677664649861992, + "grad_norm": 6.7949270375861195, + "learning_rate": 9.510610183691023e-06, + "loss": 17.5615, + "step": 9178 + }, + { + "epoch": 0.16778474418264572, + "grad_norm": 8.461201564995129, + "learning_rate": 9.510482452439753e-06, + "loss": 18.3026, + "step": 9179 + }, + { + "epoch": 0.16780302337909225, + "grad_norm": 6.799459045467971, + "learning_rate": 9.510354705379632e-06, + "loss": 17.7901, + "step": 9180 + }, + { + "epoch": 0.1678213025755388, + "grad_norm": 8.015234311899766, + "learning_rate": 9.51022694251111e-06, + "loss": 17.9572, + "step": 9181 + }, + { + "epoch": 0.1678395817719853, + "grad_norm": 6.784102337008483, + "learning_rate": 9.510099163834633e-06, + "loss": 17.7439, + "step": 9182 + }, + { + "epoch": 0.16785786096843183, + "grad_norm": 7.6356079692659655, + "learning_rate": 9.50997136935065e-06, + "loss": 18.1289, + "step": 9183 + }, + { + "epoch": 0.16787614016487834, + "grad_norm": 6.4649360917136995, + "learning_rate": 9.50984355905961e-06, + "loss": 17.3035, + "step": 9184 + }, + { + "epoch": 0.16789441936132488, + "grad_norm": 5.971468991654149, + "learning_rate": 9.509715732961959e-06, + "loss": 17.2778, + "step": 9185 + }, + { + "epoch": 0.1679126985577714, + "grad_norm": 7.19222144592288, + "learning_rate": 9.509587891058144e-06, + "loss": 17.628, + "step": 9186 + }, + { + "epoch": 0.16793097775421792, + "grad_norm": 5.6168211935698436, + "learning_rate": 9.509460033348617e-06, + "loss": 17.0353, + "step": 9187 + }, + { + "epoch": 0.16794925695066446, + "grad_norm": 7.143353577569963, + "learning_rate": 9.509332159833824e-06, + "loss": 17.691, + "step": 9188 + }, + { + "epoch": 0.16796753614711096, + "grad_norm": 7.99527562581922, + "learning_rate": 9.509204270514212e-06, + "loss": 18.5328, + "step": 9189 + }, + { + "epoch": 0.1679858153435575, + "grad_norm": 8.806796971586664, + "learning_rate": 9.509076365390231e-06, + "loss": 18.1942, + "step": 9190 + }, + { + "epoch": 0.16800409454000403, + "grad_norm": 6.80142875338237, + "learning_rate": 9.508948444462327e-06, + "loss": 17.8174, + "step": 9191 + }, + { + "epoch": 0.16802237373645054, + "grad_norm": 8.82466589624803, + "learning_rate": 9.508820507730952e-06, + "loss": 18.6217, + "step": 9192 + }, + { + "epoch": 0.16804065293289708, + "grad_norm": 7.190350624726181, + "learning_rate": 9.50869255519655e-06, + "loss": 17.8272, + "step": 9193 + }, + { + "epoch": 0.16805893212934359, + "grad_norm": 7.015287144182023, + "learning_rate": 9.508564586859575e-06, + "loss": 17.9058, + "step": 9194 + }, + { + "epoch": 0.16807721132579012, + "grad_norm": 5.8777521897708525, + "learning_rate": 9.50843660272047e-06, + "loss": 17.3103, + "step": 9195 + }, + { + "epoch": 0.16809549052223663, + "grad_norm": 7.262802111291724, + "learning_rate": 9.508308602779686e-06, + "loss": 18.0411, + "step": 9196 + }, + { + "epoch": 0.16811376971868316, + "grad_norm": 7.969769699031251, + "learning_rate": 9.508180587037673e-06, + "loss": 17.8796, + "step": 9197 + }, + { + "epoch": 0.1681320489151297, + "grad_norm": 8.27773983726501, + "learning_rate": 9.508052555494878e-06, + "loss": 18.2145, + "step": 9198 + }, + { + "epoch": 0.1681503281115762, + "grad_norm": 7.160206735734677, + "learning_rate": 9.50792450815175e-06, + "loss": 17.6246, + "step": 9199 + }, + { + "epoch": 0.16816860730802274, + "grad_norm": 8.286567525870572, + "learning_rate": 9.507796445008737e-06, + "loss": 17.7956, + "step": 9200 
+ }, + { + "epoch": 0.16818688650446925, + "grad_norm": 7.4618560161589444, + "learning_rate": 9.507668366066289e-06, + "loss": 17.9003, + "step": 9201 + }, + { + "epoch": 0.1682051657009158, + "grad_norm": 6.215729319601452, + "learning_rate": 9.507540271324852e-06, + "loss": 17.2985, + "step": 9202 + }, + { + "epoch": 0.16822344489736232, + "grad_norm": 8.174184185896188, + "learning_rate": 9.507412160784881e-06, + "loss": 18.2771, + "step": 9203 + }, + { + "epoch": 0.16824172409380883, + "grad_norm": 5.97025787633085, + "learning_rate": 9.50728403444682e-06, + "loss": 17.3012, + "step": 9204 + }, + { + "epoch": 0.16826000329025537, + "grad_norm": 8.322850448430295, + "learning_rate": 9.50715589231112e-06, + "loss": 17.9653, + "step": 9205 + }, + { + "epoch": 0.16827828248670187, + "grad_norm": 6.128258394920188, + "learning_rate": 9.50702773437823e-06, + "loss": 17.4, + "step": 9206 + }, + { + "epoch": 0.1682965616831484, + "grad_norm": 5.603444969341588, + "learning_rate": 9.5068995606486e-06, + "loss": 17.2268, + "step": 9207 + }, + { + "epoch": 0.16831484087959495, + "grad_norm": 6.814603802889895, + "learning_rate": 9.506771371122676e-06, + "loss": 17.7093, + "step": 9208 + }, + { + "epoch": 0.16833312007604145, + "grad_norm": 7.321675236998762, + "learning_rate": 9.50664316580091e-06, + "loss": 17.8145, + "step": 9209 + }, + { + "epoch": 0.168351399272488, + "grad_norm": 6.474422885000199, + "learning_rate": 9.506514944683752e-06, + "loss": 17.464, + "step": 9210 + }, + { + "epoch": 0.1683696784689345, + "grad_norm": 7.30339512472791, + "learning_rate": 9.506386707771648e-06, + "loss": 17.8909, + "step": 9211 + }, + { + "epoch": 0.16838795766538103, + "grad_norm": 7.456931045903403, + "learning_rate": 9.50625845506505e-06, + "loss": 17.8489, + "step": 9212 + }, + { + "epoch": 0.16840623686182754, + "grad_norm": 7.463795746898944, + "learning_rate": 9.506130186564408e-06, + "loss": 18.1574, + "step": 9213 + }, + { + "epoch": 0.16842451605827408, + "grad_norm": 8.117000953008189, + "learning_rate": 9.50600190227017e-06, + "loss": 18.2337, + "step": 9214 + }, + { + "epoch": 0.1684427952547206, + "grad_norm": 7.830745084486118, + "learning_rate": 9.505873602182788e-06, + "loss": 18.0402, + "step": 9215 + }, + { + "epoch": 0.16846107445116712, + "grad_norm": 9.237325081003904, + "learning_rate": 9.505745286302707e-06, + "loss": 18.5084, + "step": 9216 + }, + { + "epoch": 0.16847935364761366, + "grad_norm": 7.6594654940174856, + "learning_rate": 9.505616954630383e-06, + "loss": 18.4119, + "step": 9217 + }, + { + "epoch": 0.16849763284406016, + "grad_norm": 6.192285300112505, + "learning_rate": 9.505488607166262e-06, + "loss": 17.3778, + "step": 9218 + }, + { + "epoch": 0.1685159120405067, + "grad_norm": 9.209633641285492, + "learning_rate": 9.505360243910792e-06, + "loss": 18.6063, + "step": 9219 + }, + { + "epoch": 0.16853419123695323, + "grad_norm": 9.379714619614132, + "learning_rate": 9.505231864864429e-06, + "loss": 18.6675, + "step": 9220 + }, + { + "epoch": 0.16855247043339974, + "grad_norm": 8.075719850020935, + "learning_rate": 9.505103470027617e-06, + "loss": 18.3677, + "step": 9221 + }, + { + "epoch": 0.16857074962984628, + "grad_norm": 8.194403618110798, + "learning_rate": 9.504975059400807e-06, + "loss": 18.0993, + "step": 9222 + }, + { + "epoch": 0.16858902882629279, + "grad_norm": 6.596985374575552, + "learning_rate": 9.504846632984452e-06, + "loss": 17.5941, + "step": 9223 + }, + { + "epoch": 0.16860730802273932, + "grad_norm": 6.485470657566503, + "learning_rate": 
9.504718190779001e-06, + "loss": 17.6721, + "step": 9224 + }, + { + "epoch": 0.16862558721918586, + "grad_norm": 6.6423030234339295, + "learning_rate": 9.504589732784903e-06, + "loss": 17.5654, + "step": 9225 + }, + { + "epoch": 0.16864386641563237, + "grad_norm": 6.915606847521376, + "learning_rate": 9.50446125900261e-06, + "loss": 17.7335, + "step": 9226 + }, + { + "epoch": 0.1686621456120789, + "grad_norm": 5.773394547521735, + "learning_rate": 9.50433276943257e-06, + "loss": 17.2237, + "step": 9227 + }, + { + "epoch": 0.1686804248085254, + "grad_norm": 7.11829780011999, + "learning_rate": 9.504204264075234e-06, + "loss": 17.641, + "step": 9228 + }, + { + "epoch": 0.16869870400497194, + "grad_norm": 6.4137613968029505, + "learning_rate": 9.504075742931052e-06, + "loss": 17.5393, + "step": 9229 + }, + { + "epoch": 0.16871698320141845, + "grad_norm": 6.7655899129952255, + "learning_rate": 9.503947206000477e-06, + "loss": 17.5254, + "step": 9230 + }, + { + "epoch": 0.168735262397865, + "grad_norm": 5.51686495662697, + "learning_rate": 9.503818653283959e-06, + "loss": 17.1054, + "step": 9231 + }, + { + "epoch": 0.16875354159431152, + "grad_norm": 7.395907813258995, + "learning_rate": 9.503690084781945e-06, + "loss": 18.1155, + "step": 9232 + }, + { + "epoch": 0.16877182079075803, + "grad_norm": 6.682609950792527, + "learning_rate": 9.503561500494889e-06, + "loss": 17.6452, + "step": 9233 + }, + { + "epoch": 0.16879009998720457, + "grad_norm": 6.556590064553701, + "learning_rate": 9.503432900423242e-06, + "loss": 17.8319, + "step": 9234 + }, + { + "epoch": 0.16880837918365107, + "grad_norm": 6.682219631635365, + "learning_rate": 9.503304284567451e-06, + "loss": 17.5929, + "step": 9235 + }, + { + "epoch": 0.1688266583800976, + "grad_norm": 8.022964534443789, + "learning_rate": 9.503175652927972e-06, + "loss": 17.993, + "step": 9236 + }, + { + "epoch": 0.16884493757654415, + "grad_norm": 7.680467349943672, + "learning_rate": 9.50304700550525e-06, + "loss": 18.2086, + "step": 9237 + }, + { + "epoch": 0.16886321677299065, + "grad_norm": 7.797938679905344, + "learning_rate": 9.502918342299742e-06, + "loss": 17.9965, + "step": 9238 + }, + { + "epoch": 0.1688814959694372, + "grad_norm": 8.175512592833154, + "learning_rate": 9.502789663311896e-06, + "loss": 17.7192, + "step": 9239 + }, + { + "epoch": 0.1688997751658837, + "grad_norm": 7.807439592723891, + "learning_rate": 9.50266096854216e-06, + "loss": 18.2918, + "step": 9240 + }, + { + "epoch": 0.16891805436233023, + "grad_norm": 7.1402803356561355, + "learning_rate": 9.502532257990991e-06, + "loss": 17.7497, + "step": 9241 + }, + { + "epoch": 0.16893633355877677, + "grad_norm": 7.1479553245385405, + "learning_rate": 9.502403531658834e-06, + "loss": 17.7492, + "step": 9242 + }, + { + "epoch": 0.16895461275522328, + "grad_norm": 7.6740459373006615, + "learning_rate": 9.502274789546147e-06, + "loss": 18.0036, + "step": 9243 + }, + { + "epoch": 0.1689728919516698, + "grad_norm": 7.04948573680895, + "learning_rate": 9.502146031653375e-06, + "loss": 17.817, + "step": 9244 + }, + { + "epoch": 0.16899117114811632, + "grad_norm": 7.472060819134237, + "learning_rate": 9.502017257980972e-06, + "loss": 17.7266, + "step": 9245 + }, + { + "epoch": 0.16900945034456286, + "grad_norm": 11.345342492889468, + "learning_rate": 9.50188846852939e-06, + "loss": 18.4252, + "step": 9246 + }, + { + "epoch": 0.16902772954100936, + "grad_norm": 6.324292922016057, + "learning_rate": 9.501759663299079e-06, + "loss": 17.4231, + "step": 9247 + }, + { + "epoch": 
0.1690460087374559, + "grad_norm": 7.947575167075575, + "learning_rate": 9.501630842290492e-06, + "loss": 18.4636, + "step": 9248 + }, + { + "epoch": 0.16906428793390244, + "grad_norm": 7.064056122783764, + "learning_rate": 9.501502005504079e-06, + "loss": 17.7031, + "step": 9249 + }, + { + "epoch": 0.16908256713034894, + "grad_norm": 6.607574761603722, + "learning_rate": 9.501373152940292e-06, + "loss": 17.5506, + "step": 9250 + }, + { + "epoch": 0.16910084632679548, + "grad_norm": 7.47745336555634, + "learning_rate": 9.501244284599582e-06, + "loss": 18.0999, + "step": 9251 + }, + { + "epoch": 0.169119125523242, + "grad_norm": 6.442336287804173, + "learning_rate": 9.501115400482401e-06, + "loss": 17.6483, + "step": 9252 + }, + { + "epoch": 0.16913740471968852, + "grad_norm": 6.689660504446449, + "learning_rate": 9.500986500589204e-06, + "loss": 17.7348, + "step": 9253 + }, + { + "epoch": 0.16915568391613506, + "grad_norm": 7.0556173662591055, + "learning_rate": 9.500857584920438e-06, + "loss": 17.8223, + "step": 9254 + }, + { + "epoch": 0.16917396311258157, + "grad_norm": 6.435332083102732, + "learning_rate": 9.500728653476556e-06, + "loss": 17.435, + "step": 9255 + }, + { + "epoch": 0.1691922423090281, + "grad_norm": 8.178954829322633, + "learning_rate": 9.500599706258012e-06, + "loss": 18.1133, + "step": 9256 + }, + { + "epoch": 0.1692105215054746, + "grad_norm": 7.486000079432466, + "learning_rate": 9.500470743265256e-06, + "loss": 17.9521, + "step": 9257 + }, + { + "epoch": 0.16922880070192114, + "grad_norm": 7.414235605822304, + "learning_rate": 9.50034176449874e-06, + "loss": 17.9106, + "step": 9258 + }, + { + "epoch": 0.16924707989836768, + "grad_norm": 7.29957041738579, + "learning_rate": 9.500212769958916e-06, + "loss": 17.9123, + "step": 9259 + }, + { + "epoch": 0.1692653590948142, + "grad_norm": 6.419324021770509, + "learning_rate": 9.50008375964624e-06, + "loss": 17.5822, + "step": 9260 + }, + { + "epoch": 0.16928363829126072, + "grad_norm": 7.183503307851751, + "learning_rate": 9.499954733561159e-06, + "loss": 17.827, + "step": 9261 + }, + { + "epoch": 0.16930191748770723, + "grad_norm": 5.9867549399642535, + "learning_rate": 9.499825691704125e-06, + "loss": 17.3328, + "step": 9262 + }, + { + "epoch": 0.16932019668415377, + "grad_norm": 6.538978287646619, + "learning_rate": 9.499696634075595e-06, + "loss": 17.1429, + "step": 9263 + }, + { + "epoch": 0.16933847588060028, + "grad_norm": 6.492351933050396, + "learning_rate": 9.499567560676018e-06, + "loss": 17.5489, + "step": 9264 + }, + { + "epoch": 0.1693567550770468, + "grad_norm": 7.239262810517472, + "learning_rate": 9.499438471505848e-06, + "loss": 17.7833, + "step": 9265 + }, + { + "epoch": 0.16937503427349335, + "grad_norm": 6.616435648759427, + "learning_rate": 9.499309366565536e-06, + "loss": 17.4924, + "step": 9266 + }, + { + "epoch": 0.16939331346993985, + "grad_norm": 6.554446689823281, + "learning_rate": 9.499180245855535e-06, + "loss": 17.3935, + "step": 9267 + }, + { + "epoch": 0.1694115926663864, + "grad_norm": 6.5185668445542655, + "learning_rate": 9.499051109376298e-06, + "loss": 17.4669, + "step": 9268 + }, + { + "epoch": 0.1694298718628329, + "grad_norm": 7.007126029712472, + "learning_rate": 9.498921957128278e-06, + "loss": 17.9778, + "step": 9269 + }, + { + "epoch": 0.16944815105927943, + "grad_norm": 8.031631202231285, + "learning_rate": 9.498792789111924e-06, + "loss": 18.2722, + "step": 9270 + }, + { + "epoch": 0.16946643025572597, + "grad_norm": 6.224877837271482, + "learning_rate": 
9.498663605327694e-06, + "loss": 17.4046, + "step": 9271 + }, + { + "epoch": 0.16948470945217248, + "grad_norm": 7.110763781889539, + "learning_rate": 9.498534405776038e-06, + "loss": 17.8919, + "step": 9272 + }, + { + "epoch": 0.169502988648619, + "grad_norm": 7.6365904652147885, + "learning_rate": 9.498405190457409e-06, + "loss": 17.9798, + "step": 9273 + }, + { + "epoch": 0.16952126784506552, + "grad_norm": 8.219589460169788, + "learning_rate": 9.49827595937226e-06, + "loss": 18.0456, + "step": 9274 + }, + { + "epoch": 0.16953954704151206, + "grad_norm": 7.332579281884612, + "learning_rate": 9.498146712521046e-06, + "loss": 17.9169, + "step": 9275 + }, + { + "epoch": 0.1695578262379586, + "grad_norm": 6.298096144118503, + "learning_rate": 9.498017449904216e-06, + "loss": 17.4031, + "step": 9276 + }, + { + "epoch": 0.1695761054344051, + "grad_norm": 7.552642894361333, + "learning_rate": 9.497888171522227e-06, + "loss": 17.9247, + "step": 9277 + }, + { + "epoch": 0.16959438463085164, + "grad_norm": 6.453234743792428, + "learning_rate": 9.497758877375528e-06, + "loss": 17.3823, + "step": 9278 + }, + { + "epoch": 0.16961266382729814, + "grad_norm": 8.067008263297412, + "learning_rate": 9.497629567464576e-06, + "loss": 17.763, + "step": 9279 + }, + { + "epoch": 0.16963094302374468, + "grad_norm": 8.0629395551368, + "learning_rate": 9.497500241789822e-06, + "loss": 17.9484, + "step": 9280 + }, + { + "epoch": 0.1696492222201912, + "grad_norm": 7.35078081421746, + "learning_rate": 9.49737090035172e-06, + "loss": 17.988, + "step": 9281 + }, + { + "epoch": 0.16966750141663772, + "grad_norm": 7.718604409984683, + "learning_rate": 9.497241543150724e-06, + "loss": 17.883, + "step": 9282 + }, + { + "epoch": 0.16968578061308426, + "grad_norm": 6.59497357682199, + "learning_rate": 9.497112170187287e-06, + "loss": 17.394, + "step": 9283 + }, + { + "epoch": 0.16970405980953077, + "grad_norm": 7.051795670369403, + "learning_rate": 9.49698278146186e-06, + "loss": 17.551, + "step": 9284 + }, + { + "epoch": 0.1697223390059773, + "grad_norm": 7.299070725776896, + "learning_rate": 9.4968533769749e-06, + "loss": 17.7815, + "step": 9285 + }, + { + "epoch": 0.1697406182024238, + "grad_norm": 7.968673991617567, + "learning_rate": 9.49672395672686e-06, + "loss": 17.9397, + "step": 9286 + }, + { + "epoch": 0.16975889739887035, + "grad_norm": 7.478284411752328, + "learning_rate": 9.49659452071819e-06, + "loss": 18.2165, + "step": 9287 + }, + { + "epoch": 0.16977717659531688, + "grad_norm": 6.5996511026123, + "learning_rate": 9.496465068949348e-06, + "loss": 17.4892, + "step": 9288 + }, + { + "epoch": 0.1697954557917634, + "grad_norm": 6.682420819376153, + "learning_rate": 9.496335601420786e-06, + "loss": 17.5018, + "step": 9289 + }, + { + "epoch": 0.16981373498820992, + "grad_norm": 5.949975487642374, + "learning_rate": 9.496206118132958e-06, + "loss": 17.3644, + "step": 9290 + }, + { + "epoch": 0.16983201418465643, + "grad_norm": 6.953868735342993, + "learning_rate": 9.496076619086318e-06, + "loss": 17.7816, + "step": 9291 + }, + { + "epoch": 0.16985029338110297, + "grad_norm": 7.628905059730847, + "learning_rate": 9.495947104281318e-06, + "loss": 17.9063, + "step": 9292 + }, + { + "epoch": 0.1698685725775495, + "grad_norm": 6.472233724623038, + "learning_rate": 9.495817573718415e-06, + "loss": 17.8451, + "step": 9293 + }, + { + "epoch": 0.169886851773996, + "grad_norm": 6.881108318120469, + "learning_rate": 9.495688027398062e-06, + "loss": 17.6711, + "step": 9294 + }, + { + "epoch": 0.16990513097044255, + 
"grad_norm": 7.015050799593046, + "learning_rate": 9.495558465320712e-06, + "loss": 17.7221, + "step": 9295 + }, + { + "epoch": 0.16992341016688906, + "grad_norm": 6.518346540265528, + "learning_rate": 9.49542888748682e-06, + "loss": 17.5115, + "step": 9296 + }, + { + "epoch": 0.1699416893633356, + "grad_norm": 6.619956820599698, + "learning_rate": 9.495299293896839e-06, + "loss": 17.5084, + "step": 9297 + }, + { + "epoch": 0.1699599685597821, + "grad_norm": 7.572798054728141, + "learning_rate": 9.495169684551224e-06, + "loss": 17.9554, + "step": 9298 + }, + { + "epoch": 0.16997824775622863, + "grad_norm": 6.131299772610638, + "learning_rate": 9.495040059450431e-06, + "loss": 17.3223, + "step": 9299 + }, + { + "epoch": 0.16999652695267517, + "grad_norm": 8.178945579484868, + "learning_rate": 9.494910418594911e-06, + "loss": 17.4849, + "step": 9300 + }, + { + "epoch": 0.17001480614912168, + "grad_norm": 7.652603534647454, + "learning_rate": 9.494780761985121e-06, + "loss": 17.8424, + "step": 9301 + }, + { + "epoch": 0.1700330853455682, + "grad_norm": 5.783751209083505, + "learning_rate": 9.494651089621514e-06, + "loss": 17.2083, + "step": 9302 + }, + { + "epoch": 0.17005136454201472, + "grad_norm": 7.664250666867046, + "learning_rate": 9.494521401504543e-06, + "loss": 17.8247, + "step": 9303 + }, + { + "epoch": 0.17006964373846126, + "grad_norm": 5.82619101418688, + "learning_rate": 9.494391697634669e-06, + "loss": 17.4791, + "step": 9304 + }, + { + "epoch": 0.1700879229349078, + "grad_norm": 7.838781534300981, + "learning_rate": 9.494261978012339e-06, + "loss": 18.1943, + "step": 9305 + }, + { + "epoch": 0.1701062021313543, + "grad_norm": 5.987071466722858, + "learning_rate": 9.49413224263801e-06, + "loss": 17.2892, + "step": 9306 + }, + { + "epoch": 0.17012448132780084, + "grad_norm": 8.175921098814772, + "learning_rate": 9.49400249151214e-06, + "loss": 18.1773, + "step": 9307 + }, + { + "epoch": 0.17014276052424734, + "grad_norm": 8.073834735977083, + "learning_rate": 9.493872724635181e-06, + "loss": 18.4408, + "step": 9308 + }, + { + "epoch": 0.17016103972069388, + "grad_norm": 7.009521229929869, + "learning_rate": 9.493742942007586e-06, + "loss": 17.8554, + "step": 9309 + }, + { + "epoch": 0.17017931891714042, + "grad_norm": 7.246449100658442, + "learning_rate": 9.493613143629812e-06, + "loss": 17.4506, + "step": 9310 + }, + { + "epoch": 0.17019759811358692, + "grad_norm": 6.370295476618179, + "learning_rate": 9.493483329502315e-06, + "loss": 17.4496, + "step": 9311 + }, + { + "epoch": 0.17021587731003346, + "grad_norm": 6.840840264164594, + "learning_rate": 9.49335349962555e-06, + "loss": 17.5516, + "step": 9312 + }, + { + "epoch": 0.17023415650647997, + "grad_norm": 6.1046331762652155, + "learning_rate": 9.493223653999968e-06, + "loss": 17.2566, + "step": 9313 + }, + { + "epoch": 0.1702524357029265, + "grad_norm": 7.016368323732165, + "learning_rate": 9.493093792626029e-06, + "loss": 18.0346, + "step": 9314 + }, + { + "epoch": 0.170270714899373, + "grad_norm": 6.76175892001121, + "learning_rate": 9.492963915504188e-06, + "loss": 17.7956, + "step": 9315 + }, + { + "epoch": 0.17028899409581955, + "grad_norm": 6.531249376786396, + "learning_rate": 9.492834022634895e-06, + "loss": 17.4568, + "step": 9316 + }, + { + "epoch": 0.17030727329226608, + "grad_norm": 7.5987658416392225, + "learning_rate": 9.49270411401861e-06, + "loss": 17.7604, + "step": 9317 + }, + { + "epoch": 0.1703255524887126, + "grad_norm": 7.431519081490518, + "learning_rate": 9.492574189655786e-06, + "loss": 17.8669, + 
"step": 9318 + }, + { + "epoch": 0.17034383168515912, + "grad_norm": 7.958235479289281, + "learning_rate": 9.49244424954688e-06, + "loss": 18.2507, + "step": 9319 + }, + { + "epoch": 0.17036211088160563, + "grad_norm": 6.986082779483737, + "learning_rate": 9.492314293692348e-06, + "loss": 17.9994, + "step": 9320 + }, + { + "epoch": 0.17038039007805217, + "grad_norm": 8.672267116506786, + "learning_rate": 9.492184322092642e-06, + "loss": 18.2985, + "step": 9321 + }, + { + "epoch": 0.1703986692744987, + "grad_norm": 8.225867360448504, + "learning_rate": 9.492054334748221e-06, + "loss": 18.4201, + "step": 9322 + }, + { + "epoch": 0.1704169484709452, + "grad_norm": 7.556418871422229, + "learning_rate": 9.491924331659539e-06, + "loss": 18.1668, + "step": 9323 + }, + { + "epoch": 0.17043522766739175, + "grad_norm": 7.06440955032604, + "learning_rate": 9.491794312827051e-06, + "loss": 17.9922, + "step": 9324 + }, + { + "epoch": 0.17045350686383826, + "grad_norm": 6.717742517738556, + "learning_rate": 9.491664278251215e-06, + "loss": 17.4833, + "step": 9325 + }, + { + "epoch": 0.1704717860602848, + "grad_norm": 12.546466403581084, + "learning_rate": 9.491534227932487e-06, + "loss": 18.7726, + "step": 9326 + }, + { + "epoch": 0.17049006525673133, + "grad_norm": 8.062593558854038, + "learning_rate": 9.49140416187132e-06, + "loss": 18.2224, + "step": 9327 + }, + { + "epoch": 0.17050834445317783, + "grad_norm": 7.0581222055222685, + "learning_rate": 9.49127408006817e-06, + "loss": 17.5139, + "step": 9328 + }, + { + "epoch": 0.17052662364962437, + "grad_norm": 7.099344618501691, + "learning_rate": 9.491143982523494e-06, + "loss": 17.9362, + "step": 9329 + }, + { + "epoch": 0.17054490284607088, + "grad_norm": 7.402714202305478, + "learning_rate": 9.49101386923775e-06, + "loss": 18.0988, + "step": 9330 + }, + { + "epoch": 0.17056318204251741, + "grad_norm": 7.7416532297368645, + "learning_rate": 9.490883740211392e-06, + "loss": 18.1308, + "step": 9331 + }, + { + "epoch": 0.17058146123896392, + "grad_norm": 5.993456403412218, + "learning_rate": 9.490753595444875e-06, + "loss": 17.3263, + "step": 9332 + }, + { + "epoch": 0.17059974043541046, + "grad_norm": 7.382773762165547, + "learning_rate": 9.490623434938655e-06, + "loss": 18.0158, + "step": 9333 + }, + { + "epoch": 0.170618019631857, + "grad_norm": 6.90202197645296, + "learning_rate": 9.490493258693192e-06, + "loss": 17.852, + "step": 9334 + }, + { + "epoch": 0.1706362988283035, + "grad_norm": 6.578185739755169, + "learning_rate": 9.490363066708939e-06, + "loss": 17.4495, + "step": 9335 + }, + { + "epoch": 0.17065457802475004, + "grad_norm": 5.573591882246648, + "learning_rate": 9.490232858986353e-06, + "loss": 16.9851, + "step": 9336 + }, + { + "epoch": 0.17067285722119654, + "grad_norm": 5.830884237617553, + "learning_rate": 9.490102635525891e-06, + "loss": 17.0644, + "step": 9337 + }, + { + "epoch": 0.17069113641764308, + "grad_norm": 7.101055498888201, + "learning_rate": 9.489972396328008e-06, + "loss": 17.8703, + "step": 9338 + }, + { + "epoch": 0.17070941561408962, + "grad_norm": 6.8893580123268325, + "learning_rate": 9.489842141393162e-06, + "loss": 17.3605, + "step": 9339 + }, + { + "epoch": 0.17072769481053612, + "grad_norm": 7.00415508406558, + "learning_rate": 9.489711870721808e-06, + "loss": 17.6772, + "step": 9340 + }, + { + "epoch": 0.17074597400698266, + "grad_norm": 6.687536240515922, + "learning_rate": 9.489581584314404e-06, + "loss": 17.6577, + "step": 9341 + }, + { + "epoch": 0.17076425320342917, + "grad_norm": 6.716421562949809, + 
"learning_rate": 9.489451282171407e-06, + "loss": 17.8492, + "step": 9342 + }, + { + "epoch": 0.1707825323998757, + "grad_norm": 7.025368007532384, + "learning_rate": 9.489320964293272e-06, + "loss": 17.8394, + "step": 9343 + }, + { + "epoch": 0.17080081159632224, + "grad_norm": 8.1347781081007, + "learning_rate": 9.489190630680456e-06, + "loss": 18.1092, + "step": 9344 + }, + { + "epoch": 0.17081909079276875, + "grad_norm": 8.161686929233428, + "learning_rate": 9.489060281333417e-06, + "loss": 18.0617, + "step": 9345 + }, + { + "epoch": 0.17083736998921528, + "grad_norm": 7.743324549428767, + "learning_rate": 9.488929916252611e-06, + "loss": 18.2099, + "step": 9346 + }, + { + "epoch": 0.1708556491856618, + "grad_norm": 7.457913709310798, + "learning_rate": 9.488799535438496e-06, + "loss": 17.9657, + "step": 9347 + }, + { + "epoch": 0.17087392838210833, + "grad_norm": 6.4565171819898275, + "learning_rate": 9.48866913889153e-06, + "loss": 17.4339, + "step": 9348 + }, + { + "epoch": 0.17089220757855483, + "grad_norm": 5.7198681828309175, + "learning_rate": 9.488538726612165e-06, + "loss": 17.1374, + "step": 9349 + }, + { + "epoch": 0.17091048677500137, + "grad_norm": 5.829504990701503, + "learning_rate": 9.488408298600861e-06, + "loss": 17.1341, + "step": 9350 + }, + { + "epoch": 0.1709287659714479, + "grad_norm": 8.547314581987964, + "learning_rate": 9.488277854858077e-06, + "loss": 18.4687, + "step": 9351 + }, + { + "epoch": 0.1709470451678944, + "grad_norm": 6.427092565488006, + "learning_rate": 9.488147395384267e-06, + "loss": 17.4307, + "step": 9352 + }, + { + "epoch": 0.17096532436434095, + "grad_norm": 5.945035141440985, + "learning_rate": 9.488016920179892e-06, + "loss": 17.3094, + "step": 9353 + }, + { + "epoch": 0.17098360356078746, + "grad_norm": 5.212870788799658, + "learning_rate": 9.487886429245406e-06, + "loss": 16.9609, + "step": 9354 + }, + { + "epoch": 0.171001882757234, + "grad_norm": 7.182918513019496, + "learning_rate": 9.487755922581267e-06, + "loss": 17.9397, + "step": 9355 + }, + { + "epoch": 0.17102016195368053, + "grad_norm": 8.180485431431427, + "learning_rate": 9.487625400187935e-06, + "loss": 18.2699, + "step": 9356 + }, + { + "epoch": 0.17103844115012704, + "grad_norm": 6.908136535638802, + "learning_rate": 9.487494862065863e-06, + "loss": 17.6333, + "step": 9357 + }, + { + "epoch": 0.17105672034657357, + "grad_norm": 6.582988597315935, + "learning_rate": 9.487364308215513e-06, + "loss": 17.5628, + "step": 9358 + }, + { + "epoch": 0.17107499954302008, + "grad_norm": 7.458926835537541, + "learning_rate": 9.487233738637338e-06, + "loss": 17.9193, + "step": 9359 + }, + { + "epoch": 0.17109327873946661, + "grad_norm": 7.8258914490505065, + "learning_rate": 9.487103153331799e-06, + "loss": 17.9659, + "step": 9360 + }, + { + "epoch": 0.17111155793591315, + "grad_norm": 7.791564347644736, + "learning_rate": 9.486972552299354e-06, + "loss": 18.1149, + "step": 9361 + }, + { + "epoch": 0.17112983713235966, + "grad_norm": 7.872110184686366, + "learning_rate": 9.486841935540458e-06, + "loss": 17.9103, + "step": 9362 + }, + { + "epoch": 0.1711481163288062, + "grad_norm": 7.707540377198523, + "learning_rate": 9.486711303055571e-06, + "loss": 18.2933, + "step": 9363 + }, + { + "epoch": 0.1711663955252527, + "grad_norm": 7.144271057657575, + "learning_rate": 9.486580654845151e-06, + "loss": 17.9076, + "step": 9364 + }, + { + "epoch": 0.17118467472169924, + "grad_norm": 5.582895005836393, + "learning_rate": 9.486449990909654e-06, + "loss": 17.0059, + "step": 9365 + }, + { + 
"epoch": 0.17120295391814574, + "grad_norm": 5.283504181308812, + "learning_rate": 9.48631931124954e-06, + "loss": 16.9713, + "step": 9366 + }, + { + "epoch": 0.17122123311459228, + "grad_norm": 7.036891960760273, + "learning_rate": 9.486188615865267e-06, + "loss": 17.764, + "step": 9367 + }, + { + "epoch": 0.17123951231103882, + "grad_norm": 7.0244422250085155, + "learning_rate": 9.48605790475729e-06, + "loss": 17.9067, + "step": 9368 + }, + { + "epoch": 0.17125779150748532, + "grad_norm": 6.402158325323122, + "learning_rate": 9.485927177926071e-06, + "loss": 17.339, + "step": 9369 + }, + { + "epoch": 0.17127607070393186, + "grad_norm": 6.943192297422052, + "learning_rate": 9.485796435372066e-06, + "loss": 17.7603, + "step": 9370 + }, + { + "epoch": 0.17129434990037837, + "grad_norm": 6.398925418463297, + "learning_rate": 9.485665677095733e-06, + "loss": 17.2458, + "step": 9371 + }, + { + "epoch": 0.1713126290968249, + "grad_norm": 6.856151315543031, + "learning_rate": 9.48553490309753e-06, + "loss": 17.853, + "step": 9372 + }, + { + "epoch": 0.17133090829327144, + "grad_norm": 7.3519575505114565, + "learning_rate": 9.48540411337792e-06, + "loss": 17.97, + "step": 9373 + }, + { + "epoch": 0.17134918748971795, + "grad_norm": 6.2059150149780615, + "learning_rate": 9.485273307937354e-06, + "loss": 17.2831, + "step": 9374 + }, + { + "epoch": 0.17136746668616448, + "grad_norm": 5.751463733804113, + "learning_rate": 9.485142486776297e-06, + "loss": 17.1818, + "step": 9375 + }, + { + "epoch": 0.171385745882611, + "grad_norm": 7.047700854948244, + "learning_rate": 9.485011649895204e-06, + "loss": 17.7665, + "step": 9376 + }, + { + "epoch": 0.17140402507905753, + "grad_norm": 8.475999209283327, + "learning_rate": 9.484880797294534e-06, + "loss": 18.6278, + "step": 9377 + }, + { + "epoch": 0.17142230427550406, + "grad_norm": 6.271246333842615, + "learning_rate": 9.484749928974745e-06, + "loss": 17.466, + "step": 9378 + }, + { + "epoch": 0.17144058347195057, + "grad_norm": 7.504721163883052, + "learning_rate": 9.4846190449363e-06, + "loss": 17.8629, + "step": 9379 + }, + { + "epoch": 0.1714588626683971, + "grad_norm": 6.753137748892236, + "learning_rate": 9.48448814517965e-06, + "loss": 17.3884, + "step": 9380 + }, + { + "epoch": 0.1714771418648436, + "grad_norm": 6.683449785929558, + "learning_rate": 9.484357229705262e-06, + "loss": 17.3045, + "step": 9381 + }, + { + "epoch": 0.17149542106129015, + "grad_norm": 7.038968690947826, + "learning_rate": 9.48422629851359e-06, + "loss": 17.6161, + "step": 9382 + }, + { + "epoch": 0.17151370025773666, + "grad_norm": 8.246775361551709, + "learning_rate": 9.484095351605093e-06, + "loss": 18.5538, + "step": 9383 + }, + { + "epoch": 0.1715319794541832, + "grad_norm": 6.04432556618661, + "learning_rate": 9.48396438898023e-06, + "loss": 17.5251, + "step": 9384 + }, + { + "epoch": 0.17155025865062973, + "grad_norm": 6.775248989592338, + "learning_rate": 9.483833410639465e-06, + "loss": 17.7649, + "step": 9385 + }, + { + "epoch": 0.17156853784707624, + "grad_norm": 5.488825339415775, + "learning_rate": 9.483702416583249e-06, + "loss": 17.0887, + "step": 9386 + }, + { + "epoch": 0.17158681704352277, + "grad_norm": 6.663263419093369, + "learning_rate": 9.483571406812046e-06, + "loss": 17.3529, + "step": 9387 + }, + { + "epoch": 0.17160509623996928, + "grad_norm": 6.451288086920941, + "learning_rate": 9.483440381326316e-06, + "loss": 17.446, + "step": 9388 + }, + { + "epoch": 0.17162337543641581, + "grad_norm": 9.190972193048253, + "learning_rate": 
9.483309340126514e-06, + "loss": 18.0486, + "step": 9389 + }, + { + "epoch": 0.17164165463286235, + "grad_norm": 6.5989987104060726, + "learning_rate": 9.483178283213104e-06, + "loss": 17.6012, + "step": 9390 + }, + { + "epoch": 0.17165993382930886, + "grad_norm": 6.328149633870934, + "learning_rate": 9.483047210586542e-06, + "loss": 17.3198, + "step": 9391 + }, + { + "epoch": 0.1716782130257554, + "grad_norm": 7.2569753611456065, + "learning_rate": 9.482916122247289e-06, + "loss": 17.9867, + "step": 9392 + }, + { + "epoch": 0.1716964922222019, + "grad_norm": 6.898388072866421, + "learning_rate": 9.482785018195803e-06, + "loss": 17.5655, + "step": 9393 + }, + { + "epoch": 0.17171477141864844, + "grad_norm": 8.01711657514621, + "learning_rate": 9.482653898432546e-06, + "loss": 18.3204, + "step": 9394 + }, + { + "epoch": 0.17173305061509497, + "grad_norm": 6.975439275024527, + "learning_rate": 9.482522762957976e-06, + "loss": 18.1094, + "step": 9395 + }, + { + "epoch": 0.17175132981154148, + "grad_norm": 5.868700240209868, + "learning_rate": 9.48239161177255e-06, + "loss": 16.9796, + "step": 9396 + }, + { + "epoch": 0.17176960900798802, + "grad_norm": 7.08908481341133, + "learning_rate": 9.482260444876733e-06, + "loss": 17.6141, + "step": 9397 + }, + { + "epoch": 0.17178788820443452, + "grad_norm": 7.055610246006772, + "learning_rate": 9.482129262270982e-06, + "loss": 17.5318, + "step": 9398 + }, + { + "epoch": 0.17180616740088106, + "grad_norm": 6.116674772768493, + "learning_rate": 9.481998063955756e-06, + "loss": 17.1327, + "step": 9399 + }, + { + "epoch": 0.17182444659732757, + "grad_norm": 8.588323819381399, + "learning_rate": 9.481866849931516e-06, + "loss": 18.4417, + "step": 9400 + }, + { + "epoch": 0.1718427257937741, + "grad_norm": 7.51140039685407, + "learning_rate": 9.481735620198722e-06, + "loss": 17.8579, + "step": 9401 + }, + { + "epoch": 0.17186100499022064, + "grad_norm": 8.035682754298808, + "learning_rate": 9.481604374757834e-06, + "loss": 17.7268, + "step": 9402 + }, + { + "epoch": 0.17187928418666715, + "grad_norm": 7.592674702990398, + "learning_rate": 9.481473113609309e-06, + "loss": 18.0118, + "step": 9403 + }, + { + "epoch": 0.17189756338311368, + "grad_norm": 6.667093276838761, + "learning_rate": 9.481341836753612e-06, + "loss": 17.4662, + "step": 9404 + }, + { + "epoch": 0.1719158425795602, + "grad_norm": 7.810607329528676, + "learning_rate": 9.4812105441912e-06, + "loss": 17.719, + "step": 9405 + }, + { + "epoch": 0.17193412177600673, + "grad_norm": 7.194972914942562, + "learning_rate": 9.481079235922534e-06, + "loss": 17.5976, + "step": 9406 + }, + { + "epoch": 0.17195240097245326, + "grad_norm": 7.9834727347623895, + "learning_rate": 9.480947911948075e-06, + "loss": 18.1629, + "step": 9407 + }, + { + "epoch": 0.17197068016889977, + "grad_norm": 7.18606951996447, + "learning_rate": 9.480816572268281e-06, + "loss": 17.5883, + "step": 9408 + }, + { + "epoch": 0.1719889593653463, + "grad_norm": 6.486718640880905, + "learning_rate": 9.480685216883614e-06, + "loss": 17.5277, + "step": 9409 + }, + { + "epoch": 0.1720072385617928, + "grad_norm": 9.50867175041148, + "learning_rate": 9.480553845794534e-06, + "loss": 18.0472, + "step": 9410 + }, + { + "epoch": 0.17202551775823935, + "grad_norm": 6.833479189201265, + "learning_rate": 9.480422459001503e-06, + "loss": 17.6381, + "step": 9411 + }, + { + "epoch": 0.17204379695468588, + "grad_norm": 6.277692317050017, + "learning_rate": 9.480291056504978e-06, + "loss": 17.2588, + "step": 9412 + }, + { + "epoch": 
0.1720620761511324, + "grad_norm": 5.7551353372689995, + "learning_rate": 9.480159638305424e-06, + "loss": 17.2131, + "step": 9413 + }, + { + "epoch": 0.17208035534757893, + "grad_norm": 8.692658198171147, + "learning_rate": 9.480028204403298e-06, + "loss": 17.8449, + "step": 9414 + }, + { + "epoch": 0.17209863454402544, + "grad_norm": 7.435976881499505, + "learning_rate": 9.47989675479906e-06, + "loss": 17.9663, + "step": 9415 + }, + { + "epoch": 0.17211691374047197, + "grad_norm": 8.665769778151954, + "learning_rate": 9.479765289493176e-06, + "loss": 18.0989, + "step": 9416 + }, + { + "epoch": 0.17213519293691848, + "grad_norm": 7.548053517846902, + "learning_rate": 9.479633808486103e-06, + "loss": 17.9994, + "step": 9417 + }, + { + "epoch": 0.17215347213336502, + "grad_norm": 8.24601842526659, + "learning_rate": 9.4795023117783e-06, + "loss": 17.7702, + "step": 9418 + }, + { + "epoch": 0.17217175132981155, + "grad_norm": 7.3080893508518265, + "learning_rate": 9.479370799370231e-06, + "loss": 17.6346, + "step": 9419 + }, + { + "epoch": 0.17219003052625806, + "grad_norm": 6.95859428350624, + "learning_rate": 9.479239271262356e-06, + "loss": 17.7272, + "step": 9420 + }, + { + "epoch": 0.1722083097227046, + "grad_norm": 5.658747751909899, + "learning_rate": 9.479107727455137e-06, + "loss": 17.1958, + "step": 9421 + }, + { + "epoch": 0.1722265889191511, + "grad_norm": 9.264521686672701, + "learning_rate": 9.478976167949032e-06, + "loss": 18.6736, + "step": 9422 + }, + { + "epoch": 0.17224486811559764, + "grad_norm": 10.464266390240628, + "learning_rate": 9.478844592744507e-06, + "loss": 18.6533, + "step": 9423 + }, + { + "epoch": 0.17226314731204417, + "grad_norm": 6.793778486617364, + "learning_rate": 9.478713001842019e-06, + "loss": 17.5827, + "step": 9424 + }, + { + "epoch": 0.17228142650849068, + "grad_norm": 7.856395977325854, + "learning_rate": 9.47858139524203e-06, + "loss": 18.2891, + "step": 9425 + }, + { + "epoch": 0.17229970570493722, + "grad_norm": 6.938032387697228, + "learning_rate": 9.478449772945003e-06, + "loss": 17.7025, + "step": 9426 + }, + { + "epoch": 0.17231798490138373, + "grad_norm": 6.694615648790434, + "learning_rate": 9.478318134951396e-06, + "loss": 17.7233, + "step": 9427 + }, + { + "epoch": 0.17233626409783026, + "grad_norm": 7.934515973309323, + "learning_rate": 9.478186481261674e-06, + "loss": 18.6778, + "step": 9428 + }, + { + "epoch": 0.1723545432942768, + "grad_norm": 7.6012840022268655, + "learning_rate": 9.478054811876298e-06, + "loss": 17.8783, + "step": 9429 + }, + { + "epoch": 0.1723728224907233, + "grad_norm": 7.09475782007339, + "learning_rate": 9.477923126795727e-06, + "loss": 17.8057, + "step": 9430 + }, + { + "epoch": 0.17239110168716984, + "grad_norm": 6.954595742628623, + "learning_rate": 9.477791426020425e-06, + "loss": 17.8825, + "step": 9431 + }, + { + "epoch": 0.17240938088361635, + "grad_norm": 6.6808165057065, + "learning_rate": 9.477659709550852e-06, + "loss": 17.6323, + "step": 9432 + }, + { + "epoch": 0.17242766008006288, + "grad_norm": 9.677616218480246, + "learning_rate": 9.477527977387471e-06, + "loss": 18.3513, + "step": 9433 + }, + { + "epoch": 0.1724459392765094, + "grad_norm": 6.898574913690863, + "learning_rate": 9.477396229530742e-06, + "loss": 17.7003, + "step": 9434 + }, + { + "epoch": 0.17246421847295593, + "grad_norm": 6.22690980824051, + "learning_rate": 9.477264465981128e-06, + "loss": 17.5063, + "step": 9435 + }, + { + "epoch": 0.17248249766940246, + "grad_norm": 6.551990444920954, + "learning_rate": 
9.477132686739091e-06, + "loss": 17.5723, + "step": 9436 + }, + { + "epoch": 0.17250077686584897, + "grad_norm": 8.222108731581205, + "learning_rate": 9.477000891805092e-06, + "loss": 18.1106, + "step": 9437 + }, + { + "epoch": 0.1725190560622955, + "grad_norm": 7.472141826139331, + "learning_rate": 9.476869081179595e-06, + "loss": 17.926, + "step": 9438 + }, + { + "epoch": 0.17253733525874201, + "grad_norm": 5.729838615705796, + "learning_rate": 9.476737254863057e-06, + "loss": 17.2853, + "step": 9439 + }, + { + "epoch": 0.17255561445518855, + "grad_norm": 6.169816445120499, + "learning_rate": 9.476605412855946e-06, + "loss": 17.3952, + "step": 9440 + }, + { + "epoch": 0.17257389365163509, + "grad_norm": 5.714087489923032, + "learning_rate": 9.47647355515872e-06, + "loss": 17.0376, + "step": 9441 + }, + { + "epoch": 0.1725921728480816, + "grad_norm": 6.319413544978291, + "learning_rate": 9.476341681771844e-06, + "loss": 17.6697, + "step": 9442 + }, + { + "epoch": 0.17261045204452813, + "grad_norm": 6.365535308271305, + "learning_rate": 9.476209792695779e-06, + "loss": 17.5086, + "step": 9443 + }, + { + "epoch": 0.17262873124097464, + "grad_norm": 9.358700783372539, + "learning_rate": 9.476077887930985e-06, + "loss": 18.0325, + "step": 9444 + }, + { + "epoch": 0.17264701043742117, + "grad_norm": 5.875412377637854, + "learning_rate": 9.475945967477929e-06, + "loss": 17.2611, + "step": 9445 + }, + { + "epoch": 0.1726652896338677, + "grad_norm": 6.57373215116459, + "learning_rate": 9.47581403133707e-06, + "loss": 17.5664, + "step": 9446 + }, + { + "epoch": 0.17268356883031422, + "grad_norm": 6.9318182688007655, + "learning_rate": 9.47568207950887e-06, + "loss": 17.4997, + "step": 9447 + }, + { + "epoch": 0.17270184802676075, + "grad_norm": 8.41243281063665, + "learning_rate": 9.475550111993794e-06, + "loss": 17.8644, + "step": 9448 + }, + { + "epoch": 0.17272012722320726, + "grad_norm": 7.471280300668058, + "learning_rate": 9.475418128792302e-06, + "loss": 17.8876, + "step": 9449 + }, + { + "epoch": 0.1727384064196538, + "grad_norm": 6.601480469516368, + "learning_rate": 9.47528612990486e-06, + "loss": 17.4701, + "step": 9450 + }, + { + "epoch": 0.1727566856161003, + "grad_norm": 6.810351201802684, + "learning_rate": 9.475154115331926e-06, + "loss": 17.5172, + "step": 9451 + }, + { + "epoch": 0.17277496481254684, + "grad_norm": 7.398992952527537, + "learning_rate": 9.475022085073967e-06, + "loss": 17.4504, + "step": 9452 + }, + { + "epoch": 0.17279324400899337, + "grad_norm": 6.6966327893105415, + "learning_rate": 9.474890039131442e-06, + "loss": 17.4509, + "step": 9453 + }, + { + "epoch": 0.17281152320543988, + "grad_norm": 6.158766465461097, + "learning_rate": 9.474757977504817e-06, + "loss": 17.4434, + "step": 9454 + }, + { + "epoch": 0.17282980240188642, + "grad_norm": 5.923134645722096, + "learning_rate": 9.474625900194554e-06, + "loss": 16.9824, + "step": 9455 + }, + { + "epoch": 0.17284808159833293, + "grad_norm": 8.076136920082657, + "learning_rate": 9.474493807201114e-06, + "loss": 17.8831, + "step": 9456 + }, + { + "epoch": 0.17286636079477946, + "grad_norm": 6.456655976023691, + "learning_rate": 9.474361698524963e-06, + "loss": 17.6123, + "step": 9457 + }, + { + "epoch": 0.172884639991226, + "grad_norm": 7.792694797980782, + "learning_rate": 9.474229574166563e-06, + "loss": 17.6647, + "step": 9458 + }, + { + "epoch": 0.1729029191876725, + "grad_norm": 7.571500525823527, + "learning_rate": 9.474097434126374e-06, + "loss": 18.2985, + "step": 9459 + }, + { + "epoch": 
0.17292119838411904, + "grad_norm": 7.8823129226813275, + "learning_rate": 9.473965278404865e-06, + "loss": 17.6927, + "step": 9460 + }, + { + "epoch": 0.17293947758056555, + "grad_norm": 6.901785326814127, + "learning_rate": 9.473833107002492e-06, + "loss": 17.8228, + "step": 9461 + }, + { + "epoch": 0.17295775677701208, + "grad_norm": 9.092246095613532, + "learning_rate": 9.473700919919725e-06, + "loss": 18.1414, + "step": 9462 + }, + { + "epoch": 0.17297603597345862, + "grad_norm": 8.048377244864216, + "learning_rate": 9.473568717157024e-06, + "loss": 18.1686, + "step": 9463 + }, + { + "epoch": 0.17299431516990513, + "grad_norm": 6.299790719426887, + "learning_rate": 9.473436498714852e-06, + "loss": 17.4108, + "step": 9464 + }, + { + "epoch": 0.17301259436635166, + "grad_norm": 7.137264906670612, + "learning_rate": 9.473304264593674e-06, + "loss": 17.9108, + "step": 9465 + }, + { + "epoch": 0.17303087356279817, + "grad_norm": 6.39898306603595, + "learning_rate": 9.473172014793953e-06, + "loss": 17.4791, + "step": 9466 + }, + { + "epoch": 0.1730491527592447, + "grad_norm": 6.967386160536522, + "learning_rate": 9.473039749316152e-06, + "loss": 17.5312, + "step": 9467 + }, + { + "epoch": 0.17306743195569121, + "grad_norm": 6.762288020691088, + "learning_rate": 9.472907468160735e-06, + "loss": 17.6392, + "step": 9468 + }, + { + "epoch": 0.17308571115213775, + "grad_norm": 8.540444264241714, + "learning_rate": 9.472775171328165e-06, + "loss": 18.0354, + "step": 9469 + }, + { + "epoch": 0.17310399034858429, + "grad_norm": 6.765161338247924, + "learning_rate": 9.472642858818906e-06, + "loss": 17.7281, + "step": 9470 + }, + { + "epoch": 0.1731222695450308, + "grad_norm": 6.962275288338839, + "learning_rate": 9.472510530633422e-06, + "loss": 17.6537, + "step": 9471 + }, + { + "epoch": 0.17314054874147733, + "grad_norm": 7.049348562423112, + "learning_rate": 9.472378186772177e-06, + "loss": 17.7875, + "step": 9472 + }, + { + "epoch": 0.17315882793792384, + "grad_norm": 6.193548539422179, + "learning_rate": 9.472245827235636e-06, + "loss": 17.3468, + "step": 9473 + }, + { + "epoch": 0.17317710713437037, + "grad_norm": 7.164603951954371, + "learning_rate": 9.472113452024258e-06, + "loss": 17.9715, + "step": 9474 + }, + { + "epoch": 0.1731953863308169, + "grad_norm": 6.968068689021553, + "learning_rate": 9.471981061138513e-06, + "loss": 17.7948, + "step": 9475 + }, + { + "epoch": 0.17321366552726342, + "grad_norm": 7.232139235254524, + "learning_rate": 9.471848654578862e-06, + "loss": 17.7792, + "step": 9476 + }, + { + "epoch": 0.17323194472370995, + "grad_norm": 5.9023763429473215, + "learning_rate": 9.471716232345769e-06, + "loss": 16.9879, + "step": 9477 + }, + { + "epoch": 0.17325022392015646, + "grad_norm": 5.738475405414184, + "learning_rate": 9.4715837944397e-06, + "loss": 17.0774, + "step": 9478 + }, + { + "epoch": 0.173268503116603, + "grad_norm": 6.112365319319647, + "learning_rate": 9.471451340861117e-06, + "loss": 17.4933, + "step": 9479 + }, + { + "epoch": 0.17328678231304953, + "grad_norm": 6.886066210165729, + "learning_rate": 9.471318871610484e-06, + "loss": 17.8008, + "step": 9480 + }, + { + "epoch": 0.17330506150949604, + "grad_norm": 7.979041327998414, + "learning_rate": 9.471186386688267e-06, + "loss": 17.9677, + "step": 9481 + }, + { + "epoch": 0.17332334070594257, + "grad_norm": 6.447605600130551, + "learning_rate": 9.47105388609493e-06, + "loss": 17.4334, + "step": 9482 + }, + { + "epoch": 0.17334161990238908, + "grad_norm": 6.919822594374885, + "learning_rate": 
9.470921369830938e-06, + "loss": 17.4449, + "step": 9483 + }, + { + "epoch": 0.17335989909883562, + "grad_norm": 6.224254864865902, + "learning_rate": 9.470788837896753e-06, + "loss": 17.3345, + "step": 9484 + }, + { + "epoch": 0.17337817829528213, + "grad_norm": 7.83572911688027, + "learning_rate": 9.470656290292842e-06, + "loss": 17.9033, + "step": 9485 + }, + { + "epoch": 0.17339645749172866, + "grad_norm": 6.013585901590195, + "learning_rate": 9.47052372701967e-06, + "loss": 17.1984, + "step": 9486 + }, + { + "epoch": 0.1734147366881752, + "grad_norm": 6.787471336236899, + "learning_rate": 9.470391148077697e-06, + "loss": 17.2764, + "step": 9487 + }, + { + "epoch": 0.1734330158846217, + "grad_norm": 8.49541940305247, + "learning_rate": 9.470258553467392e-06, + "loss": 18.336, + "step": 9488 + }, + { + "epoch": 0.17345129508106824, + "grad_norm": 7.359129457025921, + "learning_rate": 9.47012594318922e-06, + "loss": 17.8962, + "step": 9489 + }, + { + "epoch": 0.17346957427751475, + "grad_norm": 5.905825614079544, + "learning_rate": 9.469993317243644e-06, + "loss": 17.2531, + "step": 9490 + }, + { + "epoch": 0.17348785347396128, + "grad_norm": 6.661236621727768, + "learning_rate": 9.469860675631129e-06, + "loss": 17.4225, + "step": 9491 + }, + { + "epoch": 0.17350613267040782, + "grad_norm": 7.970349098170724, + "learning_rate": 9.46972801835214e-06, + "loss": 18.0449, + "step": 9492 + }, + { + "epoch": 0.17352441186685433, + "grad_norm": 7.870137417874784, + "learning_rate": 9.469595345407144e-06, + "loss": 18.2065, + "step": 9493 + }, + { + "epoch": 0.17354269106330086, + "grad_norm": 6.630693665009491, + "learning_rate": 9.4694626567966e-06, + "loss": 17.6834, + "step": 9494 + }, + { + "epoch": 0.17356097025974737, + "grad_norm": 6.727202801770572, + "learning_rate": 9.46932995252098e-06, + "loss": 17.6419, + "step": 9495 + }, + { + "epoch": 0.1735792494561939, + "grad_norm": 8.627751500853709, + "learning_rate": 9.469197232580747e-06, + "loss": 18.1795, + "step": 9496 + }, + { + "epoch": 0.17359752865264044, + "grad_norm": 6.179486784072806, + "learning_rate": 9.469064496976365e-06, + "loss": 17.31, + "step": 9497 + }, + { + "epoch": 0.17361580784908695, + "grad_norm": 6.334591869376551, + "learning_rate": 9.468931745708298e-06, + "loss": 17.6099, + "step": 9498 + }, + { + "epoch": 0.1736340870455335, + "grad_norm": 6.316203075348294, + "learning_rate": 9.468798978777016e-06, + "loss": 17.4841, + "step": 9499 + }, + { + "epoch": 0.17365236624198, + "grad_norm": 7.7296442146211515, + "learning_rate": 9.46866619618298e-06, + "loss": 17.8273, + "step": 9500 + }, + { + "epoch": 0.17367064543842653, + "grad_norm": 6.517640989987836, + "learning_rate": 9.468533397926656e-06, + "loss": 17.6511, + "step": 9501 + }, + { + "epoch": 0.17368892463487304, + "grad_norm": 6.486360811626599, + "learning_rate": 9.46840058400851e-06, + "loss": 17.7592, + "step": 9502 + }, + { + "epoch": 0.17370720383131957, + "grad_norm": 6.973484127279262, + "learning_rate": 9.468267754429009e-06, + "loss": 17.5944, + "step": 9503 + }, + { + "epoch": 0.1737254830277661, + "grad_norm": 6.580408787595043, + "learning_rate": 9.468134909188616e-06, + "loss": 17.7031, + "step": 9504 + }, + { + "epoch": 0.17374376222421262, + "grad_norm": 6.121295434799951, + "learning_rate": 9.468002048287799e-06, + "loss": 17.4001, + "step": 9505 + }, + { + "epoch": 0.17376204142065915, + "grad_norm": 6.64922992160366, + "learning_rate": 9.467869171727022e-06, + "loss": 17.7164, + "step": 9506 + }, + { + "epoch": 0.17378032061710566, + 
"grad_norm": 6.549658495995932, + "learning_rate": 9.467736279506752e-06, + "loss": 17.4104, + "step": 9507 + }, + { + "epoch": 0.1737985998135522, + "grad_norm": 6.197963987146198, + "learning_rate": 9.467603371627454e-06, + "loss": 17.3903, + "step": 9508 + }, + { + "epoch": 0.17381687900999873, + "grad_norm": 6.181427883663454, + "learning_rate": 9.467470448089592e-06, + "loss": 17.346, + "step": 9509 + }, + { + "epoch": 0.17383515820644524, + "grad_norm": 6.575355283380181, + "learning_rate": 9.467337508893635e-06, + "loss": 17.5846, + "step": 9510 + }, + { + "epoch": 0.17385343740289178, + "grad_norm": 7.123198600785866, + "learning_rate": 9.467204554040049e-06, + "loss": 18.1258, + "step": 9511 + }, + { + "epoch": 0.17387171659933828, + "grad_norm": 6.802118401190757, + "learning_rate": 9.467071583529297e-06, + "loss": 17.6492, + "step": 9512 + }, + { + "epoch": 0.17388999579578482, + "grad_norm": 6.764335831864584, + "learning_rate": 9.466938597361847e-06, + "loss": 17.4252, + "step": 9513 + }, + { + "epoch": 0.17390827499223135, + "grad_norm": 6.252125581799929, + "learning_rate": 9.466805595538165e-06, + "loss": 17.5834, + "step": 9514 + }, + { + "epoch": 0.17392655418867786, + "grad_norm": 7.274545612028333, + "learning_rate": 9.466672578058718e-06, + "loss": 17.6257, + "step": 9515 + }, + { + "epoch": 0.1739448333851244, + "grad_norm": 7.550294008739678, + "learning_rate": 9.46653954492397e-06, + "loss": 17.8097, + "step": 9516 + }, + { + "epoch": 0.1739631125815709, + "grad_norm": 6.783902071701367, + "learning_rate": 9.466406496134388e-06, + "loss": 17.7032, + "step": 9517 + }, + { + "epoch": 0.17398139177801744, + "grad_norm": 7.286554500889945, + "learning_rate": 9.466273431690439e-06, + "loss": 17.752, + "step": 9518 + }, + { + "epoch": 0.17399967097446395, + "grad_norm": 6.522243344605513, + "learning_rate": 9.46614035159259e-06, + "loss": 17.6034, + "step": 9519 + }, + { + "epoch": 0.17401795017091048, + "grad_norm": 6.418133639420691, + "learning_rate": 9.466007255841307e-06, + "loss": 17.4132, + "step": 9520 + }, + { + "epoch": 0.17403622936735702, + "grad_norm": 6.754508075717153, + "learning_rate": 9.465874144437055e-06, + "loss": 17.8215, + "step": 9521 + }, + { + "epoch": 0.17405450856380353, + "grad_norm": 6.2240970765795085, + "learning_rate": 9.465741017380302e-06, + "loss": 17.3865, + "step": 9522 + }, + { + "epoch": 0.17407278776025006, + "grad_norm": 7.749530890247584, + "learning_rate": 9.465607874671514e-06, + "loss": 17.9878, + "step": 9523 + }, + { + "epoch": 0.17409106695669657, + "grad_norm": 7.70501705038296, + "learning_rate": 9.465474716311159e-06, + "loss": 18.3077, + "step": 9524 + }, + { + "epoch": 0.1741093461531431, + "grad_norm": 6.65303915166089, + "learning_rate": 9.465341542299701e-06, + "loss": 17.6386, + "step": 9525 + }, + { + "epoch": 0.17412762534958964, + "grad_norm": 5.461389075851484, + "learning_rate": 9.46520835263761e-06, + "loss": 17.0258, + "step": 9526 + }, + { + "epoch": 0.17414590454603615, + "grad_norm": 9.381998953111543, + "learning_rate": 9.46507514732535e-06, + "loss": 17.5598, + "step": 9527 + }, + { + "epoch": 0.1741641837424827, + "grad_norm": 6.1220603773567, + "learning_rate": 9.464941926363389e-06, + "loss": 17.4067, + "step": 9528 + }, + { + "epoch": 0.1741824629389292, + "grad_norm": 8.00793554449915, + "learning_rate": 9.464808689752195e-06, + "loss": 18.0463, + "step": 9529 + }, + { + "epoch": 0.17420074213537573, + "grad_norm": 6.465493988527969, + "learning_rate": 9.464675437492234e-06, + "loss": 17.4025, + 
"step": 9530 + }, + { + "epoch": 0.17421902133182227, + "grad_norm": 6.306562954623618, + "learning_rate": 9.464542169583972e-06, + "loss": 17.3987, + "step": 9531 + }, + { + "epoch": 0.17423730052826877, + "grad_norm": 8.160215439993134, + "learning_rate": 9.464408886027878e-06, + "loss": 18.1336, + "step": 9532 + }, + { + "epoch": 0.1742555797247153, + "grad_norm": 6.695224799762931, + "learning_rate": 9.464275586824418e-06, + "loss": 17.6448, + "step": 9533 + }, + { + "epoch": 0.17427385892116182, + "grad_norm": 6.977009153648582, + "learning_rate": 9.46414227197406e-06, + "loss": 17.7975, + "step": 9534 + }, + { + "epoch": 0.17429213811760835, + "grad_norm": 7.331561066680906, + "learning_rate": 9.46400894147727e-06, + "loss": 17.8779, + "step": 9535 + }, + { + "epoch": 0.17431041731405486, + "grad_norm": 7.011303994873853, + "learning_rate": 9.463875595334516e-06, + "loss": 17.9372, + "step": 9536 + }, + { + "epoch": 0.1743286965105014, + "grad_norm": 6.169869422341589, + "learning_rate": 9.463742233546264e-06, + "loss": 17.5423, + "step": 9537 + }, + { + "epoch": 0.17434697570694793, + "grad_norm": 7.389420360269115, + "learning_rate": 9.463608856112985e-06, + "loss": 18.075, + "step": 9538 + }, + { + "epoch": 0.17436525490339444, + "grad_norm": 8.262990116786577, + "learning_rate": 9.463475463035143e-06, + "loss": 18.1106, + "step": 9539 + }, + { + "epoch": 0.17438353409984098, + "grad_norm": 7.2006562511719, + "learning_rate": 9.463342054313207e-06, + "loss": 17.6868, + "step": 9540 + }, + { + "epoch": 0.17440181329628748, + "grad_norm": 7.840608118510198, + "learning_rate": 9.463208629947645e-06, + "loss": 18.3659, + "step": 9541 + }, + { + "epoch": 0.17442009249273402, + "grad_norm": 7.0299308033934755, + "learning_rate": 9.463075189938925e-06, + "loss": 17.4907, + "step": 9542 + }, + { + "epoch": 0.17443837168918055, + "grad_norm": 6.323297700497119, + "learning_rate": 9.462941734287511e-06, + "loss": 17.3497, + "step": 9543 + }, + { + "epoch": 0.17445665088562706, + "grad_norm": 7.9047948424288315, + "learning_rate": 9.462808262993876e-06, + "loss": 17.917, + "step": 9544 + }, + { + "epoch": 0.1744749300820736, + "grad_norm": 6.179598644734943, + "learning_rate": 9.462674776058485e-06, + "loss": 17.4775, + "step": 9545 + }, + { + "epoch": 0.1744932092785201, + "grad_norm": 6.596936136388797, + "learning_rate": 9.462541273481804e-06, + "loss": 17.3298, + "step": 9546 + }, + { + "epoch": 0.17451148847496664, + "grad_norm": 7.149311211802416, + "learning_rate": 9.462407755264305e-06, + "loss": 17.6487, + "step": 9547 + }, + { + "epoch": 0.17452976767141318, + "grad_norm": 6.735124720416647, + "learning_rate": 9.462274221406455e-06, + "loss": 17.4208, + "step": 9548 + }, + { + "epoch": 0.17454804686785969, + "grad_norm": 7.670894500045217, + "learning_rate": 9.46214067190872e-06, + "loss": 18.224, + "step": 9549 + }, + { + "epoch": 0.17456632606430622, + "grad_norm": 6.435070996214837, + "learning_rate": 9.462007106771569e-06, + "loss": 17.6639, + "step": 9550 + }, + { + "epoch": 0.17458460526075273, + "grad_norm": 7.6112651650811305, + "learning_rate": 9.461873525995469e-06, + "loss": 18.2247, + "step": 9551 + }, + { + "epoch": 0.17460288445719926, + "grad_norm": 5.044347889863651, + "learning_rate": 9.461739929580892e-06, + "loss": 17.024, + "step": 9552 + }, + { + "epoch": 0.17462116365364577, + "grad_norm": 6.5749082393182645, + "learning_rate": 9.4616063175283e-06, + "loss": 17.595, + "step": 9553 + }, + { + "epoch": 0.1746394428500923, + "grad_norm": 6.841947019941837, + 
"learning_rate": 9.46147268983817e-06, + "loss": 17.6326, + "step": 9554 + }, + { + "epoch": 0.17465772204653884, + "grad_norm": 7.120305404120127, + "learning_rate": 9.461339046510962e-06, + "loss": 17.5067, + "step": 9555 + }, + { + "epoch": 0.17467600124298535, + "grad_norm": 9.627379721584031, + "learning_rate": 9.461205387547147e-06, + "loss": 18.5417, + "step": 9556 + }, + { + "epoch": 0.1746942804394319, + "grad_norm": 6.590698216359157, + "learning_rate": 9.461071712947197e-06, + "loss": 17.5086, + "step": 9557 + }, + { + "epoch": 0.1747125596358784, + "grad_norm": 6.823081760392698, + "learning_rate": 9.460938022711576e-06, + "loss": 17.5671, + "step": 9558 + }, + { + "epoch": 0.17473083883232493, + "grad_norm": 7.28494667014169, + "learning_rate": 9.460804316840755e-06, + "loss": 18.4391, + "step": 9559 + }, + { + "epoch": 0.17474911802877147, + "grad_norm": 7.1721623372429555, + "learning_rate": 9.460670595335201e-06, + "loss": 17.804, + "step": 9560 + }, + { + "epoch": 0.17476739722521797, + "grad_norm": 7.984601933362222, + "learning_rate": 9.460536858195387e-06, + "loss": 18.2357, + "step": 9561 + }, + { + "epoch": 0.1747856764216645, + "grad_norm": 6.3102168532318395, + "learning_rate": 9.460403105421773e-06, + "loss": 17.2541, + "step": 9562 + }, + { + "epoch": 0.17480395561811102, + "grad_norm": 7.554736078456946, + "learning_rate": 9.460269337014838e-06, + "loss": 17.9618, + "step": 9563 + }, + { + "epoch": 0.17482223481455755, + "grad_norm": 8.529445122710069, + "learning_rate": 9.460135552975043e-06, + "loss": 18.1476, + "step": 9564 + }, + { + "epoch": 0.1748405140110041, + "grad_norm": 7.224102814581794, + "learning_rate": 9.460001753302862e-06, + "loss": 17.9554, + "step": 9565 + }, + { + "epoch": 0.1748587932074506, + "grad_norm": 7.694910481548016, + "learning_rate": 9.459867937998762e-06, + "loss": 18.3189, + "step": 9566 + }, + { + "epoch": 0.17487707240389713, + "grad_norm": 6.902715343961525, + "learning_rate": 9.45973410706321e-06, + "loss": 17.6001, + "step": 9567 + }, + { + "epoch": 0.17489535160034364, + "grad_norm": 6.689873324000622, + "learning_rate": 9.459600260496677e-06, + "loss": 17.6243, + "step": 9568 + }, + { + "epoch": 0.17491363079679018, + "grad_norm": 6.086483769488931, + "learning_rate": 9.459466398299632e-06, + "loss": 17.2564, + "step": 9569 + }, + { + "epoch": 0.17493190999323668, + "grad_norm": 7.063496404856035, + "learning_rate": 9.459332520472546e-06, + "loss": 17.6633, + "step": 9570 + }, + { + "epoch": 0.17495018918968322, + "grad_norm": 7.943562445239964, + "learning_rate": 9.459198627015885e-06, + "loss": 18.2078, + "step": 9571 + }, + { + "epoch": 0.17496846838612976, + "grad_norm": 6.494236879534126, + "learning_rate": 9.45906471793012e-06, + "loss": 17.5818, + "step": 9572 + }, + { + "epoch": 0.17498674758257626, + "grad_norm": 7.441500171931855, + "learning_rate": 9.458930793215722e-06, + "loss": 17.849, + "step": 9573 + }, + { + "epoch": 0.1750050267790228, + "grad_norm": 8.228828669558796, + "learning_rate": 9.458796852873155e-06, + "loss": 18.0843, + "step": 9574 + }, + { + "epoch": 0.1750233059754693, + "grad_norm": 5.825673737747694, + "learning_rate": 9.458662896902895e-06, + "loss": 17.0919, + "step": 9575 + }, + { + "epoch": 0.17504158517191584, + "grad_norm": 5.872710126464134, + "learning_rate": 9.458528925305406e-06, + "loss": 17.2865, + "step": 9576 + }, + { + "epoch": 0.17505986436836238, + "grad_norm": 7.611633683494937, + "learning_rate": 9.458394938081162e-06, + "loss": 17.8971, + "step": 9577 + }, + { + 
"epoch": 0.17507814356480889, + "grad_norm": 7.063929080650453, + "learning_rate": 9.45826093523063e-06, + "loss": 17.736, + "step": 9578 + }, + { + "epoch": 0.17509642276125542, + "grad_norm": 7.791055831721408, + "learning_rate": 9.45812691675428e-06, + "loss": 18.0464, + "step": 9579 + }, + { + "epoch": 0.17511470195770193, + "grad_norm": 7.736108451433705, + "learning_rate": 9.457992882652583e-06, + "loss": 17.5614, + "step": 9580 + }, + { + "epoch": 0.17513298115414846, + "grad_norm": 8.217265916851911, + "learning_rate": 9.457858832926006e-06, + "loss": 17.9376, + "step": 9581 + }, + { + "epoch": 0.175151260350595, + "grad_norm": 6.331160305006264, + "learning_rate": 9.457724767575023e-06, + "loss": 17.4408, + "step": 9582 + }, + { + "epoch": 0.1751695395470415, + "grad_norm": 6.254197417031652, + "learning_rate": 9.4575906866001e-06, + "loss": 17.1966, + "step": 9583 + }, + { + "epoch": 0.17518781874348804, + "grad_norm": 7.363918482060166, + "learning_rate": 9.457456590001708e-06, + "loss": 17.5009, + "step": 9584 + }, + { + "epoch": 0.17520609793993455, + "grad_norm": 6.023205168853669, + "learning_rate": 9.457322477780317e-06, + "loss": 17.2705, + "step": 9585 + }, + { + "epoch": 0.1752243771363811, + "grad_norm": 6.668531385943409, + "learning_rate": 9.457188349936399e-06, + "loss": 17.4821, + "step": 9586 + }, + { + "epoch": 0.1752426563328276, + "grad_norm": 6.677434109572615, + "learning_rate": 9.457054206470422e-06, + "loss": 17.4897, + "step": 9587 + }, + { + "epoch": 0.17526093552927413, + "grad_norm": 7.421678870093581, + "learning_rate": 9.456920047382858e-06, + "loss": 17.7544, + "step": 9588 + }, + { + "epoch": 0.17527921472572067, + "grad_norm": 5.749753796678464, + "learning_rate": 9.456785872674174e-06, + "loss": 17.1499, + "step": 9589 + }, + { + "epoch": 0.17529749392216717, + "grad_norm": 7.657753373935576, + "learning_rate": 9.456651682344844e-06, + "loss": 17.8947, + "step": 9590 + }, + { + "epoch": 0.1753157731186137, + "grad_norm": 5.800114173360619, + "learning_rate": 9.456517476395334e-06, + "loss": 17.5072, + "step": 9591 + }, + { + "epoch": 0.17533405231506022, + "grad_norm": 6.370407507874773, + "learning_rate": 9.45638325482612e-06, + "loss": 17.6061, + "step": 9592 + }, + { + "epoch": 0.17535233151150675, + "grad_norm": 7.54027055496554, + "learning_rate": 9.456249017637669e-06, + "loss": 18.0063, + "step": 9593 + }, + { + "epoch": 0.1753706107079533, + "grad_norm": 7.614861521245855, + "learning_rate": 9.45611476483045e-06, + "loss": 17.9974, + "step": 9594 + }, + { + "epoch": 0.1753888899043998, + "grad_norm": 7.161606441548655, + "learning_rate": 9.455980496404937e-06, + "loss": 18.0735, + "step": 9595 + }, + { + "epoch": 0.17540716910084633, + "grad_norm": 8.015904801234518, + "learning_rate": 9.4558462123616e-06, + "loss": 18.0219, + "step": 9596 + }, + { + "epoch": 0.17542544829729284, + "grad_norm": 6.314264808075519, + "learning_rate": 9.455711912700906e-06, + "loss": 17.5404, + "step": 9597 + }, + { + "epoch": 0.17544372749373938, + "grad_norm": 6.389007170985312, + "learning_rate": 9.455577597423329e-06, + "loss": 17.4354, + "step": 9598 + }, + { + "epoch": 0.1754620066901859, + "grad_norm": 6.450687917607619, + "learning_rate": 9.45544326652934e-06, + "loss": 17.6394, + "step": 9599 + }, + { + "epoch": 0.17548028588663242, + "grad_norm": 8.174249896841113, + "learning_rate": 9.455308920019407e-06, + "loss": 17.6692, + "step": 9600 + }, + { + "epoch": 0.17549856508307896, + "grad_norm": 6.553016425433632, + "learning_rate": 
9.455174557894006e-06, + "loss": 17.7826, + "step": 9601 + }, + { + "epoch": 0.17551684427952546, + "grad_norm": 6.90399626900844, + "learning_rate": 9.455040180153602e-06, + "loss": 17.644, + "step": 9602 + }, + { + "epoch": 0.175535123475972, + "grad_norm": 7.13345054726534, + "learning_rate": 9.454905786798671e-06, + "loss": 17.8096, + "step": 9603 + }, + { + "epoch": 0.1755534026724185, + "grad_norm": 6.901085836989342, + "learning_rate": 9.454771377829682e-06, + "loss": 17.8616, + "step": 9604 + }, + { + "epoch": 0.17557168186886504, + "grad_norm": 6.407392162682347, + "learning_rate": 9.454636953247106e-06, + "loss": 17.5621, + "step": 9605 + }, + { + "epoch": 0.17558996106531158, + "grad_norm": 8.909637325449719, + "learning_rate": 9.454502513051411e-06, + "loss": 18.2019, + "step": 9606 + }, + { + "epoch": 0.1756082402617581, + "grad_norm": 7.75470214126141, + "learning_rate": 9.454368057243072e-06, + "loss": 17.8469, + "step": 9607 + }, + { + "epoch": 0.17562651945820462, + "grad_norm": 6.488640748696882, + "learning_rate": 9.454233585822562e-06, + "loss": 17.6242, + "step": 9608 + }, + { + "epoch": 0.17564479865465113, + "grad_norm": 7.347397929283839, + "learning_rate": 9.454099098790348e-06, + "loss": 17.8359, + "step": 9609 + }, + { + "epoch": 0.17566307785109767, + "grad_norm": 7.888743786396532, + "learning_rate": 9.453964596146902e-06, + "loss": 18.2884, + "step": 9610 + }, + { + "epoch": 0.1756813570475442, + "grad_norm": 8.862293313268275, + "learning_rate": 9.453830077892699e-06, + "loss": 18.3987, + "step": 9611 + }, + { + "epoch": 0.1756996362439907, + "grad_norm": 7.377641513426715, + "learning_rate": 9.453695544028207e-06, + "loss": 17.9834, + "step": 9612 + }, + { + "epoch": 0.17571791544043724, + "grad_norm": 9.07862999594866, + "learning_rate": 9.453560994553899e-06, + "loss": 18.696, + "step": 9613 + }, + { + "epoch": 0.17573619463688375, + "grad_norm": 6.545419816988638, + "learning_rate": 9.453426429470247e-06, + "loss": 17.328, + "step": 9614 + }, + { + "epoch": 0.1757544738333303, + "grad_norm": 6.713530668856103, + "learning_rate": 9.45329184877772e-06, + "loss": 17.2036, + "step": 9615 + }, + { + "epoch": 0.17577275302977682, + "grad_norm": 5.675338688006315, + "learning_rate": 9.453157252476793e-06, + "loss": 17.1771, + "step": 9616 + }, + { + "epoch": 0.17579103222622333, + "grad_norm": 7.9498118466028185, + "learning_rate": 9.453022640567936e-06, + "loss": 17.8481, + "step": 9617 + }, + { + "epoch": 0.17580931142266987, + "grad_norm": 7.229448778468172, + "learning_rate": 9.45288801305162e-06, + "loss": 17.6368, + "step": 9618 + }, + { + "epoch": 0.17582759061911638, + "grad_norm": 6.703388893560162, + "learning_rate": 9.452753369928318e-06, + "loss": 17.4984, + "step": 9619 + }, + { + "epoch": 0.1758458698155629, + "grad_norm": 6.976151557876985, + "learning_rate": 9.452618711198503e-06, + "loss": 17.7493, + "step": 9620 + }, + { + "epoch": 0.17586414901200942, + "grad_norm": 6.700961464767145, + "learning_rate": 9.452484036862644e-06, + "loss": 17.6641, + "step": 9621 + }, + { + "epoch": 0.17588242820845595, + "grad_norm": 7.089096875826108, + "learning_rate": 9.452349346921217e-06, + "loss": 17.7229, + "step": 9622 + }, + { + "epoch": 0.1759007074049025, + "grad_norm": 6.745625988494399, + "learning_rate": 9.45221464137469e-06, + "loss": 17.4635, + "step": 9623 + }, + { + "epoch": 0.175918986601349, + "grad_norm": 5.519624368429729, + "learning_rate": 9.452079920223538e-06, + "loss": 16.9726, + "step": 9624 + }, + { + "epoch": 0.17593726579779553, + 
"grad_norm": 7.556663734793128, + "learning_rate": 9.451945183468232e-06, + "loss": 17.9782, + "step": 9625 + }, + { + "epoch": 0.17595554499424204, + "grad_norm": 7.05912937539355, + "learning_rate": 9.451810431109245e-06, + "loss": 17.5649, + "step": 9626 + }, + { + "epoch": 0.17597382419068858, + "grad_norm": 7.26119786780201, + "learning_rate": 9.451675663147049e-06, + "loss": 17.8877, + "step": 9627 + }, + { + "epoch": 0.1759921033871351, + "grad_norm": 6.14689326735572, + "learning_rate": 9.451540879582115e-06, + "loss": 17.3651, + "step": 9628 + }, + { + "epoch": 0.17601038258358162, + "grad_norm": 7.051044815148934, + "learning_rate": 9.451406080414915e-06, + "loss": 17.7112, + "step": 9629 + }, + { + "epoch": 0.17602866178002816, + "grad_norm": 8.696622023622663, + "learning_rate": 9.451271265645925e-06, + "loss": 17.8157, + "step": 9630 + }, + { + "epoch": 0.17604694097647466, + "grad_norm": 7.096216250790805, + "learning_rate": 9.451136435275617e-06, + "loss": 17.6581, + "step": 9631 + }, + { + "epoch": 0.1760652201729212, + "grad_norm": 6.879831978724544, + "learning_rate": 9.45100158930446e-06, + "loss": 17.526, + "step": 9632 + }, + { + "epoch": 0.17608349936936774, + "grad_norm": 6.42182044924456, + "learning_rate": 9.450866727732929e-06, + "loss": 17.5388, + "step": 9633 + }, + { + "epoch": 0.17610177856581424, + "grad_norm": 6.712998064872013, + "learning_rate": 9.450731850561496e-06, + "loss": 17.6062, + "step": 9634 + }, + { + "epoch": 0.17612005776226078, + "grad_norm": 7.162659350706177, + "learning_rate": 9.450596957790635e-06, + "loss": 17.6215, + "step": 9635 + }, + { + "epoch": 0.1761383369587073, + "grad_norm": 8.155756487703615, + "learning_rate": 9.450462049420816e-06, + "loss": 18.093, + "step": 9636 + }, + { + "epoch": 0.17615661615515382, + "grad_norm": 7.107062681470219, + "learning_rate": 9.450327125452517e-06, + "loss": 17.442, + "step": 9637 + }, + { + "epoch": 0.17617489535160033, + "grad_norm": 6.436645919508923, + "learning_rate": 9.450192185886205e-06, + "loss": 17.4097, + "step": 9638 + }, + { + "epoch": 0.17619317454804687, + "grad_norm": 7.073010915110433, + "learning_rate": 9.450057230722356e-06, + "loss": 17.6839, + "step": 9639 + }, + { + "epoch": 0.1762114537444934, + "grad_norm": 6.3032993215199635, + "learning_rate": 9.449922259961443e-06, + "loss": 17.4623, + "step": 9640 + }, + { + "epoch": 0.1762297329409399, + "grad_norm": 6.099549268477369, + "learning_rate": 9.44978727360394e-06, + "loss": 17.335, + "step": 9641 + }, + { + "epoch": 0.17624801213738645, + "grad_norm": 7.786219841755767, + "learning_rate": 9.449652271650314e-06, + "loss": 18.2596, + "step": 9642 + }, + { + "epoch": 0.17626629133383295, + "grad_norm": 5.731132304582805, + "learning_rate": 9.449517254101048e-06, + "loss": 17.0984, + "step": 9643 + }, + { + "epoch": 0.1762845705302795, + "grad_norm": 9.083475202687568, + "learning_rate": 9.449382220956607e-06, + "loss": 18.6494, + "step": 9644 + }, + { + "epoch": 0.17630284972672602, + "grad_norm": 6.359743207189475, + "learning_rate": 9.449247172217468e-06, + "loss": 17.3785, + "step": 9645 + }, + { + "epoch": 0.17632112892317253, + "grad_norm": 7.051513680981001, + "learning_rate": 9.449112107884105e-06, + "loss": 17.505, + "step": 9646 + }, + { + "epoch": 0.17633940811961907, + "grad_norm": 6.2166941084379985, + "learning_rate": 9.448977027956989e-06, + "loss": 17.4986, + "step": 9647 + }, + { + "epoch": 0.17635768731606558, + "grad_norm": 7.336685680841691, + "learning_rate": 9.448841932436596e-06, + "loss": 17.8284, + 
"step": 9648 + }, + { + "epoch": 0.1763759665125121, + "grad_norm": 7.702246485336523, + "learning_rate": 9.448706821323396e-06, + "loss": 17.7406, + "step": 9649 + }, + { + "epoch": 0.17639424570895865, + "grad_norm": 6.4258054404145355, + "learning_rate": 9.448571694617868e-06, + "loss": 17.4923, + "step": 9650 + }, + { + "epoch": 0.17641252490540515, + "grad_norm": 9.007529205106438, + "learning_rate": 9.448436552320479e-06, + "loss": 18.8918, + "step": 9651 + }, + { + "epoch": 0.1764308041018517, + "grad_norm": 8.425281287035055, + "learning_rate": 9.448301394431707e-06, + "loss": 18.4124, + "step": 9652 + }, + { + "epoch": 0.1764490832982982, + "grad_norm": 8.206058412032595, + "learning_rate": 9.448166220952025e-06, + "loss": 18.5093, + "step": 9653 + }, + { + "epoch": 0.17646736249474473, + "grad_norm": 9.808163777383681, + "learning_rate": 9.448031031881905e-06, + "loss": 18.3022, + "step": 9654 + }, + { + "epoch": 0.17648564169119124, + "grad_norm": 6.733960562872665, + "learning_rate": 9.447895827221822e-06, + "loss": 17.687, + "step": 9655 + }, + { + "epoch": 0.17650392088763778, + "grad_norm": 8.096405222635829, + "learning_rate": 9.447760606972252e-06, + "loss": 18.2036, + "step": 9656 + }, + { + "epoch": 0.1765222000840843, + "grad_norm": 7.329142019655432, + "learning_rate": 9.447625371133667e-06, + "loss": 17.9571, + "step": 9657 + }, + { + "epoch": 0.17654047928053082, + "grad_norm": 6.581840834842928, + "learning_rate": 9.44749011970654e-06, + "loss": 17.5703, + "step": 9658 + }, + { + "epoch": 0.17655875847697736, + "grad_norm": 6.129898632277393, + "learning_rate": 9.447354852691345e-06, + "loss": 17.1393, + "step": 9659 + }, + { + "epoch": 0.17657703767342386, + "grad_norm": 6.762291526337835, + "learning_rate": 9.44721957008856e-06, + "loss": 17.4261, + "step": 9660 + }, + { + "epoch": 0.1765953168698704, + "grad_norm": 6.813084022090381, + "learning_rate": 9.447084271898654e-06, + "loss": 17.7524, + "step": 9661 + }, + { + "epoch": 0.17661359606631694, + "grad_norm": 7.259022069175019, + "learning_rate": 9.446948958122105e-06, + "loss": 17.8218, + "step": 9662 + }, + { + "epoch": 0.17663187526276344, + "grad_norm": 6.345286917442418, + "learning_rate": 9.446813628759385e-06, + "loss": 17.5285, + "step": 9663 + }, + { + "epoch": 0.17665015445920998, + "grad_norm": 7.507549321072424, + "learning_rate": 9.44667828381097e-06, + "loss": 17.8682, + "step": 9664 + }, + { + "epoch": 0.1766684336556565, + "grad_norm": 8.854101984446455, + "learning_rate": 9.446542923277334e-06, + "loss": 18.0953, + "step": 9665 + }, + { + "epoch": 0.17668671285210302, + "grad_norm": 7.0324185514608075, + "learning_rate": 9.446407547158948e-06, + "loss": 17.8778, + "step": 9666 + }, + { + "epoch": 0.17670499204854956, + "grad_norm": 6.7879172787870825, + "learning_rate": 9.44627215545629e-06, + "loss": 17.4351, + "step": 9667 + }, + { + "epoch": 0.17672327124499607, + "grad_norm": 8.513656505846203, + "learning_rate": 9.446136748169836e-06, + "loss": 18.1137, + "step": 9668 + }, + { + "epoch": 0.1767415504414426, + "grad_norm": 7.215523316095467, + "learning_rate": 9.446001325300058e-06, + "loss": 17.8333, + "step": 9669 + }, + { + "epoch": 0.1767598296378891, + "grad_norm": 8.158464109641324, + "learning_rate": 9.445865886847429e-06, + "loss": 18.5054, + "step": 9670 + }, + { + "epoch": 0.17677810883433565, + "grad_norm": 6.687987689542999, + "learning_rate": 9.445730432812429e-06, + "loss": 17.5526, + "step": 9671 + }, + { + "epoch": 0.17679638803078215, + "grad_norm": 6.478660305319019, + 
"learning_rate": 9.445594963195529e-06, + "loss": 17.5868, + "step": 9672 + }, + { + "epoch": 0.1768146672272287, + "grad_norm": 5.151104796398375, + "learning_rate": 9.445459477997203e-06, + "loss": 16.9636, + "step": 9673 + }, + { + "epoch": 0.17683294642367522, + "grad_norm": 7.433461809978908, + "learning_rate": 9.445323977217927e-06, + "loss": 18.2122, + "step": 9674 + }, + { + "epoch": 0.17685122562012173, + "grad_norm": 7.8410617284588255, + "learning_rate": 9.445188460858176e-06, + "loss": 17.9256, + "step": 9675 + }, + { + "epoch": 0.17686950481656827, + "grad_norm": 7.321948538376839, + "learning_rate": 9.445052928918428e-06, + "loss": 17.8209, + "step": 9676 + }, + { + "epoch": 0.17688778401301478, + "grad_norm": 6.376730731198681, + "learning_rate": 9.444917381399153e-06, + "loss": 17.3759, + "step": 9677 + }, + { + "epoch": 0.1769060632094613, + "grad_norm": 7.316377252451896, + "learning_rate": 9.444781818300828e-06, + "loss": 18.0705, + "step": 9678 + }, + { + "epoch": 0.17692434240590785, + "grad_norm": 7.027882068028113, + "learning_rate": 9.444646239623929e-06, + "loss": 17.7918, + "step": 9679 + }, + { + "epoch": 0.17694262160235436, + "grad_norm": 7.850501897469477, + "learning_rate": 9.444510645368932e-06, + "loss": 17.9935, + "step": 9680 + }, + { + "epoch": 0.1769609007988009, + "grad_norm": 7.419180565871145, + "learning_rate": 9.444375035536309e-06, + "loss": 17.9222, + "step": 9681 + }, + { + "epoch": 0.1769791799952474, + "grad_norm": 6.110589969967948, + "learning_rate": 9.444239410126538e-06, + "loss": 17.4264, + "step": 9682 + }, + { + "epoch": 0.17699745919169393, + "grad_norm": 6.622212880277883, + "learning_rate": 9.444103769140094e-06, + "loss": 17.6152, + "step": 9683 + }, + { + "epoch": 0.17701573838814047, + "grad_norm": 7.350055275318429, + "learning_rate": 9.44396811257745e-06, + "loss": 18.2224, + "step": 9684 + }, + { + "epoch": 0.17703401758458698, + "grad_norm": 5.899957783709529, + "learning_rate": 9.443832440439084e-06, + "loss": 17.0868, + "step": 9685 + }, + { + "epoch": 0.1770522967810335, + "grad_norm": 9.15888569103452, + "learning_rate": 9.443696752725473e-06, + "loss": 18.4873, + "step": 9686 + }, + { + "epoch": 0.17707057597748002, + "grad_norm": 7.1445693175578775, + "learning_rate": 9.443561049437089e-06, + "loss": 17.8865, + "step": 9687 + }, + { + "epoch": 0.17708885517392656, + "grad_norm": 8.27896390971788, + "learning_rate": 9.44342533057441e-06, + "loss": 17.904, + "step": 9688 + }, + { + "epoch": 0.17710713437037306, + "grad_norm": 7.739589589633317, + "learning_rate": 9.443289596137909e-06, + "loss": 17.9802, + "step": 9689 + }, + { + "epoch": 0.1771254135668196, + "grad_norm": 6.871999460302412, + "learning_rate": 9.443153846128063e-06, + "loss": 17.5567, + "step": 9690 + }, + { + "epoch": 0.17714369276326614, + "grad_norm": 6.700836374363081, + "learning_rate": 9.443018080545352e-06, + "loss": 17.5755, + "step": 9691 + }, + { + "epoch": 0.17716197195971264, + "grad_norm": 6.385486423826585, + "learning_rate": 9.442882299390246e-06, + "loss": 17.4786, + "step": 9692 + }, + { + "epoch": 0.17718025115615918, + "grad_norm": 7.213933951038367, + "learning_rate": 9.442746502663223e-06, + "loss": 17.8697, + "step": 9693 + }, + { + "epoch": 0.1771985303526057, + "grad_norm": 7.214876349743183, + "learning_rate": 9.442610690364758e-06, + "loss": 17.9732, + "step": 9694 + }, + { + "epoch": 0.17721680954905222, + "grad_norm": 6.9425163586911225, + "learning_rate": 9.44247486249533e-06, + "loss": 17.7903, + "step": 9695 + }, + { + 
"epoch": 0.17723508874549876, + "grad_norm": 6.178478150841416, + "learning_rate": 9.442339019055412e-06, + "loss": 17.3213, + "step": 9696 + }, + { + "epoch": 0.17725336794194527, + "grad_norm": 6.278235189723746, + "learning_rate": 9.442203160045482e-06, + "loss": 17.541, + "step": 9697 + }, + { + "epoch": 0.1772716471383918, + "grad_norm": 6.115087524574453, + "learning_rate": 9.442067285466014e-06, + "loss": 17.7573, + "step": 9698 + }, + { + "epoch": 0.1772899263348383, + "grad_norm": 7.899599701776045, + "learning_rate": 9.441931395317488e-06, + "loss": 18.0926, + "step": 9699 + }, + { + "epoch": 0.17730820553128485, + "grad_norm": 6.094800422486613, + "learning_rate": 9.441795489600374e-06, + "loss": 17.2838, + "step": 9700 + }, + { + "epoch": 0.17732648472773138, + "grad_norm": 8.220077362304531, + "learning_rate": 9.441659568315156e-06, + "loss": 18.1649, + "step": 9701 + }, + { + "epoch": 0.1773447639241779, + "grad_norm": 8.180979102384269, + "learning_rate": 9.441523631462306e-06, + "loss": 18.0576, + "step": 9702 + }, + { + "epoch": 0.17736304312062443, + "grad_norm": 6.821931812911939, + "learning_rate": 9.4413876790423e-06, + "loss": 17.7972, + "step": 9703 + }, + { + "epoch": 0.17738132231707093, + "grad_norm": 9.030951775341116, + "learning_rate": 9.441251711055616e-06, + "loss": 18.5217, + "step": 9704 + }, + { + "epoch": 0.17739960151351747, + "grad_norm": 6.0484269852928225, + "learning_rate": 9.44111572750273e-06, + "loss": 17.4727, + "step": 9705 + }, + { + "epoch": 0.17741788070996398, + "grad_norm": 7.721039215305776, + "learning_rate": 9.440979728384118e-06, + "loss": 18.0058, + "step": 9706 + }, + { + "epoch": 0.1774361599064105, + "grad_norm": 6.295985288778256, + "learning_rate": 9.440843713700258e-06, + "loss": 17.2709, + "step": 9707 + }, + { + "epoch": 0.17745443910285705, + "grad_norm": 7.319272852063073, + "learning_rate": 9.440707683451627e-06, + "loss": 17.9808, + "step": 9708 + }, + { + "epoch": 0.17747271829930356, + "grad_norm": 9.102364113732065, + "learning_rate": 9.4405716376387e-06, + "loss": 17.7096, + "step": 9709 + }, + { + "epoch": 0.1774909974957501, + "grad_norm": 6.809521540271753, + "learning_rate": 9.440435576261957e-06, + "loss": 17.5893, + "step": 9710 + }, + { + "epoch": 0.1775092766921966, + "grad_norm": 6.852650002439833, + "learning_rate": 9.44029949932187e-06, + "loss": 17.7249, + "step": 9711 + }, + { + "epoch": 0.17752755588864313, + "grad_norm": 6.526419833756175, + "learning_rate": 9.440163406818919e-06, + "loss": 17.6222, + "step": 9712 + }, + { + "epoch": 0.17754583508508967, + "grad_norm": 6.898101318285359, + "learning_rate": 9.44002729875358e-06, + "loss": 17.765, + "step": 9713 + }, + { + "epoch": 0.17756411428153618, + "grad_norm": 6.788442053561319, + "learning_rate": 9.439891175126331e-06, + "loss": 17.6916, + "step": 9714 + }, + { + "epoch": 0.17758239347798271, + "grad_norm": 6.906235154036778, + "learning_rate": 9.43975503593765e-06, + "loss": 17.5038, + "step": 9715 + }, + { + "epoch": 0.17760067267442922, + "grad_norm": 7.042972173507783, + "learning_rate": 9.439618881188014e-06, + "loss": 17.6661, + "step": 9716 + }, + { + "epoch": 0.17761895187087576, + "grad_norm": 7.19275145236644, + "learning_rate": 9.439482710877896e-06, + "loss": 17.5986, + "step": 9717 + }, + { + "epoch": 0.1776372310673223, + "grad_norm": 7.130931651391098, + "learning_rate": 9.439346525007777e-06, + "loss": 17.9464, + "step": 9718 + }, + { + "epoch": 0.1776555102637688, + "grad_norm": 6.67146969845684, + "learning_rate": 
9.439210323578134e-06, + "loss": 17.3904, + "step": 9719 + }, + { + "epoch": 0.17767378946021534, + "grad_norm": 7.465071058551338, + "learning_rate": 9.439074106589445e-06, + "loss": 17.5445, + "step": 9720 + }, + { + "epoch": 0.17769206865666184, + "grad_norm": 7.690613516101903, + "learning_rate": 9.438937874042185e-06, + "loss": 17.9147, + "step": 9721 + }, + { + "epoch": 0.17771034785310838, + "grad_norm": 6.656617422977974, + "learning_rate": 9.438801625936832e-06, + "loss": 17.6754, + "step": 9722 + }, + { + "epoch": 0.1777286270495549, + "grad_norm": 7.680997168893982, + "learning_rate": 9.438665362273868e-06, + "loss": 17.6818, + "step": 9723 + }, + { + "epoch": 0.17774690624600142, + "grad_norm": 6.51239015634626, + "learning_rate": 9.438529083053765e-06, + "loss": 17.5307, + "step": 9724 + }, + { + "epoch": 0.17776518544244796, + "grad_norm": 6.603219062765521, + "learning_rate": 9.438392788277002e-06, + "loss": 17.8635, + "step": 9725 + }, + { + "epoch": 0.17778346463889447, + "grad_norm": 7.559472034188811, + "learning_rate": 9.438256477944058e-06, + "loss": 17.8341, + "step": 9726 + }, + { + "epoch": 0.177801743835341, + "grad_norm": 7.86013382771788, + "learning_rate": 9.438120152055413e-06, + "loss": 18.0118, + "step": 9727 + }, + { + "epoch": 0.1778200230317875, + "grad_norm": 6.620496532263314, + "learning_rate": 9.437983810611537e-06, + "loss": 17.5389, + "step": 9728 + }, + { + "epoch": 0.17783830222823405, + "grad_norm": 7.8524726290862175, + "learning_rate": 9.437847453612916e-06, + "loss": 18.0236, + "step": 9729 + }, + { + "epoch": 0.17785658142468058, + "grad_norm": 6.803781577666788, + "learning_rate": 9.437711081060024e-06, + "loss": 17.722, + "step": 9730 + }, + { + "epoch": 0.1778748606211271, + "grad_norm": 6.804629365353588, + "learning_rate": 9.437574692953339e-06, + "loss": 17.4993, + "step": 9731 + }, + { + "epoch": 0.17789313981757363, + "grad_norm": 6.2524073121621715, + "learning_rate": 9.437438289293342e-06, + "loss": 17.3258, + "step": 9732 + }, + { + "epoch": 0.17791141901402013, + "grad_norm": 6.462773309750215, + "learning_rate": 9.437301870080507e-06, + "loss": 17.9451, + "step": 9733 + }, + { + "epoch": 0.17792969821046667, + "grad_norm": 6.460433370820935, + "learning_rate": 9.437165435315315e-06, + "loss": 17.2655, + "step": 9734 + }, + { + "epoch": 0.1779479774069132, + "grad_norm": 7.436405726537002, + "learning_rate": 9.437028984998242e-06, + "loss": 17.7149, + "step": 9735 + }, + { + "epoch": 0.1779662566033597, + "grad_norm": 6.176887549546213, + "learning_rate": 9.436892519129767e-06, + "loss": 17.4163, + "step": 9736 + }, + { + "epoch": 0.17798453579980625, + "grad_norm": 7.730634578467468, + "learning_rate": 9.436756037710371e-06, + "loss": 17.4026, + "step": 9737 + }, + { + "epoch": 0.17800281499625276, + "grad_norm": 5.161988822903628, + "learning_rate": 9.436619540740528e-06, + "loss": 16.8931, + "step": 9738 + }, + { + "epoch": 0.1780210941926993, + "grad_norm": 7.344605573317709, + "learning_rate": 9.436483028220719e-06, + "loss": 17.7463, + "step": 9739 + }, + { + "epoch": 0.1780393733891458, + "grad_norm": 8.046813266600296, + "learning_rate": 9.436346500151423e-06, + "loss": 17.9651, + "step": 9740 + }, + { + "epoch": 0.17805765258559234, + "grad_norm": 6.393431035630223, + "learning_rate": 9.436209956533117e-06, + "loss": 17.5466, + "step": 9741 + }, + { + "epoch": 0.17807593178203887, + "grad_norm": 6.994112434580161, + "learning_rate": 9.436073397366282e-06, + "loss": 17.9602, + "step": 9742 + }, + { + "epoch": 
0.17809421097848538, + "grad_norm": 7.571745779934617, + "learning_rate": 9.435936822651391e-06, + "loss": 17.9859, + "step": 9743 + }, + { + "epoch": 0.17811249017493191, + "grad_norm": 7.051017052383482, + "learning_rate": 9.435800232388927e-06, + "loss": 17.8538, + "step": 9744 + }, + { + "epoch": 0.17813076937137842, + "grad_norm": 7.086992505485541, + "learning_rate": 9.43566362657937e-06, + "loss": 17.9082, + "step": 9745 + }, + { + "epoch": 0.17814904856782496, + "grad_norm": 7.575001476737331, + "learning_rate": 9.435527005223197e-06, + "loss": 18.3628, + "step": 9746 + }, + { + "epoch": 0.1781673277642715, + "grad_norm": 6.620470467188751, + "learning_rate": 9.435390368320885e-06, + "loss": 17.7196, + "step": 9747 + }, + { + "epoch": 0.178185606960718, + "grad_norm": 6.767458937127244, + "learning_rate": 9.435253715872917e-06, + "loss": 17.5436, + "step": 9748 + }, + { + "epoch": 0.17820388615716454, + "grad_norm": 7.768517323398791, + "learning_rate": 9.435117047879768e-06, + "loss": 17.5803, + "step": 9749 + }, + { + "epoch": 0.17822216535361105, + "grad_norm": 8.52002998348253, + "learning_rate": 9.434980364341917e-06, + "loss": 18.5091, + "step": 9750 + }, + { + "epoch": 0.17824044455005758, + "grad_norm": 8.60681587905685, + "learning_rate": 9.434843665259847e-06, + "loss": 18.4451, + "step": 9751 + }, + { + "epoch": 0.17825872374650412, + "grad_norm": 6.936536731140007, + "learning_rate": 9.434706950634034e-06, + "loss": 17.6692, + "step": 9752 + }, + { + "epoch": 0.17827700294295062, + "grad_norm": 7.080582829257614, + "learning_rate": 9.434570220464959e-06, + "loss": 17.5401, + "step": 9753 + }, + { + "epoch": 0.17829528213939716, + "grad_norm": 6.9923608229850815, + "learning_rate": 9.434433474753098e-06, + "loss": 17.8878, + "step": 9754 + }, + { + "epoch": 0.17831356133584367, + "grad_norm": 8.90585203556734, + "learning_rate": 9.434296713498934e-06, + "loss": 18.6246, + "step": 9755 + }, + { + "epoch": 0.1783318405322902, + "grad_norm": 7.578519220791576, + "learning_rate": 9.434159936702943e-06, + "loss": 17.9341, + "step": 9756 + }, + { + "epoch": 0.1783501197287367, + "grad_norm": 6.728053978855922, + "learning_rate": 9.434023144365608e-06, + "loss": 17.7731, + "step": 9757 + }, + { + "epoch": 0.17836839892518325, + "grad_norm": 7.003711526559738, + "learning_rate": 9.433886336487407e-06, + "loss": 17.5275, + "step": 9758 + }, + { + "epoch": 0.17838667812162978, + "grad_norm": 8.455193694248191, + "learning_rate": 9.433749513068818e-06, + "loss": 18.0235, + "step": 9759 + }, + { + "epoch": 0.1784049573180763, + "grad_norm": 5.342623445358985, + "learning_rate": 9.433612674110322e-06, + "loss": 16.9604, + "step": 9760 + }, + { + "epoch": 0.17842323651452283, + "grad_norm": 7.356764788393615, + "learning_rate": 9.433475819612399e-06, + "loss": 17.9372, + "step": 9761 + }, + { + "epoch": 0.17844151571096933, + "grad_norm": 7.828348541507714, + "learning_rate": 9.433338949575527e-06, + "loss": 17.8288, + "step": 9762 + }, + { + "epoch": 0.17845979490741587, + "grad_norm": 6.98720409664005, + "learning_rate": 9.433202064000187e-06, + "loss": 17.7981, + "step": 9763 + }, + { + "epoch": 0.1784780741038624, + "grad_norm": 6.788120270191315, + "learning_rate": 9.433065162886859e-06, + "loss": 17.7682, + "step": 9764 + }, + { + "epoch": 0.1784963533003089, + "grad_norm": 5.223666450826554, + "learning_rate": 9.432928246236022e-06, + "loss": 16.8469, + "step": 9765 + }, + { + "epoch": 0.17851463249675545, + "grad_norm": 7.726231461511586, + "learning_rate": 
9.432791314048156e-06, + "loss": 18.1611, + "step": 9766 + }, + { + "epoch": 0.17853291169320196, + "grad_norm": 6.963165020375716, + "learning_rate": 9.432654366323741e-06, + "loss": 17.8006, + "step": 9767 + }, + { + "epoch": 0.1785511908896485, + "grad_norm": 5.360391003965571, + "learning_rate": 9.432517403063257e-06, + "loss": 17.1445, + "step": 9768 + }, + { + "epoch": 0.17856947008609503, + "grad_norm": 6.51934534987101, + "learning_rate": 9.432380424267185e-06, + "loss": 17.8409, + "step": 9769 + }, + { + "epoch": 0.17858774928254154, + "grad_norm": 6.2131845811506095, + "learning_rate": 9.432243429936003e-06, + "loss": 17.5967, + "step": 9770 + }, + { + "epoch": 0.17860602847898807, + "grad_norm": 6.490373216880616, + "learning_rate": 9.432106420070193e-06, + "loss": 17.188, + "step": 9771 + }, + { + "epoch": 0.17862430767543458, + "grad_norm": 7.772721584194353, + "learning_rate": 9.431969394670235e-06, + "loss": 18.0158, + "step": 9772 + }, + { + "epoch": 0.17864258687188111, + "grad_norm": 6.6291600442599625, + "learning_rate": 9.431832353736608e-06, + "loss": 17.5063, + "step": 9773 + }, + { + "epoch": 0.17866086606832762, + "grad_norm": 6.337764937297904, + "learning_rate": 9.431695297269794e-06, + "loss": 17.4012, + "step": 9774 + }, + { + "epoch": 0.17867914526477416, + "grad_norm": 5.528648977290598, + "learning_rate": 9.431558225270272e-06, + "loss": 17.1467, + "step": 9775 + }, + { + "epoch": 0.1786974244612207, + "grad_norm": 8.17410182241707, + "learning_rate": 9.431421137738523e-06, + "loss": 18.0794, + "step": 9776 + }, + { + "epoch": 0.1787157036576672, + "grad_norm": 7.257810407204982, + "learning_rate": 9.431284034675029e-06, + "loss": 17.6139, + "step": 9777 + }, + { + "epoch": 0.17873398285411374, + "grad_norm": 7.471120028201146, + "learning_rate": 9.431146916080267e-06, + "loss": 17.7815, + "step": 9778 + }, + { + "epoch": 0.17875226205056025, + "grad_norm": 6.23432741622461, + "learning_rate": 9.431009781954721e-06, + "loss": 17.3911, + "step": 9779 + }, + { + "epoch": 0.17877054124700678, + "grad_norm": 8.843764130326498, + "learning_rate": 9.430872632298868e-06, + "loss": 18.4642, + "step": 9780 + }, + { + "epoch": 0.17878882044345332, + "grad_norm": 6.205504168993172, + "learning_rate": 9.430735467113192e-06, + "loss": 17.344, + "step": 9781 + }, + { + "epoch": 0.17880709963989982, + "grad_norm": 8.572140095617657, + "learning_rate": 9.430598286398174e-06, + "loss": 18.2109, + "step": 9782 + }, + { + "epoch": 0.17882537883634636, + "grad_norm": 9.004655976115545, + "learning_rate": 9.430461090154293e-06, + "loss": 18.4639, + "step": 9783 + }, + { + "epoch": 0.17884365803279287, + "grad_norm": 8.06569636279287, + "learning_rate": 9.43032387838203e-06, + "loss": 18.4043, + "step": 9784 + }, + { + "epoch": 0.1788619372292394, + "grad_norm": 6.832525538322095, + "learning_rate": 9.430186651081865e-06, + "loss": 17.5653, + "step": 9785 + }, + { + "epoch": 0.17888021642568594, + "grad_norm": 7.490859644683718, + "learning_rate": 9.430049408254282e-06, + "loss": 18.0886, + "step": 9786 + }, + { + "epoch": 0.17889849562213245, + "grad_norm": 7.045513759459739, + "learning_rate": 9.429912149899758e-06, + "loss": 17.5878, + "step": 9787 + }, + { + "epoch": 0.17891677481857898, + "grad_norm": 8.059792695285463, + "learning_rate": 9.429774876018779e-06, + "loss": 18.1043, + "step": 9788 + }, + { + "epoch": 0.1789350540150255, + "grad_norm": 6.0787364962803645, + "learning_rate": 9.429637586611822e-06, + "loss": 17.3551, + "step": 9789 + }, + { + "epoch": 
0.17895333321147203, + "grad_norm": 4.943654424937247, + "learning_rate": 9.42950028167937e-06, + "loss": 16.8844, + "step": 9790 + }, + { + "epoch": 0.17897161240791853, + "grad_norm": 6.14956192712607, + "learning_rate": 9.429362961221904e-06, + "loss": 17.6326, + "step": 9791 + }, + { + "epoch": 0.17898989160436507, + "grad_norm": 8.847278040251075, + "learning_rate": 9.429225625239906e-06, + "loss": 18.5878, + "step": 9792 + }, + { + "epoch": 0.1790081708008116, + "grad_norm": 10.696567532179113, + "learning_rate": 9.429088273733855e-06, + "loss": 18.5805, + "step": 9793 + }, + { + "epoch": 0.1790264499972581, + "grad_norm": 6.926405386042904, + "learning_rate": 9.428950906704234e-06, + "loss": 17.7401, + "step": 9794 + }, + { + "epoch": 0.17904472919370465, + "grad_norm": 6.489936352810286, + "learning_rate": 9.428813524151525e-06, + "loss": 17.5293, + "step": 9795 + }, + { + "epoch": 0.17906300839015116, + "grad_norm": 6.964940518291793, + "learning_rate": 9.428676126076208e-06, + "loss": 17.8764, + "step": 9796 + }, + { + "epoch": 0.1790812875865977, + "grad_norm": 7.295689182900891, + "learning_rate": 9.428538712478767e-06, + "loss": 17.7729, + "step": 9797 + }, + { + "epoch": 0.17909956678304423, + "grad_norm": 7.437427660905084, + "learning_rate": 9.428401283359682e-06, + "loss": 17.875, + "step": 9798 + }, + { + "epoch": 0.17911784597949074, + "grad_norm": 7.679807104368676, + "learning_rate": 9.428263838719434e-06, + "loss": 17.7642, + "step": 9799 + }, + { + "epoch": 0.17913612517593727, + "grad_norm": 6.1585842593847895, + "learning_rate": 9.428126378558506e-06, + "loss": 17.3325, + "step": 9800 + }, + { + "epoch": 0.17915440437238378, + "grad_norm": 7.7743560244330405, + "learning_rate": 9.427988902877378e-06, + "loss": 17.3411, + "step": 9801 + }, + { + "epoch": 0.17917268356883032, + "grad_norm": 6.914870919299745, + "learning_rate": 9.427851411676535e-06, + "loss": 17.7374, + "step": 9802 + }, + { + "epoch": 0.17919096276527685, + "grad_norm": 7.269947505575387, + "learning_rate": 9.427713904956455e-06, + "loss": 17.6458, + "step": 9803 + }, + { + "epoch": 0.17920924196172336, + "grad_norm": 7.7465526450080375, + "learning_rate": 9.427576382717624e-06, + "loss": 18.127, + "step": 9804 + }, + { + "epoch": 0.1792275211581699, + "grad_norm": 7.438328267476282, + "learning_rate": 9.427438844960521e-06, + "loss": 17.9759, + "step": 9805 + }, + { + "epoch": 0.1792458003546164, + "grad_norm": 6.431880029486637, + "learning_rate": 9.42730129168563e-06, + "loss": 17.5455, + "step": 9806 + }, + { + "epoch": 0.17926407955106294, + "grad_norm": 7.497073928030626, + "learning_rate": 9.42716372289343e-06, + "loss": 17.7965, + "step": 9807 + }, + { + "epoch": 0.17928235874750945, + "grad_norm": 6.6020967923882345, + "learning_rate": 9.427026138584408e-06, + "loss": 17.5642, + "step": 9808 + }, + { + "epoch": 0.17930063794395598, + "grad_norm": 7.76060879963446, + "learning_rate": 9.426888538759042e-06, + "loss": 17.9669, + "step": 9809 + }, + { + "epoch": 0.17931891714040252, + "grad_norm": 7.807309750568793, + "learning_rate": 9.426750923417815e-06, + "loss": 18.3616, + "step": 9810 + }, + { + "epoch": 0.17933719633684903, + "grad_norm": 7.828851331167055, + "learning_rate": 9.42661329256121e-06, + "loss": 18.0805, + "step": 9811 + }, + { + "epoch": 0.17935547553329556, + "grad_norm": 6.59582425984953, + "learning_rate": 9.426475646189713e-06, + "loss": 17.4784, + "step": 9812 + }, + { + "epoch": 0.17937375472974207, + "grad_norm": 7.099462090867203, + "learning_rate": 
9.426337984303799e-06, + "loss": 17.8802, + "step": 9813 + }, + { + "epoch": 0.1793920339261886, + "grad_norm": 8.122302428401728, + "learning_rate": 9.426200306903957e-06, + "loss": 18.4685, + "step": 9814 + }, + { + "epoch": 0.17941031312263514, + "grad_norm": 6.179400084084576, + "learning_rate": 9.426062613990667e-06, + "loss": 17.3515, + "step": 9815 + }, + { + "epoch": 0.17942859231908165, + "grad_norm": 7.911103376841772, + "learning_rate": 9.42592490556441e-06, + "loss": 18.0463, + "step": 9816 + }, + { + "epoch": 0.17944687151552818, + "grad_norm": 7.205215188326355, + "learning_rate": 9.425787181625671e-06, + "loss": 17.8049, + "step": 9817 + }, + { + "epoch": 0.1794651507119747, + "grad_norm": 6.062865053906306, + "learning_rate": 9.425649442174933e-06, + "loss": 17.1905, + "step": 9818 + }, + { + "epoch": 0.17948342990842123, + "grad_norm": 7.380725263172451, + "learning_rate": 9.425511687212677e-06, + "loss": 18.1633, + "step": 9819 + }, + { + "epoch": 0.17950170910486776, + "grad_norm": 6.652388341966013, + "learning_rate": 9.425373916739384e-06, + "loss": 17.4928, + "step": 9820 + }, + { + "epoch": 0.17951998830131427, + "grad_norm": 6.9587652781673555, + "learning_rate": 9.425236130755544e-06, + "loss": 17.7109, + "step": 9821 + }, + { + "epoch": 0.1795382674977608, + "grad_norm": 6.35310895058361, + "learning_rate": 9.425098329261632e-06, + "loss": 17.3162, + "step": 9822 + }, + { + "epoch": 0.17955654669420731, + "grad_norm": 7.297330906963598, + "learning_rate": 9.424960512258136e-06, + "loss": 18.0997, + "step": 9823 + }, + { + "epoch": 0.17957482589065385, + "grad_norm": 6.775515192904784, + "learning_rate": 9.424822679745536e-06, + "loss": 17.8178, + "step": 9824 + }, + { + "epoch": 0.17959310508710036, + "grad_norm": 9.758082204390167, + "learning_rate": 9.424684831724318e-06, + "loss": 18.8286, + "step": 9825 + }, + { + "epoch": 0.1796113842835469, + "grad_norm": 7.322342902333123, + "learning_rate": 9.424546968194963e-06, + "loss": 17.5736, + "step": 9826 + }, + { + "epoch": 0.17962966347999343, + "grad_norm": 5.823542098051173, + "learning_rate": 9.424409089157955e-06, + "loss": 17.1623, + "step": 9827 + }, + { + "epoch": 0.17964794267643994, + "grad_norm": 7.162051981471785, + "learning_rate": 9.424271194613776e-06, + "loss": 17.82, + "step": 9828 + }, + { + "epoch": 0.17966622187288647, + "grad_norm": 6.903782555726466, + "learning_rate": 9.424133284562911e-06, + "loss": 17.8435, + "step": 9829 + }, + { + "epoch": 0.17968450106933298, + "grad_norm": 7.52178146204798, + "learning_rate": 9.423995359005844e-06, + "loss": 17.5393, + "step": 9830 + }, + { + "epoch": 0.17970278026577952, + "grad_norm": 6.0192047068100765, + "learning_rate": 9.423857417943057e-06, + "loss": 17.2282, + "step": 9831 + }, + { + "epoch": 0.17972105946222605, + "grad_norm": 6.631329068271743, + "learning_rate": 9.423719461375031e-06, + "loss": 17.7655, + "step": 9832 + }, + { + "epoch": 0.17973933865867256, + "grad_norm": 5.770385109698054, + "learning_rate": 9.423581489302255e-06, + "loss": 17.3378, + "step": 9833 + }, + { + "epoch": 0.1797576178551191, + "grad_norm": 6.433071754790578, + "learning_rate": 9.423443501725209e-06, + "loss": 17.3947, + "step": 9834 + }, + { + "epoch": 0.1797758970515656, + "grad_norm": 7.123725461827745, + "learning_rate": 9.423305498644376e-06, + "loss": 17.3041, + "step": 9835 + }, + { + "epoch": 0.17979417624801214, + "grad_norm": 6.843623403109756, + "learning_rate": 9.423167480060242e-06, + "loss": 17.6027, + "step": 9836 + }, + { + "epoch": 
0.17981245544445867, + "grad_norm": 6.312874700147576, + "learning_rate": 9.423029445973291e-06, + "loss": 17.2771, + "step": 9837 + }, + { + "epoch": 0.17983073464090518, + "grad_norm": 6.790106535741972, + "learning_rate": 9.422891396384004e-06, + "loss": 17.3934, + "step": 9838 + }, + { + "epoch": 0.17984901383735172, + "grad_norm": 6.414037164648971, + "learning_rate": 9.422753331292867e-06, + "loss": 17.6646, + "step": 9839 + }, + { + "epoch": 0.17986729303379823, + "grad_norm": 7.2004252066756615, + "learning_rate": 9.422615250700363e-06, + "loss": 17.9489, + "step": 9840 + }, + { + "epoch": 0.17988557223024476, + "grad_norm": 7.387366300328245, + "learning_rate": 9.422477154606978e-06, + "loss": 17.9723, + "step": 9841 + }, + { + "epoch": 0.17990385142669127, + "grad_norm": 6.858661119487742, + "learning_rate": 9.422339043013192e-06, + "loss": 17.6167, + "step": 9842 + }, + { + "epoch": 0.1799221306231378, + "grad_norm": 6.1255287752906336, + "learning_rate": 9.422200915919493e-06, + "loss": 17.2448, + "step": 9843 + }, + { + "epoch": 0.17994040981958434, + "grad_norm": 6.7525066962578935, + "learning_rate": 9.422062773326361e-06, + "loss": 17.758, + "step": 9844 + }, + { + "epoch": 0.17995868901603085, + "grad_norm": 7.1877365319482855, + "learning_rate": 9.421924615234286e-06, + "loss": 17.9013, + "step": 9845 + }, + { + "epoch": 0.17997696821247738, + "grad_norm": 8.481912440458515, + "learning_rate": 9.421786441643748e-06, + "loss": 18.6036, + "step": 9846 + }, + { + "epoch": 0.1799952474089239, + "grad_norm": 6.284005726091458, + "learning_rate": 9.42164825255523e-06, + "loss": 17.2517, + "step": 9847 + }, + { + "epoch": 0.18001352660537043, + "grad_norm": 6.2068072418850395, + "learning_rate": 9.421510047969223e-06, + "loss": 17.4497, + "step": 9848 + }, + { + "epoch": 0.18003180580181696, + "grad_norm": 6.416446149254776, + "learning_rate": 9.421371827886203e-06, + "loss": 17.3684, + "step": 9849 + }, + { + "epoch": 0.18005008499826347, + "grad_norm": 6.15143758420953, + "learning_rate": 9.42123359230666e-06, + "loss": 17.264, + "step": 9850 + }, + { + "epoch": 0.18006836419471, + "grad_norm": 7.432611704220462, + "learning_rate": 9.421095341231077e-06, + "loss": 17.6573, + "step": 9851 + }, + { + "epoch": 0.18008664339115651, + "grad_norm": 6.965229914547005, + "learning_rate": 9.420957074659938e-06, + "loss": 18.2525, + "step": 9852 + }, + { + "epoch": 0.18010492258760305, + "grad_norm": 6.079438368269971, + "learning_rate": 9.420818792593729e-06, + "loss": 17.3922, + "step": 9853 + }, + { + "epoch": 0.18012320178404959, + "grad_norm": 7.949627215604142, + "learning_rate": 9.420680495032932e-06, + "loss": 18.1077, + "step": 9854 + }, + { + "epoch": 0.1801414809804961, + "grad_norm": 6.430165678416542, + "learning_rate": 9.420542181978034e-06, + "loss": 17.4384, + "step": 9855 + }, + { + "epoch": 0.18015976017694263, + "grad_norm": 8.106001315581224, + "learning_rate": 9.42040385342952e-06, + "loss": 18.1259, + "step": 9856 + }, + { + "epoch": 0.18017803937338914, + "grad_norm": 6.931273081965224, + "learning_rate": 9.420265509387874e-06, + "loss": 17.6603, + "step": 9857 + }, + { + "epoch": 0.18019631856983567, + "grad_norm": 7.860322040698806, + "learning_rate": 9.420127149853581e-06, + "loss": 17.6595, + "step": 9858 + }, + { + "epoch": 0.18021459776628218, + "grad_norm": 7.522948316284845, + "learning_rate": 9.419988774827126e-06, + "loss": 17.9882, + "step": 9859 + }, + { + "epoch": 0.18023287696272872, + "grad_norm": 6.3089213056183535, + "learning_rate": 
9.419850384308993e-06, + "loss": 17.4897, + "step": 9860 + }, + { + "epoch": 0.18025115615917525, + "grad_norm": 7.5995078159913225, + "learning_rate": 9.419711978299668e-06, + "loss": 17.979, + "step": 9861 + }, + { + "epoch": 0.18026943535562176, + "grad_norm": 6.3541903338947066, + "learning_rate": 9.419573556799637e-06, + "loss": 17.4696, + "step": 9862 + }, + { + "epoch": 0.1802877145520683, + "grad_norm": 6.597222063316642, + "learning_rate": 9.419435119809384e-06, + "loss": 17.7096, + "step": 9863 + }, + { + "epoch": 0.1803059937485148, + "grad_norm": 6.657552047121865, + "learning_rate": 9.419296667329394e-06, + "loss": 17.4063, + "step": 9864 + }, + { + "epoch": 0.18032427294496134, + "grad_norm": 6.959719643856069, + "learning_rate": 9.419158199360153e-06, + "loss": 17.5825, + "step": 9865 + }, + { + "epoch": 0.18034255214140787, + "grad_norm": 6.589167108452301, + "learning_rate": 9.419019715902146e-06, + "loss": 17.6088, + "step": 9866 + }, + { + "epoch": 0.18036083133785438, + "grad_norm": 6.289589791858059, + "learning_rate": 9.418881216955858e-06, + "loss": 17.3522, + "step": 9867 + }, + { + "epoch": 0.18037911053430092, + "grad_norm": 6.72399211882093, + "learning_rate": 9.418742702521774e-06, + "loss": 17.2417, + "step": 9868 + }, + { + "epoch": 0.18039738973074743, + "grad_norm": 7.565165408967926, + "learning_rate": 9.418604172600382e-06, + "loss": 17.848, + "step": 9869 + }, + { + "epoch": 0.18041566892719396, + "grad_norm": 5.029258380477208, + "learning_rate": 9.418465627192165e-06, + "loss": 16.9919, + "step": 9870 + }, + { + "epoch": 0.1804339481236405, + "grad_norm": 6.991090099572182, + "learning_rate": 9.41832706629761e-06, + "loss": 17.768, + "step": 9871 + }, + { + "epoch": 0.180452227320087, + "grad_norm": 6.551253626491615, + "learning_rate": 9.418188489917202e-06, + "loss": 17.4717, + "step": 9872 + }, + { + "epoch": 0.18047050651653354, + "grad_norm": 5.915775501322046, + "learning_rate": 9.418049898051425e-06, + "loss": 17.1038, + "step": 9873 + }, + { + "epoch": 0.18048878571298005, + "grad_norm": 7.9635352828765225, + "learning_rate": 9.417911290700767e-06, + "loss": 18.2166, + "step": 9874 + }, + { + "epoch": 0.18050706490942658, + "grad_norm": 7.690572860183641, + "learning_rate": 9.417772667865714e-06, + "loss": 18.1678, + "step": 9875 + }, + { + "epoch": 0.1805253441058731, + "grad_norm": 6.588647620562926, + "learning_rate": 9.417634029546751e-06, + "loss": 17.3728, + "step": 9876 + }, + { + "epoch": 0.18054362330231963, + "grad_norm": 6.904329940733219, + "learning_rate": 9.417495375744365e-06, + "loss": 17.7944, + "step": 9877 + }, + { + "epoch": 0.18056190249876616, + "grad_norm": 5.317146962442455, + "learning_rate": 9.41735670645904e-06, + "loss": 16.975, + "step": 9878 + }, + { + "epoch": 0.18058018169521267, + "grad_norm": 7.1684649269101195, + "learning_rate": 9.417218021691263e-06, + "loss": 17.7719, + "step": 9879 + }, + { + "epoch": 0.1805984608916592, + "grad_norm": 5.861741382044178, + "learning_rate": 9.417079321441522e-06, + "loss": 17.4348, + "step": 9880 + }, + { + "epoch": 0.18061674008810572, + "grad_norm": 7.671495340190093, + "learning_rate": 9.416940605710298e-06, + "loss": 18.0847, + "step": 9881 + }, + { + "epoch": 0.18063501928455225, + "grad_norm": 7.011991807235905, + "learning_rate": 9.416801874498082e-06, + "loss": 17.7374, + "step": 9882 + }, + { + "epoch": 0.1806532984809988, + "grad_norm": 6.787270191127173, + "learning_rate": 9.41666312780536e-06, + "loss": 17.9021, + "step": 9883 + }, + { + "epoch": 
0.1806715776774453, + "grad_norm": 7.500985617097195, + "learning_rate": 9.416524365632615e-06, + "loss": 18.2551, + "step": 9884 + }, + { + "epoch": 0.18068985687389183, + "grad_norm": 6.540146133715169, + "learning_rate": 9.416385587980337e-06, + "loss": 17.3675, + "step": 9885 + }, + { + "epoch": 0.18070813607033834, + "grad_norm": 6.088448734189069, + "learning_rate": 9.41624679484901e-06, + "loss": 17.3463, + "step": 9886 + }, + { + "epoch": 0.18072641526678487, + "grad_norm": 7.054512600951021, + "learning_rate": 9.416107986239121e-06, + "loss": 17.8378, + "step": 9887 + }, + { + "epoch": 0.1807446944632314, + "grad_norm": 6.275529072292613, + "learning_rate": 9.415969162151157e-06, + "loss": 17.5825, + "step": 9888 + }, + { + "epoch": 0.18076297365967792, + "grad_norm": 5.658267038377804, + "learning_rate": 9.415830322585604e-06, + "loss": 17.0669, + "step": 9889 + }, + { + "epoch": 0.18078125285612445, + "grad_norm": 8.489460603910983, + "learning_rate": 9.415691467542948e-06, + "loss": 18.1464, + "step": 9890 + }, + { + "epoch": 0.18079953205257096, + "grad_norm": 6.28354033344867, + "learning_rate": 9.415552597023679e-06, + "loss": 17.5069, + "step": 9891 + }, + { + "epoch": 0.1808178112490175, + "grad_norm": 8.075302061599936, + "learning_rate": 9.41541371102828e-06, + "loss": 17.9517, + "step": 9892 + }, + { + "epoch": 0.180836090445464, + "grad_norm": 6.797481229337563, + "learning_rate": 9.41527480955724e-06, + "loss": 17.6253, + "step": 9893 + }, + { + "epoch": 0.18085436964191054, + "grad_norm": 6.691582855088232, + "learning_rate": 9.415135892611043e-06, + "loss": 17.5782, + "step": 9894 + }, + { + "epoch": 0.18087264883835708, + "grad_norm": 8.017505693934982, + "learning_rate": 9.414996960190179e-06, + "loss": 18.2248, + "step": 9895 + }, + { + "epoch": 0.18089092803480358, + "grad_norm": 8.452980092894855, + "learning_rate": 9.414858012295134e-06, + "loss": 18.584, + "step": 9896 + }, + { + "epoch": 0.18090920723125012, + "grad_norm": 6.917700380185664, + "learning_rate": 9.414719048926393e-06, + "loss": 17.5312, + "step": 9897 + }, + { + "epoch": 0.18092748642769663, + "grad_norm": 6.910260643563813, + "learning_rate": 9.414580070084446e-06, + "loss": 17.5055, + "step": 9898 + }, + { + "epoch": 0.18094576562414316, + "grad_norm": 10.117433650943333, + "learning_rate": 9.41444107576978e-06, + "loss": 18.4476, + "step": 9899 + }, + { + "epoch": 0.1809640448205897, + "grad_norm": 7.18260649555454, + "learning_rate": 9.414302065982882e-06, + "loss": 17.9295, + "step": 9900 + }, + { + "epoch": 0.1809823240170362, + "grad_norm": 6.440786598596819, + "learning_rate": 9.414163040724235e-06, + "loss": 17.7439, + "step": 9901 + }, + { + "epoch": 0.18100060321348274, + "grad_norm": 7.063733425870379, + "learning_rate": 9.414023999994332e-06, + "loss": 17.7516, + "step": 9902 + }, + { + "epoch": 0.18101888240992925, + "grad_norm": 9.131458627224548, + "learning_rate": 9.413884943793657e-06, + "loss": 18.7895, + "step": 9903 + }, + { + "epoch": 0.18103716160637578, + "grad_norm": 6.618670036736652, + "learning_rate": 9.413745872122698e-06, + "loss": 17.6756, + "step": 9904 + }, + { + "epoch": 0.18105544080282232, + "grad_norm": 7.358234377721218, + "learning_rate": 9.413606784981943e-06, + "loss": 17.6505, + "step": 9905 + }, + { + "epoch": 0.18107371999926883, + "grad_norm": 6.151961923717866, + "learning_rate": 9.413467682371879e-06, + "loss": 17.4507, + "step": 9906 + }, + { + "epoch": 0.18109199919571536, + "grad_norm": 7.212741272650908, + "learning_rate": 
9.413328564292994e-06, + "loss": 17.6875, + "step": 9907 + }, + { + "epoch": 0.18111027839216187, + "grad_norm": 8.969986783784599, + "learning_rate": 9.413189430745776e-06, + "loss": 17.6648, + "step": 9908 + }, + { + "epoch": 0.1811285575886084, + "grad_norm": 8.318671839400224, + "learning_rate": 9.413050281730712e-06, + "loss": 18.4372, + "step": 9909 + }, + { + "epoch": 0.18114683678505492, + "grad_norm": 7.071225881915424, + "learning_rate": 9.412911117248289e-06, + "loss": 17.5117, + "step": 9910 + }, + { + "epoch": 0.18116511598150145, + "grad_norm": 6.090548068212628, + "learning_rate": 9.412771937298995e-06, + "loss": 17.4232, + "step": 9911 + }, + { + "epoch": 0.181183395177948, + "grad_norm": 7.119644974550266, + "learning_rate": 9.412632741883319e-06, + "loss": 17.7013, + "step": 9912 + }, + { + "epoch": 0.1812016743743945, + "grad_norm": 7.907039076382293, + "learning_rate": 9.412493531001747e-06, + "loss": 18.181, + "step": 9913 + }, + { + "epoch": 0.18121995357084103, + "grad_norm": 7.043056196251649, + "learning_rate": 9.41235430465477e-06, + "loss": 17.7467, + "step": 9914 + }, + { + "epoch": 0.18123823276728754, + "grad_norm": 7.077150538842783, + "learning_rate": 9.412215062842872e-06, + "loss": 17.6708, + "step": 9915 + }, + { + "epoch": 0.18125651196373407, + "grad_norm": 9.17201448218564, + "learning_rate": 9.412075805566545e-06, + "loss": 18.8287, + "step": 9916 + }, + { + "epoch": 0.1812747911601806, + "grad_norm": 5.606994231286592, + "learning_rate": 9.411936532826274e-06, + "loss": 17.4046, + "step": 9917 + }, + { + "epoch": 0.18129307035662712, + "grad_norm": 6.554508488550973, + "learning_rate": 9.41179724462255e-06, + "loss": 17.5461, + "step": 9918 + }, + { + "epoch": 0.18131134955307365, + "grad_norm": 7.503180711274367, + "learning_rate": 9.411657940955858e-06, + "loss": 17.7825, + "step": 9919 + }, + { + "epoch": 0.18132962874952016, + "grad_norm": 6.995120372790944, + "learning_rate": 9.411518621826687e-06, + "loss": 17.9555, + "step": 9920 + }, + { + "epoch": 0.1813479079459667, + "grad_norm": 7.139291895107338, + "learning_rate": 9.411379287235527e-06, + "loss": 17.5175, + "step": 9921 + }, + { + "epoch": 0.18136618714241323, + "grad_norm": 7.344403626673978, + "learning_rate": 9.411239937182866e-06, + "loss": 17.9815, + "step": 9922 + }, + { + "epoch": 0.18138446633885974, + "grad_norm": 6.886892858166152, + "learning_rate": 9.411100571669192e-06, + "loss": 17.9211, + "step": 9923 + }, + { + "epoch": 0.18140274553530628, + "grad_norm": 6.923457077091651, + "learning_rate": 9.41096119069499e-06, + "loss": 18.0434, + "step": 9924 + }, + { + "epoch": 0.18142102473175278, + "grad_norm": 7.035732712622207, + "learning_rate": 9.410821794260756e-06, + "loss": 17.9492, + "step": 9925 + }, + { + "epoch": 0.18143930392819932, + "grad_norm": 7.3410971156511415, + "learning_rate": 9.410682382366973e-06, + "loss": 17.9117, + "step": 9926 + }, + { + "epoch": 0.18145758312464583, + "grad_norm": 7.014893919851392, + "learning_rate": 9.410542955014131e-06, + "loss": 17.8421, + "step": 9927 + }, + { + "epoch": 0.18147586232109236, + "grad_norm": 8.667741479907686, + "learning_rate": 9.410403512202718e-06, + "loss": 18.7418, + "step": 9928 + }, + { + "epoch": 0.1814941415175389, + "grad_norm": 7.676598377016749, + "learning_rate": 9.410264053933222e-06, + "loss": 17.7935, + "step": 9929 + }, + { + "epoch": 0.1815124207139854, + "grad_norm": 9.793170691535114, + "learning_rate": 9.410124580206136e-06, + "loss": 17.8563, + "step": 9930 + }, + { + "epoch": 
0.18153069991043194, + "grad_norm": 6.292817351753322, + "learning_rate": 9.409985091021944e-06, + "loss": 17.4834, + "step": 9931 + }, + { + "epoch": 0.18154897910687845, + "grad_norm": 5.9271806986499005, + "learning_rate": 9.409845586381139e-06, + "loss": 17.2664, + "step": 9932 + }, + { + "epoch": 0.18156725830332499, + "grad_norm": 6.017048522299535, + "learning_rate": 9.409706066284206e-06, + "loss": 17.3449, + "step": 9933 + }, + { + "epoch": 0.18158553749977152, + "grad_norm": 6.069216586223563, + "learning_rate": 9.409566530731638e-06, + "loss": 17.3073, + "step": 9934 + }, + { + "epoch": 0.18160381669621803, + "grad_norm": 6.3845350850509615, + "learning_rate": 9.409426979723919e-06, + "loss": 17.4427, + "step": 9935 + }, + { + "epoch": 0.18162209589266456, + "grad_norm": 5.896983928031587, + "learning_rate": 9.409287413261543e-06, + "loss": 17.2706, + "step": 9936 + }, + { + "epoch": 0.18164037508911107, + "grad_norm": 6.096302848327695, + "learning_rate": 9.409147831344997e-06, + "loss": 17.468, + "step": 9937 + }, + { + "epoch": 0.1816586542855576, + "grad_norm": 7.979016556832373, + "learning_rate": 9.40900823397477e-06, + "loss": 18.0505, + "step": 9938 + }, + { + "epoch": 0.18167693348200414, + "grad_norm": 7.068405087083261, + "learning_rate": 9.408868621151352e-06, + "loss": 17.8445, + "step": 9939 + }, + { + "epoch": 0.18169521267845065, + "grad_norm": 6.774797892285199, + "learning_rate": 9.408728992875233e-06, + "loss": 17.6941, + "step": 9940 + }, + { + "epoch": 0.1817134918748972, + "grad_norm": 6.888916109967754, + "learning_rate": 9.408589349146901e-06, + "loss": 17.7118, + "step": 9941 + }, + { + "epoch": 0.1817317710713437, + "grad_norm": 6.737448231143625, + "learning_rate": 9.408449689966845e-06, + "loss": 17.4894, + "step": 9942 + }, + { + "epoch": 0.18175005026779023, + "grad_norm": 6.93402269608677, + "learning_rate": 9.408310015335555e-06, + "loss": 17.6713, + "step": 9943 + }, + { + "epoch": 0.18176832946423674, + "grad_norm": 7.468910853899415, + "learning_rate": 9.408170325253524e-06, + "loss": 17.9388, + "step": 9944 + }, + { + "epoch": 0.18178660866068327, + "grad_norm": 6.567975554918502, + "learning_rate": 9.408030619721235e-06, + "loss": 17.6042, + "step": 9945 + }, + { + "epoch": 0.1818048878571298, + "grad_norm": 7.991804699293005, + "learning_rate": 9.407890898739182e-06, + "loss": 18.2704, + "step": 9946 + }, + { + "epoch": 0.18182316705357632, + "grad_norm": 6.919307371545031, + "learning_rate": 9.407751162307855e-06, + "loss": 17.7989, + "step": 9947 + }, + { + "epoch": 0.18184144625002285, + "grad_norm": 6.082884753677575, + "learning_rate": 9.407611410427742e-06, + "loss": 17.2717, + "step": 9948 + }, + { + "epoch": 0.18185972544646936, + "grad_norm": 7.396876469407254, + "learning_rate": 9.407471643099333e-06, + "loss": 17.7669, + "step": 9949 + }, + { + "epoch": 0.1818780046429159, + "grad_norm": 6.193139067930993, + "learning_rate": 9.407331860323118e-06, + "loss": 17.1319, + "step": 9950 + }, + { + "epoch": 0.18189628383936243, + "grad_norm": 7.05041214785699, + "learning_rate": 9.407192062099589e-06, + "loss": 17.6034, + "step": 9951 + }, + { + "epoch": 0.18191456303580894, + "grad_norm": 7.293537176686419, + "learning_rate": 9.407052248429234e-06, + "loss": 17.9462, + "step": 9952 + }, + { + "epoch": 0.18193284223225548, + "grad_norm": 5.572292741725751, + "learning_rate": 9.406912419312543e-06, + "loss": 17.0839, + "step": 9953 + }, + { + "epoch": 0.18195112142870198, + "grad_norm": 5.805385138851213, + "learning_rate": 
9.406772574750006e-06, + "loss": 17.2494, + "step": 9954 + }, + { + "epoch": 0.18196940062514852, + "grad_norm": 7.218236653398646, + "learning_rate": 9.406632714742115e-06, + "loss": 17.6228, + "step": 9955 + }, + { + "epoch": 0.18198767982159506, + "grad_norm": 5.406273098967501, + "learning_rate": 9.406492839289355e-06, + "loss": 17.088, + "step": 9956 + }, + { + "epoch": 0.18200595901804156, + "grad_norm": 8.057469465486134, + "learning_rate": 9.406352948392224e-06, + "loss": 18.4128, + "step": 9957 + }, + { + "epoch": 0.1820242382144881, + "grad_norm": 8.223652800163661, + "learning_rate": 9.406213042051207e-06, + "loss": 17.7673, + "step": 9958 + }, + { + "epoch": 0.1820425174109346, + "grad_norm": 6.334895683420072, + "learning_rate": 9.406073120266794e-06, + "loss": 17.4154, + "step": 9959 + }, + { + "epoch": 0.18206079660738114, + "grad_norm": 7.323503860436332, + "learning_rate": 9.405933183039479e-06, + "loss": 18.0966, + "step": 9960 + }, + { + "epoch": 0.18207907580382765, + "grad_norm": 6.70794441563754, + "learning_rate": 9.40579323036975e-06, + "loss": 17.5014, + "step": 9961 + }, + { + "epoch": 0.18209735500027419, + "grad_norm": 7.085619004783449, + "learning_rate": 9.405653262258097e-06, + "loss": 17.9702, + "step": 9962 + }, + { + "epoch": 0.18211563419672072, + "grad_norm": 7.806110273938615, + "learning_rate": 9.405513278705013e-06, + "loss": 18.001, + "step": 9963 + }, + { + "epoch": 0.18213391339316723, + "grad_norm": 6.311410983703041, + "learning_rate": 9.405373279710988e-06, + "loss": 17.3548, + "step": 9964 + }, + { + "epoch": 0.18215219258961377, + "grad_norm": 7.53607579749945, + "learning_rate": 9.40523326527651e-06, + "loss": 17.7716, + "step": 9965 + }, + { + "epoch": 0.18217047178606027, + "grad_norm": 7.118059738999945, + "learning_rate": 9.405093235402072e-06, + "loss": 17.9583, + "step": 9966 + }, + { + "epoch": 0.1821887509825068, + "grad_norm": 6.554212662623746, + "learning_rate": 9.404953190088165e-06, + "loss": 17.8426, + "step": 9967 + }, + { + "epoch": 0.18220703017895334, + "grad_norm": 5.7344565315641916, + "learning_rate": 9.40481312933528e-06, + "loss": 17.0365, + "step": 9968 + }, + { + "epoch": 0.18222530937539985, + "grad_norm": 7.77142609011555, + "learning_rate": 9.404673053143905e-06, + "loss": 17.7052, + "step": 9969 + }, + { + "epoch": 0.1822435885718464, + "grad_norm": 6.707515501917969, + "learning_rate": 9.404532961514536e-06, + "loss": 17.8102, + "step": 9970 + }, + { + "epoch": 0.1822618677682929, + "grad_norm": 5.616284311175807, + "learning_rate": 9.40439285444766e-06, + "loss": 17.2327, + "step": 9971 + }, + { + "epoch": 0.18228014696473943, + "grad_norm": 7.549665088110754, + "learning_rate": 9.404252731943768e-06, + "loss": 17.9735, + "step": 9972 + }, + { + "epoch": 0.18229842616118597, + "grad_norm": 8.599801538164002, + "learning_rate": 9.404112594003353e-06, + "loss": 18.1074, + "step": 9973 + }, + { + "epoch": 0.18231670535763247, + "grad_norm": 7.606886871850189, + "learning_rate": 9.403972440626907e-06, + "loss": 18.0169, + "step": 9974 + }, + { + "epoch": 0.182334984554079, + "grad_norm": 6.391215479650936, + "learning_rate": 9.403832271814918e-06, + "loss": 17.5384, + "step": 9975 + }, + { + "epoch": 0.18235326375052552, + "grad_norm": 5.869188700593644, + "learning_rate": 9.40369208756788e-06, + "loss": 17.2025, + "step": 9976 + }, + { + "epoch": 0.18237154294697205, + "grad_norm": 7.249672643884424, + "learning_rate": 9.403551887886282e-06, + "loss": 17.8932, + "step": 9977 + }, + { + "epoch": 0.18238982214341856, 
+ "grad_norm": 7.626645051386975, + "learning_rate": 9.403411672770618e-06, + "loss": 17.8717, + "step": 9978 + }, + { + "epoch": 0.1824081013398651, + "grad_norm": 7.5216968414726955, + "learning_rate": 9.403271442221378e-06, + "loss": 18.1983, + "step": 9979 + }, + { + "epoch": 0.18242638053631163, + "grad_norm": 6.94418319476342, + "learning_rate": 9.403131196239053e-06, + "loss": 17.8647, + "step": 9980 + }, + { + "epoch": 0.18244465973275814, + "grad_norm": 7.65154381549128, + "learning_rate": 9.402990934824137e-06, + "loss": 17.8206, + "step": 9981 + }, + { + "epoch": 0.18246293892920468, + "grad_norm": 7.73998953084367, + "learning_rate": 9.402850657977119e-06, + "loss": 17.909, + "step": 9982 + }, + { + "epoch": 0.18248121812565118, + "grad_norm": 7.880188873447152, + "learning_rate": 9.40271036569849e-06, + "loss": 17.5407, + "step": 9983 + }, + { + "epoch": 0.18249949732209772, + "grad_norm": 6.521102938355329, + "learning_rate": 9.402570057988746e-06, + "loss": 17.4695, + "step": 9984 + }, + { + "epoch": 0.18251777651854426, + "grad_norm": 7.2910739138281295, + "learning_rate": 9.402429734848374e-06, + "loss": 17.7697, + "step": 9985 + }, + { + "epoch": 0.18253605571499076, + "grad_norm": 7.031356327043205, + "learning_rate": 9.402289396277869e-06, + "loss": 17.8313, + "step": 9986 + }, + { + "epoch": 0.1825543349114373, + "grad_norm": 7.243873441445957, + "learning_rate": 9.40214904227772e-06, + "loss": 17.6849, + "step": 9987 + }, + { + "epoch": 0.1825726141078838, + "grad_norm": 7.303002672344109, + "learning_rate": 9.402008672848422e-06, + "loss": 17.6079, + "step": 9988 + }, + { + "epoch": 0.18259089330433034, + "grad_norm": 5.444014869557507, + "learning_rate": 9.401868287990465e-06, + "loss": 17.0645, + "step": 9989 + }, + { + "epoch": 0.18260917250077688, + "grad_norm": 7.635553170981905, + "learning_rate": 9.401727887704341e-06, + "loss": 18.2581, + "step": 9990 + }, + { + "epoch": 0.1826274516972234, + "grad_norm": 6.646319730785736, + "learning_rate": 9.401587471990544e-06, + "loss": 17.7106, + "step": 9991 + }, + { + "epoch": 0.18264573089366992, + "grad_norm": 6.3913214185636855, + "learning_rate": 9.401447040849565e-06, + "loss": 17.4812, + "step": 9992 + }, + { + "epoch": 0.18266401009011643, + "grad_norm": 6.793394615008496, + "learning_rate": 9.401306594281896e-06, + "loss": 17.6085, + "step": 9993 + }, + { + "epoch": 0.18268228928656297, + "grad_norm": 5.887989498671422, + "learning_rate": 9.401166132288028e-06, + "loss": 17.2802, + "step": 9994 + }, + { + "epoch": 0.18270056848300947, + "grad_norm": 6.55002944821401, + "learning_rate": 9.401025654868455e-06, + "loss": 17.7171, + "step": 9995 + }, + { + "epoch": 0.182718847679456, + "grad_norm": 7.108601306973718, + "learning_rate": 9.40088516202367e-06, + "loss": 18.119, + "step": 9996 + }, + { + "epoch": 0.18273712687590254, + "grad_norm": 7.148578925418961, + "learning_rate": 9.400744653754164e-06, + "loss": 17.9143, + "step": 9997 + }, + { + "epoch": 0.18275540607234905, + "grad_norm": 7.107843541409638, + "learning_rate": 9.400604130060429e-06, + "loss": 17.6363, + "step": 9998 + }, + { + "epoch": 0.1827736852687956, + "grad_norm": 6.141869069173877, + "learning_rate": 9.400463590942959e-06, + "loss": 17.3609, + "step": 9999 + }, + { + "epoch": 0.1827919644652421, + "grad_norm": 6.372164605878134, + "learning_rate": 9.400323036402246e-06, + "loss": 17.2801, + "step": 10000 + }, + { + "epoch": 0.18281024366168863, + "grad_norm": 7.07052019562697, + "learning_rate": 9.400182466438783e-06, + "loss": 17.6534, 
+ "step": 10001 + }, + { + "epoch": 0.18282852285813517, + "grad_norm": 6.7555910254831915, + "learning_rate": 9.400041881053062e-06, + "loss": 17.7892, + "step": 10002 + }, + { + "epoch": 0.18284680205458168, + "grad_norm": 5.703967614405472, + "learning_rate": 9.399901280245576e-06, + "loss": 17.1553, + "step": 10003 + }, + { + "epoch": 0.1828650812510282, + "grad_norm": 7.350236892808791, + "learning_rate": 9.399760664016817e-06, + "loss": 17.7499, + "step": 10004 + }, + { + "epoch": 0.18288336044747472, + "grad_norm": 9.360314148330353, + "learning_rate": 9.399620032367279e-06, + "loss": 18.4034, + "step": 10005 + }, + { + "epoch": 0.18290163964392125, + "grad_norm": 6.742925435216869, + "learning_rate": 9.399479385297456e-06, + "loss": 17.5546, + "step": 10006 + }, + { + "epoch": 0.1829199188403678, + "grad_norm": 7.9932538496041, + "learning_rate": 9.399338722807838e-06, + "loss": 18.0338, + "step": 10007 + }, + { + "epoch": 0.1829381980368143, + "grad_norm": 5.844579655978202, + "learning_rate": 9.39919804489892e-06, + "loss": 17.2263, + "step": 10008 + }, + { + "epoch": 0.18295647723326083, + "grad_norm": 7.339076586709035, + "learning_rate": 9.399057351571194e-06, + "loss": 17.9018, + "step": 10009 + }, + { + "epoch": 0.18297475642970734, + "grad_norm": 6.562142229656559, + "learning_rate": 9.398916642825155e-06, + "loss": 17.3051, + "step": 10010 + }, + { + "epoch": 0.18299303562615388, + "grad_norm": 5.616188088521416, + "learning_rate": 9.398775918661295e-06, + "loss": 17.1079, + "step": 10011 + }, + { + "epoch": 0.18301131482260038, + "grad_norm": 6.6211991045661165, + "learning_rate": 9.398635179080105e-06, + "loss": 17.5124, + "step": 10012 + }, + { + "epoch": 0.18302959401904692, + "grad_norm": 6.806000524611527, + "learning_rate": 9.398494424082082e-06, + "loss": 17.4438, + "step": 10013 + }, + { + "epoch": 0.18304787321549346, + "grad_norm": 7.556791874915172, + "learning_rate": 9.398353653667719e-06, + "loss": 17.9809, + "step": 10014 + }, + { + "epoch": 0.18306615241193996, + "grad_norm": 6.576924936052816, + "learning_rate": 9.398212867837505e-06, + "loss": 17.6573, + "step": 10015 + }, + { + "epoch": 0.1830844316083865, + "grad_norm": 6.834198367818092, + "learning_rate": 9.398072066591937e-06, + "loss": 17.6258, + "step": 10016 + }, + { + "epoch": 0.183102710804833, + "grad_norm": 6.899273350066197, + "learning_rate": 9.39793124993151e-06, + "loss": 17.6503, + "step": 10017 + }, + { + "epoch": 0.18312099000127954, + "grad_norm": 7.19435807009442, + "learning_rate": 9.397790417856714e-06, + "loss": 17.7391, + "step": 10018 + }, + { + "epoch": 0.18313926919772608, + "grad_norm": 5.963027513530443, + "learning_rate": 9.397649570368046e-06, + "loss": 17.2179, + "step": 10019 + }, + { + "epoch": 0.1831575483941726, + "grad_norm": 6.606682036623378, + "learning_rate": 9.397508707465997e-06, + "loss": 17.327, + "step": 10020 + }, + { + "epoch": 0.18317582759061912, + "grad_norm": 8.032489710799243, + "learning_rate": 9.39736782915106e-06, + "loss": 17.8206, + "step": 10021 + }, + { + "epoch": 0.18319410678706563, + "grad_norm": 5.835923644342947, + "learning_rate": 9.397226935423734e-06, + "loss": 17.0689, + "step": 10022 + }, + { + "epoch": 0.18321238598351217, + "grad_norm": 6.729184548387186, + "learning_rate": 9.397086026284505e-06, + "loss": 17.6471, + "step": 10023 + }, + { + "epoch": 0.1832306651799587, + "grad_norm": 7.4706077757716285, + "learning_rate": 9.396945101733874e-06, + "loss": 17.8337, + "step": 10024 + }, + { + "epoch": 0.1832489443764052, + "grad_norm": 
9.125053382198983, + "learning_rate": 9.396804161772331e-06, + "loss": 17.1154, + "step": 10025 + }, + { + "epoch": 0.18326722357285175, + "grad_norm": 6.067168561109368, + "learning_rate": 9.396663206400372e-06, + "loss": 17.4188, + "step": 10026 + }, + { + "epoch": 0.18328550276929825, + "grad_norm": 5.82922530940505, + "learning_rate": 9.396522235618488e-06, + "loss": 17.2414, + "step": 10027 + }, + { + "epoch": 0.1833037819657448, + "grad_norm": 6.557609650978757, + "learning_rate": 9.396381249427176e-06, + "loss": 17.5663, + "step": 10028 + }, + { + "epoch": 0.1833220611621913, + "grad_norm": 7.821631448805034, + "learning_rate": 9.396240247826929e-06, + "loss": 17.898, + "step": 10029 + }, + { + "epoch": 0.18334034035863783, + "grad_norm": 6.802719553007564, + "learning_rate": 9.39609923081824e-06, + "loss": 17.6314, + "step": 10030 + }, + { + "epoch": 0.18335861955508437, + "grad_norm": 6.593847507280106, + "learning_rate": 9.395958198401608e-06, + "loss": 18.0311, + "step": 10031 + }, + { + "epoch": 0.18337689875153088, + "grad_norm": 6.582581645853874, + "learning_rate": 9.395817150577522e-06, + "loss": 17.6809, + "step": 10032 + }, + { + "epoch": 0.1833951779479774, + "grad_norm": 6.975542477321055, + "learning_rate": 9.395676087346478e-06, + "loss": 18.0356, + "step": 10033 + }, + { + "epoch": 0.18341345714442392, + "grad_norm": 7.001343203133638, + "learning_rate": 9.395535008708972e-06, + "loss": 17.8048, + "step": 10034 + }, + { + "epoch": 0.18343173634087045, + "grad_norm": 7.530149791963838, + "learning_rate": 9.395393914665496e-06, + "loss": 18.0788, + "step": 10035 + }, + { + "epoch": 0.183450015537317, + "grad_norm": 11.947231797910732, + "learning_rate": 9.395252805216545e-06, + "loss": 18.8291, + "step": 10036 + }, + { + "epoch": 0.1834682947337635, + "grad_norm": 8.45783714915619, + "learning_rate": 9.395111680362616e-06, + "loss": 18.9009, + "step": 10037 + }, + { + "epoch": 0.18348657393021003, + "grad_norm": 6.250795698995749, + "learning_rate": 9.394970540104203e-06, + "loss": 17.2324, + "step": 10038 + }, + { + "epoch": 0.18350485312665654, + "grad_norm": 6.887931607530826, + "learning_rate": 9.394829384441796e-06, + "loss": 17.6447, + "step": 10039 + }, + { + "epoch": 0.18352313232310308, + "grad_norm": 6.881663556628881, + "learning_rate": 9.394688213375897e-06, + "loss": 17.4677, + "step": 10040 + }, + { + "epoch": 0.1835414115195496, + "grad_norm": 7.094733645881841, + "learning_rate": 9.394547026906996e-06, + "loss": 17.7156, + "step": 10041 + }, + { + "epoch": 0.18355969071599612, + "grad_norm": 6.721369910362387, + "learning_rate": 9.394405825035588e-06, + "loss": 17.5653, + "step": 10042 + }, + { + "epoch": 0.18357796991244266, + "grad_norm": 7.905659665079018, + "learning_rate": 9.394264607762171e-06, + "loss": 18.1893, + "step": 10043 + }, + { + "epoch": 0.18359624910888916, + "grad_norm": 8.182496878526997, + "learning_rate": 9.394123375087236e-06, + "loss": 18.1963, + "step": 10044 + }, + { + "epoch": 0.1836145283053357, + "grad_norm": 5.924467389440648, + "learning_rate": 9.39398212701128e-06, + "loss": 17.3593, + "step": 10045 + }, + { + "epoch": 0.1836328075017822, + "grad_norm": 7.158563543479542, + "learning_rate": 9.393840863534798e-06, + "loss": 17.7377, + "step": 10046 + }, + { + "epoch": 0.18365108669822874, + "grad_norm": 6.6072075670565855, + "learning_rate": 9.393699584658287e-06, + "loss": 17.5862, + "step": 10047 + }, + { + "epoch": 0.18366936589467528, + "grad_norm": 6.610929634307579, + "learning_rate": 9.393558290382238e-06, + "loss": 
17.4893, + "step": 10048 + }, + { + "epoch": 0.1836876450911218, + "grad_norm": 7.360027450851649, + "learning_rate": 9.393416980707148e-06, + "loss": 17.7952, + "step": 10049 + }, + { + "epoch": 0.18370592428756832, + "grad_norm": 5.487437849285093, + "learning_rate": 9.393275655633515e-06, + "loss": 17.0889, + "step": 10050 + }, + { + "epoch": 0.18372420348401483, + "grad_norm": 7.046362910788258, + "learning_rate": 9.393134315161832e-06, + "loss": 17.9629, + "step": 10051 + }, + { + "epoch": 0.18374248268046137, + "grad_norm": 8.130850826922257, + "learning_rate": 9.392992959292593e-06, + "loss": 17.8732, + "step": 10052 + }, + { + "epoch": 0.1837607618769079, + "grad_norm": 7.145303003288416, + "learning_rate": 9.392851588026295e-06, + "loss": 17.8343, + "step": 10053 + }, + { + "epoch": 0.1837790410733544, + "grad_norm": 7.878470503338938, + "learning_rate": 9.392710201363433e-06, + "loss": 17.9847, + "step": 10054 + }, + { + "epoch": 0.18379732026980095, + "grad_norm": 7.385497631069745, + "learning_rate": 9.392568799304504e-06, + "loss": 18.0775, + "step": 10055 + }, + { + "epoch": 0.18381559946624745, + "grad_norm": 7.56836478360785, + "learning_rate": 9.392427381850002e-06, + "loss": 18.3344, + "step": 10056 + }, + { + "epoch": 0.183833878662694, + "grad_norm": 6.030781229462143, + "learning_rate": 9.392285949000422e-06, + "loss": 17.0676, + "step": 10057 + }, + { + "epoch": 0.18385215785914052, + "grad_norm": 6.762860666840852, + "learning_rate": 9.392144500756261e-06, + "loss": 17.5847, + "step": 10058 + }, + { + "epoch": 0.18387043705558703, + "grad_norm": 8.958143732662874, + "learning_rate": 9.392003037118018e-06, + "loss": 18.4871, + "step": 10059 + }, + { + "epoch": 0.18388871625203357, + "grad_norm": 7.639826919790793, + "learning_rate": 9.391861558086183e-06, + "loss": 18.2061, + "step": 10060 + }, + { + "epoch": 0.18390699544848008, + "grad_norm": 5.734010063500707, + "learning_rate": 9.391720063661253e-06, + "loss": 17.0315, + "step": 10061 + }, + { + "epoch": 0.1839252746449266, + "grad_norm": 8.518019074220268, + "learning_rate": 9.391578553843727e-06, + "loss": 17.5116, + "step": 10062 + }, + { + "epoch": 0.18394355384137312, + "grad_norm": 5.843246554835414, + "learning_rate": 9.3914370286341e-06, + "loss": 17.3507, + "step": 10063 + }, + { + "epoch": 0.18396183303781966, + "grad_norm": 5.959346547632752, + "learning_rate": 9.391295488032866e-06, + "loss": 17.3375, + "step": 10064 + }, + { + "epoch": 0.1839801122342662, + "grad_norm": 8.566221840609426, + "learning_rate": 9.391153932040524e-06, + "loss": 18.4116, + "step": 10065 + }, + { + "epoch": 0.1839983914307127, + "grad_norm": 7.258289236998518, + "learning_rate": 9.391012360657567e-06, + "loss": 17.9535, + "step": 10066 + }, + { + "epoch": 0.18401667062715923, + "grad_norm": 8.74913373707712, + "learning_rate": 9.390870773884493e-06, + "loss": 18.2529, + "step": 10067 + }, + { + "epoch": 0.18403494982360574, + "grad_norm": 6.276195917759392, + "learning_rate": 9.390729171721797e-06, + "loss": 17.5967, + "step": 10068 + }, + { + "epoch": 0.18405322902005228, + "grad_norm": 6.043802023406444, + "learning_rate": 9.390587554169978e-06, + "loss": 17.4002, + "step": 10069 + }, + { + "epoch": 0.1840715082164988, + "grad_norm": 7.056645386674894, + "learning_rate": 9.390445921229529e-06, + "loss": 17.681, + "step": 10070 + }, + { + "epoch": 0.18408978741294532, + "grad_norm": 6.877355921850625, + "learning_rate": 9.390304272900949e-06, + "loss": 17.8717, + "step": 10071 + }, + { + "epoch": 0.18410806660939186, + 
"grad_norm": 7.529191829145646, + "learning_rate": 9.390162609184735e-06, + "loss": 18.1924, + "step": 10072 + }, + { + "epoch": 0.18412634580583837, + "grad_norm": 7.1874899146623905, + "learning_rate": 9.390020930081378e-06, + "loss": 17.9067, + "step": 10073 + }, + { + "epoch": 0.1841446250022849, + "grad_norm": 4.955860626321352, + "learning_rate": 9.389879235591381e-06, + "loss": 16.8302, + "step": 10074 + }, + { + "epoch": 0.18416290419873144, + "grad_norm": 7.089664707773817, + "learning_rate": 9.38973752571524e-06, + "loss": 17.7532, + "step": 10075 + }, + { + "epoch": 0.18418118339517794, + "grad_norm": 6.880420490934243, + "learning_rate": 9.389595800453447e-06, + "loss": 17.7116, + "step": 10076 + }, + { + "epoch": 0.18419946259162448, + "grad_norm": 7.391470423927875, + "learning_rate": 9.389454059806502e-06, + "loss": 18.0362, + "step": 10077 + }, + { + "epoch": 0.184217741788071, + "grad_norm": 8.1552671824139, + "learning_rate": 9.389312303774902e-06, + "loss": 18.1661, + "step": 10078 + }, + { + "epoch": 0.18423602098451752, + "grad_norm": 4.865258771724745, + "learning_rate": 9.389170532359145e-06, + "loss": 16.8387, + "step": 10079 + }, + { + "epoch": 0.18425430018096403, + "grad_norm": 6.256518850905666, + "learning_rate": 9.389028745559724e-06, + "loss": 17.465, + "step": 10080 + }, + { + "epoch": 0.18427257937741057, + "grad_norm": 7.334441424716792, + "learning_rate": 9.388886943377139e-06, + "loss": 17.9603, + "step": 10081 + }, + { + "epoch": 0.1842908585738571, + "grad_norm": 6.080709347340776, + "learning_rate": 9.388745125811884e-06, + "loss": 17.4325, + "step": 10082 + }, + { + "epoch": 0.1843091377703036, + "grad_norm": 7.740074002067427, + "learning_rate": 9.38860329286446e-06, + "loss": 17.9683, + "step": 10083 + }, + { + "epoch": 0.18432741696675015, + "grad_norm": 6.758180192876033, + "learning_rate": 9.388461444535364e-06, + "loss": 17.5877, + "step": 10084 + }, + { + "epoch": 0.18434569616319665, + "grad_norm": 7.360070317757277, + "learning_rate": 9.38831958082509e-06, + "loss": 18.0347, + "step": 10085 + }, + { + "epoch": 0.1843639753596432, + "grad_norm": 6.233753484045297, + "learning_rate": 9.388177701734135e-06, + "loss": 17.5804, + "step": 10086 + }, + { + "epoch": 0.18438225455608973, + "grad_norm": 7.317271295294663, + "learning_rate": 9.388035807263e-06, + "loss": 18.0553, + "step": 10087 + }, + { + "epoch": 0.18440053375253623, + "grad_norm": 6.537674890608169, + "learning_rate": 9.38789389741218e-06, + "loss": 17.4096, + "step": 10088 + }, + { + "epoch": 0.18441881294898277, + "grad_norm": 6.928024621313798, + "learning_rate": 9.387751972182171e-06, + "loss": 17.7727, + "step": 10089 + }, + { + "epoch": 0.18443709214542928, + "grad_norm": 7.5708364315683, + "learning_rate": 9.387610031573474e-06, + "loss": 17.839, + "step": 10090 + }, + { + "epoch": 0.1844553713418758, + "grad_norm": 7.908171570383844, + "learning_rate": 9.387468075586583e-06, + "loss": 18.1779, + "step": 10091 + }, + { + "epoch": 0.18447365053832235, + "grad_norm": 5.5242661833756435, + "learning_rate": 9.387326104221999e-06, + "loss": 17.1945, + "step": 10092 + }, + { + "epoch": 0.18449192973476886, + "grad_norm": 5.186178471195878, + "learning_rate": 9.387184117480217e-06, + "loss": 16.9887, + "step": 10093 + }, + { + "epoch": 0.1845102089312154, + "grad_norm": 6.9561430161599755, + "learning_rate": 9.387042115361735e-06, + "loss": 17.9478, + "step": 10094 + }, + { + "epoch": 0.1845284881276619, + "grad_norm": 7.4253635627651775, + "learning_rate": 9.38690009786705e-06, + 
"loss": 18.0151, + "step": 10095 + }, + { + "epoch": 0.18454676732410843, + "grad_norm": 6.854517931928672, + "learning_rate": 9.386758064996663e-06, + "loss": 17.6601, + "step": 10096 + }, + { + "epoch": 0.18456504652055494, + "grad_norm": 7.891688813940142, + "learning_rate": 9.386616016751069e-06, + "loss": 18.3511, + "step": 10097 + }, + { + "epoch": 0.18458332571700148, + "grad_norm": 6.971177894420033, + "learning_rate": 9.386473953130766e-06, + "loss": 17.6837, + "step": 10098 + }, + { + "epoch": 0.18460160491344801, + "grad_norm": 7.559625105872039, + "learning_rate": 9.386331874136252e-06, + "loss": 17.6225, + "step": 10099 + }, + { + "epoch": 0.18461988410989452, + "grad_norm": 9.163406483025124, + "learning_rate": 9.386189779768026e-06, + "loss": 18.6586, + "step": 10100 + }, + { + "epoch": 0.18463816330634106, + "grad_norm": 6.754811481736553, + "learning_rate": 9.386047670026585e-06, + "loss": 17.5206, + "step": 10101 + }, + { + "epoch": 0.18465644250278757, + "grad_norm": 6.034743620294371, + "learning_rate": 9.385905544912427e-06, + "loss": 17.3234, + "step": 10102 + }, + { + "epoch": 0.1846747216992341, + "grad_norm": 6.749802836729946, + "learning_rate": 9.385763404426053e-06, + "loss": 17.4607, + "step": 10103 + }, + { + "epoch": 0.18469300089568064, + "grad_norm": 8.09839861060369, + "learning_rate": 9.385621248567957e-06, + "loss": 18.2918, + "step": 10104 + }, + { + "epoch": 0.18471128009212714, + "grad_norm": 6.523424208415734, + "learning_rate": 9.38547907733864e-06, + "loss": 17.3888, + "step": 10105 + }, + { + "epoch": 0.18472955928857368, + "grad_norm": 7.515683686348755, + "learning_rate": 9.385336890738599e-06, + "loss": 18.2368, + "step": 10106 + }, + { + "epoch": 0.1847478384850202, + "grad_norm": 6.969248317430007, + "learning_rate": 9.385194688768334e-06, + "loss": 17.6281, + "step": 10107 + }, + { + "epoch": 0.18476611768146672, + "grad_norm": 6.434898297464353, + "learning_rate": 9.38505247142834e-06, + "loss": 17.5382, + "step": 10108 + }, + { + "epoch": 0.18478439687791326, + "grad_norm": 5.989770127717803, + "learning_rate": 9.384910238719119e-06, + "loss": 17.2119, + "step": 10109 + }, + { + "epoch": 0.18480267607435977, + "grad_norm": 5.946040994438568, + "learning_rate": 9.384767990641166e-06, + "loss": 17.28, + "step": 10110 + }, + { + "epoch": 0.1848209552708063, + "grad_norm": 7.4997105516833775, + "learning_rate": 9.384625727194983e-06, + "loss": 18.1866, + "step": 10111 + }, + { + "epoch": 0.1848392344672528, + "grad_norm": 10.378955170882492, + "learning_rate": 9.384483448381068e-06, + "loss": 17.5535, + "step": 10112 + }, + { + "epoch": 0.18485751366369935, + "grad_norm": 8.133534058588747, + "learning_rate": 9.384341154199918e-06, + "loss": 17.8558, + "step": 10113 + }, + { + "epoch": 0.18487579286014585, + "grad_norm": 5.925984036602524, + "learning_rate": 9.384198844652034e-06, + "loss": 17.2775, + "step": 10114 + }, + { + "epoch": 0.1848940720565924, + "grad_norm": 8.087471806115738, + "learning_rate": 9.384056519737912e-06, + "loss": 17.8861, + "step": 10115 + }, + { + "epoch": 0.18491235125303893, + "grad_norm": 7.360788970198534, + "learning_rate": 9.383914179458053e-06, + "loss": 17.9566, + "step": 10116 + }, + { + "epoch": 0.18493063044948543, + "grad_norm": 7.029999292910309, + "learning_rate": 9.383771823812957e-06, + "loss": 17.6375, + "step": 10117 + }, + { + "epoch": 0.18494890964593197, + "grad_norm": 5.860795062387218, + "learning_rate": 9.383629452803118e-06, + "loss": 17.3074, + "step": 10118 + }, + { + "epoch": 
0.18496718884237848, + "grad_norm": 6.72733824557979, + "learning_rate": 9.38348706642904e-06, + "loss": 17.6756, + "step": 10119 + }, + { + "epoch": 0.184985468038825, + "grad_norm": 6.348613506593034, + "learning_rate": 9.38334466469122e-06, + "loss": 17.251, + "step": 10120 + }, + { + "epoch": 0.18500374723527155, + "grad_norm": 6.4407131573845025, + "learning_rate": 9.383202247590157e-06, + "loss": 17.4239, + "step": 10121 + }, + { + "epoch": 0.18502202643171806, + "grad_norm": 7.66417138062931, + "learning_rate": 9.38305981512635e-06, + "loss": 18.2959, + "step": 10122 + }, + { + "epoch": 0.1850403056281646, + "grad_norm": 8.096402083085778, + "learning_rate": 9.3829173673003e-06, + "loss": 18.2023, + "step": 10123 + }, + { + "epoch": 0.1850585848246111, + "grad_norm": 7.441095682577782, + "learning_rate": 9.382774904112505e-06, + "loss": 17.9975, + "step": 10124 + }, + { + "epoch": 0.18507686402105764, + "grad_norm": 5.874787975602297, + "learning_rate": 9.382632425563462e-06, + "loss": 17.1499, + "step": 10125 + }, + { + "epoch": 0.18509514321750417, + "grad_norm": 8.396712904272395, + "learning_rate": 9.382489931653675e-06, + "loss": 18.1694, + "step": 10126 + }, + { + "epoch": 0.18511342241395068, + "grad_norm": 6.880236844657977, + "learning_rate": 9.38234742238364e-06, + "loss": 17.9352, + "step": 10127 + }, + { + "epoch": 0.18513170161039721, + "grad_norm": 6.891231969914025, + "learning_rate": 9.38220489775386e-06, + "loss": 17.673, + "step": 10128 + }, + { + "epoch": 0.18514998080684372, + "grad_norm": 7.265333835185887, + "learning_rate": 9.382062357764828e-06, + "loss": 17.8743, + "step": 10129 + }, + { + "epoch": 0.18516826000329026, + "grad_norm": 6.99979969720114, + "learning_rate": 9.38191980241705e-06, + "loss": 17.6382, + "step": 10130 + }, + { + "epoch": 0.18518653919973677, + "grad_norm": 8.187962009688826, + "learning_rate": 9.381777231711024e-06, + "loss": 18.2169, + "step": 10131 + }, + { + "epoch": 0.1852048183961833, + "grad_norm": 8.05353103911642, + "learning_rate": 9.381634645647247e-06, + "loss": 18.227, + "step": 10132 + }, + { + "epoch": 0.18522309759262984, + "grad_norm": 7.505607921149841, + "learning_rate": 9.38149204422622e-06, + "loss": 17.8716, + "step": 10133 + }, + { + "epoch": 0.18524137678907635, + "grad_norm": 6.635816173964813, + "learning_rate": 9.381349427448448e-06, + "loss": 17.619, + "step": 10134 + }, + { + "epoch": 0.18525965598552288, + "grad_norm": 7.975934284092967, + "learning_rate": 9.381206795314424e-06, + "loss": 18.2861, + "step": 10135 + }, + { + "epoch": 0.1852779351819694, + "grad_norm": 7.2156964395727945, + "learning_rate": 9.38106414782465e-06, + "loss": 17.5814, + "step": 10136 + }, + { + "epoch": 0.18529621437841592, + "grad_norm": 6.413651784388292, + "learning_rate": 9.380921484979626e-06, + "loss": 17.7215, + "step": 10137 + }, + { + "epoch": 0.18531449357486246, + "grad_norm": 6.167896859875163, + "learning_rate": 9.380778806779853e-06, + "loss": 17.3194, + "step": 10138 + }, + { + "epoch": 0.18533277277130897, + "grad_norm": 10.13812019241804, + "learning_rate": 9.38063611322583e-06, + "loss": 18.9814, + "step": 10139 + }, + { + "epoch": 0.1853510519677555, + "grad_norm": 6.022633303778132, + "learning_rate": 9.380493404318059e-06, + "loss": 17.2726, + "step": 10140 + }, + { + "epoch": 0.185369331164202, + "grad_norm": 8.295729304229326, + "learning_rate": 9.380350680057038e-06, + "loss": 18.1683, + "step": 10141 + }, + { + "epoch": 0.18538761036064855, + "grad_norm": 7.2665935336368, + "learning_rate": 
9.380207940443266e-06, + "loss": 18.1368, + "step": 10142 + }, + { + "epoch": 0.18540588955709508, + "grad_norm": 6.446637878102153, + "learning_rate": 9.380065185477247e-06, + "loss": 17.5363, + "step": 10143 + }, + { + "epoch": 0.1854241687535416, + "grad_norm": 6.383097911432984, + "learning_rate": 9.379922415159479e-06, + "loss": 17.4873, + "step": 10144 + }, + { + "epoch": 0.18544244794998813, + "grad_norm": 6.344580774345908, + "learning_rate": 9.379779629490463e-06, + "loss": 17.3647, + "step": 10145 + }, + { + "epoch": 0.18546072714643463, + "grad_norm": 7.348473901289618, + "learning_rate": 9.379636828470702e-06, + "loss": 17.9987, + "step": 10146 + }, + { + "epoch": 0.18547900634288117, + "grad_norm": 6.160850989268245, + "learning_rate": 9.379494012100691e-06, + "loss": 17.3402, + "step": 10147 + }, + { + "epoch": 0.18549728553932768, + "grad_norm": 5.993419326775649, + "learning_rate": 9.379351180380934e-06, + "loss": 17.3536, + "step": 10148 + }, + { + "epoch": 0.1855155647357742, + "grad_norm": 8.509094960522217, + "learning_rate": 9.379208333311932e-06, + "loss": 17.8672, + "step": 10149 + }, + { + "epoch": 0.18553384393222075, + "grad_norm": 7.807610592514754, + "learning_rate": 9.379065470894185e-06, + "loss": 18.1017, + "step": 10150 + }, + { + "epoch": 0.18555212312866726, + "grad_norm": 7.123159893464457, + "learning_rate": 9.378922593128192e-06, + "loss": 17.5521, + "step": 10151 + }, + { + "epoch": 0.1855704023251138, + "grad_norm": 7.48066657787623, + "learning_rate": 9.378779700014457e-06, + "loss": 18.2265, + "step": 10152 + }, + { + "epoch": 0.1855886815215603, + "grad_norm": 6.13410547093554, + "learning_rate": 9.378636791553479e-06, + "loss": 17.1917, + "step": 10153 + }, + { + "epoch": 0.18560696071800684, + "grad_norm": 7.5276101799697654, + "learning_rate": 9.378493867745757e-06, + "loss": 17.8194, + "step": 10154 + }, + { + "epoch": 0.18562523991445337, + "grad_norm": 7.534516614694993, + "learning_rate": 9.378350928591795e-06, + "loss": 18.3422, + "step": 10155 + }, + { + "epoch": 0.18564351911089988, + "grad_norm": 7.091677494357677, + "learning_rate": 9.378207974092094e-06, + "loss": 17.7917, + "step": 10156 + }, + { + "epoch": 0.18566179830734642, + "grad_norm": 8.561682749273809, + "learning_rate": 9.378065004247154e-06, + "loss": 18.1131, + "step": 10157 + }, + { + "epoch": 0.18568007750379292, + "grad_norm": 5.976930234644536, + "learning_rate": 9.377922019057475e-06, + "loss": 17.1155, + "step": 10158 + }, + { + "epoch": 0.18569835670023946, + "grad_norm": 7.4921041502776005, + "learning_rate": 9.377779018523558e-06, + "loss": 18.0279, + "step": 10159 + }, + { + "epoch": 0.185716635896686, + "grad_norm": 7.823224316133014, + "learning_rate": 9.377636002645907e-06, + "loss": 17.7456, + "step": 10160 + }, + { + "epoch": 0.1857349150931325, + "grad_norm": 6.3364572003429585, + "learning_rate": 9.377492971425022e-06, + "loss": 17.4062, + "step": 10161 + }, + { + "epoch": 0.18575319428957904, + "grad_norm": 5.9663246026209995, + "learning_rate": 9.377349924861404e-06, + "loss": 17.2638, + "step": 10162 + }, + { + "epoch": 0.18577147348602555, + "grad_norm": 6.679882467011436, + "learning_rate": 9.377206862955554e-06, + "loss": 17.5757, + "step": 10163 + }, + { + "epoch": 0.18578975268247208, + "grad_norm": 6.940412229607728, + "learning_rate": 9.377063785707974e-06, + "loss": 17.6697, + "step": 10164 + }, + { + "epoch": 0.1858080318789186, + "grad_norm": 6.6646624172232105, + "learning_rate": 9.376920693119164e-06, + "loss": 17.6532, + "step": 10165 + }, 
+ { + "epoch": 0.18582631107536512, + "grad_norm": 7.239341796820249, + "learning_rate": 9.376777585189629e-06, + "loss": 17.9977, + "step": 10166 + }, + { + "epoch": 0.18584459027181166, + "grad_norm": 7.771468138586223, + "learning_rate": 9.376634461919867e-06, + "loss": 18.2488, + "step": 10167 + }, + { + "epoch": 0.18586286946825817, + "grad_norm": 8.85702272829546, + "learning_rate": 9.37649132331038e-06, + "loss": 17.7924, + "step": 10168 + }, + { + "epoch": 0.1858811486647047, + "grad_norm": 7.300670685126151, + "learning_rate": 9.376348169361673e-06, + "loss": 17.9292, + "step": 10169 + }, + { + "epoch": 0.1858994278611512, + "grad_norm": 8.538267262281579, + "learning_rate": 9.376205000074243e-06, + "loss": 17.9711, + "step": 10170 + }, + { + "epoch": 0.18591770705759775, + "grad_norm": 6.43279408233411, + "learning_rate": 9.376061815448596e-06, + "loss": 17.5059, + "step": 10171 + }, + { + "epoch": 0.18593598625404428, + "grad_norm": 6.055184820090015, + "learning_rate": 9.375918615485231e-06, + "loss": 17.1337, + "step": 10172 + }, + { + "epoch": 0.1859542654504908, + "grad_norm": 7.993831665485996, + "learning_rate": 9.375775400184652e-06, + "loss": 17.4964, + "step": 10173 + }, + { + "epoch": 0.18597254464693733, + "grad_norm": 6.944128214910914, + "learning_rate": 9.37563216954736e-06, + "loss": 17.7661, + "step": 10174 + }, + { + "epoch": 0.18599082384338383, + "grad_norm": 7.7628760757659725, + "learning_rate": 9.375488923573857e-06, + "loss": 18.064, + "step": 10175 + }, + { + "epoch": 0.18600910303983037, + "grad_norm": 6.433721378115287, + "learning_rate": 9.375345662264644e-06, + "loss": 17.7262, + "step": 10176 + }, + { + "epoch": 0.1860273822362769, + "grad_norm": 6.346395656175717, + "learning_rate": 9.375202385620223e-06, + "loss": 17.3542, + "step": 10177 + }, + { + "epoch": 0.1860456614327234, + "grad_norm": 7.679772556378218, + "learning_rate": 9.375059093641099e-06, + "loss": 18.1816, + "step": 10178 + }, + { + "epoch": 0.18606394062916995, + "grad_norm": 6.895906852677195, + "learning_rate": 9.374915786327773e-06, + "loss": 17.5511, + "step": 10179 + }, + { + "epoch": 0.18608221982561646, + "grad_norm": 6.281701990782323, + "learning_rate": 9.374772463680745e-06, + "loss": 17.4323, + "step": 10180 + }, + { + "epoch": 0.186100499022063, + "grad_norm": 7.485888610640797, + "learning_rate": 9.374629125700522e-06, + "loss": 17.5209, + "step": 10181 + }, + { + "epoch": 0.1861187782185095, + "grad_norm": 5.6333109911431904, + "learning_rate": 9.374485772387602e-06, + "loss": 17.2553, + "step": 10182 + }, + { + "epoch": 0.18613705741495604, + "grad_norm": 7.691203740517854, + "learning_rate": 9.374342403742489e-06, + "loss": 17.6799, + "step": 10183 + }, + { + "epoch": 0.18615533661140257, + "grad_norm": 6.906312158650607, + "learning_rate": 9.374199019765685e-06, + "loss": 17.5729, + "step": 10184 + }, + { + "epoch": 0.18617361580784908, + "grad_norm": 6.663267416505823, + "learning_rate": 9.374055620457693e-06, + "loss": 17.6142, + "step": 10185 + }, + { + "epoch": 0.18619189500429562, + "grad_norm": 6.339292529984208, + "learning_rate": 9.373912205819016e-06, + "loss": 17.4268, + "step": 10186 + }, + { + "epoch": 0.18621017420074212, + "grad_norm": 7.67804688835375, + "learning_rate": 9.373768775850156e-06, + "loss": 17.8058, + "step": 10187 + }, + { + "epoch": 0.18622845339718866, + "grad_norm": 8.012542663727075, + "learning_rate": 9.373625330551617e-06, + "loss": 17.9881, + "step": 10188 + }, + { + "epoch": 0.1862467325936352, + "grad_norm": 7.4263693912563635, 
+ "learning_rate": 9.3734818699239e-06, + "loss": 18.0399, + "step": 10189 + }, + { + "epoch": 0.1862650117900817, + "grad_norm": 9.111480253737692, + "learning_rate": 9.373338393967508e-06, + "loss": 18.4965, + "step": 10190 + }, + { + "epoch": 0.18628329098652824, + "grad_norm": 6.5584971932398375, + "learning_rate": 9.373194902682945e-06, + "loss": 17.6057, + "step": 10191 + }, + { + "epoch": 0.18630157018297475, + "grad_norm": 7.604951707353146, + "learning_rate": 9.373051396070713e-06, + "loss": 18.274, + "step": 10192 + }, + { + "epoch": 0.18631984937942128, + "grad_norm": 7.375005504809956, + "learning_rate": 9.372907874131316e-06, + "loss": 18.1264, + "step": 10193 + }, + { + "epoch": 0.18633812857586782, + "grad_norm": 7.43140604220699, + "learning_rate": 9.372764336865255e-06, + "loss": 17.9132, + "step": 10194 + }, + { + "epoch": 0.18635640777231433, + "grad_norm": 8.193563811574354, + "learning_rate": 9.372620784273036e-06, + "loss": 18.137, + "step": 10195 + }, + { + "epoch": 0.18637468696876086, + "grad_norm": 8.009154005190014, + "learning_rate": 9.372477216355158e-06, + "loss": 17.9132, + "step": 10196 + }, + { + "epoch": 0.18639296616520737, + "grad_norm": 7.128934812209609, + "learning_rate": 9.372333633112129e-06, + "loss": 18.0419, + "step": 10197 + }, + { + "epoch": 0.1864112453616539, + "grad_norm": 7.317620610851088, + "learning_rate": 9.37219003454445e-06, + "loss": 17.5791, + "step": 10198 + }, + { + "epoch": 0.1864295245581004, + "grad_norm": 6.525025639592337, + "learning_rate": 9.372046420652625e-06, + "loss": 17.7318, + "step": 10199 + }, + { + "epoch": 0.18644780375454695, + "grad_norm": 7.735597506460469, + "learning_rate": 9.371902791437155e-06, + "loss": 17.8672, + "step": 10200 + }, + { + "epoch": 0.18646608295099348, + "grad_norm": 8.226678540737261, + "learning_rate": 9.371759146898547e-06, + "loss": 17.9677, + "step": 10201 + }, + { + "epoch": 0.18648436214744, + "grad_norm": 5.501891707057994, + "learning_rate": 9.371615487037302e-06, + "loss": 17.2741, + "step": 10202 + }, + { + "epoch": 0.18650264134388653, + "grad_norm": 6.572065461356103, + "learning_rate": 9.371471811853923e-06, + "loss": 17.4299, + "step": 10203 + }, + { + "epoch": 0.18652092054033304, + "grad_norm": 8.720931550768686, + "learning_rate": 9.371328121348914e-06, + "loss": 18.4159, + "step": 10204 + }, + { + "epoch": 0.18653919973677957, + "grad_norm": 6.38453974087123, + "learning_rate": 9.37118441552278e-06, + "loss": 17.5934, + "step": 10205 + }, + { + "epoch": 0.1865574789332261, + "grad_norm": 7.3772736465978515, + "learning_rate": 9.371040694376026e-06, + "loss": 17.9282, + "step": 10206 + }, + { + "epoch": 0.18657575812967261, + "grad_norm": 8.31169856553042, + "learning_rate": 9.370896957909151e-06, + "loss": 18.1827, + "step": 10207 + }, + { + "epoch": 0.18659403732611915, + "grad_norm": 6.2329002713871695, + "learning_rate": 9.370753206122662e-06, + "loss": 17.4252, + "step": 10208 + }, + { + "epoch": 0.18661231652256566, + "grad_norm": 7.739947269091635, + "learning_rate": 9.370609439017064e-06, + "loss": 18.0942, + "step": 10209 + }, + { + "epoch": 0.1866305957190122, + "grad_norm": 6.710341963329881, + "learning_rate": 9.370465656592858e-06, + "loss": 17.6096, + "step": 10210 + }, + { + "epoch": 0.18664887491545873, + "grad_norm": 7.315130101646613, + "learning_rate": 9.37032185885055e-06, + "loss": 17.9044, + "step": 10211 + }, + { + "epoch": 0.18666715411190524, + "grad_norm": 6.345220118476805, + "learning_rate": 9.37017804579064e-06, + "loss": 17.5248, + "step": 
10212 + }, + { + "epoch": 0.18668543330835177, + "grad_norm": 6.268704186528599, + "learning_rate": 9.370034217413638e-06, + "loss": 17.661, + "step": 10213 + }, + { + "epoch": 0.18670371250479828, + "grad_norm": 5.01108789980958, + "learning_rate": 9.369890373720044e-06, + "loss": 16.9445, + "step": 10214 + }, + { + "epoch": 0.18672199170124482, + "grad_norm": 5.2239857534211245, + "learning_rate": 9.369746514710365e-06, + "loss": 16.9543, + "step": 10215 + }, + { + "epoch": 0.18674027089769132, + "grad_norm": 6.487987836666354, + "learning_rate": 9.369602640385102e-06, + "loss": 17.7461, + "step": 10216 + }, + { + "epoch": 0.18675855009413786, + "grad_norm": 7.865169897417005, + "learning_rate": 9.369458750744762e-06, + "loss": 18.0864, + "step": 10217 + }, + { + "epoch": 0.1867768292905844, + "grad_norm": 6.197612176392014, + "learning_rate": 9.369314845789847e-06, + "loss": 17.458, + "step": 10218 + }, + { + "epoch": 0.1867951084870309, + "grad_norm": 7.561435627743233, + "learning_rate": 9.369170925520865e-06, + "loss": 18.1334, + "step": 10219 + }, + { + "epoch": 0.18681338768347744, + "grad_norm": 7.44381941931186, + "learning_rate": 9.369026989938318e-06, + "loss": 17.7356, + "step": 10220 + }, + { + "epoch": 0.18683166687992395, + "grad_norm": 7.017214885184996, + "learning_rate": 9.368883039042706e-06, + "loss": 17.5028, + "step": 10221 + }, + { + "epoch": 0.18684994607637048, + "grad_norm": 7.229664469044334, + "learning_rate": 9.368739072834543e-06, + "loss": 17.5561, + "step": 10222 + }, + { + "epoch": 0.18686822527281702, + "grad_norm": 7.189737803230755, + "learning_rate": 9.368595091314326e-06, + "loss": 18.2759, + "step": 10223 + }, + { + "epoch": 0.18688650446926353, + "grad_norm": 8.319257418022968, + "learning_rate": 9.368451094482564e-06, + "loss": 18.3799, + "step": 10224 + }, + { + "epoch": 0.18690478366571006, + "grad_norm": 6.4889484367898005, + "learning_rate": 9.368307082339758e-06, + "loss": 17.5709, + "step": 10225 + }, + { + "epoch": 0.18692306286215657, + "grad_norm": 12.229828869233472, + "learning_rate": 9.368163054886417e-06, + "loss": 17.6928, + "step": 10226 + }, + { + "epoch": 0.1869413420586031, + "grad_norm": 6.322971098801259, + "learning_rate": 9.368019012123042e-06, + "loss": 17.4152, + "step": 10227 + }, + { + "epoch": 0.18695962125504964, + "grad_norm": 6.77865355835652, + "learning_rate": 9.36787495405014e-06, + "loss": 17.6817, + "step": 10228 + }, + { + "epoch": 0.18697790045149615, + "grad_norm": 6.426308198557916, + "learning_rate": 9.367730880668214e-06, + "loss": 17.4643, + "step": 10229 + }, + { + "epoch": 0.18699617964794268, + "grad_norm": 5.632297905265568, + "learning_rate": 9.367586791977772e-06, + "loss": 17.073, + "step": 10230 + }, + { + "epoch": 0.1870144588443892, + "grad_norm": 6.974509567515869, + "learning_rate": 9.367442687979317e-06, + "loss": 17.7247, + "step": 10231 + }, + { + "epoch": 0.18703273804083573, + "grad_norm": 6.83206984158418, + "learning_rate": 9.367298568673354e-06, + "loss": 17.5729, + "step": 10232 + }, + { + "epoch": 0.18705101723728224, + "grad_norm": 9.757596180231703, + "learning_rate": 9.367154434060389e-06, + "loss": 18.1063, + "step": 10233 + }, + { + "epoch": 0.18706929643372877, + "grad_norm": 6.313951935010378, + "learning_rate": 9.367010284140925e-06, + "loss": 17.4356, + "step": 10234 + }, + { + "epoch": 0.1870875756301753, + "grad_norm": 6.317780261667729, + "learning_rate": 9.366866118915469e-06, + "loss": 17.4707, + "step": 10235 + }, + { + "epoch": 0.18710585482662181, + "grad_norm": 
7.5308562210962195, + "learning_rate": 9.36672193838453e-06, + "loss": 18.0669, + "step": 10236 + }, + { + "epoch": 0.18712413402306835, + "grad_norm": 6.156414652177409, + "learning_rate": 9.366577742548606e-06, + "loss": 17.4318, + "step": 10237 + }, + { + "epoch": 0.18714241321951486, + "grad_norm": 7.545443507483131, + "learning_rate": 9.366433531408206e-06, + "loss": 17.7277, + "step": 10238 + }, + { + "epoch": 0.1871606924159614, + "grad_norm": 7.1117771320861145, + "learning_rate": 9.366289304963835e-06, + "loss": 17.7777, + "step": 10239 + }, + { + "epoch": 0.18717897161240793, + "grad_norm": 7.38012130805539, + "learning_rate": 9.366145063216002e-06, + "loss": 17.7226, + "step": 10240 + }, + { + "epoch": 0.18719725080885444, + "grad_norm": 6.287584397312286, + "learning_rate": 9.366000806165208e-06, + "loss": 17.5169, + "step": 10241 + }, + { + "epoch": 0.18721553000530097, + "grad_norm": 8.272480728809974, + "learning_rate": 9.365856533811958e-06, + "loss": 18.2174, + "step": 10242 + }, + { + "epoch": 0.18723380920174748, + "grad_norm": 6.334337383323769, + "learning_rate": 9.365712246156762e-06, + "loss": 17.4599, + "step": 10243 + }, + { + "epoch": 0.18725208839819402, + "grad_norm": 6.252816761629262, + "learning_rate": 9.365567943200122e-06, + "loss": 17.4577, + "step": 10244 + }, + { + "epoch": 0.18727036759464055, + "grad_norm": 7.720308446724791, + "learning_rate": 9.365423624942546e-06, + "loss": 17.9001, + "step": 10245 + }, + { + "epoch": 0.18728864679108706, + "grad_norm": 6.447776379913213, + "learning_rate": 9.365279291384539e-06, + "loss": 17.6432, + "step": 10246 + }, + { + "epoch": 0.1873069259875336, + "grad_norm": 6.534097759809123, + "learning_rate": 9.365134942526606e-06, + "loss": 17.6352, + "step": 10247 + }, + { + "epoch": 0.1873252051839801, + "grad_norm": 5.86259723785051, + "learning_rate": 9.364990578369255e-06, + "loss": 17.3302, + "step": 10248 + }, + { + "epoch": 0.18734348438042664, + "grad_norm": 7.858264480931946, + "learning_rate": 9.36484619891299e-06, + "loss": 18.0367, + "step": 10249 + }, + { + "epoch": 0.18736176357687315, + "grad_norm": 6.153744360184581, + "learning_rate": 9.364701804158318e-06, + "loss": 17.3333, + "step": 10250 + }, + { + "epoch": 0.18738004277331968, + "grad_norm": 6.815205240338957, + "learning_rate": 9.364557394105746e-06, + "loss": 17.5782, + "step": 10251 + }, + { + "epoch": 0.18739832196976622, + "grad_norm": 7.874192778617276, + "learning_rate": 9.364412968755777e-06, + "loss": 18.5423, + "step": 10252 + }, + { + "epoch": 0.18741660116621273, + "grad_norm": 8.207110183282222, + "learning_rate": 9.36426852810892e-06, + "loss": 18.8335, + "step": 10253 + }, + { + "epoch": 0.18743488036265926, + "grad_norm": 6.255530075436144, + "learning_rate": 9.36412407216568e-06, + "loss": 17.1486, + "step": 10254 + }, + { + "epoch": 0.18745315955910577, + "grad_norm": 5.798395753238181, + "learning_rate": 9.363979600926567e-06, + "loss": 17.2161, + "step": 10255 + }, + { + "epoch": 0.1874714387555523, + "grad_norm": 6.092057908902809, + "learning_rate": 9.363835114392082e-06, + "loss": 17.2954, + "step": 10256 + }, + { + "epoch": 0.18748971795199884, + "grad_norm": 5.69434192164635, + "learning_rate": 9.363690612562732e-06, + "loss": 17.2059, + "step": 10257 + }, + { + "epoch": 0.18750799714844535, + "grad_norm": 5.785054308131656, + "learning_rate": 9.363546095439026e-06, + "loss": 17.1083, + "step": 10258 + }, + { + "epoch": 0.18752627634489188, + "grad_norm": 6.479975436775092, + "learning_rate": 9.363401563021472e-06, + 
"loss": 17.6234, + "step": 10259 + }, + { + "epoch": 0.1875445555413384, + "grad_norm": 7.197517234570277, + "learning_rate": 9.363257015310572e-06, + "loss": 17.8155, + "step": 10260 + }, + { + "epoch": 0.18756283473778493, + "grad_norm": 6.108622891651722, + "learning_rate": 9.363112452306835e-06, + "loss": 17.2461, + "step": 10261 + }, + { + "epoch": 0.18758111393423146, + "grad_norm": 8.276899118951857, + "learning_rate": 9.362967874010768e-06, + "loss": 17.9235, + "step": 10262 + }, + { + "epoch": 0.18759939313067797, + "grad_norm": 6.631154795403799, + "learning_rate": 9.362823280422877e-06, + "loss": 17.6759, + "step": 10263 + }, + { + "epoch": 0.1876176723271245, + "grad_norm": 6.665722795146296, + "learning_rate": 9.362678671543668e-06, + "loss": 17.5608, + "step": 10264 + }, + { + "epoch": 0.18763595152357102, + "grad_norm": 7.2208338930364215, + "learning_rate": 9.36253404737365e-06, + "loss": 17.7458, + "step": 10265 + }, + { + "epoch": 0.18765423072001755, + "grad_norm": 8.015701508939888, + "learning_rate": 9.362389407913327e-06, + "loss": 18.339, + "step": 10266 + }, + { + "epoch": 0.18767250991646406, + "grad_norm": 7.048841773166798, + "learning_rate": 9.36224475316321e-06, + "loss": 17.832, + "step": 10267 + }, + { + "epoch": 0.1876907891129106, + "grad_norm": 6.466751494526317, + "learning_rate": 9.362100083123803e-06, + "loss": 17.7112, + "step": 10268 + }, + { + "epoch": 0.18770906830935713, + "grad_norm": 7.476181853417334, + "learning_rate": 9.361955397795613e-06, + "loss": 17.9526, + "step": 10269 + }, + { + "epoch": 0.18772734750580364, + "grad_norm": 7.011969367959185, + "learning_rate": 9.36181069717915e-06, + "loss": 17.746, + "step": 10270 + }, + { + "epoch": 0.18774562670225017, + "grad_norm": 6.297938662302559, + "learning_rate": 9.361665981274916e-06, + "loss": 17.3984, + "step": 10271 + }, + { + "epoch": 0.18776390589869668, + "grad_norm": 6.835725591588053, + "learning_rate": 9.361521250083422e-06, + "loss": 17.6402, + "step": 10272 + }, + { + "epoch": 0.18778218509514322, + "grad_norm": 6.118580864255345, + "learning_rate": 9.361376503605174e-06, + "loss": 17.5137, + "step": 10273 + }, + { + "epoch": 0.18780046429158975, + "grad_norm": 6.648036608556335, + "learning_rate": 9.361231741840684e-06, + "loss": 17.4866, + "step": 10274 + }, + { + "epoch": 0.18781874348803626, + "grad_norm": 6.4771008015554035, + "learning_rate": 9.361086964790452e-06, + "loss": 17.3811, + "step": 10275 + }, + { + "epoch": 0.1878370226844828, + "grad_norm": 6.243823701274997, + "learning_rate": 9.360942172454987e-06, + "loss": 17.1627, + "step": 10276 + }, + { + "epoch": 0.1878553018809293, + "grad_norm": 5.83650409867925, + "learning_rate": 9.360797364834799e-06, + "loss": 17.3612, + "step": 10277 + }, + { + "epoch": 0.18787358107737584, + "grad_norm": 8.524501459586926, + "learning_rate": 9.360652541930396e-06, + "loss": 18.3621, + "step": 10278 + }, + { + "epoch": 0.18789186027382238, + "grad_norm": 6.137853498680438, + "learning_rate": 9.360507703742285e-06, + "loss": 17.3574, + "step": 10279 + }, + { + "epoch": 0.18791013947026888, + "grad_norm": 7.107590055221519, + "learning_rate": 9.36036285027097e-06, + "loss": 17.6897, + "step": 10280 + }, + { + "epoch": 0.18792841866671542, + "grad_norm": 8.289986445811634, + "learning_rate": 9.360217981516963e-06, + "loss": 18.3256, + "step": 10281 + }, + { + "epoch": 0.18794669786316193, + "grad_norm": 8.307333242252078, + "learning_rate": 9.360073097480771e-06, + "loss": 17.9867, + "step": 10282 + }, + { + "epoch": 
0.18796497705960846, + "grad_norm": 6.639776770178107, + "learning_rate": 9.3599281981629e-06, + "loss": 17.4129, + "step": 10283 + }, + { + "epoch": 0.18798325625605497, + "grad_norm": 9.511438673072446, + "learning_rate": 9.35978328356386e-06, + "loss": 18.4594, + "step": 10284 + }, + { + "epoch": 0.1880015354525015, + "grad_norm": 7.389362953957318, + "learning_rate": 9.359638353684157e-06, + "loss": 17.4465, + "step": 10285 + }, + { + "epoch": 0.18801981464894804, + "grad_norm": 7.843654781813417, + "learning_rate": 9.3594934085243e-06, + "loss": 18.0209, + "step": 10286 + }, + { + "epoch": 0.18803809384539455, + "grad_norm": 5.828374392464251, + "learning_rate": 9.359348448084798e-06, + "loss": 17.119, + "step": 10287 + }, + { + "epoch": 0.18805637304184109, + "grad_norm": 6.8428373551619055, + "learning_rate": 9.359203472366158e-06, + "loss": 17.4392, + "step": 10288 + }, + { + "epoch": 0.1880746522382876, + "grad_norm": 6.622676320783083, + "learning_rate": 9.359058481368888e-06, + "loss": 17.6567, + "step": 10289 + }, + { + "epoch": 0.18809293143473413, + "grad_norm": 6.338188570627085, + "learning_rate": 9.358913475093496e-06, + "loss": 17.4636, + "step": 10290 + }, + { + "epoch": 0.18811121063118066, + "grad_norm": 6.943641282726379, + "learning_rate": 9.35876845354049e-06, + "loss": 17.5162, + "step": 10291 + }, + { + "epoch": 0.18812948982762717, + "grad_norm": 7.508261262549054, + "learning_rate": 9.358623416710378e-06, + "loss": 18.2523, + "step": 10292 + }, + { + "epoch": 0.1881477690240737, + "grad_norm": 7.615613676014433, + "learning_rate": 9.35847836460367e-06, + "loss": 17.8776, + "step": 10293 + }, + { + "epoch": 0.18816604822052022, + "grad_norm": 7.960862017040444, + "learning_rate": 9.358333297220875e-06, + "loss": 18.1081, + "step": 10294 + }, + { + "epoch": 0.18818432741696675, + "grad_norm": 7.710056369955521, + "learning_rate": 9.358188214562499e-06, + "loss": 17.9136, + "step": 10295 + }, + { + "epoch": 0.1882026066134133, + "grad_norm": 7.86632430016447, + "learning_rate": 9.35804311662905e-06, + "loss": 18.3673, + "step": 10296 + }, + { + "epoch": 0.1882208858098598, + "grad_norm": 7.047120650918058, + "learning_rate": 9.35789800342104e-06, + "loss": 17.8213, + "step": 10297 + }, + { + "epoch": 0.18823916500630633, + "grad_norm": 8.508225806407097, + "learning_rate": 9.357752874938975e-06, + "loss": 18.6244, + "step": 10298 + }, + { + "epoch": 0.18825744420275284, + "grad_norm": 7.954864284670036, + "learning_rate": 9.357607731183362e-06, + "loss": 18.0167, + "step": 10299 + }, + { + "epoch": 0.18827572339919937, + "grad_norm": 7.305427095963071, + "learning_rate": 9.357462572154716e-06, + "loss": 17.9168, + "step": 10300 + }, + { + "epoch": 0.18829400259564588, + "grad_norm": 7.117544417631457, + "learning_rate": 9.35731739785354e-06, + "loss": 17.4382, + "step": 10301 + }, + { + "epoch": 0.18831228179209242, + "grad_norm": 7.269573153020997, + "learning_rate": 9.357172208280344e-06, + "loss": 17.7793, + "step": 10302 + }, + { + "epoch": 0.18833056098853895, + "grad_norm": 5.5781282396621865, + "learning_rate": 9.357027003435638e-06, + "loss": 17.145, + "step": 10303 + }, + { + "epoch": 0.18834884018498546, + "grad_norm": 8.244892639621913, + "learning_rate": 9.356881783319932e-06, + "loss": 17.9596, + "step": 10304 + }, + { + "epoch": 0.188367119381432, + "grad_norm": 7.491096000135977, + "learning_rate": 9.356736547933731e-06, + "loss": 17.7012, + "step": 10305 + }, + { + "epoch": 0.1883853985778785, + "grad_norm": 6.639930293503112, + "learning_rate": 
9.356591297277548e-06, + "loss": 17.5532, + "step": 10306 + }, + { + "epoch": 0.18840367777432504, + "grad_norm": 7.712491671647756, + "learning_rate": 9.35644603135189e-06, + "loss": 17.6495, + "step": 10307 + }, + { + "epoch": 0.18842195697077158, + "grad_norm": 7.09722689162491, + "learning_rate": 9.356300750157266e-06, + "loss": 17.4289, + "step": 10308 + }, + { + "epoch": 0.18844023616721808, + "grad_norm": 7.023241890632782, + "learning_rate": 9.356155453694186e-06, + "loss": 17.7111, + "step": 10309 + }, + { + "epoch": 0.18845851536366462, + "grad_norm": 6.371635969895364, + "learning_rate": 9.356010141963161e-06, + "loss": 17.6677, + "step": 10310 + }, + { + "epoch": 0.18847679456011113, + "grad_norm": 6.038358992274468, + "learning_rate": 9.355864814964696e-06, + "loss": 17.4605, + "step": 10311 + }, + { + "epoch": 0.18849507375655766, + "grad_norm": 6.295758541209396, + "learning_rate": 9.355719472699306e-06, + "loss": 17.3981, + "step": 10312 + }, + { + "epoch": 0.1885133529530042, + "grad_norm": 7.163153660352481, + "learning_rate": 9.355574115167493e-06, + "loss": 17.5929, + "step": 10313 + }, + { + "epoch": 0.1885316321494507, + "grad_norm": 7.708420745067751, + "learning_rate": 9.355428742369774e-06, + "loss": 17.9523, + "step": 10314 + }, + { + "epoch": 0.18854991134589724, + "grad_norm": 7.916856030708843, + "learning_rate": 9.355283354306655e-06, + "loss": 18.0848, + "step": 10315 + }, + { + "epoch": 0.18856819054234375, + "grad_norm": 6.276171146796963, + "learning_rate": 9.355137950978644e-06, + "loss": 17.5003, + "step": 10316 + }, + { + "epoch": 0.18858646973879029, + "grad_norm": 5.716743447353856, + "learning_rate": 9.354992532386253e-06, + "loss": 17.2313, + "step": 10317 + }, + { + "epoch": 0.1886047489352368, + "grad_norm": 5.909377827740273, + "learning_rate": 9.35484709852999e-06, + "loss": 17.4874, + "step": 10318 + }, + { + "epoch": 0.18862302813168333, + "grad_norm": 7.926833432487396, + "learning_rate": 9.354701649410369e-06, + "loss": 18.0493, + "step": 10319 + }, + { + "epoch": 0.18864130732812986, + "grad_norm": 5.893369379778734, + "learning_rate": 9.354556185027894e-06, + "loss": 17.1941, + "step": 10320 + }, + { + "epoch": 0.18865958652457637, + "grad_norm": 6.925840564390966, + "learning_rate": 9.354410705383079e-06, + "loss": 17.9839, + "step": 10321 + }, + { + "epoch": 0.1886778657210229, + "grad_norm": 6.496801526721922, + "learning_rate": 9.354265210476432e-06, + "loss": 17.4445, + "step": 10322 + }, + { + "epoch": 0.18869614491746942, + "grad_norm": 7.164697364599269, + "learning_rate": 9.354119700308463e-06, + "loss": 17.9656, + "step": 10323 + }, + { + "epoch": 0.18871442411391595, + "grad_norm": 5.870553458649823, + "learning_rate": 9.353974174879684e-06, + "loss": 17.3856, + "step": 10324 + }, + { + "epoch": 0.1887327033103625, + "grad_norm": 6.473687299671137, + "learning_rate": 9.3538286341906e-06, + "loss": 17.3945, + "step": 10325 + }, + { + "epoch": 0.188750982506809, + "grad_norm": 6.800167833248619, + "learning_rate": 9.353683078241726e-06, + "loss": 17.5475, + "step": 10326 + }, + { + "epoch": 0.18876926170325553, + "grad_norm": 8.797610668770592, + "learning_rate": 9.35353750703357e-06, + "loss": 18.4934, + "step": 10327 + }, + { + "epoch": 0.18878754089970204, + "grad_norm": 6.9870955597911895, + "learning_rate": 9.353391920566643e-06, + "loss": 17.7022, + "step": 10328 + }, + { + "epoch": 0.18880582009614857, + "grad_norm": 7.91011523509433, + "learning_rate": 9.353246318841456e-06, + "loss": 17.9763, + "step": 10329 + }, + { + 
"epoch": 0.1888240992925951, + "grad_norm": 6.583339412986538, + "learning_rate": 9.353100701858517e-06, + "loss": 17.4026, + "step": 10330 + }, + { + "epoch": 0.18884237848904162, + "grad_norm": 7.0968858218294795, + "learning_rate": 9.35295506961834e-06, + "loss": 17.9342, + "step": 10331 + }, + { + "epoch": 0.18886065768548815, + "grad_norm": 7.034159509522916, + "learning_rate": 9.352809422121432e-06, + "loss": 17.4617, + "step": 10332 + }, + { + "epoch": 0.18887893688193466, + "grad_norm": 7.634212798384977, + "learning_rate": 9.352663759368303e-06, + "loss": 17.9708, + "step": 10333 + }, + { + "epoch": 0.1888972160783812, + "grad_norm": 7.91627024684817, + "learning_rate": 9.352518081359468e-06, + "loss": 18.0931, + "step": 10334 + }, + { + "epoch": 0.1889154952748277, + "grad_norm": 7.879522839290091, + "learning_rate": 9.352372388095435e-06, + "loss": 17.8632, + "step": 10335 + }, + { + "epoch": 0.18893377447127424, + "grad_norm": 5.875811968143654, + "learning_rate": 9.352226679576712e-06, + "loss": 17.447, + "step": 10336 + }, + { + "epoch": 0.18895205366772078, + "grad_norm": 6.97015228690603, + "learning_rate": 9.352080955803813e-06, + "loss": 17.7629, + "step": 10337 + }, + { + "epoch": 0.18897033286416728, + "grad_norm": 5.965939193317881, + "learning_rate": 9.351935216777248e-06, + "loss": 17.1888, + "step": 10338 + }, + { + "epoch": 0.18898861206061382, + "grad_norm": 7.103798476791659, + "learning_rate": 9.351789462497529e-06, + "loss": 17.7767, + "step": 10339 + }, + { + "epoch": 0.18900689125706033, + "grad_norm": 6.769213501368472, + "learning_rate": 9.351643692965164e-06, + "loss": 17.9347, + "step": 10340 + }, + { + "epoch": 0.18902517045350686, + "grad_norm": 7.488585050253277, + "learning_rate": 9.351497908180664e-06, + "loss": 17.8217, + "step": 10341 + }, + { + "epoch": 0.1890434496499534, + "grad_norm": 7.6060020362558545, + "learning_rate": 9.351352108144544e-06, + "loss": 18.2705, + "step": 10342 + }, + { + "epoch": 0.1890617288463999, + "grad_norm": 6.013510437455529, + "learning_rate": 9.351206292857312e-06, + "loss": 17.2332, + "step": 10343 + }, + { + "epoch": 0.18908000804284644, + "grad_norm": 7.556401584698262, + "learning_rate": 9.35106046231948e-06, + "loss": 18.0483, + "step": 10344 + }, + { + "epoch": 0.18909828723929295, + "grad_norm": 8.603068189092529, + "learning_rate": 9.350914616531557e-06, + "loss": 17.7321, + "step": 10345 + }, + { + "epoch": 0.18911656643573949, + "grad_norm": 13.481326330149399, + "learning_rate": 9.350768755494057e-06, + "loss": 17.8328, + "step": 10346 + }, + { + "epoch": 0.18913484563218602, + "grad_norm": 7.0546202021714945, + "learning_rate": 9.35062287920749e-06, + "loss": 17.8827, + "step": 10347 + }, + { + "epoch": 0.18915312482863253, + "grad_norm": 7.258429206863186, + "learning_rate": 9.350476987672367e-06, + "loss": 17.8897, + "step": 10348 + }, + { + "epoch": 0.18917140402507907, + "grad_norm": 6.951416055469755, + "learning_rate": 9.350331080889201e-06, + "loss": 18.0216, + "step": 10349 + }, + { + "epoch": 0.18918968322152557, + "grad_norm": 7.256919398246387, + "learning_rate": 9.3501851588585e-06, + "loss": 17.7268, + "step": 10350 + }, + { + "epoch": 0.1892079624179721, + "grad_norm": 6.828697846997594, + "learning_rate": 9.350039221580778e-06, + "loss": 17.7134, + "step": 10351 + }, + { + "epoch": 0.18922624161441862, + "grad_norm": 5.90178890801479, + "learning_rate": 9.349893269056547e-06, + "loss": 17.1515, + "step": 10352 + }, + { + "epoch": 0.18924452081086515, + "grad_norm": 6.496939715605593, + 
"learning_rate": 9.349747301286317e-06, + "loss": 17.6936, + "step": 10353 + }, + { + "epoch": 0.1892628000073117, + "grad_norm": 9.108829587225198, + "learning_rate": 9.349601318270601e-06, + "loss": 18.645, + "step": 10354 + }, + { + "epoch": 0.1892810792037582, + "grad_norm": 5.86049343307796, + "learning_rate": 9.349455320009907e-06, + "loss": 17.3097, + "step": 10355 + }, + { + "epoch": 0.18929935840020473, + "grad_norm": 6.701323732100626, + "learning_rate": 9.349309306504752e-06, + "loss": 17.5384, + "step": 10356 + }, + { + "epoch": 0.18931763759665124, + "grad_norm": 6.497133703269843, + "learning_rate": 9.349163277755646e-06, + "loss": 17.5846, + "step": 10357 + }, + { + "epoch": 0.18933591679309777, + "grad_norm": 6.982833709429088, + "learning_rate": 9.349017233763099e-06, + "loss": 17.5816, + "step": 10358 + }, + { + "epoch": 0.1893541959895443, + "grad_norm": 6.928286726453096, + "learning_rate": 9.348871174527622e-06, + "loss": 17.7773, + "step": 10359 + }, + { + "epoch": 0.18937247518599082, + "grad_norm": 5.814438366221879, + "learning_rate": 9.348725100049732e-06, + "loss": 17.2186, + "step": 10360 + }, + { + "epoch": 0.18939075438243735, + "grad_norm": 6.791611147286944, + "learning_rate": 9.348579010329938e-06, + "loss": 17.5189, + "step": 10361 + }, + { + "epoch": 0.18940903357888386, + "grad_norm": 7.006833346279452, + "learning_rate": 9.34843290536875e-06, + "loss": 17.6483, + "step": 10362 + }, + { + "epoch": 0.1894273127753304, + "grad_norm": 6.7005998859096225, + "learning_rate": 9.348286785166682e-06, + "loss": 17.5486, + "step": 10363 + }, + { + "epoch": 0.18944559197177693, + "grad_norm": 7.311681418110757, + "learning_rate": 9.348140649724246e-06, + "loss": 17.7656, + "step": 10364 + }, + { + "epoch": 0.18946387116822344, + "grad_norm": 5.209233208298461, + "learning_rate": 9.347994499041958e-06, + "loss": 16.818, + "step": 10365 + }, + { + "epoch": 0.18948215036466998, + "grad_norm": 8.244729645321057, + "learning_rate": 9.347848333120321e-06, + "loss": 18.3306, + "step": 10366 + }, + { + "epoch": 0.18950042956111648, + "grad_norm": 5.912515092024369, + "learning_rate": 9.347702151959856e-06, + "loss": 17.4306, + "step": 10367 + }, + { + "epoch": 0.18951870875756302, + "grad_norm": 7.409086396633225, + "learning_rate": 9.347555955561072e-06, + "loss": 17.9846, + "step": 10368 + }, + { + "epoch": 0.18953698795400953, + "grad_norm": 7.55162462688707, + "learning_rate": 9.347409743924483e-06, + "loss": 17.9045, + "step": 10369 + }, + { + "epoch": 0.18955526715045606, + "grad_norm": 6.335150015054112, + "learning_rate": 9.347263517050598e-06, + "loss": 17.3046, + "step": 10370 + }, + { + "epoch": 0.1895735463469026, + "grad_norm": 4.602767659201406, + "learning_rate": 9.347117274939933e-06, + "loss": 16.7154, + "step": 10371 + }, + { + "epoch": 0.1895918255433491, + "grad_norm": 6.774494471367734, + "learning_rate": 9.346971017592996e-06, + "loss": 17.6588, + "step": 10372 + }, + { + "epoch": 0.18961010473979564, + "grad_norm": 7.728532927150962, + "learning_rate": 9.346824745010306e-06, + "loss": 18.0744, + "step": 10373 + }, + { + "epoch": 0.18962838393624215, + "grad_norm": 5.58968017477524, + "learning_rate": 9.346678457192372e-06, + "loss": 17.0602, + "step": 10374 + }, + { + "epoch": 0.1896466631326887, + "grad_norm": 7.543744868813387, + "learning_rate": 9.346532154139707e-06, + "loss": 18.1805, + "step": 10375 + }, + { + "epoch": 0.18966494232913522, + "grad_norm": 7.632468889333532, + "learning_rate": 9.346385835852824e-06, + "loss": 17.1272, + "step": 
10376 + }, + { + "epoch": 0.18968322152558173, + "grad_norm": 7.269525125102864, + "learning_rate": 9.346239502332234e-06, + "loss": 17.8056, + "step": 10377 + }, + { + "epoch": 0.18970150072202827, + "grad_norm": 7.960912859082299, + "learning_rate": 9.346093153578455e-06, + "loss": 18.0061, + "step": 10378 + }, + { + "epoch": 0.18971977991847477, + "grad_norm": 7.9004152781134795, + "learning_rate": 9.345946789591995e-06, + "loss": 18.1292, + "step": 10379 + }, + { + "epoch": 0.1897380591149213, + "grad_norm": 8.526333028061192, + "learning_rate": 9.345800410373366e-06, + "loss": 18.1318, + "step": 10380 + }, + { + "epoch": 0.18975633831136784, + "grad_norm": 6.8944897455067125, + "learning_rate": 9.345654015923088e-06, + "loss": 17.6491, + "step": 10381 + }, + { + "epoch": 0.18977461750781435, + "grad_norm": 7.406943808270386, + "learning_rate": 9.345507606241668e-06, + "loss": 17.9084, + "step": 10382 + }, + { + "epoch": 0.1897928967042609, + "grad_norm": 6.049216269328242, + "learning_rate": 9.34536118132962e-06, + "loss": 17.2285, + "step": 10383 + }, + { + "epoch": 0.1898111759007074, + "grad_norm": 7.43964672162737, + "learning_rate": 9.345214741187461e-06, + "loss": 17.5745, + "step": 10384 + }, + { + "epoch": 0.18982945509715393, + "grad_norm": 8.26994894520124, + "learning_rate": 9.345068285815698e-06, + "loss": 18.1613, + "step": 10385 + }, + { + "epoch": 0.18984773429360044, + "grad_norm": 7.707488960899155, + "learning_rate": 9.34492181521485e-06, + "loss": 17.9862, + "step": 10386 + }, + { + "epoch": 0.18986601349004698, + "grad_norm": 16.5411776967301, + "learning_rate": 9.344775329385427e-06, + "loss": 17.9865, + "step": 10387 + }, + { + "epoch": 0.1898842926864935, + "grad_norm": 5.718349447364829, + "learning_rate": 9.344628828327944e-06, + "loss": 17.2111, + "step": 10388 + }, + { + "epoch": 0.18990257188294002, + "grad_norm": 5.886715887936213, + "learning_rate": 9.344482312042914e-06, + "loss": 17.2884, + "step": 10389 + }, + { + "epoch": 0.18992085107938655, + "grad_norm": 6.4689961628351735, + "learning_rate": 9.34433578053085e-06, + "loss": 17.2763, + "step": 10390 + }, + { + "epoch": 0.18993913027583306, + "grad_norm": 5.2140418989877615, + "learning_rate": 9.344189233792265e-06, + "loss": 16.9948, + "step": 10391 + }, + { + "epoch": 0.1899574094722796, + "grad_norm": 6.900030581357269, + "learning_rate": 9.344042671827676e-06, + "loss": 17.7844, + "step": 10392 + }, + { + "epoch": 0.18997568866872613, + "grad_norm": 9.860719450425975, + "learning_rate": 9.343896094637593e-06, + "loss": 18.0996, + "step": 10393 + }, + { + "epoch": 0.18999396786517264, + "grad_norm": 6.861050770224629, + "learning_rate": 9.343749502222532e-06, + "loss": 17.8036, + "step": 10394 + }, + { + "epoch": 0.19001224706161918, + "grad_norm": 6.806301180607863, + "learning_rate": 9.343602894583004e-06, + "loss": 17.6822, + "step": 10395 + }, + { + "epoch": 0.19003052625806569, + "grad_norm": 6.882477940579044, + "learning_rate": 9.343456271719527e-06, + "loss": 17.7113, + "step": 10396 + }, + { + "epoch": 0.19004880545451222, + "grad_norm": 6.843754715150214, + "learning_rate": 9.34330963363261e-06, + "loss": 17.2852, + "step": 10397 + }, + { + "epoch": 0.19006708465095876, + "grad_norm": 7.827123227667387, + "learning_rate": 9.343162980322773e-06, + "loss": 18.1252, + "step": 10398 + }, + { + "epoch": 0.19008536384740526, + "grad_norm": 8.605165913765676, + "learning_rate": 9.343016311790525e-06, + "loss": 18.6341, + "step": 10399 + }, + { + "epoch": 0.1901036430438518, + "grad_norm": 
6.029185270059626, + "learning_rate": 9.342869628036382e-06, + "loss": 17.426, + "step": 10400 + }, + { + "epoch": 0.1901219222402983, + "grad_norm": 6.334705243565285, + "learning_rate": 9.342722929060858e-06, + "loss": 17.5421, + "step": 10401 + }, + { + "epoch": 0.19014020143674484, + "grad_norm": 6.863610700531152, + "learning_rate": 9.342576214864466e-06, + "loss": 17.9516, + "step": 10402 + }, + { + "epoch": 0.19015848063319135, + "grad_norm": 6.793845212938461, + "learning_rate": 9.342429485447721e-06, + "loss": 17.5948, + "step": 10403 + }, + { + "epoch": 0.1901767598296379, + "grad_norm": 5.971066691459425, + "learning_rate": 9.342282740811139e-06, + "loss": 17.4468, + "step": 10404 + }, + { + "epoch": 0.19019503902608442, + "grad_norm": 8.698390597005583, + "learning_rate": 9.342135980955233e-06, + "loss": 18.2342, + "step": 10405 + }, + { + "epoch": 0.19021331822253093, + "grad_norm": 6.172372569569622, + "learning_rate": 9.341989205880516e-06, + "loss": 17.3087, + "step": 10406 + }, + { + "epoch": 0.19023159741897747, + "grad_norm": 5.496960166619962, + "learning_rate": 9.341842415587502e-06, + "loss": 17.0624, + "step": 10407 + }, + { + "epoch": 0.19024987661542397, + "grad_norm": 7.701462103673928, + "learning_rate": 9.34169561007671e-06, + "loss": 18.0715, + "step": 10408 + }, + { + "epoch": 0.1902681558118705, + "grad_norm": 7.671296168887727, + "learning_rate": 9.341548789348652e-06, + "loss": 17.813, + "step": 10409 + }, + { + "epoch": 0.19028643500831705, + "grad_norm": 5.881014951677098, + "learning_rate": 9.34140195340384e-06, + "loss": 17.299, + "step": 10410 + }, + { + "epoch": 0.19030471420476355, + "grad_norm": 8.036024617783314, + "learning_rate": 9.341255102242792e-06, + "loss": 18.0828, + "step": 10411 + }, + { + "epoch": 0.1903229934012101, + "grad_norm": 6.9130025699726065, + "learning_rate": 9.34110823586602e-06, + "loss": 17.6697, + "step": 10412 + }, + { + "epoch": 0.1903412725976566, + "grad_norm": 8.82180752045835, + "learning_rate": 9.340961354274043e-06, + "loss": 18.1533, + "step": 10413 + }, + { + "epoch": 0.19035955179410313, + "grad_norm": 5.968379644562009, + "learning_rate": 9.34081445746737e-06, + "loss": 17.1328, + "step": 10414 + }, + { + "epoch": 0.19037783099054967, + "grad_norm": 7.278498339936495, + "learning_rate": 9.340667545446522e-06, + "loss": 18.0188, + "step": 10415 + }, + { + "epoch": 0.19039611018699618, + "grad_norm": 6.124699532025761, + "learning_rate": 9.34052061821201e-06, + "loss": 17.4712, + "step": 10416 + }, + { + "epoch": 0.1904143893834427, + "grad_norm": 6.572271076368137, + "learning_rate": 9.34037367576435e-06, + "loss": 17.8327, + "step": 10417 + }, + { + "epoch": 0.19043266857988922, + "grad_norm": 5.6625490691218845, + "learning_rate": 9.340226718104057e-06, + "loss": 16.9799, + "step": 10418 + }, + { + "epoch": 0.19045094777633576, + "grad_norm": 8.470132525616112, + "learning_rate": 9.340079745231645e-06, + "loss": 17.9597, + "step": 10419 + }, + { + "epoch": 0.19046922697278226, + "grad_norm": 7.046150956276198, + "learning_rate": 9.33993275714763e-06, + "loss": 17.8574, + "step": 10420 + }, + { + "epoch": 0.1904875061692288, + "grad_norm": 6.966526694615226, + "learning_rate": 9.339785753852529e-06, + "loss": 17.6667, + "step": 10421 + }, + { + "epoch": 0.19050578536567533, + "grad_norm": 5.102008911949953, + "learning_rate": 9.339638735346854e-06, + "loss": 16.9083, + "step": 10422 + }, + { + "epoch": 0.19052406456212184, + "grad_norm": 6.537757982753672, + "learning_rate": 9.339491701631122e-06, + "loss": 
17.4962, + "step": 10423 + }, + { + "epoch": 0.19054234375856838, + "grad_norm": 6.043523754574515, + "learning_rate": 9.339344652705848e-06, + "loss": 17.3628, + "step": 10424 + }, + { + "epoch": 0.19056062295501489, + "grad_norm": 8.846123908373793, + "learning_rate": 9.339197588571549e-06, + "loss": 18.4887, + "step": 10425 + }, + { + "epoch": 0.19057890215146142, + "grad_norm": 9.695608602744546, + "learning_rate": 9.339050509228737e-06, + "loss": 18.5418, + "step": 10426 + }, + { + "epoch": 0.19059718134790796, + "grad_norm": 6.77165990869322, + "learning_rate": 9.33890341467793e-06, + "loss": 17.6323, + "step": 10427 + }, + { + "epoch": 0.19061546054435446, + "grad_norm": 6.4706053546411395, + "learning_rate": 9.338756304919644e-06, + "loss": 17.5203, + "step": 10428 + }, + { + "epoch": 0.190633739740801, + "grad_norm": 7.2279891914592245, + "learning_rate": 9.338609179954393e-06, + "loss": 18.0453, + "step": 10429 + }, + { + "epoch": 0.1906520189372475, + "grad_norm": 5.717809797629613, + "learning_rate": 9.338462039782695e-06, + "loss": 17.0092, + "step": 10430 + }, + { + "epoch": 0.19067029813369404, + "grad_norm": 6.453387703051551, + "learning_rate": 9.33831488440506e-06, + "loss": 17.5693, + "step": 10431 + }, + { + "epoch": 0.19068857733014058, + "grad_norm": 6.286712840872121, + "learning_rate": 9.33816771382201e-06, + "loss": 17.3303, + "step": 10432 + }, + { + "epoch": 0.1907068565265871, + "grad_norm": 6.518167738374248, + "learning_rate": 9.33802052803406e-06, + "loss": 17.5911, + "step": 10433 + }, + { + "epoch": 0.19072513572303362, + "grad_norm": 6.043467175502499, + "learning_rate": 9.337873327041723e-06, + "loss": 17.274, + "step": 10434 + }, + { + "epoch": 0.19074341491948013, + "grad_norm": 7.354373411650934, + "learning_rate": 9.337726110845518e-06, + "loss": 17.6185, + "step": 10435 + }, + { + "epoch": 0.19076169411592667, + "grad_norm": 6.884420085630948, + "learning_rate": 9.337578879445957e-06, + "loss": 17.7226, + "step": 10436 + }, + { + "epoch": 0.19077997331237317, + "grad_norm": 7.995802622064922, + "learning_rate": 9.33743163284356e-06, + "loss": 18.1389, + "step": 10437 + }, + { + "epoch": 0.1907982525088197, + "grad_norm": 6.190516487906699, + "learning_rate": 9.337284371038841e-06, + "loss": 17.4509, + "step": 10438 + }, + { + "epoch": 0.19081653170526625, + "grad_norm": 7.035449624213116, + "learning_rate": 9.337137094032316e-06, + "loss": 17.7969, + "step": 10439 + }, + { + "epoch": 0.19083481090171275, + "grad_norm": 5.999930652722676, + "learning_rate": 9.336989801824504e-06, + "loss": 17.3565, + "step": 10440 + }, + { + "epoch": 0.1908530900981593, + "grad_norm": 7.1151169267855705, + "learning_rate": 9.336842494415916e-06, + "loss": 18.1344, + "step": 10441 + }, + { + "epoch": 0.1908713692946058, + "grad_norm": 6.458074269714437, + "learning_rate": 9.336695171807074e-06, + "loss": 17.5333, + "step": 10442 + }, + { + "epoch": 0.19088964849105233, + "grad_norm": 5.981033483435007, + "learning_rate": 9.33654783399849e-06, + "loss": 17.2124, + "step": 10443 + }, + { + "epoch": 0.19090792768749887, + "grad_norm": 6.505129219203311, + "learning_rate": 9.336400480990684e-06, + "loss": 17.423, + "step": 10444 + }, + { + "epoch": 0.19092620688394538, + "grad_norm": 6.410413952750364, + "learning_rate": 9.336253112784169e-06, + "loss": 17.5055, + "step": 10445 + }, + { + "epoch": 0.1909444860803919, + "grad_norm": 5.943410520611284, + "learning_rate": 9.336105729379463e-06, + "loss": 17.3018, + "step": 10446 + }, + { + "epoch": 0.19096276527683842, + 
"grad_norm": 6.662730581833728, + "learning_rate": 9.335958330777084e-06, + "loss": 17.4771, + "step": 10447 + }, + { + "epoch": 0.19098104447328496, + "grad_norm": 7.714959509225025, + "learning_rate": 9.335810916977547e-06, + "loss": 17.6309, + "step": 10448 + }, + { + "epoch": 0.1909993236697315, + "grad_norm": 7.267940668830873, + "learning_rate": 9.335663487981368e-06, + "loss": 17.585, + "step": 10449 + }, + { + "epoch": 0.191017602866178, + "grad_norm": 6.499461630992459, + "learning_rate": 9.335516043789065e-06, + "loss": 17.4037, + "step": 10450 + }, + { + "epoch": 0.19103588206262453, + "grad_norm": 7.661848776562771, + "learning_rate": 9.335368584401156e-06, + "loss": 18.0498, + "step": 10451 + }, + { + "epoch": 0.19105416125907104, + "grad_norm": 7.489698080064114, + "learning_rate": 9.335221109818154e-06, + "loss": 17.7291, + "step": 10452 + }, + { + "epoch": 0.19107244045551758, + "grad_norm": 6.139836763169081, + "learning_rate": 9.33507362004058e-06, + "loss": 17.4132, + "step": 10453 + }, + { + "epoch": 0.19109071965196409, + "grad_norm": 7.4440984180972904, + "learning_rate": 9.334926115068949e-06, + "loss": 17.8699, + "step": 10454 + }, + { + "epoch": 0.19110899884841062, + "grad_norm": 6.946981728420909, + "learning_rate": 9.334778594903777e-06, + "loss": 17.8496, + "step": 10455 + }, + { + "epoch": 0.19112727804485716, + "grad_norm": 7.214419541432831, + "learning_rate": 9.334631059545583e-06, + "loss": 17.9652, + "step": 10456 + }, + { + "epoch": 0.19114555724130367, + "grad_norm": 6.70901587454331, + "learning_rate": 9.334483508994883e-06, + "loss": 17.8415, + "step": 10457 + }, + { + "epoch": 0.1911638364377502, + "grad_norm": 8.014234392354286, + "learning_rate": 9.334335943252196e-06, + "loss": 18.1527, + "step": 10458 + }, + { + "epoch": 0.1911821156341967, + "grad_norm": 6.94485658188503, + "learning_rate": 9.334188362318035e-06, + "loss": 17.511, + "step": 10459 + }, + { + "epoch": 0.19120039483064324, + "grad_norm": 7.417707844606827, + "learning_rate": 9.33404076619292e-06, + "loss": 17.6998, + "step": 10460 + }, + { + "epoch": 0.19121867402708978, + "grad_norm": 7.657042394726569, + "learning_rate": 9.333893154877369e-06, + "loss": 18.0186, + "step": 10461 + }, + { + "epoch": 0.1912369532235363, + "grad_norm": 10.22892844201954, + "learning_rate": 9.3337455283719e-06, + "loss": 18.4338, + "step": 10462 + }, + { + "epoch": 0.19125523241998282, + "grad_norm": 7.238723278249536, + "learning_rate": 9.333597886677027e-06, + "loss": 17.7325, + "step": 10463 + }, + { + "epoch": 0.19127351161642933, + "grad_norm": 5.993710460992475, + "learning_rate": 9.33345022979327e-06, + "loss": 17.3588, + "step": 10464 + }, + { + "epoch": 0.19129179081287587, + "grad_norm": 6.340197372919521, + "learning_rate": 9.333302557721146e-06, + "loss": 17.7382, + "step": 10465 + }, + { + "epoch": 0.1913100700093224, + "grad_norm": 6.791912878405465, + "learning_rate": 9.333154870461174e-06, + "loss": 17.539, + "step": 10466 + }, + { + "epoch": 0.1913283492057689, + "grad_norm": 6.060523017852661, + "learning_rate": 9.333007168013868e-06, + "loss": 17.3509, + "step": 10467 + }, + { + "epoch": 0.19134662840221545, + "grad_norm": 7.2323114052940305, + "learning_rate": 9.33285945037975e-06, + "loss": 17.8316, + "step": 10468 + }, + { + "epoch": 0.19136490759866195, + "grad_norm": 6.52251643048427, + "learning_rate": 9.332711717559334e-06, + "loss": 17.3098, + "step": 10469 + }, + { + "epoch": 0.1913831867951085, + "grad_norm": 7.333715729836014, + "learning_rate": 9.33256396955314e-06, + 
"loss": 17.8015, + "step": 10470 + }, + { + "epoch": 0.191401465991555, + "grad_norm": 7.244729110021288, + "learning_rate": 9.332416206361686e-06, + "loss": 17.6741, + "step": 10471 + }, + { + "epoch": 0.19141974518800153, + "grad_norm": 7.336581266459314, + "learning_rate": 9.332268427985487e-06, + "loss": 17.6178, + "step": 10472 + }, + { + "epoch": 0.19143802438444807, + "grad_norm": 9.518093122476557, + "learning_rate": 9.332120634425067e-06, + "loss": 18.0936, + "step": 10473 + }, + { + "epoch": 0.19145630358089458, + "grad_norm": 5.7833375579983075, + "learning_rate": 9.331972825680935e-06, + "loss": 17.1807, + "step": 10474 + }, + { + "epoch": 0.1914745827773411, + "grad_norm": 6.422057620330305, + "learning_rate": 9.331825001753617e-06, + "loss": 17.5081, + "step": 10475 + }, + { + "epoch": 0.19149286197378762, + "grad_norm": 7.77793057493313, + "learning_rate": 9.331677162643629e-06, + "loss": 17.7322, + "step": 10476 + }, + { + "epoch": 0.19151114117023416, + "grad_norm": 6.446977267695607, + "learning_rate": 9.331529308351485e-06, + "loss": 17.4001, + "step": 10477 + }, + { + "epoch": 0.1915294203666807, + "grad_norm": 5.9778560695633125, + "learning_rate": 9.33138143887771e-06, + "loss": 17.3762, + "step": 10478 + }, + { + "epoch": 0.1915476995631272, + "grad_norm": 6.533211496809169, + "learning_rate": 9.331233554222819e-06, + "loss": 17.6646, + "step": 10479 + }, + { + "epoch": 0.19156597875957374, + "grad_norm": 5.958983266303413, + "learning_rate": 9.331085654387328e-06, + "loss": 17.2335, + "step": 10480 + }, + { + "epoch": 0.19158425795602024, + "grad_norm": 6.083401883455692, + "learning_rate": 9.33093773937176e-06, + "loss": 17.2894, + "step": 10481 + }, + { + "epoch": 0.19160253715246678, + "grad_norm": 6.732405095493683, + "learning_rate": 9.33078980917663e-06, + "loss": 17.529, + "step": 10482 + }, + { + "epoch": 0.19162081634891331, + "grad_norm": 7.588140581672176, + "learning_rate": 9.330641863802457e-06, + "loss": 17.6138, + "step": 10483 + }, + { + "epoch": 0.19163909554535982, + "grad_norm": 6.839579510099215, + "learning_rate": 9.33049390324976e-06, + "loss": 17.571, + "step": 10484 + }, + { + "epoch": 0.19165737474180636, + "grad_norm": 6.41343049608932, + "learning_rate": 9.330345927519057e-06, + "loss": 17.846, + "step": 10485 + }, + { + "epoch": 0.19167565393825287, + "grad_norm": 7.451348130479233, + "learning_rate": 9.33019793661087e-06, + "loss": 17.8147, + "step": 10486 + }, + { + "epoch": 0.1916939331346994, + "grad_norm": 5.82472880839712, + "learning_rate": 9.330049930525713e-06, + "loss": 17.2141, + "step": 10487 + }, + { + "epoch": 0.1917122123311459, + "grad_norm": 5.445147209150908, + "learning_rate": 9.329901909264107e-06, + "loss": 16.9596, + "step": 10488 + }, + { + "epoch": 0.19173049152759244, + "grad_norm": 6.562132849224037, + "learning_rate": 9.32975387282657e-06, + "loss": 17.6303, + "step": 10489 + }, + { + "epoch": 0.19174877072403898, + "grad_norm": 6.901108693152038, + "learning_rate": 9.329605821213623e-06, + "loss": 17.7932, + "step": 10490 + }, + { + "epoch": 0.1917670499204855, + "grad_norm": 6.675714856643467, + "learning_rate": 9.329457754425782e-06, + "loss": 17.564, + "step": 10491 + }, + { + "epoch": 0.19178532911693202, + "grad_norm": 7.517726578104453, + "learning_rate": 9.329309672463567e-06, + "loss": 18.0812, + "step": 10492 + }, + { + "epoch": 0.19180360831337853, + "grad_norm": 7.3784629388301255, + "learning_rate": 9.329161575327499e-06, + "loss": 17.8641, + "step": 10493 + }, + { + "epoch": 0.19182188750982507, + 
"grad_norm": 6.862251404933019, + "learning_rate": 9.329013463018093e-06, + "loss": 17.6595, + "step": 10494 + }, + { + "epoch": 0.1918401667062716, + "grad_norm": 6.260407870718441, + "learning_rate": 9.328865335535872e-06, + "loss": 17.5309, + "step": 10495 + }, + { + "epoch": 0.1918584459027181, + "grad_norm": 5.439647176748364, + "learning_rate": 9.328717192881353e-06, + "loss": 17.1329, + "step": 10496 + }, + { + "epoch": 0.19187672509916465, + "grad_norm": 6.668790465111025, + "learning_rate": 9.328569035055058e-06, + "loss": 17.8241, + "step": 10497 + }, + { + "epoch": 0.19189500429561115, + "grad_norm": 6.47700749270235, + "learning_rate": 9.3284208620575e-06, + "loss": 17.7274, + "step": 10498 + }, + { + "epoch": 0.1919132834920577, + "grad_norm": 7.3122825475634485, + "learning_rate": 9.328272673889206e-06, + "loss": 17.7026, + "step": 10499 + }, + { + "epoch": 0.19193156268850423, + "grad_norm": 7.002894709055904, + "learning_rate": 9.32812447055069e-06, + "loss": 17.5361, + "step": 10500 + }, + { + "epoch": 0.19194984188495073, + "grad_norm": 8.460734664670872, + "learning_rate": 9.327976252042474e-06, + "loss": 18.4211, + "step": 10501 + }, + { + "epoch": 0.19196812108139727, + "grad_norm": 6.835754991854399, + "learning_rate": 9.327828018365078e-06, + "loss": 17.9194, + "step": 10502 + }, + { + "epoch": 0.19198640027784378, + "grad_norm": 6.5207677165144355, + "learning_rate": 9.327679769519017e-06, + "loss": 17.5511, + "step": 10503 + }, + { + "epoch": 0.1920046794742903, + "grad_norm": 6.307911366992025, + "learning_rate": 9.327531505504818e-06, + "loss": 17.3657, + "step": 10504 + }, + { + "epoch": 0.19202295867073682, + "grad_norm": 5.608000385527272, + "learning_rate": 9.327383226322995e-06, + "loss": 17.2065, + "step": 10505 + }, + { + "epoch": 0.19204123786718336, + "grad_norm": 7.304296844315352, + "learning_rate": 9.327234931974068e-06, + "loss": 17.6759, + "step": 10506 + }, + { + "epoch": 0.1920595170636299, + "grad_norm": 7.672875311084413, + "learning_rate": 9.327086622458559e-06, + "loss": 18.0848, + "step": 10507 + }, + { + "epoch": 0.1920777962600764, + "grad_norm": 7.913357073006939, + "learning_rate": 9.326938297776987e-06, + "loss": 17.7198, + "step": 10508 + }, + { + "epoch": 0.19209607545652294, + "grad_norm": 6.333736124268162, + "learning_rate": 9.326789957929872e-06, + "loss": 17.3474, + "step": 10509 + }, + { + "epoch": 0.19211435465296944, + "grad_norm": 6.905903139041645, + "learning_rate": 9.326641602917734e-06, + "loss": 17.6535, + "step": 10510 + }, + { + "epoch": 0.19213263384941598, + "grad_norm": 6.520776450934488, + "learning_rate": 9.326493232741092e-06, + "loss": 17.5217, + "step": 10511 + }, + { + "epoch": 0.19215091304586251, + "grad_norm": 7.317714389966107, + "learning_rate": 9.326344847400466e-06, + "loss": 17.874, + "step": 10512 + }, + { + "epoch": 0.19216919224230902, + "grad_norm": 6.8801757926825875, + "learning_rate": 9.326196446896377e-06, + "loss": 17.6736, + "step": 10513 + }, + { + "epoch": 0.19218747143875556, + "grad_norm": 5.998845084749652, + "learning_rate": 9.326048031229346e-06, + "loss": 17.5381, + "step": 10514 + }, + { + "epoch": 0.19220575063520207, + "grad_norm": 7.2534001313545335, + "learning_rate": 9.32589960039989e-06, + "loss": 17.9485, + "step": 10515 + }, + { + "epoch": 0.1922240298316486, + "grad_norm": 6.1874438901416156, + "learning_rate": 9.325751154408534e-06, + "loss": 17.2185, + "step": 10516 + }, + { + "epoch": 0.19224230902809514, + "grad_norm": 7.784618928324922, + "learning_rate": 
9.325602693255793e-06, + "loss": 18.0031, + "step": 10517 + }, + { + "epoch": 0.19226058822454165, + "grad_norm": 5.117871695439608, + "learning_rate": 9.325454216942192e-06, + "loss": 16.8991, + "step": 10518 + }, + { + "epoch": 0.19227886742098818, + "grad_norm": 6.289985694303816, + "learning_rate": 9.325305725468248e-06, + "loss": 17.4479, + "step": 10519 + }, + { + "epoch": 0.1922971466174347, + "grad_norm": 7.29537081876363, + "learning_rate": 9.325157218834481e-06, + "loss": 18.2189, + "step": 10520 + }, + { + "epoch": 0.19231542581388122, + "grad_norm": 6.799227527051088, + "learning_rate": 9.325008697041418e-06, + "loss": 17.4002, + "step": 10521 + }, + { + "epoch": 0.19233370501032773, + "grad_norm": 8.351520976907086, + "learning_rate": 9.324860160089571e-06, + "loss": 17.766, + "step": 10522 + }, + { + "epoch": 0.19235198420677427, + "grad_norm": 7.0551663389754555, + "learning_rate": 9.324711607979466e-06, + "loss": 17.8016, + "step": 10523 + }, + { + "epoch": 0.1923702634032208, + "grad_norm": 6.490942895032392, + "learning_rate": 9.324563040711621e-06, + "loss": 17.4714, + "step": 10524 + }, + { + "epoch": 0.1923885425996673, + "grad_norm": 7.953588425719637, + "learning_rate": 9.32441445828656e-06, + "loss": 18.4641, + "step": 10525 + }, + { + "epoch": 0.19240682179611385, + "grad_norm": 5.234308943229955, + "learning_rate": 9.3242658607048e-06, + "loss": 16.9446, + "step": 10526 + }, + { + "epoch": 0.19242510099256036, + "grad_norm": 7.309660964790849, + "learning_rate": 9.324117247966863e-06, + "loss": 17.6846, + "step": 10527 + }, + { + "epoch": 0.1924433801890069, + "grad_norm": 7.29274034818036, + "learning_rate": 9.323968620073271e-06, + "loss": 18.0063, + "step": 10528 + }, + { + "epoch": 0.19246165938545343, + "grad_norm": 7.037351986652799, + "learning_rate": 9.323819977024545e-06, + "loss": 17.624, + "step": 10529 + }, + { + "epoch": 0.19247993858189993, + "grad_norm": 7.479671712839552, + "learning_rate": 9.323671318821203e-06, + "loss": 17.892, + "step": 10530 + }, + { + "epoch": 0.19249821777834647, + "grad_norm": 9.330847404690038, + "learning_rate": 9.32352264546377e-06, + "loss": 18.7533, + "step": 10531 + }, + { + "epoch": 0.19251649697479298, + "grad_norm": 6.156017588113379, + "learning_rate": 9.323373956952764e-06, + "loss": 17.4061, + "step": 10532 + }, + { + "epoch": 0.1925347761712395, + "grad_norm": 6.484326782442109, + "learning_rate": 9.323225253288709e-06, + "loss": 17.5862, + "step": 10533 + }, + { + "epoch": 0.19255305536768605, + "grad_norm": 7.395482460822145, + "learning_rate": 9.323076534472123e-06, + "loss": 18.0998, + "step": 10534 + }, + { + "epoch": 0.19257133456413256, + "grad_norm": 7.022486083857744, + "learning_rate": 9.322927800503529e-06, + "loss": 17.7172, + "step": 10535 + }, + { + "epoch": 0.1925896137605791, + "grad_norm": 6.440153027775848, + "learning_rate": 9.32277905138345e-06, + "loss": 17.2067, + "step": 10536 + }, + { + "epoch": 0.1926078929570256, + "grad_norm": 6.646389189065471, + "learning_rate": 9.322630287112404e-06, + "loss": 17.7001, + "step": 10537 + }, + { + "epoch": 0.19262617215347214, + "grad_norm": 6.233991271662693, + "learning_rate": 9.322481507690916e-06, + "loss": 17.4111, + "step": 10538 + }, + { + "epoch": 0.19264445134991864, + "grad_norm": 7.583188301422039, + "learning_rate": 9.322332713119501e-06, + "loss": 18.0243, + "step": 10539 + }, + { + "epoch": 0.19266273054636518, + "grad_norm": 6.985848731048602, + "learning_rate": 9.322183903398689e-06, + "loss": 17.4681, + "step": 10540 + }, + { + 
"epoch": 0.19268100974281172, + "grad_norm": 6.944841568800823, + "learning_rate": 9.322035078528996e-06, + "loss": 17.6833, + "step": 10541 + }, + { + "epoch": 0.19269928893925822, + "grad_norm": 6.769110657349653, + "learning_rate": 9.321886238510945e-06, + "loss": 18.0905, + "step": 10542 + }, + { + "epoch": 0.19271756813570476, + "grad_norm": 6.175205785648062, + "learning_rate": 9.321737383345059e-06, + "loss": 17.5036, + "step": 10543 + }, + { + "epoch": 0.19273584733215127, + "grad_norm": 7.39292483491582, + "learning_rate": 9.321588513031857e-06, + "loss": 18.1339, + "step": 10544 + }, + { + "epoch": 0.1927541265285978, + "grad_norm": 7.62593299399902, + "learning_rate": 9.321439627571863e-06, + "loss": 17.6942, + "step": 10545 + }, + { + "epoch": 0.19277240572504434, + "grad_norm": 7.852047992226749, + "learning_rate": 9.321290726965598e-06, + "loss": 18.4223, + "step": 10546 + }, + { + "epoch": 0.19279068492149085, + "grad_norm": 6.79943435019734, + "learning_rate": 9.321141811213582e-06, + "loss": 17.6777, + "step": 10547 + }, + { + "epoch": 0.19280896411793738, + "grad_norm": 5.801317405164884, + "learning_rate": 9.320992880316342e-06, + "loss": 17.3783, + "step": 10548 + }, + { + "epoch": 0.1928272433143839, + "grad_norm": 11.13761053265287, + "learning_rate": 9.320843934274396e-06, + "loss": 18.3038, + "step": 10549 + }, + { + "epoch": 0.19284552251083042, + "grad_norm": 7.955616697808509, + "learning_rate": 9.320694973088267e-06, + "loss": 18.0079, + "step": 10550 + }, + { + "epoch": 0.19286380170727696, + "grad_norm": 8.347267151769406, + "learning_rate": 9.320545996758477e-06, + "loss": 18.0559, + "step": 10551 + }, + { + "epoch": 0.19288208090372347, + "grad_norm": 6.7086603689711115, + "learning_rate": 9.320397005285548e-06, + "loss": 17.5578, + "step": 10552 + }, + { + "epoch": 0.19290036010017, + "grad_norm": 6.701291688077603, + "learning_rate": 9.320247998670003e-06, + "loss": 17.3163, + "step": 10553 + }, + { + "epoch": 0.1929186392966165, + "grad_norm": 6.043578464403494, + "learning_rate": 9.320098976912362e-06, + "loss": 17.2764, + "step": 10554 + }, + { + "epoch": 0.19293691849306305, + "grad_norm": 6.42510153394605, + "learning_rate": 9.319949940013149e-06, + "loss": 17.6753, + "step": 10555 + }, + { + "epoch": 0.19295519768950956, + "grad_norm": 7.745886185222929, + "learning_rate": 9.319800887972887e-06, + "loss": 18.0879, + "step": 10556 + }, + { + "epoch": 0.1929734768859561, + "grad_norm": 7.468077190633721, + "learning_rate": 9.3196518207921e-06, + "loss": 17.7699, + "step": 10557 + }, + { + "epoch": 0.19299175608240263, + "grad_norm": 7.070802593273419, + "learning_rate": 9.319502738471304e-06, + "loss": 18.0692, + "step": 10558 + }, + { + "epoch": 0.19301003527884913, + "grad_norm": 6.909086042391009, + "learning_rate": 9.319353641011028e-06, + "loss": 17.9103, + "step": 10559 + }, + { + "epoch": 0.19302831447529567, + "grad_norm": 7.037366292756734, + "learning_rate": 9.319204528411794e-06, + "loss": 17.8153, + "step": 10560 + }, + { + "epoch": 0.19304659367174218, + "grad_norm": 7.930576037493707, + "learning_rate": 9.31905540067412e-06, + "loss": 18.2639, + "step": 10561 + }, + { + "epoch": 0.1930648728681887, + "grad_norm": 6.254364915771636, + "learning_rate": 9.318906257798533e-06, + "loss": 17.3226, + "step": 10562 + }, + { + "epoch": 0.19308315206463525, + "grad_norm": 7.822969907801438, + "learning_rate": 9.318757099785554e-06, + "loss": 18.1452, + "step": 10563 + }, + { + "epoch": 0.19310143126108176, + "grad_norm": 5.464234538796677, + 
"learning_rate": 9.318607926635708e-06, + "loss": 17.168, + "step": 10564 + }, + { + "epoch": 0.1931197104575283, + "grad_norm": 7.450394358946171, + "learning_rate": 9.318458738349514e-06, + "loss": 18.4264, + "step": 10565 + }, + { + "epoch": 0.1931379896539748, + "grad_norm": 6.567630052692855, + "learning_rate": 9.318309534927496e-06, + "loss": 17.5298, + "step": 10566 + }, + { + "epoch": 0.19315626885042134, + "grad_norm": 6.996251927452832, + "learning_rate": 9.31816031637018e-06, + "loss": 17.6233, + "step": 10567 + }, + { + "epoch": 0.19317454804686787, + "grad_norm": 7.19843337457532, + "learning_rate": 9.318011082678084e-06, + "loss": 17.7532, + "step": 10568 + }, + { + "epoch": 0.19319282724331438, + "grad_norm": 8.16279084086477, + "learning_rate": 9.317861833851737e-06, + "loss": 18.0928, + "step": 10569 + }, + { + "epoch": 0.19321110643976092, + "grad_norm": 6.8381809249343215, + "learning_rate": 9.317712569891656e-06, + "loss": 17.8484, + "step": 10570 + }, + { + "epoch": 0.19322938563620742, + "grad_norm": 6.215993573824501, + "learning_rate": 9.31756329079837e-06, + "loss": 17.2163, + "step": 10571 + }, + { + "epoch": 0.19324766483265396, + "grad_norm": 8.464180348844982, + "learning_rate": 9.317413996572398e-06, + "loss": 17.8885, + "step": 10572 + }, + { + "epoch": 0.19326594402910047, + "grad_norm": 5.687433583149414, + "learning_rate": 9.317264687214266e-06, + "loss": 17.083, + "step": 10573 + }, + { + "epoch": 0.193284223225547, + "grad_norm": 6.170371554072229, + "learning_rate": 9.317115362724492e-06, + "loss": 17.3101, + "step": 10574 + }, + { + "epoch": 0.19330250242199354, + "grad_norm": 6.302181705482549, + "learning_rate": 9.316966023103606e-06, + "loss": 17.6045, + "step": 10575 + }, + { + "epoch": 0.19332078161844005, + "grad_norm": 6.693383226821278, + "learning_rate": 9.316816668352129e-06, + "loss": 17.4768, + "step": 10576 + }, + { + "epoch": 0.19333906081488658, + "grad_norm": 6.738883879953232, + "learning_rate": 9.316667298470583e-06, + "loss": 17.6461, + "step": 10577 + }, + { + "epoch": 0.1933573400113331, + "grad_norm": 6.650902543790805, + "learning_rate": 9.316517913459495e-06, + "loss": 17.4843, + "step": 10578 + }, + { + "epoch": 0.19337561920777963, + "grad_norm": 5.945529606703904, + "learning_rate": 9.316368513319383e-06, + "loss": 17.3006, + "step": 10579 + }, + { + "epoch": 0.19339389840422616, + "grad_norm": 6.5654109763194315, + "learning_rate": 9.316219098050777e-06, + "loss": 17.4598, + "step": 10580 + }, + { + "epoch": 0.19341217760067267, + "grad_norm": 9.49438091067944, + "learning_rate": 9.316069667654196e-06, + "loss": 18.5121, + "step": 10581 + }, + { + "epoch": 0.1934304567971192, + "grad_norm": 7.18090129984022, + "learning_rate": 9.315920222130163e-06, + "loss": 17.6947, + "step": 10582 + }, + { + "epoch": 0.1934487359935657, + "grad_norm": 8.277954961005245, + "learning_rate": 9.315770761479209e-06, + "loss": 18.0253, + "step": 10583 + }, + { + "epoch": 0.19346701519001225, + "grad_norm": 9.693217376877973, + "learning_rate": 9.31562128570185e-06, + "loss": 18.3004, + "step": 10584 + }, + { + "epoch": 0.19348529438645878, + "grad_norm": 7.771114929456253, + "learning_rate": 9.315471794798614e-06, + "loss": 17.9545, + "step": 10585 + }, + { + "epoch": 0.1935035735829053, + "grad_norm": 6.674895642955014, + "learning_rate": 9.315322288770024e-06, + "loss": 17.8881, + "step": 10586 + }, + { + "epoch": 0.19352185277935183, + "grad_norm": 6.407049298848421, + "learning_rate": 9.315172767616602e-06, + "loss": 17.1966, + "step": 
10587 + }, + { + "epoch": 0.19354013197579834, + "grad_norm": 6.797248276208662, + "learning_rate": 9.315023231338875e-06, + "loss": 17.5365, + "step": 10588 + }, + { + "epoch": 0.19355841117224487, + "grad_norm": 7.642996263755199, + "learning_rate": 9.314873679937366e-06, + "loss": 18.1229, + "step": 10589 + }, + { + "epoch": 0.19357669036869138, + "grad_norm": 6.3013328966463655, + "learning_rate": 9.314724113412599e-06, + "loss": 17.155, + "step": 10590 + }, + { + "epoch": 0.19359496956513791, + "grad_norm": 7.847098323725367, + "learning_rate": 9.3145745317651e-06, + "loss": 18.1772, + "step": 10591 + }, + { + "epoch": 0.19361324876158445, + "grad_norm": 7.028199647753349, + "learning_rate": 9.31442493499539e-06, + "loss": 17.6506, + "step": 10592 + }, + { + "epoch": 0.19363152795803096, + "grad_norm": 7.597175193959776, + "learning_rate": 9.314275323103994e-06, + "loss": 17.9633, + "step": 10593 + }, + { + "epoch": 0.1936498071544775, + "grad_norm": 7.568322940690469, + "learning_rate": 9.31412569609144e-06, + "loss": 18.1824, + "step": 10594 + }, + { + "epoch": 0.193668086350924, + "grad_norm": 7.226247408972656, + "learning_rate": 9.313976053958249e-06, + "loss": 17.7692, + "step": 10595 + }, + { + "epoch": 0.19368636554737054, + "grad_norm": 7.776126565740525, + "learning_rate": 9.313826396704945e-06, + "loss": 18.2208, + "step": 10596 + }, + { + "epoch": 0.19370464474381707, + "grad_norm": 6.3933084635983, + "learning_rate": 9.313676724332054e-06, + "loss": 17.4391, + "step": 10597 + }, + { + "epoch": 0.19372292394026358, + "grad_norm": 6.872603044415165, + "learning_rate": 9.313527036840103e-06, + "loss": 17.7786, + "step": 10598 + }, + { + "epoch": 0.19374120313671012, + "grad_norm": 6.527531981879228, + "learning_rate": 9.31337733422961e-06, + "loss": 17.4212, + "step": 10599 + }, + { + "epoch": 0.19375948233315662, + "grad_norm": 6.301659271412311, + "learning_rate": 9.313227616501106e-06, + "loss": 17.3434, + "step": 10600 + }, + { + "epoch": 0.19377776152960316, + "grad_norm": 7.712829624281709, + "learning_rate": 9.313077883655112e-06, + "loss": 18.0235, + "step": 10601 + }, + { + "epoch": 0.1937960407260497, + "grad_norm": 6.327685086474394, + "learning_rate": 9.312928135692156e-06, + "loss": 17.269, + "step": 10602 + }, + { + "epoch": 0.1938143199224962, + "grad_norm": 7.151033400204553, + "learning_rate": 9.312778372612761e-06, + "loss": 17.8822, + "step": 10603 + }, + { + "epoch": 0.19383259911894274, + "grad_norm": 5.65434615832553, + "learning_rate": 9.312628594417452e-06, + "loss": 16.9864, + "step": 10604 + }, + { + "epoch": 0.19385087831538925, + "grad_norm": 7.526213554422045, + "learning_rate": 9.312478801106754e-06, + "loss": 18.1226, + "step": 10605 + }, + { + "epoch": 0.19386915751183578, + "grad_norm": 7.449896069951539, + "learning_rate": 9.312328992681191e-06, + "loss": 18.0462, + "step": 10606 + }, + { + "epoch": 0.1938874367082823, + "grad_norm": 7.313314532439838, + "learning_rate": 9.312179169141292e-06, + "loss": 17.7867, + "step": 10607 + }, + { + "epoch": 0.19390571590472883, + "grad_norm": 7.535135216513795, + "learning_rate": 9.312029330487576e-06, + "loss": 17.8511, + "step": 10608 + }, + { + "epoch": 0.19392399510117536, + "grad_norm": 7.6273741504347985, + "learning_rate": 9.311879476720572e-06, + "loss": 18.0612, + "step": 10609 + }, + { + "epoch": 0.19394227429762187, + "grad_norm": 7.3073291992089615, + "learning_rate": 9.311729607840804e-06, + "loss": 17.8439, + "step": 10610 + }, + { + "epoch": 0.1939605534940684, + "grad_norm": 
9.447661877803624, + "learning_rate": 9.3115797238488e-06, + "loss": 18.8587, + "step": 10611 + }, + { + "epoch": 0.1939788326905149, + "grad_norm": 6.56812485016018, + "learning_rate": 9.311429824745082e-06, + "loss": 17.6294, + "step": 10612 + }, + { + "epoch": 0.19399711188696145, + "grad_norm": 6.583498507950827, + "learning_rate": 9.311279910530177e-06, + "loss": 17.5195, + "step": 10613 + }, + { + "epoch": 0.19401539108340798, + "grad_norm": 6.368004010018813, + "learning_rate": 9.31112998120461e-06, + "loss": 17.4466, + "step": 10614 + }, + { + "epoch": 0.1940336702798545, + "grad_norm": 6.474764642472075, + "learning_rate": 9.310980036768905e-06, + "loss": 17.5982, + "step": 10615 + }, + { + "epoch": 0.19405194947630103, + "grad_norm": 6.380601550493955, + "learning_rate": 9.310830077223592e-06, + "loss": 17.4736, + "step": 10616 + }, + { + "epoch": 0.19407022867274754, + "grad_norm": 7.878257561781664, + "learning_rate": 9.310680102569192e-06, + "loss": 17.8663, + "step": 10617 + }, + { + "epoch": 0.19408850786919407, + "grad_norm": 7.34069814682072, + "learning_rate": 9.310530112806232e-06, + "loss": 17.8274, + "step": 10618 + }, + { + "epoch": 0.1941067870656406, + "grad_norm": 8.218669835162492, + "learning_rate": 9.310380107935238e-06, + "loss": 17.9432, + "step": 10619 + }, + { + "epoch": 0.19412506626208711, + "grad_norm": 7.3994748891972675, + "learning_rate": 9.310230087956736e-06, + "loss": 18.1808, + "step": 10620 + }, + { + "epoch": 0.19414334545853365, + "grad_norm": 5.658178055925301, + "learning_rate": 9.310080052871252e-06, + "loss": 17.1895, + "step": 10621 + }, + { + "epoch": 0.19416162465498016, + "grad_norm": 9.654160263039296, + "learning_rate": 9.30993000267931e-06, + "loss": 18.5662, + "step": 10622 + }, + { + "epoch": 0.1941799038514267, + "grad_norm": 6.784352251926139, + "learning_rate": 9.30977993738144e-06, + "loss": 17.7657, + "step": 10623 + }, + { + "epoch": 0.1941981830478732, + "grad_norm": 7.395272450714351, + "learning_rate": 9.309629856978163e-06, + "loss": 17.5341, + "step": 10624 + }, + { + "epoch": 0.19421646224431974, + "grad_norm": 6.801140426252479, + "learning_rate": 9.309479761470008e-06, + "loss": 17.6007, + "step": 10625 + }, + { + "epoch": 0.19423474144076627, + "grad_norm": 6.93117260309201, + "learning_rate": 9.309329650857501e-06, + "loss": 17.8933, + "step": 10626 + }, + { + "epoch": 0.19425302063721278, + "grad_norm": 9.095551821382614, + "learning_rate": 9.309179525141167e-06, + "loss": 18.7848, + "step": 10627 + }, + { + "epoch": 0.19427129983365932, + "grad_norm": 8.740240869200871, + "learning_rate": 9.309029384321533e-06, + "loss": 18.1465, + "step": 10628 + }, + { + "epoch": 0.19428957903010582, + "grad_norm": 6.321263675952195, + "learning_rate": 9.308879228399123e-06, + "loss": 17.6637, + "step": 10629 + }, + { + "epoch": 0.19430785822655236, + "grad_norm": 7.488813766017334, + "learning_rate": 9.308729057374468e-06, + "loss": 18.1155, + "step": 10630 + }, + { + "epoch": 0.1943261374229989, + "grad_norm": 6.80232486213669, + "learning_rate": 9.30857887124809e-06, + "loss": 17.3481, + "step": 10631 + }, + { + "epoch": 0.1943444166194454, + "grad_norm": 6.272396843005326, + "learning_rate": 9.308428670020517e-06, + "loss": 17.3226, + "step": 10632 + }, + { + "epoch": 0.19436269581589194, + "grad_norm": 11.094605897917983, + "learning_rate": 9.308278453692277e-06, + "loss": 18.3589, + "step": 10633 + }, + { + "epoch": 0.19438097501233845, + "grad_norm": 9.03773670620496, + "learning_rate": 9.308128222263893e-06, + "loss": 
18.6966, + "step": 10634 + }, + { + "epoch": 0.19439925420878498, + "grad_norm": 6.290513129643883, + "learning_rate": 9.307977975735894e-06, + "loss": 17.4824, + "step": 10635 + }, + { + "epoch": 0.19441753340523152, + "grad_norm": 6.140177837419432, + "learning_rate": 9.307827714108805e-06, + "loss": 17.5921, + "step": 10636 + }, + { + "epoch": 0.19443581260167803, + "grad_norm": 6.344802791878528, + "learning_rate": 9.307677437383156e-06, + "loss": 17.4377, + "step": 10637 + }, + { + "epoch": 0.19445409179812456, + "grad_norm": 7.6818635390571, + "learning_rate": 9.30752714555947e-06, + "loss": 17.8808, + "step": 10638 + }, + { + "epoch": 0.19447237099457107, + "grad_norm": 7.467567891909981, + "learning_rate": 9.307376838638274e-06, + "loss": 17.9963, + "step": 10639 + }, + { + "epoch": 0.1944906501910176, + "grad_norm": 7.601758098061826, + "learning_rate": 9.307226516620096e-06, + "loss": 18.4309, + "step": 10640 + }, + { + "epoch": 0.1945089293874641, + "grad_norm": 6.949781637654198, + "learning_rate": 9.307076179505466e-06, + "loss": 17.7668, + "step": 10641 + }, + { + "epoch": 0.19452720858391065, + "grad_norm": 6.344390623967537, + "learning_rate": 9.306925827294905e-06, + "loss": 17.6243, + "step": 10642 + }, + { + "epoch": 0.19454548778035718, + "grad_norm": 6.121671076286463, + "learning_rate": 9.306775459988944e-06, + "loss": 17.3048, + "step": 10643 + }, + { + "epoch": 0.1945637669768037, + "grad_norm": 6.634782358505514, + "learning_rate": 9.306625077588108e-06, + "loss": 17.1535, + "step": 10644 + }, + { + "epoch": 0.19458204617325023, + "grad_norm": 7.101410142798328, + "learning_rate": 9.306474680092925e-06, + "loss": 17.5748, + "step": 10645 + }, + { + "epoch": 0.19460032536969674, + "grad_norm": 7.109025070919761, + "learning_rate": 9.306324267503921e-06, + "loss": 17.6979, + "step": 10646 + }, + { + "epoch": 0.19461860456614327, + "grad_norm": 7.028477675482064, + "learning_rate": 9.306173839821626e-06, + "loss": 17.587, + "step": 10647 + }, + { + "epoch": 0.1946368837625898, + "grad_norm": 6.6055027926956145, + "learning_rate": 9.306023397046564e-06, + "loss": 17.51, + "step": 10648 + }, + { + "epoch": 0.19465516295903632, + "grad_norm": 8.327068170217741, + "learning_rate": 9.305872939179263e-06, + "loss": 18.2495, + "step": 10649 + }, + { + "epoch": 0.19467344215548285, + "grad_norm": 6.141988982849482, + "learning_rate": 9.305722466220253e-06, + "loss": 17.316, + "step": 10650 + }, + { + "epoch": 0.19469172135192936, + "grad_norm": 5.932637633055441, + "learning_rate": 9.305571978170058e-06, + "loss": 17.3231, + "step": 10651 + }, + { + "epoch": 0.1947100005483759, + "grad_norm": 7.503772590917745, + "learning_rate": 9.305421475029209e-06, + "loss": 17.8163, + "step": 10652 + }, + { + "epoch": 0.19472827974482243, + "grad_norm": 6.695506480582686, + "learning_rate": 9.30527095679823e-06, + "loss": 17.4673, + "step": 10653 + }, + { + "epoch": 0.19474655894126894, + "grad_norm": 6.662264358726758, + "learning_rate": 9.305120423477647e-06, + "loss": 17.9, + "step": 10654 + }, + { + "epoch": 0.19476483813771547, + "grad_norm": 6.896771157978884, + "learning_rate": 9.304969875067995e-06, + "loss": 17.7757, + "step": 10655 + }, + { + "epoch": 0.19478311733416198, + "grad_norm": 6.460014333683582, + "learning_rate": 9.304819311569794e-06, + "loss": 17.4464, + "step": 10656 + }, + { + "epoch": 0.19480139653060852, + "grad_norm": 6.586314892185982, + "learning_rate": 9.304668732983576e-06, + "loss": 17.4563, + "step": 10657 + }, + { + "epoch": 0.19481967572705503, + 
"grad_norm": 7.077086928979827, + "learning_rate": 9.304518139309869e-06, + "loss": 17.83, + "step": 10658 + }, + { + "epoch": 0.19483795492350156, + "grad_norm": 6.568784637257991, + "learning_rate": 9.304367530549197e-06, + "loss": 17.7367, + "step": 10659 + }, + { + "epoch": 0.1948562341199481, + "grad_norm": 8.723658430859082, + "learning_rate": 9.304216906702092e-06, + "loss": 17.9172, + "step": 10660 + }, + { + "epoch": 0.1948745133163946, + "grad_norm": 7.5472534182362745, + "learning_rate": 9.304066267769078e-06, + "loss": 17.334, + "step": 10661 + }, + { + "epoch": 0.19489279251284114, + "grad_norm": 7.6130460575294965, + "learning_rate": 9.303915613750689e-06, + "loss": 17.8953, + "step": 10662 + }, + { + "epoch": 0.19491107170928765, + "grad_norm": 6.632862241041892, + "learning_rate": 9.303764944647447e-06, + "loss": 17.3226, + "step": 10663 + }, + { + "epoch": 0.19492935090573418, + "grad_norm": 6.255270723640296, + "learning_rate": 9.303614260459882e-06, + "loss": 17.2681, + "step": 10664 + }, + { + "epoch": 0.19494763010218072, + "grad_norm": 6.770546445817632, + "learning_rate": 9.303463561188522e-06, + "loss": 17.4723, + "step": 10665 + }, + { + "epoch": 0.19496590929862723, + "grad_norm": 6.307885423130489, + "learning_rate": 9.303312846833897e-06, + "loss": 17.2604, + "step": 10666 + }, + { + "epoch": 0.19498418849507376, + "grad_norm": 6.301987745633895, + "learning_rate": 9.303162117396534e-06, + "loss": 17.5328, + "step": 10667 + }, + { + "epoch": 0.19500246769152027, + "grad_norm": 5.773298416189256, + "learning_rate": 9.30301137287696e-06, + "loss": 17.0753, + "step": 10668 + }, + { + "epoch": 0.1950207468879668, + "grad_norm": 8.46297537772062, + "learning_rate": 9.302860613275705e-06, + "loss": 18.3674, + "step": 10669 + }, + { + "epoch": 0.19503902608441334, + "grad_norm": 6.870016457950988, + "learning_rate": 9.302709838593299e-06, + "loss": 17.4719, + "step": 10670 + }, + { + "epoch": 0.19505730528085985, + "grad_norm": 6.93741752124257, + "learning_rate": 9.302559048830266e-06, + "loss": 17.7941, + "step": 10671 + }, + { + "epoch": 0.19507558447730639, + "grad_norm": 6.897580639313483, + "learning_rate": 9.302408243987137e-06, + "loss": 17.8397, + "step": 10672 + }, + { + "epoch": 0.1950938636737529, + "grad_norm": 7.6283759654674625, + "learning_rate": 9.302257424064441e-06, + "loss": 17.693, + "step": 10673 + }, + { + "epoch": 0.19511214287019943, + "grad_norm": 9.701676756354317, + "learning_rate": 9.302106589062705e-06, + "loss": 18.6908, + "step": 10674 + }, + { + "epoch": 0.19513042206664594, + "grad_norm": 6.684327878630932, + "learning_rate": 9.30195573898246e-06, + "loss": 17.3524, + "step": 10675 + }, + { + "epoch": 0.19514870126309247, + "grad_norm": 8.020973963191935, + "learning_rate": 9.301804873824234e-06, + "loss": 17.8621, + "step": 10676 + }, + { + "epoch": 0.195166980459539, + "grad_norm": 5.992558588825945, + "learning_rate": 9.301653993588554e-06, + "loss": 17.4288, + "step": 10677 + }, + { + "epoch": 0.19518525965598552, + "grad_norm": 6.253317221883683, + "learning_rate": 9.30150309827595e-06, + "loss": 17.5206, + "step": 10678 + }, + { + "epoch": 0.19520353885243205, + "grad_norm": 6.709397417090899, + "learning_rate": 9.301352187886952e-06, + "loss": 17.6034, + "step": 10679 + }, + { + "epoch": 0.19522181804887856, + "grad_norm": 6.5657654728391535, + "learning_rate": 9.301201262422086e-06, + "loss": 17.66, + "step": 10680 + }, + { + "epoch": 0.1952400972453251, + "grad_norm": 8.283829826030313, + "learning_rate": 
9.301050321881884e-06, + "loss": 18.0831, + "step": 10681 + }, + { + "epoch": 0.19525837644177163, + "grad_norm": 7.298913795471017, + "learning_rate": 9.300899366266875e-06, + "loss": 17.2761, + "step": 10682 + }, + { + "epoch": 0.19527665563821814, + "grad_norm": 9.970929517780979, + "learning_rate": 9.300748395577585e-06, + "loss": 18.5915, + "step": 10683 + }, + { + "epoch": 0.19529493483466467, + "grad_norm": 7.506747819785145, + "learning_rate": 9.300597409814546e-06, + "loss": 17.8649, + "step": 10684 + }, + { + "epoch": 0.19531321403111118, + "grad_norm": 7.590362488356904, + "learning_rate": 9.300446408978285e-06, + "loss": 17.9497, + "step": 10685 + }, + { + "epoch": 0.19533149322755772, + "grad_norm": 7.031810394509858, + "learning_rate": 9.300295393069333e-06, + "loss": 17.7331, + "step": 10686 + }, + { + "epoch": 0.19534977242400425, + "grad_norm": 6.029242218241611, + "learning_rate": 9.300144362088221e-06, + "loss": 17.5721, + "step": 10687 + }, + { + "epoch": 0.19536805162045076, + "grad_norm": 5.882413409238314, + "learning_rate": 9.299993316035474e-06, + "loss": 17.5585, + "step": 10688 + }, + { + "epoch": 0.1953863308168973, + "grad_norm": 6.850432216812345, + "learning_rate": 9.299842254911625e-06, + "loss": 17.7273, + "step": 10689 + }, + { + "epoch": 0.1954046100133438, + "grad_norm": 5.193430594848009, + "learning_rate": 9.2996911787172e-06, + "loss": 17.0182, + "step": 10690 + }, + { + "epoch": 0.19542288920979034, + "grad_norm": 6.219183428513139, + "learning_rate": 9.299540087452732e-06, + "loss": 17.4285, + "step": 10691 + }, + { + "epoch": 0.19544116840623685, + "grad_norm": 6.628865620959073, + "learning_rate": 9.299388981118749e-06, + "loss": 17.6518, + "step": 10692 + }, + { + "epoch": 0.19545944760268338, + "grad_norm": 6.177767982169759, + "learning_rate": 9.29923785971578e-06, + "loss": 17.3978, + "step": 10693 + }, + { + "epoch": 0.19547772679912992, + "grad_norm": 7.3571905538615, + "learning_rate": 9.299086723244358e-06, + "loss": 17.9955, + "step": 10694 + }, + { + "epoch": 0.19549600599557643, + "grad_norm": 7.729865331947313, + "learning_rate": 9.298935571705005e-06, + "loss": 17.8947, + "step": 10695 + }, + { + "epoch": 0.19551428519202296, + "grad_norm": 6.118071552231917, + "learning_rate": 9.29878440509826e-06, + "loss": 17.1034, + "step": 10696 + }, + { + "epoch": 0.19553256438846947, + "grad_norm": 5.95000903021858, + "learning_rate": 9.298633223424647e-06, + "loss": 17.2047, + "step": 10697 + }, + { + "epoch": 0.195550843584916, + "grad_norm": 6.843863320766885, + "learning_rate": 9.298482026684699e-06, + "loss": 17.8315, + "step": 10698 + }, + { + "epoch": 0.19556912278136254, + "grad_norm": 6.487031716869424, + "learning_rate": 9.298330814878944e-06, + "loss": 17.5716, + "step": 10699 + }, + { + "epoch": 0.19558740197780905, + "grad_norm": 7.355481764198525, + "learning_rate": 9.298179588007912e-06, + "loss": 17.663, + "step": 10700 + }, + { + "epoch": 0.19560568117425559, + "grad_norm": 6.468301288869318, + "learning_rate": 9.298028346072132e-06, + "loss": 17.4242, + "step": 10701 + }, + { + "epoch": 0.1956239603707021, + "grad_norm": 7.243010837289347, + "learning_rate": 9.297877089072138e-06, + "loss": 18.0509, + "step": 10702 + }, + { + "epoch": 0.19564223956714863, + "grad_norm": 7.9735117004437805, + "learning_rate": 9.297725817008458e-06, + "loss": 17.4712, + "step": 10703 + }, + { + "epoch": 0.19566051876359516, + "grad_norm": 6.844671694673614, + "learning_rate": 9.297574529881619e-06, + "loss": 17.5793, + "step": 10704 + }, + { + 
"epoch": 0.19567879796004167, + "grad_norm": 6.642715710082763, + "learning_rate": 9.297423227692158e-06, + "loss": 17.3391, + "step": 10705 + }, + { + "epoch": 0.1956970771564882, + "grad_norm": 7.867678656114496, + "learning_rate": 9.297271910440598e-06, + "loss": 18.1073, + "step": 10706 + }, + { + "epoch": 0.19571535635293472, + "grad_norm": 6.688362811911548, + "learning_rate": 9.297120578127474e-06, + "loss": 17.4292, + "step": 10707 + }, + { + "epoch": 0.19573363554938125, + "grad_norm": 7.8432901584227945, + "learning_rate": 9.296969230753316e-06, + "loss": 18.1777, + "step": 10708 + }, + { + "epoch": 0.19575191474582776, + "grad_norm": 6.725996842444234, + "learning_rate": 9.296817868318653e-06, + "loss": 17.6476, + "step": 10709 + }, + { + "epoch": 0.1957701939422743, + "grad_norm": 8.875850812368208, + "learning_rate": 9.296666490824018e-06, + "loss": 18.4301, + "step": 10710 + }, + { + "epoch": 0.19578847313872083, + "grad_norm": 6.075179535741035, + "learning_rate": 9.296515098269938e-06, + "loss": 17.2243, + "step": 10711 + }, + { + "epoch": 0.19580675233516734, + "grad_norm": 6.834905199062873, + "learning_rate": 9.296363690656947e-06, + "loss": 17.6401, + "step": 10712 + }, + { + "epoch": 0.19582503153161387, + "grad_norm": 5.901095979828014, + "learning_rate": 9.296212267985572e-06, + "loss": 17.2658, + "step": 10713 + }, + { + "epoch": 0.19584331072806038, + "grad_norm": 6.3500574552408455, + "learning_rate": 9.296060830256346e-06, + "loss": 17.3889, + "step": 10714 + }, + { + "epoch": 0.19586158992450692, + "grad_norm": 6.91711244092669, + "learning_rate": 9.2959093774698e-06, + "loss": 17.9106, + "step": 10715 + }, + { + "epoch": 0.19587986912095345, + "grad_norm": 6.207672715333535, + "learning_rate": 9.295757909626465e-06, + "loss": 17.1979, + "step": 10716 + }, + { + "epoch": 0.19589814831739996, + "grad_norm": 6.810790272390377, + "learning_rate": 9.29560642672687e-06, + "loss": 17.5168, + "step": 10717 + }, + { + "epoch": 0.1959164275138465, + "grad_norm": 7.976867102812035, + "learning_rate": 9.29545492877155e-06, + "loss": 18.029, + "step": 10718 + }, + { + "epoch": 0.195934706710293, + "grad_norm": 7.261506383099081, + "learning_rate": 9.295303415761032e-06, + "loss": 17.7888, + "step": 10719 + }, + { + "epoch": 0.19595298590673954, + "grad_norm": 7.008392962604296, + "learning_rate": 9.295151887695846e-06, + "loss": 17.5551, + "step": 10720 + }, + { + "epoch": 0.19597126510318608, + "grad_norm": 8.457848839474217, + "learning_rate": 9.295000344576528e-06, + "loss": 17.6012, + "step": 10721 + }, + { + "epoch": 0.19598954429963258, + "grad_norm": 6.2744108798805955, + "learning_rate": 9.294848786403605e-06, + "loss": 17.4231, + "step": 10722 + }, + { + "epoch": 0.19600782349607912, + "grad_norm": 7.999772964827329, + "learning_rate": 9.294697213177611e-06, + "loss": 18.4582, + "step": 10723 + }, + { + "epoch": 0.19602610269252563, + "grad_norm": 5.8756792379465805, + "learning_rate": 9.294545624899076e-06, + "loss": 17.3341, + "step": 10724 + }, + { + "epoch": 0.19604438188897216, + "grad_norm": 7.126986733708883, + "learning_rate": 9.294394021568529e-06, + "loss": 17.692, + "step": 10725 + }, + { + "epoch": 0.19606266108541867, + "grad_norm": 7.765735288662383, + "learning_rate": 9.294242403186507e-06, + "loss": 17.8931, + "step": 10726 + }, + { + "epoch": 0.1960809402818652, + "grad_norm": 7.604591023698333, + "learning_rate": 9.294090769753534e-06, + "loss": 17.8412, + "step": 10727 + }, + { + "epoch": 0.19609921947831174, + "grad_norm": 6.772845344253822, + 
"learning_rate": 9.293939121270148e-06, + "loss": 17.6838, + "step": 10728 + }, + { + "epoch": 0.19611749867475825, + "grad_norm": 8.594384432462094, + "learning_rate": 9.293787457736878e-06, + "loss": 18.3965, + "step": 10729 + }, + { + "epoch": 0.1961357778712048, + "grad_norm": 6.806616763621282, + "learning_rate": 9.293635779154255e-06, + "loss": 17.5861, + "step": 10730 + }, + { + "epoch": 0.1961540570676513, + "grad_norm": 6.43974598625156, + "learning_rate": 9.293484085522812e-06, + "loss": 17.3388, + "step": 10731 + }, + { + "epoch": 0.19617233626409783, + "grad_norm": 7.452707094156926, + "learning_rate": 9.293332376843078e-06, + "loss": 17.93, + "step": 10732 + }, + { + "epoch": 0.19619061546054437, + "grad_norm": 5.686505492084528, + "learning_rate": 9.293180653115587e-06, + "loss": 17.1631, + "step": 10733 + }, + { + "epoch": 0.19620889465699087, + "grad_norm": 5.864712950618436, + "learning_rate": 9.293028914340873e-06, + "loss": 17.1778, + "step": 10734 + }, + { + "epoch": 0.1962271738534374, + "grad_norm": 5.693583083329729, + "learning_rate": 9.292877160519463e-06, + "loss": 17.0784, + "step": 10735 + }, + { + "epoch": 0.19624545304988392, + "grad_norm": 6.146937285583666, + "learning_rate": 9.29272539165189e-06, + "loss": 17.2606, + "step": 10736 + }, + { + "epoch": 0.19626373224633045, + "grad_norm": 5.731994226922119, + "learning_rate": 9.292573607738688e-06, + "loss": 17.3579, + "step": 10737 + }, + { + "epoch": 0.196282011442777, + "grad_norm": 6.736091173241974, + "learning_rate": 9.292421808780389e-06, + "loss": 17.7158, + "step": 10738 + }, + { + "epoch": 0.1963002906392235, + "grad_norm": 8.56794848346165, + "learning_rate": 9.292269994777524e-06, + "loss": 18.3193, + "step": 10739 + }, + { + "epoch": 0.19631856983567003, + "grad_norm": 5.8844783678847685, + "learning_rate": 9.292118165730625e-06, + "loss": 17.149, + "step": 10740 + }, + { + "epoch": 0.19633684903211654, + "grad_norm": 5.790033531735474, + "learning_rate": 9.291966321640223e-06, + "loss": 17.1911, + "step": 10741 + }, + { + "epoch": 0.19635512822856308, + "grad_norm": 9.037962856954309, + "learning_rate": 9.291814462506852e-06, + "loss": 18.7479, + "step": 10742 + }, + { + "epoch": 0.19637340742500958, + "grad_norm": 10.393617610573418, + "learning_rate": 9.291662588331046e-06, + "loss": 18.0216, + "step": 10743 + }, + { + "epoch": 0.19639168662145612, + "grad_norm": 7.880354051662654, + "learning_rate": 9.291510699113332e-06, + "loss": 18.13, + "step": 10744 + }, + { + "epoch": 0.19640996581790265, + "grad_norm": 6.649269674339112, + "learning_rate": 9.291358794854246e-06, + "loss": 17.7472, + "step": 10745 + }, + { + "epoch": 0.19642824501434916, + "grad_norm": 6.24457974277704, + "learning_rate": 9.29120687555432e-06, + "loss": 17.4464, + "step": 10746 + }, + { + "epoch": 0.1964465242107957, + "grad_norm": 5.379803587595591, + "learning_rate": 9.291054941214087e-06, + "loss": 17.0152, + "step": 10747 + }, + { + "epoch": 0.1964648034072422, + "grad_norm": 7.007398276187704, + "learning_rate": 9.290902991834078e-06, + "loss": 17.7299, + "step": 10748 + }, + { + "epoch": 0.19648308260368874, + "grad_norm": 7.699192976521023, + "learning_rate": 9.290751027414828e-06, + "loss": 17.9241, + "step": 10749 + }, + { + "epoch": 0.19650136180013528, + "grad_norm": 5.54371662902325, + "learning_rate": 9.290599047956865e-06, + "loss": 17.1983, + "step": 10750 + }, + { + "epoch": 0.19651964099658178, + "grad_norm": 6.092367698307502, + "learning_rate": 9.290447053460727e-06, + "loss": 17.5637, + "step": 10751 
+ }, + { + "epoch": 0.19653792019302832, + "grad_norm": 6.810666588395866, + "learning_rate": 9.290295043926945e-06, + "loss": 17.5853, + "step": 10752 + }, + { + "epoch": 0.19655619938947483, + "grad_norm": 7.0641765133106285, + "learning_rate": 9.290143019356048e-06, + "loss": 17.8393, + "step": 10753 + }, + { + "epoch": 0.19657447858592136, + "grad_norm": 7.402254855193758, + "learning_rate": 9.289990979748575e-06, + "loss": 17.8503, + "step": 10754 + }, + { + "epoch": 0.1965927577823679, + "grad_norm": 6.726314999452945, + "learning_rate": 9.289838925105054e-06, + "loss": 17.6592, + "step": 10755 + }, + { + "epoch": 0.1966110369788144, + "grad_norm": 5.848487032895969, + "learning_rate": 9.28968685542602e-06, + "loss": 17.1868, + "step": 10756 + }, + { + "epoch": 0.19662931617526094, + "grad_norm": 7.762407867966833, + "learning_rate": 9.289534770712007e-06, + "loss": 17.8981, + "step": 10757 + }, + { + "epoch": 0.19664759537170745, + "grad_norm": 6.529578507189958, + "learning_rate": 9.289382670963548e-06, + "loss": 17.4256, + "step": 10758 + }, + { + "epoch": 0.196665874568154, + "grad_norm": 7.857419411920845, + "learning_rate": 9.289230556181172e-06, + "loss": 18.0849, + "step": 10759 + }, + { + "epoch": 0.1966841537646005, + "grad_norm": 7.205082738089403, + "learning_rate": 9.289078426365417e-06, + "loss": 17.5877, + "step": 10760 + }, + { + "epoch": 0.19670243296104703, + "grad_norm": 5.807288445967445, + "learning_rate": 9.288926281516812e-06, + "loss": 17.2288, + "step": 10761 + }, + { + "epoch": 0.19672071215749357, + "grad_norm": 8.342745064606985, + "learning_rate": 9.288774121635895e-06, + "loss": 17.6209, + "step": 10762 + }, + { + "epoch": 0.19673899135394007, + "grad_norm": 7.180800065658491, + "learning_rate": 9.288621946723196e-06, + "loss": 17.6955, + "step": 10763 + }, + { + "epoch": 0.1967572705503866, + "grad_norm": 6.565589161064451, + "learning_rate": 9.28846975677925e-06, + "loss": 17.5578, + "step": 10764 + }, + { + "epoch": 0.19677554974683312, + "grad_norm": 7.649048599746578, + "learning_rate": 9.28831755180459e-06, + "loss": 18.1785, + "step": 10765 + }, + { + "epoch": 0.19679382894327965, + "grad_norm": 6.6356404509470694, + "learning_rate": 9.288165331799746e-06, + "loss": 17.6297, + "step": 10766 + }, + { + "epoch": 0.1968121081397262, + "grad_norm": 7.221765529876187, + "learning_rate": 9.288013096765258e-06, + "loss": 17.8655, + "step": 10767 + }, + { + "epoch": 0.1968303873361727, + "grad_norm": 6.003297464221561, + "learning_rate": 9.287860846701654e-06, + "loss": 17.2949, + "step": 10768 + }, + { + "epoch": 0.19684866653261923, + "grad_norm": 6.526098385174477, + "learning_rate": 9.287708581609472e-06, + "loss": 17.2405, + "step": 10769 + }, + { + "epoch": 0.19686694572906574, + "grad_norm": 7.148816755709115, + "learning_rate": 9.28755630148924e-06, + "loss": 17.6601, + "step": 10770 + }, + { + "epoch": 0.19688522492551228, + "grad_norm": 6.88800618744152, + "learning_rate": 9.287404006341498e-06, + "loss": 17.3901, + "step": 10771 + }, + { + "epoch": 0.1969035041219588, + "grad_norm": 7.202044151925685, + "learning_rate": 9.287251696166777e-06, + "loss": 17.5591, + "step": 10772 + }, + { + "epoch": 0.19692178331840532, + "grad_norm": 8.632297946052338, + "learning_rate": 9.28709937096561e-06, + "loss": 18.4776, + "step": 10773 + }, + { + "epoch": 0.19694006251485185, + "grad_norm": 9.534936187783808, + "learning_rate": 9.286947030738532e-06, + "loss": 18.5315, + "step": 10774 + }, + { + "epoch": 0.19695834171129836, + "grad_norm": 
6.679001612778986, + "learning_rate": 9.286794675486076e-06, + "loss": 17.3928, + "step": 10775 + }, + { + "epoch": 0.1969766209077449, + "grad_norm": 6.254906701770776, + "learning_rate": 9.286642305208776e-06, + "loss": 17.3015, + "step": 10776 + }, + { + "epoch": 0.1969949001041914, + "grad_norm": 6.088482117192647, + "learning_rate": 9.28648991990717e-06, + "loss": 17.483, + "step": 10777 + }, + { + "epoch": 0.19701317930063794, + "grad_norm": 7.250488850608622, + "learning_rate": 9.286337519581786e-06, + "loss": 17.6471, + "step": 10778 + }, + { + "epoch": 0.19703145849708448, + "grad_norm": 7.044707665896951, + "learning_rate": 9.286185104233162e-06, + "loss": 17.7596, + "step": 10779 + }, + { + "epoch": 0.19704973769353099, + "grad_norm": 7.233640126102035, + "learning_rate": 9.286032673861831e-06, + "loss": 18.0291, + "step": 10780 + }, + { + "epoch": 0.19706801688997752, + "grad_norm": 7.2387001895141205, + "learning_rate": 9.285880228468327e-06, + "loss": 17.7576, + "step": 10781 + }, + { + "epoch": 0.19708629608642403, + "grad_norm": 6.579224732400311, + "learning_rate": 9.285727768053185e-06, + "loss": 17.5907, + "step": 10782 + }, + { + "epoch": 0.19710457528287056, + "grad_norm": 7.167222966537241, + "learning_rate": 9.285575292616938e-06, + "loss": 17.7615, + "step": 10783 + }, + { + "epoch": 0.1971228544793171, + "grad_norm": 6.263058696373889, + "learning_rate": 9.285422802160123e-06, + "loss": 17.6282, + "step": 10784 + }, + { + "epoch": 0.1971411336757636, + "grad_norm": 5.558002420549016, + "learning_rate": 9.285270296683273e-06, + "loss": 17.2044, + "step": 10785 + }, + { + "epoch": 0.19715941287221014, + "grad_norm": 6.457501984565157, + "learning_rate": 9.285117776186922e-06, + "loss": 17.3073, + "step": 10786 + }, + { + "epoch": 0.19717769206865665, + "grad_norm": 6.8279219970562215, + "learning_rate": 9.284965240671604e-06, + "loss": 17.796, + "step": 10787 + }, + { + "epoch": 0.1971959712651032, + "grad_norm": 6.265323462252164, + "learning_rate": 9.284812690137857e-06, + "loss": 17.3556, + "step": 10788 + }, + { + "epoch": 0.19721425046154972, + "grad_norm": 7.24634507097224, + "learning_rate": 9.28466012458621e-06, + "loss": 17.9901, + "step": 10789 + }, + { + "epoch": 0.19723252965799623, + "grad_norm": 6.7671818090699976, + "learning_rate": 9.284507544017203e-06, + "loss": 17.3748, + "step": 10790 + }, + { + "epoch": 0.19725080885444277, + "grad_norm": 7.11350885505168, + "learning_rate": 9.28435494843137e-06, + "loss": 17.4191, + "step": 10791 + }, + { + "epoch": 0.19726908805088927, + "grad_norm": 6.429692882394918, + "learning_rate": 9.284202337829244e-06, + "loss": 17.6613, + "step": 10792 + }, + { + "epoch": 0.1972873672473358, + "grad_norm": 6.543431871173417, + "learning_rate": 9.28404971221136e-06, + "loss": 17.5675, + "step": 10793 + }, + { + "epoch": 0.19730564644378232, + "grad_norm": 7.3774655749339315, + "learning_rate": 9.283897071578254e-06, + "loss": 17.8745, + "step": 10794 + }, + { + "epoch": 0.19732392564022885, + "grad_norm": 7.393943625505083, + "learning_rate": 9.28374441593046e-06, + "loss": 17.5336, + "step": 10795 + }, + { + "epoch": 0.1973422048366754, + "grad_norm": 6.2664293535677995, + "learning_rate": 9.283591745268512e-06, + "loss": 17.5629, + "step": 10796 + }, + { + "epoch": 0.1973604840331219, + "grad_norm": 6.8093411548541365, + "learning_rate": 9.283439059592949e-06, + "loss": 17.6704, + "step": 10797 + }, + { + "epoch": 0.19737876322956843, + "grad_norm": 6.954806390677689, + "learning_rate": 9.283286358904304e-06, + 
"loss": 17.5025, + "step": 10798 + }, + { + "epoch": 0.19739704242601494, + "grad_norm": 8.512641998122215, + "learning_rate": 9.283133643203111e-06, + "loss": 18.2705, + "step": 10799 + }, + { + "epoch": 0.19741532162246148, + "grad_norm": 6.994446648295691, + "learning_rate": 9.282980912489906e-06, + "loss": 17.8209, + "step": 10800 + }, + { + "epoch": 0.197433600818908, + "grad_norm": 6.001762851933642, + "learning_rate": 9.282828166765226e-06, + "loss": 17.3401, + "step": 10801 + }, + { + "epoch": 0.19745188001535452, + "grad_norm": 6.380943936450123, + "learning_rate": 9.282675406029604e-06, + "loss": 17.1109, + "step": 10802 + }, + { + "epoch": 0.19747015921180106, + "grad_norm": 6.851594593750535, + "learning_rate": 9.282522630283575e-06, + "loss": 17.4685, + "step": 10803 + }, + { + "epoch": 0.19748843840824756, + "grad_norm": 6.1245737107628875, + "learning_rate": 9.282369839527678e-06, + "loss": 17.3703, + "step": 10804 + }, + { + "epoch": 0.1975067176046941, + "grad_norm": 7.088313913282194, + "learning_rate": 9.282217033762444e-06, + "loss": 17.644, + "step": 10805 + }, + { + "epoch": 0.19752499680114063, + "grad_norm": 6.70642803619076, + "learning_rate": 9.282064212988414e-06, + "loss": 17.7818, + "step": 10806 + }, + { + "epoch": 0.19754327599758714, + "grad_norm": 5.523099811272099, + "learning_rate": 9.281911377206118e-06, + "loss": 17.089, + "step": 10807 + }, + { + "epoch": 0.19756155519403368, + "grad_norm": 7.182200762733401, + "learning_rate": 9.281758526416094e-06, + "loss": 17.5055, + "step": 10808 + }, + { + "epoch": 0.19757983439048019, + "grad_norm": 5.83526328434585, + "learning_rate": 9.28160566061888e-06, + "loss": 16.8324, + "step": 10809 + }, + { + "epoch": 0.19759811358692672, + "grad_norm": 7.869972066413671, + "learning_rate": 9.281452779815007e-06, + "loss": 18.2756, + "step": 10810 + }, + { + "epoch": 0.19761639278337323, + "grad_norm": 5.962958806178682, + "learning_rate": 9.281299884005017e-06, + "loss": 17.4023, + "step": 10811 + }, + { + "epoch": 0.19763467197981976, + "grad_norm": 6.822327165227126, + "learning_rate": 9.28114697318944e-06, + "loss": 17.5601, + "step": 10812 + }, + { + "epoch": 0.1976529511762663, + "grad_norm": 7.4630866633682365, + "learning_rate": 9.280994047368815e-06, + "loss": 17.8554, + "step": 10813 + }, + { + "epoch": 0.1976712303727128, + "grad_norm": 7.617800974958288, + "learning_rate": 9.280841106543677e-06, + "loss": 17.9616, + "step": 10814 + }, + { + "epoch": 0.19768950956915934, + "grad_norm": 6.9302645605626525, + "learning_rate": 9.280688150714562e-06, + "loss": 17.2055, + "step": 10815 + }, + { + "epoch": 0.19770778876560585, + "grad_norm": 6.354683628285268, + "learning_rate": 9.280535179882008e-06, + "loss": 17.3398, + "step": 10816 + }, + { + "epoch": 0.1977260679620524, + "grad_norm": 5.553582914311273, + "learning_rate": 9.280382194046548e-06, + "loss": 17.2443, + "step": 10817 + }, + { + "epoch": 0.19774434715849892, + "grad_norm": 6.313404246286071, + "learning_rate": 9.28022919320872e-06, + "loss": 17.6153, + "step": 10818 + }, + { + "epoch": 0.19776262635494543, + "grad_norm": 7.183189177438069, + "learning_rate": 9.280076177369062e-06, + "loss": 17.8033, + "step": 10819 + }, + { + "epoch": 0.19778090555139197, + "grad_norm": 6.426110523637121, + "learning_rate": 9.279923146528106e-06, + "loss": 17.4548, + "step": 10820 + }, + { + "epoch": 0.19779918474783847, + "grad_norm": 6.8997417900141835, + "learning_rate": 9.279770100686391e-06, + "loss": 17.6989, + "step": 10821 + }, + { + "epoch": 
0.197817463944285, + "grad_norm": 7.029461683598869, + "learning_rate": 9.279617039844455e-06, + "loss": 17.317, + "step": 10822 + }, + { + "epoch": 0.19783574314073155, + "grad_norm": 5.334324070800717, + "learning_rate": 9.279463964002832e-06, + "loss": 17.0673, + "step": 10823 + }, + { + "epoch": 0.19785402233717805, + "grad_norm": 7.220410031420295, + "learning_rate": 9.279310873162059e-06, + "loss": 17.7454, + "step": 10824 + }, + { + "epoch": 0.1978723015336246, + "grad_norm": 6.445500181237727, + "learning_rate": 9.279157767322673e-06, + "loss": 17.4973, + "step": 10825 + }, + { + "epoch": 0.1978905807300711, + "grad_norm": 7.370052113078593, + "learning_rate": 9.27900464648521e-06, + "loss": 18.4546, + "step": 10826 + }, + { + "epoch": 0.19790885992651763, + "grad_norm": 7.436753759920104, + "learning_rate": 9.278851510650207e-06, + "loss": 17.5844, + "step": 10827 + }, + { + "epoch": 0.19792713912296414, + "grad_norm": 6.357018547397667, + "learning_rate": 9.2786983598182e-06, + "loss": 17.5205, + "step": 10828 + }, + { + "epoch": 0.19794541831941068, + "grad_norm": 6.274501885764996, + "learning_rate": 9.278545193989728e-06, + "loss": 17.3663, + "step": 10829 + }, + { + "epoch": 0.1979636975158572, + "grad_norm": 6.895095266400871, + "learning_rate": 9.278392013165325e-06, + "loss": 17.6496, + "step": 10830 + }, + { + "epoch": 0.19798197671230372, + "grad_norm": 5.365151583121495, + "learning_rate": 9.27823881734553e-06, + "loss": 17.0242, + "step": 10831 + }, + { + "epoch": 0.19800025590875026, + "grad_norm": 7.001238935540446, + "learning_rate": 9.278085606530879e-06, + "loss": 17.775, + "step": 10832 + }, + { + "epoch": 0.19801853510519676, + "grad_norm": 7.1434068090519265, + "learning_rate": 9.277932380721908e-06, + "loss": 17.6297, + "step": 10833 + }, + { + "epoch": 0.1980368143016433, + "grad_norm": 7.27685512217264, + "learning_rate": 9.277779139919157e-06, + "loss": 17.9849, + "step": 10834 + }, + { + "epoch": 0.19805509349808983, + "grad_norm": 6.499332836240062, + "learning_rate": 9.277625884123162e-06, + "loss": 17.5162, + "step": 10835 + }, + { + "epoch": 0.19807337269453634, + "grad_norm": 7.142852026254099, + "learning_rate": 9.277472613334457e-06, + "loss": 17.7445, + "step": 10836 + }, + { + "epoch": 0.19809165189098288, + "grad_norm": 8.129783649562425, + "learning_rate": 9.277319327553584e-06, + "loss": 18.3967, + "step": 10837 + }, + { + "epoch": 0.1981099310874294, + "grad_norm": 6.8305374317355705, + "learning_rate": 9.277166026781076e-06, + "loss": 17.5188, + "step": 10838 + }, + { + "epoch": 0.19812821028387592, + "grad_norm": 6.344683701928417, + "learning_rate": 9.277012711017473e-06, + "loss": 17.4358, + "step": 10839 + }, + { + "epoch": 0.19814648948032246, + "grad_norm": 7.054374100658382, + "learning_rate": 9.276859380263313e-06, + "loss": 17.6545, + "step": 10840 + }, + { + "epoch": 0.19816476867676897, + "grad_norm": 6.621685344435275, + "learning_rate": 9.276706034519129e-06, + "loss": 17.7552, + "step": 10841 + }, + { + "epoch": 0.1981830478732155, + "grad_norm": 6.43218685786527, + "learning_rate": 9.276552673785464e-06, + "loss": 17.2751, + "step": 10842 + }, + { + "epoch": 0.198201327069662, + "grad_norm": 5.896436973847265, + "learning_rate": 9.276399298062853e-06, + "loss": 16.996, + "step": 10843 + }, + { + "epoch": 0.19821960626610854, + "grad_norm": 12.692140193254197, + "learning_rate": 9.276245907351831e-06, + "loss": 18.7467, + "step": 10844 + }, + { + "epoch": 0.19823788546255505, + "grad_norm": 8.392272527927158, + "learning_rate": 
9.27609250165294e-06, + "loss": 18.1811, + "step": 10845 + }, + { + "epoch": 0.1982561646590016, + "grad_norm": 6.3733792969341, + "learning_rate": 9.275939080966716e-06, + "loss": 17.4744, + "step": 10846 + }, + { + "epoch": 0.19827444385544812, + "grad_norm": 6.670479841457684, + "learning_rate": 9.275785645293697e-06, + "loss": 17.6064, + "step": 10847 + }, + { + "epoch": 0.19829272305189463, + "grad_norm": 7.498507478063741, + "learning_rate": 9.27563219463442e-06, + "loss": 17.9733, + "step": 10848 + }, + { + "epoch": 0.19831100224834117, + "grad_norm": 6.610138137691433, + "learning_rate": 9.275478728989422e-06, + "loss": 17.5745, + "step": 10849 + }, + { + "epoch": 0.19832928144478768, + "grad_norm": 10.92354219974126, + "learning_rate": 9.275325248359245e-06, + "loss": 18.6201, + "step": 10850 + }, + { + "epoch": 0.1983475606412342, + "grad_norm": 7.570411642303618, + "learning_rate": 9.27517175274442e-06, + "loss": 18.1232, + "step": 10851 + }, + { + "epoch": 0.19836583983768075, + "grad_norm": 7.008058196705809, + "learning_rate": 9.27501824214549e-06, + "loss": 17.7092, + "step": 10852 + }, + { + "epoch": 0.19838411903412725, + "grad_norm": 6.787764942709426, + "learning_rate": 9.274864716562993e-06, + "loss": 17.4613, + "step": 10853 + }, + { + "epoch": 0.1984023982305738, + "grad_norm": 7.169628550706973, + "learning_rate": 9.274711175997466e-06, + "loss": 17.7941, + "step": 10854 + }, + { + "epoch": 0.1984206774270203, + "grad_norm": 6.785940497665481, + "learning_rate": 9.274557620449448e-06, + "loss": 17.6994, + "step": 10855 + }, + { + "epoch": 0.19843895662346683, + "grad_norm": 5.900930090399167, + "learning_rate": 9.274404049919475e-06, + "loss": 17.4232, + "step": 10856 + }, + { + "epoch": 0.19845723581991337, + "grad_norm": 6.298469309316371, + "learning_rate": 9.274250464408087e-06, + "loss": 17.6218, + "step": 10857 + }, + { + "epoch": 0.19847551501635988, + "grad_norm": 7.4420262749846, + "learning_rate": 9.274096863915824e-06, + "loss": 18.0382, + "step": 10858 + }, + { + "epoch": 0.1984937942128064, + "grad_norm": 8.457845366616512, + "learning_rate": 9.27394324844322e-06, + "loss": 18.1733, + "step": 10859 + }, + { + "epoch": 0.19851207340925292, + "grad_norm": 6.5472351756218075, + "learning_rate": 9.273789617990818e-06, + "loss": 17.4029, + "step": 10860 + }, + { + "epoch": 0.19853035260569946, + "grad_norm": 6.117378900362806, + "learning_rate": 9.273635972559152e-06, + "loss": 17.3464, + "step": 10861 + }, + { + "epoch": 0.19854863180214596, + "grad_norm": 6.69009016103747, + "learning_rate": 9.273482312148766e-06, + "loss": 17.4837, + "step": 10862 + }, + { + "epoch": 0.1985669109985925, + "grad_norm": 6.675722361896055, + "learning_rate": 9.27332863676019e-06, + "loss": 17.777, + "step": 10863 + }, + { + "epoch": 0.19858519019503904, + "grad_norm": 6.670294770073691, + "learning_rate": 9.273174946393973e-06, + "loss": 17.6327, + "step": 10864 + }, + { + "epoch": 0.19860346939148554, + "grad_norm": 5.8263897901825334, + "learning_rate": 9.273021241050645e-06, + "loss": 17.1493, + "step": 10865 + }, + { + "epoch": 0.19862174858793208, + "grad_norm": 6.1740388700652105, + "learning_rate": 9.27286752073075e-06, + "loss": 17.3768, + "step": 10866 + }, + { + "epoch": 0.1986400277843786, + "grad_norm": 5.703210716908118, + "learning_rate": 9.272713785434827e-06, + "loss": 17.1263, + "step": 10867 + }, + { + "epoch": 0.19865830698082512, + "grad_norm": 7.443573776636084, + "learning_rate": 9.27256003516341e-06, + "loss": 18.0349, + "step": 10868 + }, + { + 
"epoch": 0.19867658617727166, + "grad_norm": 5.991726657722256, + "learning_rate": 9.272406269917041e-06, + "loss": 17.1115, + "step": 10869 + }, + { + "epoch": 0.19869486537371817, + "grad_norm": 7.437226033210058, + "learning_rate": 9.27225248969626e-06, + "loss": 18.4094, + "step": 10870 + }, + { + "epoch": 0.1987131445701647, + "grad_norm": 7.523218516674712, + "learning_rate": 9.272098694501604e-06, + "loss": 17.7628, + "step": 10871 + }, + { + "epoch": 0.1987314237666112, + "grad_norm": 6.54169780413771, + "learning_rate": 9.271944884333614e-06, + "loss": 17.5334, + "step": 10872 + }, + { + "epoch": 0.19874970296305774, + "grad_norm": 7.856703845300767, + "learning_rate": 9.271791059192828e-06, + "loss": 18.1138, + "step": 10873 + }, + { + "epoch": 0.19876798215950428, + "grad_norm": 7.311420298885436, + "learning_rate": 9.271637219079784e-06, + "loss": 17.978, + "step": 10874 + }, + { + "epoch": 0.1987862613559508, + "grad_norm": 7.086903096477535, + "learning_rate": 9.27148336399502e-06, + "loss": 17.587, + "step": 10875 + }, + { + "epoch": 0.19880454055239732, + "grad_norm": 6.838484668499034, + "learning_rate": 9.27132949393908e-06, + "loss": 17.5756, + "step": 10876 + }, + { + "epoch": 0.19882281974884383, + "grad_norm": 6.848692496481984, + "learning_rate": 9.271175608912501e-06, + "loss": 17.5735, + "step": 10877 + }, + { + "epoch": 0.19884109894529037, + "grad_norm": 8.390905815444885, + "learning_rate": 9.271021708915822e-06, + "loss": 18.1065, + "step": 10878 + }, + { + "epoch": 0.19885937814173688, + "grad_norm": 6.230535545187577, + "learning_rate": 9.270867793949583e-06, + "loss": 17.6444, + "step": 10879 + }, + { + "epoch": 0.1988776573381834, + "grad_norm": 6.248780898488394, + "learning_rate": 9.270713864014321e-06, + "loss": 17.6363, + "step": 10880 + }, + { + "epoch": 0.19889593653462995, + "grad_norm": 6.088296431862701, + "learning_rate": 9.270559919110579e-06, + "loss": 17.0838, + "step": 10881 + }, + { + "epoch": 0.19891421573107645, + "grad_norm": 7.964064373572228, + "learning_rate": 9.270405959238896e-06, + "loss": 17.9265, + "step": 10882 + }, + { + "epoch": 0.198932494927523, + "grad_norm": 6.243094934678688, + "learning_rate": 9.270251984399807e-06, + "loss": 17.4484, + "step": 10883 + }, + { + "epoch": 0.1989507741239695, + "grad_norm": 7.907115831522865, + "learning_rate": 9.270097994593859e-06, + "loss": 18.049, + "step": 10884 + }, + { + "epoch": 0.19896905332041603, + "grad_norm": 6.272438758235311, + "learning_rate": 9.269943989821587e-06, + "loss": 17.3441, + "step": 10885 + }, + { + "epoch": 0.19898733251686257, + "grad_norm": 6.211685599992287, + "learning_rate": 9.269789970083531e-06, + "loss": 17.1009, + "step": 10886 + }, + { + "epoch": 0.19900561171330908, + "grad_norm": 5.111757953037374, + "learning_rate": 9.269635935380233e-06, + "loss": 16.9119, + "step": 10887 + }, + { + "epoch": 0.1990238909097556, + "grad_norm": 7.02040051715025, + "learning_rate": 9.26948188571223e-06, + "loss": 17.9046, + "step": 10888 + }, + { + "epoch": 0.19904217010620212, + "grad_norm": 5.70667606270827, + "learning_rate": 9.269327821080064e-06, + "loss": 17.2795, + "step": 10889 + }, + { + "epoch": 0.19906044930264866, + "grad_norm": 6.159647776524561, + "learning_rate": 9.269173741484277e-06, + "loss": 17.2703, + "step": 10890 + }, + { + "epoch": 0.1990787284990952, + "grad_norm": 7.719437738199956, + "learning_rate": 9.269019646925404e-06, + "loss": 17.9644, + "step": 10891 + }, + { + "epoch": 0.1990970076955417, + "grad_norm": 5.748278172028444, + 
"learning_rate": 9.268865537403987e-06, + "loss": 17.1837, + "step": 10892 + }, + { + "epoch": 0.19911528689198824, + "grad_norm": 6.12576407150042, + "learning_rate": 9.268711412920567e-06, + "loss": 17.3183, + "step": 10893 + }, + { + "epoch": 0.19913356608843474, + "grad_norm": 7.2380097679109845, + "learning_rate": 9.268557273475685e-06, + "loss": 17.6635, + "step": 10894 + }, + { + "epoch": 0.19915184528488128, + "grad_norm": 7.244045805527676, + "learning_rate": 9.26840311906988e-06, + "loss": 17.9317, + "step": 10895 + }, + { + "epoch": 0.1991701244813278, + "grad_norm": 7.473918145896732, + "learning_rate": 9.268248949703693e-06, + "loss": 17.9833, + "step": 10896 + }, + { + "epoch": 0.19918840367777432, + "grad_norm": 9.104871874023008, + "learning_rate": 9.268094765377662e-06, + "loss": 18.0553, + "step": 10897 + }, + { + "epoch": 0.19920668287422086, + "grad_norm": 6.944195293043811, + "learning_rate": 9.26794056609233e-06, + "loss": 17.1542, + "step": 10898 + }, + { + "epoch": 0.19922496207066737, + "grad_norm": 5.977489454455816, + "learning_rate": 9.267786351848238e-06, + "loss": 17.3206, + "step": 10899 + }, + { + "epoch": 0.1992432412671139, + "grad_norm": 7.571865722419859, + "learning_rate": 9.267632122645924e-06, + "loss": 18.0964, + "step": 10900 + }, + { + "epoch": 0.1992615204635604, + "grad_norm": 7.121409349012491, + "learning_rate": 9.26747787848593e-06, + "loss": 17.2025, + "step": 10901 + }, + { + "epoch": 0.19927979966000695, + "grad_norm": 7.834341990595362, + "learning_rate": 9.267323619368795e-06, + "loss": 17.7799, + "step": 10902 + }, + { + "epoch": 0.19929807885645348, + "grad_norm": 6.495101935179148, + "learning_rate": 9.267169345295063e-06, + "loss": 17.5505, + "step": 10903 + }, + { + "epoch": 0.1993163580529, + "grad_norm": 8.68647864663089, + "learning_rate": 9.267015056265272e-06, + "loss": 18.4633, + "step": 10904 + }, + { + "epoch": 0.19933463724934652, + "grad_norm": 5.533154408312759, + "learning_rate": 9.266860752279964e-06, + "loss": 17.2265, + "step": 10905 + }, + { + "epoch": 0.19935291644579303, + "grad_norm": 9.045653357664435, + "learning_rate": 9.266706433339678e-06, + "loss": 18.4361, + "step": 10906 + }, + { + "epoch": 0.19937119564223957, + "grad_norm": 6.681364541307185, + "learning_rate": 9.266552099444957e-06, + "loss": 17.7212, + "step": 10907 + }, + { + "epoch": 0.1993894748386861, + "grad_norm": 8.276329180423547, + "learning_rate": 9.26639775059634e-06, + "loss": 18.4311, + "step": 10908 + }, + { + "epoch": 0.1994077540351326, + "grad_norm": 7.544906962956952, + "learning_rate": 9.266243386794372e-06, + "loss": 17.7837, + "step": 10909 + }, + { + "epoch": 0.19942603323157915, + "grad_norm": 6.3703401990947155, + "learning_rate": 9.266089008039589e-06, + "loss": 17.3522, + "step": 10910 + }, + { + "epoch": 0.19944431242802566, + "grad_norm": 7.063462640206068, + "learning_rate": 9.265934614332534e-06, + "loss": 17.6295, + "step": 10911 + }, + { + "epoch": 0.1994625916244722, + "grad_norm": 7.051708868623823, + "learning_rate": 9.265780205673749e-06, + "loss": 17.9861, + "step": 10912 + }, + { + "epoch": 0.1994808708209187, + "grad_norm": 6.990076011496199, + "learning_rate": 9.265625782063774e-06, + "loss": 17.8551, + "step": 10913 + }, + { + "epoch": 0.19949915001736523, + "grad_norm": 6.814238402632026, + "learning_rate": 9.265471343503152e-06, + "loss": 17.6684, + "step": 10914 + }, + { + "epoch": 0.19951742921381177, + "grad_norm": 6.723381483146955, + "learning_rate": 9.265316889992422e-06, + "loss": 18.0331, + "step": 
10915 + }, + { + "epoch": 0.19953570841025828, + "grad_norm": 6.216760276952046, + "learning_rate": 9.265162421532126e-06, + "loss": 17.3248, + "step": 10916 + }, + { + "epoch": 0.1995539876067048, + "grad_norm": 7.183698849365839, + "learning_rate": 9.265007938122807e-06, + "loss": 17.9315, + "step": 10917 + }, + { + "epoch": 0.19957226680315132, + "grad_norm": 7.490734028144163, + "learning_rate": 9.264853439765005e-06, + "loss": 18.2393, + "step": 10918 + }, + { + "epoch": 0.19959054599959786, + "grad_norm": 8.24745633213347, + "learning_rate": 9.264698926459261e-06, + "loss": 18.0886, + "step": 10919 + }, + { + "epoch": 0.1996088251960444, + "grad_norm": 6.113159803142408, + "learning_rate": 9.264544398206119e-06, + "loss": 17.486, + "step": 10920 + }, + { + "epoch": 0.1996271043924909, + "grad_norm": 8.206470583068661, + "learning_rate": 9.264389855006118e-06, + "loss": 18.1456, + "step": 10921 + }, + { + "epoch": 0.19964538358893744, + "grad_norm": 7.452119559441181, + "learning_rate": 9.2642352968598e-06, + "loss": 17.9846, + "step": 10922 + }, + { + "epoch": 0.19966366278538394, + "grad_norm": 6.938408694079041, + "learning_rate": 9.264080723767707e-06, + "loss": 17.8101, + "step": 10923 + }, + { + "epoch": 0.19968194198183048, + "grad_norm": 6.019695578849084, + "learning_rate": 9.263926135730383e-06, + "loss": 17.3154, + "step": 10924 + }, + { + "epoch": 0.19970022117827702, + "grad_norm": 6.782514032171597, + "learning_rate": 9.263771532748367e-06, + "loss": 17.5067, + "step": 10925 + }, + { + "epoch": 0.19971850037472352, + "grad_norm": 6.118197987069956, + "learning_rate": 9.263616914822201e-06, + "loss": 17.0961, + "step": 10926 + }, + { + "epoch": 0.19973677957117006, + "grad_norm": 7.035595199165753, + "learning_rate": 9.26346228195243e-06, + "loss": 17.7247, + "step": 10927 + }, + { + "epoch": 0.19975505876761657, + "grad_norm": 6.921837355340135, + "learning_rate": 9.26330763413959e-06, + "loss": 17.6363, + "step": 10928 + }, + { + "epoch": 0.1997733379640631, + "grad_norm": 7.238623576682792, + "learning_rate": 9.26315297138423e-06, + "loss": 17.6164, + "step": 10929 + }, + { + "epoch": 0.1997916171605096, + "grad_norm": 9.928512166924047, + "learning_rate": 9.262998293686888e-06, + "loss": 17.7197, + "step": 10930 + }, + { + "epoch": 0.19980989635695615, + "grad_norm": 5.792244353120198, + "learning_rate": 9.262843601048104e-06, + "loss": 17.2798, + "step": 10931 + }, + { + "epoch": 0.19982817555340268, + "grad_norm": 5.401133240098436, + "learning_rate": 9.262688893468426e-06, + "loss": 17.0325, + "step": 10932 + }, + { + "epoch": 0.1998464547498492, + "grad_norm": 6.911698034065736, + "learning_rate": 9.262534170948392e-06, + "loss": 17.7217, + "step": 10933 + }, + { + "epoch": 0.19986473394629573, + "grad_norm": 7.515353464874539, + "learning_rate": 9.262379433488547e-06, + "loss": 17.8443, + "step": 10934 + }, + { + "epoch": 0.19988301314274223, + "grad_norm": 6.765047061950747, + "learning_rate": 9.262224681089432e-06, + "loss": 17.6567, + "step": 10935 + }, + { + "epoch": 0.19990129233918877, + "grad_norm": 6.717639189321588, + "learning_rate": 9.262069913751589e-06, + "loss": 17.6313, + "step": 10936 + }, + { + "epoch": 0.1999195715356353, + "grad_norm": 7.848672015083547, + "learning_rate": 9.261915131475561e-06, + "loss": 17.9072, + "step": 10937 + }, + { + "epoch": 0.1999378507320818, + "grad_norm": 7.656189118648847, + "learning_rate": 9.261760334261888e-06, + "loss": 18.2532, + "step": 10938 + }, + { + "epoch": 0.19995612992852835, + "grad_norm": 
6.906178224734655, + "learning_rate": 9.261605522111117e-06, + "loss": 17.8904, + "step": 10939 + }, + { + "epoch": 0.19997440912497486, + "grad_norm": 5.500725992902303, + "learning_rate": 9.261450695023789e-06, + "loss": 17.2585, + "step": 10940 + }, + { + "epoch": 0.1999926883214214, + "grad_norm": 7.063104493861325, + "learning_rate": 9.261295853000445e-06, + "loss": 17.7415, + "step": 10941 + }, + { + "epoch": 0.20001096751786793, + "grad_norm": 7.882567722388475, + "learning_rate": 9.26114099604163e-06, + "loss": 18.3038, + "step": 10942 + }, + { + "epoch": 0.20002924671431443, + "grad_norm": 6.572875620156166, + "learning_rate": 9.260986124147884e-06, + "loss": 17.6132, + "step": 10943 + }, + { + "epoch": 0.20004752591076097, + "grad_norm": 6.43107508416896, + "learning_rate": 9.260831237319752e-06, + "loss": 17.4252, + "step": 10944 + }, + { + "epoch": 0.20006580510720748, + "grad_norm": 6.133192475896936, + "learning_rate": 9.260676335557774e-06, + "loss": 17.3416, + "step": 10945 + }, + { + "epoch": 0.20008408430365401, + "grad_norm": 5.888722241992663, + "learning_rate": 9.260521418862498e-06, + "loss": 17.3938, + "step": 10946 + }, + { + "epoch": 0.20010236350010052, + "grad_norm": 8.157620817807263, + "learning_rate": 9.260366487234465e-06, + "loss": 18.2197, + "step": 10947 + }, + { + "epoch": 0.20012064269654706, + "grad_norm": 7.065616268419404, + "learning_rate": 9.260211540674215e-06, + "loss": 17.7513, + "step": 10948 + }, + { + "epoch": 0.2001389218929936, + "grad_norm": 6.562597400109817, + "learning_rate": 9.260056579182292e-06, + "loss": 17.3057, + "step": 10949 + }, + { + "epoch": 0.2001572010894401, + "grad_norm": 6.39383651983612, + "learning_rate": 9.259901602759244e-06, + "loss": 17.6876, + "step": 10950 + }, + { + "epoch": 0.20017548028588664, + "grad_norm": 6.396783115609105, + "learning_rate": 9.25974661140561e-06, + "loss": 17.347, + "step": 10951 + }, + { + "epoch": 0.20019375948233314, + "grad_norm": 5.385791227480741, + "learning_rate": 9.259591605121932e-06, + "loss": 17.1368, + "step": 10952 + }, + { + "epoch": 0.20021203867877968, + "grad_norm": 6.698850836391977, + "learning_rate": 9.259436583908754e-06, + "loss": 17.3805, + "step": 10953 + }, + { + "epoch": 0.20023031787522622, + "grad_norm": 6.7136310001901744, + "learning_rate": 9.259281547766623e-06, + "loss": 17.939, + "step": 10954 + }, + { + "epoch": 0.20024859707167272, + "grad_norm": 6.801087623392054, + "learning_rate": 9.259126496696079e-06, + "loss": 17.7017, + "step": 10955 + }, + { + "epoch": 0.20026687626811926, + "grad_norm": 7.7693655840737526, + "learning_rate": 9.258971430697666e-06, + "loss": 17.7556, + "step": 10956 + }, + { + "epoch": 0.20028515546456577, + "grad_norm": 6.9253554573314515, + "learning_rate": 9.258816349771927e-06, + "loss": 17.4131, + "step": 10957 + }, + { + "epoch": 0.2003034346610123, + "grad_norm": 6.094023118280109, + "learning_rate": 9.258661253919408e-06, + "loss": 17.5846, + "step": 10958 + }, + { + "epoch": 0.20032171385745884, + "grad_norm": 6.475855412892942, + "learning_rate": 9.25850614314065e-06, + "loss": 17.5506, + "step": 10959 + }, + { + "epoch": 0.20033999305390535, + "grad_norm": 6.121848840087118, + "learning_rate": 9.258351017436196e-06, + "loss": 17.2962, + "step": 10960 + }, + { + "epoch": 0.20035827225035188, + "grad_norm": 6.760167166388431, + "learning_rate": 9.258195876806593e-06, + "loss": 17.7835, + "step": 10961 + }, + { + "epoch": 0.2003765514467984, + "grad_norm": 5.917783471813344, + "learning_rate": 9.258040721252383e-06, + 
"loss": 17.2104, + "step": 10962 + }, + { + "epoch": 0.20039483064324493, + "grad_norm": 8.432581227712953, + "learning_rate": 9.257885550774108e-06, + "loss": 18.203, + "step": 10963 + }, + { + "epoch": 0.20041310983969143, + "grad_norm": 6.43125791912894, + "learning_rate": 9.257730365372315e-06, + "loss": 17.3735, + "step": 10964 + }, + { + "epoch": 0.20043138903613797, + "grad_norm": 6.348945505247846, + "learning_rate": 9.257575165047547e-06, + "loss": 17.5365, + "step": 10965 + }, + { + "epoch": 0.2004496682325845, + "grad_norm": 5.887554857610085, + "learning_rate": 9.257419949800347e-06, + "loss": 17.173, + "step": 10966 + }, + { + "epoch": 0.200467947429031, + "grad_norm": 7.285541336636381, + "learning_rate": 9.257264719631259e-06, + "loss": 17.8792, + "step": 10967 + }, + { + "epoch": 0.20048622662547755, + "grad_norm": 7.007494663319904, + "learning_rate": 9.257109474540828e-06, + "loss": 17.6362, + "step": 10968 + }, + { + "epoch": 0.20050450582192406, + "grad_norm": 6.328330410958518, + "learning_rate": 9.256954214529599e-06, + "loss": 17.5554, + "step": 10969 + }, + { + "epoch": 0.2005227850183706, + "grad_norm": 6.510092270616266, + "learning_rate": 9.256798939598113e-06, + "loss": 17.5696, + "step": 10970 + }, + { + "epoch": 0.20054106421481713, + "grad_norm": 5.909293919661958, + "learning_rate": 9.256643649746917e-06, + "loss": 17.2425, + "step": 10971 + }, + { + "epoch": 0.20055934341126364, + "grad_norm": 6.915669059292693, + "learning_rate": 9.256488344976552e-06, + "loss": 17.5665, + "step": 10972 + }, + { + "epoch": 0.20057762260771017, + "grad_norm": 7.034467967793795, + "learning_rate": 9.256333025287569e-06, + "loss": 17.6718, + "step": 10973 + }, + { + "epoch": 0.20059590180415668, + "grad_norm": 6.076594057259614, + "learning_rate": 9.256177690680506e-06, + "loss": 17.0781, + "step": 10974 + }, + { + "epoch": 0.20061418100060321, + "grad_norm": 5.794475106139136, + "learning_rate": 9.256022341155909e-06, + "loss": 17.5427, + "step": 10975 + }, + { + "epoch": 0.20063246019704975, + "grad_norm": 5.456558611233308, + "learning_rate": 9.255866976714323e-06, + "loss": 17.1452, + "step": 10976 + }, + { + "epoch": 0.20065073939349626, + "grad_norm": 7.4855302093332075, + "learning_rate": 9.255711597356293e-06, + "loss": 17.6005, + "step": 10977 + }, + { + "epoch": 0.2006690185899428, + "grad_norm": 8.035824468405956, + "learning_rate": 9.255556203082363e-06, + "loss": 17.6279, + "step": 10978 + }, + { + "epoch": 0.2006872977863893, + "grad_norm": 6.521175712373388, + "learning_rate": 9.25540079389308e-06, + "loss": 17.6865, + "step": 10979 + }, + { + "epoch": 0.20070557698283584, + "grad_norm": 6.731090043339445, + "learning_rate": 9.255245369788983e-06, + "loss": 17.648, + "step": 10980 + }, + { + "epoch": 0.20072385617928235, + "grad_norm": 8.79038401908676, + "learning_rate": 9.255089930770621e-06, + "loss": 17.3754, + "step": 10981 + }, + { + "epoch": 0.20074213537572888, + "grad_norm": 6.29924407503212, + "learning_rate": 9.254934476838539e-06, + "loss": 17.3005, + "step": 10982 + }, + { + "epoch": 0.20076041457217542, + "grad_norm": 5.544228481101844, + "learning_rate": 9.254779007993281e-06, + "loss": 17.005, + "step": 10983 + }, + { + "epoch": 0.20077869376862192, + "grad_norm": 8.489774790847997, + "learning_rate": 9.254623524235392e-06, + "loss": 18.2677, + "step": 10984 + }, + { + "epoch": 0.20079697296506846, + "grad_norm": 7.079374417733842, + "learning_rate": 9.254468025565414e-06, + "loss": 17.6445, + "step": 10985 + }, + { + "epoch": 
0.20081525216151497, + "grad_norm": 7.550008096182841, + "learning_rate": 9.254312511983898e-06, + "loss": 17.9238, + "step": 10986 + }, + { + "epoch": 0.2008335313579615, + "grad_norm": 6.945211437259713, + "learning_rate": 9.254156983491385e-06, + "loss": 17.7057, + "step": 10987 + }, + { + "epoch": 0.20085181055440804, + "grad_norm": 7.7531398968572685, + "learning_rate": 9.25400144008842e-06, + "loss": 17.6414, + "step": 10988 + }, + { + "epoch": 0.20087008975085455, + "grad_norm": 6.249767722058566, + "learning_rate": 9.25384588177555e-06, + "loss": 17.4012, + "step": 10989 + }, + { + "epoch": 0.20088836894730108, + "grad_norm": 7.076621240543565, + "learning_rate": 9.253690308553318e-06, + "loss": 17.9639, + "step": 10990 + }, + { + "epoch": 0.2009066481437476, + "grad_norm": 6.622467245554146, + "learning_rate": 9.253534720422272e-06, + "loss": 17.5139, + "step": 10991 + }, + { + "epoch": 0.20092492734019413, + "grad_norm": 10.602283253980888, + "learning_rate": 9.253379117382957e-06, + "loss": 18.7819, + "step": 10992 + }, + { + "epoch": 0.20094320653664066, + "grad_norm": 7.706285912694962, + "learning_rate": 9.253223499435916e-06, + "loss": 17.9942, + "step": 10993 + }, + { + "epoch": 0.20096148573308717, + "grad_norm": 7.467509917385291, + "learning_rate": 9.253067866581696e-06, + "loss": 17.9538, + "step": 10994 + }, + { + "epoch": 0.2009797649295337, + "grad_norm": 7.20151835260283, + "learning_rate": 9.25291221882084e-06, + "loss": 17.9342, + "step": 10995 + }, + { + "epoch": 0.2009980441259802, + "grad_norm": 6.681603235474797, + "learning_rate": 9.252756556153898e-06, + "loss": 17.5466, + "step": 10996 + }, + { + "epoch": 0.20101632332242675, + "grad_norm": 5.785585526516853, + "learning_rate": 9.252600878581413e-06, + "loss": 17.3131, + "step": 10997 + }, + { + "epoch": 0.20103460251887326, + "grad_norm": 6.260524820848509, + "learning_rate": 9.252445186103931e-06, + "loss": 17.3714, + "step": 10998 + }, + { + "epoch": 0.2010528817153198, + "grad_norm": 5.946378521976767, + "learning_rate": 9.252289478721996e-06, + "loss": 17.1623, + "step": 10999 + }, + { + "epoch": 0.20107116091176633, + "grad_norm": 6.690654018343409, + "learning_rate": 9.252133756436158e-06, + "loss": 17.7501, + "step": 11000 + }, + { + "epoch": 0.20108944010821284, + "grad_norm": 6.182391269665302, + "learning_rate": 9.251978019246957e-06, + "loss": 17.2598, + "step": 11001 + }, + { + "epoch": 0.20110771930465937, + "grad_norm": 6.812728097434233, + "learning_rate": 9.251822267154946e-06, + "loss": 17.5469, + "step": 11002 + }, + { + "epoch": 0.20112599850110588, + "grad_norm": 7.386204815518348, + "learning_rate": 9.251666500160663e-06, + "loss": 17.613, + "step": 11003 + }, + { + "epoch": 0.20114427769755241, + "grad_norm": 6.417796772408095, + "learning_rate": 9.251510718264661e-06, + "loss": 17.3802, + "step": 11004 + }, + { + "epoch": 0.20116255689399895, + "grad_norm": 7.005406068602088, + "learning_rate": 9.251354921467482e-06, + "loss": 17.8147, + "step": 11005 + }, + { + "epoch": 0.20118083609044546, + "grad_norm": 7.020304115906132, + "learning_rate": 9.25119910976967e-06, + "loss": 17.8303, + "step": 11006 + }, + { + "epoch": 0.201199115286892, + "grad_norm": 7.980386483023567, + "learning_rate": 9.251043283171777e-06, + "loss": 18.3758, + "step": 11007 + }, + { + "epoch": 0.2012173944833385, + "grad_norm": 7.526697060476995, + "learning_rate": 9.250887441674345e-06, + "loss": 18.1684, + "step": 11008 + }, + { + "epoch": 0.20123567367978504, + "grad_norm": 7.133275257449116, + 
"learning_rate": 9.250731585277924e-06, + "loss": 17.7657, + "step": 11009 + }, + { + "epoch": 0.20125395287623157, + "grad_norm": 7.324366944288287, + "learning_rate": 9.250575713983056e-06, + "loss": 17.7502, + "step": 11010 + }, + { + "epoch": 0.20127223207267808, + "grad_norm": 5.644457924810181, + "learning_rate": 9.250419827790287e-06, + "loss": 17.3969, + "step": 11011 + }, + { + "epoch": 0.20129051126912462, + "grad_norm": 7.12453096673587, + "learning_rate": 9.250263926700168e-06, + "loss": 17.6701, + "step": 11012 + }, + { + "epoch": 0.20130879046557112, + "grad_norm": 7.7097577919955365, + "learning_rate": 9.25010801071324e-06, + "loss": 17.8423, + "step": 11013 + }, + { + "epoch": 0.20132706966201766, + "grad_norm": 7.522354809228273, + "learning_rate": 9.249952079830055e-06, + "loss": 17.8738, + "step": 11014 + }, + { + "epoch": 0.20134534885846417, + "grad_norm": 7.91721991854234, + "learning_rate": 9.249796134051156e-06, + "loss": 18.1216, + "step": 11015 + }, + { + "epoch": 0.2013636280549107, + "grad_norm": 7.953338394016145, + "learning_rate": 9.24964017337709e-06, + "loss": 18.6414, + "step": 11016 + }, + { + "epoch": 0.20138190725135724, + "grad_norm": 5.907648607896258, + "learning_rate": 9.249484197808405e-06, + "loss": 17.4272, + "step": 11017 + }, + { + "epoch": 0.20140018644780375, + "grad_norm": 7.452040030432068, + "learning_rate": 9.249328207345645e-06, + "loss": 18.0281, + "step": 11018 + }, + { + "epoch": 0.20141846564425028, + "grad_norm": 12.690281287890302, + "learning_rate": 9.24917220198936e-06, + "loss": 17.2009, + "step": 11019 + }, + { + "epoch": 0.2014367448406968, + "grad_norm": 5.707084774368487, + "learning_rate": 9.249016181740093e-06, + "loss": 17.3794, + "step": 11020 + }, + { + "epoch": 0.20145502403714333, + "grad_norm": 7.7033771874914425, + "learning_rate": 9.248860146598395e-06, + "loss": 18.2721, + "step": 11021 + }, + { + "epoch": 0.20147330323358986, + "grad_norm": 7.073375834259554, + "learning_rate": 9.24870409656481e-06, + "loss": 17.5917, + "step": 11022 + }, + { + "epoch": 0.20149158243003637, + "grad_norm": 5.304524765367442, + "learning_rate": 9.248548031639885e-06, + "loss": 17.1065, + "step": 11023 + }, + { + "epoch": 0.2015098616264829, + "grad_norm": 8.004742125968127, + "learning_rate": 9.24839195182417e-06, + "loss": 18.5089, + "step": 11024 + }, + { + "epoch": 0.2015281408229294, + "grad_norm": 7.681500562120354, + "learning_rate": 9.248235857118209e-06, + "loss": 17.8945, + "step": 11025 + }, + { + "epoch": 0.20154642001937595, + "grad_norm": 6.671975679551059, + "learning_rate": 9.248079747522549e-06, + "loss": 17.4852, + "step": 11026 + }, + { + "epoch": 0.20156469921582248, + "grad_norm": 6.981758276056119, + "learning_rate": 9.247923623037739e-06, + "loss": 18.1079, + "step": 11027 + }, + { + "epoch": 0.201582978412269, + "grad_norm": 8.387298847381826, + "learning_rate": 9.247767483664325e-06, + "loss": 18.0783, + "step": 11028 + }, + { + "epoch": 0.20160125760871553, + "grad_norm": 5.622659635224309, + "learning_rate": 9.247611329402854e-06, + "loss": 17.0959, + "step": 11029 + }, + { + "epoch": 0.20161953680516204, + "grad_norm": 6.730399075504475, + "learning_rate": 9.247455160253874e-06, + "loss": 17.6984, + "step": 11030 + }, + { + "epoch": 0.20163781600160857, + "grad_norm": 6.899416727690391, + "learning_rate": 9.247298976217933e-06, + "loss": 17.6986, + "step": 11031 + }, + { + "epoch": 0.20165609519805508, + "grad_norm": 6.0520197184089595, + "learning_rate": 9.247142777295578e-06, + "loss": 17.3721, + 
"step": 11032 + }, + { + "epoch": 0.20167437439450162, + "grad_norm": 6.393644475255894, + "learning_rate": 9.246986563487355e-06, + "loss": 17.461, + "step": 11033 + }, + { + "epoch": 0.20169265359094815, + "grad_norm": 6.182542631902963, + "learning_rate": 9.246830334793812e-06, + "loss": 17.4043, + "step": 11034 + }, + { + "epoch": 0.20171093278739466, + "grad_norm": 7.691680825883224, + "learning_rate": 9.246674091215499e-06, + "loss": 17.9927, + "step": 11035 + }, + { + "epoch": 0.2017292119838412, + "grad_norm": 5.994311075954899, + "learning_rate": 9.246517832752961e-06, + "loss": 17.284, + "step": 11036 + }, + { + "epoch": 0.2017474911802877, + "grad_norm": 5.222483291875188, + "learning_rate": 9.246361559406747e-06, + "loss": 16.9912, + "step": 11037 + }, + { + "epoch": 0.20176577037673424, + "grad_norm": 6.857095782740968, + "learning_rate": 9.246205271177405e-06, + "loss": 17.9019, + "step": 11038 + }, + { + "epoch": 0.20178404957318077, + "grad_norm": 6.771680986195066, + "learning_rate": 9.246048968065479e-06, + "loss": 17.588, + "step": 11039 + }, + { + "epoch": 0.20180232876962728, + "grad_norm": 6.828068076002624, + "learning_rate": 9.245892650071521e-06, + "loss": 17.5896, + "step": 11040 + }, + { + "epoch": 0.20182060796607382, + "grad_norm": 18.483989187270236, + "learning_rate": 9.245736317196079e-06, + "loss": 17.829, + "step": 11041 + }, + { + "epoch": 0.20183888716252033, + "grad_norm": 6.282962753229796, + "learning_rate": 9.2455799694397e-06, + "loss": 17.3659, + "step": 11042 + }, + { + "epoch": 0.20185716635896686, + "grad_norm": 6.50966517020041, + "learning_rate": 9.24542360680293e-06, + "loss": 17.7501, + "step": 11043 + }, + { + "epoch": 0.2018754455554134, + "grad_norm": 6.407674126181323, + "learning_rate": 9.245267229286319e-06, + "loss": 17.4041, + "step": 11044 + }, + { + "epoch": 0.2018937247518599, + "grad_norm": 6.749213381678077, + "learning_rate": 9.245110836890415e-06, + "loss": 17.6395, + "step": 11045 + }, + { + "epoch": 0.20191200394830644, + "grad_norm": 6.644572490671224, + "learning_rate": 9.244954429615766e-06, + "loss": 17.4293, + "step": 11046 + }, + { + "epoch": 0.20193028314475295, + "grad_norm": 6.201464127877089, + "learning_rate": 9.244798007462919e-06, + "loss": 17.2814, + "step": 11047 + }, + { + "epoch": 0.20194856234119948, + "grad_norm": 6.999797919378978, + "learning_rate": 9.244641570432426e-06, + "loss": 17.9026, + "step": 11048 + }, + { + "epoch": 0.201966841537646, + "grad_norm": 8.051882457201522, + "learning_rate": 9.24448511852483e-06, + "loss": 18.3227, + "step": 11049 + }, + { + "epoch": 0.20198512073409253, + "grad_norm": 7.266519999310683, + "learning_rate": 9.244328651740684e-06, + "loss": 17.7956, + "step": 11050 + }, + { + "epoch": 0.20200339993053906, + "grad_norm": 8.556487865262474, + "learning_rate": 9.244172170080532e-06, + "loss": 18.994, + "step": 11051 + }, + { + "epoch": 0.20202167912698557, + "grad_norm": 6.118251722258119, + "learning_rate": 9.244015673544925e-06, + "loss": 17.5535, + "step": 11052 + }, + { + "epoch": 0.2020399583234321, + "grad_norm": 7.353448160888481, + "learning_rate": 9.243859162134414e-06, + "loss": 17.6268, + "step": 11053 + }, + { + "epoch": 0.20205823751987861, + "grad_norm": 6.1185326123313315, + "learning_rate": 9.243702635849542e-06, + "loss": 17.36, + "step": 11054 + }, + { + "epoch": 0.20207651671632515, + "grad_norm": 6.173621161907901, + "learning_rate": 9.243546094690863e-06, + "loss": 17.5314, + "step": 11055 + }, + { + "epoch": 0.20209479591277169, + "grad_norm": 
7.005533150540325, + "learning_rate": 9.243389538658922e-06, + "loss": 17.8785, + "step": 11056 + }, + { + "epoch": 0.2021130751092182, + "grad_norm": 8.048076185664863, + "learning_rate": 9.243232967754269e-06, + "loss": 18.4834, + "step": 11057 + }, + { + "epoch": 0.20213135430566473, + "grad_norm": 5.875936160734458, + "learning_rate": 9.243076381977453e-06, + "loss": 16.9904, + "step": 11058 + }, + { + "epoch": 0.20214963350211124, + "grad_norm": 6.812158374551983, + "learning_rate": 9.242919781329021e-06, + "loss": 17.4696, + "step": 11059 + }, + { + "epoch": 0.20216791269855777, + "grad_norm": 6.78174248427758, + "learning_rate": 9.242763165809525e-06, + "loss": 17.8872, + "step": 11060 + }, + { + "epoch": 0.2021861918950043, + "grad_norm": 6.690291000638457, + "learning_rate": 9.24260653541951e-06, + "loss": 17.7383, + "step": 11061 + }, + { + "epoch": 0.20220447109145082, + "grad_norm": 7.568173641090415, + "learning_rate": 9.24244989015953e-06, + "loss": 17.9104, + "step": 11062 + }, + { + "epoch": 0.20222275028789735, + "grad_norm": 5.596928581622695, + "learning_rate": 9.24229323003013e-06, + "loss": 17.2972, + "step": 11063 + }, + { + "epoch": 0.20224102948434386, + "grad_norm": 6.015883396400564, + "learning_rate": 9.242136555031862e-06, + "loss": 17.4608, + "step": 11064 + }, + { + "epoch": 0.2022593086807904, + "grad_norm": 6.328501309808907, + "learning_rate": 9.241979865165271e-06, + "loss": 17.1676, + "step": 11065 + }, + { + "epoch": 0.2022775878772369, + "grad_norm": 7.222603573510737, + "learning_rate": 9.24182316043091e-06, + "loss": 17.8221, + "step": 11066 + }, + { + "epoch": 0.20229586707368344, + "grad_norm": 7.106749197423903, + "learning_rate": 9.241666440829326e-06, + "loss": 18.1133, + "step": 11067 + }, + { + "epoch": 0.20231414627012997, + "grad_norm": 5.983886671823255, + "learning_rate": 9.241509706361072e-06, + "loss": 17.2629, + "step": 11068 + }, + { + "epoch": 0.20233242546657648, + "grad_norm": 7.179488341236794, + "learning_rate": 9.24135295702669e-06, + "loss": 17.8632, + "step": 11069 + }, + { + "epoch": 0.20235070466302302, + "grad_norm": 6.524423525191377, + "learning_rate": 9.241196192826738e-06, + "loss": 17.7955, + "step": 11070 + }, + { + "epoch": 0.20236898385946953, + "grad_norm": 6.264568453092564, + "learning_rate": 9.24103941376176e-06, + "loss": 17.2563, + "step": 11071 + }, + { + "epoch": 0.20238726305591606, + "grad_norm": 8.58333432459693, + "learning_rate": 9.240882619832306e-06, + "loss": 18.3223, + "step": 11072 + }, + { + "epoch": 0.2024055422523626, + "grad_norm": 7.066775398865614, + "learning_rate": 9.240725811038927e-06, + "loss": 17.8233, + "step": 11073 + }, + { + "epoch": 0.2024238214488091, + "grad_norm": 6.8026509708611735, + "learning_rate": 9.240568987382173e-06, + "loss": 17.5153, + "step": 11074 + }, + { + "epoch": 0.20244210064525564, + "grad_norm": 6.634238330328132, + "learning_rate": 9.240412148862591e-06, + "loss": 17.5084, + "step": 11075 + }, + { + "epoch": 0.20246037984170215, + "grad_norm": 8.026273114106043, + "learning_rate": 9.240255295480734e-06, + "loss": 18.0841, + "step": 11076 + }, + { + "epoch": 0.20247865903814868, + "grad_norm": 6.690087092426927, + "learning_rate": 9.240098427237148e-06, + "loss": 17.6633, + "step": 11077 + }, + { + "epoch": 0.20249693823459522, + "grad_norm": 5.8175423129199775, + "learning_rate": 9.239941544132386e-06, + "loss": 17.0738, + "step": 11078 + }, + { + "epoch": 0.20251521743104173, + "grad_norm": 6.414427782891805, + "learning_rate": 9.239784646166999e-06, + "loss": 
17.1785, + "step": 11079 + }, + { + "epoch": 0.20253349662748826, + "grad_norm": 8.766801582188432, + "learning_rate": 9.239627733341531e-06, + "loss": 18.1234, + "step": 11080 + }, + { + "epoch": 0.20255177582393477, + "grad_norm": 7.365357716575713, + "learning_rate": 9.239470805656538e-06, + "loss": 17.6308, + "step": 11081 + }, + { + "epoch": 0.2025700550203813, + "grad_norm": 5.164235986921989, + "learning_rate": 9.239313863112567e-06, + "loss": 16.938, + "step": 11082 + }, + { + "epoch": 0.20258833421682781, + "grad_norm": 6.453351094731791, + "learning_rate": 9.23915690571017e-06, + "loss": 17.3139, + "step": 11083 + }, + { + "epoch": 0.20260661341327435, + "grad_norm": 6.6864016587948, + "learning_rate": 9.238999933449894e-06, + "loss": 17.4998, + "step": 11084 + }, + { + "epoch": 0.20262489260972089, + "grad_norm": 6.803955050509063, + "learning_rate": 9.238842946332292e-06, + "loss": 17.7202, + "step": 11085 + }, + { + "epoch": 0.2026431718061674, + "grad_norm": 6.7583508701882975, + "learning_rate": 9.238685944357913e-06, + "loss": 17.5772, + "step": 11086 + }, + { + "epoch": 0.20266145100261393, + "grad_norm": 7.021391029608066, + "learning_rate": 9.238528927527308e-06, + "loss": 17.8551, + "step": 11087 + }, + { + "epoch": 0.20267973019906044, + "grad_norm": 5.875048450870385, + "learning_rate": 9.238371895841027e-06, + "loss": 17.4093, + "step": 11088 + }, + { + "epoch": 0.20269800939550697, + "grad_norm": 6.7214107399667835, + "learning_rate": 9.23821484929962e-06, + "loss": 17.1692, + "step": 11089 + }, + { + "epoch": 0.2027162885919535, + "grad_norm": 8.274023358814, + "learning_rate": 9.238057787903637e-06, + "loss": 18.4087, + "step": 11090 + }, + { + "epoch": 0.20273456778840002, + "grad_norm": 6.979774278339356, + "learning_rate": 9.23790071165363e-06, + "loss": 17.2612, + "step": 11091 + }, + { + "epoch": 0.20275284698484655, + "grad_norm": 7.624076237694389, + "learning_rate": 9.237743620550148e-06, + "loss": 17.7562, + "step": 11092 + }, + { + "epoch": 0.20277112618129306, + "grad_norm": 7.896095765190013, + "learning_rate": 9.237586514593743e-06, + "loss": 18.3727, + "step": 11093 + }, + { + "epoch": 0.2027894053777396, + "grad_norm": 7.956270754415964, + "learning_rate": 9.237429393784965e-06, + "loss": 18.0114, + "step": 11094 + }, + { + "epoch": 0.20280768457418613, + "grad_norm": 7.989114623913356, + "learning_rate": 9.237272258124365e-06, + "loss": 18.1503, + "step": 11095 + }, + { + "epoch": 0.20282596377063264, + "grad_norm": 7.1258781155501705, + "learning_rate": 9.237115107612493e-06, + "loss": 17.8629, + "step": 11096 + }, + { + "epoch": 0.20284424296707917, + "grad_norm": 7.152743907610953, + "learning_rate": 9.236957942249902e-06, + "loss": 17.8655, + "step": 11097 + }, + { + "epoch": 0.20286252216352568, + "grad_norm": 7.663056514872933, + "learning_rate": 9.23680076203714e-06, + "loss": 17.8698, + "step": 11098 + }, + { + "epoch": 0.20288080135997222, + "grad_norm": 6.719182007292677, + "learning_rate": 9.236643566974758e-06, + "loss": 17.5382, + "step": 11099 + }, + { + "epoch": 0.20289908055641873, + "grad_norm": 6.699845952976791, + "learning_rate": 9.236486357063307e-06, + "loss": 17.601, + "step": 11100 + }, + { + "epoch": 0.20291735975286526, + "grad_norm": 6.53621108530564, + "learning_rate": 9.23632913230334e-06, + "loss": 17.469, + "step": 11101 + }, + { + "epoch": 0.2029356389493118, + "grad_norm": 6.2682005317902645, + "learning_rate": 9.236171892695408e-06, + "loss": 17.6244, + "step": 11102 + }, + { + "epoch": 0.2029539181457583, + 
"grad_norm": 6.7197314798793135, + "learning_rate": 9.23601463824006e-06, + "loss": 17.5137, + "step": 11103 + }, + { + "epoch": 0.20297219734220484, + "grad_norm": 6.336006258595372, + "learning_rate": 9.23585736893785e-06, + "loss": 17.3593, + "step": 11104 + }, + { + "epoch": 0.20299047653865135, + "grad_norm": 6.137737038681827, + "learning_rate": 9.235700084789325e-06, + "loss": 17.0645, + "step": 11105 + }, + { + "epoch": 0.20300875573509788, + "grad_norm": 7.834039642186673, + "learning_rate": 9.23554278579504e-06, + "loss": 18.0745, + "step": 11106 + }, + { + "epoch": 0.20302703493154442, + "grad_norm": 6.954668084270081, + "learning_rate": 9.235385471955546e-06, + "loss": 17.7245, + "step": 11107 + }, + { + "epoch": 0.20304531412799093, + "grad_norm": 7.721553135183198, + "learning_rate": 9.235228143271392e-06, + "loss": 17.9186, + "step": 11108 + }, + { + "epoch": 0.20306359332443746, + "grad_norm": 9.495352412979067, + "learning_rate": 9.23507079974313e-06, + "loss": 18.7159, + "step": 11109 + }, + { + "epoch": 0.20308187252088397, + "grad_norm": 5.810202473978871, + "learning_rate": 9.234913441371314e-06, + "loss": 17.2472, + "step": 11110 + }, + { + "epoch": 0.2031001517173305, + "grad_norm": 6.442896637173475, + "learning_rate": 9.234756068156494e-06, + "loss": 17.4719, + "step": 11111 + }, + { + "epoch": 0.20311843091377704, + "grad_norm": 6.402017573467823, + "learning_rate": 9.234598680099222e-06, + "loss": 17.6745, + "step": 11112 + }, + { + "epoch": 0.20313671011022355, + "grad_norm": 5.4501875542114115, + "learning_rate": 9.234441277200048e-06, + "loss": 17.0627, + "step": 11113 + }, + { + "epoch": 0.2031549893066701, + "grad_norm": 5.838435571181767, + "learning_rate": 9.234283859459525e-06, + "loss": 17.2981, + "step": 11114 + }, + { + "epoch": 0.2031732685031166, + "grad_norm": 7.30484957185883, + "learning_rate": 9.234126426878203e-06, + "loss": 17.6792, + "step": 11115 + }, + { + "epoch": 0.20319154769956313, + "grad_norm": 8.679037738220984, + "learning_rate": 9.233968979456637e-06, + "loss": 18.3425, + "step": 11116 + }, + { + "epoch": 0.20320982689600964, + "grad_norm": 6.49102380204587, + "learning_rate": 9.233811517195378e-06, + "loss": 17.5286, + "step": 11117 + }, + { + "epoch": 0.20322810609245617, + "grad_norm": 6.991123610421662, + "learning_rate": 9.233654040094976e-06, + "loss": 17.8174, + "step": 11118 + }, + { + "epoch": 0.2032463852889027, + "grad_norm": 6.168237386342085, + "learning_rate": 9.233496548155984e-06, + "loss": 17.4737, + "step": 11119 + }, + { + "epoch": 0.20326466448534922, + "grad_norm": 6.4433712741488485, + "learning_rate": 9.233339041378952e-06, + "loss": 17.4473, + "step": 11120 + }, + { + "epoch": 0.20328294368179575, + "grad_norm": 5.891067840361349, + "learning_rate": 9.233181519764437e-06, + "loss": 17.2874, + "step": 11121 + }, + { + "epoch": 0.20330122287824226, + "grad_norm": 8.143132742497487, + "learning_rate": 9.233023983312987e-06, + "loss": 18.0829, + "step": 11122 + }, + { + "epoch": 0.2033195020746888, + "grad_norm": 8.487263957275987, + "learning_rate": 9.232866432025156e-06, + "loss": 18.3101, + "step": 11123 + }, + { + "epoch": 0.20333778127113533, + "grad_norm": 6.394992361882281, + "learning_rate": 9.232708865901495e-06, + "loss": 17.3551, + "step": 11124 + }, + { + "epoch": 0.20335606046758184, + "grad_norm": 6.575868690305045, + "learning_rate": 9.232551284942554e-06, + "loss": 17.5616, + "step": 11125 + }, + { + "epoch": 0.20337433966402838, + "grad_norm": 5.0399235075632856, + "learning_rate": 
9.23239368914889e-06, + "loss": 16.9346, + "step": 11126 + }, + { + "epoch": 0.20339261886047488, + "grad_norm": 9.2722299744395, + "learning_rate": 9.232236078521055e-06, + "loss": 18.2814, + "step": 11127 + }, + { + "epoch": 0.20341089805692142, + "grad_norm": 6.8905819906211585, + "learning_rate": 9.232078453059598e-06, + "loss": 17.5832, + "step": 11128 + }, + { + "epoch": 0.20342917725336795, + "grad_norm": 6.132241847838988, + "learning_rate": 9.231920812765074e-06, + "loss": 17.467, + "step": 11129 + }, + { + "epoch": 0.20344745644981446, + "grad_norm": 6.379810305024003, + "learning_rate": 9.231763157638036e-06, + "loss": 17.4435, + "step": 11130 + }, + { + "epoch": 0.203465735646261, + "grad_norm": 5.808808655742433, + "learning_rate": 9.231605487679033e-06, + "loss": 17.2186, + "step": 11131 + }, + { + "epoch": 0.2034840148427075, + "grad_norm": 6.0394033521351345, + "learning_rate": 9.23144780288862e-06, + "loss": 17.122, + "step": 11132 + }, + { + "epoch": 0.20350229403915404, + "grad_norm": 8.642312035088267, + "learning_rate": 9.231290103267352e-06, + "loss": 18.5249, + "step": 11133 + }, + { + "epoch": 0.20352057323560055, + "grad_norm": 6.4284436306351385, + "learning_rate": 9.231132388815778e-06, + "loss": 17.4943, + "step": 11134 + }, + { + "epoch": 0.20353885243204708, + "grad_norm": 8.370086693427135, + "learning_rate": 9.230974659534451e-06, + "loss": 18.1642, + "step": 11135 + }, + { + "epoch": 0.20355713162849362, + "grad_norm": 8.025758280170024, + "learning_rate": 9.230816915423928e-06, + "loss": 18.0183, + "step": 11136 + }, + { + "epoch": 0.20357541082494013, + "grad_norm": 6.295715707343801, + "learning_rate": 9.230659156484755e-06, + "loss": 17.3867, + "step": 11137 + }, + { + "epoch": 0.20359369002138666, + "grad_norm": 7.556217004668174, + "learning_rate": 9.230501382717492e-06, + "loss": 17.8981, + "step": 11138 + }, + { + "epoch": 0.20361196921783317, + "grad_norm": 5.740317766351861, + "learning_rate": 9.230343594122687e-06, + "loss": 17.3958, + "step": 11139 + }, + { + "epoch": 0.2036302484142797, + "grad_norm": 6.944209250198379, + "learning_rate": 9.230185790700895e-06, + "loss": 17.7332, + "step": 11140 + }, + { + "epoch": 0.20364852761072624, + "grad_norm": 6.564848811373634, + "learning_rate": 9.230027972452669e-06, + "loss": 17.4095, + "step": 11141 + }, + { + "epoch": 0.20366680680717275, + "grad_norm": 6.854415444915705, + "learning_rate": 9.229870139378562e-06, + "loss": 17.5654, + "step": 11142 + }, + { + "epoch": 0.2036850860036193, + "grad_norm": 7.647143387274446, + "learning_rate": 9.229712291479128e-06, + "loss": 18.1861, + "step": 11143 + }, + { + "epoch": 0.2037033652000658, + "grad_norm": 6.264718415869328, + "learning_rate": 9.229554428754918e-06, + "loss": 17.5786, + "step": 11144 + }, + { + "epoch": 0.20372164439651233, + "grad_norm": 7.050400247635081, + "learning_rate": 9.229396551206488e-06, + "loss": 17.6802, + "step": 11145 + }, + { + "epoch": 0.20373992359295887, + "grad_norm": 7.013656808967406, + "learning_rate": 9.22923865883439e-06, + "loss": 17.7487, + "step": 11146 + }, + { + "epoch": 0.20375820278940537, + "grad_norm": 7.176620897988137, + "learning_rate": 9.229080751639177e-06, + "loss": 17.7222, + "step": 11147 + }, + { + "epoch": 0.2037764819858519, + "grad_norm": 7.104754563617455, + "learning_rate": 9.228922829621403e-06, + "loss": 17.6496, + "step": 11148 + }, + { + "epoch": 0.20379476118229842, + "grad_norm": 7.098889295955707, + "learning_rate": 9.228764892781622e-06, + "loss": 18.0888, + "step": 11149 + }, + { + 
"epoch": 0.20381304037874495, + "grad_norm": 8.365287733535423, + "learning_rate": 9.228606941120386e-06, + "loss": 18.5649, + "step": 11150 + }, + { + "epoch": 0.20383131957519146, + "grad_norm": 7.066472823936408, + "learning_rate": 9.228448974638252e-06, + "loss": 17.8336, + "step": 11151 + }, + { + "epoch": 0.203849598771638, + "grad_norm": 7.190799257549998, + "learning_rate": 9.228290993335768e-06, + "loss": 17.4468, + "step": 11152 + }, + { + "epoch": 0.20386787796808453, + "grad_norm": 7.424093952787657, + "learning_rate": 9.228132997213493e-06, + "loss": 17.9261, + "step": 11153 + }, + { + "epoch": 0.20388615716453104, + "grad_norm": 6.104952908897735, + "learning_rate": 9.227974986271976e-06, + "loss": 17.3901, + "step": 11154 + }, + { + "epoch": 0.20390443636097758, + "grad_norm": 5.532896939303763, + "learning_rate": 9.227816960511778e-06, + "loss": 17.4417, + "step": 11155 + }, + { + "epoch": 0.20392271555742408, + "grad_norm": 6.510258284011305, + "learning_rate": 9.227658919933446e-06, + "loss": 17.5093, + "step": 11156 + }, + { + "epoch": 0.20394099475387062, + "grad_norm": 6.876360849657799, + "learning_rate": 9.227500864537536e-06, + "loss": 17.7952, + "step": 11157 + }, + { + "epoch": 0.20395927395031715, + "grad_norm": 5.942607970633072, + "learning_rate": 9.227342794324603e-06, + "loss": 17.3487, + "step": 11158 + }, + { + "epoch": 0.20397755314676366, + "grad_norm": 7.574700009519517, + "learning_rate": 9.2271847092952e-06, + "loss": 18.1533, + "step": 11159 + }, + { + "epoch": 0.2039958323432102, + "grad_norm": 6.682538345208364, + "learning_rate": 9.227026609449881e-06, + "loss": 17.4988, + "step": 11160 + }, + { + "epoch": 0.2040141115396567, + "grad_norm": 7.752221844167152, + "learning_rate": 9.226868494789203e-06, + "loss": 18.0337, + "step": 11161 + }, + { + "epoch": 0.20403239073610324, + "grad_norm": 6.064331059840321, + "learning_rate": 9.226710365313714e-06, + "loss": 17.4478, + "step": 11162 + }, + { + "epoch": 0.20405066993254978, + "grad_norm": 6.199532295651936, + "learning_rate": 9.226552221023974e-06, + "loss": 17.4396, + "step": 11163 + }, + { + "epoch": 0.20406894912899629, + "grad_norm": 7.424588487200107, + "learning_rate": 9.226394061920537e-06, + "loss": 17.9378, + "step": 11164 + }, + { + "epoch": 0.20408722832544282, + "grad_norm": 7.034173364726601, + "learning_rate": 9.226235888003952e-06, + "loss": 17.9955, + "step": 11165 + }, + { + "epoch": 0.20410550752188933, + "grad_norm": 6.187052812857753, + "learning_rate": 9.226077699274778e-06, + "loss": 17.395, + "step": 11166 + }, + { + "epoch": 0.20412378671833586, + "grad_norm": 6.251115632206645, + "learning_rate": 9.225919495733569e-06, + "loss": 17.6024, + "step": 11167 + }, + { + "epoch": 0.20414206591478237, + "grad_norm": 7.201508065677238, + "learning_rate": 9.225761277380878e-06, + "loss": 17.7733, + "step": 11168 + }, + { + "epoch": 0.2041603451112289, + "grad_norm": 6.595517365453235, + "learning_rate": 9.225603044217261e-06, + "loss": 17.6297, + "step": 11169 + }, + { + "epoch": 0.20417862430767544, + "grad_norm": 7.720351350143014, + "learning_rate": 9.225444796243273e-06, + "loss": 17.9286, + "step": 11170 + }, + { + "epoch": 0.20419690350412195, + "grad_norm": 6.852815619637015, + "learning_rate": 9.225286533459468e-06, + "loss": 17.5033, + "step": 11171 + }, + { + "epoch": 0.2042151827005685, + "grad_norm": 7.025534011609405, + "learning_rate": 9.225128255866397e-06, + "loss": 17.7254, + "step": 11172 + }, + { + "epoch": 0.204233461897015, + "grad_norm": 8.401970977588938, + 
"learning_rate": 9.224969963464623e-06, + "loss": 18.3252, + "step": 11173 + }, + { + "epoch": 0.20425174109346153, + "grad_norm": 5.386890150228694, + "learning_rate": 9.224811656254694e-06, + "loss": 17.17, + "step": 11174 + }, + { + "epoch": 0.20427002028990807, + "grad_norm": 7.185666553147642, + "learning_rate": 9.224653334237163e-06, + "loss": 18.0141, + "step": 11175 + }, + { + "epoch": 0.20428829948635457, + "grad_norm": 7.066949308004852, + "learning_rate": 9.224494997412593e-06, + "loss": 17.7945, + "step": 11176 + }, + { + "epoch": 0.2043065786828011, + "grad_norm": 7.613231902013903, + "learning_rate": 9.224336645781533e-06, + "loss": 17.8731, + "step": 11177 + }, + { + "epoch": 0.20432485787924762, + "grad_norm": 6.427204203579496, + "learning_rate": 9.22417827934454e-06, + "loss": 17.5776, + "step": 11178 + }, + { + "epoch": 0.20434313707569415, + "grad_norm": 8.003971223075348, + "learning_rate": 9.224019898102168e-06, + "loss": 17.8197, + "step": 11179 + }, + { + "epoch": 0.2043614162721407, + "grad_norm": 5.5872193885185455, + "learning_rate": 9.223861502054974e-06, + "loss": 17.19, + "step": 11180 + }, + { + "epoch": 0.2043796954685872, + "grad_norm": 6.276657648988144, + "learning_rate": 9.223703091203511e-06, + "loss": 17.3117, + "step": 11181 + }, + { + "epoch": 0.20439797466503373, + "grad_norm": 6.897398207329288, + "learning_rate": 9.223544665548337e-06, + "loss": 17.6208, + "step": 11182 + }, + { + "epoch": 0.20441625386148024, + "grad_norm": 7.3221433396788305, + "learning_rate": 9.223386225090002e-06, + "loss": 17.4744, + "step": 11183 + }, + { + "epoch": 0.20443453305792678, + "grad_norm": 6.278377547536221, + "learning_rate": 9.223227769829068e-06, + "loss": 17.3664, + "step": 11184 + }, + { + "epoch": 0.20445281225437328, + "grad_norm": 7.80024806356108, + "learning_rate": 9.223069299766085e-06, + "loss": 18.1642, + "step": 11185 + }, + { + "epoch": 0.20447109145081982, + "grad_norm": 7.538758445830707, + "learning_rate": 9.222910814901611e-06, + "loss": 17.8844, + "step": 11186 + }, + { + "epoch": 0.20448937064726636, + "grad_norm": 6.2320072295754825, + "learning_rate": 9.222752315236203e-06, + "loss": 16.996, + "step": 11187 + }, + { + "epoch": 0.20450764984371286, + "grad_norm": 5.768389402983314, + "learning_rate": 9.222593800770411e-06, + "loss": 17.2548, + "step": 11188 + }, + { + "epoch": 0.2045259290401594, + "grad_norm": 9.386362018993129, + "learning_rate": 9.222435271504797e-06, + "loss": 18.8809, + "step": 11189 + }, + { + "epoch": 0.2045442082366059, + "grad_norm": 6.2184328619791085, + "learning_rate": 9.222276727439914e-06, + "loss": 17.5199, + "step": 11190 + }, + { + "epoch": 0.20456248743305244, + "grad_norm": 7.577401563324187, + "learning_rate": 9.222118168576316e-06, + "loss": 17.8251, + "step": 11191 + }, + { + "epoch": 0.20458076662949898, + "grad_norm": 7.8018276763599195, + "learning_rate": 9.22195959491456e-06, + "loss": 17.7496, + "step": 11192 + }, + { + "epoch": 0.20459904582594549, + "grad_norm": 7.7073048193379465, + "learning_rate": 9.221801006455204e-06, + "loss": 18.5523, + "step": 11193 + }, + { + "epoch": 0.20461732502239202, + "grad_norm": 7.672101512759613, + "learning_rate": 9.2216424031988e-06, + "loss": 17.8634, + "step": 11194 + }, + { + "epoch": 0.20463560421883853, + "grad_norm": 7.427337842379527, + "learning_rate": 9.221483785145906e-06, + "loss": 17.9564, + "step": 11195 + }, + { + "epoch": 0.20465388341528506, + "grad_norm": 6.569593973534207, + "learning_rate": 9.221325152297079e-06, + "loss": 17.6455, + 
"step": 11196 + }, + { + "epoch": 0.2046721626117316, + "grad_norm": 8.427328028996476, + "learning_rate": 9.221166504652871e-06, + "loss": 18.462, + "step": 11197 + }, + { + "epoch": 0.2046904418081781, + "grad_norm": 6.744904946082712, + "learning_rate": 9.221007842213843e-06, + "loss": 17.515, + "step": 11198 + }, + { + "epoch": 0.20470872100462464, + "grad_norm": 8.33352714628349, + "learning_rate": 9.220849164980548e-06, + "loss": 18.5856, + "step": 11199 + }, + { + "epoch": 0.20472700020107115, + "grad_norm": 7.956233550220343, + "learning_rate": 9.220690472953542e-06, + "loss": 18.1, + "step": 11200 + }, + { + "epoch": 0.2047452793975177, + "grad_norm": 7.034235858966999, + "learning_rate": 9.220531766133383e-06, + "loss": 17.6427, + "step": 11201 + }, + { + "epoch": 0.2047635585939642, + "grad_norm": 7.374752028705149, + "learning_rate": 9.220373044520628e-06, + "loss": 17.513, + "step": 11202 + }, + { + "epoch": 0.20478183779041073, + "grad_norm": 7.120915378950373, + "learning_rate": 9.22021430811583e-06, + "loss": 17.5448, + "step": 11203 + }, + { + "epoch": 0.20480011698685727, + "grad_norm": 5.5208955331425065, + "learning_rate": 9.220055556919547e-06, + "loss": 17.1941, + "step": 11204 + }, + { + "epoch": 0.20481839618330377, + "grad_norm": 7.252932148688536, + "learning_rate": 9.219896790932334e-06, + "loss": 17.8942, + "step": 11205 + }, + { + "epoch": 0.2048366753797503, + "grad_norm": 6.855217393125561, + "learning_rate": 9.219738010154753e-06, + "loss": 17.6032, + "step": 11206 + }, + { + "epoch": 0.20485495457619682, + "grad_norm": 7.905595712034856, + "learning_rate": 9.219579214587354e-06, + "loss": 18.0844, + "step": 11207 + }, + { + "epoch": 0.20487323377264335, + "grad_norm": 7.305491779279216, + "learning_rate": 9.219420404230694e-06, + "loss": 17.8717, + "step": 11208 + }, + { + "epoch": 0.2048915129690899, + "grad_norm": 7.109208649456086, + "learning_rate": 9.219261579085335e-06, + "loss": 17.9267, + "step": 11209 + }, + { + "epoch": 0.2049097921655364, + "grad_norm": 6.880451246231312, + "learning_rate": 9.21910273915183e-06, + "loss": 17.6225, + "step": 11210 + }, + { + "epoch": 0.20492807136198293, + "grad_norm": 7.476702279942833, + "learning_rate": 9.218943884430733e-06, + "loss": 18.0985, + "step": 11211 + }, + { + "epoch": 0.20494635055842944, + "grad_norm": 6.738653096375818, + "learning_rate": 9.218785014922606e-06, + "loss": 17.3817, + "step": 11212 + }, + { + "epoch": 0.20496462975487598, + "grad_norm": 6.053858809942252, + "learning_rate": 9.218626130628003e-06, + "loss": 17.3523, + "step": 11213 + }, + { + "epoch": 0.2049829089513225, + "grad_norm": 7.031194283853823, + "learning_rate": 9.218467231547482e-06, + "loss": 17.9715, + "step": 11214 + }, + { + "epoch": 0.20500118814776902, + "grad_norm": 7.617933246938375, + "learning_rate": 9.2183083176816e-06, + "loss": 18.2486, + "step": 11215 + }, + { + "epoch": 0.20501946734421556, + "grad_norm": 7.016914651326241, + "learning_rate": 9.218149389030913e-06, + "loss": 17.7558, + "step": 11216 + }, + { + "epoch": 0.20503774654066206, + "grad_norm": 6.314429775341909, + "learning_rate": 9.21799044559598e-06, + "loss": 17.4803, + "step": 11217 + }, + { + "epoch": 0.2050560257371086, + "grad_norm": 6.298121876385616, + "learning_rate": 9.217831487377354e-06, + "loss": 17.2778, + "step": 11218 + }, + { + "epoch": 0.2050743049335551, + "grad_norm": 6.463091502828433, + "learning_rate": 9.217672514375594e-06, + "loss": 17.6105, + "step": 11219 + }, + { + "epoch": 0.20509258413000164, + "grad_norm": 
7.081118131205817, + "learning_rate": 9.217513526591259e-06, + "loss": 17.6592, + "step": 11220 + }, + { + "epoch": 0.20511086332644818, + "grad_norm": 7.573218858127629, + "learning_rate": 9.217354524024905e-06, + "loss": 17.7936, + "step": 11221 + }, + { + "epoch": 0.2051291425228947, + "grad_norm": 8.4098199670913, + "learning_rate": 9.21719550667709e-06, + "loss": 17.9587, + "step": 11222 + }, + { + "epoch": 0.20514742171934122, + "grad_norm": 6.121847899495171, + "learning_rate": 9.21703647454837e-06, + "loss": 17.4993, + "step": 11223 + }, + { + "epoch": 0.20516570091578773, + "grad_norm": 7.413990228255566, + "learning_rate": 9.216877427639303e-06, + "loss": 17.9504, + "step": 11224 + }, + { + "epoch": 0.20518398011223427, + "grad_norm": 7.076346612130487, + "learning_rate": 9.216718365950448e-06, + "loss": 18.0455, + "step": 11225 + }, + { + "epoch": 0.2052022593086808, + "grad_norm": 8.428479294844449, + "learning_rate": 9.21655928948236e-06, + "loss": 17.9222, + "step": 11226 + }, + { + "epoch": 0.2052205385051273, + "grad_norm": 9.552257974022613, + "learning_rate": 9.216400198235598e-06, + "loss": 18.1117, + "step": 11227 + }, + { + "epoch": 0.20523881770157384, + "grad_norm": 5.859977160157482, + "learning_rate": 9.216241092210718e-06, + "loss": 17.3769, + "step": 11228 + }, + { + "epoch": 0.20525709689802035, + "grad_norm": 6.6096858069978115, + "learning_rate": 9.21608197140828e-06, + "loss": 17.6359, + "step": 11229 + }, + { + "epoch": 0.2052753760944669, + "grad_norm": 6.743129044468079, + "learning_rate": 9.215922835828839e-06, + "loss": 17.7402, + "step": 11230 + }, + { + "epoch": 0.20529365529091342, + "grad_norm": 7.3244944650637, + "learning_rate": 9.215763685472955e-06, + "loss": 17.8573, + "step": 11231 + }, + { + "epoch": 0.20531193448735993, + "grad_norm": 7.177089459925122, + "learning_rate": 9.215604520341186e-06, + "loss": 17.9606, + "step": 11232 + }, + { + "epoch": 0.20533021368380647, + "grad_norm": 7.654941124166402, + "learning_rate": 9.215445340434088e-06, + "loss": 17.8949, + "step": 11233 + }, + { + "epoch": 0.20534849288025298, + "grad_norm": 7.105827489680923, + "learning_rate": 9.215286145752222e-06, + "loss": 17.7934, + "step": 11234 + }, + { + "epoch": 0.2053667720766995, + "grad_norm": 5.612250988367692, + "learning_rate": 9.215126936296141e-06, + "loss": 17.2452, + "step": 11235 + }, + { + "epoch": 0.20538505127314602, + "grad_norm": 12.762976536213964, + "learning_rate": 9.214967712066408e-06, + "loss": 19.0867, + "step": 11236 + }, + { + "epoch": 0.20540333046959255, + "grad_norm": 5.904836314826313, + "learning_rate": 9.214808473063578e-06, + "loss": 17.2909, + "step": 11237 + }, + { + "epoch": 0.2054216096660391, + "grad_norm": 7.576621186342858, + "learning_rate": 9.21464921928821e-06, + "loss": 17.8573, + "step": 11238 + }, + { + "epoch": 0.2054398888624856, + "grad_norm": 6.779356927404155, + "learning_rate": 9.21448995074086e-06, + "loss": 17.5983, + "step": 11239 + }, + { + "epoch": 0.20545816805893213, + "grad_norm": 6.856019507235172, + "learning_rate": 9.214330667422092e-06, + "loss": 17.8275, + "step": 11240 + }, + { + "epoch": 0.20547644725537864, + "grad_norm": 9.397441998703927, + "learning_rate": 9.21417136933246e-06, + "loss": 18.7988, + "step": 11241 + }, + { + "epoch": 0.20549472645182518, + "grad_norm": 5.921991480316953, + "learning_rate": 9.214012056472521e-06, + "loss": 17.259, + "step": 11242 + }, + { + "epoch": 0.2055130056482717, + "grad_norm": 7.2698720955148675, + "learning_rate": 9.213852728842839e-06, + "loss": 
17.8498, + "step": 11243 + }, + { + "epoch": 0.20553128484471822, + "grad_norm": 6.651611949906887, + "learning_rate": 9.213693386443966e-06, + "loss": 17.7188, + "step": 11244 + }, + { + "epoch": 0.20554956404116476, + "grad_norm": 7.508074955197114, + "learning_rate": 9.213534029276464e-06, + "loss": 18.2089, + "step": 11245 + }, + { + "epoch": 0.20556784323761126, + "grad_norm": 8.512802495561843, + "learning_rate": 9.21337465734089e-06, + "loss": 17.8531, + "step": 11246 + }, + { + "epoch": 0.2055861224340578, + "grad_norm": 7.357293392894167, + "learning_rate": 9.213215270637805e-06, + "loss": 17.7409, + "step": 11247 + }, + { + "epoch": 0.20560440163050434, + "grad_norm": 6.011106680943368, + "learning_rate": 9.213055869167767e-06, + "loss": 17.2643, + "step": 11248 + }, + { + "epoch": 0.20562268082695084, + "grad_norm": 6.57061944873097, + "learning_rate": 9.212896452931331e-06, + "loss": 17.6767, + "step": 11249 + }, + { + "epoch": 0.20564096002339738, + "grad_norm": 6.287256367807107, + "learning_rate": 9.21273702192906e-06, + "loss": 17.2238, + "step": 11250 + }, + { + "epoch": 0.2056592392198439, + "grad_norm": 6.000760785422642, + "learning_rate": 9.21257757616151e-06, + "loss": 17.226, + "step": 11251 + }, + { + "epoch": 0.20567751841629042, + "grad_norm": 6.890166883988605, + "learning_rate": 9.212418115629243e-06, + "loss": 17.8284, + "step": 11252 + }, + { + "epoch": 0.20569579761273693, + "grad_norm": 7.627504871921874, + "learning_rate": 9.212258640332815e-06, + "loss": 17.4906, + "step": 11253 + }, + { + "epoch": 0.20571407680918347, + "grad_norm": 7.70247386428855, + "learning_rate": 9.212099150272786e-06, + "loss": 18.3294, + "step": 11254 + }, + { + "epoch": 0.20573235600563, + "grad_norm": 8.44945226014485, + "learning_rate": 9.211939645449715e-06, + "loss": 18.5711, + "step": 11255 + }, + { + "epoch": 0.2057506352020765, + "grad_norm": 8.303653254049596, + "learning_rate": 9.211780125864162e-06, + "loss": 18.3465, + "step": 11256 + }, + { + "epoch": 0.20576891439852305, + "grad_norm": 5.468438191150138, + "learning_rate": 9.211620591516683e-06, + "loss": 17.101, + "step": 11257 + }, + { + "epoch": 0.20578719359496955, + "grad_norm": 7.026778992898313, + "learning_rate": 9.211461042407841e-06, + "loss": 17.995, + "step": 11258 + }, + { + "epoch": 0.2058054727914161, + "grad_norm": 7.124654173790567, + "learning_rate": 9.211301478538194e-06, + "loss": 17.3347, + "step": 11259 + }, + { + "epoch": 0.20582375198786262, + "grad_norm": 6.822023858200299, + "learning_rate": 9.2111418999083e-06, + "loss": 17.7613, + "step": 11260 + }, + { + "epoch": 0.20584203118430913, + "grad_norm": 7.2272941338736905, + "learning_rate": 9.210982306518719e-06, + "loss": 17.6922, + "step": 11261 + }, + { + "epoch": 0.20586031038075567, + "grad_norm": 6.602902486894802, + "learning_rate": 9.21082269837001e-06, + "loss": 17.5514, + "step": 11262 + }, + { + "epoch": 0.20587858957720218, + "grad_norm": 7.480254531432755, + "learning_rate": 9.210663075462733e-06, + "loss": 18.2328, + "step": 11263 + }, + { + "epoch": 0.2058968687736487, + "grad_norm": 6.128790145698104, + "learning_rate": 9.210503437797448e-06, + "loss": 17.3685, + "step": 11264 + }, + { + "epoch": 0.20591514797009525, + "grad_norm": 8.293712419676533, + "learning_rate": 9.210343785374713e-06, + "loss": 18.1156, + "step": 11265 + }, + { + "epoch": 0.20593342716654175, + "grad_norm": 7.404471374026537, + "learning_rate": 9.21018411819509e-06, + "loss": 17.6813, + "step": 11266 + }, + { + "epoch": 0.2059517063629883, + 
"grad_norm": 6.296386617109181, + "learning_rate": 9.210024436259135e-06, + "loss": 17.4807, + "step": 11267 + }, + { + "epoch": 0.2059699855594348, + "grad_norm": 7.517825134964458, + "learning_rate": 9.20986473956741e-06, + "loss": 17.9497, + "step": 11268 + }, + { + "epoch": 0.20598826475588133, + "grad_norm": 7.198564620537514, + "learning_rate": 9.209705028120475e-06, + "loss": 18.061, + "step": 11269 + }, + { + "epoch": 0.20600654395232784, + "grad_norm": 7.446215235124976, + "learning_rate": 9.209545301918889e-06, + "loss": 17.8568, + "step": 11270 + }, + { + "epoch": 0.20602482314877438, + "grad_norm": 6.199927643465172, + "learning_rate": 9.209385560963212e-06, + "loss": 17.2508, + "step": 11271 + }, + { + "epoch": 0.2060431023452209, + "grad_norm": 7.165829469897477, + "learning_rate": 9.209225805254004e-06, + "loss": 17.7853, + "step": 11272 + }, + { + "epoch": 0.20606138154166742, + "grad_norm": 6.174745654746108, + "learning_rate": 9.209066034791824e-06, + "loss": 17.3414, + "step": 11273 + }, + { + "epoch": 0.20607966073811396, + "grad_norm": 6.613461508753073, + "learning_rate": 9.208906249577234e-06, + "loss": 17.5057, + "step": 11274 + }, + { + "epoch": 0.20609793993456046, + "grad_norm": 7.359812146534588, + "learning_rate": 9.208746449610792e-06, + "loss": 17.8326, + "step": 11275 + }, + { + "epoch": 0.206116219131007, + "grad_norm": 6.829152532704157, + "learning_rate": 9.20858663489306e-06, + "loss": 17.6674, + "step": 11276 + }, + { + "epoch": 0.20613449832745354, + "grad_norm": 7.597519963948508, + "learning_rate": 9.208426805424596e-06, + "loss": 18.0787, + "step": 11277 + }, + { + "epoch": 0.20615277752390004, + "grad_norm": 6.740113924962893, + "learning_rate": 9.208266961205961e-06, + "loss": 17.5505, + "step": 11278 + }, + { + "epoch": 0.20617105672034658, + "grad_norm": 6.659659448971432, + "learning_rate": 9.208107102237717e-06, + "loss": 17.5001, + "step": 11279 + }, + { + "epoch": 0.2061893359167931, + "grad_norm": 6.516375911744139, + "learning_rate": 9.207947228520421e-06, + "loss": 17.4043, + "step": 11280 + }, + { + "epoch": 0.20620761511323962, + "grad_norm": 7.591304504931219, + "learning_rate": 9.207787340054637e-06, + "loss": 17.5647, + "step": 11281 + }, + { + "epoch": 0.20622589430968616, + "grad_norm": 6.431169455060361, + "learning_rate": 9.207627436840922e-06, + "loss": 17.4243, + "step": 11282 + }, + { + "epoch": 0.20624417350613267, + "grad_norm": 5.904620637899576, + "learning_rate": 9.207467518879838e-06, + "loss": 17.2062, + "step": 11283 + }, + { + "epoch": 0.2062624527025792, + "grad_norm": 6.255924391466615, + "learning_rate": 9.207307586171946e-06, + "loss": 17.4979, + "step": 11284 + }, + { + "epoch": 0.2062807318990257, + "grad_norm": 5.775303579231696, + "learning_rate": 9.207147638717807e-06, + "loss": 17.4686, + "step": 11285 + }, + { + "epoch": 0.20629901109547225, + "grad_norm": 8.48887826399413, + "learning_rate": 9.20698767651798e-06, + "loss": 18.7141, + "step": 11286 + }, + { + "epoch": 0.20631729029191875, + "grad_norm": 5.258576051977037, + "learning_rate": 9.206827699573024e-06, + "loss": 16.9402, + "step": 11287 + }, + { + "epoch": 0.2063355694883653, + "grad_norm": 8.300980279165882, + "learning_rate": 9.206667707883504e-06, + "loss": 18.4599, + "step": 11288 + }, + { + "epoch": 0.20635384868481182, + "grad_norm": 6.571769161341952, + "learning_rate": 9.206507701449978e-06, + "loss": 17.6144, + "step": 11289 + }, + { + "epoch": 0.20637212788125833, + "grad_norm": 7.422547512778995, + "learning_rate": 
9.206347680273008e-06, + "loss": 18.0077, + "step": 11290 + }, + { + "epoch": 0.20639040707770487, + "grad_norm": 5.961834342625839, + "learning_rate": 9.206187644353155e-06, + "loss": 17.343, + "step": 11291 + }, + { + "epoch": 0.20640868627415138, + "grad_norm": 7.77587413166556, + "learning_rate": 9.206027593690978e-06, + "loss": 18.1333, + "step": 11292 + }, + { + "epoch": 0.2064269654705979, + "grad_norm": 7.670740089916023, + "learning_rate": 9.20586752828704e-06, + "loss": 18.0532, + "step": 11293 + }, + { + "epoch": 0.20644524466704445, + "grad_norm": 6.4864691492117235, + "learning_rate": 9.205707448141901e-06, + "loss": 17.4666, + "step": 11294 + }, + { + "epoch": 0.20646352386349096, + "grad_norm": 6.757883265958228, + "learning_rate": 9.205547353256123e-06, + "loss": 17.739, + "step": 11295 + }, + { + "epoch": 0.2064818030599375, + "grad_norm": 6.691835969288469, + "learning_rate": 9.205387243630267e-06, + "loss": 17.8983, + "step": 11296 + }, + { + "epoch": 0.206500082256384, + "grad_norm": 7.450243470813527, + "learning_rate": 9.205227119264892e-06, + "loss": 17.7776, + "step": 11297 + }, + { + "epoch": 0.20651836145283053, + "grad_norm": 7.294062044729192, + "learning_rate": 9.205066980160561e-06, + "loss": 17.6949, + "step": 11298 + }, + { + "epoch": 0.20653664064927707, + "grad_norm": 6.869556676245195, + "learning_rate": 9.204906826317835e-06, + "loss": 17.8619, + "step": 11299 + }, + { + "epoch": 0.20655491984572358, + "grad_norm": 7.881950895303181, + "learning_rate": 9.204746657737276e-06, + "loss": 18.0034, + "step": 11300 + }, + { + "epoch": 0.2065731990421701, + "grad_norm": 6.516121643789768, + "learning_rate": 9.204586474419445e-06, + "loss": 17.5569, + "step": 11301 + }, + { + "epoch": 0.20659147823861662, + "grad_norm": 7.121390109024867, + "learning_rate": 9.204426276364905e-06, + "loss": 17.8313, + "step": 11302 + }, + { + "epoch": 0.20660975743506316, + "grad_norm": 8.016283913408762, + "learning_rate": 9.204266063574212e-06, + "loss": 18.2278, + "step": 11303 + }, + { + "epoch": 0.20662803663150967, + "grad_norm": 6.9818234347910755, + "learning_rate": 9.204105836047934e-06, + "loss": 17.5215, + "step": 11304 + }, + { + "epoch": 0.2066463158279562, + "grad_norm": 5.269493716760305, + "learning_rate": 9.203945593786628e-06, + "loss": 17.0022, + "step": 11305 + }, + { + "epoch": 0.20666459502440274, + "grad_norm": 6.455078838645331, + "learning_rate": 9.20378533679086e-06, + "loss": 17.3951, + "step": 11306 + }, + { + "epoch": 0.20668287422084924, + "grad_norm": 6.523396538436194, + "learning_rate": 9.203625065061186e-06, + "loss": 17.3609, + "step": 11307 + }, + { + "epoch": 0.20670115341729578, + "grad_norm": 6.544004948372208, + "learning_rate": 9.203464778598173e-06, + "loss": 17.4493, + "step": 11308 + }, + { + "epoch": 0.2067194326137423, + "grad_norm": 6.419709541482147, + "learning_rate": 9.203304477402382e-06, + "loss": 17.6385, + "step": 11309 + }, + { + "epoch": 0.20673771181018882, + "grad_norm": 6.332698117626546, + "learning_rate": 9.20314416147437e-06, + "loss": 17.6083, + "step": 11310 + }, + { + "epoch": 0.20675599100663536, + "grad_norm": 6.55180966767108, + "learning_rate": 9.202983830814704e-06, + "loss": 17.3829, + "step": 11311 + }, + { + "epoch": 0.20677427020308187, + "grad_norm": 6.576574494804952, + "learning_rate": 9.202823485423946e-06, + "loss": 17.5929, + "step": 11312 + }, + { + "epoch": 0.2067925493995284, + "grad_norm": 7.869222319228628, + "learning_rate": 9.202663125302656e-06, + "loss": 17.711, + "step": 11313 + }, + { + 
"epoch": 0.2068108285959749, + "grad_norm": 7.220267385438705, + "learning_rate": 9.202502750451394e-06, + "loss": 17.8717, + "step": 11314 + }, + { + "epoch": 0.20682910779242145, + "grad_norm": 6.498563801180941, + "learning_rate": 9.202342360870726e-06, + "loss": 17.4267, + "step": 11315 + }, + { + "epoch": 0.20684738698886798, + "grad_norm": 8.49745539591033, + "learning_rate": 9.202181956561213e-06, + "loss": 18.3729, + "step": 11316 + }, + { + "epoch": 0.2068656661853145, + "grad_norm": 6.113489474513642, + "learning_rate": 9.202021537523417e-06, + "loss": 17.3899, + "step": 11317 + }, + { + "epoch": 0.20688394538176103, + "grad_norm": 7.259428603948771, + "learning_rate": 9.2018611037579e-06, + "loss": 17.6373, + "step": 11318 + }, + { + "epoch": 0.20690222457820753, + "grad_norm": 6.862792995101118, + "learning_rate": 9.201700655265224e-06, + "loss": 17.6216, + "step": 11319 + }, + { + "epoch": 0.20692050377465407, + "grad_norm": 5.896591952112572, + "learning_rate": 9.201540192045952e-06, + "loss": 17.1226, + "step": 11320 + }, + { + "epoch": 0.20693878297110058, + "grad_norm": 7.457648774614467, + "learning_rate": 9.201379714100647e-06, + "loss": 18.2697, + "step": 11321 + }, + { + "epoch": 0.2069570621675471, + "grad_norm": 6.263572739947989, + "learning_rate": 9.201219221429869e-06, + "loss": 17.4497, + "step": 11322 + }, + { + "epoch": 0.20697534136399365, + "grad_norm": 5.976391011517805, + "learning_rate": 9.201058714034183e-06, + "loss": 17.3546, + "step": 11323 + }, + { + "epoch": 0.20699362056044016, + "grad_norm": 6.554414667157984, + "learning_rate": 9.200898191914152e-06, + "loss": 17.4139, + "step": 11324 + }, + { + "epoch": 0.2070118997568867, + "grad_norm": 5.507861465424304, + "learning_rate": 9.200737655070336e-06, + "loss": 16.9907, + "step": 11325 + }, + { + "epoch": 0.2070301789533332, + "grad_norm": 9.746191475372257, + "learning_rate": 9.2005771035033e-06, + "loss": 18.7296, + "step": 11326 + }, + { + "epoch": 0.20704845814977973, + "grad_norm": 6.341279694026166, + "learning_rate": 9.200416537213604e-06, + "loss": 17.1672, + "step": 11327 + }, + { + "epoch": 0.20706673734622627, + "grad_norm": 6.435856974138208, + "learning_rate": 9.200255956201814e-06, + "loss": 17.3943, + "step": 11328 + }, + { + "epoch": 0.20708501654267278, + "grad_norm": 6.875642123729105, + "learning_rate": 9.200095360468491e-06, + "loss": 17.8153, + "step": 11329 + }, + { + "epoch": 0.20710329573911931, + "grad_norm": 5.805741798094388, + "learning_rate": 9.1999347500142e-06, + "loss": 17.2909, + "step": 11330 + }, + { + "epoch": 0.20712157493556582, + "grad_norm": 6.748350954475548, + "learning_rate": 9.199774124839499e-06, + "loss": 17.7981, + "step": 11331 + }, + { + "epoch": 0.20713985413201236, + "grad_norm": 7.744974087535545, + "learning_rate": 9.199613484944956e-06, + "loss": 17.9069, + "step": 11332 + }, + { + "epoch": 0.2071581333284589, + "grad_norm": 6.331897098997471, + "learning_rate": 9.19945283033113e-06, + "loss": 17.4355, + "step": 11333 + }, + { + "epoch": 0.2071764125249054, + "grad_norm": 9.14905122928649, + "learning_rate": 9.19929216099859e-06, + "loss": 18.0282, + "step": 11334 + }, + { + "epoch": 0.20719469172135194, + "grad_norm": 7.462062481892406, + "learning_rate": 9.199131476947893e-06, + "loss": 17.7042, + "step": 11335 + }, + { + "epoch": 0.20721297091779844, + "grad_norm": 6.223147064247256, + "learning_rate": 9.198970778179605e-06, + "loss": 17.1327, + "step": 11336 + }, + { + "epoch": 0.20723125011424498, + "grad_norm": 5.925743273389555, + 
"learning_rate": 9.198810064694287e-06, + "loss": 17.0214, + "step": 11337 + }, + { + "epoch": 0.2072495293106915, + "grad_norm": 6.9396073366208135, + "learning_rate": 9.198649336492506e-06, + "loss": 17.5051, + "step": 11338 + }, + { + "epoch": 0.20726780850713802, + "grad_norm": 6.337173387833642, + "learning_rate": 9.198488593574823e-06, + "loss": 17.7323, + "step": 11339 + }, + { + "epoch": 0.20728608770358456, + "grad_norm": 7.55508858779258, + "learning_rate": 9.198327835941803e-06, + "loss": 17.9832, + "step": 11340 + }, + { + "epoch": 0.20730436690003107, + "grad_norm": 7.509552391910138, + "learning_rate": 9.198167063594006e-06, + "loss": 17.8462, + "step": 11341 + }, + { + "epoch": 0.2073226460964776, + "grad_norm": 6.973246871998054, + "learning_rate": 9.198006276531999e-06, + "loss": 17.5752, + "step": 11342 + }, + { + "epoch": 0.2073409252929241, + "grad_norm": 8.300606732955098, + "learning_rate": 9.197845474756344e-06, + "loss": 17.9324, + "step": 11343 + }, + { + "epoch": 0.20735920448937065, + "grad_norm": 6.386337974565366, + "learning_rate": 9.197684658267606e-06, + "loss": 17.2407, + "step": 11344 + }, + { + "epoch": 0.20737748368581718, + "grad_norm": 6.466076760950484, + "learning_rate": 9.197523827066347e-06, + "loss": 17.7687, + "step": 11345 + }, + { + "epoch": 0.2073957628822637, + "grad_norm": 7.731434377724766, + "learning_rate": 9.19736298115313e-06, + "loss": 17.6565, + "step": 11346 + }, + { + "epoch": 0.20741404207871023, + "grad_norm": 8.723847987198447, + "learning_rate": 9.19720212052852e-06, + "loss": 17.8272, + "step": 11347 + }, + { + "epoch": 0.20743232127515673, + "grad_norm": 7.794014451859341, + "learning_rate": 9.197041245193084e-06, + "loss": 17.756, + "step": 11348 + }, + { + "epoch": 0.20745060047160327, + "grad_norm": 6.766738502526163, + "learning_rate": 9.19688035514738e-06, + "loss": 17.5409, + "step": 11349 + }, + { + "epoch": 0.2074688796680498, + "grad_norm": 5.770209024310648, + "learning_rate": 9.196719450391975e-06, + "loss": 17.3905, + "step": 11350 + }, + { + "epoch": 0.2074871588644963, + "grad_norm": 6.549221562153917, + "learning_rate": 9.19655853092743e-06, + "loss": 17.5294, + "step": 11351 + }, + { + "epoch": 0.20750543806094285, + "grad_norm": 6.980172463938054, + "learning_rate": 9.196397596754316e-06, + "loss": 17.4521, + "step": 11352 + }, + { + "epoch": 0.20752371725738936, + "grad_norm": 6.60327897428517, + "learning_rate": 9.196236647873189e-06, + "loss": 17.5934, + "step": 11353 + }, + { + "epoch": 0.2075419964538359, + "grad_norm": 7.065355057578235, + "learning_rate": 9.19607568428462e-06, + "loss": 17.8144, + "step": 11354 + }, + { + "epoch": 0.2075602756502824, + "grad_norm": 6.866725361095603, + "learning_rate": 9.195914705989166e-06, + "loss": 17.7256, + "step": 11355 + }, + { + "epoch": 0.20757855484672894, + "grad_norm": 5.844090363108879, + "learning_rate": 9.195753712987397e-06, + "loss": 17.2677, + "step": 11356 + }, + { + "epoch": 0.20759683404317547, + "grad_norm": 8.539182934160497, + "learning_rate": 9.195592705279876e-06, + "loss": 18.7554, + "step": 11357 + }, + { + "epoch": 0.20761511323962198, + "grad_norm": 6.5348978855057, + "learning_rate": 9.195431682867166e-06, + "loss": 17.5521, + "step": 11358 + }, + { + "epoch": 0.20763339243606851, + "grad_norm": 6.803589053570613, + "learning_rate": 9.195270645749833e-06, + "loss": 17.6202, + "step": 11359 + }, + { + "epoch": 0.20765167163251502, + "grad_norm": 6.727479910864619, + "learning_rate": 9.195109593928438e-06, + "loss": 17.6091, + "step": 11360 
+ }, + { + "epoch": 0.20766995082896156, + "grad_norm": 6.413173530944886, + "learning_rate": 9.19494852740355e-06, + "loss": 17.3738, + "step": 11361 + }, + { + "epoch": 0.2076882300254081, + "grad_norm": 7.601512957017651, + "learning_rate": 9.19478744617573e-06, + "loss": 17.9366, + "step": 11362 + }, + { + "epoch": 0.2077065092218546, + "grad_norm": 5.789707525698412, + "learning_rate": 9.194626350245546e-06, + "loss": 17.1879, + "step": 11363 + }, + { + "epoch": 0.20772478841830114, + "grad_norm": 5.443213409859194, + "learning_rate": 9.19446523961356e-06, + "loss": 17.0118, + "step": 11364 + }, + { + "epoch": 0.20774306761474765, + "grad_norm": 6.413676483015337, + "learning_rate": 9.194304114280335e-06, + "loss": 17.4193, + "step": 11365 + }, + { + "epoch": 0.20776134681119418, + "grad_norm": 9.445433958255618, + "learning_rate": 9.194142974246441e-06, + "loss": 18.1818, + "step": 11366 + }, + { + "epoch": 0.20777962600764072, + "grad_norm": 7.0577186360297794, + "learning_rate": 9.193981819512439e-06, + "loss": 17.6962, + "step": 11367 + }, + { + "epoch": 0.20779790520408722, + "grad_norm": 6.606599937932465, + "learning_rate": 9.193820650078893e-06, + "loss": 17.3329, + "step": 11368 + }, + { + "epoch": 0.20781618440053376, + "grad_norm": 7.269147101298209, + "learning_rate": 9.19365946594637e-06, + "loss": 17.7729, + "step": 11369 + }, + { + "epoch": 0.20783446359698027, + "grad_norm": 10.434064155259405, + "learning_rate": 9.193498267115435e-06, + "loss": 17.6019, + "step": 11370 + }, + { + "epoch": 0.2078527427934268, + "grad_norm": 6.489079456392142, + "learning_rate": 9.193337053586654e-06, + "loss": 17.5649, + "step": 11371 + }, + { + "epoch": 0.2078710219898733, + "grad_norm": 5.076335939992683, + "learning_rate": 9.19317582536059e-06, + "loss": 16.8645, + "step": 11372 + }, + { + "epoch": 0.20788930118631985, + "grad_norm": 6.619757491576467, + "learning_rate": 9.193014582437806e-06, + "loss": 17.4425, + "step": 11373 + }, + { + "epoch": 0.20790758038276638, + "grad_norm": 7.043006390120621, + "learning_rate": 9.192853324818873e-06, + "loss": 17.7465, + "step": 11374 + }, + { + "epoch": 0.2079258595792129, + "grad_norm": 7.056339196299807, + "learning_rate": 9.192692052504351e-06, + "loss": 18.1348, + "step": 11375 + }, + { + "epoch": 0.20794413877565943, + "grad_norm": 7.062149859477818, + "learning_rate": 9.192530765494807e-06, + "loss": 17.8394, + "step": 11376 + }, + { + "epoch": 0.20796241797210593, + "grad_norm": 6.9000246413895026, + "learning_rate": 9.192369463790807e-06, + "loss": 17.7384, + "step": 11377 + }, + { + "epoch": 0.20798069716855247, + "grad_norm": 6.717007672062542, + "learning_rate": 9.192208147392916e-06, + "loss": 17.7328, + "step": 11378 + }, + { + "epoch": 0.207998976364999, + "grad_norm": 7.64450286934646, + "learning_rate": 9.192046816301701e-06, + "loss": 18.4562, + "step": 11379 + }, + { + "epoch": 0.2080172555614455, + "grad_norm": 5.88855172135595, + "learning_rate": 9.191885470517724e-06, + "loss": 17.2166, + "step": 11380 + }, + { + "epoch": 0.20803553475789205, + "grad_norm": 5.841888329181857, + "learning_rate": 9.191724110041551e-06, + "loss": 16.9695, + "step": 11381 + }, + { + "epoch": 0.20805381395433856, + "grad_norm": 6.762546033378747, + "learning_rate": 9.19156273487375e-06, + "loss": 17.7965, + "step": 11382 + }, + { + "epoch": 0.2080720931507851, + "grad_norm": 6.768479023383248, + "learning_rate": 9.191401345014886e-06, + "loss": 17.5318, + "step": 11383 + }, + { + "epoch": 0.20809037234723163, + "grad_norm": 
6.559398362446401, + "learning_rate": 9.191239940465522e-06, + "loss": 17.4949, + "step": 11384 + }, + { + "epoch": 0.20810865154367814, + "grad_norm": 7.3701661115443615, + "learning_rate": 9.191078521226226e-06, + "loss": 18.0867, + "step": 11385 + }, + { + "epoch": 0.20812693074012467, + "grad_norm": 5.117281048479846, + "learning_rate": 9.190917087297565e-06, + "loss": 16.8779, + "step": 11386 + }, + { + "epoch": 0.20814520993657118, + "grad_norm": 8.126069612112843, + "learning_rate": 9.190755638680102e-06, + "loss": 18.0336, + "step": 11387 + }, + { + "epoch": 0.20816348913301772, + "grad_norm": 7.396998678970835, + "learning_rate": 9.190594175374406e-06, + "loss": 17.7872, + "step": 11388 + }, + { + "epoch": 0.20818176832946422, + "grad_norm": 7.287776594949756, + "learning_rate": 9.19043269738104e-06, + "loss": 17.6405, + "step": 11389 + }, + { + "epoch": 0.20820004752591076, + "grad_norm": 6.668249500613658, + "learning_rate": 9.19027120470057e-06, + "loss": 17.6275, + "step": 11390 + }, + { + "epoch": 0.2082183267223573, + "grad_norm": 6.037178264374184, + "learning_rate": 9.190109697333565e-06, + "loss": 17.2264, + "step": 11391 + }, + { + "epoch": 0.2082366059188038, + "grad_norm": 6.525459289361489, + "learning_rate": 9.189948175280588e-06, + "loss": 17.3118, + "step": 11392 + }, + { + "epoch": 0.20825488511525034, + "grad_norm": 6.459079614435108, + "learning_rate": 9.189786638542206e-06, + "loss": 17.4513, + "step": 11393 + }, + { + "epoch": 0.20827316431169685, + "grad_norm": 8.017385972157495, + "learning_rate": 9.189625087118985e-06, + "loss": 18.3373, + "step": 11394 + }, + { + "epoch": 0.20829144350814338, + "grad_norm": 7.19975293101108, + "learning_rate": 9.189463521011492e-06, + "loss": 18.112, + "step": 11395 + }, + { + "epoch": 0.20830972270458992, + "grad_norm": 7.312861634682219, + "learning_rate": 9.189301940220295e-06, + "loss": 17.7899, + "step": 11396 + }, + { + "epoch": 0.20832800190103642, + "grad_norm": 5.8548590381567776, + "learning_rate": 9.189140344745954e-06, + "loss": 17.3417, + "step": 11397 + }, + { + "epoch": 0.20834628109748296, + "grad_norm": 7.4346127839201515, + "learning_rate": 9.188978734589043e-06, + "loss": 17.8877, + "step": 11398 + }, + { + "epoch": 0.20836456029392947, + "grad_norm": 8.118283307165527, + "learning_rate": 9.188817109750124e-06, + "loss": 18.2895, + "step": 11399 + }, + { + "epoch": 0.208382839490376, + "grad_norm": 7.3696099466786436, + "learning_rate": 9.188655470229766e-06, + "loss": 17.7555, + "step": 11400 + }, + { + "epoch": 0.20840111868682254, + "grad_norm": 7.903233934029004, + "learning_rate": 9.188493816028532e-06, + "loss": 18.2937, + "step": 11401 + }, + { + "epoch": 0.20841939788326905, + "grad_norm": 5.666262011006457, + "learning_rate": 9.188332147146991e-06, + "loss": 17.3378, + "step": 11402 + }, + { + "epoch": 0.20843767707971558, + "grad_norm": 7.385682597886673, + "learning_rate": 9.188170463585709e-06, + "loss": 17.8607, + "step": 11403 + }, + { + "epoch": 0.2084559562761621, + "grad_norm": 6.771538572618234, + "learning_rate": 9.188008765345253e-06, + "loss": 17.6437, + "step": 11404 + }, + { + "epoch": 0.20847423547260863, + "grad_norm": 6.72804536843456, + "learning_rate": 9.18784705242619e-06, + "loss": 17.7804, + "step": 11405 + }, + { + "epoch": 0.20849251466905513, + "grad_norm": 7.677137840821924, + "learning_rate": 9.187685324829088e-06, + "loss": 18.1629, + "step": 11406 + }, + { + "epoch": 0.20851079386550167, + "grad_norm": 7.704103037260511, + "learning_rate": 9.187523582554512e-06, + 
"loss": 17.9746, + "step": 11407 + }, + { + "epoch": 0.2085290730619482, + "grad_norm": 6.845532859197307, + "learning_rate": 9.187361825603027e-06, + "loss": 17.6056, + "step": 11408 + }, + { + "epoch": 0.2085473522583947, + "grad_norm": 7.021336147229711, + "learning_rate": 9.187200053975203e-06, + "loss": 17.5864, + "step": 11409 + }, + { + "epoch": 0.20856563145484125, + "grad_norm": 6.675649621210543, + "learning_rate": 9.187038267671606e-06, + "loss": 17.7085, + "step": 11410 + }, + { + "epoch": 0.20858391065128776, + "grad_norm": 6.906161304809612, + "learning_rate": 9.186876466692805e-06, + "loss": 17.7359, + "step": 11411 + }, + { + "epoch": 0.2086021898477343, + "grad_norm": 7.051825935775604, + "learning_rate": 9.186714651039364e-06, + "loss": 17.8063, + "step": 11412 + }, + { + "epoch": 0.20862046904418083, + "grad_norm": 7.323049960219737, + "learning_rate": 9.186552820711852e-06, + "loss": 18.0081, + "step": 11413 + }, + { + "epoch": 0.20863874824062734, + "grad_norm": 5.1518009573793115, + "learning_rate": 9.186390975710835e-06, + "loss": 16.8863, + "step": 11414 + }, + { + "epoch": 0.20865702743707387, + "grad_norm": 5.688528340186772, + "learning_rate": 9.18622911603688e-06, + "loss": 16.9123, + "step": 11415 + }, + { + "epoch": 0.20867530663352038, + "grad_norm": 7.3617259942742646, + "learning_rate": 9.186067241690556e-06, + "loss": 17.7749, + "step": 11416 + }, + { + "epoch": 0.20869358582996692, + "grad_norm": 8.028959493961613, + "learning_rate": 9.18590535267243e-06, + "loss": 17.6508, + "step": 11417 + }, + { + "epoch": 0.20871186502641345, + "grad_norm": 6.41800121755563, + "learning_rate": 9.185743448983068e-06, + "loss": 17.5967, + "step": 11418 + }, + { + "epoch": 0.20873014422285996, + "grad_norm": 6.732489245810147, + "learning_rate": 9.18558153062304e-06, + "loss": 17.5627, + "step": 11419 + }, + { + "epoch": 0.2087484234193065, + "grad_norm": 8.23426094625324, + "learning_rate": 9.185419597592912e-06, + "loss": 18.5474, + "step": 11420 + }, + { + "epoch": 0.208766702615753, + "grad_norm": 5.885122435227352, + "learning_rate": 9.185257649893251e-06, + "loss": 17.3825, + "step": 11421 + }, + { + "epoch": 0.20878498181219954, + "grad_norm": 8.256182545916584, + "learning_rate": 9.185095687524625e-06, + "loss": 18.2636, + "step": 11422 + }, + { + "epoch": 0.20880326100864605, + "grad_norm": 6.254738320064321, + "learning_rate": 9.184933710487602e-06, + "loss": 17.4135, + "step": 11423 + }, + { + "epoch": 0.20882154020509258, + "grad_norm": 9.116702380571455, + "learning_rate": 9.184771718782748e-06, + "loss": 17.8799, + "step": 11424 + }, + { + "epoch": 0.20883981940153912, + "grad_norm": 7.7365396336510095, + "learning_rate": 9.184609712410633e-06, + "loss": 18.1285, + "step": 11425 + }, + { + "epoch": 0.20885809859798563, + "grad_norm": 7.124670689954153, + "learning_rate": 9.184447691371825e-06, + "loss": 17.692, + "step": 11426 + }, + { + "epoch": 0.20887637779443216, + "grad_norm": 8.189534692613202, + "learning_rate": 9.18428565566689e-06, + "loss": 18.231, + "step": 11427 + }, + { + "epoch": 0.20889465699087867, + "grad_norm": 6.6969344705123595, + "learning_rate": 9.184123605296397e-06, + "loss": 17.6378, + "step": 11428 + }, + { + "epoch": 0.2089129361873252, + "grad_norm": 7.448276271927157, + "learning_rate": 9.183961540260914e-06, + "loss": 18.1656, + "step": 11429 + }, + { + "epoch": 0.20893121538377174, + "grad_norm": 6.750870260961923, + "learning_rate": 9.18379946056101e-06, + "loss": 17.7285, + "step": 11430 + }, + { + "epoch": 
0.20894949458021825, + "grad_norm": 6.91195418997168, + "learning_rate": 9.183637366197252e-06, + "loss": 17.4656, + "step": 11431 + }, + { + "epoch": 0.20896777377666478, + "grad_norm": 7.112439559293752, + "learning_rate": 9.183475257170205e-06, + "loss": 17.8876, + "step": 11432 + }, + { + "epoch": 0.2089860529731113, + "grad_norm": 7.193365368428257, + "learning_rate": 9.183313133480442e-06, + "loss": 17.7278, + "step": 11433 + }, + { + "epoch": 0.20900433216955783, + "grad_norm": 6.876107552977072, + "learning_rate": 9.18315099512853e-06, + "loss": 17.8101, + "step": 11434 + }, + { + "epoch": 0.20902261136600436, + "grad_norm": 8.549983313567564, + "learning_rate": 9.182988842115037e-06, + "loss": 18.7058, + "step": 11435 + }, + { + "epoch": 0.20904089056245087, + "grad_norm": 9.235386252918953, + "learning_rate": 9.18282667444053e-06, + "loss": 18.5952, + "step": 11436 + }, + { + "epoch": 0.2090591697588974, + "grad_norm": 7.496746496783825, + "learning_rate": 9.182664492105579e-06, + "loss": 17.7112, + "step": 11437 + }, + { + "epoch": 0.20907744895534391, + "grad_norm": 8.29534400602403, + "learning_rate": 9.182502295110754e-06, + "loss": 17.9013, + "step": 11438 + }, + { + "epoch": 0.20909572815179045, + "grad_norm": 7.806434686132032, + "learning_rate": 9.182340083456618e-06, + "loss": 17.938, + "step": 11439 + }, + { + "epoch": 0.20911400734823696, + "grad_norm": 7.688533630590921, + "learning_rate": 9.182177857143744e-06, + "loss": 18.0619, + "step": 11440 + }, + { + "epoch": 0.2091322865446835, + "grad_norm": 7.974976010537834, + "learning_rate": 9.1820156161727e-06, + "loss": 18.0908, + "step": 11441 + }, + { + "epoch": 0.20915056574113003, + "grad_norm": 8.852588540338171, + "learning_rate": 9.181853360544054e-06, + "loss": 18.5536, + "step": 11442 + }, + { + "epoch": 0.20916884493757654, + "grad_norm": 5.238748007782483, + "learning_rate": 9.181691090258375e-06, + "loss": 16.9777, + "step": 11443 + }, + { + "epoch": 0.20918712413402307, + "grad_norm": 5.822094027495809, + "learning_rate": 9.18152880531623e-06, + "loss": 17.2267, + "step": 11444 + }, + { + "epoch": 0.20920540333046958, + "grad_norm": 8.633433623820073, + "learning_rate": 9.181366505718192e-06, + "loss": 18.2636, + "step": 11445 + }, + { + "epoch": 0.20922368252691612, + "grad_norm": 6.403210662031341, + "learning_rate": 9.181204191464827e-06, + "loss": 17.5143, + "step": 11446 + }, + { + "epoch": 0.20924196172336265, + "grad_norm": 6.161692954214665, + "learning_rate": 9.181041862556703e-06, + "loss": 17.2519, + "step": 11447 + }, + { + "epoch": 0.20926024091980916, + "grad_norm": 6.270337845095842, + "learning_rate": 9.18087951899439e-06, + "loss": 17.325, + "step": 11448 + }, + { + "epoch": 0.2092785201162557, + "grad_norm": 6.037258527576589, + "learning_rate": 9.180717160778458e-06, + "loss": 17.2384, + "step": 11449 + }, + { + "epoch": 0.2092967993127022, + "grad_norm": 7.532790883811677, + "learning_rate": 9.180554787909474e-06, + "loss": 17.6423, + "step": 11450 + }, + { + "epoch": 0.20931507850914874, + "grad_norm": 6.960346383221193, + "learning_rate": 9.180392400388008e-06, + "loss": 17.9873, + "step": 11451 + }, + { + "epoch": 0.20933335770559527, + "grad_norm": 6.3281520616882245, + "learning_rate": 9.180229998214632e-06, + "loss": 17.4254, + "step": 11452 + }, + { + "epoch": 0.20935163690204178, + "grad_norm": 7.930718464564416, + "learning_rate": 9.180067581389908e-06, + "loss": 17.9693, + "step": 11453 + }, + { + "epoch": 0.20936991609848832, + "grad_norm": 7.16065467253636, + "learning_rate": 
9.179905149914413e-06, + "loss": 17.9344, + "step": 11454 + }, + { + "epoch": 0.20938819529493483, + "grad_norm": 6.461398696890172, + "learning_rate": 9.179742703788715e-06, + "loss": 17.5761, + "step": 11455 + }, + { + "epoch": 0.20940647449138136, + "grad_norm": 5.583603477464138, + "learning_rate": 9.179580243013378e-06, + "loss": 17.2408, + "step": 11456 + }, + { + "epoch": 0.20942475368782787, + "grad_norm": 8.131423284574632, + "learning_rate": 9.179417767588975e-06, + "loss": 18.8045, + "step": 11457 + }, + { + "epoch": 0.2094430328842744, + "grad_norm": 6.570135935211809, + "learning_rate": 9.179255277516077e-06, + "loss": 17.7531, + "step": 11458 + }, + { + "epoch": 0.20946131208072094, + "grad_norm": 7.046249102869978, + "learning_rate": 9.179092772795253e-06, + "loss": 17.7526, + "step": 11459 + }, + { + "epoch": 0.20947959127716745, + "grad_norm": 8.84210435560813, + "learning_rate": 9.17893025342707e-06, + "loss": 18.1264, + "step": 11460 + }, + { + "epoch": 0.20949787047361398, + "grad_norm": 7.013666125292309, + "learning_rate": 9.178767719412098e-06, + "loss": 17.6529, + "step": 11461 + }, + { + "epoch": 0.2095161496700605, + "grad_norm": 6.193632295670529, + "learning_rate": 9.17860517075091e-06, + "loss": 17.5707, + "step": 11462 + }, + { + "epoch": 0.20953442886650703, + "grad_norm": 6.444396119092666, + "learning_rate": 9.17844260744407e-06, + "loss": 17.2939, + "step": 11463 + }, + { + "epoch": 0.20955270806295356, + "grad_norm": 5.333388606505633, + "learning_rate": 9.178280029492154e-06, + "loss": 16.963, + "step": 11464 + }, + { + "epoch": 0.20957098725940007, + "grad_norm": 5.560412811342642, + "learning_rate": 9.178117436895731e-06, + "loss": 17.177, + "step": 11465 + }, + { + "epoch": 0.2095892664558466, + "grad_norm": 6.115751298206976, + "learning_rate": 9.177954829655367e-06, + "loss": 17.423, + "step": 11466 + }, + { + "epoch": 0.20960754565229311, + "grad_norm": 5.775824499667194, + "learning_rate": 9.177792207771632e-06, + "loss": 17.2843, + "step": 11467 + }, + { + "epoch": 0.20962582484873965, + "grad_norm": 6.633139118448926, + "learning_rate": 9.177629571245099e-06, + "loss": 17.6942, + "step": 11468 + }, + { + "epoch": 0.20964410404518619, + "grad_norm": 6.442523114555858, + "learning_rate": 9.177466920076337e-06, + "loss": 17.6104, + "step": 11469 + }, + { + "epoch": 0.2096623832416327, + "grad_norm": 6.9251317768575955, + "learning_rate": 9.177304254265918e-06, + "loss": 17.7068, + "step": 11470 + }, + { + "epoch": 0.20968066243807923, + "grad_norm": 7.398398019701001, + "learning_rate": 9.177141573814407e-06, + "loss": 17.8246, + "step": 11471 + }, + { + "epoch": 0.20969894163452574, + "grad_norm": 6.424344074639616, + "learning_rate": 9.176978878722378e-06, + "loss": 17.4117, + "step": 11472 + }, + { + "epoch": 0.20971722083097227, + "grad_norm": 6.306753861980729, + "learning_rate": 9.176816168990402e-06, + "loss": 17.2655, + "step": 11473 + }, + { + "epoch": 0.20973550002741878, + "grad_norm": 6.672184053816812, + "learning_rate": 9.176653444619045e-06, + "loss": 17.54, + "step": 11474 + }, + { + "epoch": 0.20975377922386532, + "grad_norm": 5.9405160859744655, + "learning_rate": 9.176490705608883e-06, + "loss": 17.3811, + "step": 11475 + }, + { + "epoch": 0.20977205842031185, + "grad_norm": 6.3595404036329795, + "learning_rate": 9.176327951960481e-06, + "loss": 17.5449, + "step": 11476 + }, + { + "epoch": 0.20979033761675836, + "grad_norm": 7.935320178925701, + "learning_rate": 9.176165183674415e-06, + "loss": 18.1486, + "step": 11477 + }, + { + 
"epoch": 0.2098086168132049, + "grad_norm": 7.020614202529794, + "learning_rate": 9.17600240075125e-06, + "loss": 17.6709, + "step": 11478 + }, + { + "epoch": 0.2098268960096514, + "grad_norm": 6.801796338808055, + "learning_rate": 9.175839603191562e-06, + "loss": 17.6184, + "step": 11479 + }, + { + "epoch": 0.20984517520609794, + "grad_norm": 7.41898396181858, + "learning_rate": 9.175676790995914e-06, + "loss": 18.0923, + "step": 11480 + }, + { + "epoch": 0.20986345440254447, + "grad_norm": 7.078454204215108, + "learning_rate": 9.175513964164884e-06, + "loss": 17.5183, + "step": 11481 + }, + { + "epoch": 0.20988173359899098, + "grad_norm": 7.246881122848099, + "learning_rate": 9.175351122699039e-06, + "loss": 17.873, + "step": 11482 + }, + { + "epoch": 0.20990001279543752, + "grad_norm": 7.2882921527209765, + "learning_rate": 9.175188266598952e-06, + "loss": 18.4119, + "step": 11483 + }, + { + "epoch": 0.20991829199188403, + "grad_norm": 5.771480647417177, + "learning_rate": 9.175025395865191e-06, + "loss": 17.2252, + "step": 11484 + }, + { + "epoch": 0.20993657118833056, + "grad_norm": 5.6758103847942705, + "learning_rate": 9.174862510498328e-06, + "loss": 17.0851, + "step": 11485 + }, + { + "epoch": 0.2099548503847771, + "grad_norm": 7.267595105528622, + "learning_rate": 9.174699610498935e-06, + "loss": 17.7672, + "step": 11486 + }, + { + "epoch": 0.2099731295812236, + "grad_norm": 7.854747483556572, + "learning_rate": 9.174536695867582e-06, + "loss": 17.9662, + "step": 11487 + }, + { + "epoch": 0.20999140877767014, + "grad_norm": 6.087095273285454, + "learning_rate": 9.17437376660484e-06, + "loss": 17.4715, + "step": 11488 + }, + { + "epoch": 0.21000968797411665, + "grad_norm": 9.184815723203007, + "learning_rate": 9.17421082271128e-06, + "loss": 18.5931, + "step": 11489 + }, + { + "epoch": 0.21002796717056318, + "grad_norm": 6.876450065885871, + "learning_rate": 9.174047864187474e-06, + "loss": 17.8128, + "step": 11490 + }, + { + "epoch": 0.2100462463670097, + "grad_norm": 7.182804026312148, + "learning_rate": 9.173884891033991e-06, + "loss": 18.0019, + "step": 11491 + }, + { + "epoch": 0.21006452556345623, + "grad_norm": 7.132975071055591, + "learning_rate": 9.173721903251406e-06, + "loss": 17.7768, + "step": 11492 + }, + { + "epoch": 0.21008280475990276, + "grad_norm": 7.024857048951854, + "learning_rate": 9.173558900840286e-06, + "loss": 17.8144, + "step": 11493 + }, + { + "epoch": 0.21010108395634927, + "grad_norm": 7.681145450321271, + "learning_rate": 9.173395883801205e-06, + "loss": 17.9513, + "step": 11494 + }, + { + "epoch": 0.2101193631527958, + "grad_norm": 6.469819414030036, + "learning_rate": 9.173232852134733e-06, + "loss": 17.4744, + "step": 11495 + }, + { + "epoch": 0.21013764234924232, + "grad_norm": 6.146947289438684, + "learning_rate": 9.173069805841442e-06, + "loss": 17.3704, + "step": 11496 + }, + { + "epoch": 0.21015592154568885, + "grad_norm": 6.551915270906452, + "learning_rate": 9.172906744921904e-06, + "loss": 17.3509, + "step": 11497 + }, + { + "epoch": 0.2101742007421354, + "grad_norm": 6.192812102346293, + "learning_rate": 9.172743669376691e-06, + "loss": 17.6447, + "step": 11498 + }, + { + "epoch": 0.2101924799385819, + "grad_norm": 6.415154541814359, + "learning_rate": 9.172580579206372e-06, + "loss": 17.3324, + "step": 11499 + }, + { + "epoch": 0.21021075913502843, + "grad_norm": 6.722914924792808, + "learning_rate": 9.172417474411521e-06, + "loss": 17.6114, + "step": 11500 + }, + { + "epoch": 0.21022903833147494, + "grad_norm": 8.548949703373932, + 
"learning_rate": 9.172254354992707e-06, + "loss": 18.1477, + "step": 11501 + }, + { + "epoch": 0.21024731752792147, + "grad_norm": 5.861101349220818, + "learning_rate": 9.172091220950505e-06, + "loss": 17.2611, + "step": 11502 + }, + { + "epoch": 0.210265596724368, + "grad_norm": 5.905902955464844, + "learning_rate": 9.171928072285486e-06, + "loss": 17.3396, + "step": 11503 + }, + { + "epoch": 0.21028387592081452, + "grad_norm": 5.915945906752697, + "learning_rate": 9.17176490899822e-06, + "loss": 17.4707, + "step": 11504 + }, + { + "epoch": 0.21030215511726105, + "grad_norm": 6.276079061196319, + "learning_rate": 9.17160173108928e-06, + "loss": 17.4897, + "step": 11505 + }, + { + "epoch": 0.21032043431370756, + "grad_norm": 6.4612495913653625, + "learning_rate": 9.171438538559239e-06, + "loss": 17.8194, + "step": 11506 + }, + { + "epoch": 0.2103387135101541, + "grad_norm": 6.872186108418846, + "learning_rate": 9.171275331408667e-06, + "loss": 17.6669, + "step": 11507 + }, + { + "epoch": 0.2103569927066006, + "grad_norm": 5.683670818575571, + "learning_rate": 9.171112109638136e-06, + "loss": 17.1627, + "step": 11508 + }, + { + "epoch": 0.21037527190304714, + "grad_norm": 6.323289335138741, + "learning_rate": 9.17094887324822e-06, + "loss": 17.5417, + "step": 11509 + }, + { + "epoch": 0.21039355109949368, + "grad_norm": 5.994572405306893, + "learning_rate": 9.170785622239491e-06, + "loss": 17.4275, + "step": 11510 + }, + { + "epoch": 0.21041183029594018, + "grad_norm": 6.553108039899262, + "learning_rate": 9.17062235661252e-06, + "loss": 17.749, + "step": 11511 + }, + { + "epoch": 0.21043010949238672, + "grad_norm": 6.268290066568144, + "learning_rate": 9.170459076367878e-06, + "loss": 17.5229, + "step": 11512 + }, + { + "epoch": 0.21044838868883323, + "grad_norm": 6.242131561088442, + "learning_rate": 9.17029578150614e-06, + "loss": 17.6085, + "step": 11513 + }, + { + "epoch": 0.21046666788527976, + "grad_norm": 6.839671989520956, + "learning_rate": 9.170132472027876e-06, + "loss": 17.7471, + "step": 11514 + }, + { + "epoch": 0.2104849470817263, + "grad_norm": 6.299417662322813, + "learning_rate": 9.169969147933661e-06, + "loss": 17.3098, + "step": 11515 + }, + { + "epoch": 0.2105032262781728, + "grad_norm": 6.664050719356811, + "learning_rate": 9.169805809224067e-06, + "loss": 17.5669, + "step": 11516 + }, + { + "epoch": 0.21052150547461934, + "grad_norm": 6.881472460367836, + "learning_rate": 9.169642455899664e-06, + "loss": 17.6966, + "step": 11517 + }, + { + "epoch": 0.21053978467106585, + "grad_norm": 7.650325287111541, + "learning_rate": 9.169479087961026e-06, + "loss": 17.976, + "step": 11518 + }, + { + "epoch": 0.21055806386751239, + "grad_norm": 6.622241294044782, + "learning_rate": 9.169315705408725e-06, + "loss": 17.5739, + "step": 11519 + }, + { + "epoch": 0.21057634306395892, + "grad_norm": 6.596926006218519, + "learning_rate": 9.169152308243334e-06, + "loss": 17.704, + "step": 11520 + }, + { + "epoch": 0.21059462226040543, + "grad_norm": 6.179397638603914, + "learning_rate": 9.168988896465428e-06, + "loss": 17.3994, + "step": 11521 + }, + { + "epoch": 0.21061290145685196, + "grad_norm": 7.3885773820083935, + "learning_rate": 9.168825470075575e-06, + "loss": 18.2278, + "step": 11522 + }, + { + "epoch": 0.21063118065329847, + "grad_norm": 7.24958690158779, + "learning_rate": 9.168662029074354e-06, + "loss": 17.681, + "step": 11523 + }, + { + "epoch": 0.210649459849745, + "grad_norm": 7.721728704817233, + "learning_rate": 9.168498573462332e-06, + "loss": 17.9807, + "step": 11524 
+ }, + { + "epoch": 0.21066773904619152, + "grad_norm": 8.601277694577712, + "learning_rate": 9.168335103240085e-06, + "loss": 17.554, + "step": 11525 + }, + { + "epoch": 0.21068601824263805, + "grad_norm": 7.818730322223752, + "learning_rate": 9.168171618408184e-06, + "loss": 17.7242, + "step": 11526 + }, + { + "epoch": 0.2107042974390846, + "grad_norm": 6.578160810947474, + "learning_rate": 9.168008118967205e-06, + "loss": 17.8085, + "step": 11527 + }, + { + "epoch": 0.2107225766355311, + "grad_norm": 7.813451617036049, + "learning_rate": 9.167844604917718e-06, + "loss": 17.8416, + "step": 11528 + }, + { + "epoch": 0.21074085583197763, + "grad_norm": 7.586779397627988, + "learning_rate": 9.167681076260296e-06, + "loss": 18.3751, + "step": 11529 + }, + { + "epoch": 0.21075913502842414, + "grad_norm": 7.190603490535477, + "learning_rate": 9.167517532995516e-06, + "loss": 18.267, + "step": 11530 + }, + { + "epoch": 0.21077741422487067, + "grad_norm": 6.1094351684151835, + "learning_rate": 9.167353975123947e-06, + "loss": 17.2526, + "step": 11531 + }, + { + "epoch": 0.2107956934213172, + "grad_norm": 5.802959557639488, + "learning_rate": 9.167190402646165e-06, + "loss": 17.4219, + "step": 11532 + }, + { + "epoch": 0.21081397261776372, + "grad_norm": 7.848723997652496, + "learning_rate": 9.167026815562742e-06, + "loss": 17.758, + "step": 11533 + }, + { + "epoch": 0.21083225181421025, + "grad_norm": 6.219336050238912, + "learning_rate": 9.166863213874251e-06, + "loss": 17.2805, + "step": 11534 + }, + { + "epoch": 0.21085053101065676, + "grad_norm": 6.420279170239606, + "learning_rate": 9.166699597581267e-06, + "loss": 17.3998, + "step": 11535 + }, + { + "epoch": 0.2108688102071033, + "grad_norm": 6.206917950445866, + "learning_rate": 9.166535966684362e-06, + "loss": 17.4474, + "step": 11536 + }, + { + "epoch": 0.21088708940354983, + "grad_norm": 7.389121348819259, + "learning_rate": 9.16637232118411e-06, + "loss": 17.8443, + "step": 11537 + }, + { + "epoch": 0.21090536859999634, + "grad_norm": 7.116199840483028, + "learning_rate": 9.166208661081084e-06, + "loss": 17.713, + "step": 11538 + }, + { + "epoch": 0.21092364779644288, + "grad_norm": 6.67231809526451, + "learning_rate": 9.16604498637586e-06, + "loss": 17.4988, + "step": 11539 + }, + { + "epoch": 0.21094192699288938, + "grad_norm": 7.694803892212349, + "learning_rate": 9.165881297069008e-06, + "loss": 18.0834, + "step": 11540 + }, + { + "epoch": 0.21096020618933592, + "grad_norm": 6.290457034681836, + "learning_rate": 9.165717593161104e-06, + "loss": 17.6581, + "step": 11541 + }, + { + "epoch": 0.21097848538578243, + "grad_norm": 6.212094558893326, + "learning_rate": 9.16555387465272e-06, + "loss": 17.4364, + "step": 11542 + }, + { + "epoch": 0.21099676458222896, + "grad_norm": 6.628723132600602, + "learning_rate": 9.165390141544433e-06, + "loss": 17.5085, + "step": 11543 + }, + { + "epoch": 0.2110150437786755, + "grad_norm": 6.358862691567192, + "learning_rate": 9.165226393836815e-06, + "loss": 17.3477, + "step": 11544 + }, + { + "epoch": 0.211033322975122, + "grad_norm": 6.740305082496149, + "learning_rate": 9.16506263153044e-06, + "loss": 17.9455, + "step": 11545 + }, + { + "epoch": 0.21105160217156854, + "grad_norm": 6.26308925039984, + "learning_rate": 9.164898854625882e-06, + "loss": 17.5103, + "step": 11546 + }, + { + "epoch": 0.21106988136801505, + "grad_norm": 7.128480082817722, + "learning_rate": 9.164735063123714e-06, + "loss": 17.646, + "step": 11547 + }, + { + "epoch": 0.21108816056446159, + "grad_norm": 6.4775515543540605, 
+ "learning_rate": 9.16457125702451e-06, + "loss": 17.6652, + "step": 11548 + }, + { + "epoch": 0.21110643976090812, + "grad_norm": 6.380750658999174, + "learning_rate": 9.164407436328845e-06, + "loss": 17.6208, + "step": 11549 + }, + { + "epoch": 0.21112471895735463, + "grad_norm": 6.888637801448464, + "learning_rate": 9.164243601037297e-06, + "loss": 17.5228, + "step": 11550 + }, + { + "epoch": 0.21114299815380116, + "grad_norm": 5.799070743378431, + "learning_rate": 9.164079751150434e-06, + "loss": 17.1002, + "step": 11551 + }, + { + "epoch": 0.21116127735024767, + "grad_norm": 6.427275669896442, + "learning_rate": 9.163915886668832e-06, + "loss": 17.6247, + "step": 11552 + }, + { + "epoch": 0.2111795565466942, + "grad_norm": 6.910129880790046, + "learning_rate": 9.163752007593068e-06, + "loss": 17.7014, + "step": 11553 + }, + { + "epoch": 0.21119783574314074, + "grad_norm": 6.334479730762166, + "learning_rate": 9.163588113923714e-06, + "loss": 17.4241, + "step": 11554 + }, + { + "epoch": 0.21121611493958725, + "grad_norm": 10.11538820518104, + "learning_rate": 9.163424205661345e-06, + "loss": 18.7674, + "step": 11555 + }, + { + "epoch": 0.2112343941360338, + "grad_norm": 5.632433495283178, + "learning_rate": 9.163260282806533e-06, + "loss": 17.0817, + "step": 11556 + }, + { + "epoch": 0.2112526733324803, + "grad_norm": 7.162886238789814, + "learning_rate": 9.163096345359859e-06, + "loss": 17.8033, + "step": 11557 + }, + { + "epoch": 0.21127095252892683, + "grad_norm": 6.708746524379571, + "learning_rate": 9.162932393321891e-06, + "loss": 17.5362, + "step": 11558 + }, + { + "epoch": 0.21128923172537334, + "grad_norm": 7.855253920799346, + "learning_rate": 9.162768426693207e-06, + "loss": 18.2244, + "step": 11559 + }, + { + "epoch": 0.21130751092181987, + "grad_norm": 7.777970909711207, + "learning_rate": 9.162604445474382e-06, + "loss": 18.2098, + "step": 11560 + }, + { + "epoch": 0.2113257901182664, + "grad_norm": 5.156482272524668, + "learning_rate": 9.162440449665988e-06, + "loss": 16.9433, + "step": 11561 + }, + { + "epoch": 0.21134406931471292, + "grad_norm": 6.151643993226001, + "learning_rate": 9.162276439268601e-06, + "loss": 17.3725, + "step": 11562 + }, + { + "epoch": 0.21136234851115945, + "grad_norm": 6.370579279810969, + "learning_rate": 9.162112414282797e-06, + "loss": 17.4643, + "step": 11563 + }, + { + "epoch": 0.21138062770760596, + "grad_norm": 8.356616226837273, + "learning_rate": 9.16194837470915e-06, + "loss": 18.4377, + "step": 11564 + }, + { + "epoch": 0.2113989069040525, + "grad_norm": 6.467923689781751, + "learning_rate": 9.161784320548237e-06, + "loss": 17.3117, + "step": 11565 + }, + { + "epoch": 0.21141718610049903, + "grad_norm": 9.517975928212337, + "learning_rate": 9.161620251800629e-06, + "loss": 18.5935, + "step": 11566 + }, + { + "epoch": 0.21143546529694554, + "grad_norm": 6.024618051987733, + "learning_rate": 9.161456168466905e-06, + "loss": 17.2635, + "step": 11567 + }, + { + "epoch": 0.21145374449339208, + "grad_norm": 6.240698126621197, + "learning_rate": 9.161292070547636e-06, + "loss": 17.4433, + "step": 11568 + }, + { + "epoch": 0.21147202368983858, + "grad_norm": 6.112310765398839, + "learning_rate": 9.1611279580434e-06, + "loss": 17.5563, + "step": 11569 + }, + { + "epoch": 0.21149030288628512, + "grad_norm": 6.5602463956907595, + "learning_rate": 9.160963830954772e-06, + "loss": 17.8943, + "step": 11570 + }, + { + "epoch": 0.21150858208273166, + "grad_norm": 6.263790052624193, + "learning_rate": 9.160799689282327e-06, + "loss": 17.509, + 
"step": 11571 + }, + { + "epoch": 0.21152686127917816, + "grad_norm": 7.064124643991461, + "learning_rate": 9.16063553302664e-06, + "loss": 17.9556, + "step": 11572 + }, + { + "epoch": 0.2115451404756247, + "grad_norm": 7.44979062916576, + "learning_rate": 9.160471362188285e-06, + "loss": 17.534, + "step": 11573 + }, + { + "epoch": 0.2115634196720712, + "grad_norm": 7.0510195637169595, + "learning_rate": 9.160307176767841e-06, + "loss": 17.726, + "step": 11574 + }, + { + "epoch": 0.21158169886851774, + "grad_norm": 5.517175463698974, + "learning_rate": 9.16014297676588e-06, + "loss": 17.1502, + "step": 11575 + }, + { + "epoch": 0.21159997806496425, + "grad_norm": 7.138670414777385, + "learning_rate": 9.159978762182979e-06, + "loss": 17.9271, + "step": 11576 + }, + { + "epoch": 0.21161825726141079, + "grad_norm": 6.403940223475687, + "learning_rate": 9.159814533019716e-06, + "loss": 17.4957, + "step": 11577 + }, + { + "epoch": 0.21163653645785732, + "grad_norm": 6.7509313342979205, + "learning_rate": 9.15965028927666e-06, + "loss": 17.6066, + "step": 11578 + }, + { + "epoch": 0.21165481565430383, + "grad_norm": 6.171380155539927, + "learning_rate": 9.159486030954395e-06, + "loss": 17.4828, + "step": 11579 + }, + { + "epoch": 0.21167309485075037, + "grad_norm": 6.342175557557343, + "learning_rate": 9.15932175805349e-06, + "loss": 17.5831, + "step": 11580 + }, + { + "epoch": 0.21169137404719687, + "grad_norm": 6.400579261216163, + "learning_rate": 9.159157470574522e-06, + "loss": 17.6458, + "step": 11581 + }, + { + "epoch": 0.2117096532436434, + "grad_norm": 6.927875169843063, + "learning_rate": 9.15899316851807e-06, + "loss": 17.5782, + "step": 11582 + }, + { + "epoch": 0.21172793244008994, + "grad_norm": 6.849203963747864, + "learning_rate": 9.158828851884707e-06, + "loss": 17.6371, + "step": 11583 + }, + { + "epoch": 0.21174621163653645, + "grad_norm": 7.280309477198249, + "learning_rate": 9.158664520675009e-06, + "loss": 17.5368, + "step": 11584 + }, + { + "epoch": 0.211764490832983, + "grad_norm": 7.687530189637583, + "learning_rate": 9.158500174889553e-06, + "loss": 17.8132, + "step": 11585 + }, + { + "epoch": 0.2117827700294295, + "grad_norm": 7.155417598881707, + "learning_rate": 9.158335814528916e-06, + "loss": 17.8905, + "step": 11586 + }, + { + "epoch": 0.21180104922587603, + "grad_norm": 6.738417741555908, + "learning_rate": 9.158171439593671e-06, + "loss": 17.4718, + "step": 11587 + }, + { + "epoch": 0.21181932842232257, + "grad_norm": 7.578109723057844, + "learning_rate": 9.1580070500844e-06, + "loss": 17.9955, + "step": 11588 + }, + { + "epoch": 0.21183760761876907, + "grad_norm": 6.373369435944889, + "learning_rate": 9.15784264600167e-06, + "loss": 17.2122, + "step": 11589 + }, + { + "epoch": 0.2118558868152156, + "grad_norm": 8.153928745446654, + "learning_rate": 9.157678227346066e-06, + "loss": 17.6803, + "step": 11590 + }, + { + "epoch": 0.21187416601166212, + "grad_norm": 7.7531314636113215, + "learning_rate": 9.157513794118158e-06, + "loss": 17.7606, + "step": 11591 + }, + { + "epoch": 0.21189244520810865, + "grad_norm": 8.286553170015672, + "learning_rate": 9.157349346318525e-06, + "loss": 17.923, + "step": 11592 + }, + { + "epoch": 0.21191072440455516, + "grad_norm": 6.890711489229295, + "learning_rate": 9.157184883947745e-06, + "loss": 17.7812, + "step": 11593 + }, + { + "epoch": 0.2119290036010017, + "grad_norm": 8.536433843597331, + "learning_rate": 9.157020407006392e-06, + "loss": 17.5298, + "step": 11594 + }, + { + "epoch": 0.21194728279744823, + "grad_norm": 
8.666063839961629, + "learning_rate": 9.156855915495043e-06, + "loss": 18.1918, + "step": 11595 + }, + { + "epoch": 0.21196556199389474, + "grad_norm": 6.250026801355732, + "learning_rate": 9.156691409414275e-06, + "loss": 17.3656, + "step": 11596 + }, + { + "epoch": 0.21198384119034128, + "grad_norm": 6.820324698686371, + "learning_rate": 9.156526888764664e-06, + "loss": 18.0642, + "step": 11597 + }, + { + "epoch": 0.21200212038678778, + "grad_norm": 5.655656072320394, + "learning_rate": 9.156362353546789e-06, + "loss": 17.1208, + "step": 11598 + }, + { + "epoch": 0.21202039958323432, + "grad_norm": 7.6187923804479265, + "learning_rate": 9.156197803761223e-06, + "loss": 18.0396, + "step": 11599 + }, + { + "epoch": 0.21203867877968086, + "grad_norm": 5.836772120744281, + "learning_rate": 9.156033239408544e-06, + "loss": 17.3792, + "step": 11600 + }, + { + "epoch": 0.21205695797612736, + "grad_norm": 6.5679101584275825, + "learning_rate": 9.155868660489327e-06, + "loss": 17.4726, + "step": 11601 + }, + { + "epoch": 0.2120752371725739, + "grad_norm": 6.321499740706384, + "learning_rate": 9.155704067004154e-06, + "loss": 17.3891, + "step": 11602 + }, + { + "epoch": 0.2120935163690204, + "grad_norm": 6.889131606502487, + "learning_rate": 9.155539458953597e-06, + "loss": 17.5789, + "step": 11603 + }, + { + "epoch": 0.21211179556546694, + "grad_norm": 6.89687307770892, + "learning_rate": 9.155374836338237e-06, + "loss": 17.4977, + "step": 11604 + }, + { + "epoch": 0.21213007476191348, + "grad_norm": 5.848217535319225, + "learning_rate": 9.155210199158648e-06, + "loss": 17.0581, + "step": 11605 + }, + { + "epoch": 0.21214835395836, + "grad_norm": 6.740320780209365, + "learning_rate": 9.155045547415408e-06, + "loss": 17.4307, + "step": 11606 + }, + { + "epoch": 0.21216663315480652, + "grad_norm": 8.038571701517087, + "learning_rate": 9.154880881109093e-06, + "loss": 18.2448, + "step": 11607 + }, + { + "epoch": 0.21218491235125303, + "grad_norm": 6.480469147176548, + "learning_rate": 9.15471620024028e-06, + "loss": 17.4355, + "step": 11608 + }, + { + "epoch": 0.21220319154769957, + "grad_norm": 6.531460315315049, + "learning_rate": 9.15455150480955e-06, + "loss": 17.3958, + "step": 11609 + }, + { + "epoch": 0.21222147074414607, + "grad_norm": 7.304185423649102, + "learning_rate": 9.154386794817476e-06, + "loss": 18.0237, + "step": 11610 + }, + { + "epoch": 0.2122397499405926, + "grad_norm": 6.429840559018353, + "learning_rate": 9.154222070264637e-06, + "loss": 17.3974, + "step": 11611 + }, + { + "epoch": 0.21225802913703914, + "grad_norm": 5.9068517909284965, + "learning_rate": 9.154057331151612e-06, + "loss": 17.2643, + "step": 11612 + }, + { + "epoch": 0.21227630833348565, + "grad_norm": 6.46069971775121, + "learning_rate": 9.153892577478973e-06, + "loss": 17.4117, + "step": 11613 + }, + { + "epoch": 0.2122945875299322, + "grad_norm": 7.125258530227882, + "learning_rate": 9.153727809247303e-06, + "loss": 17.6316, + "step": 11614 + }, + { + "epoch": 0.2123128667263787, + "grad_norm": 6.122252316473095, + "learning_rate": 9.153563026457178e-06, + "loss": 17.4086, + "step": 11615 + }, + { + "epoch": 0.21233114592282523, + "grad_norm": 6.444666365891119, + "learning_rate": 9.153398229109174e-06, + "loss": 17.4249, + "step": 11616 + }, + { + "epoch": 0.21234942511927177, + "grad_norm": 7.493875148419845, + "learning_rate": 9.15323341720387e-06, + "loss": 17.8383, + "step": 11617 + }, + { + "epoch": 0.21236770431571828, + "grad_norm": 8.095743451259024, + "learning_rate": 9.153068590741843e-06, + 
"loss": 18.0685, + "step": 11618 + }, + { + "epoch": 0.2123859835121648, + "grad_norm": 7.440249340564604, + "learning_rate": 9.152903749723671e-06, + "loss": 18.1068, + "step": 11619 + }, + { + "epoch": 0.21240426270861132, + "grad_norm": 6.3605355322271295, + "learning_rate": 9.152738894149931e-06, + "loss": 17.5995, + "step": 11620 + }, + { + "epoch": 0.21242254190505785, + "grad_norm": 5.294877973158245, + "learning_rate": 9.152574024021202e-06, + "loss": 17.0195, + "step": 11621 + }, + { + "epoch": 0.2124408211015044, + "grad_norm": 7.75287908114004, + "learning_rate": 9.152409139338062e-06, + "loss": 18.4059, + "step": 11622 + }, + { + "epoch": 0.2124591002979509, + "grad_norm": 5.93776794059065, + "learning_rate": 9.152244240101088e-06, + "loss": 17.2836, + "step": 11623 + }, + { + "epoch": 0.21247737949439743, + "grad_norm": 8.964291260472157, + "learning_rate": 9.152079326310858e-06, + "loss": 18.1602, + "step": 11624 + }, + { + "epoch": 0.21249565869084394, + "grad_norm": 5.577234267455074, + "learning_rate": 9.151914397967952e-06, + "loss": 17.0911, + "step": 11625 + }, + { + "epoch": 0.21251393788729048, + "grad_norm": 5.771379750917453, + "learning_rate": 9.151749455072945e-06, + "loss": 17.3774, + "step": 11626 + }, + { + "epoch": 0.21253221708373699, + "grad_norm": 6.673806956891338, + "learning_rate": 9.151584497626415e-06, + "loss": 17.5897, + "step": 11627 + }, + { + "epoch": 0.21255049628018352, + "grad_norm": 6.5728951995585945, + "learning_rate": 9.151419525628942e-06, + "loss": 17.6813, + "step": 11628 + }, + { + "epoch": 0.21256877547663006, + "grad_norm": 6.38576838913584, + "learning_rate": 9.151254539081106e-06, + "loss": 17.3931, + "step": 11629 + }, + { + "epoch": 0.21258705467307656, + "grad_norm": 6.229414042846477, + "learning_rate": 9.151089537983482e-06, + "loss": 17.3624, + "step": 11630 + }, + { + "epoch": 0.2126053338695231, + "grad_norm": 7.222747357571975, + "learning_rate": 9.150924522336648e-06, + "loss": 17.5589, + "step": 11631 + }, + { + "epoch": 0.2126236130659696, + "grad_norm": 6.316618315287245, + "learning_rate": 9.150759492141186e-06, + "loss": 17.6173, + "step": 11632 + }, + { + "epoch": 0.21264189226241614, + "grad_norm": 6.707411291395412, + "learning_rate": 9.15059444739767e-06, + "loss": 17.5739, + "step": 11633 + }, + { + "epoch": 0.21266017145886268, + "grad_norm": 7.791898302407172, + "learning_rate": 9.150429388106684e-06, + "loss": 17.9749, + "step": 11634 + }, + { + "epoch": 0.2126784506553092, + "grad_norm": 7.6586665707499195, + "learning_rate": 9.1502643142688e-06, + "loss": 17.9408, + "step": 11635 + }, + { + "epoch": 0.21269672985175572, + "grad_norm": 7.394545431095758, + "learning_rate": 9.1500992258846e-06, + "loss": 17.6004, + "step": 11636 + }, + { + "epoch": 0.21271500904820223, + "grad_norm": 7.765655019870423, + "learning_rate": 9.149934122954662e-06, + "loss": 18.0874, + "step": 11637 + }, + { + "epoch": 0.21273328824464877, + "grad_norm": 5.704399797239602, + "learning_rate": 9.149769005479568e-06, + "loss": 16.9736, + "step": 11638 + }, + { + "epoch": 0.2127515674410953, + "grad_norm": 7.2538294596919854, + "learning_rate": 9.14960387345989e-06, + "loss": 18.0825, + "step": 11639 + }, + { + "epoch": 0.2127698466375418, + "grad_norm": 7.820846597900338, + "learning_rate": 9.149438726896213e-06, + "loss": 17.9034, + "step": 11640 + }, + { + "epoch": 0.21278812583398835, + "grad_norm": 7.4306590408094, + "learning_rate": 9.149273565789112e-06, + "loss": 17.8273, + "step": 11641 + }, + { + "epoch": 0.21280640503043485, 
+ "grad_norm": 7.559820598413737, + "learning_rate": 9.149108390139168e-06, + "loss": 18.0741, + "step": 11642 + }, + { + "epoch": 0.2128246842268814, + "grad_norm": 6.603188208185257, + "learning_rate": 9.148943199946958e-06, + "loss": 17.4214, + "step": 11643 + }, + { + "epoch": 0.2128429634233279, + "grad_norm": 6.026696245161655, + "learning_rate": 9.148777995213062e-06, + "loss": 17.2833, + "step": 11644 + }, + { + "epoch": 0.21286124261977443, + "grad_norm": 7.963908018718288, + "learning_rate": 9.148612775938062e-06, + "loss": 18.3751, + "step": 11645 + }, + { + "epoch": 0.21287952181622097, + "grad_norm": 7.869206912743326, + "learning_rate": 9.148447542122532e-06, + "loss": 18.0016, + "step": 11646 + }, + { + "epoch": 0.21289780101266748, + "grad_norm": 6.242017443935883, + "learning_rate": 9.148282293767053e-06, + "loss": 17.4454, + "step": 11647 + }, + { + "epoch": 0.212916080209114, + "grad_norm": 7.435901395097871, + "learning_rate": 9.148117030872206e-06, + "loss": 17.8332, + "step": 11648 + }, + { + "epoch": 0.21293435940556052, + "grad_norm": 7.250999550074894, + "learning_rate": 9.147951753438568e-06, + "loss": 17.8396, + "step": 11649 + }, + { + "epoch": 0.21295263860200705, + "grad_norm": 6.9846991067583355, + "learning_rate": 9.147786461466718e-06, + "loss": 18.0227, + "step": 11650 + }, + { + "epoch": 0.2129709177984536, + "grad_norm": 6.5260356530520305, + "learning_rate": 9.147621154957238e-06, + "loss": 17.4412, + "step": 11651 + }, + { + "epoch": 0.2129891969949001, + "grad_norm": 7.501193901397891, + "learning_rate": 9.147455833910706e-06, + "loss": 18.01, + "step": 11652 + }, + { + "epoch": 0.21300747619134663, + "grad_norm": 6.063653386051944, + "learning_rate": 9.147290498327698e-06, + "loss": 17.2201, + "step": 11653 + }, + { + "epoch": 0.21302575538779314, + "grad_norm": 6.530678628238025, + "learning_rate": 9.1471251482088e-06, + "loss": 17.4399, + "step": 11654 + }, + { + "epoch": 0.21304403458423968, + "grad_norm": 6.143164244941838, + "learning_rate": 9.146959783554587e-06, + "loss": 17.2389, + "step": 11655 + }, + { + "epoch": 0.2130623137806862, + "grad_norm": 7.801729789804826, + "learning_rate": 9.14679440436564e-06, + "loss": 18.2345, + "step": 11656 + }, + { + "epoch": 0.21308059297713272, + "grad_norm": 7.16214036325532, + "learning_rate": 9.14662901064254e-06, + "loss": 17.6856, + "step": 11657 + }, + { + "epoch": 0.21309887217357926, + "grad_norm": 6.371340189688993, + "learning_rate": 9.146463602385863e-06, + "loss": 17.192, + "step": 11658 + }, + { + "epoch": 0.21311715137002576, + "grad_norm": 8.98845559325511, + "learning_rate": 9.146298179596191e-06, + "loss": 18.3197, + "step": 11659 + }, + { + "epoch": 0.2131354305664723, + "grad_norm": 7.3263238137853826, + "learning_rate": 9.146132742274106e-06, + "loss": 18.0394, + "step": 11660 + }, + { + "epoch": 0.2131537097629188, + "grad_norm": 5.462073782100659, + "learning_rate": 9.145967290420184e-06, + "loss": 17.2091, + "step": 11661 + }, + { + "epoch": 0.21317198895936534, + "grad_norm": 6.746639516360985, + "learning_rate": 9.145801824035006e-06, + "loss": 17.8386, + "step": 11662 + }, + { + "epoch": 0.21319026815581188, + "grad_norm": 7.6689912130221645, + "learning_rate": 9.145636343119152e-06, + "loss": 17.9464, + "step": 11663 + }, + { + "epoch": 0.2132085473522584, + "grad_norm": 6.695748876445942, + "learning_rate": 9.145470847673203e-06, + "loss": 17.5456, + "step": 11664 + }, + { + "epoch": 0.21322682654870492, + "grad_norm": 8.198516003864741, + "learning_rate": 
9.145305337697737e-06, + "loss": 17.909, + "step": 11665 + }, + { + "epoch": 0.21324510574515143, + "grad_norm": 6.908031273691143, + "learning_rate": 9.145139813193337e-06, + "loss": 17.966, + "step": 11666 + }, + { + "epoch": 0.21326338494159797, + "grad_norm": 7.055918827022239, + "learning_rate": 9.144974274160582e-06, + "loss": 17.9561, + "step": 11667 + }, + { + "epoch": 0.2132816641380445, + "grad_norm": 6.398251192858152, + "learning_rate": 9.144808720600052e-06, + "loss": 17.5263, + "step": 11668 + }, + { + "epoch": 0.213299943334491, + "grad_norm": 6.898360543071194, + "learning_rate": 9.144643152512326e-06, + "loss": 17.321, + "step": 11669 + }, + { + "epoch": 0.21331822253093755, + "grad_norm": 6.859210135796859, + "learning_rate": 9.144477569897984e-06, + "loss": 17.7609, + "step": 11670 + }, + { + "epoch": 0.21333650172738405, + "grad_norm": 6.722237461387456, + "learning_rate": 9.14431197275761e-06, + "loss": 17.55, + "step": 11671 + }, + { + "epoch": 0.2133547809238306, + "grad_norm": 5.435865481530694, + "learning_rate": 9.144146361091784e-06, + "loss": 17.2483, + "step": 11672 + }, + { + "epoch": 0.21337306012027712, + "grad_norm": 6.935190425853276, + "learning_rate": 9.143980734901082e-06, + "loss": 17.8095, + "step": 11673 + }, + { + "epoch": 0.21339133931672363, + "grad_norm": 6.078212964955392, + "learning_rate": 9.143815094186088e-06, + "loss": 17.3058, + "step": 11674 + }, + { + "epoch": 0.21340961851317017, + "grad_norm": 6.8529284508095865, + "learning_rate": 9.14364943894738e-06, + "loss": 17.7405, + "step": 11675 + }, + { + "epoch": 0.21342789770961668, + "grad_norm": 7.243324531782682, + "learning_rate": 9.143483769185542e-06, + "loss": 18.0304, + "step": 11676 + }, + { + "epoch": 0.2134461769060632, + "grad_norm": 7.7500115768982765, + "learning_rate": 9.143318084901152e-06, + "loss": 17.9287, + "step": 11677 + }, + { + "epoch": 0.21346445610250972, + "grad_norm": 6.009762398044453, + "learning_rate": 9.143152386094791e-06, + "loss": 17.3918, + "step": 11678 + }, + { + "epoch": 0.21348273529895626, + "grad_norm": 7.524750726459312, + "learning_rate": 9.142986672767042e-06, + "loss": 17.9176, + "step": 11679 + }, + { + "epoch": 0.2135010144954028, + "grad_norm": 6.387471572678087, + "learning_rate": 9.142820944918485e-06, + "loss": 17.4112, + "step": 11680 + }, + { + "epoch": 0.2135192936918493, + "grad_norm": 7.499335328259966, + "learning_rate": 9.142655202549698e-06, + "loss": 17.9846, + "step": 11681 + }, + { + "epoch": 0.21353757288829583, + "grad_norm": 9.125716072213985, + "learning_rate": 9.142489445661265e-06, + "loss": 17.9601, + "step": 11682 + }, + { + "epoch": 0.21355585208474234, + "grad_norm": 8.12236431625984, + "learning_rate": 9.142323674253766e-06, + "loss": 18.2265, + "step": 11683 + }, + { + "epoch": 0.21357413128118888, + "grad_norm": 7.314767327594051, + "learning_rate": 9.142157888327781e-06, + "loss": 17.8159, + "step": 11684 + }, + { + "epoch": 0.2135924104776354, + "grad_norm": 6.091776107112946, + "learning_rate": 9.141992087883893e-06, + "loss": 17.414, + "step": 11685 + }, + { + "epoch": 0.21361068967408192, + "grad_norm": 8.327713269654664, + "learning_rate": 9.141826272922683e-06, + "loss": 17.9633, + "step": 11686 + }, + { + "epoch": 0.21362896887052846, + "grad_norm": 7.269109843841839, + "learning_rate": 9.14166044344473e-06, + "loss": 17.8597, + "step": 11687 + }, + { + "epoch": 0.21364724806697497, + "grad_norm": 4.9721885361299805, + "learning_rate": 9.141494599450615e-06, + "loss": 16.8696, + "step": 11688 + }, + { + 
"epoch": 0.2136655272634215, + "grad_norm": 7.090417751395468, + "learning_rate": 9.141328740940922e-06, + "loss": 17.8949, + "step": 11689 + }, + { + "epoch": 0.21368380645986804, + "grad_norm": 7.035078818145415, + "learning_rate": 9.141162867916234e-06, + "loss": 17.4371, + "step": 11690 + }, + { + "epoch": 0.21370208565631454, + "grad_norm": 16.43434679237421, + "learning_rate": 9.140996980377126e-06, + "loss": 18.5113, + "step": 11691 + }, + { + "epoch": 0.21372036485276108, + "grad_norm": 6.384761996289143, + "learning_rate": 9.140831078324183e-06, + "loss": 17.2118, + "step": 11692 + }, + { + "epoch": 0.2137386440492076, + "grad_norm": 4.832332939261827, + "learning_rate": 9.140665161757988e-06, + "loss": 16.8148, + "step": 11693 + }, + { + "epoch": 0.21375692324565412, + "grad_norm": 5.436442815946285, + "learning_rate": 9.14049923067912e-06, + "loss": 17.0378, + "step": 11694 + }, + { + "epoch": 0.21377520244210063, + "grad_norm": 6.427883132973813, + "learning_rate": 9.140333285088162e-06, + "loss": 17.2919, + "step": 11695 + }, + { + "epoch": 0.21379348163854717, + "grad_norm": 6.633390866294447, + "learning_rate": 9.140167324985695e-06, + "loss": 17.6946, + "step": 11696 + }, + { + "epoch": 0.2138117608349937, + "grad_norm": 6.974052542862472, + "learning_rate": 9.140001350372302e-06, + "loss": 17.7674, + "step": 11697 + }, + { + "epoch": 0.2138300400314402, + "grad_norm": 7.825334790348214, + "learning_rate": 9.139835361248563e-06, + "loss": 18.2005, + "step": 11698 + }, + { + "epoch": 0.21384831922788675, + "grad_norm": 6.478210075398582, + "learning_rate": 9.139669357615059e-06, + "loss": 17.6169, + "step": 11699 + }, + { + "epoch": 0.21386659842433325, + "grad_norm": 5.855948759772832, + "learning_rate": 9.139503339472375e-06, + "loss": 17.3109, + "step": 11700 + }, + { + "epoch": 0.2138848776207798, + "grad_norm": 7.681421099782268, + "learning_rate": 9.139337306821089e-06, + "loss": 18.3091, + "step": 11701 + }, + { + "epoch": 0.21390315681722633, + "grad_norm": 7.687429016987093, + "learning_rate": 9.139171259661785e-06, + "loss": 18.2078, + "step": 11702 + }, + { + "epoch": 0.21392143601367283, + "grad_norm": 7.724126260515264, + "learning_rate": 9.139005197995046e-06, + "loss": 18.0587, + "step": 11703 + }, + { + "epoch": 0.21393971521011937, + "grad_norm": 7.107129911998939, + "learning_rate": 9.138839121821454e-06, + "loss": 17.5602, + "step": 11704 + }, + { + "epoch": 0.21395799440656588, + "grad_norm": 7.221122836910635, + "learning_rate": 9.138673031141587e-06, + "loss": 17.8326, + "step": 11705 + }, + { + "epoch": 0.2139762736030124, + "grad_norm": 7.428135615526704, + "learning_rate": 9.138506925956032e-06, + "loss": 17.6103, + "step": 11706 + }, + { + "epoch": 0.21399455279945895, + "grad_norm": 6.715112095164756, + "learning_rate": 9.13834080626537e-06, + "loss": 17.5912, + "step": 11707 + }, + { + "epoch": 0.21401283199590546, + "grad_norm": 6.911483038049616, + "learning_rate": 9.138174672070181e-06, + "loss": 17.9271, + "step": 11708 + }, + { + "epoch": 0.214031111192352, + "grad_norm": 7.199424686812347, + "learning_rate": 9.13800852337105e-06, + "loss": 17.4627, + "step": 11709 + }, + { + "epoch": 0.2140493903887985, + "grad_norm": 6.34042024283748, + "learning_rate": 9.137842360168559e-06, + "loss": 17.6055, + "step": 11710 + }, + { + "epoch": 0.21406766958524504, + "grad_norm": 7.195130962687598, + "learning_rate": 9.137676182463287e-06, + "loss": 18.0304, + "step": 11711 + }, + { + "epoch": 0.21408594878169154, + "grad_norm": 7.174069647519072, + 
"learning_rate": 9.13750999025582e-06, + "loss": 18.0179, + "step": 11712 + }, + { + "epoch": 0.21410422797813808, + "grad_norm": 6.9921154121447895, + "learning_rate": 9.137343783546741e-06, + "loss": 17.8961, + "step": 11713 + }, + { + "epoch": 0.21412250717458461, + "grad_norm": 6.625369188687903, + "learning_rate": 9.13717756233663e-06, + "loss": 17.7955, + "step": 11714 + }, + { + "epoch": 0.21414078637103112, + "grad_norm": 6.851585685886345, + "learning_rate": 9.137011326626071e-06, + "loss": 17.5806, + "step": 11715 + }, + { + "epoch": 0.21415906556747766, + "grad_norm": 5.764128584589972, + "learning_rate": 9.136845076415645e-06, + "loss": 17.2906, + "step": 11716 + }, + { + "epoch": 0.21417734476392417, + "grad_norm": 6.615149485649565, + "learning_rate": 9.136678811705937e-06, + "loss": 17.7314, + "step": 11717 + }, + { + "epoch": 0.2141956239603707, + "grad_norm": 7.0609122220256575, + "learning_rate": 9.13651253249753e-06, + "loss": 17.646, + "step": 11718 + }, + { + "epoch": 0.21421390315681724, + "grad_norm": 6.169439045457185, + "learning_rate": 9.136346238791004e-06, + "loss": 17.2506, + "step": 11719 + }, + { + "epoch": 0.21423218235326374, + "grad_norm": 6.770321955388486, + "learning_rate": 9.136179930586944e-06, + "loss": 17.7473, + "step": 11720 + }, + { + "epoch": 0.21425046154971028, + "grad_norm": 6.334847259047038, + "learning_rate": 9.136013607885931e-06, + "loss": 17.3342, + "step": 11721 + }, + { + "epoch": 0.2142687407461568, + "grad_norm": 5.40271471549218, + "learning_rate": 9.135847270688548e-06, + "loss": 17.1949, + "step": 11722 + }, + { + "epoch": 0.21428701994260332, + "grad_norm": 5.540679582173256, + "learning_rate": 9.135680918995382e-06, + "loss": 17.0815, + "step": 11723 + }, + { + "epoch": 0.21430529913904986, + "grad_norm": 6.869525382603854, + "learning_rate": 9.135514552807014e-06, + "loss": 17.702, + "step": 11724 + }, + { + "epoch": 0.21432357833549637, + "grad_norm": 7.410156317625125, + "learning_rate": 9.135348172124024e-06, + "loss": 17.9923, + "step": 11725 + }, + { + "epoch": 0.2143418575319429, + "grad_norm": 7.271365725918522, + "learning_rate": 9.135181776946998e-06, + "loss": 17.9684, + "step": 11726 + }, + { + "epoch": 0.2143601367283894, + "grad_norm": 6.263625276598185, + "learning_rate": 9.135015367276519e-06, + "loss": 17.4419, + "step": 11727 + }, + { + "epoch": 0.21437841592483595, + "grad_norm": 8.377459866777203, + "learning_rate": 9.13484894311317e-06, + "loss": 18.2825, + "step": 11728 + }, + { + "epoch": 0.21439669512128248, + "grad_norm": 7.152149590163419, + "learning_rate": 9.134682504457534e-06, + "loss": 17.6428, + "step": 11729 + }, + { + "epoch": 0.214414974317729, + "grad_norm": 6.015003874468464, + "learning_rate": 9.134516051310196e-06, + "loss": 17.3465, + "step": 11730 + }, + { + "epoch": 0.21443325351417553, + "grad_norm": 6.631122011287288, + "learning_rate": 9.134349583671738e-06, + "loss": 17.4083, + "step": 11731 + }, + { + "epoch": 0.21445153271062203, + "grad_norm": 7.006134388504954, + "learning_rate": 9.134183101542742e-06, + "loss": 18.0137, + "step": 11732 + }, + { + "epoch": 0.21446981190706857, + "grad_norm": 7.5871653350385735, + "learning_rate": 9.134016604923792e-06, + "loss": 17.796, + "step": 11733 + }, + { + "epoch": 0.21448809110351508, + "grad_norm": 5.97955367235346, + "learning_rate": 9.133850093815474e-06, + "loss": 17.3184, + "step": 11734 + }, + { + "epoch": 0.2145063702999616, + "grad_norm": 8.284393843268226, + "learning_rate": 9.13368356821837e-06, + "loss": 18.2927, + "step": 
11735 + }, + { + "epoch": 0.21452464949640815, + "grad_norm": 8.14329714951248, + "learning_rate": 9.133517028133066e-06, + "loss": 17.8919, + "step": 11736 + }, + { + "epoch": 0.21454292869285466, + "grad_norm": 6.615274421398922, + "learning_rate": 9.13335047356014e-06, + "loss": 17.3906, + "step": 11737 + }, + { + "epoch": 0.2145612078893012, + "grad_norm": 6.764795377485563, + "learning_rate": 9.13318390450018e-06, + "loss": 17.5324, + "step": 11738 + }, + { + "epoch": 0.2145794870857477, + "grad_norm": 6.6683910449413455, + "learning_rate": 9.133017320953769e-06, + "loss": 17.6568, + "step": 11739 + }, + { + "epoch": 0.21459776628219424, + "grad_norm": 5.7621555547544006, + "learning_rate": 9.132850722921494e-06, + "loss": 17.3257, + "step": 11740 + }, + { + "epoch": 0.21461604547864077, + "grad_norm": 6.748899472649107, + "learning_rate": 9.132684110403934e-06, + "loss": 17.5096, + "step": 11741 + }, + { + "epoch": 0.21463432467508728, + "grad_norm": 6.898553539535939, + "learning_rate": 9.132517483401673e-06, + "loss": 17.6772, + "step": 11742 + }, + { + "epoch": 0.21465260387153381, + "grad_norm": 7.127063150402214, + "learning_rate": 9.132350841915299e-06, + "loss": 18.1117, + "step": 11743 + }, + { + "epoch": 0.21467088306798032, + "grad_norm": 6.748241952896581, + "learning_rate": 9.132184185945392e-06, + "loss": 17.6061, + "step": 11744 + }, + { + "epoch": 0.21468916226442686, + "grad_norm": 6.273668380566096, + "learning_rate": 9.132017515492539e-06, + "loss": 17.5028, + "step": 11745 + }, + { + "epoch": 0.2147074414608734, + "grad_norm": 5.616942758696618, + "learning_rate": 9.131850830557323e-06, + "loss": 16.9859, + "step": 11746 + }, + { + "epoch": 0.2147257206573199, + "grad_norm": 6.6702528076925605, + "learning_rate": 9.131684131140328e-06, + "loss": 17.7597, + "step": 11747 + }, + { + "epoch": 0.21474399985376644, + "grad_norm": 6.4314363441149895, + "learning_rate": 9.13151741724214e-06, + "loss": 17.4066, + "step": 11748 + }, + { + "epoch": 0.21476227905021295, + "grad_norm": 6.254027235472205, + "learning_rate": 9.131350688863341e-06, + "loss": 17.4192, + "step": 11749 + }, + { + "epoch": 0.21478055824665948, + "grad_norm": 6.399946667461864, + "learning_rate": 9.131183946004515e-06, + "loss": 17.7945, + "step": 11750 + }, + { + "epoch": 0.214798837443106, + "grad_norm": 12.592891370944914, + "learning_rate": 9.131017188666251e-06, + "loss": 18.6017, + "step": 11751 + }, + { + "epoch": 0.21481711663955252, + "grad_norm": 7.892271113563236, + "learning_rate": 9.13085041684913e-06, + "loss": 17.752, + "step": 11752 + }, + { + "epoch": 0.21483539583599906, + "grad_norm": 6.360236938234864, + "learning_rate": 9.130683630553734e-06, + "loss": 17.7222, + "step": 11753 + }, + { + "epoch": 0.21485367503244557, + "grad_norm": 6.957877398085484, + "learning_rate": 9.130516829780652e-06, + "loss": 17.6721, + "step": 11754 + }, + { + "epoch": 0.2148719542288921, + "grad_norm": 5.36767478304127, + "learning_rate": 9.130350014530465e-06, + "loss": 17.0818, + "step": 11755 + }, + { + "epoch": 0.2148902334253386, + "grad_norm": 6.7866213840917045, + "learning_rate": 9.13018318480376e-06, + "loss": 17.5193, + "step": 11756 + }, + { + "epoch": 0.21490851262178515, + "grad_norm": 7.310114998337191, + "learning_rate": 9.130016340601124e-06, + "loss": 18.1721, + "step": 11757 + }, + { + "epoch": 0.21492679181823168, + "grad_norm": 7.766754764704571, + "learning_rate": 9.129849481923137e-06, + "loss": 17.9885, + "step": 11758 + }, + { + "epoch": 0.2149450710146782, + "grad_norm": 
7.358159678313833, + "learning_rate": 9.129682608770388e-06, + "loss": 17.8406, + "step": 11759 + }, + { + "epoch": 0.21496335021112473, + "grad_norm": 6.34846838623486, + "learning_rate": 9.129515721143459e-06, + "loss": 17.1203, + "step": 11760 + }, + { + "epoch": 0.21498162940757123, + "grad_norm": 6.503362003029298, + "learning_rate": 9.129348819042934e-06, + "loss": 17.3981, + "step": 11761 + }, + { + "epoch": 0.21499990860401777, + "grad_norm": 7.710973776237608, + "learning_rate": 9.1291819024694e-06, + "loss": 18.0849, + "step": 11762 + }, + { + "epoch": 0.2150181878004643, + "grad_norm": 7.169148622837023, + "learning_rate": 9.129014971423442e-06, + "loss": 17.79, + "step": 11763 + }, + { + "epoch": 0.2150364669969108, + "grad_norm": 6.419588401375401, + "learning_rate": 9.128848025905645e-06, + "loss": 17.4207, + "step": 11764 + }, + { + "epoch": 0.21505474619335735, + "grad_norm": 6.879463926786173, + "learning_rate": 9.128681065916596e-06, + "loss": 17.5164, + "step": 11765 + }, + { + "epoch": 0.21507302538980386, + "grad_norm": 6.559677967491284, + "learning_rate": 9.128514091456876e-06, + "loss": 17.3865, + "step": 11766 + }, + { + "epoch": 0.2150913045862504, + "grad_norm": 6.290869658016945, + "learning_rate": 9.128347102527072e-06, + "loss": 17.1739, + "step": 11767 + }, + { + "epoch": 0.2151095837826969, + "grad_norm": 7.5171335927678635, + "learning_rate": 9.128180099127772e-06, + "loss": 17.824, + "step": 11768 + }, + { + "epoch": 0.21512786297914344, + "grad_norm": 6.275706202548021, + "learning_rate": 9.128013081259557e-06, + "loss": 17.4745, + "step": 11769 + }, + { + "epoch": 0.21514614217558997, + "grad_norm": 7.312128322408952, + "learning_rate": 9.127846048923015e-06, + "loss": 17.5866, + "step": 11770 + }, + { + "epoch": 0.21516442137203648, + "grad_norm": 6.570097448543907, + "learning_rate": 9.127679002118731e-06, + "loss": 17.8862, + "step": 11771 + }, + { + "epoch": 0.21518270056848302, + "grad_norm": 6.802289447448312, + "learning_rate": 9.12751194084729e-06, + "loss": 17.5273, + "step": 11772 + }, + { + "epoch": 0.21520097976492952, + "grad_norm": 7.7264833228498775, + "learning_rate": 9.127344865109276e-06, + "loss": 17.8344, + "step": 11773 + }, + { + "epoch": 0.21521925896137606, + "grad_norm": 6.054914653447179, + "learning_rate": 9.12717777490528e-06, + "loss": 17.6735, + "step": 11774 + }, + { + "epoch": 0.2152375381578226, + "grad_norm": 6.365035929911253, + "learning_rate": 9.12701067023588e-06, + "loss": 17.7055, + "step": 11775 + }, + { + "epoch": 0.2152558173542691, + "grad_norm": 5.932996851989658, + "learning_rate": 9.12684355110167e-06, + "loss": 17.1325, + "step": 11776 + }, + { + "epoch": 0.21527409655071564, + "grad_norm": 6.515564613167469, + "learning_rate": 9.126676417503229e-06, + "loss": 17.8026, + "step": 11777 + }, + { + "epoch": 0.21529237574716215, + "grad_norm": 7.039026329151559, + "learning_rate": 9.126509269441144e-06, + "loss": 17.803, + "step": 11778 + }, + { + "epoch": 0.21531065494360868, + "grad_norm": 7.11009534613514, + "learning_rate": 9.126342106916005e-06, + "loss": 17.6082, + "step": 11779 + }, + { + "epoch": 0.21532893414005522, + "grad_norm": 6.431209903045549, + "learning_rate": 9.126174929928394e-06, + "loss": 17.5655, + "step": 11780 + }, + { + "epoch": 0.21534721333650172, + "grad_norm": 6.794067942928916, + "learning_rate": 9.126007738478897e-06, + "loss": 17.7129, + "step": 11781 + }, + { + "epoch": 0.21536549253294826, + "grad_norm": 6.129034430365924, + "learning_rate": 9.1258405325681e-06, + "loss": 
17.2967, + "step": 11782 + }, + { + "epoch": 0.21538377172939477, + "grad_norm": 6.228539361149761, + "learning_rate": 9.125673312196592e-06, + "loss": 17.4575, + "step": 11783 + }, + { + "epoch": 0.2154020509258413, + "grad_norm": 6.686981454888693, + "learning_rate": 9.125506077364958e-06, + "loss": 17.8332, + "step": 11784 + }, + { + "epoch": 0.2154203301222878, + "grad_norm": 5.786652186785163, + "learning_rate": 9.125338828073781e-06, + "loss": 17.1764, + "step": 11785 + }, + { + "epoch": 0.21543860931873435, + "grad_norm": 8.721482698306923, + "learning_rate": 9.125171564323649e-06, + "loss": 18.1051, + "step": 11786 + }, + { + "epoch": 0.21545688851518088, + "grad_norm": 5.932436499512886, + "learning_rate": 9.12500428611515e-06, + "loss": 17.0535, + "step": 11787 + }, + { + "epoch": 0.2154751677116274, + "grad_norm": 6.780036753127285, + "learning_rate": 9.124836993448868e-06, + "loss": 17.8021, + "step": 11788 + }, + { + "epoch": 0.21549344690807393, + "grad_norm": 6.002060187567544, + "learning_rate": 9.12466968632539e-06, + "loss": 17.2499, + "step": 11789 + }, + { + "epoch": 0.21551172610452043, + "grad_norm": 6.589613600248243, + "learning_rate": 9.124502364745305e-06, + "loss": 17.5803, + "step": 11790 + }, + { + "epoch": 0.21553000530096697, + "grad_norm": 5.867571287420147, + "learning_rate": 9.124335028709197e-06, + "loss": 17.2653, + "step": 11791 + }, + { + "epoch": 0.2155482844974135, + "grad_norm": 7.073145531524499, + "learning_rate": 9.12416767821765e-06, + "loss": 18.1059, + "step": 11792 + }, + { + "epoch": 0.21556656369386, + "grad_norm": 6.988284935897942, + "learning_rate": 9.124000313271256e-06, + "loss": 17.9629, + "step": 11793 + }, + { + "epoch": 0.21558484289030655, + "grad_norm": 5.539287280895075, + "learning_rate": 9.123832933870597e-06, + "loss": 17.3149, + "step": 11794 + }, + { + "epoch": 0.21560312208675306, + "grad_norm": 6.4895026181279984, + "learning_rate": 9.123665540016262e-06, + "loss": 17.5314, + "step": 11795 + }, + { + "epoch": 0.2156214012831996, + "grad_norm": 7.069783717638051, + "learning_rate": 9.123498131708837e-06, + "loss": 17.6707, + "step": 11796 + }, + { + "epoch": 0.21563968047964613, + "grad_norm": 6.540982541515931, + "learning_rate": 9.123330708948908e-06, + "loss": 17.6134, + "step": 11797 + }, + { + "epoch": 0.21565795967609264, + "grad_norm": 7.211356462757298, + "learning_rate": 9.123163271737063e-06, + "loss": 17.7928, + "step": 11798 + }, + { + "epoch": 0.21567623887253917, + "grad_norm": 5.38931003982935, + "learning_rate": 9.12299582007389e-06, + "loss": 17.2277, + "step": 11799 + }, + { + "epoch": 0.21569451806898568, + "grad_norm": 6.84494750179637, + "learning_rate": 9.122828353959971e-06, + "loss": 17.9003, + "step": 11800 + }, + { + "epoch": 0.21571279726543222, + "grad_norm": 6.745374541778459, + "learning_rate": 9.1226608733959e-06, + "loss": 17.5247, + "step": 11801 + }, + { + "epoch": 0.21573107646187872, + "grad_norm": 5.554898939967108, + "learning_rate": 9.122493378382259e-06, + "loss": 17.0669, + "step": 11802 + }, + { + "epoch": 0.21574935565832526, + "grad_norm": 6.301156541169049, + "learning_rate": 9.122325868919637e-06, + "loss": 17.3965, + "step": 11803 + }, + { + "epoch": 0.2157676348547718, + "grad_norm": 6.630125518593899, + "learning_rate": 9.12215834500862e-06, + "loss": 17.8173, + "step": 11804 + }, + { + "epoch": 0.2157859140512183, + "grad_norm": 6.10525941766438, + "learning_rate": 9.121990806649795e-06, + "loss": 17.5165, + "step": 11805 + }, + { + "epoch": 0.21580419324766484, + 
"grad_norm": 6.8259056305339945, + "learning_rate": 9.12182325384375e-06, + "loss": 17.8236, + "step": 11806 + }, + { + "epoch": 0.21582247244411135, + "grad_norm": 5.807572762701553, + "learning_rate": 9.121655686591073e-06, + "loss": 17.262, + "step": 11807 + }, + { + "epoch": 0.21584075164055788, + "grad_norm": 5.953191464279736, + "learning_rate": 9.121488104892352e-06, + "loss": 17.3984, + "step": 11808 + }, + { + "epoch": 0.21585903083700442, + "grad_norm": 5.750383837581517, + "learning_rate": 9.121320508748171e-06, + "loss": 17.3132, + "step": 11809 + }, + { + "epoch": 0.21587731003345093, + "grad_norm": 6.541225734110672, + "learning_rate": 9.121152898159118e-06, + "loss": 17.7538, + "step": 11810 + }, + { + "epoch": 0.21589558922989746, + "grad_norm": 8.125911642449207, + "learning_rate": 9.120985273125784e-06, + "loss": 18.2132, + "step": 11811 + }, + { + "epoch": 0.21591386842634397, + "grad_norm": 7.2094951004451, + "learning_rate": 9.120817633648753e-06, + "loss": 17.8451, + "step": 11812 + }, + { + "epoch": 0.2159321476227905, + "grad_norm": 5.94342052308106, + "learning_rate": 9.120649979728615e-06, + "loss": 17.1825, + "step": 11813 + }, + { + "epoch": 0.21595042681923704, + "grad_norm": 5.519430438837936, + "learning_rate": 9.120482311365955e-06, + "loss": 17.0082, + "step": 11814 + }, + { + "epoch": 0.21596870601568355, + "grad_norm": 7.840471022481249, + "learning_rate": 9.120314628561362e-06, + "loss": 18.1265, + "step": 11815 + }, + { + "epoch": 0.21598698521213008, + "grad_norm": 6.227794689099335, + "learning_rate": 9.120146931315424e-06, + "loss": 17.2198, + "step": 11816 + }, + { + "epoch": 0.2160052644085766, + "grad_norm": 6.449503747615225, + "learning_rate": 9.11997921962873e-06, + "loss": 17.7227, + "step": 11817 + }, + { + "epoch": 0.21602354360502313, + "grad_norm": 6.598491712152966, + "learning_rate": 9.119811493501865e-06, + "loss": 17.4789, + "step": 11818 + }, + { + "epoch": 0.21604182280146964, + "grad_norm": 7.658603780721176, + "learning_rate": 9.11964375293542e-06, + "loss": 17.9342, + "step": 11819 + }, + { + "epoch": 0.21606010199791617, + "grad_norm": 6.813154337910873, + "learning_rate": 9.11947599792998e-06, + "loss": 17.698, + "step": 11820 + }, + { + "epoch": 0.2160783811943627, + "grad_norm": 7.735146543824995, + "learning_rate": 9.11930822848613e-06, + "loss": 17.4835, + "step": 11821 + }, + { + "epoch": 0.21609666039080921, + "grad_norm": 6.586523536038008, + "learning_rate": 9.119140444604467e-06, + "loss": 17.6442, + "step": 11822 + }, + { + "epoch": 0.21611493958725575, + "grad_norm": 5.802005255457315, + "learning_rate": 9.118972646285573e-06, + "loss": 17.0101, + "step": 11823 + }, + { + "epoch": 0.21613321878370226, + "grad_norm": 7.947883271402378, + "learning_rate": 9.118804833530037e-06, + "loss": 18.2263, + "step": 11824 + }, + { + "epoch": 0.2161514979801488, + "grad_norm": 6.600167212655567, + "learning_rate": 9.118637006338448e-06, + "loss": 17.4165, + "step": 11825 + }, + { + "epoch": 0.21616977717659533, + "grad_norm": 8.321030702889383, + "learning_rate": 9.118469164711394e-06, + "loss": 18.3109, + "step": 11826 + }, + { + "epoch": 0.21618805637304184, + "grad_norm": 6.815631949701846, + "learning_rate": 9.118301308649461e-06, + "loss": 17.6911, + "step": 11827 + }, + { + "epoch": 0.21620633556948837, + "grad_norm": 7.600373915365126, + "learning_rate": 9.118133438153242e-06, + "loss": 17.961, + "step": 11828 + }, + { + "epoch": 0.21622461476593488, + "grad_norm": 7.772198325610063, + "learning_rate": 9.11796555322332e-06, 
+ "loss": 18.0769, + "step": 11829 + }, + { + "epoch": 0.21624289396238142, + "grad_norm": 6.9767876318635675, + "learning_rate": 9.117797653860288e-06, + "loss": 17.5627, + "step": 11830 + }, + { + "epoch": 0.21626117315882795, + "grad_norm": 6.443769562013518, + "learning_rate": 9.117629740064732e-06, + "loss": 17.4205, + "step": 11831 + }, + { + "epoch": 0.21627945235527446, + "grad_norm": 7.680971999628747, + "learning_rate": 9.117461811837241e-06, + "loss": 17.9219, + "step": 11832 + }, + { + "epoch": 0.216297731551721, + "grad_norm": 6.172812442547839, + "learning_rate": 9.117293869178404e-06, + "loss": 17.427, + "step": 11833 + }, + { + "epoch": 0.2163160107481675, + "grad_norm": 6.9445119510917666, + "learning_rate": 9.11712591208881e-06, + "loss": 17.9369, + "step": 11834 + }, + { + "epoch": 0.21633428994461404, + "grad_norm": 6.02796978592889, + "learning_rate": 9.116957940569044e-06, + "loss": 17.5246, + "step": 11835 + }, + { + "epoch": 0.21635256914106055, + "grad_norm": 5.972191777563683, + "learning_rate": 9.1167899546197e-06, + "loss": 17.317, + "step": 11836 + }, + { + "epoch": 0.21637084833750708, + "grad_norm": 6.9378546496847235, + "learning_rate": 9.116621954241364e-06, + "loss": 17.3589, + "step": 11837 + }, + { + "epoch": 0.21638912753395362, + "grad_norm": 6.452519156452831, + "learning_rate": 9.116453939434626e-06, + "loss": 17.6529, + "step": 11838 + }, + { + "epoch": 0.21640740673040013, + "grad_norm": 5.643146956750747, + "learning_rate": 9.116285910200074e-06, + "loss": 17.2308, + "step": 11839 + }, + { + "epoch": 0.21642568592684666, + "grad_norm": 8.741670301991528, + "learning_rate": 9.116117866538297e-06, + "loss": 18.1486, + "step": 11840 + }, + { + "epoch": 0.21644396512329317, + "grad_norm": 7.543726740294994, + "learning_rate": 9.115949808449883e-06, + "loss": 18.0334, + "step": 11841 + }, + { + "epoch": 0.2164622443197397, + "grad_norm": 8.26587446562942, + "learning_rate": 9.115781735935423e-06, + "loss": 18.1301, + "step": 11842 + }, + { + "epoch": 0.21648052351618624, + "grad_norm": 7.667334811607829, + "learning_rate": 9.115613648995504e-06, + "loss": 17.8988, + "step": 11843 + }, + { + "epoch": 0.21649880271263275, + "grad_norm": 7.209493264717548, + "learning_rate": 9.115445547630716e-06, + "loss": 17.7141, + "step": 11844 + }, + { + "epoch": 0.21651708190907928, + "grad_norm": 6.4966574132294745, + "learning_rate": 9.115277431841652e-06, + "loss": 17.5758, + "step": 11845 + }, + { + "epoch": 0.2165353611055258, + "grad_norm": 7.053509179934936, + "learning_rate": 9.115109301628893e-06, + "loss": 18.1974, + "step": 11846 + }, + { + "epoch": 0.21655364030197233, + "grad_norm": 6.753767758501346, + "learning_rate": 9.114941156993036e-06, + "loss": 17.7995, + "step": 11847 + }, + { + "epoch": 0.21657191949841886, + "grad_norm": 8.437271655610894, + "learning_rate": 9.114772997934667e-06, + "loss": 18.6091, + "step": 11848 + }, + { + "epoch": 0.21659019869486537, + "grad_norm": 7.405182553876967, + "learning_rate": 9.114604824454376e-06, + "loss": 18.0246, + "step": 11849 + }, + { + "epoch": 0.2166084778913119, + "grad_norm": 6.925499954862183, + "learning_rate": 9.11443663655275e-06, + "loss": 17.683, + "step": 11850 + }, + { + "epoch": 0.21662675708775841, + "grad_norm": 7.02375638010205, + "learning_rate": 9.114268434230383e-06, + "loss": 17.6752, + "step": 11851 + }, + { + "epoch": 0.21664503628420495, + "grad_norm": 8.319262822755558, + "learning_rate": 9.11410021748786e-06, + "loss": 18.2466, + "step": 11852 + }, + { + "epoch": 
0.21666331548065146, + "grad_norm": 5.675523731439726, + "learning_rate": 9.113931986325775e-06, + "loss": 17.0465, + "step": 11853 + }, + { + "epoch": 0.216681594677098, + "grad_norm": 6.491090021517005, + "learning_rate": 9.113763740744715e-06, + "loss": 17.6491, + "step": 11854 + }, + { + "epoch": 0.21669987387354453, + "grad_norm": 8.177124437509113, + "learning_rate": 9.11359548074527e-06, + "loss": 18.0958, + "step": 11855 + }, + { + "epoch": 0.21671815306999104, + "grad_norm": 5.667189848145001, + "learning_rate": 9.113427206328028e-06, + "loss": 17.1354, + "step": 11856 + }, + { + "epoch": 0.21673643226643757, + "grad_norm": 7.862395710919348, + "learning_rate": 9.113258917493581e-06, + "loss": 17.8076, + "step": 11857 + }, + { + "epoch": 0.21675471146288408, + "grad_norm": 5.5223788730459775, + "learning_rate": 9.11309061424252e-06, + "loss": 17.1542, + "step": 11858 + }, + { + "epoch": 0.21677299065933062, + "grad_norm": 7.686488475290111, + "learning_rate": 9.112922296575433e-06, + "loss": 18.2554, + "step": 11859 + }, + { + "epoch": 0.21679126985577715, + "grad_norm": 5.599340225252916, + "learning_rate": 9.11275396449291e-06, + "loss": 17.1083, + "step": 11860 + }, + { + "epoch": 0.21680954905222366, + "grad_norm": 7.6510061032633, + "learning_rate": 9.11258561799554e-06, + "loss": 18.2384, + "step": 11861 + }, + { + "epoch": 0.2168278282486702, + "grad_norm": 6.827662276542155, + "learning_rate": 9.112417257083916e-06, + "loss": 17.6346, + "step": 11862 + }, + { + "epoch": 0.2168461074451167, + "grad_norm": 6.4249535929699695, + "learning_rate": 9.112248881758625e-06, + "loss": 17.3116, + "step": 11863 + }, + { + "epoch": 0.21686438664156324, + "grad_norm": 6.345876603272063, + "learning_rate": 9.11208049202026e-06, + "loss": 17.6555, + "step": 11864 + }, + { + "epoch": 0.21688266583800977, + "grad_norm": 6.5773462015567565, + "learning_rate": 9.111912087869408e-06, + "loss": 17.8457, + "step": 11865 + }, + { + "epoch": 0.21690094503445628, + "grad_norm": 7.1769804192445, + "learning_rate": 9.111743669306663e-06, + "loss": 17.7754, + "step": 11866 + }, + { + "epoch": 0.21691922423090282, + "grad_norm": 6.6183778970578455, + "learning_rate": 9.111575236332613e-06, + "loss": 17.5283, + "step": 11867 + }, + { + "epoch": 0.21693750342734933, + "grad_norm": 7.250872914207179, + "learning_rate": 9.111406788947848e-06, + "loss": 18.2171, + "step": 11868 + }, + { + "epoch": 0.21695578262379586, + "grad_norm": 7.022477133112317, + "learning_rate": 9.111238327152958e-06, + "loss": 17.7527, + "step": 11869 + }, + { + "epoch": 0.21697406182024237, + "grad_norm": 7.619132096028822, + "learning_rate": 9.111069850948535e-06, + "loss": 18.0703, + "step": 11870 + }, + { + "epoch": 0.2169923410166889, + "grad_norm": 6.948474573255075, + "learning_rate": 9.110901360335169e-06, + "loss": 17.984, + "step": 11871 + }, + { + "epoch": 0.21701062021313544, + "grad_norm": 6.442358726349963, + "learning_rate": 9.11073285531345e-06, + "loss": 17.4928, + "step": 11872 + }, + { + "epoch": 0.21702889940958195, + "grad_norm": 7.101785052699681, + "learning_rate": 9.110564335883968e-06, + "loss": 17.9451, + "step": 11873 + }, + { + "epoch": 0.21704717860602848, + "grad_norm": 6.56195274322934, + "learning_rate": 9.110395802047319e-06, + "loss": 17.7829, + "step": 11874 + }, + { + "epoch": 0.217065457802475, + "grad_norm": 8.06071242426235, + "learning_rate": 9.110227253804085e-06, + "loss": 18.6257, + "step": 11875 + }, + { + "epoch": 0.21708373699892153, + "grad_norm": 6.432825201440536, + "learning_rate": 
9.110058691154864e-06, + "loss": 17.4202, + "step": 11876 + }, + { + "epoch": 0.21710201619536806, + "grad_norm": 5.845197097981462, + "learning_rate": 9.109890114100242e-06, + "loss": 17.3125, + "step": 11877 + }, + { + "epoch": 0.21712029539181457, + "grad_norm": 7.532775156558808, + "learning_rate": 9.109721522640814e-06, + "loss": 18.237, + "step": 11878 + }, + { + "epoch": 0.2171385745882611, + "grad_norm": 6.287582770450092, + "learning_rate": 9.109552916777166e-06, + "loss": 17.3277, + "step": 11879 + }, + { + "epoch": 0.21715685378470762, + "grad_norm": 6.93569583883979, + "learning_rate": 9.109384296509893e-06, + "loss": 17.7749, + "step": 11880 + }, + { + "epoch": 0.21717513298115415, + "grad_norm": 6.934288715874354, + "learning_rate": 9.109215661839585e-06, + "loss": 17.8361, + "step": 11881 + }, + { + "epoch": 0.2171934121776007, + "grad_norm": 7.443478827388052, + "learning_rate": 9.109047012766832e-06, + "loss": 18.0715, + "step": 11882 + }, + { + "epoch": 0.2172116913740472, + "grad_norm": 6.191747183977963, + "learning_rate": 9.108878349292225e-06, + "loss": 17.3165, + "step": 11883 + }, + { + "epoch": 0.21722997057049373, + "grad_norm": 8.187954700447742, + "learning_rate": 9.108709671416357e-06, + "loss": 17.983, + "step": 11884 + }, + { + "epoch": 0.21724824976694024, + "grad_norm": 7.587699318994457, + "learning_rate": 9.108540979139818e-06, + "loss": 17.8604, + "step": 11885 + }, + { + "epoch": 0.21726652896338677, + "grad_norm": 6.688204633883863, + "learning_rate": 9.1083722724632e-06, + "loss": 17.604, + "step": 11886 + }, + { + "epoch": 0.21728480815983328, + "grad_norm": 7.402876541490754, + "learning_rate": 9.108203551387093e-06, + "loss": 17.8469, + "step": 11887 + }, + { + "epoch": 0.21730308735627982, + "grad_norm": 8.390172591765566, + "learning_rate": 9.108034815912089e-06, + "loss": 18.5591, + "step": 11888 + }, + { + "epoch": 0.21732136655272635, + "grad_norm": 6.715244974854403, + "learning_rate": 9.10786606603878e-06, + "loss": 17.9823, + "step": 11889 + }, + { + "epoch": 0.21733964574917286, + "grad_norm": 7.638039044905897, + "learning_rate": 9.107697301767757e-06, + "loss": 17.8907, + "step": 11890 + }, + { + "epoch": 0.2173579249456194, + "grad_norm": 12.617379013698208, + "learning_rate": 9.10752852309961e-06, + "loss": 19.1123, + "step": 11891 + }, + { + "epoch": 0.2173762041420659, + "grad_norm": 8.498138539586357, + "learning_rate": 9.107359730034932e-06, + "loss": 18.0663, + "step": 11892 + }, + { + "epoch": 0.21739448333851244, + "grad_norm": 6.258669536509158, + "learning_rate": 9.107190922574316e-06, + "loss": 17.4816, + "step": 11893 + }, + { + "epoch": 0.21741276253495898, + "grad_norm": 8.139844778110206, + "learning_rate": 9.107022100718353e-06, + "loss": 18.3541, + "step": 11894 + }, + { + "epoch": 0.21743104173140548, + "grad_norm": 6.958877476992919, + "learning_rate": 9.106853264467632e-06, + "loss": 17.824, + "step": 11895 + }, + { + "epoch": 0.21744932092785202, + "grad_norm": 8.369859115580702, + "learning_rate": 9.106684413822746e-06, + "loss": 18.3058, + "step": 11896 + }, + { + "epoch": 0.21746760012429853, + "grad_norm": 6.164079534457812, + "learning_rate": 9.106515548784289e-06, + "loss": 17.6808, + "step": 11897 + }, + { + "epoch": 0.21748587932074506, + "grad_norm": 6.402373408976194, + "learning_rate": 9.106346669352852e-06, + "loss": 17.6395, + "step": 11898 + }, + { + "epoch": 0.2175041585171916, + "grad_norm": 5.6330556941323975, + "learning_rate": 9.106177775529026e-06, + "loss": 17.204, + "step": 11899 + }, + { + 
"epoch": 0.2175224377136381, + "grad_norm": 6.350200341278443, + "learning_rate": 9.106008867313402e-06, + "loss": 17.5304, + "step": 11900 + }, + { + "epoch": 0.21754071691008464, + "grad_norm": 6.8636540147606215, + "learning_rate": 9.105839944706573e-06, + "loss": 17.6201, + "step": 11901 + }, + { + "epoch": 0.21755899610653115, + "grad_norm": 6.294765863989509, + "learning_rate": 9.105671007709134e-06, + "loss": 17.6585, + "step": 11902 + }, + { + "epoch": 0.21757727530297769, + "grad_norm": 6.703014440079122, + "learning_rate": 9.105502056321672e-06, + "loss": 17.7441, + "step": 11903 + }, + { + "epoch": 0.2175955544994242, + "grad_norm": 7.431064453772132, + "learning_rate": 9.10533309054478e-06, + "loss": 18.0045, + "step": 11904 + }, + { + "epoch": 0.21761383369587073, + "grad_norm": 5.927868837021644, + "learning_rate": 9.105164110379054e-06, + "loss": 17.4985, + "step": 11905 + }, + { + "epoch": 0.21763211289231726, + "grad_norm": 6.456673769313275, + "learning_rate": 9.104995115825086e-06, + "loss": 17.6168, + "step": 11906 + }, + { + "epoch": 0.21765039208876377, + "grad_norm": 6.264982376343958, + "learning_rate": 9.104826106883463e-06, + "loss": 17.1106, + "step": 11907 + }, + { + "epoch": 0.2176686712852103, + "grad_norm": 6.995548377019063, + "learning_rate": 9.104657083554783e-06, + "loss": 17.756, + "step": 11908 + }, + { + "epoch": 0.21768695048165682, + "grad_norm": 6.899534664640044, + "learning_rate": 9.104488045839635e-06, + "loss": 17.8964, + "step": 11909 + }, + { + "epoch": 0.21770522967810335, + "grad_norm": 6.974231060934944, + "learning_rate": 9.104318993738611e-06, + "loss": 17.5191, + "step": 11910 + }, + { + "epoch": 0.2177235088745499, + "grad_norm": 7.325421162880059, + "learning_rate": 9.104149927252308e-06, + "loss": 17.8467, + "step": 11911 + }, + { + "epoch": 0.2177417880709964, + "grad_norm": 5.671400688697253, + "learning_rate": 9.103980846381313e-06, + "loss": 17.2265, + "step": 11912 + }, + { + "epoch": 0.21776006726744293, + "grad_norm": 7.071496767851761, + "learning_rate": 9.103811751126223e-06, + "loss": 17.6563, + "step": 11913 + }, + { + "epoch": 0.21777834646388944, + "grad_norm": 7.053972963952122, + "learning_rate": 9.10364264148763e-06, + "loss": 17.7474, + "step": 11914 + }, + { + "epoch": 0.21779662566033597, + "grad_norm": 6.579193874837373, + "learning_rate": 9.103473517466122e-06, + "loss": 17.5621, + "step": 11915 + }, + { + "epoch": 0.2178149048567825, + "grad_norm": 7.67772167225187, + "learning_rate": 9.103304379062298e-06, + "loss": 17.775, + "step": 11916 + }, + { + "epoch": 0.21783318405322902, + "grad_norm": 7.306930891834455, + "learning_rate": 9.103135226276747e-06, + "loss": 17.7983, + "step": 11917 + }, + { + "epoch": 0.21785146324967555, + "grad_norm": 6.552866014793707, + "learning_rate": 9.102966059110065e-06, + "loss": 17.3175, + "step": 11918 + }, + { + "epoch": 0.21786974244612206, + "grad_norm": 7.010313412892092, + "learning_rate": 9.10279687756284e-06, + "loss": 17.6073, + "step": 11919 + }, + { + "epoch": 0.2178880216425686, + "grad_norm": 6.072376752884002, + "learning_rate": 9.102627681635672e-06, + "loss": 17.5278, + "step": 11920 + }, + { + "epoch": 0.2179063008390151, + "grad_norm": 7.112949881592971, + "learning_rate": 9.102458471329147e-06, + "loss": 17.6626, + "step": 11921 + }, + { + "epoch": 0.21792458003546164, + "grad_norm": 7.054496167321983, + "learning_rate": 9.102289246643862e-06, + "loss": 17.9616, + "step": 11922 + }, + { + "epoch": 0.21794285923190818, + "grad_norm": 8.625310648247885, + 
"learning_rate": 9.102120007580408e-06, + "loss": 18.2248, + "step": 11923 + }, + { + "epoch": 0.21796113842835468, + "grad_norm": 8.026328983401024, + "learning_rate": 9.101950754139381e-06, + "loss": 18.2001, + "step": 11924 + }, + { + "epoch": 0.21797941762480122, + "grad_norm": 6.643022144527676, + "learning_rate": 9.101781486321371e-06, + "loss": 17.6109, + "step": 11925 + }, + { + "epoch": 0.21799769682124773, + "grad_norm": 7.646103526543786, + "learning_rate": 9.101612204126975e-06, + "loss": 17.9194, + "step": 11926 + }, + { + "epoch": 0.21801597601769426, + "grad_norm": 5.488633472006613, + "learning_rate": 9.101442907556782e-06, + "loss": 17.4202, + "step": 11927 + }, + { + "epoch": 0.2180342552141408, + "grad_norm": 7.098617209330299, + "learning_rate": 9.101273596611388e-06, + "loss": 17.8842, + "step": 11928 + }, + { + "epoch": 0.2180525344105873, + "grad_norm": 5.565334616556032, + "learning_rate": 9.101104271291386e-06, + "loss": 17.1628, + "step": 11929 + }, + { + "epoch": 0.21807081360703384, + "grad_norm": 6.913246404151351, + "learning_rate": 9.10093493159737e-06, + "loss": 17.8469, + "step": 11930 + }, + { + "epoch": 0.21808909280348035, + "grad_norm": 8.524606431905655, + "learning_rate": 9.100765577529934e-06, + "loss": 17.9882, + "step": 11931 + }, + { + "epoch": 0.21810737199992689, + "grad_norm": 5.645896753841313, + "learning_rate": 9.100596209089668e-06, + "loss": 17.2779, + "step": 11932 + }, + { + "epoch": 0.21812565119637342, + "grad_norm": 6.447210336160625, + "learning_rate": 9.10042682627717e-06, + "loss": 17.4815, + "step": 11933 + }, + { + "epoch": 0.21814393039281993, + "grad_norm": 6.838627540587328, + "learning_rate": 9.100257429093031e-06, + "loss": 17.8926, + "step": 11934 + }, + { + "epoch": 0.21816220958926646, + "grad_norm": 7.473140013540203, + "learning_rate": 9.100088017537844e-06, + "loss": 17.7996, + "step": 11935 + }, + { + "epoch": 0.21818048878571297, + "grad_norm": 7.335375339384342, + "learning_rate": 9.099918591612207e-06, + "loss": 18.0495, + "step": 11936 + }, + { + "epoch": 0.2181987679821595, + "grad_norm": 7.779144124552107, + "learning_rate": 9.09974915131671e-06, + "loss": 17.809, + "step": 11937 + }, + { + "epoch": 0.21821704717860602, + "grad_norm": 6.432819143883393, + "learning_rate": 9.099579696651949e-06, + "loss": 17.458, + "step": 11938 + }, + { + "epoch": 0.21823532637505255, + "grad_norm": 6.6374244225346635, + "learning_rate": 9.099410227618514e-06, + "loss": 17.5334, + "step": 11939 + }, + { + "epoch": 0.2182536055714991, + "grad_norm": 6.027833915964923, + "learning_rate": 9.099240744217005e-06, + "loss": 17.3092, + "step": 11940 + }, + { + "epoch": 0.2182718847679456, + "grad_norm": 7.355883839099431, + "learning_rate": 9.099071246448012e-06, + "loss": 17.8985, + "step": 11941 + }, + { + "epoch": 0.21829016396439213, + "grad_norm": 7.36139068850242, + "learning_rate": 9.098901734312128e-06, + "loss": 17.8174, + "step": 11942 + }, + { + "epoch": 0.21830844316083864, + "grad_norm": 7.893589632897089, + "learning_rate": 9.098732207809951e-06, + "loss": 18.1183, + "step": 11943 + }, + { + "epoch": 0.21832672235728517, + "grad_norm": 7.700565711248756, + "learning_rate": 9.098562666942073e-06, + "loss": 18.0619, + "step": 11944 + }, + { + "epoch": 0.2183450015537317, + "grad_norm": 5.125093256215956, + "learning_rate": 9.09839311170909e-06, + "loss": 16.9294, + "step": 11945 + }, + { + "epoch": 0.21836328075017822, + "grad_norm": 8.333853040328622, + "learning_rate": 9.098223542111593e-06, + "loss": 18.1513, + "step": 
11946 + }, + { + "epoch": 0.21838155994662475, + "grad_norm": 8.139593106388162, + "learning_rate": 9.098053958150178e-06, + "loss": 18.036, + "step": 11947 + }, + { + "epoch": 0.21839983914307126, + "grad_norm": 7.634997353143541, + "learning_rate": 9.09788435982544e-06, + "loss": 18.227, + "step": 11948 + }, + { + "epoch": 0.2184181183395178, + "grad_norm": 6.822586696970956, + "learning_rate": 9.097714747137974e-06, + "loss": 17.9048, + "step": 11949 + }, + { + "epoch": 0.21843639753596433, + "grad_norm": 5.224206413541762, + "learning_rate": 9.097545120088371e-06, + "loss": 17.1113, + "step": 11950 + }, + { + "epoch": 0.21845467673241084, + "grad_norm": 6.060260021453217, + "learning_rate": 9.097375478677228e-06, + "loss": 17.4103, + "step": 11951 + }, + { + "epoch": 0.21847295592885738, + "grad_norm": 5.282185504490752, + "learning_rate": 9.097205822905141e-06, + "loss": 16.9444, + "step": 11952 + }, + { + "epoch": 0.21849123512530388, + "grad_norm": 7.1102882167471195, + "learning_rate": 9.097036152772703e-06, + "loss": 17.9322, + "step": 11953 + }, + { + "epoch": 0.21850951432175042, + "grad_norm": 7.57731183830557, + "learning_rate": 9.09686646828051e-06, + "loss": 17.554, + "step": 11954 + }, + { + "epoch": 0.21852779351819693, + "grad_norm": 6.975978544577265, + "learning_rate": 9.096696769429154e-06, + "loss": 17.4307, + "step": 11955 + }, + { + "epoch": 0.21854607271464346, + "grad_norm": 5.449457076461351, + "learning_rate": 9.09652705621923e-06, + "loss": 17.1161, + "step": 11956 + }, + { + "epoch": 0.21856435191109, + "grad_norm": 6.9973741598750685, + "learning_rate": 9.096357328651337e-06, + "loss": 18.023, + "step": 11957 + }, + { + "epoch": 0.2185826311075365, + "grad_norm": 8.436011815734975, + "learning_rate": 9.096187586726064e-06, + "loss": 17.7318, + "step": 11958 + }, + { + "epoch": 0.21860091030398304, + "grad_norm": 7.78096901465431, + "learning_rate": 9.09601783044401e-06, + "loss": 18.0124, + "step": 11959 + }, + { + "epoch": 0.21861918950042955, + "grad_norm": 4.859143022196665, + "learning_rate": 9.09584805980577e-06, + "loss": 16.7077, + "step": 11960 + }, + { + "epoch": 0.21863746869687609, + "grad_norm": 5.9820407948444725, + "learning_rate": 9.095678274811938e-06, + "loss": 17.56, + "step": 11961 + }, + { + "epoch": 0.21865574789332262, + "grad_norm": 6.9941686148540745, + "learning_rate": 9.095508475463108e-06, + "loss": 17.5618, + "step": 11962 + }, + { + "epoch": 0.21867402708976913, + "grad_norm": 6.513741789828851, + "learning_rate": 9.095338661759879e-06, + "loss": 17.6074, + "step": 11963 + }, + { + "epoch": 0.21869230628621567, + "grad_norm": 6.469130823331481, + "learning_rate": 9.09516883370284e-06, + "loss": 17.6614, + "step": 11964 + }, + { + "epoch": 0.21871058548266217, + "grad_norm": 7.339213971916416, + "learning_rate": 9.09499899129259e-06, + "loss": 17.7564, + "step": 11965 + }, + { + "epoch": 0.2187288646791087, + "grad_norm": 7.051393632555219, + "learning_rate": 9.094829134529726e-06, + "loss": 17.7138, + "step": 11966 + }, + { + "epoch": 0.21874714387555524, + "grad_norm": 6.655667605004137, + "learning_rate": 9.094659263414838e-06, + "loss": 17.6074, + "step": 11967 + }, + { + "epoch": 0.21876542307200175, + "grad_norm": 5.697676945789829, + "learning_rate": 9.094489377948528e-06, + "loss": 17.228, + "step": 11968 + }, + { + "epoch": 0.2187837022684483, + "grad_norm": 7.858658288547763, + "learning_rate": 9.094319478131387e-06, + "loss": 18.2695, + "step": 11969 + }, + { + "epoch": 0.2188019814648948, + "grad_norm": 
6.885393288110382, + "learning_rate": 9.09414956396401e-06, + "loss": 17.5842, + "step": 11970 + }, + { + "epoch": 0.21882026066134133, + "grad_norm": 7.015131369993315, + "learning_rate": 9.093979635446994e-06, + "loss": 17.59, + "step": 11971 + }, + { + "epoch": 0.21883853985778784, + "grad_norm": 7.2145632118926155, + "learning_rate": 9.093809692580937e-06, + "loss": 17.8017, + "step": 11972 + }, + { + "epoch": 0.21885681905423437, + "grad_norm": 6.565744924503204, + "learning_rate": 9.093639735366431e-06, + "loss": 17.5354, + "step": 11973 + }, + { + "epoch": 0.2188750982506809, + "grad_norm": 7.018434256222989, + "learning_rate": 9.093469763804073e-06, + "loss": 17.5274, + "step": 11974 + }, + { + "epoch": 0.21889337744712742, + "grad_norm": 7.556468320286656, + "learning_rate": 9.093299777894458e-06, + "loss": 17.6066, + "step": 11975 + }, + { + "epoch": 0.21891165664357395, + "grad_norm": 5.617374179301001, + "learning_rate": 9.093129777638183e-06, + "loss": 17.1376, + "step": 11976 + }, + { + "epoch": 0.21892993584002046, + "grad_norm": 8.243979994025754, + "learning_rate": 9.092959763035843e-06, + "loss": 17.8591, + "step": 11977 + }, + { + "epoch": 0.218948215036467, + "grad_norm": 6.783744924746423, + "learning_rate": 9.092789734088034e-06, + "loss": 17.8086, + "step": 11978 + }, + { + "epoch": 0.21896649423291353, + "grad_norm": 7.659724633477323, + "learning_rate": 9.092619690795354e-06, + "loss": 18.0718, + "step": 11979 + }, + { + "epoch": 0.21898477342936004, + "grad_norm": 6.391978236108534, + "learning_rate": 9.092449633158395e-06, + "loss": 17.5176, + "step": 11980 + }, + { + "epoch": 0.21900305262580658, + "grad_norm": 6.81729565369, + "learning_rate": 9.092279561177758e-06, + "loss": 17.6482, + "step": 11981 + }, + { + "epoch": 0.21902133182225308, + "grad_norm": 6.671017714206008, + "learning_rate": 9.092109474854031e-06, + "loss": 17.5313, + "step": 11982 + }, + { + "epoch": 0.21903961101869962, + "grad_norm": 6.339744106747581, + "learning_rate": 9.091939374187821e-06, + "loss": 17.3638, + "step": 11983 + }, + { + "epoch": 0.21905789021514616, + "grad_norm": 5.983675068374729, + "learning_rate": 9.091769259179715e-06, + "loss": 17.229, + "step": 11984 + }, + { + "epoch": 0.21907616941159266, + "grad_norm": 7.391299335830813, + "learning_rate": 9.091599129830313e-06, + "loss": 17.9019, + "step": 11985 + }, + { + "epoch": 0.2190944486080392, + "grad_norm": 6.425807083269966, + "learning_rate": 9.091428986140213e-06, + "loss": 17.414, + "step": 11986 + }, + { + "epoch": 0.2191127278044857, + "grad_norm": 5.483876618268429, + "learning_rate": 9.091258828110008e-06, + "loss": 17.0787, + "step": 11987 + }, + { + "epoch": 0.21913100700093224, + "grad_norm": 8.051543722042831, + "learning_rate": 9.091088655740298e-06, + "loss": 17.9003, + "step": 11988 + }, + { + "epoch": 0.21914928619737875, + "grad_norm": 6.625316110521182, + "learning_rate": 9.090918469031676e-06, + "loss": 17.5168, + "step": 11989 + }, + { + "epoch": 0.2191675653938253, + "grad_norm": 6.4224362385000715, + "learning_rate": 9.090748267984738e-06, + "loss": 17.2853, + "step": 11990 + }, + { + "epoch": 0.21918584459027182, + "grad_norm": 7.061070925059579, + "learning_rate": 9.090578052600082e-06, + "loss": 17.6916, + "step": 11991 + }, + { + "epoch": 0.21920412378671833, + "grad_norm": 6.970553476191607, + "learning_rate": 9.090407822878308e-06, + "loss": 17.6499, + "step": 11992 + }, + { + "epoch": 0.21922240298316487, + "grad_norm": 7.158033677684297, + "learning_rate": 9.09023757882001e-06, + "loss": 
17.7665, + "step": 11993 + }, + { + "epoch": 0.21924068217961137, + "grad_norm": 6.272418194792395, + "learning_rate": 9.090067320425782e-06, + "loss": 17.4884, + "step": 11994 + }, + { + "epoch": 0.2192589613760579, + "grad_norm": 7.872646561097602, + "learning_rate": 9.089897047696223e-06, + "loss": 18.2421, + "step": 11995 + }, + { + "epoch": 0.21927724057250444, + "grad_norm": 6.128466211050685, + "learning_rate": 9.089726760631929e-06, + "loss": 17.2306, + "step": 11996 + }, + { + "epoch": 0.21929551976895095, + "grad_norm": 6.01276693195329, + "learning_rate": 9.0895564592335e-06, + "loss": 17.2313, + "step": 11997 + }, + { + "epoch": 0.2193137989653975, + "grad_norm": 7.076087289152299, + "learning_rate": 9.089386143501528e-06, + "loss": 17.803, + "step": 11998 + }, + { + "epoch": 0.219332078161844, + "grad_norm": 7.062194248458111, + "learning_rate": 9.089215813436614e-06, + "loss": 17.7454, + "step": 11999 + }, + { + "epoch": 0.21935035735829053, + "grad_norm": 5.747517064898039, + "learning_rate": 9.089045469039353e-06, + "loss": 17.0884, + "step": 12000 + }, + { + "epoch": 0.21936863655473707, + "grad_norm": 5.707829334885778, + "learning_rate": 9.088875110310343e-06, + "loss": 17.1666, + "step": 12001 + }, + { + "epoch": 0.21938691575118358, + "grad_norm": 7.905205923263187, + "learning_rate": 9.08870473725018e-06, + "loss": 18.4898, + "step": 12002 + }, + { + "epoch": 0.2194051949476301, + "grad_norm": 7.221712254111787, + "learning_rate": 9.088534349859462e-06, + "loss": 18.192, + "step": 12003 + }, + { + "epoch": 0.21942347414407662, + "grad_norm": 7.848459947058525, + "learning_rate": 9.088363948138786e-06, + "loss": 18.0316, + "step": 12004 + }, + { + "epoch": 0.21944175334052315, + "grad_norm": 7.562366816646246, + "learning_rate": 9.088193532088747e-06, + "loss": 18.1611, + "step": 12005 + }, + { + "epoch": 0.21946003253696966, + "grad_norm": 6.323638346385794, + "learning_rate": 9.088023101709946e-06, + "loss": 17.3996, + "step": 12006 + }, + { + "epoch": 0.2194783117334162, + "grad_norm": 7.368769866620946, + "learning_rate": 9.08785265700298e-06, + "loss": 18.0641, + "step": 12007 + }, + { + "epoch": 0.21949659092986273, + "grad_norm": 6.762553503819832, + "learning_rate": 9.087682197968444e-06, + "loss": 17.7905, + "step": 12008 + }, + { + "epoch": 0.21951487012630924, + "grad_norm": 7.787144860856624, + "learning_rate": 9.087511724606936e-06, + "loss": 17.965, + "step": 12009 + }, + { + "epoch": 0.21953314932275578, + "grad_norm": 6.843154924681289, + "learning_rate": 9.087341236919055e-06, + "loss": 17.6036, + "step": 12010 + }, + { + "epoch": 0.21955142851920229, + "grad_norm": 6.479468034628051, + "learning_rate": 9.087170734905397e-06, + "loss": 17.3977, + "step": 12011 + }, + { + "epoch": 0.21956970771564882, + "grad_norm": 6.161715466614702, + "learning_rate": 9.087000218566562e-06, + "loss": 17.4078, + "step": 12012 + }, + { + "epoch": 0.21958798691209536, + "grad_norm": 7.24460315815929, + "learning_rate": 9.086829687903144e-06, + "loss": 17.5102, + "step": 12013 + }, + { + "epoch": 0.21960626610854186, + "grad_norm": 7.761451058530412, + "learning_rate": 9.086659142915744e-06, + "loss": 17.99, + "step": 12014 + }, + { + "epoch": 0.2196245453049884, + "grad_norm": 5.437564033660677, + "learning_rate": 9.086488583604956e-06, + "loss": 17.1171, + "step": 12015 + }, + { + "epoch": 0.2196428245014349, + "grad_norm": 7.523027903179191, + "learning_rate": 9.086318009971383e-06, + "loss": 17.8936, + "step": 12016 + }, + { + "epoch": 0.21966110369788144, + 
"grad_norm": 7.238757594020898, + "learning_rate": 9.086147422015617e-06, + "loss": 17.6363, + "step": 12017 + }, + { + "epoch": 0.21967938289432798, + "grad_norm": 6.316033839709729, + "learning_rate": 9.085976819738261e-06, + "loss": 17.4743, + "step": 12018 + }, + { + "epoch": 0.2196976620907745, + "grad_norm": 7.593845688623457, + "learning_rate": 9.08580620313991e-06, + "loss": 17.8691, + "step": 12019 + }, + { + "epoch": 0.21971594128722102, + "grad_norm": 5.60514414452451, + "learning_rate": 9.085635572221163e-06, + "loss": 17.2711, + "step": 12020 + }, + { + "epoch": 0.21973422048366753, + "grad_norm": 6.490622293399584, + "learning_rate": 9.08546492698262e-06, + "loss": 17.7255, + "step": 12021 + }, + { + "epoch": 0.21975249968011407, + "grad_norm": 6.909787167782599, + "learning_rate": 9.085294267424874e-06, + "loss": 17.6954, + "step": 12022 + }, + { + "epoch": 0.21977077887656057, + "grad_norm": 8.902887335133869, + "learning_rate": 9.085123593548526e-06, + "loss": 17.6157, + "step": 12023 + }, + { + "epoch": 0.2197890580730071, + "grad_norm": 5.928585006625503, + "learning_rate": 9.084952905354177e-06, + "loss": 17.3431, + "step": 12024 + }, + { + "epoch": 0.21980733726945365, + "grad_norm": 6.630666001768404, + "learning_rate": 9.08478220284242e-06, + "loss": 17.6577, + "step": 12025 + }, + { + "epoch": 0.21982561646590015, + "grad_norm": 6.622126466769476, + "learning_rate": 9.084611486013857e-06, + "loss": 17.795, + "step": 12026 + }, + { + "epoch": 0.2198438956623467, + "grad_norm": 6.015359889451512, + "learning_rate": 9.084440754869085e-06, + "loss": 17.2767, + "step": 12027 + }, + { + "epoch": 0.2198621748587932, + "grad_norm": 5.932151449422499, + "learning_rate": 9.084270009408701e-06, + "loss": 17.358, + "step": 12028 + }, + { + "epoch": 0.21988045405523973, + "grad_norm": 6.126648192498557, + "learning_rate": 9.084099249633307e-06, + "loss": 17.1403, + "step": 12029 + }, + { + "epoch": 0.21989873325168627, + "grad_norm": 6.819311160305065, + "learning_rate": 9.083928475543498e-06, + "loss": 17.9101, + "step": 12030 + }, + { + "epoch": 0.21991701244813278, + "grad_norm": 7.7970206104472615, + "learning_rate": 9.083757687139876e-06, + "loss": 18.031, + "step": 12031 + }, + { + "epoch": 0.2199352916445793, + "grad_norm": 8.01782031017913, + "learning_rate": 9.083586884423037e-06, + "loss": 18.1894, + "step": 12032 + }, + { + "epoch": 0.21995357084102582, + "grad_norm": 6.155004043893107, + "learning_rate": 9.08341606739358e-06, + "loss": 17.2879, + "step": 12033 + }, + { + "epoch": 0.21997185003747236, + "grad_norm": 7.134669392044465, + "learning_rate": 9.083245236052103e-06, + "loss": 17.8754, + "step": 12034 + }, + { + "epoch": 0.2199901292339189, + "grad_norm": 9.047554058890503, + "learning_rate": 9.083074390399208e-06, + "loss": 18.2534, + "step": 12035 + }, + { + "epoch": 0.2200084084303654, + "grad_norm": 6.879625297549601, + "learning_rate": 9.08290353043549e-06, + "loss": 17.9168, + "step": 12036 + }, + { + "epoch": 0.22002668762681193, + "grad_norm": 6.552809811227273, + "learning_rate": 9.08273265616155e-06, + "loss": 17.797, + "step": 12037 + }, + { + "epoch": 0.22004496682325844, + "grad_norm": 6.8308385565125285, + "learning_rate": 9.082561767577986e-06, + "loss": 17.7753, + "step": 12038 + }, + { + "epoch": 0.22006324601970498, + "grad_norm": 7.742324753665272, + "learning_rate": 9.082390864685397e-06, + "loss": 17.6787, + "step": 12039 + }, + { + "epoch": 0.22008152521615149, + "grad_norm": 9.080837449731453, + "learning_rate": 9.082219947484383e-06, + 
"loss": 17.7981, + "step": 12040 + }, + { + "epoch": 0.22009980441259802, + "grad_norm": 6.729636067383213, + "learning_rate": 9.082049015975542e-06, + "loss": 17.5586, + "step": 12041 + }, + { + "epoch": 0.22011808360904456, + "grad_norm": 7.289131014769498, + "learning_rate": 9.081878070159475e-06, + "loss": 17.9244, + "step": 12042 + }, + { + "epoch": 0.22013636280549106, + "grad_norm": 6.764780826991467, + "learning_rate": 9.081707110036777e-06, + "loss": 17.8212, + "step": 12043 + }, + { + "epoch": 0.2201546420019376, + "grad_norm": 4.907240918746769, + "learning_rate": 9.081536135608052e-06, + "loss": 16.8488, + "step": 12044 + }, + { + "epoch": 0.2201729211983841, + "grad_norm": 5.758994634397533, + "learning_rate": 9.081365146873895e-06, + "loss": 17.2962, + "step": 12045 + }, + { + "epoch": 0.22019120039483064, + "grad_norm": 5.986978869383253, + "learning_rate": 9.081194143834908e-06, + "loss": 17.2695, + "step": 12046 + }, + { + "epoch": 0.22020947959127718, + "grad_norm": 6.843056842103075, + "learning_rate": 9.08102312649169e-06, + "loss": 17.5858, + "step": 12047 + }, + { + "epoch": 0.2202277587877237, + "grad_norm": 6.659494582954172, + "learning_rate": 9.080852094844839e-06, + "loss": 17.5676, + "step": 12048 + }, + { + "epoch": 0.22024603798417022, + "grad_norm": 7.580450822304439, + "learning_rate": 9.080681048894957e-06, + "loss": 17.8294, + "step": 12049 + }, + { + "epoch": 0.22026431718061673, + "grad_norm": 6.177606235294419, + "learning_rate": 9.080509988642641e-06, + "loss": 17.3763, + "step": 12050 + }, + { + "epoch": 0.22028259637706327, + "grad_norm": 6.720988067004841, + "learning_rate": 9.080338914088494e-06, + "loss": 17.426, + "step": 12051 + }, + { + "epoch": 0.2203008755735098, + "grad_norm": 7.5094124068149615, + "learning_rate": 9.08016782523311e-06, + "loss": 17.7883, + "step": 12052 + }, + { + "epoch": 0.2203191547699563, + "grad_norm": 7.748640349325527, + "learning_rate": 9.079996722077094e-06, + "loss": 18.0248, + "step": 12053 + }, + { + "epoch": 0.22033743396640285, + "grad_norm": 5.936252782832204, + "learning_rate": 9.079825604621041e-06, + "loss": 17.3862, + "step": 12054 + }, + { + "epoch": 0.22035571316284935, + "grad_norm": 7.384229712370265, + "learning_rate": 9.079654472865556e-06, + "loss": 17.8948, + "step": 12055 + }, + { + "epoch": 0.2203739923592959, + "grad_norm": 6.597747095443318, + "learning_rate": 9.079483326811236e-06, + "loss": 17.5383, + "step": 12056 + }, + { + "epoch": 0.2203922715557424, + "grad_norm": 5.842929305692039, + "learning_rate": 9.079312166458678e-06, + "loss": 17.3827, + "step": 12057 + }, + { + "epoch": 0.22041055075218893, + "grad_norm": 6.588821379665793, + "learning_rate": 9.079140991808488e-06, + "loss": 17.5008, + "step": 12058 + }, + { + "epoch": 0.22042882994863547, + "grad_norm": 6.60250764612619, + "learning_rate": 9.078969802861262e-06, + "loss": 17.5604, + "step": 12059 + }, + { + "epoch": 0.22044710914508198, + "grad_norm": 6.2355489714401084, + "learning_rate": 9.0787985996176e-06, + "loss": 17.0363, + "step": 12060 + }, + { + "epoch": 0.2204653883415285, + "grad_norm": 6.538423104713626, + "learning_rate": 9.078627382078103e-06, + "loss": 17.8154, + "step": 12061 + }, + { + "epoch": 0.22048366753797502, + "grad_norm": 7.022784672294815, + "learning_rate": 9.078456150243371e-06, + "loss": 17.7635, + "step": 12062 + }, + { + "epoch": 0.22050194673442156, + "grad_norm": 7.1416528461418, + "learning_rate": 9.078284904114005e-06, + "loss": 17.7005, + "step": 12063 + }, + { + "epoch": 0.2205202259308681, 
+ "grad_norm": 6.059971626355186, + "learning_rate": 9.078113643690602e-06, + "loss": 17.2732, + "step": 12064 + }, + { + "epoch": 0.2205385051273146, + "grad_norm": 6.5397619475435205, + "learning_rate": 9.077942368973767e-06, + "loss": 17.8203, + "step": 12065 + }, + { + "epoch": 0.22055678432376113, + "grad_norm": 6.575952660749232, + "learning_rate": 9.077771079964097e-06, + "loss": 17.6508, + "step": 12066 + }, + { + "epoch": 0.22057506352020764, + "grad_norm": 6.674876409356483, + "learning_rate": 9.077599776662194e-06, + "loss": 17.5094, + "step": 12067 + }, + { + "epoch": 0.22059334271665418, + "grad_norm": 6.747351806384442, + "learning_rate": 9.077428459068656e-06, + "loss": 17.6921, + "step": 12068 + }, + { + "epoch": 0.22061162191310071, + "grad_norm": 8.107350141197397, + "learning_rate": 9.077257127184087e-06, + "loss": 18.0762, + "step": 12069 + }, + { + "epoch": 0.22062990110954722, + "grad_norm": 7.087594877867561, + "learning_rate": 9.077085781009084e-06, + "loss": 17.6351, + "step": 12070 + }, + { + "epoch": 0.22064818030599376, + "grad_norm": 7.426294762385301, + "learning_rate": 9.07691442054425e-06, + "loss": 17.9902, + "step": 12071 + }, + { + "epoch": 0.22066645950244027, + "grad_norm": 6.433734635874548, + "learning_rate": 9.076743045790184e-06, + "loss": 17.5732, + "step": 12072 + }, + { + "epoch": 0.2206847386988868, + "grad_norm": 7.418091579853038, + "learning_rate": 9.076571656747488e-06, + "loss": 17.8136, + "step": 12073 + }, + { + "epoch": 0.2207030178953333, + "grad_norm": 8.994427748065133, + "learning_rate": 9.076400253416762e-06, + "loss": 18.7329, + "step": 12074 + }, + { + "epoch": 0.22072129709177984, + "grad_norm": 6.890757558467735, + "learning_rate": 9.076228835798606e-06, + "loss": 17.6003, + "step": 12075 + }, + { + "epoch": 0.22073957628822638, + "grad_norm": 4.939340614448473, + "learning_rate": 9.076057403893624e-06, + "loss": 16.8055, + "step": 12076 + }, + { + "epoch": 0.2207578554846729, + "grad_norm": 5.740784737536925, + "learning_rate": 9.075885957702411e-06, + "loss": 17.1904, + "step": 12077 + }, + { + "epoch": 0.22077613468111942, + "grad_norm": 5.982820156246118, + "learning_rate": 9.075714497225574e-06, + "loss": 17.4779, + "step": 12078 + }, + { + "epoch": 0.22079441387756593, + "grad_norm": 6.580558522174517, + "learning_rate": 9.075543022463711e-06, + "loss": 17.7942, + "step": 12079 + }, + { + "epoch": 0.22081269307401247, + "grad_norm": 6.624701415458696, + "learning_rate": 9.075371533417423e-06, + "loss": 17.4807, + "step": 12080 + }, + { + "epoch": 0.220830972270459, + "grad_norm": 6.630159754538874, + "learning_rate": 9.07520003008731e-06, + "loss": 17.6889, + "step": 12081 + }, + { + "epoch": 0.2208492514669055, + "grad_norm": 6.679633768938046, + "learning_rate": 9.075028512473976e-06, + "loss": 17.7805, + "step": 12082 + }, + { + "epoch": 0.22086753066335205, + "grad_norm": 7.804806343191544, + "learning_rate": 9.074856980578022e-06, + "loss": 17.8208, + "step": 12083 + }, + { + "epoch": 0.22088580985979855, + "grad_norm": 7.006482526280416, + "learning_rate": 9.074685434400046e-06, + "loss": 18.0162, + "step": 12084 + }, + { + "epoch": 0.2209040890562451, + "grad_norm": 7.047766027138001, + "learning_rate": 9.074513873940651e-06, + "loss": 17.4851, + "step": 12085 + }, + { + "epoch": 0.22092236825269163, + "grad_norm": 6.3626919655071035, + "learning_rate": 9.07434229920044e-06, + "loss": 17.3642, + "step": 12086 + }, + { + "epoch": 0.22094064744913813, + "grad_norm": 7.247062356212498, + "learning_rate": 
9.07417071018001e-06, + "loss": 18.0834, + "step": 12087 + }, + { + "epoch": 0.22095892664558467, + "grad_norm": 5.822405120819056, + "learning_rate": 9.07399910687997e-06, + "loss": 17.1425, + "step": 12088 + }, + { + "epoch": 0.22097720584203118, + "grad_norm": 7.587197075988354, + "learning_rate": 9.073827489300913e-06, + "loss": 17.8303, + "step": 12089 + }, + { + "epoch": 0.2209954850384777, + "grad_norm": 6.068602889869737, + "learning_rate": 9.073655857443444e-06, + "loss": 17.369, + "step": 12090 + }, + { + "epoch": 0.22101376423492422, + "grad_norm": 7.393926217882674, + "learning_rate": 9.073484211308166e-06, + "loss": 17.5568, + "step": 12091 + }, + { + "epoch": 0.22103204343137076, + "grad_norm": 7.637252425495857, + "learning_rate": 9.073312550895678e-06, + "loss": 17.6237, + "step": 12092 + }, + { + "epoch": 0.2210503226278173, + "grad_norm": 6.042011046293065, + "learning_rate": 9.073140876206585e-06, + "loss": 17.231, + "step": 12093 + }, + { + "epoch": 0.2210686018242638, + "grad_norm": 6.446392263232922, + "learning_rate": 9.072969187241484e-06, + "loss": 17.4578, + "step": 12094 + }, + { + "epoch": 0.22108688102071034, + "grad_norm": 6.754235291343457, + "learning_rate": 9.072797484000983e-06, + "loss": 17.2657, + "step": 12095 + }, + { + "epoch": 0.22110516021715684, + "grad_norm": 7.994525310983319, + "learning_rate": 9.072625766485678e-06, + "loss": 18.352, + "step": 12096 + }, + { + "epoch": 0.22112343941360338, + "grad_norm": 6.907998648106382, + "learning_rate": 9.072454034696173e-06, + "loss": 17.8207, + "step": 12097 + }, + { + "epoch": 0.22114171861004991, + "grad_norm": 7.5304715476577275, + "learning_rate": 9.07228228863307e-06, + "loss": 18.1671, + "step": 12098 + }, + { + "epoch": 0.22115999780649642, + "grad_norm": 6.864295122027136, + "learning_rate": 9.072110528296971e-06, + "loss": 17.8149, + "step": 12099 + }, + { + "epoch": 0.22117827700294296, + "grad_norm": 6.496031378288208, + "learning_rate": 9.07193875368848e-06, + "loss": 17.5587, + "step": 12100 + }, + { + "epoch": 0.22119655619938947, + "grad_norm": 6.7719935577797, + "learning_rate": 9.071766964808193e-06, + "loss": 17.8892, + "step": 12101 + }, + { + "epoch": 0.221214835395836, + "grad_norm": 6.019303894423978, + "learning_rate": 9.071595161656718e-06, + "loss": 17.6829, + "step": 12102 + }, + { + "epoch": 0.22123311459228254, + "grad_norm": 7.398712332894582, + "learning_rate": 9.071423344234658e-06, + "loss": 18.0845, + "step": 12103 + }, + { + "epoch": 0.22125139378872904, + "grad_norm": 5.725552928569081, + "learning_rate": 9.07125151254261e-06, + "loss": 17.1853, + "step": 12104 + }, + { + "epoch": 0.22126967298517558, + "grad_norm": 5.828800196676859, + "learning_rate": 9.071079666581178e-06, + "loss": 17.1133, + "step": 12105 + }, + { + "epoch": 0.2212879521816221, + "grad_norm": 5.561468258385647, + "learning_rate": 9.070907806350965e-06, + "loss": 16.9982, + "step": 12106 + }, + { + "epoch": 0.22130623137806862, + "grad_norm": 6.118137601073453, + "learning_rate": 9.070735931852575e-06, + "loss": 17.4773, + "step": 12107 + }, + { + "epoch": 0.22132451057451513, + "grad_norm": 6.723948730169479, + "learning_rate": 9.07056404308661e-06, + "loss": 17.8599, + "step": 12108 + }, + { + "epoch": 0.22134278977096167, + "grad_norm": 6.251266020611498, + "learning_rate": 9.070392140053667e-06, + "loss": 17.5272, + "step": 12109 + }, + { + "epoch": 0.2213610689674082, + "grad_norm": 5.895701484924443, + "learning_rate": 9.070220222754356e-06, + "loss": 17.346, + "step": 12110 + }, + { + 
"epoch": 0.2213793481638547, + "grad_norm": 8.795498461265977, + "learning_rate": 9.070048291189276e-06, + "loss": 18.1511, + "step": 12111 + }, + { + "epoch": 0.22139762736030125, + "grad_norm": 7.085636395269582, + "learning_rate": 9.06987634535903e-06, + "loss": 18.2111, + "step": 12112 + }, + { + "epoch": 0.22141590655674775, + "grad_norm": 5.200696641321617, + "learning_rate": 9.06970438526422e-06, + "loss": 17.1305, + "step": 12113 + }, + { + "epoch": 0.2214341857531943, + "grad_norm": 6.834922203306733, + "learning_rate": 9.069532410905448e-06, + "loss": 17.7227, + "step": 12114 + }, + { + "epoch": 0.22145246494964083, + "grad_norm": 6.393837202331609, + "learning_rate": 9.06936042228332e-06, + "loss": 17.3959, + "step": 12115 + }, + { + "epoch": 0.22147074414608733, + "grad_norm": 5.5328302722177085, + "learning_rate": 9.069188419398437e-06, + "loss": 17.0514, + "step": 12116 + }, + { + "epoch": 0.22148902334253387, + "grad_norm": 8.510829456601536, + "learning_rate": 9.0690164022514e-06, + "loss": 18.1078, + "step": 12117 + }, + { + "epoch": 0.22150730253898038, + "grad_norm": 5.904706526852767, + "learning_rate": 9.068844370842812e-06, + "loss": 17.2968, + "step": 12118 + }, + { + "epoch": 0.2215255817354269, + "grad_norm": 6.622754466255814, + "learning_rate": 9.068672325173282e-06, + "loss": 17.389, + "step": 12119 + }, + { + "epoch": 0.22154386093187345, + "grad_norm": 6.508877281910572, + "learning_rate": 9.068500265243407e-06, + "loss": 17.6178, + "step": 12120 + }, + { + "epoch": 0.22156214012831996, + "grad_norm": 5.864018435445338, + "learning_rate": 9.06832819105379e-06, + "loss": 17.1021, + "step": 12121 + }, + { + "epoch": 0.2215804193247665, + "grad_norm": 6.403591175170202, + "learning_rate": 9.068156102605037e-06, + "loss": 17.4996, + "step": 12122 + }, + { + "epoch": 0.221598698521213, + "grad_norm": 6.990439120209378, + "learning_rate": 9.067983999897751e-06, + "loss": 17.4864, + "step": 12123 + }, + { + "epoch": 0.22161697771765954, + "grad_norm": 6.4612132220284915, + "learning_rate": 9.067811882932533e-06, + "loss": 17.3983, + "step": 12124 + }, + { + "epoch": 0.22163525691410604, + "grad_norm": 5.932857378409871, + "learning_rate": 9.067639751709987e-06, + "loss": 17.5504, + "step": 12125 + }, + { + "epoch": 0.22165353611055258, + "grad_norm": 6.832095320539582, + "learning_rate": 9.067467606230717e-06, + "loss": 17.6407, + "step": 12126 + }, + { + "epoch": 0.22167181530699911, + "grad_norm": 5.785721329037084, + "learning_rate": 9.067295446495326e-06, + "loss": 17.2609, + "step": 12127 + }, + { + "epoch": 0.22169009450344562, + "grad_norm": 6.631662600339456, + "learning_rate": 9.067123272504417e-06, + "loss": 17.3671, + "step": 12128 + }, + { + "epoch": 0.22170837369989216, + "grad_norm": 7.589981984568511, + "learning_rate": 9.066951084258593e-06, + "loss": 18.2443, + "step": 12129 + }, + { + "epoch": 0.22172665289633867, + "grad_norm": 5.525196115915716, + "learning_rate": 9.06677888175846e-06, + "loss": 16.9806, + "step": 12130 + }, + { + "epoch": 0.2217449320927852, + "grad_norm": 6.955554771941406, + "learning_rate": 9.06660666500462e-06, + "loss": 17.662, + "step": 12131 + }, + { + "epoch": 0.22176321128923174, + "grad_norm": 5.991003541933081, + "learning_rate": 9.066434433997674e-06, + "loss": 17.4138, + "step": 12132 + }, + { + "epoch": 0.22178149048567825, + "grad_norm": 7.893901139197201, + "learning_rate": 9.06626218873823e-06, + "loss": 18.0052, + "step": 12133 + }, + { + "epoch": 0.22179976968212478, + "grad_norm": 7.379640206812513, + 
"learning_rate": 9.066089929226891e-06, + "loss": 18.0161, + "step": 12134 + }, + { + "epoch": 0.2218180488785713, + "grad_norm": 5.8641168083447806, + "learning_rate": 9.065917655464258e-06, + "loss": 17.3031, + "step": 12135 + }, + { + "epoch": 0.22183632807501782, + "grad_norm": 6.350075766092297, + "learning_rate": 9.065745367450938e-06, + "loss": 17.3539, + "step": 12136 + }, + { + "epoch": 0.22185460727146436, + "grad_norm": 7.8634994124227955, + "learning_rate": 9.065573065187531e-06, + "loss": 17.9024, + "step": 12137 + }, + { + "epoch": 0.22187288646791087, + "grad_norm": 5.082605456754155, + "learning_rate": 9.065400748674646e-06, + "loss": 16.9995, + "step": 12138 + }, + { + "epoch": 0.2218911656643574, + "grad_norm": 9.111701523707396, + "learning_rate": 9.065228417912882e-06, + "loss": 18.4247, + "step": 12139 + }, + { + "epoch": 0.2219094448608039, + "grad_norm": 7.461955655034305, + "learning_rate": 9.065056072902847e-06, + "loss": 17.6911, + "step": 12140 + }, + { + "epoch": 0.22192772405725045, + "grad_norm": 5.56744554253036, + "learning_rate": 9.06488371364514e-06, + "loss": 17.2422, + "step": 12141 + }, + { + "epoch": 0.22194600325369696, + "grad_norm": 6.874676922587669, + "learning_rate": 9.064711340140373e-06, + "loss": 17.7655, + "step": 12142 + }, + { + "epoch": 0.2219642824501435, + "grad_norm": 6.967705772045241, + "learning_rate": 9.064538952389141e-06, + "loss": 17.7408, + "step": 12143 + }, + { + "epoch": 0.22198256164659003, + "grad_norm": 5.7354699215764136, + "learning_rate": 9.064366550392056e-06, + "loss": 17.2371, + "step": 12144 + }, + { + "epoch": 0.22200084084303653, + "grad_norm": 6.379178656800686, + "learning_rate": 9.064194134149718e-06, + "loss": 17.4984, + "step": 12145 + }, + { + "epoch": 0.22201912003948307, + "grad_norm": 6.637973537030778, + "learning_rate": 9.064021703662732e-06, + "loss": 17.7825, + "step": 12146 + }, + { + "epoch": 0.22203739923592958, + "grad_norm": 8.032869745002564, + "learning_rate": 9.0638492589317e-06, + "loss": 18.2834, + "step": 12147 + }, + { + "epoch": 0.2220556784323761, + "grad_norm": 6.144100501655099, + "learning_rate": 9.063676799957231e-06, + "loss": 17.0968, + "step": 12148 + }, + { + "epoch": 0.22207395762882265, + "grad_norm": 6.762697195423152, + "learning_rate": 9.063504326739929e-06, + "loss": 17.5261, + "step": 12149 + }, + { + "epoch": 0.22209223682526916, + "grad_norm": 5.334294547815651, + "learning_rate": 9.063331839280395e-06, + "loss": 17.4522, + "step": 12150 + }, + { + "epoch": 0.2221105160217157, + "grad_norm": 6.868929711870742, + "learning_rate": 9.063159337579238e-06, + "loss": 17.7483, + "step": 12151 + }, + { + "epoch": 0.2221287952181622, + "grad_norm": 6.366454532936392, + "learning_rate": 9.062986821637056e-06, + "loss": 17.6542, + "step": 12152 + }, + { + "epoch": 0.22214707441460874, + "grad_norm": 7.9848156345152255, + "learning_rate": 9.06281429145446e-06, + "loss": 18.2242, + "step": 12153 + }, + { + "epoch": 0.22216535361105527, + "grad_norm": 6.573086364479184, + "learning_rate": 9.062641747032052e-06, + "loss": 17.4293, + "step": 12154 + }, + { + "epoch": 0.22218363280750178, + "grad_norm": 6.234651122800711, + "learning_rate": 9.06246918837044e-06, + "loss": 17.3009, + "step": 12155 + }, + { + "epoch": 0.22220191200394832, + "grad_norm": 6.491836262913137, + "learning_rate": 9.062296615470223e-06, + "loss": 17.6475, + "step": 12156 + }, + { + "epoch": 0.22222019120039482, + "grad_norm": 5.141991111949409, + "learning_rate": 9.062124028332008e-06, + "loss": 16.8352, + 
"step": 12157 + }, + { + "epoch": 0.22223847039684136, + "grad_norm": 7.045667001321327, + "learning_rate": 9.061951426956403e-06, + "loss": 17.8412, + "step": 12158 + }, + { + "epoch": 0.22225674959328787, + "grad_norm": 7.651692323400573, + "learning_rate": 9.06177881134401e-06, + "loss": 18.0355, + "step": 12159 + }, + { + "epoch": 0.2222750287897344, + "grad_norm": 7.487448585021054, + "learning_rate": 9.061606181495436e-06, + "loss": 17.893, + "step": 12160 + }, + { + "epoch": 0.22229330798618094, + "grad_norm": 7.015425481940219, + "learning_rate": 9.061433537411285e-06, + "loss": 18.1156, + "step": 12161 + }, + { + "epoch": 0.22231158718262745, + "grad_norm": 6.962871882681409, + "learning_rate": 9.06126087909216e-06, + "loss": 17.9275, + "step": 12162 + }, + { + "epoch": 0.22232986637907398, + "grad_norm": 6.817616740706698, + "learning_rate": 9.061088206538668e-06, + "loss": 17.5149, + "step": 12163 + }, + { + "epoch": 0.2223481455755205, + "grad_norm": 6.509896268933717, + "learning_rate": 9.060915519751415e-06, + "loss": 17.6248, + "step": 12164 + }, + { + "epoch": 0.22236642477196703, + "grad_norm": 6.315903827237091, + "learning_rate": 9.060742818731006e-06, + "loss": 17.1711, + "step": 12165 + }, + { + "epoch": 0.22238470396841356, + "grad_norm": 6.4543926230363935, + "learning_rate": 9.060570103478043e-06, + "loss": 17.5929, + "step": 12166 + }, + { + "epoch": 0.22240298316486007, + "grad_norm": 7.083003033265911, + "learning_rate": 9.060397373993138e-06, + "loss": 18.1269, + "step": 12167 + }, + { + "epoch": 0.2224212623613066, + "grad_norm": 6.918556123420934, + "learning_rate": 9.06022463027689e-06, + "loss": 17.7514, + "step": 12168 + }, + { + "epoch": 0.2224395415577531, + "grad_norm": 6.13862923828019, + "learning_rate": 9.060051872329907e-06, + "loss": 17.3936, + "step": 12169 + }, + { + "epoch": 0.22245782075419965, + "grad_norm": 7.548290244094069, + "learning_rate": 9.059879100152795e-06, + "loss": 18.0276, + "step": 12170 + }, + { + "epoch": 0.22247609995064618, + "grad_norm": 6.654352827636281, + "learning_rate": 9.05970631374616e-06, + "loss": 17.6661, + "step": 12171 + }, + { + "epoch": 0.2224943791470927, + "grad_norm": 6.23502675325729, + "learning_rate": 9.059533513110605e-06, + "loss": 17.4606, + "step": 12172 + }, + { + "epoch": 0.22251265834353923, + "grad_norm": 7.595924061961402, + "learning_rate": 9.05936069824674e-06, + "loss": 18.3619, + "step": 12173 + }, + { + "epoch": 0.22253093753998573, + "grad_norm": 5.993091462334962, + "learning_rate": 9.059187869155167e-06, + "loss": 17.4158, + "step": 12174 + }, + { + "epoch": 0.22254921673643227, + "grad_norm": 5.892020062009522, + "learning_rate": 9.05901502583649e-06, + "loss": 17.3697, + "step": 12175 + }, + { + "epoch": 0.22256749593287878, + "grad_norm": 4.909215357289737, + "learning_rate": 9.05884216829132e-06, + "loss": 16.8452, + "step": 12176 + }, + { + "epoch": 0.22258577512932531, + "grad_norm": 6.505330308008546, + "learning_rate": 9.05866929652026e-06, + "loss": 17.6892, + "step": 12177 + }, + { + "epoch": 0.22260405432577185, + "grad_norm": 7.260690227232895, + "learning_rate": 9.058496410523917e-06, + "loss": 18.0694, + "step": 12178 + }, + { + "epoch": 0.22262233352221836, + "grad_norm": 6.559486921726839, + "learning_rate": 9.058323510302896e-06, + "loss": 17.6832, + "step": 12179 + }, + { + "epoch": 0.2226406127186649, + "grad_norm": 6.477247629775747, + "learning_rate": 9.058150595857803e-06, + "loss": 17.4083, + "step": 12180 + }, + { + "epoch": 0.2226588919151114, + "grad_norm": 
6.222961774733105, + "learning_rate": 9.057977667189244e-06, + "loss": 17.6799, + "step": 12181 + }, + { + "epoch": 0.22267717111155794, + "grad_norm": 7.0625027289353275, + "learning_rate": 9.057804724297825e-06, + "loss": 17.8661, + "step": 12182 + }, + { + "epoch": 0.22269545030800447, + "grad_norm": 6.206804862803218, + "learning_rate": 9.057631767184153e-06, + "loss": 17.3949, + "step": 12183 + }, + { + "epoch": 0.22271372950445098, + "grad_norm": 6.557056824978244, + "learning_rate": 9.057458795848834e-06, + "loss": 17.5059, + "step": 12184 + }, + { + "epoch": 0.22273200870089752, + "grad_norm": 6.20159662590665, + "learning_rate": 9.057285810292474e-06, + "loss": 17.278, + "step": 12185 + }, + { + "epoch": 0.22275028789734402, + "grad_norm": 6.695076245431577, + "learning_rate": 9.057112810515681e-06, + "loss": 17.743, + "step": 12186 + }, + { + "epoch": 0.22276856709379056, + "grad_norm": 6.108160305567904, + "learning_rate": 9.056939796519056e-06, + "loss": 17.4396, + "step": 12187 + }, + { + "epoch": 0.2227868462902371, + "grad_norm": 9.339105529261538, + "learning_rate": 9.056766768303212e-06, + "loss": 18.6124, + "step": 12188 + }, + { + "epoch": 0.2228051254866836, + "grad_norm": 5.530740482200743, + "learning_rate": 9.056593725868752e-06, + "loss": 16.9295, + "step": 12189 + }, + { + "epoch": 0.22282340468313014, + "grad_norm": 6.706535134975608, + "learning_rate": 9.056420669216281e-06, + "loss": 17.8878, + "step": 12190 + }, + { + "epoch": 0.22284168387957665, + "grad_norm": 5.2416419568065145, + "learning_rate": 9.05624759834641e-06, + "loss": 17.0756, + "step": 12191 + }, + { + "epoch": 0.22285996307602318, + "grad_norm": 7.805341729400662, + "learning_rate": 9.056074513259742e-06, + "loss": 18.2496, + "step": 12192 + }, + { + "epoch": 0.2228782422724697, + "grad_norm": 7.356348571563792, + "learning_rate": 9.055901413956885e-06, + "loss": 17.8351, + "step": 12193 + }, + { + "epoch": 0.22289652146891623, + "grad_norm": 6.1622294015402845, + "learning_rate": 9.055728300438445e-06, + "loss": 17.1803, + "step": 12194 + }, + { + "epoch": 0.22291480066536276, + "grad_norm": 6.4323271410674945, + "learning_rate": 9.05555517270503e-06, + "loss": 17.4918, + "step": 12195 + }, + { + "epoch": 0.22293307986180927, + "grad_norm": 7.902888915890806, + "learning_rate": 9.055382030757244e-06, + "loss": 18.0494, + "step": 12196 + }, + { + "epoch": 0.2229513590582558, + "grad_norm": 14.911929793128163, + "learning_rate": 9.0552088745957e-06, + "loss": 17.9418, + "step": 12197 + }, + { + "epoch": 0.2229696382547023, + "grad_norm": 8.190149957916722, + "learning_rate": 9.055035704220998e-06, + "loss": 18.1531, + "step": 12198 + }, + { + "epoch": 0.22298791745114885, + "grad_norm": 6.5247192925214375, + "learning_rate": 9.054862519633749e-06, + "loss": 17.3929, + "step": 12199 + }, + { + "epoch": 0.22300619664759538, + "grad_norm": 5.65360135672644, + "learning_rate": 9.054689320834557e-06, + "loss": 16.9583, + "step": 12200 + }, + { + "epoch": 0.2230244758440419, + "grad_norm": 6.677669658979401, + "learning_rate": 9.054516107824031e-06, + "loss": 17.7014, + "step": 12201 + }, + { + "epoch": 0.22304275504048843, + "grad_norm": 6.8350644668940514, + "learning_rate": 9.05434288060278e-06, + "loss": 17.5601, + "step": 12202 + }, + { + "epoch": 0.22306103423693494, + "grad_norm": 7.85227969398357, + "learning_rate": 9.054169639171407e-06, + "loss": 17.7565, + "step": 12203 + }, + { + "epoch": 0.22307931343338147, + "grad_norm": 6.945638606751802, + "learning_rate": 9.05399638353052e-06, + 
"loss": 17.969, + "step": 12204 + }, + { + "epoch": 0.223097592629828, + "grad_norm": 6.036209937688848, + "learning_rate": 9.053823113680731e-06, + "loss": 17.3046, + "step": 12205 + }, + { + "epoch": 0.22311587182627451, + "grad_norm": 6.685623520160601, + "learning_rate": 9.053649829622642e-06, + "loss": 18.1842, + "step": 12206 + }, + { + "epoch": 0.22313415102272105, + "grad_norm": 5.7246782458163965, + "learning_rate": 9.053476531356861e-06, + "loss": 17.2299, + "step": 12207 + }, + { + "epoch": 0.22315243021916756, + "grad_norm": 7.408503539027606, + "learning_rate": 9.053303218883998e-06, + "loss": 18.1562, + "step": 12208 + }, + { + "epoch": 0.2231707094156141, + "grad_norm": 6.427588789991955, + "learning_rate": 9.05312989220466e-06, + "loss": 17.3499, + "step": 12209 + }, + { + "epoch": 0.2231889886120606, + "grad_norm": 6.795794685545221, + "learning_rate": 9.052956551319452e-06, + "loss": 17.3862, + "step": 12210 + }, + { + "epoch": 0.22320726780850714, + "grad_norm": 7.06147244109122, + "learning_rate": 9.052783196228983e-06, + "loss": 17.317, + "step": 12211 + }, + { + "epoch": 0.22322554700495367, + "grad_norm": 6.7282083201337315, + "learning_rate": 9.05260982693386e-06, + "loss": 17.6491, + "step": 12212 + }, + { + "epoch": 0.22324382620140018, + "grad_norm": 5.4593299113472495, + "learning_rate": 9.05243644343469e-06, + "loss": 17.2507, + "step": 12213 + }, + { + "epoch": 0.22326210539784672, + "grad_norm": 6.109716435642244, + "learning_rate": 9.052263045732087e-06, + "loss": 17.6383, + "step": 12214 + }, + { + "epoch": 0.22328038459429322, + "grad_norm": 8.530470453759085, + "learning_rate": 9.05208963382665e-06, + "loss": 17.5968, + "step": 12215 + }, + { + "epoch": 0.22329866379073976, + "grad_norm": 6.024941972181284, + "learning_rate": 9.05191620771899e-06, + "loss": 17.3743, + "step": 12216 + }, + { + "epoch": 0.2233169429871863, + "grad_norm": 5.780308959116288, + "learning_rate": 9.051742767409716e-06, + "loss": 16.8978, + "step": 12217 + }, + { + "epoch": 0.2233352221836328, + "grad_norm": 7.2216651620068735, + "learning_rate": 9.051569312899436e-06, + "loss": 17.4587, + "step": 12218 + }, + { + "epoch": 0.22335350138007934, + "grad_norm": 6.794678175019179, + "learning_rate": 9.051395844188755e-06, + "loss": 17.5773, + "step": 12219 + }, + { + "epoch": 0.22337178057652585, + "grad_norm": 6.631565450155475, + "learning_rate": 9.051222361278286e-06, + "loss": 17.563, + "step": 12220 + }, + { + "epoch": 0.22339005977297238, + "grad_norm": 7.237088734195267, + "learning_rate": 9.051048864168632e-06, + "loss": 17.6461, + "step": 12221 + }, + { + "epoch": 0.22340833896941892, + "grad_norm": 5.937021023793204, + "learning_rate": 9.050875352860404e-06, + "loss": 17.3293, + "step": 12222 + }, + { + "epoch": 0.22342661816586543, + "grad_norm": 8.196093042757301, + "learning_rate": 9.050701827354211e-06, + "loss": 17.6361, + "step": 12223 + }, + { + "epoch": 0.22344489736231196, + "grad_norm": 7.425693450852276, + "learning_rate": 9.050528287650657e-06, + "loss": 17.8749, + "step": 12224 + }, + { + "epoch": 0.22346317655875847, + "grad_norm": 5.215058225206581, + "learning_rate": 9.050354733750354e-06, + "loss": 17.0018, + "step": 12225 + }, + { + "epoch": 0.223481455755205, + "grad_norm": 5.787247505005862, + "learning_rate": 9.05018116565391e-06, + "loss": 17.2517, + "step": 12226 + }, + { + "epoch": 0.2234997349516515, + "grad_norm": 6.5409886645510635, + "learning_rate": 9.05000758336193e-06, + "loss": 17.6792, + "step": 12227 + }, + { + "epoch": 0.22351801414809805, 
+ "grad_norm": 8.445251899757077, + "learning_rate": 9.049833986875027e-06, + "loss": 17.5437, + "step": 12228 + }, + { + "epoch": 0.22353629334454458, + "grad_norm": 6.821241351948771, + "learning_rate": 9.049660376193808e-06, + "loss": 17.546, + "step": 12229 + }, + { + "epoch": 0.2235545725409911, + "grad_norm": 7.823994149490272, + "learning_rate": 9.049486751318879e-06, + "loss": 17.9458, + "step": 12230 + }, + { + "epoch": 0.22357285173743763, + "grad_norm": 7.369459589380154, + "learning_rate": 9.04931311225085e-06, + "loss": 18.0869, + "step": 12231 + }, + { + "epoch": 0.22359113093388414, + "grad_norm": 7.6500548569966185, + "learning_rate": 9.04913945899033e-06, + "loss": 17.8878, + "step": 12232 + }, + { + "epoch": 0.22360941013033067, + "grad_norm": 5.837867098791316, + "learning_rate": 9.048965791537929e-06, + "loss": 17.042, + "step": 12233 + }, + { + "epoch": 0.2236276893267772, + "grad_norm": 6.050463393955847, + "learning_rate": 9.048792109894253e-06, + "loss": 17.1248, + "step": 12234 + }, + { + "epoch": 0.22364596852322371, + "grad_norm": 8.164927438234022, + "learning_rate": 9.048618414059912e-06, + "loss": 18.2822, + "step": 12235 + }, + { + "epoch": 0.22366424771967025, + "grad_norm": 7.768789550138342, + "learning_rate": 9.048444704035517e-06, + "loss": 18.2395, + "step": 12236 + }, + { + "epoch": 0.22368252691611676, + "grad_norm": 5.907906867932357, + "learning_rate": 9.048270979821673e-06, + "loss": 17.217, + "step": 12237 + }, + { + "epoch": 0.2237008061125633, + "grad_norm": 6.668599665032728, + "learning_rate": 9.04809724141899e-06, + "loss": 17.6935, + "step": 12238 + }, + { + "epoch": 0.22371908530900983, + "grad_norm": 6.426863155040541, + "learning_rate": 9.047923488828079e-06, + "loss": 17.4179, + "step": 12239 + }, + { + "epoch": 0.22373736450545634, + "grad_norm": 7.489766139729761, + "learning_rate": 9.047749722049545e-06, + "loss": 17.9008, + "step": 12240 + }, + { + "epoch": 0.22375564370190287, + "grad_norm": 7.03745251707885, + "learning_rate": 9.047575941084002e-06, + "loss": 17.8194, + "step": 12241 + }, + { + "epoch": 0.22377392289834938, + "grad_norm": 8.213014293127971, + "learning_rate": 9.047402145932055e-06, + "loss": 18.2351, + "step": 12242 + }, + { + "epoch": 0.22379220209479592, + "grad_norm": 6.809378559607105, + "learning_rate": 9.047228336594315e-06, + "loss": 17.6302, + "step": 12243 + }, + { + "epoch": 0.22381048129124242, + "grad_norm": 7.37470269898717, + "learning_rate": 9.047054513071391e-06, + "loss": 17.9828, + "step": 12244 + }, + { + "epoch": 0.22382876048768896, + "grad_norm": 11.370524647387033, + "learning_rate": 9.046880675363892e-06, + "loss": 17.5631, + "step": 12245 + }, + { + "epoch": 0.2238470396841355, + "grad_norm": 7.798356886013414, + "learning_rate": 9.046706823472428e-06, + "loss": 18.0636, + "step": 12246 + }, + { + "epoch": 0.223865318880582, + "grad_norm": 7.08648155761893, + "learning_rate": 9.046532957397606e-06, + "loss": 17.9467, + "step": 12247 + }, + { + "epoch": 0.22388359807702854, + "grad_norm": 5.435527706552968, + "learning_rate": 9.046359077140039e-06, + "loss": 17.0462, + "step": 12248 + }, + { + "epoch": 0.22390187727347505, + "grad_norm": 6.68293259405961, + "learning_rate": 9.046185182700333e-06, + "loss": 17.7712, + "step": 12249 + }, + { + "epoch": 0.22392015646992158, + "grad_norm": 5.368867604488464, + "learning_rate": 9.0460112740791e-06, + "loss": 17.112, + "step": 12250 + }, + { + "epoch": 0.22393843566636812, + "grad_norm": 5.77104810479662, + "learning_rate": 9.045837351276949e-06, 
+ "loss": 17.4235, + "step": 12251 + }, + { + "epoch": 0.22395671486281463, + "grad_norm": 6.159005156455112, + "learning_rate": 9.04566341429449e-06, + "loss": 17.606, + "step": 12252 + }, + { + "epoch": 0.22397499405926116, + "grad_norm": 6.8603474478294215, + "learning_rate": 9.04548946313233e-06, + "loss": 17.6305, + "step": 12253 + }, + { + "epoch": 0.22399327325570767, + "grad_norm": 6.893380745047261, + "learning_rate": 9.04531549779108e-06, + "loss": 17.9018, + "step": 12254 + }, + { + "epoch": 0.2240115524521542, + "grad_norm": 7.395851504751612, + "learning_rate": 9.045141518271352e-06, + "loss": 18.0254, + "step": 12255 + }, + { + "epoch": 0.22402983164860074, + "grad_norm": 7.459036947022944, + "learning_rate": 9.044967524573754e-06, + "loss": 17.4971, + "step": 12256 + }, + { + "epoch": 0.22404811084504725, + "grad_norm": 7.4754209209697455, + "learning_rate": 9.044793516698894e-06, + "loss": 17.8842, + "step": 12257 + }, + { + "epoch": 0.22406639004149378, + "grad_norm": 6.060827315306458, + "learning_rate": 9.044619494647383e-06, + "loss": 17.2545, + "step": 12258 + }, + { + "epoch": 0.2240846692379403, + "grad_norm": 9.437373283210361, + "learning_rate": 9.044445458419834e-06, + "loss": 18.409, + "step": 12259 + }, + { + "epoch": 0.22410294843438683, + "grad_norm": 7.125314012595321, + "learning_rate": 9.044271408016856e-06, + "loss": 17.8622, + "step": 12260 + }, + { + "epoch": 0.22412122763083334, + "grad_norm": 6.546315968350545, + "learning_rate": 9.044097343439055e-06, + "loss": 17.2875, + "step": 12261 + }, + { + "epoch": 0.22413950682727987, + "grad_norm": 6.622025460441975, + "learning_rate": 9.043923264687045e-06, + "loss": 17.3571, + "step": 12262 + }, + { + "epoch": 0.2241577860237264, + "grad_norm": 7.090160753684515, + "learning_rate": 9.043749171761433e-06, + "loss": 17.9867, + "step": 12263 + }, + { + "epoch": 0.22417606522017292, + "grad_norm": 6.387876368059813, + "learning_rate": 9.043575064662833e-06, + "loss": 17.4131, + "step": 12264 + }, + { + "epoch": 0.22419434441661945, + "grad_norm": 5.936893993367334, + "learning_rate": 9.043400943391853e-06, + "loss": 17.2094, + "step": 12265 + }, + { + "epoch": 0.22421262361306596, + "grad_norm": 6.055384088286888, + "learning_rate": 9.043226807949103e-06, + "loss": 17.3158, + "step": 12266 + }, + { + "epoch": 0.2242309028095125, + "grad_norm": 7.2211548054105, + "learning_rate": 9.043052658335195e-06, + "loss": 17.7897, + "step": 12267 + }, + { + "epoch": 0.22424918200595903, + "grad_norm": 6.633570997443187, + "learning_rate": 9.042878494550736e-06, + "loss": 17.3886, + "step": 12268 + }, + { + "epoch": 0.22426746120240554, + "grad_norm": 6.899123000691209, + "learning_rate": 9.04270431659634e-06, + "loss": 17.9087, + "step": 12269 + }, + { + "epoch": 0.22428574039885207, + "grad_norm": 6.548213007922687, + "learning_rate": 9.042530124472617e-06, + "loss": 17.3299, + "step": 12270 + }, + { + "epoch": 0.22430401959529858, + "grad_norm": 6.58743098605056, + "learning_rate": 9.042355918180176e-06, + "loss": 17.5219, + "step": 12271 + }, + { + "epoch": 0.22432229879174512, + "grad_norm": 5.714440633680433, + "learning_rate": 9.042181697719627e-06, + "loss": 17.272, + "step": 12272 + }, + { + "epoch": 0.22434057798819165, + "grad_norm": 8.0434319343778, + "learning_rate": 9.042007463091584e-06, + "loss": 18.2251, + "step": 12273 + }, + { + "epoch": 0.22435885718463816, + "grad_norm": 6.3418512583693545, + "learning_rate": 9.041833214296656e-06, + "loss": 17.4084, + "step": 12274 + }, + { + "epoch": 
0.2243771363810847, + "grad_norm": 6.476545250193185, + "learning_rate": 9.041658951335451e-06, + "loss": 17.1878, + "step": 12275 + }, + { + "epoch": 0.2243954155775312, + "grad_norm": 6.491641118428361, + "learning_rate": 9.041484674208584e-06, + "loss": 17.5175, + "step": 12276 + }, + { + "epoch": 0.22441369477397774, + "grad_norm": 7.263383471347915, + "learning_rate": 9.041310382916663e-06, + "loss": 18.2752, + "step": 12277 + }, + { + "epoch": 0.22443197397042425, + "grad_norm": 5.8331459930085465, + "learning_rate": 9.0411360774603e-06, + "loss": 17.2002, + "step": 12278 + }, + { + "epoch": 0.22445025316687078, + "grad_norm": 5.8998655177738515, + "learning_rate": 9.040961757840105e-06, + "loss": 17.4966, + "step": 12279 + }, + { + "epoch": 0.22446853236331732, + "grad_norm": 6.678973847779486, + "learning_rate": 9.04078742405669e-06, + "loss": 17.5399, + "step": 12280 + }, + { + "epoch": 0.22448681155976383, + "grad_norm": 8.828607081786217, + "learning_rate": 9.040613076110667e-06, + "loss": 18.1534, + "step": 12281 + }, + { + "epoch": 0.22450509075621036, + "grad_norm": 7.563487311099231, + "learning_rate": 9.040438714002645e-06, + "loss": 18.0516, + "step": 12282 + }, + { + "epoch": 0.22452336995265687, + "grad_norm": 6.668099309745011, + "learning_rate": 9.040264337733236e-06, + "loss": 17.7034, + "step": 12283 + }, + { + "epoch": 0.2245416491491034, + "grad_norm": 7.098032500757102, + "learning_rate": 9.04008994730305e-06, + "loss": 17.812, + "step": 12284 + }, + { + "epoch": 0.22455992834554994, + "grad_norm": 6.616043707524381, + "learning_rate": 9.0399155427127e-06, + "loss": 17.7823, + "step": 12285 + }, + { + "epoch": 0.22457820754199645, + "grad_norm": 7.516322271528726, + "learning_rate": 9.039741123962797e-06, + "loss": 17.8506, + "step": 12286 + }, + { + "epoch": 0.22459648673844299, + "grad_norm": 7.694205402970119, + "learning_rate": 9.039566691053952e-06, + "loss": 18.3337, + "step": 12287 + }, + { + "epoch": 0.2246147659348895, + "grad_norm": 7.268622837670842, + "learning_rate": 9.039392243986775e-06, + "loss": 17.6922, + "step": 12288 + }, + { + "epoch": 0.22463304513133603, + "grad_norm": 6.224198191373773, + "learning_rate": 9.03921778276188e-06, + "loss": 17.4293, + "step": 12289 + }, + { + "epoch": 0.22465132432778256, + "grad_norm": 6.813770995982024, + "learning_rate": 9.039043307379878e-06, + "loss": 17.2797, + "step": 12290 + }, + { + "epoch": 0.22466960352422907, + "grad_norm": 7.772614195152835, + "learning_rate": 9.038868817841378e-06, + "loss": 17.9302, + "step": 12291 + }, + { + "epoch": 0.2246878827206756, + "grad_norm": 6.295833759594762, + "learning_rate": 9.038694314146994e-06, + "loss": 17.3379, + "step": 12292 + }, + { + "epoch": 0.22470616191712212, + "grad_norm": 6.433483661840641, + "learning_rate": 9.038519796297336e-06, + "loss": 17.4104, + "step": 12293 + }, + { + "epoch": 0.22472444111356865, + "grad_norm": 6.797406734423923, + "learning_rate": 9.038345264293019e-06, + "loss": 17.5433, + "step": 12294 + }, + { + "epoch": 0.22474272031001516, + "grad_norm": 8.778103723823701, + "learning_rate": 9.038170718134649e-06, + "loss": 18.4851, + "step": 12295 + }, + { + "epoch": 0.2247609995064617, + "grad_norm": 7.7645876826132225, + "learning_rate": 9.037996157822843e-06, + "loss": 17.9773, + "step": 12296 + }, + { + "epoch": 0.22477927870290823, + "grad_norm": 7.250220959212541, + "learning_rate": 9.03782158335821e-06, + "loss": 17.9881, + "step": 12297 + }, + { + "epoch": 0.22479755789935474, + "grad_norm": 6.823593498163983, + 
"learning_rate": 9.037646994741362e-06, + "loss": 17.6734, + "step": 12298 + }, + { + "epoch": 0.22481583709580127, + "grad_norm": 6.678320920993797, + "learning_rate": 9.037472391972915e-06, + "loss": 17.838, + "step": 12299 + }, + { + "epoch": 0.22483411629224778, + "grad_norm": 6.64005663537683, + "learning_rate": 9.037297775053476e-06, + "loss": 17.5277, + "step": 12300 + }, + { + "epoch": 0.22485239548869432, + "grad_norm": 6.95275096027094, + "learning_rate": 9.037123143983658e-06, + "loss": 17.8921, + "step": 12301 + }, + { + "epoch": 0.22487067468514085, + "grad_norm": 7.228429314890307, + "learning_rate": 9.036948498764071e-06, + "loss": 17.8771, + "step": 12302 + }, + { + "epoch": 0.22488895388158736, + "grad_norm": 7.130853561770472, + "learning_rate": 9.036773839395335e-06, + "loss": 17.9073, + "step": 12303 + }, + { + "epoch": 0.2249072330780339, + "grad_norm": 7.2958135662705255, + "learning_rate": 9.036599165878053e-06, + "loss": 17.8241, + "step": 12304 + }, + { + "epoch": 0.2249255122744804, + "grad_norm": 6.491199829661077, + "learning_rate": 9.036424478212843e-06, + "loss": 17.3875, + "step": 12305 + }, + { + "epoch": 0.22494379147092694, + "grad_norm": 7.487583887635919, + "learning_rate": 9.036249776400317e-06, + "loss": 17.8521, + "step": 12306 + }, + { + "epoch": 0.22496207066737348, + "grad_norm": 5.91990847320521, + "learning_rate": 9.036075060441083e-06, + "loss": 17.1465, + "step": 12307 + }, + { + "epoch": 0.22498034986381998, + "grad_norm": 16.952346676832793, + "learning_rate": 9.035900330335757e-06, + "loss": 17.8836, + "step": 12308 + }, + { + "epoch": 0.22499862906026652, + "grad_norm": 6.955858301762353, + "learning_rate": 9.035725586084951e-06, + "loss": 17.3048, + "step": 12309 + }, + { + "epoch": 0.22501690825671303, + "grad_norm": 7.941360770280564, + "learning_rate": 9.035550827689276e-06, + "loss": 18.2653, + "step": 12310 + }, + { + "epoch": 0.22503518745315956, + "grad_norm": 8.13819670099608, + "learning_rate": 9.035376055149347e-06, + "loss": 18.0662, + "step": 12311 + }, + { + "epoch": 0.22505346664960607, + "grad_norm": 7.437032540865966, + "learning_rate": 9.035201268465774e-06, + "loss": 17.7133, + "step": 12312 + }, + { + "epoch": 0.2250717458460526, + "grad_norm": 8.855246124244507, + "learning_rate": 9.035026467639172e-06, + "loss": 18.5506, + "step": 12313 + }, + { + "epoch": 0.22509002504249914, + "grad_norm": 7.074037704114128, + "learning_rate": 9.034851652670151e-06, + "loss": 17.9627, + "step": 12314 + }, + { + "epoch": 0.22510830423894565, + "grad_norm": 7.773871078629837, + "learning_rate": 9.034676823559326e-06, + "loss": 17.9938, + "step": 12315 + }, + { + "epoch": 0.22512658343539219, + "grad_norm": 7.41166943339688, + "learning_rate": 9.034501980307309e-06, + "loss": 17.877, + "step": 12316 + }, + { + "epoch": 0.2251448626318387, + "grad_norm": 7.703758649739981, + "learning_rate": 9.034327122914711e-06, + "loss": 17.9387, + "step": 12317 + }, + { + "epoch": 0.22516314182828523, + "grad_norm": 6.287787631773602, + "learning_rate": 9.034152251382148e-06, + "loss": 17.5248, + "step": 12318 + }, + { + "epoch": 0.22518142102473176, + "grad_norm": 5.991404866001102, + "learning_rate": 9.033977365710231e-06, + "loss": 17.175, + "step": 12319 + }, + { + "epoch": 0.22519970022117827, + "grad_norm": 6.737952129673305, + "learning_rate": 9.033802465899573e-06, + "loss": 17.4521, + "step": 12320 + }, + { + "epoch": 0.2252179794176248, + "grad_norm": 6.635142538134955, + "learning_rate": 9.033627551950788e-06, + "loss": 17.5758, + "step": 
12321 + }, + { + "epoch": 0.22523625861407132, + "grad_norm": 6.187063928254991, + "learning_rate": 9.03345262386449e-06, + "loss": 17.3834, + "step": 12322 + }, + { + "epoch": 0.22525453781051785, + "grad_norm": 6.854516115810566, + "learning_rate": 9.033277681641288e-06, + "loss": 17.5332, + "step": 12323 + }, + { + "epoch": 0.2252728170069644, + "grad_norm": 7.3160421278758445, + "learning_rate": 9.033102725281799e-06, + "loss": 18.0384, + "step": 12324 + }, + { + "epoch": 0.2252910962034109, + "grad_norm": 5.726426405407629, + "learning_rate": 9.032927754786633e-06, + "loss": 17.1843, + "step": 12325 + }, + { + "epoch": 0.22530937539985743, + "grad_norm": 6.094053491340705, + "learning_rate": 9.032752770156408e-06, + "loss": 17.4087, + "step": 12326 + }, + { + "epoch": 0.22532765459630394, + "grad_norm": 7.677299805986886, + "learning_rate": 9.032577771391732e-06, + "loss": 18.114, + "step": 12327 + }, + { + "epoch": 0.22534593379275047, + "grad_norm": 6.226787280353511, + "learning_rate": 9.032402758493222e-06, + "loss": 17.5154, + "step": 12328 + }, + { + "epoch": 0.22536421298919698, + "grad_norm": 7.993772075133782, + "learning_rate": 9.032227731461492e-06, + "loss": 18.0567, + "step": 12329 + }, + { + "epoch": 0.22538249218564352, + "grad_norm": 6.667197143954941, + "learning_rate": 9.03205269029715e-06, + "loss": 17.7498, + "step": 12330 + }, + { + "epoch": 0.22540077138209005, + "grad_norm": 6.9567092826670915, + "learning_rate": 9.031877635000817e-06, + "loss": 17.7729, + "step": 12331 + }, + { + "epoch": 0.22541905057853656, + "grad_norm": 6.854502695810457, + "learning_rate": 9.0317025655731e-06, + "loss": 17.7204, + "step": 12332 + }, + { + "epoch": 0.2254373297749831, + "grad_norm": 6.71738812866057, + "learning_rate": 9.031527482014617e-06, + "loss": 17.5618, + "step": 12333 + }, + { + "epoch": 0.2254556089714296, + "grad_norm": 7.189306010353928, + "learning_rate": 9.031352384325977e-06, + "loss": 17.6072, + "step": 12334 + }, + { + "epoch": 0.22547388816787614, + "grad_norm": 7.855535097029646, + "learning_rate": 9.0311772725078e-06, + "loss": 17.5918, + "step": 12335 + }, + { + "epoch": 0.22549216736432268, + "grad_norm": 6.600393763396325, + "learning_rate": 9.031002146560697e-06, + "loss": 17.3361, + "step": 12336 + }, + { + "epoch": 0.22551044656076918, + "grad_norm": 6.748223412611591, + "learning_rate": 9.03082700648528e-06, + "loss": 17.6995, + "step": 12337 + }, + { + "epoch": 0.22552872575721572, + "grad_norm": 5.70658566896244, + "learning_rate": 9.030651852282164e-06, + "loss": 17.1565, + "step": 12338 + }, + { + "epoch": 0.22554700495366223, + "grad_norm": 6.576046686350633, + "learning_rate": 9.030476683951961e-06, + "loss": 17.6812, + "step": 12339 + }, + { + "epoch": 0.22556528415010876, + "grad_norm": 6.747470316806803, + "learning_rate": 9.03030150149529e-06, + "loss": 17.6554, + "step": 12340 + }, + { + "epoch": 0.2255835633465553, + "grad_norm": 6.812748807744925, + "learning_rate": 9.03012630491276e-06, + "loss": 17.8052, + "step": 12341 + }, + { + "epoch": 0.2256018425430018, + "grad_norm": 6.138326043650591, + "learning_rate": 9.029951094204988e-06, + "loss": 17.3363, + "step": 12342 + }, + { + "epoch": 0.22562012173944834, + "grad_norm": 6.25429209699059, + "learning_rate": 9.029775869372589e-06, + "loss": 17.4272, + "step": 12343 + }, + { + "epoch": 0.22563840093589485, + "grad_norm": 8.214937102579777, + "learning_rate": 9.029600630416171e-06, + "loss": 18.3377, + "step": 12344 + }, + { + "epoch": 0.2256566801323414, + "grad_norm": 
6.270352054936354, + "learning_rate": 9.029425377336356e-06, + "loss": 17.568, + "step": 12345 + }, + { + "epoch": 0.2256749593287879, + "grad_norm": 5.901018463275873, + "learning_rate": 9.029250110133753e-06, + "loss": 17.266, + "step": 12346 + }, + { + "epoch": 0.22569323852523443, + "grad_norm": 6.701032995897633, + "learning_rate": 9.02907482880898e-06, + "loss": 18.0243, + "step": 12347 + }, + { + "epoch": 0.22571151772168097, + "grad_norm": 6.966690579019389, + "learning_rate": 9.028899533362645e-06, + "loss": 17.8246, + "step": 12348 + }, + { + "epoch": 0.22572979691812747, + "grad_norm": 7.11109379819678, + "learning_rate": 9.02872422379537e-06, + "loss": 17.9911, + "step": 12349 + }, + { + "epoch": 0.225748076114574, + "grad_norm": 7.267833352735575, + "learning_rate": 9.028548900107767e-06, + "loss": 17.9534, + "step": 12350 + }, + { + "epoch": 0.22576635531102052, + "grad_norm": 6.169634868324523, + "learning_rate": 9.028373562300448e-06, + "loss": 17.2552, + "step": 12351 + }, + { + "epoch": 0.22578463450746705, + "grad_norm": 6.5126285468693474, + "learning_rate": 9.02819821037403e-06, + "loss": 17.5073, + "step": 12352 + }, + { + "epoch": 0.2258029137039136, + "grad_norm": 7.21379651562743, + "learning_rate": 9.028022844329126e-06, + "loss": 17.9977, + "step": 12353 + }, + { + "epoch": 0.2258211929003601, + "grad_norm": 6.5548063039005315, + "learning_rate": 9.027847464166353e-06, + "loss": 17.8834, + "step": 12354 + }, + { + "epoch": 0.22583947209680663, + "grad_norm": 7.08098150183937, + "learning_rate": 9.027672069886322e-06, + "loss": 17.8187, + "step": 12355 + }, + { + "epoch": 0.22585775129325314, + "grad_norm": 7.038826231934636, + "learning_rate": 9.02749666148965e-06, + "loss": 17.9689, + "step": 12356 + }, + { + "epoch": 0.22587603048969968, + "grad_norm": 7.07682686405181, + "learning_rate": 9.027321238976954e-06, + "loss": 17.8265, + "step": 12357 + }, + { + "epoch": 0.2258943096861462, + "grad_norm": 7.530074738438792, + "learning_rate": 9.027145802348844e-06, + "loss": 17.7535, + "step": 12358 + }, + { + "epoch": 0.22591258888259272, + "grad_norm": 6.446825344866139, + "learning_rate": 9.02697035160594e-06, + "loss": 17.585, + "step": 12359 + }, + { + "epoch": 0.22593086807903925, + "grad_norm": 7.637084923790986, + "learning_rate": 9.026794886748853e-06, + "loss": 18.0022, + "step": 12360 + }, + { + "epoch": 0.22594914727548576, + "grad_norm": 6.987077479178613, + "learning_rate": 9.0266194077782e-06, + "loss": 17.7459, + "step": 12361 + }, + { + "epoch": 0.2259674264719323, + "grad_norm": 6.824769412214328, + "learning_rate": 9.026443914694594e-06, + "loss": 17.9237, + "step": 12362 + }, + { + "epoch": 0.2259857056683788, + "grad_norm": 6.348585215954245, + "learning_rate": 9.026268407498651e-06, + "loss": 17.421, + "step": 12363 + }, + { + "epoch": 0.22600398486482534, + "grad_norm": 6.102880320893344, + "learning_rate": 9.026092886190989e-06, + "loss": 17.4555, + "step": 12364 + }, + { + "epoch": 0.22602226406127188, + "grad_norm": 6.55237701023886, + "learning_rate": 9.02591735077222e-06, + "loss": 17.4844, + "step": 12365 + }, + { + "epoch": 0.22604054325771838, + "grad_norm": 6.144109160099827, + "learning_rate": 9.025741801242959e-06, + "loss": 17.6064, + "step": 12366 + }, + { + "epoch": 0.22605882245416492, + "grad_norm": 7.027676643650388, + "learning_rate": 9.025566237603822e-06, + "loss": 17.5832, + "step": 12367 + }, + { + "epoch": 0.22607710165061143, + "grad_norm": 6.0564480838712775, + "learning_rate": 9.025390659855426e-06, + "loss": 17.2484, 
+ "step": 12368 + }, + { + "epoch": 0.22609538084705796, + "grad_norm": 6.531320813353044, + "learning_rate": 9.025215067998386e-06, + "loss": 17.6086, + "step": 12369 + }, + { + "epoch": 0.2261136600435045, + "grad_norm": 6.95048430465292, + "learning_rate": 9.025039462033314e-06, + "loss": 17.643, + "step": 12370 + }, + { + "epoch": 0.226131939239951, + "grad_norm": 7.44620805011939, + "learning_rate": 9.024863841960829e-06, + "loss": 17.6497, + "step": 12371 + }, + { + "epoch": 0.22615021843639754, + "grad_norm": 8.917362870062181, + "learning_rate": 9.024688207781547e-06, + "loss": 18.8994, + "step": 12372 + }, + { + "epoch": 0.22616849763284405, + "grad_norm": 7.280697209067718, + "learning_rate": 9.02451255949608e-06, + "loss": 17.5772, + "step": 12373 + }, + { + "epoch": 0.2261867768292906, + "grad_norm": 6.702853337006878, + "learning_rate": 9.024336897105045e-06, + "loss": 17.9045, + "step": 12374 + }, + { + "epoch": 0.22620505602573712, + "grad_norm": 6.547542319992636, + "learning_rate": 9.024161220609061e-06, + "loss": 17.392, + "step": 12375 + }, + { + "epoch": 0.22622333522218363, + "grad_norm": 6.929016110654037, + "learning_rate": 9.023985530008742e-06, + "loss": 17.8594, + "step": 12376 + }, + { + "epoch": 0.22624161441863017, + "grad_norm": 5.367073952459153, + "learning_rate": 9.023809825304698e-06, + "loss": 17.1462, + "step": 12377 + }, + { + "epoch": 0.22625989361507667, + "grad_norm": 7.988439728945888, + "learning_rate": 9.023634106497555e-06, + "loss": 17.9267, + "step": 12378 + }, + { + "epoch": 0.2262781728115232, + "grad_norm": 7.494605964344144, + "learning_rate": 9.02345837358792e-06, + "loss": 18.0292, + "step": 12379 + }, + { + "epoch": 0.22629645200796972, + "grad_norm": 5.4240483208814, + "learning_rate": 9.023282626576413e-06, + "loss": 17.0903, + "step": 12380 + }, + { + "epoch": 0.22631473120441625, + "grad_norm": 7.704682973963291, + "learning_rate": 9.02310686546365e-06, + "loss": 17.8397, + "step": 12381 + }, + { + "epoch": 0.2263330104008628, + "grad_norm": 6.395365992338035, + "learning_rate": 9.022931090250247e-06, + "loss": 17.623, + "step": 12382 + }, + { + "epoch": 0.2263512895973093, + "grad_norm": 6.859183186820666, + "learning_rate": 9.022755300936821e-06, + "loss": 17.7596, + "step": 12383 + }, + { + "epoch": 0.22636956879375583, + "grad_norm": 5.765596472141772, + "learning_rate": 9.022579497523985e-06, + "loss": 17.1133, + "step": 12384 + }, + { + "epoch": 0.22638784799020234, + "grad_norm": 7.140007933470052, + "learning_rate": 9.022403680012357e-06, + "loss": 17.7501, + "step": 12385 + }, + { + "epoch": 0.22640612718664888, + "grad_norm": 6.718543300536023, + "learning_rate": 9.022227848402552e-06, + "loss": 17.6706, + "step": 12386 + }, + { + "epoch": 0.2264244063830954, + "grad_norm": 5.924532017217724, + "learning_rate": 9.02205200269519e-06, + "loss": 17.33, + "step": 12387 + }, + { + "epoch": 0.22644268557954192, + "grad_norm": 8.222872391505957, + "learning_rate": 9.021876142890882e-06, + "loss": 18.6182, + "step": 12388 + }, + { + "epoch": 0.22646096477598845, + "grad_norm": 6.734366446496459, + "learning_rate": 9.02170026899025e-06, + "loss": 17.9106, + "step": 12389 + }, + { + "epoch": 0.22647924397243496, + "grad_norm": 6.8857777869063055, + "learning_rate": 9.021524380993906e-06, + "loss": 17.7422, + "step": 12390 + }, + { + "epoch": 0.2264975231688815, + "grad_norm": 7.125279116793899, + "learning_rate": 9.021348478902468e-06, + "loss": 17.674, + "step": 12391 + }, + { + "epoch": 0.22651580236532803, + "grad_norm": 
7.436442051858977, + "learning_rate": 9.021172562716551e-06, + "loss": 17.9451, + "step": 12392 + }, + { + "epoch": 0.22653408156177454, + "grad_norm": 6.187236880627777, + "learning_rate": 9.020996632436775e-06, + "loss": 17.5915, + "step": 12393 + }, + { + "epoch": 0.22655236075822108, + "grad_norm": 5.903576888005673, + "learning_rate": 9.020820688063755e-06, + "loss": 17.2732, + "step": 12394 + }, + { + "epoch": 0.22657063995466759, + "grad_norm": 6.413378047315744, + "learning_rate": 9.020644729598107e-06, + "loss": 17.5233, + "step": 12395 + }, + { + "epoch": 0.22658891915111412, + "grad_norm": 6.3484233436312, + "learning_rate": 9.020468757040449e-06, + "loss": 17.7247, + "step": 12396 + }, + { + "epoch": 0.22660719834756063, + "grad_norm": 5.911804654071197, + "learning_rate": 9.020292770391394e-06, + "loss": 17.2241, + "step": 12397 + }, + { + "epoch": 0.22662547754400716, + "grad_norm": 6.724395222585753, + "learning_rate": 9.020116769651565e-06, + "loss": 17.6779, + "step": 12398 + }, + { + "epoch": 0.2266437567404537, + "grad_norm": 7.966132708749416, + "learning_rate": 9.019940754821574e-06, + "loss": 17.897, + "step": 12399 + }, + { + "epoch": 0.2266620359369002, + "grad_norm": 8.099072340263199, + "learning_rate": 9.01976472590204e-06, + "loss": 18.1186, + "step": 12400 + }, + { + "epoch": 0.22668031513334674, + "grad_norm": 5.802227930996529, + "learning_rate": 9.01958868289358e-06, + "loss": 17.2699, + "step": 12401 + }, + { + "epoch": 0.22669859432979325, + "grad_norm": 6.146892512705219, + "learning_rate": 9.019412625796808e-06, + "loss": 17.5141, + "step": 12402 + }, + { + "epoch": 0.2267168735262398, + "grad_norm": 6.308607104957777, + "learning_rate": 9.019236554612346e-06, + "loss": 17.3927, + "step": 12403 + }, + { + "epoch": 0.22673515272268632, + "grad_norm": 8.704926628000624, + "learning_rate": 9.019060469340807e-06, + "loss": 18.2199, + "step": 12404 + }, + { + "epoch": 0.22675343191913283, + "grad_norm": 6.7601731358840516, + "learning_rate": 9.01888436998281e-06, + "loss": 17.7247, + "step": 12405 + }, + { + "epoch": 0.22677171111557937, + "grad_norm": 7.0988439140350135, + "learning_rate": 9.018708256538972e-06, + "loss": 17.521, + "step": 12406 + }, + { + "epoch": 0.22678999031202587, + "grad_norm": 6.072413155833133, + "learning_rate": 9.018532129009912e-06, + "loss": 17.3915, + "step": 12407 + }, + { + "epoch": 0.2268082695084724, + "grad_norm": 7.826093537676457, + "learning_rate": 9.018355987396244e-06, + "loss": 17.8245, + "step": 12408 + }, + { + "epoch": 0.22682654870491895, + "grad_norm": 6.606313446617124, + "learning_rate": 9.018179831698588e-06, + "loss": 17.714, + "step": 12409 + }, + { + "epoch": 0.22684482790136545, + "grad_norm": 8.096086364572532, + "learning_rate": 9.01800366191756e-06, + "loss": 17.9681, + "step": 12410 + }, + { + "epoch": 0.226863107097812, + "grad_norm": 6.754049321361079, + "learning_rate": 9.017827478053778e-06, + "loss": 17.6225, + "step": 12411 + }, + { + "epoch": 0.2268813862942585, + "grad_norm": 5.726172916773858, + "learning_rate": 9.017651280107859e-06, + "loss": 17.231, + "step": 12412 + }, + { + "epoch": 0.22689966549070503, + "grad_norm": 7.080996932121469, + "learning_rate": 9.01747506808042e-06, + "loss": 17.7147, + "step": 12413 + }, + { + "epoch": 0.22691794468715154, + "grad_norm": 7.132248945901099, + "learning_rate": 9.017298841972082e-06, + "loss": 17.9743, + "step": 12414 + }, + { + "epoch": 0.22693622388359808, + "grad_norm": 5.890459474635145, + "learning_rate": 9.017122601783457e-06, + "loss": 
17.2655, + "step": 12415 + }, + { + "epoch": 0.2269545030800446, + "grad_norm": 7.9925754588042395, + "learning_rate": 9.016946347515168e-06, + "loss": 17.7998, + "step": 12416 + }, + { + "epoch": 0.22697278227649112, + "grad_norm": 6.339927451707241, + "learning_rate": 9.016770079167829e-06, + "loss": 17.4274, + "step": 12417 + }, + { + "epoch": 0.22699106147293766, + "grad_norm": 6.3839981397111, + "learning_rate": 9.016593796742062e-06, + "loss": 17.5346, + "step": 12418 + }, + { + "epoch": 0.22700934066938416, + "grad_norm": 6.714476029999333, + "learning_rate": 9.01641750023848e-06, + "loss": 17.6582, + "step": 12419 + }, + { + "epoch": 0.2270276198658307, + "grad_norm": 8.259896179033946, + "learning_rate": 9.016241189657705e-06, + "loss": 18.2777, + "step": 12420 + }, + { + "epoch": 0.22704589906227723, + "grad_norm": 6.835530238088462, + "learning_rate": 9.01606486500035e-06, + "loss": 17.698, + "step": 12421 + }, + { + "epoch": 0.22706417825872374, + "grad_norm": 5.3640041829032, + "learning_rate": 9.015888526267039e-06, + "loss": 17.0343, + "step": 12422 + }, + { + "epoch": 0.22708245745517028, + "grad_norm": 6.968767675137546, + "learning_rate": 9.015712173458387e-06, + "loss": 17.7252, + "step": 12423 + }, + { + "epoch": 0.22710073665161679, + "grad_norm": 7.748829080671614, + "learning_rate": 9.01553580657501e-06, + "loss": 17.9642, + "step": 12424 + }, + { + "epoch": 0.22711901584806332, + "grad_norm": 6.653206515480309, + "learning_rate": 9.015359425617532e-06, + "loss": 17.6293, + "step": 12425 + }, + { + "epoch": 0.22713729504450986, + "grad_norm": 6.874471388442792, + "learning_rate": 9.015183030586565e-06, + "loss": 17.8499, + "step": 12426 + }, + { + "epoch": 0.22715557424095636, + "grad_norm": 6.867013574778568, + "learning_rate": 9.015006621482731e-06, + "loss": 17.6569, + "step": 12427 + }, + { + "epoch": 0.2271738534374029, + "grad_norm": 7.165359716422239, + "learning_rate": 9.014830198306648e-06, + "loss": 18.0228, + "step": 12428 + }, + { + "epoch": 0.2271921326338494, + "grad_norm": 7.550212146351529, + "learning_rate": 9.014653761058932e-06, + "loss": 17.9426, + "step": 12429 + }, + { + "epoch": 0.22721041183029594, + "grad_norm": 5.286133948250526, + "learning_rate": 9.014477309740203e-06, + "loss": 17.0602, + "step": 12430 + }, + { + "epoch": 0.22722869102674245, + "grad_norm": 7.310267374318483, + "learning_rate": 9.014300844351081e-06, + "loss": 17.6793, + "step": 12431 + }, + { + "epoch": 0.227246970223189, + "grad_norm": 6.601547178421724, + "learning_rate": 9.014124364892181e-06, + "loss": 17.8393, + "step": 12432 + }, + { + "epoch": 0.22726524941963552, + "grad_norm": 5.8132377945373905, + "learning_rate": 9.013947871364123e-06, + "loss": 17.1707, + "step": 12433 + }, + { + "epoch": 0.22728352861608203, + "grad_norm": 6.378790816808448, + "learning_rate": 9.013771363767527e-06, + "loss": 17.482, + "step": 12434 + }, + { + "epoch": 0.22730180781252857, + "grad_norm": 7.174213886845487, + "learning_rate": 9.013594842103012e-06, + "loss": 17.3914, + "step": 12435 + }, + { + "epoch": 0.22732008700897507, + "grad_norm": 7.168202336406966, + "learning_rate": 9.013418306371194e-06, + "loss": 17.7358, + "step": 12436 + }, + { + "epoch": 0.2273383662054216, + "grad_norm": 6.661420616944232, + "learning_rate": 9.013241756572692e-06, + "loss": 17.6339, + "step": 12437 + }, + { + "epoch": 0.22735664540186815, + "grad_norm": 7.098420385874402, + "learning_rate": 9.013065192708128e-06, + "loss": 17.7022, + "step": 12438 + }, + { + "epoch": 0.22737492459831465, + 
"grad_norm": 5.441501533007861, + "learning_rate": 9.01288861477812e-06, + "loss": 16.9966, + "step": 12439 + }, + { + "epoch": 0.2273932037947612, + "grad_norm": 7.681352001784799, + "learning_rate": 9.012712022783283e-06, + "loss": 17.913, + "step": 12440 + }, + { + "epoch": 0.2274114829912077, + "grad_norm": 8.949349796420888, + "learning_rate": 9.012535416724238e-06, + "loss": 18.6086, + "step": 12441 + }, + { + "epoch": 0.22742976218765423, + "grad_norm": 7.739705134628737, + "learning_rate": 9.012358796601605e-06, + "loss": 18.1888, + "step": 12442 + }, + { + "epoch": 0.22744804138410077, + "grad_norm": 7.583475393017301, + "learning_rate": 9.012182162416003e-06, + "loss": 17.5618, + "step": 12443 + }, + { + "epoch": 0.22746632058054728, + "grad_norm": 7.548928680040296, + "learning_rate": 9.012005514168052e-06, + "loss": 17.7478, + "step": 12444 + }, + { + "epoch": 0.2274845997769938, + "grad_norm": 7.358591726462837, + "learning_rate": 9.01182885185837e-06, + "loss": 17.736, + "step": 12445 + }, + { + "epoch": 0.22750287897344032, + "grad_norm": 5.2893363235863395, + "learning_rate": 9.011652175487574e-06, + "loss": 16.9848, + "step": 12446 + }, + { + "epoch": 0.22752115816988686, + "grad_norm": 6.65548419118315, + "learning_rate": 9.011475485056285e-06, + "loss": 17.7805, + "step": 12447 + }, + { + "epoch": 0.22753943736633336, + "grad_norm": 7.441514325461889, + "learning_rate": 9.011298780565124e-06, + "loss": 17.8744, + "step": 12448 + }, + { + "epoch": 0.2275577165627799, + "grad_norm": 6.741444606272991, + "learning_rate": 9.011122062014709e-06, + "loss": 17.8172, + "step": 12449 + }, + { + "epoch": 0.22757599575922643, + "grad_norm": 8.334131899863701, + "learning_rate": 9.010945329405658e-06, + "loss": 18.2828, + "step": 12450 + }, + { + "epoch": 0.22759427495567294, + "grad_norm": 7.322963341330085, + "learning_rate": 9.010768582738592e-06, + "loss": 17.596, + "step": 12451 + }, + { + "epoch": 0.22761255415211948, + "grad_norm": 6.027285555663227, + "learning_rate": 9.01059182201413e-06, + "loss": 17.4733, + "step": 12452 + }, + { + "epoch": 0.227630833348566, + "grad_norm": 5.202836623511983, + "learning_rate": 9.010415047232894e-06, + "loss": 17.0539, + "step": 12453 + }, + { + "epoch": 0.22764911254501252, + "grad_norm": 6.665075150072932, + "learning_rate": 9.010238258395498e-06, + "loss": 17.7745, + "step": 12454 + }, + { + "epoch": 0.22766739174145906, + "grad_norm": 8.423251777382154, + "learning_rate": 9.010061455502567e-06, + "loss": 18.1023, + "step": 12455 + }, + { + "epoch": 0.22768567093790557, + "grad_norm": 6.547059997079752, + "learning_rate": 9.009884638554718e-06, + "loss": 17.5038, + "step": 12456 + }, + { + "epoch": 0.2277039501343521, + "grad_norm": 5.767718578287258, + "learning_rate": 9.00970780755257e-06, + "loss": 17.2456, + "step": 12457 + }, + { + "epoch": 0.2277222293307986, + "grad_norm": 7.283933715684057, + "learning_rate": 9.009530962496746e-06, + "loss": 18.127, + "step": 12458 + }, + { + "epoch": 0.22774050852724514, + "grad_norm": 5.845148615638448, + "learning_rate": 9.009354103387864e-06, + "loss": 17.2741, + "step": 12459 + }, + { + "epoch": 0.22775878772369168, + "grad_norm": 7.51111630909009, + "learning_rate": 9.009177230226542e-06, + "loss": 18.3568, + "step": 12460 + }, + { + "epoch": 0.2277770669201382, + "grad_norm": 7.1047743057025095, + "learning_rate": 9.009000343013403e-06, + "loss": 17.9295, + "step": 12461 + }, + { + "epoch": 0.22779534611658472, + "grad_norm": 7.938009218971499, + "learning_rate": 9.008823441749067e-06, + 
"loss": 17.8752, + "step": 12462 + }, + { + "epoch": 0.22781362531303123, + "grad_norm": 7.037058314775159, + "learning_rate": 9.008646526434151e-06, + "loss": 17.6727, + "step": 12463 + }, + { + "epoch": 0.22783190450947777, + "grad_norm": 6.121077682756427, + "learning_rate": 9.008469597069276e-06, + "loss": 17.6428, + "step": 12464 + }, + { + "epoch": 0.22785018370592428, + "grad_norm": 6.593101398863978, + "learning_rate": 9.008292653655064e-06, + "loss": 17.5556, + "step": 12465 + }, + { + "epoch": 0.2278684629023708, + "grad_norm": 7.31502964129193, + "learning_rate": 9.008115696192133e-06, + "loss": 18.0551, + "step": 12466 + }, + { + "epoch": 0.22788674209881735, + "grad_norm": 6.146276626518445, + "learning_rate": 9.007938724681106e-06, + "loss": 17.4062, + "step": 12467 + }, + { + "epoch": 0.22790502129526385, + "grad_norm": 5.469743171696297, + "learning_rate": 9.0077617391226e-06, + "loss": 17.1875, + "step": 12468 + }, + { + "epoch": 0.2279233004917104, + "grad_norm": 7.813582794895173, + "learning_rate": 9.007584739517237e-06, + "loss": 18.0963, + "step": 12469 + }, + { + "epoch": 0.2279415796881569, + "grad_norm": 7.550237307960634, + "learning_rate": 9.007407725865638e-06, + "loss": 17.8713, + "step": 12470 + }, + { + "epoch": 0.22795985888460343, + "grad_norm": 6.20967047309663, + "learning_rate": 9.007230698168422e-06, + "loss": 17.3119, + "step": 12471 + }, + { + "epoch": 0.22797813808104997, + "grad_norm": 6.346006043995053, + "learning_rate": 9.007053656426213e-06, + "loss": 17.5279, + "step": 12472 + }, + { + "epoch": 0.22799641727749648, + "grad_norm": 6.260159084648033, + "learning_rate": 9.006876600639624e-06, + "loss": 17.5299, + "step": 12473 + }, + { + "epoch": 0.228014696473943, + "grad_norm": 6.323058036066158, + "learning_rate": 9.006699530809284e-06, + "loss": 17.4276, + "step": 12474 + }, + { + "epoch": 0.22803297567038952, + "grad_norm": 6.19932219235715, + "learning_rate": 9.006522446935807e-06, + "loss": 17.4824, + "step": 12475 + }, + { + "epoch": 0.22805125486683606, + "grad_norm": 7.865928942197825, + "learning_rate": 9.006345349019818e-06, + "loss": 18.0053, + "step": 12476 + }, + { + "epoch": 0.2280695340632826, + "grad_norm": 6.363254127694012, + "learning_rate": 9.006168237061936e-06, + "loss": 17.3546, + "step": 12477 + }, + { + "epoch": 0.2280878132597291, + "grad_norm": 5.913155834157607, + "learning_rate": 9.005991111062782e-06, + "loss": 17.4868, + "step": 12478 + }, + { + "epoch": 0.22810609245617564, + "grad_norm": 7.065934559973342, + "learning_rate": 9.005813971022977e-06, + "loss": 17.7804, + "step": 12479 + }, + { + "epoch": 0.22812437165262214, + "grad_norm": 6.476520955100705, + "learning_rate": 9.005636816943141e-06, + "loss": 17.4125, + "step": 12480 + }, + { + "epoch": 0.22814265084906868, + "grad_norm": 6.216250226614293, + "learning_rate": 9.005459648823897e-06, + "loss": 17.3022, + "step": 12481 + }, + { + "epoch": 0.2281609300455152, + "grad_norm": 6.116706968470408, + "learning_rate": 9.005282466665864e-06, + "loss": 17.5586, + "step": 12482 + }, + { + "epoch": 0.22817920924196172, + "grad_norm": 6.999228409098107, + "learning_rate": 9.005105270469663e-06, + "loss": 17.8334, + "step": 12483 + }, + { + "epoch": 0.22819748843840826, + "grad_norm": 7.125612598589452, + "learning_rate": 9.004928060235915e-06, + "loss": 17.7083, + "step": 12484 + }, + { + "epoch": 0.22821576763485477, + "grad_norm": 7.8696084480006085, + "learning_rate": 9.004750835965241e-06, + "loss": 18.3996, + "step": 12485 + }, + { + "epoch": 
0.2282340468313013, + "grad_norm": 6.2676428832425, + "learning_rate": 9.004573597658265e-06, + "loss": 17.313, + "step": 12486 + }, + { + "epoch": 0.2282523260277478, + "grad_norm": 7.765047865395796, + "learning_rate": 9.004396345315604e-06, + "loss": 17.9367, + "step": 12487 + }, + { + "epoch": 0.22827060522419435, + "grad_norm": 6.9273134841788035, + "learning_rate": 9.004219078937883e-06, + "loss": 17.5964, + "step": 12488 + }, + { + "epoch": 0.22828888442064088, + "grad_norm": 6.450689977922569, + "learning_rate": 9.004041798525723e-06, + "loss": 17.374, + "step": 12489 + }, + { + "epoch": 0.2283071636170874, + "grad_norm": 5.876191355914733, + "learning_rate": 9.00386450407974e-06, + "loss": 17.3217, + "step": 12490 + }, + { + "epoch": 0.22832544281353392, + "grad_norm": 6.824802720193081, + "learning_rate": 9.003687195600561e-06, + "loss": 17.6096, + "step": 12491 + }, + { + "epoch": 0.22834372200998043, + "grad_norm": 6.326670893919381, + "learning_rate": 9.003509873088806e-06, + "loss": 17.4869, + "step": 12492 + }, + { + "epoch": 0.22836200120642697, + "grad_norm": 6.080616837358299, + "learning_rate": 9.003332536545097e-06, + "loss": 17.6232, + "step": 12493 + }, + { + "epoch": 0.2283802804028735, + "grad_norm": 6.481898583726981, + "learning_rate": 9.003155185970055e-06, + "loss": 17.6116, + "step": 12494 + }, + { + "epoch": 0.22839855959932, + "grad_norm": 7.2780683463881495, + "learning_rate": 9.0029778213643e-06, + "loss": 17.7783, + "step": 12495 + }, + { + "epoch": 0.22841683879576655, + "grad_norm": 7.187827518993483, + "learning_rate": 9.002800442728456e-06, + "loss": 17.9959, + "step": 12496 + }, + { + "epoch": 0.22843511799221305, + "grad_norm": 6.31378587333316, + "learning_rate": 9.002623050063144e-06, + "loss": 17.3304, + "step": 12497 + }, + { + "epoch": 0.2284533971886596, + "grad_norm": 6.371838918299203, + "learning_rate": 9.002445643368985e-06, + "loss": 17.5231, + "step": 12498 + }, + { + "epoch": 0.2284716763851061, + "grad_norm": 6.670629055001435, + "learning_rate": 9.002268222646602e-06, + "loss": 17.683, + "step": 12499 + }, + { + "epoch": 0.22848995558155263, + "grad_norm": 7.111535221045845, + "learning_rate": 9.002090787896616e-06, + "loss": 17.6803, + "step": 12500 + }, + { + "epoch": 0.22850823477799917, + "grad_norm": 6.907882759243566, + "learning_rate": 9.001913339119647e-06, + "loss": 17.8974, + "step": 12501 + }, + { + "epoch": 0.22852651397444568, + "grad_norm": 6.7099161801259966, + "learning_rate": 9.001735876316323e-06, + "loss": 17.4129, + "step": 12502 + }, + { + "epoch": 0.2285447931708922, + "grad_norm": 6.24639562168642, + "learning_rate": 9.001558399487257e-06, + "loss": 17.2618, + "step": 12503 + }, + { + "epoch": 0.22856307236733872, + "grad_norm": 6.191726897974517, + "learning_rate": 9.00138090863308e-06, + "loss": 17.3679, + "step": 12504 + }, + { + "epoch": 0.22858135156378526, + "grad_norm": 6.494394177223834, + "learning_rate": 9.00120340375441e-06, + "loss": 17.8027, + "step": 12505 + }, + { + "epoch": 0.2285996307602318, + "grad_norm": 6.973080184641169, + "learning_rate": 9.001025884851868e-06, + "loss": 17.6395, + "step": 12506 + }, + { + "epoch": 0.2286179099566783, + "grad_norm": 6.050184229241976, + "learning_rate": 9.000848351926077e-06, + "loss": 17.1681, + "step": 12507 + }, + { + "epoch": 0.22863618915312484, + "grad_norm": 6.583322560980222, + "learning_rate": 9.000670804977661e-06, + "loss": 17.5065, + "step": 12508 + }, + { + "epoch": 0.22865446834957134, + "grad_norm": 8.761571713223706, + "learning_rate": 
9.00049324400724e-06, + "loss": 18.2826, + "step": 12509 + }, + { + "epoch": 0.22867274754601788, + "grad_norm": 10.445797901169518, + "learning_rate": 9.000315669015438e-06, + "loss": 18.0063, + "step": 12510 + }, + { + "epoch": 0.22869102674246441, + "grad_norm": 7.187926896437648, + "learning_rate": 9.000138080002876e-06, + "loss": 17.5907, + "step": 12511 + }, + { + "epoch": 0.22870930593891092, + "grad_norm": 8.741326794947055, + "learning_rate": 8.999960476970178e-06, + "loss": 18.0142, + "step": 12512 + }, + { + "epoch": 0.22872758513535746, + "grad_norm": 5.776068892469077, + "learning_rate": 8.999782859917966e-06, + "loss": 17.1766, + "step": 12513 + }, + { + "epoch": 0.22874586433180397, + "grad_norm": 6.171879540219162, + "learning_rate": 8.99960522884686e-06, + "loss": 17.3487, + "step": 12514 + }, + { + "epoch": 0.2287641435282505, + "grad_norm": 6.41565242675409, + "learning_rate": 8.999427583757487e-06, + "loss": 17.6119, + "step": 12515 + }, + { + "epoch": 0.228782422724697, + "grad_norm": 6.044914902041561, + "learning_rate": 8.999249924650467e-06, + "loss": 17.156, + "step": 12516 + }, + { + "epoch": 0.22880070192114355, + "grad_norm": 6.738196133000485, + "learning_rate": 8.999072251526422e-06, + "loss": 17.8699, + "step": 12517 + }, + { + "epoch": 0.22881898111759008, + "grad_norm": 6.419311646700771, + "learning_rate": 8.998894564385976e-06, + "loss": 17.4026, + "step": 12518 + }, + { + "epoch": 0.2288372603140366, + "grad_norm": 6.392433973015512, + "learning_rate": 8.998716863229753e-06, + "loss": 17.6232, + "step": 12519 + }, + { + "epoch": 0.22885553951048312, + "grad_norm": 5.8958085404852865, + "learning_rate": 8.998539148058371e-06, + "loss": 17.3042, + "step": 12520 + }, + { + "epoch": 0.22887381870692963, + "grad_norm": 6.74062692196872, + "learning_rate": 8.99836141887246e-06, + "loss": 17.7199, + "step": 12521 + }, + { + "epoch": 0.22889209790337617, + "grad_norm": 6.636453772326242, + "learning_rate": 8.998183675672639e-06, + "loss": 17.727, + "step": 12522 + }, + { + "epoch": 0.2289103770998227, + "grad_norm": 6.151624342410829, + "learning_rate": 8.998005918459529e-06, + "loss": 17.3302, + "step": 12523 + }, + { + "epoch": 0.2289286562962692, + "grad_norm": 7.524741787304753, + "learning_rate": 8.997828147233756e-06, + "loss": 18.1206, + "step": 12524 + }, + { + "epoch": 0.22894693549271575, + "grad_norm": 7.0537688901822415, + "learning_rate": 8.997650361995942e-06, + "loss": 17.8464, + "step": 12525 + }, + { + "epoch": 0.22896521468916226, + "grad_norm": 6.47024756749618, + "learning_rate": 8.997472562746711e-06, + "loss": 17.3939, + "step": 12526 + }, + { + "epoch": 0.2289834938856088, + "grad_norm": 7.684796251635377, + "learning_rate": 8.997294749486685e-06, + "loss": 18.0094, + "step": 12527 + }, + { + "epoch": 0.22900177308205533, + "grad_norm": 7.475698948068777, + "learning_rate": 8.997116922216487e-06, + "loss": 17.8596, + "step": 12528 + }, + { + "epoch": 0.22902005227850183, + "grad_norm": 7.102665538397156, + "learning_rate": 8.996939080936743e-06, + "loss": 17.4937, + "step": 12529 + }, + { + "epoch": 0.22903833147494837, + "grad_norm": 6.416082285582877, + "learning_rate": 8.996761225648074e-06, + "loss": 17.3821, + "step": 12530 + }, + { + "epoch": 0.22905661067139488, + "grad_norm": 6.281991184229925, + "learning_rate": 8.996583356351103e-06, + "loss": 17.4101, + "step": 12531 + }, + { + "epoch": 0.2290748898678414, + "grad_norm": 5.685979659910896, + "learning_rate": 8.996405473046455e-06, + "loss": 17.2703, + "step": 12532 + }, + { + 
"epoch": 0.22909316906428792, + "grad_norm": 7.809788721353094, + "learning_rate": 8.996227575734751e-06, + "loss": 17.7103, + "step": 12533 + }, + { + "epoch": 0.22911144826073446, + "grad_norm": 5.807257328034285, + "learning_rate": 8.996049664416617e-06, + "loss": 17.0531, + "step": 12534 + }, + { + "epoch": 0.229129727457181, + "grad_norm": 6.141165254363331, + "learning_rate": 8.995871739092676e-06, + "loss": 17.3797, + "step": 12535 + }, + { + "epoch": 0.2291480066536275, + "grad_norm": 6.562547748460574, + "learning_rate": 8.99569379976355e-06, + "loss": 17.626, + "step": 12536 + }, + { + "epoch": 0.22916628585007404, + "grad_norm": 6.177743602828855, + "learning_rate": 8.995515846429865e-06, + "loss": 17.4222, + "step": 12537 + }, + { + "epoch": 0.22918456504652054, + "grad_norm": 6.947711500882194, + "learning_rate": 8.995337879092244e-06, + "loss": 17.5877, + "step": 12538 + }, + { + "epoch": 0.22920284424296708, + "grad_norm": 6.481432353025655, + "learning_rate": 8.995159897751311e-06, + "loss": 17.5561, + "step": 12539 + }, + { + "epoch": 0.22922112343941362, + "grad_norm": 6.853642406435706, + "learning_rate": 8.994981902407688e-06, + "loss": 17.5894, + "step": 12540 + }, + { + "epoch": 0.22923940263586012, + "grad_norm": 6.96098333316694, + "learning_rate": 8.994803893062e-06, + "loss": 17.7678, + "step": 12541 + }, + { + "epoch": 0.22925768183230666, + "grad_norm": 7.911748352299507, + "learning_rate": 8.994625869714872e-06, + "loss": 17.9697, + "step": 12542 + }, + { + "epoch": 0.22927596102875317, + "grad_norm": 8.921371968463037, + "learning_rate": 8.994447832366926e-06, + "loss": 18.1782, + "step": 12543 + }, + { + "epoch": 0.2292942402251997, + "grad_norm": 7.279355339017593, + "learning_rate": 8.994269781018787e-06, + "loss": 17.3983, + "step": 12544 + }, + { + "epoch": 0.22931251942164624, + "grad_norm": 7.475994898745621, + "learning_rate": 8.994091715671081e-06, + "loss": 18.0412, + "step": 12545 + }, + { + "epoch": 0.22933079861809275, + "grad_norm": 8.479527737811965, + "learning_rate": 8.993913636324427e-06, + "loss": 18.1023, + "step": 12546 + }, + { + "epoch": 0.22934907781453928, + "grad_norm": 5.878535855809932, + "learning_rate": 8.993735542979453e-06, + "loss": 17.0986, + "step": 12547 + }, + { + "epoch": 0.2293673570109858, + "grad_norm": 5.2999987002435, + "learning_rate": 8.993557435636784e-06, + "loss": 16.8318, + "step": 12548 + }, + { + "epoch": 0.22938563620743233, + "grad_norm": 8.142359630907281, + "learning_rate": 8.993379314297042e-06, + "loss": 18.0559, + "step": 12549 + }, + { + "epoch": 0.22940391540387883, + "grad_norm": 5.272730622394096, + "learning_rate": 8.993201178960853e-06, + "loss": 17.0443, + "step": 12550 + }, + { + "epoch": 0.22942219460032537, + "grad_norm": 5.915003041650118, + "learning_rate": 8.99302302962884e-06, + "loss": 17.2106, + "step": 12551 + }, + { + "epoch": 0.2294404737967719, + "grad_norm": 5.603933753552654, + "learning_rate": 8.992844866301627e-06, + "loss": 17.2869, + "step": 12552 + }, + { + "epoch": 0.2294587529932184, + "grad_norm": 6.60340195412723, + "learning_rate": 8.992666688979838e-06, + "loss": 17.4153, + "step": 12553 + }, + { + "epoch": 0.22947703218966495, + "grad_norm": 7.213417414091133, + "learning_rate": 8.992488497664101e-06, + "loss": 17.777, + "step": 12554 + }, + { + "epoch": 0.22949531138611146, + "grad_norm": 7.264297281644561, + "learning_rate": 8.992310292355037e-06, + "loss": 17.8815, + "step": 12555 + }, + { + "epoch": 0.229513590582558, + "grad_norm": 7.399917509915868, + 
"learning_rate": 8.992132073053272e-06, + "loss": 18.0332, + "step": 12556 + }, + { + "epoch": 0.22953186977900453, + "grad_norm": 6.5106449740748715, + "learning_rate": 8.991953839759432e-06, + "loss": 17.5683, + "step": 12557 + }, + { + "epoch": 0.22955014897545103, + "grad_norm": 6.162216353065298, + "learning_rate": 8.99177559247414e-06, + "loss": 17.4597, + "step": 12558 + }, + { + "epoch": 0.22956842817189757, + "grad_norm": 6.409297331701278, + "learning_rate": 8.991597331198018e-06, + "loss": 17.5378, + "step": 12559 + }, + { + "epoch": 0.22958670736834408, + "grad_norm": 6.79721675397959, + "learning_rate": 8.991419055931697e-06, + "loss": 17.4977, + "step": 12560 + }, + { + "epoch": 0.22960498656479061, + "grad_norm": 5.680684048615769, + "learning_rate": 8.991240766675798e-06, + "loss": 17.1447, + "step": 12561 + }, + { + "epoch": 0.22962326576123715, + "grad_norm": 7.109729819431134, + "learning_rate": 8.991062463430943e-06, + "loss": 17.5235, + "step": 12562 + }, + { + "epoch": 0.22964154495768366, + "grad_norm": 6.663936086487505, + "learning_rate": 8.990884146197765e-06, + "loss": 17.5464, + "step": 12563 + }, + { + "epoch": 0.2296598241541302, + "grad_norm": 7.0581609461212835, + "learning_rate": 8.990705814976883e-06, + "loss": 17.9784, + "step": 12564 + }, + { + "epoch": 0.2296781033505767, + "grad_norm": 6.513759642232706, + "learning_rate": 8.990527469768921e-06, + "loss": 17.4535, + "step": 12565 + }, + { + "epoch": 0.22969638254702324, + "grad_norm": 7.505513390026089, + "learning_rate": 8.99034911057451e-06, + "loss": 17.7397, + "step": 12566 + }, + { + "epoch": 0.22971466174346974, + "grad_norm": 6.137401951152139, + "learning_rate": 8.99017073739427e-06, + "loss": 17.4381, + "step": 12567 + }, + { + "epoch": 0.22973294093991628, + "grad_norm": 6.098398579223316, + "learning_rate": 8.989992350228827e-06, + "loss": 17.3569, + "step": 12568 + }, + { + "epoch": 0.22975122013636282, + "grad_norm": 5.639631094059768, + "learning_rate": 8.989813949078808e-06, + "loss": 17.3075, + "step": 12569 + }, + { + "epoch": 0.22976949933280932, + "grad_norm": 6.526841461697767, + "learning_rate": 8.989635533944837e-06, + "loss": 17.5349, + "step": 12570 + }, + { + "epoch": 0.22978777852925586, + "grad_norm": 7.68988900944073, + "learning_rate": 8.98945710482754e-06, + "loss": 18.0816, + "step": 12571 + }, + { + "epoch": 0.22980605772570237, + "grad_norm": 5.977019301105379, + "learning_rate": 8.989278661727541e-06, + "loss": 17.3921, + "step": 12572 + }, + { + "epoch": 0.2298243369221489, + "grad_norm": 6.478667226965484, + "learning_rate": 8.989100204645469e-06, + "loss": 17.3424, + "step": 12573 + }, + { + "epoch": 0.22984261611859544, + "grad_norm": 7.289630488656758, + "learning_rate": 8.988921733581944e-06, + "loss": 17.7558, + "step": 12574 + }, + { + "epoch": 0.22986089531504195, + "grad_norm": 7.0332537951974805, + "learning_rate": 8.988743248537597e-06, + "loss": 17.9819, + "step": 12575 + }, + { + "epoch": 0.22987917451148848, + "grad_norm": 7.884691853844178, + "learning_rate": 8.988564749513048e-06, + "loss": 17.991, + "step": 12576 + }, + { + "epoch": 0.229897453707935, + "grad_norm": 6.5376790791049375, + "learning_rate": 8.988386236508928e-06, + "loss": 17.553, + "step": 12577 + }, + { + "epoch": 0.22991573290438153, + "grad_norm": 7.066783121330573, + "learning_rate": 8.98820770952586e-06, + "loss": 17.7031, + "step": 12578 + }, + { + "epoch": 0.22993401210082806, + "grad_norm": 7.013659824042925, + "learning_rate": 8.988029168564471e-06, + "loss": 17.97, + "step": 
12579 + }, + { + "epoch": 0.22995229129727457, + "grad_norm": 6.74694556488098, + "learning_rate": 8.987850613625384e-06, + "loss": 17.5762, + "step": 12580 + }, + { + "epoch": 0.2299705704937211, + "grad_norm": 6.3388721790425455, + "learning_rate": 8.987672044709228e-06, + "loss": 17.5761, + "step": 12581 + }, + { + "epoch": 0.2299888496901676, + "grad_norm": 7.183547300713726, + "learning_rate": 8.987493461816626e-06, + "loss": 17.6874, + "step": 12582 + }, + { + "epoch": 0.23000712888661415, + "grad_norm": 6.9699117910774175, + "learning_rate": 8.987314864948207e-06, + "loss": 17.8068, + "step": 12583 + }, + { + "epoch": 0.23002540808306066, + "grad_norm": 5.608276462681504, + "learning_rate": 8.987136254104594e-06, + "loss": 17.2474, + "step": 12584 + }, + { + "epoch": 0.2300436872795072, + "grad_norm": 6.677230612371447, + "learning_rate": 8.986957629286416e-06, + "loss": 17.5887, + "step": 12585 + }, + { + "epoch": 0.23006196647595373, + "grad_norm": 6.837957475119126, + "learning_rate": 8.986778990494296e-06, + "loss": 17.5032, + "step": 12586 + }, + { + "epoch": 0.23008024567240024, + "grad_norm": 7.362353618173081, + "learning_rate": 8.986600337728863e-06, + "loss": 17.7967, + "step": 12587 + }, + { + "epoch": 0.23009852486884677, + "grad_norm": 7.822005971236524, + "learning_rate": 8.98642167099074e-06, + "loss": 17.9193, + "step": 12588 + }, + { + "epoch": 0.23011680406529328, + "grad_norm": 7.1110178575429694, + "learning_rate": 8.986242990280556e-06, + "loss": 17.1459, + "step": 12589 + }, + { + "epoch": 0.23013508326173981, + "grad_norm": 7.040891386515305, + "learning_rate": 8.986064295598937e-06, + "loss": 17.6653, + "step": 12590 + }, + { + "epoch": 0.23015336245818635, + "grad_norm": 5.6072354844521906, + "learning_rate": 8.985885586946507e-06, + "loss": 16.9168, + "step": 12591 + }, + { + "epoch": 0.23017164165463286, + "grad_norm": 6.016958462413485, + "learning_rate": 8.985706864323896e-06, + "loss": 17.1112, + "step": 12592 + }, + { + "epoch": 0.2301899208510794, + "grad_norm": 7.227371297609713, + "learning_rate": 8.985528127731727e-06, + "loss": 17.474, + "step": 12593 + }, + { + "epoch": 0.2302082000475259, + "grad_norm": 6.279187554419717, + "learning_rate": 8.985349377170626e-06, + "loss": 17.4538, + "step": 12594 + }, + { + "epoch": 0.23022647924397244, + "grad_norm": 6.718872122911122, + "learning_rate": 8.985170612641222e-06, + "loss": 17.6669, + "step": 12595 + }, + { + "epoch": 0.23024475844041897, + "grad_norm": 6.508252219883713, + "learning_rate": 8.984991834144143e-06, + "loss": 17.583, + "step": 12596 + }, + { + "epoch": 0.23026303763686548, + "grad_norm": 5.965840370153033, + "learning_rate": 8.984813041680013e-06, + "loss": 17.1702, + "step": 12597 + }, + { + "epoch": 0.23028131683331202, + "grad_norm": 6.235020004113385, + "learning_rate": 8.984634235249457e-06, + "loss": 17.2796, + "step": 12598 + }, + { + "epoch": 0.23029959602975852, + "grad_norm": 7.665610962559877, + "learning_rate": 8.984455414853106e-06, + "loss": 18.0396, + "step": 12599 + }, + { + "epoch": 0.23031787522620506, + "grad_norm": 6.81995482604741, + "learning_rate": 8.984276580491585e-06, + "loss": 17.6362, + "step": 12600 + }, + { + "epoch": 0.23033615442265157, + "grad_norm": 7.102639361588913, + "learning_rate": 8.984097732165518e-06, + "loss": 17.8958, + "step": 12601 + }, + { + "epoch": 0.2303544336190981, + "grad_norm": 5.969602264257139, + "learning_rate": 8.983918869875535e-06, + "loss": 17.3347, + "step": 12602 + }, + { + "epoch": 0.23037271281554464, + "grad_norm": 
7.001558828077685, + "learning_rate": 8.983739993622262e-06, + "loss": 17.5424, + "step": 12603 + }, + { + "epoch": 0.23039099201199115, + "grad_norm": 6.195951514051619, + "learning_rate": 8.983561103406326e-06, + "loss": 17.281, + "step": 12604 + }, + { + "epoch": 0.23040927120843768, + "grad_norm": 6.406386080044461, + "learning_rate": 8.983382199228355e-06, + "loss": 17.5129, + "step": 12605 + }, + { + "epoch": 0.2304275504048842, + "grad_norm": 6.80794578612935, + "learning_rate": 8.983203281088972e-06, + "loss": 17.8294, + "step": 12606 + }, + { + "epoch": 0.23044582960133073, + "grad_norm": 7.146491191474552, + "learning_rate": 8.983024348988812e-06, + "loss": 17.8156, + "step": 12607 + }, + { + "epoch": 0.23046410879777726, + "grad_norm": 6.658368575968671, + "learning_rate": 8.982845402928492e-06, + "loss": 17.831, + "step": 12608 + }, + { + "epoch": 0.23048238799422377, + "grad_norm": 7.188384913391123, + "learning_rate": 8.982666442908647e-06, + "loss": 17.928, + "step": 12609 + }, + { + "epoch": 0.2305006671906703, + "grad_norm": 7.181366336026206, + "learning_rate": 8.982487468929903e-06, + "loss": 17.5838, + "step": 12610 + }, + { + "epoch": 0.2305189463871168, + "grad_norm": 6.113867339288262, + "learning_rate": 8.982308480992886e-06, + "loss": 17.1665, + "step": 12611 + }, + { + "epoch": 0.23053722558356335, + "grad_norm": 6.6525692485777075, + "learning_rate": 8.982129479098221e-06, + "loss": 17.2119, + "step": 12612 + }, + { + "epoch": 0.23055550478000988, + "grad_norm": 6.1259140723075545, + "learning_rate": 8.981950463246538e-06, + "loss": 17.3334, + "step": 12613 + }, + { + "epoch": 0.2305737839764564, + "grad_norm": 6.034860566559422, + "learning_rate": 8.981771433438467e-06, + "loss": 17.1934, + "step": 12614 + }, + { + "epoch": 0.23059206317290293, + "grad_norm": 6.124692958782826, + "learning_rate": 8.98159238967463e-06, + "loss": 17.2687, + "step": 12615 + }, + { + "epoch": 0.23061034236934944, + "grad_norm": 6.116606685379102, + "learning_rate": 8.981413331955657e-06, + "loss": 17.4827, + "step": 12616 + }, + { + "epoch": 0.23062862156579597, + "grad_norm": 6.836158558654621, + "learning_rate": 8.981234260282177e-06, + "loss": 17.7202, + "step": 12617 + }, + { + "epoch": 0.23064690076224248, + "grad_norm": 6.262228607960215, + "learning_rate": 8.981055174654815e-06, + "loss": 17.3742, + "step": 12618 + }, + { + "epoch": 0.23066517995868902, + "grad_norm": 6.4218464165121825, + "learning_rate": 8.980876075074202e-06, + "loss": 17.4499, + "step": 12619 + }, + { + "epoch": 0.23068345915513555, + "grad_norm": 6.699852881167036, + "learning_rate": 8.980696961540964e-06, + "loss": 17.7633, + "step": 12620 + }, + { + "epoch": 0.23070173835158206, + "grad_norm": 6.044320608620665, + "learning_rate": 8.980517834055728e-06, + "loss": 17.2964, + "step": 12621 + }, + { + "epoch": 0.2307200175480286, + "grad_norm": 8.171391327262505, + "learning_rate": 8.980338692619122e-06, + "loss": 17.8528, + "step": 12622 + }, + { + "epoch": 0.2307382967444751, + "grad_norm": 6.700840355458898, + "learning_rate": 8.980159537231774e-06, + "loss": 17.5962, + "step": 12623 + }, + { + "epoch": 0.23075657594092164, + "grad_norm": 7.140536927571538, + "learning_rate": 8.979980367894313e-06, + "loss": 17.5118, + "step": 12624 + }, + { + "epoch": 0.23077485513736817, + "grad_norm": 6.444970035759333, + "learning_rate": 8.979801184607364e-06, + "loss": 17.3046, + "step": 12625 + }, + { + "epoch": 0.23079313433381468, + "grad_norm": 5.836426830078904, + "learning_rate": 8.97962198737156e-06, + 
"loss": 17.4554, + "step": 12626 + }, + { + "epoch": 0.23081141353026122, + "grad_norm": 6.501548222905501, + "learning_rate": 8.979442776187524e-06, + "loss": 17.4258, + "step": 12627 + }, + { + "epoch": 0.23082969272670772, + "grad_norm": 8.510282767251702, + "learning_rate": 8.979263551055887e-06, + "loss": 18.5991, + "step": 12628 + }, + { + "epoch": 0.23084797192315426, + "grad_norm": 6.544339383939356, + "learning_rate": 8.979084311977277e-06, + "loss": 17.2998, + "step": 12629 + }, + { + "epoch": 0.2308662511196008, + "grad_norm": 4.810441782297864, + "learning_rate": 8.978905058952323e-06, + "loss": 16.7762, + "step": 12630 + }, + { + "epoch": 0.2308845303160473, + "grad_norm": 6.753057630563337, + "learning_rate": 8.978725791981651e-06, + "loss": 17.7448, + "step": 12631 + }, + { + "epoch": 0.23090280951249384, + "grad_norm": 6.878295922185571, + "learning_rate": 8.978546511065889e-06, + "loss": 17.8306, + "step": 12632 + }, + { + "epoch": 0.23092108870894035, + "grad_norm": 6.601055261394896, + "learning_rate": 8.978367216205668e-06, + "loss": 17.6226, + "step": 12633 + }, + { + "epoch": 0.23093936790538688, + "grad_norm": 6.96294244153231, + "learning_rate": 8.978187907401615e-06, + "loss": 17.5169, + "step": 12634 + }, + { + "epoch": 0.2309576471018334, + "grad_norm": 7.629841294699073, + "learning_rate": 8.97800858465436e-06, + "loss": 17.7823, + "step": 12635 + }, + { + "epoch": 0.23097592629827993, + "grad_norm": 6.490291733731224, + "learning_rate": 8.977829247964526e-06, + "loss": 17.6466, + "step": 12636 + }, + { + "epoch": 0.23099420549472646, + "grad_norm": 5.933055992843987, + "learning_rate": 8.97764989733275e-06, + "loss": 17.1796, + "step": 12637 + }, + { + "epoch": 0.23101248469117297, + "grad_norm": 6.914293204747319, + "learning_rate": 8.977470532759654e-06, + "loss": 17.6409, + "step": 12638 + }, + { + "epoch": 0.2310307638876195, + "grad_norm": 7.44226165891364, + "learning_rate": 8.97729115424587e-06, + "loss": 18.0814, + "step": 12639 + }, + { + "epoch": 0.231049043084066, + "grad_norm": 5.928863316211935, + "learning_rate": 8.977111761792026e-06, + "loss": 17.2507, + "step": 12640 + }, + { + "epoch": 0.23106732228051255, + "grad_norm": 6.736219046219845, + "learning_rate": 8.97693235539875e-06, + "loss": 17.6348, + "step": 12641 + }, + { + "epoch": 0.23108560147695908, + "grad_norm": 6.705627412403998, + "learning_rate": 8.976752935066671e-06, + "loss": 17.7984, + "step": 12642 + }, + { + "epoch": 0.2311038806734056, + "grad_norm": 6.887482315364359, + "learning_rate": 8.976573500796417e-06, + "loss": 17.8859, + "step": 12643 + }, + { + "epoch": 0.23112215986985213, + "grad_norm": 6.418883513819873, + "learning_rate": 8.97639405258862e-06, + "loss": 17.6612, + "step": 12644 + }, + { + "epoch": 0.23114043906629864, + "grad_norm": 6.931046768284051, + "learning_rate": 8.976214590443905e-06, + "loss": 17.7548, + "step": 12645 + }, + { + "epoch": 0.23115871826274517, + "grad_norm": 6.651831068192549, + "learning_rate": 8.976035114362903e-06, + "loss": 17.7041, + "step": 12646 + }, + { + "epoch": 0.2311769974591917, + "grad_norm": 7.906315986151466, + "learning_rate": 8.975855624346244e-06, + "loss": 17.8006, + "step": 12647 + }, + { + "epoch": 0.23119527665563822, + "grad_norm": 7.181775795765101, + "learning_rate": 8.975676120394555e-06, + "loss": 17.8929, + "step": 12648 + }, + { + "epoch": 0.23121355585208475, + "grad_norm": 6.2908296422667185, + "learning_rate": 8.975496602508467e-06, + "loss": 17.3297, + "step": 12649 + }, + { + "epoch": 
0.23123183504853126, + "grad_norm": 7.118094861537474, + "learning_rate": 8.975317070688608e-06, + "loss": 17.6819, + "step": 12650 + }, + { + "epoch": 0.2312501142449778, + "grad_norm": 7.577308961162696, + "learning_rate": 8.975137524935609e-06, + "loss": 17.8884, + "step": 12651 + }, + { + "epoch": 0.2312683934414243, + "grad_norm": 6.652621699368609, + "learning_rate": 8.974957965250097e-06, + "loss": 17.7604, + "step": 12652 + }, + { + "epoch": 0.23128667263787084, + "grad_norm": 6.353048449739577, + "learning_rate": 8.9747783916327e-06, + "loss": 17.4359, + "step": 12653 + }, + { + "epoch": 0.23130495183431737, + "grad_norm": 6.765843695500798, + "learning_rate": 8.974598804084052e-06, + "loss": 17.5919, + "step": 12654 + }, + { + "epoch": 0.23132323103076388, + "grad_norm": 8.23210520293349, + "learning_rate": 8.97441920260478e-06, + "loss": 18.1861, + "step": 12655 + }, + { + "epoch": 0.23134151022721042, + "grad_norm": 5.642530187166705, + "learning_rate": 8.974239587195514e-06, + "loss": 17.0812, + "step": 12656 + }, + { + "epoch": 0.23135978942365693, + "grad_norm": 5.990051907955578, + "learning_rate": 8.974059957856882e-06, + "loss": 17.1461, + "step": 12657 + }, + { + "epoch": 0.23137806862010346, + "grad_norm": 7.530881392920648, + "learning_rate": 8.973880314589516e-06, + "loss": 17.6615, + "step": 12658 + }, + { + "epoch": 0.23139634781655, + "grad_norm": 6.605208950411215, + "learning_rate": 8.973700657394043e-06, + "loss": 17.5248, + "step": 12659 + }, + { + "epoch": 0.2314146270129965, + "grad_norm": 7.059490635933132, + "learning_rate": 8.973520986271094e-06, + "loss": 17.6861, + "step": 12660 + }, + { + "epoch": 0.23143290620944304, + "grad_norm": 6.815923729239079, + "learning_rate": 8.9733413012213e-06, + "loss": 17.4846, + "step": 12661 + }, + { + "epoch": 0.23145118540588955, + "grad_norm": 6.614975434214911, + "learning_rate": 8.973161602245288e-06, + "loss": 17.7974, + "step": 12662 + }, + { + "epoch": 0.23146946460233608, + "grad_norm": 6.350078515902742, + "learning_rate": 8.97298188934369e-06, + "loss": 17.5161, + "step": 12663 + }, + { + "epoch": 0.23148774379878262, + "grad_norm": 6.253468614428047, + "learning_rate": 8.972802162517136e-06, + "loss": 17.8805, + "step": 12664 + }, + { + "epoch": 0.23150602299522913, + "grad_norm": 6.4332769519713775, + "learning_rate": 8.972622421766254e-06, + "loss": 17.5103, + "step": 12665 + }, + { + "epoch": 0.23152430219167566, + "grad_norm": 7.082491712673213, + "learning_rate": 8.972442667091676e-06, + "loss": 17.6118, + "step": 12666 + }, + { + "epoch": 0.23154258138812217, + "grad_norm": 5.952803943455232, + "learning_rate": 8.97226289849403e-06, + "loss": 17.4437, + "step": 12667 + }, + { + "epoch": 0.2315608605845687, + "grad_norm": 5.789099746715334, + "learning_rate": 8.972083115973949e-06, + "loss": 17.1918, + "step": 12668 + }, + { + "epoch": 0.23157913978101521, + "grad_norm": 7.577675757610278, + "learning_rate": 8.97190331953206e-06, + "loss": 17.6084, + "step": 12669 + }, + { + "epoch": 0.23159741897746175, + "grad_norm": 5.860040349956942, + "learning_rate": 8.971723509168996e-06, + "loss": 17.5445, + "step": 12670 + }, + { + "epoch": 0.23161569817390829, + "grad_norm": 6.469267679013469, + "learning_rate": 8.971543684885384e-06, + "loss": 17.7468, + "step": 12671 + }, + { + "epoch": 0.2316339773703548, + "grad_norm": 5.457767309634818, + "learning_rate": 8.971363846681858e-06, + "loss": 17.0538, + "step": 12672 + }, + { + "epoch": 0.23165225656680133, + "grad_norm": 7.300162018109881, + "learning_rate": 
8.971183994559046e-06, + "loss": 17.9823, + "step": 12673 + }, + { + "epoch": 0.23167053576324784, + "grad_norm": 6.09247248393594, + "learning_rate": 8.971004128517577e-06, + "loss": 17.6069, + "step": 12674 + }, + { + "epoch": 0.23168881495969437, + "grad_norm": 7.876931662668772, + "learning_rate": 8.970824248558083e-06, + "loss": 17.938, + "step": 12675 + }, + { + "epoch": 0.2317070941561409, + "grad_norm": 5.714717702916206, + "learning_rate": 8.970644354681196e-06, + "loss": 17.1884, + "step": 12676 + }, + { + "epoch": 0.23172537335258742, + "grad_norm": 6.139455239961285, + "learning_rate": 8.970464446887544e-06, + "loss": 17.1756, + "step": 12677 + }, + { + "epoch": 0.23174365254903395, + "grad_norm": 6.4457883195426415, + "learning_rate": 8.97028452517776e-06, + "loss": 17.4024, + "step": 12678 + }, + { + "epoch": 0.23176193174548046, + "grad_norm": 10.106626906983957, + "learning_rate": 8.970104589552472e-06, + "loss": 19.3147, + "step": 12679 + }, + { + "epoch": 0.231780210941927, + "grad_norm": 7.817700354434268, + "learning_rate": 8.969924640012312e-06, + "loss": 18.1592, + "step": 12680 + }, + { + "epoch": 0.23179849013837353, + "grad_norm": 7.803055270135185, + "learning_rate": 8.969744676557912e-06, + "loss": 18.1815, + "step": 12681 + }, + { + "epoch": 0.23181676933482004, + "grad_norm": 8.738677894192076, + "learning_rate": 8.9695646991899e-06, + "loss": 18.6096, + "step": 12682 + }, + { + "epoch": 0.23183504853126657, + "grad_norm": 8.074642631159707, + "learning_rate": 8.96938470790891e-06, + "loss": 17.9484, + "step": 12683 + }, + { + "epoch": 0.23185332772771308, + "grad_norm": 6.445153140159986, + "learning_rate": 8.969204702715568e-06, + "loss": 17.2538, + "step": 12684 + }, + { + "epoch": 0.23187160692415962, + "grad_norm": 7.28801149522414, + "learning_rate": 8.96902468361051e-06, + "loss": 18.0793, + "step": 12685 + }, + { + "epoch": 0.23188988612060613, + "grad_norm": 6.45430193524104, + "learning_rate": 8.968844650594363e-06, + "loss": 17.4716, + "step": 12686 + }, + { + "epoch": 0.23190816531705266, + "grad_norm": 7.189783644839498, + "learning_rate": 8.968664603667763e-06, + "loss": 17.4625, + "step": 12687 + }, + { + "epoch": 0.2319264445134992, + "grad_norm": 6.673588427535199, + "learning_rate": 8.968484542831337e-06, + "loss": 17.4997, + "step": 12688 + }, + { + "epoch": 0.2319447237099457, + "grad_norm": 7.319510417789561, + "learning_rate": 8.968304468085715e-06, + "loss": 18.1252, + "step": 12689 + }, + { + "epoch": 0.23196300290639224, + "grad_norm": 6.755870910300304, + "learning_rate": 8.968124379431533e-06, + "loss": 17.6274, + "step": 12690 + }, + { + "epoch": 0.23198128210283875, + "grad_norm": 6.9792511913835655, + "learning_rate": 8.967944276869419e-06, + "loss": 17.5144, + "step": 12691 + }, + { + "epoch": 0.23199956129928528, + "grad_norm": 6.2217243828616455, + "learning_rate": 8.967764160400002e-06, + "loss": 17.3082, + "step": 12692 + }, + { + "epoch": 0.23201784049573182, + "grad_norm": 6.089639947574932, + "learning_rate": 8.967584030023916e-06, + "loss": 17.3434, + "step": 12693 + }, + { + "epoch": 0.23203611969217833, + "grad_norm": 6.430558611785808, + "learning_rate": 8.967403885741795e-06, + "loss": 17.5896, + "step": 12694 + }, + { + "epoch": 0.23205439888862486, + "grad_norm": 5.206884105273005, + "learning_rate": 8.967223727554267e-06, + "loss": 16.9609, + "step": 12695 + }, + { + "epoch": 0.23207267808507137, + "grad_norm": 6.467597193668249, + "learning_rate": 8.967043555461964e-06, + "loss": 17.4015, + "step": 12696 + }, + { + 
"epoch": 0.2320909572815179, + "grad_norm": 6.7907705601061235, + "learning_rate": 8.966863369465517e-06, + "loss": 17.4738, + "step": 12697 + }, + { + "epoch": 0.23210923647796444, + "grad_norm": 7.911519043403179, + "learning_rate": 8.966683169565557e-06, + "loss": 18.1934, + "step": 12698 + }, + { + "epoch": 0.23212751567441095, + "grad_norm": 6.4982645684128615, + "learning_rate": 8.96650295576272e-06, + "loss": 17.3533, + "step": 12699 + }, + { + "epoch": 0.23214579487085749, + "grad_norm": 5.802643014414197, + "learning_rate": 8.966322728057632e-06, + "loss": 17.2984, + "step": 12700 + }, + { + "epoch": 0.232164074067304, + "grad_norm": 6.9079188967253655, + "learning_rate": 8.966142486450925e-06, + "loss": 17.6317, + "step": 12701 + }, + { + "epoch": 0.23218235326375053, + "grad_norm": 6.650105180734968, + "learning_rate": 8.965962230943236e-06, + "loss": 17.5343, + "step": 12702 + }, + { + "epoch": 0.23220063246019704, + "grad_norm": 6.969674418237345, + "learning_rate": 8.965781961535194e-06, + "loss": 17.6164, + "step": 12703 + }, + { + "epoch": 0.23221891165664357, + "grad_norm": 6.3892235768617915, + "learning_rate": 8.96560167822743e-06, + "loss": 17.4689, + "step": 12704 + }, + { + "epoch": 0.2322371908530901, + "grad_norm": 6.387298192158086, + "learning_rate": 8.965421381020573e-06, + "loss": 17.2971, + "step": 12705 + }, + { + "epoch": 0.23225547004953662, + "grad_norm": 9.097806062465553, + "learning_rate": 8.965241069915262e-06, + "loss": 18.1642, + "step": 12706 + }, + { + "epoch": 0.23227374924598315, + "grad_norm": 7.635211232771337, + "learning_rate": 8.965060744912123e-06, + "loss": 18.0983, + "step": 12707 + }, + { + "epoch": 0.23229202844242966, + "grad_norm": 6.649749231872817, + "learning_rate": 8.96488040601179e-06, + "loss": 17.6738, + "step": 12708 + }, + { + "epoch": 0.2323103076388762, + "grad_norm": 8.254706481009299, + "learning_rate": 8.964700053214896e-06, + "loss": 17.9174, + "step": 12709 + }, + { + "epoch": 0.23232858683532273, + "grad_norm": 7.083713670044754, + "learning_rate": 8.964519686522073e-06, + "loss": 17.7431, + "step": 12710 + }, + { + "epoch": 0.23234686603176924, + "grad_norm": 6.654268352523479, + "learning_rate": 8.964339305933952e-06, + "loss": 17.5484, + "step": 12711 + }, + { + "epoch": 0.23236514522821577, + "grad_norm": 7.396280795160867, + "learning_rate": 8.964158911451165e-06, + "loss": 17.6135, + "step": 12712 + }, + { + "epoch": 0.23238342442466228, + "grad_norm": 6.058802860490418, + "learning_rate": 8.963978503074345e-06, + "loss": 17.4924, + "step": 12713 + }, + { + "epoch": 0.23240170362110882, + "grad_norm": 6.604578914598495, + "learning_rate": 8.963798080804126e-06, + "loss": 17.426, + "step": 12714 + }, + { + "epoch": 0.23241998281755535, + "grad_norm": 6.19249065287721, + "learning_rate": 8.963617644641138e-06, + "loss": 17.308, + "step": 12715 + }, + { + "epoch": 0.23243826201400186, + "grad_norm": 6.900919621465627, + "learning_rate": 8.963437194586013e-06, + "loss": 17.8536, + "step": 12716 + }, + { + "epoch": 0.2324565412104484, + "grad_norm": 6.7578084758593, + "learning_rate": 8.963256730639384e-06, + "loss": 17.2981, + "step": 12717 + }, + { + "epoch": 0.2324748204068949, + "grad_norm": 6.030502383830794, + "learning_rate": 8.963076252801886e-06, + "loss": 17.32, + "step": 12718 + }, + { + "epoch": 0.23249309960334144, + "grad_norm": 7.2092960685317555, + "learning_rate": 8.96289576107415e-06, + "loss": 17.8071, + "step": 12719 + }, + { + "epoch": 0.23251137879978795, + "grad_norm": 6.50642651713053, + 
"learning_rate": 8.962715255456806e-06, + "loss": 17.6434, + "step": 12720 + }, + { + "epoch": 0.23252965799623448, + "grad_norm": 8.128287658462336, + "learning_rate": 8.96253473595049e-06, + "loss": 17.6088, + "step": 12721 + }, + { + "epoch": 0.23254793719268102, + "grad_norm": 6.46303576738472, + "learning_rate": 8.962354202555834e-06, + "loss": 17.3952, + "step": 12722 + }, + { + "epoch": 0.23256621638912753, + "grad_norm": 8.03783768046001, + "learning_rate": 8.96217365527347e-06, + "loss": 18.1464, + "step": 12723 + }, + { + "epoch": 0.23258449558557406, + "grad_norm": 6.817841189634383, + "learning_rate": 8.961993094104031e-06, + "loss": 17.5995, + "step": 12724 + }, + { + "epoch": 0.23260277478202057, + "grad_norm": 6.97774497101537, + "learning_rate": 8.96181251904815e-06, + "loss": 17.9206, + "step": 12725 + }, + { + "epoch": 0.2326210539784671, + "grad_norm": 7.485176546420922, + "learning_rate": 8.96163193010646e-06, + "loss": 18.1183, + "step": 12726 + }, + { + "epoch": 0.23263933317491364, + "grad_norm": 7.727677566925891, + "learning_rate": 8.961451327279595e-06, + "loss": 17.796, + "step": 12727 + }, + { + "epoch": 0.23265761237136015, + "grad_norm": 7.2993677626770115, + "learning_rate": 8.961270710568185e-06, + "loss": 17.8907, + "step": 12728 + }, + { + "epoch": 0.2326758915678067, + "grad_norm": 7.207807208792943, + "learning_rate": 8.961090079972865e-06, + "loss": 17.7457, + "step": 12729 + }, + { + "epoch": 0.2326941707642532, + "grad_norm": 7.123250316362162, + "learning_rate": 8.960909435494269e-06, + "loss": 17.9557, + "step": 12730 + }, + { + "epoch": 0.23271244996069973, + "grad_norm": 5.795406937156422, + "learning_rate": 8.96072877713303e-06, + "loss": 17.187, + "step": 12731 + }, + { + "epoch": 0.23273072915714627, + "grad_norm": 6.060058257382089, + "learning_rate": 8.960548104889778e-06, + "loss": 17.377, + "step": 12732 + }, + { + "epoch": 0.23274900835359277, + "grad_norm": 9.463240980061615, + "learning_rate": 8.960367418765152e-06, + "loss": 18.9757, + "step": 12733 + }, + { + "epoch": 0.2327672875500393, + "grad_norm": 7.1875089109188535, + "learning_rate": 8.960186718759778e-06, + "loss": 17.7208, + "step": 12734 + }, + { + "epoch": 0.23278556674648582, + "grad_norm": 9.197511583933995, + "learning_rate": 8.960006004874295e-06, + "loss": 18.0441, + "step": 12735 + }, + { + "epoch": 0.23280384594293235, + "grad_norm": 7.426393746853364, + "learning_rate": 8.959825277109334e-06, + "loss": 18.1336, + "step": 12736 + }, + { + "epoch": 0.23282212513937886, + "grad_norm": 4.9142180878185915, + "learning_rate": 8.95964453546553e-06, + "loss": 16.8768, + "step": 12737 + }, + { + "epoch": 0.2328404043358254, + "grad_norm": 7.249341077977238, + "learning_rate": 8.959463779943516e-06, + "loss": 17.856, + "step": 12738 + }, + { + "epoch": 0.23285868353227193, + "grad_norm": 6.544874946263931, + "learning_rate": 8.959283010543923e-06, + "loss": 17.4914, + "step": 12739 + }, + { + "epoch": 0.23287696272871844, + "grad_norm": 6.973432445210602, + "learning_rate": 8.959102227267387e-06, + "loss": 17.5418, + "step": 12740 + }, + { + "epoch": 0.23289524192516498, + "grad_norm": 7.673467131029792, + "learning_rate": 8.958921430114542e-06, + "loss": 18.4472, + "step": 12741 + }, + { + "epoch": 0.23291352112161148, + "grad_norm": 6.957441134901725, + "learning_rate": 8.95874061908602e-06, + "loss": 17.5476, + "step": 12742 + }, + { + "epoch": 0.23293180031805802, + "grad_norm": 7.476236583511359, + "learning_rate": 8.958559794182457e-06, + "loss": 18.1306, + "step": 12743 
+ }, + { + "epoch": 0.23295007951450455, + "grad_norm": 7.373016572378293, + "learning_rate": 8.958378955404486e-06, + "loss": 17.622, + "step": 12744 + }, + { + "epoch": 0.23296835871095106, + "grad_norm": 7.485544217058992, + "learning_rate": 8.95819810275274e-06, + "loss": 17.9682, + "step": 12745 + }, + { + "epoch": 0.2329866379073976, + "grad_norm": 6.912468244774014, + "learning_rate": 8.958017236227851e-06, + "loss": 17.6617, + "step": 12746 + }, + { + "epoch": 0.2330049171038441, + "grad_norm": 6.7659822035034, + "learning_rate": 8.957836355830456e-06, + "loss": 17.5976, + "step": 12747 + }, + { + "epoch": 0.23302319630029064, + "grad_norm": 6.703200527664176, + "learning_rate": 8.957655461561188e-06, + "loss": 17.5962, + "step": 12748 + }, + { + "epoch": 0.23304147549673718, + "grad_norm": 6.790172140328692, + "learning_rate": 8.957474553420681e-06, + "loss": 17.5057, + "step": 12749 + }, + { + "epoch": 0.23305975469318368, + "grad_norm": 8.478019310612417, + "learning_rate": 8.957293631409571e-06, + "loss": 18.1948, + "step": 12750 + }, + { + "epoch": 0.23307803388963022, + "grad_norm": 6.657350113306031, + "learning_rate": 8.95711269552849e-06, + "loss": 17.4076, + "step": 12751 + }, + { + "epoch": 0.23309631308607673, + "grad_norm": 6.443575958162277, + "learning_rate": 8.95693174577807e-06, + "loss": 17.593, + "step": 12752 + }, + { + "epoch": 0.23311459228252326, + "grad_norm": 5.822279201249806, + "learning_rate": 8.956750782158948e-06, + "loss": 17.2569, + "step": 12753 + }, + { + "epoch": 0.23313287147896977, + "grad_norm": 5.829004248004184, + "learning_rate": 8.956569804671759e-06, + "loss": 17.3094, + "step": 12754 + }, + { + "epoch": 0.2331511506754163, + "grad_norm": 6.440022187282789, + "learning_rate": 8.956388813317136e-06, + "loss": 17.5126, + "step": 12755 + }, + { + "epoch": 0.23316942987186284, + "grad_norm": 5.129374670689684, + "learning_rate": 8.956207808095713e-06, + "loss": 16.9751, + "step": 12756 + }, + { + "epoch": 0.23318770906830935, + "grad_norm": 7.600104472662688, + "learning_rate": 8.956026789008126e-06, + "loss": 18.2364, + "step": 12757 + }, + { + "epoch": 0.2332059882647559, + "grad_norm": 6.964699187721291, + "learning_rate": 8.955845756055007e-06, + "loss": 17.9402, + "step": 12758 + }, + { + "epoch": 0.2332242674612024, + "grad_norm": 6.181002011419887, + "learning_rate": 8.955664709236992e-06, + "loss": 17.3155, + "step": 12759 + }, + { + "epoch": 0.23324254665764893, + "grad_norm": 6.814603945740874, + "learning_rate": 8.955483648554716e-06, + "loss": 17.8268, + "step": 12760 + }, + { + "epoch": 0.23326082585409547, + "grad_norm": 7.091746026402167, + "learning_rate": 8.955302574008813e-06, + "loss": 17.8012, + "step": 12761 + }, + { + "epoch": 0.23327910505054197, + "grad_norm": 8.211456720586112, + "learning_rate": 8.955121485599919e-06, + "loss": 18.3147, + "step": 12762 + }, + { + "epoch": 0.2332973842469885, + "grad_norm": 6.807629775205399, + "learning_rate": 8.954940383328666e-06, + "loss": 17.7392, + "step": 12763 + }, + { + "epoch": 0.23331566344343502, + "grad_norm": 6.383849031235497, + "learning_rate": 8.95475926719569e-06, + "loss": 17.5924, + "step": 12764 + }, + { + "epoch": 0.23333394263988155, + "grad_norm": 6.147107205787592, + "learning_rate": 8.954578137201625e-06, + "loss": 17.4159, + "step": 12765 + }, + { + "epoch": 0.2333522218363281, + "grad_norm": 7.702299531053069, + "learning_rate": 8.954396993347107e-06, + "loss": 17.5864, + "step": 12766 + }, + { + "epoch": 0.2333705010327746, + "grad_norm": 7.270374554348164, 
+ "learning_rate": 8.954215835632774e-06, + "loss": 17.8174, + "step": 12767 + }, + { + "epoch": 0.23338878022922113, + "grad_norm": 7.072935465236767, + "learning_rate": 8.954034664059254e-06, + "loss": 18.1406, + "step": 12768 + }, + { + "epoch": 0.23340705942566764, + "grad_norm": 6.522551304604889, + "learning_rate": 8.953853478627187e-06, + "loss": 17.4885, + "step": 12769 + }, + { + "epoch": 0.23342533862211418, + "grad_norm": 7.153524358540162, + "learning_rate": 8.953672279337206e-06, + "loss": 17.5572, + "step": 12770 + }, + { + "epoch": 0.23344361781856068, + "grad_norm": 7.474197906048436, + "learning_rate": 8.953491066189948e-06, + "loss": 17.7423, + "step": 12771 + }, + { + "epoch": 0.23346189701500722, + "grad_norm": 6.416496933823563, + "learning_rate": 8.953309839186047e-06, + "loss": 17.5954, + "step": 12772 + }, + { + "epoch": 0.23348017621145375, + "grad_norm": 6.322843043443495, + "learning_rate": 8.953128598326136e-06, + "loss": 17.4895, + "step": 12773 + }, + { + "epoch": 0.23349845540790026, + "grad_norm": 6.33223118161616, + "learning_rate": 8.952947343610854e-06, + "loss": 17.1197, + "step": 12774 + }, + { + "epoch": 0.2335167346043468, + "grad_norm": 7.066635935635524, + "learning_rate": 8.952766075040833e-06, + "loss": 17.8293, + "step": 12775 + }, + { + "epoch": 0.2335350138007933, + "grad_norm": 5.944662456169544, + "learning_rate": 8.952584792616712e-06, + "loss": 17.1289, + "step": 12776 + }, + { + "epoch": 0.23355329299723984, + "grad_norm": 6.3679384570264554, + "learning_rate": 8.952403496339124e-06, + "loss": 17.5574, + "step": 12777 + }, + { + "epoch": 0.23357157219368638, + "grad_norm": 6.424547901175277, + "learning_rate": 8.952222186208703e-06, + "loss": 17.4749, + "step": 12778 + }, + { + "epoch": 0.23358985139013289, + "grad_norm": 7.124514106204784, + "learning_rate": 8.952040862226089e-06, + "loss": 17.9767, + "step": 12779 + }, + { + "epoch": 0.23360813058657942, + "grad_norm": 5.489985558523886, + "learning_rate": 8.951859524391912e-06, + "loss": 17.1535, + "step": 12780 + }, + { + "epoch": 0.23362640978302593, + "grad_norm": 6.904053710838957, + "learning_rate": 8.95167817270681e-06, + "loss": 17.8272, + "step": 12781 + }, + { + "epoch": 0.23364468897947246, + "grad_norm": 7.261939271079995, + "learning_rate": 8.95149680717142e-06, + "loss": 17.7751, + "step": 12782 + }, + { + "epoch": 0.233662968175919, + "grad_norm": 8.35605896603481, + "learning_rate": 8.951315427786378e-06, + "loss": 18.2255, + "step": 12783 + }, + { + "epoch": 0.2336812473723655, + "grad_norm": 6.444609110134202, + "learning_rate": 8.951134034552316e-06, + "loss": 17.2849, + "step": 12784 + }, + { + "epoch": 0.23369952656881204, + "grad_norm": 7.343231371580179, + "learning_rate": 8.950952627469873e-06, + "loss": 17.8932, + "step": 12785 + }, + { + "epoch": 0.23371780576525855, + "grad_norm": 6.195981454310583, + "learning_rate": 8.950771206539685e-06, + "loss": 17.4204, + "step": 12786 + }, + { + "epoch": 0.2337360849617051, + "grad_norm": 6.676724333091879, + "learning_rate": 8.950589771762386e-06, + "loss": 17.728, + "step": 12787 + }, + { + "epoch": 0.2337543641581516, + "grad_norm": 7.8657389649096645, + "learning_rate": 8.950408323138612e-06, + "loss": 17.8626, + "step": 12788 + }, + { + "epoch": 0.23377264335459813, + "grad_norm": 6.928884863418598, + "learning_rate": 8.950226860669001e-06, + "loss": 17.6, + "step": 12789 + }, + { + "epoch": 0.23379092255104467, + "grad_norm": 5.847325564075314, + "learning_rate": 8.950045384354185e-06, + "loss": 17.2806, + "step": 
12790 + }, + { + "epoch": 0.23380920174749117, + "grad_norm": 7.902027079744283, + "learning_rate": 8.949863894194806e-06, + "loss": 18.4469, + "step": 12791 + }, + { + "epoch": 0.2338274809439377, + "grad_norm": 6.5891202226334515, + "learning_rate": 8.949682390191495e-06, + "loss": 17.6076, + "step": 12792 + }, + { + "epoch": 0.23384576014038422, + "grad_norm": 9.77565731334052, + "learning_rate": 8.94950087234489e-06, + "loss": 17.8782, + "step": 12793 + }, + { + "epoch": 0.23386403933683075, + "grad_norm": 5.912218694025467, + "learning_rate": 8.949319340655628e-06, + "loss": 17.1442, + "step": 12794 + }, + { + "epoch": 0.2338823185332773, + "grad_norm": 6.861787325914959, + "learning_rate": 8.949137795124342e-06, + "loss": 17.6416, + "step": 12795 + }, + { + "epoch": 0.2339005977297238, + "grad_norm": 7.681415121848571, + "learning_rate": 8.948956235751673e-06, + "loss": 18.4279, + "step": 12796 + }, + { + "epoch": 0.23391887692617033, + "grad_norm": 6.311152196060109, + "learning_rate": 8.948774662538255e-06, + "loss": 17.3943, + "step": 12797 + }, + { + "epoch": 0.23393715612261684, + "grad_norm": 5.858300943469095, + "learning_rate": 8.948593075484724e-06, + "loss": 17.4344, + "step": 12798 + }, + { + "epoch": 0.23395543531906338, + "grad_norm": 7.530169804622749, + "learning_rate": 8.948411474591716e-06, + "loss": 18.3767, + "step": 12799 + }, + { + "epoch": 0.2339737145155099, + "grad_norm": 5.9681224194622615, + "learning_rate": 8.94822985985987e-06, + "loss": 17.1825, + "step": 12800 + }, + { + "epoch": 0.23399199371195642, + "grad_norm": 8.049380654632524, + "learning_rate": 8.948048231289822e-06, + "loss": 18.1197, + "step": 12801 + }, + { + "epoch": 0.23401027290840296, + "grad_norm": 5.58038187881301, + "learning_rate": 8.947866588882204e-06, + "loss": 16.7757, + "step": 12802 + }, + { + "epoch": 0.23402855210484946, + "grad_norm": 5.958354439439862, + "learning_rate": 8.947684932637658e-06, + "loss": 17.3348, + "step": 12803 + }, + { + "epoch": 0.234046831301296, + "grad_norm": 6.689782861703746, + "learning_rate": 8.947503262556819e-06, + "loss": 17.3113, + "step": 12804 + }, + { + "epoch": 0.2340651104977425, + "grad_norm": 6.538472801654387, + "learning_rate": 8.947321578640323e-06, + "loss": 17.3539, + "step": 12805 + }, + { + "epoch": 0.23408338969418904, + "grad_norm": 7.123948310786515, + "learning_rate": 8.94713988088881e-06, + "loss": 18.0318, + "step": 12806 + }, + { + "epoch": 0.23410166889063558, + "grad_norm": 7.1576811407236915, + "learning_rate": 8.94695816930291e-06, + "loss": 17.4988, + "step": 12807 + }, + { + "epoch": 0.23411994808708209, + "grad_norm": 6.831561770504608, + "learning_rate": 8.946776443883267e-06, + "loss": 17.8309, + "step": 12808 + }, + { + "epoch": 0.23413822728352862, + "grad_norm": 6.5033539221865455, + "learning_rate": 8.946594704630514e-06, + "loss": 17.8588, + "step": 12809 + }, + { + "epoch": 0.23415650647997513, + "grad_norm": 8.120063990024946, + "learning_rate": 8.946412951545289e-06, + "loss": 17.649, + "step": 12810 + }, + { + "epoch": 0.23417478567642167, + "grad_norm": 6.680866550908306, + "learning_rate": 8.94623118462823e-06, + "loss": 17.6354, + "step": 12811 + }, + { + "epoch": 0.2341930648728682, + "grad_norm": 6.258034552886107, + "learning_rate": 8.946049403879973e-06, + "loss": 17.4059, + "step": 12812 + }, + { + "epoch": 0.2342113440693147, + "grad_norm": 6.586197990355327, + "learning_rate": 8.945867609301153e-06, + "loss": 17.6528, + "step": 12813 + }, + { + "epoch": 0.23422962326576124, + "grad_norm": 
7.588036149324091, + "learning_rate": 8.945685800892412e-06, + "loss": 18.1779, + "step": 12814 + }, + { + "epoch": 0.23424790246220775, + "grad_norm": 8.872585606885204, + "learning_rate": 8.945503978654384e-06, + "loss": 16.9234, + "step": 12815 + }, + { + "epoch": 0.2342661816586543, + "grad_norm": 6.366790588751804, + "learning_rate": 8.945322142587706e-06, + "loss": 17.4695, + "step": 12816 + }, + { + "epoch": 0.23428446085510082, + "grad_norm": 6.534386797969441, + "learning_rate": 8.945140292693017e-06, + "loss": 17.5107, + "step": 12817 + }, + { + "epoch": 0.23430274005154733, + "grad_norm": 6.230167768905743, + "learning_rate": 8.944958428970954e-06, + "loss": 17.4147, + "step": 12818 + }, + { + "epoch": 0.23432101924799387, + "grad_norm": 5.579383110813056, + "learning_rate": 8.944776551422154e-06, + "loss": 17.3322, + "step": 12819 + }, + { + "epoch": 0.23433929844444037, + "grad_norm": 6.323520705722305, + "learning_rate": 8.944594660047254e-06, + "loss": 17.1582, + "step": 12820 + }, + { + "epoch": 0.2343575776408869, + "grad_norm": 6.766699744550753, + "learning_rate": 8.944412754846892e-06, + "loss": 17.4968, + "step": 12821 + }, + { + "epoch": 0.23437585683733342, + "grad_norm": 6.279371910677322, + "learning_rate": 8.944230835821706e-06, + "loss": 17.4284, + "step": 12822 + }, + { + "epoch": 0.23439413603377995, + "grad_norm": 7.40148709909635, + "learning_rate": 8.944048902972334e-06, + "loss": 18.0285, + "step": 12823 + }, + { + "epoch": 0.2344124152302265, + "grad_norm": 7.320197020027127, + "learning_rate": 8.943866956299413e-06, + "loss": 17.9752, + "step": 12824 + }, + { + "epoch": 0.234430694426673, + "grad_norm": 6.236710783299506, + "learning_rate": 8.943684995803578e-06, + "loss": 17.4248, + "step": 12825 + }, + { + "epoch": 0.23444897362311953, + "grad_norm": 7.525352570988221, + "learning_rate": 8.943503021485472e-06, + "loss": 18.0884, + "step": 12826 + }, + { + "epoch": 0.23446725281956604, + "grad_norm": 6.153165647566515, + "learning_rate": 8.943321033345726e-06, + "loss": 17.4561, + "step": 12827 + }, + { + "epoch": 0.23448553201601258, + "grad_norm": 6.773943377440282, + "learning_rate": 8.943139031384986e-06, + "loss": 17.6015, + "step": 12828 + }, + { + "epoch": 0.2345038112124591, + "grad_norm": 6.637106740298016, + "learning_rate": 8.942957015603883e-06, + "loss": 17.4788, + "step": 12829 + }, + { + "epoch": 0.23452209040890562, + "grad_norm": 5.7709940820779, + "learning_rate": 8.942774986003062e-06, + "loss": 17.2076, + "step": 12830 + }, + { + "epoch": 0.23454036960535216, + "grad_norm": 7.194045688760218, + "learning_rate": 8.942592942583152e-06, + "loss": 18.0743, + "step": 12831 + }, + { + "epoch": 0.23455864880179866, + "grad_norm": 8.773155649550077, + "learning_rate": 8.942410885344798e-06, + "loss": 18.5074, + "step": 12832 + }, + { + "epoch": 0.2345769279982452, + "grad_norm": 6.372220643277621, + "learning_rate": 8.942228814288638e-06, + "loss": 17.4747, + "step": 12833 + }, + { + "epoch": 0.23459520719469173, + "grad_norm": 7.278206166951027, + "learning_rate": 8.942046729415305e-06, + "loss": 17.9158, + "step": 12834 + }, + { + "epoch": 0.23461348639113824, + "grad_norm": 8.174325706353997, + "learning_rate": 8.941864630725442e-06, + "loss": 18.0042, + "step": 12835 + }, + { + "epoch": 0.23463176558758478, + "grad_norm": 6.661736661078449, + "learning_rate": 8.941682518219685e-06, + "loss": 17.703, + "step": 12836 + }, + { + "epoch": 0.2346500447840313, + "grad_norm": 6.85330602778627, + "learning_rate": 8.941500391898672e-06, + "loss": 
17.486, + "step": 12837 + }, + { + "epoch": 0.23466832398047782, + "grad_norm": 6.094453699887702, + "learning_rate": 8.941318251763043e-06, + "loss": 17.5093, + "step": 12838 + }, + { + "epoch": 0.23468660317692433, + "grad_norm": 6.089287552241428, + "learning_rate": 8.941136097813437e-06, + "loss": 17.4301, + "step": 12839 + }, + { + "epoch": 0.23470488237337087, + "grad_norm": 6.786967476713644, + "learning_rate": 8.940953930050488e-06, + "loss": 17.8137, + "step": 12840 + }, + { + "epoch": 0.2347231615698174, + "grad_norm": 6.399359306518545, + "learning_rate": 8.94077174847484e-06, + "loss": 17.1748, + "step": 12841 + }, + { + "epoch": 0.2347414407662639, + "grad_norm": 5.57518412161416, + "learning_rate": 8.940589553087128e-06, + "loss": 17.1147, + "step": 12842 + }, + { + "epoch": 0.23475971996271044, + "grad_norm": 5.686589382383859, + "learning_rate": 8.940407343887991e-06, + "loss": 17.2473, + "step": 12843 + }, + { + "epoch": 0.23477799915915695, + "grad_norm": 7.324812993600869, + "learning_rate": 8.940225120878069e-06, + "loss": 17.9225, + "step": 12844 + }, + { + "epoch": 0.2347962783556035, + "grad_norm": 6.808685104709658, + "learning_rate": 8.940042884058e-06, + "loss": 17.626, + "step": 12845 + }, + { + "epoch": 0.23481455755205002, + "grad_norm": 7.521610994948991, + "learning_rate": 8.93986063342842e-06, + "loss": 17.8172, + "step": 12846 + }, + { + "epoch": 0.23483283674849653, + "grad_norm": 6.801419585653584, + "learning_rate": 8.939678368989973e-06, + "loss": 17.6693, + "step": 12847 + }, + { + "epoch": 0.23485111594494307, + "grad_norm": 6.844620130150307, + "learning_rate": 8.939496090743296e-06, + "loss": 17.5882, + "step": 12848 + }, + { + "epoch": 0.23486939514138958, + "grad_norm": 7.224860326249475, + "learning_rate": 8.939313798689026e-06, + "loss": 17.6952, + "step": 12849 + }, + { + "epoch": 0.2348876743378361, + "grad_norm": 7.1950922179245005, + "learning_rate": 8.939131492827801e-06, + "loss": 17.9068, + "step": 12850 + }, + { + "epoch": 0.23490595353428265, + "grad_norm": 7.036529686621534, + "learning_rate": 8.938949173160266e-06, + "loss": 17.6325, + "step": 12851 + }, + { + "epoch": 0.23492423273072915, + "grad_norm": 5.548452549926464, + "learning_rate": 8.938766839687053e-06, + "loss": 16.9409, + "step": 12852 + }, + { + "epoch": 0.2349425119271757, + "grad_norm": 6.460058088370336, + "learning_rate": 8.938584492408805e-06, + "loss": 17.7953, + "step": 12853 + }, + { + "epoch": 0.2349607911236222, + "grad_norm": 5.865582744887052, + "learning_rate": 8.938402131326158e-06, + "loss": 17.3237, + "step": 12854 + }, + { + "epoch": 0.23497907032006873, + "grad_norm": 5.507687234652714, + "learning_rate": 8.938219756439755e-06, + "loss": 16.9353, + "step": 12855 + }, + { + "epoch": 0.23499734951651524, + "grad_norm": 6.740356373881581, + "learning_rate": 8.938037367750234e-06, + "loss": 17.8141, + "step": 12856 + }, + { + "epoch": 0.23501562871296178, + "grad_norm": 8.740237844071164, + "learning_rate": 8.937854965258234e-06, + "loss": 18.3672, + "step": 12857 + }, + { + "epoch": 0.2350339079094083, + "grad_norm": 6.841865049004293, + "learning_rate": 8.937672548964394e-06, + "loss": 17.5426, + "step": 12858 + }, + { + "epoch": 0.23505218710585482, + "grad_norm": 6.5395683840692, + "learning_rate": 8.937490118869353e-06, + "loss": 17.7061, + "step": 12859 + }, + { + "epoch": 0.23507046630230136, + "grad_norm": 7.60807010734477, + "learning_rate": 8.93730767497375e-06, + "loss": 17.8479, + "step": 12860 + }, + { + "epoch": 0.23508874549874786, + 
"grad_norm": 8.067536951243808, + "learning_rate": 8.937125217278225e-06, + "loss": 17.991, + "step": 12861 + }, + { + "epoch": 0.2351070246951944, + "grad_norm": 9.064851064343651, + "learning_rate": 8.936942745783419e-06, + "loss": 18.023, + "step": 12862 + }, + { + "epoch": 0.23512530389164094, + "grad_norm": 6.907223170853111, + "learning_rate": 8.93676026048997e-06, + "loss": 17.6457, + "step": 12863 + }, + { + "epoch": 0.23514358308808744, + "grad_norm": 6.0732925254634695, + "learning_rate": 8.936577761398517e-06, + "loss": 17.3544, + "step": 12864 + }, + { + "epoch": 0.23516186228453398, + "grad_norm": 6.541697149923802, + "learning_rate": 8.936395248509701e-06, + "loss": 17.4098, + "step": 12865 + }, + { + "epoch": 0.2351801414809805, + "grad_norm": 7.675510136049659, + "learning_rate": 8.936212721824163e-06, + "loss": 18.1187, + "step": 12866 + }, + { + "epoch": 0.23519842067742702, + "grad_norm": 5.363468558491413, + "learning_rate": 8.936030181342538e-06, + "loss": 17.0235, + "step": 12867 + }, + { + "epoch": 0.23521669987387356, + "grad_norm": 7.878788765603472, + "learning_rate": 8.93584762706547e-06, + "loss": 18.066, + "step": 12868 + }, + { + "epoch": 0.23523497907032007, + "grad_norm": 6.932014476267707, + "learning_rate": 8.9356650589936e-06, + "loss": 17.7187, + "step": 12869 + }, + { + "epoch": 0.2352532582667666, + "grad_norm": 7.661415455649792, + "learning_rate": 8.935482477127562e-06, + "loss": 18.097, + "step": 12870 + }, + { + "epoch": 0.2352715374632131, + "grad_norm": 6.603252898003402, + "learning_rate": 8.935299881468e-06, + "loss": 17.6628, + "step": 12871 + }, + { + "epoch": 0.23528981665965965, + "grad_norm": 7.8976845859302935, + "learning_rate": 8.935117272015556e-06, + "loss": 17.9888, + "step": 12872 + }, + { + "epoch": 0.23530809585610615, + "grad_norm": 6.789037235858639, + "learning_rate": 8.934934648770865e-06, + "loss": 17.3038, + "step": 12873 + }, + { + "epoch": 0.2353263750525527, + "grad_norm": 7.693775745814941, + "learning_rate": 8.93475201173457e-06, + "loss": 17.7252, + "step": 12874 + }, + { + "epoch": 0.23534465424899922, + "grad_norm": 6.818681729605643, + "learning_rate": 8.934569360907311e-06, + "loss": 17.5436, + "step": 12875 + }, + { + "epoch": 0.23536293344544573, + "grad_norm": 5.904524982784955, + "learning_rate": 8.934386696289728e-06, + "loss": 17.2418, + "step": 12876 + }, + { + "epoch": 0.23538121264189227, + "grad_norm": 6.5986397666194, + "learning_rate": 8.93420401788246e-06, + "loss": 17.5913, + "step": 12877 + }, + { + "epoch": 0.23539949183833878, + "grad_norm": 6.815460505642056, + "learning_rate": 8.934021325686149e-06, + "loss": 17.6721, + "step": 12878 + }, + { + "epoch": 0.2354177710347853, + "grad_norm": 6.401695212403851, + "learning_rate": 8.933838619701435e-06, + "loss": 17.3721, + "step": 12879 + }, + { + "epoch": 0.23543605023123185, + "grad_norm": 8.346930827955845, + "learning_rate": 8.933655899928958e-06, + "loss": 18.7162, + "step": 12880 + }, + { + "epoch": 0.23545432942767835, + "grad_norm": 7.052508918572586, + "learning_rate": 8.933473166369358e-06, + "loss": 18.0449, + "step": 12881 + }, + { + "epoch": 0.2354726086241249, + "grad_norm": 8.469230945531697, + "learning_rate": 8.933290419023276e-06, + "loss": 17.8177, + "step": 12882 + }, + { + "epoch": 0.2354908878205714, + "grad_norm": 4.87375639354391, + "learning_rate": 8.933107657891352e-06, + "loss": 16.851, + "step": 12883 + }, + { + "epoch": 0.23550916701701793, + "grad_norm": 6.521544436001019, + "learning_rate": 8.932924882974228e-06, + 
"loss": 17.6313, + "step": 12884 + }, + { + "epoch": 0.23552744621346447, + "grad_norm": 6.171467419393666, + "learning_rate": 8.932742094272541e-06, + "loss": 17.2186, + "step": 12885 + }, + { + "epoch": 0.23554572540991098, + "grad_norm": 7.055093229422603, + "learning_rate": 8.932559291786937e-06, + "loss": 17.8389, + "step": 12886 + }, + { + "epoch": 0.2355640046063575, + "grad_norm": 6.894632962673046, + "learning_rate": 8.932376475518054e-06, + "loss": 17.9723, + "step": 12887 + }, + { + "epoch": 0.23558228380280402, + "grad_norm": 6.846858043183039, + "learning_rate": 8.932193645466531e-06, + "loss": 17.7025, + "step": 12888 + }, + { + "epoch": 0.23560056299925056, + "grad_norm": 7.236398562057061, + "learning_rate": 8.93201080163301e-06, + "loss": 17.7761, + "step": 12889 + }, + { + "epoch": 0.23561884219569706, + "grad_norm": 7.315558731868937, + "learning_rate": 8.931827944018134e-06, + "loss": 18.054, + "step": 12890 + }, + { + "epoch": 0.2356371213921436, + "grad_norm": 8.255749287326358, + "learning_rate": 8.931645072622544e-06, + "loss": 18.0403, + "step": 12891 + }, + { + "epoch": 0.23565540058859014, + "grad_norm": 6.367375865660743, + "learning_rate": 8.931462187446875e-06, + "loss": 17.4986, + "step": 12892 + }, + { + "epoch": 0.23567367978503664, + "grad_norm": 7.502225720027451, + "learning_rate": 8.931279288491774e-06, + "loss": 17.772, + "step": 12893 + }, + { + "epoch": 0.23569195898148318, + "grad_norm": 5.609192326773627, + "learning_rate": 8.931096375757882e-06, + "loss": 17.3737, + "step": 12894 + }, + { + "epoch": 0.2357102381779297, + "grad_norm": 8.235533754702457, + "learning_rate": 8.930913449245836e-06, + "loss": 18.11, + "step": 12895 + }, + { + "epoch": 0.23572851737437622, + "grad_norm": 6.837312824543016, + "learning_rate": 8.93073050895628e-06, + "loss": 17.6328, + "step": 12896 + }, + { + "epoch": 0.23574679657082276, + "grad_norm": 6.357263898915531, + "learning_rate": 8.930547554889854e-06, + "loss": 17.4212, + "step": 12897 + }, + { + "epoch": 0.23576507576726927, + "grad_norm": 6.196444524082612, + "learning_rate": 8.930364587047202e-06, + "loss": 17.5793, + "step": 12898 + }, + { + "epoch": 0.2357833549637158, + "grad_norm": 6.513390266384138, + "learning_rate": 8.930181605428962e-06, + "loss": 17.5772, + "step": 12899 + }, + { + "epoch": 0.2358016341601623, + "grad_norm": 7.346719368535289, + "learning_rate": 8.929998610035777e-06, + "loss": 17.6624, + "step": 12900 + }, + { + "epoch": 0.23581991335660885, + "grad_norm": 6.863613585163401, + "learning_rate": 8.929815600868286e-06, + "loss": 17.5932, + "step": 12901 + }, + { + "epoch": 0.23583819255305538, + "grad_norm": 6.334064267736984, + "learning_rate": 8.929632577927133e-06, + "loss": 17.518, + "step": 12902 + }, + { + "epoch": 0.2358564717495019, + "grad_norm": 7.053893819976255, + "learning_rate": 8.92944954121296e-06, + "loss": 17.9189, + "step": 12903 + }, + { + "epoch": 0.23587475094594842, + "grad_norm": 7.980046746522379, + "learning_rate": 8.929266490726408e-06, + "loss": 17.9997, + "step": 12904 + }, + { + "epoch": 0.23589303014239493, + "grad_norm": 6.962348750119907, + "learning_rate": 8.929083426468117e-06, + "loss": 17.6001, + "step": 12905 + }, + { + "epoch": 0.23591130933884147, + "grad_norm": 7.442237121091488, + "learning_rate": 8.92890034843873e-06, + "loss": 17.8534, + "step": 12906 + }, + { + "epoch": 0.23592958853528798, + "grad_norm": 6.388804993595424, + "learning_rate": 8.928717256638887e-06, + "loss": 17.6104, + "step": 12907 + }, + { + "epoch": 0.2359478677317345, 
+ "grad_norm": 5.611522836208369, + "learning_rate": 8.928534151069231e-06, + "loss": 17.0817, + "step": 12908 + }, + { + "epoch": 0.23596614692818105, + "grad_norm": 7.23191169122668, + "learning_rate": 8.928351031730405e-06, + "loss": 17.8185, + "step": 12909 + }, + { + "epoch": 0.23598442612462756, + "grad_norm": 6.204385501247323, + "learning_rate": 8.928167898623048e-06, + "loss": 17.4054, + "step": 12910 + }, + { + "epoch": 0.2360027053210741, + "grad_norm": 8.513596232673757, + "learning_rate": 8.927984751747805e-06, + "loss": 18.1255, + "step": 12911 + }, + { + "epoch": 0.2360209845175206, + "grad_norm": 6.8787495159890035, + "learning_rate": 8.927801591105314e-06, + "loss": 17.8178, + "step": 12912 + }, + { + "epoch": 0.23603926371396713, + "grad_norm": 7.396079286809878, + "learning_rate": 8.92761841669622e-06, + "loss": 17.8402, + "step": 12913 + }, + { + "epoch": 0.23605754291041367, + "grad_norm": 6.512736366078796, + "learning_rate": 8.927435228521166e-06, + "loss": 17.4054, + "step": 12914 + }, + { + "epoch": 0.23607582210686018, + "grad_norm": 7.197978482763735, + "learning_rate": 8.92725202658079e-06, + "loss": 17.8006, + "step": 12915 + }, + { + "epoch": 0.2360941013033067, + "grad_norm": 6.898895637699667, + "learning_rate": 8.927068810875739e-06, + "loss": 17.586, + "step": 12916 + }, + { + "epoch": 0.23611238049975322, + "grad_norm": 6.525968119741383, + "learning_rate": 8.92688558140665e-06, + "loss": 17.5909, + "step": 12917 + }, + { + "epoch": 0.23613065969619976, + "grad_norm": 7.4227272985848565, + "learning_rate": 8.92670233817417e-06, + "loss": 17.6597, + "step": 12918 + }, + { + "epoch": 0.2361489388926463, + "grad_norm": 5.584828933954418, + "learning_rate": 8.926519081178938e-06, + "loss": 17.2085, + "step": 12919 + }, + { + "epoch": 0.2361672180890928, + "grad_norm": 6.658550046066718, + "learning_rate": 8.926335810421598e-06, + "loss": 17.8171, + "step": 12920 + }, + { + "epoch": 0.23618549728553934, + "grad_norm": 7.349185977238887, + "learning_rate": 8.926152525902792e-06, + "loss": 18.0539, + "step": 12921 + }, + { + "epoch": 0.23620377648198584, + "grad_norm": 6.463275205700063, + "learning_rate": 8.92596922762316e-06, + "loss": 17.5902, + "step": 12922 + }, + { + "epoch": 0.23622205567843238, + "grad_norm": 8.628181653084809, + "learning_rate": 8.925785915583348e-06, + "loss": 17.5339, + "step": 12923 + }, + { + "epoch": 0.2362403348748789, + "grad_norm": 8.540834670203148, + "learning_rate": 8.925602589783996e-06, + "loss": 18.7885, + "step": 12924 + }, + { + "epoch": 0.23625861407132542, + "grad_norm": 5.297173130657822, + "learning_rate": 8.925419250225748e-06, + "loss": 16.9093, + "step": 12925 + }, + { + "epoch": 0.23627689326777196, + "grad_norm": 6.726903272632686, + "learning_rate": 8.925235896909249e-06, + "loss": 17.4664, + "step": 12926 + }, + { + "epoch": 0.23629517246421847, + "grad_norm": 6.332665712850595, + "learning_rate": 8.925052529835135e-06, + "loss": 17.659, + "step": 12927 + }, + { + "epoch": 0.236313451660665, + "grad_norm": 7.600987729347651, + "learning_rate": 8.924869149004054e-06, + "loss": 18.2163, + "step": 12928 + }, + { + "epoch": 0.2363317308571115, + "grad_norm": 5.9369767914865115, + "learning_rate": 8.924685754416647e-06, + "loss": 17.4254, + "step": 12929 + }, + { + "epoch": 0.23635001005355805, + "grad_norm": 7.368030724782585, + "learning_rate": 8.924502346073557e-06, + "loss": 17.8777, + "step": 12930 + }, + { + "epoch": 0.23636828925000458, + "grad_norm": 6.516669152894072, + "learning_rate": 
8.924318923975427e-06, + "loss": 17.4103, + "step": 12931 + }, + { + "epoch": 0.2363865684464511, + "grad_norm": 7.678784942366478, + "learning_rate": 8.924135488122901e-06, + "loss": 17.7098, + "step": 12932 + }, + { + "epoch": 0.23640484764289763, + "grad_norm": 7.832311221067831, + "learning_rate": 8.923952038516618e-06, + "loss": 18.3829, + "step": 12933 + }, + { + "epoch": 0.23642312683934413, + "grad_norm": 6.78578352786296, + "learning_rate": 8.923768575157225e-06, + "loss": 17.9866, + "step": 12934 + }, + { + "epoch": 0.23644140603579067, + "grad_norm": 8.651035737802951, + "learning_rate": 8.923585098045362e-06, + "loss": 17.7727, + "step": 12935 + }, + { + "epoch": 0.2364596852322372, + "grad_norm": 13.048462594403414, + "learning_rate": 8.923401607181676e-06, + "loss": 17.1627, + "step": 12936 + }, + { + "epoch": 0.2364779644286837, + "grad_norm": 5.5618522535398345, + "learning_rate": 8.923218102566807e-06, + "loss": 17.355, + "step": 12937 + }, + { + "epoch": 0.23649624362513025, + "grad_norm": 5.53513557869465, + "learning_rate": 8.923034584201399e-06, + "loss": 17.1682, + "step": 12938 + }, + { + "epoch": 0.23651452282157676, + "grad_norm": 6.201165463046301, + "learning_rate": 8.922851052086095e-06, + "loss": 17.4474, + "step": 12939 + }, + { + "epoch": 0.2365328020180233, + "grad_norm": 6.598837901107383, + "learning_rate": 8.922667506221538e-06, + "loss": 17.4188, + "step": 12940 + }, + { + "epoch": 0.2365510812144698, + "grad_norm": 8.209426477597665, + "learning_rate": 8.922483946608373e-06, + "loss": 17.8844, + "step": 12941 + }, + { + "epoch": 0.23656936041091634, + "grad_norm": 6.694747798026469, + "learning_rate": 8.922300373247243e-06, + "loss": 17.669, + "step": 12942 + }, + { + "epoch": 0.23658763960736287, + "grad_norm": 8.050533141119972, + "learning_rate": 8.922116786138787e-06, + "loss": 18.3496, + "step": 12943 + }, + { + "epoch": 0.23660591880380938, + "grad_norm": 8.172578495070955, + "learning_rate": 8.921933185283655e-06, + "loss": 18.6932, + "step": 12944 + }, + { + "epoch": 0.23662419800025591, + "grad_norm": 6.267768583441307, + "learning_rate": 8.921749570682487e-06, + "loss": 17.3662, + "step": 12945 + }, + { + "epoch": 0.23664247719670242, + "grad_norm": 6.258043866978442, + "learning_rate": 8.921565942335926e-06, + "loss": 17.7663, + "step": 12946 + }, + { + "epoch": 0.23666075639314896, + "grad_norm": 8.38711526934771, + "learning_rate": 8.92138230024462e-06, + "loss": 18.1128, + "step": 12947 + }, + { + "epoch": 0.2366790355895955, + "grad_norm": 6.5203555330921095, + "learning_rate": 8.921198644409205e-06, + "loss": 17.3761, + "step": 12948 + }, + { + "epoch": 0.236697314786042, + "grad_norm": 7.421198378402833, + "learning_rate": 8.92101497483033e-06, + "loss": 17.9431, + "step": 12949 + }, + { + "epoch": 0.23671559398248854, + "grad_norm": 7.692947297720974, + "learning_rate": 8.92083129150864e-06, + "loss": 17.9351, + "step": 12950 + }, + { + "epoch": 0.23673387317893504, + "grad_norm": 6.8739816391982735, + "learning_rate": 8.920647594444774e-06, + "loss": 17.3881, + "step": 12951 + }, + { + "epoch": 0.23675215237538158, + "grad_norm": 10.080419192852398, + "learning_rate": 8.92046388363938e-06, + "loss": 17.6047, + "step": 12952 + }, + { + "epoch": 0.23677043157182812, + "grad_norm": 5.3604729480078825, + "learning_rate": 8.9202801590931e-06, + "loss": 17.0675, + "step": 12953 + }, + { + "epoch": 0.23678871076827462, + "grad_norm": 8.015903862743421, + "learning_rate": 8.920096420806578e-06, + "loss": 17.9858, + "step": 12954 + }, + { + 
"epoch": 0.23680698996472116, + "grad_norm": 7.491044387526426, + "learning_rate": 8.919912668780458e-06, + "loss": 17.679, + "step": 12955 + }, + { + "epoch": 0.23682526916116767, + "grad_norm": 8.120830097300725, + "learning_rate": 8.919728903015383e-06, + "loss": 18.2456, + "step": 12956 + }, + { + "epoch": 0.2368435483576142, + "grad_norm": 6.5733303166914725, + "learning_rate": 8.919545123512001e-06, + "loss": 17.4546, + "step": 12957 + }, + { + "epoch": 0.2368618275540607, + "grad_norm": 7.039356996593743, + "learning_rate": 8.919361330270953e-06, + "loss": 17.6232, + "step": 12958 + }, + { + "epoch": 0.23688010675050725, + "grad_norm": 5.745232514859346, + "learning_rate": 8.919177523292882e-06, + "loss": 17.0815, + "step": 12959 + }, + { + "epoch": 0.23689838594695378, + "grad_norm": 7.6415996339346, + "learning_rate": 8.918993702578435e-06, + "loss": 17.7463, + "step": 12960 + }, + { + "epoch": 0.2369166651434003, + "grad_norm": 6.798791473928604, + "learning_rate": 8.918809868128255e-06, + "loss": 17.4995, + "step": 12961 + }, + { + "epoch": 0.23693494433984683, + "grad_norm": 7.3922512658335915, + "learning_rate": 8.918626019942987e-06, + "loss": 17.8245, + "step": 12962 + }, + { + "epoch": 0.23695322353629333, + "grad_norm": 6.017111653920449, + "learning_rate": 8.918442158023272e-06, + "loss": 17.3875, + "step": 12963 + }, + { + "epoch": 0.23697150273273987, + "grad_norm": 6.253744335544148, + "learning_rate": 8.91825828236976e-06, + "loss": 17.5898, + "step": 12964 + }, + { + "epoch": 0.2369897819291864, + "grad_norm": 5.89477590318735, + "learning_rate": 8.918074392983093e-06, + "loss": 17.1829, + "step": 12965 + }, + { + "epoch": 0.2370080611256329, + "grad_norm": 6.316536465879392, + "learning_rate": 8.917890489863915e-06, + "loss": 17.3599, + "step": 12966 + }, + { + "epoch": 0.23702634032207945, + "grad_norm": 6.1109856005017855, + "learning_rate": 8.91770657301287e-06, + "loss": 17.5658, + "step": 12967 + }, + { + "epoch": 0.23704461951852596, + "grad_norm": 6.237782407754986, + "learning_rate": 8.917522642430603e-06, + "loss": 17.3666, + "step": 12968 + }, + { + "epoch": 0.2370628987149725, + "grad_norm": 5.6297601624356215, + "learning_rate": 8.91733869811776e-06, + "loss": 17.3055, + "step": 12969 + }, + { + "epoch": 0.23708117791141903, + "grad_norm": 6.639135237707821, + "learning_rate": 8.917154740074984e-06, + "loss": 17.4063, + "step": 12970 + }, + { + "epoch": 0.23709945710786554, + "grad_norm": 9.328502384457716, + "learning_rate": 8.916970768302921e-06, + "loss": 18.093, + "step": 12971 + }, + { + "epoch": 0.23711773630431207, + "grad_norm": 8.167995637422464, + "learning_rate": 8.916786782802216e-06, + "loss": 17.8628, + "step": 12972 + }, + { + "epoch": 0.23713601550075858, + "grad_norm": 6.9413891111221, + "learning_rate": 8.916602783573514e-06, + "loss": 17.4451, + "step": 12973 + }, + { + "epoch": 0.23715429469720511, + "grad_norm": 5.304652642268245, + "learning_rate": 8.916418770617457e-06, + "loss": 17.0545, + "step": 12974 + }, + { + "epoch": 0.23717257389365162, + "grad_norm": 5.701778711333664, + "learning_rate": 8.916234743934693e-06, + "loss": 17.1716, + "step": 12975 + }, + { + "epoch": 0.23719085309009816, + "grad_norm": 5.964328240908642, + "learning_rate": 8.916050703525867e-06, + "loss": 17.4856, + "step": 12976 + }, + { + "epoch": 0.2372091322865447, + "grad_norm": 7.325778422518426, + "learning_rate": 8.91586664939162e-06, + "loss": 18.1201, + "step": 12977 + }, + { + "epoch": 0.2372274114829912, + "grad_norm": 7.438311462390998, + 
"learning_rate": 8.915682581532604e-06, + "loss": 17.6801, + "step": 12978 + }, + { + "epoch": 0.23724569067943774, + "grad_norm": 7.920146329375921, + "learning_rate": 8.915498499949458e-06, + "loss": 17.7486, + "step": 12979 + }, + { + "epoch": 0.23726396987588425, + "grad_norm": 6.617229114538101, + "learning_rate": 8.91531440464283e-06, + "loss": 17.3874, + "step": 12980 + }, + { + "epoch": 0.23728224907233078, + "grad_norm": 13.694352843739813, + "learning_rate": 8.915130295613364e-06, + "loss": 17.8205, + "step": 12981 + }, + { + "epoch": 0.23730052826877732, + "grad_norm": 7.00801840795046, + "learning_rate": 8.914946172861707e-06, + "loss": 17.4743, + "step": 12982 + }, + { + "epoch": 0.23731880746522382, + "grad_norm": 6.043636742624499, + "learning_rate": 8.914762036388504e-06, + "loss": 17.3542, + "step": 12983 + }, + { + "epoch": 0.23733708666167036, + "grad_norm": 6.713773364585318, + "learning_rate": 8.914577886194399e-06, + "loss": 17.7853, + "step": 12984 + }, + { + "epoch": 0.23735536585811687, + "grad_norm": 8.468798535179605, + "learning_rate": 8.914393722280039e-06, + "loss": 17.895, + "step": 12985 + }, + { + "epoch": 0.2373736450545634, + "grad_norm": 6.485210633111785, + "learning_rate": 8.914209544646066e-06, + "loss": 17.6248, + "step": 12986 + }, + { + "epoch": 0.23739192425100994, + "grad_norm": 6.579365789610776, + "learning_rate": 8.914025353293132e-06, + "loss": 17.8761, + "step": 12987 + }, + { + "epoch": 0.23741020344745645, + "grad_norm": 6.270679818890554, + "learning_rate": 8.913841148221875e-06, + "loss": 17.2867, + "step": 12988 + }, + { + "epoch": 0.23742848264390298, + "grad_norm": 7.067963757021928, + "learning_rate": 8.913656929432948e-06, + "loss": 17.7708, + "step": 12989 + }, + { + "epoch": 0.2374467618403495, + "grad_norm": 6.913535836401947, + "learning_rate": 8.91347269692699e-06, + "loss": 17.6606, + "step": 12990 + }, + { + "epoch": 0.23746504103679603, + "grad_norm": 8.262615181485657, + "learning_rate": 8.913288450704653e-06, + "loss": 18.3434, + "step": 12991 + }, + { + "epoch": 0.23748332023324253, + "grad_norm": 6.780892302162567, + "learning_rate": 8.913104190766577e-06, + "loss": 17.4476, + "step": 12992 + }, + { + "epoch": 0.23750159942968907, + "grad_norm": 5.48441401148886, + "learning_rate": 8.912919917113412e-06, + "loss": 17.1764, + "step": 12993 + }, + { + "epoch": 0.2375198786261356, + "grad_norm": 8.026787375146164, + "learning_rate": 8.9127356297458e-06, + "loss": 18.0069, + "step": 12994 + }, + { + "epoch": 0.2375381578225821, + "grad_norm": 7.492718384645257, + "learning_rate": 8.912551328664392e-06, + "loss": 18.2687, + "step": 12995 + }, + { + "epoch": 0.23755643701902865, + "grad_norm": 7.000165263766671, + "learning_rate": 8.91236701386983e-06, + "loss": 17.9321, + "step": 12996 + }, + { + "epoch": 0.23757471621547516, + "grad_norm": 7.877236264654493, + "learning_rate": 8.91218268536276e-06, + "loss": 17.5602, + "step": 12997 + }, + { + "epoch": 0.2375929954119217, + "grad_norm": 6.946470021963183, + "learning_rate": 8.91199834314383e-06, + "loss": 17.5178, + "step": 12998 + }, + { + "epoch": 0.23761127460836823, + "grad_norm": 5.90998841843763, + "learning_rate": 8.911813987213685e-06, + "loss": 17.1298, + "step": 12999 + }, + { + "epoch": 0.23762955380481474, + "grad_norm": 6.551602095948039, + "learning_rate": 8.911629617572971e-06, + "loss": 17.5774, + "step": 13000 + }, + { + "epoch": 0.23764783300126127, + "grad_norm": 5.662640598811488, + "learning_rate": 8.911445234222335e-06, + "loss": 17.0583, + "step": 
13001 + }, + { + "epoch": 0.23766611219770778, + "grad_norm": 7.234619571718053, + "learning_rate": 8.911260837162423e-06, + "loss": 18.1144, + "step": 13002 + }, + { + "epoch": 0.23768439139415432, + "grad_norm": 6.715066588459917, + "learning_rate": 8.911076426393881e-06, + "loss": 17.698, + "step": 13003 + }, + { + "epoch": 0.23770267059060085, + "grad_norm": 6.230777428028448, + "learning_rate": 8.910892001917357e-06, + "loss": 17.3361, + "step": 13004 + }, + { + "epoch": 0.23772094978704736, + "grad_norm": 6.988059654715737, + "learning_rate": 8.910707563733495e-06, + "loss": 17.3595, + "step": 13005 + }, + { + "epoch": 0.2377392289834939, + "grad_norm": 7.159736432205771, + "learning_rate": 8.910523111842942e-06, + "loss": 17.7156, + "step": 13006 + }, + { + "epoch": 0.2377575081799404, + "grad_norm": 7.3308498510596225, + "learning_rate": 8.910338646246344e-06, + "loss": 17.9566, + "step": 13007 + }, + { + "epoch": 0.23777578737638694, + "grad_norm": 7.0589468435521825, + "learning_rate": 8.910154166944348e-06, + "loss": 17.6011, + "step": 13008 + }, + { + "epoch": 0.23779406657283345, + "grad_norm": 6.9181011176341585, + "learning_rate": 8.909969673937603e-06, + "loss": 17.7502, + "step": 13009 + }, + { + "epoch": 0.23781234576927998, + "grad_norm": 6.4022308663041985, + "learning_rate": 8.909785167226755e-06, + "loss": 17.4139, + "step": 13010 + }, + { + "epoch": 0.23783062496572652, + "grad_norm": 6.066161465753598, + "learning_rate": 8.909600646812446e-06, + "loss": 17.4185, + "step": 13011 + }, + { + "epoch": 0.23784890416217302, + "grad_norm": 8.197393433135085, + "learning_rate": 8.909416112695327e-06, + "loss": 18.1142, + "step": 13012 + }, + { + "epoch": 0.23786718335861956, + "grad_norm": 6.369683808467332, + "learning_rate": 8.909231564876045e-06, + "loss": 17.5209, + "step": 13013 + }, + { + "epoch": 0.23788546255506607, + "grad_norm": 7.031029507963712, + "learning_rate": 8.909047003355244e-06, + "loss": 17.7567, + "step": 13014 + }, + { + "epoch": 0.2379037417515126, + "grad_norm": 7.37520805755393, + "learning_rate": 8.90886242813357e-06, + "loss": 18.1819, + "step": 13015 + }, + { + "epoch": 0.23792202094795914, + "grad_norm": 6.454543253959291, + "learning_rate": 8.908677839211677e-06, + "loss": 17.4121, + "step": 13016 + }, + { + "epoch": 0.23794030014440565, + "grad_norm": 7.113403509963748, + "learning_rate": 8.908493236590206e-06, + "loss": 17.695, + "step": 13017 + }, + { + "epoch": 0.23795857934085218, + "grad_norm": 5.791101109650206, + "learning_rate": 8.908308620269806e-06, + "loss": 17.2096, + "step": 13018 + }, + { + "epoch": 0.2379768585372987, + "grad_norm": 6.6434223453450105, + "learning_rate": 8.908123990251121e-06, + "loss": 17.7579, + "step": 13019 + }, + { + "epoch": 0.23799513773374523, + "grad_norm": 5.8890298099976075, + "learning_rate": 8.907939346534802e-06, + "loss": 17.3712, + "step": 13020 + }, + { + "epoch": 0.23801341693019176, + "grad_norm": 6.076916740522336, + "learning_rate": 8.907754689121495e-06, + "loss": 17.2601, + "step": 13021 + }, + { + "epoch": 0.23803169612663827, + "grad_norm": 6.499479833887206, + "learning_rate": 8.907570018011846e-06, + "loss": 17.448, + "step": 13022 + }, + { + "epoch": 0.2380499753230848, + "grad_norm": 6.908927066972406, + "learning_rate": 8.907385333206505e-06, + "loss": 17.8089, + "step": 13023 + }, + { + "epoch": 0.2380682545195313, + "grad_norm": 9.149487012087734, + "learning_rate": 8.907200634706116e-06, + "loss": 18.5294, + "step": 13024 + }, + { + "epoch": 0.23808653371597785, + "grad_norm": 
7.6501435084001175, + "learning_rate": 8.907015922511329e-06, + "loss": 18.1171, + "step": 13025 + }, + { + "epoch": 0.23810481291242436, + "grad_norm": 6.763376327046655, + "learning_rate": 8.906831196622787e-06, + "loss": 17.5626, + "step": 13026 + }, + { + "epoch": 0.2381230921088709, + "grad_norm": 7.702371224133755, + "learning_rate": 8.906646457041144e-06, + "loss": 17.7776, + "step": 13027 + }, + { + "epoch": 0.23814137130531743, + "grad_norm": 7.977697597983092, + "learning_rate": 8.906461703767043e-06, + "loss": 18.0928, + "step": 13028 + }, + { + "epoch": 0.23815965050176394, + "grad_norm": 5.379696040297563, + "learning_rate": 8.906276936801132e-06, + "loss": 17.231, + "step": 13029 + }, + { + "epoch": 0.23817792969821047, + "grad_norm": 6.577581666634239, + "learning_rate": 8.906092156144062e-06, + "loss": 17.7253, + "step": 13030 + }, + { + "epoch": 0.23819620889465698, + "grad_norm": 8.389811716463779, + "learning_rate": 8.905907361796476e-06, + "loss": 17.9966, + "step": 13031 + }, + { + "epoch": 0.23821448809110352, + "grad_norm": 6.530808002128086, + "learning_rate": 8.905722553759023e-06, + "loss": 17.6125, + "step": 13032 + }, + { + "epoch": 0.23823276728755005, + "grad_norm": 6.515670033066622, + "learning_rate": 8.905537732032352e-06, + "loss": 17.6081, + "step": 13033 + }, + { + "epoch": 0.23825104648399656, + "grad_norm": 5.760940434326444, + "learning_rate": 8.90535289661711e-06, + "loss": 17.2928, + "step": 13034 + }, + { + "epoch": 0.2382693256804431, + "grad_norm": 8.140661054656807, + "learning_rate": 8.905168047513945e-06, + "loss": 18.464, + "step": 13035 + }, + { + "epoch": 0.2382876048768896, + "grad_norm": 5.78604602671709, + "learning_rate": 8.904983184723505e-06, + "loss": 17.4396, + "step": 13036 + }, + { + "epoch": 0.23830588407333614, + "grad_norm": 7.311959534577604, + "learning_rate": 8.90479830824644e-06, + "loss": 17.2399, + "step": 13037 + }, + { + "epoch": 0.23832416326978267, + "grad_norm": 7.868191176366132, + "learning_rate": 8.904613418083393e-06, + "loss": 17.9482, + "step": 13038 + }, + { + "epoch": 0.23834244246622918, + "grad_norm": 7.234337688278188, + "learning_rate": 8.904428514235016e-06, + "loss": 17.7567, + "step": 13039 + }, + { + "epoch": 0.23836072166267572, + "grad_norm": 6.489097226413424, + "learning_rate": 8.904243596701954e-06, + "loss": 17.5105, + "step": 13040 + }, + { + "epoch": 0.23837900085912223, + "grad_norm": 5.794715028733325, + "learning_rate": 8.904058665484859e-06, + "loss": 17.4734, + "step": 13041 + }, + { + "epoch": 0.23839728005556876, + "grad_norm": 6.8670725460927295, + "learning_rate": 8.903873720584376e-06, + "loss": 17.6704, + "step": 13042 + }, + { + "epoch": 0.23841555925201527, + "grad_norm": 6.4415121819820165, + "learning_rate": 8.903688762001154e-06, + "loss": 17.5562, + "step": 13043 + }, + { + "epoch": 0.2384338384484618, + "grad_norm": 7.108884199427061, + "learning_rate": 8.903503789735843e-06, + "loss": 17.8931, + "step": 13044 + }, + { + "epoch": 0.23845211764490834, + "grad_norm": 7.446173452101878, + "learning_rate": 8.903318803789089e-06, + "loss": 18.1885, + "step": 13045 + }, + { + "epoch": 0.23847039684135485, + "grad_norm": 7.67893773097202, + "learning_rate": 8.903133804161543e-06, + "loss": 17.9477, + "step": 13046 + }, + { + "epoch": 0.23848867603780138, + "grad_norm": 6.6717209776126865, + "learning_rate": 8.90294879085385e-06, + "loss": 17.8488, + "step": 13047 + }, + { + "epoch": 0.2385069552342479, + "grad_norm": 6.984363402847883, + "learning_rate": 8.902763763866661e-06, + 
"loss": 17.969, + "step": 13048 + }, + { + "epoch": 0.23852523443069443, + "grad_norm": 5.666023704553353, + "learning_rate": 8.902578723200623e-06, + "loss": 17.2028, + "step": 13049 + }, + { + "epoch": 0.23854351362714096, + "grad_norm": 6.75865032362205, + "learning_rate": 8.902393668856386e-06, + "loss": 17.4377, + "step": 13050 + }, + { + "epoch": 0.23856179282358747, + "grad_norm": 9.262343010243793, + "learning_rate": 8.902208600834596e-06, + "loss": 18.5766, + "step": 13051 + }, + { + "epoch": 0.238580072020034, + "grad_norm": 7.539999763369353, + "learning_rate": 8.902023519135906e-06, + "loss": 17.9697, + "step": 13052 + }, + { + "epoch": 0.23859835121648051, + "grad_norm": 6.9925561920139385, + "learning_rate": 8.901838423760962e-06, + "loss": 17.8635, + "step": 13053 + }, + { + "epoch": 0.23861663041292705, + "grad_norm": 9.722389640327213, + "learning_rate": 8.90165331471041e-06, + "loss": 17.665, + "step": 13054 + }, + { + "epoch": 0.23863490960937359, + "grad_norm": 7.012750000220342, + "learning_rate": 8.901468191984905e-06, + "loss": 17.9913, + "step": 13055 + }, + { + "epoch": 0.2386531888058201, + "grad_norm": 6.897659451433708, + "learning_rate": 8.901283055585091e-06, + "loss": 18.0401, + "step": 13056 + }, + { + "epoch": 0.23867146800226663, + "grad_norm": 5.730732391719401, + "learning_rate": 8.90109790551162e-06, + "loss": 17.2895, + "step": 13057 + }, + { + "epoch": 0.23868974719871314, + "grad_norm": 8.528304224384835, + "learning_rate": 8.900912741765137e-06, + "loss": 19.029, + "step": 13058 + }, + { + "epoch": 0.23870802639515967, + "grad_norm": 7.6515832604941565, + "learning_rate": 8.900727564346294e-06, + "loss": 18.3438, + "step": 13059 + }, + { + "epoch": 0.23872630559160618, + "grad_norm": 6.588949651884337, + "learning_rate": 8.90054237325574e-06, + "loss": 17.5206, + "step": 13060 + }, + { + "epoch": 0.23874458478805272, + "grad_norm": 5.966831394317411, + "learning_rate": 8.900357168494123e-06, + "loss": 17.5629, + "step": 13061 + }, + { + "epoch": 0.23876286398449925, + "grad_norm": 5.853515766728405, + "learning_rate": 8.900171950062092e-06, + "loss": 17.0216, + "step": 13062 + }, + { + "epoch": 0.23878114318094576, + "grad_norm": 5.879637566081577, + "learning_rate": 8.899986717960298e-06, + "loss": 17.4537, + "step": 13063 + }, + { + "epoch": 0.2387994223773923, + "grad_norm": 6.965208279550614, + "learning_rate": 8.899801472189389e-06, + "loss": 17.9539, + "step": 13064 + }, + { + "epoch": 0.2388177015738388, + "grad_norm": 6.346859650529671, + "learning_rate": 8.899616212750013e-06, + "loss": 17.3691, + "step": 13065 + }, + { + "epoch": 0.23883598077028534, + "grad_norm": 10.205520720902255, + "learning_rate": 8.899430939642822e-06, + "loss": 17.953, + "step": 13066 + }, + { + "epoch": 0.23885425996673187, + "grad_norm": 6.291004518877176, + "learning_rate": 8.899245652868461e-06, + "loss": 17.7778, + "step": 13067 + }, + { + "epoch": 0.23887253916317838, + "grad_norm": 7.281603008158021, + "learning_rate": 8.899060352427587e-06, + "loss": 17.8255, + "step": 13068 + }, + { + "epoch": 0.23889081835962492, + "grad_norm": 7.89320208932405, + "learning_rate": 8.898875038320842e-06, + "loss": 17.6524, + "step": 13069 + }, + { + "epoch": 0.23890909755607143, + "grad_norm": 6.083023775630701, + "learning_rate": 8.898689710548878e-06, + "loss": 17.5337, + "step": 13070 + }, + { + "epoch": 0.23892737675251796, + "grad_norm": 6.442767289991403, + "learning_rate": 8.898504369112346e-06, + "loss": 17.5752, + "step": 13071 + }, + { + "epoch": 
0.2389456559489645, + "grad_norm": 5.926364232680868, + "learning_rate": 8.898319014011893e-06, + "loss": 17.4152, + "step": 13072 + }, + { + "epoch": 0.238963935145411, + "grad_norm": 5.853578651618931, + "learning_rate": 8.898133645248172e-06, + "loss": 17.361, + "step": 13073 + }, + { + "epoch": 0.23898221434185754, + "grad_norm": 8.037353131529064, + "learning_rate": 8.89794826282183e-06, + "loss": 18.3901, + "step": 13074 + }, + { + "epoch": 0.23900049353830405, + "grad_norm": 6.839033028328603, + "learning_rate": 8.897762866733516e-06, + "loss": 17.7177, + "step": 13075 + }, + { + "epoch": 0.23901877273475058, + "grad_norm": 6.458383770802645, + "learning_rate": 8.897577456983884e-06, + "loss": 17.5274, + "step": 13076 + }, + { + "epoch": 0.2390370519311971, + "grad_norm": 6.186196520678368, + "learning_rate": 8.89739203357358e-06, + "loss": 17.5473, + "step": 13077 + }, + { + "epoch": 0.23905533112764363, + "grad_norm": 6.113652323107251, + "learning_rate": 8.897206596503256e-06, + "loss": 17.2725, + "step": 13078 + }, + { + "epoch": 0.23907361032409016, + "grad_norm": 7.155653850176197, + "learning_rate": 8.89702114577356e-06, + "loss": 17.838, + "step": 13079 + }, + { + "epoch": 0.23909188952053667, + "grad_norm": 6.4961595382647825, + "learning_rate": 8.896835681385143e-06, + "loss": 17.5084, + "step": 13080 + }, + { + "epoch": 0.2391101687169832, + "grad_norm": 5.803814602285096, + "learning_rate": 8.896650203338655e-06, + "loss": 17.1967, + "step": 13081 + }, + { + "epoch": 0.23912844791342971, + "grad_norm": 6.125421174580098, + "learning_rate": 8.896464711634748e-06, + "loss": 17.5201, + "step": 13082 + }, + { + "epoch": 0.23914672710987625, + "grad_norm": 7.170635367768776, + "learning_rate": 8.896279206274069e-06, + "loss": 17.9011, + "step": 13083 + }, + { + "epoch": 0.23916500630632279, + "grad_norm": 7.9103584189316765, + "learning_rate": 8.89609368725727e-06, + "loss": 17.7902, + "step": 13084 + }, + { + "epoch": 0.2391832855027693, + "grad_norm": 5.583691209122781, + "learning_rate": 8.895908154585e-06, + "loss": 17.2785, + "step": 13085 + }, + { + "epoch": 0.23920156469921583, + "grad_norm": 6.787952200758381, + "learning_rate": 8.895722608257909e-06, + "loss": 17.717, + "step": 13086 + }, + { + "epoch": 0.23921984389566234, + "grad_norm": 6.404965713753553, + "learning_rate": 8.89553704827665e-06, + "loss": 17.1054, + "step": 13087 + }, + { + "epoch": 0.23923812309210887, + "grad_norm": 6.5079624006987205, + "learning_rate": 8.895351474641872e-06, + "loss": 17.4883, + "step": 13088 + }, + { + "epoch": 0.2392564022885554, + "grad_norm": 6.930271806626031, + "learning_rate": 8.895165887354225e-06, + "loss": 17.6584, + "step": 13089 + }, + { + "epoch": 0.23927468148500192, + "grad_norm": 7.241331026542094, + "learning_rate": 8.894980286414358e-06, + "loss": 17.7407, + "step": 13090 + }, + { + "epoch": 0.23929296068144845, + "grad_norm": 8.199361623072374, + "learning_rate": 8.894794671822924e-06, + "loss": 18.387, + "step": 13091 + }, + { + "epoch": 0.23931123987789496, + "grad_norm": 6.145782542820028, + "learning_rate": 8.894609043580573e-06, + "loss": 17.5219, + "step": 13092 + }, + { + "epoch": 0.2393295190743415, + "grad_norm": 5.964790287197087, + "learning_rate": 8.894423401687954e-06, + "loss": 17.2074, + "step": 13093 + }, + { + "epoch": 0.239347798270788, + "grad_norm": 7.531933580267063, + "learning_rate": 8.894237746145719e-06, + "loss": 18.2056, + "step": 13094 + }, + { + "epoch": 0.23936607746723454, + "grad_norm": 6.628781494873603, + "learning_rate": 
8.894052076954521e-06, + "loss": 17.7886, + "step": 13095 + }, + { + "epoch": 0.23938435666368107, + "grad_norm": 7.072780596815424, + "learning_rate": 8.893866394115006e-06, + "loss": 17.6488, + "step": 13096 + }, + { + "epoch": 0.23940263586012758, + "grad_norm": 7.582250839989128, + "learning_rate": 8.893680697627829e-06, + "loss": 17.9527, + "step": 13097 + }, + { + "epoch": 0.23942091505657412, + "grad_norm": 8.00874576216099, + "learning_rate": 8.893494987493637e-06, + "loss": 18.2301, + "step": 13098 + }, + { + "epoch": 0.23943919425302063, + "grad_norm": 6.624719063862705, + "learning_rate": 8.893309263713084e-06, + "loss": 17.5278, + "step": 13099 + }, + { + "epoch": 0.23945747344946716, + "grad_norm": 6.073831096744187, + "learning_rate": 8.89312352628682e-06, + "loss": 17.3268, + "step": 13100 + }, + { + "epoch": 0.2394757526459137, + "grad_norm": 5.6543958726883945, + "learning_rate": 8.892937775215496e-06, + "loss": 17.3742, + "step": 13101 + }, + { + "epoch": 0.2394940318423602, + "grad_norm": 6.598721554417625, + "learning_rate": 8.892752010499762e-06, + "loss": 17.414, + "step": 13102 + }, + { + "epoch": 0.23951231103880674, + "grad_norm": 8.802692124687134, + "learning_rate": 8.892566232140271e-06, + "loss": 18.4354, + "step": 13103 + }, + { + "epoch": 0.23953059023525325, + "grad_norm": 7.377216148339256, + "learning_rate": 8.892380440137674e-06, + "loss": 17.6755, + "step": 13104 + }, + { + "epoch": 0.23954886943169978, + "grad_norm": 6.042696197432081, + "learning_rate": 8.892194634492619e-06, + "loss": 17.3203, + "step": 13105 + }, + { + "epoch": 0.23956714862814632, + "grad_norm": 7.389500299627137, + "learning_rate": 8.89200881520576e-06, + "loss": 18.1021, + "step": 13106 + }, + { + "epoch": 0.23958542782459283, + "grad_norm": 6.678872073304817, + "learning_rate": 8.89182298227775e-06, + "loss": 17.6422, + "step": 13107 + }, + { + "epoch": 0.23960370702103936, + "grad_norm": 6.928150862474782, + "learning_rate": 8.891637135709236e-06, + "loss": 17.8863, + "step": 13108 + }, + { + "epoch": 0.23962198621748587, + "grad_norm": 7.159367597370521, + "learning_rate": 8.891451275500872e-06, + "loss": 17.8805, + "step": 13109 + }, + { + "epoch": 0.2396402654139324, + "grad_norm": 5.940165891535454, + "learning_rate": 8.891265401653308e-06, + "loss": 17.1754, + "step": 13110 + }, + { + "epoch": 0.23965854461037892, + "grad_norm": 8.137977278209508, + "learning_rate": 8.891079514167198e-06, + "loss": 18.3722, + "step": 13111 + }, + { + "epoch": 0.23967682380682545, + "grad_norm": 6.995921613837327, + "learning_rate": 8.890893613043191e-06, + "loss": 17.7443, + "step": 13112 + }, + { + "epoch": 0.239695103003272, + "grad_norm": 6.8188755108798995, + "learning_rate": 8.890707698281941e-06, + "loss": 17.7557, + "step": 13113 + }, + { + "epoch": 0.2397133821997185, + "grad_norm": 7.630031424376271, + "learning_rate": 8.890521769884097e-06, + "loss": 18.0336, + "step": 13114 + }, + { + "epoch": 0.23973166139616503, + "grad_norm": 6.747272391453249, + "learning_rate": 8.890335827850312e-06, + "loss": 17.648, + "step": 13115 + }, + { + "epoch": 0.23974994059261154, + "grad_norm": 5.524067398388494, + "learning_rate": 8.890149872181237e-06, + "loss": 17.2013, + "step": 13116 + }, + { + "epoch": 0.23976821978905807, + "grad_norm": 7.001019747305786, + "learning_rate": 8.889963902877525e-06, + "loss": 17.6696, + "step": 13117 + }, + { + "epoch": 0.2397864989855046, + "grad_norm": 5.892740280706896, + "learning_rate": 8.889777919939827e-06, + "loss": 17.3016, + "step": 13118 + }, + { + 
"epoch": 0.23980477818195112, + "grad_norm": 6.4090602622592465, + "learning_rate": 8.889591923368794e-06, + "loss": 17.3847, + "step": 13119 + }, + { + "epoch": 0.23982305737839765, + "grad_norm": 6.843988185794902, + "learning_rate": 8.88940591316508e-06, + "loss": 17.6176, + "step": 13120 + }, + { + "epoch": 0.23984133657484416, + "grad_norm": 7.653232303178483, + "learning_rate": 8.889219889329337e-06, + "loss": 17.6849, + "step": 13121 + }, + { + "epoch": 0.2398596157712907, + "grad_norm": 6.222701452795798, + "learning_rate": 8.889033851862213e-06, + "loss": 17.0545, + "step": 13122 + }, + { + "epoch": 0.23987789496773723, + "grad_norm": 6.872948127642796, + "learning_rate": 8.888847800764364e-06, + "loss": 17.8578, + "step": 13123 + }, + { + "epoch": 0.23989617416418374, + "grad_norm": 6.182030799781346, + "learning_rate": 8.888661736036442e-06, + "loss": 17.5718, + "step": 13124 + }, + { + "epoch": 0.23991445336063028, + "grad_norm": 8.496963706036816, + "learning_rate": 8.888475657679096e-06, + "loss": 18.1289, + "step": 13125 + }, + { + "epoch": 0.23993273255707678, + "grad_norm": 6.461940796817892, + "learning_rate": 8.88828956569298e-06, + "loss": 17.4589, + "step": 13126 + }, + { + "epoch": 0.23995101175352332, + "grad_norm": 7.833861150936122, + "learning_rate": 8.88810346007875e-06, + "loss": 17.9519, + "step": 13127 + }, + { + "epoch": 0.23996929094996983, + "grad_norm": 7.524091408967035, + "learning_rate": 8.88791734083705e-06, + "loss": 18.0065, + "step": 13128 + }, + { + "epoch": 0.23998757014641636, + "grad_norm": 7.119184391647039, + "learning_rate": 8.887731207968541e-06, + "loss": 17.6692, + "step": 13129 + }, + { + "epoch": 0.2400058493428629, + "grad_norm": 7.258629108754563, + "learning_rate": 8.88754506147387e-06, + "loss": 18.0561, + "step": 13130 + }, + { + "epoch": 0.2400241285393094, + "grad_norm": 6.744296533148103, + "learning_rate": 8.887358901353691e-06, + "loss": 17.7762, + "step": 13131 + }, + { + "epoch": 0.24004240773575594, + "grad_norm": 6.343742431230002, + "learning_rate": 8.887172727608656e-06, + "loss": 17.8432, + "step": 13132 + }, + { + "epoch": 0.24006068693220245, + "grad_norm": 7.908862263357061, + "learning_rate": 8.886986540239418e-06, + "loss": 17.9585, + "step": 13133 + }, + { + "epoch": 0.24007896612864899, + "grad_norm": 6.879068165449237, + "learning_rate": 8.88680033924663e-06, + "loss": 18.0544, + "step": 13134 + }, + { + "epoch": 0.24009724532509552, + "grad_norm": 6.480055404882929, + "learning_rate": 8.886614124630944e-06, + "loss": 17.5558, + "step": 13135 + }, + { + "epoch": 0.24011552452154203, + "grad_norm": 7.085668774874577, + "learning_rate": 8.886427896393014e-06, + "loss": 17.843, + "step": 13136 + }, + { + "epoch": 0.24013380371798856, + "grad_norm": 6.890374086423882, + "learning_rate": 8.886241654533489e-06, + "loss": 17.7323, + "step": 13137 + }, + { + "epoch": 0.24015208291443507, + "grad_norm": 5.987956008528032, + "learning_rate": 8.886055399053023e-06, + "loss": 17.2639, + "step": 13138 + }, + { + "epoch": 0.2401703621108816, + "grad_norm": 6.744770214996133, + "learning_rate": 8.885869129952273e-06, + "loss": 17.5546, + "step": 13139 + }, + { + "epoch": 0.24018864130732814, + "grad_norm": 6.60720983685564, + "learning_rate": 8.88568284723189e-06, + "loss": 17.6936, + "step": 13140 + }, + { + "epoch": 0.24020692050377465, + "grad_norm": 7.8990858114798845, + "learning_rate": 8.885496550892523e-06, + "loss": 18.0575, + "step": 13141 + }, + { + "epoch": 0.2402251997002212, + "grad_norm": 7.336673911300776, + 
"learning_rate": 8.885310240934829e-06, + "loss": 17.7049, + "step": 13142 + }, + { + "epoch": 0.2402434788966677, + "grad_norm": 7.110353962766776, + "learning_rate": 8.885123917359459e-06, + "loss": 17.8205, + "step": 13143 + }, + { + "epoch": 0.24026175809311423, + "grad_norm": 7.2817624107343555, + "learning_rate": 8.884937580167069e-06, + "loss": 18.0881, + "step": 13144 + }, + { + "epoch": 0.24028003728956074, + "grad_norm": 6.346453683807578, + "learning_rate": 8.884751229358307e-06, + "loss": 17.5181, + "step": 13145 + }, + { + "epoch": 0.24029831648600727, + "grad_norm": 9.293193205765766, + "learning_rate": 8.884564864933831e-06, + "loss": 18.3708, + "step": 13146 + }, + { + "epoch": 0.2403165956824538, + "grad_norm": 8.133346329073769, + "learning_rate": 8.884378486894292e-06, + "loss": 18.2355, + "step": 13147 + }, + { + "epoch": 0.24033487487890032, + "grad_norm": 7.132364080017355, + "learning_rate": 8.884192095240342e-06, + "loss": 17.7903, + "step": 13148 + }, + { + "epoch": 0.24035315407534685, + "grad_norm": 8.38705214516052, + "learning_rate": 8.884005689972638e-06, + "loss": 18.0754, + "step": 13149 + }, + { + "epoch": 0.24037143327179336, + "grad_norm": 6.907929538270149, + "learning_rate": 8.883819271091829e-06, + "loss": 17.922, + "step": 13150 + }, + { + "epoch": 0.2403897124682399, + "grad_norm": 6.488311848475475, + "learning_rate": 8.883632838598571e-06, + "loss": 17.4053, + "step": 13151 + }, + { + "epoch": 0.24040799166468643, + "grad_norm": 5.511859400811804, + "learning_rate": 8.883446392493517e-06, + "loss": 17.0804, + "step": 13152 + }, + { + "epoch": 0.24042627086113294, + "grad_norm": 7.59345289195484, + "learning_rate": 8.883259932777321e-06, + "loss": 18.2424, + "step": 13153 + }, + { + "epoch": 0.24044455005757948, + "grad_norm": 8.42928295970774, + "learning_rate": 8.883073459450634e-06, + "loss": 18.2357, + "step": 13154 + }, + { + "epoch": 0.24046282925402598, + "grad_norm": 6.6186807129970635, + "learning_rate": 8.882886972514115e-06, + "loss": 17.8616, + "step": 13155 + }, + { + "epoch": 0.24048110845047252, + "grad_norm": 5.684230765944608, + "learning_rate": 8.88270047196841e-06, + "loss": 16.9709, + "step": 13156 + }, + { + "epoch": 0.24049938764691906, + "grad_norm": 7.766390927007206, + "learning_rate": 8.882513957814181e-06, + "loss": 18.3054, + "step": 13157 + }, + { + "epoch": 0.24051766684336556, + "grad_norm": 8.166219961211254, + "learning_rate": 8.882327430052073e-06, + "loss": 17.8959, + "step": 13158 + }, + { + "epoch": 0.2405359460398121, + "grad_norm": 7.978006879651882, + "learning_rate": 8.882140888682749e-06, + "loss": 18.1603, + "step": 13159 + }, + { + "epoch": 0.2405542252362586, + "grad_norm": 4.8592994633480835, + "learning_rate": 8.881954333706854e-06, + "loss": 16.9342, + "step": 13160 + }, + { + "epoch": 0.24057250443270514, + "grad_norm": 6.951007142144269, + "learning_rate": 8.88176776512505e-06, + "loss": 17.8005, + "step": 13161 + }, + { + "epoch": 0.24059078362915165, + "grad_norm": 6.92737058163528, + "learning_rate": 8.881581182937983e-06, + "loss": 17.4765, + "step": 13162 + }, + { + "epoch": 0.24060906282559819, + "grad_norm": 6.316452115981913, + "learning_rate": 8.881394587146313e-06, + "loss": 17.4542, + "step": 13163 + }, + { + "epoch": 0.24062734202204472, + "grad_norm": 6.695363778855367, + "learning_rate": 8.88120797775069e-06, + "loss": 17.319, + "step": 13164 + }, + { + "epoch": 0.24064562121849123, + "grad_norm": 7.128956621944359, + "learning_rate": 8.881021354751772e-06, + "loss": 17.9394, + "step": 
13165 + }, + { + "epoch": 0.24066390041493776, + "grad_norm": 6.611792036140452, + "learning_rate": 8.880834718150209e-06, + "loss": 17.4905, + "step": 13166 + }, + { + "epoch": 0.24068217961138427, + "grad_norm": 7.183528477062133, + "learning_rate": 8.880648067946658e-06, + "loss": 17.7129, + "step": 13167 + }, + { + "epoch": 0.2407004588078308, + "grad_norm": 8.30278988075915, + "learning_rate": 8.880461404141771e-06, + "loss": 18.2827, + "step": 13168 + }, + { + "epoch": 0.24071873800427734, + "grad_norm": 8.699138208964596, + "learning_rate": 8.880274726736204e-06, + "loss": 18.3182, + "step": 13169 + }, + { + "epoch": 0.24073701720072385, + "grad_norm": 6.688449049955284, + "learning_rate": 8.880088035730611e-06, + "loss": 17.6063, + "step": 13170 + }, + { + "epoch": 0.2407552963971704, + "grad_norm": 6.367619050050093, + "learning_rate": 8.879901331125649e-06, + "loss": 17.463, + "step": 13171 + }, + { + "epoch": 0.2407735755936169, + "grad_norm": 4.951303404699288, + "learning_rate": 8.879714612921966e-06, + "loss": 16.8762, + "step": 13172 + }, + { + "epoch": 0.24079185479006343, + "grad_norm": 7.467414625288263, + "learning_rate": 8.879527881120222e-06, + "loss": 18.2681, + "step": 13173 + }, + { + "epoch": 0.24081013398650997, + "grad_norm": 7.872630041482053, + "learning_rate": 8.879341135721067e-06, + "loss": 18.0894, + "step": 13174 + }, + { + "epoch": 0.24082841318295647, + "grad_norm": 7.730099311866059, + "learning_rate": 8.87915437672516e-06, + "loss": 17.6484, + "step": 13175 + }, + { + "epoch": 0.240846692379403, + "grad_norm": 5.927060754664423, + "learning_rate": 8.878967604133153e-06, + "loss": 17.2848, + "step": 13176 + }, + { + "epoch": 0.24086497157584952, + "grad_norm": 7.020997094443355, + "learning_rate": 8.878780817945701e-06, + "loss": 18.143, + "step": 13177 + }, + { + "epoch": 0.24088325077229605, + "grad_norm": 5.337990125998545, + "learning_rate": 8.87859401816346e-06, + "loss": 17.156, + "step": 13178 + }, + { + "epoch": 0.24090152996874256, + "grad_norm": 7.799283810345035, + "learning_rate": 8.87840720478708e-06, + "loss": 18.2052, + "step": 13179 + }, + { + "epoch": 0.2409198091651891, + "grad_norm": 7.465493378785632, + "learning_rate": 8.878220377817222e-06, + "loss": 18.0285, + "step": 13180 + }, + { + "epoch": 0.24093808836163563, + "grad_norm": 6.178996919767709, + "learning_rate": 8.878033537254537e-06, + "loss": 17.4663, + "step": 13181 + }, + { + "epoch": 0.24095636755808214, + "grad_norm": 5.900119250643999, + "learning_rate": 8.877846683099684e-06, + "loss": 17.1177, + "step": 13182 + }, + { + "epoch": 0.24097464675452868, + "grad_norm": 8.19533820450328, + "learning_rate": 8.877659815353313e-06, + "loss": 17.7, + "step": 13183 + }, + { + "epoch": 0.24099292595097518, + "grad_norm": 6.52315300706894, + "learning_rate": 8.87747293401608e-06, + "loss": 17.7248, + "step": 13184 + }, + { + "epoch": 0.24101120514742172, + "grad_norm": 6.058365530682624, + "learning_rate": 8.877286039088642e-06, + "loss": 17.0972, + "step": 13185 + }, + { + "epoch": 0.24102948434386826, + "grad_norm": 7.314506776774982, + "learning_rate": 8.87709913057165e-06, + "loss": 17.7921, + "step": 13186 + }, + { + "epoch": 0.24104776354031476, + "grad_norm": 6.62011695634416, + "learning_rate": 8.876912208465765e-06, + "loss": 17.7254, + "step": 13187 + }, + { + "epoch": 0.2410660427367613, + "grad_norm": 7.241284419157164, + "learning_rate": 8.876725272771639e-06, + "loss": 17.8527, + "step": 13188 + }, + { + "epoch": 0.2410843219332078, + "grad_norm": 6.002350376615827, 
+ "learning_rate": 8.876538323489925e-06, + "loss": 17.4492, + "step": 13189 + }, + { + "epoch": 0.24110260112965434, + "grad_norm": 6.5183420354195585, + "learning_rate": 8.876351360621283e-06, + "loss": 17.6424, + "step": 13190 + }, + { + "epoch": 0.24112088032610088, + "grad_norm": 7.2924557115800726, + "learning_rate": 8.876164384166365e-06, + "loss": 17.5475, + "step": 13191 + }, + { + "epoch": 0.24113915952254739, + "grad_norm": 6.838292540672922, + "learning_rate": 8.875977394125828e-06, + "loss": 17.7877, + "step": 13192 + }, + { + "epoch": 0.24115743871899392, + "grad_norm": 7.474611917261855, + "learning_rate": 8.875790390500325e-06, + "loss": 17.638, + "step": 13193 + }, + { + "epoch": 0.24117571791544043, + "grad_norm": 6.373800063567433, + "learning_rate": 8.875603373290515e-06, + "loss": 17.3922, + "step": 13194 + }, + { + "epoch": 0.24119399711188697, + "grad_norm": 6.205982421908158, + "learning_rate": 8.875416342497049e-06, + "loss": 17.4431, + "step": 13195 + }, + { + "epoch": 0.24121227630833347, + "grad_norm": 6.899377166810302, + "learning_rate": 8.875229298120587e-06, + "loss": 17.8066, + "step": 13196 + }, + { + "epoch": 0.24123055550478, + "grad_norm": 7.718711203766756, + "learning_rate": 8.875042240161781e-06, + "loss": 17.9228, + "step": 13197 + }, + { + "epoch": 0.24124883470122654, + "grad_norm": 7.243355889180906, + "learning_rate": 8.87485516862129e-06, + "loss": 17.8304, + "step": 13198 + }, + { + "epoch": 0.24126711389767305, + "grad_norm": 7.233421254394535, + "learning_rate": 8.874668083499767e-06, + "loss": 17.9284, + "step": 13199 + }, + { + "epoch": 0.2412853930941196, + "grad_norm": 7.8699390802999725, + "learning_rate": 8.874480984797869e-06, + "loss": 17.888, + "step": 13200 + }, + { + "epoch": 0.2413036722905661, + "grad_norm": 7.071979508139762, + "learning_rate": 8.87429387251625e-06, + "loss": 17.6058, + "step": 13201 + }, + { + "epoch": 0.24132195148701263, + "grad_norm": 6.285841114328047, + "learning_rate": 8.874106746655569e-06, + "loss": 17.1742, + "step": 13202 + }, + { + "epoch": 0.24134023068345917, + "grad_norm": 6.578547586790232, + "learning_rate": 8.873919607216478e-06, + "loss": 17.5148, + "step": 13203 + }, + { + "epoch": 0.24135850987990567, + "grad_norm": 5.450388431257817, + "learning_rate": 8.873732454199638e-06, + "loss": 17.0852, + "step": 13204 + }, + { + "epoch": 0.2413767890763522, + "grad_norm": 7.313273339527103, + "learning_rate": 8.873545287605701e-06, + "loss": 18.157, + "step": 13205 + }, + { + "epoch": 0.24139506827279872, + "grad_norm": 6.348646613613565, + "learning_rate": 8.873358107435322e-06, + "loss": 17.5325, + "step": 13206 + }, + { + "epoch": 0.24141334746924525, + "grad_norm": 6.498333129874512, + "learning_rate": 8.87317091368916e-06, + "loss": 17.3581, + "step": 13207 + }, + { + "epoch": 0.2414316266656918, + "grad_norm": 6.580875241650784, + "learning_rate": 8.87298370636787e-06, + "loss": 17.6637, + "step": 13208 + }, + { + "epoch": 0.2414499058621383, + "grad_norm": 6.1743400167801035, + "learning_rate": 8.872796485472109e-06, + "loss": 17.4734, + "step": 13209 + }, + { + "epoch": 0.24146818505858483, + "grad_norm": 6.9045866598215335, + "learning_rate": 8.87260925100253e-06, + "loss": 17.6767, + "step": 13210 + }, + { + "epoch": 0.24148646425503134, + "grad_norm": 7.419330066661811, + "learning_rate": 8.872422002959792e-06, + "loss": 17.8789, + "step": 13211 + }, + { + "epoch": 0.24150474345147788, + "grad_norm": 6.687812864967737, + "learning_rate": 8.872234741344553e-06, + "loss": 17.4704, + 
"step": 13212 + }, + { + "epoch": 0.24152302264792438, + "grad_norm": 7.046758839553343, + "learning_rate": 8.872047466157467e-06, + "loss": 17.7868, + "step": 13213 + }, + { + "epoch": 0.24154130184437092, + "grad_norm": 8.749597595690418, + "learning_rate": 8.87186017739919e-06, + "loss": 18.1431, + "step": 13214 + }, + { + "epoch": 0.24155958104081746, + "grad_norm": 7.802112272264556, + "learning_rate": 8.871672875070378e-06, + "loss": 17.8983, + "step": 13215 + }, + { + "epoch": 0.24157786023726396, + "grad_norm": 6.5238253261698205, + "learning_rate": 8.87148555917169e-06, + "loss": 17.4987, + "step": 13216 + }, + { + "epoch": 0.2415961394337105, + "grad_norm": 8.130165120182328, + "learning_rate": 8.87129822970378e-06, + "loss": 18.2404, + "step": 13217 + }, + { + "epoch": 0.241614418630157, + "grad_norm": 5.819613447220564, + "learning_rate": 8.871110886667307e-06, + "loss": 17.2527, + "step": 13218 + }, + { + "epoch": 0.24163269782660354, + "grad_norm": 6.428250895224231, + "learning_rate": 8.870923530062925e-06, + "loss": 17.473, + "step": 13219 + }, + { + "epoch": 0.24165097702305008, + "grad_norm": 9.147063504535623, + "learning_rate": 8.870736159891294e-06, + "loss": 18.2164, + "step": 13220 + }, + { + "epoch": 0.2416692562194966, + "grad_norm": 6.396228529713454, + "learning_rate": 8.870548776153066e-06, + "loss": 17.3975, + "step": 13221 + }, + { + "epoch": 0.24168753541594312, + "grad_norm": 5.741575381771392, + "learning_rate": 8.870361378848902e-06, + "loss": 17.2317, + "step": 13222 + }, + { + "epoch": 0.24170581461238963, + "grad_norm": 6.002851179289545, + "learning_rate": 8.870173967979457e-06, + "loss": 17.4199, + "step": 13223 + }, + { + "epoch": 0.24172409380883617, + "grad_norm": 6.375433046931361, + "learning_rate": 8.869986543545386e-06, + "loss": 17.4255, + "step": 13224 + }, + { + "epoch": 0.2417423730052827, + "grad_norm": 6.964841275569278, + "learning_rate": 8.869799105547349e-06, + "loss": 17.7085, + "step": 13225 + }, + { + "epoch": 0.2417606522017292, + "grad_norm": 6.189593330134313, + "learning_rate": 8.869611653986004e-06, + "loss": 17.3318, + "step": 13226 + }, + { + "epoch": 0.24177893139817574, + "grad_norm": 6.558356271458293, + "learning_rate": 8.869424188862005e-06, + "loss": 17.6748, + "step": 13227 + }, + { + "epoch": 0.24179721059462225, + "grad_norm": 7.597312531011783, + "learning_rate": 8.869236710176011e-06, + "loss": 18.17, + "step": 13228 + }, + { + "epoch": 0.2418154897910688, + "grad_norm": 8.952541142696836, + "learning_rate": 8.869049217928675e-06, + "loss": 18.4476, + "step": 13229 + }, + { + "epoch": 0.2418337689875153, + "grad_norm": 5.988686992748572, + "learning_rate": 8.86886171212066e-06, + "loss": 17.1655, + "step": 13230 + }, + { + "epoch": 0.24185204818396183, + "grad_norm": 6.461742159027759, + "learning_rate": 8.868674192752617e-06, + "loss": 17.3513, + "step": 13231 + }, + { + "epoch": 0.24187032738040837, + "grad_norm": 6.431905079479456, + "learning_rate": 8.86848665982521e-06, + "loss": 17.3626, + "step": 13232 + }, + { + "epoch": 0.24188860657685488, + "grad_norm": 7.974793074328844, + "learning_rate": 8.868299113339093e-06, + "loss": 17.9988, + "step": 13233 + }, + { + "epoch": 0.2419068857733014, + "grad_norm": 7.704360688764585, + "learning_rate": 8.868111553294922e-06, + "loss": 18.0645, + "step": 13234 + }, + { + "epoch": 0.24192516496974792, + "grad_norm": 5.54608407310233, + "learning_rate": 8.867923979693355e-06, + "loss": 17.1499, + "step": 13235 + }, + { + "epoch": 0.24194344416619445, + "grad_norm": 
7.503894412720114, + "learning_rate": 8.867736392535051e-06, + "loss": 17.714, + "step": 13236 + }, + { + "epoch": 0.241961723362641, + "grad_norm": 6.077081447168073, + "learning_rate": 8.867548791820669e-06, + "loss": 17.3986, + "step": 13237 + }, + { + "epoch": 0.2419800025590875, + "grad_norm": 7.31575454844323, + "learning_rate": 8.86736117755086e-06, + "loss": 18.2215, + "step": 13238 + }, + { + "epoch": 0.24199828175553403, + "grad_norm": 7.196479914345386, + "learning_rate": 8.867173549726288e-06, + "loss": 17.7561, + "step": 13239 + }, + { + "epoch": 0.24201656095198054, + "grad_norm": 6.805207069244284, + "learning_rate": 8.866985908347608e-06, + "loss": 17.7114, + "step": 13240 + }, + { + "epoch": 0.24203484014842708, + "grad_norm": 5.460241965285942, + "learning_rate": 8.866798253415477e-06, + "loss": 17.1375, + "step": 13241 + }, + { + "epoch": 0.2420531193448736, + "grad_norm": 6.996102130263047, + "learning_rate": 8.866610584930555e-06, + "loss": 17.9595, + "step": 13242 + }, + { + "epoch": 0.24207139854132012, + "grad_norm": 6.57622767178465, + "learning_rate": 8.866422902893497e-06, + "loss": 17.8044, + "step": 13243 + }, + { + "epoch": 0.24208967773776666, + "grad_norm": 7.344064393129531, + "learning_rate": 8.866235207304963e-06, + "loss": 17.5162, + "step": 13244 + }, + { + "epoch": 0.24210795693421316, + "grad_norm": 6.509075212063092, + "learning_rate": 8.86604749816561e-06, + "loss": 17.4359, + "step": 13245 + }, + { + "epoch": 0.2421262361306597, + "grad_norm": 6.909143099713661, + "learning_rate": 8.865859775476096e-06, + "loss": 17.8215, + "step": 13246 + }, + { + "epoch": 0.2421445153271062, + "grad_norm": 6.543960356519501, + "learning_rate": 8.865672039237079e-06, + "loss": 17.7186, + "step": 13247 + }, + { + "epoch": 0.24216279452355274, + "grad_norm": 6.254846658838982, + "learning_rate": 8.865484289449218e-06, + "loss": 17.3325, + "step": 13248 + }, + { + "epoch": 0.24218107371999928, + "grad_norm": 7.641910661522797, + "learning_rate": 8.865296526113167e-06, + "loss": 17.8863, + "step": 13249 + }, + { + "epoch": 0.2421993529164458, + "grad_norm": 5.761708675709654, + "learning_rate": 8.86510874922959e-06, + "loss": 17.3231, + "step": 13250 + }, + { + "epoch": 0.24221763211289232, + "grad_norm": 7.396955615086446, + "learning_rate": 8.864920958799141e-06, + "loss": 18.1548, + "step": 13251 + }, + { + "epoch": 0.24223591130933883, + "grad_norm": 6.385589888431112, + "learning_rate": 8.86473315482248e-06, + "loss": 17.4871, + "step": 13252 + }, + { + "epoch": 0.24225419050578537, + "grad_norm": 7.515137772539918, + "learning_rate": 8.864545337300264e-06, + "loss": 18.0275, + "step": 13253 + }, + { + "epoch": 0.2422724697022319, + "grad_norm": 7.713963158824052, + "learning_rate": 8.864357506233153e-06, + "loss": 17.8809, + "step": 13254 + }, + { + "epoch": 0.2422907488986784, + "grad_norm": 7.668300760106162, + "learning_rate": 8.864169661621803e-06, + "loss": 17.8319, + "step": 13255 + }, + { + "epoch": 0.24230902809512495, + "grad_norm": 7.517969916661936, + "learning_rate": 8.863981803466875e-06, + "loss": 17.7307, + "step": 13256 + }, + { + "epoch": 0.24232730729157145, + "grad_norm": 6.106088600959224, + "learning_rate": 8.863793931769024e-06, + "loss": 17.4672, + "step": 13257 + }, + { + "epoch": 0.242345586488018, + "grad_norm": 8.690410270027261, + "learning_rate": 8.863606046528911e-06, + "loss": 17.8399, + "step": 13258 + }, + { + "epoch": 0.24236386568446452, + "grad_norm": 5.9146146761704514, + "learning_rate": 8.863418147747196e-06, + "loss": 
17.5012, + "step": 13259 + }, + { + "epoch": 0.24238214488091103, + "grad_norm": 6.576900680122438, + "learning_rate": 8.863230235424536e-06, + "loss": 17.61, + "step": 13260 + }, + { + "epoch": 0.24240042407735757, + "grad_norm": 6.039999456418752, + "learning_rate": 8.863042309561587e-06, + "loss": 17.3064, + "step": 13261 + }, + { + "epoch": 0.24241870327380408, + "grad_norm": 6.466453689308442, + "learning_rate": 8.86285437015901e-06, + "loss": 17.4131, + "step": 13262 + }, + { + "epoch": 0.2424369824702506, + "grad_norm": 8.318876179177755, + "learning_rate": 8.862666417217465e-06, + "loss": 18.016, + "step": 13263 + }, + { + "epoch": 0.24245526166669712, + "grad_norm": 6.039915497893999, + "learning_rate": 8.862478450737609e-06, + "loss": 17.3117, + "step": 13264 + }, + { + "epoch": 0.24247354086314366, + "grad_norm": 7.369563502714089, + "learning_rate": 8.862290470720101e-06, + "loss": 17.8927, + "step": 13265 + }, + { + "epoch": 0.2424918200595902, + "grad_norm": 6.822151712253876, + "learning_rate": 8.862102477165599e-06, + "loss": 17.4322, + "step": 13266 + }, + { + "epoch": 0.2425100992560367, + "grad_norm": 6.346122680891925, + "learning_rate": 8.861914470074765e-06, + "loss": 17.6086, + "step": 13267 + }, + { + "epoch": 0.24252837845248323, + "grad_norm": 7.771126110246708, + "learning_rate": 8.861726449448255e-06, + "loss": 18.0334, + "step": 13268 + }, + { + "epoch": 0.24254665764892974, + "grad_norm": 7.316897383884866, + "learning_rate": 8.861538415286727e-06, + "loss": 17.8483, + "step": 13269 + }, + { + "epoch": 0.24256493684537628, + "grad_norm": 7.346397828902137, + "learning_rate": 8.861350367590845e-06, + "loss": 17.6931, + "step": 13270 + }, + { + "epoch": 0.2425832160418228, + "grad_norm": 7.626975587403114, + "learning_rate": 8.861162306361263e-06, + "loss": 17.9159, + "step": 13271 + }, + { + "epoch": 0.24260149523826932, + "grad_norm": 6.737947123073347, + "learning_rate": 8.860974231598645e-06, + "loss": 17.3233, + "step": 13272 + }, + { + "epoch": 0.24261977443471586, + "grad_norm": 7.72127351728017, + "learning_rate": 8.860786143303645e-06, + "loss": 17.8567, + "step": 13273 + }, + { + "epoch": 0.24263805363116236, + "grad_norm": 5.858346090633954, + "learning_rate": 8.860598041476924e-06, + "loss": 17.2809, + "step": 13274 + }, + { + "epoch": 0.2426563328276089, + "grad_norm": 6.938778930396184, + "learning_rate": 8.860409926119142e-06, + "loss": 17.655, + "step": 13275 + }, + { + "epoch": 0.24267461202405544, + "grad_norm": 7.698228884170294, + "learning_rate": 8.86022179723096e-06, + "loss": 17.669, + "step": 13276 + }, + { + "epoch": 0.24269289122050194, + "grad_norm": 5.775058615370471, + "learning_rate": 8.860033654813033e-06, + "loss": 17.0366, + "step": 13277 + }, + { + "epoch": 0.24271117041694848, + "grad_norm": 6.537461079637749, + "learning_rate": 8.859845498866027e-06, + "loss": 17.527, + "step": 13278 + }, + { + "epoch": 0.242729449613395, + "grad_norm": 5.8875767795931475, + "learning_rate": 8.859657329390595e-06, + "loss": 17.2091, + "step": 13279 + }, + { + "epoch": 0.24274772880984152, + "grad_norm": 8.352655247672722, + "learning_rate": 8.859469146387399e-06, + "loss": 18.5289, + "step": 13280 + }, + { + "epoch": 0.24276600800628803, + "grad_norm": 7.035080451229858, + "learning_rate": 8.859280949857098e-06, + "loss": 17.9262, + "step": 13281 + }, + { + "epoch": 0.24278428720273457, + "grad_norm": 7.949003052893934, + "learning_rate": 8.859092739800353e-06, + "loss": 17.8076, + "step": 13282 + }, + { + "epoch": 0.2428025663991811, + 
"grad_norm": 6.352949564202136, + "learning_rate": 8.858904516217821e-06, + "loss": 17.4697, + "step": 13283 + }, + { + "epoch": 0.2428208455956276, + "grad_norm": 6.412127243721164, + "learning_rate": 8.858716279110166e-06, + "loss": 17.2088, + "step": 13284 + }, + { + "epoch": 0.24283912479207415, + "grad_norm": 7.182949874929911, + "learning_rate": 8.858528028478044e-06, + "loss": 17.8308, + "step": 13285 + }, + { + "epoch": 0.24285740398852065, + "grad_norm": 6.6943952253064865, + "learning_rate": 8.858339764322118e-06, + "loss": 17.285, + "step": 13286 + }, + { + "epoch": 0.2428756831849672, + "grad_norm": 7.803722863692137, + "learning_rate": 8.858151486643043e-06, + "loss": 18.2402, + "step": 13287 + }, + { + "epoch": 0.24289396238141372, + "grad_norm": 5.810079677159859, + "learning_rate": 8.857963195441483e-06, + "loss": 17.1884, + "step": 13288 + }, + { + "epoch": 0.24291224157786023, + "grad_norm": 8.499056624784934, + "learning_rate": 8.857774890718098e-06, + "loss": 18.0358, + "step": 13289 + }, + { + "epoch": 0.24293052077430677, + "grad_norm": 5.607226767592612, + "learning_rate": 8.857586572473544e-06, + "loss": 16.9867, + "step": 13290 + }, + { + "epoch": 0.24294879997075328, + "grad_norm": 7.082798162482926, + "learning_rate": 8.857398240708487e-06, + "loss": 17.9252, + "step": 13291 + }, + { + "epoch": 0.2429670791671998, + "grad_norm": 6.928844891573724, + "learning_rate": 8.857209895423582e-06, + "loss": 17.8962, + "step": 13292 + }, + { + "epoch": 0.24298535836364635, + "grad_norm": 6.79816833825027, + "learning_rate": 8.857021536619493e-06, + "loss": 17.7566, + "step": 13293 + }, + { + "epoch": 0.24300363756009286, + "grad_norm": 8.708056121991014, + "learning_rate": 8.856833164296877e-06, + "loss": 17.7842, + "step": 13294 + }, + { + "epoch": 0.2430219167565394, + "grad_norm": 6.249069389945501, + "learning_rate": 8.856644778456394e-06, + "loss": 17.4322, + "step": 13295 + }, + { + "epoch": 0.2430401959529859, + "grad_norm": 5.2644819088888015, + "learning_rate": 8.856456379098707e-06, + "loss": 16.7531, + "step": 13296 + }, + { + "epoch": 0.24305847514943243, + "grad_norm": 6.690942598412263, + "learning_rate": 8.856267966224474e-06, + "loss": 17.544, + "step": 13297 + }, + { + "epoch": 0.24307675434587894, + "grad_norm": 8.679717368008, + "learning_rate": 8.856079539834357e-06, + "loss": 18.7665, + "step": 13298 + }, + { + "epoch": 0.24309503354232548, + "grad_norm": 6.965027228531672, + "learning_rate": 8.855891099929017e-06, + "loss": 17.8125, + "step": 13299 + }, + { + "epoch": 0.243113312738772, + "grad_norm": 6.9312897841704615, + "learning_rate": 8.855702646509113e-06, + "loss": 17.747, + "step": 13300 + }, + { + "epoch": 0.24313159193521852, + "grad_norm": 7.540184567771135, + "learning_rate": 8.855514179575305e-06, + "loss": 18.2535, + "step": 13301 + }, + { + "epoch": 0.24314987113166506, + "grad_norm": 6.7747784609436135, + "learning_rate": 8.855325699128255e-06, + "loss": 17.7905, + "step": 13302 + }, + { + "epoch": 0.24316815032811157, + "grad_norm": 6.1467852029172985, + "learning_rate": 8.855137205168623e-06, + "loss": 17.6317, + "step": 13303 + }, + { + "epoch": 0.2431864295245581, + "grad_norm": 6.450889722139722, + "learning_rate": 8.854948697697068e-06, + "loss": 17.5033, + "step": 13304 + }, + { + "epoch": 0.24320470872100464, + "grad_norm": 6.690876513263371, + "learning_rate": 8.854760176714254e-06, + "loss": 17.7083, + "step": 13305 + }, + { + "epoch": 0.24322298791745114, + "grad_norm": 7.881204537273559, + "learning_rate": 
8.854571642220839e-06, + "loss": 17.9876, + "step": 13306 + }, + { + "epoch": 0.24324126711389768, + "grad_norm": 6.481490924340865, + "learning_rate": 8.854383094217485e-06, + "loss": 17.5346, + "step": 13307 + }, + { + "epoch": 0.2432595463103442, + "grad_norm": 6.515330290429976, + "learning_rate": 8.854194532704854e-06, + "loss": 17.0989, + "step": 13308 + }, + { + "epoch": 0.24327782550679072, + "grad_norm": 6.822772907358152, + "learning_rate": 8.854005957683604e-06, + "loss": 17.7118, + "step": 13309 + }, + { + "epoch": 0.24329610470323726, + "grad_norm": 7.0022039778280885, + "learning_rate": 8.8538173691544e-06, + "loss": 17.6289, + "step": 13310 + }, + { + "epoch": 0.24331438389968377, + "grad_norm": 6.884454555826788, + "learning_rate": 8.853628767117899e-06, + "loss": 17.6373, + "step": 13311 + }, + { + "epoch": 0.2433326630961303, + "grad_norm": 6.250198007933299, + "learning_rate": 8.853440151574762e-06, + "loss": 17.3452, + "step": 13312 + }, + { + "epoch": 0.2433509422925768, + "grad_norm": 7.231128093485779, + "learning_rate": 8.853251522525655e-06, + "loss": 17.971, + "step": 13313 + }, + { + "epoch": 0.24336922148902335, + "grad_norm": 7.839890967614947, + "learning_rate": 8.853062879971232e-06, + "loss": 18.1151, + "step": 13314 + }, + { + "epoch": 0.24338750068546985, + "grad_norm": 6.908170487681074, + "learning_rate": 8.85287422391216e-06, + "loss": 17.5089, + "step": 13315 + }, + { + "epoch": 0.2434057798819164, + "grad_norm": 7.084431505572852, + "learning_rate": 8.8526855543491e-06, + "loss": 17.7467, + "step": 13316 + }, + { + "epoch": 0.24342405907836293, + "grad_norm": 6.4107654527837195, + "learning_rate": 8.852496871282707e-06, + "loss": 17.8434, + "step": 13317 + }, + { + "epoch": 0.24344233827480943, + "grad_norm": 6.668920080219149, + "learning_rate": 8.85230817471365e-06, + "loss": 17.7183, + "step": 13318 + }, + { + "epoch": 0.24346061747125597, + "grad_norm": 6.298989301834971, + "learning_rate": 8.852119464642586e-06, + "loss": 17.5342, + "step": 13319 + }, + { + "epoch": 0.24347889666770248, + "grad_norm": 5.739422634792878, + "learning_rate": 8.851930741070179e-06, + "loss": 17.2153, + "step": 13320 + }, + { + "epoch": 0.243497175864149, + "grad_norm": 5.214420217905649, + "learning_rate": 8.851742003997088e-06, + "loss": 17.0749, + "step": 13321 + }, + { + "epoch": 0.24351545506059555, + "grad_norm": 8.661960000088818, + "learning_rate": 8.851553253423974e-06, + "loss": 17.9828, + "step": 13322 + }, + { + "epoch": 0.24353373425704206, + "grad_norm": 6.674174401769316, + "learning_rate": 8.851364489351504e-06, + "loss": 17.4714, + "step": 13323 + }, + { + "epoch": 0.2435520134534886, + "grad_norm": 6.187393832054604, + "learning_rate": 8.851175711780331e-06, + "loss": 17.2636, + "step": 13324 + }, + { + "epoch": 0.2435702926499351, + "grad_norm": 6.536215441237082, + "learning_rate": 8.850986920711124e-06, + "loss": 17.3158, + "step": 13325 + }, + { + "epoch": 0.24358857184638164, + "grad_norm": 6.333101863390559, + "learning_rate": 8.850798116144542e-06, + "loss": 17.4345, + "step": 13326 + }, + { + "epoch": 0.24360685104282817, + "grad_norm": 7.629197284006749, + "learning_rate": 8.850609298081247e-06, + "loss": 18.2452, + "step": 13327 + }, + { + "epoch": 0.24362513023927468, + "grad_norm": 7.707916341218809, + "learning_rate": 8.8504204665219e-06, + "loss": 18.0921, + "step": 13328 + }, + { + "epoch": 0.24364340943572121, + "grad_norm": 6.931500141022469, + "learning_rate": 8.850231621467162e-06, + "loss": 17.552, + "step": 13329 + }, + { + 
"epoch": 0.24366168863216772, + "grad_norm": 6.7577127924474265, + "learning_rate": 8.850042762917698e-06, + "loss": 17.6586, + "step": 13330 + }, + { + "epoch": 0.24367996782861426, + "grad_norm": 6.71347104740245, + "learning_rate": 8.849853890874168e-06, + "loss": 17.7506, + "step": 13331 + }, + { + "epoch": 0.24369824702506077, + "grad_norm": 6.60361960143841, + "learning_rate": 8.849665005337234e-06, + "loss": 17.6244, + "step": 13332 + }, + { + "epoch": 0.2437165262215073, + "grad_norm": 6.404002992694197, + "learning_rate": 8.849476106307558e-06, + "loss": 17.5198, + "step": 13333 + }, + { + "epoch": 0.24373480541795384, + "grad_norm": 6.740798468982144, + "learning_rate": 8.849287193785803e-06, + "loss": 17.7947, + "step": 13334 + }, + { + "epoch": 0.24375308461440034, + "grad_norm": 7.478272733324508, + "learning_rate": 8.84909826777263e-06, + "loss": 17.9603, + "step": 13335 + }, + { + "epoch": 0.24377136381084688, + "grad_norm": 5.8534147617358165, + "learning_rate": 8.848909328268702e-06, + "loss": 17.2796, + "step": 13336 + }, + { + "epoch": 0.2437896430072934, + "grad_norm": 6.2500079619499305, + "learning_rate": 8.84872037527468e-06, + "loss": 17.4809, + "step": 13337 + }, + { + "epoch": 0.24380792220373992, + "grad_norm": 7.060013727147254, + "learning_rate": 8.848531408791226e-06, + "loss": 17.8596, + "step": 13338 + }, + { + "epoch": 0.24382620140018646, + "grad_norm": 6.129256935432026, + "learning_rate": 8.848342428819006e-06, + "loss": 17.5985, + "step": 13339 + }, + { + "epoch": 0.24384448059663297, + "grad_norm": 7.852104461299691, + "learning_rate": 8.848153435358678e-06, + "loss": 17.7454, + "step": 13340 + }, + { + "epoch": 0.2438627597930795, + "grad_norm": 7.179842200790302, + "learning_rate": 8.847964428410907e-06, + "loss": 17.709, + "step": 13341 + }, + { + "epoch": 0.243881038989526, + "grad_norm": 7.099405078928812, + "learning_rate": 8.847775407976353e-06, + "loss": 17.9011, + "step": 13342 + }, + { + "epoch": 0.24389931818597255, + "grad_norm": 6.579104355020609, + "learning_rate": 8.84758637405568e-06, + "loss": 17.4511, + "step": 13343 + }, + { + "epoch": 0.24391759738241908, + "grad_norm": 6.361251746274071, + "learning_rate": 8.847397326649553e-06, + "loss": 17.3535, + "step": 13344 + }, + { + "epoch": 0.2439358765788656, + "grad_norm": 6.134572598632472, + "learning_rate": 8.847208265758633e-06, + "loss": 17.4643, + "step": 13345 + }, + { + "epoch": 0.24395415577531213, + "grad_norm": 7.10404281793083, + "learning_rate": 8.84701919138358e-06, + "loss": 17.8232, + "step": 13346 + }, + { + "epoch": 0.24397243497175863, + "grad_norm": 6.672163379254932, + "learning_rate": 8.846830103525056e-06, + "loss": 17.4976, + "step": 13347 + }, + { + "epoch": 0.24399071416820517, + "grad_norm": 7.540641617800829, + "learning_rate": 8.84664100218373e-06, + "loss": 17.9285, + "step": 13348 + }, + { + "epoch": 0.24400899336465168, + "grad_norm": 8.027690292367463, + "learning_rate": 8.84645188736026e-06, + "loss": 18.2729, + "step": 13349 + }, + { + "epoch": 0.2440272725610982, + "grad_norm": 7.01108329933926, + "learning_rate": 8.846262759055311e-06, + "loss": 17.7341, + "step": 13350 + }, + { + "epoch": 0.24404555175754475, + "grad_norm": 7.618609732374895, + "learning_rate": 8.846073617269542e-06, + "loss": 17.7382, + "step": 13351 + }, + { + "epoch": 0.24406383095399126, + "grad_norm": 7.669566130584318, + "learning_rate": 8.84588446200362e-06, + "loss": 17.8425, + "step": 13352 + }, + { + "epoch": 0.2440821101504378, + "grad_norm": 7.6072715587573345, + 
"learning_rate": 8.845695293258207e-06, + "loss": 18.1951, + "step": 13353 + }, + { + "epoch": 0.2441003893468843, + "grad_norm": 6.899448402243731, + "learning_rate": 8.845506111033966e-06, + "loss": 17.5865, + "step": 13354 + }, + { + "epoch": 0.24411866854333084, + "grad_norm": 6.901240507089418, + "learning_rate": 8.84531691533156e-06, + "loss": 17.8634, + "step": 13355 + }, + { + "epoch": 0.24413694773977737, + "grad_norm": 8.183313145335115, + "learning_rate": 8.84512770615165e-06, + "loss": 16.903, + "step": 13356 + }, + { + "epoch": 0.24415522693622388, + "grad_norm": 7.5261891727210095, + "learning_rate": 8.844938483494905e-06, + "loss": 17.9907, + "step": 13357 + }, + { + "epoch": 0.24417350613267041, + "grad_norm": 7.149270791029402, + "learning_rate": 8.844749247361982e-06, + "loss": 17.6361, + "step": 13358 + }, + { + "epoch": 0.24419178532911692, + "grad_norm": 10.214463600002174, + "learning_rate": 8.844559997753546e-06, + "loss": 18.5494, + "step": 13359 + }, + { + "epoch": 0.24421006452556346, + "grad_norm": 7.225317121006948, + "learning_rate": 8.84437073467026e-06, + "loss": 17.7026, + "step": 13360 + }, + { + "epoch": 0.24422834372201, + "grad_norm": 7.632819658495426, + "learning_rate": 8.844181458112791e-06, + "loss": 17.9165, + "step": 13361 + }, + { + "epoch": 0.2442466229184565, + "grad_norm": 7.972366299924804, + "learning_rate": 8.843992168081796e-06, + "loss": 18.0311, + "step": 13362 + }, + { + "epoch": 0.24426490211490304, + "grad_norm": 6.2756417482546905, + "learning_rate": 8.843802864577944e-06, + "loss": 17.5389, + "step": 13363 + }, + { + "epoch": 0.24428318131134955, + "grad_norm": 8.193139934904673, + "learning_rate": 8.843613547601896e-06, + "loss": 18.3795, + "step": 13364 + }, + { + "epoch": 0.24430146050779608, + "grad_norm": 6.389396940466, + "learning_rate": 8.843424217154318e-06, + "loss": 17.7544, + "step": 13365 + }, + { + "epoch": 0.2443197397042426, + "grad_norm": 7.160598777970506, + "learning_rate": 8.843234873235869e-06, + "loss": 17.7244, + "step": 13366 + }, + { + "epoch": 0.24433801890068912, + "grad_norm": 6.64930968727387, + "learning_rate": 8.843045515847217e-06, + "loss": 17.5518, + "step": 13367 + }, + { + "epoch": 0.24435629809713566, + "grad_norm": 5.671549508002502, + "learning_rate": 8.842856144989023e-06, + "loss": 17.1291, + "step": 13368 + }, + { + "epoch": 0.24437457729358217, + "grad_norm": 7.902320979850315, + "learning_rate": 8.842666760661951e-06, + "loss": 17.6099, + "step": 13369 + }, + { + "epoch": 0.2443928564900287, + "grad_norm": 5.965673149399645, + "learning_rate": 8.842477362866667e-06, + "loss": 17.2664, + "step": 13370 + }, + { + "epoch": 0.2444111356864752, + "grad_norm": 5.644593567042512, + "learning_rate": 8.842287951603833e-06, + "loss": 17.3299, + "step": 13371 + }, + { + "epoch": 0.24442941488292175, + "grad_norm": 6.662397886446397, + "learning_rate": 8.842098526874113e-06, + "loss": 17.4437, + "step": 13372 + }, + { + "epoch": 0.24444769407936828, + "grad_norm": 7.231356628187275, + "learning_rate": 8.841909088678172e-06, + "loss": 17.954, + "step": 13373 + }, + { + "epoch": 0.2444659732758148, + "grad_norm": 6.770349758066719, + "learning_rate": 8.84171963701667e-06, + "loss": 17.6923, + "step": 13374 + }, + { + "epoch": 0.24448425247226133, + "grad_norm": 7.465032746595875, + "learning_rate": 8.841530171890275e-06, + "loss": 18.0996, + "step": 13375 + }, + { + "epoch": 0.24450253166870783, + "grad_norm": 6.4393473099515575, + "learning_rate": 8.841340693299653e-06, + "loss": 17.4858, + "step": 
13376 + }, + { + "epoch": 0.24452081086515437, + "grad_norm": 7.163912942037377, + "learning_rate": 8.841151201245462e-06, + "loss": 17.8614, + "step": 13377 + }, + { + "epoch": 0.2445390900616009, + "grad_norm": 7.405374688845051, + "learning_rate": 8.840961695728372e-06, + "loss": 17.8952, + "step": 13378 + }, + { + "epoch": 0.2445573692580474, + "grad_norm": 5.637338579282636, + "learning_rate": 8.840772176749042e-06, + "loss": 17.0259, + "step": 13379 + }, + { + "epoch": 0.24457564845449395, + "grad_norm": 6.369503905085804, + "learning_rate": 8.84058264430814e-06, + "loss": 17.3215, + "step": 13380 + }, + { + "epoch": 0.24459392765094046, + "grad_norm": 5.335592655839366, + "learning_rate": 8.84039309840633e-06, + "loss": 17.1554, + "step": 13381 + }, + { + "epoch": 0.244612206847387, + "grad_norm": 6.716870205319661, + "learning_rate": 8.840203539044273e-06, + "loss": 17.7323, + "step": 13382 + }, + { + "epoch": 0.2446304860438335, + "grad_norm": 7.826668518376646, + "learning_rate": 8.840013966222638e-06, + "loss": 17.7759, + "step": 13383 + }, + { + "epoch": 0.24464876524028004, + "grad_norm": 6.812892731960148, + "learning_rate": 8.839824379942089e-06, + "loss": 17.7177, + "step": 13384 + }, + { + "epoch": 0.24466704443672657, + "grad_norm": 5.63299034570083, + "learning_rate": 8.839634780203285e-06, + "loss": 17.0862, + "step": 13385 + }, + { + "epoch": 0.24468532363317308, + "grad_norm": 7.337248803763474, + "learning_rate": 8.839445167006894e-06, + "loss": 18.2525, + "step": 13386 + }, + { + "epoch": 0.24470360282961962, + "grad_norm": 6.918295093298403, + "learning_rate": 8.839255540353583e-06, + "loss": 17.6056, + "step": 13387 + }, + { + "epoch": 0.24472188202606612, + "grad_norm": 7.020567883012543, + "learning_rate": 8.839065900244015e-06, + "loss": 18.1106, + "step": 13388 + }, + { + "epoch": 0.24474016122251266, + "grad_norm": 6.150241307155845, + "learning_rate": 8.838876246678854e-06, + "loss": 17.4877, + "step": 13389 + }, + { + "epoch": 0.2447584404189592, + "grad_norm": 7.208694299041957, + "learning_rate": 8.838686579658763e-06, + "loss": 18.076, + "step": 13390 + }, + { + "epoch": 0.2447767196154057, + "grad_norm": 5.86685802657085, + "learning_rate": 8.83849689918441e-06, + "loss": 17.3161, + "step": 13391 + }, + { + "epoch": 0.24479499881185224, + "grad_norm": 7.405617494174632, + "learning_rate": 8.838307205256458e-06, + "loss": 17.7358, + "step": 13392 + }, + { + "epoch": 0.24481327800829875, + "grad_norm": 8.410389976756395, + "learning_rate": 8.838117497875572e-06, + "loss": 18.2559, + "step": 13393 + }, + { + "epoch": 0.24483155720474528, + "grad_norm": 7.882687619487962, + "learning_rate": 8.83792777704242e-06, + "loss": 17.7044, + "step": 13394 + }, + { + "epoch": 0.24484983640119182, + "grad_norm": 5.986235546857629, + "learning_rate": 8.83773804275766e-06, + "loss": 17.1556, + "step": 13395 + }, + { + "epoch": 0.24486811559763833, + "grad_norm": 6.177255456277056, + "learning_rate": 8.837548295021963e-06, + "loss": 17.2895, + "step": 13396 + }, + { + "epoch": 0.24488639479408486, + "grad_norm": 6.904485474398583, + "learning_rate": 8.837358533835992e-06, + "loss": 17.6568, + "step": 13397 + }, + { + "epoch": 0.24490467399053137, + "grad_norm": 7.28159790149412, + "learning_rate": 8.837168759200413e-06, + "loss": 18.0456, + "step": 13398 + }, + { + "epoch": 0.2449229531869779, + "grad_norm": 7.663327211935231, + "learning_rate": 8.83697897111589e-06, + "loss": 18.1913, + "step": 13399 + }, + { + "epoch": 0.2449412323834244, + "grad_norm": 
8.820204283182152, + "learning_rate": 8.836789169583089e-06, + "loss": 18.0061, + "step": 13400 + }, + { + "epoch": 0.24495951157987095, + "grad_norm": 8.317022410557518, + "learning_rate": 8.836599354602674e-06, + "loss": 17.9331, + "step": 13401 + }, + { + "epoch": 0.24497779077631748, + "grad_norm": 6.1434665356788205, + "learning_rate": 8.836409526175314e-06, + "loss": 17.0856, + "step": 13402 + }, + { + "epoch": 0.244996069972764, + "grad_norm": 6.046074046650978, + "learning_rate": 8.836219684301667e-06, + "loss": 17.3953, + "step": 13403 + }, + { + "epoch": 0.24501434916921053, + "grad_norm": 6.058140723586201, + "learning_rate": 8.836029828982407e-06, + "loss": 17.609, + "step": 13404 + }, + { + "epoch": 0.24503262836565703, + "grad_norm": 7.067027057229567, + "learning_rate": 8.835839960218193e-06, + "loss": 17.7556, + "step": 13405 + }, + { + "epoch": 0.24505090756210357, + "grad_norm": 7.224133713204975, + "learning_rate": 8.835650078009694e-06, + "loss": 17.7724, + "step": 13406 + }, + { + "epoch": 0.2450691867585501, + "grad_norm": 5.508434191393395, + "learning_rate": 8.835460182357573e-06, + "loss": 17.1609, + "step": 13407 + }, + { + "epoch": 0.24508746595499661, + "grad_norm": 6.611146244016963, + "learning_rate": 8.835270273262498e-06, + "loss": 17.6224, + "step": 13408 + }, + { + "epoch": 0.24510574515144315, + "grad_norm": 6.309339124168227, + "learning_rate": 8.835080350725133e-06, + "loss": 17.4649, + "step": 13409 + }, + { + "epoch": 0.24512402434788966, + "grad_norm": 7.618887437895719, + "learning_rate": 8.834890414746144e-06, + "loss": 17.9619, + "step": 13410 + }, + { + "epoch": 0.2451423035443362, + "grad_norm": 5.807315268732273, + "learning_rate": 8.834700465326198e-06, + "loss": 17.3501, + "step": 13411 + }, + { + "epoch": 0.24516058274078273, + "grad_norm": 5.200649025143558, + "learning_rate": 8.834510502465959e-06, + "loss": 17.21, + "step": 13412 + }, + { + "epoch": 0.24517886193722924, + "grad_norm": 7.39514446397721, + "learning_rate": 8.834320526166092e-06, + "loss": 17.6912, + "step": 13413 + }, + { + "epoch": 0.24519714113367577, + "grad_norm": 7.080208674680301, + "learning_rate": 8.834130536427266e-06, + "loss": 17.8667, + "step": 13414 + }, + { + "epoch": 0.24521542033012228, + "grad_norm": 6.669847957111236, + "learning_rate": 8.833940533250146e-06, + "loss": 17.5223, + "step": 13415 + }, + { + "epoch": 0.24523369952656882, + "grad_norm": 6.999556313077716, + "learning_rate": 8.833750516635395e-06, + "loss": 17.6839, + "step": 13416 + }, + { + "epoch": 0.24525197872301532, + "grad_norm": 7.493540137187102, + "learning_rate": 8.83356048658368e-06, + "loss": 17.5081, + "step": 13417 + }, + { + "epoch": 0.24527025791946186, + "grad_norm": 5.980800051120309, + "learning_rate": 8.833370443095671e-06, + "loss": 17.5579, + "step": 13418 + }, + { + "epoch": 0.2452885371159084, + "grad_norm": 6.675804185575348, + "learning_rate": 8.833180386172032e-06, + "loss": 17.6918, + "step": 13419 + }, + { + "epoch": 0.2453068163123549, + "grad_norm": 5.669189846765466, + "learning_rate": 8.832990315813425e-06, + "loss": 16.9523, + "step": 13420 + }, + { + "epoch": 0.24532509550880144, + "grad_norm": 8.225106534681808, + "learning_rate": 8.832800232020521e-06, + "loss": 18.0924, + "step": 13421 + }, + { + "epoch": 0.24534337470524795, + "grad_norm": 6.543938118391621, + "learning_rate": 8.832610134793985e-06, + "loss": 17.3952, + "step": 13422 + }, + { + "epoch": 0.24536165390169448, + "grad_norm": 6.19665585703796, + "learning_rate": 8.832420024134484e-06, + 
"loss": 17.3581, + "step": 13423 + }, + { + "epoch": 0.24537993309814102, + "grad_norm": 7.108267611247485, + "learning_rate": 8.83222990004268e-06, + "loss": 17.6781, + "step": 13424 + }, + { + "epoch": 0.24539821229458753, + "grad_norm": 7.422004670472646, + "learning_rate": 8.832039762519247e-06, + "loss": 18.1953, + "step": 13425 + }, + { + "epoch": 0.24541649149103406, + "grad_norm": 6.954008379135574, + "learning_rate": 8.831849611564845e-06, + "loss": 17.3529, + "step": 13426 + }, + { + "epoch": 0.24543477068748057, + "grad_norm": 5.8034365000088215, + "learning_rate": 8.831659447180143e-06, + "loss": 17.3271, + "step": 13427 + }, + { + "epoch": 0.2454530498839271, + "grad_norm": 7.666960779097515, + "learning_rate": 8.831469269365808e-06, + "loss": 18.2337, + "step": 13428 + }, + { + "epoch": 0.24547132908037364, + "grad_norm": 7.806620772604487, + "learning_rate": 8.831279078122505e-06, + "loss": 18.0384, + "step": 13429 + }, + { + "epoch": 0.24548960827682015, + "grad_norm": 5.910725999860661, + "learning_rate": 8.831088873450902e-06, + "loss": 17.5923, + "step": 13430 + }, + { + "epoch": 0.24550788747326668, + "grad_norm": 6.647740532577579, + "learning_rate": 8.830898655351663e-06, + "loss": 17.7018, + "step": 13431 + }, + { + "epoch": 0.2455261666697132, + "grad_norm": 6.381486614235224, + "learning_rate": 8.830708423825458e-06, + "loss": 17.5859, + "step": 13432 + }, + { + "epoch": 0.24554444586615973, + "grad_norm": 6.394719458885018, + "learning_rate": 8.83051817887295e-06, + "loss": 17.3302, + "step": 13433 + }, + { + "epoch": 0.24556272506260624, + "grad_norm": 6.066904758483784, + "learning_rate": 8.830327920494812e-06, + "loss": 17.4341, + "step": 13434 + }, + { + "epoch": 0.24558100425905277, + "grad_norm": 7.070178994718958, + "learning_rate": 8.830137648691705e-06, + "loss": 17.9941, + "step": 13435 + }, + { + "epoch": 0.2455992834554993, + "grad_norm": 6.085682839386033, + "learning_rate": 8.8299473634643e-06, + "loss": 17.2157, + "step": 13436 + }, + { + "epoch": 0.24561756265194581, + "grad_norm": 6.91618577276595, + "learning_rate": 8.829757064813257e-06, + "loss": 17.8577, + "step": 13437 + }, + { + "epoch": 0.24563584184839235, + "grad_norm": 7.0115887406466975, + "learning_rate": 8.829566752739252e-06, + "loss": 17.8506, + "step": 13438 + }, + { + "epoch": 0.24565412104483886, + "grad_norm": 6.654487020029335, + "learning_rate": 8.829376427242948e-06, + "loss": 17.6821, + "step": 13439 + }, + { + "epoch": 0.2456724002412854, + "grad_norm": 7.002473609128466, + "learning_rate": 8.82918608832501e-06, + "loss": 17.5262, + "step": 13440 + }, + { + "epoch": 0.24569067943773193, + "grad_norm": 6.400880240180771, + "learning_rate": 8.828995735986107e-06, + "loss": 17.5627, + "step": 13441 + }, + { + "epoch": 0.24570895863417844, + "grad_norm": 7.434675361462428, + "learning_rate": 8.828805370226906e-06, + "loss": 18.0045, + "step": 13442 + }, + { + "epoch": 0.24572723783062497, + "grad_norm": 7.331143894515387, + "learning_rate": 8.828614991048076e-06, + "loss": 17.7971, + "step": 13443 + }, + { + "epoch": 0.24574551702707148, + "grad_norm": 7.619034032513552, + "learning_rate": 8.828424598450282e-06, + "loss": 18.0238, + "step": 13444 + }, + { + "epoch": 0.24576379622351802, + "grad_norm": 7.39259731663535, + "learning_rate": 8.828234192434192e-06, + "loss": 17.5899, + "step": 13445 + }, + { + "epoch": 0.24578207541996455, + "grad_norm": 6.776273380382308, + "learning_rate": 8.828043773000474e-06, + "loss": 17.765, + "step": 13446 + }, + { + "epoch": 
0.24580035461641106, + "grad_norm": 6.252730432829984, + "learning_rate": 8.827853340149794e-06, + "loss": 17.5249, + "step": 13447 + }, + { + "epoch": 0.2458186338128576, + "grad_norm": 7.435038084033365, + "learning_rate": 8.82766289388282e-06, + "loss": 17.8393, + "step": 13448 + }, + { + "epoch": 0.2458369130093041, + "grad_norm": 7.802496312818711, + "learning_rate": 8.827472434200219e-06, + "loss": 17.8409, + "step": 13449 + }, + { + "epoch": 0.24585519220575064, + "grad_norm": 7.202308153481494, + "learning_rate": 8.82728196110266e-06, + "loss": 18.0122, + "step": 13450 + }, + { + "epoch": 0.24587347140219715, + "grad_norm": 6.924654009065137, + "learning_rate": 8.827091474590811e-06, + "loss": 17.8219, + "step": 13451 + }, + { + "epoch": 0.24589175059864368, + "grad_norm": 7.404016704344671, + "learning_rate": 8.826900974665337e-06, + "loss": 17.8862, + "step": 13452 + }, + { + "epoch": 0.24591002979509022, + "grad_norm": 6.6462088088911875, + "learning_rate": 8.826710461326908e-06, + "loss": 17.6568, + "step": 13453 + }, + { + "epoch": 0.24592830899153673, + "grad_norm": 7.39997599394969, + "learning_rate": 8.826519934576192e-06, + "loss": 17.5758, + "step": 13454 + }, + { + "epoch": 0.24594658818798326, + "grad_norm": 6.15160971977275, + "learning_rate": 8.826329394413855e-06, + "loss": 17.3852, + "step": 13455 + }, + { + "epoch": 0.24596486738442977, + "grad_norm": 7.913428805973735, + "learning_rate": 8.826138840840563e-06, + "loss": 17.6378, + "step": 13456 + }, + { + "epoch": 0.2459831465808763, + "grad_norm": 8.847662828382003, + "learning_rate": 8.825948273856988e-06, + "loss": 18.1707, + "step": 13457 + }, + { + "epoch": 0.24600142577732284, + "grad_norm": 5.690044873405773, + "learning_rate": 8.825757693463797e-06, + "loss": 17.1624, + "step": 13458 + }, + { + "epoch": 0.24601970497376935, + "grad_norm": 7.173770483159854, + "learning_rate": 8.825567099661656e-06, + "loss": 17.9517, + "step": 13459 + }, + { + "epoch": 0.24603798417021588, + "grad_norm": 5.671500016403455, + "learning_rate": 8.825376492451237e-06, + "loss": 17.2507, + "step": 13460 + }, + { + "epoch": 0.2460562633666624, + "grad_norm": 5.760964048052998, + "learning_rate": 8.825185871833203e-06, + "loss": 17.3155, + "step": 13461 + }, + { + "epoch": 0.24607454256310893, + "grad_norm": 6.8402574657905255, + "learning_rate": 8.824995237808224e-06, + "loss": 17.7137, + "step": 13462 + }, + { + "epoch": 0.24609282175955546, + "grad_norm": 6.924451096801156, + "learning_rate": 8.82480459037697e-06, + "loss": 17.8487, + "step": 13463 + }, + { + "epoch": 0.24611110095600197, + "grad_norm": 6.399619906996067, + "learning_rate": 8.824613929540107e-06, + "loss": 17.5705, + "step": 13464 + }, + { + "epoch": 0.2461293801524485, + "grad_norm": 5.980177457081154, + "learning_rate": 8.824423255298305e-06, + "loss": 17.3176, + "step": 13465 + }, + { + "epoch": 0.24614765934889501, + "grad_norm": 6.200361250393343, + "learning_rate": 8.824232567652232e-06, + "loss": 17.407, + "step": 13466 + }, + { + "epoch": 0.24616593854534155, + "grad_norm": 7.278235109275367, + "learning_rate": 8.824041866602554e-06, + "loss": 17.62, + "step": 13467 + }, + { + "epoch": 0.24618421774178806, + "grad_norm": 6.757109365456597, + "learning_rate": 8.823851152149941e-06, + "loss": 18.0304, + "step": 13468 + }, + { + "epoch": 0.2462024969382346, + "grad_norm": 6.254819297715207, + "learning_rate": 8.823660424295062e-06, + "loss": 17.5373, + "step": 13469 + }, + { + "epoch": 0.24622077613468113, + "grad_norm": 6.996033752640962, + 
"learning_rate": 8.823469683038583e-06, + "loss": 17.8082, + "step": 13470 + }, + { + "epoch": 0.24623905533112764, + "grad_norm": 6.9903218827039915, + "learning_rate": 8.823278928381179e-06, + "loss": 17.9962, + "step": 13471 + }, + { + "epoch": 0.24625733452757417, + "grad_norm": 6.974909060118697, + "learning_rate": 8.82308816032351e-06, + "loss": 17.6751, + "step": 13472 + }, + { + "epoch": 0.24627561372402068, + "grad_norm": 7.311919385715046, + "learning_rate": 8.822897378866252e-06, + "loss": 17.8247, + "step": 13473 + }, + { + "epoch": 0.24629389292046722, + "grad_norm": 8.234798677547689, + "learning_rate": 8.822706584010068e-06, + "loss": 18.0569, + "step": 13474 + }, + { + "epoch": 0.24631217211691375, + "grad_norm": 7.492414245740768, + "learning_rate": 8.82251577575563e-06, + "loss": 17.7471, + "step": 13475 + }, + { + "epoch": 0.24633045131336026, + "grad_norm": 6.416032807014928, + "learning_rate": 8.822324954103606e-06, + "loss": 17.2773, + "step": 13476 + }, + { + "epoch": 0.2463487305098068, + "grad_norm": 6.605216545214185, + "learning_rate": 8.822134119054665e-06, + "loss": 17.3079, + "step": 13477 + }, + { + "epoch": 0.2463670097062533, + "grad_norm": 7.473347962662394, + "learning_rate": 8.821943270609475e-06, + "loss": 18.0302, + "step": 13478 + }, + { + "epoch": 0.24638528890269984, + "grad_norm": 6.399162085666547, + "learning_rate": 8.821752408768706e-06, + "loss": 17.5172, + "step": 13479 + }, + { + "epoch": 0.24640356809914638, + "grad_norm": 7.468345587531287, + "learning_rate": 8.821561533533026e-06, + "loss": 17.6476, + "step": 13480 + }, + { + "epoch": 0.24642184729559288, + "grad_norm": 7.025834326555146, + "learning_rate": 8.821370644903105e-06, + "loss": 17.8186, + "step": 13481 + }, + { + "epoch": 0.24644012649203942, + "grad_norm": 6.223068864164905, + "learning_rate": 8.821179742879611e-06, + "loss": 17.3983, + "step": 13482 + }, + { + "epoch": 0.24645840568848593, + "grad_norm": 7.284991771763925, + "learning_rate": 8.820988827463213e-06, + "loss": 17.8984, + "step": 13483 + }, + { + "epoch": 0.24647668488493246, + "grad_norm": 5.936303452699871, + "learning_rate": 8.820797898654581e-06, + "loss": 17.1977, + "step": 13484 + }, + { + "epoch": 0.24649496408137897, + "grad_norm": 5.221180502955405, + "learning_rate": 8.820606956454386e-06, + "loss": 16.9444, + "step": 13485 + }, + { + "epoch": 0.2465132432778255, + "grad_norm": 6.811217939005427, + "learning_rate": 8.820416000863292e-06, + "loss": 17.898, + "step": 13486 + }, + { + "epoch": 0.24653152247427204, + "grad_norm": 8.177478662926852, + "learning_rate": 8.820225031881974e-06, + "loss": 18.3779, + "step": 13487 + }, + { + "epoch": 0.24654980167071855, + "grad_norm": 7.4405321701852145, + "learning_rate": 8.820034049511097e-06, + "loss": 18.1126, + "step": 13488 + }, + { + "epoch": 0.24656808086716508, + "grad_norm": 6.192990290104176, + "learning_rate": 8.819843053751334e-06, + "loss": 17.7177, + "step": 13489 + }, + { + "epoch": 0.2465863600636116, + "grad_norm": 6.153071832176295, + "learning_rate": 8.81965204460335e-06, + "loss": 17.5496, + "step": 13490 + }, + { + "epoch": 0.24660463926005813, + "grad_norm": 6.078233706862764, + "learning_rate": 8.81946102206782e-06, + "loss": 17.4564, + "step": 13491 + }, + { + "epoch": 0.24662291845650466, + "grad_norm": 8.38149022730245, + "learning_rate": 8.819269986145407e-06, + "loss": 18.0966, + "step": 13492 + }, + { + "epoch": 0.24664119765295117, + "grad_norm": 6.9040545767910455, + "learning_rate": 8.819078936836786e-06, + "loss": 17.6468, + 
"step": 13493 + }, + { + "epoch": 0.2466594768493977, + "grad_norm": 6.477670606043992, + "learning_rate": 8.818887874142625e-06, + "loss": 17.3782, + "step": 13494 + }, + { + "epoch": 0.24667775604584422, + "grad_norm": 6.599624140942617, + "learning_rate": 8.818696798063594e-06, + "loss": 17.4778, + "step": 13495 + }, + { + "epoch": 0.24669603524229075, + "grad_norm": 6.918902171740237, + "learning_rate": 8.818505708600363e-06, + "loss": 18.0571, + "step": 13496 + }, + { + "epoch": 0.2467143144387373, + "grad_norm": 6.167114363379106, + "learning_rate": 8.818314605753598e-06, + "loss": 17.5789, + "step": 13497 + }, + { + "epoch": 0.2467325936351838, + "grad_norm": 5.4991456389946025, + "learning_rate": 8.818123489523973e-06, + "loss": 17.0248, + "step": 13498 + }, + { + "epoch": 0.24675087283163033, + "grad_norm": 10.464567584802637, + "learning_rate": 8.817932359912156e-06, + "loss": 17.6705, + "step": 13499 + }, + { + "epoch": 0.24676915202807684, + "grad_norm": 6.058815033485125, + "learning_rate": 8.817741216918818e-06, + "loss": 17.3583, + "step": 13500 + }, + { + "epoch": 0.24678743122452337, + "grad_norm": 7.607334550198356, + "learning_rate": 8.81755006054463e-06, + "loss": 17.8198, + "step": 13501 + }, + { + "epoch": 0.24680571042096988, + "grad_norm": 6.781707046645076, + "learning_rate": 8.817358890790256e-06, + "loss": 17.5437, + "step": 13502 + }, + { + "epoch": 0.24682398961741642, + "grad_norm": 6.236260480634458, + "learning_rate": 8.817167707656373e-06, + "loss": 17.4142, + "step": 13503 + }, + { + "epoch": 0.24684226881386295, + "grad_norm": 7.156356990978949, + "learning_rate": 8.816976511143648e-06, + "loss": 17.8336, + "step": 13504 + }, + { + "epoch": 0.24686054801030946, + "grad_norm": 7.385429118551683, + "learning_rate": 8.816785301252752e-06, + "loss": 17.8506, + "step": 13505 + }, + { + "epoch": 0.246878827206756, + "grad_norm": 7.008744010798375, + "learning_rate": 8.816594077984355e-06, + "loss": 17.7951, + "step": 13506 + }, + { + "epoch": 0.2468971064032025, + "grad_norm": 6.027277764001174, + "learning_rate": 8.816402841339125e-06, + "loss": 17.3543, + "step": 13507 + }, + { + "epoch": 0.24691538559964904, + "grad_norm": 7.306801806072262, + "learning_rate": 8.816211591317736e-06, + "loss": 18.0441, + "step": 13508 + }, + { + "epoch": 0.24693366479609558, + "grad_norm": 8.105265740723043, + "learning_rate": 8.816020327920855e-06, + "loss": 18.2235, + "step": 13509 + }, + { + "epoch": 0.24695194399254208, + "grad_norm": 6.669435866966423, + "learning_rate": 8.815829051149156e-06, + "loss": 17.3582, + "step": 13510 + }, + { + "epoch": 0.24697022318898862, + "grad_norm": 8.120380399622045, + "learning_rate": 8.815637761003306e-06, + "loss": 17.9397, + "step": 13511 + }, + { + "epoch": 0.24698850238543513, + "grad_norm": 7.940693990461981, + "learning_rate": 8.815446457483977e-06, + "loss": 17.9778, + "step": 13512 + }, + { + "epoch": 0.24700678158188166, + "grad_norm": 6.139624392103352, + "learning_rate": 8.81525514059184e-06, + "loss": 17.4869, + "step": 13513 + }, + { + "epoch": 0.2470250607783282, + "grad_norm": 6.140321004105239, + "learning_rate": 8.815063810327564e-06, + "loss": 17.4739, + "step": 13514 + }, + { + "epoch": 0.2470433399747747, + "grad_norm": 7.220012039096887, + "learning_rate": 8.81487246669182e-06, + "loss": 17.9978, + "step": 13515 + }, + { + "epoch": 0.24706161917122124, + "grad_norm": 6.052610323392659, + "learning_rate": 8.81468110968528e-06, + "loss": 17.4726, + "step": 13516 + }, + { + "epoch": 0.24707989836766775, + 
"grad_norm": 7.2392828573847945, + "learning_rate": 8.814489739308613e-06, + "loss": 17.749, + "step": 13517 + }, + { + "epoch": 0.24709817756411429, + "grad_norm": 9.178255731900403, + "learning_rate": 8.81429835556249e-06, + "loss": 18.6995, + "step": 13518 + }, + { + "epoch": 0.2471164567605608, + "grad_norm": 6.151055631501488, + "learning_rate": 8.814106958447584e-06, + "loss": 17.5311, + "step": 13519 + }, + { + "epoch": 0.24713473595700733, + "grad_norm": 6.351774958778554, + "learning_rate": 8.813915547964561e-06, + "loss": 17.5953, + "step": 13520 + }, + { + "epoch": 0.24715301515345386, + "grad_norm": 6.20297852456471, + "learning_rate": 8.813724124114099e-06, + "loss": 17.3503, + "step": 13521 + }, + { + "epoch": 0.24717129434990037, + "grad_norm": 7.4313810432804575, + "learning_rate": 8.813532686896861e-06, + "loss": 18.0759, + "step": 13522 + }, + { + "epoch": 0.2471895735463469, + "grad_norm": 5.945238925825888, + "learning_rate": 8.813341236313527e-06, + "loss": 17.5475, + "step": 13523 + }, + { + "epoch": 0.24720785274279342, + "grad_norm": 8.513827079892803, + "learning_rate": 8.813149772364758e-06, + "loss": 18.2024, + "step": 13524 + }, + { + "epoch": 0.24722613193923995, + "grad_norm": 5.6187932784378125, + "learning_rate": 8.812958295051232e-06, + "loss": 17.0056, + "step": 13525 + }, + { + "epoch": 0.2472444111356865, + "grad_norm": 7.632836096581084, + "learning_rate": 8.812766804373617e-06, + "loss": 18.03, + "step": 13526 + }, + { + "epoch": 0.247262690332133, + "grad_norm": 6.981690508258919, + "learning_rate": 8.812575300332587e-06, + "loss": 17.6715, + "step": 13527 + }, + { + "epoch": 0.24728096952857953, + "grad_norm": 8.709129676610265, + "learning_rate": 8.81238378292881e-06, + "loss": 18.5582, + "step": 13528 + }, + { + "epoch": 0.24729924872502604, + "grad_norm": 6.365832607157043, + "learning_rate": 8.812192252162958e-06, + "loss": 17.5791, + "step": 13529 + }, + { + "epoch": 0.24731752792147257, + "grad_norm": 6.899931755926244, + "learning_rate": 8.812000708035704e-06, + "loss": 17.9722, + "step": 13530 + }, + { + "epoch": 0.2473358071179191, + "grad_norm": 7.397138075266554, + "learning_rate": 8.811809150547718e-06, + "loss": 17.7979, + "step": 13531 + }, + { + "epoch": 0.24735408631436562, + "grad_norm": 6.563167452099872, + "learning_rate": 8.811617579699671e-06, + "loss": 17.4955, + "step": 13532 + }, + { + "epoch": 0.24737236551081215, + "grad_norm": 6.41796307114215, + "learning_rate": 8.811425995492238e-06, + "loss": 17.4837, + "step": 13533 + }, + { + "epoch": 0.24739064470725866, + "grad_norm": 7.329600825860249, + "learning_rate": 8.811234397926085e-06, + "loss": 17.8683, + "step": 13534 + }, + { + "epoch": 0.2474089239037052, + "grad_norm": 7.651120667298316, + "learning_rate": 8.811042787001887e-06, + "loss": 17.6583, + "step": 13535 + }, + { + "epoch": 0.2474272031001517, + "grad_norm": 6.719738297247194, + "learning_rate": 8.810851162720315e-06, + "loss": 17.9221, + "step": 13536 + }, + { + "epoch": 0.24744548229659824, + "grad_norm": 5.9192372353376745, + "learning_rate": 8.81065952508204e-06, + "loss": 17.3504, + "step": 13537 + }, + { + "epoch": 0.24746376149304478, + "grad_norm": 6.505587578775084, + "learning_rate": 8.810467874087733e-06, + "loss": 17.4247, + "step": 13538 + }, + { + "epoch": 0.24748204068949128, + "grad_norm": 6.073858449932765, + "learning_rate": 8.810276209738069e-06, + "loss": 17.3974, + "step": 13539 + }, + { + "epoch": 0.24750031988593782, + "grad_norm": 7.17598401883612, + "learning_rate": 
8.810084532033715e-06, + "loss": 17.7629, + "step": 13540 + }, + { + "epoch": 0.24751859908238433, + "grad_norm": 6.072667004573831, + "learning_rate": 8.809892840975347e-06, + "loss": 17.5451, + "step": 13541 + }, + { + "epoch": 0.24753687827883086, + "grad_norm": 5.8307657994823545, + "learning_rate": 8.809701136563635e-06, + "loss": 17.2472, + "step": 13542 + }, + { + "epoch": 0.2475551574752774, + "grad_norm": 7.9425616507954215, + "learning_rate": 8.80950941879925e-06, + "loss": 18.4223, + "step": 13543 + }, + { + "epoch": 0.2475734366717239, + "grad_norm": 6.61336833311643, + "learning_rate": 8.809317687682865e-06, + "loss": 17.6788, + "step": 13544 + }, + { + "epoch": 0.24759171586817044, + "grad_norm": 7.353439073462062, + "learning_rate": 8.809125943215153e-06, + "loss": 17.7704, + "step": 13545 + }, + { + "epoch": 0.24760999506461695, + "grad_norm": 6.87891029506922, + "learning_rate": 8.808934185396787e-06, + "loss": 17.5211, + "step": 13546 + }, + { + "epoch": 0.24762827426106349, + "grad_norm": 7.2274130161211385, + "learning_rate": 8.808742414228435e-06, + "loss": 17.3957, + "step": 13547 + }, + { + "epoch": 0.24764655345751002, + "grad_norm": 6.5308196450905225, + "learning_rate": 8.808550629710772e-06, + "loss": 17.7209, + "step": 13548 + }, + { + "epoch": 0.24766483265395653, + "grad_norm": 5.982434903221757, + "learning_rate": 8.808358831844468e-06, + "loss": 17.1228, + "step": 13549 + }, + { + "epoch": 0.24768311185040306, + "grad_norm": 6.148208212333493, + "learning_rate": 8.808167020630198e-06, + "loss": 17.524, + "step": 13550 + }, + { + "epoch": 0.24770139104684957, + "grad_norm": 6.485957048327351, + "learning_rate": 8.807975196068633e-06, + "loss": 17.6687, + "step": 13551 + }, + { + "epoch": 0.2477196702432961, + "grad_norm": 5.341858827419484, + "learning_rate": 8.807783358160447e-06, + "loss": 17.0652, + "step": 13552 + }, + { + "epoch": 0.24773794943974262, + "grad_norm": 8.274691688700468, + "learning_rate": 8.807591506906307e-06, + "loss": 18.3032, + "step": 13553 + }, + { + "epoch": 0.24775622863618915, + "grad_norm": 6.638551424156307, + "learning_rate": 8.807399642306894e-06, + "loss": 17.8078, + "step": 13554 + }, + { + "epoch": 0.2477745078326357, + "grad_norm": 5.17861307736317, + "learning_rate": 8.80720776436287e-06, + "loss": 17.094, + "step": 13555 + }, + { + "epoch": 0.2477927870290822, + "grad_norm": 7.188883248739621, + "learning_rate": 8.807015873074918e-06, + "loss": 17.9123, + "step": 13556 + }, + { + "epoch": 0.24781106622552873, + "grad_norm": 6.701056752872817, + "learning_rate": 8.806823968443704e-06, + "loss": 17.7605, + "step": 13557 + }, + { + "epoch": 0.24782934542197524, + "grad_norm": 6.839555891126196, + "learning_rate": 8.806632050469901e-06, + "loss": 17.6233, + "step": 13558 + }, + { + "epoch": 0.24784762461842177, + "grad_norm": 7.8974902605052035, + "learning_rate": 8.806440119154185e-06, + "loss": 18.1118, + "step": 13559 + }, + { + "epoch": 0.2478659038148683, + "grad_norm": 5.850233448254094, + "learning_rate": 8.806248174497225e-06, + "loss": 17.1914, + "step": 13560 + }, + { + "epoch": 0.24788418301131482, + "grad_norm": 6.1803635027236, + "learning_rate": 8.806056216499697e-06, + "loss": 17.3134, + "step": 13561 + }, + { + "epoch": 0.24790246220776135, + "grad_norm": 7.44515695266595, + "learning_rate": 8.805864245162272e-06, + "loss": 17.8741, + "step": 13562 + }, + { + "epoch": 0.24792074140420786, + "grad_norm": 6.730564786986195, + "learning_rate": 8.805672260485623e-06, + "loss": 17.6039, + "step": 13563 + }, + { + 
"epoch": 0.2479390206006544, + "grad_norm": 6.6264531741244195, + "learning_rate": 8.805480262470422e-06, + "loss": 17.7734, + "step": 13564 + }, + { + "epoch": 0.24795729979710093, + "grad_norm": 6.879737015030938, + "learning_rate": 8.805288251117343e-06, + "loss": 17.3106, + "step": 13565 + }, + { + "epoch": 0.24797557899354744, + "grad_norm": 6.596703873166253, + "learning_rate": 8.805096226427059e-06, + "loss": 17.6785, + "step": 13566 + }, + { + "epoch": 0.24799385818999398, + "grad_norm": 6.330324217831363, + "learning_rate": 8.804904188400243e-06, + "loss": 17.4953, + "step": 13567 + }, + { + "epoch": 0.24801213738644048, + "grad_norm": 6.476138941986639, + "learning_rate": 8.80471213703757e-06, + "loss": 17.8462, + "step": 13568 + }, + { + "epoch": 0.24803041658288702, + "grad_norm": 7.202468307013269, + "learning_rate": 8.804520072339709e-06, + "loss": 17.9772, + "step": 13569 + }, + { + "epoch": 0.24804869577933353, + "grad_norm": 6.487313451592617, + "learning_rate": 8.804327994307335e-06, + "loss": 17.4103, + "step": 13570 + }, + { + "epoch": 0.24806697497578006, + "grad_norm": 5.396480993896546, + "learning_rate": 8.804135902941121e-06, + "loss": 16.9656, + "step": 13571 + }, + { + "epoch": 0.2480852541722266, + "grad_norm": 6.209982091023376, + "learning_rate": 8.803943798241744e-06, + "loss": 17.1646, + "step": 13572 + }, + { + "epoch": 0.2481035333686731, + "grad_norm": 6.259138045292799, + "learning_rate": 8.80375168020987e-06, + "loss": 17.5918, + "step": 13573 + }, + { + "epoch": 0.24812181256511964, + "grad_norm": 6.4363935474218, + "learning_rate": 8.803559548846178e-06, + "loss": 17.6681, + "step": 13574 + }, + { + "epoch": 0.24814009176156615, + "grad_norm": 8.033724951282872, + "learning_rate": 8.803367404151341e-06, + "loss": 18.1917, + "step": 13575 + }, + { + "epoch": 0.2481583709580127, + "grad_norm": 8.919849199637072, + "learning_rate": 8.803175246126032e-06, + "loss": 18.8369, + "step": 13576 + }, + { + "epoch": 0.24817665015445922, + "grad_norm": 8.243067495335772, + "learning_rate": 8.802983074770922e-06, + "loss": 18.2087, + "step": 13577 + }, + { + "epoch": 0.24819492935090573, + "grad_norm": 6.223858430854021, + "learning_rate": 8.802790890086686e-06, + "loss": 17.5654, + "step": 13578 + }, + { + "epoch": 0.24821320854735227, + "grad_norm": 7.387343307779693, + "learning_rate": 8.802598692074e-06, + "loss": 18.1791, + "step": 13579 + }, + { + "epoch": 0.24823148774379877, + "grad_norm": 6.975913950209664, + "learning_rate": 8.802406480733534e-06, + "loss": 17.8566, + "step": 13580 + }, + { + "epoch": 0.2482497669402453, + "grad_norm": 5.483926267998872, + "learning_rate": 8.802214256065963e-06, + "loss": 17.0997, + "step": 13581 + }, + { + "epoch": 0.24826804613669184, + "grad_norm": 7.6291144664324415, + "learning_rate": 8.802022018071961e-06, + "loss": 18.0416, + "step": 13582 + }, + { + "epoch": 0.24828632533313835, + "grad_norm": 6.39217728338996, + "learning_rate": 8.801829766752203e-06, + "loss": 17.6541, + "step": 13583 + }, + { + "epoch": 0.2483046045295849, + "grad_norm": 6.595641814029959, + "learning_rate": 8.80163750210736e-06, + "loss": 17.8408, + "step": 13584 + }, + { + "epoch": 0.2483228837260314, + "grad_norm": 7.254710124077112, + "learning_rate": 8.80144522413811e-06, + "loss": 17.797, + "step": 13585 + }, + { + "epoch": 0.24834116292247793, + "grad_norm": 6.5113618090311345, + "learning_rate": 8.801252932845122e-06, + "loss": 17.67, + "step": 13586 + }, + { + "epoch": 0.24835944211892444, + "grad_norm": 6.110250760314191, + 
"learning_rate": 8.801060628229072e-06, + "loss": 17.4549, + "step": 13587 + }, + { + "epoch": 0.24837772131537098, + "grad_norm": 7.17153914014889, + "learning_rate": 8.800868310290635e-06, + "loss": 17.944, + "step": 13588 + }, + { + "epoch": 0.2483960005118175, + "grad_norm": 6.6406924892044685, + "learning_rate": 8.800675979030484e-06, + "loss": 17.746, + "step": 13589 + }, + { + "epoch": 0.24841427970826402, + "grad_norm": 7.9014519895766355, + "learning_rate": 8.800483634449295e-06, + "loss": 18.5655, + "step": 13590 + }, + { + "epoch": 0.24843255890471055, + "grad_norm": 6.178339948853413, + "learning_rate": 8.80029127654774e-06, + "loss": 17.3984, + "step": 13591 + }, + { + "epoch": 0.24845083810115706, + "grad_norm": 7.3444746228631095, + "learning_rate": 8.800098905326493e-06, + "loss": 18.1451, + "step": 13592 + }, + { + "epoch": 0.2484691172976036, + "grad_norm": 7.1783347093880945, + "learning_rate": 8.79990652078623e-06, + "loss": 17.7128, + "step": 13593 + }, + { + "epoch": 0.24848739649405013, + "grad_norm": 6.075669041088203, + "learning_rate": 8.799714122927625e-06, + "loss": 17.5606, + "step": 13594 + }, + { + "epoch": 0.24850567569049664, + "grad_norm": 6.382625845225516, + "learning_rate": 8.79952171175135e-06, + "loss": 17.4704, + "step": 13595 + }, + { + "epoch": 0.24852395488694318, + "grad_norm": 6.3752340265404355, + "learning_rate": 8.799329287258083e-06, + "loss": 17.5366, + "step": 13596 + }, + { + "epoch": 0.24854223408338968, + "grad_norm": 6.915529798093468, + "learning_rate": 8.799136849448496e-06, + "loss": 17.659, + "step": 13597 + }, + { + "epoch": 0.24856051327983622, + "grad_norm": 6.612852806965295, + "learning_rate": 8.798944398323261e-06, + "loss": 17.8432, + "step": 13598 + }, + { + "epoch": 0.24857879247628276, + "grad_norm": 6.2407458376604055, + "learning_rate": 8.798751933883058e-06, + "loss": 17.5232, + "step": 13599 + }, + { + "epoch": 0.24859707167272926, + "grad_norm": 6.711701644131944, + "learning_rate": 8.79855945612856e-06, + "loss": 17.7623, + "step": 13600 + }, + { + "epoch": 0.2486153508691758, + "grad_norm": 7.128477037322055, + "learning_rate": 8.79836696506044e-06, + "loss": 18.0356, + "step": 13601 + }, + { + "epoch": 0.2486336300656223, + "grad_norm": 5.804775169214023, + "learning_rate": 8.798174460679374e-06, + "loss": 17.2787, + "step": 13602 + }, + { + "epoch": 0.24865190926206884, + "grad_norm": 5.70416433023589, + "learning_rate": 8.797981942986035e-06, + "loss": 17.5225, + "step": 13603 + }, + { + "epoch": 0.24867018845851535, + "grad_norm": 7.346183355605944, + "learning_rate": 8.797789411981098e-06, + "loss": 18.0112, + "step": 13604 + }, + { + "epoch": 0.2486884676549619, + "grad_norm": 8.26468927004845, + "learning_rate": 8.797596867665241e-06, + "loss": 18.4587, + "step": 13605 + }, + { + "epoch": 0.24870674685140842, + "grad_norm": 8.255270705426234, + "learning_rate": 8.797404310039135e-06, + "loss": 18.4868, + "step": 13606 + }, + { + "epoch": 0.24872502604785493, + "grad_norm": 8.923343128536514, + "learning_rate": 8.797211739103458e-06, + "loss": 18.3208, + "step": 13607 + }, + { + "epoch": 0.24874330524430147, + "grad_norm": 5.397304363700296, + "learning_rate": 8.797019154858881e-06, + "loss": 16.9809, + "step": 13608 + }, + { + "epoch": 0.24876158444074797, + "grad_norm": 7.430666195594035, + "learning_rate": 8.796826557306083e-06, + "loss": 18.066, + "step": 13609 + }, + { + "epoch": 0.2487798636371945, + "grad_norm": 6.003755063812365, + "learning_rate": 8.796633946445737e-06, + "loss": 17.4161, + "step": 
13610 + }, + { + "epoch": 0.24879814283364104, + "grad_norm": 5.620294923954437, + "learning_rate": 8.796441322278518e-06, + "loss": 17.3661, + "step": 13611 + }, + { + "epoch": 0.24881642203008755, + "grad_norm": 5.7806892935003615, + "learning_rate": 8.796248684805103e-06, + "loss": 17.2277, + "step": 13612 + }, + { + "epoch": 0.2488347012265341, + "grad_norm": 6.581495995371712, + "learning_rate": 8.796056034026164e-06, + "loss": 17.485, + "step": 13613 + }, + { + "epoch": 0.2488529804229806, + "grad_norm": 7.568553920028167, + "learning_rate": 8.795863369942379e-06, + "loss": 18.2171, + "step": 13614 + }, + { + "epoch": 0.24887125961942713, + "grad_norm": 12.072031985212378, + "learning_rate": 8.795670692554422e-06, + "loss": 18.5831, + "step": 13615 + }, + { + "epoch": 0.24888953881587367, + "grad_norm": 6.5694429939948185, + "learning_rate": 8.79547800186297e-06, + "loss": 17.3885, + "step": 13616 + }, + { + "epoch": 0.24890781801232018, + "grad_norm": 7.464572581084682, + "learning_rate": 8.795285297868695e-06, + "loss": 18.21, + "step": 13617 + }, + { + "epoch": 0.2489260972087667, + "grad_norm": 7.097192977191152, + "learning_rate": 8.795092580572274e-06, + "loss": 17.9665, + "step": 13618 + }, + { + "epoch": 0.24894437640521322, + "grad_norm": 5.887254196280174, + "learning_rate": 8.794899849974384e-06, + "loss": 17.5091, + "step": 13619 + }, + { + "epoch": 0.24896265560165975, + "grad_norm": 6.292638117710199, + "learning_rate": 8.7947071060757e-06, + "loss": 17.9647, + "step": 13620 + }, + { + "epoch": 0.24898093479810626, + "grad_norm": 5.610482638041628, + "learning_rate": 8.794514348876894e-06, + "loss": 17.3419, + "step": 13621 + }, + { + "epoch": 0.2489992139945528, + "grad_norm": 6.985345441765878, + "learning_rate": 8.794321578378649e-06, + "loss": 17.9621, + "step": 13622 + }, + { + "epoch": 0.24901749319099933, + "grad_norm": 7.2929065272559646, + "learning_rate": 8.794128794581634e-06, + "loss": 18.2403, + "step": 13623 + }, + { + "epoch": 0.24903577238744584, + "grad_norm": 6.574658771008031, + "learning_rate": 8.793935997486525e-06, + "loss": 17.8019, + "step": 13624 + }, + { + "epoch": 0.24905405158389238, + "grad_norm": 5.88672130310422, + "learning_rate": 8.793743187094002e-06, + "loss": 17.4474, + "step": 13625 + }, + { + "epoch": 0.24907233078033889, + "grad_norm": 8.240587812286298, + "learning_rate": 8.793550363404737e-06, + "loss": 18.3002, + "step": 13626 + }, + { + "epoch": 0.24909060997678542, + "grad_norm": 7.550864745136357, + "learning_rate": 8.793357526419406e-06, + "loss": 17.6373, + "step": 13627 + }, + { + "epoch": 0.24910888917323196, + "grad_norm": 7.53562386247126, + "learning_rate": 8.793164676138687e-06, + "loss": 17.9381, + "step": 13628 + }, + { + "epoch": 0.24912716836967846, + "grad_norm": 6.208485718974186, + "learning_rate": 8.792971812563258e-06, + "loss": 17.4192, + "step": 13629 + }, + { + "epoch": 0.249145447566125, + "grad_norm": 6.042275434645566, + "learning_rate": 8.792778935693788e-06, + "loss": 17.1798, + "step": 13630 + }, + { + "epoch": 0.2491637267625715, + "grad_norm": 7.903588151094938, + "learning_rate": 8.792586045530958e-06, + "loss": 18.0765, + "step": 13631 + }, + { + "epoch": 0.24918200595901804, + "grad_norm": 6.725425760282418, + "learning_rate": 8.792393142075443e-06, + "loss": 17.4452, + "step": 13632 + }, + { + "epoch": 0.24920028515546458, + "grad_norm": 7.613950756375264, + "learning_rate": 8.79220022532792e-06, + "loss": 17.8006, + "step": 13633 + }, + { + "epoch": 0.2492185643519111, + "grad_norm": 
6.311147051785585, + "learning_rate": 8.792007295289064e-06, + "loss": 17.6108, + "step": 13634 + }, + { + "epoch": 0.24923684354835762, + "grad_norm": 8.6277951319873, + "learning_rate": 8.791814351959551e-06, + "loss": 17.8419, + "step": 13635 + }, + { + "epoch": 0.24925512274480413, + "grad_norm": 6.009905504629565, + "learning_rate": 8.79162139534006e-06, + "loss": 17.4084, + "step": 13636 + }, + { + "epoch": 0.24927340194125067, + "grad_norm": 7.029158841169786, + "learning_rate": 8.791428425431263e-06, + "loss": 17.9272, + "step": 13637 + }, + { + "epoch": 0.24929168113769717, + "grad_norm": 9.781101938345776, + "learning_rate": 8.791235442233837e-06, + "loss": 18.2717, + "step": 13638 + }, + { + "epoch": 0.2493099603341437, + "grad_norm": 8.353990401220457, + "learning_rate": 8.791042445748462e-06, + "loss": 18.5817, + "step": 13639 + }, + { + "epoch": 0.24932823953059025, + "grad_norm": 6.6549857354042885, + "learning_rate": 8.790849435975813e-06, + "loss": 17.6618, + "step": 13640 + }, + { + "epoch": 0.24934651872703675, + "grad_norm": 6.2320122434893594, + "learning_rate": 8.790656412916563e-06, + "loss": 17.4033, + "step": 13641 + }, + { + "epoch": 0.2493647979234833, + "grad_norm": 7.176249965546837, + "learning_rate": 8.790463376571392e-06, + "loss": 17.7429, + "step": 13642 + }, + { + "epoch": 0.2493830771199298, + "grad_norm": 5.734651787696095, + "learning_rate": 8.790270326940976e-06, + "loss": 17.1494, + "step": 13643 + }, + { + "epoch": 0.24940135631637633, + "grad_norm": 6.9045363113073455, + "learning_rate": 8.790077264025992e-06, + "loss": 17.9641, + "step": 13644 + }, + { + "epoch": 0.24941963551282287, + "grad_norm": 5.184670303198655, + "learning_rate": 8.789884187827116e-06, + "loss": 17.0292, + "step": 13645 + }, + { + "epoch": 0.24943791470926938, + "grad_norm": 7.711816299566391, + "learning_rate": 8.789691098345023e-06, + "loss": 17.9144, + "step": 13646 + }, + { + "epoch": 0.2494561939057159, + "grad_norm": 7.719521491741951, + "learning_rate": 8.789497995580395e-06, + "loss": 18.3423, + "step": 13647 + }, + { + "epoch": 0.24947447310216242, + "grad_norm": 5.627474291296823, + "learning_rate": 8.789304879533901e-06, + "loss": 17.2978, + "step": 13648 + }, + { + "epoch": 0.24949275229860896, + "grad_norm": 6.158697685436154, + "learning_rate": 8.789111750206224e-06, + "loss": 17.36, + "step": 13649 + }, + { + "epoch": 0.2495110314950555, + "grad_norm": 6.65429193676253, + "learning_rate": 8.78891860759804e-06, + "loss": 17.7113, + "step": 13650 + }, + { + "epoch": 0.249529310691502, + "grad_norm": 5.166960750257122, + "learning_rate": 8.788725451710026e-06, + "loss": 16.8692, + "step": 13651 + }, + { + "epoch": 0.24954758988794853, + "grad_norm": 6.376480383954239, + "learning_rate": 8.788532282542857e-06, + "loss": 17.4281, + "step": 13652 + }, + { + "epoch": 0.24956586908439504, + "grad_norm": 5.847633936487089, + "learning_rate": 8.788339100097209e-06, + "loss": 17.232, + "step": 13653 + }, + { + "epoch": 0.24958414828084158, + "grad_norm": 8.168623132831579, + "learning_rate": 8.788145904373765e-06, + "loss": 18.1684, + "step": 13654 + }, + { + "epoch": 0.24960242747728809, + "grad_norm": 7.436815384741335, + "learning_rate": 8.787952695373197e-06, + "loss": 18.0997, + "step": 13655 + }, + { + "epoch": 0.24962070667373462, + "grad_norm": 5.440164677593967, + "learning_rate": 8.787759473096182e-06, + "loss": 17.3317, + "step": 13656 + }, + { + "epoch": 0.24963898587018116, + "grad_norm": 7.515954411219964, + "learning_rate": 8.7875662375434e-06, + "loss": 
17.7748, + "step": 13657 + }, + { + "epoch": 0.24965726506662766, + "grad_norm": 7.113133122298919, + "learning_rate": 8.787372988715525e-06, + "loss": 18.1626, + "step": 13658 + }, + { + "epoch": 0.2496755442630742, + "grad_norm": 7.010804874667533, + "learning_rate": 8.78717972661324e-06, + "loss": 17.7421, + "step": 13659 + }, + { + "epoch": 0.2496938234595207, + "grad_norm": 5.665932415627742, + "learning_rate": 8.786986451237217e-06, + "loss": 17.1337, + "step": 13660 + }, + { + "epoch": 0.24971210265596724, + "grad_norm": 7.371647518398055, + "learning_rate": 8.786793162588135e-06, + "loss": 18.1092, + "step": 13661 + }, + { + "epoch": 0.24973038185241378, + "grad_norm": 5.964545631329175, + "learning_rate": 8.78659986066667e-06, + "loss": 17.1018, + "step": 13662 + }, + { + "epoch": 0.2497486610488603, + "grad_norm": 6.145087876449944, + "learning_rate": 8.786406545473503e-06, + "loss": 17.179, + "step": 13663 + }, + { + "epoch": 0.24976694024530682, + "grad_norm": 7.724709929005375, + "learning_rate": 8.786213217009309e-06, + "loss": 17.7277, + "step": 13664 + }, + { + "epoch": 0.24978521944175333, + "grad_norm": 5.767009279703939, + "learning_rate": 8.786019875274764e-06, + "loss": 17.0923, + "step": 13665 + }, + { + "epoch": 0.24980349863819987, + "grad_norm": 5.577553796148244, + "learning_rate": 8.785826520270553e-06, + "loss": 17.1695, + "step": 13666 + }, + { + "epoch": 0.2498217778346464, + "grad_norm": 7.324165428076282, + "learning_rate": 8.785633151997343e-06, + "loss": 17.6439, + "step": 13667 + }, + { + "epoch": 0.2498400570310929, + "grad_norm": 7.22453251197266, + "learning_rate": 8.785439770455821e-06, + "loss": 17.5587, + "step": 13668 + }, + { + "epoch": 0.24985833622753945, + "grad_norm": 6.3350418539347215, + "learning_rate": 8.785246375646662e-06, + "loss": 17.3439, + "step": 13669 + }, + { + "epoch": 0.24987661542398595, + "grad_norm": 8.105900333242337, + "learning_rate": 8.78505296757054e-06, + "loss": 18.0721, + "step": 13670 + }, + { + "epoch": 0.2498948946204325, + "grad_norm": 6.073732518571119, + "learning_rate": 8.784859546228136e-06, + "loss": 17.3716, + "step": 13671 + }, + { + "epoch": 0.249913173816879, + "grad_norm": 7.028398136879655, + "learning_rate": 8.78466611162013e-06, + "loss": 17.7907, + "step": 13672 + }, + { + "epoch": 0.24993145301332553, + "grad_norm": 7.816419386903626, + "learning_rate": 8.784472663747195e-06, + "loss": 17.9682, + "step": 13673 + }, + { + "epoch": 0.24994973220977207, + "grad_norm": 7.585974508246072, + "learning_rate": 8.784279202610012e-06, + "loss": 17.9536, + "step": 13674 + }, + { + "epoch": 0.24996801140621858, + "grad_norm": 6.368820454232451, + "learning_rate": 8.784085728209261e-06, + "loss": 17.5333, + "step": 13675 + }, + { + "epoch": 0.2499862906026651, + "grad_norm": 6.801929375828293, + "learning_rate": 8.783892240545618e-06, + "loss": 17.6958, + "step": 13676 + }, + { + "epoch": 0.2500045697991116, + "grad_norm": 7.71347140343787, + "learning_rate": 8.783698739619759e-06, + "loss": 18.0251, + "step": 13677 + }, + { + "epoch": 0.25002284899555816, + "grad_norm": 7.172448762691464, + "learning_rate": 8.783505225432364e-06, + "loss": 17.8857, + "step": 13678 + }, + { + "epoch": 0.2500411281920047, + "grad_norm": 7.8308999600022835, + "learning_rate": 8.783311697984113e-06, + "loss": 18.3854, + "step": 13679 + }, + { + "epoch": 0.2500594073884512, + "grad_norm": 7.909860254259011, + "learning_rate": 8.783118157275683e-06, + "loss": 18.1653, + "step": 13680 + }, + { + "epoch": 0.2500776865848977, + 
"grad_norm": 7.399177070632512, + "learning_rate": 8.78292460330775e-06, + "loss": 18.1998, + "step": 13681 + }, + { + "epoch": 0.25009596578134424, + "grad_norm": 5.943759962856429, + "learning_rate": 8.782731036080996e-06, + "loss": 17.5429, + "step": 13682 + }, + { + "epoch": 0.2501142449777908, + "grad_norm": 7.135033931021666, + "learning_rate": 8.782537455596099e-06, + "loss": 17.7688, + "step": 13683 + }, + { + "epoch": 0.2501325241742373, + "grad_norm": 6.35122949967104, + "learning_rate": 8.782343861853735e-06, + "loss": 17.3839, + "step": 13684 + }, + { + "epoch": 0.25015080337068385, + "grad_norm": 9.789846079296307, + "learning_rate": 8.782150254854584e-06, + "loss": 18.6432, + "step": 13685 + }, + { + "epoch": 0.25016908256713033, + "grad_norm": 7.367962037787293, + "learning_rate": 8.781956634599325e-06, + "loss": 18.0811, + "step": 13686 + }, + { + "epoch": 0.25018736176357687, + "grad_norm": 8.561889029472841, + "learning_rate": 8.781763001088636e-06, + "loss": 18.1209, + "step": 13687 + }, + { + "epoch": 0.2502056409600234, + "grad_norm": 6.019826809806316, + "learning_rate": 8.781569354323197e-06, + "loss": 17.1458, + "step": 13688 + }, + { + "epoch": 0.25022392015646994, + "grad_norm": 6.142316335916776, + "learning_rate": 8.781375694303683e-06, + "loss": 17.2524, + "step": 13689 + }, + { + "epoch": 0.2502421993529165, + "grad_norm": 6.430875967650894, + "learning_rate": 8.781182021030777e-06, + "loss": 17.3811, + "step": 13690 + }, + { + "epoch": 0.25026047854936295, + "grad_norm": 6.6397344390401845, + "learning_rate": 8.780988334505156e-06, + "loss": 17.4814, + "step": 13691 + }, + { + "epoch": 0.2502787577458095, + "grad_norm": 6.656274131397097, + "learning_rate": 8.7807946347275e-06, + "loss": 17.7153, + "step": 13692 + }, + { + "epoch": 0.250297036942256, + "grad_norm": 6.486317260392342, + "learning_rate": 8.780600921698485e-06, + "loss": 17.6734, + "step": 13693 + }, + { + "epoch": 0.25031531613870256, + "grad_norm": 6.526161603655303, + "learning_rate": 8.780407195418792e-06, + "loss": 17.3982, + "step": 13694 + }, + { + "epoch": 0.25033359533514904, + "grad_norm": 6.3537219414511, + "learning_rate": 8.7802134558891e-06, + "loss": 17.364, + "step": 13695 + }, + { + "epoch": 0.2503518745315956, + "grad_norm": 6.729428797262897, + "learning_rate": 8.78001970311009e-06, + "loss": 17.9462, + "step": 13696 + }, + { + "epoch": 0.2503701537280421, + "grad_norm": 6.65367901168732, + "learning_rate": 8.779825937082436e-06, + "loss": 17.4791, + "step": 13697 + }, + { + "epoch": 0.25038843292448865, + "grad_norm": 5.8306443302054385, + "learning_rate": 8.779632157806821e-06, + "loss": 17.4159, + "step": 13698 + }, + { + "epoch": 0.2504067121209352, + "grad_norm": 6.171992721075378, + "learning_rate": 8.779438365283924e-06, + "loss": 17.364, + "step": 13699 + }, + { + "epoch": 0.25042499131738166, + "grad_norm": 7.253356684839225, + "learning_rate": 8.779244559514424e-06, + "loss": 17.9746, + "step": 13700 + }, + { + "epoch": 0.2504432705138282, + "grad_norm": 6.053186052422697, + "learning_rate": 8.779050740498998e-06, + "loss": 17.4817, + "step": 13701 + }, + { + "epoch": 0.25046154971027473, + "grad_norm": 6.230493315371202, + "learning_rate": 8.77885690823833e-06, + "loss": 17.1891, + "step": 13702 + }, + { + "epoch": 0.25047982890672127, + "grad_norm": 6.227282377287225, + "learning_rate": 8.778663062733093e-06, + "loss": 17.286, + "step": 13703 + }, + { + "epoch": 0.2504981081031678, + "grad_norm": 7.03631259111469, + "learning_rate": 8.778469203983971e-06, + "loss": 
17.4506, + "step": 13704 + }, + { + "epoch": 0.2505163872996143, + "grad_norm": 6.550702438917967, + "learning_rate": 8.778275331991643e-06, + "loss": 17.6496, + "step": 13705 + }, + { + "epoch": 0.2505346664960608, + "grad_norm": 6.766698513387287, + "learning_rate": 8.778081446756787e-06, + "loss": 17.5792, + "step": 13706 + }, + { + "epoch": 0.25055294569250736, + "grad_norm": 6.666839859058256, + "learning_rate": 8.777887548280084e-06, + "loss": 17.4515, + "step": 13707 + }, + { + "epoch": 0.2505712248889539, + "grad_norm": 6.154213037421661, + "learning_rate": 8.777693636562212e-06, + "loss": 17.2226, + "step": 13708 + }, + { + "epoch": 0.2505895040854004, + "grad_norm": 8.374144174894667, + "learning_rate": 8.777499711603854e-06, + "loss": 18.1339, + "step": 13709 + }, + { + "epoch": 0.2506077832818469, + "grad_norm": 7.375942271841453, + "learning_rate": 8.777305773405684e-06, + "loss": 17.7976, + "step": 13710 + }, + { + "epoch": 0.25062606247829344, + "grad_norm": 7.844301927949076, + "learning_rate": 8.777111821968386e-06, + "loss": 18.0166, + "step": 13711 + }, + { + "epoch": 0.25064434167474, + "grad_norm": 6.245290524021204, + "learning_rate": 8.776917857292641e-06, + "loss": 17.3728, + "step": 13712 + }, + { + "epoch": 0.2506626208711865, + "grad_norm": 7.027406029170456, + "learning_rate": 8.776723879379126e-06, + "loss": 18.0623, + "step": 13713 + }, + { + "epoch": 0.25068090006763305, + "grad_norm": 6.170920509560837, + "learning_rate": 8.77652988822852e-06, + "loss": 17.3041, + "step": 13714 + }, + { + "epoch": 0.25069917926407953, + "grad_norm": 7.877857309985771, + "learning_rate": 8.776335883841504e-06, + "loss": 18.2322, + "step": 13715 + }, + { + "epoch": 0.25071745846052607, + "grad_norm": 7.326621885665798, + "learning_rate": 8.776141866218761e-06, + "loss": 17.8354, + "step": 13716 + }, + { + "epoch": 0.2507357376569726, + "grad_norm": 7.026959857664446, + "learning_rate": 8.775947835360967e-06, + "loss": 17.7837, + "step": 13717 + }, + { + "epoch": 0.25075401685341914, + "grad_norm": 7.637534254447708, + "learning_rate": 8.775753791268804e-06, + "loss": 18.2689, + "step": 13718 + }, + { + "epoch": 0.2507722960498657, + "grad_norm": 7.447805613172726, + "learning_rate": 8.775559733942952e-06, + "loss": 17.8548, + "step": 13719 + }, + { + "epoch": 0.25079057524631215, + "grad_norm": 6.978672598215624, + "learning_rate": 8.775365663384088e-06, + "loss": 17.8056, + "step": 13720 + }, + { + "epoch": 0.2508088544427587, + "grad_norm": 6.789059085714313, + "learning_rate": 8.775171579592898e-06, + "loss": 17.8224, + "step": 13721 + }, + { + "epoch": 0.2508271336392052, + "grad_norm": 6.528536194001972, + "learning_rate": 8.774977482570058e-06, + "loss": 17.9032, + "step": 13722 + }, + { + "epoch": 0.25084541283565176, + "grad_norm": 5.894994552697368, + "learning_rate": 8.77478337231625e-06, + "loss": 17.3327, + "step": 13723 + }, + { + "epoch": 0.2508636920320983, + "grad_norm": 6.889212073845134, + "learning_rate": 8.774589248832153e-06, + "loss": 17.5949, + "step": 13724 + }, + { + "epoch": 0.2508819712285448, + "grad_norm": 5.663663159463208, + "learning_rate": 8.77439511211845e-06, + "loss": 17.4429, + "step": 13725 + }, + { + "epoch": 0.2509002504249913, + "grad_norm": 6.10287318775543, + "learning_rate": 8.774200962175816e-06, + "loss": 17.5407, + "step": 13726 + }, + { + "epoch": 0.25091852962143785, + "grad_norm": 7.309553641820336, + "learning_rate": 8.77400679900494e-06, + "loss": 17.8893, + "step": 13727 + }, + { + "epoch": 0.2509368088178844, + "grad_norm": 
6.064058751608981, + "learning_rate": 8.773812622606494e-06, + "loss": 17.4146, + "step": 13728 + }, + { + "epoch": 0.25095508801433086, + "grad_norm": 6.750710304996495, + "learning_rate": 8.773618432981163e-06, + "loss": 17.7342, + "step": 13729 + }, + { + "epoch": 0.2509733672107774, + "grad_norm": 6.2763482507359925, + "learning_rate": 8.773424230129628e-06, + "loss": 17.5536, + "step": 13730 + }, + { + "epoch": 0.25099164640722393, + "grad_norm": 7.598059918733686, + "learning_rate": 8.773230014052568e-06, + "loss": 18.1678, + "step": 13731 + }, + { + "epoch": 0.25100992560367047, + "grad_norm": 7.145922873989569, + "learning_rate": 8.773035784750663e-06, + "loss": 17.4477, + "step": 13732 + }, + { + "epoch": 0.251028204800117, + "grad_norm": 6.458154817569209, + "learning_rate": 8.772841542224596e-06, + "loss": 17.4468, + "step": 13733 + }, + { + "epoch": 0.2510464839965635, + "grad_norm": 7.087951311196237, + "learning_rate": 8.772647286475047e-06, + "loss": 18.3194, + "step": 13734 + }, + { + "epoch": 0.25106476319301, + "grad_norm": 6.436018704490948, + "learning_rate": 8.772453017502695e-06, + "loss": 17.663, + "step": 13735 + }, + { + "epoch": 0.25108304238945656, + "grad_norm": 6.977212610563405, + "learning_rate": 8.772258735308225e-06, + "loss": 17.5539, + "step": 13736 + }, + { + "epoch": 0.2511013215859031, + "grad_norm": 6.239105916239187, + "learning_rate": 8.772064439892314e-06, + "loss": 17.4486, + "step": 13737 + }, + { + "epoch": 0.25111960078234963, + "grad_norm": 6.015031005242834, + "learning_rate": 8.771870131255646e-06, + "loss": 17.2309, + "step": 13738 + }, + { + "epoch": 0.2511378799787961, + "grad_norm": 6.721088540210563, + "learning_rate": 8.771675809398898e-06, + "loss": 17.5095, + "step": 13739 + }, + { + "epoch": 0.25115615917524264, + "grad_norm": 6.41520292709623, + "learning_rate": 8.771481474322755e-06, + "loss": 17.5005, + "step": 13740 + }, + { + "epoch": 0.2511744383716892, + "grad_norm": 6.2391058121266, + "learning_rate": 8.771287126027897e-06, + "loss": 17.1922, + "step": 13741 + }, + { + "epoch": 0.2511927175681357, + "grad_norm": 7.028197784691112, + "learning_rate": 8.771092764515006e-06, + "loss": 17.6695, + "step": 13742 + }, + { + "epoch": 0.25121099676458225, + "grad_norm": 6.927224606206834, + "learning_rate": 8.77089838978476e-06, + "loss": 17.3227, + "step": 13743 + }, + { + "epoch": 0.25122927596102873, + "grad_norm": 5.501662333540958, + "learning_rate": 8.770704001837843e-06, + "loss": 17.1795, + "step": 13744 + }, + { + "epoch": 0.25124755515747527, + "grad_norm": 7.98795049015665, + "learning_rate": 8.770509600674934e-06, + "loss": 17.6757, + "step": 13745 + }, + { + "epoch": 0.2512658343539218, + "grad_norm": 6.386750052454475, + "learning_rate": 8.770315186296719e-06, + "loss": 17.5961, + "step": 13746 + }, + { + "epoch": 0.25128411355036834, + "grad_norm": 5.7846685922114816, + "learning_rate": 8.770120758703874e-06, + "loss": 17.1132, + "step": 13747 + }, + { + "epoch": 0.2513023927468149, + "grad_norm": 7.50890297590246, + "learning_rate": 8.769926317897084e-06, + "loss": 18.0496, + "step": 13748 + }, + { + "epoch": 0.25132067194326135, + "grad_norm": 7.499891461814617, + "learning_rate": 8.76973186387703e-06, + "loss": 18.0797, + "step": 13749 + }, + { + "epoch": 0.2513389511397079, + "grad_norm": 6.708457406673459, + "learning_rate": 8.769537396644393e-06, + "loss": 17.261, + "step": 13750 + }, + { + "epoch": 0.2513572303361544, + "grad_norm": 5.309378532147494, + "learning_rate": 8.769342916199854e-06, + "loss": 17.024, + 
"step": 13751 + }, + { + "epoch": 0.25137550953260096, + "grad_norm": 6.084734857501226, + "learning_rate": 8.769148422544095e-06, + "loss": 17.3959, + "step": 13752 + }, + { + "epoch": 0.2513937887290475, + "grad_norm": 8.956039493326504, + "learning_rate": 8.768953915677798e-06, + "loss": 18.5505, + "step": 13753 + }, + { + "epoch": 0.251412067925494, + "grad_norm": 6.96444705521746, + "learning_rate": 8.768759395601645e-06, + "loss": 17.9796, + "step": 13754 + }, + { + "epoch": 0.2514303471219405, + "grad_norm": 6.557172749327448, + "learning_rate": 8.768564862316316e-06, + "loss": 17.5487, + "step": 13755 + }, + { + "epoch": 0.25144862631838705, + "grad_norm": 6.0961799240180135, + "learning_rate": 8.768370315822496e-06, + "loss": 17.4884, + "step": 13756 + }, + { + "epoch": 0.2514669055148336, + "grad_norm": 5.801513897231396, + "learning_rate": 8.768175756120864e-06, + "loss": 17.4211, + "step": 13757 + }, + { + "epoch": 0.2514851847112801, + "grad_norm": 7.995947668387565, + "learning_rate": 8.767981183212103e-06, + "loss": 18.1506, + "step": 13758 + }, + { + "epoch": 0.2515034639077266, + "grad_norm": 6.520774360548156, + "learning_rate": 8.767786597096895e-06, + "loss": 17.4924, + "step": 13759 + }, + { + "epoch": 0.25152174310417313, + "grad_norm": 5.587148555102545, + "learning_rate": 8.767591997775922e-06, + "loss": 16.8752, + "step": 13760 + }, + { + "epoch": 0.25154002230061967, + "grad_norm": 6.552470227336554, + "learning_rate": 8.767397385249865e-06, + "loss": 17.7395, + "step": 13761 + }, + { + "epoch": 0.2515583014970662, + "grad_norm": 8.321874718686061, + "learning_rate": 8.767202759519409e-06, + "loss": 17.7483, + "step": 13762 + }, + { + "epoch": 0.2515765806935127, + "grad_norm": 6.215825841701845, + "learning_rate": 8.767008120585233e-06, + "loss": 17.5189, + "step": 13763 + }, + { + "epoch": 0.2515948598899592, + "grad_norm": 6.550420398143107, + "learning_rate": 8.76681346844802e-06, + "loss": 17.7779, + "step": 13764 + }, + { + "epoch": 0.25161313908640576, + "grad_norm": 6.570276372280605, + "learning_rate": 8.766618803108454e-06, + "loss": 17.6302, + "step": 13765 + }, + { + "epoch": 0.2516314182828523, + "grad_norm": 6.98919670996425, + "learning_rate": 8.766424124567215e-06, + "loss": 17.8021, + "step": 13766 + }, + { + "epoch": 0.25164969747929883, + "grad_norm": 7.385807087426103, + "learning_rate": 8.766229432824986e-06, + "loss": 18.305, + "step": 13767 + }, + { + "epoch": 0.2516679766757453, + "grad_norm": 6.239690219179112, + "learning_rate": 8.76603472788245e-06, + "loss": 17.2774, + "step": 13768 + }, + { + "epoch": 0.25168625587219184, + "grad_norm": 7.529944805267041, + "learning_rate": 8.765840009740289e-06, + "loss": 17.7005, + "step": 13769 + }, + { + "epoch": 0.2517045350686384, + "grad_norm": 6.707427014138782, + "learning_rate": 8.765645278399187e-06, + "loss": 17.8625, + "step": 13770 + }, + { + "epoch": 0.2517228142650849, + "grad_norm": 6.376799995019203, + "learning_rate": 8.765450533859823e-06, + "loss": 17.7751, + "step": 13771 + }, + { + "epoch": 0.25174109346153145, + "grad_norm": 7.842346926988082, + "learning_rate": 8.765255776122884e-06, + "loss": 18.1796, + "step": 13772 + }, + { + "epoch": 0.25175937265797793, + "grad_norm": 7.827714310710927, + "learning_rate": 8.765061005189048e-06, + "loss": 17.8566, + "step": 13773 + }, + { + "epoch": 0.25177765185442447, + "grad_norm": 9.714632895819868, + "learning_rate": 8.764866221059e-06, + "loss": 18.6999, + "step": 13774 + }, + { + "epoch": 0.251795931050871, + "grad_norm": 
5.1415860911994615, + "learning_rate": 8.764671423733424e-06, + "loss": 17.1295, + "step": 13775 + }, + { + "epoch": 0.25181421024731754, + "grad_norm": 5.465218869950854, + "learning_rate": 8.764476613213e-06, + "loss": 17.1065, + "step": 13776 + }, + { + "epoch": 0.2518324894437641, + "grad_norm": 7.352397364400496, + "learning_rate": 8.764281789498412e-06, + "loss": 17.879, + "step": 13777 + }, + { + "epoch": 0.25185076864021055, + "grad_norm": 5.749608396722773, + "learning_rate": 8.764086952590345e-06, + "loss": 17.3095, + "step": 13778 + }, + { + "epoch": 0.2518690478366571, + "grad_norm": 5.911839463727114, + "learning_rate": 8.763892102489478e-06, + "loss": 17.1792, + "step": 13779 + }, + { + "epoch": 0.2518873270331036, + "grad_norm": 6.544310704774565, + "learning_rate": 8.763697239196496e-06, + "loss": 17.6827, + "step": 13780 + }, + { + "epoch": 0.25190560622955016, + "grad_norm": 7.4383699086318815, + "learning_rate": 8.763502362712082e-06, + "loss": 17.9255, + "step": 13781 + }, + { + "epoch": 0.2519238854259967, + "grad_norm": 5.8094518104292385, + "learning_rate": 8.763307473036919e-06, + "loss": 17.2015, + "step": 13782 + }, + { + "epoch": 0.2519421646224432, + "grad_norm": 4.97326767951542, + "learning_rate": 8.76311257017169e-06, + "loss": 16.8358, + "step": 13783 + }, + { + "epoch": 0.2519604438188897, + "grad_norm": 6.373518140325814, + "learning_rate": 8.762917654117077e-06, + "loss": 17.5874, + "step": 13784 + }, + { + "epoch": 0.25197872301533625, + "grad_norm": 6.702180106813105, + "learning_rate": 8.762722724873766e-06, + "loss": 17.7037, + "step": 13785 + }, + { + "epoch": 0.2519970022117828, + "grad_norm": 5.64079520470863, + "learning_rate": 8.762527782442436e-06, + "loss": 17.2127, + "step": 13786 + }, + { + "epoch": 0.2520152814082293, + "grad_norm": 5.75393418504257, + "learning_rate": 8.762332826823774e-06, + "loss": 17.3287, + "step": 13787 + }, + { + "epoch": 0.2520335606046758, + "grad_norm": 8.09878369793486, + "learning_rate": 8.762137858018463e-06, + "loss": 18.1549, + "step": 13788 + }, + { + "epoch": 0.25205183980112233, + "grad_norm": 6.6744313033977445, + "learning_rate": 8.761942876027185e-06, + "loss": 17.7911, + "step": 13789 + }, + { + "epoch": 0.25207011899756887, + "grad_norm": 5.864234424938747, + "learning_rate": 8.761747880850622e-06, + "loss": 17.4366, + "step": 13790 + }, + { + "epoch": 0.2520883981940154, + "grad_norm": 5.890294583815388, + "learning_rate": 8.76155287248946e-06, + "loss": 17.2478, + "step": 13791 + }, + { + "epoch": 0.25210667739046194, + "grad_norm": 6.613126406826565, + "learning_rate": 8.76135785094438e-06, + "loss": 17.4939, + "step": 13792 + }, + { + "epoch": 0.2521249565869084, + "grad_norm": 5.950499194438246, + "learning_rate": 8.76116281621607e-06, + "loss": 17.2808, + "step": 13793 + }, + { + "epoch": 0.25214323578335496, + "grad_norm": 6.577486756184717, + "learning_rate": 8.760967768305208e-06, + "loss": 17.8002, + "step": 13794 + }, + { + "epoch": 0.2521615149798015, + "grad_norm": 7.997368502367322, + "learning_rate": 8.760772707212483e-06, + "loss": 18.0038, + "step": 13795 + }, + { + "epoch": 0.25217979417624803, + "grad_norm": 6.024074301066061, + "learning_rate": 8.760577632938574e-06, + "loss": 17.196, + "step": 13796 + }, + { + "epoch": 0.2521980733726945, + "grad_norm": 6.751303858541155, + "learning_rate": 8.760382545484167e-06, + "loss": 17.7404, + "step": 13797 + }, + { + "epoch": 0.25221635256914104, + "grad_norm": 6.5231244162970095, + "learning_rate": 8.760187444849946e-06, + "loss": 17.4269, 
+ "step": 13798 + }, + { + "epoch": 0.2522346317655876, + "grad_norm": 6.231742603340174, + "learning_rate": 8.759992331036595e-06, + "loss": 17.4389, + "step": 13799 + }, + { + "epoch": 0.2522529109620341, + "grad_norm": 6.846407027722996, + "learning_rate": 8.759797204044796e-06, + "loss": 17.6918, + "step": 13800 + }, + { + "epoch": 0.25227119015848065, + "grad_norm": 5.93279747930007, + "learning_rate": 8.759602063875234e-06, + "loss": 17.2997, + "step": 13801 + }, + { + "epoch": 0.25228946935492713, + "grad_norm": 6.880431100924478, + "learning_rate": 8.759406910528595e-06, + "loss": 17.5852, + "step": 13802 + }, + { + "epoch": 0.25230774855137367, + "grad_norm": 6.464710572953449, + "learning_rate": 8.759211744005558e-06, + "loss": 17.5171, + "step": 13803 + }, + { + "epoch": 0.2523260277478202, + "grad_norm": 6.6056523743240145, + "learning_rate": 8.759016564306813e-06, + "loss": 17.8632, + "step": 13804 + }, + { + "epoch": 0.25234430694426674, + "grad_norm": 7.711844446399845, + "learning_rate": 8.758821371433038e-06, + "loss": 18.2702, + "step": 13805 + }, + { + "epoch": 0.2523625861407133, + "grad_norm": 8.361546716754969, + "learning_rate": 8.758626165384922e-06, + "loss": 17.6389, + "step": 13806 + }, + { + "epoch": 0.25238086533715975, + "grad_norm": 5.348057047525714, + "learning_rate": 8.758430946163147e-06, + "loss": 17.2781, + "step": 13807 + }, + { + "epoch": 0.2523991445336063, + "grad_norm": 5.907196701423856, + "learning_rate": 8.7582357137684e-06, + "loss": 17.2221, + "step": 13808 + }, + { + "epoch": 0.2524174237300528, + "grad_norm": 5.698606558156475, + "learning_rate": 8.75804046820136e-06, + "loss": 17.2344, + "step": 13809 + }, + { + "epoch": 0.25243570292649936, + "grad_norm": 7.619353413647588, + "learning_rate": 8.757845209462714e-06, + "loss": 18.3026, + "step": 13810 + }, + { + "epoch": 0.2524539821229459, + "grad_norm": 6.521224886232347, + "learning_rate": 8.757649937553149e-06, + "loss": 17.608, + "step": 13811 + }, + { + "epoch": 0.2524722613193924, + "grad_norm": 7.291541060939861, + "learning_rate": 8.757454652473345e-06, + "loss": 17.6405, + "step": 13812 + }, + { + "epoch": 0.2524905405158389, + "grad_norm": 10.104211927869024, + "learning_rate": 8.75725935422399e-06, + "loss": 17.9236, + "step": 13813 + }, + { + "epoch": 0.25250881971228545, + "grad_norm": 5.76059508199635, + "learning_rate": 8.757064042805767e-06, + "loss": 17.3052, + "step": 13814 + }, + { + "epoch": 0.252527098908732, + "grad_norm": 6.423065067097856, + "learning_rate": 8.75686871821936e-06, + "loss": 17.5747, + "step": 13815 + }, + { + "epoch": 0.2525453781051785, + "grad_norm": 6.507071909941204, + "learning_rate": 8.756673380465453e-06, + "loss": 17.6979, + "step": 13816 + }, + { + "epoch": 0.252563657301625, + "grad_norm": 5.826902914321585, + "learning_rate": 8.756478029544733e-06, + "loss": 17.3318, + "step": 13817 + }, + { + "epoch": 0.25258193649807154, + "grad_norm": 5.9334082046521965, + "learning_rate": 8.756282665457884e-06, + "loss": 17.2388, + "step": 13818 + }, + { + "epoch": 0.25260021569451807, + "grad_norm": 6.744000989321716, + "learning_rate": 8.756087288205588e-06, + "loss": 17.7638, + "step": 13819 + }, + { + "epoch": 0.2526184948909646, + "grad_norm": 5.564590959550204, + "learning_rate": 8.755891897788534e-06, + "loss": 17.0945, + "step": 13820 + }, + { + "epoch": 0.25263677408741114, + "grad_norm": 5.291913492772538, + "learning_rate": 8.755696494207405e-06, + "loss": 17.153, + "step": 13821 + }, + { + "epoch": 0.2526550532838576, + "grad_norm": 
6.384061545444824, + "learning_rate": 8.755501077462885e-06, + "loss": 17.267, + "step": 13822 + }, + { + "epoch": 0.25267333248030416, + "grad_norm": 6.841622797234484, + "learning_rate": 8.75530564755566e-06, + "loss": 17.4829, + "step": 13823 + }, + { + "epoch": 0.2526916116767507, + "grad_norm": 6.412165616148324, + "learning_rate": 8.755110204486414e-06, + "loss": 17.7288, + "step": 13824 + }, + { + "epoch": 0.25270989087319723, + "grad_norm": 8.189640179660579, + "learning_rate": 8.754914748255832e-06, + "loss": 17.7499, + "step": 13825 + }, + { + "epoch": 0.25272817006964376, + "grad_norm": 6.376840522198051, + "learning_rate": 8.754719278864601e-06, + "loss": 17.3897, + "step": 13826 + }, + { + "epoch": 0.25274644926609025, + "grad_norm": 5.0816212245536345, + "learning_rate": 8.754523796313404e-06, + "loss": 16.8883, + "step": 13827 + }, + { + "epoch": 0.2527647284625368, + "grad_norm": 10.482001907576482, + "learning_rate": 8.754328300602928e-06, + "loss": 18.3032, + "step": 13828 + }, + { + "epoch": 0.2527830076589833, + "grad_norm": 6.792427911599967, + "learning_rate": 8.754132791733856e-06, + "loss": 17.7622, + "step": 13829 + }, + { + "epoch": 0.25280128685542985, + "grad_norm": 6.72865970828854, + "learning_rate": 8.753937269706873e-06, + "loss": 17.924, + "step": 13830 + }, + { + "epoch": 0.25281956605187633, + "grad_norm": 6.264580177397738, + "learning_rate": 8.753741734522668e-06, + "loss": 17.551, + "step": 13831 + }, + { + "epoch": 0.25283784524832287, + "grad_norm": 7.223483013362514, + "learning_rate": 8.753546186181924e-06, + "loss": 17.7338, + "step": 13832 + }, + { + "epoch": 0.2528561244447694, + "grad_norm": 5.476514762019108, + "learning_rate": 8.753350624685325e-06, + "loss": 17.0362, + "step": 13833 + }, + { + "epoch": 0.25287440364121594, + "grad_norm": 6.736717364355989, + "learning_rate": 8.753155050033558e-06, + "loss": 17.6898, + "step": 13834 + }, + { + "epoch": 0.2528926828376625, + "grad_norm": 6.826553700256823, + "learning_rate": 8.752959462227308e-06, + "loss": 17.6636, + "step": 13835 + }, + { + "epoch": 0.25291096203410895, + "grad_norm": 6.110653589759968, + "learning_rate": 8.752763861267262e-06, + "loss": 17.4454, + "step": 13836 + }, + { + "epoch": 0.2529292412305555, + "grad_norm": 8.146771509318832, + "learning_rate": 8.752568247154103e-06, + "loss": 18.3319, + "step": 13837 + }, + { + "epoch": 0.252947520427002, + "grad_norm": 6.042919896918304, + "learning_rate": 8.752372619888519e-06, + "loss": 17.3727, + "step": 13838 + }, + { + "epoch": 0.25296579962344856, + "grad_norm": 5.608386316499777, + "learning_rate": 8.752176979471194e-06, + "loss": 17.0876, + "step": 13839 + }, + { + "epoch": 0.2529840788198951, + "grad_norm": 6.371974087607878, + "learning_rate": 8.751981325902814e-06, + "loss": 17.6724, + "step": 13840 + }, + { + "epoch": 0.2530023580163416, + "grad_norm": 7.217163091063996, + "learning_rate": 8.751785659184066e-06, + "loss": 17.7458, + "step": 13841 + }, + { + "epoch": 0.2530206372127881, + "grad_norm": 8.851814579077875, + "learning_rate": 8.751589979315634e-06, + "loss": 18.2688, + "step": 13842 + }, + { + "epoch": 0.25303891640923465, + "grad_norm": 7.482599558175977, + "learning_rate": 8.751394286298204e-06, + "loss": 17.707, + "step": 13843 + }, + { + "epoch": 0.2530571956056812, + "grad_norm": 6.040627743486871, + "learning_rate": 8.751198580132464e-06, + "loss": 17.5163, + "step": 13844 + }, + { + "epoch": 0.2530754748021277, + "grad_norm": 8.717437003205008, + "learning_rate": 8.751002860819098e-06, + "loss": 
18.3448, + "step": 13845 + }, + { + "epoch": 0.2530937539985742, + "grad_norm": 5.543502707869062, + "learning_rate": 8.750807128358792e-06, + "loss": 17.028, + "step": 13846 + }, + { + "epoch": 0.25311203319502074, + "grad_norm": 5.832826377516405, + "learning_rate": 8.750611382752233e-06, + "loss": 17.4799, + "step": 13847 + }, + { + "epoch": 0.25313031239146727, + "grad_norm": 6.421644335761602, + "learning_rate": 8.750415624000105e-06, + "loss": 17.5648, + "step": 13848 + }, + { + "epoch": 0.2531485915879138, + "grad_norm": 7.0638634413962205, + "learning_rate": 8.750219852103098e-06, + "loss": 17.6503, + "step": 13849 + }, + { + "epoch": 0.25316687078436034, + "grad_norm": 6.496441590330903, + "learning_rate": 8.750024067061895e-06, + "loss": 17.3566, + "step": 13850 + }, + { + "epoch": 0.2531851499808068, + "grad_norm": 7.657369861136381, + "learning_rate": 8.749828268877182e-06, + "loss": 18.2053, + "step": 13851 + }, + { + "epoch": 0.25320342917725336, + "grad_norm": 5.9427225811770645, + "learning_rate": 8.74963245754965e-06, + "loss": 17.3519, + "step": 13852 + }, + { + "epoch": 0.2532217083736999, + "grad_norm": 8.107318124705973, + "learning_rate": 8.749436633079977e-06, + "loss": 17.6699, + "step": 13853 + }, + { + "epoch": 0.25323998757014643, + "grad_norm": 6.6438036051962825, + "learning_rate": 8.749240795468856e-06, + "loss": 17.5672, + "step": 13854 + }, + { + "epoch": 0.25325826676659297, + "grad_norm": 6.85004338489899, + "learning_rate": 8.749044944716972e-06, + "loss": 17.7759, + "step": 13855 + }, + { + "epoch": 0.25327654596303945, + "grad_norm": 5.702389014906411, + "learning_rate": 8.748849080825011e-06, + "loss": 17.2129, + "step": 13856 + }, + { + "epoch": 0.253294825159486, + "grad_norm": 5.6610331545114, + "learning_rate": 8.748653203793658e-06, + "loss": 17.0836, + "step": 13857 + }, + { + "epoch": 0.2533131043559325, + "grad_norm": 7.884955625477877, + "learning_rate": 8.7484573136236e-06, + "loss": 18.0809, + "step": 13858 + }, + { + "epoch": 0.25333138355237905, + "grad_norm": 6.298036069861454, + "learning_rate": 8.748261410315527e-06, + "loss": 17.5352, + "step": 13859 + }, + { + "epoch": 0.2533496627488256, + "grad_norm": 5.899677885519186, + "learning_rate": 8.748065493870122e-06, + "loss": 17.3049, + "step": 13860 + }, + { + "epoch": 0.25336794194527207, + "grad_norm": 7.452406224920589, + "learning_rate": 8.747869564288072e-06, + "loss": 18.1885, + "step": 13861 + }, + { + "epoch": 0.2533862211417186, + "grad_norm": 6.616031007667161, + "learning_rate": 8.747673621570063e-06, + "loss": 17.498, + "step": 13862 + }, + { + "epoch": 0.25340450033816514, + "grad_norm": 6.190275743116507, + "learning_rate": 8.747477665716786e-06, + "loss": 17.239, + "step": 13863 + }, + { + "epoch": 0.2534227795346117, + "grad_norm": 6.69834684601848, + "learning_rate": 8.747281696728922e-06, + "loss": 17.6468, + "step": 13864 + }, + { + "epoch": 0.25344105873105816, + "grad_norm": 6.465176767968881, + "learning_rate": 8.747085714607164e-06, + "loss": 17.3516, + "step": 13865 + }, + { + "epoch": 0.2534593379275047, + "grad_norm": 7.138906865721986, + "learning_rate": 8.746889719352194e-06, + "loss": 18.082, + "step": 13866 + }, + { + "epoch": 0.2534776171239512, + "grad_norm": 7.035732221360487, + "learning_rate": 8.746693710964702e-06, + "loss": 17.628, + "step": 13867 + }, + { + "epoch": 0.25349589632039776, + "grad_norm": 6.487640928438261, + "learning_rate": 8.746497689445373e-06, + "loss": 17.6296, + "step": 13868 + }, + { + "epoch": 0.2535141755168443, + "grad_norm": 
6.642219085040461, + "learning_rate": 8.746301654794894e-06, + "loss": 17.533, + "step": 13869 + }, + { + "epoch": 0.2535324547132908, + "grad_norm": 6.657256827746655, + "learning_rate": 8.746105607013952e-06, + "loss": 17.4731, + "step": 13870 + }, + { + "epoch": 0.2535507339097373, + "grad_norm": 8.098118528783477, + "learning_rate": 8.745909546103237e-06, + "loss": 17.325, + "step": 13871 + }, + { + "epoch": 0.25356901310618385, + "grad_norm": 6.5370206159418425, + "learning_rate": 8.745713472063432e-06, + "loss": 17.4677, + "step": 13872 + }, + { + "epoch": 0.2535872923026304, + "grad_norm": 5.555840875869279, + "learning_rate": 8.745517384895228e-06, + "loss": 17.1296, + "step": 13873 + }, + { + "epoch": 0.2536055714990769, + "grad_norm": 5.61644510958449, + "learning_rate": 8.745321284599311e-06, + "loss": 17.238, + "step": 13874 + }, + { + "epoch": 0.2536238506955234, + "grad_norm": 7.451073328635847, + "learning_rate": 8.745125171176367e-06, + "loss": 17.946, + "step": 13875 + }, + { + "epoch": 0.25364212989196994, + "grad_norm": 6.068830688793433, + "learning_rate": 8.744929044627084e-06, + "loss": 17.3497, + "step": 13876 + }, + { + "epoch": 0.25366040908841647, + "grad_norm": 6.33350173519903, + "learning_rate": 8.74473290495215e-06, + "loss": 17.3324, + "step": 13877 + }, + { + "epoch": 0.253678688284863, + "grad_norm": 6.278681972251112, + "learning_rate": 8.744536752152251e-06, + "loss": 17.4805, + "step": 13878 + }, + { + "epoch": 0.25369696748130954, + "grad_norm": 6.739404763073544, + "learning_rate": 8.744340586228077e-06, + "loss": 17.6275, + "step": 13879 + }, + { + "epoch": 0.253715246677756, + "grad_norm": 5.301174165885163, + "learning_rate": 8.744144407180315e-06, + "loss": 16.9004, + "step": 13880 + }, + { + "epoch": 0.25373352587420256, + "grad_norm": 5.948201083202224, + "learning_rate": 8.74394821500965e-06, + "loss": 17.1906, + "step": 13881 + }, + { + "epoch": 0.2537518050706491, + "grad_norm": 6.435881926797897, + "learning_rate": 8.743752009716772e-06, + "loss": 17.597, + "step": 13882 + }, + { + "epoch": 0.25377008426709563, + "grad_norm": 5.458443195542765, + "learning_rate": 8.743555791302368e-06, + "loss": 17.2105, + "step": 13883 + }, + { + "epoch": 0.25378836346354217, + "grad_norm": 6.027649550947092, + "learning_rate": 8.743359559767127e-06, + "loss": 17.2979, + "step": 13884 + }, + { + "epoch": 0.25380664265998865, + "grad_norm": 6.813964020203235, + "learning_rate": 8.743163315111733e-06, + "loss": 17.7843, + "step": 13885 + }, + { + "epoch": 0.2538249218564352, + "grad_norm": 7.348223519766389, + "learning_rate": 8.742967057336877e-06, + "loss": 17.8382, + "step": 13886 + }, + { + "epoch": 0.2538432010528817, + "grad_norm": 5.694232252564015, + "learning_rate": 8.742770786443249e-06, + "loss": 17.0623, + "step": 13887 + }, + { + "epoch": 0.25386148024932825, + "grad_norm": 6.130295450701716, + "learning_rate": 8.742574502431532e-06, + "loss": 17.6467, + "step": 13888 + }, + { + "epoch": 0.2538797594457748, + "grad_norm": 7.682631583749733, + "learning_rate": 8.742378205302415e-06, + "loss": 17.8717, + "step": 13889 + }, + { + "epoch": 0.25389803864222127, + "grad_norm": 9.483293760520688, + "learning_rate": 8.74218189505659e-06, + "loss": 18.3314, + "step": 13890 + }, + { + "epoch": 0.2539163178386678, + "grad_norm": 6.698039296153758, + "learning_rate": 8.74198557169474e-06, + "loss": 17.7914, + "step": 13891 + }, + { + "epoch": 0.25393459703511434, + "grad_norm": 6.219013252527224, + "learning_rate": 8.741789235217558e-06, + "loss": 17.3265, + 
"step": 13892 + }, + { + "epoch": 0.2539528762315609, + "grad_norm": 7.93899289527623, + "learning_rate": 8.741592885625724e-06, + "loss": 18.2619, + "step": 13893 + }, + { + "epoch": 0.2539711554280074, + "grad_norm": 7.431414942399366, + "learning_rate": 8.741396522919937e-06, + "loss": 17.8052, + "step": 13894 + }, + { + "epoch": 0.2539894346244539, + "grad_norm": 6.055708376786302, + "learning_rate": 8.741200147100877e-06, + "loss": 17.224, + "step": 13895 + }, + { + "epoch": 0.2540077138209004, + "grad_norm": 7.543950459697201, + "learning_rate": 8.741003758169236e-06, + "loss": 17.7052, + "step": 13896 + }, + { + "epoch": 0.25402599301734696, + "grad_norm": 6.290080852767708, + "learning_rate": 8.740807356125702e-06, + "loss": 17.3903, + "step": 13897 + }, + { + "epoch": 0.2540442722137935, + "grad_norm": 6.140791835594011, + "learning_rate": 8.740610940970962e-06, + "loss": 17.4723, + "step": 13898 + }, + { + "epoch": 0.25406255141024, + "grad_norm": 5.786914095509982, + "learning_rate": 8.740414512705706e-06, + "loss": 17.2791, + "step": 13899 + }, + { + "epoch": 0.2540808306066865, + "grad_norm": 6.452620799877661, + "learning_rate": 8.740218071330622e-06, + "loss": 17.7083, + "step": 13900 + }, + { + "epoch": 0.25409910980313305, + "grad_norm": 7.575533529925206, + "learning_rate": 8.740021616846397e-06, + "loss": 18.1212, + "step": 13901 + }, + { + "epoch": 0.2541173889995796, + "grad_norm": 7.746599654235914, + "learning_rate": 8.739825149253721e-06, + "loss": 18.0708, + "step": 13902 + }, + { + "epoch": 0.2541356681960261, + "grad_norm": 6.4143169159359, + "learning_rate": 8.739628668553283e-06, + "loss": 17.7577, + "step": 13903 + }, + { + "epoch": 0.2541539473924726, + "grad_norm": 7.181830896596694, + "learning_rate": 8.73943217474577e-06, + "loss": 18.2878, + "step": 13904 + }, + { + "epoch": 0.25417222658891914, + "grad_norm": 6.212225736628983, + "learning_rate": 8.739235667831874e-06, + "loss": 17.5087, + "step": 13905 + }, + { + "epoch": 0.2541905057853657, + "grad_norm": 7.833271790484041, + "learning_rate": 8.739039147812278e-06, + "loss": 18.1298, + "step": 13906 + }, + { + "epoch": 0.2542087849818122, + "grad_norm": 7.227486585626569, + "learning_rate": 8.738842614687676e-06, + "loss": 17.6637, + "step": 13907 + }, + { + "epoch": 0.25422706417825874, + "grad_norm": 7.374676572810546, + "learning_rate": 8.738646068458757e-06, + "loss": 17.8617, + "step": 13908 + }, + { + "epoch": 0.2542453433747052, + "grad_norm": 6.5000955280762245, + "learning_rate": 8.738449509126205e-06, + "loss": 17.7477, + "step": 13909 + }, + { + "epoch": 0.25426362257115176, + "grad_norm": 6.593971964695243, + "learning_rate": 8.738252936690713e-06, + "loss": 17.4493, + "step": 13910 + }, + { + "epoch": 0.2542819017675983, + "grad_norm": 7.818910279079011, + "learning_rate": 8.73805635115297e-06, + "loss": 18.1238, + "step": 13911 + }, + { + "epoch": 0.25430018096404483, + "grad_norm": 7.10818976180064, + "learning_rate": 8.737859752513661e-06, + "loss": 17.6805, + "step": 13912 + }, + { + "epoch": 0.25431846016049137, + "grad_norm": 6.605018965616754, + "learning_rate": 8.73766314077348e-06, + "loss": 17.3774, + "step": 13913 + }, + { + "epoch": 0.25433673935693785, + "grad_norm": 6.737453418711159, + "learning_rate": 8.737466515933116e-06, + "loss": 17.7287, + "step": 13914 + }, + { + "epoch": 0.2543550185533844, + "grad_norm": 6.726761825052466, + "learning_rate": 8.737269877993254e-06, + "loss": 17.3611, + "step": 13915 + }, + { + "epoch": 0.2543732977498309, + "grad_norm": 
6.686468349938027, + "learning_rate": 8.737073226954585e-06, + "loss": 17.5626, + "step": 13916 + }, + { + "epoch": 0.25439157694627745, + "grad_norm": 9.721580061837525, + "learning_rate": 8.736876562817798e-06, + "loss": 18.8001, + "step": 13917 + }, + { + "epoch": 0.254409856142724, + "grad_norm": 7.320850100564549, + "learning_rate": 8.736679885583583e-06, + "loss": 18.041, + "step": 13918 + }, + { + "epoch": 0.25442813533917047, + "grad_norm": 7.4543355840587475, + "learning_rate": 8.73648319525263e-06, + "loss": 17.9807, + "step": 13919 + }, + { + "epoch": 0.254446414535617, + "grad_norm": 7.569963979882603, + "learning_rate": 8.736286491825627e-06, + "loss": 17.8133, + "step": 13920 + }, + { + "epoch": 0.25446469373206354, + "grad_norm": 6.071590080065675, + "learning_rate": 8.736089775303266e-06, + "loss": 17.4813, + "step": 13921 + }, + { + "epoch": 0.2544829729285101, + "grad_norm": 6.161994516871227, + "learning_rate": 8.735893045686233e-06, + "loss": 17.3722, + "step": 13922 + }, + { + "epoch": 0.2545012521249566, + "grad_norm": 7.862371069244069, + "learning_rate": 8.735696302975219e-06, + "loss": 18.0639, + "step": 13923 + }, + { + "epoch": 0.2545195313214031, + "grad_norm": 6.276884236657523, + "learning_rate": 8.735499547170914e-06, + "loss": 17.3353, + "step": 13924 + }, + { + "epoch": 0.2545378105178496, + "grad_norm": 5.967898857485332, + "learning_rate": 8.735302778274009e-06, + "loss": 17.3818, + "step": 13925 + }, + { + "epoch": 0.25455608971429616, + "grad_norm": 7.193800609691263, + "learning_rate": 8.73510599628519e-06, + "loss": 18.0089, + "step": 13926 + }, + { + "epoch": 0.2545743689107427, + "grad_norm": 6.54437382860853, + "learning_rate": 8.734909201205148e-06, + "loss": 17.6759, + "step": 13927 + }, + { + "epoch": 0.25459264810718923, + "grad_norm": 5.981309927884284, + "learning_rate": 8.734712393034574e-06, + "loss": 17.073, + "step": 13928 + }, + { + "epoch": 0.2546109273036357, + "grad_norm": 6.252306285214709, + "learning_rate": 8.734515571774157e-06, + "loss": 17.2187, + "step": 13929 + }, + { + "epoch": 0.25462920650008225, + "grad_norm": 6.1530465363391915, + "learning_rate": 8.734318737424588e-06, + "loss": 17.4144, + "step": 13930 + }, + { + "epoch": 0.2546474856965288, + "grad_norm": 7.53964874121549, + "learning_rate": 8.734121889986555e-06, + "loss": 17.6794, + "step": 13931 + }, + { + "epoch": 0.2546657648929753, + "grad_norm": 6.32611595168549, + "learning_rate": 8.733925029460747e-06, + "loss": 17.4249, + "step": 13932 + }, + { + "epoch": 0.2546840440894218, + "grad_norm": 6.687145282129531, + "learning_rate": 8.733728155847858e-06, + "loss": 17.9875, + "step": 13933 + }, + { + "epoch": 0.25470232328586834, + "grad_norm": 6.882934764697469, + "learning_rate": 8.733531269148576e-06, + "loss": 17.7335, + "step": 13934 + }, + { + "epoch": 0.2547206024823149, + "grad_norm": 7.678694872833678, + "learning_rate": 8.73333436936359e-06, + "loss": 18.0639, + "step": 13935 + }, + { + "epoch": 0.2547388816787614, + "grad_norm": 6.626212475299726, + "learning_rate": 8.733137456493593e-06, + "loss": 17.6482, + "step": 13936 + }, + { + "epoch": 0.25475716087520794, + "grad_norm": 7.1339521180408445, + "learning_rate": 8.732940530539271e-06, + "loss": 17.8985, + "step": 13937 + }, + { + "epoch": 0.2547754400716544, + "grad_norm": 6.607697252050391, + "learning_rate": 8.732743591501316e-06, + "loss": 17.5549, + "step": 13938 + }, + { + "epoch": 0.25479371926810096, + "grad_norm": 6.155478463975609, + "learning_rate": 8.732546639380419e-06, + "loss": 17.4897, 
+ "step": 13939 + }, + { + "epoch": 0.2548119984645475, + "grad_norm": 7.410949350228957, + "learning_rate": 8.732349674177272e-06, + "loss": 17.678, + "step": 13940 + }, + { + "epoch": 0.25483027766099403, + "grad_norm": 5.955900673297592, + "learning_rate": 8.732152695892562e-06, + "loss": 17.3842, + "step": 13941 + }, + { + "epoch": 0.25484855685744057, + "grad_norm": 7.429077063390365, + "learning_rate": 8.73195570452698e-06, + "loss": 17.7786, + "step": 13942 + }, + { + "epoch": 0.25486683605388705, + "grad_norm": 6.964177608064067, + "learning_rate": 8.731758700081217e-06, + "loss": 17.8819, + "step": 13943 + }, + { + "epoch": 0.2548851152503336, + "grad_norm": 7.494081046872606, + "learning_rate": 8.731561682555965e-06, + "loss": 18.024, + "step": 13944 + }, + { + "epoch": 0.2549033944467801, + "grad_norm": 7.457489692667915, + "learning_rate": 8.73136465195191e-06, + "loss": 17.6645, + "step": 13945 + }, + { + "epoch": 0.25492167364322665, + "grad_norm": 7.575199443544168, + "learning_rate": 8.73116760826975e-06, + "loss": 17.6916, + "step": 13946 + }, + { + "epoch": 0.2549399528396732, + "grad_norm": 6.211321489064392, + "learning_rate": 8.73097055151017e-06, + "loss": 17.3002, + "step": 13947 + }, + { + "epoch": 0.25495823203611967, + "grad_norm": 6.231393405979343, + "learning_rate": 8.73077348167386e-06, + "loss": 17.3247, + "step": 13948 + }, + { + "epoch": 0.2549765112325662, + "grad_norm": 7.8589940161868475, + "learning_rate": 8.730576398761514e-06, + "loss": 17.8705, + "step": 13949 + }, + { + "epoch": 0.25499479042901274, + "grad_norm": 6.092213046044782, + "learning_rate": 8.730379302773822e-06, + "loss": 17.2994, + "step": 13950 + }, + { + "epoch": 0.2550130696254593, + "grad_norm": 6.144376781693334, + "learning_rate": 8.730182193711472e-06, + "loss": 17.2841, + "step": 13951 + }, + { + "epoch": 0.2550313488219058, + "grad_norm": 6.309420077332737, + "learning_rate": 8.729985071575158e-06, + "loss": 17.5078, + "step": 13952 + }, + { + "epoch": 0.2550496280183523, + "grad_norm": 6.982088765553154, + "learning_rate": 8.729787936365572e-06, + "loss": 17.7818, + "step": 13953 + }, + { + "epoch": 0.25506790721479883, + "grad_norm": 7.710406989976027, + "learning_rate": 8.729590788083403e-06, + "loss": 17.8721, + "step": 13954 + }, + { + "epoch": 0.25508618641124536, + "grad_norm": 6.559110564146433, + "learning_rate": 8.72939362672934e-06, + "loss": 17.9672, + "step": 13955 + }, + { + "epoch": 0.2551044656076919, + "grad_norm": 6.982277674262234, + "learning_rate": 8.729196452304076e-06, + "loss": 17.608, + "step": 13956 + }, + { + "epoch": 0.25512274480413843, + "grad_norm": 7.703209985365643, + "learning_rate": 8.728999264808303e-06, + "loss": 18.0226, + "step": 13957 + }, + { + "epoch": 0.2551410240005849, + "grad_norm": 7.204138122688372, + "learning_rate": 8.72880206424271e-06, + "loss": 17.6252, + "step": 13958 + }, + { + "epoch": 0.25515930319703145, + "grad_norm": 7.232827552461167, + "learning_rate": 8.72860485060799e-06, + "loss": 17.8974, + "step": 13959 + }, + { + "epoch": 0.255177582393478, + "grad_norm": 6.758013803673432, + "learning_rate": 8.728407623904833e-06, + "loss": 17.7697, + "step": 13960 + }, + { + "epoch": 0.2551958615899245, + "grad_norm": 7.997805217356011, + "learning_rate": 8.728210384133932e-06, + "loss": 18.4118, + "step": 13961 + }, + { + "epoch": 0.25521414078637106, + "grad_norm": 6.200605591400633, + "learning_rate": 8.728013131295976e-06, + "loss": 17.4238, + "step": 13962 + }, + { + "epoch": 0.25523241998281754, + "grad_norm": 
6.175082033002962, + "learning_rate": 8.727815865391657e-06, + "loss": 17.2796, + "step": 13963 + }, + { + "epoch": 0.2552506991792641, + "grad_norm": 6.141657054367813, + "learning_rate": 8.727618586421669e-06, + "loss": 17.1876, + "step": 13964 + }, + { + "epoch": 0.2552689783757106, + "grad_norm": 6.099863422243416, + "learning_rate": 8.7274212943867e-06, + "loss": 17.4914, + "step": 13965 + }, + { + "epoch": 0.25528725757215714, + "grad_norm": 5.8144270085021486, + "learning_rate": 8.727223989287443e-06, + "loss": 17.0464, + "step": 13966 + }, + { + "epoch": 0.2553055367686036, + "grad_norm": 8.306539054197764, + "learning_rate": 8.72702667112459e-06, + "loss": 18.2899, + "step": 13967 + }, + { + "epoch": 0.25532381596505016, + "grad_norm": 5.482667624083453, + "learning_rate": 8.72682933989883e-06, + "loss": 17.1317, + "step": 13968 + }, + { + "epoch": 0.2553420951614967, + "grad_norm": 5.693117008850541, + "learning_rate": 8.72663199561086e-06, + "loss": 17.2629, + "step": 13969 + }, + { + "epoch": 0.25536037435794323, + "grad_norm": 6.306026003840627, + "learning_rate": 8.726434638261365e-06, + "loss": 17.4804, + "step": 13970 + }, + { + "epoch": 0.25537865355438977, + "grad_norm": 6.4212860185533716, + "learning_rate": 8.726237267851041e-06, + "loss": 17.4157, + "step": 13971 + }, + { + "epoch": 0.25539693275083625, + "grad_norm": 6.958322437037328, + "learning_rate": 8.726039884380579e-06, + "loss": 17.7611, + "step": 13972 + }, + { + "epoch": 0.2554152119472828, + "grad_norm": 5.555300668995391, + "learning_rate": 8.72584248785067e-06, + "loss": 17.1776, + "step": 13973 + }, + { + "epoch": 0.2554334911437293, + "grad_norm": 6.687111880084589, + "learning_rate": 8.725645078262007e-06, + "loss": 17.6282, + "step": 13974 + }, + { + "epoch": 0.25545177034017585, + "grad_norm": 6.638536443221294, + "learning_rate": 8.72544765561528e-06, + "loss": 17.5349, + "step": 13975 + }, + { + "epoch": 0.2554700495366224, + "grad_norm": 5.914273252875999, + "learning_rate": 8.725250219911184e-06, + "loss": 17.1228, + "step": 13976 + }, + { + "epoch": 0.25548832873306887, + "grad_norm": 6.425812346335787, + "learning_rate": 8.725052771150409e-06, + "loss": 17.4639, + "step": 13977 + }, + { + "epoch": 0.2555066079295154, + "grad_norm": 5.8223807022443985, + "learning_rate": 8.724855309333646e-06, + "loss": 16.9239, + "step": 13978 + }, + { + "epoch": 0.25552488712596194, + "grad_norm": 6.484578700575855, + "learning_rate": 8.72465783446159e-06, + "loss": 17.4878, + "step": 13979 + }, + { + "epoch": 0.2555431663224085, + "grad_norm": 6.083479783688803, + "learning_rate": 8.72446034653493e-06, + "loss": 17.4781, + "step": 13980 + }, + { + "epoch": 0.255561445518855, + "grad_norm": 8.393779815892302, + "learning_rate": 8.72426284555436e-06, + "loss": 18.4201, + "step": 13981 + }, + { + "epoch": 0.2555797247153015, + "grad_norm": 7.917411502202908, + "learning_rate": 8.724065331520572e-06, + "loss": 18.1957, + "step": 13982 + }, + { + "epoch": 0.25559800391174803, + "grad_norm": 6.664139696039288, + "learning_rate": 8.723867804434259e-06, + "loss": 17.3455, + "step": 13983 + }, + { + "epoch": 0.25561628310819456, + "grad_norm": 6.308596894214555, + "learning_rate": 8.723670264296111e-06, + "loss": 17.3711, + "step": 13984 + }, + { + "epoch": 0.2556345623046411, + "grad_norm": 8.337303492069122, + "learning_rate": 8.723472711106825e-06, + "loss": 18.7995, + "step": 13985 + }, + { + "epoch": 0.25565284150108764, + "grad_norm": 7.278473652304813, + "learning_rate": 8.723275144867086e-06, + "loss": 
18.0212, + "step": 13986 + }, + { + "epoch": 0.2556711206975341, + "grad_norm": 5.6860856800645445, + "learning_rate": 8.723077565577594e-06, + "loss": 17.3226, + "step": 13987 + }, + { + "epoch": 0.25568939989398065, + "grad_norm": 6.1040925281351885, + "learning_rate": 8.722879973239035e-06, + "loss": 17.3142, + "step": 13988 + }, + { + "epoch": 0.2557076790904272, + "grad_norm": 7.849493113221841, + "learning_rate": 8.722682367852107e-06, + "loss": 17.8536, + "step": 13989 + }, + { + "epoch": 0.2557259582868737, + "grad_norm": 7.419112982774977, + "learning_rate": 8.722484749417502e-06, + "loss": 18.091, + "step": 13990 + }, + { + "epoch": 0.25574423748332026, + "grad_norm": 6.844159293020602, + "learning_rate": 8.722287117935908e-06, + "loss": 17.6159, + "step": 13991 + }, + { + "epoch": 0.25576251667976674, + "grad_norm": 11.027302576659178, + "learning_rate": 8.722089473408023e-06, + "loss": 18.0748, + "step": 13992 + }, + { + "epoch": 0.2557807958762133, + "grad_norm": 6.231287286717342, + "learning_rate": 8.721891815834534e-06, + "loss": 17.5187, + "step": 13993 + }, + { + "epoch": 0.2557990750726598, + "grad_norm": 5.926319474170429, + "learning_rate": 8.72169414521614e-06, + "loss": 17.2324, + "step": 13994 + }, + { + "epoch": 0.25581735426910635, + "grad_norm": 9.830976195430448, + "learning_rate": 8.721496461553528e-06, + "loss": 18.9214, + "step": 13995 + }, + { + "epoch": 0.2558356334655529, + "grad_norm": 5.332807377255744, + "learning_rate": 8.721298764847397e-06, + "loss": 16.7563, + "step": 13996 + }, + { + "epoch": 0.25585391266199936, + "grad_norm": 5.556155595301225, + "learning_rate": 8.721101055098436e-06, + "loss": 17.1854, + "step": 13997 + }, + { + "epoch": 0.2558721918584459, + "grad_norm": 6.470278581951466, + "learning_rate": 8.720903332307339e-06, + "loss": 17.4058, + "step": 13998 + }, + { + "epoch": 0.25589047105489243, + "grad_norm": 5.6971359483934965, + "learning_rate": 8.720705596474797e-06, + "loss": 17.1146, + "step": 13999 + }, + { + "epoch": 0.25590875025133897, + "grad_norm": 7.508495639754144, + "learning_rate": 8.720507847601508e-06, + "loss": 17.8727, + "step": 14000 + }, + { + "epoch": 0.25592702944778545, + "grad_norm": 6.073201872739274, + "learning_rate": 8.720310085688158e-06, + "loss": 17.1639, + "step": 14001 + }, + { + "epoch": 0.255945308644232, + "grad_norm": 6.219751981885219, + "learning_rate": 8.720112310735445e-06, + "loss": 17.5806, + "step": 14002 + }, + { + "epoch": 0.2559635878406785, + "grad_norm": 6.3722758522487135, + "learning_rate": 8.719914522744063e-06, + "loss": 17.4845, + "step": 14003 + }, + { + "epoch": 0.25598186703712505, + "grad_norm": 6.430087771797434, + "learning_rate": 8.719716721714702e-06, + "loss": 17.2951, + "step": 14004 + }, + { + "epoch": 0.2560001462335716, + "grad_norm": 5.954364206203981, + "learning_rate": 8.719518907648057e-06, + "loss": 17.0703, + "step": 14005 + }, + { + "epoch": 0.25601842543001807, + "grad_norm": 7.3816256220639405, + "learning_rate": 8.71932108054482e-06, + "loss": 17.8422, + "step": 14006 + }, + { + "epoch": 0.2560367046264646, + "grad_norm": 5.585556965955438, + "learning_rate": 8.719123240405686e-06, + "loss": 17.3696, + "step": 14007 + }, + { + "epoch": 0.25605498382291114, + "grad_norm": 6.562984981021811, + "learning_rate": 8.71892538723135e-06, + "loss": 17.5934, + "step": 14008 + }, + { + "epoch": 0.2560732630193577, + "grad_norm": 5.994157750088668, + "learning_rate": 8.7187275210225e-06, + "loss": 17.5556, + "step": 14009 + }, + { + "epoch": 0.2560915422158042, + 
"grad_norm": 7.149441666470342, + "learning_rate": 8.718529641779834e-06, + "loss": 17.8358, + "step": 14010 + }, + { + "epoch": 0.2561098214122507, + "grad_norm": 10.585428630174878, + "learning_rate": 8.718331749504045e-06, + "loss": 18.3699, + "step": 14011 + }, + { + "epoch": 0.25612810060869723, + "grad_norm": 5.729931093024647, + "learning_rate": 8.718133844195825e-06, + "loss": 17.2859, + "step": 14012 + }, + { + "epoch": 0.25614637980514376, + "grad_norm": 8.041743853428542, + "learning_rate": 8.717935925855869e-06, + "loss": 17.9278, + "step": 14013 + }, + { + "epoch": 0.2561646590015903, + "grad_norm": 6.644423763100476, + "learning_rate": 8.717737994484869e-06, + "loss": 17.8461, + "step": 14014 + }, + { + "epoch": 0.25618293819803684, + "grad_norm": 7.099340966536888, + "learning_rate": 8.717540050083522e-06, + "loss": 18.0045, + "step": 14015 + }, + { + "epoch": 0.2562012173944833, + "grad_norm": 6.345347347165285, + "learning_rate": 8.717342092652518e-06, + "loss": 17.5001, + "step": 14016 + }, + { + "epoch": 0.25621949659092985, + "grad_norm": 6.670132674655566, + "learning_rate": 8.717144122192553e-06, + "loss": 17.6985, + "step": 14017 + }, + { + "epoch": 0.2562377757873764, + "grad_norm": 7.582148281771751, + "learning_rate": 8.71694613870432e-06, + "loss": 18.2426, + "step": 14018 + }, + { + "epoch": 0.2562560549838229, + "grad_norm": 7.761620215830936, + "learning_rate": 8.716748142188514e-06, + "loss": 17.9051, + "step": 14019 + }, + { + "epoch": 0.25627433418026946, + "grad_norm": 6.94165788412036, + "learning_rate": 8.71655013264583e-06, + "loss": 17.7852, + "step": 14020 + }, + { + "epoch": 0.25629261337671594, + "grad_norm": 6.112493460876916, + "learning_rate": 8.716352110076958e-06, + "loss": 17.0819, + "step": 14021 + }, + { + "epoch": 0.2563108925731625, + "grad_norm": 6.820209225329127, + "learning_rate": 8.716154074482594e-06, + "loss": 17.7083, + "step": 14022 + }, + { + "epoch": 0.256329171769609, + "grad_norm": 6.890240706035527, + "learning_rate": 8.715956025863433e-06, + "loss": 17.4598, + "step": 14023 + }, + { + "epoch": 0.25634745096605555, + "grad_norm": 5.865000848072777, + "learning_rate": 8.71575796422017e-06, + "loss": 17.1963, + "step": 14024 + }, + { + "epoch": 0.2563657301625021, + "grad_norm": 5.9749062755893485, + "learning_rate": 8.715559889553496e-06, + "loss": 17.4812, + "step": 14025 + }, + { + "epoch": 0.25638400935894856, + "grad_norm": 6.635796194940061, + "learning_rate": 8.715361801864107e-06, + "loss": 17.6757, + "step": 14026 + }, + { + "epoch": 0.2564022885553951, + "grad_norm": 6.332782098815111, + "learning_rate": 8.715163701152698e-06, + "loss": 17.6062, + "step": 14027 + }, + { + "epoch": 0.25642056775184163, + "grad_norm": 6.569707572976014, + "learning_rate": 8.714965587419964e-06, + "loss": 17.7143, + "step": 14028 + }, + { + "epoch": 0.25643884694828817, + "grad_norm": 7.926658291308322, + "learning_rate": 8.714767460666595e-06, + "loss": 18.3433, + "step": 14029 + }, + { + "epoch": 0.2564571261447347, + "grad_norm": 6.703570270573447, + "learning_rate": 8.71456932089329e-06, + "loss": 17.3925, + "step": 14030 + }, + { + "epoch": 0.2564754053411812, + "grad_norm": 8.7198142927402, + "learning_rate": 8.714371168100742e-06, + "loss": 18.5173, + "step": 14031 + }, + { + "epoch": 0.2564936845376277, + "grad_norm": 6.5711447764108, + "learning_rate": 8.714173002289645e-06, + "loss": 17.5438, + "step": 14032 + }, + { + "epoch": 0.25651196373407426, + "grad_norm": 6.498513708490086, + "learning_rate": 8.713974823460693e-06, + 
"loss": 17.5866, + "step": 14033 + }, + { + "epoch": 0.2565302429305208, + "grad_norm": 6.707410223345111, + "learning_rate": 8.713776631614583e-06, + "loss": 17.7918, + "step": 14034 + }, + { + "epoch": 0.25654852212696727, + "grad_norm": 7.497156527708566, + "learning_rate": 8.71357842675201e-06, + "loss": 17.8623, + "step": 14035 + }, + { + "epoch": 0.2565668013234138, + "grad_norm": 6.806956228926704, + "learning_rate": 8.713380208873663e-06, + "loss": 17.7035, + "step": 14036 + }, + { + "epoch": 0.25658508051986034, + "grad_norm": 6.737806666072355, + "learning_rate": 8.713181977980242e-06, + "loss": 17.9184, + "step": 14037 + }, + { + "epoch": 0.2566033597163069, + "grad_norm": 5.857737233930091, + "learning_rate": 8.712983734072442e-06, + "loss": 17.3217, + "step": 14038 + }, + { + "epoch": 0.2566216389127534, + "grad_norm": 6.127108906197689, + "learning_rate": 8.712785477150954e-06, + "loss": 17.6265, + "step": 14039 + }, + { + "epoch": 0.2566399181091999, + "grad_norm": 5.505189658724705, + "learning_rate": 8.712587207216476e-06, + "loss": 17.1689, + "step": 14040 + }, + { + "epoch": 0.25665819730564643, + "grad_norm": 5.2633515517705955, + "learning_rate": 8.712388924269701e-06, + "loss": 17.2424, + "step": 14041 + }, + { + "epoch": 0.25667647650209297, + "grad_norm": 7.385141693929254, + "learning_rate": 8.712190628311327e-06, + "loss": 17.9504, + "step": 14042 + }, + { + "epoch": 0.2566947556985395, + "grad_norm": 7.28567741353326, + "learning_rate": 8.711992319342047e-06, + "loss": 17.8721, + "step": 14043 + }, + { + "epoch": 0.25671303489498604, + "grad_norm": 7.0865725746620365, + "learning_rate": 8.711793997362555e-06, + "loss": 17.5467, + "step": 14044 + }, + { + "epoch": 0.2567313140914325, + "grad_norm": 6.524143900895566, + "learning_rate": 8.711595662373545e-06, + "loss": 17.818, + "step": 14045 + }, + { + "epoch": 0.25674959328787905, + "grad_norm": 7.689675182264577, + "learning_rate": 8.711397314375717e-06, + "loss": 17.8134, + "step": 14046 + }, + { + "epoch": 0.2567678724843256, + "grad_norm": 7.314549210631258, + "learning_rate": 8.711198953369763e-06, + "loss": 17.7351, + "step": 14047 + }, + { + "epoch": 0.2567861516807721, + "grad_norm": 7.349427702300283, + "learning_rate": 8.711000579356379e-06, + "loss": 17.9681, + "step": 14048 + }, + { + "epoch": 0.25680443087721866, + "grad_norm": 5.522520021641442, + "learning_rate": 8.710802192336258e-06, + "loss": 17.2265, + "step": 14049 + }, + { + "epoch": 0.25682271007366514, + "grad_norm": 6.809408078333556, + "learning_rate": 8.7106037923101e-06, + "loss": 17.8225, + "step": 14050 + }, + { + "epoch": 0.2568409892701117, + "grad_norm": 5.658643531266042, + "learning_rate": 8.710405379278597e-06, + "loss": 17.1175, + "step": 14051 + }, + { + "epoch": 0.2568592684665582, + "grad_norm": 7.664486081417579, + "learning_rate": 8.710206953242444e-06, + "loss": 18.2874, + "step": 14052 + }, + { + "epoch": 0.25687754766300475, + "grad_norm": 6.86069447753021, + "learning_rate": 8.710008514202336e-06, + "loss": 17.63, + "step": 14053 + }, + { + "epoch": 0.2568958268594513, + "grad_norm": 6.8516514476035715, + "learning_rate": 8.709810062158974e-06, + "loss": 17.5871, + "step": 14054 + }, + { + "epoch": 0.25691410605589776, + "grad_norm": 6.834744715769432, + "learning_rate": 8.709611597113048e-06, + "loss": 17.7219, + "step": 14055 + }, + { + "epoch": 0.2569323852523443, + "grad_norm": 6.19308693771849, + "learning_rate": 8.709413119065255e-06, + "loss": 17.4019, + "step": 14056 + }, + { + "epoch": 0.25695066444879083, + 
"grad_norm": 5.851593282103616, + "learning_rate": 8.709214628016292e-06, + "loss": 17.3128, + "step": 14057 + }, + { + "epoch": 0.25696894364523737, + "grad_norm": 6.868437862739134, + "learning_rate": 8.709016123966851e-06, + "loss": 17.3939, + "step": 14058 + }, + { + "epoch": 0.2569872228416839, + "grad_norm": 5.822625513677289, + "learning_rate": 8.708817606917633e-06, + "loss": 17.2102, + "step": 14059 + }, + { + "epoch": 0.2570055020381304, + "grad_norm": 7.895151638643454, + "learning_rate": 8.70861907686933e-06, + "loss": 18.2374, + "step": 14060 + }, + { + "epoch": 0.2570237812345769, + "grad_norm": 6.178522074785444, + "learning_rate": 8.70842053382264e-06, + "loss": 17.3785, + "step": 14061 + }, + { + "epoch": 0.25704206043102346, + "grad_norm": 6.946304226628333, + "learning_rate": 8.708221977778256e-06, + "loss": 17.5099, + "step": 14062 + }, + { + "epoch": 0.25706033962747, + "grad_norm": 7.167277756268976, + "learning_rate": 8.708023408736877e-06, + "loss": 17.826, + "step": 14063 + }, + { + "epoch": 0.2570786188239165, + "grad_norm": 6.693399938414784, + "learning_rate": 8.707824826699199e-06, + "loss": 17.3958, + "step": 14064 + }, + { + "epoch": 0.257096898020363, + "grad_norm": 6.101790113077589, + "learning_rate": 8.707626231665914e-06, + "loss": 17.3234, + "step": 14065 + }, + { + "epoch": 0.25711517721680954, + "grad_norm": 6.464939958515629, + "learning_rate": 8.70742762363772e-06, + "loss": 17.7037, + "step": 14066 + }, + { + "epoch": 0.2571334564132561, + "grad_norm": 6.746677057749509, + "learning_rate": 8.707229002615317e-06, + "loss": 17.8575, + "step": 14067 + }, + { + "epoch": 0.2571517356097026, + "grad_norm": 6.621445967947263, + "learning_rate": 8.707030368599398e-06, + "loss": 17.5681, + "step": 14068 + }, + { + "epoch": 0.2571700148061491, + "grad_norm": 6.322926138021842, + "learning_rate": 8.706831721590657e-06, + "loss": 17.5165, + "step": 14069 + }, + { + "epoch": 0.25718829400259563, + "grad_norm": 7.287228708380956, + "learning_rate": 8.706633061589794e-06, + "loss": 17.9319, + "step": 14070 + }, + { + "epoch": 0.25720657319904217, + "grad_norm": 6.064790777506287, + "learning_rate": 8.706434388597503e-06, + "loss": 17.2462, + "step": 14071 + }, + { + "epoch": 0.2572248523954887, + "grad_norm": 6.563810232322666, + "learning_rate": 8.706235702614482e-06, + "loss": 17.7225, + "step": 14072 + }, + { + "epoch": 0.25724313159193524, + "grad_norm": 8.809152384498775, + "learning_rate": 8.706037003641426e-06, + "loss": 18.0658, + "step": 14073 + }, + { + "epoch": 0.2572614107883817, + "grad_norm": 6.113233339539685, + "learning_rate": 8.705838291679032e-06, + "loss": 17.4984, + "step": 14074 + }, + { + "epoch": 0.25727968998482825, + "grad_norm": 7.392843049875668, + "learning_rate": 8.705639566727997e-06, + "loss": 18.0276, + "step": 14075 + }, + { + "epoch": 0.2572979691812748, + "grad_norm": 6.22602578544969, + "learning_rate": 8.705440828789015e-06, + "loss": 17.7045, + "step": 14076 + }, + { + "epoch": 0.2573162483777213, + "grad_norm": 7.396244514051507, + "learning_rate": 8.705242077862786e-06, + "loss": 17.5306, + "step": 14077 + }, + { + "epoch": 0.25733452757416786, + "grad_norm": 6.418514741742404, + "learning_rate": 8.705043313950004e-06, + "loss": 17.5961, + "step": 14078 + }, + { + "epoch": 0.25735280677061434, + "grad_norm": 6.534272716960642, + "learning_rate": 8.704844537051368e-06, + "loss": 17.4244, + "step": 14079 + }, + { + "epoch": 0.2573710859670609, + "grad_norm": 5.499315925609631, + "learning_rate": 8.704645747167572e-06, + 
"loss": 16.9318, + "step": 14080 + }, + { + "epoch": 0.2573893651635074, + "grad_norm": 7.21959522451206, + "learning_rate": 8.704446944299314e-06, + "loss": 18.1147, + "step": 14081 + }, + { + "epoch": 0.25740764435995395, + "grad_norm": 7.815593595945188, + "learning_rate": 8.704248128447293e-06, + "loss": 18.2904, + "step": 14082 + }, + { + "epoch": 0.2574259235564005, + "grad_norm": 6.228024877521958, + "learning_rate": 8.704049299612203e-06, + "loss": 17.3302, + "step": 14083 + }, + { + "epoch": 0.25744420275284696, + "grad_norm": 6.8981733996518235, + "learning_rate": 8.70385045779474e-06, + "loss": 17.8574, + "step": 14084 + }, + { + "epoch": 0.2574624819492935, + "grad_norm": 7.958254587077419, + "learning_rate": 8.703651602995605e-06, + "loss": 18.485, + "step": 14085 + }, + { + "epoch": 0.25748076114574003, + "grad_norm": 7.544062214252388, + "learning_rate": 8.70345273521549e-06, + "loss": 18.1102, + "step": 14086 + }, + { + "epoch": 0.25749904034218657, + "grad_norm": 5.914422136425655, + "learning_rate": 8.703253854455095e-06, + "loss": 17.2718, + "step": 14087 + }, + { + "epoch": 0.2575173195386331, + "grad_norm": 7.573257093818667, + "learning_rate": 8.703054960715118e-06, + "loss": 18.1964, + "step": 14088 + }, + { + "epoch": 0.2575355987350796, + "grad_norm": 9.158315114811106, + "learning_rate": 8.702856053996254e-06, + "loss": 18.8402, + "step": 14089 + }, + { + "epoch": 0.2575538779315261, + "grad_norm": 6.991139114717488, + "learning_rate": 8.702657134299201e-06, + "loss": 17.718, + "step": 14090 + }, + { + "epoch": 0.25757215712797266, + "grad_norm": 7.629686502898165, + "learning_rate": 8.702458201624657e-06, + "loss": 17.9014, + "step": 14091 + }, + { + "epoch": 0.2575904363244192, + "grad_norm": 5.873192405342047, + "learning_rate": 8.702259255973315e-06, + "loss": 17.5239, + "step": 14092 + }, + { + "epoch": 0.2576087155208657, + "grad_norm": 8.36751721033347, + "learning_rate": 8.70206029734588e-06, + "loss": 17.4272, + "step": 14093 + }, + { + "epoch": 0.2576269947173122, + "grad_norm": 6.438144667109233, + "learning_rate": 8.701861325743043e-06, + "loss": 17.3664, + "step": 14094 + }, + { + "epoch": 0.25764527391375874, + "grad_norm": 5.985339058250906, + "learning_rate": 8.701662341165502e-06, + "loss": 17.5232, + "step": 14095 + }, + { + "epoch": 0.2576635531102053, + "grad_norm": 7.386255272232896, + "learning_rate": 8.701463343613957e-06, + "loss": 17.9152, + "step": 14096 + }, + { + "epoch": 0.2576818323066518, + "grad_norm": 6.83644269450758, + "learning_rate": 8.701264333089104e-06, + "loss": 17.8121, + "step": 14097 + }, + { + "epoch": 0.25770011150309835, + "grad_norm": 6.551490461044074, + "learning_rate": 8.70106530959164e-06, + "loss": 17.7306, + "step": 14098 + }, + { + "epoch": 0.25771839069954483, + "grad_norm": 7.642030726394805, + "learning_rate": 8.700866273122264e-06, + "loss": 18.1874, + "step": 14099 + }, + { + "epoch": 0.25773666989599137, + "grad_norm": 6.977711470901913, + "learning_rate": 8.700667223681672e-06, + "loss": 17.7862, + "step": 14100 + }, + { + "epoch": 0.2577549490924379, + "grad_norm": 7.207789253193888, + "learning_rate": 8.700468161270563e-06, + "loss": 17.9599, + "step": 14101 + }, + { + "epoch": 0.25777322828888444, + "grad_norm": 6.095674515026532, + "learning_rate": 8.700269085889634e-06, + "loss": 17.4106, + "step": 14102 + }, + { + "epoch": 0.2577915074853309, + "grad_norm": 7.4632469140407585, + "learning_rate": 8.700069997539584e-06, + "loss": 17.8782, + "step": 14103 + }, + { + "epoch": 0.25780978668177745, + 
"grad_norm": 5.210845208316065, + "learning_rate": 8.69987089622111e-06, + "loss": 16.9404, + "step": 14104 + }, + { + "epoch": 0.257828065878224, + "grad_norm": 8.190158864530341, + "learning_rate": 8.699671781934907e-06, + "loss": 18.2329, + "step": 14105 + }, + { + "epoch": 0.2578463450746705, + "grad_norm": 8.530594742188688, + "learning_rate": 8.699472654681677e-06, + "loss": 18.5188, + "step": 14106 + }, + { + "epoch": 0.25786462427111706, + "grad_norm": 5.757533253815192, + "learning_rate": 8.699273514462116e-06, + "loss": 17.2406, + "step": 14107 + }, + { + "epoch": 0.25788290346756354, + "grad_norm": 7.059106984663106, + "learning_rate": 8.699074361276923e-06, + "loss": 17.765, + "step": 14108 + }, + { + "epoch": 0.2579011826640101, + "grad_norm": 6.105087348487071, + "learning_rate": 8.698875195126796e-06, + "loss": 17.5095, + "step": 14109 + }, + { + "epoch": 0.2579194618604566, + "grad_norm": 7.334075919128877, + "learning_rate": 8.69867601601243e-06, + "loss": 17.6755, + "step": 14110 + }, + { + "epoch": 0.25793774105690315, + "grad_norm": 6.858709746550251, + "learning_rate": 8.698476823934529e-06, + "loss": 17.954, + "step": 14111 + }, + { + "epoch": 0.2579560202533497, + "grad_norm": 5.552323518060006, + "learning_rate": 8.698277618893784e-06, + "loss": 17.2246, + "step": 14112 + }, + { + "epoch": 0.25797429944979616, + "grad_norm": 7.004207031243522, + "learning_rate": 8.6980784008909e-06, + "loss": 17.7233, + "step": 14113 + }, + { + "epoch": 0.2579925786462427, + "grad_norm": 6.8987513859118685, + "learning_rate": 8.69787916992657e-06, + "loss": 18.1197, + "step": 14114 + }, + { + "epoch": 0.25801085784268923, + "grad_norm": 6.120743129137839, + "learning_rate": 8.697679926001496e-06, + "loss": 17.2854, + "step": 14115 + }, + { + "epoch": 0.25802913703913577, + "grad_norm": 7.381495184158092, + "learning_rate": 8.697480669116373e-06, + "loss": 18.2839, + "step": 14116 + }, + { + "epoch": 0.2580474162355823, + "grad_norm": 9.896775387834815, + "learning_rate": 8.697281399271902e-06, + "loss": 18.4107, + "step": 14117 + }, + { + "epoch": 0.2580656954320288, + "grad_norm": 6.9585472563611654, + "learning_rate": 8.69708211646878e-06, + "loss": 17.7435, + "step": 14118 + }, + { + "epoch": 0.2580839746284753, + "grad_norm": 8.22483594138805, + "learning_rate": 8.696882820707708e-06, + "loss": 18.5855, + "step": 14119 + }, + { + "epoch": 0.25810225382492186, + "grad_norm": 7.280764723094892, + "learning_rate": 8.696683511989381e-06, + "loss": 17.8302, + "step": 14120 + }, + { + "epoch": 0.2581205330213684, + "grad_norm": 8.321951653277008, + "learning_rate": 8.696484190314498e-06, + "loss": 18.6115, + "step": 14121 + }, + { + "epoch": 0.25813881221781493, + "grad_norm": 6.6238999856305405, + "learning_rate": 8.69628485568376e-06, + "loss": 17.4643, + "step": 14122 + }, + { + "epoch": 0.2581570914142614, + "grad_norm": 6.503710160939189, + "learning_rate": 8.696085508097865e-06, + "loss": 17.7514, + "step": 14123 + }, + { + "epoch": 0.25817537061070794, + "grad_norm": 5.775511509425728, + "learning_rate": 8.695886147557508e-06, + "loss": 17.2477, + "step": 14124 + }, + { + "epoch": 0.2581936498071545, + "grad_norm": 6.4818374067254485, + "learning_rate": 8.695686774063394e-06, + "loss": 17.3527, + "step": 14125 + }, + { + "epoch": 0.258211929003601, + "grad_norm": 7.701902668089003, + "learning_rate": 8.695487387616217e-06, + "loss": 17.9372, + "step": 14126 + }, + { + "epoch": 0.25823020820004755, + "grad_norm": 6.697959652546757, + "learning_rate": 8.695287988216679e-06, + 
"loss": 17.6515, + "step": 14127 + }, + { + "epoch": 0.25824848739649403, + "grad_norm": 8.041435335901662, + "learning_rate": 8.695088575865476e-06, + "loss": 18.4181, + "step": 14128 + }, + { + "epoch": 0.25826676659294057, + "grad_norm": 6.567403984090422, + "learning_rate": 8.694889150563308e-06, + "loss": 17.7741, + "step": 14129 + }, + { + "epoch": 0.2582850457893871, + "grad_norm": 6.918198819549715, + "learning_rate": 8.694689712310875e-06, + "loss": 18.0391, + "step": 14130 + }, + { + "epoch": 0.25830332498583364, + "grad_norm": 6.697927322725348, + "learning_rate": 8.694490261108874e-06, + "loss": 17.2621, + "step": 14131 + }, + { + "epoch": 0.2583216041822802, + "grad_norm": 6.011527171912248, + "learning_rate": 8.694290796958004e-06, + "loss": 17.4086, + "step": 14132 + }, + { + "epoch": 0.25833988337872665, + "grad_norm": 6.1745037575596475, + "learning_rate": 8.694091319858968e-06, + "loss": 17.41, + "step": 14133 + }, + { + "epoch": 0.2583581625751732, + "grad_norm": 6.627972132134733, + "learning_rate": 8.693891829812463e-06, + "loss": 17.4986, + "step": 14134 + }, + { + "epoch": 0.2583764417716197, + "grad_norm": 6.36915373828169, + "learning_rate": 8.693692326819185e-06, + "loss": 17.4596, + "step": 14135 + }, + { + "epoch": 0.25839472096806626, + "grad_norm": 8.292436876517803, + "learning_rate": 8.693492810879838e-06, + "loss": 18.0977, + "step": 14136 + }, + { + "epoch": 0.25841300016451274, + "grad_norm": 7.847096691742847, + "learning_rate": 8.693293281995118e-06, + "loss": 17.936, + "step": 14137 + }, + { + "epoch": 0.2584312793609593, + "grad_norm": 6.133172170823049, + "learning_rate": 8.693093740165725e-06, + "loss": 17.4722, + "step": 14138 + }, + { + "epoch": 0.2584495585574058, + "grad_norm": 8.876256817910992, + "learning_rate": 8.69289418539236e-06, + "loss": 18.5546, + "step": 14139 + }, + { + "epoch": 0.25846783775385235, + "grad_norm": 6.003085546094814, + "learning_rate": 8.692694617675721e-06, + "loss": 17.5186, + "step": 14140 + }, + { + "epoch": 0.2584861169502989, + "grad_norm": 6.202426404784945, + "learning_rate": 8.692495037016509e-06, + "loss": 17.2455, + "step": 14141 + }, + { + "epoch": 0.25850439614674536, + "grad_norm": 8.097268470393931, + "learning_rate": 8.692295443415422e-06, + "loss": 17.5599, + "step": 14142 + }, + { + "epoch": 0.2585226753431919, + "grad_norm": 5.829988422698184, + "learning_rate": 8.692095836873159e-06, + "loss": 17.1431, + "step": 14143 + }, + { + "epoch": 0.25854095453963843, + "grad_norm": 6.237348241267024, + "learning_rate": 8.691896217390421e-06, + "loss": 17.3951, + "step": 14144 + }, + { + "epoch": 0.25855923373608497, + "grad_norm": 6.28420789545548, + "learning_rate": 8.691696584967905e-06, + "loss": 17.5018, + "step": 14145 + }, + { + "epoch": 0.2585775129325315, + "grad_norm": 6.371079497773243, + "learning_rate": 8.691496939606315e-06, + "loss": 17.581, + "step": 14146 + }, + { + "epoch": 0.258595792128978, + "grad_norm": 5.7376703639207705, + "learning_rate": 8.69129728130635e-06, + "loss": 17.3224, + "step": 14147 + }, + { + "epoch": 0.2586140713254245, + "grad_norm": 6.198038893127131, + "learning_rate": 8.691097610068705e-06, + "loss": 17.3983, + "step": 14148 + }, + { + "epoch": 0.25863235052187106, + "grad_norm": 6.718181695319792, + "learning_rate": 8.690897925894085e-06, + "loss": 17.2986, + "step": 14149 + }, + { + "epoch": 0.2586506297183176, + "grad_norm": 6.42715301445234, + "learning_rate": 8.690698228783188e-06, + "loss": 17.5038, + "step": 14150 + }, + { + "epoch": 0.25866890891476413, + 
"grad_norm": 6.500218420373457, + "learning_rate": 8.690498518736715e-06, + "loss": 17.7886, + "step": 14151 + }, + { + "epoch": 0.2586871881112106, + "grad_norm": 5.362641201212311, + "learning_rate": 8.690298795755362e-06, + "loss": 16.9478, + "step": 14152 + }, + { + "epoch": 0.25870546730765714, + "grad_norm": 9.544536326229629, + "learning_rate": 8.690099059839834e-06, + "loss": 18.2867, + "step": 14153 + }, + { + "epoch": 0.2587237465041037, + "grad_norm": 7.370924052965017, + "learning_rate": 8.689899310990828e-06, + "loss": 18.1317, + "step": 14154 + }, + { + "epoch": 0.2587420257005502, + "grad_norm": 8.413257304783276, + "learning_rate": 8.689699549209046e-06, + "loss": 18.2081, + "step": 14155 + }, + { + "epoch": 0.25876030489699675, + "grad_norm": 5.693286958941834, + "learning_rate": 8.689499774495186e-06, + "loss": 17.1675, + "step": 14156 + }, + { + "epoch": 0.25877858409344323, + "grad_norm": 6.486964766282015, + "learning_rate": 8.689299986849952e-06, + "loss": 17.5089, + "step": 14157 + }, + { + "epoch": 0.25879686328988977, + "grad_norm": 7.348963029950461, + "learning_rate": 8.68910018627404e-06, + "loss": 18.0337, + "step": 14158 + }, + { + "epoch": 0.2588151424863363, + "grad_norm": 5.7470422693526, + "learning_rate": 8.688900372768152e-06, + "loss": 17.1518, + "step": 14159 + }, + { + "epoch": 0.25883342168278284, + "grad_norm": 7.724256889793154, + "learning_rate": 8.688700546332989e-06, + "loss": 18.0078, + "step": 14160 + }, + { + "epoch": 0.2588517008792294, + "grad_norm": 7.23284061844154, + "learning_rate": 8.68850070696925e-06, + "loss": 18.124, + "step": 14161 + }, + { + "epoch": 0.25886998007567585, + "grad_norm": 6.394090609045597, + "learning_rate": 8.688300854677636e-06, + "loss": 17.4614, + "step": 14162 + }, + { + "epoch": 0.2588882592721224, + "grad_norm": 6.993271961062448, + "learning_rate": 8.688100989458848e-06, + "loss": 17.6922, + "step": 14163 + }, + { + "epoch": 0.2589065384685689, + "grad_norm": 6.712416485913673, + "learning_rate": 8.687901111313587e-06, + "loss": 17.6312, + "step": 14164 + }, + { + "epoch": 0.25892481766501546, + "grad_norm": 7.060217217080197, + "learning_rate": 8.68770122024255e-06, + "loss": 17.6092, + "step": 14165 + }, + { + "epoch": 0.258943096861462, + "grad_norm": 6.572287768613955, + "learning_rate": 8.687501316246441e-06, + "loss": 17.6795, + "step": 14166 + }, + { + "epoch": 0.2589613760579085, + "grad_norm": 6.32235361034704, + "learning_rate": 8.68730139932596e-06, + "loss": 17.4758, + "step": 14167 + }, + { + "epoch": 0.258979655254355, + "grad_norm": 6.795511266468757, + "learning_rate": 8.687101469481809e-06, + "loss": 17.9259, + "step": 14168 + }, + { + "epoch": 0.25899793445080155, + "grad_norm": 6.103485003033015, + "learning_rate": 8.686901526714686e-06, + "loss": 17.1723, + "step": 14169 + }, + { + "epoch": 0.2590162136472481, + "grad_norm": 8.167025485819599, + "learning_rate": 8.686701571025293e-06, + "loss": 18.2686, + "step": 14170 + }, + { + "epoch": 0.25903449284369456, + "grad_norm": 8.928956404945458, + "learning_rate": 8.686501602414332e-06, + "loss": 18.4698, + "step": 14171 + }, + { + "epoch": 0.2590527720401411, + "grad_norm": 6.538180948003185, + "learning_rate": 8.686301620882502e-06, + "loss": 17.5651, + "step": 14172 + }, + { + "epoch": 0.25907105123658764, + "grad_norm": 8.051077866943555, + "learning_rate": 8.686101626430505e-06, + "loss": 18.3328, + "step": 14173 + }, + { + "epoch": 0.25908933043303417, + "grad_norm": 7.46434567692638, + "learning_rate": 8.685901619059041e-06, + 
"loss": 17.6777, + "step": 14174 + }, + { + "epoch": 0.2591076096294807, + "grad_norm": 6.7844431445493365, + "learning_rate": 8.685701598768813e-06, + "loss": 18.0084, + "step": 14175 + }, + { + "epoch": 0.2591258888259272, + "grad_norm": 7.663998584431132, + "learning_rate": 8.685501565560519e-06, + "loss": 18.1988, + "step": 14176 + }, + { + "epoch": 0.2591441680223737, + "grad_norm": 5.93982789782059, + "learning_rate": 8.685301519434863e-06, + "loss": 17.4027, + "step": 14177 + }, + { + "epoch": 0.25916244721882026, + "grad_norm": 6.029004675891996, + "learning_rate": 8.685101460392545e-06, + "loss": 17.2887, + "step": 14178 + }, + { + "epoch": 0.2591807264152668, + "grad_norm": 9.013383911709616, + "learning_rate": 8.684901388434266e-06, + "loss": 17.7601, + "step": 14179 + }, + { + "epoch": 0.25919900561171333, + "grad_norm": 7.1253852801615984, + "learning_rate": 8.684701303560728e-06, + "loss": 17.8046, + "step": 14180 + }, + { + "epoch": 0.2592172848081598, + "grad_norm": 6.100035498801834, + "learning_rate": 8.68450120577263e-06, + "loss": 17.3895, + "step": 14181 + }, + { + "epoch": 0.25923556400460634, + "grad_norm": 5.189228718505733, + "learning_rate": 8.684301095070677e-06, + "loss": 17.1329, + "step": 14182 + }, + { + "epoch": 0.2592538432010529, + "grad_norm": 6.665781143457299, + "learning_rate": 8.684100971455567e-06, + "loss": 17.7856, + "step": 14183 + }, + { + "epoch": 0.2592721223974994, + "grad_norm": 7.47350467978439, + "learning_rate": 8.683900834928003e-06, + "loss": 18.08, + "step": 14184 + }, + { + "epoch": 0.25929040159394595, + "grad_norm": 7.525122210015121, + "learning_rate": 8.683700685488687e-06, + "loss": 18.0142, + "step": 14185 + }, + { + "epoch": 0.25930868079039243, + "grad_norm": 6.440948791857707, + "learning_rate": 8.683500523138318e-06, + "loss": 17.8687, + "step": 14186 + }, + { + "epoch": 0.25932695998683897, + "grad_norm": 6.380271622063288, + "learning_rate": 8.6833003478776e-06, + "loss": 17.5271, + "step": 14187 + }, + { + "epoch": 0.2593452391832855, + "grad_norm": 6.1840804899448765, + "learning_rate": 8.683100159707235e-06, + "loss": 17.2603, + "step": 14188 + }, + { + "epoch": 0.25936351837973204, + "grad_norm": 7.107260335076593, + "learning_rate": 8.682899958627922e-06, + "loss": 18.0198, + "step": 14189 + }, + { + "epoch": 0.2593817975761786, + "grad_norm": 7.361694465003641, + "learning_rate": 8.682699744640365e-06, + "loss": 17.6505, + "step": 14190 + }, + { + "epoch": 0.25940007677262505, + "grad_norm": 9.544649755749758, + "learning_rate": 8.682499517745266e-06, + "loss": 18.7415, + "step": 14191 + }, + { + "epoch": 0.2594183559690716, + "grad_norm": 7.23844238396064, + "learning_rate": 8.682299277943325e-06, + "loss": 17.8093, + "step": 14192 + }, + { + "epoch": 0.2594366351655181, + "grad_norm": 7.331660525820289, + "learning_rate": 8.682099025235244e-06, + "loss": 17.6294, + "step": 14193 + }, + { + "epoch": 0.25945491436196466, + "grad_norm": 6.918174831241119, + "learning_rate": 8.681898759621726e-06, + "loss": 17.565, + "step": 14194 + }, + { + "epoch": 0.2594731935584112, + "grad_norm": 7.533312081848028, + "learning_rate": 8.681698481103472e-06, + "loss": 17.773, + "step": 14195 + }, + { + "epoch": 0.2594914727548577, + "grad_norm": 6.6700859081693205, + "learning_rate": 8.681498189681185e-06, + "loss": 17.4458, + "step": 14196 + }, + { + "epoch": 0.2595097519513042, + "grad_norm": 6.484848541244922, + "learning_rate": 8.681297885355567e-06, + "loss": 17.4729, + "step": 14197 + }, + { + "epoch": 0.25952803114775075, + 
"grad_norm": 9.179260736816243, + "learning_rate": 8.681097568127316e-06, + "loss": 18.4558, + "step": 14198 + }, + { + "epoch": 0.2595463103441973, + "grad_norm": 6.965157906115924, + "learning_rate": 8.680897237997141e-06, + "loss": 17.6901, + "step": 14199 + }, + { + "epoch": 0.2595645895406438, + "grad_norm": 7.252637507695194, + "learning_rate": 8.68069689496574e-06, + "loss": 18.0804, + "step": 14200 + }, + { + "epoch": 0.2595828687370903, + "grad_norm": 8.236373944975917, + "learning_rate": 8.680496539033814e-06, + "loss": 17.9463, + "step": 14201 + }, + { + "epoch": 0.25960114793353684, + "grad_norm": 7.359481017944861, + "learning_rate": 8.680296170202066e-06, + "loss": 17.9905, + "step": 14202 + }, + { + "epoch": 0.25961942712998337, + "grad_norm": 6.535176048230325, + "learning_rate": 8.680095788471202e-06, + "loss": 17.5322, + "step": 14203 + }, + { + "epoch": 0.2596377063264299, + "grad_norm": 6.628991932210777, + "learning_rate": 8.679895393841922e-06, + "loss": 17.411, + "step": 14204 + }, + { + "epoch": 0.2596559855228764, + "grad_norm": 6.523055493984605, + "learning_rate": 8.679694986314926e-06, + "loss": 17.4709, + "step": 14205 + }, + { + "epoch": 0.2596742647193229, + "grad_norm": 7.371541668892755, + "learning_rate": 8.67949456589092e-06, + "loss": 17.7989, + "step": 14206 + }, + { + "epoch": 0.25969254391576946, + "grad_norm": 6.085751981455777, + "learning_rate": 8.679294132570604e-06, + "loss": 17.2445, + "step": 14207 + }, + { + "epoch": 0.259710823112216, + "grad_norm": 5.324822737500185, + "learning_rate": 8.67909368635468e-06, + "loss": 16.9973, + "step": 14208 + }, + { + "epoch": 0.25972910230866253, + "grad_norm": 6.077237755113356, + "learning_rate": 8.678893227243854e-06, + "loss": 17.3057, + "step": 14209 + }, + { + "epoch": 0.259747381505109, + "grad_norm": 6.874065151832448, + "learning_rate": 8.678692755238827e-06, + "loss": 17.8281, + "step": 14210 + }, + { + "epoch": 0.25976566070155555, + "grad_norm": 6.495768176593526, + "learning_rate": 8.678492270340299e-06, + "loss": 17.7346, + "step": 14211 + }, + { + "epoch": 0.2597839398980021, + "grad_norm": 7.387890247946272, + "learning_rate": 8.678291772548975e-06, + "loss": 17.9833, + "step": 14212 + }, + { + "epoch": 0.2598022190944486, + "grad_norm": 6.996674065804481, + "learning_rate": 8.67809126186556e-06, + "loss": 17.7884, + "step": 14213 + }, + { + "epoch": 0.25982049829089515, + "grad_norm": 6.281673670852194, + "learning_rate": 8.67789073829075e-06, + "loss": 17.3121, + "step": 14214 + }, + { + "epoch": 0.25983877748734163, + "grad_norm": 7.452836805760374, + "learning_rate": 8.677690201825255e-06, + "loss": 17.789, + "step": 14215 + }, + { + "epoch": 0.25985705668378817, + "grad_norm": 5.957035006205307, + "learning_rate": 8.677489652469775e-06, + "loss": 17.4055, + "step": 14216 + }, + { + "epoch": 0.2598753358802347, + "grad_norm": 6.3458021800711615, + "learning_rate": 8.677289090225013e-06, + "loss": 17.3333, + "step": 14217 + }, + { + "epoch": 0.25989361507668124, + "grad_norm": 6.118127226736724, + "learning_rate": 8.677088515091671e-06, + "loss": 17.3757, + "step": 14218 + }, + { + "epoch": 0.2599118942731278, + "grad_norm": 6.751555649206739, + "learning_rate": 8.676887927070453e-06, + "loss": 17.5914, + "step": 14219 + }, + { + "epoch": 0.25993017346957425, + "grad_norm": 6.897398110608393, + "learning_rate": 8.676687326162063e-06, + "loss": 17.4974, + "step": 14220 + }, + { + "epoch": 0.2599484526660208, + "grad_norm": 5.837874375748976, + "learning_rate": 8.676486712367201e-06, + 
"loss": 17.2377, + "step": 14221 + }, + { + "epoch": 0.2599667318624673, + "grad_norm": 7.752103161333534, + "learning_rate": 8.676286085686573e-06, + "loss": 18.2379, + "step": 14222 + }, + { + "epoch": 0.25998501105891386, + "grad_norm": 5.741388568907354, + "learning_rate": 8.676085446120882e-06, + "loss": 17.176, + "step": 14223 + }, + { + "epoch": 0.2600032902553604, + "grad_norm": 7.389644836119392, + "learning_rate": 8.67588479367083e-06, + "loss": 17.7166, + "step": 14224 + }, + { + "epoch": 0.2600215694518069, + "grad_norm": 6.8645852808263825, + "learning_rate": 8.675684128337121e-06, + "loss": 17.7229, + "step": 14225 + }, + { + "epoch": 0.2600398486482534, + "grad_norm": 6.655345409236313, + "learning_rate": 8.675483450120456e-06, + "loss": 17.732, + "step": 14226 + }, + { + "epoch": 0.26005812784469995, + "grad_norm": 6.494098822622264, + "learning_rate": 8.675282759021544e-06, + "loss": 17.2915, + "step": 14227 + }, + { + "epoch": 0.2600764070411465, + "grad_norm": 6.522231857490047, + "learning_rate": 8.675082055041082e-06, + "loss": 17.5535, + "step": 14228 + }, + { + "epoch": 0.260094686237593, + "grad_norm": 6.703443328300827, + "learning_rate": 8.674881338179778e-06, + "loss": 17.7362, + "step": 14229 + }, + { + "epoch": 0.2601129654340395, + "grad_norm": 7.665242439256601, + "learning_rate": 8.674680608438332e-06, + "loss": 17.1427, + "step": 14230 + }, + { + "epoch": 0.26013124463048604, + "grad_norm": 5.979924565427676, + "learning_rate": 8.674479865817452e-06, + "loss": 17.3177, + "step": 14231 + }, + { + "epoch": 0.26014952382693257, + "grad_norm": 6.298123909968524, + "learning_rate": 8.674279110317837e-06, + "loss": 17.3941, + "step": 14232 + }, + { + "epoch": 0.2601678030233791, + "grad_norm": 6.134596837049285, + "learning_rate": 8.674078341940193e-06, + "loss": 17.3473, + "step": 14233 + }, + { + "epoch": 0.26018608221982564, + "grad_norm": 6.690030188988819, + "learning_rate": 8.673877560685225e-06, + "loss": 17.6278, + "step": 14234 + }, + { + "epoch": 0.2602043614162721, + "grad_norm": 7.04849350575925, + "learning_rate": 8.673676766553632e-06, + "loss": 17.5964, + "step": 14235 + }, + { + "epoch": 0.26022264061271866, + "grad_norm": 5.807512367297847, + "learning_rate": 8.673475959546122e-06, + "loss": 17.1041, + "step": 14236 + }, + { + "epoch": 0.2602409198091652, + "grad_norm": 7.804949548570082, + "learning_rate": 8.673275139663397e-06, + "loss": 18.0493, + "step": 14237 + }, + { + "epoch": 0.26025919900561173, + "grad_norm": 5.631732771120599, + "learning_rate": 8.673074306906162e-06, + "loss": 17.1282, + "step": 14238 + }, + { + "epoch": 0.2602774782020582, + "grad_norm": 6.623288987579542, + "learning_rate": 8.67287346127512e-06, + "loss": 17.4896, + "step": 14239 + }, + { + "epoch": 0.26029575739850475, + "grad_norm": 7.451615977421559, + "learning_rate": 8.672672602770976e-06, + "loss": 17.6082, + "step": 14240 + }, + { + "epoch": 0.2603140365949513, + "grad_norm": 6.66514506670663, + "learning_rate": 8.672471731394432e-06, + "loss": 17.5331, + "step": 14241 + }, + { + "epoch": 0.2603323157913978, + "grad_norm": 6.899726283753662, + "learning_rate": 8.672270847146193e-06, + "loss": 17.6937, + "step": 14242 + }, + { + "epoch": 0.26035059498784435, + "grad_norm": 7.7005808922718115, + "learning_rate": 8.672069950026964e-06, + "loss": 18.1453, + "step": 14243 + }, + { + "epoch": 0.26036887418429083, + "grad_norm": 6.37171306374178, + "learning_rate": 8.67186904003745e-06, + "loss": 17.5089, + "step": 14244 + }, + { + "epoch": 0.26038715338073737, + 
"grad_norm": 7.908325408627519, + "learning_rate": 8.671668117178351e-06, + "loss": 18.2844, + "step": 14245 + }, + { + "epoch": 0.2604054325771839, + "grad_norm": 5.920169292135648, + "learning_rate": 8.671467181450376e-06, + "loss": 17.1884, + "step": 14246 + }, + { + "epoch": 0.26042371177363044, + "grad_norm": 6.898063108802871, + "learning_rate": 8.671266232854227e-06, + "loss": 17.935, + "step": 14247 + }, + { + "epoch": 0.260441990970077, + "grad_norm": 6.529362513605127, + "learning_rate": 8.671065271390606e-06, + "loss": 17.6221, + "step": 14248 + }, + { + "epoch": 0.26046027016652346, + "grad_norm": 7.228320070578015, + "learning_rate": 8.670864297060223e-06, + "loss": 18.1665, + "step": 14249 + }, + { + "epoch": 0.26047854936297, + "grad_norm": 5.948830610352112, + "learning_rate": 8.670663309863778e-06, + "loss": 17.3457, + "step": 14250 + }, + { + "epoch": 0.2604968285594165, + "grad_norm": 6.602771669230802, + "learning_rate": 8.670462309801976e-06, + "loss": 17.6515, + "step": 14251 + }, + { + "epoch": 0.26051510775586306, + "grad_norm": 7.077321993934937, + "learning_rate": 8.670261296875521e-06, + "loss": 17.8299, + "step": 14252 + }, + { + "epoch": 0.2605333869523096, + "grad_norm": 4.706703887188802, + "learning_rate": 8.670060271085122e-06, + "loss": 16.854, + "step": 14253 + }, + { + "epoch": 0.2605516661487561, + "grad_norm": 6.60047464851554, + "learning_rate": 8.669859232431478e-06, + "loss": 17.4978, + "step": 14254 + }, + { + "epoch": 0.2605699453452026, + "grad_norm": 5.9323500391190285, + "learning_rate": 8.669658180915295e-06, + "loss": 17.3478, + "step": 14255 + }, + { + "epoch": 0.26058822454164915, + "grad_norm": 6.901721871946896, + "learning_rate": 8.669457116537281e-06, + "loss": 17.6855, + "step": 14256 + }, + { + "epoch": 0.2606065037380957, + "grad_norm": 6.578632990964375, + "learning_rate": 8.669256039298136e-06, + "loss": 17.3722, + "step": 14257 + }, + { + "epoch": 0.2606247829345422, + "grad_norm": 5.39057658087867, + "learning_rate": 8.669054949198567e-06, + "loss": 16.9551, + "step": 14258 + }, + { + "epoch": 0.2606430621309887, + "grad_norm": 6.211108870611746, + "learning_rate": 8.66885384623928e-06, + "loss": 17.3891, + "step": 14259 + }, + { + "epoch": 0.26066134132743524, + "grad_norm": 6.384579728924579, + "learning_rate": 8.668652730420979e-06, + "loss": 17.6692, + "step": 14260 + }, + { + "epoch": 0.26067962052388177, + "grad_norm": 5.313832268782017, + "learning_rate": 8.668451601744367e-06, + "loss": 16.9212, + "step": 14261 + }, + { + "epoch": 0.2606978997203283, + "grad_norm": 6.327507524674901, + "learning_rate": 8.668250460210152e-06, + "loss": 17.2745, + "step": 14262 + }, + { + "epoch": 0.26071617891677484, + "grad_norm": 7.276705564390637, + "learning_rate": 8.668049305819038e-06, + "loss": 17.9003, + "step": 14263 + }, + { + "epoch": 0.2607344581132213, + "grad_norm": 6.954134425200045, + "learning_rate": 8.667848138571727e-06, + "loss": 17.9699, + "step": 14264 + }, + { + "epoch": 0.26075273730966786, + "grad_norm": 6.360331324137992, + "learning_rate": 8.667646958468928e-06, + "loss": 17.4674, + "step": 14265 + }, + { + "epoch": 0.2607710165061144, + "grad_norm": 6.5458612551608555, + "learning_rate": 8.667445765511345e-06, + "loss": 17.5799, + "step": 14266 + }, + { + "epoch": 0.26078929570256093, + "grad_norm": 6.615452750596066, + "learning_rate": 8.66724455969968e-06, + "loss": 17.5238, + "step": 14267 + }, + { + "epoch": 0.26080757489900747, + "grad_norm": 6.133766441116057, + "learning_rate": 8.667043341034646e-06, + 
"loss": 17.353, + "step": 14268 + }, + { + "epoch": 0.26082585409545395, + "grad_norm": 6.822704766326976, + "learning_rate": 8.66684210951694e-06, + "loss": 17.8244, + "step": 14269 + }, + { + "epoch": 0.2608441332919005, + "grad_norm": 6.510204609711557, + "learning_rate": 8.66664086514727e-06, + "loss": 17.4648, + "step": 14270 + }, + { + "epoch": 0.260862412488347, + "grad_norm": 7.596246445747984, + "learning_rate": 8.666439607926345e-06, + "loss": 17.7896, + "step": 14271 + }, + { + "epoch": 0.26088069168479355, + "grad_norm": 5.235495628093616, + "learning_rate": 8.666238337854864e-06, + "loss": 16.7636, + "step": 14272 + }, + { + "epoch": 0.26089897088124003, + "grad_norm": 7.000231983092996, + "learning_rate": 8.666037054933538e-06, + "loss": 17.706, + "step": 14273 + }, + { + "epoch": 0.26091725007768657, + "grad_norm": 6.577012523913233, + "learning_rate": 8.66583575916307e-06, + "loss": 17.6584, + "step": 14274 + }, + { + "epoch": 0.2609355292741331, + "grad_norm": 6.546348910698961, + "learning_rate": 8.665634450544165e-06, + "loss": 17.2979, + "step": 14275 + }, + { + "epoch": 0.26095380847057964, + "grad_norm": 6.821977935311509, + "learning_rate": 8.66543312907753e-06, + "loss": 17.4793, + "step": 14276 + }, + { + "epoch": 0.2609720876670262, + "grad_norm": 7.523576969932582, + "learning_rate": 8.665231794763868e-06, + "loss": 18.2738, + "step": 14277 + }, + { + "epoch": 0.26099036686347266, + "grad_norm": 5.151727795926751, + "learning_rate": 8.66503044760389e-06, + "loss": 16.9207, + "step": 14278 + }, + { + "epoch": 0.2610086460599192, + "grad_norm": 6.743005251847526, + "learning_rate": 8.664829087598297e-06, + "loss": 17.8879, + "step": 14279 + }, + { + "epoch": 0.2610269252563657, + "grad_norm": 7.27416254634992, + "learning_rate": 8.664627714747796e-06, + "loss": 17.6607, + "step": 14280 + }, + { + "epoch": 0.26104520445281226, + "grad_norm": 5.7338638288078725, + "learning_rate": 8.66442632905309e-06, + "loss": 17.0997, + "step": 14281 + }, + { + "epoch": 0.2610634836492588, + "grad_norm": 6.238847600389716, + "learning_rate": 8.664224930514891e-06, + "loss": 17.543, + "step": 14282 + }, + { + "epoch": 0.2610817628457053, + "grad_norm": 6.262960496814813, + "learning_rate": 8.664023519133901e-06, + "loss": 17.1668, + "step": 14283 + }, + { + "epoch": 0.2611000420421518, + "grad_norm": 6.312394038715761, + "learning_rate": 8.663822094910826e-06, + "loss": 17.2952, + "step": 14284 + }, + { + "epoch": 0.26111832123859835, + "grad_norm": 7.314451336858432, + "learning_rate": 8.663620657846372e-06, + "loss": 17.9633, + "step": 14285 + }, + { + "epoch": 0.2611366004350449, + "grad_norm": 7.068644350015441, + "learning_rate": 8.663419207941248e-06, + "loss": 17.6935, + "step": 14286 + }, + { + "epoch": 0.2611548796314914, + "grad_norm": 6.731138266776832, + "learning_rate": 8.663217745196155e-06, + "loss": 17.6665, + "step": 14287 + }, + { + "epoch": 0.2611731588279379, + "grad_norm": 7.2949333980703726, + "learning_rate": 8.663016269611802e-06, + "loss": 17.6784, + "step": 14288 + }, + { + "epoch": 0.26119143802438444, + "grad_norm": 6.200160114403457, + "learning_rate": 8.662814781188896e-06, + "loss": 17.2703, + "step": 14289 + }, + { + "epoch": 0.261209717220831, + "grad_norm": 7.511082518027258, + "learning_rate": 8.662613279928141e-06, + "loss": 17.5261, + "step": 14290 + }, + { + "epoch": 0.2612279964172775, + "grad_norm": 5.669264255267169, + "learning_rate": 8.662411765830245e-06, + "loss": 17.1438, + "step": 14291 + }, + { + "epoch": 0.26124627561372404, + 
"grad_norm": 5.7018480564985365, + "learning_rate": 8.662210238895914e-06, + "loss": 17.0191, + "step": 14292 + }, + { + "epoch": 0.2612645548101705, + "grad_norm": 7.153404570753518, + "learning_rate": 8.662008699125853e-06, + "loss": 17.5494, + "step": 14293 + }, + { + "epoch": 0.26128283400661706, + "grad_norm": 5.644824221333599, + "learning_rate": 8.661807146520769e-06, + "loss": 17.2817, + "step": 14294 + }, + { + "epoch": 0.2613011132030636, + "grad_norm": 6.488183206471269, + "learning_rate": 8.661605581081369e-06, + "loss": 17.728, + "step": 14295 + }, + { + "epoch": 0.26131939239951013, + "grad_norm": 6.725374470937904, + "learning_rate": 8.66140400280836e-06, + "loss": 17.5082, + "step": 14296 + }, + { + "epoch": 0.26133767159595667, + "grad_norm": 6.779234937895279, + "learning_rate": 8.661202411702448e-06, + "loss": 17.4111, + "step": 14297 + }, + { + "epoch": 0.26135595079240315, + "grad_norm": 7.621404763034471, + "learning_rate": 8.661000807764338e-06, + "loss": 17.9106, + "step": 14298 + }, + { + "epoch": 0.2613742299888497, + "grad_norm": 6.2451423751081405, + "learning_rate": 8.660799190994738e-06, + "loss": 17.3519, + "step": 14299 + }, + { + "epoch": 0.2613925091852962, + "grad_norm": 6.7033198164479, + "learning_rate": 8.660597561394354e-06, + "loss": 17.8168, + "step": 14300 + }, + { + "epoch": 0.26141078838174275, + "grad_norm": 5.580826480366983, + "learning_rate": 8.660395918963894e-06, + "loss": 17.1418, + "step": 14301 + }, + { + "epoch": 0.2614290675781893, + "grad_norm": 8.43652772775704, + "learning_rate": 8.660194263704063e-06, + "loss": 18.2843, + "step": 14302 + }, + { + "epoch": 0.26144734677463577, + "grad_norm": 8.126025296361352, + "learning_rate": 8.659992595615569e-06, + "loss": 18.0668, + "step": 14303 + }, + { + "epoch": 0.2614656259710823, + "grad_norm": 9.150277131661936, + "learning_rate": 8.65979091469912e-06, + "loss": 18.4163, + "step": 14304 + }, + { + "epoch": 0.26148390516752884, + "grad_norm": 9.556173073928285, + "learning_rate": 8.65958922095542e-06, + "loss": 18.7502, + "step": 14305 + }, + { + "epoch": 0.2615021843639754, + "grad_norm": 6.128587431759308, + "learning_rate": 8.659387514385179e-06, + "loss": 17.253, + "step": 14306 + }, + { + "epoch": 0.26152046356042186, + "grad_norm": 6.943525255298647, + "learning_rate": 8.6591857949891e-06, + "loss": 17.7416, + "step": 14307 + }, + { + "epoch": 0.2615387427568684, + "grad_norm": 6.444330711899049, + "learning_rate": 8.658984062767893e-06, + "loss": 17.4775, + "step": 14308 + }, + { + "epoch": 0.2615570219533149, + "grad_norm": 8.10083039055212, + "learning_rate": 8.658782317722265e-06, + "loss": 18.2508, + "step": 14309 + }, + { + "epoch": 0.26157530114976146, + "grad_norm": 6.220214814321148, + "learning_rate": 8.658580559852922e-06, + "loss": 17.3301, + "step": 14310 + }, + { + "epoch": 0.261593580346208, + "grad_norm": 7.169790426415843, + "learning_rate": 8.658378789160573e-06, + "loss": 17.8293, + "step": 14311 + }, + { + "epoch": 0.2616118595426545, + "grad_norm": 6.650129065417017, + "learning_rate": 8.658177005645922e-06, + "loss": 17.2107, + "step": 14312 + }, + { + "epoch": 0.261630138739101, + "grad_norm": 5.8399668845782955, + "learning_rate": 8.657975209309679e-06, + "loss": 17.4181, + "step": 14313 + }, + { + "epoch": 0.26164841793554755, + "grad_norm": 6.8481940107929455, + "learning_rate": 8.657773400152549e-06, + "loss": 17.5672, + "step": 14314 + }, + { + "epoch": 0.2616666971319941, + "grad_norm": 6.905469070345408, + "learning_rate": 8.657571578175243e-06, + 
"loss": 18.1343, + "step": 14315 + }, + { + "epoch": 0.2616849763284406, + "grad_norm": 6.860346363177648, + "learning_rate": 8.657369743378464e-06, + "loss": 17.866, + "step": 14316 + }, + { + "epoch": 0.2617032555248871, + "grad_norm": 6.535592953025446, + "learning_rate": 8.657167895762923e-06, + "loss": 17.4644, + "step": 14317 + }, + { + "epoch": 0.26172153472133364, + "grad_norm": 7.669681712929007, + "learning_rate": 8.656966035329325e-06, + "loss": 17.6827, + "step": 14318 + }, + { + "epoch": 0.2617398139177802, + "grad_norm": 5.8820288919884876, + "learning_rate": 8.656764162078377e-06, + "loss": 17.1236, + "step": 14319 + }, + { + "epoch": 0.2617580931142267, + "grad_norm": 6.043916132868095, + "learning_rate": 8.65656227601079e-06, + "loss": 17.5664, + "step": 14320 + }, + { + "epoch": 0.26177637231067324, + "grad_norm": 7.263720419922502, + "learning_rate": 8.656360377127269e-06, + "loss": 18.2175, + "step": 14321 + }, + { + "epoch": 0.2617946515071197, + "grad_norm": 8.64963241796326, + "learning_rate": 8.656158465428523e-06, + "loss": 17.9681, + "step": 14322 + }, + { + "epoch": 0.26181293070356626, + "grad_norm": 5.8059009438770515, + "learning_rate": 8.655956540915256e-06, + "loss": 17.4548, + "step": 14323 + }, + { + "epoch": 0.2618312099000128, + "grad_norm": 5.79451611257415, + "learning_rate": 8.65575460358818e-06, + "loss": 17.3694, + "step": 14324 + }, + { + "epoch": 0.26184948909645933, + "grad_norm": 6.402302482213285, + "learning_rate": 8.655552653448003e-06, + "loss": 17.8392, + "step": 14325 + }, + { + "epoch": 0.26186776829290587, + "grad_norm": 8.61783076396563, + "learning_rate": 8.65535069049543e-06, + "loss": 18.7223, + "step": 14326 + }, + { + "epoch": 0.26188604748935235, + "grad_norm": 8.587924319046229, + "learning_rate": 8.65514871473117e-06, + "loss": 18.417, + "step": 14327 + }, + { + "epoch": 0.2619043266857989, + "grad_norm": 5.952877435089593, + "learning_rate": 8.654946726155931e-06, + "loss": 17.1031, + "step": 14328 + }, + { + "epoch": 0.2619226058822454, + "grad_norm": 6.014144087383838, + "learning_rate": 8.65474472477042e-06, + "loss": 17.2704, + "step": 14329 + }, + { + "epoch": 0.26194088507869195, + "grad_norm": 5.634620473152719, + "learning_rate": 8.654542710575348e-06, + "loss": 17.1087, + "step": 14330 + }, + { + "epoch": 0.2619591642751385, + "grad_norm": 7.144477716391988, + "learning_rate": 8.654340683571418e-06, + "loss": 17.6253, + "step": 14331 + }, + { + "epoch": 0.26197744347158497, + "grad_norm": 6.951082058232852, + "learning_rate": 8.654138643759344e-06, + "loss": 17.5187, + "step": 14332 + }, + { + "epoch": 0.2619957226680315, + "grad_norm": 6.539490304534041, + "learning_rate": 8.653936591139829e-06, + "loss": 17.753, + "step": 14333 + }, + { + "epoch": 0.26201400186447804, + "grad_norm": 7.091311866726209, + "learning_rate": 8.653734525713583e-06, + "loss": 17.6396, + "step": 14334 + }, + { + "epoch": 0.2620322810609246, + "grad_norm": 7.244588545356523, + "learning_rate": 8.653532447481316e-06, + "loss": 18.1019, + "step": 14335 + }, + { + "epoch": 0.2620505602573711, + "grad_norm": 6.7605898758674625, + "learning_rate": 8.653330356443735e-06, + "loss": 18.0615, + "step": 14336 + }, + { + "epoch": 0.2620688394538176, + "grad_norm": 6.235687289679575, + "learning_rate": 8.653128252601547e-06, + "loss": 17.3753, + "step": 14337 + }, + { + "epoch": 0.26208711865026413, + "grad_norm": 6.913880155405421, + "learning_rate": 8.652926135955464e-06, + "loss": 17.8539, + "step": 14338 + }, + { + "epoch": 0.26210539784671066, + 
"grad_norm": 8.72100510318729, + "learning_rate": 8.652724006506189e-06, + "loss": 18.2457, + "step": 14339 + }, + { + "epoch": 0.2621236770431572, + "grad_norm": 7.547763899914024, + "learning_rate": 8.652521864254435e-06, + "loss": 18.258, + "step": 14340 + }, + { + "epoch": 0.2621419562396037, + "grad_norm": 8.664727161749722, + "learning_rate": 8.652319709200907e-06, + "loss": 17.8626, + "step": 14341 + }, + { + "epoch": 0.2621602354360502, + "grad_norm": 6.820000589447856, + "learning_rate": 8.652117541346317e-06, + "loss": 17.6796, + "step": 14342 + }, + { + "epoch": 0.26217851463249675, + "grad_norm": 7.484719337981444, + "learning_rate": 8.651915360691373e-06, + "loss": 17.9818, + "step": 14343 + }, + { + "epoch": 0.2621967938289433, + "grad_norm": 6.702311338215585, + "learning_rate": 8.65171316723678e-06, + "loss": 17.9814, + "step": 14344 + }, + { + "epoch": 0.2622150730253898, + "grad_norm": 6.274173578402506, + "learning_rate": 8.651510960983249e-06, + "loss": 17.5691, + "step": 14345 + }, + { + "epoch": 0.2622333522218363, + "grad_norm": 5.154677440741493, + "learning_rate": 8.65130874193149e-06, + "loss": 17.0477, + "step": 14346 + }, + { + "epoch": 0.26225163141828284, + "grad_norm": 6.143556865771956, + "learning_rate": 8.651106510082211e-06, + "loss": 17.2127, + "step": 14347 + }, + { + "epoch": 0.2622699106147294, + "grad_norm": 6.597435458258558, + "learning_rate": 8.65090426543612e-06, + "loss": 17.7153, + "step": 14348 + }, + { + "epoch": 0.2622881898111759, + "grad_norm": 7.095482561854488, + "learning_rate": 8.650702007993928e-06, + "loss": 17.5848, + "step": 14349 + }, + { + "epoch": 0.26230646900762244, + "grad_norm": 7.919294559570361, + "learning_rate": 8.65049973775634e-06, + "loss": 18.287, + "step": 14350 + }, + { + "epoch": 0.2623247482040689, + "grad_norm": 6.989630058163078, + "learning_rate": 8.650297454724069e-06, + "loss": 17.9026, + "step": 14351 + }, + { + "epoch": 0.26234302740051546, + "grad_norm": 7.142012838853306, + "learning_rate": 8.65009515889782e-06, + "loss": 17.6811, + "step": 14352 + }, + { + "epoch": 0.262361306596962, + "grad_norm": 7.663580209790349, + "learning_rate": 8.649892850278305e-06, + "loss": 17.713, + "step": 14353 + }, + { + "epoch": 0.26237958579340853, + "grad_norm": 6.804175445291541, + "learning_rate": 8.649690528866234e-06, + "loss": 17.546, + "step": 14354 + }, + { + "epoch": 0.26239786498985507, + "grad_norm": 5.806853932241845, + "learning_rate": 8.649488194662313e-06, + "loss": 17.1717, + "step": 14355 + }, + { + "epoch": 0.26241614418630155, + "grad_norm": 7.266232184784769, + "learning_rate": 8.649285847667252e-06, + "loss": 17.4944, + "step": 14356 + }, + { + "epoch": 0.2624344233827481, + "grad_norm": 6.913518241050929, + "learning_rate": 8.649083487881762e-06, + "loss": 17.7306, + "step": 14357 + }, + { + "epoch": 0.2624527025791946, + "grad_norm": 7.103733007219354, + "learning_rate": 8.64888111530655e-06, + "loss": 17.883, + "step": 14358 + }, + { + "epoch": 0.26247098177564115, + "grad_norm": 7.970926037303044, + "learning_rate": 8.648678729942329e-06, + "loss": 18.3431, + "step": 14359 + }, + { + "epoch": 0.2624892609720877, + "grad_norm": 6.871957535751437, + "learning_rate": 8.648476331789802e-06, + "loss": 17.7505, + "step": 14360 + }, + { + "epoch": 0.26250754016853417, + "grad_norm": 6.039876635119999, + "learning_rate": 8.648273920849684e-06, + "loss": 17.4392, + "step": 14361 + }, + { + "epoch": 0.2625258193649807, + "grad_norm": 7.349633870407833, + "learning_rate": 8.648071497122681e-06, + "loss": 
17.7897, + "step": 14362 + }, + { + "epoch": 0.26254409856142724, + "grad_norm": 6.617894636394728, + "learning_rate": 8.647869060609506e-06, + "loss": 17.5581, + "step": 14363 + }, + { + "epoch": 0.2625623777578738, + "grad_norm": 7.306333841700882, + "learning_rate": 8.647666611310865e-06, + "loss": 17.6994, + "step": 14364 + }, + { + "epoch": 0.2625806569543203, + "grad_norm": 7.493017193538997, + "learning_rate": 8.647464149227469e-06, + "loss": 17.8527, + "step": 14365 + }, + { + "epoch": 0.2625989361507668, + "grad_norm": 6.956499612622345, + "learning_rate": 8.647261674360029e-06, + "loss": 17.6242, + "step": 14366 + }, + { + "epoch": 0.26261721534721333, + "grad_norm": 9.324868903766106, + "learning_rate": 8.647059186709252e-06, + "loss": 17.9245, + "step": 14367 + }, + { + "epoch": 0.26263549454365986, + "grad_norm": 6.0025171735272185, + "learning_rate": 8.646856686275851e-06, + "loss": 17.3891, + "step": 14368 + }, + { + "epoch": 0.2626537737401064, + "grad_norm": 7.528251366992634, + "learning_rate": 8.646654173060531e-06, + "loss": 18.0916, + "step": 14369 + }, + { + "epoch": 0.26267205293655294, + "grad_norm": 7.942889659972795, + "learning_rate": 8.646451647064007e-06, + "loss": 18.0743, + "step": 14370 + }, + { + "epoch": 0.2626903321329994, + "grad_norm": 6.4762044959895695, + "learning_rate": 8.646249108286985e-06, + "loss": 17.4625, + "step": 14371 + }, + { + "epoch": 0.26270861132944595, + "grad_norm": 6.371259526902738, + "learning_rate": 8.646046556730176e-06, + "loss": 17.4361, + "step": 14372 + }, + { + "epoch": 0.2627268905258925, + "grad_norm": 8.00752586799204, + "learning_rate": 8.645843992394291e-06, + "loss": 18.4024, + "step": 14373 + }, + { + "epoch": 0.262745169722339, + "grad_norm": 7.283049339422857, + "learning_rate": 8.64564141528004e-06, + "loss": 17.7196, + "step": 14374 + }, + { + "epoch": 0.2627634489187855, + "grad_norm": 5.903938845138504, + "learning_rate": 8.64543882538813e-06, + "loss": 17.3946, + "step": 14375 + }, + { + "epoch": 0.26278172811523204, + "grad_norm": 6.541995076847163, + "learning_rate": 8.645236222719275e-06, + "loss": 17.7814, + "step": 14376 + }, + { + "epoch": 0.2628000073116786, + "grad_norm": 6.989811084200486, + "learning_rate": 8.645033607274183e-06, + "loss": 17.7339, + "step": 14377 + }, + { + "epoch": 0.2628182865081251, + "grad_norm": 6.963482701141724, + "learning_rate": 8.644830979053565e-06, + "loss": 17.754, + "step": 14378 + }, + { + "epoch": 0.26283656570457165, + "grad_norm": 6.733724655273646, + "learning_rate": 8.64462833805813e-06, + "loss": 17.6504, + "step": 14379 + }, + { + "epoch": 0.2628548449010181, + "grad_norm": 6.9312705576119145, + "learning_rate": 8.644425684288589e-06, + "loss": 17.6847, + "step": 14380 + }, + { + "epoch": 0.26287312409746466, + "grad_norm": 5.788289883011871, + "learning_rate": 8.644223017745653e-06, + "loss": 17.2606, + "step": 14381 + }, + { + "epoch": 0.2628914032939112, + "grad_norm": 6.175323501223622, + "learning_rate": 8.644020338430029e-06, + "loss": 17.3564, + "step": 14382 + }, + { + "epoch": 0.26290968249035773, + "grad_norm": 8.410916425732914, + "learning_rate": 8.643817646342433e-06, + "loss": 18.51, + "step": 14383 + }, + { + "epoch": 0.26292796168680427, + "grad_norm": 7.237520964244881, + "learning_rate": 8.64361494148357e-06, + "loss": 17.6957, + "step": 14384 + }, + { + "epoch": 0.26294624088325075, + "grad_norm": 7.886911415886462, + "learning_rate": 8.643412223854154e-06, + "loss": 18.1002, + "step": 14385 + }, + { + "epoch": 0.2629645200796973, + 
"grad_norm": 6.373927767746825, + "learning_rate": 8.643209493454893e-06, + "loss": 17.5343, + "step": 14386 + }, + { + "epoch": 0.2629827992761438, + "grad_norm": 4.982759648815514, + "learning_rate": 8.6430067502865e-06, + "loss": 16.9397, + "step": 14387 + }, + { + "epoch": 0.26300107847259035, + "grad_norm": 7.369552463687001, + "learning_rate": 8.642803994349686e-06, + "loss": 17.7397, + "step": 14388 + }, + { + "epoch": 0.2630193576690369, + "grad_norm": 6.155446198720482, + "learning_rate": 8.642601225645158e-06, + "loss": 17.3574, + "step": 14389 + }, + { + "epoch": 0.26303763686548337, + "grad_norm": 6.270228967039644, + "learning_rate": 8.64239844417363e-06, + "loss": 17.4627, + "step": 14390 + }, + { + "epoch": 0.2630559160619299, + "grad_norm": 6.196351432520529, + "learning_rate": 8.64219564993581e-06, + "loss": 17.2828, + "step": 14391 + }, + { + "epoch": 0.26307419525837644, + "grad_norm": 6.101617643826063, + "learning_rate": 8.641992842932411e-06, + "loss": 17.301, + "step": 14392 + }, + { + "epoch": 0.263092474454823, + "grad_norm": 6.358708273663958, + "learning_rate": 8.641790023164146e-06, + "loss": 17.3201, + "step": 14393 + }, + { + "epoch": 0.2631107536512695, + "grad_norm": 6.078181863374826, + "learning_rate": 8.641587190631719e-06, + "loss": 17.1216, + "step": 14394 + }, + { + "epoch": 0.263129032847716, + "grad_norm": 6.656127913610376, + "learning_rate": 8.641384345335845e-06, + "loss": 17.4851, + "step": 14395 + }, + { + "epoch": 0.26314731204416253, + "grad_norm": 6.16934047646913, + "learning_rate": 8.641181487277237e-06, + "loss": 17.3669, + "step": 14396 + }, + { + "epoch": 0.26316559124060906, + "grad_norm": 7.6829119752010495, + "learning_rate": 8.640978616456604e-06, + "loss": 17.688, + "step": 14397 + }, + { + "epoch": 0.2631838704370556, + "grad_norm": 7.996643508772909, + "learning_rate": 8.640775732874655e-06, + "loss": 17.9074, + "step": 14398 + }, + { + "epoch": 0.26320214963350214, + "grad_norm": 5.867890861865463, + "learning_rate": 8.640572836532104e-06, + "loss": 17.182, + "step": 14399 + }, + { + "epoch": 0.2632204288299486, + "grad_norm": 6.574988478382767, + "learning_rate": 8.64036992742966e-06, + "loss": 17.6623, + "step": 14400 + }, + { + "epoch": 0.26323870802639515, + "grad_norm": 7.9735831120235074, + "learning_rate": 8.640167005568036e-06, + "loss": 17.795, + "step": 14401 + }, + { + "epoch": 0.2632569872228417, + "grad_norm": 6.9479004744618775, + "learning_rate": 8.639964070947944e-06, + "loss": 17.6584, + "step": 14402 + }, + { + "epoch": 0.2632752664192882, + "grad_norm": 6.30627480439822, + "learning_rate": 8.639761123570093e-06, + "loss": 17.3807, + "step": 14403 + }, + { + "epoch": 0.26329354561573476, + "grad_norm": 6.508813470371485, + "learning_rate": 8.639558163435195e-06, + "loss": 17.6887, + "step": 14404 + }, + { + "epoch": 0.26331182481218124, + "grad_norm": 6.034621266277597, + "learning_rate": 8.63935519054396e-06, + "loss": 17.3219, + "step": 14405 + }, + { + "epoch": 0.2633301040086278, + "grad_norm": 7.550417473570721, + "learning_rate": 8.639152204897103e-06, + "loss": 17.4877, + "step": 14406 + }, + { + "epoch": 0.2633483832050743, + "grad_norm": 5.8813861967069725, + "learning_rate": 8.63894920649533e-06, + "loss": 17.2511, + "step": 14407 + }, + { + "epoch": 0.26336666240152085, + "grad_norm": 6.541356247451769, + "learning_rate": 8.63874619533936e-06, + "loss": 17.7163, + "step": 14408 + }, + { + "epoch": 0.2633849415979673, + "grad_norm": 6.868842271992227, + "learning_rate": 8.638543171429898e-06, + "loss": 
17.5807, + "step": 14409 + }, + { + "epoch": 0.26340322079441386, + "grad_norm": 8.45474998240171, + "learning_rate": 8.638340134767658e-06, + "loss": 17.9626, + "step": 14410 + }, + { + "epoch": 0.2634214999908604, + "grad_norm": 8.6414741204504, + "learning_rate": 8.638137085353353e-06, + "loss": 18.3836, + "step": 14411 + }, + { + "epoch": 0.26343977918730693, + "grad_norm": 8.247130410778336, + "learning_rate": 8.63793402318769e-06, + "loss": 18.1161, + "step": 14412 + }, + { + "epoch": 0.26345805838375347, + "grad_norm": 6.67902185171827, + "learning_rate": 8.637730948271388e-06, + "loss": 17.6532, + "step": 14413 + }, + { + "epoch": 0.26347633758019995, + "grad_norm": 6.451326450170129, + "learning_rate": 8.637527860605153e-06, + "loss": 17.7221, + "step": 14414 + }, + { + "epoch": 0.2634946167766465, + "grad_norm": 7.756004704377866, + "learning_rate": 8.637324760189698e-06, + "loss": 17.9408, + "step": 14415 + }, + { + "epoch": 0.263512895973093, + "grad_norm": 7.634821663966823, + "learning_rate": 8.637121647025734e-06, + "loss": 18.153, + "step": 14416 + }, + { + "epoch": 0.26353117516953956, + "grad_norm": 7.6466337157005295, + "learning_rate": 8.636918521113978e-06, + "loss": 18.325, + "step": 14417 + }, + { + "epoch": 0.2635494543659861, + "grad_norm": 6.582028684780957, + "learning_rate": 8.636715382455136e-06, + "loss": 17.5692, + "step": 14418 + }, + { + "epoch": 0.26356773356243257, + "grad_norm": 7.190494107280675, + "learning_rate": 8.636512231049921e-06, + "loss": 17.8639, + "step": 14419 + }, + { + "epoch": 0.2635860127588791, + "grad_norm": 6.38178429188656, + "learning_rate": 8.636309066899049e-06, + "loss": 17.5957, + "step": 14420 + }, + { + "epoch": 0.26360429195532564, + "grad_norm": 7.037021519255821, + "learning_rate": 8.636105890003226e-06, + "loss": 17.8983, + "step": 14421 + }, + { + "epoch": 0.2636225711517722, + "grad_norm": 7.051985242002053, + "learning_rate": 8.63590270036317e-06, + "loss": 17.7895, + "step": 14422 + }, + { + "epoch": 0.2636408503482187, + "grad_norm": 8.026578241369034, + "learning_rate": 8.635699497979589e-06, + "loss": 18.2436, + "step": 14423 + }, + { + "epoch": 0.2636591295446652, + "grad_norm": 5.57639793423407, + "learning_rate": 8.635496282853198e-06, + "loss": 17.2686, + "step": 14424 + }, + { + "epoch": 0.26367740874111173, + "grad_norm": 6.7780316664429385, + "learning_rate": 8.635293054984708e-06, + "loss": 17.6475, + "step": 14425 + }, + { + "epoch": 0.26369568793755827, + "grad_norm": 5.959951157596639, + "learning_rate": 8.63508981437483e-06, + "loss": 17.4411, + "step": 14426 + }, + { + "epoch": 0.2637139671340048, + "grad_norm": 7.292027672746734, + "learning_rate": 8.634886561024278e-06, + "loss": 17.8111, + "step": 14427 + }, + { + "epoch": 0.26373224633045134, + "grad_norm": 6.441026208286518, + "learning_rate": 8.634683294933764e-06, + "loss": 17.4282, + "step": 14428 + }, + { + "epoch": 0.2637505255268978, + "grad_norm": 8.410255753178866, + "learning_rate": 8.634480016104e-06, + "loss": 18.1782, + "step": 14429 + }, + { + "epoch": 0.26376880472334435, + "grad_norm": 6.094913465751023, + "learning_rate": 8.6342767245357e-06, + "loss": 17.4715, + "step": 14430 + }, + { + "epoch": 0.2637870839197909, + "grad_norm": 8.85569749760153, + "learning_rate": 8.634073420229576e-06, + "loss": 18.1532, + "step": 14431 + }, + { + "epoch": 0.2638053631162374, + "grad_norm": 5.934754241360311, + "learning_rate": 8.63387010318634e-06, + "loss": 17.4521, + "step": 14432 + }, + { + "epoch": 0.26382364231268396, + "grad_norm": 
5.7609081418518935, + "learning_rate": 8.633666773406703e-06, + "loss": 17.3163, + "step": 14433 + }, + { + "epoch": 0.26384192150913044, + "grad_norm": 7.808406130301736, + "learning_rate": 8.63346343089138e-06, + "loss": 17.7417, + "step": 14434 + }, + { + "epoch": 0.263860200705577, + "grad_norm": 7.9984500516837596, + "learning_rate": 8.633260075641084e-06, + "loss": 17.859, + "step": 14435 + }, + { + "epoch": 0.2638784799020235, + "grad_norm": 7.958538072619891, + "learning_rate": 8.633056707656524e-06, + "loss": 17.8181, + "step": 14436 + }, + { + "epoch": 0.26389675909847005, + "grad_norm": 6.342604782252801, + "learning_rate": 8.632853326938417e-06, + "loss": 17.5564, + "step": 14437 + }, + { + "epoch": 0.2639150382949166, + "grad_norm": 6.403760931250704, + "learning_rate": 8.632649933487476e-06, + "loss": 17.3521, + "step": 14438 + }, + { + "epoch": 0.26393331749136306, + "grad_norm": 5.973544886025513, + "learning_rate": 8.63244652730441e-06, + "loss": 17.3415, + "step": 14439 + }, + { + "epoch": 0.2639515966878096, + "grad_norm": 7.24917701930819, + "learning_rate": 8.632243108389935e-06, + "loss": 17.8393, + "step": 14440 + }, + { + "epoch": 0.26396987588425613, + "grad_norm": 6.79616203564851, + "learning_rate": 8.632039676744764e-06, + "loss": 17.7178, + "step": 14441 + }, + { + "epoch": 0.26398815508070267, + "grad_norm": 5.588646667658552, + "learning_rate": 8.631836232369607e-06, + "loss": 17.0608, + "step": 14442 + }, + { + "epoch": 0.26400643427714915, + "grad_norm": 5.409842674096702, + "learning_rate": 8.63163277526518e-06, + "loss": 16.9792, + "step": 14443 + }, + { + "epoch": 0.2640247134735957, + "grad_norm": 8.08944987169309, + "learning_rate": 8.631429305432196e-06, + "loss": 18.0805, + "step": 14444 + }, + { + "epoch": 0.2640429926700422, + "grad_norm": 5.948482874138709, + "learning_rate": 8.631225822871368e-06, + "loss": 17.1899, + "step": 14445 + }, + { + "epoch": 0.26406127186648876, + "grad_norm": 6.837103304402086, + "learning_rate": 8.631022327583407e-06, + "loss": 17.5914, + "step": 14446 + }, + { + "epoch": 0.2640795510629353, + "grad_norm": 5.961297593954145, + "learning_rate": 8.630818819569028e-06, + "loss": 17.2807, + "step": 14447 + }, + { + "epoch": 0.26409783025938177, + "grad_norm": 8.932231364435959, + "learning_rate": 8.630615298828946e-06, + "loss": 18.4577, + "step": 14448 + }, + { + "epoch": 0.2641161094558283, + "grad_norm": 7.367569768686041, + "learning_rate": 8.63041176536387e-06, + "loss": 17.9257, + "step": 14449 + }, + { + "epoch": 0.26413438865227484, + "grad_norm": 7.204328541088552, + "learning_rate": 8.630208219174516e-06, + "loss": 17.8198, + "step": 14450 + }, + { + "epoch": 0.2641526678487214, + "grad_norm": 6.21378854137978, + "learning_rate": 8.630004660261598e-06, + "loss": 17.2751, + "step": 14451 + }, + { + "epoch": 0.2641709470451679, + "grad_norm": 6.513665379950189, + "learning_rate": 8.629801088625829e-06, + "loss": 17.2317, + "step": 14452 + }, + { + "epoch": 0.2641892262416144, + "grad_norm": 7.386715292027365, + "learning_rate": 8.629597504267922e-06, + "loss": 17.7965, + "step": 14453 + }, + { + "epoch": 0.26420750543806093, + "grad_norm": 7.460690865408325, + "learning_rate": 8.62939390718859e-06, + "loss": 17.8174, + "step": 14454 + }, + { + "epoch": 0.26422578463450747, + "grad_norm": 7.629898885971991, + "learning_rate": 8.629190297388548e-06, + "loss": 18.054, + "step": 14455 + }, + { + "epoch": 0.264244063830954, + "grad_norm": 8.84710712682071, + "learning_rate": 8.62898667486851e-06, + "loss": 17.9528, + 
"step": 14456 + }, + { + "epoch": 0.26426234302740054, + "grad_norm": 6.6735793700097235, + "learning_rate": 8.628783039629185e-06, + "loss": 17.5621, + "step": 14457 + }, + { + "epoch": 0.264280622223847, + "grad_norm": 6.3392768492060725, + "learning_rate": 8.628579391671294e-06, + "loss": 17.2754, + "step": 14458 + }, + { + "epoch": 0.26429890142029355, + "grad_norm": 6.123077425313889, + "learning_rate": 8.628375730995546e-06, + "loss": 17.5178, + "step": 14459 + }, + { + "epoch": 0.2643171806167401, + "grad_norm": 6.035249372008523, + "learning_rate": 8.628172057602655e-06, + "loss": 17.315, + "step": 14460 + }, + { + "epoch": 0.2643354598131866, + "grad_norm": 6.471941189761329, + "learning_rate": 8.627968371493337e-06, + "loss": 17.3207, + "step": 14461 + }, + { + "epoch": 0.26435373900963316, + "grad_norm": 6.960751916740965, + "learning_rate": 8.627764672668303e-06, + "loss": 17.8303, + "step": 14462 + }, + { + "epoch": 0.26437201820607964, + "grad_norm": 7.260904605695864, + "learning_rate": 8.62756096112827e-06, + "loss": 17.9214, + "step": 14463 + }, + { + "epoch": 0.2643902974025262, + "grad_norm": 7.283451103095669, + "learning_rate": 8.62735723687395e-06, + "loss": 17.9416, + "step": 14464 + }, + { + "epoch": 0.2644085765989727, + "grad_norm": 8.309014198787658, + "learning_rate": 8.627153499906058e-06, + "loss": 18.113, + "step": 14465 + }, + { + "epoch": 0.26442685579541925, + "grad_norm": 7.576112195195461, + "learning_rate": 8.626949750225309e-06, + "loss": 18.0264, + "step": 14466 + }, + { + "epoch": 0.2644451349918658, + "grad_norm": 10.809108033541476, + "learning_rate": 8.626745987832415e-06, + "loss": 18.7501, + "step": 14467 + }, + { + "epoch": 0.26446341418831226, + "grad_norm": 7.762386969248576, + "learning_rate": 8.626542212728089e-06, + "loss": 17.9793, + "step": 14468 + }, + { + "epoch": 0.2644816933847588, + "grad_norm": 6.39395296041559, + "learning_rate": 8.62633842491305e-06, + "loss": 17.3432, + "step": 14469 + }, + { + "epoch": 0.26449997258120533, + "grad_norm": 7.274541419351831, + "learning_rate": 8.626134624388008e-06, + "loss": 17.459, + "step": 14470 + }, + { + "epoch": 0.26451825177765187, + "grad_norm": 8.082208749520747, + "learning_rate": 8.625930811153679e-06, + "loss": 18.1278, + "step": 14471 + }, + { + "epoch": 0.2645365309740984, + "grad_norm": 7.935183376958134, + "learning_rate": 8.625726985210778e-06, + "loss": 17.8109, + "step": 14472 + }, + { + "epoch": 0.2645548101705449, + "grad_norm": 6.301684228265136, + "learning_rate": 8.625523146560017e-06, + "loss": 17.5847, + "step": 14473 + }, + { + "epoch": 0.2645730893669914, + "grad_norm": 7.030397305329106, + "learning_rate": 8.625319295202113e-06, + "loss": 17.9024, + "step": 14474 + }, + { + "epoch": 0.26459136856343796, + "grad_norm": 6.374695690179215, + "learning_rate": 8.625115431137779e-06, + "loss": 17.9539, + "step": 14475 + }, + { + "epoch": 0.2646096477598845, + "grad_norm": 6.083516775600971, + "learning_rate": 8.62491155436773e-06, + "loss": 17.3094, + "step": 14476 + }, + { + "epoch": 0.26462792695633097, + "grad_norm": 6.625890657782554, + "learning_rate": 8.624707664892683e-06, + "loss": 17.8002, + "step": 14477 + }, + { + "epoch": 0.2646462061527775, + "grad_norm": 5.524092202770874, + "learning_rate": 8.624503762713347e-06, + "loss": 17.1825, + "step": 14478 + }, + { + "epoch": 0.26466448534922404, + "grad_norm": 6.2277588844392024, + "learning_rate": 8.624299847830441e-06, + "loss": 17.3205, + "step": 14479 + }, + { + "epoch": 0.2646827645456706, + "grad_norm": 
5.560057789845338, + "learning_rate": 8.624095920244677e-06, + "loss": 16.9911, + "step": 14480 + }, + { + "epoch": 0.2647010437421171, + "grad_norm": 7.52043293493985, + "learning_rate": 8.623891979956773e-06, + "loss": 18.0707, + "step": 14481 + }, + { + "epoch": 0.2647193229385636, + "grad_norm": 6.405890877516015, + "learning_rate": 8.623688026967443e-06, + "loss": 17.3705, + "step": 14482 + }, + { + "epoch": 0.26473760213501013, + "grad_norm": 5.585600076849905, + "learning_rate": 8.623484061277398e-06, + "loss": 17.0289, + "step": 14483 + }, + { + "epoch": 0.26475588133145667, + "grad_norm": 7.335419385052503, + "learning_rate": 8.623280082887357e-06, + "loss": 18.0216, + "step": 14484 + }, + { + "epoch": 0.2647741605279032, + "grad_norm": 6.6583291029687715, + "learning_rate": 8.623076091798036e-06, + "loss": 17.7233, + "step": 14485 + }, + { + "epoch": 0.26479243972434974, + "grad_norm": 5.613739837826254, + "learning_rate": 8.622872088010145e-06, + "loss": 17.111, + "step": 14486 + }, + { + "epoch": 0.2648107189207962, + "grad_norm": 6.1139705866781835, + "learning_rate": 8.622668071524403e-06, + "loss": 17.5578, + "step": 14487 + }, + { + "epoch": 0.26482899811724275, + "grad_norm": 8.973784823611156, + "learning_rate": 8.622464042341523e-06, + "loss": 18.1655, + "step": 14488 + }, + { + "epoch": 0.2648472773136893, + "grad_norm": 6.036535448526162, + "learning_rate": 8.622260000462222e-06, + "loss": 17.6154, + "step": 14489 + }, + { + "epoch": 0.2648655565101358, + "grad_norm": 8.662657752172933, + "learning_rate": 8.622055945887213e-06, + "loss": 17.9798, + "step": 14490 + }, + { + "epoch": 0.26488383570658236, + "grad_norm": 6.563700174415634, + "learning_rate": 8.621851878617212e-06, + "loss": 17.4027, + "step": 14491 + }, + { + "epoch": 0.26490211490302884, + "grad_norm": 7.4564540130171215, + "learning_rate": 8.621647798652935e-06, + "loss": 18.0004, + "step": 14492 + }, + { + "epoch": 0.2649203940994754, + "grad_norm": 6.749344302101685, + "learning_rate": 8.621443705995097e-06, + "loss": 17.5439, + "step": 14493 + }, + { + "epoch": 0.2649386732959219, + "grad_norm": 5.882199541576623, + "learning_rate": 8.621239600644414e-06, + "loss": 17.3339, + "step": 14494 + }, + { + "epoch": 0.26495695249236845, + "grad_norm": 6.383130482277111, + "learning_rate": 8.621035482601599e-06, + "loss": 17.7137, + "step": 14495 + }, + { + "epoch": 0.264975231688815, + "grad_norm": 7.484397129034903, + "learning_rate": 8.620831351867368e-06, + "loss": 18.2378, + "step": 14496 + }, + { + "epoch": 0.26499351088526146, + "grad_norm": 5.502308298688108, + "learning_rate": 8.620627208442439e-06, + "loss": 17.0519, + "step": 14497 + }, + { + "epoch": 0.265011790081708, + "grad_norm": 6.728858981392801, + "learning_rate": 8.620423052327525e-06, + "loss": 17.559, + "step": 14498 + }, + { + "epoch": 0.26503006927815453, + "grad_norm": 6.985670955971041, + "learning_rate": 8.620218883523342e-06, + "loss": 17.6186, + "step": 14499 + }, + { + "epoch": 0.26504834847460107, + "grad_norm": 6.758263002532076, + "learning_rate": 8.620014702030607e-06, + "loss": 17.909, + "step": 14500 + }, + { + "epoch": 0.2650666276710476, + "grad_norm": 7.034051090086027, + "learning_rate": 8.619810507850034e-06, + "loss": 18.0054, + "step": 14501 + }, + { + "epoch": 0.2650849068674941, + "grad_norm": 6.88773969867417, + "learning_rate": 8.619606300982339e-06, + "loss": 17.8061, + "step": 14502 + }, + { + "epoch": 0.2651031860639406, + "grad_norm": 7.212959710152913, + "learning_rate": 8.619402081428238e-06, + "loss": 
17.6886, + "step": 14503 + }, + { + "epoch": 0.26512146526038716, + "grad_norm": 5.972178634538793, + "learning_rate": 8.619197849188447e-06, + "loss": 17.1257, + "step": 14504 + }, + { + "epoch": 0.2651397444568337, + "grad_norm": 6.370199677002604, + "learning_rate": 8.618993604263683e-06, + "loss": 17.6626, + "step": 14505 + }, + { + "epoch": 0.26515802365328023, + "grad_norm": 7.295011005090328, + "learning_rate": 8.618789346654659e-06, + "loss": 18.0519, + "step": 14506 + }, + { + "epoch": 0.2651763028497267, + "grad_norm": 6.9344097714109365, + "learning_rate": 8.618585076362092e-06, + "loss": 17.8175, + "step": 14507 + }, + { + "epoch": 0.26519458204617324, + "grad_norm": 6.1592932685006305, + "learning_rate": 8.618380793386699e-06, + "loss": 17.4501, + "step": 14508 + }, + { + "epoch": 0.2652128612426198, + "grad_norm": 5.916675937894433, + "learning_rate": 8.618176497729197e-06, + "loss": 17.4744, + "step": 14509 + }, + { + "epoch": 0.2652311404390663, + "grad_norm": 7.562135774921811, + "learning_rate": 8.617972189390297e-06, + "loss": 17.6995, + "step": 14510 + }, + { + "epoch": 0.2652494196355128, + "grad_norm": 6.258737997475496, + "learning_rate": 8.61776786837072e-06, + "loss": 17.5063, + "step": 14511 + }, + { + "epoch": 0.26526769883195933, + "grad_norm": 5.661577547064191, + "learning_rate": 8.61756353467118e-06, + "loss": 17.2176, + "step": 14512 + }, + { + "epoch": 0.26528597802840587, + "grad_norm": 7.15404900491835, + "learning_rate": 8.617359188292395e-06, + "loss": 17.4497, + "step": 14513 + }, + { + "epoch": 0.2653042572248524, + "grad_norm": 8.59396164793559, + "learning_rate": 8.61715482923508e-06, + "loss": 18.2819, + "step": 14514 + }, + { + "epoch": 0.26532253642129894, + "grad_norm": 7.052255945748163, + "learning_rate": 8.61695045749995e-06, + "loss": 17.7761, + "step": 14515 + }, + { + "epoch": 0.2653408156177454, + "grad_norm": 6.875029062853168, + "learning_rate": 8.616746073087723e-06, + "loss": 17.636, + "step": 14516 + }, + { + "epoch": 0.26535909481419195, + "grad_norm": 6.914797687086098, + "learning_rate": 8.616541675999114e-06, + "loss": 17.8272, + "step": 14517 + }, + { + "epoch": 0.2653773740106385, + "grad_norm": 7.397885008124451, + "learning_rate": 8.616337266234841e-06, + "loss": 18.0082, + "step": 14518 + }, + { + "epoch": 0.265395653207085, + "grad_norm": 7.338120618208661, + "learning_rate": 8.616132843795619e-06, + "loss": 17.4697, + "step": 14519 + }, + { + "epoch": 0.26541393240353156, + "grad_norm": 6.894638124573064, + "learning_rate": 8.615928408682167e-06, + "loss": 17.6686, + "step": 14520 + }, + { + "epoch": 0.26543221159997804, + "grad_norm": 6.8820780687649785, + "learning_rate": 8.615723960895198e-06, + "loss": 17.5165, + "step": 14521 + }, + { + "epoch": 0.2654504907964246, + "grad_norm": 9.896950902426656, + "learning_rate": 8.615519500435432e-06, + "loss": 17.7061, + "step": 14522 + }, + { + "epoch": 0.2654687699928711, + "grad_norm": 6.559777394456265, + "learning_rate": 8.615315027303582e-06, + "loss": 17.9128, + "step": 14523 + }, + { + "epoch": 0.26548704918931765, + "grad_norm": 6.956146471135961, + "learning_rate": 8.615110541500366e-06, + "loss": 17.4917, + "step": 14524 + }, + { + "epoch": 0.2655053283857642, + "grad_norm": 7.823582414680977, + "learning_rate": 8.614906043026503e-06, + "loss": 18.1326, + "step": 14525 + }, + { + "epoch": 0.26552360758221066, + "grad_norm": 7.476708248993483, + "learning_rate": 8.614701531882708e-06, + "loss": 17.8015, + "step": 14526 + }, + { + "epoch": 0.2655418867786572, + 
"grad_norm": 7.054524244592589, + "learning_rate": 8.614497008069697e-06, + "loss": 17.6633, + "step": 14527 + }, + { + "epoch": 0.26556016597510373, + "grad_norm": 6.199399833770583, + "learning_rate": 8.614292471588188e-06, + "loss": 17.1404, + "step": 14528 + }, + { + "epoch": 0.26557844517155027, + "grad_norm": 5.429267886410322, + "learning_rate": 8.614087922438899e-06, + "loss": 16.9066, + "step": 14529 + }, + { + "epoch": 0.2655967243679968, + "grad_norm": 6.745613806224077, + "learning_rate": 8.613883360622543e-06, + "loss": 17.7029, + "step": 14530 + }, + { + "epoch": 0.2656150035644433, + "grad_norm": 7.164765055197569, + "learning_rate": 8.61367878613984e-06, + "loss": 17.398, + "step": 14531 + }, + { + "epoch": 0.2656332827608898, + "grad_norm": 7.451254490299786, + "learning_rate": 8.613474198991508e-06, + "loss": 17.8344, + "step": 14532 + }, + { + "epoch": 0.26565156195733636, + "grad_norm": 6.210960468432399, + "learning_rate": 8.61326959917826e-06, + "loss": 17.331, + "step": 14533 + }, + { + "epoch": 0.2656698411537829, + "grad_norm": 6.761316470227943, + "learning_rate": 8.613064986700817e-06, + "loss": 17.7626, + "step": 14534 + }, + { + "epoch": 0.26568812035022943, + "grad_norm": 7.566935423976791, + "learning_rate": 8.612860361559895e-06, + "loss": 17.8178, + "step": 14535 + }, + { + "epoch": 0.2657063995466759, + "grad_norm": 8.833309756272051, + "learning_rate": 8.61265572375621e-06, + "loss": 18.8689, + "step": 14536 + }, + { + "epoch": 0.26572467874312244, + "grad_norm": 6.8978068956033365, + "learning_rate": 8.612451073290481e-06, + "loss": 17.5856, + "step": 14537 + }, + { + "epoch": 0.265742957939569, + "grad_norm": 6.444851151410214, + "learning_rate": 8.612246410163426e-06, + "loss": 17.4258, + "step": 14538 + }, + { + "epoch": 0.2657612371360155, + "grad_norm": 6.46776566739074, + "learning_rate": 8.612041734375758e-06, + "loss": 17.569, + "step": 14539 + }, + { + "epoch": 0.26577951633246205, + "grad_norm": 6.977395888144881, + "learning_rate": 8.611837045928199e-06, + "loss": 17.8813, + "step": 14540 + }, + { + "epoch": 0.26579779552890853, + "grad_norm": 6.410358467372472, + "learning_rate": 8.611632344821463e-06, + "loss": 17.4866, + "step": 14541 + }, + { + "epoch": 0.26581607472535507, + "grad_norm": 7.950135684060448, + "learning_rate": 8.61142763105627e-06, + "loss": 18.0721, + "step": 14542 + }, + { + "epoch": 0.2658343539218016, + "grad_norm": 6.61809402317436, + "learning_rate": 8.611222904633336e-06, + "loss": 17.6161, + "step": 14543 + }, + { + "epoch": 0.26585263311824814, + "grad_norm": 7.336321824901097, + "learning_rate": 8.61101816555338e-06, + "loss": 17.8365, + "step": 14544 + }, + { + "epoch": 0.2658709123146946, + "grad_norm": 6.737137848233468, + "learning_rate": 8.61081341381712e-06, + "loss": 17.5722, + "step": 14545 + }, + { + "epoch": 0.26588919151114115, + "grad_norm": 7.192786767385998, + "learning_rate": 8.610608649425269e-06, + "loss": 17.6626, + "step": 14546 + }, + { + "epoch": 0.2659074707075877, + "grad_norm": 6.510261348040814, + "learning_rate": 8.61040387237855e-06, + "loss": 17.5307, + "step": 14547 + }, + { + "epoch": 0.2659257499040342, + "grad_norm": 6.578354409706376, + "learning_rate": 8.610199082677676e-06, + "loss": 17.2704, + "step": 14548 + }, + { + "epoch": 0.26594402910048076, + "grad_norm": 6.025007721595394, + "learning_rate": 8.609994280323371e-06, + "loss": 17.1077, + "step": 14549 + }, + { + "epoch": 0.26596230829692724, + "grad_norm": 5.938124798069925, + "learning_rate": 8.609789465316349e-06, + 
"loss": 17.3366, + "step": 14550 + }, + { + "epoch": 0.2659805874933738, + "grad_norm": 7.9793418290082325, + "learning_rate": 8.609584637657327e-06, + "loss": 17.2621, + "step": 14551 + }, + { + "epoch": 0.2659988666898203, + "grad_norm": 7.599806684319761, + "learning_rate": 8.609379797347025e-06, + "loss": 18.1463, + "step": 14552 + }, + { + "epoch": 0.26601714588626685, + "grad_norm": 6.192626430350422, + "learning_rate": 8.609174944386158e-06, + "loss": 17.5365, + "step": 14553 + }, + { + "epoch": 0.2660354250827134, + "grad_norm": 7.024956925027557, + "learning_rate": 8.608970078775448e-06, + "loss": 17.8295, + "step": 14554 + }, + { + "epoch": 0.26605370427915986, + "grad_norm": 7.1205064808478475, + "learning_rate": 8.60876520051561e-06, + "loss": 17.9813, + "step": 14555 + }, + { + "epoch": 0.2660719834756064, + "grad_norm": 6.742292643754117, + "learning_rate": 8.608560309607364e-06, + "loss": 17.7733, + "step": 14556 + }, + { + "epoch": 0.26609026267205294, + "grad_norm": 6.602844337637779, + "learning_rate": 8.608355406051426e-06, + "loss": 17.6018, + "step": 14557 + }, + { + "epoch": 0.26610854186849947, + "grad_norm": 6.674092450200186, + "learning_rate": 8.608150489848516e-06, + "loss": 17.5333, + "step": 14558 + }, + { + "epoch": 0.266126821064946, + "grad_norm": 6.330228946805338, + "learning_rate": 8.607945560999351e-06, + "loss": 17.2829, + "step": 14559 + }, + { + "epoch": 0.2661451002613925, + "grad_norm": 5.141724277165614, + "learning_rate": 8.607740619504651e-06, + "loss": 16.9891, + "step": 14560 + }, + { + "epoch": 0.266163379457839, + "grad_norm": 6.274518320128126, + "learning_rate": 8.607535665365133e-06, + "loss": 17.0762, + "step": 14561 + }, + { + "epoch": 0.26618165865428556, + "grad_norm": 7.271786139082388, + "learning_rate": 8.607330698581516e-06, + "loss": 17.8042, + "step": 14562 + }, + { + "epoch": 0.2661999378507321, + "grad_norm": 6.7092664226247525, + "learning_rate": 8.607125719154517e-06, + "loss": 17.6421, + "step": 14563 + }, + { + "epoch": 0.26621821704717863, + "grad_norm": 7.780958231915097, + "learning_rate": 8.606920727084856e-06, + "loss": 17.9761, + "step": 14564 + }, + { + "epoch": 0.2662364962436251, + "grad_norm": 10.250632342629785, + "learning_rate": 8.60671572237325e-06, + "loss": 18.1369, + "step": 14565 + }, + { + "epoch": 0.26625477544007164, + "grad_norm": 5.938952848297895, + "learning_rate": 8.606510705020418e-06, + "loss": 17.3673, + "step": 14566 + }, + { + "epoch": 0.2662730546365182, + "grad_norm": 6.644777949786896, + "learning_rate": 8.606305675027081e-06, + "loss": 17.4135, + "step": 14567 + }, + { + "epoch": 0.2662913338329647, + "grad_norm": 6.9680846421093525, + "learning_rate": 8.606100632393955e-06, + "loss": 17.6119, + "step": 14568 + }, + { + "epoch": 0.26630961302941125, + "grad_norm": 7.228445164925551, + "learning_rate": 8.605895577121756e-06, + "loss": 17.6872, + "step": 14569 + }, + { + "epoch": 0.26632789222585773, + "grad_norm": 6.793796231481781, + "learning_rate": 8.60569050921121e-06, + "loss": 17.6463, + "step": 14570 + }, + { + "epoch": 0.26634617142230427, + "grad_norm": 8.184261291908337, + "learning_rate": 8.60548542866303e-06, + "loss": 18.0862, + "step": 14571 + }, + { + "epoch": 0.2663644506187508, + "grad_norm": 6.669093698274262, + "learning_rate": 8.605280335477937e-06, + "loss": 17.3625, + "step": 14572 + }, + { + "epoch": 0.26638272981519734, + "grad_norm": 8.022540232457578, + "learning_rate": 8.60507522965665e-06, + "loss": 17.8668, + "step": 14573 + }, + { + "epoch": 0.2664010090116439, 
+ "grad_norm": 6.359887614228244, + "learning_rate": 8.604870111199884e-06, + "loss": 17.3532, + "step": 14574 + }, + { + "epoch": 0.26641928820809035, + "grad_norm": 6.00556214434223, + "learning_rate": 8.604664980108363e-06, + "loss": 17.2011, + "step": 14575 + }, + { + "epoch": 0.2664375674045369, + "grad_norm": 8.240097576013113, + "learning_rate": 8.604459836382805e-06, + "loss": 18.0064, + "step": 14576 + }, + { + "epoch": 0.2664558466009834, + "grad_norm": 7.621136000245503, + "learning_rate": 8.604254680023926e-06, + "loss": 18.0655, + "step": 14577 + }, + { + "epoch": 0.26647412579742996, + "grad_norm": 7.241932252630637, + "learning_rate": 8.604049511032448e-06, + "loss": 17.7946, + "step": 14578 + }, + { + "epoch": 0.26649240499387644, + "grad_norm": 7.38691990934202, + "learning_rate": 8.60384432940909e-06, + "loss": 18.3702, + "step": 14579 + }, + { + "epoch": 0.266510684190323, + "grad_norm": 5.840428221561425, + "learning_rate": 8.603639135154571e-06, + "loss": 17.2816, + "step": 14580 + }, + { + "epoch": 0.2665289633867695, + "grad_norm": 5.866197550155619, + "learning_rate": 8.603433928269607e-06, + "loss": 17.1281, + "step": 14581 + }, + { + "epoch": 0.26654724258321605, + "grad_norm": 5.739027440440411, + "learning_rate": 8.603228708754923e-06, + "loss": 17.2506, + "step": 14582 + }, + { + "epoch": 0.2665655217796626, + "grad_norm": 7.235600044854483, + "learning_rate": 8.603023476611231e-06, + "loss": 18.0437, + "step": 14583 + }, + { + "epoch": 0.26658380097610906, + "grad_norm": 5.410088168252828, + "learning_rate": 8.602818231839258e-06, + "loss": 17.0568, + "step": 14584 + }, + { + "epoch": 0.2666020801725556, + "grad_norm": 5.259849600457909, + "learning_rate": 8.602612974439719e-06, + "loss": 17.0523, + "step": 14585 + }, + { + "epoch": 0.26662035936900214, + "grad_norm": 7.771111353775161, + "learning_rate": 8.602407704413333e-06, + "loss": 18.318, + "step": 14586 + }, + { + "epoch": 0.26663863856544867, + "grad_norm": 6.016256629883184, + "learning_rate": 8.602202421760821e-06, + "loss": 17.3069, + "step": 14587 + }, + { + "epoch": 0.2666569177618952, + "grad_norm": 8.535257209161756, + "learning_rate": 8.601997126482902e-06, + "loss": 18.725, + "step": 14588 + }, + { + "epoch": 0.2666751969583417, + "grad_norm": 6.675129458470847, + "learning_rate": 8.601791818580296e-06, + "loss": 17.8127, + "step": 14589 + }, + { + "epoch": 0.2666934761547882, + "grad_norm": 7.876609683187753, + "learning_rate": 8.60158649805372e-06, + "loss": 18.1652, + "step": 14590 + }, + { + "epoch": 0.26671175535123476, + "grad_norm": 17.921230096264836, + "learning_rate": 8.6013811649039e-06, + "loss": 17.9632, + "step": 14591 + }, + { + "epoch": 0.2667300345476813, + "grad_norm": 6.598397901157439, + "learning_rate": 8.601175819131548e-06, + "loss": 17.505, + "step": 14592 + }, + { + "epoch": 0.26674831374412783, + "grad_norm": 7.095345991456853, + "learning_rate": 8.60097046073739e-06, + "loss": 18.1829, + "step": 14593 + }, + { + "epoch": 0.2667665929405743, + "grad_norm": 9.159927122606996, + "learning_rate": 8.600765089722141e-06, + "loss": 17.9415, + "step": 14594 + }, + { + "epoch": 0.26678487213702085, + "grad_norm": 6.433372609963939, + "learning_rate": 8.600559706086524e-06, + "loss": 17.3582, + "step": 14595 + }, + { + "epoch": 0.2668031513334674, + "grad_norm": 6.477033924801939, + "learning_rate": 8.600354309831256e-06, + "loss": 17.3693, + "step": 14596 + }, + { + "epoch": 0.2668214305299139, + "grad_norm": 7.83575756565574, + "learning_rate": 8.60014890095706e-06, + 
"loss": 18.4398, + "step": 14597 + }, + { + "epoch": 0.26683970972636045, + "grad_norm": 7.317859995714428, + "learning_rate": 8.599943479464655e-06, + "loss": 17.9391, + "step": 14598 + }, + { + "epoch": 0.26685798892280693, + "grad_norm": 6.995436993292523, + "learning_rate": 8.59973804535476e-06, + "loss": 17.8577, + "step": 14599 + }, + { + "epoch": 0.26687626811925347, + "grad_norm": 5.771502054950156, + "learning_rate": 8.599532598628095e-06, + "loss": 17.3081, + "step": 14600 + }, + { + "epoch": 0.2668945473157, + "grad_norm": 6.9322850295424105, + "learning_rate": 8.599327139285381e-06, + "loss": 17.506, + "step": 14601 + }, + { + "epoch": 0.26691282651214654, + "grad_norm": 7.608625176015776, + "learning_rate": 8.599121667327336e-06, + "loss": 17.9819, + "step": 14602 + }, + { + "epoch": 0.2669311057085931, + "grad_norm": 6.300309634688053, + "learning_rate": 8.598916182754684e-06, + "loss": 17.4234, + "step": 14603 + }, + { + "epoch": 0.26694938490503956, + "grad_norm": 6.457807239142948, + "learning_rate": 8.598710685568144e-06, + "loss": 17.3832, + "step": 14604 + }, + { + "epoch": 0.2669676641014861, + "grad_norm": 6.317312787942724, + "learning_rate": 8.598505175768433e-06, + "loss": 17.5229, + "step": 14605 + }, + { + "epoch": 0.2669859432979326, + "grad_norm": 6.8525647886978245, + "learning_rate": 8.598299653356275e-06, + "loss": 17.914, + "step": 14606 + }, + { + "epoch": 0.26700422249437916, + "grad_norm": 6.08648184309682, + "learning_rate": 8.598094118332387e-06, + "loss": 17.3699, + "step": 14607 + }, + { + "epoch": 0.2670225016908257, + "grad_norm": 7.168418025498164, + "learning_rate": 8.597888570697493e-06, + "loss": 17.8875, + "step": 14608 + }, + { + "epoch": 0.2670407808872722, + "grad_norm": 6.978072260586743, + "learning_rate": 8.597683010452313e-06, + "loss": 17.8669, + "step": 14609 + }, + { + "epoch": 0.2670590600837187, + "grad_norm": 5.68928948623838, + "learning_rate": 8.597477437597564e-06, + "loss": 17.4055, + "step": 14610 + }, + { + "epoch": 0.26707733928016525, + "grad_norm": 6.7831148775858034, + "learning_rate": 8.59727185213397e-06, + "loss": 17.8555, + "step": 14611 + }, + { + "epoch": 0.2670956184766118, + "grad_norm": 6.9694905142115395, + "learning_rate": 8.59706625406225e-06, + "loss": 17.8434, + "step": 14612 + }, + { + "epoch": 0.26711389767305826, + "grad_norm": 6.878696532269876, + "learning_rate": 8.596860643383124e-06, + "loss": 17.6899, + "step": 14613 + }, + { + "epoch": 0.2671321768695048, + "grad_norm": 5.971757207828341, + "learning_rate": 8.596655020097314e-06, + "loss": 17.3628, + "step": 14614 + }, + { + "epoch": 0.26715045606595134, + "grad_norm": 5.842889536628196, + "learning_rate": 8.596449384205541e-06, + "loss": 17.2973, + "step": 14615 + }, + { + "epoch": 0.26716873526239787, + "grad_norm": 7.238832715010462, + "learning_rate": 8.596243735708525e-06, + "loss": 17.9668, + "step": 14616 + }, + { + "epoch": 0.2671870144588444, + "grad_norm": 7.1924676079170276, + "learning_rate": 8.596038074606987e-06, + "loss": 17.6888, + "step": 14617 + }, + { + "epoch": 0.2672052936552909, + "grad_norm": 6.889844749206356, + "learning_rate": 8.595832400901645e-06, + "loss": 17.5642, + "step": 14618 + }, + { + "epoch": 0.2672235728517374, + "grad_norm": 7.144385758465032, + "learning_rate": 8.595626714593226e-06, + "loss": 17.5755, + "step": 14619 + }, + { + "epoch": 0.26724185204818396, + "grad_norm": 7.062425882232001, + "learning_rate": 8.595421015682446e-06, + "loss": 17.8704, + "step": 14620 + }, + { + "epoch": 0.2672601312446305, + 
"grad_norm": 6.977461440582134, + "learning_rate": 8.595215304170026e-06, + "loss": 17.7499, + "step": 14621 + }, + { + "epoch": 0.26727841044107703, + "grad_norm": 7.886577020516829, + "learning_rate": 8.59500958005669e-06, + "loss": 17.8037, + "step": 14622 + }, + { + "epoch": 0.2672966896375235, + "grad_norm": 6.188005188647903, + "learning_rate": 8.594803843343158e-06, + "loss": 17.3182, + "step": 14623 + }, + { + "epoch": 0.26731496883397005, + "grad_norm": 7.089846529298709, + "learning_rate": 8.59459809403015e-06, + "loss": 17.7172, + "step": 14624 + }, + { + "epoch": 0.2673332480304166, + "grad_norm": 8.26504148083267, + "learning_rate": 8.594392332118385e-06, + "loss": 18.5234, + "step": 14625 + }, + { + "epoch": 0.2673515272268631, + "grad_norm": 6.886041654859538, + "learning_rate": 8.594186557608587e-06, + "loss": 17.6315, + "step": 14626 + }, + { + "epoch": 0.26736980642330965, + "grad_norm": 8.288044682399347, + "learning_rate": 8.59398077050148e-06, + "loss": 17.97, + "step": 14627 + }, + { + "epoch": 0.26738808561975613, + "grad_norm": 7.169456483789491, + "learning_rate": 8.59377497079778e-06, + "loss": 18.0894, + "step": 14628 + }, + { + "epoch": 0.26740636481620267, + "grad_norm": 5.904160673266783, + "learning_rate": 8.593569158498213e-06, + "loss": 17.3124, + "step": 14629 + }, + { + "epoch": 0.2674246440126492, + "grad_norm": 6.192390315062664, + "learning_rate": 8.593363333603498e-06, + "loss": 17.4309, + "step": 14630 + }, + { + "epoch": 0.26744292320909574, + "grad_norm": 6.815067021765786, + "learning_rate": 8.593157496114353e-06, + "loss": 17.8669, + "step": 14631 + }, + { + "epoch": 0.2674612024055423, + "grad_norm": 9.663353977931491, + "learning_rate": 8.592951646031504e-06, + "loss": 18.0593, + "step": 14632 + }, + { + "epoch": 0.26747948160198876, + "grad_norm": 7.027431175448057, + "learning_rate": 8.592745783355673e-06, + "loss": 17.511, + "step": 14633 + }, + { + "epoch": 0.2674977607984353, + "grad_norm": 7.165705104412249, + "learning_rate": 8.592539908087578e-06, + "loss": 17.9487, + "step": 14634 + }, + { + "epoch": 0.2675160399948818, + "grad_norm": 7.1448749636832725, + "learning_rate": 8.592334020227943e-06, + "loss": 17.998, + "step": 14635 + }, + { + "epoch": 0.26753431919132836, + "grad_norm": 7.8146278493319, + "learning_rate": 8.59212811977749e-06, + "loss": 17.9695, + "step": 14636 + }, + { + "epoch": 0.2675525983877749, + "grad_norm": 5.7202952675042384, + "learning_rate": 8.591922206736937e-06, + "loss": 17.4506, + "step": 14637 + }, + { + "epoch": 0.2675708775842214, + "grad_norm": 6.279828032073376, + "learning_rate": 8.591716281107013e-06, + "loss": 17.7869, + "step": 14638 + }, + { + "epoch": 0.2675891567806679, + "grad_norm": 5.402484171696131, + "learning_rate": 8.59151034288843e-06, + "loss": 17.0872, + "step": 14639 + }, + { + "epoch": 0.26760743597711445, + "grad_norm": 6.172026518418237, + "learning_rate": 8.591304392081917e-06, + "loss": 17.471, + "step": 14640 + }, + { + "epoch": 0.267625715173561, + "grad_norm": 6.823439932628753, + "learning_rate": 8.591098428688194e-06, + "loss": 18.1081, + "step": 14641 + }, + { + "epoch": 0.2676439943700075, + "grad_norm": 6.5398142477364924, + "learning_rate": 8.590892452707983e-06, + "loss": 17.6611, + "step": 14642 + }, + { + "epoch": 0.267662273566454, + "grad_norm": 5.766165614977785, + "learning_rate": 8.590686464142006e-06, + "loss": 17.4685, + "step": 14643 + }, + { + "epoch": 0.26768055276290054, + "grad_norm": 6.625144022004418, + "learning_rate": 8.590480462990983e-06, + "loss": 
18.028, + "step": 14644 + }, + { + "epoch": 0.26769883195934707, + "grad_norm": 6.958001094920115, + "learning_rate": 8.590274449255638e-06, + "loss": 17.6994, + "step": 14645 + }, + { + "epoch": 0.2677171111557936, + "grad_norm": 6.206127260729637, + "learning_rate": 8.590068422936693e-06, + "loss": 17.5663, + "step": 14646 + }, + { + "epoch": 0.2677353903522401, + "grad_norm": 5.524311365369359, + "learning_rate": 8.589862384034869e-06, + "loss": 17.2548, + "step": 14647 + }, + { + "epoch": 0.2677536695486866, + "grad_norm": 6.794287230566698, + "learning_rate": 8.589656332550888e-06, + "loss": 17.6116, + "step": 14648 + }, + { + "epoch": 0.26777194874513316, + "grad_norm": 6.011601703938022, + "learning_rate": 8.589450268485475e-06, + "loss": 17.4124, + "step": 14649 + }, + { + "epoch": 0.2677902279415797, + "grad_norm": 7.3348265994159, + "learning_rate": 8.58924419183935e-06, + "loss": 18.0039, + "step": 14650 + }, + { + "epoch": 0.26780850713802623, + "grad_norm": 6.469191733093986, + "learning_rate": 8.589038102613235e-06, + "loss": 17.4543, + "step": 14651 + }, + { + "epoch": 0.2678267863344727, + "grad_norm": 6.734930912931436, + "learning_rate": 8.588832000807852e-06, + "loss": 18.0038, + "step": 14652 + }, + { + "epoch": 0.26784506553091925, + "grad_norm": 6.841974117358319, + "learning_rate": 8.588625886423926e-06, + "loss": 17.8994, + "step": 14653 + }, + { + "epoch": 0.2678633447273658, + "grad_norm": 6.354695599013013, + "learning_rate": 8.588419759462176e-06, + "loss": 17.1604, + "step": 14654 + }, + { + "epoch": 0.2678816239238123, + "grad_norm": 6.437731998455454, + "learning_rate": 8.588213619923328e-06, + "loss": 17.7682, + "step": 14655 + }, + { + "epoch": 0.26789990312025885, + "grad_norm": 6.279993746879131, + "learning_rate": 8.588007467808101e-06, + "loss": 17.2485, + "step": 14656 + }, + { + "epoch": 0.26791818231670533, + "grad_norm": 8.061355332527341, + "learning_rate": 8.58780130311722e-06, + "loss": 18.0273, + "step": 14657 + }, + { + "epoch": 0.26793646151315187, + "grad_norm": 5.96166967807284, + "learning_rate": 8.587595125851407e-06, + "loss": 17.2501, + "step": 14658 + }, + { + "epoch": 0.2679547407095984, + "grad_norm": 7.3753159021860055, + "learning_rate": 8.587388936011386e-06, + "loss": 18.1227, + "step": 14659 + }, + { + "epoch": 0.26797301990604494, + "grad_norm": 6.456202514061895, + "learning_rate": 8.587182733597874e-06, + "loss": 17.478, + "step": 14660 + }, + { + "epoch": 0.2679912991024915, + "grad_norm": 6.459535256272963, + "learning_rate": 8.5869765186116e-06, + "loss": 17.5381, + "step": 14661 + }, + { + "epoch": 0.26800957829893796, + "grad_norm": 5.474720377856238, + "learning_rate": 8.586770291053286e-06, + "loss": 17.1773, + "step": 14662 + }, + { + "epoch": 0.2680278574953845, + "grad_norm": 6.648757690869516, + "learning_rate": 8.586564050923651e-06, + "loss": 17.7136, + "step": 14663 + }, + { + "epoch": 0.268046136691831, + "grad_norm": 6.966701301159261, + "learning_rate": 8.586357798223421e-06, + "loss": 17.5491, + "step": 14664 + }, + { + "epoch": 0.26806441588827756, + "grad_norm": 8.349272246970072, + "learning_rate": 8.586151532953319e-06, + "loss": 18.0761, + "step": 14665 + }, + { + "epoch": 0.2680826950847241, + "grad_norm": 8.472676963353326, + "learning_rate": 8.585945255114065e-06, + "loss": 18.4994, + "step": 14666 + }, + { + "epoch": 0.2681009742811706, + "grad_norm": 6.414726136351401, + "learning_rate": 8.585738964706387e-06, + "loss": 17.297, + "step": 14667 + }, + { + "epoch": 0.2681192534776171, + "grad_norm": 
6.668931944589009, + "learning_rate": 8.585532661731002e-06, + "loss": 17.6723, + "step": 14668 + }, + { + "epoch": 0.26813753267406365, + "grad_norm": 6.954940343528749, + "learning_rate": 8.585326346188639e-06, + "loss": 17.718, + "step": 14669 + }, + { + "epoch": 0.2681558118705102, + "grad_norm": 5.9340071981898035, + "learning_rate": 8.585120018080016e-06, + "loss": 17.4597, + "step": 14670 + }, + { + "epoch": 0.2681740910669567, + "grad_norm": 7.326870791647553, + "learning_rate": 8.58491367740586e-06, + "loss": 18.0068, + "step": 14671 + }, + { + "epoch": 0.2681923702634032, + "grad_norm": 7.827022488528485, + "learning_rate": 8.584707324166892e-06, + "loss": 17.7907, + "step": 14672 + }, + { + "epoch": 0.26821064945984974, + "grad_norm": 10.011623012769867, + "learning_rate": 8.584500958363835e-06, + "loss": 17.5316, + "step": 14673 + }, + { + "epoch": 0.2682289286562963, + "grad_norm": 6.798564723673695, + "learning_rate": 8.584294579997414e-06, + "loss": 17.4025, + "step": 14674 + }, + { + "epoch": 0.2682472078527428, + "grad_norm": 7.56727091434625, + "learning_rate": 8.584088189068352e-06, + "loss": 17.8541, + "step": 14675 + }, + { + "epoch": 0.26826548704918934, + "grad_norm": 7.037791027692974, + "learning_rate": 8.583881785577372e-06, + "loss": 17.7043, + "step": 14676 + }, + { + "epoch": 0.2682837662456358, + "grad_norm": 7.1995554718453185, + "learning_rate": 8.583675369525196e-06, + "loss": 17.6086, + "step": 14677 + }, + { + "epoch": 0.26830204544208236, + "grad_norm": 7.771830931547811, + "learning_rate": 8.583468940912549e-06, + "loss": 18.2094, + "step": 14678 + }, + { + "epoch": 0.2683203246385289, + "grad_norm": 6.473819895791835, + "learning_rate": 8.583262499740156e-06, + "loss": 17.6683, + "step": 14679 + }, + { + "epoch": 0.26833860383497543, + "grad_norm": 5.336545610883745, + "learning_rate": 8.583056046008738e-06, + "loss": 17.047, + "step": 14680 + }, + { + "epoch": 0.2683568830314219, + "grad_norm": 6.476933158971933, + "learning_rate": 8.58284957971902e-06, + "loss": 17.7432, + "step": 14681 + }, + { + "epoch": 0.26837516222786845, + "grad_norm": 7.194844716455216, + "learning_rate": 8.582643100871723e-06, + "loss": 18.0126, + "step": 14682 + }, + { + "epoch": 0.268393441424315, + "grad_norm": 9.837567698985303, + "learning_rate": 8.582436609467574e-06, + "loss": 18.3976, + "step": 14683 + }, + { + "epoch": 0.2684117206207615, + "grad_norm": 6.302737098745762, + "learning_rate": 8.582230105507297e-06, + "loss": 17.3444, + "step": 14684 + }, + { + "epoch": 0.26842999981720805, + "grad_norm": 6.031561994737572, + "learning_rate": 8.582023588991612e-06, + "loss": 17.243, + "step": 14685 + }, + { + "epoch": 0.26844827901365453, + "grad_norm": 6.7063610725854135, + "learning_rate": 8.581817059921246e-06, + "loss": 17.7287, + "step": 14686 + }, + { + "epoch": 0.26846655821010107, + "grad_norm": 5.076633321812692, + "learning_rate": 8.581610518296923e-06, + "loss": 17.0169, + "step": 14687 + }, + { + "epoch": 0.2684848374065476, + "grad_norm": 7.5560594426511285, + "learning_rate": 8.581403964119366e-06, + "loss": 18.364, + "step": 14688 + }, + { + "epoch": 0.26850311660299414, + "grad_norm": 6.224973921589403, + "learning_rate": 8.581197397389299e-06, + "loss": 17.4573, + "step": 14689 + }, + { + "epoch": 0.2685213957994407, + "grad_norm": 7.531539240589316, + "learning_rate": 8.580990818107444e-06, + "loss": 18.3858, + "step": 14690 + }, + { + "epoch": 0.26853967499588716, + "grad_norm": 8.32563910370348, + "learning_rate": 8.580784226274527e-06, + "loss": 
18.243, + "step": 14691 + }, + { + "epoch": 0.2685579541923337, + "grad_norm": 6.877877813170907, + "learning_rate": 8.580577621891274e-06, + "loss": 17.7593, + "step": 14692 + }, + { + "epoch": 0.26857623338878023, + "grad_norm": 5.580909439182357, + "learning_rate": 8.580371004958406e-06, + "loss": 17.2123, + "step": 14693 + }, + { + "epoch": 0.26859451258522676, + "grad_norm": 7.284307503234026, + "learning_rate": 8.580164375476649e-06, + "loss": 17.8056, + "step": 14694 + }, + { + "epoch": 0.2686127917816733, + "grad_norm": 6.045374886150081, + "learning_rate": 8.579957733446725e-06, + "loss": 17.376, + "step": 14695 + }, + { + "epoch": 0.2686310709781198, + "grad_norm": 5.784900666939494, + "learning_rate": 8.57975107886936e-06, + "loss": 16.9846, + "step": 14696 + }, + { + "epoch": 0.2686493501745663, + "grad_norm": 5.744827589367595, + "learning_rate": 8.579544411745279e-06, + "loss": 17.1729, + "step": 14697 + }, + { + "epoch": 0.26866762937101285, + "grad_norm": 6.333080325574875, + "learning_rate": 8.579337732075207e-06, + "loss": 17.565, + "step": 14698 + }, + { + "epoch": 0.2686859085674594, + "grad_norm": 7.097610393011616, + "learning_rate": 8.579131039859865e-06, + "loss": 18.1011, + "step": 14699 + }, + { + "epoch": 0.2687041877639059, + "grad_norm": 6.688524342749437, + "learning_rate": 8.578924335099979e-06, + "loss": 17.5679, + "step": 14700 + }, + { + "epoch": 0.2687224669603524, + "grad_norm": 7.050501574071304, + "learning_rate": 8.578717617796275e-06, + "loss": 17.8998, + "step": 14701 + }, + { + "epoch": 0.26874074615679894, + "grad_norm": 6.523816221993048, + "learning_rate": 8.578510887949475e-06, + "loss": 17.5661, + "step": 14702 + }, + { + "epoch": 0.2687590253532455, + "grad_norm": 7.428933579013749, + "learning_rate": 8.578304145560306e-06, + "loss": 17.7928, + "step": 14703 + }, + { + "epoch": 0.268777304549692, + "grad_norm": 6.858388721237472, + "learning_rate": 8.578097390629491e-06, + "loss": 17.6887, + "step": 14704 + }, + { + "epoch": 0.26879558374613854, + "grad_norm": 6.412739863494405, + "learning_rate": 8.577890623157754e-06, + "loss": 17.45, + "step": 14705 + }, + { + "epoch": 0.268813862942585, + "grad_norm": 6.470617535701904, + "learning_rate": 8.577683843145823e-06, + "loss": 17.5752, + "step": 14706 + }, + { + "epoch": 0.26883214213903156, + "grad_norm": 6.746735318473387, + "learning_rate": 8.577477050594419e-06, + "loss": 17.7946, + "step": 14707 + }, + { + "epoch": 0.2688504213354781, + "grad_norm": 5.971641974763012, + "learning_rate": 8.57727024550427e-06, + "loss": 17.4674, + "step": 14708 + }, + { + "epoch": 0.26886870053192463, + "grad_norm": 6.175575672423403, + "learning_rate": 8.577063427876096e-06, + "loss": 17.384, + "step": 14709 + }, + { + "epoch": 0.26888697972837117, + "grad_norm": 7.383734868781345, + "learning_rate": 8.576856597710628e-06, + "loss": 17.8635, + "step": 14710 + }, + { + "epoch": 0.26890525892481765, + "grad_norm": 12.295733493486797, + "learning_rate": 8.576649755008587e-06, + "loss": 18.6965, + "step": 14711 + }, + { + "epoch": 0.2689235381212642, + "grad_norm": 6.466883236775337, + "learning_rate": 8.576442899770699e-06, + "loss": 17.5949, + "step": 14712 + }, + { + "epoch": 0.2689418173177107, + "grad_norm": 7.800539756039745, + "learning_rate": 8.576236031997689e-06, + "loss": 17.9315, + "step": 14713 + }, + { + "epoch": 0.26896009651415725, + "grad_norm": 6.894007335467035, + "learning_rate": 8.576029151690282e-06, + "loss": 17.6541, + "step": 14714 + }, + { + "epoch": 0.26897837571060373, + "grad_norm": 
6.148859316110533, + "learning_rate": 8.575822258849203e-06, + "loss": 17.4111, + "step": 14715 + }, + { + "epoch": 0.26899665490705027, + "grad_norm": 6.58256675453406, + "learning_rate": 8.575615353475178e-06, + "loss": 17.6077, + "step": 14716 + }, + { + "epoch": 0.2690149341034968, + "grad_norm": 7.441271575092082, + "learning_rate": 8.57540843556893e-06, + "loss": 17.9539, + "step": 14717 + }, + { + "epoch": 0.26903321329994334, + "grad_norm": 5.361833844903456, + "learning_rate": 8.575201505131188e-06, + "loss": 16.9696, + "step": 14718 + }, + { + "epoch": 0.2690514924963899, + "grad_norm": 5.678366120117593, + "learning_rate": 8.574994562162672e-06, + "loss": 17.3139, + "step": 14719 + }, + { + "epoch": 0.26906977169283636, + "grad_norm": 6.0893837231018715, + "learning_rate": 8.574787606664115e-06, + "loss": 17.4303, + "step": 14720 + }, + { + "epoch": 0.2690880508892829, + "grad_norm": 6.978650869235168, + "learning_rate": 8.574580638636233e-06, + "loss": 17.7617, + "step": 14721 + }, + { + "epoch": 0.26910633008572943, + "grad_norm": 6.399336943829046, + "learning_rate": 8.574373658079758e-06, + "loss": 17.4543, + "step": 14722 + }, + { + "epoch": 0.26912460928217596, + "grad_norm": 6.7738837958731795, + "learning_rate": 8.574166664995413e-06, + "loss": 17.5596, + "step": 14723 + }, + { + "epoch": 0.2691428884786225, + "grad_norm": 6.833367457168881, + "learning_rate": 8.573959659383923e-06, + "loss": 17.2438, + "step": 14724 + }, + { + "epoch": 0.269161167675069, + "grad_norm": 6.853494356915995, + "learning_rate": 8.573752641246018e-06, + "loss": 17.9752, + "step": 14725 + }, + { + "epoch": 0.2691794468715155, + "grad_norm": 6.813532429082994, + "learning_rate": 8.573545610582416e-06, + "loss": 17.7973, + "step": 14726 + }, + { + "epoch": 0.26919772606796205, + "grad_norm": 7.404253275459462, + "learning_rate": 8.57333856739385e-06, + "loss": 18.1289, + "step": 14727 + }, + { + "epoch": 0.2692160052644086, + "grad_norm": 5.67083780852939, + "learning_rate": 8.57313151168104e-06, + "loss": 17.1733, + "step": 14728 + }, + { + "epoch": 0.2692342844608551, + "grad_norm": 6.618964024735214, + "learning_rate": 8.572924443444714e-06, + "loss": 17.6785, + "step": 14729 + }, + { + "epoch": 0.2692525636573016, + "grad_norm": 4.780337707127116, + "learning_rate": 8.572717362685601e-06, + "loss": 16.988, + "step": 14730 + }, + { + "epoch": 0.26927084285374814, + "grad_norm": 6.151126655675175, + "learning_rate": 8.57251026940442e-06, + "loss": 17.3929, + "step": 14731 + }, + { + "epoch": 0.2692891220501947, + "grad_norm": 6.235799780952094, + "learning_rate": 8.572303163601902e-06, + "loss": 17.585, + "step": 14732 + }, + { + "epoch": 0.2693074012466412, + "grad_norm": 5.437297085695997, + "learning_rate": 8.572096045278773e-06, + "loss": 17.1169, + "step": 14733 + }, + { + "epoch": 0.26932568044308774, + "grad_norm": 7.3332844311428484, + "learning_rate": 8.571888914435756e-06, + "loss": 17.8667, + "step": 14734 + }, + { + "epoch": 0.2693439596395342, + "grad_norm": 6.5775330738577305, + "learning_rate": 8.571681771073576e-06, + "loss": 17.3096, + "step": 14735 + }, + { + "epoch": 0.26936223883598076, + "grad_norm": 6.474062535106741, + "learning_rate": 8.571474615192965e-06, + "loss": 17.3985, + "step": 14736 + }, + { + "epoch": 0.2693805180324273, + "grad_norm": 6.667701680170781, + "learning_rate": 8.571267446794643e-06, + "loss": 17.4353, + "step": 14737 + }, + { + "epoch": 0.26939879722887383, + "grad_norm": 6.9320060803068, + "learning_rate": 8.57106026587934e-06, + "loss": 17.827, 
+ "step": 14738 + }, + { + "epoch": 0.26941707642532037, + "grad_norm": 6.716519546581107, + "learning_rate": 8.57085307244778e-06, + "loss": 17.6117, + "step": 14739 + }, + { + "epoch": 0.26943535562176685, + "grad_norm": 6.790569901350317, + "learning_rate": 8.57064586650069e-06, + "loss": 17.2713, + "step": 14740 + }, + { + "epoch": 0.2694536348182134, + "grad_norm": 7.202666511234887, + "learning_rate": 8.570438648038795e-06, + "loss": 17.6958, + "step": 14741 + }, + { + "epoch": 0.2694719140146599, + "grad_norm": 6.375522015279112, + "learning_rate": 8.570231417062824e-06, + "loss": 17.4966, + "step": 14742 + }, + { + "epoch": 0.26949019321110645, + "grad_norm": 6.551655264635537, + "learning_rate": 8.570024173573502e-06, + "loss": 17.4394, + "step": 14743 + }, + { + "epoch": 0.269508472407553, + "grad_norm": 7.195717947624498, + "learning_rate": 8.569816917571553e-06, + "loss": 17.6685, + "step": 14744 + }, + { + "epoch": 0.26952675160399947, + "grad_norm": 5.360594593921079, + "learning_rate": 8.569609649057706e-06, + "loss": 16.7244, + "step": 14745 + }, + { + "epoch": 0.269545030800446, + "grad_norm": 6.824536535650638, + "learning_rate": 8.569402368032688e-06, + "loss": 17.8826, + "step": 14746 + }, + { + "epoch": 0.26956330999689254, + "grad_norm": 7.403124834009568, + "learning_rate": 8.569195074497224e-06, + "loss": 17.7747, + "step": 14747 + }, + { + "epoch": 0.2695815891933391, + "grad_norm": 7.319336075058227, + "learning_rate": 8.56898776845204e-06, + "loss": 17.659, + "step": 14748 + }, + { + "epoch": 0.26959986838978556, + "grad_norm": 5.614063931073716, + "learning_rate": 8.568780449897866e-06, + "loss": 17.0689, + "step": 14749 + }, + { + "epoch": 0.2696181475862321, + "grad_norm": 6.691994166706363, + "learning_rate": 8.568573118835423e-06, + "loss": 17.5283, + "step": 14750 + }, + { + "epoch": 0.26963642678267863, + "grad_norm": 8.027288059140304, + "learning_rate": 8.568365775265443e-06, + "loss": 17.7729, + "step": 14751 + }, + { + "epoch": 0.26965470597912516, + "grad_norm": 5.625788602106047, + "learning_rate": 8.56815841918865e-06, + "loss": 17.3177, + "step": 14752 + }, + { + "epoch": 0.2696729851755717, + "grad_norm": 6.418223505014459, + "learning_rate": 8.567951050605771e-06, + "loss": 17.3564, + "step": 14753 + }, + { + "epoch": 0.2696912643720182, + "grad_norm": 6.191659165789929, + "learning_rate": 8.567743669517534e-06, + "loss": 17.565, + "step": 14754 + }, + { + "epoch": 0.2697095435684647, + "grad_norm": 6.952676833930705, + "learning_rate": 8.567536275924666e-06, + "loss": 17.789, + "step": 14755 + }, + { + "epoch": 0.26972782276491125, + "grad_norm": 6.728365928300831, + "learning_rate": 8.567328869827891e-06, + "loss": 17.4448, + "step": 14756 + }, + { + "epoch": 0.2697461019613578, + "grad_norm": 6.9397302438025665, + "learning_rate": 8.56712145122794e-06, + "loss": 17.8716, + "step": 14757 + }, + { + "epoch": 0.2697643811578043, + "grad_norm": 6.421020196663757, + "learning_rate": 8.566914020125536e-06, + "loss": 17.47, + "step": 14758 + }, + { + "epoch": 0.2697826603542508, + "grad_norm": 6.311965000870438, + "learning_rate": 8.566706576521407e-06, + "loss": 17.5558, + "step": 14759 + }, + { + "epoch": 0.26980093955069734, + "grad_norm": 6.596533882459412, + "learning_rate": 8.566499120416283e-06, + "loss": 17.5017, + "step": 14760 + }, + { + "epoch": 0.2698192187471439, + "grad_norm": 6.058452312693287, + "learning_rate": 8.566291651810887e-06, + "loss": 16.9859, + "step": 14761 + }, + { + "epoch": 0.2698374979435904, + "grad_norm": 
8.718469231759896, + "learning_rate": 8.56608417070595e-06, + "loss": 18.3771, + "step": 14762 + }, + { + "epoch": 0.26985577714003695, + "grad_norm": 5.622414403765055, + "learning_rate": 8.565876677102198e-06, + "loss": 17.2049, + "step": 14763 + }, + { + "epoch": 0.2698740563364834, + "grad_norm": 6.467884336016666, + "learning_rate": 8.565669171000357e-06, + "loss": 17.454, + "step": 14764 + }, + { + "epoch": 0.26989233553292996, + "grad_norm": 6.698985687010132, + "learning_rate": 8.565461652401155e-06, + "loss": 17.6975, + "step": 14765 + }, + { + "epoch": 0.2699106147293765, + "grad_norm": 6.320444788794005, + "learning_rate": 8.565254121305318e-06, + "loss": 17.2421, + "step": 14766 + }, + { + "epoch": 0.26992889392582303, + "grad_norm": 7.469850399429338, + "learning_rate": 8.565046577713576e-06, + "loss": 17.7894, + "step": 14767 + }, + { + "epoch": 0.26994717312226957, + "grad_norm": 8.681383457387533, + "learning_rate": 8.564839021626653e-06, + "loss": 18.0657, + "step": 14768 + }, + { + "epoch": 0.26996545231871605, + "grad_norm": 6.303465573667526, + "learning_rate": 8.564631453045283e-06, + "loss": 17.695, + "step": 14769 + }, + { + "epoch": 0.2699837315151626, + "grad_norm": 6.587916498995225, + "learning_rate": 8.564423871970185e-06, + "loss": 17.5307, + "step": 14770 + }, + { + "epoch": 0.2700020107116091, + "grad_norm": 8.782459938025932, + "learning_rate": 8.564216278402092e-06, + "loss": 18.4805, + "step": 14771 + }, + { + "epoch": 0.27002028990805566, + "grad_norm": 6.264035444124062, + "learning_rate": 8.564008672341731e-06, + "loss": 17.5215, + "step": 14772 + }, + { + "epoch": 0.2700385691045022, + "grad_norm": 6.1909731522116225, + "learning_rate": 8.563801053789828e-06, + "loss": 17.3149, + "step": 14773 + }, + { + "epoch": 0.27005684830094867, + "grad_norm": 7.125301342293887, + "learning_rate": 8.563593422747112e-06, + "loss": 18.0346, + "step": 14774 + }, + { + "epoch": 0.2700751274973952, + "grad_norm": 8.09012375487751, + "learning_rate": 8.563385779214308e-06, + "loss": 18.1556, + "step": 14775 + }, + { + "epoch": 0.27009340669384174, + "grad_norm": 6.838611555632258, + "learning_rate": 8.563178123192148e-06, + "loss": 17.4825, + "step": 14776 + }, + { + "epoch": 0.2701116858902883, + "grad_norm": 7.091593604846901, + "learning_rate": 8.562970454681359e-06, + "loss": 17.9692, + "step": 14777 + }, + { + "epoch": 0.2701299650867348, + "grad_norm": 27.42424937940139, + "learning_rate": 8.562762773682667e-06, + "loss": 18.6833, + "step": 14778 + }, + { + "epoch": 0.2701482442831813, + "grad_norm": 6.105921118984554, + "learning_rate": 8.5625550801968e-06, + "loss": 17.5999, + "step": 14779 + }, + { + "epoch": 0.27016652347962783, + "grad_norm": 7.891322010519658, + "learning_rate": 8.562347374224487e-06, + "loss": 18.0696, + "step": 14780 + }, + { + "epoch": 0.27018480267607436, + "grad_norm": 5.089558546095618, + "learning_rate": 8.562139655766456e-06, + "loss": 17.0662, + "step": 14781 + }, + { + "epoch": 0.2702030818725209, + "grad_norm": 6.964422777174042, + "learning_rate": 8.561931924823433e-06, + "loss": 17.9785, + "step": 14782 + }, + { + "epoch": 0.2702213610689674, + "grad_norm": 6.644066222632282, + "learning_rate": 8.56172418139615e-06, + "loss": 17.6049, + "step": 14783 + }, + { + "epoch": 0.2702396402654139, + "grad_norm": 6.717271400281902, + "learning_rate": 8.56151642548533e-06, + "loss": 17.7115, + "step": 14784 + }, + { + "epoch": 0.27025791946186045, + "grad_norm": 6.717954264561899, + "learning_rate": 8.561308657091707e-06, + "loss": 
17.4734, + "step": 14785 + }, + { + "epoch": 0.270276198658307, + "grad_norm": 7.859560363688032, + "learning_rate": 8.561100876216004e-06, + "loss": 18.0413, + "step": 14786 + }, + { + "epoch": 0.2702944778547535, + "grad_norm": 6.054518782247669, + "learning_rate": 8.560893082858952e-06, + "loss": 17.5724, + "step": 14787 + }, + { + "epoch": 0.2703127570512, + "grad_norm": 6.357519366751193, + "learning_rate": 8.560685277021278e-06, + "loss": 17.5095, + "step": 14788 + }, + { + "epoch": 0.27033103624764654, + "grad_norm": 6.419423602347501, + "learning_rate": 8.560477458703714e-06, + "loss": 17.3069, + "step": 14789 + }, + { + "epoch": 0.2703493154440931, + "grad_norm": 5.996425424062401, + "learning_rate": 8.560269627906983e-06, + "loss": 17.3726, + "step": 14790 + }, + { + "epoch": 0.2703675946405396, + "grad_norm": 7.306677999264691, + "learning_rate": 8.560061784631816e-06, + "loss": 18.0985, + "step": 14791 + }, + { + "epoch": 0.27038587383698615, + "grad_norm": 7.643579111586635, + "learning_rate": 8.559853928878941e-06, + "loss": 18.2205, + "step": 14792 + }, + { + "epoch": 0.2704041530334326, + "grad_norm": 7.4952996707611, + "learning_rate": 8.559646060649086e-06, + "loss": 18.0018, + "step": 14793 + }, + { + "epoch": 0.27042243222987916, + "grad_norm": 6.56709098086221, + "learning_rate": 8.559438179942985e-06, + "loss": 17.5855, + "step": 14794 + }, + { + "epoch": 0.2704407114263257, + "grad_norm": 6.906590166390661, + "learning_rate": 8.559230286761359e-06, + "loss": 17.7104, + "step": 14795 + }, + { + "epoch": 0.27045899062277223, + "grad_norm": 7.337185897035148, + "learning_rate": 8.55902238110494e-06, + "loss": 17.888, + "step": 14796 + }, + { + "epoch": 0.27047726981921877, + "grad_norm": 7.5890777682589965, + "learning_rate": 8.558814462974457e-06, + "loss": 17.9747, + "step": 14797 + }, + { + "epoch": 0.27049554901566525, + "grad_norm": 6.966595784185228, + "learning_rate": 8.558606532370635e-06, + "loss": 17.587, + "step": 14798 + }, + { + "epoch": 0.2705138282121118, + "grad_norm": 6.432904766596979, + "learning_rate": 8.55839858929421e-06, + "loss": 17.7732, + "step": 14799 + }, + { + "epoch": 0.2705321074085583, + "grad_norm": 6.468720967410889, + "learning_rate": 8.558190633745904e-06, + "loss": 17.4364, + "step": 14800 + }, + { + "epoch": 0.27055038660500486, + "grad_norm": 7.185045518131907, + "learning_rate": 8.55798266572645e-06, + "loss": 17.6472, + "step": 14801 + }, + { + "epoch": 0.2705686658014514, + "grad_norm": 5.51531722764198, + "learning_rate": 8.557774685236575e-06, + "loss": 17.1683, + "step": 14802 + }, + { + "epoch": 0.27058694499789787, + "grad_norm": 8.265389929871407, + "learning_rate": 8.557566692277008e-06, + "loss": 18.4747, + "step": 14803 + }, + { + "epoch": 0.2706052241943444, + "grad_norm": 6.174231177908045, + "learning_rate": 8.55735868684848e-06, + "loss": 17.317, + "step": 14804 + }, + { + "epoch": 0.27062350339079094, + "grad_norm": 5.838048605328569, + "learning_rate": 8.557150668951717e-06, + "loss": 17.4088, + "step": 14805 + }, + { + "epoch": 0.2706417825872375, + "grad_norm": 6.0436949025897615, + "learning_rate": 8.55694263858745e-06, + "loss": 17.2727, + "step": 14806 + }, + { + "epoch": 0.270660061783684, + "grad_norm": 6.548063149899086, + "learning_rate": 8.556734595756409e-06, + "loss": 17.5251, + "step": 14807 + }, + { + "epoch": 0.2706783409801305, + "grad_norm": 8.060561911075787, + "learning_rate": 8.55652654045932e-06, + "loss": 18.448, + "step": 14808 + }, + { + "epoch": 0.27069662017657703, + "grad_norm": 
6.624057521585169, + "learning_rate": 8.556318472696915e-06, + "loss": 17.572, + "step": 14809 + }, + { + "epoch": 0.27071489937302357, + "grad_norm": 6.718039617867972, + "learning_rate": 8.556110392469923e-06, + "loss": 17.7396, + "step": 14810 + }, + { + "epoch": 0.2707331785694701, + "grad_norm": 6.602064901695705, + "learning_rate": 8.555902299779071e-06, + "loss": 17.6202, + "step": 14811 + }, + { + "epoch": 0.27075145776591664, + "grad_norm": 5.040929278256974, + "learning_rate": 8.55569419462509e-06, + "loss": 17.0216, + "step": 14812 + }, + { + "epoch": 0.2707697369623631, + "grad_norm": 5.99684895534528, + "learning_rate": 8.55548607700871e-06, + "loss": 17.267, + "step": 14813 + }, + { + "epoch": 0.27078801615880965, + "grad_norm": 7.119770017639812, + "learning_rate": 8.55527794693066e-06, + "loss": 18.0201, + "step": 14814 + }, + { + "epoch": 0.2708062953552562, + "grad_norm": 6.668933993092999, + "learning_rate": 8.55506980439167e-06, + "loss": 17.7468, + "step": 14815 + }, + { + "epoch": 0.2708245745517027, + "grad_norm": 7.787840570922672, + "learning_rate": 8.554861649392468e-06, + "loss": 17.9452, + "step": 14816 + }, + { + "epoch": 0.2708428537481492, + "grad_norm": 9.98440755798373, + "learning_rate": 8.554653481933784e-06, + "loss": 18.604, + "step": 14817 + }, + { + "epoch": 0.27086113294459574, + "grad_norm": 6.193246128929784, + "learning_rate": 8.55444530201635e-06, + "loss": 17.3345, + "step": 14818 + }, + { + "epoch": 0.2708794121410423, + "grad_norm": 6.473158814119927, + "learning_rate": 8.554237109640891e-06, + "loss": 17.4942, + "step": 14819 + }, + { + "epoch": 0.2708976913374888, + "grad_norm": 5.477792509880523, + "learning_rate": 8.554028904808141e-06, + "loss": 17.167, + "step": 14820 + }, + { + "epoch": 0.27091597053393535, + "grad_norm": 5.824733941489929, + "learning_rate": 8.553820687518828e-06, + "loss": 17.1212, + "step": 14821 + }, + { + "epoch": 0.2709342497303818, + "grad_norm": 6.969990034486477, + "learning_rate": 8.553612457773681e-06, + "loss": 17.816, + "step": 14822 + }, + { + "epoch": 0.27095252892682836, + "grad_norm": 7.040876437065263, + "learning_rate": 8.55340421557343e-06, + "loss": 17.7973, + "step": 14823 + }, + { + "epoch": 0.2709708081232749, + "grad_norm": 6.981931948666637, + "learning_rate": 8.553195960918808e-06, + "loss": 17.7812, + "step": 14824 + }, + { + "epoch": 0.27098908731972143, + "grad_norm": 17.040261005387123, + "learning_rate": 8.552987693810542e-06, + "loss": 17.6161, + "step": 14825 + }, + { + "epoch": 0.27100736651616797, + "grad_norm": 6.440303304400119, + "learning_rate": 8.552779414249362e-06, + "loss": 17.7299, + "step": 14826 + }, + { + "epoch": 0.27102564571261445, + "grad_norm": 6.066347738116784, + "learning_rate": 8.552571122235998e-06, + "loss": 17.4761, + "step": 14827 + }, + { + "epoch": 0.271043924909061, + "grad_norm": 6.101927145793574, + "learning_rate": 8.55236281777118e-06, + "loss": 17.3433, + "step": 14828 + }, + { + "epoch": 0.2710622041055075, + "grad_norm": 7.106033455011255, + "learning_rate": 8.55215450085564e-06, + "loss": 18.0241, + "step": 14829 + }, + { + "epoch": 0.27108048330195406, + "grad_norm": 5.42307002578256, + "learning_rate": 8.551946171490107e-06, + "loss": 17.2391, + "step": 14830 + }, + { + "epoch": 0.2710987624984006, + "grad_norm": 5.149141178645675, + "learning_rate": 8.551737829675309e-06, + "loss": 16.998, + "step": 14831 + }, + { + "epoch": 0.27111704169484707, + "grad_norm": 5.687623706865836, + "learning_rate": 8.551529475411979e-06, + "loss": 17.3505, + 
"step": 14832 + }, + { + "epoch": 0.2711353208912936, + "grad_norm": 6.000635344331766, + "learning_rate": 8.551321108700846e-06, + "loss": 17.1844, + "step": 14833 + }, + { + "epoch": 0.27115360008774014, + "grad_norm": 7.146910464294515, + "learning_rate": 8.551112729542642e-06, + "loss": 17.7004, + "step": 14834 + }, + { + "epoch": 0.2711718792841867, + "grad_norm": 6.072344435017131, + "learning_rate": 8.550904337938095e-06, + "loss": 17.3035, + "step": 14835 + }, + { + "epoch": 0.2711901584806332, + "grad_norm": 5.883160755130152, + "learning_rate": 8.550695933887935e-06, + "loss": 17.3569, + "step": 14836 + }, + { + "epoch": 0.2712084376770797, + "grad_norm": 6.013574719384061, + "learning_rate": 8.550487517392896e-06, + "loss": 17.2092, + "step": 14837 + }, + { + "epoch": 0.27122671687352623, + "grad_norm": 6.581247721001421, + "learning_rate": 8.550279088453705e-06, + "loss": 17.361, + "step": 14838 + }, + { + "epoch": 0.27124499606997277, + "grad_norm": 7.229041708791945, + "learning_rate": 8.550070647071095e-06, + "loss": 17.9663, + "step": 14839 + }, + { + "epoch": 0.2712632752664193, + "grad_norm": 9.233208225351778, + "learning_rate": 8.549862193245795e-06, + "loss": 17.8901, + "step": 14840 + }, + { + "epoch": 0.27128155446286584, + "grad_norm": 7.9748289764601985, + "learning_rate": 8.549653726978538e-06, + "loss": 18.2539, + "step": 14841 + }, + { + "epoch": 0.2712998336593123, + "grad_norm": 5.6688321120436385, + "learning_rate": 8.549445248270049e-06, + "loss": 17.2339, + "step": 14842 + }, + { + "epoch": 0.27131811285575885, + "grad_norm": 6.736587469374453, + "learning_rate": 8.549236757121065e-06, + "loss": 17.5542, + "step": 14843 + }, + { + "epoch": 0.2713363920522054, + "grad_norm": 7.316596212398285, + "learning_rate": 8.549028253532313e-06, + "loss": 18.1392, + "step": 14844 + }, + { + "epoch": 0.2713546712486519, + "grad_norm": 7.007929928197967, + "learning_rate": 8.548819737504525e-06, + "loss": 17.7737, + "step": 14845 + }, + { + "epoch": 0.27137295044509846, + "grad_norm": 6.586250496167358, + "learning_rate": 8.548611209038433e-06, + "loss": 17.5552, + "step": 14846 + }, + { + "epoch": 0.27139122964154494, + "grad_norm": 7.821666686457381, + "learning_rate": 8.548402668134766e-06, + "loss": 18.471, + "step": 14847 + }, + { + "epoch": 0.2714095088379915, + "grad_norm": 6.463512499320828, + "learning_rate": 8.548194114794255e-06, + "loss": 17.4083, + "step": 14848 + }, + { + "epoch": 0.271427788034438, + "grad_norm": 5.91809828471948, + "learning_rate": 8.547985549017633e-06, + "loss": 17.3628, + "step": 14849 + }, + { + "epoch": 0.27144606723088455, + "grad_norm": 5.691880825744473, + "learning_rate": 8.547776970805627e-06, + "loss": 17.0096, + "step": 14850 + }, + { + "epoch": 0.271464346427331, + "grad_norm": 5.823969072739495, + "learning_rate": 8.547568380158973e-06, + "loss": 17.1374, + "step": 14851 + }, + { + "epoch": 0.27148262562377756, + "grad_norm": 6.396999802354459, + "learning_rate": 8.547359777078399e-06, + "loss": 17.5505, + "step": 14852 + }, + { + "epoch": 0.2715009048202241, + "grad_norm": 6.055306693350301, + "learning_rate": 8.547151161564636e-06, + "loss": 17.3091, + "step": 14853 + }, + { + "epoch": 0.27151918401667063, + "grad_norm": 6.948563482297491, + "learning_rate": 8.546942533618417e-06, + "loss": 17.6344, + "step": 14854 + }, + { + "epoch": 0.27153746321311717, + "grad_norm": 6.777693158596401, + "learning_rate": 8.54673389324047e-06, + "loss": 17.8987, + "step": 14855 + }, + { + "epoch": 0.27155574240956365, + "grad_norm": 
6.641151041194556, + "learning_rate": 8.54652524043153e-06, + "loss": 17.8, + "step": 14856 + }, + { + "epoch": 0.2715740216060102, + "grad_norm": 7.462915438731451, + "learning_rate": 8.546316575192328e-06, + "loss": 18.3269, + "step": 14857 + }, + { + "epoch": 0.2715923008024567, + "grad_norm": 6.2508430357152704, + "learning_rate": 8.546107897523592e-06, + "loss": 17.6674, + "step": 14858 + }, + { + "epoch": 0.27161057999890326, + "grad_norm": 6.624476504090634, + "learning_rate": 8.545899207426056e-06, + "loss": 17.6947, + "step": 14859 + }, + { + "epoch": 0.2716288591953498, + "grad_norm": 6.221072911878215, + "learning_rate": 8.545690504900452e-06, + "loss": 17.4292, + "step": 14860 + }, + { + "epoch": 0.2716471383917963, + "grad_norm": 10.603644841939005, + "learning_rate": 8.54548178994751e-06, + "loss": 18.6189, + "step": 14861 + }, + { + "epoch": 0.2716654175882428, + "grad_norm": 7.174358900772363, + "learning_rate": 8.545273062567963e-06, + "loss": 17.9352, + "step": 14862 + }, + { + "epoch": 0.27168369678468934, + "grad_norm": 6.203643896967865, + "learning_rate": 8.54506432276254e-06, + "loss": 17.3654, + "step": 14863 + }, + { + "epoch": 0.2717019759811359, + "grad_norm": 7.270744433630595, + "learning_rate": 8.544855570531974e-06, + "loss": 17.7381, + "step": 14864 + }, + { + "epoch": 0.2717202551775824, + "grad_norm": 5.932433514868857, + "learning_rate": 8.544646805876999e-06, + "loss": 17.3567, + "step": 14865 + }, + { + "epoch": 0.2717385343740289, + "grad_norm": 5.7418441762364765, + "learning_rate": 8.544438028798342e-06, + "loss": 16.9767, + "step": 14866 + }, + { + "epoch": 0.27175681357047543, + "grad_norm": 5.8992892914146635, + "learning_rate": 8.54422923929674e-06, + "loss": 17.4688, + "step": 14867 + }, + { + "epoch": 0.27177509276692197, + "grad_norm": 5.4741094203996665, + "learning_rate": 8.544020437372919e-06, + "loss": 17.1419, + "step": 14868 + }, + { + "epoch": 0.2717933719633685, + "grad_norm": 8.441886739004868, + "learning_rate": 8.543811623027616e-06, + "loss": 18.798, + "step": 14869 + }, + { + "epoch": 0.27181165115981504, + "grad_norm": 7.094430727346486, + "learning_rate": 8.54360279626156e-06, + "loss": 17.8168, + "step": 14870 + }, + { + "epoch": 0.2718299303562615, + "grad_norm": 7.59321936874265, + "learning_rate": 8.543393957075483e-06, + "loss": 17.806, + "step": 14871 + }, + { + "epoch": 0.27184820955270805, + "grad_norm": 7.263613009008624, + "learning_rate": 8.54318510547012e-06, + "loss": 18.1295, + "step": 14872 + }, + { + "epoch": 0.2718664887491546, + "grad_norm": 6.984233747273519, + "learning_rate": 8.5429762414462e-06, + "loss": 17.5231, + "step": 14873 + }, + { + "epoch": 0.2718847679456011, + "grad_norm": 5.080683098741494, + "learning_rate": 8.542767365004454e-06, + "loss": 16.8671, + "step": 14874 + }, + { + "epoch": 0.27190304714204766, + "grad_norm": 6.434155928121867, + "learning_rate": 8.542558476145616e-06, + "loss": 17.6545, + "step": 14875 + }, + { + "epoch": 0.27192132633849414, + "grad_norm": 8.879287344854903, + "learning_rate": 8.54234957487042e-06, + "loss": 18.6452, + "step": 14876 + }, + { + "epoch": 0.2719396055349407, + "grad_norm": 6.323199241643357, + "learning_rate": 8.542140661179594e-06, + "loss": 17.7601, + "step": 14877 + }, + { + "epoch": 0.2719578847313872, + "grad_norm": 6.301146414422232, + "learning_rate": 8.541931735073872e-06, + "loss": 17.6795, + "step": 14878 + }, + { + "epoch": 0.27197616392783375, + "grad_norm": 6.785669327266767, + "learning_rate": 8.541722796553988e-06, + "loss": 17.5498, + 
"step": 14879 + }, + { + "epoch": 0.2719944431242803, + "grad_norm": 5.844865942548047, + "learning_rate": 8.541513845620672e-06, + "loss": 17.3334, + "step": 14880 + }, + { + "epoch": 0.27201272232072676, + "grad_norm": 22.634779033249952, + "learning_rate": 8.54130488227466e-06, + "loss": 17.8193, + "step": 14881 + }, + { + "epoch": 0.2720310015171733, + "grad_norm": 8.142868092799928, + "learning_rate": 8.54109590651668e-06, + "loss": 18.1071, + "step": 14882 + }, + { + "epoch": 0.27204928071361983, + "grad_norm": 8.427618812524283, + "learning_rate": 8.540886918347465e-06, + "loss": 17.4152, + "step": 14883 + }, + { + "epoch": 0.27206755991006637, + "grad_norm": 6.93423553369138, + "learning_rate": 8.540677917767749e-06, + "loss": 17.6729, + "step": 14884 + }, + { + "epoch": 0.27208583910651285, + "grad_norm": 7.193372060519649, + "learning_rate": 8.540468904778265e-06, + "loss": 17.6561, + "step": 14885 + }, + { + "epoch": 0.2721041183029594, + "grad_norm": 8.280421848562657, + "learning_rate": 8.540259879379744e-06, + "loss": 18.4438, + "step": 14886 + }, + { + "epoch": 0.2721223974994059, + "grad_norm": 5.945522270852512, + "learning_rate": 8.54005084157292e-06, + "loss": 17.331, + "step": 14887 + }, + { + "epoch": 0.27214067669585246, + "grad_norm": 7.74093699090885, + "learning_rate": 8.539841791358523e-06, + "loss": 17.8281, + "step": 14888 + }, + { + "epoch": 0.272158955892299, + "grad_norm": 7.664311082329237, + "learning_rate": 8.539632728737291e-06, + "loss": 17.9029, + "step": 14889 + }, + { + "epoch": 0.2721772350887455, + "grad_norm": 5.472508028003336, + "learning_rate": 8.53942365370995e-06, + "loss": 17.1254, + "step": 14890 + }, + { + "epoch": 0.272195514285192, + "grad_norm": 7.2114970326785395, + "learning_rate": 8.539214566277239e-06, + "loss": 17.8657, + "step": 14891 + }, + { + "epoch": 0.27221379348163854, + "grad_norm": 5.760661427336524, + "learning_rate": 8.539005466439886e-06, + "loss": 17.424, + "step": 14892 + }, + { + "epoch": 0.2722320726780851, + "grad_norm": 6.507112035057774, + "learning_rate": 8.538796354198629e-06, + "loss": 17.6589, + "step": 14893 + }, + { + "epoch": 0.2722503518745316, + "grad_norm": 6.348619332970952, + "learning_rate": 8.538587229554195e-06, + "loss": 17.5345, + "step": 14894 + }, + { + "epoch": 0.2722686310709781, + "grad_norm": 8.662099122948792, + "learning_rate": 8.53837809250732e-06, + "loss": 18.2038, + "step": 14895 + }, + { + "epoch": 0.27228691026742463, + "grad_norm": 6.92314883051264, + "learning_rate": 8.538168943058738e-06, + "loss": 17.8408, + "step": 14896 + }, + { + "epoch": 0.27230518946387117, + "grad_norm": 8.953082316841071, + "learning_rate": 8.537959781209181e-06, + "loss": 18.1729, + "step": 14897 + }, + { + "epoch": 0.2723234686603177, + "grad_norm": 7.092600096105889, + "learning_rate": 8.537750606959381e-06, + "loss": 18.1305, + "step": 14898 + }, + { + "epoch": 0.27234174785676424, + "grad_norm": 7.241337699361368, + "learning_rate": 8.537541420310072e-06, + "loss": 17.98, + "step": 14899 + }, + { + "epoch": 0.2723600270532107, + "grad_norm": 8.802883025943345, + "learning_rate": 8.537332221261988e-06, + "loss": 18.5611, + "step": 14900 + }, + { + "epoch": 0.27237830624965725, + "grad_norm": 7.593543267720216, + "learning_rate": 8.537123009815861e-06, + "loss": 18.0538, + "step": 14901 + }, + { + "epoch": 0.2723965854461038, + "grad_norm": 5.630264023919765, + "learning_rate": 8.536913785972424e-06, + "loss": 17.1605, + "step": 14902 + }, + { + "epoch": 0.2724148646425503, + "grad_norm": 
6.380544619337826, + "learning_rate": 8.536704549732413e-06, + "loss": 17.7363, + "step": 14903 + }, + { + "epoch": 0.27243314383899686, + "grad_norm": 6.157146247117923, + "learning_rate": 8.536495301096559e-06, + "loss": 17.3588, + "step": 14904 + }, + { + "epoch": 0.27245142303544334, + "grad_norm": 6.743080721881028, + "learning_rate": 8.536286040065595e-06, + "loss": 17.7994, + "step": 14905 + }, + { + "epoch": 0.2724697022318899, + "grad_norm": 7.064760121129258, + "learning_rate": 8.536076766640258e-06, + "loss": 18.0931, + "step": 14906 + }, + { + "epoch": 0.2724879814283364, + "grad_norm": 7.407565010651201, + "learning_rate": 8.535867480821275e-06, + "loss": 17.8473, + "step": 14907 + }, + { + "epoch": 0.27250626062478295, + "grad_norm": 7.81441975945342, + "learning_rate": 8.535658182609386e-06, + "loss": 18.1158, + "step": 14908 + }, + { + "epoch": 0.2725245398212295, + "grad_norm": 6.045435312495618, + "learning_rate": 8.53544887200532e-06, + "loss": 17.3503, + "step": 14909 + }, + { + "epoch": 0.27254281901767596, + "grad_norm": 5.707968340287296, + "learning_rate": 8.535239549009813e-06, + "loss": 17.099, + "step": 14910 + }, + { + "epoch": 0.2725610982141225, + "grad_norm": 6.066638919619541, + "learning_rate": 8.535030213623599e-06, + "loss": 17.2943, + "step": 14911 + }, + { + "epoch": 0.27257937741056903, + "grad_norm": 5.900745626891175, + "learning_rate": 8.53482086584741e-06, + "loss": 17.4147, + "step": 14912 + }, + { + "epoch": 0.27259765660701557, + "grad_norm": 6.370327324649582, + "learning_rate": 8.53461150568198e-06, + "loss": 17.4563, + "step": 14913 + }, + { + "epoch": 0.2726159358034621, + "grad_norm": 8.7428366840071, + "learning_rate": 8.534402133128044e-06, + "loss": 18.1068, + "step": 14914 + }, + { + "epoch": 0.2726342149999086, + "grad_norm": 7.311969184989779, + "learning_rate": 8.534192748186337e-06, + "loss": 18.1549, + "step": 14915 + }, + { + "epoch": 0.2726524941963551, + "grad_norm": 7.222571937555025, + "learning_rate": 8.53398335085759e-06, + "loss": 17.9561, + "step": 14916 + }, + { + "epoch": 0.27267077339280166, + "grad_norm": 6.817968807815459, + "learning_rate": 8.533773941142535e-06, + "loss": 17.6033, + "step": 14917 + }, + { + "epoch": 0.2726890525892482, + "grad_norm": 6.645826784715755, + "learning_rate": 8.533564519041913e-06, + "loss": 17.4718, + "step": 14918 + }, + { + "epoch": 0.2727073317856947, + "grad_norm": 7.061940441232614, + "learning_rate": 8.533355084556452e-06, + "loss": 17.3461, + "step": 14919 + }, + { + "epoch": 0.2727256109821412, + "grad_norm": 9.420514471152142, + "learning_rate": 8.533145637686889e-06, + "loss": 18.8242, + "step": 14920 + }, + { + "epoch": 0.27274389017858774, + "grad_norm": 5.597796718640966, + "learning_rate": 8.532936178433958e-06, + "loss": 16.9579, + "step": 14921 + }, + { + "epoch": 0.2727621693750343, + "grad_norm": 6.1370517575521735, + "learning_rate": 8.53272670679839e-06, + "loss": 17.1182, + "step": 14922 + }, + { + "epoch": 0.2727804485714808, + "grad_norm": 5.938901037287833, + "learning_rate": 8.532517222780922e-06, + "loss": 17.2498, + "step": 14923 + }, + { + "epoch": 0.2727987277679273, + "grad_norm": 7.485266218181458, + "learning_rate": 8.532307726382288e-06, + "loss": 18.0095, + "step": 14924 + }, + { + "epoch": 0.27281700696437383, + "grad_norm": 7.9292761561775835, + "learning_rate": 8.532098217603222e-06, + "loss": 18.264, + "step": 14925 + }, + { + "epoch": 0.27283528616082037, + "grad_norm": 8.333234729593757, + "learning_rate": 8.53188869644446e-06, + "loss": 17.9407, 
+ "step": 14926 + }, + { + "epoch": 0.2728535653572669, + "grad_norm": 7.081977271341544, + "learning_rate": 8.531679162906732e-06, + "loss": 17.4561, + "step": 14927 + }, + { + "epoch": 0.27287184455371344, + "grad_norm": 6.116784005544873, + "learning_rate": 8.531469616990776e-06, + "loss": 17.3566, + "step": 14928 + }, + { + "epoch": 0.2728901237501599, + "grad_norm": 5.5221505721350175, + "learning_rate": 8.531260058697326e-06, + "loss": 17.4277, + "step": 14929 + }, + { + "epoch": 0.27290840294660645, + "grad_norm": 7.0237127520366425, + "learning_rate": 8.531050488027115e-06, + "loss": 17.7201, + "step": 14930 + }, + { + "epoch": 0.272926682143053, + "grad_norm": 9.834204976154917, + "learning_rate": 8.530840904980878e-06, + "loss": 18.8974, + "step": 14931 + }, + { + "epoch": 0.2729449613394995, + "grad_norm": 7.53462570626672, + "learning_rate": 8.53063130955935e-06, + "loss": 17.8331, + "step": 14932 + }, + { + "epoch": 0.27296324053594606, + "grad_norm": 10.658949058256267, + "learning_rate": 8.530421701763268e-06, + "loss": 17.5252, + "step": 14933 + }, + { + "epoch": 0.27298151973239254, + "grad_norm": 6.624393401300358, + "learning_rate": 8.530212081593362e-06, + "loss": 17.5098, + "step": 14934 + }, + { + "epoch": 0.2729997989288391, + "grad_norm": 7.940640834054324, + "learning_rate": 8.53000244905037e-06, + "loss": 18.0564, + "step": 14935 + }, + { + "epoch": 0.2730180781252856, + "grad_norm": 6.292335902678796, + "learning_rate": 8.529792804135025e-06, + "loss": 17.5755, + "step": 14936 + }, + { + "epoch": 0.27303635732173215, + "grad_norm": 7.781507536910704, + "learning_rate": 8.529583146848063e-06, + "loss": 17.5398, + "step": 14937 + }, + { + "epoch": 0.2730546365181787, + "grad_norm": 6.929292405792139, + "learning_rate": 8.529373477190218e-06, + "loss": 17.5629, + "step": 14938 + }, + { + "epoch": 0.27307291571462516, + "grad_norm": 5.940024667463652, + "learning_rate": 8.529163795162225e-06, + "loss": 17.3973, + "step": 14939 + }, + { + "epoch": 0.2730911949110717, + "grad_norm": 6.528807380553815, + "learning_rate": 8.528954100764822e-06, + "loss": 17.619, + "step": 14940 + }, + { + "epoch": 0.27310947410751824, + "grad_norm": 8.385382036274383, + "learning_rate": 8.528744393998736e-06, + "loss": 18.0734, + "step": 14941 + }, + { + "epoch": 0.27312775330396477, + "grad_norm": 6.400757498934926, + "learning_rate": 8.52853467486471e-06, + "loss": 17.0158, + "step": 14942 + }, + { + "epoch": 0.2731460325004113, + "grad_norm": 7.138467067392454, + "learning_rate": 8.528324943363477e-06, + "loss": 17.5021, + "step": 14943 + }, + { + "epoch": 0.2731643116968578, + "grad_norm": 5.918351364852236, + "learning_rate": 8.52811519949577e-06, + "loss": 17.325, + "step": 14944 + }, + { + "epoch": 0.2731825908933043, + "grad_norm": 6.330543141395617, + "learning_rate": 8.527905443262325e-06, + "loss": 17.5151, + "step": 14945 + }, + { + "epoch": 0.27320087008975086, + "grad_norm": 6.996704629960327, + "learning_rate": 8.527695674663878e-06, + "loss": 17.7408, + "step": 14946 + }, + { + "epoch": 0.2732191492861974, + "grad_norm": 8.191466999963437, + "learning_rate": 8.527485893701166e-06, + "loss": 17.0555, + "step": 14947 + }, + { + "epoch": 0.27323742848264393, + "grad_norm": 5.806599569310809, + "learning_rate": 8.527276100374919e-06, + "loss": 17.3758, + "step": 14948 + }, + { + "epoch": 0.2732557076790904, + "grad_norm": 7.703322244771542, + "learning_rate": 8.527066294685878e-06, + "loss": 18.212, + "step": 14949 + }, + { + "epoch": 0.27327398687553695, + "grad_norm": 
20.52918467256652, + "learning_rate": 8.526856476634773e-06, + "loss": 18.5831, + "step": 14950 + }, + { + "epoch": 0.2732922660719835, + "grad_norm": 6.8727490222692795, + "learning_rate": 8.526646646222343e-06, + "loss": 17.6496, + "step": 14951 + }, + { + "epoch": 0.27331054526843, + "grad_norm": 7.175407514213813, + "learning_rate": 8.526436803449323e-06, + "loss": 17.4754, + "step": 14952 + }, + { + "epoch": 0.2733288244648765, + "grad_norm": 6.653790660641926, + "learning_rate": 8.526226948316447e-06, + "loss": 17.6719, + "step": 14953 + }, + { + "epoch": 0.27334710366132303, + "grad_norm": 5.722892147579569, + "learning_rate": 8.526017080824452e-06, + "loss": 17.2459, + "step": 14954 + }, + { + "epoch": 0.27336538285776957, + "grad_norm": 6.279245110003837, + "learning_rate": 8.525807200974074e-06, + "loss": 17.3389, + "step": 14955 + }, + { + "epoch": 0.2733836620542161, + "grad_norm": 7.746941569880032, + "learning_rate": 8.525597308766047e-06, + "loss": 17.9757, + "step": 14956 + }, + { + "epoch": 0.27340194125066264, + "grad_norm": 5.989310738332474, + "learning_rate": 8.525387404201108e-06, + "loss": 17.2385, + "step": 14957 + }, + { + "epoch": 0.2734202204471091, + "grad_norm": 6.196033537084806, + "learning_rate": 8.525177487279992e-06, + "loss": 17.2249, + "step": 14958 + }, + { + "epoch": 0.27343849964355565, + "grad_norm": 7.11327945725399, + "learning_rate": 8.524967558003434e-06, + "loss": 18.0703, + "step": 14959 + }, + { + "epoch": 0.2734567788400022, + "grad_norm": 8.4964544098655, + "learning_rate": 8.52475761637217e-06, + "loss": 17.5121, + "step": 14960 + }, + { + "epoch": 0.2734750580364487, + "grad_norm": 6.516779618103367, + "learning_rate": 8.524547662386937e-06, + "loss": 17.4908, + "step": 14961 + }, + { + "epoch": 0.27349333723289526, + "grad_norm": 9.044453293633982, + "learning_rate": 8.52433769604847e-06, + "loss": 18.6725, + "step": 14962 + }, + { + "epoch": 0.27351161642934174, + "grad_norm": 6.709590072747863, + "learning_rate": 8.524127717357506e-06, + "loss": 17.2904, + "step": 14963 + }, + { + "epoch": 0.2735298956257883, + "grad_norm": 6.880374789406854, + "learning_rate": 8.52391772631478e-06, + "loss": 17.7179, + "step": 14964 + }, + { + "epoch": 0.2735481748222348, + "grad_norm": 7.765052854218366, + "learning_rate": 8.52370772292103e-06, + "loss": 17.9241, + "step": 14965 + }, + { + "epoch": 0.27356645401868135, + "grad_norm": 7.258938768054528, + "learning_rate": 8.523497707176987e-06, + "loss": 17.9309, + "step": 14966 + }, + { + "epoch": 0.2735847332151279, + "grad_norm": 7.668267933278983, + "learning_rate": 8.523287679083393e-06, + "loss": 18.2377, + "step": 14967 + }, + { + "epoch": 0.27360301241157436, + "grad_norm": 6.0395295066238885, + "learning_rate": 8.52307763864098e-06, + "loss": 17.3118, + "step": 14968 + }, + { + "epoch": 0.2736212916080209, + "grad_norm": 6.61967279349748, + "learning_rate": 8.522867585850484e-06, + "loss": 17.5734, + "step": 14969 + }, + { + "epoch": 0.27363957080446744, + "grad_norm": 9.006013486169277, + "learning_rate": 8.522657520712645e-06, + "loss": 18.049, + "step": 14970 + }, + { + "epoch": 0.27365785000091397, + "grad_norm": 6.182940380764518, + "learning_rate": 8.522447443228196e-06, + "loss": 17.3366, + "step": 14971 + }, + { + "epoch": 0.2736761291973605, + "grad_norm": 7.791436530074532, + "learning_rate": 8.522237353397876e-06, + "loss": 18.1704, + "step": 14972 + }, + { + "epoch": 0.273694408393807, + "grad_norm": 9.39990182708251, + "learning_rate": 8.522027251222418e-06, + "loss": 18.3011, + 
"step": 14973 + }, + { + "epoch": 0.2737126875902535, + "grad_norm": 7.578812576891775, + "learning_rate": 8.521817136702561e-06, + "loss": 18.2556, + "step": 14974 + }, + { + "epoch": 0.27373096678670006, + "grad_norm": 5.352122012319598, + "learning_rate": 8.52160700983904e-06, + "loss": 17.3055, + "step": 14975 + }, + { + "epoch": 0.2737492459831466, + "grad_norm": 7.664265293018091, + "learning_rate": 8.521396870632593e-06, + "loss": 18.1554, + "step": 14976 + }, + { + "epoch": 0.27376752517959313, + "grad_norm": 6.232458080284897, + "learning_rate": 8.521186719083954e-06, + "loss": 17.4724, + "step": 14977 + }, + { + "epoch": 0.2737858043760396, + "grad_norm": 6.451326554195879, + "learning_rate": 8.520976555193862e-06, + "loss": 17.4553, + "step": 14978 + }, + { + "epoch": 0.27380408357248615, + "grad_norm": 7.112308104893728, + "learning_rate": 8.520766378963054e-06, + "loss": 17.8498, + "step": 14979 + }, + { + "epoch": 0.2738223627689327, + "grad_norm": 6.854962262606233, + "learning_rate": 8.520556190392263e-06, + "loss": 17.6454, + "step": 14980 + }, + { + "epoch": 0.2738406419653792, + "grad_norm": 6.434217395773612, + "learning_rate": 8.52034598948223e-06, + "loss": 17.3864, + "step": 14981 + }, + { + "epoch": 0.27385892116182575, + "grad_norm": 7.466742836596856, + "learning_rate": 8.520135776233689e-06, + "loss": 18.077, + "step": 14982 + }, + { + "epoch": 0.27387720035827223, + "grad_norm": 8.569752531016313, + "learning_rate": 8.519925550647377e-06, + "loss": 17.762, + "step": 14983 + }, + { + "epoch": 0.27389547955471877, + "grad_norm": 6.082117234849648, + "learning_rate": 8.519715312724032e-06, + "loss": 17.2923, + "step": 14984 + }, + { + "epoch": 0.2739137587511653, + "grad_norm": 6.103727004459452, + "learning_rate": 8.519505062464391e-06, + "loss": 17.1734, + "step": 14985 + }, + { + "epoch": 0.27393203794761184, + "grad_norm": 6.199682998443549, + "learning_rate": 8.519294799869189e-06, + "loss": 17.4157, + "step": 14986 + }, + { + "epoch": 0.2739503171440583, + "grad_norm": 9.39093050723225, + "learning_rate": 8.519084524939163e-06, + "loss": 18.3416, + "step": 14987 + }, + { + "epoch": 0.27396859634050486, + "grad_norm": 7.2478387446581545, + "learning_rate": 8.518874237675053e-06, + "loss": 18.1011, + "step": 14988 + }, + { + "epoch": 0.2739868755369514, + "grad_norm": 6.429645863607817, + "learning_rate": 8.518663938077594e-06, + "loss": 17.7044, + "step": 14989 + }, + { + "epoch": 0.2740051547333979, + "grad_norm": 7.608277744380132, + "learning_rate": 8.518453626147522e-06, + "loss": 17.5516, + "step": 14990 + }, + { + "epoch": 0.27402343392984446, + "grad_norm": 7.515836219198456, + "learning_rate": 8.518243301885577e-06, + "loss": 18.1083, + "step": 14991 + }, + { + "epoch": 0.27404171312629094, + "grad_norm": 7.008733753396864, + "learning_rate": 8.518032965292494e-06, + "loss": 17.7349, + "step": 14992 + }, + { + "epoch": 0.2740599923227375, + "grad_norm": 6.56982189128168, + "learning_rate": 8.51782261636901e-06, + "loss": 17.6305, + "step": 14993 + }, + { + "epoch": 0.274078271519184, + "grad_norm": 9.070740479244042, + "learning_rate": 8.517612255115864e-06, + "loss": 17.8937, + "step": 14994 + }, + { + "epoch": 0.27409655071563055, + "grad_norm": 7.192224862176739, + "learning_rate": 8.51740188153379e-06, + "loss": 18.0503, + "step": 14995 + }, + { + "epoch": 0.2741148299120771, + "grad_norm": 6.73445119174964, + "learning_rate": 8.517191495623532e-06, + "loss": 17.3977, + "step": 14996 + }, + { + "epoch": 0.27413310910852356, + "grad_norm": 
7.535594093343021, + "learning_rate": 8.516981097385819e-06, + "loss": 17.8374, + "step": 14997 + }, + { + "epoch": 0.2741513883049701, + "grad_norm": 8.484477246434745, + "learning_rate": 8.516770686821394e-06, + "loss": 17.9086, + "step": 14998 + }, + { + "epoch": 0.27416966750141664, + "grad_norm": 6.7194537965221395, + "learning_rate": 8.516560263930994e-06, + "loss": 17.6165, + "step": 14999 + }, + { + "epoch": 0.27418794669786317, + "grad_norm": 6.20939998983172, + "learning_rate": 8.516349828715354e-06, + "loss": 17.3942, + "step": 15000 + }, + { + "epoch": 0.2742062258943097, + "grad_norm": 6.63079232764498, + "learning_rate": 8.516139381175212e-06, + "loss": 17.3441, + "step": 15001 + }, + { + "epoch": 0.2742245050907562, + "grad_norm": 6.460953619207861, + "learning_rate": 8.515928921311309e-06, + "loss": 17.4968, + "step": 15002 + }, + { + "epoch": 0.2742427842872027, + "grad_norm": 7.039951232819122, + "learning_rate": 8.515718449124378e-06, + "loss": 17.429, + "step": 15003 + }, + { + "epoch": 0.27426106348364926, + "grad_norm": 6.60295901253017, + "learning_rate": 8.51550796461516e-06, + "loss": 17.5674, + "step": 15004 + }, + { + "epoch": 0.2742793426800958, + "grad_norm": 6.809811201680587, + "learning_rate": 8.515297467784392e-06, + "loss": 17.6928, + "step": 15005 + }, + { + "epoch": 0.27429762187654233, + "grad_norm": 5.919394271802698, + "learning_rate": 8.515086958632812e-06, + "loss": 17.132, + "step": 15006 + }, + { + "epoch": 0.2743159010729888, + "grad_norm": 9.171031124987774, + "learning_rate": 8.514876437161154e-06, + "loss": 18.0037, + "step": 15007 + }, + { + "epoch": 0.27433418026943535, + "grad_norm": 6.5707004573525785, + "learning_rate": 8.514665903370163e-06, + "loss": 17.612, + "step": 15008 + }, + { + "epoch": 0.2743524594658819, + "grad_norm": 7.942019601890254, + "learning_rate": 8.514455357260572e-06, + "loss": 18.1443, + "step": 15009 + }, + { + "epoch": 0.2743707386623284, + "grad_norm": 7.361567440445024, + "learning_rate": 8.514244798833118e-06, + "loss": 17.7264, + "step": 15010 + }, + { + "epoch": 0.27438901785877495, + "grad_norm": 7.443035909101035, + "learning_rate": 8.514034228088542e-06, + "loss": 17.4236, + "step": 15011 + }, + { + "epoch": 0.27440729705522143, + "grad_norm": 6.680721693866458, + "learning_rate": 8.513823645027581e-06, + "loss": 17.6669, + "step": 15012 + }, + { + "epoch": 0.27442557625166797, + "grad_norm": 7.444716603510005, + "learning_rate": 8.513613049650972e-06, + "loss": 17.6669, + "step": 15013 + }, + { + "epoch": 0.2744438554481145, + "grad_norm": 5.649032063460323, + "learning_rate": 8.513402441959457e-06, + "loss": 17.1612, + "step": 15014 + }, + { + "epoch": 0.27446213464456104, + "grad_norm": 7.1527954607750495, + "learning_rate": 8.513191821953771e-06, + "loss": 17.6386, + "step": 15015 + }, + { + "epoch": 0.2744804138410076, + "grad_norm": 5.197978034091865, + "learning_rate": 8.51298118963465e-06, + "loss": 16.9496, + "step": 15016 + }, + { + "epoch": 0.27449869303745406, + "grad_norm": 5.957658222387719, + "learning_rate": 8.512770545002835e-06, + "loss": 17.0909, + "step": 15017 + }, + { + "epoch": 0.2745169722339006, + "grad_norm": 5.579661912686926, + "learning_rate": 8.512559888059066e-06, + "loss": 17.0729, + "step": 15018 + }, + { + "epoch": 0.2745352514303471, + "grad_norm": 6.901410980448181, + "learning_rate": 8.51234921880408e-06, + "loss": 17.6866, + "step": 15019 + }, + { + "epoch": 0.27455353062679366, + "grad_norm": 7.105586637830406, + "learning_rate": 8.51213853723861e-06, + "loss": 
17.481, + "step": 15020 + }, + { + "epoch": 0.27457180982324014, + "grad_norm": 6.268941979747861, + "learning_rate": 8.511927843363403e-06, + "loss": 17.5731, + "step": 15021 + }, + { + "epoch": 0.2745900890196867, + "grad_norm": 6.8508749311522745, + "learning_rate": 8.511717137179193e-06, + "loss": 17.7081, + "step": 15022 + }, + { + "epoch": 0.2746083682161332, + "grad_norm": 7.883185569147722, + "learning_rate": 8.51150641868672e-06, + "loss": 17.7303, + "step": 15023 + }, + { + "epoch": 0.27462664741257975, + "grad_norm": 7.524898587945424, + "learning_rate": 8.511295687886721e-06, + "loss": 17.9761, + "step": 15024 + }, + { + "epoch": 0.2746449266090263, + "grad_norm": 5.85153730137204, + "learning_rate": 8.511084944779935e-06, + "loss": 17.2036, + "step": 15025 + }, + { + "epoch": 0.27466320580547277, + "grad_norm": 6.219150490546941, + "learning_rate": 8.510874189367103e-06, + "loss": 17.5252, + "step": 15026 + }, + { + "epoch": 0.2746814850019193, + "grad_norm": 5.920963708816412, + "learning_rate": 8.51066342164896e-06, + "loss": 17.2042, + "step": 15027 + }, + { + "epoch": 0.27469976419836584, + "grad_norm": 7.768715350254114, + "learning_rate": 8.510452641626244e-06, + "loss": 17.8859, + "step": 15028 + }, + { + "epoch": 0.2747180433948124, + "grad_norm": 6.978706382257655, + "learning_rate": 8.510241849299698e-06, + "loss": 17.3981, + "step": 15029 + }, + { + "epoch": 0.2747363225912589, + "grad_norm": 6.927354688887749, + "learning_rate": 8.51003104467006e-06, + "loss": 17.3634, + "step": 15030 + }, + { + "epoch": 0.2747546017877054, + "grad_norm": 6.215084155147541, + "learning_rate": 8.509820227738068e-06, + "loss": 17.346, + "step": 15031 + }, + { + "epoch": 0.2747728809841519, + "grad_norm": 7.464315358997942, + "learning_rate": 8.50960939850446e-06, + "loss": 17.6887, + "step": 15032 + }, + { + "epoch": 0.27479116018059846, + "grad_norm": 8.080319942077573, + "learning_rate": 8.509398556969975e-06, + "loss": 18.0081, + "step": 15033 + }, + { + "epoch": 0.274809439377045, + "grad_norm": 6.1164665928270185, + "learning_rate": 8.509187703135352e-06, + "loss": 17.149, + "step": 15034 + }, + { + "epoch": 0.27482771857349153, + "grad_norm": 7.341736505958095, + "learning_rate": 8.508976837001331e-06, + "loss": 17.5834, + "step": 15035 + }, + { + "epoch": 0.274845997769938, + "grad_norm": 5.674655937620607, + "learning_rate": 8.508765958568651e-06, + "loss": 17.2539, + "step": 15036 + }, + { + "epoch": 0.27486427696638455, + "grad_norm": 6.70835978578212, + "learning_rate": 8.508555067838051e-06, + "loss": 17.6066, + "step": 15037 + }, + { + "epoch": 0.2748825561628311, + "grad_norm": 9.383413690301147, + "learning_rate": 8.50834416481027e-06, + "loss": 18.3229, + "step": 15038 + }, + { + "epoch": 0.2749008353592776, + "grad_norm": 6.253300911206716, + "learning_rate": 8.508133249486048e-06, + "loss": 17.358, + "step": 15039 + }, + { + "epoch": 0.27491911455572415, + "grad_norm": 6.783213990413725, + "learning_rate": 8.507922321866122e-06, + "loss": 17.7181, + "step": 15040 + }, + { + "epoch": 0.27493739375217063, + "grad_norm": 6.3578078618944875, + "learning_rate": 8.507711381951234e-06, + "loss": 17.2029, + "step": 15041 + }, + { + "epoch": 0.27495567294861717, + "grad_norm": 6.230551192999519, + "learning_rate": 8.50750042974212e-06, + "loss": 17.4552, + "step": 15042 + }, + { + "epoch": 0.2749739521450637, + "grad_norm": 7.4337802008058755, + "learning_rate": 8.507289465239523e-06, + "loss": 17.7231, + "step": 15043 + }, + { + "epoch": 0.27499223134151024, + "grad_norm": 
6.350096799832979, + "learning_rate": 8.50707848844418e-06, + "loss": 17.2701, + "step": 15044 + }, + { + "epoch": 0.2750105105379568, + "grad_norm": 6.071829725294881, + "learning_rate": 8.506867499356832e-06, + "loss": 17.0834, + "step": 15045 + }, + { + "epoch": 0.27502878973440326, + "grad_norm": 8.824764724261522, + "learning_rate": 8.506656497978216e-06, + "loss": 18.5188, + "step": 15046 + }, + { + "epoch": 0.2750470689308498, + "grad_norm": 6.326763097824839, + "learning_rate": 8.506445484309075e-06, + "loss": 17.5691, + "step": 15047 + }, + { + "epoch": 0.2750653481272963, + "grad_norm": 6.299871779090824, + "learning_rate": 8.506234458350146e-06, + "loss": 17.5947, + "step": 15048 + }, + { + "epoch": 0.27508362732374286, + "grad_norm": 6.784974901995447, + "learning_rate": 8.50602342010217e-06, + "loss": 17.2225, + "step": 15049 + }, + { + "epoch": 0.2751019065201894, + "grad_norm": 5.913411381000654, + "learning_rate": 8.505812369565886e-06, + "loss": 17.0461, + "step": 15050 + }, + { + "epoch": 0.2751201857166359, + "grad_norm": 9.54384824095882, + "learning_rate": 8.505601306742035e-06, + "loss": 17.6065, + "step": 15051 + }, + { + "epoch": 0.2751384649130824, + "grad_norm": 8.708857677923003, + "learning_rate": 8.505390231631354e-06, + "loss": 18.7173, + "step": 15052 + }, + { + "epoch": 0.27515674410952895, + "grad_norm": 6.889078884155962, + "learning_rate": 8.505179144234583e-06, + "loss": 17.5667, + "step": 15053 + }, + { + "epoch": 0.2751750233059755, + "grad_norm": 6.497347007870209, + "learning_rate": 8.504968044552466e-06, + "loss": 17.6546, + "step": 15054 + }, + { + "epoch": 0.27519330250242197, + "grad_norm": 6.421667153551877, + "learning_rate": 8.50475693258574e-06, + "loss": 17.5946, + "step": 15055 + }, + { + "epoch": 0.2752115816988685, + "grad_norm": 5.667448860596541, + "learning_rate": 8.504545808335144e-06, + "loss": 16.8418, + "step": 15056 + }, + { + "epoch": 0.27522986089531504, + "grad_norm": 6.312400458371548, + "learning_rate": 8.50433467180142e-06, + "loss": 17.1851, + "step": 15057 + }, + { + "epoch": 0.2752481400917616, + "grad_norm": 8.135814509613942, + "learning_rate": 8.504123522985306e-06, + "loss": 18.0047, + "step": 15058 + }, + { + "epoch": 0.2752664192882081, + "grad_norm": 6.2714951204111165, + "learning_rate": 8.503912361887543e-06, + "loss": 17.4653, + "step": 15059 + }, + { + "epoch": 0.2752846984846546, + "grad_norm": 6.249592788376225, + "learning_rate": 8.503701188508872e-06, + "loss": 17.3866, + "step": 15060 + }, + { + "epoch": 0.2753029776811011, + "grad_norm": 9.173638770203379, + "learning_rate": 8.503490002850032e-06, + "loss": 18.9345, + "step": 15061 + }, + { + "epoch": 0.27532125687754766, + "grad_norm": 6.725246630707651, + "learning_rate": 8.503278804911763e-06, + "loss": 17.5321, + "step": 15062 + }, + { + "epoch": 0.2753395360739942, + "grad_norm": 6.304788185687925, + "learning_rate": 8.503067594694807e-06, + "loss": 17.5342, + "step": 15063 + }, + { + "epoch": 0.27535781527044073, + "grad_norm": 6.899815264393031, + "learning_rate": 8.502856372199903e-06, + "loss": 17.7755, + "step": 15064 + }, + { + "epoch": 0.2753760944668872, + "grad_norm": 7.4373658126487525, + "learning_rate": 8.502645137427793e-06, + "loss": 18.2004, + "step": 15065 + }, + { + "epoch": 0.27539437366333375, + "grad_norm": 6.685823341875556, + "learning_rate": 8.502433890379212e-06, + "loss": 17.7411, + "step": 15066 + }, + { + "epoch": 0.2754126528597803, + "grad_norm": 6.258873398752116, + "learning_rate": 8.502222631054906e-06, + "loss": 
17.3507, + "step": 15067 + }, + { + "epoch": 0.2754309320562268, + "grad_norm": 6.580744993073601, + "learning_rate": 8.502011359455613e-06, + "loss": 17.6622, + "step": 15068 + }, + { + "epoch": 0.27544921125267335, + "grad_norm": 5.382922333129405, + "learning_rate": 8.501800075582073e-06, + "loss": 16.902, + "step": 15069 + }, + { + "epoch": 0.27546749044911983, + "grad_norm": 6.387901150632027, + "learning_rate": 8.501588779435031e-06, + "loss": 17.3893, + "step": 15070 + }, + { + "epoch": 0.27548576964556637, + "grad_norm": 6.282565559703935, + "learning_rate": 8.501377471015222e-06, + "loss": 17.4775, + "step": 15071 + }, + { + "epoch": 0.2755040488420129, + "grad_norm": 8.347736709520138, + "learning_rate": 8.501166150323389e-06, + "loss": 18.3767, + "step": 15072 + }, + { + "epoch": 0.27552232803845944, + "grad_norm": 6.133801029727497, + "learning_rate": 8.500954817360271e-06, + "loss": 17.4881, + "step": 15073 + }, + { + "epoch": 0.275540607234906, + "grad_norm": 6.7079679931503025, + "learning_rate": 8.500743472126613e-06, + "loss": 17.2911, + "step": 15074 + }, + { + "epoch": 0.27555888643135246, + "grad_norm": 6.837401980531877, + "learning_rate": 8.500532114623152e-06, + "loss": 17.5818, + "step": 15075 + }, + { + "epoch": 0.275577165627799, + "grad_norm": 7.802182273194156, + "learning_rate": 8.50032074485063e-06, + "loss": 17.8849, + "step": 15076 + }, + { + "epoch": 0.27559544482424553, + "grad_norm": 6.60104311402147, + "learning_rate": 8.500109362809785e-06, + "loss": 17.4672, + "step": 15077 + }, + { + "epoch": 0.27561372402069206, + "grad_norm": 7.163784549768292, + "learning_rate": 8.499897968501363e-06, + "loss": 17.4801, + "step": 15078 + }, + { + "epoch": 0.2756320032171386, + "grad_norm": 5.791770037994613, + "learning_rate": 8.499686561926102e-06, + "loss": 17.0722, + "step": 15079 + }, + { + "epoch": 0.2756502824135851, + "grad_norm": 8.259606438480516, + "learning_rate": 8.499475143084743e-06, + "loss": 18.5193, + "step": 15080 + }, + { + "epoch": 0.2756685616100316, + "grad_norm": 9.952855206016688, + "learning_rate": 8.499263711978027e-06, + "loss": 18.9349, + "step": 15081 + }, + { + "epoch": 0.27568684080647815, + "grad_norm": 7.221818700686994, + "learning_rate": 8.499052268606696e-06, + "loss": 18.1122, + "step": 15082 + }, + { + "epoch": 0.2757051200029247, + "grad_norm": 5.80364490686129, + "learning_rate": 8.49884081297149e-06, + "loss": 17.311, + "step": 15083 + }, + { + "epoch": 0.2757233991993712, + "grad_norm": 6.391870139942319, + "learning_rate": 8.49862934507315e-06, + "loss": 17.7093, + "step": 15084 + }, + { + "epoch": 0.2757416783958177, + "grad_norm": 11.325604702446954, + "learning_rate": 8.498417864912418e-06, + "loss": 18.6128, + "step": 15085 + }, + { + "epoch": 0.27575995759226424, + "grad_norm": 6.693411345117484, + "learning_rate": 8.498206372490036e-06, + "loss": 17.5917, + "step": 15086 + }, + { + "epoch": 0.2757782367887108, + "grad_norm": 6.856529657181099, + "learning_rate": 8.497994867806743e-06, + "loss": 17.7577, + "step": 15087 + }, + { + "epoch": 0.2757965159851573, + "grad_norm": 6.169603565284105, + "learning_rate": 8.497783350863282e-06, + "loss": 17.3158, + "step": 15088 + }, + { + "epoch": 0.2758147951816038, + "grad_norm": 6.683157507006591, + "learning_rate": 8.497571821660393e-06, + "loss": 17.6057, + "step": 15089 + }, + { + "epoch": 0.2758330743780503, + "grad_norm": 6.776299964282804, + "learning_rate": 8.497360280198819e-06, + "loss": 17.5894, + "step": 15090 + }, + { + "epoch": 0.27585135357449686, + 
"grad_norm": 6.663756349873055, + "learning_rate": 8.497148726479302e-06, + "loss": 17.6064, + "step": 15091 + }, + { + "epoch": 0.2758696327709434, + "grad_norm": 5.606218917028794, + "learning_rate": 8.49693716050258e-06, + "loss": 17.1346, + "step": 15092 + }, + { + "epoch": 0.27588791196738993, + "grad_norm": 6.761619243793991, + "learning_rate": 8.496725582269399e-06, + "loss": 17.8594, + "step": 15093 + }, + { + "epoch": 0.2759061911638364, + "grad_norm": 7.383838866864371, + "learning_rate": 8.496513991780496e-06, + "loss": 17.9355, + "step": 15094 + }, + { + "epoch": 0.27592447036028295, + "grad_norm": 5.878574225581679, + "learning_rate": 8.496302389036618e-06, + "loss": 17.3235, + "step": 15095 + }, + { + "epoch": 0.2759427495567295, + "grad_norm": 7.045753771894859, + "learning_rate": 8.4960907740385e-06, + "loss": 17.9473, + "step": 15096 + }, + { + "epoch": 0.275961028753176, + "grad_norm": 6.874159400949003, + "learning_rate": 8.495879146786888e-06, + "loss": 17.6823, + "step": 15097 + }, + { + "epoch": 0.27597930794962255, + "grad_norm": 5.931777682275133, + "learning_rate": 8.495667507282523e-06, + "loss": 17.2954, + "step": 15098 + }, + { + "epoch": 0.27599758714606903, + "grad_norm": 5.658249804303634, + "learning_rate": 8.495455855526149e-06, + "loss": 17.437, + "step": 15099 + }, + { + "epoch": 0.27601586634251557, + "grad_norm": 7.709657203321893, + "learning_rate": 8.495244191518503e-06, + "loss": 18.0923, + "step": 15100 + }, + { + "epoch": 0.2760341455389621, + "grad_norm": 6.419851040462697, + "learning_rate": 8.495032515260332e-06, + "loss": 17.3243, + "step": 15101 + }, + { + "epoch": 0.27605242473540864, + "grad_norm": 6.235144609086725, + "learning_rate": 8.494820826752373e-06, + "loss": 17.1859, + "step": 15102 + }, + { + "epoch": 0.2760707039318552, + "grad_norm": 6.224796415996176, + "learning_rate": 8.49460912599537e-06, + "loss": 17.5906, + "step": 15103 + }, + { + "epoch": 0.27608898312830166, + "grad_norm": 6.818567391082437, + "learning_rate": 8.494397412990064e-06, + "loss": 17.7974, + "step": 15104 + }, + { + "epoch": 0.2761072623247482, + "grad_norm": 6.345565752759843, + "learning_rate": 8.494185687737202e-06, + "loss": 17.3961, + "step": 15105 + }, + { + "epoch": 0.27612554152119473, + "grad_norm": 6.181317681537005, + "learning_rate": 8.49397395023752e-06, + "loss": 17.3983, + "step": 15106 + }, + { + "epoch": 0.27614382071764126, + "grad_norm": 5.922731987022923, + "learning_rate": 8.493762200491764e-06, + "loss": 17.3817, + "step": 15107 + }, + { + "epoch": 0.2761620999140878, + "grad_norm": 7.005057570083185, + "learning_rate": 8.493550438500674e-06, + "loss": 17.7266, + "step": 15108 + }, + { + "epoch": 0.2761803791105343, + "grad_norm": 6.056443598234483, + "learning_rate": 8.493338664264994e-06, + "loss": 17.4693, + "step": 15109 + }, + { + "epoch": 0.2761986583069808, + "grad_norm": 6.870748091461127, + "learning_rate": 8.493126877785462e-06, + "loss": 17.5765, + "step": 15110 + }, + { + "epoch": 0.27621693750342735, + "grad_norm": 6.255024517094979, + "learning_rate": 8.492915079062825e-06, + "loss": 17.3604, + "step": 15111 + }, + { + "epoch": 0.2762352166998739, + "grad_norm": 7.560081869785773, + "learning_rate": 8.492703268097826e-06, + "loss": 17.9092, + "step": 15112 + }, + { + "epoch": 0.2762534958963204, + "grad_norm": 6.943035687647867, + "learning_rate": 8.492491444891202e-06, + "loss": 17.582, + "step": 15113 + }, + { + "epoch": 0.2762717750927669, + "grad_norm": 6.237283884131271, + "learning_rate": 8.4922796094437e-06, + 
"loss": 17.3341, + "step": 15114 + }, + { + "epoch": 0.27629005428921344, + "grad_norm": 8.386729445077309, + "learning_rate": 8.49206776175606e-06, + "loss": 18.5546, + "step": 15115 + }, + { + "epoch": 0.27630833348566, + "grad_norm": 6.636602832320478, + "learning_rate": 8.491855901829028e-06, + "loss": 17.4801, + "step": 15116 + }, + { + "epoch": 0.2763266126821065, + "grad_norm": 5.739313425652385, + "learning_rate": 8.491644029663342e-06, + "loss": 17.1745, + "step": 15117 + }, + { + "epoch": 0.27634489187855305, + "grad_norm": 5.728039251304245, + "learning_rate": 8.491432145259746e-06, + "loss": 17.0786, + "step": 15118 + }, + { + "epoch": 0.2763631710749995, + "grad_norm": 6.301290758430111, + "learning_rate": 8.491220248618985e-06, + "loss": 17.5457, + "step": 15119 + }, + { + "epoch": 0.27638145027144606, + "grad_norm": 7.303871962582155, + "learning_rate": 8.4910083397418e-06, + "loss": 18.0031, + "step": 15120 + }, + { + "epoch": 0.2763997294678926, + "grad_norm": 6.767786507919163, + "learning_rate": 8.490796418628933e-06, + "loss": 17.4871, + "step": 15121 + }, + { + "epoch": 0.27641800866433913, + "grad_norm": 7.215094564305718, + "learning_rate": 8.490584485281126e-06, + "loss": 17.8481, + "step": 15122 + }, + { + "epoch": 0.2764362878607856, + "grad_norm": 6.125606787595317, + "learning_rate": 8.490372539699125e-06, + "loss": 17.2545, + "step": 15123 + }, + { + "epoch": 0.27645456705723215, + "grad_norm": 7.274970598348587, + "learning_rate": 8.49016058188367e-06, + "loss": 17.6086, + "step": 15124 + }, + { + "epoch": 0.2764728462536787, + "grad_norm": 7.0521901233773745, + "learning_rate": 8.489948611835507e-06, + "loss": 17.9983, + "step": 15125 + }, + { + "epoch": 0.2764911254501252, + "grad_norm": 7.406438818444638, + "learning_rate": 8.489736629555376e-06, + "loss": 17.8855, + "step": 15126 + }, + { + "epoch": 0.27650940464657175, + "grad_norm": 5.464736523236706, + "learning_rate": 8.48952463504402e-06, + "loss": 17.2808, + "step": 15127 + }, + { + "epoch": 0.27652768384301823, + "grad_norm": 6.7985981641731925, + "learning_rate": 8.489312628302184e-06, + "loss": 17.553, + "step": 15128 + }, + { + "epoch": 0.27654596303946477, + "grad_norm": 5.7880092429170755, + "learning_rate": 8.489100609330611e-06, + "loss": 17.5625, + "step": 15129 + }, + { + "epoch": 0.2765642422359113, + "grad_norm": 7.5237098939421445, + "learning_rate": 8.48888857813004e-06, + "loss": 17.8427, + "step": 15130 + }, + { + "epoch": 0.27658252143235784, + "grad_norm": 5.818770988021056, + "learning_rate": 8.488676534701222e-06, + "loss": 17.4699, + "step": 15131 + }, + { + "epoch": 0.2766008006288044, + "grad_norm": 7.122951813383191, + "learning_rate": 8.48846447904489e-06, + "loss": 17.8285, + "step": 15132 + }, + { + "epoch": 0.27661907982525086, + "grad_norm": 7.38715813034999, + "learning_rate": 8.488252411161797e-06, + "loss": 18.2292, + "step": 15133 + }, + { + "epoch": 0.2766373590216974, + "grad_norm": 6.675865547873947, + "learning_rate": 8.48804033105268e-06, + "loss": 17.6094, + "step": 15134 + }, + { + "epoch": 0.27665563821814393, + "grad_norm": 6.027580623893715, + "learning_rate": 8.487828238718286e-06, + "loss": 17.2813, + "step": 15135 + }, + { + "epoch": 0.27667391741459046, + "grad_norm": 7.023246440827114, + "learning_rate": 8.487616134159355e-06, + "loss": 17.7784, + "step": 15136 + }, + { + "epoch": 0.276692196611037, + "grad_norm": 6.092554966001453, + "learning_rate": 8.487404017376632e-06, + "loss": 17.2843, + "step": 15137 + }, + { + "epoch": 0.2767104758074835, + 
"grad_norm": 5.93347495303618, + "learning_rate": 8.48719188837086e-06, + "loss": 17.4982, + "step": 15138 + }, + { + "epoch": 0.27672875500393, + "grad_norm": 5.654297826826496, + "learning_rate": 8.486979747142785e-06, + "loss": 16.9761, + "step": 15139 + }, + { + "epoch": 0.27674703420037655, + "grad_norm": 6.0145806159862, + "learning_rate": 8.486767593693148e-06, + "loss": 17.1847, + "step": 15140 + }, + { + "epoch": 0.2767653133968231, + "grad_norm": 13.957909146183193, + "learning_rate": 8.486555428022692e-06, + "loss": 18.0061, + "step": 15141 + }, + { + "epoch": 0.2767835925932696, + "grad_norm": 7.1051146284803135, + "learning_rate": 8.486343250132163e-06, + "loss": 17.6647, + "step": 15142 + }, + { + "epoch": 0.2768018717897161, + "grad_norm": 6.318525136817179, + "learning_rate": 8.486131060022303e-06, + "loss": 17.2715, + "step": 15143 + }, + { + "epoch": 0.27682015098616264, + "grad_norm": 6.556186221346598, + "learning_rate": 8.485918857693855e-06, + "loss": 17.885, + "step": 15144 + }, + { + "epoch": 0.2768384301826092, + "grad_norm": 5.674162468492682, + "learning_rate": 8.485706643147567e-06, + "loss": 17.1208, + "step": 15145 + }, + { + "epoch": 0.2768567093790557, + "grad_norm": 7.981543699737078, + "learning_rate": 8.485494416384177e-06, + "loss": 17.711, + "step": 15146 + }, + { + "epoch": 0.27687498857550225, + "grad_norm": 5.898058900565279, + "learning_rate": 8.485282177404433e-06, + "loss": 17.3461, + "step": 15147 + }, + { + "epoch": 0.2768932677719487, + "grad_norm": 7.49345719971531, + "learning_rate": 8.485069926209076e-06, + "loss": 17.9474, + "step": 15148 + }, + { + "epoch": 0.27691154696839526, + "grad_norm": 6.075856025454034, + "learning_rate": 8.484857662798853e-06, + "loss": 17.3403, + "step": 15149 + }, + { + "epoch": 0.2769298261648418, + "grad_norm": 8.057692135962697, + "learning_rate": 8.484645387174505e-06, + "loss": 18.4278, + "step": 15150 + }, + { + "epoch": 0.27694810536128833, + "grad_norm": 5.72547415657414, + "learning_rate": 8.484433099336778e-06, + "loss": 17.1654, + "step": 15151 + }, + { + "epoch": 0.27696638455773487, + "grad_norm": 8.258010290672802, + "learning_rate": 8.484220799286414e-06, + "loss": 18.2429, + "step": 15152 + }, + { + "epoch": 0.27698466375418135, + "grad_norm": 6.320224459549836, + "learning_rate": 8.484008487024159e-06, + "loss": 17.6195, + "step": 15153 + }, + { + "epoch": 0.2770029429506279, + "grad_norm": 5.809548766519819, + "learning_rate": 8.483796162550756e-06, + "loss": 17.1871, + "step": 15154 + }, + { + "epoch": 0.2770212221470744, + "grad_norm": 6.292791614927964, + "learning_rate": 8.483583825866952e-06, + "loss": 17.5189, + "step": 15155 + }, + { + "epoch": 0.27703950134352096, + "grad_norm": 7.2897651927097975, + "learning_rate": 8.483371476973488e-06, + "loss": 17.9668, + "step": 15156 + }, + { + "epoch": 0.27705778053996744, + "grad_norm": 6.346951510267883, + "learning_rate": 8.483159115871109e-06, + "loss": 17.4059, + "step": 15157 + }, + { + "epoch": 0.27707605973641397, + "grad_norm": 8.069388792248683, + "learning_rate": 8.48294674256056e-06, + "loss": 18.2912, + "step": 15158 + }, + { + "epoch": 0.2770943389328605, + "grad_norm": 6.267336526982611, + "learning_rate": 8.482734357042584e-06, + "loss": 17.3161, + "step": 15159 + }, + { + "epoch": 0.27711261812930704, + "grad_norm": 6.705459898303039, + "learning_rate": 8.482521959317926e-06, + "loss": 17.3089, + "step": 15160 + }, + { + "epoch": 0.2771308973257536, + "grad_norm": 5.928807624867987, + "learning_rate": 8.482309549387333e-06, + 
"loss": 17.4545, + "step": 15161 + }, + { + "epoch": 0.27714917652220006, + "grad_norm": 7.814767906037155, + "learning_rate": 8.482097127251545e-06, + "loss": 17.5292, + "step": 15162 + }, + { + "epoch": 0.2771674557186466, + "grad_norm": 5.816355538136252, + "learning_rate": 8.481884692911308e-06, + "loss": 17.2492, + "step": 15163 + }, + { + "epoch": 0.27718573491509313, + "grad_norm": 7.033786910236613, + "learning_rate": 8.48167224636737e-06, + "loss": 18.1615, + "step": 15164 + }, + { + "epoch": 0.27720401411153966, + "grad_norm": 5.430195463635135, + "learning_rate": 8.481459787620472e-06, + "loss": 17.1092, + "step": 15165 + }, + { + "epoch": 0.2772222933079862, + "grad_norm": 6.123967341476146, + "learning_rate": 8.481247316671358e-06, + "loss": 17.4193, + "step": 15166 + }, + { + "epoch": 0.2772405725044327, + "grad_norm": 5.5224896668202765, + "learning_rate": 8.481034833520776e-06, + "loss": 17.1137, + "step": 15167 + }, + { + "epoch": 0.2772588517008792, + "grad_norm": 5.863051389843126, + "learning_rate": 8.480822338169468e-06, + "loss": 17.2692, + "step": 15168 + }, + { + "epoch": 0.27727713089732575, + "grad_norm": 6.441945504782519, + "learning_rate": 8.48060983061818e-06, + "loss": 17.6335, + "step": 15169 + }, + { + "epoch": 0.2772954100937723, + "grad_norm": 6.34402376146172, + "learning_rate": 8.480397310867657e-06, + "loss": 17.4571, + "step": 15170 + }, + { + "epoch": 0.2773136892902188, + "grad_norm": 8.353750088079025, + "learning_rate": 8.480184778918644e-06, + "loss": 18.0318, + "step": 15171 + }, + { + "epoch": 0.2773319684866653, + "grad_norm": 7.132669222601691, + "learning_rate": 8.479972234771883e-06, + "loss": 17.8458, + "step": 15172 + }, + { + "epoch": 0.27735024768311184, + "grad_norm": 6.210608983505865, + "learning_rate": 8.479759678428123e-06, + "loss": 17.5225, + "step": 15173 + }, + { + "epoch": 0.2773685268795584, + "grad_norm": 6.017708530134503, + "learning_rate": 8.479547109888108e-06, + "loss": 17.2827, + "step": 15174 + }, + { + "epoch": 0.2773868060760049, + "grad_norm": 7.086861207018966, + "learning_rate": 8.479334529152582e-06, + "loss": 17.6825, + "step": 15175 + }, + { + "epoch": 0.27740508527245145, + "grad_norm": 6.989417468633682, + "learning_rate": 8.479121936222288e-06, + "loss": 17.7719, + "step": 15176 + }, + { + "epoch": 0.2774233644688979, + "grad_norm": 6.034472674549152, + "learning_rate": 8.478909331097975e-06, + "loss": 17.4463, + "step": 15177 + }, + { + "epoch": 0.27744164366534446, + "grad_norm": 6.788649532752917, + "learning_rate": 8.478696713780388e-06, + "loss": 17.771, + "step": 15178 + }, + { + "epoch": 0.277459922861791, + "grad_norm": 6.179746451369153, + "learning_rate": 8.47848408427027e-06, + "loss": 17.2697, + "step": 15179 + }, + { + "epoch": 0.27747820205823753, + "grad_norm": 6.021264277558732, + "learning_rate": 8.478271442568366e-06, + "loss": 17.1685, + "step": 15180 + }, + { + "epoch": 0.27749648125468407, + "grad_norm": 6.211870963934204, + "learning_rate": 8.478058788675424e-06, + "loss": 17.2924, + "step": 15181 + }, + { + "epoch": 0.27751476045113055, + "grad_norm": 6.08344944737126, + "learning_rate": 8.477846122592188e-06, + "loss": 17.3834, + "step": 15182 + }, + { + "epoch": 0.2775330396475771, + "grad_norm": 7.322540912423537, + "learning_rate": 8.477633444319401e-06, + "loss": 17.6685, + "step": 15183 + }, + { + "epoch": 0.2775513188440236, + "grad_norm": 6.987847075776526, + "learning_rate": 8.477420753857813e-06, + "loss": 17.7501, + "step": 15184 + }, + { + "epoch": 0.27756959804047016, + 
"grad_norm": 7.145721719355926, + "learning_rate": 8.477208051208166e-06, + "loss": 17.8193, + "step": 15185 + }, + { + "epoch": 0.2775878772369167, + "grad_norm": 7.465490201658691, + "learning_rate": 8.476995336371207e-06, + "loss": 17.9038, + "step": 15186 + }, + { + "epoch": 0.27760615643336317, + "grad_norm": 6.450686128331409, + "learning_rate": 8.47678260934768e-06, + "loss": 17.7545, + "step": 15187 + }, + { + "epoch": 0.2776244356298097, + "grad_norm": 5.831032254584482, + "learning_rate": 8.476569870138332e-06, + "loss": 17.2206, + "step": 15188 + }, + { + "epoch": 0.27764271482625624, + "grad_norm": 6.9563310533709055, + "learning_rate": 8.476357118743909e-06, + "loss": 17.5481, + "step": 15189 + }, + { + "epoch": 0.2776609940227028, + "grad_norm": 7.393013819431112, + "learning_rate": 8.476144355165154e-06, + "loss": 17.7355, + "step": 15190 + }, + { + "epoch": 0.27767927321914926, + "grad_norm": 6.801743686131153, + "learning_rate": 8.475931579402816e-06, + "loss": 17.6852, + "step": 15191 + }, + { + "epoch": 0.2776975524155958, + "grad_norm": 6.118024024183996, + "learning_rate": 8.475718791457642e-06, + "loss": 17.3726, + "step": 15192 + }, + { + "epoch": 0.27771583161204233, + "grad_norm": 6.365383240770513, + "learning_rate": 8.47550599133037e-06, + "loss": 17.4786, + "step": 15193 + }, + { + "epoch": 0.27773411080848887, + "grad_norm": 7.625483733167843, + "learning_rate": 8.475293179021756e-06, + "loss": 18.2475, + "step": 15194 + }, + { + "epoch": 0.2777523900049354, + "grad_norm": 5.163126788299081, + "learning_rate": 8.475080354532538e-06, + "loss": 17.2556, + "step": 15195 + }, + { + "epoch": 0.2777706692013819, + "grad_norm": 6.736192556992801, + "learning_rate": 8.474867517863466e-06, + "loss": 17.5523, + "step": 15196 + }, + { + "epoch": 0.2777889483978284, + "grad_norm": 6.561879531359489, + "learning_rate": 8.474654669015285e-06, + "loss": 17.5083, + "step": 15197 + }, + { + "epoch": 0.27780722759427495, + "grad_norm": 6.758920266786231, + "learning_rate": 8.47444180798874e-06, + "loss": 17.5761, + "step": 15198 + }, + { + "epoch": 0.2778255067907215, + "grad_norm": 6.142855284877659, + "learning_rate": 8.474228934784579e-06, + "loss": 17.2216, + "step": 15199 + }, + { + "epoch": 0.277843785987168, + "grad_norm": 6.649292497145365, + "learning_rate": 8.474016049403547e-06, + "loss": 17.4655, + "step": 15200 + }, + { + "epoch": 0.2778620651836145, + "grad_norm": 5.50710130195196, + "learning_rate": 8.473803151846389e-06, + "loss": 17.1131, + "step": 15201 + }, + { + "epoch": 0.27788034438006104, + "grad_norm": 5.051523217599611, + "learning_rate": 8.473590242113854e-06, + "loss": 16.9992, + "step": 15202 + }, + { + "epoch": 0.2778986235765076, + "grad_norm": 7.419341418076164, + "learning_rate": 8.473377320206685e-06, + "loss": 18.1681, + "step": 15203 + }, + { + "epoch": 0.2779169027729541, + "grad_norm": 6.583411215133172, + "learning_rate": 8.47316438612563e-06, + "loss": 17.6993, + "step": 15204 + }, + { + "epoch": 0.27793518196940065, + "grad_norm": 10.621010132856448, + "learning_rate": 8.472951439871437e-06, + "loss": 17.979, + "step": 15205 + }, + { + "epoch": 0.2779534611658471, + "grad_norm": 6.9761154458200485, + "learning_rate": 8.472738481444849e-06, + "loss": 17.7062, + "step": 15206 + }, + { + "epoch": 0.27797174036229366, + "grad_norm": 8.09995108118209, + "learning_rate": 8.472525510846615e-06, + "loss": 18.1606, + "step": 15207 + }, + { + "epoch": 0.2779900195587402, + "grad_norm": 6.237726067150974, + "learning_rate": 8.472312528077478e-06, + 
"loss": 17.2962, + "step": 15208 + }, + { + "epoch": 0.27800829875518673, + "grad_norm": 8.064495181918465, + "learning_rate": 8.472099533138189e-06, + "loss": 18.4658, + "step": 15209 + }, + { + "epoch": 0.27802657795163327, + "grad_norm": 5.936813021744895, + "learning_rate": 8.471886526029494e-06, + "loss": 17.1052, + "step": 15210 + }, + { + "epoch": 0.27804485714807975, + "grad_norm": 6.116096489328429, + "learning_rate": 8.471673506752135e-06, + "loss": 17.3807, + "step": 15211 + }, + { + "epoch": 0.2780631363445263, + "grad_norm": 6.622652093645998, + "learning_rate": 8.471460475306862e-06, + "loss": 17.6376, + "step": 15212 + }, + { + "epoch": 0.2780814155409728, + "grad_norm": 6.51173487662506, + "learning_rate": 8.471247431694422e-06, + "loss": 17.6722, + "step": 15213 + }, + { + "epoch": 0.27809969473741936, + "grad_norm": 6.6349494648415925, + "learning_rate": 8.471034375915562e-06, + "loss": 17.2563, + "step": 15214 + }, + { + "epoch": 0.2781179739338659, + "grad_norm": 6.05963094836774, + "learning_rate": 8.470821307971026e-06, + "loss": 17.3188, + "step": 15215 + }, + { + "epoch": 0.27813625313031237, + "grad_norm": 7.724124385030072, + "learning_rate": 8.470608227861565e-06, + "loss": 17.6223, + "step": 15216 + }, + { + "epoch": 0.2781545323267589, + "grad_norm": 7.930717661901581, + "learning_rate": 8.47039513558792e-06, + "loss": 18.4473, + "step": 15217 + }, + { + "epoch": 0.27817281152320544, + "grad_norm": 6.870144256274125, + "learning_rate": 8.470182031150843e-06, + "loss": 17.8599, + "step": 15218 + }, + { + "epoch": 0.278191090719652, + "grad_norm": 6.339470043941988, + "learning_rate": 8.46996891455108e-06, + "loss": 17.6292, + "step": 15219 + }, + { + "epoch": 0.2782093699160985, + "grad_norm": 6.876491826935089, + "learning_rate": 8.469755785789375e-06, + "loss": 17.8334, + "step": 15220 + }, + { + "epoch": 0.278227649112545, + "grad_norm": 8.450256509774235, + "learning_rate": 8.469542644866478e-06, + "loss": 18.408, + "step": 15221 + }, + { + "epoch": 0.27824592830899153, + "grad_norm": 7.71338462243798, + "learning_rate": 8.469329491783136e-06, + "loss": 17.814, + "step": 15222 + }, + { + "epoch": 0.27826420750543807, + "grad_norm": 6.619253118741429, + "learning_rate": 8.469116326540094e-06, + "loss": 17.4123, + "step": 15223 + }, + { + "epoch": 0.2782824867018846, + "grad_norm": 8.064322766637254, + "learning_rate": 8.468903149138102e-06, + "loss": 18.4396, + "step": 15224 + }, + { + "epoch": 0.2783007658983311, + "grad_norm": 7.097520508723367, + "learning_rate": 8.468689959577902e-06, + "loss": 17.9868, + "step": 15225 + }, + { + "epoch": 0.2783190450947776, + "grad_norm": 6.357331624950789, + "learning_rate": 8.468476757860247e-06, + "loss": 17.1258, + "step": 15226 + }, + { + "epoch": 0.27833732429122415, + "grad_norm": 6.795499416133291, + "learning_rate": 8.468263543985882e-06, + "loss": 18.116, + "step": 15227 + }, + { + "epoch": 0.2783556034876707, + "grad_norm": 5.91248706771861, + "learning_rate": 8.468050317955554e-06, + "loss": 17.3861, + "step": 15228 + }, + { + "epoch": 0.2783738826841172, + "grad_norm": 6.875592199296723, + "learning_rate": 8.467837079770012e-06, + "loss": 17.7825, + "step": 15229 + }, + { + "epoch": 0.2783921618805637, + "grad_norm": 7.0886399327734475, + "learning_rate": 8.46762382943e-06, + "loss": 17.9897, + "step": 15230 + }, + { + "epoch": 0.27841044107701024, + "grad_norm": 6.236725079378205, + "learning_rate": 8.467410566936267e-06, + "loss": 17.7535, + "step": 15231 + }, + { + "epoch": 0.2784287202734568, + 
"grad_norm": 7.666071532080812, + "learning_rate": 8.467197292289562e-06, + "loss": 18.0279, + "step": 15232 + }, + { + "epoch": 0.2784469994699033, + "grad_norm": 6.587910759013121, + "learning_rate": 8.46698400549063e-06, + "loss": 17.2562, + "step": 15233 + }, + { + "epoch": 0.27846527866634985, + "grad_norm": 6.868167550343062, + "learning_rate": 8.466770706540222e-06, + "loss": 17.8229, + "step": 15234 + }, + { + "epoch": 0.2784835578627963, + "grad_norm": 5.841143692922496, + "learning_rate": 8.466557395439083e-06, + "loss": 17.2415, + "step": 15235 + }, + { + "epoch": 0.27850183705924286, + "grad_norm": 6.171709736836082, + "learning_rate": 8.46634407218796e-06, + "loss": 17.4398, + "step": 15236 + }, + { + "epoch": 0.2785201162556894, + "grad_norm": 8.425015007776755, + "learning_rate": 8.466130736787603e-06, + "loss": 18.005, + "step": 15237 + }, + { + "epoch": 0.27853839545213593, + "grad_norm": 6.6812126239316125, + "learning_rate": 8.465917389238757e-06, + "loss": 17.6213, + "step": 15238 + }, + { + "epoch": 0.27855667464858247, + "grad_norm": 5.946752238077004, + "learning_rate": 8.465704029542173e-06, + "loss": 17.321, + "step": 15239 + }, + { + "epoch": 0.27857495384502895, + "grad_norm": 6.8249810466527405, + "learning_rate": 8.465490657698596e-06, + "loss": 17.6091, + "step": 15240 + }, + { + "epoch": 0.2785932330414755, + "grad_norm": 6.257116990467253, + "learning_rate": 8.465277273708777e-06, + "loss": 17.5928, + "step": 15241 + }, + { + "epoch": 0.278611512237922, + "grad_norm": 8.377971703319272, + "learning_rate": 8.465063877573459e-06, + "loss": 18.2086, + "step": 15242 + }, + { + "epoch": 0.27862979143436856, + "grad_norm": 6.373933554991297, + "learning_rate": 8.464850469293396e-06, + "loss": 17.3858, + "step": 15243 + }, + { + "epoch": 0.2786480706308151, + "grad_norm": 6.426640808081779, + "learning_rate": 8.46463704886933e-06, + "loss": 17.4808, + "step": 15244 + }, + { + "epoch": 0.2786663498272616, + "grad_norm": 6.208908698906014, + "learning_rate": 8.464423616302012e-06, + "loss": 17.2587, + "step": 15245 + }, + { + "epoch": 0.2786846290237081, + "grad_norm": 6.426478401626894, + "learning_rate": 8.46421017159219e-06, + "loss": 17.3913, + "step": 15246 + }, + { + "epoch": 0.27870290822015464, + "grad_norm": 6.509789723463751, + "learning_rate": 8.463996714740614e-06, + "loss": 17.543, + "step": 15247 + }, + { + "epoch": 0.2787211874166012, + "grad_norm": 6.309781055249573, + "learning_rate": 8.463783245748028e-06, + "loss": 17.9775, + "step": 15248 + }, + { + "epoch": 0.2787394666130477, + "grad_norm": 7.551419698529556, + "learning_rate": 8.463569764615183e-06, + "loss": 18.1277, + "step": 15249 + }, + { + "epoch": 0.2787577458094942, + "grad_norm": 5.969849642908827, + "learning_rate": 8.463356271342826e-06, + "loss": 17.3494, + "step": 15250 + }, + { + "epoch": 0.27877602500594073, + "grad_norm": 6.701235421654121, + "learning_rate": 8.463142765931706e-06, + "loss": 17.9205, + "step": 15251 + }, + { + "epoch": 0.27879430420238727, + "grad_norm": 5.386231186161452, + "learning_rate": 8.462929248382572e-06, + "loss": 17.265, + "step": 15252 + }, + { + "epoch": 0.2788125833988338, + "grad_norm": 7.491369075230487, + "learning_rate": 8.46271571869617e-06, + "loss": 17.4947, + "step": 15253 + }, + { + "epoch": 0.27883086259528034, + "grad_norm": 6.546424260274679, + "learning_rate": 8.46250217687325e-06, + "loss": 17.5241, + "step": 15254 + }, + { + "epoch": 0.2788491417917268, + "grad_norm": 5.975339806240297, + "learning_rate": 8.462288622914563e-06, + 
"loss": 17.4023, + "step": 15255 + }, + { + "epoch": 0.27886742098817335, + "grad_norm": 5.741204871009067, + "learning_rate": 8.462075056820852e-06, + "loss": 17.3395, + "step": 15256 + }, + { + "epoch": 0.2788857001846199, + "grad_norm": 6.986842947076972, + "learning_rate": 8.46186147859287e-06, + "loss": 17.5549, + "step": 15257 + }, + { + "epoch": 0.2789039793810664, + "grad_norm": 8.520068982359538, + "learning_rate": 8.461647888231363e-06, + "loss": 18.7468, + "step": 15258 + }, + { + "epoch": 0.2789222585775129, + "grad_norm": 5.132084312559286, + "learning_rate": 8.46143428573708e-06, + "loss": 16.9836, + "step": 15259 + }, + { + "epoch": 0.27894053777395944, + "grad_norm": 6.018714877199563, + "learning_rate": 8.46122067111077e-06, + "loss": 17.1421, + "step": 15260 + }, + { + "epoch": 0.278958816970406, + "grad_norm": 8.834433967022733, + "learning_rate": 8.461007044353184e-06, + "loss": 18.7856, + "step": 15261 + }, + { + "epoch": 0.2789770961668525, + "grad_norm": 6.720650682562521, + "learning_rate": 8.460793405465066e-06, + "loss": 17.8257, + "step": 15262 + }, + { + "epoch": 0.27899537536329905, + "grad_norm": 5.181558983727919, + "learning_rate": 8.46057975444717e-06, + "loss": 16.9849, + "step": 15263 + }, + { + "epoch": 0.2790136545597455, + "grad_norm": 6.330977568142363, + "learning_rate": 8.460366091300241e-06, + "loss": 17.465, + "step": 15264 + }, + { + "epoch": 0.27903193375619206, + "grad_norm": 7.0797762558721935, + "learning_rate": 8.460152416025029e-06, + "loss": 17.7323, + "step": 15265 + }, + { + "epoch": 0.2790502129526386, + "grad_norm": 6.604603805911028, + "learning_rate": 8.459938728622284e-06, + "loss": 17.4136, + "step": 15266 + }, + { + "epoch": 0.27906849214908513, + "grad_norm": 5.953423256831014, + "learning_rate": 8.459725029092754e-06, + "loss": 17.2549, + "step": 15267 + }, + { + "epoch": 0.27908677134553167, + "grad_norm": 5.211822564212015, + "learning_rate": 8.459511317437185e-06, + "loss": 17.1655, + "step": 15268 + }, + { + "epoch": 0.27910505054197815, + "grad_norm": 7.650219297859471, + "learning_rate": 8.459297593656333e-06, + "loss": 18.0305, + "step": 15269 + }, + { + "epoch": 0.2791233297384247, + "grad_norm": 5.4568084959639025, + "learning_rate": 8.45908385775094e-06, + "loss": 17.2651, + "step": 15270 + }, + { + "epoch": 0.2791416089348712, + "grad_norm": 6.108865668184069, + "learning_rate": 8.45887010972176e-06, + "loss": 17.3542, + "step": 15271 + }, + { + "epoch": 0.27915988813131776, + "grad_norm": 7.042935463620705, + "learning_rate": 8.45865634956954e-06, + "loss": 17.7592, + "step": 15272 + }, + { + "epoch": 0.2791781673277643, + "grad_norm": 7.0895414363913165, + "learning_rate": 8.45844257729503e-06, + "loss": 17.6137, + "step": 15273 + }, + { + "epoch": 0.2791964465242108, + "grad_norm": 5.97027588161131, + "learning_rate": 8.45822879289898e-06, + "loss": 17.2089, + "step": 15274 + }, + { + "epoch": 0.2792147257206573, + "grad_norm": 6.328044998520751, + "learning_rate": 8.458014996382136e-06, + "loss": 17.2472, + "step": 15275 + }, + { + "epoch": 0.27923300491710384, + "grad_norm": 8.500126130847903, + "learning_rate": 8.457801187745249e-06, + "loss": 18.5316, + "step": 15276 + }, + { + "epoch": 0.2792512841135504, + "grad_norm": 5.9999936008840296, + "learning_rate": 8.45758736698907e-06, + "loss": 17.2843, + "step": 15277 + }, + { + "epoch": 0.2792695633099969, + "grad_norm": 5.768989858580754, + "learning_rate": 8.457373534114347e-06, + "loss": 17.23, + "step": 15278 + }, + { + "epoch": 0.2792878425064434, + 
"grad_norm": 5.676785352440452, + "learning_rate": 8.457159689121832e-06, + "loss": 17.2136, + "step": 15279 + }, + { + "epoch": 0.27930612170288993, + "grad_norm": 5.623070110265875, + "learning_rate": 8.45694583201227e-06, + "loss": 17.233, + "step": 15280 + }, + { + "epoch": 0.27932440089933647, + "grad_norm": 5.297347293676569, + "learning_rate": 8.456731962786413e-06, + "loss": 16.9945, + "step": 15281 + }, + { + "epoch": 0.279342680095783, + "grad_norm": 5.718687791115308, + "learning_rate": 8.456518081445012e-06, + "loss": 17.4141, + "step": 15282 + }, + { + "epoch": 0.27936095929222954, + "grad_norm": 7.002780859931119, + "learning_rate": 8.456304187988813e-06, + "loss": 17.5433, + "step": 15283 + }, + { + "epoch": 0.279379238488676, + "grad_norm": 4.166955275633923, + "learning_rate": 8.456090282418567e-06, + "loss": 16.6323, + "step": 15284 + }, + { + "epoch": 0.27939751768512255, + "grad_norm": 6.373873436434632, + "learning_rate": 8.455876364735029e-06, + "loss": 17.451, + "step": 15285 + }, + { + "epoch": 0.2794157968815691, + "grad_norm": 6.079561982362909, + "learning_rate": 8.45566243493894e-06, + "loss": 17.389, + "step": 15286 + }, + { + "epoch": 0.2794340760780156, + "grad_norm": 6.948018820344659, + "learning_rate": 8.455448493031055e-06, + "loss": 17.5749, + "step": 15287 + }, + { + "epoch": 0.27945235527446216, + "grad_norm": 5.979548652802669, + "learning_rate": 8.455234539012124e-06, + "loss": 17.3088, + "step": 15288 + }, + { + "epoch": 0.27947063447090864, + "grad_norm": 6.235225684157927, + "learning_rate": 8.455020572882895e-06, + "loss": 17.519, + "step": 15289 + }, + { + "epoch": 0.2794889136673552, + "grad_norm": 5.57882309167049, + "learning_rate": 8.454806594644118e-06, + "loss": 16.9601, + "step": 15290 + }, + { + "epoch": 0.2795071928638017, + "grad_norm": 8.709692939224485, + "learning_rate": 8.454592604296544e-06, + "loss": 18.4858, + "step": 15291 + }, + { + "epoch": 0.27952547206024825, + "grad_norm": 5.684572081931391, + "learning_rate": 8.454378601840924e-06, + "loss": 17.139, + "step": 15292 + }, + { + "epoch": 0.27954375125669473, + "grad_norm": 5.942677312137361, + "learning_rate": 8.454164587278005e-06, + "loss": 17.2136, + "step": 15293 + }, + { + "epoch": 0.27956203045314126, + "grad_norm": 5.81287032481252, + "learning_rate": 8.45395056060854e-06, + "loss": 17.1906, + "step": 15294 + }, + { + "epoch": 0.2795803096495878, + "grad_norm": 7.4421357602697, + "learning_rate": 8.453736521833279e-06, + "loss": 17.7026, + "step": 15295 + }, + { + "epoch": 0.27959858884603433, + "grad_norm": 6.87095794038651, + "learning_rate": 8.453522470952968e-06, + "loss": 17.4585, + "step": 15296 + }, + { + "epoch": 0.27961686804248087, + "grad_norm": 8.767742792641915, + "learning_rate": 8.453308407968363e-06, + "loss": 17.8881, + "step": 15297 + }, + { + "epoch": 0.27963514723892735, + "grad_norm": 6.283689084853071, + "learning_rate": 8.45309433288021e-06, + "loss": 17.3557, + "step": 15298 + }, + { + "epoch": 0.2796534264353739, + "grad_norm": 6.797083943751389, + "learning_rate": 8.452880245689264e-06, + "loss": 17.7693, + "step": 15299 + }, + { + "epoch": 0.2796717056318204, + "grad_norm": 6.775824178656483, + "learning_rate": 8.452666146396268e-06, + "loss": 17.8483, + "step": 15300 + }, + { + "epoch": 0.27968998482826696, + "grad_norm": 7.137447876229566, + "learning_rate": 8.452452035001981e-06, + "loss": 18.0176, + "step": 15301 + }, + { + "epoch": 0.2797082640247135, + "grad_norm": 6.7099557814928055, + "learning_rate": 8.452237911507147e-06, + "loss": 
17.6612, + "step": 15302 + }, + { + "epoch": 0.27972654322116, + "grad_norm": 6.140429114030082, + "learning_rate": 8.45202377591252e-06, + "loss": 17.333, + "step": 15303 + }, + { + "epoch": 0.2797448224176065, + "grad_norm": 5.7035182472633705, + "learning_rate": 8.451809628218847e-06, + "loss": 17.1685, + "step": 15304 + }, + { + "epoch": 0.27976310161405304, + "grad_norm": 6.961475674515516, + "learning_rate": 8.451595468426882e-06, + "loss": 17.4019, + "step": 15305 + }, + { + "epoch": 0.2797813808104996, + "grad_norm": 6.981320637514193, + "learning_rate": 8.451381296537375e-06, + "loss": 17.6227, + "step": 15306 + }, + { + "epoch": 0.2797996600069461, + "grad_norm": 7.217081963757125, + "learning_rate": 8.451167112551076e-06, + "loss": 17.7034, + "step": 15307 + }, + { + "epoch": 0.2798179392033926, + "grad_norm": 6.63198113376361, + "learning_rate": 8.450952916468734e-06, + "loss": 17.5213, + "step": 15308 + }, + { + "epoch": 0.27983621839983913, + "grad_norm": 6.327382881871625, + "learning_rate": 8.450738708291105e-06, + "loss": 17.511, + "step": 15309 + }, + { + "epoch": 0.27985449759628567, + "grad_norm": 6.813876586995132, + "learning_rate": 8.450524488018933e-06, + "loss": 17.7001, + "step": 15310 + }, + { + "epoch": 0.2798727767927322, + "grad_norm": 6.38672657410487, + "learning_rate": 8.450310255652972e-06, + "loss": 17.3549, + "step": 15311 + }, + { + "epoch": 0.27989105598917874, + "grad_norm": 6.390512941314512, + "learning_rate": 8.450096011193975e-06, + "loss": 17.6012, + "step": 15312 + }, + { + "epoch": 0.2799093351856252, + "grad_norm": 5.632404099979596, + "learning_rate": 8.44988175464269e-06, + "loss": 16.9748, + "step": 15313 + }, + { + "epoch": 0.27992761438207175, + "grad_norm": 7.337163444232834, + "learning_rate": 8.449667485999868e-06, + "loss": 17.5949, + "step": 15314 + }, + { + "epoch": 0.2799458935785183, + "grad_norm": 6.18730547586552, + "learning_rate": 8.449453205266262e-06, + "loss": 17.6807, + "step": 15315 + }, + { + "epoch": 0.2799641727749648, + "grad_norm": 7.3207355037896225, + "learning_rate": 8.449238912442621e-06, + "loss": 17.6556, + "step": 15316 + }, + { + "epoch": 0.27998245197141136, + "grad_norm": 5.9872163701769265, + "learning_rate": 8.449024607529696e-06, + "loss": 17.3126, + "step": 15317 + }, + { + "epoch": 0.28000073116785784, + "grad_norm": 5.858221998433997, + "learning_rate": 8.448810290528242e-06, + "loss": 17.4937, + "step": 15318 + }, + { + "epoch": 0.2800190103643044, + "grad_norm": 6.761771220793409, + "learning_rate": 8.448595961439006e-06, + "loss": 17.6793, + "step": 15319 + }, + { + "epoch": 0.2800372895607509, + "grad_norm": 5.870076405893251, + "learning_rate": 8.44838162026274e-06, + "loss": 17.0499, + "step": 15320 + }, + { + "epoch": 0.28005556875719745, + "grad_norm": 7.916559103623928, + "learning_rate": 8.448167267000195e-06, + "loss": 18.4175, + "step": 15321 + }, + { + "epoch": 0.280073847953644, + "grad_norm": 6.791354085807862, + "learning_rate": 8.447952901652123e-06, + "loss": 17.4933, + "step": 15322 + }, + { + "epoch": 0.28009212715009046, + "grad_norm": 7.122444690494139, + "learning_rate": 8.447738524219277e-06, + "loss": 18.0934, + "step": 15323 + }, + { + "epoch": 0.280110406346537, + "grad_norm": 5.3402499262137155, + "learning_rate": 8.447524134702405e-06, + "loss": 17.1074, + "step": 15324 + }, + { + "epoch": 0.28012868554298354, + "grad_norm": 6.198544112878502, + "learning_rate": 8.44730973310226e-06, + "loss": 17.4524, + "step": 15325 + }, + { + "epoch": 0.28014696473943007, + "grad_norm": 
7.0907625758521675, + "learning_rate": 8.447095319419594e-06, + "loss": 17.8427, + "step": 15326 + }, + { + "epoch": 0.28016524393587655, + "grad_norm": 5.831380913616366, + "learning_rate": 8.446880893655158e-06, + "loss": 17.3841, + "step": 15327 + }, + { + "epoch": 0.2801835231323231, + "grad_norm": 7.4686457544120275, + "learning_rate": 8.446666455809705e-06, + "loss": 17.7447, + "step": 15328 + }, + { + "epoch": 0.2802018023287696, + "grad_norm": 6.541385684027912, + "learning_rate": 8.446452005883982e-06, + "loss": 17.8826, + "step": 15329 + }, + { + "epoch": 0.28022008152521616, + "grad_norm": 6.4541582637804, + "learning_rate": 8.446237543878748e-06, + "loss": 17.5701, + "step": 15330 + }, + { + "epoch": 0.2802383607216627, + "grad_norm": 6.377299000804159, + "learning_rate": 8.446023069794747e-06, + "loss": 17.5911, + "step": 15331 + }, + { + "epoch": 0.2802566399181092, + "grad_norm": 6.58921984015208, + "learning_rate": 8.445808583632734e-06, + "loss": 17.7445, + "step": 15332 + }, + { + "epoch": 0.2802749191145557, + "grad_norm": 7.439213185690679, + "learning_rate": 8.445594085393463e-06, + "loss": 17.6031, + "step": 15333 + }, + { + "epoch": 0.28029319831100225, + "grad_norm": 6.227352568603799, + "learning_rate": 8.445379575077683e-06, + "loss": 17.3409, + "step": 15334 + }, + { + "epoch": 0.2803114775074488, + "grad_norm": 8.549842256884657, + "learning_rate": 8.445165052686147e-06, + "loss": 18.3006, + "step": 15335 + }, + { + "epoch": 0.2803297567038953, + "grad_norm": 7.032400686381592, + "learning_rate": 8.444950518219605e-06, + "loss": 17.9843, + "step": 15336 + }, + { + "epoch": 0.2803480359003418, + "grad_norm": 6.67063177388569, + "learning_rate": 8.444735971678812e-06, + "loss": 17.4586, + "step": 15337 + }, + { + "epoch": 0.28036631509678833, + "grad_norm": 7.207074160898403, + "learning_rate": 8.444521413064517e-06, + "loss": 17.9207, + "step": 15338 + }, + { + "epoch": 0.28038459429323487, + "grad_norm": 5.3906090184452085, + "learning_rate": 8.444306842377474e-06, + "loss": 16.894, + "step": 15339 + }, + { + "epoch": 0.2804028734896814, + "grad_norm": 8.653548263979046, + "learning_rate": 8.444092259618435e-06, + "loss": 18.2815, + "step": 15340 + }, + { + "epoch": 0.28042115268612794, + "grad_norm": 6.18004532586676, + "learning_rate": 8.443877664788151e-06, + "loss": 17.6296, + "step": 15341 + }, + { + "epoch": 0.2804394318825744, + "grad_norm": 8.501764140211932, + "learning_rate": 8.443663057887374e-06, + "loss": 17.7249, + "step": 15342 + }, + { + "epoch": 0.28045771107902095, + "grad_norm": 5.7218125891780325, + "learning_rate": 8.44344843891686e-06, + "loss": 16.9147, + "step": 15343 + }, + { + "epoch": 0.2804759902754675, + "grad_norm": 5.913127454267365, + "learning_rate": 8.443233807877353e-06, + "loss": 17.2629, + "step": 15344 + }, + { + "epoch": 0.280494269471914, + "grad_norm": 6.820627725264064, + "learning_rate": 8.443019164769613e-06, + "loss": 17.5698, + "step": 15345 + }, + { + "epoch": 0.28051254866836056, + "grad_norm": 6.022789544760202, + "learning_rate": 8.442804509594388e-06, + "loss": 17.3107, + "step": 15346 + }, + { + "epoch": 0.28053082786480704, + "grad_norm": 8.94514036680201, + "learning_rate": 8.442589842352436e-06, + "loss": 18.5076, + "step": 15347 + }, + { + "epoch": 0.2805491070612536, + "grad_norm": 6.1553618854905485, + "learning_rate": 8.442375163044502e-06, + "loss": 17.2621, + "step": 15348 + }, + { + "epoch": 0.2805673862577001, + "grad_norm": 5.815419277840977, + "learning_rate": 8.442160471671341e-06, + "loss": 
17.3209, + "step": 15349 + }, + { + "epoch": 0.28058566545414665, + "grad_norm": 7.022413121021528, + "learning_rate": 8.441945768233709e-06, + "loss": 17.7672, + "step": 15350 + }, + { + "epoch": 0.2806039446505932, + "grad_norm": 6.314723020154745, + "learning_rate": 8.441731052732354e-06, + "loss": 17.2963, + "step": 15351 + }, + { + "epoch": 0.28062222384703966, + "grad_norm": 6.0191116234295645, + "learning_rate": 8.44151632516803e-06, + "loss": 17.4075, + "step": 15352 + }, + { + "epoch": 0.2806405030434862, + "grad_norm": 5.652747273261852, + "learning_rate": 8.44130158554149e-06, + "loss": 17.2558, + "step": 15353 + }, + { + "epoch": 0.28065878223993274, + "grad_norm": 7.9446397636589765, + "learning_rate": 8.441086833853489e-06, + "loss": 17.9289, + "step": 15354 + }, + { + "epoch": 0.28067706143637927, + "grad_norm": 7.053167863551996, + "learning_rate": 8.440872070104776e-06, + "loss": 17.8498, + "step": 15355 + }, + { + "epoch": 0.2806953406328258, + "grad_norm": 7.827079397809964, + "learning_rate": 8.440657294296103e-06, + "loss": 17.8238, + "step": 15356 + }, + { + "epoch": 0.2807136198292723, + "grad_norm": 6.41107055789248, + "learning_rate": 8.440442506428225e-06, + "loss": 17.5368, + "step": 15357 + }, + { + "epoch": 0.2807318990257188, + "grad_norm": 6.278500525959484, + "learning_rate": 8.440227706501897e-06, + "loss": 17.7145, + "step": 15358 + }, + { + "epoch": 0.28075017822216536, + "grad_norm": 7.4986718163922745, + "learning_rate": 8.440012894517868e-06, + "loss": 17.7521, + "step": 15359 + }, + { + "epoch": 0.2807684574186119, + "grad_norm": 6.184931507062844, + "learning_rate": 8.439798070476891e-06, + "loss": 17.5835, + "step": 15360 + }, + { + "epoch": 0.2807867366150584, + "grad_norm": 6.681625724115947, + "learning_rate": 8.439583234379722e-06, + "loss": 17.6689, + "step": 15361 + }, + { + "epoch": 0.2808050158115049, + "grad_norm": 5.35760730682152, + "learning_rate": 8.439368386227114e-06, + "loss": 16.8947, + "step": 15362 + }, + { + "epoch": 0.28082329500795145, + "grad_norm": 6.548154694259325, + "learning_rate": 8.439153526019814e-06, + "loss": 17.3629, + "step": 15363 + }, + { + "epoch": 0.280841574204398, + "grad_norm": 5.17716918435155, + "learning_rate": 8.438938653758583e-06, + "loss": 16.9253, + "step": 15364 + }, + { + "epoch": 0.2808598534008445, + "grad_norm": 6.725656835076324, + "learning_rate": 8.438723769444167e-06, + "loss": 17.6387, + "step": 15365 + }, + { + "epoch": 0.280878132597291, + "grad_norm": 7.6716436303256605, + "learning_rate": 8.438508873077326e-06, + "loss": 17.9415, + "step": 15366 + }, + { + "epoch": 0.28089641179373753, + "grad_norm": 6.062060340168047, + "learning_rate": 8.438293964658808e-06, + "loss": 17.4276, + "step": 15367 + }, + { + "epoch": 0.28091469099018407, + "grad_norm": 6.5146708472749, + "learning_rate": 8.438079044189369e-06, + "loss": 17.4647, + "step": 15368 + }, + { + "epoch": 0.2809329701866306, + "grad_norm": 6.356540002782304, + "learning_rate": 8.43786411166976e-06, + "loss": 17.5814, + "step": 15369 + }, + { + "epoch": 0.28095124938307714, + "grad_norm": 7.0890911572255995, + "learning_rate": 8.437649167100736e-06, + "loss": 18.2637, + "step": 15370 + }, + { + "epoch": 0.2809695285795236, + "grad_norm": 6.842492378927014, + "learning_rate": 8.43743421048305e-06, + "loss": 17.8266, + "step": 15371 + }, + { + "epoch": 0.28098780777597016, + "grad_norm": 7.198672708326323, + "learning_rate": 8.437219241817456e-06, + "loss": 18.1389, + "step": 15372 + }, + { + "epoch": 0.2810060869724167, + 
"grad_norm": 7.038255403981865, + "learning_rate": 8.437004261104706e-06, + "loss": 17.7662, + "step": 15373 + }, + { + "epoch": 0.2810243661688632, + "grad_norm": 7.696241643414113, + "learning_rate": 8.436789268345555e-06, + "loss": 17.8438, + "step": 15374 + }, + { + "epoch": 0.28104264536530976, + "grad_norm": 6.362290596275576, + "learning_rate": 8.436574263540756e-06, + "loss": 17.4141, + "step": 15375 + }, + { + "epoch": 0.28106092456175624, + "grad_norm": 6.772775790329577, + "learning_rate": 8.436359246691062e-06, + "loss": 17.8197, + "step": 15376 + }, + { + "epoch": 0.2810792037582028, + "grad_norm": 6.125749331631318, + "learning_rate": 8.436144217797227e-06, + "loss": 17.3761, + "step": 15377 + }, + { + "epoch": 0.2810974829546493, + "grad_norm": 7.208241423222614, + "learning_rate": 8.435929176860004e-06, + "loss": 18.3448, + "step": 15378 + }, + { + "epoch": 0.28111576215109585, + "grad_norm": 7.937853810954681, + "learning_rate": 8.435714123880149e-06, + "loss": 17.792, + "step": 15379 + }, + { + "epoch": 0.2811340413475424, + "grad_norm": 6.1100040916416605, + "learning_rate": 8.435499058858413e-06, + "loss": 17.5025, + "step": 15380 + }, + { + "epoch": 0.28115232054398887, + "grad_norm": 8.344809145928709, + "learning_rate": 8.435283981795551e-06, + "loss": 18.2036, + "step": 15381 + }, + { + "epoch": 0.2811705997404354, + "grad_norm": 7.70988367137395, + "learning_rate": 8.435068892692317e-06, + "loss": 18.2193, + "step": 15382 + }, + { + "epoch": 0.28118887893688194, + "grad_norm": 6.554828258743084, + "learning_rate": 8.434853791549464e-06, + "loss": 17.4626, + "step": 15383 + }, + { + "epoch": 0.28120715813332847, + "grad_norm": 5.832048397802159, + "learning_rate": 8.434638678367747e-06, + "loss": 17.0833, + "step": 15384 + }, + { + "epoch": 0.281225437329775, + "grad_norm": 6.280671045705303, + "learning_rate": 8.43442355314792e-06, + "loss": 17.5083, + "step": 15385 + }, + { + "epoch": 0.2812437165262215, + "grad_norm": 7.426942848885526, + "learning_rate": 8.434208415890738e-06, + "loss": 18.0593, + "step": 15386 + }, + { + "epoch": 0.281261995722668, + "grad_norm": 6.082815319521803, + "learning_rate": 8.433993266596949e-06, + "loss": 17.0419, + "step": 15387 + }, + { + "epoch": 0.28128027491911456, + "grad_norm": 15.46209403251262, + "learning_rate": 8.433778105267314e-06, + "loss": 18.4867, + "step": 15388 + }, + { + "epoch": 0.2812985541155611, + "grad_norm": 7.5902355214347255, + "learning_rate": 8.433562931902585e-06, + "loss": 17.9188, + "step": 15389 + }, + { + "epoch": 0.28131683331200763, + "grad_norm": 6.61907768246289, + "learning_rate": 8.433347746503516e-06, + "loss": 17.4187, + "step": 15390 + }, + { + "epoch": 0.2813351125084541, + "grad_norm": 5.5708106598423965, + "learning_rate": 8.433132549070861e-06, + "loss": 17.286, + "step": 15391 + }, + { + "epoch": 0.28135339170490065, + "grad_norm": 7.01446519671279, + "learning_rate": 8.432917339605375e-06, + "loss": 17.9308, + "step": 15392 + }, + { + "epoch": 0.2813716709013472, + "grad_norm": 7.418061223298455, + "learning_rate": 8.43270211810781e-06, + "loss": 17.9232, + "step": 15393 + }, + { + "epoch": 0.2813899500977937, + "grad_norm": 6.169425131779138, + "learning_rate": 8.432486884578922e-06, + "loss": 17.5341, + "step": 15394 + }, + { + "epoch": 0.2814082292942402, + "grad_norm": 7.663989986517547, + "learning_rate": 8.432271639019466e-06, + "loss": 17.9359, + "step": 15395 + }, + { + "epoch": 0.28142650849068673, + "grad_norm": 6.417772884984047, + "learning_rate": 8.432056381430196e-06, + 
"loss": 17.4621, + "step": 15396 + }, + { + "epoch": 0.28144478768713327, + "grad_norm": 7.620574417470514, + "learning_rate": 8.431841111811864e-06, + "loss": 17.4967, + "step": 15397 + }, + { + "epoch": 0.2814630668835798, + "grad_norm": 5.205424429005494, + "learning_rate": 8.43162583016523e-06, + "loss": 16.956, + "step": 15398 + }, + { + "epoch": 0.28148134608002634, + "grad_norm": 9.94632121770399, + "learning_rate": 8.431410536491044e-06, + "loss": 18.3539, + "step": 15399 + }, + { + "epoch": 0.2814996252764728, + "grad_norm": 6.594947742304105, + "learning_rate": 8.431195230790062e-06, + "loss": 17.7453, + "step": 15400 + }, + { + "epoch": 0.28151790447291936, + "grad_norm": 5.987288957161305, + "learning_rate": 8.430979913063038e-06, + "loss": 17.1959, + "step": 15401 + }, + { + "epoch": 0.2815361836693659, + "grad_norm": 7.359071074054522, + "learning_rate": 8.430764583310727e-06, + "loss": 17.8892, + "step": 15402 + }, + { + "epoch": 0.2815544628658124, + "grad_norm": 6.409411253387844, + "learning_rate": 8.430549241533885e-06, + "loss": 17.3761, + "step": 15403 + }, + { + "epoch": 0.28157274206225896, + "grad_norm": 6.850570345828789, + "learning_rate": 8.430333887733265e-06, + "loss": 17.7419, + "step": 15404 + }, + { + "epoch": 0.28159102125870544, + "grad_norm": 7.4295910040714634, + "learning_rate": 8.430118521909623e-06, + "loss": 18.2057, + "step": 15405 + }, + { + "epoch": 0.281609300455152, + "grad_norm": 7.416582043114402, + "learning_rate": 8.429903144063711e-06, + "loss": 17.7305, + "step": 15406 + }, + { + "epoch": 0.2816275796515985, + "grad_norm": 8.537220994678437, + "learning_rate": 8.429687754196287e-06, + "loss": 18.4273, + "step": 15407 + }, + { + "epoch": 0.28164585884804505, + "grad_norm": 6.840629534012434, + "learning_rate": 8.429472352308106e-06, + "loss": 17.5493, + "step": 15408 + }, + { + "epoch": 0.2816641380444916, + "grad_norm": 7.47801949131773, + "learning_rate": 8.429256938399922e-06, + "loss": 17.9558, + "step": 15409 + }, + { + "epoch": 0.28168241724093807, + "grad_norm": 6.860487889768455, + "learning_rate": 8.42904151247249e-06, + "loss": 17.5536, + "step": 15410 + }, + { + "epoch": 0.2817006964373846, + "grad_norm": 7.406907383950079, + "learning_rate": 8.428826074526564e-06, + "loss": 18.189, + "step": 15411 + }, + { + "epoch": 0.28171897563383114, + "grad_norm": 6.839722894076168, + "learning_rate": 8.428610624562901e-06, + "loss": 17.9266, + "step": 15412 + }, + { + "epoch": 0.2817372548302777, + "grad_norm": 6.394184142195576, + "learning_rate": 8.428395162582255e-06, + "loss": 17.7828, + "step": 15413 + }, + { + "epoch": 0.2817555340267242, + "grad_norm": 6.365026512467458, + "learning_rate": 8.428179688585381e-06, + "loss": 17.3171, + "step": 15414 + }, + { + "epoch": 0.2817738132231707, + "grad_norm": 6.239152923057427, + "learning_rate": 8.427964202573035e-06, + "loss": 17.1903, + "step": 15415 + }, + { + "epoch": 0.2817920924196172, + "grad_norm": 6.800554911763028, + "learning_rate": 8.427748704545973e-06, + "loss": 17.5401, + "step": 15416 + }, + { + "epoch": 0.28181037161606376, + "grad_norm": 6.606856572064669, + "learning_rate": 8.427533194504947e-06, + "loss": 17.622, + "step": 15417 + }, + { + "epoch": 0.2818286508125103, + "grad_norm": 7.890341607230291, + "learning_rate": 8.427317672450717e-06, + "loss": 18.1035, + "step": 15418 + }, + { + "epoch": 0.28184693000895683, + "grad_norm": 4.968144029529073, + "learning_rate": 8.427102138384035e-06, + "loss": 16.7538, + "step": 15419 + }, + { + "epoch": 0.2818652092054033, + 
"grad_norm": 6.890989192427271, + "learning_rate": 8.426886592305656e-06, + "loss": 17.4269, + "step": 15420 + }, + { + "epoch": 0.28188348840184985, + "grad_norm": 6.498491295188947, + "learning_rate": 8.426671034216339e-06, + "loss": 17.6894, + "step": 15421 + }, + { + "epoch": 0.2819017675982964, + "grad_norm": 7.178414595108676, + "learning_rate": 8.426455464116836e-06, + "loss": 17.9236, + "step": 15422 + }, + { + "epoch": 0.2819200467947429, + "grad_norm": 8.1254247136996, + "learning_rate": 8.426239882007906e-06, + "loss": 18.5536, + "step": 15423 + }, + { + "epoch": 0.28193832599118945, + "grad_norm": 7.220048205570177, + "learning_rate": 8.4260242878903e-06, + "loss": 17.6249, + "step": 15424 + }, + { + "epoch": 0.28195660518763593, + "grad_norm": 7.2183637462018515, + "learning_rate": 8.425808681764776e-06, + "loss": 17.3812, + "step": 15425 + }, + { + "epoch": 0.28197488438408247, + "grad_norm": 6.614329901433477, + "learning_rate": 8.425593063632092e-06, + "loss": 17.5936, + "step": 15426 + }, + { + "epoch": 0.281993163580529, + "grad_norm": 7.934813215507083, + "learning_rate": 8.425377433493e-06, + "loss": 17.8976, + "step": 15427 + }, + { + "epoch": 0.28201144277697554, + "grad_norm": 7.4831568881450785, + "learning_rate": 8.425161791348258e-06, + "loss": 17.8004, + "step": 15428 + }, + { + "epoch": 0.282029721973422, + "grad_norm": 6.702634420573207, + "learning_rate": 8.424946137198621e-06, + "loss": 17.7498, + "step": 15429 + }, + { + "epoch": 0.28204800116986856, + "grad_norm": 6.774928563263164, + "learning_rate": 8.424730471044845e-06, + "loss": 17.8457, + "step": 15430 + }, + { + "epoch": 0.2820662803663151, + "grad_norm": 6.819093186442741, + "learning_rate": 8.424514792887686e-06, + "loss": 17.7781, + "step": 15431 + }, + { + "epoch": 0.2820845595627616, + "grad_norm": 5.697913517231326, + "learning_rate": 8.424299102727899e-06, + "loss": 17.3248, + "step": 15432 + }, + { + "epoch": 0.28210283875920816, + "grad_norm": 7.531715739191498, + "learning_rate": 8.424083400566243e-06, + "loss": 17.9968, + "step": 15433 + }, + { + "epoch": 0.28212111795565464, + "grad_norm": 6.673835150266609, + "learning_rate": 8.42386768640347e-06, + "loss": 17.8088, + "step": 15434 + }, + { + "epoch": 0.2821393971521012, + "grad_norm": 5.569046211206729, + "learning_rate": 8.423651960240339e-06, + "loss": 17.2213, + "step": 15435 + }, + { + "epoch": 0.2821576763485477, + "grad_norm": 6.4540470447502285, + "learning_rate": 8.423436222077603e-06, + "loss": 17.4253, + "step": 15436 + }, + { + "epoch": 0.28217595554499425, + "grad_norm": 7.489503211144116, + "learning_rate": 8.423220471916022e-06, + "loss": 17.9254, + "step": 15437 + }, + { + "epoch": 0.2821942347414408, + "grad_norm": 7.188916281016509, + "learning_rate": 8.423004709756348e-06, + "loss": 17.5623, + "step": 15438 + }, + { + "epoch": 0.28221251393788727, + "grad_norm": 7.454564411197409, + "learning_rate": 8.422788935599341e-06, + "loss": 17.6314, + "step": 15439 + }, + { + "epoch": 0.2822307931343338, + "grad_norm": 5.741675049568417, + "learning_rate": 8.422573149445756e-06, + "loss": 17.2068, + "step": 15440 + }, + { + "epoch": 0.28224907233078034, + "grad_norm": 6.689218308835845, + "learning_rate": 8.422357351296349e-06, + "loss": 17.7591, + "step": 15441 + }, + { + "epoch": 0.2822673515272269, + "grad_norm": 6.211700649871322, + "learning_rate": 8.422141541151878e-06, + "loss": 17.6763, + "step": 15442 + }, + { + "epoch": 0.2822856307236734, + "grad_norm": 6.638265554748326, + "learning_rate": 8.421925719013096e-06, + 
"loss": 17.5908, + "step": 15443 + }, + { + "epoch": 0.2823039099201199, + "grad_norm": 6.925772444016115, + "learning_rate": 8.421709884880762e-06, + "loss": 17.3167, + "step": 15444 + }, + { + "epoch": 0.2823221891165664, + "grad_norm": 7.298203244186044, + "learning_rate": 8.42149403875563e-06, + "loss": 17.8008, + "step": 15445 + }, + { + "epoch": 0.28234046831301296, + "grad_norm": 6.7739748624787195, + "learning_rate": 8.42127818063846e-06, + "loss": 17.886, + "step": 15446 + }, + { + "epoch": 0.2823587475094595, + "grad_norm": 6.675542708786623, + "learning_rate": 8.421062310530008e-06, + "loss": 17.7136, + "step": 15447 + }, + { + "epoch": 0.28237702670590603, + "grad_norm": 6.530552262727731, + "learning_rate": 8.420846428431026e-06, + "loss": 17.6029, + "step": 15448 + }, + { + "epoch": 0.2823953059023525, + "grad_norm": 7.964362371597339, + "learning_rate": 8.420630534342277e-06, + "loss": 18.4993, + "step": 15449 + }, + { + "epoch": 0.28241358509879905, + "grad_norm": 6.968830900129372, + "learning_rate": 8.420414628264515e-06, + "loss": 17.7445, + "step": 15450 + }, + { + "epoch": 0.2824318642952456, + "grad_norm": 8.277947052614286, + "learning_rate": 8.420198710198495e-06, + "loss": 17.8483, + "step": 15451 + }, + { + "epoch": 0.2824501434916921, + "grad_norm": 5.802198301028127, + "learning_rate": 8.419982780144974e-06, + "loss": 17.0768, + "step": 15452 + }, + { + "epoch": 0.28246842268813865, + "grad_norm": 6.816259597711021, + "learning_rate": 8.419766838104712e-06, + "loss": 17.5801, + "step": 15453 + }, + { + "epoch": 0.28248670188458513, + "grad_norm": 6.2967691082072035, + "learning_rate": 8.419550884078465e-06, + "loss": 17.6557, + "step": 15454 + }, + { + "epoch": 0.28250498108103167, + "grad_norm": 6.258892931758237, + "learning_rate": 8.419334918066987e-06, + "loss": 17.4094, + "step": 15455 + }, + { + "epoch": 0.2825232602774782, + "grad_norm": 6.862327542047562, + "learning_rate": 8.419118940071039e-06, + "loss": 17.9269, + "step": 15456 + }, + { + "epoch": 0.28254153947392474, + "grad_norm": 6.718567381681014, + "learning_rate": 8.418902950091374e-06, + "loss": 17.5564, + "step": 15457 + }, + { + "epoch": 0.2825598186703713, + "grad_norm": 6.717837238026571, + "learning_rate": 8.418686948128752e-06, + "loss": 17.4344, + "step": 15458 + }, + { + "epoch": 0.28257809786681776, + "grad_norm": 11.766996616844157, + "learning_rate": 8.418470934183927e-06, + "loss": 18.5154, + "step": 15459 + }, + { + "epoch": 0.2825963770632643, + "grad_norm": 5.102631544374815, + "learning_rate": 8.418254908257659e-06, + "loss": 16.8542, + "step": 15460 + }, + { + "epoch": 0.28261465625971083, + "grad_norm": 6.080400088498884, + "learning_rate": 8.418038870350706e-06, + "loss": 17.3789, + "step": 15461 + }, + { + "epoch": 0.28263293545615736, + "grad_norm": 6.188239214951652, + "learning_rate": 8.417822820463822e-06, + "loss": 17.4083, + "step": 15462 + }, + { + "epoch": 0.28265121465260384, + "grad_norm": 5.514424312512443, + "learning_rate": 8.417606758597765e-06, + "loss": 17.0802, + "step": 15463 + }, + { + "epoch": 0.2826694938490504, + "grad_norm": 5.558196896729641, + "learning_rate": 8.417390684753292e-06, + "loss": 17.2066, + "step": 15464 + }, + { + "epoch": 0.2826877730454969, + "grad_norm": 5.907595343879084, + "learning_rate": 8.417174598931163e-06, + "loss": 17.2644, + "step": 15465 + }, + { + "epoch": 0.28270605224194345, + "grad_norm": 6.63700062319701, + "learning_rate": 8.416958501132133e-06, + "loss": 17.699, + "step": 15466 + }, + { + "epoch": 0.28272433143839, 
+ "grad_norm": 5.6432284918911755, + "learning_rate": 8.41674239135696e-06, + "loss": 17.0312, + "step": 15467 + }, + { + "epoch": 0.28274261063483647, + "grad_norm": 6.289393242218711, + "learning_rate": 8.416526269606402e-06, + "loss": 17.5631, + "step": 15468 + }, + { + "epoch": 0.282760889831283, + "grad_norm": 7.5551513783786355, + "learning_rate": 8.416310135881214e-06, + "loss": 17.7653, + "step": 15469 + }, + { + "epoch": 0.28277916902772954, + "grad_norm": 5.956620052584515, + "learning_rate": 8.416093990182157e-06, + "loss": 17.3785, + "step": 15470 + }, + { + "epoch": 0.2827974482241761, + "grad_norm": 6.86001654615364, + "learning_rate": 8.415877832509987e-06, + "loss": 17.6636, + "step": 15471 + }, + { + "epoch": 0.2828157274206226, + "grad_norm": 6.382719215755634, + "learning_rate": 8.415661662865462e-06, + "loss": 17.7263, + "step": 15472 + }, + { + "epoch": 0.2828340066170691, + "grad_norm": 6.1246574539133585, + "learning_rate": 8.415445481249339e-06, + "loss": 17.2325, + "step": 15473 + }, + { + "epoch": 0.2828522858135156, + "grad_norm": 7.390938265665142, + "learning_rate": 8.415229287662375e-06, + "loss": 17.7339, + "step": 15474 + }, + { + "epoch": 0.28287056500996216, + "grad_norm": 5.215737196107322, + "learning_rate": 8.415013082105328e-06, + "loss": 17.035, + "step": 15475 + }, + { + "epoch": 0.2828888442064087, + "grad_norm": 7.755466898336756, + "learning_rate": 8.414796864578957e-06, + "loss": 17.7883, + "step": 15476 + }, + { + "epoch": 0.28290712340285523, + "grad_norm": 6.8293170026329255, + "learning_rate": 8.41458063508402e-06, + "loss": 17.6541, + "step": 15477 + }, + { + "epoch": 0.2829254025993017, + "grad_norm": 6.517733841349658, + "learning_rate": 8.414364393621274e-06, + "loss": 17.5341, + "step": 15478 + }, + { + "epoch": 0.28294368179574825, + "grad_norm": 5.862465874817472, + "learning_rate": 8.414148140191479e-06, + "loss": 17.1817, + "step": 15479 + }, + { + "epoch": 0.2829619609921948, + "grad_norm": 7.381018863754203, + "learning_rate": 8.41393187479539e-06, + "loss": 17.7526, + "step": 15480 + }, + { + "epoch": 0.2829802401886413, + "grad_norm": 6.833948647150858, + "learning_rate": 8.413715597433764e-06, + "loss": 17.6928, + "step": 15481 + }, + { + "epoch": 0.28299851938508785, + "grad_norm": 6.521558147809372, + "learning_rate": 8.413499308107363e-06, + "loss": 17.5961, + "step": 15482 + }, + { + "epoch": 0.28301679858153433, + "grad_norm": 5.946615365920805, + "learning_rate": 8.413283006816943e-06, + "loss": 17.3043, + "step": 15483 + }, + { + "epoch": 0.28303507777798087, + "grad_norm": 6.485929179868215, + "learning_rate": 8.413066693563262e-06, + "loss": 17.5305, + "step": 15484 + }, + { + "epoch": 0.2830533569744274, + "grad_norm": 5.945363387251339, + "learning_rate": 8.412850368347077e-06, + "loss": 17.0616, + "step": 15485 + }, + { + "epoch": 0.28307163617087394, + "grad_norm": 6.54235154309566, + "learning_rate": 8.412634031169148e-06, + "loss": 17.3017, + "step": 15486 + }, + { + "epoch": 0.2830899153673205, + "grad_norm": 5.686563129486015, + "learning_rate": 8.412417682030234e-06, + "loss": 17.4352, + "step": 15487 + }, + { + "epoch": 0.28310819456376696, + "grad_norm": 6.595737961887192, + "learning_rate": 8.412201320931092e-06, + "loss": 17.5812, + "step": 15488 + }, + { + "epoch": 0.2831264737602135, + "grad_norm": 6.398344498781888, + "learning_rate": 8.41198494787248e-06, + "loss": 17.2987, + "step": 15489 + }, + { + "epoch": 0.28314475295666003, + "grad_norm": 6.557725214850864, + "learning_rate": 
8.411768562855157e-06, + "loss": 17.7187, + "step": 15490 + }, + { + "epoch": 0.28316303215310656, + "grad_norm": 6.50233786074878, + "learning_rate": 8.411552165879881e-06, + "loss": 17.3658, + "step": 15491 + }, + { + "epoch": 0.2831813113495531, + "grad_norm": 6.1620822188746125, + "learning_rate": 8.411335756947411e-06, + "loss": 17.5341, + "step": 15492 + }, + { + "epoch": 0.2831995905459996, + "grad_norm": 5.969493183820305, + "learning_rate": 8.411119336058506e-06, + "loss": 17.4539, + "step": 15493 + }, + { + "epoch": 0.2832178697424461, + "grad_norm": 5.99854252879164, + "learning_rate": 8.410902903213924e-06, + "loss": 17.3071, + "step": 15494 + }, + { + "epoch": 0.28323614893889265, + "grad_norm": 5.433585164652213, + "learning_rate": 8.410686458414421e-06, + "loss": 17.0368, + "step": 15495 + }, + { + "epoch": 0.2832544281353392, + "grad_norm": 7.437621076301175, + "learning_rate": 8.41047000166076e-06, + "loss": 17.613, + "step": 15496 + }, + { + "epoch": 0.28327270733178567, + "grad_norm": 6.106388244878, + "learning_rate": 8.410253532953696e-06, + "loss": 17.1693, + "step": 15497 + }, + { + "epoch": 0.2832909865282322, + "grad_norm": 6.386802219835082, + "learning_rate": 8.410037052293992e-06, + "loss": 17.2761, + "step": 15498 + }, + { + "epoch": 0.28330926572467874, + "grad_norm": 6.369372505903644, + "learning_rate": 8.409820559682402e-06, + "loss": 17.7474, + "step": 15499 + }, + { + "epoch": 0.2833275449211253, + "grad_norm": 6.271390582198113, + "learning_rate": 8.409604055119687e-06, + "loss": 17.5237, + "step": 15500 + }, + { + "epoch": 0.2833458241175718, + "grad_norm": 6.5256147398044835, + "learning_rate": 8.409387538606605e-06, + "loss": 17.1695, + "step": 15501 + }, + { + "epoch": 0.2833641033140183, + "grad_norm": 5.786452002426799, + "learning_rate": 8.409171010143916e-06, + "loss": 17.4172, + "step": 15502 + }, + { + "epoch": 0.2833823825104648, + "grad_norm": 8.84299267926821, + "learning_rate": 8.40895446973238e-06, + "loss": 18.0781, + "step": 15503 + }, + { + "epoch": 0.28340066170691136, + "grad_norm": 5.691610922571614, + "learning_rate": 8.408737917372751e-06, + "loss": 17.104, + "step": 15504 + }, + { + "epoch": 0.2834189409033579, + "grad_norm": 6.379030894126828, + "learning_rate": 8.408521353065796e-06, + "loss": 17.4691, + "step": 15505 + }, + { + "epoch": 0.28343722009980443, + "grad_norm": 7.063629606285928, + "learning_rate": 8.408304776812266e-06, + "loss": 17.5241, + "step": 15506 + }, + { + "epoch": 0.2834554992962509, + "grad_norm": 6.4461397749199545, + "learning_rate": 8.408088188612923e-06, + "loss": 17.4663, + "step": 15507 + }, + { + "epoch": 0.28347377849269745, + "grad_norm": 7.239733713449572, + "learning_rate": 8.407871588468527e-06, + "loss": 17.5837, + "step": 15508 + }, + { + "epoch": 0.283492057689144, + "grad_norm": 6.703860169477245, + "learning_rate": 8.40765497637984e-06, + "loss": 17.66, + "step": 15509 + }, + { + "epoch": 0.2835103368855905, + "grad_norm": 6.8981920422858485, + "learning_rate": 8.407438352347614e-06, + "loss": 17.2924, + "step": 15510 + }, + { + "epoch": 0.28352861608203705, + "grad_norm": 6.9308056613648485, + "learning_rate": 8.407221716372615e-06, + "loss": 17.6194, + "step": 15511 + }, + { + "epoch": 0.28354689527848354, + "grad_norm": 8.630598151276228, + "learning_rate": 8.4070050684556e-06, + "loss": 18.5181, + "step": 15512 + }, + { + "epoch": 0.28356517447493007, + "grad_norm": 6.140531931427257, + "learning_rate": 8.406788408597324e-06, + "loss": 17.1919, + "step": 15513 + }, + { + "epoch": 
0.2835834536713766, + "grad_norm": 6.105063839189512, + "learning_rate": 8.406571736798554e-06, + "loss": 17.3691, + "step": 15514 + }, + { + "epoch": 0.28360173286782314, + "grad_norm": 5.6438490398010845, + "learning_rate": 8.406355053060044e-06, + "loss": 17.1524, + "step": 15515 + }, + { + "epoch": 0.2836200120642697, + "grad_norm": 5.594531492297863, + "learning_rate": 8.406138357382556e-06, + "loss": 17.2135, + "step": 15516 + }, + { + "epoch": 0.28363829126071616, + "grad_norm": 8.008643271750694, + "learning_rate": 8.405921649766849e-06, + "loss": 18.2914, + "step": 15517 + }, + { + "epoch": 0.2836565704571627, + "grad_norm": 5.957476832027726, + "learning_rate": 8.40570493021368e-06, + "loss": 17.182, + "step": 15518 + }, + { + "epoch": 0.28367484965360923, + "grad_norm": 6.777397209515814, + "learning_rate": 8.405488198723813e-06, + "loss": 17.5969, + "step": 15519 + }, + { + "epoch": 0.28369312885005576, + "grad_norm": 6.620547159990193, + "learning_rate": 8.405271455298005e-06, + "loss": 17.5505, + "step": 15520 + }, + { + "epoch": 0.2837114080465023, + "grad_norm": 6.648656132268679, + "learning_rate": 8.405054699937014e-06, + "loss": 17.4893, + "step": 15521 + }, + { + "epoch": 0.2837296872429488, + "grad_norm": 5.712029974103151, + "learning_rate": 8.404837932641604e-06, + "loss": 17.2915, + "step": 15522 + }, + { + "epoch": 0.2837479664393953, + "grad_norm": 7.236765825272202, + "learning_rate": 8.404621153412532e-06, + "loss": 17.6133, + "step": 15523 + }, + { + "epoch": 0.28376624563584185, + "grad_norm": 8.10122431474489, + "learning_rate": 8.404404362250558e-06, + "loss": 18.0702, + "step": 15524 + }, + { + "epoch": 0.2837845248322884, + "grad_norm": 7.144456215384271, + "learning_rate": 8.404187559156443e-06, + "loss": 17.7497, + "step": 15525 + }, + { + "epoch": 0.2838028040287349, + "grad_norm": 6.659411430034418, + "learning_rate": 8.403970744130945e-06, + "loss": 17.6221, + "step": 15526 + }, + { + "epoch": 0.2838210832251814, + "grad_norm": 7.175396214025024, + "learning_rate": 8.403753917174825e-06, + "loss": 17.8914, + "step": 15527 + }, + { + "epoch": 0.28383936242162794, + "grad_norm": 6.189528774703327, + "learning_rate": 8.403537078288843e-06, + "loss": 17.5811, + "step": 15528 + }, + { + "epoch": 0.2838576416180745, + "grad_norm": 5.856297634820472, + "learning_rate": 8.403320227473759e-06, + "loss": 17.1073, + "step": 15529 + }, + { + "epoch": 0.283875920814521, + "grad_norm": 8.643114153220317, + "learning_rate": 8.403103364730333e-06, + "loss": 18.4561, + "step": 15530 + }, + { + "epoch": 0.2838942000109675, + "grad_norm": 7.828290680158178, + "learning_rate": 8.402886490059325e-06, + "loss": 18.0083, + "step": 15531 + }, + { + "epoch": 0.283912479207414, + "grad_norm": 6.445601967259433, + "learning_rate": 8.402669603461495e-06, + "loss": 17.2818, + "step": 15532 + }, + { + "epoch": 0.28393075840386056, + "grad_norm": 8.173572547923358, + "learning_rate": 8.402452704937602e-06, + "loss": 18.4044, + "step": 15533 + }, + { + "epoch": 0.2839490376003071, + "grad_norm": 7.3105068229246415, + "learning_rate": 8.40223579448841e-06, + "loss": 17.8771, + "step": 15534 + }, + { + "epoch": 0.28396731679675363, + "grad_norm": 8.043554647759427, + "learning_rate": 8.402018872114675e-06, + "loss": 17.6716, + "step": 15535 + }, + { + "epoch": 0.2839855959932001, + "grad_norm": 8.575846251880327, + "learning_rate": 8.40180193781716e-06, + "loss": 18.127, + "step": 15536 + }, + { + "epoch": 0.28400387518964665, + "grad_norm": 8.448946370612017, + "learning_rate": 
8.401584991596623e-06, + "loss": 18.2131, + "step": 15537 + }, + { + "epoch": 0.2840221543860932, + "grad_norm": 6.12334406272895, + "learning_rate": 8.401368033453827e-06, + "loss": 17.3053, + "step": 15538 + }, + { + "epoch": 0.2840404335825397, + "grad_norm": 6.517376042846156, + "learning_rate": 8.401151063389533e-06, + "loss": 17.3515, + "step": 15539 + }, + { + "epoch": 0.28405871277898626, + "grad_norm": 6.126027630011614, + "learning_rate": 8.400934081404497e-06, + "loss": 17.2624, + "step": 15540 + }, + { + "epoch": 0.28407699197543274, + "grad_norm": 6.057541862518091, + "learning_rate": 8.400717087499483e-06, + "loss": 17.1479, + "step": 15541 + }, + { + "epoch": 0.28409527117187927, + "grad_norm": 6.5286989223360345, + "learning_rate": 8.40050008167525e-06, + "loss": 17.3018, + "step": 15542 + }, + { + "epoch": 0.2841135503683258, + "grad_norm": 7.968956088806122, + "learning_rate": 8.40028306393256e-06, + "loss": 17.7986, + "step": 15543 + }, + { + "epoch": 0.28413182956477234, + "grad_norm": 5.88287122620639, + "learning_rate": 8.400066034272173e-06, + "loss": 17.3807, + "step": 15544 + }, + { + "epoch": 0.2841501087612189, + "grad_norm": 7.255389013799567, + "learning_rate": 8.399848992694849e-06, + "loss": 17.6915, + "step": 15545 + }, + { + "epoch": 0.28416838795766536, + "grad_norm": 6.207393249947155, + "learning_rate": 8.39963193920135e-06, + "loss": 17.6513, + "step": 15546 + }, + { + "epoch": 0.2841866671541119, + "grad_norm": 5.493874442691914, + "learning_rate": 8.399414873792435e-06, + "loss": 17.0561, + "step": 15547 + }, + { + "epoch": 0.28420494635055843, + "grad_norm": 6.265429232395019, + "learning_rate": 8.399197796468867e-06, + "loss": 17.4793, + "step": 15548 + }, + { + "epoch": 0.28422322554700497, + "grad_norm": 6.986659093185191, + "learning_rate": 8.398980707231405e-06, + "loss": 17.6765, + "step": 15549 + }, + { + "epoch": 0.2842415047434515, + "grad_norm": 5.722656947055226, + "learning_rate": 8.398763606080812e-06, + "loss": 17.0972, + "step": 15550 + }, + { + "epoch": 0.284259783939898, + "grad_norm": 6.6297669680583065, + "learning_rate": 8.398546493017846e-06, + "loss": 17.5315, + "step": 15551 + }, + { + "epoch": 0.2842780631363445, + "grad_norm": 6.3911646778028155, + "learning_rate": 8.39832936804327e-06, + "loss": 17.3867, + "step": 15552 + }, + { + "epoch": 0.28429634233279105, + "grad_norm": 6.234982800213382, + "learning_rate": 8.398112231157844e-06, + "loss": 17.3341, + "step": 15553 + }, + { + "epoch": 0.2843146215292376, + "grad_norm": 7.501273893139183, + "learning_rate": 8.39789508236233e-06, + "loss": 18.1063, + "step": 15554 + }, + { + "epoch": 0.2843329007256841, + "grad_norm": 5.731369411501362, + "learning_rate": 8.397677921657488e-06, + "loss": 17.0956, + "step": 15555 + }, + { + "epoch": 0.2843511799221306, + "grad_norm": 5.81032010845882, + "learning_rate": 8.397460749044079e-06, + "loss": 17.3655, + "step": 15556 + }, + { + "epoch": 0.28436945911857714, + "grad_norm": 7.082287812047839, + "learning_rate": 8.397243564522867e-06, + "loss": 17.5869, + "step": 15557 + }, + { + "epoch": 0.2843877383150237, + "grad_norm": 6.594087648334816, + "learning_rate": 8.39702636809461e-06, + "loss": 17.4453, + "step": 15558 + }, + { + "epoch": 0.2844060175114702, + "grad_norm": 6.80664865386882, + "learning_rate": 8.39680915976007e-06, + "loss": 17.7205, + "step": 15559 + }, + { + "epoch": 0.28442429670791675, + "grad_norm": 7.412781021068577, + "learning_rate": 8.39659193952001e-06, + "loss": 17.7192, + "step": 15560 + }, + { + "epoch": 
0.2844425759043632, + "grad_norm": 5.867012061418011, + "learning_rate": 8.39637470737519e-06, + "loss": 17.2483, + "step": 15561 + }, + { + "epoch": 0.28446085510080976, + "grad_norm": 9.014390204684132, + "learning_rate": 8.39615746332637e-06, + "loss": 18.6399, + "step": 15562 + }, + { + "epoch": 0.2844791342972563, + "grad_norm": 7.759039131336317, + "learning_rate": 8.395940207374314e-06, + "loss": 17.5036, + "step": 15563 + }, + { + "epoch": 0.28449741349370283, + "grad_norm": 6.503756645272427, + "learning_rate": 8.395722939519782e-06, + "loss": 17.5279, + "step": 15564 + }, + { + "epoch": 0.2845156926901493, + "grad_norm": 7.171876314404471, + "learning_rate": 8.395505659763534e-06, + "loss": 17.8446, + "step": 15565 + }, + { + "epoch": 0.28453397188659585, + "grad_norm": 6.245292427653352, + "learning_rate": 8.395288368106334e-06, + "loss": 17.5238, + "step": 15566 + }, + { + "epoch": 0.2845522510830424, + "grad_norm": 6.8851167604665084, + "learning_rate": 8.395071064548945e-06, + "loss": 17.7864, + "step": 15567 + }, + { + "epoch": 0.2845705302794889, + "grad_norm": 5.5415031339971925, + "learning_rate": 8.394853749092125e-06, + "loss": 17.0479, + "step": 15568 + }, + { + "epoch": 0.28458880947593546, + "grad_norm": 5.524736013309662, + "learning_rate": 8.394636421736637e-06, + "loss": 17.1075, + "step": 15569 + }, + { + "epoch": 0.28460708867238194, + "grad_norm": 5.053254637388772, + "learning_rate": 8.394419082483242e-06, + "loss": 16.9987, + "step": 15570 + }, + { + "epoch": 0.28462536786882847, + "grad_norm": 6.435384113223739, + "learning_rate": 8.394201731332705e-06, + "loss": 17.3633, + "step": 15571 + }, + { + "epoch": 0.284643647065275, + "grad_norm": 7.507276131995696, + "learning_rate": 8.393984368285784e-06, + "loss": 18.0902, + "step": 15572 + }, + { + "epoch": 0.28466192626172154, + "grad_norm": 7.964726173875384, + "learning_rate": 8.393766993343241e-06, + "loss": 18.1453, + "step": 15573 + }, + { + "epoch": 0.2846802054581681, + "grad_norm": 7.2337262115679914, + "learning_rate": 8.393549606505842e-06, + "loss": 17.8401, + "step": 15574 + }, + { + "epoch": 0.28469848465461456, + "grad_norm": 5.573186046338195, + "learning_rate": 8.393332207774345e-06, + "loss": 17.1204, + "step": 15575 + }, + { + "epoch": 0.2847167638510611, + "grad_norm": 8.1926876728909, + "learning_rate": 8.393114797149513e-06, + "loss": 18.4901, + "step": 15576 + }, + { + "epoch": 0.28473504304750763, + "grad_norm": 6.337786905884441, + "learning_rate": 8.392897374632107e-06, + "loss": 17.5824, + "step": 15577 + }, + { + "epoch": 0.28475332224395417, + "grad_norm": 6.646307893594947, + "learning_rate": 8.392679940222893e-06, + "loss": 17.3747, + "step": 15578 + }, + { + "epoch": 0.2847716014404007, + "grad_norm": 5.773095729081687, + "learning_rate": 8.392462493922629e-06, + "loss": 17.1753, + "step": 15579 + }, + { + "epoch": 0.2847898806368472, + "grad_norm": 7.033828873581546, + "learning_rate": 8.392245035732077e-06, + "loss": 17.9067, + "step": 15580 + }, + { + "epoch": 0.2848081598332937, + "grad_norm": 6.589963825307848, + "learning_rate": 8.392027565652001e-06, + "loss": 17.4564, + "step": 15581 + }, + { + "epoch": 0.28482643902974025, + "grad_norm": 6.496213127727764, + "learning_rate": 8.391810083683163e-06, + "loss": 17.5729, + "step": 15582 + }, + { + "epoch": 0.2848447182261868, + "grad_norm": 6.22426655532525, + "learning_rate": 8.391592589826325e-06, + "loss": 17.6297, + "step": 15583 + }, + { + "epoch": 0.2848629974226333, + "grad_norm": 7.330933426305583, + "learning_rate": 
8.391375084082249e-06, + "loss": 17.9936, + "step": 15584 + }, + { + "epoch": 0.2848812766190798, + "grad_norm": 5.628551605391713, + "learning_rate": 8.391157566451697e-06, + "loss": 16.9961, + "step": 15585 + }, + { + "epoch": 0.28489955581552634, + "grad_norm": 5.046472674557297, + "learning_rate": 8.390940036935433e-06, + "loss": 16.9365, + "step": 15586 + }, + { + "epoch": 0.2849178350119729, + "grad_norm": 6.914505875474923, + "learning_rate": 8.39072249553422e-06, + "loss": 17.3626, + "step": 15587 + }, + { + "epoch": 0.2849361142084194, + "grad_norm": 7.25076524166895, + "learning_rate": 8.390504942248817e-06, + "loss": 17.9625, + "step": 15588 + }, + { + "epoch": 0.28495439340486595, + "grad_norm": 7.699190980540354, + "learning_rate": 8.390287377079989e-06, + "loss": 17.5692, + "step": 15589 + }, + { + "epoch": 0.2849726726013124, + "grad_norm": 6.518746589959383, + "learning_rate": 8.390069800028497e-06, + "loss": 17.3474, + "step": 15590 + }, + { + "epoch": 0.28499095179775896, + "grad_norm": 6.774196109811676, + "learning_rate": 8.389852211095104e-06, + "loss": 17.5621, + "step": 15591 + }, + { + "epoch": 0.2850092309942055, + "grad_norm": 7.007953092574309, + "learning_rate": 8.389634610280576e-06, + "loss": 17.7761, + "step": 15592 + }, + { + "epoch": 0.28502751019065203, + "grad_norm": 7.785073531272435, + "learning_rate": 8.38941699758567e-06, + "loss": 18.2594, + "step": 15593 + }, + { + "epoch": 0.28504578938709857, + "grad_norm": 5.737027901527635, + "learning_rate": 8.389199373011151e-06, + "loss": 16.9646, + "step": 15594 + }, + { + "epoch": 0.28506406858354505, + "grad_norm": 6.1317094096416005, + "learning_rate": 8.388981736557786e-06, + "loss": 17.276, + "step": 15595 + }, + { + "epoch": 0.2850823477799916, + "grad_norm": 5.552608549638589, + "learning_rate": 8.388764088226332e-06, + "loss": 17.1453, + "step": 15596 + }, + { + "epoch": 0.2851006269764381, + "grad_norm": 6.541323176059032, + "learning_rate": 8.388546428017553e-06, + "loss": 17.572, + "step": 15597 + }, + { + "epoch": 0.28511890617288466, + "grad_norm": 6.5776082966284495, + "learning_rate": 8.388328755932213e-06, + "loss": 17.3414, + "step": 15598 + }, + { + "epoch": 0.28513718536933114, + "grad_norm": 5.859995221506433, + "learning_rate": 8.388111071971077e-06, + "loss": 17.3288, + "step": 15599 + }, + { + "epoch": 0.28515546456577767, + "grad_norm": 6.283259630316175, + "learning_rate": 8.387893376134903e-06, + "loss": 17.615, + "step": 15600 + }, + { + "epoch": 0.2851737437622242, + "grad_norm": 8.207639137259413, + "learning_rate": 8.387675668424457e-06, + "loss": 18.3846, + "step": 15601 + }, + { + "epoch": 0.28519202295867074, + "grad_norm": 6.3159778451564, + "learning_rate": 8.387457948840503e-06, + "loss": 17.7309, + "step": 15602 + }, + { + "epoch": 0.2852103021551173, + "grad_norm": 7.207083762354696, + "learning_rate": 8.387240217383804e-06, + "loss": 17.754, + "step": 15603 + }, + { + "epoch": 0.28522858135156376, + "grad_norm": 6.95631087327876, + "learning_rate": 8.38702247405512e-06, + "loss": 17.6824, + "step": 15604 + }, + { + "epoch": 0.2852468605480103, + "grad_norm": 7.055379196665715, + "learning_rate": 8.386804718855217e-06, + "loss": 17.7365, + "step": 15605 + }, + { + "epoch": 0.28526513974445683, + "grad_norm": 6.141082121277028, + "learning_rate": 8.386586951784857e-06, + "loss": 17.2589, + "step": 15606 + }, + { + "epoch": 0.28528341894090337, + "grad_norm": 6.2588554003648555, + "learning_rate": 8.386369172844803e-06, + "loss": 17.4175, + "step": 15607 + }, + { + 
"epoch": 0.2853016981373499, + "grad_norm": 5.849630957286629, + "learning_rate": 8.386151382035819e-06, + "loss": 17.2994, + "step": 15608 + }, + { + "epoch": 0.2853199773337964, + "grad_norm": 7.919239102329731, + "learning_rate": 8.38593357935867e-06, + "loss": 18.2311, + "step": 15609 + }, + { + "epoch": 0.2853382565302429, + "grad_norm": 7.460346656682483, + "learning_rate": 8.385715764814115e-06, + "loss": 18.0355, + "step": 15610 + }, + { + "epoch": 0.28535653572668945, + "grad_norm": 7.166477420381712, + "learning_rate": 8.385497938402921e-06, + "loss": 17.9849, + "step": 15611 + }, + { + "epoch": 0.285374814923136, + "grad_norm": 7.811072381805779, + "learning_rate": 8.385280100125852e-06, + "loss": 17.874, + "step": 15612 + }, + { + "epoch": 0.2853930941195825, + "grad_norm": 6.212841050772617, + "learning_rate": 8.385062249983668e-06, + "loss": 17.7724, + "step": 15613 + }, + { + "epoch": 0.285411373316029, + "grad_norm": 8.286750865831028, + "learning_rate": 8.384844387977136e-06, + "loss": 17.7, + "step": 15614 + }, + { + "epoch": 0.28542965251247554, + "grad_norm": 7.108771609697807, + "learning_rate": 8.384626514107017e-06, + "loss": 17.7998, + "step": 15615 + }, + { + "epoch": 0.2854479317089221, + "grad_norm": 5.327994253042193, + "learning_rate": 8.384408628374076e-06, + "loss": 17.1421, + "step": 15616 + }, + { + "epoch": 0.2854662109053686, + "grad_norm": 6.386484428558483, + "learning_rate": 8.384190730779077e-06, + "loss": 17.5845, + "step": 15617 + }, + { + "epoch": 0.28548449010181515, + "grad_norm": 5.628792152596963, + "learning_rate": 8.383972821322783e-06, + "loss": 17.4626, + "step": 15618 + }, + { + "epoch": 0.2855027692982616, + "grad_norm": 6.1920812252869855, + "learning_rate": 8.383754900005958e-06, + "loss": 17.5209, + "step": 15619 + }, + { + "epoch": 0.28552104849470816, + "grad_norm": 7.196441400429767, + "learning_rate": 8.383536966829365e-06, + "loss": 17.8727, + "step": 15620 + }, + { + "epoch": 0.2855393276911547, + "grad_norm": 6.394134189644871, + "learning_rate": 8.38331902179377e-06, + "loss": 17.3357, + "step": 15621 + }, + { + "epoch": 0.28555760688760123, + "grad_norm": 7.351549259202254, + "learning_rate": 8.383101064899934e-06, + "loss": 18.0577, + "step": 15622 + }, + { + "epoch": 0.28557588608404777, + "grad_norm": 8.885390520908818, + "learning_rate": 8.382883096148623e-06, + "loss": 17.5862, + "step": 15623 + }, + { + "epoch": 0.28559416528049425, + "grad_norm": 7.79950581234333, + "learning_rate": 8.382665115540601e-06, + "loss": 18.0429, + "step": 15624 + }, + { + "epoch": 0.2856124444769408, + "grad_norm": 7.142911245825157, + "learning_rate": 8.38244712307663e-06, + "loss": 17.7714, + "step": 15625 + }, + { + "epoch": 0.2856307236733873, + "grad_norm": 7.328069278498716, + "learning_rate": 8.382229118757475e-06, + "loss": 17.7837, + "step": 15626 + }, + { + "epoch": 0.28564900286983386, + "grad_norm": 6.390514179583239, + "learning_rate": 8.382011102583903e-06, + "loss": 17.5888, + "step": 15627 + }, + { + "epoch": 0.2856672820662804, + "grad_norm": 6.003095180740606, + "learning_rate": 8.381793074556673e-06, + "loss": 17.6111, + "step": 15628 + }, + { + "epoch": 0.2856855612627269, + "grad_norm": 7.090927574776756, + "learning_rate": 8.38157503467655e-06, + "loss": 17.7163, + "step": 15629 + }, + { + "epoch": 0.2857038404591734, + "grad_norm": 10.159727596808448, + "learning_rate": 8.381356982944304e-06, + "loss": 18.8136, + "step": 15630 + }, + { + "epoch": 0.28572211965561994, + "grad_norm": 6.746606551618541, + "learning_rate": 
8.381138919360693e-06, + "loss": 17.4561, + "step": 15631 + }, + { + "epoch": 0.2857403988520665, + "grad_norm": 8.895814824886411, + "learning_rate": 8.380920843926485e-06, + "loss": 18.0273, + "step": 15632 + }, + { + "epoch": 0.285758678048513, + "grad_norm": 7.931643137647633, + "learning_rate": 8.380702756642443e-06, + "loss": 18.2132, + "step": 15633 + }, + { + "epoch": 0.2857769572449595, + "grad_norm": 6.607209696763906, + "learning_rate": 8.380484657509329e-06, + "loss": 17.5462, + "step": 15634 + }, + { + "epoch": 0.28579523644140603, + "grad_norm": 6.397507164123776, + "learning_rate": 8.380266546527911e-06, + "loss": 17.6273, + "step": 15635 + }, + { + "epoch": 0.28581351563785257, + "grad_norm": 6.787735261967875, + "learning_rate": 8.380048423698952e-06, + "loss": 17.4471, + "step": 15636 + }, + { + "epoch": 0.2858317948342991, + "grad_norm": 8.94183718488205, + "learning_rate": 8.379830289023216e-06, + "loss": 18.6105, + "step": 15637 + }, + { + "epoch": 0.2858500740307456, + "grad_norm": 6.901059258190413, + "learning_rate": 8.379612142501468e-06, + "loss": 17.6378, + "step": 15638 + }, + { + "epoch": 0.2858683532271921, + "grad_norm": 7.276057520784042, + "learning_rate": 8.379393984134473e-06, + "loss": 17.645, + "step": 15639 + }, + { + "epoch": 0.28588663242363865, + "grad_norm": 6.7987410673042605, + "learning_rate": 8.379175813922998e-06, + "loss": 17.6281, + "step": 15640 + }, + { + "epoch": 0.2859049116200852, + "grad_norm": 5.242972500649996, + "learning_rate": 8.378957631867801e-06, + "loss": 17.0635, + "step": 15641 + }, + { + "epoch": 0.2859231908165317, + "grad_norm": 6.686899850055457, + "learning_rate": 8.378739437969653e-06, + "loss": 17.3893, + "step": 15642 + }, + { + "epoch": 0.2859414700129782, + "grad_norm": 6.254068526072431, + "learning_rate": 8.378521232229316e-06, + "loss": 17.3998, + "step": 15643 + }, + { + "epoch": 0.28595974920942474, + "grad_norm": 6.1499387344539915, + "learning_rate": 8.378303014647555e-06, + "loss": 17.3369, + "step": 15644 + }, + { + "epoch": 0.2859780284058713, + "grad_norm": 7.888230663721426, + "learning_rate": 8.378084785225134e-06, + "loss": 18.1959, + "step": 15645 + }, + { + "epoch": 0.2859963076023178, + "grad_norm": 6.289409435598981, + "learning_rate": 8.37786654396282e-06, + "loss": 17.5666, + "step": 15646 + }, + { + "epoch": 0.28601458679876435, + "grad_norm": 7.090039170347586, + "learning_rate": 8.377648290861377e-06, + "loss": 17.9468, + "step": 15647 + }, + { + "epoch": 0.2860328659952108, + "grad_norm": 6.036925531325897, + "learning_rate": 8.37743002592157e-06, + "loss": 17.1986, + "step": 15648 + }, + { + "epoch": 0.28605114519165736, + "grad_norm": 6.868252158953648, + "learning_rate": 8.377211749144165e-06, + "loss": 17.752, + "step": 15649 + }, + { + "epoch": 0.2860694243881039, + "grad_norm": 9.033682671133217, + "learning_rate": 8.376993460529925e-06, + "loss": 18.5495, + "step": 15650 + }, + { + "epoch": 0.28608770358455043, + "grad_norm": 5.882631257234656, + "learning_rate": 8.376775160079614e-06, + "loss": 17.2984, + "step": 15651 + }, + { + "epoch": 0.28610598278099697, + "grad_norm": 7.8151648561632845, + "learning_rate": 8.376556847794001e-06, + "loss": 18.2152, + "step": 15652 + }, + { + "epoch": 0.28612426197744345, + "grad_norm": 5.1476495119161, + "learning_rate": 8.376338523673848e-06, + "loss": 16.9924, + "step": 15653 + }, + { + "epoch": 0.28614254117389, + "grad_norm": 5.977907295560187, + "learning_rate": 8.376120187719924e-06, + "loss": 17.4418, + "step": 15654 + }, + { + "epoch": 
0.2861608203703365, + "grad_norm": 8.701996806963585, + "learning_rate": 8.37590183993299e-06, + "loss": 18.1837, + "step": 15655 + }, + { + "epoch": 0.28617909956678306, + "grad_norm": 6.594309340151606, + "learning_rate": 8.375683480313812e-06, + "loss": 17.8694, + "step": 15656 + }, + { + "epoch": 0.2861973787632296, + "grad_norm": 7.245940354928539, + "learning_rate": 8.375465108863159e-06, + "loss": 17.9216, + "step": 15657 + }, + { + "epoch": 0.2862156579596761, + "grad_norm": 7.323500051559651, + "learning_rate": 8.375246725581792e-06, + "loss": 18.2038, + "step": 15658 + }, + { + "epoch": 0.2862339371561226, + "grad_norm": 7.425768035062593, + "learning_rate": 8.375028330470477e-06, + "loss": 17.7077, + "step": 15659 + }, + { + "epoch": 0.28625221635256914, + "grad_norm": 6.646181546882208, + "learning_rate": 8.374809923529981e-06, + "loss": 17.5014, + "step": 15660 + }, + { + "epoch": 0.2862704955490157, + "grad_norm": 7.815951148838298, + "learning_rate": 8.374591504761072e-06, + "loss": 18.3835, + "step": 15661 + }, + { + "epoch": 0.2862887747454622, + "grad_norm": 9.259074008802063, + "learning_rate": 8.37437307416451e-06, + "loss": 17.9353, + "step": 15662 + }, + { + "epoch": 0.2863070539419087, + "grad_norm": 5.373786956405164, + "learning_rate": 8.374154631741063e-06, + "loss": 17.1724, + "step": 15663 + }, + { + "epoch": 0.28632533313835523, + "grad_norm": 6.369099122450056, + "learning_rate": 8.373936177491497e-06, + "loss": 17.517, + "step": 15664 + }, + { + "epoch": 0.28634361233480177, + "grad_norm": 6.033493817967538, + "learning_rate": 8.373717711416578e-06, + "loss": 17.278, + "step": 15665 + }, + { + "epoch": 0.2863618915312483, + "grad_norm": 6.127814021510294, + "learning_rate": 8.373499233517071e-06, + "loss": 17.382, + "step": 15666 + }, + { + "epoch": 0.28638017072769484, + "grad_norm": 5.894522159937451, + "learning_rate": 8.373280743793741e-06, + "loss": 17.2759, + "step": 15667 + }, + { + "epoch": 0.2863984499241413, + "grad_norm": 12.338123559113056, + "learning_rate": 8.373062242247358e-06, + "loss": 17.7412, + "step": 15668 + }, + { + "epoch": 0.28641672912058785, + "grad_norm": 7.369914316078894, + "learning_rate": 8.372843728878681e-06, + "loss": 17.9116, + "step": 15669 + }, + { + "epoch": 0.2864350083170344, + "grad_norm": 6.947360513007213, + "learning_rate": 8.37262520368848e-06, + "loss": 17.5352, + "step": 15670 + }, + { + "epoch": 0.2864532875134809, + "grad_norm": 6.656061287890204, + "learning_rate": 8.372406666677521e-06, + "loss": 17.6119, + "step": 15671 + }, + { + "epoch": 0.2864715667099274, + "grad_norm": 5.5692997538235725, + "learning_rate": 8.37218811784657e-06, + "loss": 16.9218, + "step": 15672 + }, + { + "epoch": 0.28648984590637394, + "grad_norm": 6.848217536842453, + "learning_rate": 8.371969557196391e-06, + "loss": 17.6411, + "step": 15673 + }, + { + "epoch": 0.2865081251028205, + "grad_norm": 6.129048388804936, + "learning_rate": 8.371750984727753e-06, + "loss": 17.4972, + "step": 15674 + }, + { + "epoch": 0.286526404299267, + "grad_norm": 7.013742207397539, + "learning_rate": 8.37153240044142e-06, + "loss": 17.6785, + "step": 15675 + }, + { + "epoch": 0.28654468349571355, + "grad_norm": 7.179054398456335, + "learning_rate": 8.371313804338156e-06, + "loss": 17.6121, + "step": 15676 + }, + { + "epoch": 0.28656296269216003, + "grad_norm": 7.800869695566196, + "learning_rate": 8.371095196418731e-06, + "loss": 18.1214, + "step": 15677 + }, + { + "epoch": 0.28658124188860656, + "grad_norm": 6.261844071848223, + "learning_rate": 
8.370876576683913e-06, + "loss": 17.2797, + "step": 15678 + }, + { + "epoch": 0.2865995210850531, + "grad_norm": 6.810676162116741, + "learning_rate": 8.37065794513446e-06, + "loss": 17.4442, + "step": 15679 + }, + { + "epoch": 0.28661780028149964, + "grad_norm": 7.497837283605271, + "learning_rate": 8.370439301771146e-06, + "loss": 17.9312, + "step": 15680 + }, + { + "epoch": 0.28663607947794617, + "grad_norm": 6.724830039731928, + "learning_rate": 8.370220646594736e-06, + "loss": 17.5545, + "step": 15681 + }, + { + "epoch": 0.28665435867439265, + "grad_norm": 6.520261805262163, + "learning_rate": 8.370001979605993e-06, + "loss": 17.6657, + "step": 15682 + }, + { + "epoch": 0.2866726378708392, + "grad_norm": 7.030807258369695, + "learning_rate": 8.369783300805685e-06, + "loss": 17.7105, + "step": 15683 + }, + { + "epoch": 0.2866909170672857, + "grad_norm": 9.927787256458823, + "learning_rate": 8.36956461019458e-06, + "loss": 17.9591, + "step": 15684 + }, + { + "epoch": 0.28670919626373226, + "grad_norm": 6.033712258318712, + "learning_rate": 8.369345907773444e-06, + "loss": 17.3323, + "step": 15685 + }, + { + "epoch": 0.2867274754601788, + "grad_norm": 6.195990386468086, + "learning_rate": 8.369127193543044e-06, + "loss": 17.4414, + "step": 15686 + }, + { + "epoch": 0.2867457546566253, + "grad_norm": 6.896687238686422, + "learning_rate": 8.368908467504142e-06, + "loss": 17.5145, + "step": 15687 + }, + { + "epoch": 0.2867640338530718, + "grad_norm": 6.95707880779647, + "learning_rate": 8.368689729657511e-06, + "loss": 17.7917, + "step": 15688 + }, + { + "epoch": 0.28678231304951834, + "grad_norm": 7.336968328842924, + "learning_rate": 8.368470980003914e-06, + "loss": 17.6072, + "step": 15689 + }, + { + "epoch": 0.2868005922459649, + "grad_norm": 6.1451909369189215, + "learning_rate": 8.368252218544117e-06, + "loss": 17.4768, + "step": 15690 + }, + { + "epoch": 0.2868188714424114, + "grad_norm": 6.061638916616824, + "learning_rate": 8.368033445278892e-06, + "loss": 17.5442, + "step": 15691 + }, + { + "epoch": 0.2868371506388579, + "grad_norm": 6.548939791459066, + "learning_rate": 8.367814660208999e-06, + "loss": 17.635, + "step": 15692 + }, + { + "epoch": 0.28685542983530443, + "grad_norm": 7.527961122980897, + "learning_rate": 8.367595863335208e-06, + "loss": 17.7922, + "step": 15693 + }, + { + "epoch": 0.28687370903175097, + "grad_norm": 6.5655215870551045, + "learning_rate": 8.367377054658287e-06, + "loss": 17.3734, + "step": 15694 + }, + { + "epoch": 0.2868919882281975, + "grad_norm": 6.477341736143212, + "learning_rate": 8.367158234179001e-06, + "loss": 17.4314, + "step": 15695 + }, + { + "epoch": 0.28691026742464404, + "grad_norm": 7.475140751791854, + "learning_rate": 8.366939401898117e-06, + "loss": 17.6992, + "step": 15696 + }, + { + "epoch": 0.2869285466210905, + "grad_norm": 7.305250930061159, + "learning_rate": 8.366720557816404e-06, + "loss": 17.6783, + "step": 15697 + }, + { + "epoch": 0.28694682581753705, + "grad_norm": 7.440542250326834, + "learning_rate": 8.366501701934626e-06, + "loss": 17.774, + "step": 15698 + }, + { + "epoch": 0.2869651050139836, + "grad_norm": 7.427751760169241, + "learning_rate": 8.366282834253553e-06, + "loss": 17.9105, + "step": 15699 + }, + { + "epoch": 0.2869833842104301, + "grad_norm": 7.019817900435624, + "learning_rate": 8.366063954773949e-06, + "loss": 17.6111, + "step": 15700 + }, + { + "epoch": 0.28700166340687666, + "grad_norm": 5.732647211925649, + "learning_rate": 8.365845063496585e-06, + "loss": 17.1256, + "step": 15701 + }, + { + 
"epoch": 0.28701994260332314, + "grad_norm": 7.037915434264202, + "learning_rate": 8.365626160422226e-06, + "loss": 17.7761, + "step": 15702 + }, + { + "epoch": 0.2870382217997697, + "grad_norm": 5.67736410354966, + "learning_rate": 8.365407245551638e-06, + "loss": 17.062, + "step": 15703 + }, + { + "epoch": 0.2870565009962162, + "grad_norm": 5.468890560092592, + "learning_rate": 8.36518831888559e-06, + "loss": 17.0763, + "step": 15704 + }, + { + "epoch": 0.28707478019266275, + "grad_norm": 5.434551600521866, + "learning_rate": 8.364969380424849e-06, + "loss": 16.9165, + "step": 15705 + }, + { + "epoch": 0.28709305938910923, + "grad_norm": 6.323974577584368, + "learning_rate": 8.364750430170183e-06, + "loss": 17.3111, + "step": 15706 + }, + { + "epoch": 0.28711133858555576, + "grad_norm": 7.9254576665947365, + "learning_rate": 8.36453146812236e-06, + "loss": 18.2983, + "step": 15707 + }, + { + "epoch": 0.2871296177820023, + "grad_norm": 7.312198334280671, + "learning_rate": 8.364312494282143e-06, + "loss": 17.672, + "step": 15708 + }, + { + "epoch": 0.28714789697844884, + "grad_norm": 6.6101487821054565, + "learning_rate": 8.364093508650304e-06, + "loss": 17.6043, + "step": 15709 + }, + { + "epoch": 0.28716617617489537, + "grad_norm": 6.559681846630912, + "learning_rate": 8.36387451122761e-06, + "loss": 17.525, + "step": 15710 + }, + { + "epoch": 0.28718445537134185, + "grad_norm": 7.656501045636494, + "learning_rate": 8.363655502014826e-06, + "loss": 18.0472, + "step": 15711 + }, + { + "epoch": 0.2872027345677884, + "grad_norm": 9.269767916265716, + "learning_rate": 8.363436481012722e-06, + "loss": 17.7054, + "step": 15712 + }, + { + "epoch": 0.2872210137642349, + "grad_norm": 6.74409848397112, + "learning_rate": 8.363217448222065e-06, + "loss": 17.3151, + "step": 15713 + }, + { + "epoch": 0.28723929296068146, + "grad_norm": 8.016435392112882, + "learning_rate": 8.362998403643623e-06, + "loss": 18.1044, + "step": 15714 + }, + { + "epoch": 0.287257572157128, + "grad_norm": 6.570574622683178, + "learning_rate": 8.362779347278163e-06, + "loss": 17.3039, + "step": 15715 + }, + { + "epoch": 0.2872758513535745, + "grad_norm": 7.366678450115921, + "learning_rate": 8.362560279126454e-06, + "loss": 17.9155, + "step": 15716 + }, + { + "epoch": 0.287294130550021, + "grad_norm": 7.345328132863208, + "learning_rate": 8.362341199189264e-06, + "loss": 18.1081, + "step": 15717 + }, + { + "epoch": 0.28731240974646755, + "grad_norm": 6.579812525042959, + "learning_rate": 8.362122107467357e-06, + "loss": 17.4539, + "step": 15718 + }, + { + "epoch": 0.2873306889429141, + "grad_norm": 6.677578022932323, + "learning_rate": 8.361903003961507e-06, + "loss": 17.6364, + "step": 15719 + }, + { + "epoch": 0.2873489681393606, + "grad_norm": 7.310175659897703, + "learning_rate": 8.361683888672475e-06, + "loss": 17.9238, + "step": 15720 + }, + { + "epoch": 0.2873672473358071, + "grad_norm": 7.462718364959586, + "learning_rate": 8.361464761601036e-06, + "loss": 17.6278, + "step": 15721 + }, + { + "epoch": 0.28738552653225363, + "grad_norm": 7.227925124793609, + "learning_rate": 8.361245622747954e-06, + "loss": 17.6803, + "step": 15722 + }, + { + "epoch": 0.28740380572870017, + "grad_norm": 5.185760579323815, + "learning_rate": 8.361026472113997e-06, + "loss": 16.9618, + "step": 15723 + }, + { + "epoch": 0.2874220849251467, + "grad_norm": 6.827928535280947, + "learning_rate": 8.360807309699934e-06, + "loss": 17.7473, + "step": 15724 + }, + { + "epoch": 0.28744036412159324, + "grad_norm": 7.476993260591754, + 
"learning_rate": 8.360588135506532e-06, + "loss": 17.9636, + "step": 15725 + }, + { + "epoch": 0.2874586433180397, + "grad_norm": 6.337144269226875, + "learning_rate": 8.360368949534562e-06, + "loss": 17.6345, + "step": 15726 + }, + { + "epoch": 0.28747692251448626, + "grad_norm": 6.127933451755372, + "learning_rate": 8.360149751784789e-06, + "loss": 17.37, + "step": 15727 + }, + { + "epoch": 0.2874952017109328, + "grad_norm": 6.0698608722058935, + "learning_rate": 8.359930542257984e-06, + "loss": 17.1567, + "step": 15728 + }, + { + "epoch": 0.2875134809073793, + "grad_norm": 5.84965151610325, + "learning_rate": 8.359711320954913e-06, + "loss": 17.3871, + "step": 15729 + }, + { + "epoch": 0.28753176010382586, + "grad_norm": 7.0989870416467875, + "learning_rate": 8.359492087876346e-06, + "loss": 17.6362, + "step": 15730 + }, + { + "epoch": 0.28755003930027234, + "grad_norm": 7.949534533597411, + "learning_rate": 8.359272843023049e-06, + "loss": 18.5204, + "step": 15731 + }, + { + "epoch": 0.2875683184967189, + "grad_norm": 8.35695095818438, + "learning_rate": 8.359053586395796e-06, + "loss": 18.2508, + "step": 15732 + }, + { + "epoch": 0.2875865976931654, + "grad_norm": 5.440921624763122, + "learning_rate": 8.358834317995349e-06, + "loss": 17.1725, + "step": 15733 + }, + { + "epoch": 0.28760487688961195, + "grad_norm": 7.033113579652731, + "learning_rate": 8.35861503782248e-06, + "loss": 17.7829, + "step": 15734 + }, + { + "epoch": 0.2876231560860585, + "grad_norm": 6.064502051210512, + "learning_rate": 8.358395745877956e-06, + "loss": 17.2337, + "step": 15735 + }, + { + "epoch": 0.28764143528250496, + "grad_norm": 6.502516987498833, + "learning_rate": 8.358176442162545e-06, + "loss": 17.3482, + "step": 15736 + }, + { + "epoch": 0.2876597144789515, + "grad_norm": 7.3532004121531, + "learning_rate": 8.35795712667702e-06, + "loss": 18.0178, + "step": 15737 + }, + { + "epoch": 0.28767799367539804, + "grad_norm": 7.212506742678808, + "learning_rate": 8.357737799422144e-06, + "loss": 17.9006, + "step": 15738 + }, + { + "epoch": 0.28769627287184457, + "grad_norm": 5.851440011004375, + "learning_rate": 8.35751846039869e-06, + "loss": 17.3012, + "step": 15739 + }, + { + "epoch": 0.28771455206829105, + "grad_norm": 7.668508686807459, + "learning_rate": 8.357299109607425e-06, + "loss": 18.4711, + "step": 15740 + }, + { + "epoch": 0.2877328312647376, + "grad_norm": 8.088217389170984, + "learning_rate": 8.357079747049116e-06, + "loss": 18.3095, + "step": 15741 + }, + { + "epoch": 0.2877511104611841, + "grad_norm": 6.679334674361961, + "learning_rate": 8.356860372724538e-06, + "loss": 17.736, + "step": 15742 + }, + { + "epoch": 0.28776938965763066, + "grad_norm": 6.996640799362128, + "learning_rate": 8.356640986634453e-06, + "loss": 17.7252, + "step": 15743 + }, + { + "epoch": 0.2877876688540772, + "grad_norm": 6.84840280288548, + "learning_rate": 8.356421588779633e-06, + "loss": 17.6636, + "step": 15744 + }, + { + "epoch": 0.2878059480505237, + "grad_norm": 6.754510511269283, + "learning_rate": 8.356202179160847e-06, + "loss": 17.4008, + "step": 15745 + }, + { + "epoch": 0.2878242272469702, + "grad_norm": 5.698967045748068, + "learning_rate": 8.355982757778861e-06, + "loss": 17.262, + "step": 15746 + }, + { + "epoch": 0.28784250644341675, + "grad_norm": 6.573891907933512, + "learning_rate": 8.35576332463445e-06, + "loss": 17.4271, + "step": 15747 + }, + { + "epoch": 0.2878607856398633, + "grad_norm": 6.243566704387631, + "learning_rate": 8.355543879728378e-06, + "loss": 17.4468, + "step": 15748 + }, + 
{ + "epoch": 0.2878790648363098, + "grad_norm": 6.31556660161982, + "learning_rate": 8.355324423061415e-06, + "loss": 17.5753, + "step": 15749 + }, + { + "epoch": 0.2878973440327563, + "grad_norm": 5.770455445087101, + "learning_rate": 8.355104954634334e-06, + "loss": 17.0861, + "step": 15750 + }, + { + "epoch": 0.28791562322920283, + "grad_norm": 7.50727004029445, + "learning_rate": 8.3548854744479e-06, + "loss": 18.0645, + "step": 15751 + }, + { + "epoch": 0.28793390242564937, + "grad_norm": 8.677714095973903, + "learning_rate": 8.354665982502883e-06, + "loss": 18.379, + "step": 15752 + }, + { + "epoch": 0.2879521816220959, + "grad_norm": 4.7307551149284945, + "learning_rate": 8.354446478800053e-06, + "loss": 16.738, + "step": 15753 + }, + { + "epoch": 0.28797046081854244, + "grad_norm": 6.695736112471265, + "learning_rate": 8.35422696334018e-06, + "loss": 17.8252, + "step": 15754 + }, + { + "epoch": 0.2879887400149889, + "grad_norm": 6.32623048304772, + "learning_rate": 8.354007436124031e-06, + "loss": 17.4677, + "step": 15755 + }, + { + "epoch": 0.28800701921143546, + "grad_norm": 5.641460203140147, + "learning_rate": 8.353787897152377e-06, + "loss": 17.2396, + "step": 15756 + }, + { + "epoch": 0.288025298407882, + "grad_norm": 5.224213157568367, + "learning_rate": 8.353568346425989e-06, + "loss": 17.1579, + "step": 15757 + }, + { + "epoch": 0.2880435776043285, + "grad_norm": 6.951452327045251, + "learning_rate": 8.353348783945633e-06, + "loss": 17.7508, + "step": 15758 + }, + { + "epoch": 0.28806185680077506, + "grad_norm": 6.943798294859284, + "learning_rate": 8.353129209712084e-06, + "loss": 17.5634, + "step": 15759 + }, + { + "epoch": 0.28808013599722154, + "grad_norm": 8.345537359122252, + "learning_rate": 8.352909623726105e-06, + "loss": 18.1797, + "step": 15760 + }, + { + "epoch": 0.2880984151936681, + "grad_norm": 5.59276660194879, + "learning_rate": 8.352690025988468e-06, + "loss": 17.2527, + "step": 15761 + }, + { + "epoch": 0.2881166943901146, + "grad_norm": 6.721423592195531, + "learning_rate": 8.352470416499945e-06, + "loss": 17.5115, + "step": 15762 + }, + { + "epoch": 0.28813497358656115, + "grad_norm": 6.34198069378264, + "learning_rate": 8.352250795261304e-06, + "loss": 17.4371, + "step": 15763 + }, + { + "epoch": 0.2881532527830077, + "grad_norm": 6.625762213620717, + "learning_rate": 8.352031162273316e-06, + "loss": 17.8069, + "step": 15764 + }, + { + "epoch": 0.28817153197945417, + "grad_norm": 7.369267609233369, + "learning_rate": 8.351811517536748e-06, + "loss": 17.8182, + "step": 15765 + }, + { + "epoch": 0.2881898111759007, + "grad_norm": 6.127724232807258, + "learning_rate": 8.351591861052371e-06, + "loss": 17.4318, + "step": 15766 + }, + { + "epoch": 0.28820809037234724, + "grad_norm": 5.957781267863218, + "learning_rate": 8.351372192820956e-06, + "loss": 17.2612, + "step": 15767 + }, + { + "epoch": 0.28822636956879377, + "grad_norm": 7.40018453570937, + "learning_rate": 8.351152512843273e-06, + "loss": 18.0546, + "step": 15768 + }, + { + "epoch": 0.2882446487652403, + "grad_norm": 7.588501590064388, + "learning_rate": 8.350932821120093e-06, + "loss": 17.5848, + "step": 15769 + }, + { + "epoch": 0.2882629279616868, + "grad_norm": 6.9466494681157505, + "learning_rate": 8.35071311765218e-06, + "loss": 17.8109, + "step": 15770 + }, + { + "epoch": 0.2882812071581333, + "grad_norm": 8.034972029273105, + "learning_rate": 8.350493402440312e-06, + "loss": 18.2194, + "step": 15771 + }, + { + "epoch": 0.28829948635457986, + "grad_norm": 6.376879871939102, + 
"learning_rate": 8.350273675485251e-06, + "loss": 17.489, + "step": 15772 + }, + { + "epoch": 0.2883177655510264, + "grad_norm": 6.655720185303255, + "learning_rate": 8.350053936787777e-06, + "loss": 17.5294, + "step": 15773 + }, + { + "epoch": 0.2883360447474729, + "grad_norm": 6.714952995024809, + "learning_rate": 8.349834186348652e-06, + "loss": 17.5506, + "step": 15774 + }, + { + "epoch": 0.2883543239439194, + "grad_norm": 6.856311084096559, + "learning_rate": 8.349614424168649e-06, + "loss": 17.8293, + "step": 15775 + }, + { + "epoch": 0.28837260314036595, + "grad_norm": 5.681622234775624, + "learning_rate": 8.349394650248537e-06, + "loss": 17.2425, + "step": 15776 + }, + { + "epoch": 0.2883908823368125, + "grad_norm": 5.712181956332461, + "learning_rate": 8.349174864589088e-06, + "loss": 17.1507, + "step": 15777 + }, + { + "epoch": 0.288409161533259, + "grad_norm": 5.832129861901588, + "learning_rate": 8.348955067191071e-06, + "loss": 17.1996, + "step": 15778 + }, + { + "epoch": 0.2884274407297055, + "grad_norm": 7.35506781927844, + "learning_rate": 8.348735258055258e-06, + "loss": 17.5264, + "step": 15779 + }, + { + "epoch": 0.28844571992615203, + "grad_norm": 8.321872434461017, + "learning_rate": 8.34851543718242e-06, + "loss": 17.928, + "step": 15780 + }, + { + "epoch": 0.28846399912259857, + "grad_norm": 7.331634591493145, + "learning_rate": 8.348295604573324e-06, + "loss": 17.6809, + "step": 15781 + }, + { + "epoch": 0.2884822783190451, + "grad_norm": 6.64089174685003, + "learning_rate": 8.348075760228744e-06, + "loss": 17.5471, + "step": 15782 + }, + { + "epoch": 0.28850055751549164, + "grad_norm": 6.379516298623369, + "learning_rate": 8.347855904149447e-06, + "loss": 17.4211, + "step": 15783 + }, + { + "epoch": 0.2885188367119381, + "grad_norm": 5.813734280984642, + "learning_rate": 8.347636036336207e-06, + "loss": 17.1827, + "step": 15784 + }, + { + "epoch": 0.28853711590838466, + "grad_norm": 7.679506660534976, + "learning_rate": 8.347416156789791e-06, + "loss": 18.2738, + "step": 15785 + }, + { + "epoch": 0.2885553951048312, + "grad_norm": 6.070713105725202, + "learning_rate": 8.347196265510976e-06, + "loss": 17.6549, + "step": 15786 + }, + { + "epoch": 0.2885736743012777, + "grad_norm": 8.341650233524906, + "learning_rate": 8.346976362500526e-06, + "loss": 17.9239, + "step": 15787 + }, + { + "epoch": 0.28859195349772426, + "grad_norm": 6.223210640444305, + "learning_rate": 8.346756447759215e-06, + "loss": 17.5592, + "step": 15788 + }, + { + "epoch": 0.28861023269417074, + "grad_norm": 7.440064451023787, + "learning_rate": 8.346536521287812e-06, + "loss": 18.4392, + "step": 15789 + }, + { + "epoch": 0.2886285118906173, + "grad_norm": 6.827000463227836, + "learning_rate": 8.346316583087088e-06, + "loss": 17.6463, + "step": 15790 + }, + { + "epoch": 0.2886467910870638, + "grad_norm": 7.2689956750364555, + "learning_rate": 8.346096633157816e-06, + "loss": 17.6978, + "step": 15791 + }, + { + "epoch": 0.28866507028351035, + "grad_norm": 9.560304213282526, + "learning_rate": 8.345876671500766e-06, + "loss": 18.0112, + "step": 15792 + }, + { + "epoch": 0.2886833494799569, + "grad_norm": 6.371597099476278, + "learning_rate": 8.345656698116708e-06, + "loss": 17.3692, + "step": 15793 + }, + { + "epoch": 0.28870162867640337, + "grad_norm": 5.686262289011201, + "learning_rate": 8.345436713006416e-06, + "loss": 16.9762, + "step": 15794 + }, + { + "epoch": 0.2887199078728499, + "grad_norm": 7.621295718506177, + "learning_rate": 8.345216716170656e-06, + "loss": 17.5674, + "step": 15795 + 
}, + { + "epoch": 0.28873818706929644, + "grad_norm": 7.620971429766454, + "learning_rate": 8.344996707610202e-06, + "loss": 17.7524, + "step": 15796 + }, + { + "epoch": 0.288756466265743, + "grad_norm": 6.191796140829016, + "learning_rate": 8.344776687325825e-06, + "loss": 17.313, + "step": 15797 + }, + { + "epoch": 0.2887747454621895, + "grad_norm": 6.181568030098211, + "learning_rate": 8.344556655318296e-06, + "loss": 17.5748, + "step": 15798 + }, + { + "epoch": 0.288793024658636, + "grad_norm": 6.268887034304854, + "learning_rate": 8.344336611588385e-06, + "loss": 17.3379, + "step": 15799 + }, + { + "epoch": 0.2888113038550825, + "grad_norm": 6.341147994667579, + "learning_rate": 8.344116556136867e-06, + "loss": 17.4737, + "step": 15800 + }, + { + "epoch": 0.28882958305152906, + "grad_norm": 6.033665314323493, + "learning_rate": 8.34389648896451e-06, + "loss": 17.6287, + "step": 15801 + }, + { + "epoch": 0.2888478622479756, + "grad_norm": 6.986899797204273, + "learning_rate": 8.343676410072086e-06, + "loss": 17.2298, + "step": 15802 + }, + { + "epoch": 0.28886614144442213, + "grad_norm": 8.022259512284283, + "learning_rate": 8.343456319460365e-06, + "loss": 18.4381, + "step": 15803 + }, + { + "epoch": 0.2888844206408686, + "grad_norm": 7.335846744323855, + "learning_rate": 8.34323621713012e-06, + "loss": 18.1217, + "step": 15804 + }, + { + "epoch": 0.28890269983731515, + "grad_norm": 5.4181572914117195, + "learning_rate": 8.343016103082122e-06, + "loss": 17.2419, + "step": 15805 + }, + { + "epoch": 0.2889209790337617, + "grad_norm": 7.668717511255242, + "learning_rate": 8.342795977317144e-06, + "loss": 18.0679, + "step": 15806 + }, + { + "epoch": 0.2889392582302082, + "grad_norm": 6.007362244453671, + "learning_rate": 8.342575839835954e-06, + "loss": 17.324, + "step": 15807 + }, + { + "epoch": 0.2889575374266547, + "grad_norm": 5.766159683698141, + "learning_rate": 8.342355690639329e-06, + "loss": 17.1475, + "step": 15808 + }, + { + "epoch": 0.28897581662310123, + "grad_norm": 7.18569407857634, + "learning_rate": 8.342135529728036e-06, + "loss": 18.1369, + "step": 15809 + }, + { + "epoch": 0.28899409581954777, + "grad_norm": 8.230697519035877, + "learning_rate": 8.341915357102846e-06, + "loss": 18.0357, + "step": 15810 + }, + { + "epoch": 0.2890123750159943, + "grad_norm": 6.858098229288025, + "learning_rate": 8.341695172764533e-06, + "loss": 17.6922, + "step": 15811 + }, + { + "epoch": 0.28903065421244084, + "grad_norm": 6.996834134548224, + "learning_rate": 8.34147497671387e-06, + "loss": 17.7166, + "step": 15812 + }, + { + "epoch": 0.2890489334088873, + "grad_norm": 7.044271065411338, + "learning_rate": 8.341254768951627e-06, + "loss": 17.9316, + "step": 15813 + }, + { + "epoch": 0.28906721260533386, + "grad_norm": 6.186792034925866, + "learning_rate": 8.341034549478575e-06, + "loss": 17.378, + "step": 15814 + }, + { + "epoch": 0.2890854918017804, + "grad_norm": 6.568810237734127, + "learning_rate": 8.340814318295488e-06, + "loss": 17.3772, + "step": 15815 + }, + { + "epoch": 0.2891037709982269, + "grad_norm": 7.044154342767684, + "learning_rate": 8.340594075403137e-06, + "loss": 17.8485, + "step": 15816 + }, + { + "epoch": 0.28912205019467346, + "grad_norm": 5.307773036124306, + "learning_rate": 8.340373820802292e-06, + "loss": 17.0261, + "step": 15817 + }, + { + "epoch": 0.28914032939111994, + "grad_norm": 6.701566476623037, + "learning_rate": 8.340153554493727e-06, + "loss": 17.7325, + "step": 15818 + }, + { + "epoch": 0.2891586085875665, + "grad_norm": 7.622078532809152, + 
"learning_rate": 8.339933276478215e-06, + "loss": 17.7651, + "step": 15819 + }, + { + "epoch": 0.289176887784013, + "grad_norm": 7.24709114490203, + "learning_rate": 8.339712986756524e-06, + "loss": 18.1042, + "step": 15820 + }, + { + "epoch": 0.28919516698045955, + "grad_norm": 6.313386930704077, + "learning_rate": 8.339492685329431e-06, + "loss": 17.5091, + "step": 15821 + }, + { + "epoch": 0.2892134461769061, + "grad_norm": 7.392391100618513, + "learning_rate": 8.339272372197707e-06, + "loss": 17.9825, + "step": 15822 + }, + { + "epoch": 0.28923172537335257, + "grad_norm": 6.142106388509246, + "learning_rate": 8.339052047362122e-06, + "loss": 17.5634, + "step": 15823 + }, + { + "epoch": 0.2892500045697991, + "grad_norm": 5.677965210339009, + "learning_rate": 8.338831710823448e-06, + "loss": 17.2084, + "step": 15824 + }, + { + "epoch": 0.28926828376624564, + "grad_norm": 6.277885906577304, + "learning_rate": 8.338611362582458e-06, + "loss": 17.598, + "step": 15825 + }, + { + "epoch": 0.2892865629626922, + "grad_norm": 6.359339975831886, + "learning_rate": 8.338391002639927e-06, + "loss": 17.3931, + "step": 15826 + }, + { + "epoch": 0.2893048421591387, + "grad_norm": 6.134284777287734, + "learning_rate": 8.338170630996625e-06, + "loss": 17.5104, + "step": 15827 + }, + { + "epoch": 0.2893231213555852, + "grad_norm": 7.572483903418803, + "learning_rate": 8.337950247653323e-06, + "loss": 17.9657, + "step": 15828 + }, + { + "epoch": 0.2893414005520317, + "grad_norm": 6.421304801092773, + "learning_rate": 8.337729852610797e-06, + "loss": 17.6365, + "step": 15829 + }, + { + "epoch": 0.28935967974847826, + "grad_norm": 8.723364308966541, + "learning_rate": 8.337509445869818e-06, + "loss": 18.4818, + "step": 15830 + }, + { + "epoch": 0.2893779589449248, + "grad_norm": 6.2155569847895515, + "learning_rate": 8.337289027431156e-06, + "loss": 17.3669, + "step": 15831 + }, + { + "epoch": 0.28939623814137133, + "grad_norm": 6.416649729972007, + "learning_rate": 8.337068597295585e-06, + "loss": 17.4181, + "step": 15832 + }, + { + "epoch": 0.2894145173378178, + "grad_norm": 8.053614383827934, + "learning_rate": 8.33684815546388e-06, + "loss": 18.5884, + "step": 15833 + }, + { + "epoch": 0.28943279653426435, + "grad_norm": 7.767476813720748, + "learning_rate": 8.336627701936813e-06, + "loss": 17.7285, + "step": 15834 + }, + { + "epoch": 0.2894510757307109, + "grad_norm": 7.30343678621806, + "learning_rate": 8.336407236715152e-06, + "loss": 17.9958, + "step": 15835 + }, + { + "epoch": 0.2894693549271574, + "grad_norm": 6.469256363024108, + "learning_rate": 8.336186759799675e-06, + "loss": 17.4907, + "step": 15836 + }, + { + "epoch": 0.28948763412360395, + "grad_norm": 7.4002692280470175, + "learning_rate": 8.335966271191154e-06, + "loss": 17.884, + "step": 15837 + }, + { + "epoch": 0.28950591332005043, + "grad_norm": 7.020862962918426, + "learning_rate": 8.335745770890359e-06, + "loss": 17.6649, + "step": 15838 + }, + { + "epoch": 0.28952419251649697, + "grad_norm": 5.913564074879372, + "learning_rate": 8.335525258898065e-06, + "loss": 17.2542, + "step": 15839 + }, + { + "epoch": 0.2895424717129435, + "grad_norm": 6.964672161097862, + "learning_rate": 8.335304735215044e-06, + "loss": 17.5509, + "step": 15840 + }, + { + "epoch": 0.28956075090939004, + "grad_norm": 7.032762438823331, + "learning_rate": 8.33508419984207e-06, + "loss": 18.202, + "step": 15841 + }, + { + "epoch": 0.2895790301058365, + "grad_norm": 7.0709440844314, + "learning_rate": 8.334863652779914e-06, + "loss": 17.6258, + "step": 15842 + }, 
+ { + "epoch": 0.28959730930228306, + "grad_norm": 7.833117486528488, + "learning_rate": 8.334643094029354e-06, + "loss": 17.5652, + "step": 15843 + }, + { + "epoch": 0.2896155884987296, + "grad_norm": 7.707687027103323, + "learning_rate": 8.334422523591154e-06, + "loss": 17.7136, + "step": 15844 + }, + { + "epoch": 0.28963386769517613, + "grad_norm": 8.420145673153188, + "learning_rate": 8.334201941466096e-06, + "loss": 18.1864, + "step": 15845 + }, + { + "epoch": 0.28965214689162266, + "grad_norm": 5.708632640111591, + "learning_rate": 8.333981347654947e-06, + "loss": 17.0223, + "step": 15846 + }, + { + "epoch": 0.28967042608806914, + "grad_norm": 5.904381181598544, + "learning_rate": 8.333760742158485e-06, + "loss": 17.3161, + "step": 15847 + }, + { + "epoch": 0.2896887052845157, + "grad_norm": 6.625647036338524, + "learning_rate": 8.333540124977482e-06, + "loss": 17.3811, + "step": 15848 + }, + { + "epoch": 0.2897069844809622, + "grad_norm": 8.40824991275657, + "learning_rate": 8.333319496112707e-06, + "loss": 18.5501, + "step": 15849 + }, + { + "epoch": 0.28972526367740875, + "grad_norm": 5.080051963454226, + "learning_rate": 8.333098855564938e-06, + "loss": 16.8455, + "step": 15850 + }, + { + "epoch": 0.2897435428738553, + "grad_norm": 6.884719320562312, + "learning_rate": 8.332878203334946e-06, + "loss": 17.5062, + "step": 15851 + }, + { + "epoch": 0.28976182207030177, + "grad_norm": 8.473160270872027, + "learning_rate": 8.332657539423505e-06, + "loss": 17.6787, + "step": 15852 + }, + { + "epoch": 0.2897801012667483, + "grad_norm": 6.920592389283835, + "learning_rate": 8.33243686383139e-06, + "loss": 17.5786, + "step": 15853 + }, + { + "epoch": 0.28979838046319484, + "grad_norm": 7.619456747101325, + "learning_rate": 8.332216176559371e-06, + "loss": 18.207, + "step": 15854 + }, + { + "epoch": 0.2898166596596414, + "grad_norm": 7.784304950394402, + "learning_rate": 8.331995477608225e-06, + "loss": 18.1824, + "step": 15855 + }, + { + "epoch": 0.2898349388560879, + "grad_norm": 8.779377212254373, + "learning_rate": 8.331774766978723e-06, + "loss": 18.0307, + "step": 15856 + }, + { + "epoch": 0.2898532180525344, + "grad_norm": 7.566250554103422, + "learning_rate": 8.331554044671641e-06, + "loss": 17.8003, + "step": 15857 + }, + { + "epoch": 0.2898714972489809, + "grad_norm": 6.331108699259542, + "learning_rate": 8.331333310687751e-06, + "loss": 17.5381, + "step": 15858 + }, + { + "epoch": 0.28988977644542746, + "grad_norm": 5.620430241750999, + "learning_rate": 8.331112565027825e-06, + "loss": 17.1739, + "step": 15859 + }, + { + "epoch": 0.289908055641874, + "grad_norm": 7.172176970203313, + "learning_rate": 8.33089180769264e-06, + "loss": 17.8289, + "step": 15860 + }, + { + "epoch": 0.28992633483832053, + "grad_norm": 6.1488761230885824, + "learning_rate": 8.330671038682967e-06, + "loss": 17.2673, + "step": 15861 + }, + { + "epoch": 0.289944614034767, + "grad_norm": 6.707370241170042, + "learning_rate": 8.330450257999582e-06, + "loss": 17.4739, + "step": 15862 + }, + { + "epoch": 0.28996289323121355, + "grad_norm": 7.522112889595386, + "learning_rate": 8.330229465643257e-06, + "loss": 17.9497, + "step": 15863 + }, + { + "epoch": 0.2899811724276601, + "grad_norm": 7.0785085349866925, + "learning_rate": 8.330008661614769e-06, + "loss": 17.644, + "step": 15864 + }, + { + "epoch": 0.2899994516241066, + "grad_norm": 5.762830020144023, + "learning_rate": 8.329787845914888e-06, + "loss": 17.4525, + "step": 15865 + }, + { + "epoch": 0.29001773082055315, + "grad_norm": 6.201282452027288, + 
"learning_rate": 8.32956701854439e-06, + "loss": 17.4657, + "step": 15866 + }, + { + "epoch": 0.29003601001699963, + "grad_norm": 6.9848531405467815, + "learning_rate": 8.329346179504046e-06, + "loss": 17.948, + "step": 15867 + }, + { + "epoch": 0.29005428921344617, + "grad_norm": 6.343855021026234, + "learning_rate": 8.329125328794635e-06, + "loss": 17.1982, + "step": 15868 + }, + { + "epoch": 0.2900725684098927, + "grad_norm": 7.5193987271791585, + "learning_rate": 8.328904466416929e-06, + "loss": 18.4442, + "step": 15869 + }, + { + "epoch": 0.29009084760633924, + "grad_norm": 6.16276905751907, + "learning_rate": 8.3286835923717e-06, + "loss": 17.5355, + "step": 15870 + }, + { + "epoch": 0.2901091268027858, + "grad_norm": 6.806165075539182, + "learning_rate": 8.328462706659726e-06, + "loss": 17.5512, + "step": 15871 + }, + { + "epoch": 0.29012740599923226, + "grad_norm": 7.470899654380862, + "learning_rate": 8.328241809281776e-06, + "loss": 17.7824, + "step": 15872 + }, + { + "epoch": 0.2901456851956788, + "grad_norm": 7.926610709436997, + "learning_rate": 8.32802090023863e-06, + "loss": 18.0969, + "step": 15873 + }, + { + "epoch": 0.29016396439212533, + "grad_norm": 5.75081515714187, + "learning_rate": 8.327799979531058e-06, + "loss": 17.2268, + "step": 15874 + }, + { + "epoch": 0.29018224358857186, + "grad_norm": 6.982746896800702, + "learning_rate": 8.327579047159837e-06, + "loss": 17.4291, + "step": 15875 + }, + { + "epoch": 0.29020052278501834, + "grad_norm": 5.570328508642877, + "learning_rate": 8.32735810312574e-06, + "loss": 17.2382, + "step": 15876 + }, + { + "epoch": 0.2902188019814649, + "grad_norm": 6.244755244437659, + "learning_rate": 8.32713714742954e-06, + "loss": 17.2208, + "step": 15877 + }, + { + "epoch": 0.2902370811779114, + "grad_norm": 6.844615777860132, + "learning_rate": 8.326916180072015e-06, + "loss": 17.7951, + "step": 15878 + }, + { + "epoch": 0.29025536037435795, + "grad_norm": 6.211093470232108, + "learning_rate": 8.326695201053937e-06, + "loss": 17.3398, + "step": 15879 + }, + { + "epoch": 0.2902736395708045, + "grad_norm": 7.295018932536425, + "learning_rate": 8.32647421037608e-06, + "loss": 17.6137, + "step": 15880 + }, + { + "epoch": 0.29029191876725097, + "grad_norm": 6.186315851716792, + "learning_rate": 8.326253208039222e-06, + "loss": 17.4425, + "step": 15881 + }, + { + "epoch": 0.2903101979636975, + "grad_norm": 6.9896372933184345, + "learning_rate": 8.326032194044132e-06, + "loss": 17.6139, + "step": 15882 + }, + { + "epoch": 0.29032847716014404, + "grad_norm": 5.796558153213868, + "learning_rate": 8.325811168391589e-06, + "loss": 17.2714, + "step": 15883 + }, + { + "epoch": 0.2903467563565906, + "grad_norm": 5.973745328774716, + "learning_rate": 8.325590131082367e-06, + "loss": 17.4456, + "step": 15884 + }, + { + "epoch": 0.2903650355530371, + "grad_norm": 7.042997243532951, + "learning_rate": 8.32536908211724e-06, + "loss": 17.5358, + "step": 15885 + }, + { + "epoch": 0.2903833147494836, + "grad_norm": 6.664916227130399, + "learning_rate": 8.325148021496982e-06, + "loss": 17.671, + "step": 15886 + }, + { + "epoch": 0.2904015939459301, + "grad_norm": 6.326458415158664, + "learning_rate": 8.32492694922237e-06, + "loss": 17.5055, + "step": 15887 + }, + { + "epoch": 0.29041987314237666, + "grad_norm": 6.8060649565236435, + "learning_rate": 8.324705865294178e-06, + "loss": 17.7007, + "step": 15888 + }, + { + "epoch": 0.2904381523388232, + "grad_norm": 6.149219705617901, + "learning_rate": 8.324484769713179e-06, + "loss": 17.4992, + "step": 15889 + }, 
+ { + "epoch": 0.29045643153526973, + "grad_norm": 6.0369480476305455, + "learning_rate": 8.32426366248015e-06, + "loss": 17.3001, + "step": 15890 + }, + { + "epoch": 0.2904747107317162, + "grad_norm": 6.877425771103101, + "learning_rate": 8.324042543595866e-06, + "loss": 17.8269, + "step": 15891 + }, + { + "epoch": 0.29049298992816275, + "grad_norm": 5.443113205173727, + "learning_rate": 8.3238214130611e-06, + "loss": 17.36, + "step": 15892 + }, + { + "epoch": 0.2905112691246093, + "grad_norm": 6.024407802871284, + "learning_rate": 8.323600270876628e-06, + "loss": 17.4058, + "step": 15893 + }, + { + "epoch": 0.2905295483210558, + "grad_norm": 6.59169659842555, + "learning_rate": 8.323379117043226e-06, + "loss": 17.6154, + "step": 15894 + }, + { + "epoch": 0.29054782751750236, + "grad_norm": 8.11732513575511, + "learning_rate": 8.32315795156167e-06, + "loss": 18.2892, + "step": 15895 + }, + { + "epoch": 0.29056610671394884, + "grad_norm": 6.8012147171376975, + "learning_rate": 8.322936774432733e-06, + "loss": 17.4193, + "step": 15896 + }, + { + "epoch": 0.29058438591039537, + "grad_norm": 6.4210491553833124, + "learning_rate": 8.322715585657191e-06, + "loss": 17.922, + "step": 15897 + }, + { + "epoch": 0.2906026651068419, + "grad_norm": 6.96120011220932, + "learning_rate": 8.322494385235818e-06, + "loss": 17.9317, + "step": 15898 + }, + { + "epoch": 0.29062094430328844, + "grad_norm": 6.308022217355439, + "learning_rate": 8.322273173169392e-06, + "loss": 17.5599, + "step": 15899 + }, + { + "epoch": 0.290639223499735, + "grad_norm": 6.59785364956779, + "learning_rate": 8.322051949458686e-06, + "loss": 17.4036, + "step": 15900 + }, + { + "epoch": 0.29065750269618146, + "grad_norm": 6.912208280964955, + "learning_rate": 8.321830714104476e-06, + "loss": 17.7087, + "step": 15901 + }, + { + "epoch": 0.290675781892628, + "grad_norm": 7.600567631357769, + "learning_rate": 8.321609467107538e-06, + "loss": 18.1136, + "step": 15902 + }, + { + "epoch": 0.29069406108907453, + "grad_norm": 5.760471125677956, + "learning_rate": 8.321388208468647e-06, + "loss": 17.3006, + "step": 15903 + }, + { + "epoch": 0.29071234028552106, + "grad_norm": 6.084253569765747, + "learning_rate": 8.321166938188578e-06, + "loss": 17.0782, + "step": 15904 + }, + { + "epoch": 0.2907306194819676, + "grad_norm": 5.168974005331681, + "learning_rate": 8.320945656268109e-06, + "loss": 17.0572, + "step": 15905 + }, + { + "epoch": 0.2907488986784141, + "grad_norm": 5.221581518003498, + "learning_rate": 8.320724362708013e-06, + "loss": 17.0476, + "step": 15906 + }, + { + "epoch": 0.2907671778748606, + "grad_norm": 7.866374632689439, + "learning_rate": 8.320503057509064e-06, + "loss": 17.6504, + "step": 15907 + }, + { + "epoch": 0.29078545707130715, + "grad_norm": 6.143672742519724, + "learning_rate": 8.320281740672042e-06, + "loss": 17.6234, + "step": 15908 + }, + { + "epoch": 0.2908037362677537, + "grad_norm": 6.609740979769964, + "learning_rate": 8.32006041219772e-06, + "loss": 17.6301, + "step": 15909 + }, + { + "epoch": 0.29082201546420017, + "grad_norm": 6.571069191355662, + "learning_rate": 8.319839072086876e-06, + "loss": 17.6319, + "step": 15910 + }, + { + "epoch": 0.2908402946606467, + "grad_norm": 7.908554049421495, + "learning_rate": 8.31961772034028e-06, + "loss": 17.9841, + "step": 15911 + }, + { + "epoch": 0.29085857385709324, + "grad_norm": 6.285597972319262, + "learning_rate": 8.319396356958716e-06, + "loss": 17.3774, + "step": 15912 + }, + { + "epoch": 0.2908768530535398, + "grad_norm": 7.75251298088019, + 
"learning_rate": 8.319174981942955e-06, + "loss": 18.1743, + "step": 15913 + }, + { + "epoch": 0.2908951322499863, + "grad_norm": 6.516258591895095, + "learning_rate": 8.318953595293772e-06, + "loss": 17.6466, + "step": 15914 + }, + { + "epoch": 0.2909134114464328, + "grad_norm": 6.639266152220219, + "learning_rate": 8.318732197011945e-06, + "loss": 17.7367, + "step": 15915 + }, + { + "epoch": 0.2909316906428793, + "grad_norm": 5.321623586949735, + "learning_rate": 8.318510787098252e-06, + "loss": 17.0874, + "step": 15916 + }, + { + "epoch": 0.29094996983932586, + "grad_norm": 6.611776542798462, + "learning_rate": 8.318289365553465e-06, + "loss": 17.68, + "step": 15917 + }, + { + "epoch": 0.2909682490357724, + "grad_norm": 7.1980416374132945, + "learning_rate": 8.318067932378361e-06, + "loss": 17.6493, + "step": 15918 + }, + { + "epoch": 0.29098652823221893, + "grad_norm": 6.15059107590305, + "learning_rate": 8.317846487573717e-06, + "loss": 17.3689, + "step": 15919 + }, + { + "epoch": 0.2910048074286654, + "grad_norm": 6.84114950642885, + "learning_rate": 8.31762503114031e-06, + "loss": 17.456, + "step": 15920 + }, + { + "epoch": 0.29102308662511195, + "grad_norm": 5.920430701908309, + "learning_rate": 8.317403563078915e-06, + "loss": 17.1576, + "step": 15921 + }, + { + "epoch": 0.2910413658215585, + "grad_norm": 6.375604895983556, + "learning_rate": 8.317182083390307e-06, + "loss": 17.4953, + "step": 15922 + }, + { + "epoch": 0.291059645018005, + "grad_norm": 6.330529357697266, + "learning_rate": 8.316960592075267e-06, + "loss": 17.5169, + "step": 15923 + }, + { + "epoch": 0.29107792421445156, + "grad_norm": 7.647725504781784, + "learning_rate": 8.316739089134564e-06, + "loss": 18.0041, + "step": 15924 + }, + { + "epoch": 0.29109620341089804, + "grad_norm": 6.060455983108166, + "learning_rate": 8.316517574568981e-06, + "loss": 17.3912, + "step": 15925 + }, + { + "epoch": 0.29111448260734457, + "grad_norm": 7.1133631092179135, + "learning_rate": 8.31629604837929e-06, + "loss": 17.7133, + "step": 15926 + }, + { + "epoch": 0.2911327618037911, + "grad_norm": 4.92741934288932, + "learning_rate": 8.31607451056627e-06, + "loss": 16.7877, + "step": 15927 + }, + { + "epoch": 0.29115104100023764, + "grad_norm": 5.807936016586364, + "learning_rate": 8.315852961130697e-06, + "loss": 17.3348, + "step": 15928 + }, + { + "epoch": 0.2911693201966842, + "grad_norm": 6.720183569198894, + "learning_rate": 8.315631400073346e-06, + "loss": 17.5512, + "step": 15929 + }, + { + "epoch": 0.29118759939313066, + "grad_norm": 6.337216179019596, + "learning_rate": 8.315409827394996e-06, + "loss": 17.5489, + "step": 15930 + }, + { + "epoch": 0.2912058785895772, + "grad_norm": 8.794982143667239, + "learning_rate": 8.315188243096421e-06, + "loss": 19.0212, + "step": 15931 + }, + { + "epoch": 0.29122415778602373, + "grad_norm": 7.48505748242081, + "learning_rate": 8.3149666471784e-06, + "loss": 17.9526, + "step": 15932 + }, + { + "epoch": 0.29124243698247027, + "grad_norm": 5.8641357013429, + "learning_rate": 8.314745039641708e-06, + "loss": 17.4658, + "step": 15933 + }, + { + "epoch": 0.2912607161789168, + "grad_norm": 6.536095360010347, + "learning_rate": 8.314523420487122e-06, + "loss": 17.5087, + "step": 15934 + }, + { + "epoch": 0.2912789953753633, + "grad_norm": 6.992953286405548, + "learning_rate": 8.314301789715419e-06, + "loss": 17.6882, + "step": 15935 + }, + { + "epoch": 0.2912972745718098, + "grad_norm": 5.702328158462788, + "learning_rate": 8.314080147327376e-06, + "loss": 17.1967, + "step": 15936 + }, + { 
+ "epoch": 0.29131555376825635, + "grad_norm": 6.203032349576431, + "learning_rate": 8.31385849332377e-06, + "loss": 17.5671, + "step": 15937 + }, + { + "epoch": 0.2913338329647029, + "grad_norm": 7.527407895431283, + "learning_rate": 8.313636827705376e-06, + "loss": 17.6928, + "step": 15938 + }, + { + "epoch": 0.2913521121611494, + "grad_norm": 6.583331630723015, + "learning_rate": 8.313415150472974e-06, + "loss": 17.6124, + "step": 15939 + }, + { + "epoch": 0.2913703913575959, + "grad_norm": 5.763251944549388, + "learning_rate": 8.31319346162734e-06, + "loss": 17.0309, + "step": 15940 + }, + { + "epoch": 0.29138867055404244, + "grad_norm": 5.955730027311099, + "learning_rate": 8.31297176116925e-06, + "loss": 17.1212, + "step": 15941 + }, + { + "epoch": 0.291406949750489, + "grad_norm": 9.129691374230369, + "learning_rate": 8.31275004909948e-06, + "loss": 18.2103, + "step": 15942 + }, + { + "epoch": 0.2914252289469355, + "grad_norm": 5.801987910334633, + "learning_rate": 8.31252832541881e-06, + "loss": 17.2921, + "step": 15943 + }, + { + "epoch": 0.291443508143382, + "grad_norm": 5.2325612262252825, + "learning_rate": 8.312306590128015e-06, + "loss": 16.7652, + "step": 15944 + }, + { + "epoch": 0.2914617873398285, + "grad_norm": 6.368534594401016, + "learning_rate": 8.312084843227873e-06, + "loss": 17.443, + "step": 15945 + }, + { + "epoch": 0.29148006653627506, + "grad_norm": 6.093266259069604, + "learning_rate": 8.311863084719161e-06, + "loss": 17.4423, + "step": 15946 + }, + { + "epoch": 0.2914983457327216, + "grad_norm": 7.184285949649789, + "learning_rate": 8.311641314602657e-06, + "loss": 17.6637, + "step": 15947 + }, + { + "epoch": 0.29151662492916813, + "grad_norm": 6.639613039608739, + "learning_rate": 8.311419532879137e-06, + "loss": 17.3799, + "step": 15948 + }, + { + "epoch": 0.2915349041256146, + "grad_norm": 6.936285137221981, + "learning_rate": 8.311197739549378e-06, + "loss": 18.0556, + "step": 15949 + }, + { + "epoch": 0.29155318332206115, + "grad_norm": 7.292562960656055, + "learning_rate": 8.31097593461416e-06, + "loss": 18.0502, + "step": 15950 + }, + { + "epoch": 0.2915714625185077, + "grad_norm": 7.423271528676154, + "learning_rate": 8.310754118074258e-06, + "loss": 17.7295, + "step": 15951 + }, + { + "epoch": 0.2915897417149542, + "grad_norm": 6.857453059461951, + "learning_rate": 8.310532289930449e-06, + "loss": 17.9657, + "step": 15952 + }, + { + "epoch": 0.29160802091140076, + "grad_norm": 6.849705261001428, + "learning_rate": 8.310310450183512e-06, + "loss": 17.6968, + "step": 15953 + }, + { + "epoch": 0.29162630010784724, + "grad_norm": 6.846373698840621, + "learning_rate": 8.310088598834226e-06, + "loss": 17.7055, + "step": 15954 + }, + { + "epoch": 0.29164457930429377, + "grad_norm": 5.983680416740459, + "learning_rate": 8.309866735883365e-06, + "loss": 17.2364, + "step": 15955 + }, + { + "epoch": 0.2916628585007403, + "grad_norm": 7.717862622944557, + "learning_rate": 8.309644861331707e-06, + "loss": 18.4288, + "step": 15956 + }, + { + "epoch": 0.29168113769718684, + "grad_norm": 6.166111719059242, + "learning_rate": 8.309422975180036e-06, + "loss": 17.4886, + "step": 15957 + }, + { + "epoch": 0.2916994168936334, + "grad_norm": 6.259664873126532, + "learning_rate": 8.30920107742912e-06, + "loss": 17.6488, + "step": 15958 + }, + { + "epoch": 0.29171769609007986, + "grad_norm": 5.640824133826954, + "learning_rate": 8.308979168079742e-06, + "loss": 17.5109, + "step": 15959 + }, + { + "epoch": 0.2917359752865264, + "grad_norm": 5.6084383085018, + 
"learning_rate": 8.308757247132679e-06, + "loss": 17.3469, + "step": 15960 + }, + { + "epoch": 0.29175425448297293, + "grad_norm": 5.7750588523022515, + "learning_rate": 8.30853531458871e-06, + "loss": 17.3453, + "step": 15961 + }, + { + "epoch": 0.29177253367941947, + "grad_norm": 7.083811626591957, + "learning_rate": 8.308313370448611e-06, + "loss": 17.7419, + "step": 15962 + }, + { + "epoch": 0.291790812875866, + "grad_norm": 7.035315389410472, + "learning_rate": 8.308091414713162e-06, + "loss": 17.8954, + "step": 15963 + }, + { + "epoch": 0.2918090920723125, + "grad_norm": 6.371758361429908, + "learning_rate": 8.307869447383139e-06, + "loss": 17.5146, + "step": 15964 + }, + { + "epoch": 0.291827371268759, + "grad_norm": 4.803194078634297, + "learning_rate": 8.30764746845932e-06, + "loss": 16.8787, + "step": 15965 + }, + { + "epoch": 0.29184565046520555, + "grad_norm": 6.802235942204773, + "learning_rate": 8.307425477942485e-06, + "loss": 17.6656, + "step": 15966 + }, + { + "epoch": 0.2918639296616521, + "grad_norm": 7.262053207815283, + "learning_rate": 8.30720347583341e-06, + "loss": 18.0788, + "step": 15967 + }, + { + "epoch": 0.2918822088580986, + "grad_norm": 7.844899505760226, + "learning_rate": 8.306981462132873e-06, + "loss": 18.1277, + "step": 15968 + }, + { + "epoch": 0.2919004880545451, + "grad_norm": 6.267639267917648, + "learning_rate": 8.306759436841653e-06, + "loss": 17.4688, + "step": 15969 + }, + { + "epoch": 0.29191876725099164, + "grad_norm": 6.941059769945796, + "learning_rate": 8.306537399960528e-06, + "loss": 17.8929, + "step": 15970 + }, + { + "epoch": 0.2919370464474382, + "grad_norm": 7.0941521241078, + "learning_rate": 8.306315351490279e-06, + "loss": 17.9915, + "step": 15971 + }, + { + "epoch": 0.2919553256438847, + "grad_norm": 6.878623015133455, + "learning_rate": 8.30609329143168e-06, + "loss": 17.9059, + "step": 15972 + }, + { + "epoch": 0.29197360484033125, + "grad_norm": 5.563252523447362, + "learning_rate": 8.305871219785509e-06, + "loss": 17.025, + "step": 15973 + }, + { + "epoch": 0.2919918840367777, + "grad_norm": 5.711905719455599, + "learning_rate": 8.30564913655255e-06, + "loss": 17.2821, + "step": 15974 + }, + { + "epoch": 0.29201016323322426, + "grad_norm": 6.946104489699604, + "learning_rate": 8.305427041733573e-06, + "loss": 17.7724, + "step": 15975 + }, + { + "epoch": 0.2920284424296708, + "grad_norm": 5.92126910155695, + "learning_rate": 8.305204935329365e-06, + "loss": 17.3468, + "step": 15976 + }, + { + "epoch": 0.29204672162611733, + "grad_norm": 9.455120274795474, + "learning_rate": 8.304982817340699e-06, + "loss": 18.5487, + "step": 15977 + }, + { + "epoch": 0.2920650008225638, + "grad_norm": 6.930638307766184, + "learning_rate": 8.304760687768355e-06, + "loss": 17.6471, + "step": 15978 + }, + { + "epoch": 0.29208328001901035, + "grad_norm": 7.163525702393523, + "learning_rate": 8.304538546613111e-06, + "loss": 18.0668, + "step": 15979 + }, + { + "epoch": 0.2921015592154569, + "grad_norm": 6.304269962437889, + "learning_rate": 8.304316393875746e-06, + "loss": 17.4759, + "step": 15980 + }, + { + "epoch": 0.2921198384119034, + "grad_norm": 6.6073744214449395, + "learning_rate": 8.304094229557041e-06, + "loss": 17.7584, + "step": 15981 + }, + { + "epoch": 0.29213811760834996, + "grad_norm": 6.917237855040101, + "learning_rate": 8.30387205365777e-06, + "loss": 17.6626, + "step": 15982 + }, + { + "epoch": 0.29215639680479644, + "grad_norm": 7.831925164171523, + "learning_rate": 8.303649866178716e-06, + "loss": 18.0621, + "step": 15983 + }, + 
{ + "epoch": 0.29217467600124297, + "grad_norm": 7.766589484273645, + "learning_rate": 8.303427667120655e-06, + "loss": 18.0887, + "step": 15984 + }, + { + "epoch": 0.2921929551976895, + "grad_norm": 7.177838681952092, + "learning_rate": 8.303205456484367e-06, + "loss": 17.45, + "step": 15985 + }, + { + "epoch": 0.29221123439413604, + "grad_norm": 6.036841186751471, + "learning_rate": 8.30298323427063e-06, + "loss": 17.4499, + "step": 15986 + }, + { + "epoch": 0.2922295135905826, + "grad_norm": 7.082155768465045, + "learning_rate": 8.302761000480223e-06, + "loss": 17.826, + "step": 15987 + }, + { + "epoch": 0.29224779278702906, + "grad_norm": 6.580597836497529, + "learning_rate": 8.302538755113927e-06, + "loss": 17.5262, + "step": 15988 + }, + { + "epoch": 0.2922660719834756, + "grad_norm": 8.288081416192268, + "learning_rate": 8.302316498172518e-06, + "loss": 17.7942, + "step": 15989 + }, + { + "epoch": 0.29228435117992213, + "grad_norm": 6.33771745536431, + "learning_rate": 8.302094229656776e-06, + "loss": 17.4197, + "step": 15990 + }, + { + "epoch": 0.29230263037636867, + "grad_norm": 7.04313423853878, + "learning_rate": 8.30187194956748e-06, + "loss": 17.6455, + "step": 15991 + }, + { + "epoch": 0.2923209095728152, + "grad_norm": 5.712160325010722, + "learning_rate": 8.30164965790541e-06, + "loss": 17.2575, + "step": 15992 + }, + { + "epoch": 0.2923391887692617, + "grad_norm": 7.276121630883649, + "learning_rate": 8.301427354671345e-06, + "loss": 18.0208, + "step": 15993 + }, + { + "epoch": 0.2923574679657082, + "grad_norm": 6.383029706669919, + "learning_rate": 8.301205039866063e-06, + "loss": 17.774, + "step": 15994 + }, + { + "epoch": 0.29237574716215475, + "grad_norm": 6.627319589607053, + "learning_rate": 8.300982713490344e-06, + "loss": 17.5862, + "step": 15995 + }, + { + "epoch": 0.2923940263586013, + "grad_norm": 7.220860590953939, + "learning_rate": 8.300760375544967e-06, + "loss": 17.9415, + "step": 15996 + }, + { + "epoch": 0.2924123055550478, + "grad_norm": 6.978643408968633, + "learning_rate": 8.300538026030712e-06, + "loss": 17.569, + "step": 15997 + }, + { + "epoch": 0.2924305847514943, + "grad_norm": 6.531278179387211, + "learning_rate": 8.300315664948355e-06, + "loss": 17.5871, + "step": 15998 + }, + { + "epoch": 0.29244886394794084, + "grad_norm": 6.357891325954263, + "learning_rate": 8.300093292298681e-06, + "loss": 17.3975, + "step": 15999 + }, + { + "epoch": 0.2924671431443874, + "grad_norm": 8.095016958306415, + "learning_rate": 8.299870908082465e-06, + "loss": 18.3559, + "step": 16000 + }, + { + "epoch": 0.2924854223408339, + "grad_norm": 7.719323042506764, + "learning_rate": 8.299648512300487e-06, + "loss": 18.284, + "step": 16001 + }, + { + "epoch": 0.29250370153728045, + "grad_norm": 7.570844006464962, + "learning_rate": 8.29942610495353e-06, + "loss": 17.994, + "step": 16002 + }, + { + "epoch": 0.2925219807337269, + "grad_norm": 7.601757116027508, + "learning_rate": 8.299203686042367e-06, + "loss": 17.9397, + "step": 16003 + }, + { + "epoch": 0.29254025993017346, + "grad_norm": 6.740646743610164, + "learning_rate": 8.298981255567785e-06, + "loss": 17.8283, + "step": 16004 + }, + { + "epoch": 0.29255853912662, + "grad_norm": 6.308900735552201, + "learning_rate": 8.298758813530559e-06, + "loss": 17.4854, + "step": 16005 + }, + { + "epoch": 0.29257681832306653, + "grad_norm": 7.362631597178887, + "learning_rate": 8.298536359931469e-06, + "loss": 18.0763, + "step": 16006 + }, + { + "epoch": 0.29259509751951307, + "grad_norm": 5.7634680549419235, + 
"learning_rate": 8.298313894771294e-06, + "loss": 17.2389, + "step": 16007 + }, + { + "epoch": 0.29261337671595955, + "grad_norm": 6.239958089881529, + "learning_rate": 8.298091418050817e-06, + "loss": 17.3514, + "step": 16008 + }, + { + "epoch": 0.2926316559124061, + "grad_norm": 9.225766618544824, + "learning_rate": 8.297868929770815e-06, + "loss": 18.6185, + "step": 16009 + }, + { + "epoch": 0.2926499351088526, + "grad_norm": 5.553300930458291, + "learning_rate": 8.29764642993207e-06, + "loss": 17.0238, + "step": 16010 + }, + { + "epoch": 0.29266821430529916, + "grad_norm": 8.737045883777931, + "learning_rate": 8.29742391853536e-06, + "loss": 17.7353, + "step": 16011 + }, + { + "epoch": 0.29268649350174564, + "grad_norm": 8.113213857813278, + "learning_rate": 8.297201395581463e-06, + "loss": 18.0144, + "step": 16012 + }, + { + "epoch": 0.2927047726981922, + "grad_norm": 6.930256829166329, + "learning_rate": 8.296978861071163e-06, + "loss": 17.8505, + "step": 16013 + }, + { + "epoch": 0.2927230518946387, + "grad_norm": 5.76729045647949, + "learning_rate": 8.296756315005237e-06, + "loss": 17.3446, + "step": 16014 + }, + { + "epoch": 0.29274133109108524, + "grad_norm": 6.852138600941832, + "learning_rate": 8.296533757384467e-06, + "loss": 17.7116, + "step": 16015 + }, + { + "epoch": 0.2927596102875318, + "grad_norm": 7.1098600478062535, + "learning_rate": 8.296311188209634e-06, + "loss": 17.5953, + "step": 16016 + }, + { + "epoch": 0.29277788948397826, + "grad_norm": 6.152991216930885, + "learning_rate": 8.296088607481514e-06, + "loss": 17.6729, + "step": 16017 + }, + { + "epoch": 0.2927961686804248, + "grad_norm": 6.699902350562617, + "learning_rate": 8.295866015200889e-06, + "loss": 17.5917, + "step": 16018 + }, + { + "epoch": 0.29281444787687133, + "grad_norm": 6.718593645829086, + "learning_rate": 8.29564341136854e-06, + "loss": 17.4667, + "step": 16019 + }, + { + "epoch": 0.29283272707331787, + "grad_norm": 6.238437528064015, + "learning_rate": 8.29542079598525e-06, + "loss": 17.4897, + "step": 16020 + }, + { + "epoch": 0.2928510062697644, + "grad_norm": 7.803161615971059, + "learning_rate": 8.295198169051792e-06, + "loss": 17.9566, + "step": 16021 + }, + { + "epoch": 0.2928692854662109, + "grad_norm": 5.851969317334456, + "learning_rate": 8.294975530568952e-06, + "loss": 17.2075, + "step": 16022 + }, + { + "epoch": 0.2928875646626574, + "grad_norm": 6.903866718538288, + "learning_rate": 8.29475288053751e-06, + "loss": 17.684, + "step": 16023 + }, + { + "epoch": 0.29290584385910395, + "grad_norm": 9.073972148485757, + "learning_rate": 8.294530218958243e-06, + "loss": 18.4389, + "step": 16024 + }, + { + "epoch": 0.2929241230555505, + "grad_norm": 5.92501062690928, + "learning_rate": 8.294307545831935e-06, + "loss": 17.5402, + "step": 16025 + }, + { + "epoch": 0.292942402251997, + "grad_norm": 5.2315268846003695, + "learning_rate": 8.294084861159363e-06, + "loss": 16.9985, + "step": 16026 + }, + { + "epoch": 0.2929606814484435, + "grad_norm": 6.86822506179636, + "learning_rate": 8.293862164941311e-06, + "loss": 17.5747, + "step": 16027 + }, + { + "epoch": 0.29297896064489004, + "grad_norm": 5.756534441651972, + "learning_rate": 8.293639457178557e-06, + "loss": 17.2154, + "step": 16028 + }, + { + "epoch": 0.2929972398413366, + "grad_norm": 8.803201480989898, + "learning_rate": 8.293416737871882e-06, + "loss": 18.1632, + "step": 16029 + }, + { + "epoch": 0.2930155190377831, + "grad_norm": 7.056439052161994, + "learning_rate": 8.29319400702207e-06, + "loss": 18.0589, + "step": 16030 + }, + 
{ + "epoch": 0.29303379823422965, + "grad_norm": 7.8384031209301375, + "learning_rate": 8.292971264629895e-06, + "loss": 18.3924, + "step": 16031 + }, + { + "epoch": 0.29305207743067613, + "grad_norm": 5.30923451141389, + "learning_rate": 8.292748510696144e-06, + "loss": 17.0437, + "step": 16032 + }, + { + "epoch": 0.29307035662712266, + "grad_norm": 5.420403371338054, + "learning_rate": 8.292525745221595e-06, + "loss": 17.1702, + "step": 16033 + }, + { + "epoch": 0.2930886358235692, + "grad_norm": 8.135822152292993, + "learning_rate": 8.292302968207028e-06, + "loss": 17.872, + "step": 16034 + }, + { + "epoch": 0.29310691502001573, + "grad_norm": 4.77519323958378, + "learning_rate": 8.292080179653225e-06, + "loss": 16.8895, + "step": 16035 + }, + { + "epoch": 0.29312519421646227, + "grad_norm": 6.505618821002592, + "learning_rate": 8.291857379560968e-06, + "loss": 17.8212, + "step": 16036 + }, + { + "epoch": 0.29314347341290875, + "grad_norm": 7.792316429728093, + "learning_rate": 8.291634567931036e-06, + "loss": 17.7504, + "step": 16037 + }, + { + "epoch": 0.2931617526093553, + "grad_norm": 6.299430618102456, + "learning_rate": 8.291411744764209e-06, + "loss": 17.4734, + "step": 16038 + }, + { + "epoch": 0.2931800318058018, + "grad_norm": 7.2576532554125235, + "learning_rate": 8.29118891006127e-06, + "loss": 17.9509, + "step": 16039 + }, + { + "epoch": 0.29319831100224836, + "grad_norm": 8.165665908077704, + "learning_rate": 8.290966063823e-06, + "loss": 18.0751, + "step": 16040 + }, + { + "epoch": 0.2932165901986949, + "grad_norm": 7.610675332319149, + "learning_rate": 8.29074320605018e-06, + "loss": 17.8204, + "step": 16041 + }, + { + "epoch": 0.2932348693951414, + "grad_norm": 6.104090895652702, + "learning_rate": 8.290520336743589e-06, + "loss": 17.5176, + "step": 16042 + }, + { + "epoch": 0.2932531485915879, + "grad_norm": 7.843404621359061, + "learning_rate": 8.290297455904011e-06, + "loss": 18.1972, + "step": 16043 + }, + { + "epoch": 0.29327142778803444, + "grad_norm": 8.210727392862546, + "learning_rate": 8.290074563532227e-06, + "loss": 18.4069, + "step": 16044 + }, + { + "epoch": 0.293289706984481, + "grad_norm": 5.698596075513861, + "learning_rate": 8.289851659629014e-06, + "loss": 17.1512, + "step": 16045 + }, + { + "epoch": 0.29330798618092746, + "grad_norm": 7.1254844095808565, + "learning_rate": 8.28962874419516e-06, + "loss": 17.9916, + "step": 16046 + }, + { + "epoch": 0.293326265377374, + "grad_norm": 6.408700500381423, + "learning_rate": 8.289405817231439e-06, + "loss": 17.7296, + "step": 16047 + }, + { + "epoch": 0.29334454457382053, + "grad_norm": 6.294001819723074, + "learning_rate": 8.28918287873864e-06, + "loss": 17.5453, + "step": 16048 + }, + { + "epoch": 0.29336282377026707, + "grad_norm": 8.891507157521445, + "learning_rate": 8.288959928717538e-06, + "loss": 17.6735, + "step": 16049 + }, + { + "epoch": 0.2933811029667136, + "grad_norm": 6.445046404182921, + "learning_rate": 8.288736967168917e-06, + "loss": 17.4465, + "step": 16050 + }, + { + "epoch": 0.2933993821631601, + "grad_norm": 6.512188189130023, + "learning_rate": 8.288513994093558e-06, + "loss": 18.1263, + "step": 16051 + }, + { + "epoch": 0.2934176613596066, + "grad_norm": 6.035160251880754, + "learning_rate": 8.288291009492245e-06, + "loss": 17.4996, + "step": 16052 + }, + { + "epoch": 0.29343594055605315, + "grad_norm": 5.99175988250994, + "learning_rate": 8.288068013365755e-06, + "loss": 17.3569, + "step": 16053 + }, + { + "epoch": 0.2934542197524997, + "grad_norm": 6.9243016139231015, + 
"learning_rate": 8.287845005714872e-06, + "loss": 17.4461, + "step": 16054 + }, + { + "epoch": 0.2934724989489462, + "grad_norm": 7.028727291165785, + "learning_rate": 8.287621986540379e-06, + "loss": 17.3805, + "step": 16055 + }, + { + "epoch": 0.2934907781453927, + "grad_norm": 4.964826885366487, + "learning_rate": 8.287398955843056e-06, + "loss": 16.9784, + "step": 16056 + }, + { + "epoch": 0.29350905734183924, + "grad_norm": 5.559467036698685, + "learning_rate": 8.287175913623683e-06, + "loss": 17.0553, + "step": 16057 + }, + { + "epoch": 0.2935273365382858, + "grad_norm": 5.561341548724766, + "learning_rate": 8.286952859883046e-06, + "loss": 17.0916, + "step": 16058 + }, + { + "epoch": 0.2935456157347323, + "grad_norm": 8.510322733422155, + "learning_rate": 8.286729794621924e-06, + "loss": 18.8003, + "step": 16059 + }, + { + "epoch": 0.29356389493117885, + "grad_norm": 7.407605325077076, + "learning_rate": 8.286506717841098e-06, + "loss": 17.7896, + "step": 16060 + }, + { + "epoch": 0.29358217412762533, + "grad_norm": 6.351108811363999, + "learning_rate": 8.286283629541354e-06, + "loss": 17.6701, + "step": 16061 + }, + { + "epoch": 0.29360045332407186, + "grad_norm": 6.637702791225121, + "learning_rate": 8.286060529723467e-06, + "loss": 17.6626, + "step": 16062 + }, + { + "epoch": 0.2936187325205184, + "grad_norm": 6.9148735357227755, + "learning_rate": 8.285837418388225e-06, + "loss": 17.7434, + "step": 16063 + }, + { + "epoch": 0.29363701171696494, + "grad_norm": 6.5891439576524755, + "learning_rate": 8.285614295536408e-06, + "loss": 17.6507, + "step": 16064 + }, + { + "epoch": 0.29365529091341147, + "grad_norm": 7.0251678746844615, + "learning_rate": 8.285391161168798e-06, + "loss": 17.5474, + "step": 16065 + }, + { + "epoch": 0.29367357010985795, + "grad_norm": 7.031143782963527, + "learning_rate": 8.285168015286177e-06, + "loss": 17.8143, + "step": 16066 + }, + { + "epoch": 0.2936918493063045, + "grad_norm": 7.893692081698446, + "learning_rate": 8.284944857889327e-06, + "loss": 18.5228, + "step": 16067 + }, + { + "epoch": 0.293710128502751, + "grad_norm": 6.516554887842644, + "learning_rate": 8.284721688979032e-06, + "loss": 17.3555, + "step": 16068 + }, + { + "epoch": 0.29372840769919756, + "grad_norm": 6.303948742692822, + "learning_rate": 8.284498508556072e-06, + "loss": 17.4553, + "step": 16069 + }, + { + "epoch": 0.2937466868956441, + "grad_norm": 8.522657992212642, + "learning_rate": 8.284275316621227e-06, + "loss": 18.4694, + "step": 16070 + }, + { + "epoch": 0.2937649660920906, + "grad_norm": 6.756765906445806, + "learning_rate": 8.284052113175285e-06, + "loss": 17.8483, + "step": 16071 + }, + { + "epoch": 0.2937832452885371, + "grad_norm": 6.263059521885723, + "learning_rate": 8.283828898219025e-06, + "loss": 17.4964, + "step": 16072 + }, + { + "epoch": 0.29380152448498364, + "grad_norm": 6.480726018118758, + "learning_rate": 8.283605671753228e-06, + "loss": 17.6313, + "step": 16073 + }, + { + "epoch": 0.2938198036814302, + "grad_norm": 6.316229642215812, + "learning_rate": 8.283382433778678e-06, + "loss": 17.733, + "step": 16074 + }, + { + "epoch": 0.2938380828778767, + "grad_norm": 5.5538384810390005, + "learning_rate": 8.283159184296158e-06, + "loss": 16.9584, + "step": 16075 + }, + { + "epoch": 0.2938563620743232, + "grad_norm": 6.588095026098765, + "learning_rate": 8.282935923306452e-06, + "loss": 17.619, + "step": 16076 + }, + { + "epoch": 0.29387464127076973, + "grad_norm": 6.07862577927372, + "learning_rate": 8.282712650810339e-06, + "loss": 17.302, + "step": 
16077 + }, + { + "epoch": 0.29389292046721627, + "grad_norm": 7.78484540887961, + "learning_rate": 8.282489366808603e-06, + "loss": 18.1963, + "step": 16078 + }, + { + "epoch": 0.2939111996636628, + "grad_norm": 6.954039498244128, + "learning_rate": 8.282266071302025e-06, + "loss": 17.9513, + "step": 16079 + }, + { + "epoch": 0.2939294788601093, + "grad_norm": 6.391492039911019, + "learning_rate": 8.282042764291392e-06, + "loss": 17.5255, + "step": 16080 + }, + { + "epoch": 0.2939477580565558, + "grad_norm": 6.943123103228119, + "learning_rate": 8.281819445777483e-06, + "loss": 17.8471, + "step": 16081 + }, + { + "epoch": 0.29396603725300235, + "grad_norm": 4.913061747853871, + "learning_rate": 8.281596115761082e-06, + "loss": 16.976, + "step": 16082 + }, + { + "epoch": 0.2939843164494489, + "grad_norm": 6.9758949576322165, + "learning_rate": 8.281372774242968e-06, + "loss": 17.8159, + "step": 16083 + }, + { + "epoch": 0.2940025956458954, + "grad_norm": 6.624762103518529, + "learning_rate": 8.28114942122393e-06, + "loss": 17.527, + "step": 16084 + }, + { + "epoch": 0.2940208748423419, + "grad_norm": 7.075738927009336, + "learning_rate": 8.28092605670475e-06, + "loss": 17.815, + "step": 16085 + }, + { + "epoch": 0.29403915403878844, + "grad_norm": 6.632446800746286, + "learning_rate": 8.280702680686206e-06, + "loss": 17.7209, + "step": 16086 + }, + { + "epoch": 0.294057433235235, + "grad_norm": 7.477268407655306, + "learning_rate": 8.280479293169083e-06, + "loss": 17.8343, + "step": 16087 + }, + { + "epoch": 0.2940757124316815, + "grad_norm": 7.735048943739937, + "learning_rate": 8.280255894154167e-06, + "loss": 17.6479, + "step": 16088 + }, + { + "epoch": 0.29409399162812805, + "grad_norm": 6.944062305694892, + "learning_rate": 8.280032483642238e-06, + "loss": 18.0583, + "step": 16089 + }, + { + "epoch": 0.29411227082457453, + "grad_norm": 6.292512509475764, + "learning_rate": 8.27980906163408e-06, + "loss": 17.3399, + "step": 16090 + }, + { + "epoch": 0.29413055002102106, + "grad_norm": 6.856975000258546, + "learning_rate": 8.279585628130476e-06, + "loss": 17.7249, + "step": 16091 + }, + { + "epoch": 0.2941488292174676, + "grad_norm": 6.587628718956014, + "learning_rate": 8.279362183132208e-06, + "loss": 17.5801, + "step": 16092 + }, + { + "epoch": 0.29416710841391414, + "grad_norm": 6.10922530217729, + "learning_rate": 8.27913872664006e-06, + "loss": 17.3902, + "step": 16093 + }, + { + "epoch": 0.29418538761036067, + "grad_norm": 5.769721979891175, + "learning_rate": 8.278915258654816e-06, + "loss": 17.4178, + "step": 16094 + }, + { + "epoch": 0.29420366680680715, + "grad_norm": 8.461191822315362, + "learning_rate": 8.27869177917726e-06, + "loss": 18.7004, + "step": 16095 + }, + { + "epoch": 0.2942219460032537, + "grad_norm": 5.960529254388798, + "learning_rate": 8.278468288208173e-06, + "loss": 17.2166, + "step": 16096 + }, + { + "epoch": 0.2942402251997002, + "grad_norm": 6.0029111031728135, + "learning_rate": 8.278244785748337e-06, + "loss": 17.3049, + "step": 16097 + }, + { + "epoch": 0.29425850439614676, + "grad_norm": 7.2388768396319, + "learning_rate": 8.27802127179854e-06, + "loss": 17.8213, + "step": 16098 + }, + { + "epoch": 0.2942767835925933, + "grad_norm": 6.964455211596097, + "learning_rate": 8.277797746359562e-06, + "loss": 17.8663, + "step": 16099 + }, + { + "epoch": 0.2942950627890398, + "grad_norm": 7.143780228227318, + "learning_rate": 8.277574209432187e-06, + "loss": 17.6374, + "step": 16100 + }, + { + "epoch": 0.2943133419854863, + "grad_norm": 7.404051063205473, + 
"learning_rate": 8.277350661017198e-06, + "loss": 18.0763, + "step": 16101 + }, + { + "epoch": 0.29433162118193285, + "grad_norm": 5.681771181830303, + "learning_rate": 8.277127101115381e-06, + "loss": 17.1563, + "step": 16102 + }, + { + "epoch": 0.2943499003783794, + "grad_norm": 6.541284403697504, + "learning_rate": 8.276903529727517e-06, + "loss": 17.4682, + "step": 16103 + }, + { + "epoch": 0.2943681795748259, + "grad_norm": 6.463094141425445, + "learning_rate": 8.276679946854392e-06, + "loss": 17.3894, + "step": 16104 + }, + { + "epoch": 0.2943864587712724, + "grad_norm": 7.1175319393264305, + "learning_rate": 8.276456352496785e-06, + "loss": 17.7138, + "step": 16105 + }, + { + "epoch": 0.29440473796771893, + "grad_norm": 7.0808893139446605, + "learning_rate": 8.276232746655485e-06, + "loss": 17.5719, + "step": 16106 + }, + { + "epoch": 0.29442301716416547, + "grad_norm": 5.617139148213048, + "learning_rate": 8.276009129331273e-06, + "loss": 17.3385, + "step": 16107 + }, + { + "epoch": 0.294441296360612, + "grad_norm": 6.638985159144182, + "learning_rate": 8.275785500524933e-06, + "loss": 17.6396, + "step": 16108 + }, + { + "epoch": 0.29445957555705854, + "grad_norm": 6.126398449831048, + "learning_rate": 8.275561860237252e-06, + "loss": 17.4253, + "step": 16109 + }, + { + "epoch": 0.294477854753505, + "grad_norm": 7.9825261159233625, + "learning_rate": 8.275338208469007e-06, + "loss": 18.7895, + "step": 16110 + }, + { + "epoch": 0.29449613394995156, + "grad_norm": 6.348114834009812, + "learning_rate": 8.275114545220986e-06, + "loss": 17.5388, + "step": 16111 + }, + { + "epoch": 0.2945144131463981, + "grad_norm": 6.363968832816221, + "learning_rate": 8.274890870493975e-06, + "loss": 17.4444, + "step": 16112 + }, + { + "epoch": 0.2945326923428446, + "grad_norm": 6.605263007879188, + "learning_rate": 8.274667184288755e-06, + "loss": 17.6027, + "step": 16113 + }, + { + "epoch": 0.2945509715392911, + "grad_norm": 5.957730997015338, + "learning_rate": 8.27444348660611e-06, + "loss": 17.288, + "step": 16114 + }, + { + "epoch": 0.29456925073573764, + "grad_norm": 7.155158853752062, + "learning_rate": 8.274219777446826e-06, + "loss": 18.0591, + "step": 16115 + }, + { + "epoch": 0.2945875299321842, + "grad_norm": 5.690349804479167, + "learning_rate": 8.273996056811684e-06, + "loss": 17.1678, + "step": 16116 + }, + { + "epoch": 0.2946058091286307, + "grad_norm": 8.759875273515886, + "learning_rate": 8.27377232470147e-06, + "loss": 18.4531, + "step": 16117 + }, + { + "epoch": 0.29462408832507725, + "grad_norm": 7.071037803884969, + "learning_rate": 8.27354858111697e-06, + "loss": 17.9133, + "step": 16118 + }, + { + "epoch": 0.29464236752152373, + "grad_norm": 5.211525432522588, + "learning_rate": 8.273324826058966e-06, + "loss": 17.1834, + "step": 16119 + }, + { + "epoch": 0.29466064671797026, + "grad_norm": 6.635936234020447, + "learning_rate": 8.273101059528242e-06, + "loss": 17.4478, + "step": 16120 + }, + { + "epoch": 0.2946789259144168, + "grad_norm": 5.075378179443972, + "learning_rate": 8.272877281525581e-06, + "loss": 16.7821, + "step": 16121 + }, + { + "epoch": 0.29469720511086334, + "grad_norm": 7.059489589531912, + "learning_rate": 8.27265349205177e-06, + "loss": 18.1215, + "step": 16122 + }, + { + "epoch": 0.29471548430730987, + "grad_norm": 9.00772735041651, + "learning_rate": 8.272429691107595e-06, + "loss": 17.2768, + "step": 16123 + }, + { + "epoch": 0.29473376350375635, + "grad_norm": 6.863778251720272, + "learning_rate": 8.272205878693835e-06, + "loss": 18.007, + "step": 16124 
+ }, + { + "epoch": 0.2947520427002029, + "grad_norm": 5.685395242343081, + "learning_rate": 8.271982054811279e-06, + "loss": 17.2372, + "step": 16125 + }, + { + "epoch": 0.2947703218966494, + "grad_norm": 7.226228088489144, + "learning_rate": 8.271758219460708e-06, + "loss": 17.967, + "step": 16126 + }, + { + "epoch": 0.29478860109309596, + "grad_norm": 6.642752145506515, + "learning_rate": 8.27153437264291e-06, + "loss": 17.7786, + "step": 16127 + }, + { + "epoch": 0.2948068802895425, + "grad_norm": 7.521831132525992, + "learning_rate": 8.271310514358667e-06, + "loss": 18.4123, + "step": 16128 + }, + { + "epoch": 0.294825159485989, + "grad_norm": 7.055337495464242, + "learning_rate": 8.271086644608766e-06, + "loss": 17.9119, + "step": 16129 + }, + { + "epoch": 0.2948434386824355, + "grad_norm": 6.715668028490851, + "learning_rate": 8.27086276339399e-06, + "loss": 17.9008, + "step": 16130 + }, + { + "epoch": 0.29486171787888205, + "grad_norm": 6.226074771096643, + "learning_rate": 8.270638870715122e-06, + "loss": 17.6105, + "step": 16131 + }, + { + "epoch": 0.2948799970753286, + "grad_norm": 7.970005508418957, + "learning_rate": 8.27041496657295e-06, + "loss": 18.0794, + "step": 16132 + }, + { + "epoch": 0.2948982762717751, + "grad_norm": 7.312806125808853, + "learning_rate": 8.270191050968257e-06, + "loss": 17.7882, + "step": 16133 + }, + { + "epoch": 0.2949165554682216, + "grad_norm": 7.343051756783496, + "learning_rate": 8.269967123901828e-06, + "loss": 17.7306, + "step": 16134 + }, + { + "epoch": 0.29493483466466813, + "grad_norm": 6.028786414168179, + "learning_rate": 8.269743185374449e-06, + "loss": 17.3196, + "step": 16135 + }, + { + "epoch": 0.29495311386111467, + "grad_norm": 7.446803114877071, + "learning_rate": 8.269519235386902e-06, + "loss": 18.0951, + "step": 16136 + }, + { + "epoch": 0.2949713930575612, + "grad_norm": 5.85371185484508, + "learning_rate": 8.269295273939974e-06, + "loss": 17.2621, + "step": 16137 + }, + { + "epoch": 0.29498967225400774, + "grad_norm": 6.221291944454979, + "learning_rate": 8.26907130103445e-06, + "loss": 17.3017, + "step": 16138 + }, + { + "epoch": 0.2950079514504542, + "grad_norm": 6.240179982039564, + "learning_rate": 8.268847316671116e-06, + "loss": 17.4279, + "step": 16139 + }, + { + "epoch": 0.29502623064690076, + "grad_norm": 7.634717676856093, + "learning_rate": 8.268623320850755e-06, + "loss": 17.6661, + "step": 16140 + }, + { + "epoch": 0.2950445098433473, + "grad_norm": 7.903036835138847, + "learning_rate": 8.268399313574154e-06, + "loss": 18.2374, + "step": 16141 + }, + { + "epoch": 0.2950627890397938, + "grad_norm": 6.865211273666724, + "learning_rate": 8.268175294842096e-06, + "loss": 17.7922, + "step": 16142 + }, + { + "epoch": 0.29508106823624036, + "grad_norm": 6.0218696280341355, + "learning_rate": 8.267951264655367e-06, + "loss": 17.1883, + "step": 16143 + }, + { + "epoch": 0.29509934743268684, + "grad_norm": 5.3851609690899656, + "learning_rate": 8.267727223014752e-06, + "loss": 17.3298, + "step": 16144 + }, + { + "epoch": 0.2951176266291334, + "grad_norm": 7.415122471618471, + "learning_rate": 8.267503169921037e-06, + "loss": 17.9095, + "step": 16145 + }, + { + "epoch": 0.2951359058255799, + "grad_norm": 7.143329963730976, + "learning_rate": 8.267279105375007e-06, + "loss": 17.428, + "step": 16146 + }, + { + "epoch": 0.29515418502202645, + "grad_norm": 6.871888122379926, + "learning_rate": 8.267055029377448e-06, + "loss": 17.5288, + "step": 16147 + }, + { + "epoch": 0.29517246421847293, + "grad_norm": 8.243891775073257, + 
"learning_rate": 8.266830941929144e-06, + "loss": 17.8825, + "step": 16148 + }, + { + "epoch": 0.29519074341491947, + "grad_norm": 6.784484797692967, + "learning_rate": 8.26660684303088e-06, + "loss": 17.7391, + "step": 16149 + }, + { + "epoch": 0.295209022611366, + "grad_norm": 9.112605991119066, + "learning_rate": 8.266382732683445e-06, + "loss": 18.6377, + "step": 16150 + }, + { + "epoch": 0.29522730180781254, + "grad_norm": 6.986089492669639, + "learning_rate": 8.26615861088762e-06, + "loss": 17.7572, + "step": 16151 + }, + { + "epoch": 0.2952455810042591, + "grad_norm": 5.6192299528481175, + "learning_rate": 8.265934477644193e-06, + "loss": 17.2478, + "step": 16152 + }, + { + "epoch": 0.29526386020070555, + "grad_norm": 6.16900873742378, + "learning_rate": 8.265710332953949e-06, + "loss": 17.5938, + "step": 16153 + }, + { + "epoch": 0.2952821393971521, + "grad_norm": 7.15423716363989, + "learning_rate": 8.265486176817675e-06, + "loss": 17.6952, + "step": 16154 + }, + { + "epoch": 0.2953004185935986, + "grad_norm": 5.2767902577615695, + "learning_rate": 8.265262009236152e-06, + "loss": 17.137, + "step": 16155 + }, + { + "epoch": 0.29531869779004516, + "grad_norm": 6.146251710364802, + "learning_rate": 8.265037830210172e-06, + "loss": 17.4041, + "step": 16156 + }, + { + "epoch": 0.2953369769864917, + "grad_norm": 6.758768374203726, + "learning_rate": 8.264813639740517e-06, + "loss": 17.8532, + "step": 16157 + }, + { + "epoch": 0.2953552561829382, + "grad_norm": 6.4511848223802195, + "learning_rate": 8.264589437827971e-06, + "loss": 17.5946, + "step": 16158 + }, + { + "epoch": 0.2953735353793847, + "grad_norm": 6.447252327774815, + "learning_rate": 8.264365224473327e-06, + "loss": 17.6131, + "step": 16159 + }, + { + "epoch": 0.29539181457583125, + "grad_norm": 6.5147614417505775, + "learning_rate": 8.264140999677363e-06, + "loss": 17.4324, + "step": 16160 + }, + { + "epoch": 0.2954100937722778, + "grad_norm": 5.917691681255722, + "learning_rate": 8.26391676344087e-06, + "loss": 17.3937, + "step": 16161 + }, + { + "epoch": 0.2954283729687243, + "grad_norm": 5.627604957337583, + "learning_rate": 8.26369251576463e-06, + "loss": 17.1035, + "step": 16162 + }, + { + "epoch": 0.2954466521651708, + "grad_norm": 6.813752414980774, + "learning_rate": 8.263468256649432e-06, + "loss": 17.7017, + "step": 16163 + }, + { + "epoch": 0.29546493136161733, + "grad_norm": 7.170450657184277, + "learning_rate": 8.26324398609606e-06, + "loss": 17.8039, + "step": 16164 + }, + { + "epoch": 0.29548321055806387, + "grad_norm": 6.794922461702556, + "learning_rate": 8.263019704105301e-06, + "loss": 17.6664, + "step": 16165 + }, + { + "epoch": 0.2955014897545104, + "grad_norm": 9.545740247540214, + "learning_rate": 8.262795410677942e-06, + "loss": 17.8033, + "step": 16166 + }, + { + "epoch": 0.29551976895095694, + "grad_norm": 5.873040185505237, + "learning_rate": 8.262571105814768e-06, + "loss": 17.3408, + "step": 16167 + }, + { + "epoch": 0.2955380481474034, + "grad_norm": 7.591817860083055, + "learning_rate": 8.262346789516567e-06, + "loss": 18.2472, + "step": 16168 + }, + { + "epoch": 0.29555632734384996, + "grad_norm": 6.756312239314254, + "learning_rate": 8.262122461784121e-06, + "loss": 17.5218, + "step": 16169 + }, + { + "epoch": 0.2955746065402965, + "grad_norm": 6.458097026716254, + "learning_rate": 8.26189812261822e-06, + "loss": 17.6256, + "step": 16170 + }, + { + "epoch": 0.295592885736743, + "grad_norm": 6.966415218401644, + "learning_rate": 8.261673772019649e-06, + "loss": 17.4215, + "step": 16171 + }, 
+ { + "epoch": 0.29561116493318956, + "grad_norm": 6.382851441378164, + "learning_rate": 8.261449409989194e-06, + "loss": 17.7032, + "step": 16172 + }, + { + "epoch": 0.29562944412963604, + "grad_norm": 5.805456529681644, + "learning_rate": 8.261225036527642e-06, + "loss": 17.3363, + "step": 16173 + }, + { + "epoch": 0.2956477233260826, + "grad_norm": 6.408086235868578, + "learning_rate": 8.26100065163578e-06, + "loss": 17.4591, + "step": 16174 + }, + { + "epoch": 0.2956660025225291, + "grad_norm": 5.1312227955501895, + "learning_rate": 8.260776255314394e-06, + "loss": 17.0203, + "step": 16175 + }, + { + "epoch": 0.29568428171897565, + "grad_norm": 6.232576056759392, + "learning_rate": 8.260551847564268e-06, + "loss": 17.4782, + "step": 16176 + }, + { + "epoch": 0.2957025609154222, + "grad_norm": 6.546301101195819, + "learning_rate": 8.260327428386191e-06, + "loss": 17.5471, + "step": 16177 + }, + { + "epoch": 0.29572084011186867, + "grad_norm": 7.393112461830841, + "learning_rate": 8.260102997780952e-06, + "loss": 18.0688, + "step": 16178 + }, + { + "epoch": 0.2957391193083152, + "grad_norm": 5.768559251810734, + "learning_rate": 8.259878555749332e-06, + "loss": 17.2527, + "step": 16179 + }, + { + "epoch": 0.29575739850476174, + "grad_norm": 6.482737775699103, + "learning_rate": 8.259654102292123e-06, + "loss": 17.5944, + "step": 16180 + }, + { + "epoch": 0.2957756777012083, + "grad_norm": 8.191801246187003, + "learning_rate": 8.259429637410108e-06, + "loss": 17.6921, + "step": 16181 + }, + { + "epoch": 0.29579395689765475, + "grad_norm": 6.895390327766547, + "learning_rate": 8.259205161104075e-06, + "loss": 17.6604, + "step": 16182 + }, + { + "epoch": 0.2958122360941013, + "grad_norm": 6.8447549875082805, + "learning_rate": 8.25898067337481e-06, + "loss": 17.6201, + "step": 16183 + }, + { + "epoch": 0.2958305152905478, + "grad_norm": 6.061554104175592, + "learning_rate": 8.258756174223101e-06, + "loss": 17.4606, + "step": 16184 + }, + { + "epoch": 0.29584879448699436, + "grad_norm": 10.694557749786986, + "learning_rate": 8.258531663649735e-06, + "loss": 19.1512, + "step": 16185 + }, + { + "epoch": 0.2958670736834409, + "grad_norm": 7.995631273483882, + "learning_rate": 8.258307141655499e-06, + "loss": 18.1532, + "step": 16186 + }, + { + "epoch": 0.2958853528798874, + "grad_norm": 6.339244400609893, + "learning_rate": 8.258082608241177e-06, + "loss": 17.1876, + "step": 16187 + }, + { + "epoch": 0.2959036320763339, + "grad_norm": 7.27091552991875, + "learning_rate": 8.25785806340756e-06, + "loss": 17.7918, + "step": 16188 + }, + { + "epoch": 0.29592191127278045, + "grad_norm": 7.415439253092029, + "learning_rate": 8.257633507155431e-06, + "loss": 17.6777, + "step": 16189 + }, + { + "epoch": 0.295940190469227, + "grad_norm": 5.553385193729211, + "learning_rate": 8.257408939485582e-06, + "loss": 17.097, + "step": 16190 + }, + { + "epoch": 0.2959584696656735, + "grad_norm": 6.635004356880178, + "learning_rate": 8.257184360398796e-06, + "loss": 17.4957, + "step": 16191 + }, + { + "epoch": 0.29597674886212, + "grad_norm": 7.517149146968689, + "learning_rate": 8.256959769895861e-06, + "loss": 17.5058, + "step": 16192 + }, + { + "epoch": 0.29599502805856653, + "grad_norm": 6.089891699890243, + "learning_rate": 8.256735167977566e-06, + "loss": 17.3818, + "step": 16193 + }, + { + "epoch": 0.29601330725501307, + "grad_norm": 5.788348349656729, + "learning_rate": 8.256510554644696e-06, + "loss": 17.1936, + "step": 16194 + }, + { + "epoch": 0.2960315864514596, + "grad_norm": 4.823369899702506, + 
"learning_rate": 8.25628592989804e-06, + "loss": 16.9966, + "step": 16195 + }, + { + "epoch": 0.29604986564790614, + "grad_norm": 7.191284231122164, + "learning_rate": 8.256061293738382e-06, + "loss": 17.8501, + "step": 16196 + }, + { + "epoch": 0.2960681448443526, + "grad_norm": 7.338598339373476, + "learning_rate": 8.255836646166512e-06, + "loss": 17.7325, + "step": 16197 + }, + { + "epoch": 0.29608642404079916, + "grad_norm": 5.882320845772196, + "learning_rate": 8.255611987183218e-06, + "loss": 17.3934, + "step": 16198 + }, + { + "epoch": 0.2961047032372457, + "grad_norm": 7.050088584913749, + "learning_rate": 8.255387316789289e-06, + "loss": 17.8632, + "step": 16199 + }, + { + "epoch": 0.29612298243369223, + "grad_norm": 7.354316577228631, + "learning_rate": 8.255162634985508e-06, + "loss": 17.7189, + "step": 16200 + }, + { + "epoch": 0.29614126163013876, + "grad_norm": 6.596201647834317, + "learning_rate": 8.254937941772663e-06, + "loss": 17.8398, + "step": 16201 + }, + { + "epoch": 0.29615954082658524, + "grad_norm": 5.997520771429841, + "learning_rate": 8.254713237151546e-06, + "loss": 17.3732, + "step": 16202 + }, + { + "epoch": 0.2961778200230318, + "grad_norm": 5.567684933785494, + "learning_rate": 8.254488521122937e-06, + "loss": 17.0583, + "step": 16203 + }, + { + "epoch": 0.2961960992194783, + "grad_norm": 6.988167398003105, + "learning_rate": 8.25426379368763e-06, + "loss": 17.4395, + "step": 16204 + }, + { + "epoch": 0.29621437841592485, + "grad_norm": 6.334472459987664, + "learning_rate": 8.254039054846413e-06, + "loss": 17.3746, + "step": 16205 + }, + { + "epoch": 0.2962326576123714, + "grad_norm": 6.792457675376119, + "learning_rate": 8.25381430460007e-06, + "loss": 17.6924, + "step": 16206 + }, + { + "epoch": 0.29625093680881787, + "grad_norm": 6.543604059497521, + "learning_rate": 8.253589542949391e-06, + "loss": 17.4972, + "step": 16207 + }, + { + "epoch": 0.2962692160052644, + "grad_norm": 8.213450685786313, + "learning_rate": 8.25336476989516e-06, + "loss": 18.0698, + "step": 16208 + }, + { + "epoch": 0.29628749520171094, + "grad_norm": 6.590426678547136, + "learning_rate": 8.253139985438172e-06, + "loss": 17.5321, + "step": 16209 + }, + { + "epoch": 0.2963057743981575, + "grad_norm": 5.826370838699314, + "learning_rate": 8.252915189579209e-06, + "loss": 17.3427, + "step": 16210 + }, + { + "epoch": 0.296324053594604, + "grad_norm": 6.142322848068911, + "learning_rate": 8.25269038231906e-06, + "loss": 17.4707, + "step": 16211 + }, + { + "epoch": 0.2963423327910505, + "grad_norm": 6.313431713920389, + "learning_rate": 8.252465563658514e-06, + "loss": 17.3804, + "step": 16212 + }, + { + "epoch": 0.296360611987497, + "grad_norm": 6.7735844023314815, + "learning_rate": 8.252240733598357e-06, + "loss": 17.5166, + "step": 16213 + }, + { + "epoch": 0.29637889118394356, + "grad_norm": 6.634904623775015, + "learning_rate": 8.25201589213938e-06, + "loss": 17.3804, + "step": 16214 + }, + { + "epoch": 0.2963971703803901, + "grad_norm": 6.574053095469506, + "learning_rate": 8.251791039282369e-06, + "loss": 17.6128, + "step": 16215 + }, + { + "epoch": 0.2964154495768366, + "grad_norm": 6.523212168595939, + "learning_rate": 8.251566175028114e-06, + "loss": 17.2457, + "step": 16216 + }, + { + "epoch": 0.2964337287732831, + "grad_norm": 7.227468536478085, + "learning_rate": 8.2513412993774e-06, + "loss": 17.6916, + "step": 16217 + }, + { + "epoch": 0.29645200796972965, + "grad_norm": 6.522893347865626, + "learning_rate": 8.251116412331017e-06, + "loss": 17.553, + "step": 16218 + }, + 
{ + "epoch": 0.2964702871661762, + "grad_norm": 5.442161858411724, + "learning_rate": 8.250891513889754e-06, + "loss": 17.1043, + "step": 16219 + }, + { + "epoch": 0.2964885663626227, + "grad_norm": 6.756243611043344, + "learning_rate": 8.250666604054396e-06, + "loss": 17.7974, + "step": 16220 + }, + { + "epoch": 0.2965068455590692, + "grad_norm": 6.723419642373557, + "learning_rate": 8.250441682825736e-06, + "loss": 17.526, + "step": 16221 + }, + { + "epoch": 0.29652512475551573, + "grad_norm": 6.723178081527187, + "learning_rate": 8.250216750204559e-06, + "loss": 17.7818, + "step": 16222 + }, + { + "epoch": 0.29654340395196227, + "grad_norm": 6.47743698662654, + "learning_rate": 8.249991806191656e-06, + "loss": 17.6314, + "step": 16223 + }, + { + "epoch": 0.2965616831484088, + "grad_norm": 6.9977355079827674, + "learning_rate": 8.249766850787811e-06, + "loss": 17.6568, + "step": 16224 + }, + { + "epoch": 0.29657996234485534, + "grad_norm": 6.719055048688414, + "learning_rate": 8.249541883993816e-06, + "loss": 17.4425, + "step": 16225 + }, + { + "epoch": 0.2965982415413018, + "grad_norm": 7.75375840497314, + "learning_rate": 8.24931690581046e-06, + "loss": 18.2105, + "step": 16226 + }, + { + "epoch": 0.29661652073774836, + "grad_norm": 6.969221779509029, + "learning_rate": 8.24909191623853e-06, + "loss": 17.887, + "step": 16227 + }, + { + "epoch": 0.2966347999341949, + "grad_norm": 7.026830897623245, + "learning_rate": 8.248866915278814e-06, + "loss": 17.8746, + "step": 16228 + }, + { + "epoch": 0.29665307913064143, + "grad_norm": 6.494742477544057, + "learning_rate": 8.248641902932102e-06, + "loss": 17.6973, + "step": 16229 + }, + { + "epoch": 0.29667135832708796, + "grad_norm": 7.425141486599022, + "learning_rate": 8.248416879199182e-06, + "loss": 17.9923, + "step": 16230 + }, + { + "epoch": 0.29668963752353444, + "grad_norm": 6.136964101879413, + "learning_rate": 8.248191844080841e-06, + "loss": 17.4555, + "step": 16231 + }, + { + "epoch": 0.296707916719981, + "grad_norm": 6.159605954960472, + "learning_rate": 8.247966797577871e-06, + "loss": 17.4765, + "step": 16232 + }, + { + "epoch": 0.2967261959164275, + "grad_norm": 6.623923583884193, + "learning_rate": 8.24774173969106e-06, + "loss": 17.7391, + "step": 16233 + }, + { + "epoch": 0.29674447511287405, + "grad_norm": 6.149862095375403, + "learning_rate": 8.247516670421195e-06, + "loss": 17.3587, + "step": 16234 + }, + { + "epoch": 0.2967627543093206, + "grad_norm": 7.031136387036252, + "learning_rate": 8.247291589769065e-06, + "loss": 17.9162, + "step": 16235 + }, + { + "epoch": 0.29678103350576707, + "grad_norm": 6.355655195530405, + "learning_rate": 8.24706649773546e-06, + "loss": 17.638, + "step": 16236 + }, + { + "epoch": 0.2967993127022136, + "grad_norm": 6.13200706403997, + "learning_rate": 8.246841394321172e-06, + "loss": 17.454, + "step": 16237 + }, + { + "epoch": 0.29681759189866014, + "grad_norm": 5.62872690399162, + "learning_rate": 8.246616279526982e-06, + "loss": 17.1959, + "step": 16238 + }, + { + "epoch": 0.2968358710951067, + "grad_norm": 7.210139026661183, + "learning_rate": 8.246391153353687e-06, + "loss": 17.5475, + "step": 16239 + }, + { + "epoch": 0.2968541502915532, + "grad_norm": 6.75275509676336, + "learning_rate": 8.24616601580207e-06, + "loss": 17.5892, + "step": 16240 + }, + { + "epoch": 0.2968724294879997, + "grad_norm": 6.834470207428036, + "learning_rate": 8.245940866872925e-06, + "loss": 17.3088, + "step": 16241 + }, + { + "epoch": 0.2968907086844462, + "grad_norm": 7.7373090466346275, + 
"learning_rate": 8.245715706567038e-06, + "loss": 18.2506, + "step": 16242 + }, + { + "epoch": 0.29690898788089276, + "grad_norm": 6.253828221575055, + "learning_rate": 8.2454905348852e-06, + "loss": 17.335, + "step": 16243 + }, + { + "epoch": 0.2969272670773393, + "grad_norm": 7.914715304597217, + "learning_rate": 8.245265351828197e-06, + "loss": 18.1206, + "step": 16244 + }, + { + "epoch": 0.29694554627378583, + "grad_norm": 6.230357335525374, + "learning_rate": 8.245040157396824e-06, + "loss": 17.6677, + "step": 16245 + }, + { + "epoch": 0.2969638254702323, + "grad_norm": 6.535313520280362, + "learning_rate": 8.244814951591864e-06, + "loss": 17.6605, + "step": 16246 + }, + { + "epoch": 0.29698210466667885, + "grad_norm": 6.625128264606999, + "learning_rate": 8.244589734414112e-06, + "loss": 17.7093, + "step": 16247 + }, + { + "epoch": 0.2970003838631254, + "grad_norm": 7.679745526749129, + "learning_rate": 8.244364505864351e-06, + "loss": 18.37, + "step": 16248 + }, + { + "epoch": 0.2970186630595719, + "grad_norm": 7.535364743788393, + "learning_rate": 8.244139265943376e-06, + "loss": 18.0094, + "step": 16249 + }, + { + "epoch": 0.2970369422560184, + "grad_norm": 6.396184890742791, + "learning_rate": 8.243914014651975e-06, + "loss": 17.475, + "step": 16250 + }, + { + "epoch": 0.29705522145246493, + "grad_norm": 6.868777124854813, + "learning_rate": 8.243688751990935e-06, + "loss": 17.7841, + "step": 16251 + }, + { + "epoch": 0.29707350064891147, + "grad_norm": 6.31584488355534, + "learning_rate": 8.243463477961048e-06, + "loss": 17.452, + "step": 16252 + }, + { + "epoch": 0.297091779845358, + "grad_norm": 6.467455494339748, + "learning_rate": 8.243238192563103e-06, + "loss": 17.4342, + "step": 16253 + }, + { + "epoch": 0.29711005904180454, + "grad_norm": 5.442241482256332, + "learning_rate": 8.243012895797891e-06, + "loss": 17.1801, + "step": 16254 + }, + { + "epoch": 0.297128338238251, + "grad_norm": 6.469735857165421, + "learning_rate": 8.242787587666198e-06, + "loss": 17.731, + "step": 16255 + }, + { + "epoch": 0.29714661743469756, + "grad_norm": 6.322748020825891, + "learning_rate": 8.242562268168817e-06, + "loss": 17.2883, + "step": 16256 + }, + { + "epoch": 0.2971648966311441, + "grad_norm": 6.610727907010459, + "learning_rate": 8.242336937306536e-06, + "loss": 17.8377, + "step": 16257 + }, + { + "epoch": 0.29718317582759063, + "grad_norm": 5.088755091306931, + "learning_rate": 8.242111595080146e-06, + "loss": 17.1507, + "step": 16258 + }, + { + "epoch": 0.29720145502403716, + "grad_norm": 8.340190513043664, + "learning_rate": 8.241886241490438e-06, + "loss": 18.0603, + "step": 16259 + }, + { + "epoch": 0.29721973422048364, + "grad_norm": 6.459625104554761, + "learning_rate": 8.241660876538198e-06, + "loss": 17.7535, + "step": 16260 + }, + { + "epoch": 0.2972380134169302, + "grad_norm": 5.587747198410822, + "learning_rate": 8.241435500224217e-06, + "loss": 17.0845, + "step": 16261 + }, + { + "epoch": 0.2972562926133767, + "grad_norm": 6.732006582626895, + "learning_rate": 8.241210112549287e-06, + "loss": 17.7029, + "step": 16262 + }, + { + "epoch": 0.29727457180982325, + "grad_norm": 7.984138342881745, + "learning_rate": 8.240984713514198e-06, + "loss": 17.9232, + "step": 16263 + }, + { + "epoch": 0.2972928510062698, + "grad_norm": 7.675562147358658, + "learning_rate": 8.240759303119736e-06, + "loss": 18.595, + "step": 16264 + }, + { + "epoch": 0.29731113020271627, + "grad_norm": 6.360932458947583, + "learning_rate": 8.240533881366696e-06, + "loss": 17.6675, + "step": 16265 + }, + 
{ + "epoch": 0.2973294093991628, + "grad_norm": 6.27721314278734, + "learning_rate": 8.240308448255866e-06, + "loss": 17.6947, + "step": 16266 + }, + { + "epoch": 0.29734768859560934, + "grad_norm": 6.977908367355837, + "learning_rate": 8.240083003788036e-06, + "loss": 17.6504, + "step": 16267 + }, + { + "epoch": 0.2973659677920559, + "grad_norm": 6.049040808097772, + "learning_rate": 8.239857547963995e-06, + "loss": 17.3966, + "step": 16268 + }, + { + "epoch": 0.2973842469885024, + "grad_norm": 7.1086045804802795, + "learning_rate": 8.239632080784535e-06, + "loss": 17.7306, + "step": 16269 + }, + { + "epoch": 0.2974025261849489, + "grad_norm": 6.054916242819913, + "learning_rate": 8.239406602250447e-06, + "loss": 17.3749, + "step": 16270 + }, + { + "epoch": 0.2974208053813954, + "grad_norm": 6.9152581995438895, + "learning_rate": 8.239181112362517e-06, + "loss": 17.6033, + "step": 16271 + }, + { + "epoch": 0.29743908457784196, + "grad_norm": 6.459269066450177, + "learning_rate": 8.238955611121541e-06, + "loss": 17.4815, + "step": 16272 + }, + { + "epoch": 0.2974573637742885, + "grad_norm": 6.8023051578082026, + "learning_rate": 8.238730098528306e-06, + "loss": 17.6099, + "step": 16273 + }, + { + "epoch": 0.29747564297073503, + "grad_norm": 8.092449666154756, + "learning_rate": 8.2385045745836e-06, + "loss": 17.9357, + "step": 16274 + }, + { + "epoch": 0.2974939221671815, + "grad_norm": 7.398706690100068, + "learning_rate": 8.238279039288222e-06, + "loss": 17.9046, + "step": 16275 + }, + { + "epoch": 0.29751220136362805, + "grad_norm": 5.909733446999433, + "learning_rate": 8.238053492642954e-06, + "loss": 17.2984, + "step": 16276 + }, + { + "epoch": 0.2975304805600746, + "grad_norm": 6.661307944514938, + "learning_rate": 8.23782793464859e-06, + "loss": 17.5077, + "step": 16277 + }, + { + "epoch": 0.2975487597565211, + "grad_norm": 5.915677565068317, + "learning_rate": 8.23760236530592e-06, + "loss": 17.0839, + "step": 16278 + }, + { + "epoch": 0.29756703895296766, + "grad_norm": 6.2602485757742645, + "learning_rate": 8.237376784615734e-06, + "loss": 17.6571, + "step": 16279 + }, + { + "epoch": 0.29758531814941414, + "grad_norm": 6.740954056650279, + "learning_rate": 8.237151192578823e-06, + "loss": 17.648, + "step": 16280 + }, + { + "epoch": 0.29760359734586067, + "grad_norm": 6.573906646349148, + "learning_rate": 8.236925589195978e-06, + "loss": 17.4232, + "step": 16281 + }, + { + "epoch": 0.2976218765423072, + "grad_norm": 7.325810720028958, + "learning_rate": 8.236699974467993e-06, + "loss": 17.9357, + "step": 16282 + }, + { + "epoch": 0.29764015573875374, + "grad_norm": 7.240468148216298, + "learning_rate": 8.236474348395651e-06, + "loss": 17.394, + "step": 16283 + }, + { + "epoch": 0.2976584349352002, + "grad_norm": 6.918691931463542, + "learning_rate": 8.23624871097975e-06, + "loss": 17.3358, + "step": 16284 + }, + { + "epoch": 0.29767671413164676, + "grad_norm": 6.13736298047117, + "learning_rate": 8.236023062221077e-06, + "loss": 17.4781, + "step": 16285 + }, + { + "epoch": 0.2976949933280933, + "grad_norm": 7.117920319422106, + "learning_rate": 8.235797402120425e-06, + "loss": 17.8098, + "step": 16286 + }, + { + "epoch": 0.29771327252453983, + "grad_norm": 6.822319572920566, + "learning_rate": 8.235571730678583e-06, + "loss": 17.5788, + "step": 16287 + }, + { + "epoch": 0.29773155172098636, + "grad_norm": 5.6378570072738, + "learning_rate": 8.235346047896342e-06, + "loss": 17.246, + "step": 16288 + }, + { + "epoch": 0.29774983091743285, + "grad_norm": 6.837267184240421, + 
"learning_rate": 8.235120353774494e-06, + "loss": 17.9497, + "step": 16289 + }, + { + "epoch": 0.2977681101138794, + "grad_norm": 6.56729590587665, + "learning_rate": 8.234894648313832e-06, + "loss": 17.8588, + "step": 16290 + }, + { + "epoch": 0.2977863893103259, + "grad_norm": 7.694086139703035, + "learning_rate": 8.234668931515143e-06, + "loss": 18.0188, + "step": 16291 + }, + { + "epoch": 0.29780466850677245, + "grad_norm": 7.2056602639730345, + "learning_rate": 8.23444320337922e-06, + "loss": 17.957, + "step": 16292 + }, + { + "epoch": 0.297822947703219, + "grad_norm": 6.620513928103384, + "learning_rate": 8.234217463906857e-06, + "loss": 17.4882, + "step": 16293 + }, + { + "epoch": 0.29784122689966547, + "grad_norm": 6.255504383389368, + "learning_rate": 8.233991713098839e-06, + "loss": 17.3204, + "step": 16294 + }, + { + "epoch": 0.297859506096112, + "grad_norm": 5.402672496530524, + "learning_rate": 8.233765950955963e-06, + "loss": 17.1263, + "step": 16295 + }, + { + "epoch": 0.29787778529255854, + "grad_norm": 7.434840654253358, + "learning_rate": 8.233540177479016e-06, + "loss": 17.7537, + "step": 16296 + }, + { + "epoch": 0.2978960644890051, + "grad_norm": 6.053828911386629, + "learning_rate": 8.233314392668794e-06, + "loss": 17.3347, + "step": 16297 + }, + { + "epoch": 0.2979143436854516, + "grad_norm": 8.279375604491499, + "learning_rate": 8.233088596526082e-06, + "loss": 18.3454, + "step": 16298 + }, + { + "epoch": 0.2979326228818981, + "grad_norm": 7.516060747930949, + "learning_rate": 8.232862789051678e-06, + "loss": 18.3192, + "step": 16299 + }, + { + "epoch": 0.2979509020783446, + "grad_norm": 5.65837228236491, + "learning_rate": 8.232636970246371e-06, + "loss": 17.2985, + "step": 16300 + }, + { + "epoch": 0.29796918127479116, + "grad_norm": 7.764916684762759, + "learning_rate": 8.232411140110948e-06, + "loss": 18.1349, + "step": 16301 + }, + { + "epoch": 0.2979874604712377, + "grad_norm": 6.2881691007591485, + "learning_rate": 8.232185298646209e-06, + "loss": 17.316, + "step": 16302 + }, + { + "epoch": 0.29800573966768423, + "grad_norm": 5.6482328866201525, + "learning_rate": 8.231959445852937e-06, + "loss": 17.1771, + "step": 16303 + }, + { + "epoch": 0.2980240188641307, + "grad_norm": 5.602594649059896, + "learning_rate": 8.23173358173193e-06, + "loss": 17.1372, + "step": 16304 + }, + { + "epoch": 0.29804229806057725, + "grad_norm": 7.803580560883558, + "learning_rate": 8.231507706283976e-06, + "loss": 17.8065, + "step": 16305 + }, + { + "epoch": 0.2980605772570238, + "grad_norm": 7.578179986971631, + "learning_rate": 8.231281819509869e-06, + "loss": 18.0388, + "step": 16306 + }, + { + "epoch": 0.2980788564534703, + "grad_norm": 6.970383338331267, + "learning_rate": 8.2310559214104e-06, + "loss": 17.875, + "step": 16307 + }, + { + "epoch": 0.29809713564991686, + "grad_norm": 5.25820880547372, + "learning_rate": 8.23083001198636e-06, + "loss": 17.0394, + "step": 16308 + }, + { + "epoch": 0.29811541484636334, + "grad_norm": 5.943940907953198, + "learning_rate": 8.230604091238542e-06, + "loss": 17.5735, + "step": 16309 + }, + { + "epoch": 0.29813369404280987, + "grad_norm": 6.077625711083019, + "learning_rate": 8.230378159167733e-06, + "loss": 17.4468, + "step": 16310 + }, + { + "epoch": 0.2981519732392564, + "grad_norm": 6.435290454954497, + "learning_rate": 8.230152215774731e-06, + "loss": 17.3735, + "step": 16311 + }, + { + "epoch": 0.29817025243570294, + "grad_norm": 5.978024970966035, + "learning_rate": 8.229926261060328e-06, + "loss": 17.0599, + "step": 16312 + }, + 
{ + "epoch": 0.2981885316321495, + "grad_norm": 7.255333358452495, + "learning_rate": 8.229700295025311e-06, + "loss": 18.2076, + "step": 16313 + }, + { + "epoch": 0.29820681082859596, + "grad_norm": 5.767137796494151, + "learning_rate": 8.229474317670476e-06, + "loss": 17.2848, + "step": 16314 + }, + { + "epoch": 0.2982250900250425, + "grad_norm": 5.74214433296508, + "learning_rate": 8.229248328996615e-06, + "loss": 17.396, + "step": 16315 + }, + { + "epoch": 0.29824336922148903, + "grad_norm": 7.15303670243752, + "learning_rate": 8.229022329004518e-06, + "loss": 17.8744, + "step": 16316 + }, + { + "epoch": 0.29826164841793557, + "grad_norm": 6.630547596158621, + "learning_rate": 8.228796317694976e-06, + "loss": 17.6934, + "step": 16317 + }, + { + "epoch": 0.29827992761438205, + "grad_norm": 6.012624207429132, + "learning_rate": 8.228570295068785e-06, + "loss": 17.3032, + "step": 16318 + }, + { + "epoch": 0.2982982068108286, + "grad_norm": 7.6240797394970325, + "learning_rate": 8.228344261126735e-06, + "loss": 18.0754, + "step": 16319 + }, + { + "epoch": 0.2983164860072751, + "grad_norm": 7.414234900991309, + "learning_rate": 8.228118215869619e-06, + "loss": 18.1664, + "step": 16320 + }, + { + "epoch": 0.29833476520372165, + "grad_norm": 7.557201962615848, + "learning_rate": 8.227892159298228e-06, + "loss": 17.6655, + "step": 16321 + }, + { + "epoch": 0.2983530444001682, + "grad_norm": 6.3213765342509, + "learning_rate": 8.227666091413354e-06, + "loss": 17.3342, + "step": 16322 + }, + { + "epoch": 0.29837132359661467, + "grad_norm": 7.023726114079908, + "learning_rate": 8.22744001221579e-06, + "loss": 17.743, + "step": 16323 + }, + { + "epoch": 0.2983896027930612, + "grad_norm": 6.289313939971966, + "learning_rate": 8.227213921706332e-06, + "loss": 17.5194, + "step": 16324 + }, + { + "epoch": 0.29840788198950774, + "grad_norm": 5.8862396089787845, + "learning_rate": 8.226987819885767e-06, + "loss": 17.2599, + "step": 16325 + }, + { + "epoch": 0.2984261611859543, + "grad_norm": 7.5430086296374474, + "learning_rate": 8.226761706754891e-06, + "loss": 18.1224, + "step": 16326 + }, + { + "epoch": 0.2984444403824008, + "grad_norm": 8.393691036336323, + "learning_rate": 8.226535582314494e-06, + "loss": 18.579, + "step": 16327 + }, + { + "epoch": 0.2984627195788473, + "grad_norm": 5.713785414661842, + "learning_rate": 8.226309446565371e-06, + "loss": 17.2607, + "step": 16328 + }, + { + "epoch": 0.2984809987752938, + "grad_norm": 6.678237594393372, + "learning_rate": 8.226083299508312e-06, + "loss": 17.6117, + "step": 16329 + }, + { + "epoch": 0.29849927797174036, + "grad_norm": 7.120928405474422, + "learning_rate": 8.225857141144111e-06, + "loss": 17.8109, + "step": 16330 + }, + { + "epoch": 0.2985175571681869, + "grad_norm": 6.400104494718224, + "learning_rate": 8.225630971473561e-06, + "loss": 17.559, + "step": 16331 + }, + { + "epoch": 0.29853583636463343, + "grad_norm": 6.076709500381016, + "learning_rate": 8.225404790497456e-06, + "loss": 17.2295, + "step": 16332 + }, + { + "epoch": 0.2985541155610799, + "grad_norm": 6.556860148667879, + "learning_rate": 8.225178598216586e-06, + "loss": 17.7958, + "step": 16333 + }, + { + "epoch": 0.29857239475752645, + "grad_norm": 6.5912662165209746, + "learning_rate": 8.224952394631744e-06, + "loss": 17.3398, + "step": 16334 + }, + { + "epoch": 0.298590673953973, + "grad_norm": 7.461391451472866, + "learning_rate": 8.224726179743726e-06, + "loss": 17.8425, + "step": 16335 + }, + { + "epoch": 0.2986089531504195, + "grad_norm": 6.171684837161076, + 
"learning_rate": 8.22449995355332e-06, + "loss": 17.5799, + "step": 16336 + }, + { + "epoch": 0.29862723234686606, + "grad_norm": 7.531757010209729, + "learning_rate": 8.224273716061321e-06, + "loss": 17.9508, + "step": 16337 + }, + { + "epoch": 0.29864551154331254, + "grad_norm": 6.823430736037084, + "learning_rate": 8.224047467268524e-06, + "loss": 17.7491, + "step": 16338 + }, + { + "epoch": 0.29866379073975907, + "grad_norm": 5.504777292052472, + "learning_rate": 8.22382120717572e-06, + "loss": 17.0871, + "step": 16339 + }, + { + "epoch": 0.2986820699362056, + "grad_norm": 6.439319508459507, + "learning_rate": 8.223594935783703e-06, + "loss": 17.5903, + "step": 16340 + }, + { + "epoch": 0.29870034913265214, + "grad_norm": 7.342933055695366, + "learning_rate": 8.223368653093267e-06, + "loss": 17.9005, + "step": 16341 + }, + { + "epoch": 0.2987186283290987, + "grad_norm": 6.640712689129402, + "learning_rate": 8.2231423591052e-06, + "loss": 17.3471, + "step": 16342 + }, + { + "epoch": 0.29873690752554516, + "grad_norm": 5.821954409355846, + "learning_rate": 8.2229160538203e-06, + "loss": 17.3971, + "step": 16343 + }, + { + "epoch": 0.2987551867219917, + "grad_norm": 5.710282016845329, + "learning_rate": 8.22268973723936e-06, + "loss": 17.2268, + "step": 16344 + }, + { + "epoch": 0.29877346591843823, + "grad_norm": 5.344218447819602, + "learning_rate": 8.222463409363171e-06, + "loss": 17.2301, + "step": 16345 + }, + { + "epoch": 0.29879174511488477, + "grad_norm": 6.029696883942533, + "learning_rate": 8.222237070192528e-06, + "loss": 17.2053, + "step": 16346 + }, + { + "epoch": 0.2988100243113313, + "grad_norm": 10.209855700168081, + "learning_rate": 8.222010719728225e-06, + "loss": 18.889, + "step": 16347 + }, + { + "epoch": 0.2988283035077778, + "grad_norm": 6.3143922895963716, + "learning_rate": 8.221784357971053e-06, + "loss": 17.282, + "step": 16348 + }, + { + "epoch": 0.2988465827042243, + "grad_norm": 6.721410765336859, + "learning_rate": 8.221557984921803e-06, + "loss": 17.7417, + "step": 16349 + }, + { + "epoch": 0.29886486190067085, + "grad_norm": 6.411746297474031, + "learning_rate": 8.221331600581276e-06, + "loss": 17.4306, + "step": 16350 + }, + { + "epoch": 0.2988831410971174, + "grad_norm": 6.359058951813595, + "learning_rate": 8.221105204950259e-06, + "loss": 17.7775, + "step": 16351 + }, + { + "epoch": 0.29890142029356387, + "grad_norm": 9.464212744525556, + "learning_rate": 8.22087879802955e-06, + "loss": 18.5987, + "step": 16352 + }, + { + "epoch": 0.2989196994900104, + "grad_norm": 7.341539248348361, + "learning_rate": 8.220652379819939e-06, + "loss": 18.0818, + "step": 16353 + }, + { + "epoch": 0.29893797868645694, + "grad_norm": 5.992769917037668, + "learning_rate": 8.220425950322222e-06, + "loss": 17.4905, + "step": 16354 + }, + { + "epoch": 0.2989562578829035, + "grad_norm": 6.541544820443978, + "learning_rate": 8.22019950953719e-06, + "loss": 17.6631, + "step": 16355 + }, + { + "epoch": 0.29897453707935, + "grad_norm": 6.882540981182489, + "learning_rate": 8.219973057465638e-06, + "loss": 17.9089, + "step": 16356 + }, + { + "epoch": 0.2989928162757965, + "grad_norm": 6.2809731953307955, + "learning_rate": 8.21974659410836e-06, + "loss": 17.573, + "step": 16357 + }, + { + "epoch": 0.299011095472243, + "grad_norm": 6.748124825600281, + "learning_rate": 8.219520119466152e-06, + "loss": 17.7794, + "step": 16358 + }, + { + "epoch": 0.29902937466868956, + "grad_norm": 6.647893390589946, + "learning_rate": 8.219293633539803e-06, + "loss": 17.3586, + "step": 16359 + }, + { 
+ "epoch": 0.2990476538651361, + "grad_norm": 6.014042612491038, + "learning_rate": 8.219067136330107e-06, + "loss": 17.3431, + "step": 16360 + }, + { + "epoch": 0.29906593306158263, + "grad_norm": 5.92188614220963, + "learning_rate": 8.218840627837866e-06, + "loss": 17.2457, + "step": 16361 + }, + { + "epoch": 0.2990842122580291, + "grad_norm": 7.5322215277244355, + "learning_rate": 8.218614108063863e-06, + "loss": 17.8183, + "step": 16362 + }, + { + "epoch": 0.29910249145447565, + "grad_norm": 6.318718321398887, + "learning_rate": 8.218387577008896e-06, + "loss": 17.3413, + "step": 16363 + }, + { + "epoch": 0.2991207706509222, + "grad_norm": 8.63721541871255, + "learning_rate": 8.218161034673763e-06, + "loss": 18.7411, + "step": 16364 + }, + { + "epoch": 0.2991390498473687, + "grad_norm": 5.896134279393408, + "learning_rate": 8.217934481059255e-06, + "loss": 17.4976, + "step": 16365 + }, + { + "epoch": 0.29915732904381526, + "grad_norm": 6.614586099824656, + "learning_rate": 8.217707916166165e-06, + "loss": 17.6771, + "step": 16366 + }, + { + "epoch": 0.29917560824026174, + "grad_norm": 7.594683145901173, + "learning_rate": 8.217481339995288e-06, + "loss": 17.923, + "step": 16367 + }, + { + "epoch": 0.2991938874367083, + "grad_norm": 7.1342326422791515, + "learning_rate": 8.217254752547419e-06, + "loss": 17.8564, + "step": 16368 + }, + { + "epoch": 0.2992121666331548, + "grad_norm": 8.750236790492691, + "learning_rate": 8.21702815382335e-06, + "loss": 18.7921, + "step": 16369 + }, + { + "epoch": 0.29923044582960134, + "grad_norm": 7.5708330389398855, + "learning_rate": 8.216801543823875e-06, + "loss": 18.0739, + "step": 16370 + }, + { + "epoch": 0.2992487250260479, + "grad_norm": 6.925853287739947, + "learning_rate": 8.216574922549794e-06, + "loss": 17.4019, + "step": 16371 + }, + { + "epoch": 0.29926700422249436, + "grad_norm": 8.817725573612206, + "learning_rate": 8.216348290001893e-06, + "loss": 18.5605, + "step": 16372 + }, + { + "epoch": 0.2992852834189409, + "grad_norm": 6.942994691492411, + "learning_rate": 8.216121646180973e-06, + "loss": 17.6855, + "step": 16373 + }, + { + "epoch": 0.29930356261538743, + "grad_norm": 5.870285652412425, + "learning_rate": 8.215894991087823e-06, + "loss": 17.1564, + "step": 16374 + }, + { + "epoch": 0.29932184181183397, + "grad_norm": 7.281625574319899, + "learning_rate": 8.215668324723242e-06, + "loss": 18.0881, + "step": 16375 + }, + { + "epoch": 0.2993401210082805, + "grad_norm": 6.719466972932332, + "learning_rate": 8.215441647088023e-06, + "loss": 17.588, + "step": 16376 + }, + { + "epoch": 0.299358400204727, + "grad_norm": 7.8788804222029265, + "learning_rate": 8.215214958182959e-06, + "loss": 17.8596, + "step": 16377 + }, + { + "epoch": 0.2993766794011735, + "grad_norm": 6.53591483455379, + "learning_rate": 8.214988258008845e-06, + "loss": 17.5788, + "step": 16378 + }, + { + "epoch": 0.29939495859762005, + "grad_norm": 6.936515196876394, + "learning_rate": 8.214761546566478e-06, + "loss": 17.7465, + "step": 16379 + }, + { + "epoch": 0.2994132377940666, + "grad_norm": 8.580979463421398, + "learning_rate": 8.21453482385665e-06, + "loss": 18.464, + "step": 16380 + }, + { + "epoch": 0.2994315169905131, + "grad_norm": 6.168998388455126, + "learning_rate": 8.214308089880156e-06, + "loss": 17.3842, + "step": 16381 + }, + { + "epoch": 0.2994497961869596, + "grad_norm": 7.31217079621152, + "learning_rate": 8.214081344637792e-06, + "loss": 17.7132, + "step": 16382 + }, + { + "epoch": 0.29946807538340614, + "grad_norm": 9.892344239339227, + 
"learning_rate": 8.21385458813035e-06, + "loss": 17.5744, + "step": 16383 + }, + { + "epoch": 0.2994863545798527, + "grad_norm": 6.62911889642519, + "learning_rate": 8.213627820358627e-06, + "loss": 17.6832, + "step": 16384 + }, + { + "epoch": 0.2995046337762992, + "grad_norm": 6.143767905709122, + "learning_rate": 8.213401041323418e-06, + "loss": 17.6862, + "step": 16385 + }, + { + "epoch": 0.2995229129727457, + "grad_norm": 7.191470006510474, + "learning_rate": 8.213174251025517e-06, + "loss": 17.8029, + "step": 16386 + }, + { + "epoch": 0.2995411921691922, + "grad_norm": 5.654302385320921, + "learning_rate": 8.21294744946572e-06, + "loss": 17.2512, + "step": 16387 + }, + { + "epoch": 0.29955947136563876, + "grad_norm": 6.370351161826269, + "learning_rate": 8.21272063664482e-06, + "loss": 17.2756, + "step": 16388 + }, + { + "epoch": 0.2995777505620853, + "grad_norm": 6.32073824523956, + "learning_rate": 8.212493812563613e-06, + "loss": 17.5443, + "step": 16389 + }, + { + "epoch": 0.29959602975853183, + "grad_norm": 6.537775541813034, + "learning_rate": 8.212266977222893e-06, + "loss": 17.6793, + "step": 16390 + }, + { + "epoch": 0.2996143089549783, + "grad_norm": 8.518777316757442, + "learning_rate": 8.212040130623458e-06, + "loss": 18.5988, + "step": 16391 + }, + { + "epoch": 0.29963258815142485, + "grad_norm": 6.986553902632954, + "learning_rate": 8.2118132727661e-06, + "loss": 17.9079, + "step": 16392 + }, + { + "epoch": 0.2996508673478714, + "grad_norm": 7.459135995463893, + "learning_rate": 8.211586403651616e-06, + "loss": 18.0181, + "step": 16393 + }, + { + "epoch": 0.2996691465443179, + "grad_norm": 6.109765355923692, + "learning_rate": 8.2113595232808e-06, + "loss": 17.2701, + "step": 16394 + }, + { + "epoch": 0.29968742574076446, + "grad_norm": 7.112389133833306, + "learning_rate": 8.211132631654447e-06, + "loss": 17.8923, + "step": 16395 + }, + { + "epoch": 0.29970570493721094, + "grad_norm": 5.862526486348486, + "learning_rate": 8.210905728773353e-06, + "loss": 17.3055, + "step": 16396 + }, + { + "epoch": 0.2997239841336575, + "grad_norm": 6.354541865061286, + "learning_rate": 8.210678814638313e-06, + "loss": 17.4199, + "step": 16397 + }, + { + "epoch": 0.299742263330104, + "grad_norm": 6.5336974846492035, + "learning_rate": 8.210451889250121e-06, + "loss": 17.5184, + "step": 16398 + }, + { + "epoch": 0.29976054252655054, + "grad_norm": 8.431012648038225, + "learning_rate": 8.210224952609575e-06, + "loss": 17.8148, + "step": 16399 + }, + { + "epoch": 0.2997788217229971, + "grad_norm": 7.255367776565569, + "learning_rate": 8.20999800471747e-06, + "loss": 17.6895, + "step": 16400 + }, + { + "epoch": 0.29979710091944356, + "grad_norm": 5.651987035069994, + "learning_rate": 8.209771045574599e-06, + "loss": 17.1961, + "step": 16401 + }, + { + "epoch": 0.2998153801158901, + "grad_norm": 6.4202266438013655, + "learning_rate": 8.20954407518176e-06, + "loss": 17.5035, + "step": 16402 + }, + { + "epoch": 0.29983365931233663, + "grad_norm": 7.965755841062741, + "learning_rate": 8.209317093539748e-06, + "loss": 18.2732, + "step": 16403 + }, + { + "epoch": 0.29985193850878317, + "grad_norm": 7.526054502795034, + "learning_rate": 8.209090100649357e-06, + "loss": 17.8266, + "step": 16404 + }, + { + "epoch": 0.2998702177052297, + "grad_norm": 7.239781023690425, + "learning_rate": 8.208863096511385e-06, + "loss": 17.6852, + "step": 16405 + }, + { + "epoch": 0.2998884969016762, + "grad_norm": 7.012467979271183, + "learning_rate": 8.208636081126625e-06, + "loss": 17.8401, + "step": 16406 + }, + 
{ + "epoch": 0.2999067760981227, + "grad_norm": 6.49402094062138, + "learning_rate": 8.208409054495874e-06, + "loss": 17.4207, + "step": 16407 + }, + { + "epoch": 0.29992505529456925, + "grad_norm": 6.415587472946477, + "learning_rate": 8.208182016619928e-06, + "loss": 17.5619, + "step": 16408 + }, + { + "epoch": 0.2999433344910158, + "grad_norm": 5.439534581469668, + "learning_rate": 8.207954967499583e-06, + "loss": 17.0143, + "step": 16409 + }, + { + "epoch": 0.2999616136874623, + "grad_norm": 6.0970909843815475, + "learning_rate": 8.207727907135634e-06, + "loss": 17.4049, + "step": 16410 + }, + { + "epoch": 0.2999798928839088, + "grad_norm": 6.199160967168086, + "learning_rate": 8.207500835528877e-06, + "loss": 17.5505, + "step": 16411 + }, + { + "epoch": 0.29999817208035534, + "grad_norm": 6.3804577425469065, + "learning_rate": 8.207273752680107e-06, + "loss": 17.4565, + "step": 16412 + }, + { + "epoch": 0.3000164512768019, + "grad_norm": 6.856210443440484, + "learning_rate": 8.207046658590121e-06, + "loss": 17.643, + "step": 16413 + }, + { + "epoch": 0.3000347304732484, + "grad_norm": 7.289911227589726, + "learning_rate": 8.206819553259716e-06, + "loss": 17.8324, + "step": 16414 + }, + { + "epoch": 0.30005300966969495, + "grad_norm": 6.578725571598782, + "learning_rate": 8.206592436689686e-06, + "loss": 17.4686, + "step": 16415 + }, + { + "epoch": 0.30007128886614143, + "grad_norm": 7.295758397619474, + "learning_rate": 8.206365308880828e-06, + "loss": 17.6599, + "step": 16416 + }, + { + "epoch": 0.30008956806258796, + "grad_norm": 7.035031104762345, + "learning_rate": 8.206138169833938e-06, + "loss": 17.7502, + "step": 16417 + }, + { + "epoch": 0.3001078472590345, + "grad_norm": 7.635415994651476, + "learning_rate": 8.20591101954981e-06, + "loss": 17.8712, + "step": 16418 + }, + { + "epoch": 0.30012612645548103, + "grad_norm": 6.0513149454420505, + "learning_rate": 8.205683858029244e-06, + "loss": 17.5575, + "step": 16419 + }, + { + "epoch": 0.3001444056519275, + "grad_norm": 7.690193874636865, + "learning_rate": 8.205456685273035e-06, + "loss": 18.0409, + "step": 16420 + }, + { + "epoch": 0.30016268484837405, + "grad_norm": 6.337069261755316, + "learning_rate": 8.205229501281976e-06, + "loss": 17.4, + "step": 16421 + }, + { + "epoch": 0.3001809640448206, + "grad_norm": 7.08058405623771, + "learning_rate": 8.205002306056865e-06, + "loss": 17.9945, + "step": 16422 + }, + { + "epoch": 0.3001992432412671, + "grad_norm": 5.790310461273087, + "learning_rate": 8.204775099598503e-06, + "loss": 17.2035, + "step": 16423 + }, + { + "epoch": 0.30021752243771366, + "grad_norm": 6.460634673234715, + "learning_rate": 8.20454788190768e-06, + "loss": 17.6697, + "step": 16424 + }, + { + "epoch": 0.30023580163416014, + "grad_norm": 7.638564574355802, + "learning_rate": 8.204320652985195e-06, + "loss": 17.8488, + "step": 16425 + }, + { + "epoch": 0.3002540808306067, + "grad_norm": 6.639336955554244, + "learning_rate": 8.204093412831845e-06, + "loss": 17.6881, + "step": 16426 + }, + { + "epoch": 0.3002723600270532, + "grad_norm": 6.42767590274851, + "learning_rate": 8.203866161448425e-06, + "loss": 17.4481, + "step": 16427 + }, + { + "epoch": 0.30029063922349974, + "grad_norm": 7.207889403731094, + "learning_rate": 8.20363889883573e-06, + "loss": 18.0579, + "step": 16428 + }, + { + "epoch": 0.3003089184199463, + "grad_norm": 6.107333428761575, + "learning_rate": 8.203411624994561e-06, + "loss": 17.398, + "step": 16429 + }, + { + "epoch": 0.30032719761639276, + "grad_norm": 5.7498502185620115, + 
"learning_rate": 8.203184339925714e-06, + "loss": 17.209, + "step": 16430 + }, + { + "epoch": 0.3003454768128393, + "grad_norm": 6.980368445476265, + "learning_rate": 8.202957043629981e-06, + "loss": 17.6543, + "step": 16431 + }, + { + "epoch": 0.30036375600928583, + "grad_norm": 6.612427473663922, + "learning_rate": 8.202729736108163e-06, + "loss": 17.503, + "step": 16432 + }, + { + "epoch": 0.30038203520573237, + "grad_norm": 6.970997891185264, + "learning_rate": 8.202502417361053e-06, + "loss": 17.7627, + "step": 16433 + }, + { + "epoch": 0.3004003144021789, + "grad_norm": 6.73318970432507, + "learning_rate": 8.202275087389452e-06, + "loss": 17.5573, + "step": 16434 + }, + { + "epoch": 0.3004185935986254, + "grad_norm": 7.305255101930601, + "learning_rate": 8.202047746194155e-06, + "loss": 17.8708, + "step": 16435 + }, + { + "epoch": 0.3004368727950719, + "grad_norm": 6.06333359743604, + "learning_rate": 8.201820393775957e-06, + "loss": 17.3008, + "step": 16436 + }, + { + "epoch": 0.30045515199151845, + "grad_norm": 8.089268148461798, + "learning_rate": 8.201593030135657e-06, + "loss": 18.2991, + "step": 16437 + }, + { + "epoch": 0.300473431187965, + "grad_norm": 5.927432331427687, + "learning_rate": 8.201365655274051e-06, + "loss": 17.5833, + "step": 16438 + }, + { + "epoch": 0.3004917103844115, + "grad_norm": 5.144567413931674, + "learning_rate": 8.201138269191937e-06, + "loss": 17.0671, + "step": 16439 + }, + { + "epoch": 0.300509989580858, + "grad_norm": 5.5994736216512315, + "learning_rate": 8.20091087189011e-06, + "loss": 17.1244, + "step": 16440 + }, + { + "epoch": 0.30052826877730454, + "grad_norm": 7.242150273910535, + "learning_rate": 8.20068346336937e-06, + "loss": 17.9618, + "step": 16441 + }, + { + "epoch": 0.3005465479737511, + "grad_norm": 6.085807824257408, + "learning_rate": 8.20045604363051e-06, + "loss": 17.5531, + "step": 16442 + }, + { + "epoch": 0.3005648271701976, + "grad_norm": 5.901827548152579, + "learning_rate": 8.20022861267433e-06, + "loss": 17.2967, + "step": 16443 + }, + { + "epoch": 0.30058310636664415, + "grad_norm": 6.5713707264465375, + "learning_rate": 8.200001170501627e-06, + "loss": 17.168, + "step": 16444 + }, + { + "epoch": 0.30060138556309063, + "grad_norm": 6.241234340365578, + "learning_rate": 8.199773717113198e-06, + "loss": 17.3489, + "step": 16445 + }, + { + "epoch": 0.30061966475953716, + "grad_norm": 7.99825696017237, + "learning_rate": 8.199546252509838e-06, + "loss": 18.1971, + "step": 16446 + }, + { + "epoch": 0.3006379439559837, + "grad_norm": 5.952035644458607, + "learning_rate": 8.199318776692347e-06, + "loss": 17.4981, + "step": 16447 + }, + { + "epoch": 0.30065622315243024, + "grad_norm": 6.8341513635009425, + "learning_rate": 8.199091289661522e-06, + "loss": 17.498, + "step": 16448 + }, + { + "epoch": 0.30067450234887677, + "grad_norm": 6.460937181230472, + "learning_rate": 8.198863791418159e-06, + "loss": 17.712, + "step": 16449 + }, + { + "epoch": 0.30069278154532325, + "grad_norm": 7.310936784672614, + "learning_rate": 8.198636281963055e-06, + "loss": 17.8312, + "step": 16450 + }, + { + "epoch": 0.3007110607417698, + "grad_norm": 5.779597444089757, + "learning_rate": 8.19840876129701e-06, + "loss": 17.1703, + "step": 16451 + }, + { + "epoch": 0.3007293399382163, + "grad_norm": 7.292474928284607, + "learning_rate": 8.198181229420819e-06, + "loss": 17.6399, + "step": 16452 + }, + { + "epoch": 0.30074761913466286, + "grad_norm": 5.460471257386947, + "learning_rate": 8.197953686335281e-06, + "loss": 17.1148, + "step": 16453 + }, + 
{ + "epoch": 0.30076589833110934, + "grad_norm": 7.682451591408236, + "learning_rate": 8.197726132041194e-06, + "loss": 18.075, + "step": 16454 + }, + { + "epoch": 0.3007841775275559, + "grad_norm": 7.517908304719064, + "learning_rate": 8.19749856653935e-06, + "loss": 18.0855, + "step": 16455 + }, + { + "epoch": 0.3008024567240024, + "grad_norm": 7.706676260127519, + "learning_rate": 8.197270989830554e-06, + "loss": 17.9831, + "step": 16456 + }, + { + "epoch": 0.30082073592044895, + "grad_norm": 8.199037568674859, + "learning_rate": 8.197043401915601e-06, + "loss": 17.9961, + "step": 16457 + }, + { + "epoch": 0.3008390151168955, + "grad_norm": 6.534789165792984, + "learning_rate": 8.196815802795288e-06, + "loss": 17.4348, + "step": 16458 + }, + { + "epoch": 0.30085729431334196, + "grad_norm": 6.904812089585506, + "learning_rate": 8.196588192470412e-06, + "loss": 17.5515, + "step": 16459 + }, + { + "epoch": 0.3008755735097885, + "grad_norm": 5.8144758827684795, + "learning_rate": 8.196360570941773e-06, + "loss": 17.4175, + "step": 16460 + }, + { + "epoch": 0.30089385270623503, + "grad_norm": 13.19151471823941, + "learning_rate": 8.196132938210166e-06, + "loss": 18.2953, + "step": 16461 + }, + { + "epoch": 0.30091213190268157, + "grad_norm": 7.153700518477827, + "learning_rate": 8.195905294276392e-06, + "loss": 17.8263, + "step": 16462 + }, + { + "epoch": 0.3009304110991281, + "grad_norm": 6.088575872454362, + "learning_rate": 8.195677639141247e-06, + "loss": 17.5734, + "step": 16463 + }, + { + "epoch": 0.3009486902955746, + "grad_norm": 7.01020178943264, + "learning_rate": 8.195449972805529e-06, + "loss": 17.3798, + "step": 16464 + }, + { + "epoch": 0.3009669694920211, + "grad_norm": 6.772620443157715, + "learning_rate": 8.195222295270035e-06, + "loss": 17.3864, + "step": 16465 + }, + { + "epoch": 0.30098524868846765, + "grad_norm": 7.187676691443884, + "learning_rate": 8.194994606535566e-06, + "loss": 17.8388, + "step": 16466 + }, + { + "epoch": 0.3010035278849142, + "grad_norm": 6.564834304727205, + "learning_rate": 8.194766906602916e-06, + "loss": 17.4207, + "step": 16467 + }, + { + "epoch": 0.3010218070813607, + "grad_norm": 7.6128025116026885, + "learning_rate": 8.194539195472888e-06, + "loss": 18.0054, + "step": 16468 + }, + { + "epoch": 0.3010400862778072, + "grad_norm": 7.329839875497806, + "learning_rate": 8.194311473146274e-06, + "loss": 17.9493, + "step": 16469 + }, + { + "epoch": 0.30105836547425374, + "grad_norm": 6.162042508545888, + "learning_rate": 8.19408373962388e-06, + "loss": 17.4555, + "step": 16470 + }, + { + "epoch": 0.3010766446707003, + "grad_norm": 6.166557836595641, + "learning_rate": 8.193855994906497e-06, + "loss": 17.5689, + "step": 16471 + }, + { + "epoch": 0.3010949238671468, + "grad_norm": 6.038105628137205, + "learning_rate": 8.193628238994924e-06, + "loss": 17.223, + "step": 16472 + }, + { + "epoch": 0.30111320306359335, + "grad_norm": 5.687321592545057, + "learning_rate": 8.193400471889965e-06, + "loss": 17.3932, + "step": 16473 + }, + { + "epoch": 0.30113148226003983, + "grad_norm": 6.6969656967844085, + "learning_rate": 8.19317269359241e-06, + "loss": 17.6497, + "step": 16474 + }, + { + "epoch": 0.30114976145648636, + "grad_norm": 8.598318612771129, + "learning_rate": 8.192944904103065e-06, + "loss": 18.3712, + "step": 16475 + }, + { + "epoch": 0.3011680406529329, + "grad_norm": 6.489485945335258, + "learning_rate": 8.192717103422725e-06, + "loss": 17.5235, + "step": 16476 + }, + { + "epoch": 0.30118631984937944, + "grad_norm": 7.399255963939922, + 
"learning_rate": 8.192489291552188e-06, + "loss": 17.6362, + "step": 16477 + }, + { + "epoch": 0.30120459904582597, + "grad_norm": 12.276893317189007, + "learning_rate": 8.192261468492252e-06, + "loss": 18.7239, + "step": 16478 + }, + { + "epoch": 0.30122287824227245, + "grad_norm": 6.283561086603921, + "learning_rate": 8.19203363424372e-06, + "loss": 17.6668, + "step": 16479 + }, + { + "epoch": 0.301241157438719, + "grad_norm": 8.687683138104715, + "learning_rate": 8.191805788807383e-06, + "loss": 18.1202, + "step": 16480 + }, + { + "epoch": 0.3012594366351655, + "grad_norm": 6.722267533928825, + "learning_rate": 8.191577932184045e-06, + "loss": 17.5651, + "step": 16481 + }, + { + "epoch": 0.30127771583161206, + "grad_norm": 7.543462823867828, + "learning_rate": 8.191350064374505e-06, + "loss": 17.8648, + "step": 16482 + }, + { + "epoch": 0.3012959950280586, + "grad_norm": 7.695502028635188, + "learning_rate": 8.19112218537956e-06, + "loss": 17.7225, + "step": 16483 + }, + { + "epoch": 0.3013142742245051, + "grad_norm": 7.016309067710564, + "learning_rate": 8.190894295200006e-06, + "loss": 17.6521, + "step": 16484 + }, + { + "epoch": 0.3013325534209516, + "grad_norm": 5.179047886656715, + "learning_rate": 8.190666393836646e-06, + "loss": 16.982, + "step": 16485 + }, + { + "epoch": 0.30135083261739815, + "grad_norm": 7.388548267853332, + "learning_rate": 8.190438481290278e-06, + "loss": 17.9728, + "step": 16486 + }, + { + "epoch": 0.3013691118138447, + "grad_norm": 5.749814460472834, + "learning_rate": 8.190210557561698e-06, + "loss": 17.4074, + "step": 16487 + }, + { + "epoch": 0.30138739101029116, + "grad_norm": 7.430658950912746, + "learning_rate": 8.189982622651707e-06, + "loss": 17.75, + "step": 16488 + }, + { + "epoch": 0.3014056702067377, + "grad_norm": 8.619400391358544, + "learning_rate": 8.189754676561105e-06, + "loss": 17.8165, + "step": 16489 + }, + { + "epoch": 0.30142394940318423, + "grad_norm": 7.090906533322632, + "learning_rate": 8.189526719290688e-06, + "loss": 17.644, + "step": 16490 + }, + { + "epoch": 0.30144222859963077, + "grad_norm": 6.997694260141088, + "learning_rate": 8.18929875084126e-06, + "loss": 17.5898, + "step": 16491 + }, + { + "epoch": 0.3014605077960773, + "grad_norm": 6.926362107786558, + "learning_rate": 8.189070771213614e-06, + "loss": 18.1243, + "step": 16492 + }, + { + "epoch": 0.3014787869925238, + "grad_norm": 7.318321542607569, + "learning_rate": 8.188842780408551e-06, + "loss": 17.595, + "step": 16493 + }, + { + "epoch": 0.3014970661889703, + "grad_norm": 6.952968749692479, + "learning_rate": 8.188614778426871e-06, + "loss": 17.9359, + "step": 16494 + }, + { + "epoch": 0.30151534538541686, + "grad_norm": 6.615195948124856, + "learning_rate": 8.188386765269376e-06, + "loss": 17.546, + "step": 16495 + }, + { + "epoch": 0.3015336245818634, + "grad_norm": 8.695163492420042, + "learning_rate": 8.188158740936859e-06, + "loss": 18.9347, + "step": 16496 + }, + { + "epoch": 0.3015519037783099, + "grad_norm": 5.85640219611583, + "learning_rate": 8.187930705430123e-06, + "loss": 17.3828, + "step": 16497 + }, + { + "epoch": 0.3015701829747564, + "grad_norm": 6.906288848652323, + "learning_rate": 8.187702658749966e-06, + "loss": 17.544, + "step": 16498 + }, + { + "epoch": 0.30158846217120294, + "grad_norm": 8.21743779413207, + "learning_rate": 8.18747460089719e-06, + "loss": 18.0989, + "step": 16499 + }, + { + "epoch": 0.3016067413676495, + "grad_norm": 6.508468927991813, + "learning_rate": 8.187246531872588e-06, + "loss": 17.5793, + "step": 16500 + }, + { + 
"epoch": 0.301625020564096, + "grad_norm": 6.812483683032258, + "learning_rate": 8.187018451676967e-06, + "loss": 17.7585, + "step": 16501 + }, + { + "epoch": 0.30164329976054255, + "grad_norm": 7.712239528760078, + "learning_rate": 8.186790360311123e-06, + "loss": 18.1059, + "step": 16502 + }, + { + "epoch": 0.30166157895698903, + "grad_norm": 5.874934679959864, + "learning_rate": 8.186562257775853e-06, + "loss": 17.2574, + "step": 16503 + }, + { + "epoch": 0.30167985815343556, + "grad_norm": 6.555338602061579, + "learning_rate": 8.18633414407196e-06, + "loss": 17.6712, + "step": 16504 + }, + { + "epoch": 0.3016981373498821, + "grad_norm": 7.227967719558234, + "learning_rate": 8.186106019200242e-06, + "loss": 17.7025, + "step": 16505 + }, + { + "epoch": 0.30171641654632864, + "grad_norm": 5.869019190080841, + "learning_rate": 8.185877883161499e-06, + "loss": 17.3396, + "step": 16506 + }, + { + "epoch": 0.30173469574277517, + "grad_norm": 7.133923522152701, + "learning_rate": 8.185649735956532e-06, + "loss": 17.9019, + "step": 16507 + }, + { + "epoch": 0.30175297493922165, + "grad_norm": 7.245080198261911, + "learning_rate": 8.185421577586136e-06, + "loss": 17.7752, + "step": 16508 + }, + { + "epoch": 0.3017712541356682, + "grad_norm": 4.810273064943183, + "learning_rate": 8.185193408051117e-06, + "loss": 16.9043, + "step": 16509 + }, + { + "epoch": 0.3017895333321147, + "grad_norm": 7.7115591365479546, + "learning_rate": 8.184965227352269e-06, + "loss": 18.181, + "step": 16510 + }, + { + "epoch": 0.30180781252856126, + "grad_norm": 8.173721821529089, + "learning_rate": 8.184737035490395e-06, + "loss": 18.4305, + "step": 16511 + }, + { + "epoch": 0.3018260917250078, + "grad_norm": 7.775223499109105, + "learning_rate": 8.184508832466296e-06, + "loss": 17.6237, + "step": 16512 + }, + { + "epoch": 0.3018443709214543, + "grad_norm": 6.893432384644545, + "learning_rate": 8.184280618280767e-06, + "loss": 17.5622, + "step": 16513 + }, + { + "epoch": 0.3018626501179008, + "grad_norm": 6.232925317226751, + "learning_rate": 8.184052392934612e-06, + "loss": 17.6704, + "step": 16514 + }, + { + "epoch": 0.30188092931434735, + "grad_norm": 6.098056554058219, + "learning_rate": 8.18382415642863e-06, + "loss": 17.7114, + "step": 16515 + }, + { + "epoch": 0.3018992085107939, + "grad_norm": 6.588516230814564, + "learning_rate": 8.183595908763621e-06, + "loss": 17.5474, + "step": 16516 + }, + { + "epoch": 0.3019174877072404, + "grad_norm": 6.028432713556956, + "learning_rate": 8.183367649940383e-06, + "loss": 17.2659, + "step": 16517 + }, + { + "epoch": 0.3019357669036869, + "grad_norm": 6.720741996752916, + "learning_rate": 8.18313937995972e-06, + "loss": 17.7225, + "step": 16518 + }, + { + "epoch": 0.30195404610013343, + "grad_norm": 7.538141356641681, + "learning_rate": 8.182911098822429e-06, + "loss": 17.6945, + "step": 16519 + }, + { + "epoch": 0.30197232529657997, + "grad_norm": 7.364066713971182, + "learning_rate": 8.182682806529308e-06, + "loss": 17.9775, + "step": 16520 + }, + { + "epoch": 0.3019906044930265, + "grad_norm": 5.94866339604873, + "learning_rate": 8.182454503081163e-06, + "loss": 17.3491, + "step": 16521 + }, + { + "epoch": 0.302008883689473, + "grad_norm": 6.276540962611812, + "learning_rate": 8.182226188478789e-06, + "loss": 17.4954, + "step": 16522 + }, + { + "epoch": 0.3020271628859195, + "grad_norm": 7.731017932608166, + "learning_rate": 8.18199786272299e-06, + "loss": 18.1973, + "step": 16523 + }, + { + "epoch": 0.30204544208236606, + "grad_norm": 7.919062207011995, + 
"learning_rate": 8.181769525814564e-06, + "loss": 18.2331, + "step": 16524 + }, + { + "epoch": 0.3020637212788126, + "grad_norm": 5.786780000985477, + "learning_rate": 8.181541177754313e-06, + "loss": 17.3974, + "step": 16525 + }, + { + "epoch": 0.3020820004752591, + "grad_norm": 4.780131828598741, + "learning_rate": 8.181312818543035e-06, + "loss": 16.9365, + "step": 16526 + }, + { + "epoch": 0.3021002796717056, + "grad_norm": 6.2531493406756935, + "learning_rate": 8.18108444818153e-06, + "loss": 17.4062, + "step": 16527 + }, + { + "epoch": 0.30211855886815214, + "grad_norm": 7.22452000001973, + "learning_rate": 8.180856066670601e-06, + "loss": 17.8213, + "step": 16528 + }, + { + "epoch": 0.3021368380645987, + "grad_norm": 7.399980142162305, + "learning_rate": 8.18062767401105e-06, + "loss": 17.8293, + "step": 16529 + }, + { + "epoch": 0.3021551172610452, + "grad_norm": 6.711907461664344, + "learning_rate": 8.180399270203674e-06, + "loss": 17.6821, + "step": 16530 + }, + { + "epoch": 0.30217339645749175, + "grad_norm": 6.115761496396272, + "learning_rate": 8.180170855249273e-06, + "loss": 17.4398, + "step": 16531 + }, + { + "epoch": 0.30219167565393823, + "grad_norm": 6.0219889397981445, + "learning_rate": 8.17994242914865e-06, + "loss": 17.4822, + "step": 16532 + }, + { + "epoch": 0.30220995485038477, + "grad_norm": 7.016598860551536, + "learning_rate": 8.179713991902604e-06, + "loss": 17.8632, + "step": 16533 + }, + { + "epoch": 0.3022282340468313, + "grad_norm": 6.277178289562638, + "learning_rate": 8.179485543511937e-06, + "loss": 17.5471, + "step": 16534 + }, + { + "epoch": 0.30224651324327784, + "grad_norm": 6.154732914126383, + "learning_rate": 8.17925708397745e-06, + "loss": 17.2966, + "step": 16535 + }, + { + "epoch": 0.3022647924397244, + "grad_norm": 5.984522907203976, + "learning_rate": 8.179028613299942e-06, + "loss": 17.2695, + "step": 16536 + }, + { + "epoch": 0.30228307163617085, + "grad_norm": 6.073259677347897, + "learning_rate": 8.178800131480215e-06, + "loss": 17.3777, + "step": 16537 + }, + { + "epoch": 0.3023013508326174, + "grad_norm": 6.778560795947439, + "learning_rate": 8.17857163851907e-06, + "loss": 17.6691, + "step": 16538 + }, + { + "epoch": 0.3023196300290639, + "grad_norm": 5.072466790177338, + "learning_rate": 8.178343134417305e-06, + "loss": 17.0627, + "step": 16539 + }, + { + "epoch": 0.30233790922551046, + "grad_norm": 6.49635799588229, + "learning_rate": 8.178114619175725e-06, + "loss": 17.7353, + "step": 16540 + }, + { + "epoch": 0.302356188421957, + "grad_norm": 6.225165267747494, + "learning_rate": 8.17788609279513e-06, + "loss": 17.3818, + "step": 16541 + }, + { + "epoch": 0.3023744676184035, + "grad_norm": 5.528245605871067, + "learning_rate": 8.177657555276316e-06, + "loss": 17.1716, + "step": 16542 + }, + { + "epoch": 0.30239274681485, + "grad_norm": 7.403188748676723, + "learning_rate": 8.17742900662009e-06, + "loss": 18.0589, + "step": 16543 + }, + { + "epoch": 0.30241102601129655, + "grad_norm": 7.47989327310321, + "learning_rate": 8.177200446827253e-06, + "loss": 18.2505, + "step": 16544 + }, + { + "epoch": 0.3024293052077431, + "grad_norm": 6.608004154482326, + "learning_rate": 8.176971875898602e-06, + "loss": 17.7651, + "step": 16545 + }, + { + "epoch": 0.3024475844041896, + "grad_norm": 6.595024618848987, + "learning_rate": 8.176743293834942e-06, + "loss": 17.5882, + "step": 16546 + }, + { + "epoch": 0.3024658636006361, + "grad_norm": 6.83054755805045, + "learning_rate": 8.17651470063707e-06, + "loss": 17.783, + "step": 16547 + }, + { + 
"epoch": 0.30248414279708263, + "grad_norm": 6.688101712968924, + "learning_rate": 8.176286096305791e-06, + "loss": 17.5782, + "step": 16548 + }, + { + "epoch": 0.30250242199352917, + "grad_norm": 7.122155042754724, + "learning_rate": 8.176057480841905e-06, + "loss": 17.7173, + "step": 16549 + }, + { + "epoch": 0.3025207011899757, + "grad_norm": 5.9652240692357, + "learning_rate": 8.175828854246213e-06, + "loss": 17.0419, + "step": 16550 + }, + { + "epoch": 0.30253898038642224, + "grad_norm": 6.229676225581576, + "learning_rate": 8.175600216519518e-06, + "loss": 17.3498, + "step": 16551 + }, + { + "epoch": 0.3025572595828687, + "grad_norm": 6.822616507251027, + "learning_rate": 8.175371567662617e-06, + "loss": 17.6579, + "step": 16552 + }, + { + "epoch": 0.30257553877931526, + "grad_norm": 6.216293568132132, + "learning_rate": 8.175142907676314e-06, + "loss": 17.3725, + "step": 16553 + }, + { + "epoch": 0.3025938179757618, + "grad_norm": 6.257030541385044, + "learning_rate": 8.174914236561413e-06, + "loss": 17.5881, + "step": 16554 + }, + { + "epoch": 0.3026120971722083, + "grad_norm": 6.7557453311657545, + "learning_rate": 8.17468555431871e-06, + "loss": 17.7954, + "step": 16555 + }, + { + "epoch": 0.3026303763686548, + "grad_norm": 7.772523328786312, + "learning_rate": 8.174456860949013e-06, + "loss": 18.0306, + "step": 16556 + }, + { + "epoch": 0.30264865556510134, + "grad_norm": 5.725160385162716, + "learning_rate": 8.174228156453118e-06, + "loss": 17.2693, + "step": 16557 + }, + { + "epoch": 0.3026669347615479, + "grad_norm": 6.027807594635172, + "learning_rate": 8.173999440831832e-06, + "loss": 17.1753, + "step": 16558 + }, + { + "epoch": 0.3026852139579944, + "grad_norm": 6.579440995997506, + "learning_rate": 8.17377071408595e-06, + "loss": 17.7444, + "step": 16559 + }, + { + "epoch": 0.30270349315444095, + "grad_norm": 7.149903712173893, + "learning_rate": 8.173541976216278e-06, + "loss": 17.7759, + "step": 16560 + }, + { + "epoch": 0.30272177235088743, + "grad_norm": 8.21693445222621, + "learning_rate": 8.173313227223618e-06, + "loss": 18.1841, + "step": 16561 + }, + { + "epoch": 0.30274005154733397, + "grad_norm": 5.965184469782462, + "learning_rate": 8.173084467108768e-06, + "loss": 17.167, + "step": 16562 + }, + { + "epoch": 0.3027583307437805, + "grad_norm": 6.5322324471294095, + "learning_rate": 8.172855695872535e-06, + "loss": 17.8392, + "step": 16563 + }, + { + "epoch": 0.30277660994022704, + "grad_norm": 6.699074543118785, + "learning_rate": 8.172626913515716e-06, + "loss": 17.4916, + "step": 16564 + }, + { + "epoch": 0.3027948891366736, + "grad_norm": 7.690518849392542, + "learning_rate": 8.172398120039115e-06, + "loss": 18.1277, + "step": 16565 + }, + { + "epoch": 0.30281316833312005, + "grad_norm": 6.593627364055679, + "learning_rate": 8.172169315443536e-06, + "loss": 17.7265, + "step": 16566 + }, + { + "epoch": 0.3028314475295666, + "grad_norm": 7.555344403148906, + "learning_rate": 8.171940499729776e-06, + "loss": 17.5391, + "step": 16567 + }, + { + "epoch": 0.3028497267260131, + "grad_norm": 6.291748527130846, + "learning_rate": 8.171711672898642e-06, + "loss": 17.4901, + "step": 16568 + }, + { + "epoch": 0.30286800592245966, + "grad_norm": 6.284059474720727, + "learning_rate": 8.171482834950932e-06, + "loss": 17.5326, + "step": 16569 + }, + { + "epoch": 0.3028862851189062, + "grad_norm": 6.193792799554601, + "learning_rate": 8.171253985887452e-06, + "loss": 17.4754, + "step": 16570 + }, + { + "epoch": 0.3029045643153527, + "grad_norm": 5.946877100607961, + 
"learning_rate": 8.171025125709002e-06, + "loss": 17.2342, + "step": 16571 + }, + { + "epoch": 0.3029228435117992, + "grad_norm": 6.540139571152314, + "learning_rate": 8.170796254416382e-06, + "loss": 17.6354, + "step": 16572 + }, + { + "epoch": 0.30294112270824575, + "grad_norm": 7.6721978967782665, + "learning_rate": 8.170567372010396e-06, + "loss": 18.2392, + "step": 16573 + }, + { + "epoch": 0.3029594019046923, + "grad_norm": 7.625256006654974, + "learning_rate": 8.170338478491849e-06, + "loss": 17.9819, + "step": 16574 + }, + { + "epoch": 0.3029776811011388, + "grad_norm": 6.890673777461589, + "learning_rate": 8.17010957386154e-06, + "loss": 17.7003, + "step": 16575 + }, + { + "epoch": 0.3029959602975853, + "grad_norm": 5.740488014371495, + "learning_rate": 8.169880658120271e-06, + "loss": 17.1102, + "step": 16576 + }, + { + "epoch": 0.30301423949403183, + "grad_norm": 6.974765187209739, + "learning_rate": 8.169651731268846e-06, + "loss": 17.6724, + "step": 16577 + }, + { + "epoch": 0.30303251869047837, + "grad_norm": 7.644279023603949, + "learning_rate": 8.169422793308067e-06, + "loss": 18.1045, + "step": 16578 + }, + { + "epoch": 0.3030507978869249, + "grad_norm": 6.8224864213141005, + "learning_rate": 8.169193844238735e-06, + "loss": 18.0005, + "step": 16579 + }, + { + "epoch": 0.30306907708337144, + "grad_norm": 7.407862070402675, + "learning_rate": 8.168964884061654e-06, + "loss": 18.038, + "step": 16580 + }, + { + "epoch": 0.3030873562798179, + "grad_norm": 7.445578759311784, + "learning_rate": 8.168735912777626e-06, + "loss": 17.8016, + "step": 16581 + }, + { + "epoch": 0.30310563547626446, + "grad_norm": 7.453146616106986, + "learning_rate": 8.168506930387455e-06, + "loss": 17.6925, + "step": 16582 + }, + { + "epoch": 0.303123914672711, + "grad_norm": 6.647364772448283, + "learning_rate": 8.16827793689194e-06, + "loss": 17.9154, + "step": 16583 + }, + { + "epoch": 0.30314219386915753, + "grad_norm": 6.6805973224818604, + "learning_rate": 8.168048932291887e-06, + "loss": 17.7983, + "step": 16584 + }, + { + "epoch": 0.30316047306560406, + "grad_norm": 6.81124673399441, + "learning_rate": 8.167819916588098e-06, + "loss": 17.4131, + "step": 16585 + }, + { + "epoch": 0.30317875226205054, + "grad_norm": 5.71800434990808, + "learning_rate": 8.167590889781374e-06, + "loss": 17.193, + "step": 16586 + }, + { + "epoch": 0.3031970314584971, + "grad_norm": 6.357336002746887, + "learning_rate": 8.16736185187252e-06, + "loss": 17.6707, + "step": 16587 + }, + { + "epoch": 0.3032153106549436, + "grad_norm": 6.7051551075638995, + "learning_rate": 8.167132802862337e-06, + "loss": 17.7506, + "step": 16588 + }, + { + "epoch": 0.30323358985139015, + "grad_norm": 6.0656603212703715, + "learning_rate": 8.166903742751629e-06, + "loss": 17.3446, + "step": 16589 + }, + { + "epoch": 0.30325186904783663, + "grad_norm": 6.234430686785868, + "learning_rate": 8.166674671541197e-06, + "loss": 17.5529, + "step": 16590 + }, + { + "epoch": 0.30327014824428317, + "grad_norm": 6.07744793907776, + "learning_rate": 8.166445589231844e-06, + "loss": 17.3579, + "step": 16591 + }, + { + "epoch": 0.3032884274407297, + "grad_norm": 7.458944463186073, + "learning_rate": 8.166216495824377e-06, + "loss": 18.1661, + "step": 16592 + }, + { + "epoch": 0.30330670663717624, + "grad_norm": 7.237524505725243, + "learning_rate": 8.165987391319595e-06, + "loss": 17.9962, + "step": 16593 + }, + { + "epoch": 0.3033249858336228, + "grad_norm": 6.211035357940235, + "learning_rate": 8.165758275718299e-06, + "loss": 17.4706, + "step": 
16594 + }, + { + "epoch": 0.30334326503006925, + "grad_norm": 6.169519490500913, + "learning_rate": 8.1655291490213e-06, + "loss": 17.4826, + "step": 16595 + }, + { + "epoch": 0.3033615442265158, + "grad_norm": 7.016402761120805, + "learning_rate": 8.165300011229391e-06, + "loss": 17.6625, + "step": 16596 + }, + { + "epoch": 0.3033798234229623, + "grad_norm": 7.2425316947153355, + "learning_rate": 8.165070862343383e-06, + "loss": 17.8298, + "step": 16597 + }, + { + "epoch": 0.30339810261940886, + "grad_norm": 5.969118681307772, + "learning_rate": 8.164841702364074e-06, + "loss": 17.277, + "step": 16598 + }, + { + "epoch": 0.3034163818158554, + "grad_norm": 6.9113068676432805, + "learning_rate": 8.164612531292272e-06, + "loss": 17.8815, + "step": 16599 + }, + { + "epoch": 0.3034346610123019, + "grad_norm": 6.00061687960561, + "learning_rate": 8.164383349128778e-06, + "loss": 17.2717, + "step": 16600 + }, + { + "epoch": 0.3034529402087484, + "grad_norm": 6.190804065741893, + "learning_rate": 8.164154155874392e-06, + "loss": 17.3708, + "step": 16601 + }, + { + "epoch": 0.30347121940519495, + "grad_norm": 5.90428780466989, + "learning_rate": 8.163924951529922e-06, + "loss": 17.2192, + "step": 16602 + }, + { + "epoch": 0.3034894986016415, + "grad_norm": 8.428949101455649, + "learning_rate": 8.16369573609617e-06, + "loss": 18.2044, + "step": 16603 + }, + { + "epoch": 0.303507777798088, + "grad_norm": 6.087154641374202, + "learning_rate": 8.163466509573938e-06, + "loss": 17.4781, + "step": 16604 + }, + { + "epoch": 0.3035260569945345, + "grad_norm": 5.548964191141877, + "learning_rate": 8.163237271964032e-06, + "loss": 17.1363, + "step": 16605 + }, + { + "epoch": 0.30354433619098103, + "grad_norm": 6.528545508294717, + "learning_rate": 8.163008023267253e-06, + "loss": 17.6114, + "step": 16606 + }, + { + "epoch": 0.30356261538742757, + "grad_norm": 5.317325238136297, + "learning_rate": 8.162778763484405e-06, + "loss": 16.9295, + "step": 16607 + }, + { + "epoch": 0.3035808945838741, + "grad_norm": 6.628272614822982, + "learning_rate": 8.162549492616292e-06, + "loss": 17.7807, + "step": 16608 + }, + { + "epoch": 0.30359917378032064, + "grad_norm": 6.85699704842018, + "learning_rate": 8.162320210663717e-06, + "loss": 17.6457, + "step": 16609 + }, + { + "epoch": 0.3036174529767671, + "grad_norm": 6.036235766142873, + "learning_rate": 8.162090917627486e-06, + "loss": 17.0974, + "step": 16610 + }, + { + "epoch": 0.30363573217321366, + "grad_norm": 6.257521317624505, + "learning_rate": 8.161861613508399e-06, + "loss": 17.6131, + "step": 16611 + }, + { + "epoch": 0.3036540113696602, + "grad_norm": 7.3301207660809515, + "learning_rate": 8.161632298307261e-06, + "loss": 17.7923, + "step": 16612 + }, + { + "epoch": 0.30367229056610673, + "grad_norm": 8.590125791099593, + "learning_rate": 8.161402972024876e-06, + "loss": 18.2708, + "step": 16613 + }, + { + "epoch": 0.30369056976255326, + "grad_norm": 6.756733202026823, + "learning_rate": 8.16117363466205e-06, + "loss": 17.8269, + "step": 16614 + }, + { + "epoch": 0.30370884895899974, + "grad_norm": 7.909718751814012, + "learning_rate": 8.160944286219582e-06, + "loss": 17.9376, + "step": 16615 + }, + { + "epoch": 0.3037271281554463, + "grad_norm": 5.841152948775874, + "learning_rate": 8.160714926698281e-06, + "loss": 17.2749, + "step": 16616 + }, + { + "epoch": 0.3037454073518928, + "grad_norm": 6.303060380406051, + "learning_rate": 8.160485556098948e-06, + "loss": 17.6905, + "step": 16617 + }, + { + "epoch": 0.30376368654833935, + "grad_norm": 
7.849903925154078, + "learning_rate": 8.160256174422387e-06, + "loss": 18.0157, + "step": 16618 + }, + { + "epoch": 0.3037819657447859, + "grad_norm": 6.219801675497526, + "learning_rate": 8.160026781669401e-06, + "loss": 17.5778, + "step": 16619 + }, + { + "epoch": 0.30380024494123237, + "grad_norm": 6.340471740537694, + "learning_rate": 8.159797377840799e-06, + "loss": 17.2766, + "step": 16620 + }, + { + "epoch": 0.3038185241376789, + "grad_norm": 6.8026489423000065, + "learning_rate": 8.159567962937379e-06, + "loss": 17.823, + "step": 16621 + }, + { + "epoch": 0.30383680333412544, + "grad_norm": 7.274009078158674, + "learning_rate": 8.159338536959946e-06, + "loss": 18.1152, + "step": 16622 + }, + { + "epoch": 0.303855082530572, + "grad_norm": 8.867369198726927, + "learning_rate": 8.159109099909309e-06, + "loss": 18.6322, + "step": 16623 + }, + { + "epoch": 0.30387336172701845, + "grad_norm": 7.441710985970655, + "learning_rate": 8.158879651786266e-06, + "loss": 18.0164, + "step": 16624 + }, + { + "epoch": 0.303891640923465, + "grad_norm": 5.665058720299332, + "learning_rate": 8.158650192591625e-06, + "loss": 17.2601, + "step": 16625 + }, + { + "epoch": 0.3039099201199115, + "grad_norm": 5.330355568794583, + "learning_rate": 8.158420722326188e-06, + "loss": 17.0789, + "step": 16626 + }, + { + "epoch": 0.30392819931635806, + "grad_norm": 6.799221602475565, + "learning_rate": 8.158191240990761e-06, + "loss": 17.7676, + "step": 16627 + }, + { + "epoch": 0.3039464785128046, + "grad_norm": 7.771883530264951, + "learning_rate": 8.157961748586149e-06, + "loss": 17.9648, + "step": 16628 + }, + { + "epoch": 0.3039647577092511, + "grad_norm": 5.87215653021683, + "learning_rate": 8.157732245113153e-06, + "loss": 17.3832, + "step": 16629 + }, + { + "epoch": 0.3039830369056976, + "grad_norm": 7.254285161872083, + "learning_rate": 8.157502730572581e-06, + "loss": 17.9605, + "step": 16630 + }, + { + "epoch": 0.30400131610214415, + "grad_norm": 5.982002777119766, + "learning_rate": 8.157273204965238e-06, + "loss": 17.2095, + "step": 16631 + }, + { + "epoch": 0.3040195952985907, + "grad_norm": 5.96001871350836, + "learning_rate": 8.157043668291922e-06, + "loss": 17.3055, + "step": 16632 + }, + { + "epoch": 0.3040378744950372, + "grad_norm": 6.435863027618191, + "learning_rate": 8.156814120553445e-06, + "loss": 17.7032, + "step": 16633 + }, + { + "epoch": 0.3040561536914837, + "grad_norm": 7.57322633761336, + "learning_rate": 8.156584561750606e-06, + "loss": 18.0216, + "step": 16634 + }, + { + "epoch": 0.30407443288793023, + "grad_norm": 6.696133293513736, + "learning_rate": 8.156354991884214e-06, + "loss": 17.692, + "step": 16635 + }, + { + "epoch": 0.30409271208437677, + "grad_norm": 8.727030605321485, + "learning_rate": 8.156125410955071e-06, + "loss": 18.3051, + "step": 16636 + }, + { + "epoch": 0.3041109912808233, + "grad_norm": 6.733621858197244, + "learning_rate": 8.155895818963982e-06, + "loss": 17.498, + "step": 16637 + }, + { + "epoch": 0.30412927047726984, + "grad_norm": 5.976496155872423, + "learning_rate": 8.155666215911754e-06, + "loss": 17.279, + "step": 16638 + }, + { + "epoch": 0.3041475496737163, + "grad_norm": 6.118867408043492, + "learning_rate": 8.155436601799187e-06, + "loss": 17.4757, + "step": 16639 + }, + { + "epoch": 0.30416582887016286, + "grad_norm": 5.735956527341406, + "learning_rate": 8.15520697662709e-06, + "loss": 17.1856, + "step": 16640 + }, + { + "epoch": 0.3041841080666094, + "grad_norm": 7.487874252516413, + "learning_rate": 8.154977340396264e-06, + "loss": 18.0411, + 
"step": 16641 + }, + { + "epoch": 0.30420238726305593, + "grad_norm": 7.761509509625328, + "learning_rate": 8.154747693107518e-06, + "loss": 18.2016, + "step": 16642 + }, + { + "epoch": 0.30422066645950246, + "grad_norm": 6.191408684603873, + "learning_rate": 8.154518034761657e-06, + "loss": 17.5018, + "step": 16643 + }, + { + "epoch": 0.30423894565594894, + "grad_norm": 7.728201685651844, + "learning_rate": 8.154288365359483e-06, + "loss": 17.9976, + "step": 16644 + }, + { + "epoch": 0.3042572248523955, + "grad_norm": 5.864159128704303, + "learning_rate": 8.1540586849018e-06, + "loss": 17.3021, + "step": 16645 + }, + { + "epoch": 0.304275504048842, + "grad_norm": 5.7828330681318105, + "learning_rate": 8.153828993389417e-06, + "loss": 17.1839, + "step": 16646 + }, + { + "epoch": 0.30429378324528855, + "grad_norm": 6.2154094997417895, + "learning_rate": 8.153599290823136e-06, + "loss": 17.5577, + "step": 16647 + }, + { + "epoch": 0.3043120624417351, + "grad_norm": 5.964364786629598, + "learning_rate": 8.153369577203764e-06, + "loss": 17.4375, + "step": 16648 + }, + { + "epoch": 0.30433034163818157, + "grad_norm": 6.622789388812934, + "learning_rate": 8.153139852532104e-06, + "loss": 17.3699, + "step": 16649 + }, + { + "epoch": 0.3043486208346281, + "grad_norm": 8.46486387940904, + "learning_rate": 8.152910116808962e-06, + "loss": 18.9387, + "step": 16650 + }, + { + "epoch": 0.30436690003107464, + "grad_norm": 6.588297848353495, + "learning_rate": 8.152680370035146e-06, + "loss": 17.6354, + "step": 16651 + }, + { + "epoch": 0.3043851792275212, + "grad_norm": 7.363797111774982, + "learning_rate": 8.152450612211457e-06, + "loss": 17.824, + "step": 16652 + }, + { + "epoch": 0.3044034584239677, + "grad_norm": 7.877980197925042, + "learning_rate": 8.152220843338704e-06, + "loss": 18.4074, + "step": 16653 + }, + { + "epoch": 0.3044217376204142, + "grad_norm": 5.609245018958935, + "learning_rate": 8.15199106341769e-06, + "loss": 17.25, + "step": 16654 + }, + { + "epoch": 0.3044400168168607, + "grad_norm": 6.194752741290725, + "learning_rate": 8.151761272449219e-06, + "loss": 17.7048, + "step": 16655 + }, + { + "epoch": 0.30445829601330726, + "grad_norm": 5.858072727717588, + "learning_rate": 8.151531470434099e-06, + "loss": 17.364, + "step": 16656 + }, + { + "epoch": 0.3044765752097538, + "grad_norm": 7.032068695583198, + "learning_rate": 8.151301657373136e-06, + "loss": 17.9112, + "step": 16657 + }, + { + "epoch": 0.3044948544062003, + "grad_norm": 5.809396707838711, + "learning_rate": 8.151071833267135e-06, + "loss": 17.2292, + "step": 16658 + }, + { + "epoch": 0.3045131336026468, + "grad_norm": 6.118237197029155, + "learning_rate": 8.150841998116898e-06, + "loss": 17.5482, + "step": 16659 + }, + { + "epoch": 0.30453141279909335, + "grad_norm": 11.032475752883897, + "learning_rate": 8.150612151923234e-06, + "loss": 18.1286, + "step": 16660 + }, + { + "epoch": 0.3045496919955399, + "grad_norm": 5.7063388817630285, + "learning_rate": 8.150382294686948e-06, + "loss": 17.4102, + "step": 16661 + }, + { + "epoch": 0.3045679711919864, + "grad_norm": 6.531306214638482, + "learning_rate": 8.150152426408845e-06, + "loss": 17.4352, + "step": 16662 + }, + { + "epoch": 0.3045862503884329, + "grad_norm": 5.562829033755462, + "learning_rate": 8.14992254708973e-06, + "loss": 17.2006, + "step": 16663 + }, + { + "epoch": 0.30460452958487944, + "grad_norm": 6.371054500796451, + "learning_rate": 8.149692656730413e-06, + "loss": 17.6054, + "step": 16664 + }, + { + "epoch": 0.30462280878132597, + "grad_norm": 
7.696357414683173, + "learning_rate": 8.149462755331695e-06, + "loss": 18.2222, + "step": 16665 + }, + { + "epoch": 0.3046410879777725, + "grad_norm": 6.145415972817905, + "learning_rate": 8.149232842894384e-06, + "loss": 17.449, + "step": 16666 + }, + { + "epoch": 0.30465936717421904, + "grad_norm": 7.071202653939831, + "learning_rate": 8.149002919419282e-06, + "loss": 18.3085, + "step": 16667 + }, + { + "epoch": 0.3046776463706655, + "grad_norm": 7.551379793858554, + "learning_rate": 8.148772984907203e-06, + "loss": 17.9634, + "step": 16668 + }, + { + "epoch": 0.30469592556711206, + "grad_norm": 8.186109212308564, + "learning_rate": 8.148543039358944e-06, + "loss": 18.2248, + "step": 16669 + }, + { + "epoch": 0.3047142047635586, + "grad_norm": 6.466871249323418, + "learning_rate": 8.148313082775316e-06, + "loss": 17.5463, + "step": 16670 + }, + { + "epoch": 0.30473248396000513, + "grad_norm": 6.293822843931664, + "learning_rate": 8.148083115157124e-06, + "loss": 17.3767, + "step": 16671 + }, + { + "epoch": 0.30475076315645167, + "grad_norm": 6.636580017643419, + "learning_rate": 8.147853136505175e-06, + "loss": 17.6899, + "step": 16672 + }, + { + "epoch": 0.30476904235289815, + "grad_norm": 6.777519905813917, + "learning_rate": 8.147623146820272e-06, + "loss": 17.7548, + "step": 16673 + }, + { + "epoch": 0.3047873215493447, + "grad_norm": 7.441594029204132, + "learning_rate": 8.147393146103224e-06, + "loss": 17.8708, + "step": 16674 + }, + { + "epoch": 0.3048056007457912, + "grad_norm": 6.356874956270289, + "learning_rate": 8.147163134354836e-06, + "loss": 17.5734, + "step": 16675 + }, + { + "epoch": 0.30482387994223775, + "grad_norm": 7.140009436584371, + "learning_rate": 8.146933111575915e-06, + "loss": 17.7291, + "step": 16676 + }, + { + "epoch": 0.3048421591386843, + "grad_norm": 6.992353068374214, + "learning_rate": 8.146703077767265e-06, + "loss": 17.9955, + "step": 16677 + }, + { + "epoch": 0.30486043833513077, + "grad_norm": 6.299840670557331, + "learning_rate": 8.146473032929693e-06, + "loss": 17.4187, + "step": 16678 + }, + { + "epoch": 0.3048787175315773, + "grad_norm": 5.890386436433404, + "learning_rate": 8.146242977064009e-06, + "loss": 17.3146, + "step": 16679 + }, + { + "epoch": 0.30489699672802384, + "grad_norm": 7.095378126234456, + "learning_rate": 8.146012910171014e-06, + "loss": 17.591, + "step": 16680 + }, + { + "epoch": 0.3049152759244704, + "grad_norm": 5.414916195517052, + "learning_rate": 8.14578283225152e-06, + "loss": 17.2094, + "step": 16681 + }, + { + "epoch": 0.3049335551209169, + "grad_norm": 8.092887418734362, + "learning_rate": 8.145552743306327e-06, + "loss": 17.7922, + "step": 16682 + }, + { + "epoch": 0.3049518343173634, + "grad_norm": 7.124788941677222, + "learning_rate": 8.145322643336245e-06, + "loss": 17.6115, + "step": 16683 + }, + { + "epoch": 0.3049701135138099, + "grad_norm": 6.079544236882442, + "learning_rate": 8.14509253234208e-06, + "loss": 17.7165, + "step": 16684 + }, + { + "epoch": 0.30498839271025646, + "grad_norm": 7.861638541511168, + "learning_rate": 8.14486241032464e-06, + "loss": 17.9554, + "step": 16685 + }, + { + "epoch": 0.305006671906703, + "grad_norm": 7.415013676567563, + "learning_rate": 8.14463227728473e-06, + "loss": 17.8059, + "step": 16686 + }, + { + "epoch": 0.30502495110314953, + "grad_norm": 6.172542416710036, + "learning_rate": 8.144402133223155e-06, + "loss": 17.3136, + "step": 16687 + }, + { + "epoch": 0.305043230299596, + "grad_norm": 7.380340395382709, + "learning_rate": 8.144171978140725e-06, + "loss": 17.1493, 
+ "step": 16688 + }, + { + "epoch": 0.30506150949604255, + "grad_norm": 6.811544579927895, + "learning_rate": 8.143941812038244e-06, + "loss": 17.8914, + "step": 16689 + }, + { + "epoch": 0.3050797886924891, + "grad_norm": 6.0034266004955095, + "learning_rate": 8.14371163491652e-06, + "loss": 17.4623, + "step": 16690 + }, + { + "epoch": 0.3050980678889356, + "grad_norm": 6.050861975044013, + "learning_rate": 8.14348144677636e-06, + "loss": 17.5379, + "step": 16691 + }, + { + "epoch": 0.3051163470853821, + "grad_norm": 5.647014412074355, + "learning_rate": 8.14325124761857e-06, + "loss": 17.3413, + "step": 16692 + }, + { + "epoch": 0.30513462628182864, + "grad_norm": 7.545296878365928, + "learning_rate": 8.143021037443956e-06, + "loss": 17.9563, + "step": 16693 + }, + { + "epoch": 0.30515290547827517, + "grad_norm": 6.106851345102743, + "learning_rate": 8.142790816253327e-06, + "loss": 17.6058, + "step": 16694 + }, + { + "epoch": 0.3051711846747217, + "grad_norm": 6.0532280414861015, + "learning_rate": 8.14256058404749e-06, + "loss": 17.3859, + "step": 16695 + }, + { + "epoch": 0.30518946387116824, + "grad_norm": 5.322284477396805, + "learning_rate": 8.14233034082725e-06, + "loss": 17.4024, + "step": 16696 + }, + { + "epoch": 0.3052077430676147, + "grad_norm": 8.295172495342939, + "learning_rate": 8.142100086593414e-06, + "loss": 18.5029, + "step": 16697 + }, + { + "epoch": 0.30522602226406126, + "grad_norm": 8.014620110297049, + "learning_rate": 8.141869821346791e-06, + "loss": 18.4967, + "step": 16698 + }, + { + "epoch": 0.3052443014605078, + "grad_norm": 7.109719991357483, + "learning_rate": 8.141639545088189e-06, + "loss": 17.7804, + "step": 16699 + }, + { + "epoch": 0.30526258065695433, + "grad_norm": 7.570967708371013, + "learning_rate": 8.141409257818409e-06, + "loss": 17.7589, + "step": 16700 + }, + { + "epoch": 0.30528085985340087, + "grad_norm": 5.743573043057227, + "learning_rate": 8.141178959538263e-06, + "loss": 17.3379, + "step": 16701 + }, + { + "epoch": 0.30529913904984735, + "grad_norm": 5.60616088144606, + "learning_rate": 8.140948650248559e-06, + "loss": 17.2385, + "step": 16702 + }, + { + "epoch": 0.3053174182462939, + "grad_norm": 5.142308309232406, + "learning_rate": 8.140718329950101e-06, + "loss": 17.0075, + "step": 16703 + }, + { + "epoch": 0.3053356974427404, + "grad_norm": 6.547997382321389, + "learning_rate": 8.140487998643699e-06, + "loss": 17.8636, + "step": 16704 + }, + { + "epoch": 0.30535397663918695, + "grad_norm": 5.658739944947381, + "learning_rate": 8.140257656330159e-06, + "loss": 17.2068, + "step": 16705 + }, + { + "epoch": 0.3053722558356335, + "grad_norm": 6.732443229239918, + "learning_rate": 8.140027303010288e-06, + "loss": 17.6035, + "step": 16706 + }, + { + "epoch": 0.30539053503207997, + "grad_norm": 7.543038726133314, + "learning_rate": 8.139796938684892e-06, + "loss": 18.0054, + "step": 16707 + }, + { + "epoch": 0.3054088142285265, + "grad_norm": 5.737792449591821, + "learning_rate": 8.139566563354782e-06, + "loss": 17.1012, + "step": 16708 + }, + { + "epoch": 0.30542709342497304, + "grad_norm": 5.283208433592343, + "learning_rate": 8.139336177020765e-06, + "loss": 17.0553, + "step": 16709 + }, + { + "epoch": 0.3054453726214196, + "grad_norm": 4.807319283842524, + "learning_rate": 8.139105779683645e-06, + "loss": 16.9905, + "step": 16710 + }, + { + "epoch": 0.3054636518178661, + "grad_norm": 7.485844959479744, + "learning_rate": 8.138875371344232e-06, + "loss": 17.9085, + "step": 16711 + }, + { + "epoch": 0.3054819310143126, + "grad_norm": 
5.759929392525538, + "learning_rate": 8.138644952003334e-06, + "loss": 17.2418, + "step": 16712 + }, + { + "epoch": 0.3055002102107591, + "grad_norm": 6.841986283055872, + "learning_rate": 8.138414521661758e-06, + "loss": 17.6657, + "step": 16713 + }, + { + "epoch": 0.30551848940720566, + "grad_norm": 7.699958157519718, + "learning_rate": 8.13818408032031e-06, + "loss": 18.2383, + "step": 16714 + }, + { + "epoch": 0.3055367686036522, + "grad_norm": 5.396068550385644, + "learning_rate": 8.1379536279798e-06, + "loss": 17.256, + "step": 16715 + }, + { + "epoch": 0.30555504780009873, + "grad_norm": 7.920276591533406, + "learning_rate": 8.137723164641034e-06, + "loss": 18.6347, + "step": 16716 + }, + { + "epoch": 0.3055733269965452, + "grad_norm": 7.267905648980075, + "learning_rate": 8.137492690304823e-06, + "loss": 17.924, + "step": 16717 + }, + { + "epoch": 0.30559160619299175, + "grad_norm": 5.882535555137481, + "learning_rate": 8.13726220497197e-06, + "loss": 17.3439, + "step": 16718 + }, + { + "epoch": 0.3056098853894383, + "grad_norm": 7.564212895553327, + "learning_rate": 8.137031708643283e-06, + "loss": 17.7634, + "step": 16719 + }, + { + "epoch": 0.3056281645858848, + "grad_norm": 8.278958609025626, + "learning_rate": 8.136801201319578e-06, + "loss": 17.9191, + "step": 16720 + }, + { + "epoch": 0.30564644378233136, + "grad_norm": 5.837226528619861, + "learning_rate": 8.136570683001652e-06, + "loss": 17.2523, + "step": 16721 + }, + { + "epoch": 0.30566472297877784, + "grad_norm": 7.32327415796826, + "learning_rate": 8.136340153690321e-06, + "loss": 17.8673, + "step": 16722 + }, + { + "epoch": 0.30568300217522437, + "grad_norm": 10.108315129156512, + "learning_rate": 8.13610961338639e-06, + "loss": 18.5179, + "step": 16723 + }, + { + "epoch": 0.3057012813716709, + "grad_norm": 6.933209439932622, + "learning_rate": 8.135879062090663e-06, + "loss": 17.9609, + "step": 16724 + }, + { + "epoch": 0.30571956056811744, + "grad_norm": 7.230965566714261, + "learning_rate": 8.135648499803956e-06, + "loss": 17.7497, + "step": 16725 + }, + { + "epoch": 0.3057378397645639, + "grad_norm": 6.254464458688985, + "learning_rate": 8.135417926527072e-06, + "loss": 17.5044, + "step": 16726 + }, + { + "epoch": 0.30575611896101046, + "grad_norm": 6.747843528117833, + "learning_rate": 8.135187342260819e-06, + "loss": 17.9859, + "step": 16727 + }, + { + "epoch": 0.305774398157457, + "grad_norm": 6.7380015745969875, + "learning_rate": 8.134956747006009e-06, + "loss": 17.922, + "step": 16728 + }, + { + "epoch": 0.30579267735390353, + "grad_norm": 6.965720999503236, + "learning_rate": 8.134726140763445e-06, + "loss": 17.6706, + "step": 16729 + }, + { + "epoch": 0.30581095655035007, + "grad_norm": 5.699536573914778, + "learning_rate": 8.134495523533939e-06, + "loss": 17.3331, + "step": 16730 + }, + { + "epoch": 0.30582923574679655, + "grad_norm": 7.264395494959649, + "learning_rate": 8.134264895318298e-06, + "loss": 17.9404, + "step": 16731 + }, + { + "epoch": 0.3058475149432431, + "grad_norm": 10.733387802977195, + "learning_rate": 8.134034256117332e-06, + "loss": 17.9636, + "step": 16732 + }, + { + "epoch": 0.3058657941396896, + "grad_norm": 5.704574170589308, + "learning_rate": 8.133803605931847e-06, + "loss": 17.1899, + "step": 16733 + }, + { + "epoch": 0.30588407333613615, + "grad_norm": 6.687750961798256, + "learning_rate": 8.133572944762651e-06, + "loss": 17.6118, + "step": 16734 + }, + { + "epoch": 0.3059023525325827, + "grad_norm": 8.285488445118865, + "learning_rate": 8.133342272610553e-06, + "loss": 
17.1306, + "step": 16735 + }, + { + "epoch": 0.30592063172902917, + "grad_norm": 6.001814260037967, + "learning_rate": 8.133111589476366e-06, + "loss": 17.4356, + "step": 16736 + }, + { + "epoch": 0.3059389109254757, + "grad_norm": 7.99769116871961, + "learning_rate": 8.132880895360893e-06, + "loss": 18.3703, + "step": 16737 + }, + { + "epoch": 0.30595719012192224, + "grad_norm": 7.802336244065987, + "learning_rate": 8.132650190264944e-06, + "loss": 18.3243, + "step": 16738 + }, + { + "epoch": 0.3059754693183688, + "grad_norm": 6.642160089090536, + "learning_rate": 8.132419474189328e-06, + "loss": 17.7333, + "step": 16739 + }, + { + "epoch": 0.3059937485148153, + "grad_norm": 7.335260573506195, + "learning_rate": 8.132188747134852e-06, + "loss": 17.6557, + "step": 16740 + }, + { + "epoch": 0.3060120277112618, + "grad_norm": 6.055280801158583, + "learning_rate": 8.131958009102327e-06, + "loss": 17.3898, + "step": 16741 + }, + { + "epoch": 0.3060303069077083, + "grad_norm": 5.184181120365097, + "learning_rate": 8.131727260092564e-06, + "loss": 16.957, + "step": 16742 + }, + { + "epoch": 0.30604858610415486, + "grad_norm": 7.012052898024959, + "learning_rate": 8.131496500106366e-06, + "loss": 18.0213, + "step": 16743 + }, + { + "epoch": 0.3060668653006014, + "grad_norm": 7.012947203763594, + "learning_rate": 8.131265729144544e-06, + "loss": 17.6844, + "step": 16744 + }, + { + "epoch": 0.30608514449704793, + "grad_norm": 6.594685830215474, + "learning_rate": 8.131034947207909e-06, + "loss": 17.6969, + "step": 16745 + }, + { + "epoch": 0.3061034236934944, + "grad_norm": 7.3559014298241046, + "learning_rate": 8.130804154297268e-06, + "loss": 17.7295, + "step": 16746 + }, + { + "epoch": 0.30612170288994095, + "grad_norm": 7.191641221787167, + "learning_rate": 8.130573350413428e-06, + "loss": 18.1313, + "step": 16747 + }, + { + "epoch": 0.3061399820863875, + "grad_norm": 6.974589058037992, + "learning_rate": 8.130342535557202e-06, + "loss": 17.8844, + "step": 16748 + }, + { + "epoch": 0.306158261282834, + "grad_norm": 6.325822738509391, + "learning_rate": 8.130111709729396e-06, + "loss": 17.3803, + "step": 16749 + }, + { + "epoch": 0.30617654047928056, + "grad_norm": 5.97383920703589, + "learning_rate": 8.129880872930822e-06, + "loss": 17.5041, + "step": 16750 + }, + { + "epoch": 0.30619481967572704, + "grad_norm": 5.784921630829452, + "learning_rate": 8.129650025162285e-06, + "loss": 17.3294, + "step": 16751 + }, + { + "epoch": 0.3062130988721736, + "grad_norm": 5.714783397676278, + "learning_rate": 8.129419166424597e-06, + "loss": 17.3295, + "step": 16752 + }, + { + "epoch": 0.3062313780686201, + "grad_norm": 5.472971448192495, + "learning_rate": 8.129188296718566e-06, + "loss": 17.1211, + "step": 16753 + }, + { + "epoch": 0.30624965726506664, + "grad_norm": 8.371551177531051, + "learning_rate": 8.128957416045003e-06, + "loss": 17.8683, + "step": 16754 + }, + { + "epoch": 0.3062679364615132, + "grad_norm": 6.703373792499423, + "learning_rate": 8.128726524404715e-06, + "loss": 17.5168, + "step": 16755 + }, + { + "epoch": 0.30628621565795966, + "grad_norm": 6.402467016581645, + "learning_rate": 8.128495621798511e-06, + "loss": 17.5197, + "step": 16756 + }, + { + "epoch": 0.3063044948544062, + "grad_norm": 6.957447476972262, + "learning_rate": 8.128264708227203e-06, + "loss": 17.8857, + "step": 16757 + }, + { + "epoch": 0.30632277405085273, + "grad_norm": 7.614934722454362, + "learning_rate": 8.128033783691598e-06, + "loss": 18.1989, + "step": 16758 + }, + { + "epoch": 0.30634105324729927, + 
"grad_norm": 6.2255533879289615, + "learning_rate": 8.127802848192506e-06, + "loss": 17.4933, + "step": 16759 + }, + { + "epoch": 0.30635933244374575, + "grad_norm": 6.811878880010115, + "learning_rate": 8.127571901730736e-06, + "loss": 17.951, + "step": 16760 + }, + { + "epoch": 0.3063776116401923, + "grad_norm": 6.885163504999443, + "learning_rate": 8.127340944307099e-06, + "loss": 17.8237, + "step": 16761 + }, + { + "epoch": 0.3063958908366388, + "grad_norm": 6.169290540256493, + "learning_rate": 8.127109975922402e-06, + "loss": 17.4573, + "step": 16762 + }, + { + "epoch": 0.30641417003308535, + "grad_norm": 6.963155307268487, + "learning_rate": 8.126878996577456e-06, + "loss": 17.8927, + "step": 16763 + }, + { + "epoch": 0.3064324492295319, + "grad_norm": 7.110075754402128, + "learning_rate": 8.12664800627307e-06, + "loss": 18.0326, + "step": 16764 + }, + { + "epoch": 0.30645072842597837, + "grad_norm": 7.334729002026811, + "learning_rate": 8.126417005010056e-06, + "loss": 17.9197, + "step": 16765 + }, + { + "epoch": 0.3064690076224249, + "grad_norm": 5.815015540294881, + "learning_rate": 8.12618599278922e-06, + "loss": 17.5021, + "step": 16766 + }, + { + "epoch": 0.30648728681887144, + "grad_norm": 6.733952977149314, + "learning_rate": 8.125954969611373e-06, + "loss": 17.5858, + "step": 16767 + }, + { + "epoch": 0.306505566015318, + "grad_norm": 6.286434035949967, + "learning_rate": 8.125723935477328e-06, + "loss": 17.5503, + "step": 16768 + }, + { + "epoch": 0.3065238452117645, + "grad_norm": 7.330028539524488, + "learning_rate": 8.12549289038789e-06, + "loss": 17.8128, + "step": 16769 + }, + { + "epoch": 0.306542124408211, + "grad_norm": 6.148307060165565, + "learning_rate": 8.12526183434387e-06, + "loss": 17.4417, + "step": 16770 + }, + { + "epoch": 0.3065604036046575, + "grad_norm": 6.539274763971553, + "learning_rate": 8.125030767346081e-06, + "loss": 17.5556, + "step": 16771 + }, + { + "epoch": 0.30657868280110406, + "grad_norm": 7.796567227685249, + "learning_rate": 8.124799689395328e-06, + "loss": 18.2581, + "step": 16772 + }, + { + "epoch": 0.3065969619975506, + "grad_norm": 6.881588784470505, + "learning_rate": 8.124568600492421e-06, + "loss": 17.586, + "step": 16773 + }, + { + "epoch": 0.30661524119399713, + "grad_norm": 6.720588951388159, + "learning_rate": 8.124337500638175e-06, + "loss": 17.687, + "step": 16774 + }, + { + "epoch": 0.3066335203904436, + "grad_norm": 6.651211194075465, + "learning_rate": 8.124106389833397e-06, + "loss": 17.6702, + "step": 16775 + }, + { + "epoch": 0.30665179958689015, + "grad_norm": 5.27799086682439, + "learning_rate": 8.123875268078898e-06, + "loss": 16.9764, + "step": 16776 + }, + { + "epoch": 0.3066700787833367, + "grad_norm": 5.655025221938781, + "learning_rate": 8.123644135375487e-06, + "loss": 17.2942, + "step": 16777 + }, + { + "epoch": 0.3066883579797832, + "grad_norm": 9.086716992701747, + "learning_rate": 8.123412991723975e-06, + "loss": 18.2968, + "step": 16778 + }, + { + "epoch": 0.30670663717622976, + "grad_norm": 7.865888714452238, + "learning_rate": 8.123181837125169e-06, + "loss": 18.2855, + "step": 16779 + }, + { + "epoch": 0.30672491637267624, + "grad_norm": 6.064853050171648, + "learning_rate": 8.122950671579884e-06, + "loss": 17.5467, + "step": 16780 + }, + { + "epoch": 0.3067431955691228, + "grad_norm": 5.548607974608034, + "learning_rate": 8.122719495088926e-06, + "loss": 17.1425, + "step": 16781 + }, + { + "epoch": 0.3067614747655693, + "grad_norm": 5.511890499092476, + "learning_rate": 8.12248830765311e-06, + 
"loss": 17.3113, + "step": 16782 + }, + { + "epoch": 0.30677975396201584, + "grad_norm": 5.745980862167241, + "learning_rate": 8.12225710927324e-06, + "loss": 17.3077, + "step": 16783 + }, + { + "epoch": 0.3067980331584624, + "grad_norm": 6.277237563776308, + "learning_rate": 8.12202589995013e-06, + "loss": 17.7336, + "step": 16784 + }, + { + "epoch": 0.30681631235490886, + "grad_norm": 6.482625000784745, + "learning_rate": 8.121794679684593e-06, + "loss": 17.507, + "step": 16785 + }, + { + "epoch": 0.3068345915513554, + "grad_norm": 6.1322004915697095, + "learning_rate": 8.121563448477434e-06, + "loss": 17.5198, + "step": 16786 + }, + { + "epoch": 0.30685287074780193, + "grad_norm": 7.408403820256594, + "learning_rate": 8.121332206329468e-06, + "loss": 17.9982, + "step": 16787 + }, + { + "epoch": 0.30687114994424847, + "grad_norm": 6.359961007502059, + "learning_rate": 8.121100953241501e-06, + "loss": 17.3535, + "step": 16788 + }, + { + "epoch": 0.306889429140695, + "grad_norm": 6.706687458532863, + "learning_rate": 8.120869689214349e-06, + "loss": 17.4008, + "step": 16789 + }, + { + "epoch": 0.3069077083371415, + "grad_norm": 6.510804917349421, + "learning_rate": 8.120638414248819e-06, + "loss": 17.6156, + "step": 16790 + }, + { + "epoch": 0.306925987533588, + "grad_norm": 6.008520202802392, + "learning_rate": 8.12040712834572e-06, + "loss": 17.4768, + "step": 16791 + }, + { + "epoch": 0.30694426673003455, + "grad_norm": 6.809351771785792, + "learning_rate": 8.120175831505865e-06, + "loss": 17.5423, + "step": 16792 + }, + { + "epoch": 0.3069625459264811, + "grad_norm": 8.908918303842555, + "learning_rate": 8.119944523730065e-06, + "loss": 18.2396, + "step": 16793 + }, + { + "epoch": 0.30698082512292757, + "grad_norm": 6.16648566698405, + "learning_rate": 8.119713205019131e-06, + "loss": 17.4055, + "step": 16794 + }, + { + "epoch": 0.3069991043193741, + "grad_norm": 6.690468500161972, + "learning_rate": 8.119481875373874e-06, + "loss": 17.5125, + "step": 16795 + }, + { + "epoch": 0.30701738351582064, + "grad_norm": 7.690193087772361, + "learning_rate": 8.1192505347951e-06, + "loss": 18.2434, + "step": 16796 + }, + { + "epoch": 0.3070356627122672, + "grad_norm": 6.21081997330213, + "learning_rate": 8.119019183283627e-06, + "loss": 17.406, + "step": 16797 + }, + { + "epoch": 0.3070539419087137, + "grad_norm": 6.7458423446584534, + "learning_rate": 8.118787820840261e-06, + "loss": 17.6053, + "step": 16798 + }, + { + "epoch": 0.3070722211051602, + "grad_norm": 6.222093098054909, + "learning_rate": 8.118556447465815e-06, + "loss": 17.4989, + "step": 16799 + }, + { + "epoch": 0.30709050030160673, + "grad_norm": 7.178904037819462, + "learning_rate": 8.118325063161099e-06, + "loss": 17.8731, + "step": 16800 + }, + { + "epoch": 0.30710877949805326, + "grad_norm": 7.297540371704384, + "learning_rate": 8.118093667926923e-06, + "loss": 17.6484, + "step": 16801 + }, + { + "epoch": 0.3071270586944998, + "grad_norm": 6.5991204891772135, + "learning_rate": 8.1178622617641e-06, + "loss": 17.5603, + "step": 16802 + }, + { + "epoch": 0.30714533789094633, + "grad_norm": 8.665284978980097, + "learning_rate": 8.11763084467344e-06, + "loss": 17.5106, + "step": 16803 + }, + { + "epoch": 0.3071636170873928, + "grad_norm": 6.996091273311296, + "learning_rate": 8.117399416655758e-06, + "loss": 17.8823, + "step": 16804 + }, + { + "epoch": 0.30718189628383935, + "grad_norm": 6.669947849826685, + "learning_rate": 8.117167977711858e-06, + "loss": 17.6966, + "step": 16805 + }, + { + "epoch": 0.3072001754802859, + 
"grad_norm": 7.306246721884619, + "learning_rate": 8.116936527842556e-06, + "loss": 17.6553, + "step": 16806 + }, + { + "epoch": 0.3072184546767324, + "grad_norm": 6.424525242590785, + "learning_rate": 8.11670506704866e-06, + "loss": 17.3905, + "step": 16807 + }, + { + "epoch": 0.30723673387317896, + "grad_norm": 6.4793656402982025, + "learning_rate": 8.116473595330985e-06, + "loss": 17.6348, + "step": 16808 + }, + { + "epoch": 0.30725501306962544, + "grad_norm": 7.7622202772727755, + "learning_rate": 8.116242112690341e-06, + "loss": 18.0591, + "step": 16809 + }, + { + "epoch": 0.307273292266072, + "grad_norm": 5.689732872215455, + "learning_rate": 8.116010619127537e-06, + "loss": 17.0951, + "step": 16810 + }, + { + "epoch": 0.3072915714625185, + "grad_norm": 7.211639722715736, + "learning_rate": 8.115779114643386e-06, + "loss": 17.9286, + "step": 16811 + }, + { + "epoch": 0.30730985065896504, + "grad_norm": 5.708995603719906, + "learning_rate": 8.1155475992387e-06, + "loss": 17.0867, + "step": 16812 + }, + { + "epoch": 0.3073281298554116, + "grad_norm": 6.614224500834429, + "learning_rate": 8.115316072914292e-06, + "loss": 17.563, + "step": 16813 + }, + { + "epoch": 0.30734640905185806, + "grad_norm": 5.890866472730227, + "learning_rate": 8.11508453567097e-06, + "loss": 17.3403, + "step": 16814 + }, + { + "epoch": 0.3073646882483046, + "grad_norm": 10.282356418660505, + "learning_rate": 8.114852987509546e-06, + "loss": 18.3379, + "step": 16815 + }, + { + "epoch": 0.30738296744475113, + "grad_norm": 6.636957431138423, + "learning_rate": 8.114621428430834e-06, + "loss": 17.5146, + "step": 16816 + }, + { + "epoch": 0.30740124664119767, + "grad_norm": 8.012350806778004, + "learning_rate": 8.114389858435643e-06, + "loss": 18.3006, + "step": 16817 + }, + { + "epoch": 0.3074195258376442, + "grad_norm": 5.57934032195251, + "learning_rate": 8.114158277524788e-06, + "loss": 17.2041, + "step": 16818 + }, + { + "epoch": 0.3074378050340907, + "grad_norm": 5.903979757135749, + "learning_rate": 8.113926685699076e-06, + "loss": 17.2361, + "step": 16819 + }, + { + "epoch": 0.3074560842305372, + "grad_norm": 5.890769605037617, + "learning_rate": 8.113695082959323e-06, + "loss": 17.2154, + "step": 16820 + }, + { + "epoch": 0.30747436342698375, + "grad_norm": 6.167782280137336, + "learning_rate": 8.113463469306338e-06, + "loss": 17.605, + "step": 16821 + }, + { + "epoch": 0.3074926426234303, + "grad_norm": 6.041483161343982, + "learning_rate": 8.113231844740934e-06, + "loss": 17.276, + "step": 16822 + }, + { + "epoch": 0.3075109218198768, + "grad_norm": 6.384100256896727, + "learning_rate": 8.113000209263923e-06, + "loss": 17.6484, + "step": 16823 + }, + { + "epoch": 0.3075292010163233, + "grad_norm": 7.4010222801699, + "learning_rate": 8.112768562876115e-06, + "loss": 17.938, + "step": 16824 + }, + { + "epoch": 0.30754748021276984, + "grad_norm": 5.227382045976373, + "learning_rate": 8.112536905578324e-06, + "loss": 17.0111, + "step": 16825 + }, + { + "epoch": 0.3075657594092164, + "grad_norm": 6.248957610211808, + "learning_rate": 8.112305237371363e-06, + "loss": 17.6124, + "step": 16826 + }, + { + "epoch": 0.3075840386056629, + "grad_norm": 8.505216408345236, + "learning_rate": 8.11207355825604e-06, + "loss": 18.6358, + "step": 16827 + }, + { + "epoch": 0.3076023178021094, + "grad_norm": 5.564588254452631, + "learning_rate": 8.111841868233169e-06, + "loss": 17.1388, + "step": 16828 + }, + { + "epoch": 0.30762059699855593, + "grad_norm": 6.53574604524088, + "learning_rate": 8.111610167303564e-06, + "loss": 
17.4384, + "step": 16829 + }, + { + "epoch": 0.30763887619500246, + "grad_norm": 5.688050011488321, + "learning_rate": 8.111378455468033e-06, + "loss": 17.2404, + "step": 16830 + }, + { + "epoch": 0.307657155391449, + "grad_norm": 7.501892039648677, + "learning_rate": 8.111146732727393e-06, + "loss": 18.3043, + "step": 16831 + }, + { + "epoch": 0.30767543458789554, + "grad_norm": 7.138462604852312, + "learning_rate": 8.110914999082453e-06, + "loss": 18.0898, + "step": 16832 + }, + { + "epoch": 0.307693713784342, + "grad_norm": 7.377713723181878, + "learning_rate": 8.110683254534026e-06, + "loss": 18.0345, + "step": 16833 + }, + { + "epoch": 0.30771199298078855, + "grad_norm": 6.676124166974646, + "learning_rate": 8.110451499082923e-06, + "loss": 17.518, + "step": 16834 + }, + { + "epoch": 0.3077302721772351, + "grad_norm": 6.495347737747358, + "learning_rate": 8.110219732729958e-06, + "loss": 17.639, + "step": 16835 + }, + { + "epoch": 0.3077485513736816, + "grad_norm": 7.532118493682625, + "learning_rate": 8.109987955475943e-06, + "loss": 18.0588, + "step": 16836 + }, + { + "epoch": 0.30776683057012816, + "grad_norm": 6.460262745617369, + "learning_rate": 8.10975616732169e-06, + "loss": 17.4713, + "step": 16837 + }, + { + "epoch": 0.30778510976657464, + "grad_norm": 6.988802108291433, + "learning_rate": 8.109524368268011e-06, + "loss": 17.7515, + "step": 16838 + }, + { + "epoch": 0.3078033889630212, + "grad_norm": 7.09823848055091, + "learning_rate": 8.10929255831572e-06, + "loss": 17.3706, + "step": 16839 + }, + { + "epoch": 0.3078216681594677, + "grad_norm": 6.16473466216064, + "learning_rate": 8.109060737465628e-06, + "loss": 17.6117, + "step": 16840 + }, + { + "epoch": 0.30783994735591425, + "grad_norm": 6.333954307177859, + "learning_rate": 8.108828905718547e-06, + "loss": 17.2888, + "step": 16841 + }, + { + "epoch": 0.3078582265523608, + "grad_norm": 7.885699569862376, + "learning_rate": 8.10859706307529e-06, + "loss": 18.4863, + "step": 16842 + }, + { + "epoch": 0.30787650574880726, + "grad_norm": 6.5957527771697055, + "learning_rate": 8.108365209536672e-06, + "loss": 17.7022, + "step": 16843 + }, + { + "epoch": 0.3078947849452538, + "grad_norm": 6.928343747044289, + "learning_rate": 8.108133345103505e-06, + "loss": 17.6217, + "step": 16844 + }, + { + "epoch": 0.30791306414170033, + "grad_norm": 5.832568707890773, + "learning_rate": 8.107901469776595e-06, + "loss": 17.4022, + "step": 16845 + }, + { + "epoch": 0.30793134333814687, + "grad_norm": 6.3452969762109435, + "learning_rate": 8.107669583556763e-06, + "loss": 17.3455, + "step": 16846 + }, + { + "epoch": 0.3079496225345934, + "grad_norm": 7.294542601090386, + "learning_rate": 8.10743768644482e-06, + "loss": 18.1352, + "step": 16847 + }, + { + "epoch": 0.3079679017310399, + "grad_norm": 6.319800452694817, + "learning_rate": 8.107205778441576e-06, + "loss": 17.7499, + "step": 16848 + }, + { + "epoch": 0.3079861809274864, + "grad_norm": 5.88348935253275, + "learning_rate": 8.106973859547847e-06, + "loss": 17.2997, + "step": 16849 + }, + { + "epoch": 0.30800446012393295, + "grad_norm": 6.2635753404745325, + "learning_rate": 8.106741929764443e-06, + "loss": 17.7991, + "step": 16850 + }, + { + "epoch": 0.3080227393203795, + "grad_norm": 5.887526511443674, + "learning_rate": 8.106509989092179e-06, + "loss": 17.331, + "step": 16851 + }, + { + "epoch": 0.308041018516826, + "grad_norm": 5.916620641359955, + "learning_rate": 8.106278037531864e-06, + "loss": 17.3204, + "step": 16852 + }, + { + "epoch": 0.3080592977132725, + "grad_norm": 
9.593246848857893, + "learning_rate": 8.106046075084317e-06, + "loss": 18.2151, + "step": 16853 + }, + { + "epoch": 0.30807757690971904, + "grad_norm": 7.7937880274598745, + "learning_rate": 8.105814101750349e-06, + "loss": 17.7964, + "step": 16854 + }, + { + "epoch": 0.3080958561061656, + "grad_norm": 6.027667395664885, + "learning_rate": 8.10558211753077e-06, + "loss": 17.1767, + "step": 16855 + }, + { + "epoch": 0.3081141353026121, + "grad_norm": 6.311521404106981, + "learning_rate": 8.105350122426393e-06, + "loss": 17.51, + "step": 16856 + }, + { + "epoch": 0.30813241449905865, + "grad_norm": 6.497565444424784, + "learning_rate": 8.105118116438037e-06, + "loss": 17.3181, + "step": 16857 + }, + { + "epoch": 0.30815069369550513, + "grad_norm": 6.621864204055428, + "learning_rate": 8.104886099566511e-06, + "loss": 17.764, + "step": 16858 + }, + { + "epoch": 0.30816897289195166, + "grad_norm": 7.942768946301585, + "learning_rate": 8.104654071812629e-06, + "loss": 17.9489, + "step": 16859 + }, + { + "epoch": 0.3081872520883982, + "grad_norm": 7.0570350180681745, + "learning_rate": 8.104422033177201e-06, + "loss": 17.6731, + "step": 16860 + }, + { + "epoch": 0.30820553128484474, + "grad_norm": 6.918981765212633, + "learning_rate": 8.104189983661047e-06, + "loss": 17.8462, + "step": 16861 + }, + { + "epoch": 0.3082238104812912, + "grad_norm": 7.644666345858515, + "learning_rate": 8.103957923264974e-06, + "loss": 17.7347, + "step": 16862 + }, + { + "epoch": 0.30824208967773775, + "grad_norm": 7.955988581314848, + "learning_rate": 8.1037258519898e-06, + "loss": 18.2606, + "step": 16863 + }, + { + "epoch": 0.3082603688741843, + "grad_norm": 6.1781673866031745, + "learning_rate": 8.103493769836332e-06, + "loss": 17.3356, + "step": 16864 + }, + { + "epoch": 0.3082786480706308, + "grad_norm": 8.316612046608348, + "learning_rate": 8.103261676805392e-06, + "loss": 18.5851, + "step": 16865 + }, + { + "epoch": 0.30829692726707736, + "grad_norm": 7.471215637665413, + "learning_rate": 8.103029572897787e-06, + "loss": 18.0248, + "step": 16866 + }, + { + "epoch": 0.30831520646352384, + "grad_norm": 5.831401656742476, + "learning_rate": 8.102797458114332e-06, + "loss": 17.2288, + "step": 16867 + }, + { + "epoch": 0.3083334856599704, + "grad_norm": 6.277362015516548, + "learning_rate": 8.102565332455843e-06, + "loss": 17.3635, + "step": 16868 + }, + { + "epoch": 0.3083517648564169, + "grad_norm": 7.113790971566835, + "learning_rate": 8.102333195923131e-06, + "loss": 17.7055, + "step": 16869 + }, + { + "epoch": 0.30837004405286345, + "grad_norm": 6.126092734369032, + "learning_rate": 8.10210104851701e-06, + "loss": 17.5085, + "step": 16870 + }, + { + "epoch": 0.30838832324931, + "grad_norm": 6.734911255508399, + "learning_rate": 8.101868890238294e-06, + "loss": 17.8684, + "step": 16871 + }, + { + "epoch": 0.30840660244575646, + "grad_norm": 6.176690289964088, + "learning_rate": 8.101636721087799e-06, + "loss": 17.6733, + "step": 16872 + }, + { + "epoch": 0.308424881642203, + "grad_norm": 6.466825704602649, + "learning_rate": 8.101404541066331e-06, + "loss": 18.0116, + "step": 16873 + }, + { + "epoch": 0.30844316083864953, + "grad_norm": 8.540871578912435, + "learning_rate": 8.101172350174713e-06, + "loss": 18.262, + "step": 16874 + }, + { + "epoch": 0.30846144003509607, + "grad_norm": 6.476487326471872, + "learning_rate": 8.100940148413755e-06, + "loss": 17.787, + "step": 16875 + }, + { + "epoch": 0.3084797192315426, + "grad_norm": 6.543371046583851, + "learning_rate": 8.100707935784271e-06, + "loss": 
17.5618, + "step": 16876 + }, + { + "epoch": 0.3084979984279891, + "grad_norm": 5.191648147949043, + "learning_rate": 8.100475712287074e-06, + "loss": 17.1827, + "step": 16877 + }, + { + "epoch": 0.3085162776244356, + "grad_norm": 8.132230026553794, + "learning_rate": 8.10024347792298e-06, + "loss": 18.4899, + "step": 16878 + }, + { + "epoch": 0.30853455682088216, + "grad_norm": 7.1031659533241465, + "learning_rate": 8.100011232692799e-06, + "loss": 17.8651, + "step": 16879 + }, + { + "epoch": 0.3085528360173287, + "grad_norm": 7.425041626183908, + "learning_rate": 8.09977897659735e-06, + "loss": 17.7215, + "step": 16880 + }, + { + "epoch": 0.3085711152137752, + "grad_norm": 7.556925177089032, + "learning_rate": 8.099546709637444e-06, + "loss": 18.1119, + "step": 16881 + }, + { + "epoch": 0.3085893944102217, + "grad_norm": 6.519700635836287, + "learning_rate": 8.099314431813895e-06, + "loss": 17.8108, + "step": 16882 + }, + { + "epoch": 0.30860767360666824, + "grad_norm": 6.792229799249035, + "learning_rate": 8.099082143127518e-06, + "loss": 17.4976, + "step": 16883 + }, + { + "epoch": 0.3086259528031148, + "grad_norm": 5.103482726645891, + "learning_rate": 8.098849843579128e-06, + "loss": 17.0982, + "step": 16884 + }, + { + "epoch": 0.3086442319995613, + "grad_norm": 6.815483530135922, + "learning_rate": 8.098617533169538e-06, + "loss": 17.8734, + "step": 16885 + }, + { + "epoch": 0.30866251119600785, + "grad_norm": 6.605615636643606, + "learning_rate": 8.098385211899562e-06, + "loss": 17.7568, + "step": 16886 + }, + { + "epoch": 0.30868079039245433, + "grad_norm": 6.773876397373886, + "learning_rate": 8.098152879770015e-06, + "loss": 17.5579, + "step": 16887 + }, + { + "epoch": 0.30869906958890087, + "grad_norm": 6.326966428213546, + "learning_rate": 8.09792053678171e-06, + "loss": 17.5555, + "step": 16888 + }, + { + "epoch": 0.3087173487853474, + "grad_norm": 7.177618927533809, + "learning_rate": 8.097688182935463e-06, + "loss": 17.9881, + "step": 16889 + }, + { + "epoch": 0.30873562798179394, + "grad_norm": 6.649670489629989, + "learning_rate": 8.097455818232089e-06, + "loss": 17.6745, + "step": 16890 + }, + { + "epoch": 0.30875390717824047, + "grad_norm": 7.538466543305304, + "learning_rate": 8.097223442672399e-06, + "loss": 17.9156, + "step": 16891 + }, + { + "epoch": 0.30877218637468695, + "grad_norm": 7.372745503381064, + "learning_rate": 8.096991056257212e-06, + "loss": 17.7921, + "step": 16892 + }, + { + "epoch": 0.3087904655711335, + "grad_norm": 7.89383907410312, + "learning_rate": 8.096758658987339e-06, + "loss": 18.3288, + "step": 16893 + }, + { + "epoch": 0.30880874476758, + "grad_norm": 6.242170963309946, + "learning_rate": 8.096526250863594e-06, + "loss": 17.5095, + "step": 16894 + }, + { + "epoch": 0.30882702396402656, + "grad_norm": 6.301612936572135, + "learning_rate": 8.096293831886795e-06, + "loss": 17.3264, + "step": 16895 + }, + { + "epoch": 0.30884530316047304, + "grad_norm": 5.8358432367779836, + "learning_rate": 8.096061402057755e-06, + "loss": 17.351, + "step": 16896 + }, + { + "epoch": 0.3088635823569196, + "grad_norm": 5.0439347738848, + "learning_rate": 8.095828961377287e-06, + "loss": 16.9685, + "step": 16897 + }, + { + "epoch": 0.3088818615533661, + "grad_norm": 5.497715837812876, + "learning_rate": 8.095596509846209e-06, + "loss": 17.1605, + "step": 16898 + }, + { + "epoch": 0.30890014074981265, + "grad_norm": 6.384977825418099, + "learning_rate": 8.095364047465333e-06, + "loss": 17.379, + "step": 16899 + }, + { + "epoch": 0.3089184199462592, + 
"grad_norm": 6.976100335057817, + "learning_rate": 8.095131574235473e-06, + "loss": 17.9314, + "step": 16900 + }, + { + "epoch": 0.30893669914270566, + "grad_norm": 6.306299971912668, + "learning_rate": 8.094899090157447e-06, + "loss": 17.5288, + "step": 16901 + }, + { + "epoch": 0.3089549783391522, + "grad_norm": 10.584265253117026, + "learning_rate": 8.094666595232067e-06, + "loss": 17.918, + "step": 16902 + }, + { + "epoch": 0.30897325753559873, + "grad_norm": 5.874533561464786, + "learning_rate": 8.094434089460152e-06, + "loss": 17.2669, + "step": 16903 + }, + { + "epoch": 0.30899153673204527, + "grad_norm": 6.055407560537636, + "learning_rate": 8.094201572842511e-06, + "loss": 16.9438, + "step": 16904 + }, + { + "epoch": 0.3090098159284918, + "grad_norm": 7.248798402285025, + "learning_rate": 8.093969045379964e-06, + "loss": 18.0245, + "step": 16905 + }, + { + "epoch": 0.3090280951249383, + "grad_norm": 6.966026641683622, + "learning_rate": 8.093736507073325e-06, + "loss": 17.3831, + "step": 16906 + }, + { + "epoch": 0.3090463743213848, + "grad_norm": 7.450958004208825, + "learning_rate": 8.093503957923404e-06, + "loss": 18.1105, + "step": 16907 + }, + { + "epoch": 0.30906465351783136, + "grad_norm": 6.392306557549609, + "learning_rate": 8.093271397931022e-06, + "loss": 17.6111, + "step": 16908 + }, + { + "epoch": 0.3090829327142779, + "grad_norm": 5.670538844079594, + "learning_rate": 8.093038827096993e-06, + "loss": 17.0552, + "step": 16909 + }, + { + "epoch": 0.3091012119107244, + "grad_norm": 7.512646870486651, + "learning_rate": 8.092806245422131e-06, + "loss": 17.7541, + "step": 16910 + }, + { + "epoch": 0.3091194911071709, + "grad_norm": 6.903661762816213, + "learning_rate": 8.092573652907252e-06, + "loss": 17.8163, + "step": 16911 + }, + { + "epoch": 0.30913777030361744, + "grad_norm": 7.138344336056069, + "learning_rate": 8.092341049553168e-06, + "loss": 17.9178, + "step": 16912 + }, + { + "epoch": 0.309156049500064, + "grad_norm": 6.014316542066885, + "learning_rate": 8.0921084353607e-06, + "loss": 17.5349, + "step": 16913 + }, + { + "epoch": 0.3091743286965105, + "grad_norm": 6.103595161617867, + "learning_rate": 8.091875810330658e-06, + "loss": 17.3808, + "step": 16914 + }, + { + "epoch": 0.30919260789295705, + "grad_norm": 5.635502138542043, + "learning_rate": 8.09164317446386e-06, + "loss": 17.4035, + "step": 16915 + }, + { + "epoch": 0.30921088708940353, + "grad_norm": 6.6603190080422365, + "learning_rate": 8.091410527761123e-06, + "loss": 17.4741, + "step": 16916 + }, + { + "epoch": 0.30922916628585007, + "grad_norm": 7.082998085470822, + "learning_rate": 8.09117787022326e-06, + "loss": 17.6634, + "step": 16917 + }, + { + "epoch": 0.3092474454822966, + "grad_norm": 7.799159186861457, + "learning_rate": 8.090945201851086e-06, + "loss": 18.1, + "step": 16918 + }, + { + "epoch": 0.30926572467874314, + "grad_norm": 7.639306056104836, + "learning_rate": 8.090712522645417e-06, + "loss": 17.9958, + "step": 16919 + }, + { + "epoch": 0.3092840038751897, + "grad_norm": 6.07550528023007, + "learning_rate": 8.090479832607069e-06, + "loss": 17.4038, + "step": 16920 + }, + { + "epoch": 0.30930228307163615, + "grad_norm": 7.2802138718397345, + "learning_rate": 8.090247131736857e-06, + "loss": 18.0411, + "step": 16921 + }, + { + "epoch": 0.3093205622680827, + "grad_norm": 7.460016928913056, + "learning_rate": 8.090014420035597e-06, + "loss": 18.155, + "step": 16922 + }, + { + "epoch": 0.3093388414645292, + "grad_norm": 6.719145207687469, + "learning_rate": 8.089781697504105e-06, + 
"loss": 17.7742, + "step": 16923 + }, + { + "epoch": 0.30935712066097576, + "grad_norm": 6.374124792853929, + "learning_rate": 8.089548964143196e-06, + "loss": 17.6679, + "step": 16924 + }, + { + "epoch": 0.3093753998574223, + "grad_norm": 6.882657522479925, + "learning_rate": 8.089316219953687e-06, + "loss": 17.5267, + "step": 16925 + }, + { + "epoch": 0.3093936790538688, + "grad_norm": 7.636590366002228, + "learning_rate": 8.089083464936392e-06, + "loss": 17.5369, + "step": 16926 + }, + { + "epoch": 0.3094119582503153, + "grad_norm": 6.799639629794637, + "learning_rate": 8.088850699092127e-06, + "loss": 17.92, + "step": 16927 + }, + { + "epoch": 0.30943023744676185, + "grad_norm": 6.764508225258617, + "learning_rate": 8.08861792242171e-06, + "loss": 17.5539, + "step": 16928 + }, + { + "epoch": 0.3094485166432084, + "grad_norm": 7.029403941083494, + "learning_rate": 8.088385134925953e-06, + "loss": 17.6364, + "step": 16929 + }, + { + "epoch": 0.30946679583965486, + "grad_norm": 6.558432469824841, + "learning_rate": 8.088152336605674e-06, + "loss": 17.4582, + "step": 16930 + }, + { + "epoch": 0.3094850750361014, + "grad_norm": 6.7557714165906955, + "learning_rate": 8.08791952746169e-06, + "loss": 17.7139, + "step": 16931 + }, + { + "epoch": 0.30950335423254793, + "grad_norm": 6.897953373824674, + "learning_rate": 8.087686707494817e-06, + "loss": 17.8887, + "step": 16932 + }, + { + "epoch": 0.30952163342899447, + "grad_norm": 6.63615805785581, + "learning_rate": 8.087453876705868e-06, + "loss": 17.7007, + "step": 16933 + }, + { + "epoch": 0.309539912625441, + "grad_norm": 6.515605327772047, + "learning_rate": 8.087221035095662e-06, + "loss": 17.4209, + "step": 16934 + }, + { + "epoch": 0.3095581918218875, + "grad_norm": 5.315636922119088, + "learning_rate": 8.086988182665016e-06, + "loss": 16.9972, + "step": 16935 + }, + { + "epoch": 0.309576471018334, + "grad_norm": 6.8677499121840775, + "learning_rate": 8.086755319414743e-06, + "loss": 17.692, + "step": 16936 + }, + { + "epoch": 0.30959475021478056, + "grad_norm": 7.371046058676606, + "learning_rate": 8.08652244534566e-06, + "loss": 18.0294, + "step": 16937 + }, + { + "epoch": 0.3096130294112271, + "grad_norm": 7.992511094900772, + "learning_rate": 8.086289560458583e-06, + "loss": 17.8508, + "step": 16938 + }, + { + "epoch": 0.3096313086076736, + "grad_norm": 6.380923803072536, + "learning_rate": 8.086056664754328e-06, + "loss": 17.919, + "step": 16939 + }, + { + "epoch": 0.3096495878041201, + "grad_norm": 6.475349880439513, + "learning_rate": 8.085823758233716e-06, + "loss": 17.7402, + "step": 16940 + }, + { + "epoch": 0.30966786700056664, + "grad_norm": 6.118767857981034, + "learning_rate": 8.085590840897558e-06, + "loss": 17.4519, + "step": 16941 + }, + { + "epoch": 0.3096861461970132, + "grad_norm": 6.533876579597839, + "learning_rate": 8.08535791274667e-06, + "loss": 17.2101, + "step": 16942 + }, + { + "epoch": 0.3097044253934597, + "grad_norm": 6.731862506967125, + "learning_rate": 8.085124973781872e-06, + "loss": 17.3575, + "step": 16943 + }, + { + "epoch": 0.30972270458990625, + "grad_norm": 7.39391206942118, + "learning_rate": 8.084892024003978e-06, + "loss": 17.8833, + "step": 16944 + }, + { + "epoch": 0.30974098378635273, + "grad_norm": 6.661848016390472, + "learning_rate": 8.084659063413805e-06, + "loss": 17.6427, + "step": 16945 + }, + { + "epoch": 0.30975926298279927, + "grad_norm": 6.019351705212323, + "learning_rate": 8.08442609201217e-06, + "loss": 17.5766, + "step": 16946 + }, + { + "epoch": 0.3097775421792458, + 
"grad_norm": 6.303805987317659, + "learning_rate": 8.084193109799889e-06, + "loss": 17.7708, + "step": 16947 + }, + { + "epoch": 0.30979582137569234, + "grad_norm": 8.651229911106629, + "learning_rate": 8.08396011677778e-06, + "loss": 18.3284, + "step": 16948 + }, + { + "epoch": 0.3098141005721389, + "grad_norm": 7.170146505652532, + "learning_rate": 8.083727112946657e-06, + "loss": 17.5075, + "step": 16949 + }, + { + "epoch": 0.30983237976858535, + "grad_norm": 5.813648845249317, + "learning_rate": 8.083494098307338e-06, + "loss": 17.4887, + "step": 16950 + }, + { + "epoch": 0.3098506589650319, + "grad_norm": 5.079115814545762, + "learning_rate": 8.08326107286064e-06, + "loss": 17.0961, + "step": 16951 + }, + { + "epoch": 0.3098689381614784, + "grad_norm": 6.967427799136651, + "learning_rate": 8.08302803660738e-06, + "loss": 17.8371, + "step": 16952 + }, + { + "epoch": 0.30988721735792496, + "grad_norm": 5.887776100616813, + "learning_rate": 8.082794989548372e-06, + "loss": 17.3094, + "step": 16953 + }, + { + "epoch": 0.3099054965543715, + "grad_norm": 5.590935783414069, + "learning_rate": 8.08256193168444e-06, + "loss": 17.3391, + "step": 16954 + }, + { + "epoch": 0.309923775750818, + "grad_norm": 5.9681028865587935, + "learning_rate": 8.082328863016392e-06, + "loss": 17.4024, + "step": 16955 + }, + { + "epoch": 0.3099420549472645, + "grad_norm": 7.224395964372872, + "learning_rate": 8.08209578354505e-06, + "loss": 17.9612, + "step": 16956 + }, + { + "epoch": 0.30996033414371105, + "grad_norm": 6.998488998901023, + "learning_rate": 8.081862693271228e-06, + "loss": 17.6941, + "step": 16957 + }, + { + "epoch": 0.3099786133401576, + "grad_norm": 6.834681750089619, + "learning_rate": 8.081629592195748e-06, + "loss": 17.6734, + "step": 16958 + }, + { + "epoch": 0.3099968925366041, + "grad_norm": 6.348617706162432, + "learning_rate": 8.08139648031942e-06, + "loss": 17.6922, + "step": 16959 + }, + { + "epoch": 0.3100151717330506, + "grad_norm": 6.926850762857286, + "learning_rate": 8.081163357643067e-06, + "loss": 17.7738, + "step": 16960 + }, + { + "epoch": 0.31003345092949713, + "grad_norm": 5.575484399543014, + "learning_rate": 8.080930224167505e-06, + "loss": 17.1479, + "step": 16961 + }, + { + "epoch": 0.31005173012594367, + "grad_norm": 6.297593836898856, + "learning_rate": 8.080697079893547e-06, + "loss": 17.2397, + "step": 16962 + }, + { + "epoch": 0.3100700093223902, + "grad_norm": 5.373837482153073, + "learning_rate": 8.080463924822016e-06, + "loss": 17.2421, + "step": 16963 + }, + { + "epoch": 0.3100882885188367, + "grad_norm": 6.251755688410515, + "learning_rate": 8.080230758953725e-06, + "loss": 17.6207, + "step": 16964 + }, + { + "epoch": 0.3101065677152832, + "grad_norm": 5.887693642675731, + "learning_rate": 8.07999758228949e-06, + "loss": 17.246, + "step": 16965 + }, + { + "epoch": 0.31012484691172976, + "grad_norm": 6.622014302673911, + "learning_rate": 8.079764394830132e-06, + "loss": 17.7236, + "step": 16966 + }, + { + "epoch": 0.3101431261081763, + "grad_norm": 6.26840513401978, + "learning_rate": 8.079531196576468e-06, + "loss": 17.4721, + "step": 16967 + }, + { + "epoch": 0.31016140530462283, + "grad_norm": 7.617899115028561, + "learning_rate": 8.079297987529315e-06, + "loss": 18.0188, + "step": 16968 + }, + { + "epoch": 0.3101796845010693, + "grad_norm": 6.255883472661476, + "learning_rate": 8.079064767689489e-06, + "loss": 17.4223, + "step": 16969 + }, + { + "epoch": 0.31019796369751584, + "grad_norm": 5.44702046069408, + "learning_rate": 8.078831537057809e-06, + 
"loss": 17.2657, + "step": 16970 + }, + { + "epoch": 0.3102162428939624, + "grad_norm": 6.8017262707599375, + "learning_rate": 8.07859829563509e-06, + "loss": 17.8041, + "step": 16971 + }, + { + "epoch": 0.3102345220904089, + "grad_norm": 7.582731197635068, + "learning_rate": 8.078365043422153e-06, + "loss": 17.7339, + "step": 16972 + }, + { + "epoch": 0.31025280128685545, + "grad_norm": 6.277249513335438, + "learning_rate": 8.078131780419811e-06, + "loss": 17.3556, + "step": 16973 + }, + { + "epoch": 0.31027108048330193, + "grad_norm": 6.069702885550678, + "learning_rate": 8.077898506628887e-06, + "loss": 17.4744, + "step": 16974 + }, + { + "epoch": 0.31028935967974847, + "grad_norm": 6.704836121399122, + "learning_rate": 8.077665222050195e-06, + "loss": 17.6548, + "step": 16975 + }, + { + "epoch": 0.310307638876195, + "grad_norm": 6.304636426754897, + "learning_rate": 8.077431926684552e-06, + "loss": 17.4647, + "step": 16976 + }, + { + "epoch": 0.31032591807264154, + "grad_norm": 6.967132063925525, + "learning_rate": 8.077198620532779e-06, + "loss": 17.773, + "step": 16977 + }, + { + "epoch": 0.3103441972690881, + "grad_norm": 7.629948796757115, + "learning_rate": 8.076965303595692e-06, + "loss": 17.9232, + "step": 16978 + }, + { + "epoch": 0.31036247646553455, + "grad_norm": 7.415482202614117, + "learning_rate": 8.076731975874107e-06, + "loss": 18.1143, + "step": 16979 + }, + { + "epoch": 0.3103807556619811, + "grad_norm": 6.065671198356489, + "learning_rate": 8.076498637368844e-06, + "loss": 17.5238, + "step": 16980 + }, + { + "epoch": 0.3103990348584276, + "grad_norm": 6.143485099025842, + "learning_rate": 8.07626528808072e-06, + "loss": 17.4993, + "step": 16981 + }, + { + "epoch": 0.31041731405487416, + "grad_norm": 6.107688265313014, + "learning_rate": 8.076031928010554e-06, + "loss": 17.4624, + "step": 16982 + }, + { + "epoch": 0.3104355932513207, + "grad_norm": 9.391764640027931, + "learning_rate": 8.075798557159163e-06, + "loss": 18.4911, + "step": 16983 + }, + { + "epoch": 0.3104538724477672, + "grad_norm": 6.056483632792479, + "learning_rate": 8.075565175527365e-06, + "loss": 17.4349, + "step": 16984 + }, + { + "epoch": 0.3104721516442137, + "grad_norm": 5.903959445175416, + "learning_rate": 8.075331783115977e-06, + "loss": 17.3011, + "step": 16985 + }, + { + "epoch": 0.31049043084066025, + "grad_norm": 5.315413721020948, + "learning_rate": 8.075098379925818e-06, + "loss": 17.0139, + "step": 16986 + }, + { + "epoch": 0.3105087100371068, + "grad_norm": 7.277900017474505, + "learning_rate": 8.074864965957706e-06, + "loss": 17.9004, + "step": 16987 + }, + { + "epoch": 0.3105269892335533, + "grad_norm": 6.132393360020543, + "learning_rate": 8.07463154121246e-06, + "loss": 17.6193, + "step": 16988 + }, + { + "epoch": 0.3105452684299998, + "grad_norm": 5.281859849348049, + "learning_rate": 8.074398105690897e-06, + "loss": 17.1315, + "step": 16989 + }, + { + "epoch": 0.31056354762644633, + "grad_norm": 7.063814869684254, + "learning_rate": 8.074164659393834e-06, + "loss": 17.4937, + "step": 16990 + }, + { + "epoch": 0.31058182682289287, + "grad_norm": 6.327726563585208, + "learning_rate": 8.073931202322092e-06, + "loss": 17.6545, + "step": 16991 + }, + { + "epoch": 0.3106001060193394, + "grad_norm": 6.2286458775949285, + "learning_rate": 8.073697734476489e-06, + "loss": 17.3014, + "step": 16992 + }, + { + "epoch": 0.31061838521578594, + "grad_norm": 6.557859461936198, + "learning_rate": 8.07346425585784e-06, + "loss": 17.4907, + "step": 16993 + }, + { + "epoch": 0.3106366644122324, + 
"grad_norm": 6.85632582671068, + "learning_rate": 8.073230766466966e-06, + "loss": 17.8222, + "step": 16994 + }, + { + "epoch": 0.31065494360867896, + "grad_norm": 7.178275517134839, + "learning_rate": 8.072997266304686e-06, + "loss": 17.7144, + "step": 16995 + }, + { + "epoch": 0.3106732228051255, + "grad_norm": 5.980530187088951, + "learning_rate": 8.072763755371816e-06, + "loss": 17.7154, + "step": 16996 + }, + { + "epoch": 0.31069150200157203, + "grad_norm": 6.507905260479088, + "learning_rate": 8.072530233669176e-06, + "loss": 17.8949, + "step": 16997 + }, + { + "epoch": 0.3107097811980185, + "grad_norm": 7.817112705747621, + "learning_rate": 8.072296701197584e-06, + "loss": 18.3349, + "step": 16998 + }, + { + "epoch": 0.31072806039446504, + "grad_norm": 6.768726074333093, + "learning_rate": 8.07206315795786e-06, + "loss": 18.1172, + "step": 16999 + }, + { + "epoch": 0.3107463395909116, + "grad_norm": 7.226745209650806, + "learning_rate": 8.071829603950821e-06, + "loss": 18.185, + "step": 17000 + }, + { + "epoch": 0.3107646187873581, + "grad_norm": 5.905469271144069, + "learning_rate": 8.071596039177284e-06, + "loss": 17.3604, + "step": 17001 + }, + { + "epoch": 0.31078289798380465, + "grad_norm": 6.731758686543261, + "learning_rate": 8.071362463638071e-06, + "loss": 17.4995, + "step": 17002 + }, + { + "epoch": 0.31080117718025113, + "grad_norm": 7.016546984059646, + "learning_rate": 8.071128877333999e-06, + "loss": 17.7842, + "step": 17003 + }, + { + "epoch": 0.31081945637669767, + "grad_norm": 6.972725771772177, + "learning_rate": 8.070895280265884e-06, + "loss": 18.0233, + "step": 17004 + }, + { + "epoch": 0.3108377355731442, + "grad_norm": 6.619120149588867, + "learning_rate": 8.07066167243455e-06, + "loss": 17.742, + "step": 17005 + }, + { + "epoch": 0.31085601476959074, + "grad_norm": 7.179735912054314, + "learning_rate": 8.070428053840816e-06, + "loss": 17.9021, + "step": 17006 + }, + { + "epoch": 0.3108742939660373, + "grad_norm": 6.547854175047619, + "learning_rate": 8.070194424485494e-06, + "loss": 17.6715, + "step": 17007 + }, + { + "epoch": 0.31089257316248375, + "grad_norm": 6.141519537589516, + "learning_rate": 8.069960784369407e-06, + "loss": 17.4044, + "step": 17008 + }, + { + "epoch": 0.3109108523589303, + "grad_norm": 6.107340651125197, + "learning_rate": 8.069727133493376e-06, + "loss": 17.4647, + "step": 17009 + }, + { + "epoch": 0.3109291315553768, + "grad_norm": 6.769323403249451, + "learning_rate": 8.069493471858216e-06, + "loss": 17.6992, + "step": 17010 + }, + { + "epoch": 0.31094741075182336, + "grad_norm": 5.643737975840195, + "learning_rate": 8.06925979946475e-06, + "loss": 17.2691, + "step": 17011 + }, + { + "epoch": 0.3109656899482699, + "grad_norm": 6.782297245436195, + "learning_rate": 8.069026116313791e-06, + "loss": 17.8015, + "step": 17012 + }, + { + "epoch": 0.3109839691447164, + "grad_norm": 6.4714854196737175, + "learning_rate": 8.068792422406167e-06, + "loss": 17.439, + "step": 17013 + }, + { + "epoch": 0.3110022483411629, + "grad_norm": 6.747759203050445, + "learning_rate": 8.068558717742688e-06, + "loss": 17.7828, + "step": 17014 + }, + { + "epoch": 0.31102052753760945, + "grad_norm": 7.009226825907778, + "learning_rate": 8.068325002324177e-06, + "loss": 17.5213, + "step": 17015 + }, + { + "epoch": 0.311038806734056, + "grad_norm": 5.484043950856844, + "learning_rate": 8.068091276151454e-06, + "loss": 17.116, + "step": 17016 + }, + { + "epoch": 0.3110570859305025, + "grad_norm": 6.943562688903507, + "learning_rate": 8.067857539225338e-06, + 
"loss": 17.909, + "step": 17017 + }, + { + "epoch": 0.311075365126949, + "grad_norm": 7.288566353969405, + "learning_rate": 8.067623791546646e-06, + "loss": 17.8718, + "step": 17018 + }, + { + "epoch": 0.31109364432339554, + "grad_norm": 8.31020159525137, + "learning_rate": 8.0673900331162e-06, + "loss": 18.0491, + "step": 17019 + }, + { + "epoch": 0.31111192351984207, + "grad_norm": 6.679492595636824, + "learning_rate": 8.067156263934818e-06, + "loss": 17.6752, + "step": 17020 + }, + { + "epoch": 0.3111302027162886, + "grad_norm": 6.486200578548156, + "learning_rate": 8.066922484003319e-06, + "loss": 17.8335, + "step": 17021 + }, + { + "epoch": 0.31114848191273514, + "grad_norm": 6.565739699048459, + "learning_rate": 8.066688693322523e-06, + "loss": 17.5224, + "step": 17022 + }, + { + "epoch": 0.3111667611091816, + "grad_norm": 6.113042705635595, + "learning_rate": 8.06645489189325e-06, + "loss": 17.7832, + "step": 17023 + }, + { + "epoch": 0.31118504030562816, + "grad_norm": 6.835543180942098, + "learning_rate": 8.066221079716317e-06, + "loss": 17.4553, + "step": 17024 + }, + { + "epoch": 0.3112033195020747, + "grad_norm": 5.97030877167702, + "learning_rate": 8.065987256792547e-06, + "loss": 17.1874, + "step": 17025 + }, + { + "epoch": 0.31122159869852123, + "grad_norm": 6.18769337467142, + "learning_rate": 8.065753423122755e-06, + "loss": 17.3374, + "step": 17026 + }, + { + "epoch": 0.31123987789496776, + "grad_norm": 6.025111695620645, + "learning_rate": 8.065519578707766e-06, + "loss": 17.1555, + "step": 17027 + }, + { + "epoch": 0.31125815709141424, + "grad_norm": 6.695901983329723, + "learning_rate": 8.065285723548398e-06, + "loss": 17.8727, + "step": 17028 + }, + { + "epoch": 0.3112764362878608, + "grad_norm": 8.023883962901643, + "learning_rate": 8.065051857645466e-06, + "loss": 18.1035, + "step": 17029 + }, + { + "epoch": 0.3112947154843073, + "grad_norm": 6.898508469798952, + "learning_rate": 8.064817980999794e-06, + "loss": 17.9484, + "step": 17030 + }, + { + "epoch": 0.31131299468075385, + "grad_norm": 6.922422156492689, + "learning_rate": 8.064584093612203e-06, + "loss": 17.7561, + "step": 17031 + }, + { + "epoch": 0.31133127387720033, + "grad_norm": 6.895024630554713, + "learning_rate": 8.064350195483509e-06, + "loss": 17.9243, + "step": 17032 + }, + { + "epoch": 0.31134955307364687, + "grad_norm": 9.128823275667488, + "learning_rate": 8.064116286614535e-06, + "loss": 18.3941, + "step": 17033 + }, + { + "epoch": 0.3113678322700934, + "grad_norm": 6.110840694777033, + "learning_rate": 8.063882367006098e-06, + "loss": 17.4976, + "step": 17034 + }, + { + "epoch": 0.31138611146653994, + "grad_norm": 6.416235495914686, + "learning_rate": 8.06364843665902e-06, + "loss": 17.8703, + "step": 17035 + }, + { + "epoch": 0.3114043906629865, + "grad_norm": 6.764230592310325, + "learning_rate": 8.063414495574118e-06, + "loss": 17.8869, + "step": 17036 + }, + { + "epoch": 0.31142266985943295, + "grad_norm": 6.669468797976102, + "learning_rate": 8.063180543752216e-06, + "loss": 17.8181, + "step": 17037 + }, + { + "epoch": 0.3114409490558795, + "grad_norm": 6.2584630470038904, + "learning_rate": 8.062946581194131e-06, + "loss": 17.4431, + "step": 17038 + }, + { + "epoch": 0.311459228252326, + "grad_norm": 9.804785148077373, + "learning_rate": 8.062712607900685e-06, + "loss": 18.5277, + "step": 17039 + }, + { + "epoch": 0.31147750744877256, + "grad_norm": 7.235891141499928, + "learning_rate": 8.062478623872698e-06, + "loss": 17.916, + "step": 17040 + }, + { + "epoch": 0.3114957866452191, + 
"grad_norm": 7.355908158041542, + "learning_rate": 8.062244629110986e-06, + "loss": 17.6022, + "step": 17041 + }, + { + "epoch": 0.3115140658416656, + "grad_norm": 7.182154954017314, + "learning_rate": 8.062010623616375e-06, + "loss": 17.7456, + "step": 17042 + }, + { + "epoch": 0.3115323450381121, + "grad_norm": 10.286129801372665, + "learning_rate": 8.06177660738968e-06, + "loss": 17.9494, + "step": 17043 + }, + { + "epoch": 0.31155062423455865, + "grad_norm": 6.156373082601276, + "learning_rate": 8.061542580431726e-06, + "loss": 17.349, + "step": 17044 + }, + { + "epoch": 0.3115689034310052, + "grad_norm": 6.827769349808465, + "learning_rate": 8.06130854274333e-06, + "loss": 17.6985, + "step": 17045 + }, + { + "epoch": 0.3115871826274517, + "grad_norm": 7.673966913137476, + "learning_rate": 8.061074494325315e-06, + "loss": 17.7679, + "step": 17046 + }, + { + "epoch": 0.3116054618238982, + "grad_norm": 9.078005760136998, + "learning_rate": 8.060840435178498e-06, + "loss": 18.4449, + "step": 17047 + }, + { + "epoch": 0.31162374102034474, + "grad_norm": 5.808936845800925, + "learning_rate": 8.0606063653037e-06, + "loss": 17.1379, + "step": 17048 + }, + { + "epoch": 0.31164202021679127, + "grad_norm": 10.06711807599597, + "learning_rate": 8.060372284701743e-06, + "loss": 18.286, + "step": 17049 + }, + { + "epoch": 0.3116602994132378, + "grad_norm": 5.761124065838537, + "learning_rate": 8.060138193373446e-06, + "loss": 17.0214, + "step": 17050 + }, + { + "epoch": 0.31167857860968434, + "grad_norm": 6.014642946448462, + "learning_rate": 8.059904091319633e-06, + "loss": 17.4176, + "step": 17051 + }, + { + "epoch": 0.3116968578061308, + "grad_norm": 5.110580803887255, + "learning_rate": 8.059669978541118e-06, + "loss": 16.9478, + "step": 17052 + }, + { + "epoch": 0.31171513700257736, + "grad_norm": 6.358176665756766, + "learning_rate": 8.059435855038727e-06, + "loss": 17.3045, + "step": 17053 + }, + { + "epoch": 0.3117334161990239, + "grad_norm": 6.821562150145217, + "learning_rate": 8.05920172081328e-06, + "loss": 17.5357, + "step": 17054 + }, + { + "epoch": 0.31175169539547043, + "grad_norm": 7.502017033052161, + "learning_rate": 8.058967575865593e-06, + "loss": 17.9821, + "step": 17055 + }, + { + "epoch": 0.31176997459191697, + "grad_norm": 6.404123994690587, + "learning_rate": 8.058733420196492e-06, + "loss": 17.3785, + "step": 17056 + }, + { + "epoch": 0.31178825378836345, + "grad_norm": 5.824909042286776, + "learning_rate": 8.058499253806797e-06, + "loss": 17.0588, + "step": 17057 + }, + { + "epoch": 0.31180653298481, + "grad_norm": 8.893768353879862, + "learning_rate": 8.058265076697327e-06, + "loss": 18.3984, + "step": 17058 + }, + { + "epoch": 0.3118248121812565, + "grad_norm": 8.36422672697942, + "learning_rate": 8.058030888868902e-06, + "loss": 18.1194, + "step": 17059 + }, + { + "epoch": 0.31184309137770305, + "grad_norm": 7.707476457887245, + "learning_rate": 8.057796690322345e-06, + "loss": 18.1633, + "step": 17060 + }, + { + "epoch": 0.3118613705741496, + "grad_norm": 5.794932126433034, + "learning_rate": 8.057562481058476e-06, + "loss": 17.2252, + "step": 17061 + }, + { + "epoch": 0.31187964977059607, + "grad_norm": 6.579128480537068, + "learning_rate": 8.057328261078116e-06, + "loss": 17.6532, + "step": 17062 + }, + { + "epoch": 0.3118979289670426, + "grad_norm": 5.953515237473084, + "learning_rate": 8.057094030382084e-06, + "loss": 17.3097, + "step": 17063 + }, + { + "epoch": 0.31191620816348914, + "grad_norm": 7.242998926533677, + "learning_rate": 8.056859788971206e-06, + 
"loss": 17.637, + "step": 17064 + }, + { + "epoch": 0.3119344873599357, + "grad_norm": 6.669121683718374, + "learning_rate": 8.056625536846297e-06, + "loss": 17.7112, + "step": 17065 + }, + { + "epoch": 0.31195276655638216, + "grad_norm": 6.234045316484745, + "learning_rate": 8.056391274008182e-06, + "loss": 17.5276, + "step": 17066 + }, + { + "epoch": 0.3119710457528287, + "grad_norm": 5.99918087491853, + "learning_rate": 8.05615700045768e-06, + "loss": 17.3289, + "step": 17067 + }, + { + "epoch": 0.3119893249492752, + "grad_norm": 7.395409281321343, + "learning_rate": 8.055922716195614e-06, + "loss": 17.9555, + "step": 17068 + }, + { + "epoch": 0.31200760414572176, + "grad_norm": 6.560024466253384, + "learning_rate": 8.055688421222802e-06, + "loss": 17.689, + "step": 17069 + }, + { + "epoch": 0.3120258833421683, + "grad_norm": 6.8100888844054595, + "learning_rate": 8.05545411554007e-06, + "loss": 17.4748, + "step": 17070 + }, + { + "epoch": 0.3120441625386148, + "grad_norm": 6.8086225586593345, + "learning_rate": 8.055219799148236e-06, + "loss": 17.5875, + "step": 17071 + }, + { + "epoch": 0.3120624417350613, + "grad_norm": 7.076630506537384, + "learning_rate": 8.05498547204812e-06, + "loss": 17.9189, + "step": 17072 + }, + { + "epoch": 0.31208072093150785, + "grad_norm": 7.134796225580065, + "learning_rate": 8.054751134240545e-06, + "loss": 17.9532, + "step": 17073 + }, + { + "epoch": 0.3120990001279544, + "grad_norm": 6.903890762980463, + "learning_rate": 8.054516785726333e-06, + "loss": 17.9626, + "step": 17074 + }, + { + "epoch": 0.3121172793244009, + "grad_norm": 6.892699558993286, + "learning_rate": 8.054282426506306e-06, + "loss": 17.8961, + "step": 17075 + }, + { + "epoch": 0.3121355585208474, + "grad_norm": 6.06007985064659, + "learning_rate": 8.054048056581283e-06, + "loss": 17.4805, + "step": 17076 + }, + { + "epoch": 0.31215383771729394, + "grad_norm": 6.5185241906590194, + "learning_rate": 8.053813675952085e-06, + "loss": 17.4921, + "step": 17077 + }, + { + "epoch": 0.31217211691374047, + "grad_norm": 7.249273040241578, + "learning_rate": 8.053579284619538e-06, + "loss": 17.394, + "step": 17078 + }, + { + "epoch": 0.312190396110187, + "grad_norm": 6.445044732571578, + "learning_rate": 8.05334488258446e-06, + "loss": 17.4595, + "step": 17079 + }, + { + "epoch": 0.31220867530663354, + "grad_norm": 6.187669512889358, + "learning_rate": 8.053110469847671e-06, + "loss": 17.4792, + "step": 17080 + }, + { + "epoch": 0.31222695450308, + "grad_norm": 6.122343555267942, + "learning_rate": 8.052876046409997e-06, + "loss": 17.619, + "step": 17081 + }, + { + "epoch": 0.31224523369952656, + "grad_norm": 6.058224000590358, + "learning_rate": 8.052641612272255e-06, + "loss": 17.3695, + "step": 17082 + }, + { + "epoch": 0.3122635128959731, + "grad_norm": 6.11590720237535, + "learning_rate": 8.052407167435271e-06, + "loss": 17.5494, + "step": 17083 + }, + { + "epoch": 0.31228179209241963, + "grad_norm": 5.560146633478227, + "learning_rate": 8.052172711899864e-06, + "loss": 17.3819, + "step": 17084 + }, + { + "epoch": 0.31230007128886617, + "grad_norm": 5.705357822499922, + "learning_rate": 8.051938245666857e-06, + "loss": 17.4352, + "step": 17085 + }, + { + "epoch": 0.31231835048531265, + "grad_norm": 5.918455183105127, + "learning_rate": 8.051703768737072e-06, + "loss": 17.2833, + "step": 17086 + }, + { + "epoch": 0.3123366296817592, + "grad_norm": 7.623831503881604, + "learning_rate": 8.051469281111329e-06, + "loss": 18.0024, + "step": 17087 + }, + { + "epoch": 0.3123549088782057, + 
"grad_norm": 7.182707774727692, + "learning_rate": 8.05123478279045e-06, + "loss": 17.6483, + "step": 17088 + }, + { + "epoch": 0.31237318807465225, + "grad_norm": 8.123794902176977, + "learning_rate": 8.05100027377526e-06, + "loss": 18.1358, + "step": 17089 + }, + { + "epoch": 0.3123914672710988, + "grad_norm": 7.305505632125513, + "learning_rate": 8.050765754066577e-06, + "loss": 17.5472, + "step": 17090 + }, + { + "epoch": 0.31240974646754527, + "grad_norm": 10.310046563194174, + "learning_rate": 8.050531223665226e-06, + "loss": 18.3201, + "step": 17091 + }, + { + "epoch": 0.3124280256639918, + "grad_norm": 7.293663182369382, + "learning_rate": 8.050296682572028e-06, + "loss": 17.6776, + "step": 17092 + }, + { + "epoch": 0.31244630486043834, + "grad_norm": 7.711378309642685, + "learning_rate": 8.050062130787803e-06, + "loss": 18.0298, + "step": 17093 + }, + { + "epoch": 0.3124645840568849, + "grad_norm": 8.998707061510684, + "learning_rate": 8.049827568313377e-06, + "loss": 18.6374, + "step": 17094 + }, + { + "epoch": 0.3124828632533314, + "grad_norm": 6.572040902501415, + "learning_rate": 8.049592995149568e-06, + "loss": 17.5308, + "step": 17095 + }, + { + "epoch": 0.3125011424497779, + "grad_norm": 7.654196776411626, + "learning_rate": 8.049358411297203e-06, + "loss": 18.1515, + "step": 17096 + }, + { + "epoch": 0.3125194216462244, + "grad_norm": 5.888627620214243, + "learning_rate": 8.049123816757098e-06, + "loss": 17.5023, + "step": 17097 + }, + { + "epoch": 0.31253770084267096, + "grad_norm": 5.840214153242996, + "learning_rate": 8.04888921153008e-06, + "loss": 17.5331, + "step": 17098 + }, + { + "epoch": 0.3125559800391175, + "grad_norm": 6.845758804187963, + "learning_rate": 8.048654595616972e-06, + "loss": 17.4805, + "step": 17099 + }, + { + "epoch": 0.312574259235564, + "grad_norm": 6.997364797480822, + "learning_rate": 8.04841996901859e-06, + "loss": 18.0936, + "step": 17100 + }, + { + "epoch": 0.3125925384320105, + "grad_norm": 7.516464323446667, + "learning_rate": 8.048185331735764e-06, + "loss": 17.8479, + "step": 17101 + }, + { + "epoch": 0.31261081762845705, + "grad_norm": 7.688519225306918, + "learning_rate": 8.047950683769312e-06, + "loss": 17.6508, + "step": 17102 + }, + { + "epoch": 0.3126290968249036, + "grad_norm": 6.727504229055415, + "learning_rate": 8.047716025120058e-06, + "loss": 17.6344, + "step": 17103 + }, + { + "epoch": 0.3126473760213501, + "grad_norm": 6.331388539219552, + "learning_rate": 8.047481355788822e-06, + "loss": 17.6653, + "step": 17104 + }, + { + "epoch": 0.3126656552177966, + "grad_norm": 6.8055450743979415, + "learning_rate": 8.047246675776428e-06, + "loss": 17.6641, + "step": 17105 + }, + { + "epoch": 0.31268393441424314, + "grad_norm": 7.724344148380758, + "learning_rate": 8.047011985083701e-06, + "loss": 18.4167, + "step": 17106 + }, + { + "epoch": 0.31270221361068967, + "grad_norm": 6.457607376909295, + "learning_rate": 8.04677728371146e-06, + "loss": 17.8015, + "step": 17107 + }, + { + "epoch": 0.3127204928071362, + "grad_norm": 10.292345069602899, + "learning_rate": 8.04654257166053e-06, + "loss": 18.1682, + "step": 17108 + }, + { + "epoch": 0.31273877200358274, + "grad_norm": 7.2397095021676385, + "learning_rate": 8.046307848931733e-06, + "loss": 17.7562, + "step": 17109 + }, + { + "epoch": 0.3127570512000292, + "grad_norm": 7.833608664386632, + "learning_rate": 8.04607311552589e-06, + "loss": 17.8266, + "step": 17110 + }, + { + "epoch": 0.31277533039647576, + "grad_norm": 9.656642046194163, + "learning_rate": 8.045838371443826e-06, + 
"loss": 17.5523, + "step": 17111 + }, + { + "epoch": 0.3127936095929223, + "grad_norm": 7.2585431274803085, + "learning_rate": 8.045603616686362e-06, + "loss": 17.7699, + "step": 17112 + }, + { + "epoch": 0.31281188878936883, + "grad_norm": 6.917059958259703, + "learning_rate": 8.045368851254322e-06, + "loss": 17.547, + "step": 17113 + }, + { + "epoch": 0.31283016798581537, + "grad_norm": 7.219994130213293, + "learning_rate": 8.04513407514853e-06, + "loss": 17.5624, + "step": 17114 + }, + { + "epoch": 0.31284844718226185, + "grad_norm": 9.500891634754796, + "learning_rate": 8.044899288369804e-06, + "loss": 18.6027, + "step": 17115 + }, + { + "epoch": 0.3128667263787084, + "grad_norm": 7.287378731024665, + "learning_rate": 8.044664490918972e-06, + "loss": 17.9434, + "step": 17116 + }, + { + "epoch": 0.3128850055751549, + "grad_norm": 7.479277181987813, + "learning_rate": 8.044429682796855e-06, + "loss": 18.0854, + "step": 17117 + }, + { + "epoch": 0.31290328477160145, + "grad_norm": 5.413721045579754, + "learning_rate": 8.044194864004276e-06, + "loss": 17.1874, + "step": 17118 + }, + { + "epoch": 0.312921563968048, + "grad_norm": 8.020478219000237, + "learning_rate": 8.043960034542058e-06, + "loss": 18.4478, + "step": 17119 + }, + { + "epoch": 0.31293984316449447, + "grad_norm": 6.171454756272949, + "learning_rate": 8.043725194411025e-06, + "loss": 17.6453, + "step": 17120 + }, + { + "epoch": 0.312958122360941, + "grad_norm": 7.381828581592629, + "learning_rate": 8.043490343612e-06, + "loss": 17.5644, + "step": 17121 + }, + { + "epoch": 0.31297640155738754, + "grad_norm": 5.4404574332533775, + "learning_rate": 8.043255482145804e-06, + "loss": 17.1108, + "step": 17122 + }, + { + "epoch": 0.3129946807538341, + "grad_norm": 8.279403270053606, + "learning_rate": 8.043020610013261e-06, + "loss": 18.4835, + "step": 17123 + }, + { + "epoch": 0.3130129599502806, + "grad_norm": 6.7496167025061675, + "learning_rate": 8.042785727215196e-06, + "loss": 17.4873, + "step": 17124 + }, + { + "epoch": 0.3130312391467271, + "grad_norm": 6.016379324428997, + "learning_rate": 8.042550833752431e-06, + "loss": 17.5257, + "step": 17125 + }, + { + "epoch": 0.3130495183431736, + "grad_norm": 5.828238094728282, + "learning_rate": 8.042315929625789e-06, + "loss": 17.3178, + "step": 17126 + }, + { + "epoch": 0.31306779753962016, + "grad_norm": 6.058292426814321, + "learning_rate": 8.042081014836094e-06, + "loss": 17.5104, + "step": 17127 + }, + { + "epoch": 0.3130860767360667, + "grad_norm": 5.795678889764795, + "learning_rate": 8.041846089384169e-06, + "loss": 17.2683, + "step": 17128 + }, + { + "epoch": 0.31310435593251323, + "grad_norm": 6.838414188100603, + "learning_rate": 8.041611153270837e-06, + "loss": 17.5137, + "step": 17129 + }, + { + "epoch": 0.3131226351289597, + "grad_norm": 7.508957516597827, + "learning_rate": 8.041376206496922e-06, + "loss": 18.0325, + "step": 17130 + }, + { + "epoch": 0.31314091432540625, + "grad_norm": 5.947618787572319, + "learning_rate": 8.041141249063249e-06, + "loss": 17.4239, + "step": 17131 + }, + { + "epoch": 0.3131591935218528, + "grad_norm": 6.988214297935935, + "learning_rate": 8.040906280970637e-06, + "loss": 17.8497, + "step": 17132 + }, + { + "epoch": 0.3131774727182993, + "grad_norm": 7.0904501986832065, + "learning_rate": 8.040671302219915e-06, + "loss": 17.9897, + "step": 17133 + }, + { + "epoch": 0.3131957519147458, + "grad_norm": 7.551212912243727, + "learning_rate": 8.040436312811902e-06, + "loss": 18.2153, + "step": 17134 + }, + { + "epoch": 0.31321403111119234, + 
"grad_norm": 7.66120681501316, + "learning_rate": 8.040201312747425e-06, + "loss": 17.5864, + "step": 17135 + }, + { + "epoch": 0.3132323103076389, + "grad_norm": 6.689147304740825, + "learning_rate": 8.039966302027305e-06, + "loss": 17.8764, + "step": 17136 + }, + { + "epoch": 0.3132505895040854, + "grad_norm": 6.95603386516969, + "learning_rate": 8.039731280652368e-06, + "loss": 17.8554, + "step": 17137 + }, + { + "epoch": 0.31326886870053194, + "grad_norm": 7.202105499569859, + "learning_rate": 8.039496248623436e-06, + "loss": 18.0454, + "step": 17138 + }, + { + "epoch": 0.3132871478969784, + "grad_norm": 7.411083961802374, + "learning_rate": 8.03926120594133e-06, + "loss": 17.7921, + "step": 17139 + }, + { + "epoch": 0.31330542709342496, + "grad_norm": 7.784527847208358, + "learning_rate": 8.039026152606883e-06, + "loss": 18.0465, + "step": 17140 + }, + { + "epoch": 0.3133237062898715, + "grad_norm": 6.618673361962217, + "learning_rate": 8.038791088620909e-06, + "loss": 17.4875, + "step": 17141 + }, + { + "epoch": 0.31334198548631803, + "grad_norm": 5.812691420482996, + "learning_rate": 8.038556013984239e-06, + "loss": 17.3837, + "step": 17142 + }, + { + "epoch": 0.31336026468276457, + "grad_norm": 7.11314530167224, + "learning_rate": 8.038320928697691e-06, + "loss": 17.7925, + "step": 17143 + }, + { + "epoch": 0.31337854387921105, + "grad_norm": 7.7265097697341, + "learning_rate": 8.038085832762095e-06, + "loss": 18.3554, + "step": 17144 + }, + { + "epoch": 0.3133968230756576, + "grad_norm": 5.447993047607807, + "learning_rate": 8.037850726178269e-06, + "loss": 17.1864, + "step": 17145 + }, + { + "epoch": 0.3134151022721041, + "grad_norm": 6.728960990362715, + "learning_rate": 8.037615608947041e-06, + "loss": 17.7867, + "step": 17146 + }, + { + "epoch": 0.31343338146855065, + "grad_norm": 7.495857224980962, + "learning_rate": 8.037380481069234e-06, + "loss": 17.8913, + "step": 17147 + }, + { + "epoch": 0.3134516606649972, + "grad_norm": 7.049243686139036, + "learning_rate": 8.037145342545671e-06, + "loss": 17.8628, + "step": 17148 + }, + { + "epoch": 0.31346993986144367, + "grad_norm": 4.711646748515058, + "learning_rate": 8.036910193377178e-06, + "loss": 16.9872, + "step": 17149 + }, + { + "epoch": 0.3134882190578902, + "grad_norm": 5.493751215946904, + "learning_rate": 8.036675033564579e-06, + "loss": 17.0835, + "step": 17150 + }, + { + "epoch": 0.31350649825433674, + "grad_norm": 5.888501284564506, + "learning_rate": 8.036439863108696e-06, + "loss": 17.3825, + "step": 17151 + }, + { + "epoch": 0.3135247774507833, + "grad_norm": 6.046177868777849, + "learning_rate": 8.036204682010355e-06, + "loss": 17.332, + "step": 17152 + }, + { + "epoch": 0.3135430566472298, + "grad_norm": 6.575959579527793, + "learning_rate": 8.03596949027038e-06, + "loss": 17.8634, + "step": 17153 + }, + { + "epoch": 0.3135613358436763, + "grad_norm": 6.607339601223778, + "learning_rate": 8.035734287889597e-06, + "loss": 17.6993, + "step": 17154 + }, + { + "epoch": 0.3135796150401228, + "grad_norm": 6.441259428163981, + "learning_rate": 8.035499074868827e-06, + "loss": 17.6536, + "step": 17155 + }, + { + "epoch": 0.31359789423656936, + "grad_norm": 6.082592525515024, + "learning_rate": 8.035263851208897e-06, + "loss": 17.4241, + "step": 17156 + }, + { + "epoch": 0.3136161734330159, + "grad_norm": 6.054719549805286, + "learning_rate": 8.03502861691063e-06, + "loss": 17.4872, + "step": 17157 + }, + { + "epoch": 0.31363445262946243, + "grad_norm": 6.86476340893, + "learning_rate": 8.034793371974851e-06, + "loss": 
17.5494, + "step": 17158 + }, + { + "epoch": 0.3136527318259089, + "grad_norm": 6.060835396448324, + "learning_rate": 8.034558116402386e-06, + "loss": 17.4649, + "step": 17159 + }, + { + "epoch": 0.31367101102235545, + "grad_norm": 7.512181779187408, + "learning_rate": 8.034322850194056e-06, + "loss": 17.8525, + "step": 17160 + }, + { + "epoch": 0.313689290218802, + "grad_norm": 7.245479470438559, + "learning_rate": 8.034087573350689e-06, + "loss": 17.899, + "step": 17161 + }, + { + "epoch": 0.3137075694152485, + "grad_norm": 7.1684862366743936, + "learning_rate": 8.03385228587311e-06, + "loss": 17.7725, + "step": 17162 + }, + { + "epoch": 0.31372584861169506, + "grad_norm": 8.30358114454457, + "learning_rate": 8.033616987762138e-06, + "loss": 17.719, + "step": 17163 + }, + { + "epoch": 0.31374412780814154, + "grad_norm": 6.560198161467762, + "learning_rate": 8.033381679018605e-06, + "loss": 17.4197, + "step": 17164 + }, + { + "epoch": 0.3137624070045881, + "grad_norm": 7.377941426853494, + "learning_rate": 8.033146359643332e-06, + "loss": 17.6037, + "step": 17165 + }, + { + "epoch": 0.3137806862010346, + "grad_norm": 5.557929775709622, + "learning_rate": 8.032911029637143e-06, + "loss": 17.1505, + "step": 17166 + }, + { + "epoch": 0.31379896539748114, + "grad_norm": 6.899013606209825, + "learning_rate": 8.032675689000864e-06, + "loss": 17.6968, + "step": 17167 + }, + { + "epoch": 0.3138172445939276, + "grad_norm": 6.5191416595380485, + "learning_rate": 8.032440337735322e-06, + "loss": 17.5975, + "step": 17168 + }, + { + "epoch": 0.31383552379037416, + "grad_norm": 7.606407530181519, + "learning_rate": 8.032204975841337e-06, + "loss": 17.8917, + "step": 17169 + }, + { + "epoch": 0.3138538029868207, + "grad_norm": 7.05771477842777, + "learning_rate": 8.031969603319737e-06, + "loss": 17.4604, + "step": 17170 + }, + { + "epoch": 0.31387208218326723, + "grad_norm": 5.3322142199112195, + "learning_rate": 8.031734220171349e-06, + "loss": 17.1174, + "step": 17171 + }, + { + "epoch": 0.31389036137971377, + "grad_norm": 7.02892773669566, + "learning_rate": 8.031498826396992e-06, + "loss": 17.8542, + "step": 17172 + }, + { + "epoch": 0.31390864057616025, + "grad_norm": 9.054141376018855, + "learning_rate": 8.031263421997497e-06, + "loss": 18.8066, + "step": 17173 + }, + { + "epoch": 0.3139269197726068, + "grad_norm": 6.130279888692038, + "learning_rate": 8.031028006973686e-06, + "loss": 17.7783, + "step": 17174 + }, + { + "epoch": 0.3139451989690533, + "grad_norm": 5.995388164254831, + "learning_rate": 8.030792581326388e-06, + "loss": 17.4762, + "step": 17175 + }, + { + "epoch": 0.31396347816549985, + "grad_norm": 5.885826294439638, + "learning_rate": 8.030557145056421e-06, + "loss": 17.3577, + "step": 17176 + }, + { + "epoch": 0.3139817573619464, + "grad_norm": 6.60999595930151, + "learning_rate": 8.030321698164616e-06, + "loss": 17.6923, + "step": 17177 + }, + { + "epoch": 0.31400003655839287, + "grad_norm": 6.843984019058763, + "learning_rate": 8.030086240651796e-06, + "loss": 17.6676, + "step": 17178 + }, + { + "epoch": 0.3140183157548394, + "grad_norm": 5.883915114984477, + "learning_rate": 8.029850772518787e-06, + "loss": 17.2554, + "step": 17179 + }, + { + "epoch": 0.31403659495128594, + "grad_norm": 6.892014099833698, + "learning_rate": 8.029615293766413e-06, + "loss": 17.8833, + "step": 17180 + }, + { + "epoch": 0.3140548741477325, + "grad_norm": 6.339557791548552, + "learning_rate": 8.029379804395501e-06, + "loss": 17.4732, + "step": 17181 + }, + { + "epoch": 0.314073153344179, + 
"grad_norm": 7.260205023241176, + "learning_rate": 8.029144304406875e-06, + "loss": 17.8128, + "step": 17182 + }, + { + "epoch": 0.3140914325406255, + "grad_norm": 5.817146925384382, + "learning_rate": 8.028908793801362e-06, + "loss": 17.3207, + "step": 17183 + }, + { + "epoch": 0.31410971173707203, + "grad_norm": 6.626224046216491, + "learning_rate": 8.028673272579786e-06, + "loss": 17.6731, + "step": 17184 + }, + { + "epoch": 0.31412799093351856, + "grad_norm": 8.369654425397433, + "learning_rate": 8.028437740742974e-06, + "loss": 18.2265, + "step": 17185 + }, + { + "epoch": 0.3141462701299651, + "grad_norm": 9.432428565195123, + "learning_rate": 8.028202198291749e-06, + "loss": 18.692, + "step": 17186 + }, + { + "epoch": 0.31416454932641164, + "grad_norm": 4.921619769724041, + "learning_rate": 8.02796664522694e-06, + "loss": 16.8349, + "step": 17187 + }, + { + "epoch": 0.3141828285228581, + "grad_norm": 8.626537819541797, + "learning_rate": 8.027731081549368e-06, + "loss": 18.0901, + "step": 17188 + }, + { + "epoch": 0.31420110771930465, + "grad_norm": 7.517223221411115, + "learning_rate": 8.027495507259863e-06, + "loss": 17.8854, + "step": 17189 + }, + { + "epoch": 0.3142193869157512, + "grad_norm": 6.503608465111523, + "learning_rate": 8.027259922359248e-06, + "loss": 17.7484, + "step": 17190 + }, + { + "epoch": 0.3142376661121977, + "grad_norm": 6.030001566174185, + "learning_rate": 8.02702432684835e-06, + "loss": 17.3925, + "step": 17191 + }, + { + "epoch": 0.31425594530864426, + "grad_norm": 6.216164772313948, + "learning_rate": 8.026788720727997e-06, + "loss": 17.5722, + "step": 17192 + }, + { + "epoch": 0.31427422450509074, + "grad_norm": 5.947817891194216, + "learning_rate": 8.026553103999009e-06, + "loss": 17.5399, + "step": 17193 + }, + { + "epoch": 0.3142925037015373, + "grad_norm": 6.695638931461703, + "learning_rate": 8.026317476662215e-06, + "loss": 17.8152, + "step": 17194 + }, + { + "epoch": 0.3143107828979838, + "grad_norm": 7.016799302824753, + "learning_rate": 8.026081838718442e-06, + "loss": 17.458, + "step": 17195 + }, + { + "epoch": 0.31432906209443034, + "grad_norm": 5.769310967622843, + "learning_rate": 8.025846190168515e-06, + "loss": 17.3009, + "step": 17196 + }, + { + "epoch": 0.3143473412908769, + "grad_norm": 5.257616943946488, + "learning_rate": 8.02561053101326e-06, + "loss": 16.9891, + "step": 17197 + }, + { + "epoch": 0.31436562048732336, + "grad_norm": 6.439660213076309, + "learning_rate": 8.0253748612535e-06, + "loss": 17.6261, + "step": 17198 + }, + { + "epoch": 0.3143838996837699, + "grad_norm": 7.616382543381116, + "learning_rate": 8.025139180890066e-06, + "loss": 17.825, + "step": 17199 + }, + { + "epoch": 0.31440217888021643, + "grad_norm": 6.859843867417869, + "learning_rate": 8.02490348992378e-06, + "loss": 17.8512, + "step": 17200 + }, + { + "epoch": 0.31442045807666297, + "grad_norm": 6.012777268791715, + "learning_rate": 8.024667788355473e-06, + "loss": 17.4365, + "step": 17201 + }, + { + "epoch": 0.31443873727310945, + "grad_norm": 7.34281656927429, + "learning_rate": 8.024432076185967e-06, + "loss": 18.0848, + "step": 17202 + }, + { + "epoch": 0.314457016469556, + "grad_norm": 7.1363276984549735, + "learning_rate": 8.024196353416085e-06, + "loss": 17.8839, + "step": 17203 + }, + { + "epoch": 0.3144752956660025, + "grad_norm": 6.619445406609291, + "learning_rate": 8.023960620046661e-06, + "loss": 17.512, + "step": 17204 + }, + { + "epoch": 0.31449357486244905, + "grad_norm": 5.901356843278337, + "learning_rate": 8.023724876078517e-06, + 
"loss": 17.5128, + "step": 17205 + }, + { + "epoch": 0.3145118540588956, + "grad_norm": 5.377358834635803, + "learning_rate": 8.023489121512479e-06, + "loss": 17.0316, + "step": 17206 + }, + { + "epoch": 0.31453013325534207, + "grad_norm": 9.555391015931994, + "learning_rate": 8.023253356349375e-06, + "loss": 18.6005, + "step": 17207 + }, + { + "epoch": 0.3145484124517886, + "grad_norm": 6.7078403560945405, + "learning_rate": 8.023017580590029e-06, + "loss": 17.5261, + "step": 17208 + }, + { + "epoch": 0.31456669164823514, + "grad_norm": 6.47132507059488, + "learning_rate": 8.022781794235268e-06, + "loss": 17.8296, + "step": 17209 + }, + { + "epoch": 0.3145849708446817, + "grad_norm": 8.109703898154141, + "learning_rate": 8.02254599728592e-06, + "loss": 18.1307, + "step": 17210 + }, + { + "epoch": 0.3146032500411282, + "grad_norm": 6.5760029145144525, + "learning_rate": 8.022310189742812e-06, + "loss": 17.5782, + "step": 17211 + }, + { + "epoch": 0.3146215292375747, + "grad_norm": 8.482911353667058, + "learning_rate": 8.022074371606767e-06, + "loss": 18.6695, + "step": 17212 + }, + { + "epoch": 0.31463980843402123, + "grad_norm": 6.977732439286522, + "learning_rate": 8.021838542878616e-06, + "loss": 17.9351, + "step": 17213 + }, + { + "epoch": 0.31465808763046776, + "grad_norm": 5.743098806772147, + "learning_rate": 8.02160270355918e-06, + "loss": 17.3204, + "step": 17214 + }, + { + "epoch": 0.3146763668269143, + "grad_norm": 7.267947435405922, + "learning_rate": 8.02136685364929e-06, + "loss": 17.5826, + "step": 17215 + }, + { + "epoch": 0.31469464602336084, + "grad_norm": 7.912004941148593, + "learning_rate": 8.021130993149773e-06, + "loss": 17.9818, + "step": 17216 + }, + { + "epoch": 0.3147129252198073, + "grad_norm": 5.280419012227969, + "learning_rate": 8.020895122061454e-06, + "loss": 16.951, + "step": 17217 + }, + { + "epoch": 0.31473120441625385, + "grad_norm": 6.873611055670704, + "learning_rate": 8.020659240385157e-06, + "loss": 17.6168, + "step": 17218 + }, + { + "epoch": 0.3147494836127004, + "grad_norm": 8.206195024040627, + "learning_rate": 8.020423348121713e-06, + "loss": 18.5465, + "step": 17219 + }, + { + "epoch": 0.3147677628091469, + "grad_norm": 6.962185452648408, + "learning_rate": 8.020187445271947e-06, + "loss": 17.6953, + "step": 17220 + }, + { + "epoch": 0.31478604200559346, + "grad_norm": 7.088755955462132, + "learning_rate": 8.019951531836686e-06, + "loss": 17.9777, + "step": 17221 + }, + { + "epoch": 0.31480432120203994, + "grad_norm": 6.190448969393758, + "learning_rate": 8.019715607816758e-06, + "loss": 17.3383, + "step": 17222 + }, + { + "epoch": 0.3148226003984865, + "grad_norm": 6.980175547078554, + "learning_rate": 8.019479673212988e-06, + "loss": 17.8941, + "step": 17223 + }, + { + "epoch": 0.314840879594933, + "grad_norm": 6.766442043943911, + "learning_rate": 8.019243728026204e-06, + "loss": 17.7019, + "step": 17224 + }, + { + "epoch": 0.31485915879137955, + "grad_norm": 8.86342108336854, + "learning_rate": 8.019007772257233e-06, + "loss": 18.7092, + "step": 17225 + }, + { + "epoch": 0.3148774379878261, + "grad_norm": 6.873461870005551, + "learning_rate": 8.0187718059069e-06, + "loss": 17.7726, + "step": 17226 + }, + { + "epoch": 0.31489571718427256, + "grad_norm": 7.5290291440852535, + "learning_rate": 8.018535828976035e-06, + "loss": 18.0024, + "step": 17227 + }, + { + "epoch": 0.3149139963807191, + "grad_norm": 7.28258231935665, + "learning_rate": 8.018299841465464e-06, + "loss": 17.7803, + "step": 17228 + }, + { + "epoch": 0.31493227557716563, + 
"grad_norm": 5.672384628255509, + "learning_rate": 8.018063843376014e-06, + "loss": 17.1077, + "step": 17229 + }, + { + "epoch": 0.31495055477361217, + "grad_norm": 6.763207668043426, + "learning_rate": 8.017827834708513e-06, + "loss": 17.8968, + "step": 17230 + }, + { + "epoch": 0.3149688339700587, + "grad_norm": 6.352084908004501, + "learning_rate": 8.017591815463785e-06, + "loss": 17.9149, + "step": 17231 + }, + { + "epoch": 0.3149871131665052, + "grad_norm": 5.401660204723154, + "learning_rate": 8.017355785642661e-06, + "loss": 17.0605, + "step": 17232 + }, + { + "epoch": 0.3150053923629517, + "grad_norm": 6.3157623324548124, + "learning_rate": 8.017119745245969e-06, + "loss": 17.4241, + "step": 17233 + }, + { + "epoch": 0.31502367155939826, + "grad_norm": 7.187095964596448, + "learning_rate": 8.01688369427453e-06, + "loss": 17.8613, + "step": 17234 + }, + { + "epoch": 0.3150419507558448, + "grad_norm": 6.45123413937246, + "learning_rate": 8.016647632729177e-06, + "loss": 17.6179, + "step": 17235 + }, + { + "epoch": 0.31506022995229127, + "grad_norm": 6.373516121494759, + "learning_rate": 8.016411560610737e-06, + "loss": 17.4736, + "step": 17236 + }, + { + "epoch": 0.3150785091487378, + "grad_norm": 7.561806774375053, + "learning_rate": 8.016175477920036e-06, + "loss": 17.8736, + "step": 17237 + }, + { + "epoch": 0.31509678834518434, + "grad_norm": 8.012843117739664, + "learning_rate": 8.015939384657901e-06, + "loss": 18.4852, + "step": 17238 + }, + { + "epoch": 0.3151150675416309, + "grad_norm": 6.270369077479934, + "learning_rate": 8.015703280825158e-06, + "loss": 17.4573, + "step": 17239 + }, + { + "epoch": 0.3151333467380774, + "grad_norm": 7.8125620468099095, + "learning_rate": 8.015467166422641e-06, + "loss": 18.0803, + "step": 17240 + }, + { + "epoch": 0.3151516259345239, + "grad_norm": 6.951899482123191, + "learning_rate": 8.01523104145117e-06, + "loss": 17.6458, + "step": 17241 + }, + { + "epoch": 0.31516990513097043, + "grad_norm": 7.537431181425768, + "learning_rate": 8.014994905911577e-06, + "loss": 17.6453, + "step": 17242 + }, + { + "epoch": 0.31518818432741696, + "grad_norm": 7.574629766724625, + "learning_rate": 8.014758759804688e-06, + "loss": 17.9085, + "step": 17243 + }, + { + "epoch": 0.3152064635238635, + "grad_norm": 6.280790363627346, + "learning_rate": 8.014522603131332e-06, + "loss": 17.7072, + "step": 17244 + }, + { + "epoch": 0.31522474272031004, + "grad_norm": 6.663546949785355, + "learning_rate": 8.014286435892335e-06, + "loss": 17.7714, + "step": 17245 + }, + { + "epoch": 0.3152430219167565, + "grad_norm": 5.644993620118886, + "learning_rate": 8.014050258088527e-06, + "loss": 17.1598, + "step": 17246 + }, + { + "epoch": 0.31526130111320305, + "grad_norm": 6.549006480041142, + "learning_rate": 8.013814069720733e-06, + "loss": 17.5526, + "step": 17247 + }, + { + "epoch": 0.3152795803096496, + "grad_norm": 6.563431303899967, + "learning_rate": 8.013577870789783e-06, + "loss": 17.3868, + "step": 17248 + }, + { + "epoch": 0.3152978595060961, + "grad_norm": 5.361171220750779, + "learning_rate": 8.013341661296503e-06, + "loss": 17.0847, + "step": 17249 + }, + { + "epoch": 0.31531613870254266, + "grad_norm": 8.254859259003663, + "learning_rate": 8.013105441241722e-06, + "loss": 18.2838, + "step": 17250 + }, + { + "epoch": 0.31533441789898914, + "grad_norm": 7.036847900780292, + "learning_rate": 8.01286921062627e-06, + "loss": 17.8468, + "step": 17251 + }, + { + "epoch": 0.3153526970954357, + "grad_norm": 9.258121444683413, + "learning_rate": 8.012632969450971e-06, 
+ "loss": 18.5679, + "step": 17252 + }, + { + "epoch": 0.3153709762918822, + "grad_norm": 7.0027051711961015, + "learning_rate": 8.012396717716655e-06, + "loss": 17.6236, + "step": 17253 + }, + { + "epoch": 0.31538925548832875, + "grad_norm": 7.772629492817567, + "learning_rate": 8.01216045542415e-06, + "loss": 18.0685, + "step": 17254 + }, + { + "epoch": 0.3154075346847753, + "grad_norm": 6.821788964207853, + "learning_rate": 8.011924182574285e-06, + "loss": 17.4284, + "step": 17255 + }, + { + "epoch": 0.31542581388122176, + "grad_norm": 8.243962954711291, + "learning_rate": 8.011687899167885e-06, + "loss": 17.9809, + "step": 17256 + }, + { + "epoch": 0.3154440930776683, + "grad_norm": 5.941002081332321, + "learning_rate": 8.011451605205782e-06, + "loss": 17.2933, + "step": 17257 + }, + { + "epoch": 0.31546237227411483, + "grad_norm": 5.7085955853111665, + "learning_rate": 8.011215300688803e-06, + "loss": 17.4081, + "step": 17258 + }, + { + "epoch": 0.31548065147056137, + "grad_norm": 7.489969880342292, + "learning_rate": 8.010978985617775e-06, + "loss": 18.0581, + "step": 17259 + }, + { + "epoch": 0.3154989306670079, + "grad_norm": 6.170305090550007, + "learning_rate": 8.010742659993525e-06, + "loss": 17.3531, + "step": 17260 + }, + { + "epoch": 0.3155172098634544, + "grad_norm": 5.761930461774254, + "learning_rate": 8.010506323816886e-06, + "loss": 17.1373, + "step": 17261 + }, + { + "epoch": 0.3155354890599009, + "grad_norm": 7.579984190464123, + "learning_rate": 8.010269977088684e-06, + "loss": 18.2122, + "step": 17262 + }, + { + "epoch": 0.31555376825634746, + "grad_norm": 8.04920914814461, + "learning_rate": 8.010033619809744e-06, + "loss": 18.2227, + "step": 17263 + }, + { + "epoch": 0.315572047452794, + "grad_norm": 6.980271621424535, + "learning_rate": 8.009797251980898e-06, + "loss": 17.4735, + "step": 17264 + }, + { + "epoch": 0.3155903266492405, + "grad_norm": 7.203646358916323, + "learning_rate": 8.009560873602976e-06, + "loss": 17.9982, + "step": 17265 + }, + { + "epoch": 0.315608605845687, + "grad_norm": 7.014703836732413, + "learning_rate": 8.009324484676801e-06, + "loss": 17.7615, + "step": 17266 + }, + { + "epoch": 0.31562688504213354, + "grad_norm": 8.962230500198741, + "learning_rate": 8.009088085203207e-06, + "loss": 18.1978, + "step": 17267 + }, + { + "epoch": 0.3156451642385801, + "grad_norm": 6.798689249987276, + "learning_rate": 8.00885167518302e-06, + "loss": 17.7435, + "step": 17268 + }, + { + "epoch": 0.3156634434350266, + "grad_norm": 6.71692463745285, + "learning_rate": 8.00861525461707e-06, + "loss": 17.6106, + "step": 17269 + }, + { + "epoch": 0.3156817226314731, + "grad_norm": 6.0212322090815045, + "learning_rate": 8.008378823506183e-06, + "loss": 17.4011, + "step": 17270 + }, + { + "epoch": 0.31570000182791963, + "grad_norm": 6.323795571367101, + "learning_rate": 8.008142381851191e-06, + "loss": 17.339, + "step": 17271 + }, + { + "epoch": 0.31571828102436617, + "grad_norm": 5.162207674821056, + "learning_rate": 8.007905929652919e-06, + "loss": 16.9637, + "step": 17272 + }, + { + "epoch": 0.3157365602208127, + "grad_norm": 6.40622354301321, + "learning_rate": 8.007669466912197e-06, + "loss": 17.2422, + "step": 17273 + }, + { + "epoch": 0.31575483941725924, + "grad_norm": 6.380792081899297, + "learning_rate": 8.007432993629857e-06, + "loss": 17.3429, + "step": 17274 + }, + { + "epoch": 0.3157731186137057, + "grad_norm": 7.435868585263863, + "learning_rate": 8.007196509806724e-06, + "loss": 17.7877, + "step": 17275 + }, + { + "epoch": 0.31579139781015225, + 
"grad_norm": 9.257972846005018, + "learning_rate": 8.00696001544363e-06, + "loss": 18.0455, + "step": 17276 + }, + { + "epoch": 0.3158096770065988, + "grad_norm": 6.444610897364533, + "learning_rate": 8.006723510541401e-06, + "loss": 17.6732, + "step": 17277 + }, + { + "epoch": 0.3158279562030453, + "grad_norm": 6.669997196515607, + "learning_rate": 8.006486995100866e-06, + "loss": 17.6559, + "step": 17278 + }, + { + "epoch": 0.31584623539949186, + "grad_norm": 5.975750319203101, + "learning_rate": 8.006250469122857e-06, + "loss": 17.3374, + "step": 17279 + }, + { + "epoch": 0.31586451459593834, + "grad_norm": 6.378632875562156, + "learning_rate": 8.0060139326082e-06, + "loss": 17.4521, + "step": 17280 + }, + { + "epoch": 0.3158827937923849, + "grad_norm": 6.1339635490609234, + "learning_rate": 8.005777385557723e-06, + "loss": 17.3795, + "step": 17281 + }, + { + "epoch": 0.3159010729888314, + "grad_norm": 7.2323692727419315, + "learning_rate": 8.005540827972259e-06, + "loss": 17.9336, + "step": 17282 + }, + { + "epoch": 0.31591935218527795, + "grad_norm": 5.936761222618097, + "learning_rate": 8.005304259852636e-06, + "loss": 17.2524, + "step": 17283 + }, + { + "epoch": 0.3159376313817245, + "grad_norm": 6.986959874057104, + "learning_rate": 8.00506768119968e-06, + "loss": 17.6888, + "step": 17284 + }, + { + "epoch": 0.31595591057817096, + "grad_norm": 6.618194378270937, + "learning_rate": 8.004831092014224e-06, + "loss": 17.7176, + "step": 17285 + }, + { + "epoch": 0.3159741897746175, + "grad_norm": 6.618019920452158, + "learning_rate": 8.004594492297095e-06, + "loss": 17.5945, + "step": 17286 + }, + { + "epoch": 0.31599246897106403, + "grad_norm": 6.229218484130583, + "learning_rate": 8.004357882049125e-06, + "loss": 17.3467, + "step": 17287 + }, + { + "epoch": 0.31601074816751057, + "grad_norm": 6.583558412131839, + "learning_rate": 8.004121261271139e-06, + "loss": 17.7141, + "step": 17288 + }, + { + "epoch": 0.3160290273639571, + "grad_norm": 5.570932105293516, + "learning_rate": 8.00388462996397e-06, + "loss": 17.0904, + "step": 17289 + }, + { + "epoch": 0.3160473065604036, + "grad_norm": 6.773103782373799, + "learning_rate": 8.003647988128447e-06, + "loss": 17.823, + "step": 17290 + }, + { + "epoch": 0.3160655857568501, + "grad_norm": 7.140574485216784, + "learning_rate": 8.003411335765397e-06, + "loss": 17.8718, + "step": 17291 + }, + { + "epoch": 0.31608386495329666, + "grad_norm": 7.65391096461442, + "learning_rate": 8.00317467287565e-06, + "loss": 18.1112, + "step": 17292 + }, + { + "epoch": 0.3161021441497432, + "grad_norm": 6.4821818518534196, + "learning_rate": 8.002937999460038e-06, + "loss": 17.4704, + "step": 17293 + }, + { + "epoch": 0.3161204233461897, + "grad_norm": 4.8915957927763465, + "learning_rate": 8.002701315519388e-06, + "loss": 16.8415, + "step": 17294 + }, + { + "epoch": 0.3161387025426362, + "grad_norm": 6.98314453855389, + "learning_rate": 8.002464621054531e-06, + "loss": 17.8192, + "step": 17295 + }, + { + "epoch": 0.31615698173908274, + "grad_norm": 5.695664433405737, + "learning_rate": 8.002227916066297e-06, + "loss": 17.4068, + "step": 17296 + }, + { + "epoch": 0.3161752609355293, + "grad_norm": 7.0937901152485265, + "learning_rate": 8.001991200555512e-06, + "loss": 18.2263, + "step": 17297 + }, + { + "epoch": 0.3161935401319758, + "grad_norm": 6.613123201057216, + "learning_rate": 8.00175447452301e-06, + "loss": 17.7912, + "step": 17298 + }, + { + "epoch": 0.31621181932842235, + "grad_norm": 5.779613572039592, + "learning_rate": 8.00151773796962e-06, + 
"loss": 17.1359, + "step": 17299 + }, + { + "epoch": 0.31623009852486883, + "grad_norm": 5.56224046592532, + "learning_rate": 8.00128099089617e-06, + "loss": 17.1956, + "step": 17300 + }, + { + "epoch": 0.31624837772131537, + "grad_norm": 5.978160083714933, + "learning_rate": 8.00104423330349e-06, + "loss": 17.1161, + "step": 17301 + }, + { + "epoch": 0.3162666569177619, + "grad_norm": 7.990643806813121, + "learning_rate": 8.000807465192411e-06, + "loss": 18.4523, + "step": 17302 + }, + { + "epoch": 0.31628493611420844, + "grad_norm": 5.567845383398115, + "learning_rate": 8.00057068656376e-06, + "loss": 17.1607, + "step": 17303 + }, + { + "epoch": 0.3163032153106549, + "grad_norm": 5.922321863482638, + "learning_rate": 8.000333897418372e-06, + "loss": 17.5144, + "step": 17304 + }, + { + "epoch": 0.31632149450710145, + "grad_norm": 5.705829061206794, + "learning_rate": 8.000097097757072e-06, + "loss": 17.2625, + "step": 17305 + }, + { + "epoch": 0.316339773703548, + "grad_norm": 6.227613307187678, + "learning_rate": 7.999860287580694e-06, + "loss": 17.6444, + "step": 17306 + }, + { + "epoch": 0.3163580528999945, + "grad_norm": 7.018679087373992, + "learning_rate": 7.999623466890065e-06, + "loss": 17.5985, + "step": 17307 + }, + { + "epoch": 0.31637633209644106, + "grad_norm": 6.960499383597473, + "learning_rate": 7.999386635686016e-06, + "loss": 17.6521, + "step": 17308 + }, + { + "epoch": 0.31639461129288754, + "grad_norm": 6.187277667793289, + "learning_rate": 7.999149793969377e-06, + "loss": 17.3097, + "step": 17309 + }, + { + "epoch": 0.3164128904893341, + "grad_norm": 6.838715198564892, + "learning_rate": 7.99891294174098e-06, + "loss": 17.6548, + "step": 17310 + }, + { + "epoch": 0.3164311696857806, + "grad_norm": 7.189376662082501, + "learning_rate": 7.998676079001651e-06, + "loss": 17.9041, + "step": 17311 + }, + { + "epoch": 0.31644944888222715, + "grad_norm": 6.9285400299559585, + "learning_rate": 7.998439205752222e-06, + "loss": 17.7576, + "step": 17312 + }, + { + "epoch": 0.3164677280786737, + "grad_norm": 6.837096459838117, + "learning_rate": 7.998202321993527e-06, + "loss": 17.5272, + "step": 17313 + }, + { + "epoch": 0.31648600727512016, + "grad_norm": 6.5943505560026425, + "learning_rate": 7.997965427726391e-06, + "loss": 17.3491, + "step": 17314 + }, + { + "epoch": 0.3165042864715667, + "grad_norm": 6.8997158881354235, + "learning_rate": 7.997728522951646e-06, + "loss": 18.0895, + "step": 17315 + }, + { + "epoch": 0.31652256566801323, + "grad_norm": 5.475364935696605, + "learning_rate": 7.997491607670123e-06, + "loss": 17.2759, + "step": 17316 + }, + { + "epoch": 0.31654084486445977, + "grad_norm": 6.76389765174802, + "learning_rate": 7.997254681882652e-06, + "loss": 17.7249, + "step": 17317 + }, + { + "epoch": 0.3165591240609063, + "grad_norm": 6.563208550847732, + "learning_rate": 7.997017745590064e-06, + "loss": 17.7053, + "step": 17318 + }, + { + "epoch": 0.3165774032573528, + "grad_norm": 6.285419678293115, + "learning_rate": 7.996780798793187e-06, + "loss": 17.2764, + "step": 17319 + }, + { + "epoch": 0.3165956824537993, + "grad_norm": 6.2225713999306125, + "learning_rate": 7.996543841492857e-06, + "loss": 17.4828, + "step": 17320 + }, + { + "epoch": 0.31661396165024586, + "grad_norm": 6.265834466442455, + "learning_rate": 7.996306873689899e-06, + "loss": 17.2636, + "step": 17321 + }, + { + "epoch": 0.3166322408466924, + "grad_norm": 6.726600881578628, + "learning_rate": 7.996069895385143e-06, + "loss": 17.4049, + "step": 17322 + }, + { + "epoch": 0.3166505200431389, 
+ "grad_norm": 6.485672045106972, + "learning_rate": 7.995832906579426e-06, + "loss": 17.3534, + "step": 17323 + }, + { + "epoch": 0.3166687992395854, + "grad_norm": 8.904328244583729, + "learning_rate": 7.995595907273573e-06, + "loss": 18.7114, + "step": 17324 + }, + { + "epoch": 0.31668707843603194, + "grad_norm": 6.148101540336492, + "learning_rate": 7.995358897468414e-06, + "loss": 17.4741, + "step": 17325 + }, + { + "epoch": 0.3167053576324785, + "grad_norm": 6.932768245039868, + "learning_rate": 7.995121877164784e-06, + "loss": 17.4276, + "step": 17326 + }, + { + "epoch": 0.316723636828925, + "grad_norm": 7.669086120789412, + "learning_rate": 7.994884846363513e-06, + "loss": 17.7421, + "step": 17327 + }, + { + "epoch": 0.31674191602537155, + "grad_norm": 5.84113389394122, + "learning_rate": 7.99464780506543e-06, + "loss": 17.3566, + "step": 17328 + }, + { + "epoch": 0.31676019522181803, + "grad_norm": 6.880726002467942, + "learning_rate": 7.994410753271365e-06, + "loss": 17.6299, + "step": 17329 + }, + { + "epoch": 0.31677847441826457, + "grad_norm": 6.734761939125572, + "learning_rate": 7.99417369098215e-06, + "loss": 17.988, + "step": 17330 + }, + { + "epoch": 0.3167967536147111, + "grad_norm": 6.721574535046607, + "learning_rate": 7.993936618198616e-06, + "loss": 17.5829, + "step": 17331 + }, + { + "epoch": 0.31681503281115764, + "grad_norm": 6.424721329456516, + "learning_rate": 7.993699534921594e-06, + "loss": 17.7047, + "step": 17332 + }, + { + "epoch": 0.3168333120076042, + "grad_norm": 7.5887974576988615, + "learning_rate": 7.993462441151918e-06, + "loss": 17.8649, + "step": 17333 + }, + { + "epoch": 0.31685159120405065, + "grad_norm": 5.820842922740516, + "learning_rate": 7.993225336890414e-06, + "loss": 17.3976, + "step": 17334 + }, + { + "epoch": 0.3168698704004972, + "grad_norm": 6.607944164689301, + "learning_rate": 7.992988222137914e-06, + "loss": 17.5775, + "step": 17335 + }, + { + "epoch": 0.3168881495969437, + "grad_norm": 6.239762600115004, + "learning_rate": 7.99275109689525e-06, + "loss": 17.5336, + "step": 17336 + }, + { + "epoch": 0.31690642879339026, + "grad_norm": 6.274828033999698, + "learning_rate": 7.992513961163253e-06, + "loss": 17.3763, + "step": 17337 + }, + { + "epoch": 0.31692470798983674, + "grad_norm": 7.08519694139076, + "learning_rate": 7.992276814942756e-06, + "loss": 17.5813, + "step": 17338 + }, + { + "epoch": 0.3169429871862833, + "grad_norm": 7.3140485087203375, + "learning_rate": 7.992039658234586e-06, + "loss": 17.8942, + "step": 17339 + }, + { + "epoch": 0.3169612663827298, + "grad_norm": 7.207227940097351, + "learning_rate": 7.99180249103958e-06, + "loss": 18.0972, + "step": 17340 + }, + { + "epoch": 0.31697954557917635, + "grad_norm": 6.858963748711504, + "learning_rate": 7.991565313358562e-06, + "loss": 17.7875, + "step": 17341 + }, + { + "epoch": 0.3169978247756229, + "grad_norm": 6.556687275015834, + "learning_rate": 7.991328125192368e-06, + "loss": 17.3149, + "step": 17342 + }, + { + "epoch": 0.31701610397206936, + "grad_norm": 6.490040996250598, + "learning_rate": 7.99109092654183e-06, + "loss": 18.3425, + "step": 17343 + }, + { + "epoch": 0.3170343831685159, + "grad_norm": 5.979798621442256, + "learning_rate": 7.990853717407778e-06, + "loss": 17.4498, + "step": 17344 + }, + { + "epoch": 0.31705266236496243, + "grad_norm": 7.520527607230535, + "learning_rate": 7.990616497791043e-06, + "loss": 18.1362, + "step": 17345 + }, + { + "epoch": 0.31707094156140897, + "grad_norm": 8.00210194451691, + "learning_rate": 7.990379267692455e-06, + 
"loss": 17.9182, + "step": 17346 + }, + { + "epoch": 0.3170892207578555, + "grad_norm": 6.0069612982067415, + "learning_rate": 7.990142027112849e-06, + "loss": 17.13, + "step": 17347 + }, + { + "epoch": 0.317107499954302, + "grad_norm": 7.431075068686617, + "learning_rate": 7.989904776053054e-06, + "loss": 17.8591, + "step": 17348 + }, + { + "epoch": 0.3171257791507485, + "grad_norm": 7.194017830536698, + "learning_rate": 7.989667514513903e-06, + "loss": 17.9881, + "step": 17349 + }, + { + "epoch": 0.31714405834719506, + "grad_norm": 7.2560760693483894, + "learning_rate": 7.989430242496226e-06, + "loss": 18.1042, + "step": 17350 + }, + { + "epoch": 0.3171623375436416, + "grad_norm": 6.127488058005536, + "learning_rate": 7.989192960000855e-06, + "loss": 17.3964, + "step": 17351 + }, + { + "epoch": 0.31718061674008813, + "grad_norm": 6.820769982935315, + "learning_rate": 7.988955667028622e-06, + "loss": 17.7718, + "step": 17352 + }, + { + "epoch": 0.3171988959365346, + "grad_norm": 5.801515730191406, + "learning_rate": 7.988718363580359e-06, + "loss": 17.4517, + "step": 17353 + }, + { + "epoch": 0.31721717513298114, + "grad_norm": 6.102441621651423, + "learning_rate": 7.9884810496569e-06, + "loss": 17.2964, + "step": 17354 + }, + { + "epoch": 0.3172354543294277, + "grad_norm": 5.961007345755837, + "learning_rate": 7.988243725259071e-06, + "loss": 17.3146, + "step": 17355 + }, + { + "epoch": 0.3172537335258742, + "grad_norm": 6.451354412838017, + "learning_rate": 7.988006390387707e-06, + "loss": 17.4721, + "step": 17356 + }, + { + "epoch": 0.31727201272232075, + "grad_norm": 6.165619916006797, + "learning_rate": 7.98776904504364e-06, + "loss": 17.3525, + "step": 17357 + }, + { + "epoch": 0.31729029191876723, + "grad_norm": 6.448672229159587, + "learning_rate": 7.987531689227705e-06, + "loss": 17.5783, + "step": 17358 + }, + { + "epoch": 0.31730857111521377, + "grad_norm": 6.547768556287841, + "learning_rate": 7.987294322940728e-06, + "loss": 17.629, + "step": 17359 + }, + { + "epoch": 0.3173268503116603, + "grad_norm": 6.2012472626810355, + "learning_rate": 7.987056946183544e-06, + "loss": 17.5342, + "step": 17360 + }, + { + "epoch": 0.31734512950810684, + "grad_norm": 7.712075728461025, + "learning_rate": 7.986819558956984e-06, + "loss": 18.4234, + "step": 17361 + }, + { + "epoch": 0.3173634087045534, + "grad_norm": 7.181988503389922, + "learning_rate": 7.986582161261881e-06, + "loss": 17.7457, + "step": 17362 + }, + { + "epoch": 0.31738168790099985, + "grad_norm": 5.440387236226897, + "learning_rate": 7.986344753099067e-06, + "loss": 17.1153, + "step": 17363 + }, + { + "epoch": 0.3173999670974464, + "grad_norm": 5.29858840328652, + "learning_rate": 7.986107334469374e-06, + "loss": 17.1398, + "step": 17364 + }, + { + "epoch": 0.3174182462938929, + "grad_norm": 6.48987557252202, + "learning_rate": 7.985869905373635e-06, + "loss": 17.6772, + "step": 17365 + }, + { + "epoch": 0.31743652549033946, + "grad_norm": 7.141359406982921, + "learning_rate": 7.985632465812679e-06, + "loss": 17.8821, + "step": 17366 + }, + { + "epoch": 0.317454804686786, + "grad_norm": 6.660119386170312, + "learning_rate": 7.98539501578734e-06, + "loss": 17.6982, + "step": 17367 + }, + { + "epoch": 0.3174730838832325, + "grad_norm": 5.7470754973462554, + "learning_rate": 7.985157555298453e-06, + "loss": 17.2834, + "step": 17368 + }, + { + "epoch": 0.317491363079679, + "grad_norm": 5.582415356984512, + "learning_rate": 7.984920084346845e-06, + "loss": 17.2948, + "step": 17369 + }, + { + "epoch": 0.31750964227612555, + 
"grad_norm": 7.13506128815316, + "learning_rate": 7.984682602933353e-06, + "loss": 17.8519, + "step": 17370 + }, + { + "epoch": 0.3175279214725721, + "grad_norm": 7.337779130993448, + "learning_rate": 7.984445111058807e-06, + "loss": 18.0208, + "step": 17371 + }, + { + "epoch": 0.31754620066901856, + "grad_norm": 5.21294512449078, + "learning_rate": 7.98420760872404e-06, + "loss": 17.0952, + "step": 17372 + }, + { + "epoch": 0.3175644798654651, + "grad_norm": 6.4250015353459276, + "learning_rate": 7.983970095929884e-06, + "loss": 17.4149, + "step": 17373 + }, + { + "epoch": 0.31758275906191163, + "grad_norm": 5.239606804881995, + "learning_rate": 7.983732572677172e-06, + "loss": 16.8856, + "step": 17374 + }, + { + "epoch": 0.31760103825835817, + "grad_norm": 7.069788966658681, + "learning_rate": 7.983495038966735e-06, + "loss": 17.5749, + "step": 17375 + }, + { + "epoch": 0.3176193174548047, + "grad_norm": 6.574379653563129, + "learning_rate": 7.98325749479941e-06, + "loss": 17.8001, + "step": 17376 + }, + { + "epoch": 0.3176375966512512, + "grad_norm": 6.548469582548624, + "learning_rate": 7.983019940176024e-06, + "loss": 17.4517, + "step": 17377 + }, + { + "epoch": 0.3176558758476977, + "grad_norm": 5.8811889627432805, + "learning_rate": 7.982782375097412e-06, + "loss": 17.2188, + "step": 17378 + }, + { + "epoch": 0.31767415504414426, + "grad_norm": 6.428892726108311, + "learning_rate": 7.982544799564407e-06, + "loss": 17.5082, + "step": 17379 + }, + { + "epoch": 0.3176924342405908, + "grad_norm": 9.126127628108254, + "learning_rate": 7.98230721357784e-06, + "loss": 18.6344, + "step": 17380 + }, + { + "epoch": 0.31771071343703733, + "grad_norm": 5.123403464589389, + "learning_rate": 7.982069617138545e-06, + "loss": 16.9785, + "step": 17381 + }, + { + "epoch": 0.3177289926334838, + "grad_norm": 5.754014486785964, + "learning_rate": 7.981832010247358e-06, + "loss": 17.3393, + "step": 17382 + }, + { + "epoch": 0.31774727182993034, + "grad_norm": 6.143688327756162, + "learning_rate": 7.981594392905105e-06, + "loss": 17.4204, + "step": 17383 + }, + { + "epoch": 0.3177655510263769, + "grad_norm": 5.7456749514990175, + "learning_rate": 7.981356765112624e-06, + "loss": 17.1715, + "step": 17384 + }, + { + "epoch": 0.3177838302228234, + "grad_norm": 7.125977071006484, + "learning_rate": 7.981119126870747e-06, + "loss": 17.6395, + "step": 17385 + }, + { + "epoch": 0.31780210941926995, + "grad_norm": 6.775925131844454, + "learning_rate": 7.980881478180305e-06, + "loss": 17.5778, + "step": 17386 + }, + { + "epoch": 0.31782038861571643, + "grad_norm": 5.346890123465364, + "learning_rate": 7.980643819042132e-06, + "loss": 16.9998, + "step": 17387 + }, + { + "epoch": 0.31783866781216297, + "grad_norm": 6.44156051332979, + "learning_rate": 7.980406149457062e-06, + "loss": 17.615, + "step": 17388 + }, + { + "epoch": 0.3178569470086095, + "grad_norm": 6.542473041592614, + "learning_rate": 7.980168469425926e-06, + "loss": 17.9314, + "step": 17389 + }, + { + "epoch": 0.31787522620505604, + "grad_norm": 7.14022172765695, + "learning_rate": 7.979930778949559e-06, + "loss": 17.8826, + "step": 17390 + }, + { + "epoch": 0.3178935054015026, + "grad_norm": 6.3171456407064035, + "learning_rate": 7.979693078028792e-06, + "loss": 17.7095, + "step": 17391 + }, + { + "epoch": 0.31791178459794905, + "grad_norm": 7.237654991147253, + "learning_rate": 7.979455366664461e-06, + "loss": 17.7842, + "step": 17392 + }, + { + "epoch": 0.3179300637943956, + "grad_norm": 6.316166156805975, + "learning_rate": 7.979217644857395e-06, 
+ "loss": 17.4043, + "step": 17393 + }, + { + "epoch": 0.3179483429908421, + "grad_norm": 6.222195360899406, + "learning_rate": 7.978979912608432e-06, + "loss": 17.3792, + "step": 17394 + }, + { + "epoch": 0.31796662218728866, + "grad_norm": 7.086724627567828, + "learning_rate": 7.978742169918403e-06, + "loss": 17.4918, + "step": 17395 + }, + { + "epoch": 0.3179849013837352, + "grad_norm": 6.821847337797812, + "learning_rate": 7.97850441678814e-06, + "loss": 17.6306, + "step": 17396 + }, + { + "epoch": 0.3180031805801817, + "grad_norm": 6.894452783898032, + "learning_rate": 7.978266653218478e-06, + "loss": 17.6678, + "step": 17397 + }, + { + "epoch": 0.3180214597766282, + "grad_norm": 7.5576865526535055, + "learning_rate": 7.978028879210249e-06, + "loss": 17.8944, + "step": 17398 + }, + { + "epoch": 0.31803973897307475, + "grad_norm": 6.537276779955941, + "learning_rate": 7.977791094764288e-06, + "loss": 17.436, + "step": 17399 + }, + { + "epoch": 0.3180580181695213, + "grad_norm": 5.298225272967506, + "learning_rate": 7.977553299881428e-06, + "loss": 17.0772, + "step": 17400 + }, + { + "epoch": 0.3180762973659678, + "grad_norm": 7.239544698165914, + "learning_rate": 7.9773154945625e-06, + "loss": 17.998, + "step": 17401 + }, + { + "epoch": 0.3180945765624143, + "grad_norm": 6.289866225021799, + "learning_rate": 7.977077678808342e-06, + "loss": 17.462, + "step": 17402 + }, + { + "epoch": 0.31811285575886084, + "grad_norm": 6.5872078956297475, + "learning_rate": 7.976839852619785e-06, + "loss": 17.5857, + "step": 17403 + }, + { + "epoch": 0.31813113495530737, + "grad_norm": 7.102906806243845, + "learning_rate": 7.976602015997662e-06, + "loss": 17.8424, + "step": 17404 + }, + { + "epoch": 0.3181494141517539, + "grad_norm": 6.1827037640816975, + "learning_rate": 7.976364168942807e-06, + "loss": 17.3523, + "step": 17405 + }, + { + "epoch": 0.3181676933482004, + "grad_norm": 7.042763867266987, + "learning_rate": 7.976126311456054e-06, + "loss": 17.7008, + "step": 17406 + }, + { + "epoch": 0.3181859725446469, + "grad_norm": 6.073728287871449, + "learning_rate": 7.975888443538235e-06, + "loss": 17.3641, + "step": 17407 + }, + { + "epoch": 0.31820425174109346, + "grad_norm": 5.714845719217169, + "learning_rate": 7.975650565190187e-06, + "loss": 17.4318, + "step": 17408 + }, + { + "epoch": 0.31822253093754, + "grad_norm": 7.3403681460001025, + "learning_rate": 7.975412676412742e-06, + "loss": 17.9309, + "step": 17409 + }, + { + "epoch": 0.31824081013398653, + "grad_norm": 6.26588164655034, + "learning_rate": 7.975174777206733e-06, + "loss": 17.543, + "step": 17410 + }, + { + "epoch": 0.318259089330433, + "grad_norm": 5.042416906249586, + "learning_rate": 7.974936867572995e-06, + "loss": 16.921, + "step": 17411 + }, + { + "epoch": 0.31827736852687954, + "grad_norm": 5.8848114121785455, + "learning_rate": 7.974698947512362e-06, + "loss": 17.5731, + "step": 17412 + }, + { + "epoch": 0.3182956477233261, + "grad_norm": 7.517702316834048, + "learning_rate": 7.974461017025667e-06, + "loss": 17.9577, + "step": 17413 + }, + { + "epoch": 0.3183139269197726, + "grad_norm": 5.918598179588317, + "learning_rate": 7.974223076113744e-06, + "loss": 17.278, + "step": 17414 + }, + { + "epoch": 0.31833220611621915, + "grad_norm": 5.42916953972551, + "learning_rate": 7.973985124777427e-06, + "loss": 17.1337, + "step": 17415 + }, + { + "epoch": 0.31835048531266563, + "grad_norm": 10.107034878062095, + "learning_rate": 7.973747163017552e-06, + "loss": 18.4012, + "step": 17416 + }, + { + "epoch": 0.31836876450911217, + 
"grad_norm": 6.690150743389067, + "learning_rate": 7.97350919083495e-06, + "loss": 17.5545, + "step": 17417 + }, + { + "epoch": 0.3183870437055587, + "grad_norm": 6.481417714103426, + "learning_rate": 7.973271208230454e-06, + "loss": 17.5627, + "step": 17418 + }, + { + "epoch": 0.31840532290200524, + "grad_norm": 6.523033117753645, + "learning_rate": 7.973033215204902e-06, + "loss": 17.5143, + "step": 17419 + }, + { + "epoch": 0.3184236020984518, + "grad_norm": 7.0409480642755655, + "learning_rate": 7.972795211759129e-06, + "loss": 17.7209, + "step": 17420 + }, + { + "epoch": 0.31844188129489825, + "grad_norm": 6.998002046816529, + "learning_rate": 7.972557197893964e-06, + "loss": 17.6533, + "step": 17421 + }, + { + "epoch": 0.3184601604913448, + "grad_norm": 7.4785771956664435, + "learning_rate": 7.972319173610243e-06, + "loss": 18.058, + "step": 17422 + }, + { + "epoch": 0.3184784396877913, + "grad_norm": 6.994765865188953, + "learning_rate": 7.972081138908805e-06, + "loss": 17.7197, + "step": 17423 + }, + { + "epoch": 0.31849671888423786, + "grad_norm": 7.442863291258745, + "learning_rate": 7.971843093790477e-06, + "loss": 17.7491, + "step": 17424 + }, + { + "epoch": 0.3185149980806844, + "grad_norm": 6.4550573992928895, + "learning_rate": 7.971605038256098e-06, + "loss": 17.6854, + "step": 17425 + }, + { + "epoch": 0.3185332772771309, + "grad_norm": 6.770599267949571, + "learning_rate": 7.971366972306503e-06, + "loss": 17.6583, + "step": 17426 + }, + { + "epoch": 0.3185515564735774, + "grad_norm": 7.6163586371788154, + "learning_rate": 7.971128895942522e-06, + "loss": 17.9685, + "step": 17427 + }, + { + "epoch": 0.31856983567002395, + "grad_norm": 7.577717258632709, + "learning_rate": 7.970890809164992e-06, + "loss": 18.0588, + "step": 17428 + }, + { + "epoch": 0.3185881148664705, + "grad_norm": 8.542688583545747, + "learning_rate": 7.97065271197475e-06, + "loss": 18.0899, + "step": 17429 + }, + { + "epoch": 0.318606394062917, + "grad_norm": 6.05510887570353, + "learning_rate": 7.970414604372627e-06, + "loss": 17.2955, + "step": 17430 + }, + { + "epoch": 0.3186246732593635, + "grad_norm": 6.308549937419515, + "learning_rate": 7.970176486359457e-06, + "loss": 17.3666, + "step": 17431 + }, + { + "epoch": 0.31864295245581004, + "grad_norm": 6.004841029169363, + "learning_rate": 7.969938357936078e-06, + "loss": 17.2362, + "step": 17432 + }, + { + "epoch": 0.31866123165225657, + "grad_norm": 5.99162523397113, + "learning_rate": 7.969700219103323e-06, + "loss": 17.3407, + "step": 17433 + }, + { + "epoch": 0.3186795108487031, + "grad_norm": 6.420638244051939, + "learning_rate": 7.969462069862025e-06, + "loss": 17.5247, + "step": 17434 + }, + { + "epoch": 0.31869779004514964, + "grad_norm": 6.14558719914021, + "learning_rate": 7.96922391021302e-06, + "loss": 17.4104, + "step": 17435 + }, + { + "epoch": 0.3187160692415961, + "grad_norm": 7.010187718192945, + "learning_rate": 7.968985740157144e-06, + "loss": 17.8984, + "step": 17436 + }, + { + "epoch": 0.31873434843804266, + "grad_norm": 7.00973642192062, + "learning_rate": 7.968747559695232e-06, + "loss": 18.0226, + "step": 17437 + }, + { + "epoch": 0.3187526276344892, + "grad_norm": 7.942679197149888, + "learning_rate": 7.968509368828115e-06, + "loss": 17.8861, + "step": 17438 + }, + { + "epoch": 0.31877090683093573, + "grad_norm": 6.691062041059063, + "learning_rate": 7.968271167556629e-06, + "loss": 17.5306, + "step": 17439 + }, + { + "epoch": 0.3187891860273822, + "grad_norm": 7.305086467569272, + "learning_rate": 7.968032955881612e-06, + 
"loss": 17.8021, + "step": 17440 + }, + { + "epoch": 0.31880746522382875, + "grad_norm": 7.282716776920097, + "learning_rate": 7.967794733803899e-06, + "loss": 17.9196, + "step": 17441 + }, + { + "epoch": 0.3188257444202753, + "grad_norm": 6.124799570346934, + "learning_rate": 7.96755650132432e-06, + "loss": 17.5765, + "step": 17442 + }, + { + "epoch": 0.3188440236167218, + "grad_norm": 6.351901912431719, + "learning_rate": 7.967318258443715e-06, + "loss": 17.266, + "step": 17443 + }, + { + "epoch": 0.31886230281316835, + "grad_norm": 7.462993494413, + "learning_rate": 7.967080005162915e-06, + "loss": 17.7182, + "step": 17444 + }, + { + "epoch": 0.31888058200961483, + "grad_norm": 5.1388979891743425, + "learning_rate": 7.966841741482757e-06, + "loss": 17.0286, + "step": 17445 + }, + { + "epoch": 0.31889886120606137, + "grad_norm": 7.108736057028803, + "learning_rate": 7.966603467404079e-06, + "loss": 17.679, + "step": 17446 + }, + { + "epoch": 0.3189171404025079, + "grad_norm": 6.703201495150327, + "learning_rate": 7.966365182927712e-06, + "loss": 17.7453, + "step": 17447 + }, + { + "epoch": 0.31893541959895444, + "grad_norm": 5.855150371783629, + "learning_rate": 7.966126888054491e-06, + "loss": 17.398, + "step": 17448 + }, + { + "epoch": 0.318953698795401, + "grad_norm": 6.85273940483492, + "learning_rate": 7.965888582785254e-06, + "loss": 17.8339, + "step": 17449 + }, + { + "epoch": 0.31897197799184746, + "grad_norm": 6.410416172121891, + "learning_rate": 7.965650267120834e-06, + "loss": 17.4644, + "step": 17450 + }, + { + "epoch": 0.318990257188294, + "grad_norm": 7.057073891279669, + "learning_rate": 7.96541194106207e-06, + "loss": 17.5452, + "step": 17451 + }, + { + "epoch": 0.3190085363847405, + "grad_norm": 7.045457363541464, + "learning_rate": 7.96517360460979e-06, + "loss": 17.6048, + "step": 17452 + }, + { + "epoch": 0.31902681558118706, + "grad_norm": 6.154527110652669, + "learning_rate": 7.964935257764836e-06, + "loss": 17.3925, + "step": 17453 + }, + { + "epoch": 0.3190450947776336, + "grad_norm": 7.464159396080561, + "learning_rate": 7.964696900528042e-06, + "loss": 18.3016, + "step": 17454 + }, + { + "epoch": 0.3190633739740801, + "grad_norm": 5.749208152804058, + "learning_rate": 7.964458532900242e-06, + "loss": 17.3589, + "step": 17455 + }, + { + "epoch": 0.3190816531705266, + "grad_norm": 8.00658019945167, + "learning_rate": 7.96422015488227e-06, + "loss": 18.218, + "step": 17456 + }, + { + "epoch": 0.31909993236697315, + "grad_norm": 8.344669067885857, + "learning_rate": 7.963981766474966e-06, + "loss": 18.2281, + "step": 17457 + }, + { + "epoch": 0.3191182115634197, + "grad_norm": 7.415546398600031, + "learning_rate": 7.963743367679163e-06, + "loss": 17.9363, + "step": 17458 + }, + { + "epoch": 0.3191364907598662, + "grad_norm": 7.5905559663552005, + "learning_rate": 7.963504958495695e-06, + "loss": 17.9948, + "step": 17459 + }, + { + "epoch": 0.3191547699563127, + "grad_norm": 7.352105628538097, + "learning_rate": 7.963266538925401e-06, + "loss": 18.1612, + "step": 17460 + }, + { + "epoch": 0.31917304915275924, + "grad_norm": 6.36190368461596, + "learning_rate": 7.963028108969115e-06, + "loss": 17.6291, + "step": 17461 + }, + { + "epoch": 0.31919132834920577, + "grad_norm": 6.576523290862874, + "learning_rate": 7.962789668627672e-06, + "loss": 17.451, + "step": 17462 + }, + { + "epoch": 0.3192096075456523, + "grad_norm": 8.733940482155006, + "learning_rate": 7.962551217901909e-06, + "loss": 17.718, + "step": 17463 + }, + { + "epoch": 0.31922788674209884, + 
"grad_norm": 7.95138733543129, + "learning_rate": 7.962312756792659e-06, + "loss": 18.3766, + "step": 17464 + }, + { + "epoch": 0.3192461659385453, + "grad_norm": 7.864199906412713, + "learning_rate": 7.962074285300763e-06, + "loss": 18.3167, + "step": 17465 + }, + { + "epoch": 0.31926444513499186, + "grad_norm": 6.142260208460528, + "learning_rate": 7.96183580342705e-06, + "loss": 17.4108, + "step": 17466 + }, + { + "epoch": 0.3192827243314384, + "grad_norm": 7.183732960893861, + "learning_rate": 7.961597311172361e-06, + "loss": 17.479, + "step": 17467 + }, + { + "epoch": 0.31930100352788493, + "grad_norm": 7.74294349359482, + "learning_rate": 7.96135880853753e-06, + "loss": 18.2792, + "step": 17468 + }, + { + "epoch": 0.31931928272433147, + "grad_norm": 6.047273233634122, + "learning_rate": 7.961120295523397e-06, + "loss": 17.4092, + "step": 17469 + }, + { + "epoch": 0.31933756192077795, + "grad_norm": 4.820243801648722, + "learning_rate": 7.960881772130791e-06, + "loss": 16.9432, + "step": 17470 + }, + { + "epoch": 0.3193558411172245, + "grad_norm": 7.203332010453183, + "learning_rate": 7.960643238360552e-06, + "loss": 17.6185, + "step": 17471 + }, + { + "epoch": 0.319374120313671, + "grad_norm": 5.7546710407216874, + "learning_rate": 7.960404694213514e-06, + "loss": 17.4795, + "step": 17472 + }, + { + "epoch": 0.31939239951011755, + "grad_norm": 6.916722004121082, + "learning_rate": 7.960166139690516e-06, + "loss": 17.6398, + "step": 17473 + }, + { + "epoch": 0.31941067870656403, + "grad_norm": 5.269629728558581, + "learning_rate": 7.959927574792393e-06, + "loss": 17.0616, + "step": 17474 + }, + { + "epoch": 0.31942895790301057, + "grad_norm": 7.214591716120327, + "learning_rate": 7.959688999519979e-06, + "loss": 17.8556, + "step": 17475 + }, + { + "epoch": 0.3194472370994571, + "grad_norm": 5.938926725577914, + "learning_rate": 7.959450413874112e-06, + "loss": 17.4313, + "step": 17476 + }, + { + "epoch": 0.31946551629590364, + "grad_norm": 6.393551342787462, + "learning_rate": 7.95921181785563e-06, + "loss": 17.747, + "step": 17477 + }, + { + "epoch": 0.3194837954923502, + "grad_norm": 6.970582896186193, + "learning_rate": 7.958973211465366e-06, + "loss": 17.4394, + "step": 17478 + }, + { + "epoch": 0.31950207468879666, + "grad_norm": 6.540716048527766, + "learning_rate": 7.958734594704158e-06, + "loss": 17.6077, + "step": 17479 + }, + { + "epoch": 0.3195203538852432, + "grad_norm": 6.393892115170374, + "learning_rate": 7.958495967572842e-06, + "loss": 17.7184, + "step": 17480 + }, + { + "epoch": 0.3195386330816897, + "grad_norm": 6.378799462589449, + "learning_rate": 7.958257330072255e-06, + "loss": 17.5559, + "step": 17481 + }, + { + "epoch": 0.31955691227813626, + "grad_norm": 6.667832837994266, + "learning_rate": 7.95801868220323e-06, + "loss": 17.6016, + "step": 17482 + }, + { + "epoch": 0.3195751914745828, + "grad_norm": 6.471674145106354, + "learning_rate": 7.95778002396661e-06, + "loss": 17.5118, + "step": 17483 + }, + { + "epoch": 0.3195934706710293, + "grad_norm": 6.955999185552674, + "learning_rate": 7.957541355363225e-06, + "loss": 17.8465, + "step": 17484 + }, + { + "epoch": 0.3196117498674758, + "grad_norm": 8.602094459095065, + "learning_rate": 7.957302676393916e-06, + "loss": 18.5077, + "step": 17485 + }, + { + "epoch": 0.31963002906392235, + "grad_norm": 7.357329600585394, + "learning_rate": 7.957063987059517e-06, + "loss": 17.6965, + "step": 17486 + }, + { + "epoch": 0.3196483082603689, + "grad_norm": 6.773284659372345, + "learning_rate": 7.956825287360864e-06, + 
"loss": 17.6129, + "step": 17487 + }, + { + "epoch": 0.3196665874568154, + "grad_norm": 7.487286498452414, + "learning_rate": 7.956586577298798e-06, + "loss": 18.1413, + "step": 17488 + }, + { + "epoch": 0.3196848666532619, + "grad_norm": 7.478385783668679, + "learning_rate": 7.95634785687415e-06, + "loss": 17.8402, + "step": 17489 + }, + { + "epoch": 0.31970314584970844, + "grad_norm": 6.650266303301079, + "learning_rate": 7.956109126087759e-06, + "loss": 17.7921, + "step": 17490 + }, + { + "epoch": 0.319721425046155, + "grad_norm": 8.307400356261565, + "learning_rate": 7.955870384940463e-06, + "loss": 18.2952, + "step": 17491 + }, + { + "epoch": 0.3197397042426015, + "grad_norm": 5.796790357817366, + "learning_rate": 7.955631633433099e-06, + "loss": 17.3616, + "step": 17492 + }, + { + "epoch": 0.31975798343904804, + "grad_norm": 7.4729494518302815, + "learning_rate": 7.955392871566501e-06, + "loss": 18.1274, + "step": 17493 + }, + { + "epoch": 0.3197762626354945, + "grad_norm": 7.555378140360098, + "learning_rate": 7.955154099341509e-06, + "loss": 17.7562, + "step": 17494 + }, + { + "epoch": 0.31979454183194106, + "grad_norm": 5.510249169967757, + "learning_rate": 7.954915316758955e-06, + "loss": 17.403, + "step": 17495 + }, + { + "epoch": 0.3198128210283876, + "grad_norm": 7.153243771245886, + "learning_rate": 7.954676523819682e-06, + "loss": 17.3068, + "step": 17496 + }, + { + "epoch": 0.31983110022483413, + "grad_norm": 6.570997957736306, + "learning_rate": 7.954437720524524e-06, + "loss": 17.472, + "step": 17497 + }, + { + "epoch": 0.31984937942128067, + "grad_norm": 7.388369987731288, + "learning_rate": 7.954198906874318e-06, + "loss": 17.7279, + "step": 17498 + }, + { + "epoch": 0.31986765861772715, + "grad_norm": 5.970687406083828, + "learning_rate": 7.953960082869901e-06, + "loss": 17.2817, + "step": 17499 + }, + { + "epoch": 0.3198859378141737, + "grad_norm": 5.885900804608633, + "learning_rate": 7.95372124851211e-06, + "loss": 17.4165, + "step": 17500 + }, + { + "epoch": 0.3199042170106202, + "grad_norm": 5.55017902287351, + "learning_rate": 7.953482403801782e-06, + "loss": 17.2828, + "step": 17501 + }, + { + "epoch": 0.31992249620706675, + "grad_norm": 8.790762996276861, + "learning_rate": 7.953243548739756e-06, + "loss": 18.4818, + "step": 17502 + }, + { + "epoch": 0.3199407754035133, + "grad_norm": 6.927184560188266, + "learning_rate": 7.953004683326867e-06, + "loss": 17.8649, + "step": 17503 + }, + { + "epoch": 0.31995905459995977, + "grad_norm": 5.5768742281237955, + "learning_rate": 7.952765807563952e-06, + "loss": 17.3118, + "step": 17504 + }, + { + "epoch": 0.3199773337964063, + "grad_norm": 6.95477515024145, + "learning_rate": 7.952526921451849e-06, + "loss": 18.0654, + "step": 17505 + }, + { + "epoch": 0.31999561299285284, + "grad_norm": 6.230271429962698, + "learning_rate": 7.952288024991398e-06, + "loss": 17.417, + "step": 17506 + }, + { + "epoch": 0.3200138921892994, + "grad_norm": 6.747843091399649, + "learning_rate": 7.952049118183429e-06, + "loss": 17.6285, + "step": 17507 + }, + { + "epoch": 0.32003217138574586, + "grad_norm": 7.916312733795419, + "learning_rate": 7.951810201028787e-06, + "loss": 18.3359, + "step": 17508 + }, + { + "epoch": 0.3200504505821924, + "grad_norm": 6.5808985123475665, + "learning_rate": 7.951571273528307e-06, + "loss": 17.5606, + "step": 17509 + }, + { + "epoch": 0.3200687297786389, + "grad_norm": 6.519203520268625, + "learning_rate": 7.951332335682823e-06, + "loss": 17.5328, + "step": 17510 + }, + { + "epoch": 0.32008700897508546, + 
"grad_norm": 5.964109407732359, + "learning_rate": 7.951093387493179e-06, + "loss": 17.5441, + "step": 17511 + }, + { + "epoch": 0.320105288171532, + "grad_norm": 7.177621814938111, + "learning_rate": 7.950854428960207e-06, + "loss": 17.819, + "step": 17512 + }, + { + "epoch": 0.3201235673679785, + "grad_norm": 6.044403665427599, + "learning_rate": 7.950615460084745e-06, + "loss": 17.5882, + "step": 17513 + }, + { + "epoch": 0.320141846564425, + "grad_norm": 7.707237807010002, + "learning_rate": 7.950376480867633e-06, + "loss": 17.7754, + "step": 17514 + }, + { + "epoch": 0.32016012576087155, + "grad_norm": 6.508442052553958, + "learning_rate": 7.950137491309708e-06, + "loss": 17.3819, + "step": 17515 + }, + { + "epoch": 0.3201784049573181, + "grad_norm": 6.812862237743727, + "learning_rate": 7.949898491411807e-06, + "loss": 17.7334, + "step": 17516 + }, + { + "epoch": 0.3201966841537646, + "grad_norm": 5.788706744289365, + "learning_rate": 7.949659481174768e-06, + "loss": 17.2861, + "step": 17517 + }, + { + "epoch": 0.3202149633502111, + "grad_norm": 6.495981215022219, + "learning_rate": 7.949420460599425e-06, + "loss": 17.8605, + "step": 17518 + }, + { + "epoch": 0.32023324254665764, + "grad_norm": 7.151300394292834, + "learning_rate": 7.949181429686624e-06, + "loss": 18.113, + "step": 17519 + }, + { + "epoch": 0.3202515217431042, + "grad_norm": 5.538760547001581, + "learning_rate": 7.948942388437195e-06, + "loss": 17.3418, + "step": 17520 + }, + { + "epoch": 0.3202698009395507, + "grad_norm": 6.04893879279129, + "learning_rate": 7.94870333685198e-06, + "loss": 17.2436, + "step": 17521 + }, + { + "epoch": 0.32028808013599724, + "grad_norm": 6.735375023901454, + "learning_rate": 7.948464274931816e-06, + "loss": 18.0333, + "step": 17522 + }, + { + "epoch": 0.3203063593324437, + "grad_norm": 6.345674222165929, + "learning_rate": 7.94822520267754e-06, + "loss": 17.7165, + "step": 17523 + }, + { + "epoch": 0.32032463852889026, + "grad_norm": 6.817369136315513, + "learning_rate": 7.94798612008999e-06, + "loss": 17.7406, + "step": 17524 + }, + { + "epoch": 0.3203429177253368, + "grad_norm": 5.850563317955968, + "learning_rate": 7.947747027170005e-06, + "loss": 17.3355, + "step": 17525 + }, + { + "epoch": 0.32036119692178333, + "grad_norm": 6.867214062864711, + "learning_rate": 7.947507923918423e-06, + "loss": 17.3336, + "step": 17526 + }, + { + "epoch": 0.32037947611822987, + "grad_norm": 6.268486565383152, + "learning_rate": 7.94726881033608e-06, + "loss": 17.3672, + "step": 17527 + }, + { + "epoch": 0.32039775531467635, + "grad_norm": 6.667181512188055, + "learning_rate": 7.947029686423818e-06, + "loss": 17.8645, + "step": 17528 + }, + { + "epoch": 0.3204160345111229, + "grad_norm": 7.25862817451171, + "learning_rate": 7.94679055218247e-06, + "loss": 18.0934, + "step": 17529 + }, + { + "epoch": 0.3204343137075694, + "grad_norm": 6.6850707239734835, + "learning_rate": 7.94655140761288e-06, + "loss": 17.288, + "step": 17530 + }, + { + "epoch": 0.32045259290401595, + "grad_norm": 7.096240594452438, + "learning_rate": 7.94631225271588e-06, + "loss": 18.0013, + "step": 17531 + }, + { + "epoch": 0.3204708721004625, + "grad_norm": 6.55634688373912, + "learning_rate": 7.946073087492311e-06, + "loss": 17.6222, + "step": 17532 + }, + { + "epoch": 0.32048915129690897, + "grad_norm": 6.717447999377284, + "learning_rate": 7.945833911943013e-06, + "loss": 17.8644, + "step": 17533 + }, + { + "epoch": 0.3205074304933555, + "grad_norm": 6.8972307919127305, + "learning_rate": 7.945594726068823e-06, + "loss": 
17.9629, + "step": 17534 + }, + { + "epoch": 0.32052570968980204, + "grad_norm": 6.469995544451611, + "learning_rate": 7.945355529870578e-06, + "loss": 17.4238, + "step": 17535 + }, + { + "epoch": 0.3205439888862486, + "grad_norm": 5.857223663272877, + "learning_rate": 7.945116323349119e-06, + "loss": 17.3366, + "step": 17536 + }, + { + "epoch": 0.3205622680826951, + "grad_norm": 7.036488914584075, + "learning_rate": 7.944877106505282e-06, + "loss": 17.7789, + "step": 17537 + }, + { + "epoch": 0.3205805472791416, + "grad_norm": 6.284735969025044, + "learning_rate": 7.944637879339907e-06, + "loss": 17.4463, + "step": 17538 + }, + { + "epoch": 0.32059882647558813, + "grad_norm": 5.871437555277933, + "learning_rate": 7.94439864185383e-06, + "loss": 17.4446, + "step": 17539 + }, + { + "epoch": 0.32061710567203466, + "grad_norm": 6.1560330642973335, + "learning_rate": 7.944159394047893e-06, + "loss": 17.5608, + "step": 17540 + }, + { + "epoch": 0.3206353848684812, + "grad_norm": 5.920449784092316, + "learning_rate": 7.943920135922932e-06, + "loss": 17.4074, + "step": 17541 + }, + { + "epoch": 0.3206536640649277, + "grad_norm": 6.87684239240657, + "learning_rate": 7.943680867479786e-06, + "loss": 17.8309, + "step": 17542 + }, + { + "epoch": 0.3206719432613742, + "grad_norm": 6.122805539581964, + "learning_rate": 7.943441588719294e-06, + "loss": 17.3828, + "step": 17543 + }, + { + "epoch": 0.32069022245782075, + "grad_norm": 8.136131342916752, + "learning_rate": 7.943202299642297e-06, + "loss": 18.3353, + "step": 17544 + }, + { + "epoch": 0.3207085016542673, + "grad_norm": 8.59593021747093, + "learning_rate": 7.942963000249628e-06, + "loss": 18.373, + "step": 17545 + }, + { + "epoch": 0.3207267808507138, + "grad_norm": 6.971687487346667, + "learning_rate": 7.94272369054213e-06, + "loss": 17.6022, + "step": 17546 + }, + { + "epoch": 0.3207450600471603, + "grad_norm": 5.31227976173666, + "learning_rate": 7.942484370520643e-06, + "loss": 17.1144, + "step": 17547 + }, + { + "epoch": 0.32076333924360684, + "grad_norm": 6.780034685837173, + "learning_rate": 7.942245040186e-06, + "loss": 17.7526, + "step": 17548 + }, + { + "epoch": 0.3207816184400534, + "grad_norm": 5.263989698779791, + "learning_rate": 7.942005699539046e-06, + "loss": 17.1315, + "step": 17549 + }, + { + "epoch": 0.3207998976364999, + "grad_norm": 7.301086640136754, + "learning_rate": 7.941766348580617e-06, + "loss": 17.9126, + "step": 17550 + }, + { + "epoch": 0.32081817683294644, + "grad_norm": 6.426251101219799, + "learning_rate": 7.941526987311552e-06, + "loss": 17.6605, + "step": 17551 + }, + { + "epoch": 0.3208364560293929, + "grad_norm": 6.434784824376673, + "learning_rate": 7.941287615732689e-06, + "loss": 17.6406, + "step": 17552 + }, + { + "epoch": 0.32085473522583946, + "grad_norm": 7.409805954713263, + "learning_rate": 7.94104823384487e-06, + "loss": 18.2674, + "step": 17553 + }, + { + "epoch": 0.320873014422286, + "grad_norm": 5.58741255493611, + "learning_rate": 7.940808841648932e-06, + "loss": 17.3585, + "step": 17554 + }, + { + "epoch": 0.32089129361873253, + "grad_norm": 7.277771121319455, + "learning_rate": 7.940569439145714e-06, + "loss": 18.0897, + "step": 17555 + }, + { + "epoch": 0.32090957281517907, + "grad_norm": 6.72962489040817, + "learning_rate": 7.940330026336055e-06, + "loss": 17.694, + "step": 17556 + }, + { + "epoch": 0.32092785201162555, + "grad_norm": 5.180622443703075, + "learning_rate": 7.940090603220793e-06, + "loss": 16.8818, + "step": 17557 + }, + { + "epoch": 0.3209461312080721, + "grad_norm": 
5.899524899822945, + "learning_rate": 7.93985116980077e-06, + "loss": 17.3021, + "step": 17558 + }, + { + "epoch": 0.3209644104045186, + "grad_norm": 6.69713386327496, + "learning_rate": 7.939611726076823e-06, + "loss": 17.6658, + "step": 17559 + }, + { + "epoch": 0.32098268960096515, + "grad_norm": 7.072623495168213, + "learning_rate": 7.939372272049792e-06, + "loss": 17.8161, + "step": 17560 + }, + { + "epoch": 0.3210009687974117, + "grad_norm": 6.802736690860457, + "learning_rate": 7.939132807720518e-06, + "loss": 17.676, + "step": 17561 + }, + { + "epoch": 0.32101924799385817, + "grad_norm": 4.864000088094914, + "learning_rate": 7.938893333089837e-06, + "loss": 16.9136, + "step": 17562 + }, + { + "epoch": 0.3210375271903047, + "grad_norm": 5.849446161457291, + "learning_rate": 7.93865384815859e-06, + "loss": 17.5182, + "step": 17563 + }, + { + "epoch": 0.32105580638675124, + "grad_norm": 6.494365172480418, + "learning_rate": 7.938414352927618e-06, + "loss": 17.6016, + "step": 17564 + }, + { + "epoch": 0.3210740855831978, + "grad_norm": 6.639194863257415, + "learning_rate": 7.938174847397758e-06, + "loss": 17.6989, + "step": 17565 + }, + { + "epoch": 0.3210923647796443, + "grad_norm": 7.944189344042548, + "learning_rate": 7.937935331569848e-06, + "loss": 18.2166, + "step": 17566 + }, + { + "epoch": 0.3211106439760908, + "grad_norm": 7.286882368237209, + "learning_rate": 7.93769580544473e-06, + "loss": 18.2911, + "step": 17567 + }, + { + "epoch": 0.32112892317253733, + "grad_norm": 6.926221020780543, + "learning_rate": 7.937456269023245e-06, + "loss": 17.6126, + "step": 17568 + }, + { + "epoch": 0.32114720236898386, + "grad_norm": 6.692531238903858, + "learning_rate": 7.93721672230623e-06, + "loss": 17.605, + "step": 17569 + }, + { + "epoch": 0.3211654815654304, + "grad_norm": 5.020600207226189, + "learning_rate": 7.936977165294525e-06, + "loss": 16.8796, + "step": 17570 + }, + { + "epoch": 0.32118376076187694, + "grad_norm": 6.703917254732207, + "learning_rate": 7.93673759798897e-06, + "loss": 17.6535, + "step": 17571 + }, + { + "epoch": 0.3212020399583234, + "grad_norm": 6.488071635110896, + "learning_rate": 7.936498020390404e-06, + "loss": 17.7118, + "step": 17572 + }, + { + "epoch": 0.32122031915476995, + "grad_norm": 7.154042169177971, + "learning_rate": 7.936258432499669e-06, + "loss": 17.6759, + "step": 17573 + }, + { + "epoch": 0.3212385983512165, + "grad_norm": 8.49823162600307, + "learning_rate": 7.9360188343176e-06, + "loss": 18.0933, + "step": 17574 + }, + { + "epoch": 0.321256877547663, + "grad_norm": 8.807821376726649, + "learning_rate": 7.935779225845042e-06, + "loss": 18.4572, + "step": 17575 + }, + { + "epoch": 0.3212751567441095, + "grad_norm": 6.9730719662996, + "learning_rate": 7.935539607082832e-06, + "loss": 17.8311, + "step": 17576 + }, + { + "epoch": 0.32129343594055604, + "grad_norm": 6.399781052021891, + "learning_rate": 7.935299978031811e-06, + "loss": 17.7642, + "step": 17577 + }, + { + "epoch": 0.3213117151370026, + "grad_norm": 6.923258449578971, + "learning_rate": 7.935060338692817e-06, + "loss": 17.9463, + "step": 17578 + }, + { + "epoch": 0.3213299943334491, + "grad_norm": 5.661210167097001, + "learning_rate": 7.934820689066693e-06, + "loss": 17.1465, + "step": 17579 + }, + { + "epoch": 0.32134827352989564, + "grad_norm": 6.156655129376134, + "learning_rate": 7.934581029154276e-06, + "loss": 17.6847, + "step": 17580 + }, + { + "epoch": 0.3213665527263421, + "grad_norm": 6.773443607982263, + "learning_rate": 7.934341358956409e-06, + "loss": 17.7063, + 
"step": 17581 + }, + { + "epoch": 0.32138483192278866, + "grad_norm": 6.826103831707497, + "learning_rate": 7.934101678473926e-06, + "loss": 17.7705, + "step": 17582 + }, + { + "epoch": 0.3214031111192352, + "grad_norm": 5.698977585287464, + "learning_rate": 7.933861987707675e-06, + "loss": 17.18, + "step": 17583 + }, + { + "epoch": 0.32142139031568173, + "grad_norm": 6.760545354895173, + "learning_rate": 7.93362228665849e-06, + "loss": 17.9606, + "step": 17584 + }, + { + "epoch": 0.32143966951212827, + "grad_norm": 6.4923707747208566, + "learning_rate": 7.933382575327216e-06, + "loss": 17.5494, + "step": 17585 + }, + { + "epoch": 0.32145794870857475, + "grad_norm": 5.761927297438746, + "learning_rate": 7.933142853714689e-06, + "loss": 17.4916, + "step": 17586 + }, + { + "epoch": 0.3214762279050213, + "grad_norm": 7.406766303230393, + "learning_rate": 7.932903121821749e-06, + "loss": 17.8947, + "step": 17587 + }, + { + "epoch": 0.3214945071014678, + "grad_norm": 5.591415923018626, + "learning_rate": 7.93266337964924e-06, + "loss": 17.3074, + "step": 17588 + }, + { + "epoch": 0.32151278629791435, + "grad_norm": 6.4557917353124585, + "learning_rate": 7.932423627198e-06, + "loss": 17.693, + "step": 17589 + }, + { + "epoch": 0.3215310654943609, + "grad_norm": 5.755025346279173, + "learning_rate": 7.932183864468872e-06, + "loss": 17.3218, + "step": 17590 + }, + { + "epoch": 0.32154934469080737, + "grad_norm": 6.200545828607429, + "learning_rate": 7.93194409146269e-06, + "loss": 17.5762, + "step": 17591 + }, + { + "epoch": 0.3215676238872539, + "grad_norm": 5.663094746456993, + "learning_rate": 7.931704308180302e-06, + "loss": 17.2843, + "step": 17592 + }, + { + "epoch": 0.32158590308370044, + "grad_norm": 6.467995005492069, + "learning_rate": 7.931464514622543e-06, + "loss": 17.4465, + "step": 17593 + }, + { + "epoch": 0.321604182280147, + "grad_norm": 5.806689306895789, + "learning_rate": 7.931224710790256e-06, + "loss": 17.2962, + "step": 17594 + }, + { + "epoch": 0.3216224614765935, + "grad_norm": 6.154533597171496, + "learning_rate": 7.93098489668428e-06, + "loss": 17.3486, + "step": 17595 + }, + { + "epoch": 0.32164074067304, + "grad_norm": 6.699972068362086, + "learning_rate": 7.930745072305455e-06, + "loss": 17.5267, + "step": 17596 + }, + { + "epoch": 0.32165901986948653, + "grad_norm": 7.016208914675771, + "learning_rate": 7.930505237654624e-06, + "loss": 17.5877, + "step": 17597 + }, + { + "epoch": 0.32167729906593306, + "grad_norm": 6.944053439932251, + "learning_rate": 7.930265392732627e-06, + "loss": 17.6566, + "step": 17598 + }, + { + "epoch": 0.3216955782623796, + "grad_norm": 6.254643436663572, + "learning_rate": 7.930025537540304e-06, + "loss": 17.6416, + "step": 17599 + }, + { + "epoch": 0.32171385745882614, + "grad_norm": 6.85258363988787, + "learning_rate": 7.929785672078496e-06, + "loss": 17.6077, + "step": 17600 + }, + { + "epoch": 0.3217321366552726, + "grad_norm": 6.205739028806184, + "learning_rate": 7.929545796348041e-06, + "loss": 17.5122, + "step": 17601 + }, + { + "epoch": 0.32175041585171915, + "grad_norm": 6.310597843555779, + "learning_rate": 7.929305910349786e-06, + "loss": 17.2942, + "step": 17602 + }, + { + "epoch": 0.3217686950481657, + "grad_norm": 7.499436544521757, + "learning_rate": 7.929066014084566e-06, + "loss": 17.749, + "step": 17603 + }, + { + "epoch": 0.3217869742446122, + "grad_norm": 6.284919539679724, + "learning_rate": 7.928826107553224e-06, + "loss": 17.513, + "step": 17604 + }, + { + "epoch": 0.32180525344105876, + "grad_norm": 
7.584473562317276, + "learning_rate": 7.9285861907566e-06, + "loss": 17.8977, + "step": 17605 + }, + { + "epoch": 0.32182353263750524, + "grad_norm": 7.055192555998346, + "learning_rate": 7.928346263695537e-06, + "loss": 17.7729, + "step": 17606 + }, + { + "epoch": 0.3218418118339518, + "grad_norm": 8.069726630065611, + "learning_rate": 7.928106326370872e-06, + "loss": 18.356, + "step": 17607 + }, + { + "epoch": 0.3218600910303983, + "grad_norm": 5.606543136716627, + "learning_rate": 7.92786637878345e-06, + "loss": 17.06, + "step": 17608 + }, + { + "epoch": 0.32187837022684485, + "grad_norm": 7.076976047593216, + "learning_rate": 7.927626420934112e-06, + "loss": 17.3924, + "step": 17609 + }, + { + "epoch": 0.3218966494232913, + "grad_norm": 6.147490011363302, + "learning_rate": 7.927386452823695e-06, + "loss": 17.3598, + "step": 17610 + }, + { + "epoch": 0.32191492861973786, + "grad_norm": 6.665162055800956, + "learning_rate": 7.927146474453042e-06, + "loss": 17.9429, + "step": 17611 + }, + { + "epoch": 0.3219332078161844, + "grad_norm": 5.36051715475057, + "learning_rate": 7.926906485822998e-06, + "loss": 17.1047, + "step": 17612 + }, + { + "epoch": 0.32195148701263093, + "grad_norm": 11.761879936366626, + "learning_rate": 7.926666486934398e-06, + "loss": 18.6834, + "step": 17613 + }, + { + "epoch": 0.32196976620907747, + "grad_norm": 5.695281167138567, + "learning_rate": 7.926426477788087e-06, + "loss": 17.3025, + "step": 17614 + }, + { + "epoch": 0.32198804540552395, + "grad_norm": 5.154080125994498, + "learning_rate": 7.926186458384904e-06, + "loss": 17.0674, + "step": 17615 + }, + { + "epoch": 0.3220063246019705, + "grad_norm": 6.038050596825892, + "learning_rate": 7.925946428725693e-06, + "loss": 17.3455, + "step": 17616 + }, + { + "epoch": 0.322024603798417, + "grad_norm": 5.41747520107593, + "learning_rate": 7.925706388811293e-06, + "loss": 17.1618, + "step": 17617 + }, + { + "epoch": 0.32204288299486356, + "grad_norm": 5.824629910042442, + "learning_rate": 7.925466338642545e-06, + "loss": 17.137, + "step": 17618 + }, + { + "epoch": 0.3220611621913101, + "grad_norm": 6.258364118403654, + "learning_rate": 7.925226278220292e-06, + "loss": 17.6541, + "step": 17619 + }, + { + "epoch": 0.32207944138775657, + "grad_norm": 6.918020430505713, + "learning_rate": 7.924986207545376e-06, + "loss": 17.6106, + "step": 17620 + }, + { + "epoch": 0.3220977205842031, + "grad_norm": 5.210507486396455, + "learning_rate": 7.924746126618635e-06, + "loss": 17.0473, + "step": 17621 + }, + { + "epoch": 0.32211599978064964, + "grad_norm": 8.335482754109005, + "learning_rate": 7.924506035440914e-06, + "loss": 18.4042, + "step": 17622 + }, + { + "epoch": 0.3221342789770962, + "grad_norm": 5.207931457502078, + "learning_rate": 7.924265934013054e-06, + "loss": 16.9483, + "step": 17623 + }, + { + "epoch": 0.3221525581735427, + "grad_norm": 8.92538790907973, + "learning_rate": 7.924025822335895e-06, + "loss": 18.117, + "step": 17624 + }, + { + "epoch": 0.3221708373699892, + "grad_norm": 6.976151972593718, + "learning_rate": 7.923785700410276e-06, + "loss": 18.046, + "step": 17625 + }, + { + "epoch": 0.32218911656643573, + "grad_norm": 7.820570273505529, + "learning_rate": 7.923545568237046e-06, + "loss": 17.9882, + "step": 17626 + }, + { + "epoch": 0.32220739576288226, + "grad_norm": 6.724563730350931, + "learning_rate": 7.92330542581704e-06, + "loss": 17.5323, + "step": 17627 + }, + { + "epoch": 0.3222256749593288, + "grad_norm": 6.692164008823655, + "learning_rate": 7.923065273151103e-06, + "loss": 17.6357, + 
"step": 17628 + }, + { + "epoch": 0.32224395415577534, + "grad_norm": 5.977387225591005, + "learning_rate": 7.922825110240078e-06, + "loss": 17.2419, + "step": 17629 + }, + { + "epoch": 0.3222622333522218, + "grad_norm": 6.459570328727786, + "learning_rate": 7.922584937084802e-06, + "loss": 17.4161, + "step": 17630 + }, + { + "epoch": 0.32228051254866835, + "grad_norm": 6.683616062881609, + "learning_rate": 7.922344753686119e-06, + "loss": 17.7386, + "step": 17631 + }, + { + "epoch": 0.3222987917451149, + "grad_norm": 5.1633507876631, + "learning_rate": 7.922104560044872e-06, + "loss": 17.0346, + "step": 17632 + }, + { + "epoch": 0.3223170709415614, + "grad_norm": 6.779004434736978, + "learning_rate": 7.921864356161904e-06, + "loss": 17.5749, + "step": 17633 + }, + { + "epoch": 0.32233535013800796, + "grad_norm": 6.6448000556715705, + "learning_rate": 7.921624142038053e-06, + "loss": 17.4547, + "step": 17634 + }, + { + "epoch": 0.32235362933445444, + "grad_norm": 6.9512427461943025, + "learning_rate": 7.921383917674164e-06, + "loss": 17.5671, + "step": 17635 + }, + { + "epoch": 0.322371908530901, + "grad_norm": 7.072902040991003, + "learning_rate": 7.921143683071076e-06, + "loss": 17.8704, + "step": 17636 + }, + { + "epoch": 0.3223901877273475, + "grad_norm": 6.973465940446415, + "learning_rate": 7.920903438229635e-06, + "loss": 17.8397, + "step": 17637 + }, + { + "epoch": 0.32240846692379405, + "grad_norm": 5.468724935219691, + "learning_rate": 7.920663183150679e-06, + "loss": 17.2447, + "step": 17638 + }, + { + "epoch": 0.3224267461202406, + "grad_norm": 8.139759119222475, + "learning_rate": 7.920422917835054e-06, + "loss": 18.1, + "step": 17639 + }, + { + "epoch": 0.32244502531668706, + "grad_norm": 5.998672892071717, + "learning_rate": 7.920182642283598e-06, + "loss": 17.4788, + "step": 17640 + }, + { + "epoch": 0.3224633045131336, + "grad_norm": 5.916220502412665, + "learning_rate": 7.919942356497157e-06, + "loss": 17.305, + "step": 17641 + }, + { + "epoch": 0.32248158370958013, + "grad_norm": 6.748695405789091, + "learning_rate": 7.91970206047657e-06, + "loss": 17.831, + "step": 17642 + }, + { + "epoch": 0.32249986290602667, + "grad_norm": 6.75706245539897, + "learning_rate": 7.91946175422268e-06, + "loss": 17.7215, + "step": 17643 + }, + { + "epoch": 0.32251814210247315, + "grad_norm": 5.84519637319865, + "learning_rate": 7.919221437736333e-06, + "loss": 17.45, + "step": 17644 + }, + { + "epoch": 0.3225364212989197, + "grad_norm": 5.426434874888759, + "learning_rate": 7.918981111018365e-06, + "loss": 17.0958, + "step": 17645 + }, + { + "epoch": 0.3225547004953662, + "grad_norm": 8.686339613866407, + "learning_rate": 7.918740774069623e-06, + "loss": 18.4381, + "step": 17646 + }, + { + "epoch": 0.32257297969181276, + "grad_norm": 6.0229604203282525, + "learning_rate": 7.91850042689095e-06, + "loss": 17.4061, + "step": 17647 + }, + { + "epoch": 0.3225912588882593, + "grad_norm": 7.045232420341901, + "learning_rate": 7.918260069483182e-06, + "loss": 17.9396, + "step": 17648 + }, + { + "epoch": 0.32260953808470577, + "grad_norm": 7.3879756288542335, + "learning_rate": 7.918019701847168e-06, + "loss": 18.1053, + "step": 17649 + }, + { + "epoch": 0.3226278172811523, + "grad_norm": 7.417586721580354, + "learning_rate": 7.917779323983748e-06, + "loss": 18.019, + "step": 17650 + }, + { + "epoch": 0.32264609647759884, + "grad_norm": 7.810734338045706, + "learning_rate": 7.917538935893765e-06, + "loss": 18.1668, + "step": 17651 + }, + { + "epoch": 0.3226643756740454, + "grad_norm": 
5.962226528997857, + "learning_rate": 7.91729853757806e-06, + "loss": 17.1979, + "step": 17652 + }, + { + "epoch": 0.3226826548704919, + "grad_norm": 6.282755958032387, + "learning_rate": 7.917058129037478e-06, + "loss": 17.5572, + "step": 17653 + }, + { + "epoch": 0.3227009340669384, + "grad_norm": 7.2950753549441245, + "learning_rate": 7.91681771027286e-06, + "loss": 17.8905, + "step": 17654 + }, + { + "epoch": 0.32271921326338493, + "grad_norm": 6.718396391577625, + "learning_rate": 7.916577281285048e-06, + "loss": 17.5792, + "step": 17655 + }, + { + "epoch": 0.32273749245983147, + "grad_norm": 5.407450209892771, + "learning_rate": 7.916336842074888e-06, + "loss": 17.3437, + "step": 17656 + }, + { + "epoch": 0.322755771656278, + "grad_norm": 5.827207139776879, + "learning_rate": 7.916096392643218e-06, + "loss": 17.417, + "step": 17657 + }, + { + "epoch": 0.32277405085272454, + "grad_norm": 6.742161879018509, + "learning_rate": 7.915855932990884e-06, + "loss": 17.5526, + "step": 17658 + }, + { + "epoch": 0.322792330049171, + "grad_norm": 6.818318909808149, + "learning_rate": 7.915615463118729e-06, + "loss": 17.5256, + "step": 17659 + }, + { + "epoch": 0.32281060924561755, + "grad_norm": 6.824640501856777, + "learning_rate": 7.915374983027593e-06, + "loss": 17.8469, + "step": 17660 + }, + { + "epoch": 0.3228288884420641, + "grad_norm": 6.830626955606592, + "learning_rate": 7.915134492718323e-06, + "loss": 17.6978, + "step": 17661 + }, + { + "epoch": 0.3228471676385106, + "grad_norm": 7.148219238642276, + "learning_rate": 7.914893992191759e-06, + "loss": 17.8267, + "step": 17662 + }, + { + "epoch": 0.32286544683495716, + "grad_norm": 7.88725752829819, + "learning_rate": 7.914653481448742e-06, + "loss": 18.2131, + "step": 17663 + }, + { + "epoch": 0.32288372603140364, + "grad_norm": 6.4780235233255095, + "learning_rate": 7.914412960490118e-06, + "loss": 17.4028, + "step": 17664 + }, + { + "epoch": 0.3229020052278502, + "grad_norm": 5.945918683748106, + "learning_rate": 7.914172429316733e-06, + "loss": 17.4281, + "step": 17665 + }, + { + "epoch": 0.3229202844242967, + "grad_norm": 6.167771083235213, + "learning_rate": 7.913931887929423e-06, + "loss": 17.312, + "step": 17666 + }, + { + "epoch": 0.32293856362074325, + "grad_norm": 8.232554697348833, + "learning_rate": 7.913691336329037e-06, + "loss": 18.3315, + "step": 17667 + }, + { + "epoch": 0.3229568428171898, + "grad_norm": 6.99789188139659, + "learning_rate": 7.913450774516415e-06, + "loss": 17.6496, + "step": 17668 + }, + { + "epoch": 0.32297512201363626, + "grad_norm": 6.172405494613258, + "learning_rate": 7.9132102024924e-06, + "loss": 17.3793, + "step": 17669 + }, + { + "epoch": 0.3229934012100828, + "grad_norm": 6.875727755040785, + "learning_rate": 7.912969620257835e-06, + "loss": 17.3903, + "step": 17670 + }, + { + "epoch": 0.32301168040652933, + "grad_norm": 8.725162881721227, + "learning_rate": 7.912729027813568e-06, + "loss": 18.6348, + "step": 17671 + }, + { + "epoch": 0.32302995960297587, + "grad_norm": 5.532490516428547, + "learning_rate": 7.912488425160436e-06, + "loss": 17.2522, + "step": 17672 + }, + { + "epoch": 0.3230482387994224, + "grad_norm": 5.84360916027288, + "learning_rate": 7.912247812299283e-06, + "loss": 17.2939, + "step": 17673 + }, + { + "epoch": 0.3230665179958689, + "grad_norm": 8.111220523515845, + "learning_rate": 7.912007189230957e-06, + "loss": 18.5528, + "step": 17674 + }, + { + "epoch": 0.3230847971923154, + "grad_norm": 7.7266110967014425, + "learning_rate": 7.911766555956297e-06, + "loss": 18.3292, 
+ "step": 17675 + }, + { + "epoch": 0.32310307638876196, + "grad_norm": 6.113899516838553, + "learning_rate": 7.91152591247615e-06, + "loss": 17.3183, + "step": 17676 + }, + { + "epoch": 0.3231213555852085, + "grad_norm": 4.764840947501727, + "learning_rate": 7.911285258791355e-06, + "loss": 17.0315, + "step": 17677 + }, + { + "epoch": 0.32313963478165497, + "grad_norm": 5.930207843666761, + "learning_rate": 7.91104459490276e-06, + "loss": 17.4221, + "step": 17678 + }, + { + "epoch": 0.3231579139781015, + "grad_norm": 6.575485128174704, + "learning_rate": 7.910803920811203e-06, + "loss": 17.528, + "step": 17679 + }, + { + "epoch": 0.32317619317454804, + "grad_norm": 7.029879562926375, + "learning_rate": 7.910563236517534e-06, + "loss": 17.6705, + "step": 17680 + }, + { + "epoch": 0.3231944723709946, + "grad_norm": 7.354106194436128, + "learning_rate": 7.910322542022591e-06, + "loss": 17.7459, + "step": 17681 + }, + { + "epoch": 0.3232127515674411, + "grad_norm": 6.873088354184548, + "learning_rate": 7.91008183732722e-06, + "loss": 17.7918, + "step": 17682 + }, + { + "epoch": 0.3232310307638876, + "grad_norm": 6.241485631431879, + "learning_rate": 7.909841122432269e-06, + "loss": 17.7582, + "step": 17683 + }, + { + "epoch": 0.32324930996033413, + "grad_norm": 6.642653081716574, + "learning_rate": 7.909600397338573e-06, + "loss": 17.5774, + "step": 17684 + }, + { + "epoch": 0.32326758915678067, + "grad_norm": 7.312009719374648, + "learning_rate": 7.909359662046983e-06, + "loss": 17.7914, + "step": 17685 + }, + { + "epoch": 0.3232858683532272, + "grad_norm": 7.6530235791674, + "learning_rate": 7.909118916558338e-06, + "loss": 18.1013, + "step": 17686 + }, + { + "epoch": 0.32330414754967374, + "grad_norm": 5.896039758083264, + "learning_rate": 7.908878160873483e-06, + "loss": 17.2169, + "step": 17687 + }, + { + "epoch": 0.3233224267461202, + "grad_norm": 7.229472362374189, + "learning_rate": 7.908637394993265e-06, + "loss": 17.7046, + "step": 17688 + }, + { + "epoch": 0.32334070594256675, + "grad_norm": 6.181143582624684, + "learning_rate": 7.90839661891852e-06, + "loss": 17.3552, + "step": 17689 + }, + { + "epoch": 0.3233589851390133, + "grad_norm": 6.751168869331026, + "learning_rate": 7.908155832650103e-06, + "loss": 17.6183, + "step": 17690 + }, + { + "epoch": 0.3233772643354598, + "grad_norm": 5.3818619076095136, + "learning_rate": 7.90791503618885e-06, + "loss": 17.0947, + "step": 17691 + }, + { + "epoch": 0.32339554353190636, + "grad_norm": 7.173813864648602, + "learning_rate": 7.907674229535606e-06, + "loss": 17.7175, + "step": 17692 + }, + { + "epoch": 0.32341382272835284, + "grad_norm": 6.849343874448063, + "learning_rate": 7.907433412691218e-06, + "loss": 17.6358, + "step": 17693 + }, + { + "epoch": 0.3234321019247994, + "grad_norm": 5.811953914809876, + "learning_rate": 7.907192585656528e-06, + "loss": 17.3599, + "step": 17694 + }, + { + "epoch": 0.3234503811212459, + "grad_norm": 6.406612554200852, + "learning_rate": 7.90695174843238e-06, + "loss": 17.3569, + "step": 17695 + }, + { + "epoch": 0.32346866031769245, + "grad_norm": 5.3506577758741125, + "learning_rate": 7.906710901019618e-06, + "loss": 17.1093, + "step": 17696 + }, + { + "epoch": 0.323486939514139, + "grad_norm": 7.877884365220087, + "learning_rate": 7.906470043419086e-06, + "loss": 18.4213, + "step": 17697 + }, + { + "epoch": 0.32350521871058546, + "grad_norm": 6.736546281178132, + "learning_rate": 7.90622917563163e-06, + "loss": 17.8381, + "step": 17698 + }, + { + "epoch": 0.323523497907032, + "grad_norm": 
7.589485775589006, + "learning_rate": 7.905988297658093e-06, + "loss": 18.1189, + "step": 17699 + }, + { + "epoch": 0.32354177710347853, + "grad_norm": 5.904388765821344, + "learning_rate": 7.905747409499318e-06, + "loss": 17.4213, + "step": 17700 + }, + { + "epoch": 0.32356005629992507, + "grad_norm": 7.315703403965148, + "learning_rate": 7.905506511156151e-06, + "loss": 17.8598, + "step": 17701 + }, + { + "epoch": 0.3235783354963716, + "grad_norm": 7.147717361011801, + "learning_rate": 7.905265602629435e-06, + "loss": 18.0147, + "step": 17702 + }, + { + "epoch": 0.3235966146928181, + "grad_norm": 8.356688813804935, + "learning_rate": 7.905024683920018e-06, + "loss": 18.8452, + "step": 17703 + }, + { + "epoch": 0.3236148938892646, + "grad_norm": 5.914228540617025, + "learning_rate": 7.904783755028738e-06, + "loss": 17.2188, + "step": 17704 + }, + { + "epoch": 0.32363317308571116, + "grad_norm": 5.47241798190702, + "learning_rate": 7.904542815956444e-06, + "loss": 17.2184, + "step": 17705 + }, + { + "epoch": 0.3236514522821577, + "grad_norm": 5.4748724835386104, + "learning_rate": 7.90430186670398e-06, + "loss": 17.1407, + "step": 17706 + }, + { + "epoch": 0.32366973147860423, + "grad_norm": 7.256228296101664, + "learning_rate": 7.90406090727219e-06, + "loss": 17.7795, + "step": 17707 + }, + { + "epoch": 0.3236880106750507, + "grad_norm": 7.323442326843513, + "learning_rate": 7.90381993766192e-06, + "loss": 18.3803, + "step": 17708 + }, + { + "epoch": 0.32370628987149724, + "grad_norm": 6.627845023069605, + "learning_rate": 7.903578957874012e-06, + "loss": 17.4024, + "step": 17709 + }, + { + "epoch": 0.3237245690679438, + "grad_norm": 7.420892493312514, + "learning_rate": 7.90333796790931e-06, + "loss": 18.1122, + "step": 17710 + }, + { + "epoch": 0.3237428482643903, + "grad_norm": 7.30268088293728, + "learning_rate": 7.903096967768662e-06, + "loss": 18.3343, + "step": 17711 + }, + { + "epoch": 0.3237611274608368, + "grad_norm": 6.287198503197101, + "learning_rate": 7.902855957452911e-06, + "loss": 17.4339, + "step": 17712 + }, + { + "epoch": 0.32377940665728333, + "grad_norm": 6.465856027196392, + "learning_rate": 7.902614936962902e-06, + "loss": 17.6532, + "step": 17713 + }, + { + "epoch": 0.32379768585372987, + "grad_norm": 6.308087695928773, + "learning_rate": 7.902373906299479e-06, + "loss": 17.4322, + "step": 17714 + }, + { + "epoch": 0.3238159650501764, + "grad_norm": 6.605016306857635, + "learning_rate": 7.902132865463487e-06, + "loss": 17.5501, + "step": 17715 + }, + { + "epoch": 0.32383424424662294, + "grad_norm": 5.748431837972686, + "learning_rate": 7.901891814455772e-06, + "loss": 17.503, + "step": 17716 + }, + { + "epoch": 0.3238525234430694, + "grad_norm": 7.569285201614199, + "learning_rate": 7.901650753277177e-06, + "loss": 18.0239, + "step": 17717 + }, + { + "epoch": 0.32387080263951595, + "grad_norm": 8.720107472605482, + "learning_rate": 7.901409681928548e-06, + "loss": 18.1893, + "step": 17718 + }, + { + "epoch": 0.3238890818359625, + "grad_norm": 6.799628458198373, + "learning_rate": 7.90116860041073e-06, + "loss": 18.034, + "step": 17719 + }, + { + "epoch": 0.323907361032409, + "grad_norm": 7.660477877385908, + "learning_rate": 7.90092750872457e-06, + "loss": 18.111, + "step": 17720 + }, + { + "epoch": 0.32392564022885556, + "grad_norm": 6.272439255187087, + "learning_rate": 7.900686406870908e-06, + "loss": 17.3672, + "step": 17721 + }, + { + "epoch": 0.32394391942530204, + "grad_norm": 6.880732583845493, + "learning_rate": 7.900445294850591e-06, + "loss": 17.4492, + 
"step": 17722 + }, + { + "epoch": 0.3239621986217486, + "grad_norm": 6.76794043338143, + "learning_rate": 7.900204172664468e-06, + "loss": 17.4717, + "step": 17723 + }, + { + "epoch": 0.3239804778181951, + "grad_norm": 6.342803464392752, + "learning_rate": 7.89996304031338e-06, + "loss": 17.3245, + "step": 17724 + }, + { + "epoch": 0.32399875701464165, + "grad_norm": 8.656431088842632, + "learning_rate": 7.899721897798172e-06, + "loss": 18.5138, + "step": 17725 + }, + { + "epoch": 0.3240170362110882, + "grad_norm": 6.08280644000809, + "learning_rate": 7.899480745119693e-06, + "loss": 17.5171, + "step": 17726 + }, + { + "epoch": 0.32403531540753466, + "grad_norm": 6.808667970082966, + "learning_rate": 7.899239582278783e-06, + "loss": 17.5517, + "step": 17727 + }, + { + "epoch": 0.3240535946039812, + "grad_norm": 6.885294328924785, + "learning_rate": 7.898998409276291e-06, + "loss": 17.6877, + "step": 17728 + }, + { + "epoch": 0.32407187380042773, + "grad_norm": 6.599931734145278, + "learning_rate": 7.89875722611306e-06, + "loss": 17.6172, + "step": 17729 + }, + { + "epoch": 0.32409015299687427, + "grad_norm": 5.370720150179787, + "learning_rate": 7.898516032789937e-06, + "loss": 17.0975, + "step": 17730 + }, + { + "epoch": 0.3241084321933208, + "grad_norm": 5.709689429617202, + "learning_rate": 7.898274829307769e-06, + "loss": 17.2393, + "step": 17731 + }, + { + "epoch": 0.3241267113897673, + "grad_norm": 6.0834948297296645, + "learning_rate": 7.898033615667395e-06, + "loss": 17.4599, + "step": 17732 + }, + { + "epoch": 0.3241449905862138, + "grad_norm": 6.197853953703129, + "learning_rate": 7.897792391869668e-06, + "loss": 17.4502, + "step": 17733 + }, + { + "epoch": 0.32416326978266036, + "grad_norm": 6.903159381393967, + "learning_rate": 7.89755115791543e-06, + "loss": 17.6691, + "step": 17734 + }, + { + "epoch": 0.3241815489791069, + "grad_norm": 6.520885784867376, + "learning_rate": 7.897309913805525e-06, + "loss": 17.7851, + "step": 17735 + }, + { + "epoch": 0.32419982817555343, + "grad_norm": 6.8901319836075805, + "learning_rate": 7.8970686595408e-06, + "loss": 17.6095, + "step": 17736 + }, + { + "epoch": 0.3242181073719999, + "grad_norm": 6.546169171931813, + "learning_rate": 7.896827395122102e-06, + "loss": 17.2313, + "step": 17737 + }, + { + "epoch": 0.32423638656844644, + "grad_norm": 7.915518776957184, + "learning_rate": 7.896586120550276e-06, + "loss": 17.9864, + "step": 17738 + }, + { + "epoch": 0.324254665764893, + "grad_norm": 6.9312848548921435, + "learning_rate": 7.896344835826166e-06, + "loss": 17.8592, + "step": 17739 + }, + { + "epoch": 0.3242729449613395, + "grad_norm": 5.485400336636975, + "learning_rate": 7.89610354095062e-06, + "loss": 17.3208, + "step": 17740 + }, + { + "epoch": 0.32429122415778605, + "grad_norm": 7.078439224696487, + "learning_rate": 7.895862235924481e-06, + "loss": 18.073, + "step": 17741 + }, + { + "epoch": 0.32430950335423253, + "grad_norm": 6.53713902367002, + "learning_rate": 7.895620920748594e-06, + "loss": 17.5156, + "step": 17742 + }, + { + "epoch": 0.32432778255067907, + "grad_norm": 6.786474194080707, + "learning_rate": 7.895379595423809e-06, + "loss": 17.6559, + "step": 17743 + }, + { + "epoch": 0.3243460617471256, + "grad_norm": 6.124202454779582, + "learning_rate": 7.895138259950972e-06, + "loss": 17.5843, + "step": 17744 + }, + { + "epoch": 0.32436434094357214, + "grad_norm": 6.803613532007667, + "learning_rate": 7.894896914330925e-06, + "loss": 17.6096, + "step": 17745 + }, + { + "epoch": 0.3243826201400186, + "grad_norm": 
5.626643891123522, + "learning_rate": 7.894655558564514e-06, + "loss": 17.1485, + "step": 17746 + }, + { + "epoch": 0.32440089933646515, + "grad_norm": 5.9320916818188785, + "learning_rate": 7.894414192652589e-06, + "loss": 17.1957, + "step": 17747 + }, + { + "epoch": 0.3244191785329117, + "grad_norm": 7.825310368883438, + "learning_rate": 7.89417281659599e-06, + "loss": 18.514, + "step": 17748 + }, + { + "epoch": 0.3244374577293582, + "grad_norm": 5.832333198813054, + "learning_rate": 7.89393143039557e-06, + "loss": 17.4828, + "step": 17749 + }, + { + "epoch": 0.32445573692580476, + "grad_norm": 7.19840251586273, + "learning_rate": 7.893690034052167e-06, + "loss": 18.1829, + "step": 17750 + }, + { + "epoch": 0.32447401612225124, + "grad_norm": 6.9550916640503875, + "learning_rate": 7.893448627566637e-06, + "loss": 17.4698, + "step": 17751 + }, + { + "epoch": 0.3244922953186978, + "grad_norm": 6.196420070907171, + "learning_rate": 7.893207210939817e-06, + "loss": 17.5809, + "step": 17752 + }, + { + "epoch": 0.3245105745151443, + "grad_norm": 6.916335868485267, + "learning_rate": 7.892965784172558e-06, + "loss": 17.642, + "step": 17753 + }, + { + "epoch": 0.32452885371159085, + "grad_norm": 7.6881850398519616, + "learning_rate": 7.892724347265706e-06, + "loss": 18.3688, + "step": 17754 + }, + { + "epoch": 0.3245471329080374, + "grad_norm": 6.320467909081312, + "learning_rate": 7.892482900220105e-06, + "loss": 17.5321, + "step": 17755 + }, + { + "epoch": 0.32456541210448386, + "grad_norm": 5.970622163186657, + "learning_rate": 7.892241443036601e-06, + "loss": 17.6554, + "step": 17756 + }, + { + "epoch": 0.3245836913009304, + "grad_norm": 6.518388630649442, + "learning_rate": 7.891999975716043e-06, + "loss": 17.7403, + "step": 17757 + }, + { + "epoch": 0.32460197049737693, + "grad_norm": 8.581149135678286, + "learning_rate": 7.891758498259277e-06, + "loss": 18.1474, + "step": 17758 + }, + { + "epoch": 0.32462024969382347, + "grad_norm": 6.430791470956686, + "learning_rate": 7.891517010667147e-06, + "loss": 17.4294, + "step": 17759 + }, + { + "epoch": 0.32463852889027, + "grad_norm": 6.747363986693666, + "learning_rate": 7.891275512940502e-06, + "loss": 17.7416, + "step": 17760 + }, + { + "epoch": 0.3246568080867165, + "grad_norm": 6.648622315508402, + "learning_rate": 7.891034005080188e-06, + "loss": 17.8707, + "step": 17761 + }, + { + "epoch": 0.324675087283163, + "grad_norm": 7.107497351581862, + "learning_rate": 7.890792487087049e-06, + "loss": 17.6806, + "step": 17762 + }, + { + "epoch": 0.32469336647960956, + "grad_norm": 6.519000431193044, + "learning_rate": 7.890550958961933e-06, + "loss": 17.4322, + "step": 17763 + }, + { + "epoch": 0.3247116456760561, + "grad_norm": 6.180686488773445, + "learning_rate": 7.890309420705686e-06, + "loss": 17.4595, + "step": 17764 + }, + { + "epoch": 0.32472992487250263, + "grad_norm": 6.813454925467863, + "learning_rate": 7.890067872319158e-06, + "loss": 17.3925, + "step": 17765 + }, + { + "epoch": 0.3247482040689491, + "grad_norm": 7.281394317189339, + "learning_rate": 7.88982631380319e-06, + "loss": 17.4958, + "step": 17766 + }, + { + "epoch": 0.32476648326539564, + "grad_norm": 6.22838325671392, + "learning_rate": 7.889584745158634e-06, + "loss": 17.5235, + "step": 17767 + }, + { + "epoch": 0.3247847624618422, + "grad_norm": 7.7460781767421345, + "learning_rate": 7.889343166386334e-06, + "loss": 17.707, + "step": 17768 + }, + { + "epoch": 0.3248030416582887, + "grad_norm": 6.367076966095275, + "learning_rate": 7.889101577487134e-06, + "loss": 
17.6144, + "step": 17769 + }, + { + "epoch": 0.32482132085473525, + "grad_norm": 6.420686888235853, + "learning_rate": 7.888859978461887e-06, + "loss": 17.7745, + "step": 17770 + }, + { + "epoch": 0.32483960005118173, + "grad_norm": 6.926027327496974, + "learning_rate": 7.888618369311436e-06, + "loss": 17.5875, + "step": 17771 + }, + { + "epoch": 0.32485787924762827, + "grad_norm": 6.695980001231234, + "learning_rate": 7.888376750036626e-06, + "loss": 17.4798, + "step": 17772 + }, + { + "epoch": 0.3248761584440748, + "grad_norm": 6.401478752069769, + "learning_rate": 7.888135120638309e-06, + "loss": 17.4129, + "step": 17773 + }, + { + "epoch": 0.32489443764052134, + "grad_norm": 6.690387221084487, + "learning_rate": 7.887893481117327e-06, + "loss": 17.6467, + "step": 17774 + }, + { + "epoch": 0.3249127168369679, + "grad_norm": 6.288090431810262, + "learning_rate": 7.88765183147453e-06, + "loss": 17.5896, + "step": 17775 + }, + { + "epoch": 0.32493099603341435, + "grad_norm": 6.856296727872927, + "learning_rate": 7.887410171710764e-06, + "loss": 17.6377, + "step": 17776 + }, + { + "epoch": 0.3249492752298609, + "grad_norm": 7.220036562946701, + "learning_rate": 7.887168501826874e-06, + "loss": 18.1338, + "step": 17777 + }, + { + "epoch": 0.3249675544263074, + "grad_norm": 6.856851184796004, + "learning_rate": 7.88692682182371e-06, + "loss": 17.7401, + "step": 17778 + }, + { + "epoch": 0.32498583362275396, + "grad_norm": 8.166159479680575, + "learning_rate": 7.886685131702118e-06, + "loss": 18.38, + "step": 17779 + }, + { + "epoch": 0.32500411281920044, + "grad_norm": 7.24517999511875, + "learning_rate": 7.886443431462946e-06, + "loss": 17.9074, + "step": 17780 + }, + { + "epoch": 0.325022392015647, + "grad_norm": 7.729271856325883, + "learning_rate": 7.886201721107041e-06, + "loss": 18.1386, + "step": 17781 + }, + { + "epoch": 0.3250406712120935, + "grad_norm": 6.002140416107779, + "learning_rate": 7.885960000635247e-06, + "loss": 17.3604, + "step": 17782 + }, + { + "epoch": 0.32505895040854005, + "grad_norm": 6.484218390540682, + "learning_rate": 7.885718270048414e-06, + "loss": 17.3894, + "step": 17783 + }, + { + "epoch": 0.3250772296049866, + "grad_norm": 6.5765930619465, + "learning_rate": 7.885476529347391e-06, + "loss": 17.8014, + "step": 17784 + }, + { + "epoch": 0.32509550880143306, + "grad_norm": 7.923834403163036, + "learning_rate": 7.885234778533022e-06, + "loss": 18.1465, + "step": 17785 + }, + { + "epoch": 0.3251137879978796, + "grad_norm": 6.430724149334308, + "learning_rate": 7.884993017606155e-06, + "loss": 17.4932, + "step": 17786 + }, + { + "epoch": 0.32513206719432614, + "grad_norm": 6.77751500942763, + "learning_rate": 7.884751246567637e-06, + "loss": 18.0722, + "step": 17787 + }, + { + "epoch": 0.32515034639077267, + "grad_norm": 6.186405548945096, + "learning_rate": 7.884509465418318e-06, + "loss": 17.6653, + "step": 17788 + }, + { + "epoch": 0.3251686255872192, + "grad_norm": 7.268120331299511, + "learning_rate": 7.884267674159043e-06, + "loss": 17.9533, + "step": 17789 + }, + { + "epoch": 0.3251869047836657, + "grad_norm": 7.245866146128734, + "learning_rate": 7.884025872790661e-06, + "loss": 18.1388, + "step": 17790 + }, + { + "epoch": 0.3252051839801122, + "grad_norm": 6.500089085904076, + "learning_rate": 7.883784061314017e-06, + "loss": 17.6386, + "step": 17791 + }, + { + "epoch": 0.32522346317655876, + "grad_norm": 6.76226009888217, + "learning_rate": 7.88354223972996e-06, + "loss": 17.5732, + "step": 17792 + }, + { + "epoch": 0.3252417423730053, + "grad_norm": 
5.529002913461683, + "learning_rate": 7.883300408039338e-06, + "loss": 17.024, + "step": 17793 + }, + { + "epoch": 0.32526002156945183, + "grad_norm": 6.280103600881916, + "learning_rate": 7.883058566243e-06, + "loss": 17.3343, + "step": 17794 + }, + { + "epoch": 0.3252783007658983, + "grad_norm": 5.537614534548059, + "learning_rate": 7.88281671434179e-06, + "loss": 17.201, + "step": 17795 + }, + { + "epoch": 0.32529657996234485, + "grad_norm": 6.842566450357201, + "learning_rate": 7.882574852336558e-06, + "loss": 17.7165, + "step": 17796 + }, + { + "epoch": 0.3253148591587914, + "grad_norm": 6.62388762569798, + "learning_rate": 7.882332980228151e-06, + "loss": 17.5779, + "step": 17797 + }, + { + "epoch": 0.3253331383552379, + "grad_norm": 7.81704242010747, + "learning_rate": 7.882091098017417e-06, + "loss": 18.1819, + "step": 17798 + }, + { + "epoch": 0.32535141755168445, + "grad_norm": 8.17155499574659, + "learning_rate": 7.881849205705206e-06, + "loss": 18.1628, + "step": 17799 + }, + { + "epoch": 0.32536969674813093, + "grad_norm": 5.228344861810051, + "learning_rate": 7.881607303292361e-06, + "loss": 17.1929, + "step": 17800 + }, + { + "epoch": 0.32538797594457747, + "grad_norm": 6.478130051775579, + "learning_rate": 7.881365390779734e-06, + "loss": 17.7408, + "step": 17801 + }, + { + "epoch": 0.325406255141024, + "grad_norm": 7.205583581384945, + "learning_rate": 7.881123468168169e-06, + "loss": 17.7589, + "step": 17802 + }, + { + "epoch": 0.32542453433747054, + "grad_norm": 8.659392100726214, + "learning_rate": 7.880881535458519e-06, + "loss": 18.608, + "step": 17803 + }, + { + "epoch": 0.3254428135339171, + "grad_norm": 6.691558341576043, + "learning_rate": 7.880639592651628e-06, + "loss": 17.315, + "step": 17804 + }, + { + "epoch": 0.32546109273036355, + "grad_norm": 6.824648938193877, + "learning_rate": 7.880397639748346e-06, + "loss": 17.7161, + "step": 17805 + }, + { + "epoch": 0.3254793719268101, + "grad_norm": 5.681769825030666, + "learning_rate": 7.88015567674952e-06, + "loss": 17.378, + "step": 17806 + }, + { + "epoch": 0.3254976511232566, + "grad_norm": 7.590131225088655, + "learning_rate": 7.879913703655997e-06, + "loss": 17.7448, + "step": 17807 + }, + { + "epoch": 0.32551593031970316, + "grad_norm": 6.873404346881098, + "learning_rate": 7.879671720468626e-06, + "loss": 17.6415, + "step": 17808 + }, + { + "epoch": 0.3255342095161497, + "grad_norm": 8.40510328880946, + "learning_rate": 7.879429727188257e-06, + "loss": 17.7219, + "step": 17809 + }, + { + "epoch": 0.3255524887125962, + "grad_norm": 6.477972532371386, + "learning_rate": 7.879187723815737e-06, + "loss": 17.7117, + "step": 17810 + }, + { + "epoch": 0.3255707679090427, + "grad_norm": 7.337673120293114, + "learning_rate": 7.878945710351913e-06, + "loss": 17.9361, + "step": 17811 + }, + { + "epoch": 0.32558904710548925, + "grad_norm": 6.719375226765658, + "learning_rate": 7.878703686797634e-06, + "loss": 17.4639, + "step": 17812 + }, + { + "epoch": 0.3256073263019358, + "grad_norm": 5.282211402681556, + "learning_rate": 7.878461653153749e-06, + "loss": 17.2107, + "step": 17813 + }, + { + "epoch": 0.32562560549838226, + "grad_norm": 6.322777752947682, + "learning_rate": 7.878219609421105e-06, + "loss": 17.5931, + "step": 17814 + }, + { + "epoch": 0.3256438846948288, + "grad_norm": 6.335835715646217, + "learning_rate": 7.87797755560055e-06, + "loss": 17.7482, + "step": 17815 + }, + { + "epoch": 0.32566216389127534, + "grad_norm": 6.695294309727545, + "learning_rate": 7.877735491692937e-06, + "loss": 17.9087, + 
"step": 17816 + }, + { + "epoch": 0.32568044308772187, + "grad_norm": 6.898742884933421, + "learning_rate": 7.877493417699109e-06, + "loss": 17.8053, + "step": 17817 + }, + { + "epoch": 0.3256987222841684, + "grad_norm": 5.513199510821228, + "learning_rate": 7.877251333619916e-06, + "loss": 17.1758, + "step": 17818 + }, + { + "epoch": 0.3257170014806149, + "grad_norm": 5.876307084156893, + "learning_rate": 7.877009239456206e-06, + "loss": 17.3925, + "step": 17819 + }, + { + "epoch": 0.3257352806770614, + "grad_norm": 7.402268251489208, + "learning_rate": 7.876767135208829e-06, + "loss": 17.6846, + "step": 17820 + }, + { + "epoch": 0.32575355987350796, + "grad_norm": 6.426810501598508, + "learning_rate": 7.876525020878632e-06, + "loss": 17.7161, + "step": 17821 + }, + { + "epoch": 0.3257718390699545, + "grad_norm": 6.662116041025357, + "learning_rate": 7.876282896466465e-06, + "loss": 17.7204, + "step": 17822 + }, + { + "epoch": 0.32579011826640103, + "grad_norm": 5.774182478325658, + "learning_rate": 7.876040761973179e-06, + "loss": 17.3396, + "step": 17823 + }, + { + "epoch": 0.3258083974628475, + "grad_norm": 6.616518385284222, + "learning_rate": 7.875798617399614e-06, + "loss": 17.5979, + "step": 17824 + }, + { + "epoch": 0.32582667665929405, + "grad_norm": 6.846705875031634, + "learning_rate": 7.875556462746628e-06, + "loss": 17.9477, + "step": 17825 + }, + { + "epoch": 0.3258449558557406, + "grad_norm": 5.693870640415677, + "learning_rate": 7.875314298015065e-06, + "loss": 17.2138, + "step": 17826 + }, + { + "epoch": 0.3258632350521871, + "grad_norm": 6.969038422382312, + "learning_rate": 7.875072123205776e-06, + "loss": 18.0013, + "step": 17827 + }, + { + "epoch": 0.32588151424863365, + "grad_norm": 6.1486335454418874, + "learning_rate": 7.874829938319608e-06, + "loss": 17.4233, + "step": 17828 + }, + { + "epoch": 0.32589979344508013, + "grad_norm": 6.132878567881788, + "learning_rate": 7.87458774335741e-06, + "loss": 17.4989, + "step": 17829 + }, + { + "epoch": 0.32591807264152667, + "grad_norm": 6.537418428041828, + "learning_rate": 7.874345538320033e-06, + "loss": 17.5308, + "step": 17830 + }, + { + "epoch": 0.3259363518379732, + "grad_norm": 7.185045793392967, + "learning_rate": 7.874103323208323e-06, + "loss": 17.5923, + "step": 17831 + }, + { + "epoch": 0.32595463103441974, + "grad_norm": 7.792166666071645, + "learning_rate": 7.87386109802313e-06, + "loss": 18.4338, + "step": 17832 + }, + { + "epoch": 0.3259729102308663, + "grad_norm": 5.88940091915513, + "learning_rate": 7.873618862765305e-06, + "loss": 17.4111, + "step": 17833 + }, + { + "epoch": 0.32599118942731276, + "grad_norm": 5.0957149338845635, + "learning_rate": 7.873376617435693e-06, + "loss": 17.0923, + "step": 17834 + }, + { + "epoch": 0.3260094686237593, + "grad_norm": 6.947892487435096, + "learning_rate": 7.873134362035147e-06, + "loss": 17.7021, + "step": 17835 + }, + { + "epoch": 0.3260277478202058, + "grad_norm": 6.767897076797359, + "learning_rate": 7.872892096564512e-06, + "loss": 17.6243, + "step": 17836 + }, + { + "epoch": 0.32604602701665236, + "grad_norm": 5.5962489430574625, + "learning_rate": 7.872649821024642e-06, + "loss": 17.0655, + "step": 17837 + }, + { + "epoch": 0.3260643062130989, + "grad_norm": 7.541190100036091, + "learning_rate": 7.872407535416384e-06, + "loss": 18.0704, + "step": 17838 + }, + { + "epoch": 0.3260825854095454, + "grad_norm": 7.35367908788049, + "learning_rate": 7.872165239740585e-06, + "loss": 18.224, + "step": 17839 + }, + { + "epoch": 0.3261008646059919, + "grad_norm": 
6.867253190417915, + "learning_rate": 7.871922933998098e-06, + "loss": 17.7978, + "step": 17840 + }, + { + "epoch": 0.32611914380243845, + "grad_norm": 7.990110756551321, + "learning_rate": 7.871680618189768e-06, + "loss": 17.9894, + "step": 17841 + }, + { + "epoch": 0.326137422998885, + "grad_norm": 5.920388531886041, + "learning_rate": 7.871438292316448e-06, + "loss": 17.292, + "step": 17842 + }, + { + "epoch": 0.3261557021953315, + "grad_norm": 5.815565367184524, + "learning_rate": 7.871195956378985e-06, + "loss": 17.4709, + "step": 17843 + }, + { + "epoch": 0.326173981391778, + "grad_norm": 5.669661110095847, + "learning_rate": 7.870953610378231e-06, + "loss": 17.2476, + "step": 17844 + }, + { + "epoch": 0.32619226058822454, + "grad_norm": 6.000059489859444, + "learning_rate": 7.870711254315031e-06, + "loss": 17.153, + "step": 17845 + }, + { + "epoch": 0.32621053978467107, + "grad_norm": 6.574262859048824, + "learning_rate": 7.870468888190239e-06, + "loss": 17.281, + "step": 17846 + }, + { + "epoch": 0.3262288189811176, + "grad_norm": 6.777443581943335, + "learning_rate": 7.870226512004704e-06, + "loss": 17.566, + "step": 17847 + }, + { + "epoch": 0.3262470981775641, + "grad_norm": 7.712616665092374, + "learning_rate": 7.869984125759272e-06, + "loss": 18.2918, + "step": 17848 + }, + { + "epoch": 0.3262653773740106, + "grad_norm": 7.503483333333809, + "learning_rate": 7.869741729454797e-06, + "loss": 17.9509, + "step": 17849 + }, + { + "epoch": 0.32628365657045716, + "grad_norm": 6.6588036093839165, + "learning_rate": 7.869499323092122e-06, + "loss": 17.5324, + "step": 17850 + }, + { + "epoch": 0.3263019357669037, + "grad_norm": 6.430826262770365, + "learning_rate": 7.869256906672104e-06, + "loss": 17.451, + "step": 17851 + }, + { + "epoch": 0.32632021496335023, + "grad_norm": 7.02650368230658, + "learning_rate": 7.869014480195589e-06, + "loss": 17.9679, + "step": 17852 + }, + { + "epoch": 0.3263384941597967, + "grad_norm": 6.164166018539878, + "learning_rate": 7.868772043663429e-06, + "loss": 17.3597, + "step": 17853 + }, + { + "epoch": 0.32635677335624325, + "grad_norm": 6.413715747359366, + "learning_rate": 7.868529597076469e-06, + "loss": 17.8651, + "step": 17854 + }, + { + "epoch": 0.3263750525526898, + "grad_norm": 6.470010343951983, + "learning_rate": 7.868287140435564e-06, + "loss": 17.5322, + "step": 17855 + }, + { + "epoch": 0.3263933317491363, + "grad_norm": 6.276310738559672, + "learning_rate": 7.86804467374156e-06, + "loss": 17.6477, + "step": 17856 + }, + { + "epoch": 0.32641161094558285, + "grad_norm": 6.24266804974688, + "learning_rate": 7.867802196995308e-06, + "loss": 17.4857, + "step": 17857 + }, + { + "epoch": 0.32642989014202933, + "grad_norm": 6.496002462917033, + "learning_rate": 7.867559710197658e-06, + "loss": 17.4417, + "step": 17858 + }, + { + "epoch": 0.32644816933847587, + "grad_norm": 7.161516651354113, + "learning_rate": 7.867317213349461e-06, + "loss": 18.0931, + "step": 17859 + }, + { + "epoch": 0.3264664485349224, + "grad_norm": 6.511777079840866, + "learning_rate": 7.867074706451567e-06, + "loss": 17.4908, + "step": 17860 + }, + { + "epoch": 0.32648472773136894, + "grad_norm": 7.188666024752628, + "learning_rate": 7.866832189504823e-06, + "loss": 17.5326, + "step": 17861 + }, + { + "epoch": 0.3265030069278155, + "grad_norm": 7.008425777054819, + "learning_rate": 7.866589662510083e-06, + "loss": 17.9847, + "step": 17862 + }, + { + "epoch": 0.32652128612426196, + "grad_norm": 7.068713305265701, + "learning_rate": 7.866347125468192e-06, + "loss": 17.8885, 
+ "step": 17863 + }, + { + "epoch": 0.3265395653207085, + "grad_norm": 5.917749223184772, + "learning_rate": 7.866104578380005e-06, + "loss": 17.7064, + "step": 17864 + }, + { + "epoch": 0.326557844517155, + "grad_norm": 6.621049855079859, + "learning_rate": 7.86586202124637e-06, + "loss": 17.6552, + "step": 17865 + }, + { + "epoch": 0.32657612371360156, + "grad_norm": 7.758840166314253, + "learning_rate": 7.865619454068137e-06, + "loss": 17.9226, + "step": 17866 + }, + { + "epoch": 0.3265944029100481, + "grad_norm": 7.331408698946266, + "learning_rate": 7.865376876846158e-06, + "loss": 17.9984, + "step": 17867 + }, + { + "epoch": 0.3266126821064946, + "grad_norm": 6.837829300824041, + "learning_rate": 7.86513428958128e-06, + "loss": 17.4893, + "step": 17868 + }, + { + "epoch": 0.3266309613029411, + "grad_norm": 6.4049637784235545, + "learning_rate": 7.864891692274355e-06, + "loss": 17.5133, + "step": 17869 + }, + { + "epoch": 0.32664924049938765, + "grad_norm": 6.273073302128767, + "learning_rate": 7.864649084926232e-06, + "loss": 17.2967, + "step": 17870 + }, + { + "epoch": 0.3266675196958342, + "grad_norm": 6.344309302924921, + "learning_rate": 7.864406467537764e-06, + "loss": 17.439, + "step": 17871 + }, + { + "epoch": 0.3266857988922807, + "grad_norm": 6.449355080338526, + "learning_rate": 7.864163840109802e-06, + "loss": 17.1538, + "step": 17872 + }, + { + "epoch": 0.3267040780887272, + "grad_norm": 5.920602117673282, + "learning_rate": 7.863921202643192e-06, + "loss": 17.258, + "step": 17873 + }, + { + "epoch": 0.32672235728517374, + "grad_norm": 6.259629210157948, + "learning_rate": 7.863678555138786e-06, + "loss": 17.3877, + "step": 17874 + }, + { + "epoch": 0.3267406364816203, + "grad_norm": 6.69678860967342, + "learning_rate": 7.863435897597437e-06, + "loss": 17.802, + "step": 17875 + }, + { + "epoch": 0.3267589156780668, + "grad_norm": 6.340828147922386, + "learning_rate": 7.863193230019991e-06, + "loss": 17.6317, + "step": 17876 + }, + { + "epoch": 0.32677719487451334, + "grad_norm": 6.991458089088453, + "learning_rate": 7.862950552407304e-06, + "loss": 18.0541, + "step": 17877 + }, + { + "epoch": 0.3267954740709598, + "grad_norm": 6.0945809284360495, + "learning_rate": 7.862707864760225e-06, + "loss": 17.2963, + "step": 17878 + }, + { + "epoch": 0.32681375326740636, + "grad_norm": 7.682440158745301, + "learning_rate": 7.862465167079599e-06, + "loss": 17.6765, + "step": 17879 + }, + { + "epoch": 0.3268320324638529, + "grad_norm": 6.554405377487383, + "learning_rate": 7.862222459366283e-06, + "loss": 17.4388, + "step": 17880 + }, + { + "epoch": 0.32685031166029943, + "grad_norm": 5.860101075469898, + "learning_rate": 7.861979741621126e-06, + "loss": 17.3062, + "step": 17881 + }, + { + "epoch": 0.3268685908567459, + "grad_norm": 5.921788599011734, + "learning_rate": 7.86173701384498e-06, + "loss": 17.421, + "step": 17882 + }, + { + "epoch": 0.32688687005319245, + "grad_norm": 6.729473933777559, + "learning_rate": 7.86149427603869e-06, + "loss": 18.0369, + "step": 17883 + }, + { + "epoch": 0.326905149249639, + "grad_norm": 7.171624055665262, + "learning_rate": 7.861251528203113e-06, + "loss": 17.7876, + "step": 17884 + }, + { + "epoch": 0.3269234284460855, + "grad_norm": 5.78811924291365, + "learning_rate": 7.861008770339098e-06, + "loss": 17.2695, + "step": 17885 + }, + { + "epoch": 0.32694170764253205, + "grad_norm": 7.386439276230386, + "learning_rate": 7.860766002447495e-06, + "loss": 17.7566, + "step": 17886 + }, + { + "epoch": 0.32695998683897853, + "grad_norm": 
5.864195043080574, + "learning_rate": 7.860523224529156e-06, + "loss": 17.34, + "step": 17887 + }, + { + "epoch": 0.32697826603542507, + "grad_norm": 5.371148963013059, + "learning_rate": 7.86028043658493e-06, + "loss": 17.0583, + "step": 17888 + }, + { + "epoch": 0.3269965452318716, + "grad_norm": 7.595197440764273, + "learning_rate": 7.860037638615671e-06, + "loss": 17.8264, + "step": 17889 + }, + { + "epoch": 0.32701482442831814, + "grad_norm": 5.525909084266543, + "learning_rate": 7.859794830622227e-06, + "loss": 17.1311, + "step": 17890 + }, + { + "epoch": 0.3270331036247647, + "grad_norm": 7.350189973649129, + "learning_rate": 7.859552012605452e-06, + "loss": 18.1613, + "step": 17891 + }, + { + "epoch": 0.32705138282121116, + "grad_norm": 6.313772679875273, + "learning_rate": 7.859309184566193e-06, + "loss": 17.7864, + "step": 17892 + }, + { + "epoch": 0.3270696620176577, + "grad_norm": 5.807894059462818, + "learning_rate": 7.859066346505305e-06, + "loss": 17.3594, + "step": 17893 + }, + { + "epoch": 0.3270879412141042, + "grad_norm": 5.577269715782767, + "learning_rate": 7.858823498423637e-06, + "loss": 17.5178, + "step": 17894 + }, + { + "epoch": 0.32710622041055076, + "grad_norm": 5.881722427483263, + "learning_rate": 7.85858064032204e-06, + "loss": 17.5096, + "step": 17895 + }, + { + "epoch": 0.3271244996069973, + "grad_norm": 7.551792391781151, + "learning_rate": 7.858337772201368e-06, + "loss": 18.3451, + "step": 17896 + }, + { + "epoch": 0.3271427788034438, + "grad_norm": 7.199569481845562, + "learning_rate": 7.858094894062468e-06, + "loss": 17.7249, + "step": 17897 + }, + { + "epoch": 0.3271610579998903, + "grad_norm": 6.221756197581803, + "learning_rate": 7.857852005906195e-06, + "loss": 17.5991, + "step": 17898 + }, + { + "epoch": 0.32717933719633685, + "grad_norm": 7.165307209689358, + "learning_rate": 7.857609107733398e-06, + "loss": 17.9371, + "step": 17899 + }, + { + "epoch": 0.3271976163927834, + "grad_norm": 6.4143827619662614, + "learning_rate": 7.857366199544929e-06, + "loss": 17.3668, + "step": 17900 + }, + { + "epoch": 0.3272158955892299, + "grad_norm": 6.634976745565556, + "learning_rate": 7.857123281341639e-06, + "loss": 17.635, + "step": 17901 + }, + { + "epoch": 0.3272341747856764, + "grad_norm": 6.445272136131572, + "learning_rate": 7.85688035312438e-06, + "loss": 17.7784, + "step": 17902 + }, + { + "epoch": 0.32725245398212294, + "grad_norm": 6.627760389550447, + "learning_rate": 7.856637414894003e-06, + "loss": 17.7706, + "step": 17903 + }, + { + "epoch": 0.3272707331785695, + "grad_norm": 7.047530424947791, + "learning_rate": 7.85639446665136e-06, + "loss": 17.7615, + "step": 17904 + }, + { + "epoch": 0.327289012375016, + "grad_norm": 6.242491228600791, + "learning_rate": 7.856151508397303e-06, + "loss": 17.2843, + "step": 17905 + }, + { + "epoch": 0.32730729157146254, + "grad_norm": 7.56601766773708, + "learning_rate": 7.855908540132682e-06, + "loss": 18.0979, + "step": 17906 + }, + { + "epoch": 0.327325570767909, + "grad_norm": 6.057493735827574, + "learning_rate": 7.85566556185835e-06, + "loss": 17.554, + "step": 17907 + }, + { + "epoch": 0.32734384996435556, + "grad_norm": 6.36774798328385, + "learning_rate": 7.855422573575158e-06, + "loss": 17.4506, + "step": 17908 + }, + { + "epoch": 0.3273621291608021, + "grad_norm": 5.150073712766874, + "learning_rate": 7.855179575283958e-06, + "loss": 16.9747, + "step": 17909 + }, + { + "epoch": 0.32738040835724863, + "grad_norm": 6.624202698198208, + "learning_rate": 7.8549365669856e-06, + "loss": 17.5623, + 
"step": 17910 + }, + { + "epoch": 0.32739868755369517, + "grad_norm": 6.897513006872821, + "learning_rate": 7.854693548680939e-06, + "loss": 17.84, + "step": 17911 + }, + { + "epoch": 0.32741696675014165, + "grad_norm": 6.807505608479961, + "learning_rate": 7.854450520370823e-06, + "loss": 17.7713, + "step": 17912 + }, + { + "epoch": 0.3274352459465882, + "grad_norm": 8.995518507179327, + "learning_rate": 7.854207482056106e-06, + "loss": 18.9268, + "step": 17913 + }, + { + "epoch": 0.3274535251430347, + "grad_norm": 6.933558453854653, + "learning_rate": 7.85396443373764e-06, + "loss": 17.8008, + "step": 17914 + }, + { + "epoch": 0.32747180433948125, + "grad_norm": 6.034555834357745, + "learning_rate": 7.853721375416276e-06, + "loss": 17.4894, + "step": 17915 + }, + { + "epoch": 0.32749008353592773, + "grad_norm": 6.874334781188533, + "learning_rate": 7.853478307092867e-06, + "loss": 17.7075, + "step": 17916 + }, + { + "epoch": 0.32750836273237427, + "grad_norm": 6.582386838434732, + "learning_rate": 7.853235228768263e-06, + "loss": 17.7948, + "step": 17917 + }, + { + "epoch": 0.3275266419288208, + "grad_norm": 5.708070149466229, + "learning_rate": 7.852992140443317e-06, + "loss": 17.3767, + "step": 17918 + }, + { + "epoch": 0.32754492112526734, + "grad_norm": 5.705497787883412, + "learning_rate": 7.852749042118882e-06, + "loss": 17.1096, + "step": 17919 + }, + { + "epoch": 0.3275632003217139, + "grad_norm": 6.898629192703969, + "learning_rate": 7.85250593379581e-06, + "loss": 17.7545, + "step": 17920 + }, + { + "epoch": 0.32758147951816036, + "grad_norm": 5.666294156284441, + "learning_rate": 7.85226281547495e-06, + "loss": 17.4826, + "step": 17921 + }, + { + "epoch": 0.3275997587146069, + "grad_norm": 5.759539364835163, + "learning_rate": 7.852019687157158e-06, + "loss": 17.1894, + "step": 17922 + }, + { + "epoch": 0.32761803791105343, + "grad_norm": 6.0697805940568275, + "learning_rate": 7.851776548843285e-06, + "loss": 17.3675, + "step": 17923 + }, + { + "epoch": 0.32763631710749996, + "grad_norm": 6.1392823783558494, + "learning_rate": 7.851533400534179e-06, + "loss": 17.5242, + "step": 17924 + }, + { + "epoch": 0.3276545963039465, + "grad_norm": 8.411186952250112, + "learning_rate": 7.8512902422307e-06, + "loss": 18.1547, + "step": 17925 + }, + { + "epoch": 0.327672875500393, + "grad_norm": 8.022695074463817, + "learning_rate": 7.851047073933693e-06, + "loss": 17.9314, + "step": 17926 + }, + { + "epoch": 0.3276911546968395, + "grad_norm": 5.887575318032095, + "learning_rate": 7.850803895644017e-06, + "loss": 17.4752, + "step": 17927 + }, + { + "epoch": 0.32770943389328605, + "grad_norm": 7.175985913344132, + "learning_rate": 7.850560707362518e-06, + "loss": 17.9756, + "step": 17928 + }, + { + "epoch": 0.3277277130897326, + "grad_norm": 6.701434753654602, + "learning_rate": 7.85031750909005e-06, + "loss": 17.622, + "step": 17929 + }, + { + "epoch": 0.3277459922861791, + "grad_norm": 5.9431661773785684, + "learning_rate": 7.85007430082747e-06, + "loss": 17.4444, + "step": 17930 + }, + { + "epoch": 0.3277642714826256, + "grad_norm": 5.460614712466154, + "learning_rate": 7.849831082575625e-06, + "loss": 17.2491, + "step": 17931 + }, + { + "epoch": 0.32778255067907214, + "grad_norm": 6.449065531617945, + "learning_rate": 7.849587854335369e-06, + "loss": 17.6549, + "step": 17932 + }, + { + "epoch": 0.3278008298755187, + "grad_norm": 6.505054635099538, + "learning_rate": 7.849344616107554e-06, + "loss": 17.4446, + "step": 17933 + }, + { + "epoch": 0.3278191090719652, + "grad_norm": 
6.711412197228129, + "learning_rate": 7.849101367893037e-06, + "loss": 17.407, + "step": 17934 + }, + { + "epoch": 0.32783738826841174, + "grad_norm": 7.0024387822301275, + "learning_rate": 7.848858109692663e-06, + "loss": 17.6073, + "step": 17935 + }, + { + "epoch": 0.3278556674648582, + "grad_norm": 5.9135186739116845, + "learning_rate": 7.84861484150729e-06, + "loss": 17.4377, + "step": 17936 + }, + { + "epoch": 0.32787394666130476, + "grad_norm": 7.451437561929218, + "learning_rate": 7.848371563337771e-06, + "loss": 17.9172, + "step": 17937 + }, + { + "epoch": 0.3278922258577513, + "grad_norm": 7.453435976355388, + "learning_rate": 7.848128275184954e-06, + "loss": 17.9466, + "step": 17938 + }, + { + "epoch": 0.32791050505419783, + "grad_norm": 6.787862427824031, + "learning_rate": 7.847884977049695e-06, + "loss": 17.8055, + "step": 17939 + }, + { + "epoch": 0.32792878425064437, + "grad_norm": 5.684888889559415, + "learning_rate": 7.847641668932848e-06, + "loss": 17.0676, + "step": 17940 + }, + { + "epoch": 0.32794706344709085, + "grad_norm": 6.670039474136435, + "learning_rate": 7.847398350835263e-06, + "loss": 17.7334, + "step": 17941 + }, + { + "epoch": 0.3279653426435374, + "grad_norm": 6.450659337918838, + "learning_rate": 7.847155022757793e-06, + "loss": 17.2774, + "step": 17942 + }, + { + "epoch": 0.3279836218399839, + "grad_norm": 7.099834262384581, + "learning_rate": 7.846911684701293e-06, + "loss": 18.0099, + "step": 17943 + }, + { + "epoch": 0.32800190103643045, + "grad_norm": 8.16204715184103, + "learning_rate": 7.846668336666616e-06, + "loss": 17.9374, + "step": 17944 + }, + { + "epoch": 0.328020180232877, + "grad_norm": 5.679050610946927, + "learning_rate": 7.84642497865461e-06, + "loss": 17.0642, + "step": 17945 + }, + { + "epoch": 0.32803845942932347, + "grad_norm": 6.36842780726683, + "learning_rate": 7.846181610666134e-06, + "loss": 17.358, + "step": 17946 + }, + { + "epoch": 0.32805673862577, + "grad_norm": 7.540779931147121, + "learning_rate": 7.845938232702037e-06, + "loss": 18.0322, + "step": 17947 + }, + { + "epoch": 0.32807501782221654, + "grad_norm": 6.828821345241826, + "learning_rate": 7.845694844763174e-06, + "loss": 17.5905, + "step": 17948 + }, + { + "epoch": 0.3280932970186631, + "grad_norm": 6.156397289374125, + "learning_rate": 7.845451446850399e-06, + "loss": 17.3741, + "step": 17949 + }, + { + "epoch": 0.32811157621510956, + "grad_norm": 8.289089484106746, + "learning_rate": 7.845208038964562e-06, + "loss": 17.8755, + "step": 17950 + }, + { + "epoch": 0.3281298554115561, + "grad_norm": 6.832392891004793, + "learning_rate": 7.844964621106518e-06, + "loss": 17.6991, + "step": 17951 + }, + { + "epoch": 0.32814813460800263, + "grad_norm": 5.67463719989777, + "learning_rate": 7.844721193277118e-06, + "loss": 17.4293, + "step": 17952 + }, + { + "epoch": 0.32816641380444916, + "grad_norm": 6.881617909594826, + "learning_rate": 7.84447775547722e-06, + "loss": 17.6258, + "step": 17953 + }, + { + "epoch": 0.3281846930008957, + "grad_norm": 6.887026728402564, + "learning_rate": 7.844234307707673e-06, + "loss": 17.658, + "step": 17954 + }, + { + "epoch": 0.3282029721973422, + "grad_norm": 6.028614365375526, + "learning_rate": 7.843990849969332e-06, + "loss": 17.4576, + "step": 17955 + }, + { + "epoch": 0.3282212513937887, + "grad_norm": 6.630595707426664, + "learning_rate": 7.84374738226305e-06, + "loss": 17.7347, + "step": 17956 + }, + { + "epoch": 0.32823953059023525, + "grad_norm": 6.54165765514594, + "learning_rate": 7.843503904589678e-06, + "loss": 17.6441, + 
"step": 17957 + }, + { + "epoch": 0.3282578097866818, + "grad_norm": 7.652646522711401, + "learning_rate": 7.843260416950073e-06, + "loss": 18.535, + "step": 17958 + }, + { + "epoch": 0.3282760889831283, + "grad_norm": 6.696123866533352, + "learning_rate": 7.843016919345088e-06, + "loss": 17.6987, + "step": 17959 + }, + { + "epoch": 0.3282943681795748, + "grad_norm": 6.198056922095434, + "learning_rate": 7.842773411775575e-06, + "loss": 17.2978, + "step": 17960 + }, + { + "epoch": 0.32831264737602134, + "grad_norm": 9.442932381923036, + "learning_rate": 7.842529894242387e-06, + "loss": 18.1727, + "step": 17961 + }, + { + "epoch": 0.3283309265724679, + "grad_norm": 6.890377807218648, + "learning_rate": 7.84228636674638e-06, + "loss": 17.2305, + "step": 17962 + }, + { + "epoch": 0.3283492057689144, + "grad_norm": 7.8386009702386445, + "learning_rate": 7.842042829288405e-06, + "loss": 18.2672, + "step": 17963 + }, + { + "epoch": 0.32836748496536095, + "grad_norm": 5.3862193100565, + "learning_rate": 7.841799281869316e-06, + "loss": 16.9004, + "step": 17964 + }, + { + "epoch": 0.3283857641618074, + "grad_norm": 6.426333063664287, + "learning_rate": 7.841555724489968e-06, + "loss": 17.3328, + "step": 17965 + }, + { + "epoch": 0.32840404335825396, + "grad_norm": 6.454542323521426, + "learning_rate": 7.841312157151214e-06, + "loss": 17.7325, + "step": 17966 + }, + { + "epoch": 0.3284223225547005, + "grad_norm": 6.364717940158, + "learning_rate": 7.841068579853905e-06, + "loss": 17.4542, + "step": 17967 + }, + { + "epoch": 0.32844060175114703, + "grad_norm": 8.143825905159051, + "learning_rate": 7.8408249925989e-06, + "loss": 18.5515, + "step": 17968 + }, + { + "epoch": 0.32845888094759357, + "grad_norm": 6.231896144861432, + "learning_rate": 7.840581395387049e-06, + "loss": 17.5316, + "step": 17969 + }, + { + "epoch": 0.32847716014404005, + "grad_norm": 6.312618339014489, + "learning_rate": 7.840337788219206e-06, + "loss": 17.3823, + "step": 17970 + }, + { + "epoch": 0.3284954393404866, + "grad_norm": 7.612973403971207, + "learning_rate": 7.840094171096227e-06, + "loss": 17.9676, + "step": 17971 + }, + { + "epoch": 0.3285137185369331, + "grad_norm": 7.0474817026375955, + "learning_rate": 7.839850544018963e-06, + "loss": 17.5598, + "step": 17972 + }, + { + "epoch": 0.32853199773337965, + "grad_norm": 7.233474172101555, + "learning_rate": 7.839606906988269e-06, + "loss": 17.6773, + "step": 17973 + }, + { + "epoch": 0.3285502769298262, + "grad_norm": 7.354578322300646, + "learning_rate": 7.839363260005e-06, + "loss": 18.029, + "step": 17974 + }, + { + "epoch": 0.32856855612627267, + "grad_norm": 6.074226772905845, + "learning_rate": 7.839119603070009e-06, + "loss": 17.4109, + "step": 17975 + }, + { + "epoch": 0.3285868353227192, + "grad_norm": 6.238306304417973, + "learning_rate": 7.83887593618415e-06, + "loss": 17.4351, + "step": 17976 + }, + { + "epoch": 0.32860511451916574, + "grad_norm": 7.7263793962581655, + "learning_rate": 7.838632259348277e-06, + "loss": 18.1237, + "step": 17977 + }, + { + "epoch": 0.3286233937156123, + "grad_norm": 7.757291919193158, + "learning_rate": 7.838388572563245e-06, + "loss": 18.1212, + "step": 17978 + }, + { + "epoch": 0.3286416729120588, + "grad_norm": 5.371021798274012, + "learning_rate": 7.838144875829907e-06, + "loss": 17.1414, + "step": 17979 + }, + { + "epoch": 0.3286599521085053, + "grad_norm": 5.787143922859928, + "learning_rate": 7.837901169149118e-06, + "loss": 17.484, + "step": 17980 + }, + { + "epoch": 0.32867823130495183, + "grad_norm": 
5.58306720618807, + "learning_rate": 7.837657452521731e-06, + "loss": 17.1153, + "step": 17981 + }, + { + "epoch": 0.32869651050139836, + "grad_norm": 6.215042796250209, + "learning_rate": 7.837413725948601e-06, + "loss": 17.2298, + "step": 17982 + }, + { + "epoch": 0.3287147896978449, + "grad_norm": 6.972794338186463, + "learning_rate": 7.837169989430582e-06, + "loss": 17.7384, + "step": 17983 + }, + { + "epoch": 0.3287330688942914, + "grad_norm": 5.574196475197414, + "learning_rate": 7.836926242968528e-06, + "loss": 17.1791, + "step": 17984 + }, + { + "epoch": 0.3287513480907379, + "grad_norm": 6.929002251117667, + "learning_rate": 7.836682486563296e-06, + "loss": 17.7906, + "step": 17985 + }, + { + "epoch": 0.32876962728718445, + "grad_norm": 6.137476365263559, + "learning_rate": 7.836438720215736e-06, + "loss": 17.0248, + "step": 17986 + }, + { + "epoch": 0.328787906483631, + "grad_norm": 6.542484529930226, + "learning_rate": 7.836194943926704e-06, + "loss": 17.8205, + "step": 17987 + }, + { + "epoch": 0.3288061856800775, + "grad_norm": 6.055576458847637, + "learning_rate": 7.835951157697055e-06, + "loss": 17.1962, + "step": 17988 + }, + { + "epoch": 0.328824464876524, + "grad_norm": 5.382599772472619, + "learning_rate": 7.835707361527644e-06, + "loss": 17.0756, + "step": 17989 + }, + { + "epoch": 0.32884274407297054, + "grad_norm": 8.919955577730727, + "learning_rate": 7.835463555419324e-06, + "loss": 18.5505, + "step": 17990 + }, + { + "epoch": 0.3288610232694171, + "grad_norm": 6.047143064185945, + "learning_rate": 7.835219739372952e-06, + "loss": 17.4779, + "step": 17991 + }, + { + "epoch": 0.3288793024658636, + "grad_norm": 7.568418447047422, + "learning_rate": 7.83497591338938e-06, + "loss": 17.8594, + "step": 17992 + }, + { + "epoch": 0.32889758166231015, + "grad_norm": 6.442770499078313, + "learning_rate": 7.834732077469463e-06, + "loss": 17.6591, + "step": 17993 + }, + { + "epoch": 0.3289158608587566, + "grad_norm": 5.93445607341471, + "learning_rate": 7.834488231614056e-06, + "loss": 17.4359, + "step": 17994 + }, + { + "epoch": 0.32893414005520316, + "grad_norm": 5.694514030385999, + "learning_rate": 7.834244375824013e-06, + "loss": 17.1629, + "step": 17995 + }, + { + "epoch": 0.3289524192516497, + "grad_norm": 7.673753865343592, + "learning_rate": 7.834000510100192e-06, + "loss": 17.9861, + "step": 17996 + }, + { + "epoch": 0.32897069844809623, + "grad_norm": 5.615634763335271, + "learning_rate": 7.833756634443442e-06, + "loss": 17.3676, + "step": 17997 + }, + { + "epoch": 0.32898897764454277, + "grad_norm": 6.668802536505096, + "learning_rate": 7.833512748854622e-06, + "loss": 17.5947, + "step": 17998 + }, + { + "epoch": 0.32900725684098925, + "grad_norm": 6.918821112867181, + "learning_rate": 7.833268853334587e-06, + "loss": 17.8065, + "step": 17999 + }, + { + "epoch": 0.3290255360374358, + "grad_norm": 6.840251713117992, + "learning_rate": 7.83302494788419e-06, + "loss": 17.9558, + "step": 18000 + }, + { + "epoch": 0.3290438152338823, + "grad_norm": 6.965153888851557, + "learning_rate": 7.832781032504285e-06, + "loss": 17.3834, + "step": 18001 + }, + { + "epoch": 0.32906209443032886, + "grad_norm": 6.2277298638382845, + "learning_rate": 7.832537107195729e-06, + "loss": 17.7506, + "step": 18002 + }, + { + "epoch": 0.3290803736267754, + "grad_norm": 7.37426000360595, + "learning_rate": 7.832293171959378e-06, + "loss": 17.9153, + "step": 18003 + }, + { + "epoch": 0.32909865282322187, + "grad_norm": 6.85056256944876, + "learning_rate": 7.832049226796083e-06, + "loss": 
17.7379, + "step": 18004 + }, + { + "epoch": 0.3291169320196684, + "grad_norm": 6.888727972434795, + "learning_rate": 7.831805271706701e-06, + "loss": 17.7978, + "step": 18005 + }, + { + "epoch": 0.32913521121611494, + "grad_norm": 5.792005203325986, + "learning_rate": 7.83156130669209e-06, + "loss": 17.4617, + "step": 18006 + }, + { + "epoch": 0.3291534904125615, + "grad_norm": 6.5035961323595215, + "learning_rate": 7.831317331753099e-06, + "loss": 17.6357, + "step": 18007 + }, + { + "epoch": 0.329171769609008, + "grad_norm": 7.231552584455718, + "learning_rate": 7.831073346890588e-06, + "loss": 18.0991, + "step": 18008 + }, + { + "epoch": 0.3291900488054545, + "grad_norm": 5.947293522693324, + "learning_rate": 7.830829352105412e-06, + "loss": 17.2566, + "step": 18009 + }, + { + "epoch": 0.32920832800190103, + "grad_norm": 5.742591906200253, + "learning_rate": 7.830585347398423e-06, + "loss": 17.2973, + "step": 18010 + }, + { + "epoch": 0.32922660719834757, + "grad_norm": 7.13675380760017, + "learning_rate": 7.830341332770477e-06, + "loss": 17.7959, + "step": 18011 + }, + { + "epoch": 0.3292448863947941, + "grad_norm": 7.440563667754312, + "learning_rate": 7.830097308222431e-06, + "loss": 18.1438, + "step": 18012 + }, + { + "epoch": 0.32926316559124064, + "grad_norm": 6.20282260730255, + "learning_rate": 7.829853273755142e-06, + "loss": 17.369, + "step": 18013 + }, + { + "epoch": 0.3292814447876871, + "grad_norm": 5.730776245466025, + "learning_rate": 7.82960922936946e-06, + "loss": 17.1559, + "step": 18014 + }, + { + "epoch": 0.32929972398413365, + "grad_norm": 6.100881290165984, + "learning_rate": 7.829365175066244e-06, + "loss": 17.5479, + "step": 18015 + }, + { + "epoch": 0.3293180031805802, + "grad_norm": 6.817386754238109, + "learning_rate": 7.82912111084635e-06, + "loss": 17.8857, + "step": 18016 + }, + { + "epoch": 0.3293362823770267, + "grad_norm": 7.652466360647009, + "learning_rate": 7.828877036710629e-06, + "loss": 17.7605, + "step": 18017 + }, + { + "epoch": 0.3293545615734732, + "grad_norm": 6.025793143133479, + "learning_rate": 7.828632952659942e-06, + "loss": 17.3684, + "step": 18018 + }, + { + "epoch": 0.32937284076991974, + "grad_norm": 5.6804646863167445, + "learning_rate": 7.82838885869514e-06, + "loss": 17.1107, + "step": 18019 + }, + { + "epoch": 0.3293911199663663, + "grad_norm": 6.310527485746265, + "learning_rate": 7.828144754817083e-06, + "loss": 17.2968, + "step": 18020 + }, + { + "epoch": 0.3294093991628128, + "grad_norm": 6.532576936114714, + "learning_rate": 7.827900641026622e-06, + "loss": 17.3347, + "step": 18021 + }, + { + "epoch": 0.32942767835925935, + "grad_norm": 5.82792366400588, + "learning_rate": 7.827656517324616e-06, + "loss": 17.3119, + "step": 18022 + }, + { + "epoch": 0.3294459575557058, + "grad_norm": 6.7218850160504235, + "learning_rate": 7.827412383711919e-06, + "loss": 17.6072, + "step": 18023 + }, + { + "epoch": 0.32946423675215236, + "grad_norm": 5.941080781099494, + "learning_rate": 7.827168240189385e-06, + "loss": 17.3296, + "step": 18024 + }, + { + "epoch": 0.3294825159485989, + "grad_norm": 5.940669637323221, + "learning_rate": 7.826924086757873e-06, + "loss": 17.3381, + "step": 18025 + }, + { + "epoch": 0.32950079514504543, + "grad_norm": 6.334301727038886, + "learning_rate": 7.826679923418238e-06, + "loss": 17.4331, + "step": 18026 + }, + { + "epoch": 0.32951907434149197, + "grad_norm": 6.434928643502983, + "learning_rate": 7.826435750171336e-06, + "loss": 17.6518, + "step": 18027 + }, + { + "epoch": 0.32953735353793845, + 
"grad_norm": 5.795639760284889, + "learning_rate": 7.82619156701802e-06, + "loss": 17.3047, + "step": 18028 + }, + { + "epoch": 0.329555632734385, + "grad_norm": 6.196272877079253, + "learning_rate": 7.825947373959147e-06, + "loss": 17.5228, + "step": 18029 + }, + { + "epoch": 0.3295739119308315, + "grad_norm": 5.278615878184858, + "learning_rate": 7.825703170995576e-06, + "loss": 17.0388, + "step": 18030 + }, + { + "epoch": 0.32959219112727806, + "grad_norm": 7.92969487264405, + "learning_rate": 7.82545895812816e-06, + "loss": 17.9639, + "step": 18031 + }, + { + "epoch": 0.3296104703237246, + "grad_norm": 7.482819457997297, + "learning_rate": 7.825214735357754e-06, + "loss": 17.8671, + "step": 18032 + }, + { + "epoch": 0.32962874952017107, + "grad_norm": 5.786874699951977, + "learning_rate": 7.824970502685218e-06, + "loss": 17.2433, + "step": 18033 + }, + { + "epoch": 0.3296470287166176, + "grad_norm": 6.134673803576229, + "learning_rate": 7.824726260111403e-06, + "loss": 17.4821, + "step": 18034 + }, + { + "epoch": 0.32966530791306414, + "grad_norm": 6.769792504409528, + "learning_rate": 7.824482007637171e-06, + "loss": 17.7747, + "step": 18035 + }, + { + "epoch": 0.3296835871095107, + "grad_norm": 6.461015525984091, + "learning_rate": 7.824237745263372e-06, + "loss": 17.3478, + "step": 18036 + }, + { + "epoch": 0.3297018663059572, + "grad_norm": 8.139266660952575, + "learning_rate": 7.823993472990866e-06, + "loss": 18.0593, + "step": 18037 + }, + { + "epoch": 0.3297201455024037, + "grad_norm": 5.728976451364784, + "learning_rate": 7.823749190820507e-06, + "loss": 17.1549, + "step": 18038 + }, + { + "epoch": 0.32973842469885023, + "grad_norm": 6.000148853719646, + "learning_rate": 7.823504898753152e-06, + "loss": 17.5285, + "step": 18039 + }, + { + "epoch": 0.32975670389529677, + "grad_norm": 5.398455405227888, + "learning_rate": 7.823260596789657e-06, + "loss": 17.1715, + "step": 18040 + }, + { + "epoch": 0.3297749830917433, + "grad_norm": 7.0204903803390675, + "learning_rate": 7.82301628493088e-06, + "loss": 17.6809, + "step": 18041 + }, + { + "epoch": 0.32979326228818984, + "grad_norm": 6.668100476576282, + "learning_rate": 7.822771963177676e-06, + "loss": 17.872, + "step": 18042 + }, + { + "epoch": 0.3298115414846363, + "grad_norm": 7.371328529126587, + "learning_rate": 7.822527631530898e-06, + "loss": 18.0666, + "step": 18043 + }, + { + "epoch": 0.32982982068108285, + "grad_norm": 8.178018564594312, + "learning_rate": 7.822283289991411e-06, + "loss": 18.3436, + "step": 18044 + }, + { + "epoch": 0.3298480998775294, + "grad_norm": 11.580222952895815, + "learning_rate": 7.822038938560061e-06, + "loss": 18.7833, + "step": 18045 + }, + { + "epoch": 0.3298663790739759, + "grad_norm": 7.480236483365055, + "learning_rate": 7.82179457723771e-06, + "loss": 17.4975, + "step": 18046 + }, + { + "epoch": 0.32988465827042246, + "grad_norm": 6.546495003471312, + "learning_rate": 7.821550206025218e-06, + "loss": 17.6675, + "step": 18047 + }, + { + "epoch": 0.32990293746686894, + "grad_norm": 7.540433245344673, + "learning_rate": 7.821305824923434e-06, + "loss": 17.9526, + "step": 18048 + }, + { + "epoch": 0.3299212166633155, + "grad_norm": 6.398733916800262, + "learning_rate": 7.821061433933217e-06, + "loss": 17.98, + "step": 18049 + }, + { + "epoch": 0.329939495859762, + "grad_norm": 6.002220326238243, + "learning_rate": 7.820817033055426e-06, + "loss": 17.5718, + "step": 18050 + }, + { + "epoch": 0.32995777505620855, + "grad_norm": 5.524521422577539, + "learning_rate": 7.820572622290916e-06, + 
"loss": 17.1192, + "step": 18051 + }, + { + "epoch": 0.329976054252655, + "grad_norm": 7.543780459250276, + "learning_rate": 7.820328201640545e-06, + "loss": 17.9918, + "step": 18052 + }, + { + "epoch": 0.32999433344910156, + "grad_norm": 7.530696739431696, + "learning_rate": 7.820083771105166e-06, + "loss": 18.4403, + "step": 18053 + }, + { + "epoch": 0.3300126126455481, + "grad_norm": 6.880084319685398, + "learning_rate": 7.819839330685641e-06, + "loss": 17.7529, + "step": 18054 + }, + { + "epoch": 0.33003089184199463, + "grad_norm": 5.9957187798217, + "learning_rate": 7.819594880382822e-06, + "loss": 17.5537, + "step": 18055 + }, + { + "epoch": 0.33004917103844117, + "grad_norm": 8.015415195987858, + "learning_rate": 7.819350420197566e-06, + "loss": 18.0474, + "step": 18056 + }, + { + "epoch": 0.33006745023488765, + "grad_norm": 6.748262435062119, + "learning_rate": 7.819105950130734e-06, + "loss": 17.699, + "step": 18057 + }, + { + "epoch": 0.3300857294313342, + "grad_norm": 6.842770845866191, + "learning_rate": 7.81886147018318e-06, + "loss": 17.9296, + "step": 18058 + }, + { + "epoch": 0.3301040086277807, + "grad_norm": 7.048283201148179, + "learning_rate": 7.818616980355759e-06, + "loss": 18.0855, + "step": 18059 + }, + { + "epoch": 0.33012228782422726, + "grad_norm": 5.679185944674213, + "learning_rate": 7.818372480649332e-06, + "loss": 17.4917, + "step": 18060 + }, + { + "epoch": 0.3301405670206738, + "grad_norm": 8.87379436962972, + "learning_rate": 7.818127971064755e-06, + "loss": 18.5461, + "step": 18061 + }, + { + "epoch": 0.33015884621712027, + "grad_norm": 7.260242380332696, + "learning_rate": 7.817883451602884e-06, + "loss": 17.8184, + "step": 18062 + }, + { + "epoch": 0.3301771254135668, + "grad_norm": 6.120923486287874, + "learning_rate": 7.817638922264572e-06, + "loss": 17.292, + "step": 18063 + }, + { + "epoch": 0.33019540461001334, + "grad_norm": 6.204902826118205, + "learning_rate": 7.817394383050683e-06, + "loss": 17.5321, + "step": 18064 + }, + { + "epoch": 0.3302136838064599, + "grad_norm": 6.371457833218362, + "learning_rate": 7.817149833962073e-06, + "loss": 17.5532, + "step": 18065 + }, + { + "epoch": 0.3302319630029064, + "grad_norm": 6.202345362612025, + "learning_rate": 7.816905274999594e-06, + "loss": 17.5803, + "step": 18066 + }, + { + "epoch": 0.3302502421993529, + "grad_norm": 5.772474384306617, + "learning_rate": 7.816660706164107e-06, + "loss": 17.3038, + "step": 18067 + }, + { + "epoch": 0.33026852139579943, + "grad_norm": 6.2731523091285215, + "learning_rate": 7.81641612745647e-06, + "loss": 17.4522, + "step": 18068 + }, + { + "epoch": 0.33028680059224597, + "grad_norm": 6.030460793230085, + "learning_rate": 7.81617153887754e-06, + "loss": 17.4709, + "step": 18069 + }, + { + "epoch": 0.3303050797886925, + "grad_norm": 7.152867573673834, + "learning_rate": 7.815926940428169e-06, + "loss": 18.0158, + "step": 18070 + }, + { + "epoch": 0.33032335898513904, + "grad_norm": 6.988290298304303, + "learning_rate": 7.815682332109221e-06, + "loss": 18.0304, + "step": 18071 + }, + { + "epoch": 0.3303416381815855, + "grad_norm": 5.60823683949017, + "learning_rate": 7.815437713921553e-06, + "loss": 17.0719, + "step": 18072 + }, + { + "epoch": 0.33035991737803205, + "grad_norm": 6.508456643510161, + "learning_rate": 7.815193085866017e-06, + "loss": 17.7962, + "step": 18073 + }, + { + "epoch": 0.3303781965744786, + "grad_norm": 5.333315064822724, + "learning_rate": 7.814948447943474e-06, + "loss": 17.0398, + "step": 18074 + }, + { + "epoch": 0.3303964757709251, + 
"grad_norm": 6.06665473651248, + "learning_rate": 7.814703800154781e-06, + "loss": 17.2698, + "step": 18075 + }, + { + "epoch": 0.33041475496737166, + "grad_norm": 6.011162302757721, + "learning_rate": 7.814459142500795e-06, + "loss": 17.2969, + "step": 18076 + }, + { + "epoch": 0.33043303416381814, + "grad_norm": 6.284279519153075, + "learning_rate": 7.814214474982374e-06, + "loss": 17.3795, + "step": 18077 + }, + { + "epoch": 0.3304513133602647, + "grad_norm": 6.412640893166762, + "learning_rate": 7.813969797600377e-06, + "loss": 17.8153, + "step": 18078 + }, + { + "epoch": 0.3304695925567112, + "grad_norm": 6.086706312108125, + "learning_rate": 7.813725110355658e-06, + "loss": 17.514, + "step": 18079 + }, + { + "epoch": 0.33048787175315775, + "grad_norm": 6.369750439259867, + "learning_rate": 7.813480413249078e-06, + "loss": 17.4404, + "step": 18080 + }, + { + "epoch": 0.3305061509496043, + "grad_norm": 6.714898930401324, + "learning_rate": 7.813235706281493e-06, + "loss": 17.7209, + "step": 18081 + }, + { + "epoch": 0.33052443014605076, + "grad_norm": 5.085908452935506, + "learning_rate": 7.812990989453762e-06, + "loss": 17.2132, + "step": 18082 + }, + { + "epoch": 0.3305427093424973, + "grad_norm": 7.636258516957033, + "learning_rate": 7.81274626276674e-06, + "loss": 18.3384, + "step": 18083 + }, + { + "epoch": 0.33056098853894383, + "grad_norm": 6.3478744830122045, + "learning_rate": 7.812501526221286e-06, + "loss": 17.6347, + "step": 18084 + }, + { + "epoch": 0.33057926773539037, + "grad_norm": 6.298719686003601, + "learning_rate": 7.812256779818262e-06, + "loss": 17.2591, + "step": 18085 + }, + { + "epoch": 0.33059754693183685, + "grad_norm": 6.323332639605773, + "learning_rate": 7.812012023558517e-06, + "loss": 17.4948, + "step": 18086 + }, + { + "epoch": 0.3306158261282834, + "grad_norm": 6.769834675958703, + "learning_rate": 7.811767257442917e-06, + "loss": 17.6773, + "step": 18087 + }, + { + "epoch": 0.3306341053247299, + "grad_norm": 7.566896663553592, + "learning_rate": 7.811522481472316e-06, + "loss": 18.1479, + "step": 18088 + }, + { + "epoch": 0.33065238452117646, + "grad_norm": 7.349644500255119, + "learning_rate": 7.811277695647573e-06, + "loss": 17.9977, + "step": 18089 + }, + { + "epoch": 0.330670663717623, + "grad_norm": 6.247972811162064, + "learning_rate": 7.811032899969545e-06, + "loss": 17.5106, + "step": 18090 + }, + { + "epoch": 0.3306889429140695, + "grad_norm": 8.004581912619546, + "learning_rate": 7.81078809443909e-06, + "loss": 18.3166, + "step": 18091 + }, + { + "epoch": 0.330707222110516, + "grad_norm": 7.109596586974475, + "learning_rate": 7.810543279057068e-06, + "loss": 17.8664, + "step": 18092 + }, + { + "epoch": 0.33072550130696254, + "grad_norm": 5.738651723051288, + "learning_rate": 7.810298453824336e-06, + "loss": 17.3457, + "step": 18093 + }, + { + "epoch": 0.3307437805034091, + "grad_norm": 7.220865070391188, + "learning_rate": 7.81005361874175e-06, + "loss": 17.6067, + "step": 18094 + }, + { + "epoch": 0.3307620596998556, + "grad_norm": 8.148402427707252, + "learning_rate": 7.80980877381017e-06, + "loss": 18.197, + "step": 18095 + }, + { + "epoch": 0.3307803388963021, + "grad_norm": 5.588565291030363, + "learning_rate": 7.809563919030456e-06, + "loss": 17.1872, + "step": 18096 + }, + { + "epoch": 0.33079861809274863, + "grad_norm": 6.75662550492222, + "learning_rate": 7.809319054403463e-06, + "loss": 17.1131, + "step": 18097 + }, + { + "epoch": 0.33081689728919517, + "grad_norm": 7.431033782768452, + "learning_rate": 7.80907417993005e-06, + 
"loss": 17.975, + "step": 18098 + }, + { + "epoch": 0.3308351764856417, + "grad_norm": 7.685822219913446, + "learning_rate": 7.808829295611078e-06, + "loss": 17.7454, + "step": 18099 + }, + { + "epoch": 0.33085345568208824, + "grad_norm": 6.228433468252308, + "learning_rate": 7.8085844014474e-06, + "loss": 17.2792, + "step": 18100 + }, + { + "epoch": 0.3308717348785347, + "grad_norm": 7.236931279106639, + "learning_rate": 7.808339497439881e-06, + "loss": 17.8093, + "step": 18101 + }, + { + "epoch": 0.33089001407498125, + "grad_norm": 4.747904350524478, + "learning_rate": 7.808094583589372e-06, + "loss": 16.8722, + "step": 18102 + }, + { + "epoch": 0.3309082932714278, + "grad_norm": 5.943061297250326, + "learning_rate": 7.807849659896738e-06, + "loss": 17.5772, + "step": 18103 + }, + { + "epoch": 0.3309265724678743, + "grad_norm": 8.433176466603951, + "learning_rate": 7.807604726362833e-06, + "loss": 18.0154, + "step": 18104 + }, + { + "epoch": 0.33094485166432086, + "grad_norm": 7.371157211341919, + "learning_rate": 7.807359782988517e-06, + "loss": 17.5609, + "step": 18105 + }, + { + "epoch": 0.33096313086076734, + "grad_norm": 7.081159680112228, + "learning_rate": 7.80711482977465e-06, + "loss": 17.9772, + "step": 18106 + }, + { + "epoch": 0.3309814100572139, + "grad_norm": 8.509748744179188, + "learning_rate": 7.806869866722087e-06, + "loss": 18.6014, + "step": 18107 + }, + { + "epoch": 0.3309996892536604, + "grad_norm": 6.176470547847755, + "learning_rate": 7.806624893831692e-06, + "loss": 17.4069, + "step": 18108 + }, + { + "epoch": 0.33101796845010695, + "grad_norm": 5.95420010238592, + "learning_rate": 7.806379911104316e-06, + "loss": 17.4311, + "step": 18109 + }, + { + "epoch": 0.3310362476465535, + "grad_norm": 5.677335869781743, + "learning_rate": 7.806134918540825e-06, + "loss": 17.4766, + "step": 18110 + }, + { + "epoch": 0.33105452684299996, + "grad_norm": 5.634643249307986, + "learning_rate": 7.805889916142073e-06, + "loss": 17.2223, + "step": 18111 + }, + { + "epoch": 0.3310728060394465, + "grad_norm": 6.59916300705919, + "learning_rate": 7.805644903908922e-06, + "loss": 17.4556, + "step": 18112 + }, + { + "epoch": 0.33109108523589303, + "grad_norm": 5.978686613423198, + "learning_rate": 7.805399881842227e-06, + "loss": 17.1401, + "step": 18113 + }, + { + "epoch": 0.33110936443233957, + "grad_norm": 6.213357174843528, + "learning_rate": 7.805154849942851e-06, + "loss": 17.2441, + "step": 18114 + }, + { + "epoch": 0.3311276436287861, + "grad_norm": 6.805620757747374, + "learning_rate": 7.804909808211649e-06, + "loss": 17.5737, + "step": 18115 + }, + { + "epoch": 0.3311459228252326, + "grad_norm": 6.61973807849275, + "learning_rate": 7.804664756649483e-06, + "loss": 17.7328, + "step": 18116 + }, + { + "epoch": 0.3311642020216791, + "grad_norm": 7.081637541097287, + "learning_rate": 7.80441969525721e-06, + "loss": 18.1021, + "step": 18117 + }, + { + "epoch": 0.33118248121812566, + "grad_norm": 6.1570293838340024, + "learning_rate": 7.804174624035687e-06, + "loss": 17.2987, + "step": 18118 + }, + { + "epoch": 0.3312007604145722, + "grad_norm": 7.137962111086096, + "learning_rate": 7.803929542985778e-06, + "loss": 18.09, + "step": 18119 + }, + { + "epoch": 0.3312190396110187, + "grad_norm": 6.630613522949249, + "learning_rate": 7.80368445210834e-06, + "loss": 17.2999, + "step": 18120 + }, + { + "epoch": 0.3312373188074652, + "grad_norm": 6.5951159747846795, + "learning_rate": 7.80343935140423e-06, + "loss": 17.5798, + "step": 18121 + }, + { + "epoch": 0.33125559800391174, + 
"grad_norm": 5.678767297055565, + "learning_rate": 7.803194240874307e-06, + "loss": 17.2323, + "step": 18122 + }, + { + "epoch": 0.3312738772003583, + "grad_norm": 6.177090873093481, + "learning_rate": 7.802949120519433e-06, + "loss": 17.4886, + "step": 18123 + }, + { + "epoch": 0.3312921563968048, + "grad_norm": 6.948735445214337, + "learning_rate": 7.802703990340465e-06, + "loss": 17.9592, + "step": 18124 + }, + { + "epoch": 0.3313104355932513, + "grad_norm": 6.245303136428903, + "learning_rate": 7.802458850338262e-06, + "loss": 17.5357, + "step": 18125 + }, + { + "epoch": 0.33132871478969783, + "grad_norm": 5.0947277965446975, + "learning_rate": 7.802213700513686e-06, + "loss": 16.7877, + "step": 18126 + }, + { + "epoch": 0.33134699398614437, + "grad_norm": 6.178992615043774, + "learning_rate": 7.801968540867593e-06, + "loss": 17.5462, + "step": 18127 + }, + { + "epoch": 0.3313652731825909, + "grad_norm": 6.400873647242991, + "learning_rate": 7.801723371400842e-06, + "loss": 17.5993, + "step": 18128 + }, + { + "epoch": 0.33138355237903744, + "grad_norm": 7.180080291465306, + "learning_rate": 7.801478192114294e-06, + "loss": 17.7825, + "step": 18129 + }, + { + "epoch": 0.3314018315754839, + "grad_norm": 8.426975854213334, + "learning_rate": 7.80123300300881e-06, + "loss": 18.0174, + "step": 18130 + }, + { + "epoch": 0.33142011077193045, + "grad_norm": 9.678813909641246, + "learning_rate": 7.800987804085248e-06, + "loss": 18.6294, + "step": 18131 + }, + { + "epoch": 0.331438389968377, + "grad_norm": 6.117208571758629, + "learning_rate": 7.800742595344464e-06, + "loss": 17.3539, + "step": 18132 + }, + { + "epoch": 0.3314566691648235, + "grad_norm": 8.796654775465436, + "learning_rate": 7.800497376787322e-06, + "loss": 18.2088, + "step": 18133 + }, + { + "epoch": 0.33147494836127006, + "grad_norm": 6.591865191776443, + "learning_rate": 7.80025214841468e-06, + "loss": 17.7186, + "step": 18134 + }, + { + "epoch": 0.33149322755771654, + "grad_norm": 12.247619296007418, + "learning_rate": 7.800006910227395e-06, + "loss": 18.1093, + "step": 18135 + }, + { + "epoch": 0.3315115067541631, + "grad_norm": 6.145603212567427, + "learning_rate": 7.79976166222633e-06, + "loss": 17.3947, + "step": 18136 + }, + { + "epoch": 0.3315297859506096, + "grad_norm": 5.775018551611026, + "learning_rate": 7.799516404412344e-06, + "loss": 17.327, + "step": 18137 + }, + { + "epoch": 0.33154806514705615, + "grad_norm": 7.1323289170806365, + "learning_rate": 7.799271136786294e-06, + "loss": 17.8676, + "step": 18138 + }, + { + "epoch": 0.3315663443435027, + "grad_norm": 6.461974829962352, + "learning_rate": 7.799025859349043e-06, + "loss": 17.6025, + "step": 18139 + }, + { + "epoch": 0.33158462353994916, + "grad_norm": 7.838103949478371, + "learning_rate": 7.79878057210145e-06, + "loss": 18.3584, + "step": 18140 + }, + { + "epoch": 0.3316029027363957, + "grad_norm": 8.413702494573815, + "learning_rate": 7.798535275044374e-06, + "loss": 18.6791, + "step": 18141 + }, + { + "epoch": 0.33162118193284223, + "grad_norm": 5.8768644266598145, + "learning_rate": 7.798289968178674e-06, + "loss": 17.3626, + "step": 18142 + }, + { + "epoch": 0.33163946112928877, + "grad_norm": 5.994574320590915, + "learning_rate": 7.79804465150521e-06, + "loss": 17.5051, + "step": 18143 + }, + { + "epoch": 0.3316577403257353, + "grad_norm": 6.586087063590682, + "learning_rate": 7.797799325024842e-06, + "loss": 17.736, + "step": 18144 + }, + { + "epoch": 0.3316760195221818, + "grad_norm": 5.638852904875247, + "learning_rate": 7.797553988738432e-06, + 
"loss": 17.2101, + "step": 18145 + }, + { + "epoch": 0.3316942987186283, + "grad_norm": 5.736074742061226, + "learning_rate": 7.797308642646836e-06, + "loss": 17.1432, + "step": 18146 + }, + { + "epoch": 0.33171257791507486, + "grad_norm": 7.090918000663191, + "learning_rate": 7.797063286750916e-06, + "loss": 17.6971, + "step": 18147 + }, + { + "epoch": 0.3317308571115214, + "grad_norm": 6.206276526643924, + "learning_rate": 7.796817921051534e-06, + "loss": 17.3684, + "step": 18148 + }, + { + "epoch": 0.33174913630796793, + "grad_norm": 6.436787675555288, + "learning_rate": 7.796572545549546e-06, + "loss": 17.7381, + "step": 18149 + }, + { + "epoch": 0.3317674155044144, + "grad_norm": 5.870395325684857, + "learning_rate": 7.796327160245814e-06, + "loss": 17.257, + "step": 18150 + }, + { + "epoch": 0.33178569470086094, + "grad_norm": 7.163311270417848, + "learning_rate": 7.796081765141198e-06, + "loss": 17.813, + "step": 18151 + }, + { + "epoch": 0.3318039738973075, + "grad_norm": 6.897218684159557, + "learning_rate": 7.795836360236559e-06, + "loss": 17.6345, + "step": 18152 + }, + { + "epoch": 0.331822253093754, + "grad_norm": 6.524512327566224, + "learning_rate": 7.795590945532757e-06, + "loss": 17.5572, + "step": 18153 + }, + { + "epoch": 0.3318405322902005, + "grad_norm": 6.146439310580656, + "learning_rate": 7.79534552103065e-06, + "loss": 17.2985, + "step": 18154 + }, + { + "epoch": 0.33185881148664703, + "grad_norm": 6.068391168257, + "learning_rate": 7.7951000867311e-06, + "loss": 17.6694, + "step": 18155 + }, + { + "epoch": 0.33187709068309357, + "grad_norm": 7.338505430554289, + "learning_rate": 7.794854642634964e-06, + "loss": 17.6184, + "step": 18156 + }, + { + "epoch": 0.3318953698795401, + "grad_norm": 6.143827448923468, + "learning_rate": 7.794609188743108e-06, + "loss": 17.3543, + "step": 18157 + }, + { + "epoch": 0.33191364907598664, + "grad_norm": 7.009805038282566, + "learning_rate": 7.79436372505639e-06, + "loss": 17.7452, + "step": 18158 + }, + { + "epoch": 0.3319319282724331, + "grad_norm": 5.642670319090825, + "learning_rate": 7.794118251575666e-06, + "loss": 17.3011, + "step": 18159 + }, + { + "epoch": 0.33195020746887965, + "grad_norm": 7.102703592615145, + "learning_rate": 7.793872768301802e-06, + "loss": 17.8441, + "step": 18160 + }, + { + "epoch": 0.3319684866653262, + "grad_norm": 7.125349431069596, + "learning_rate": 7.793627275235658e-06, + "loss": 17.5985, + "step": 18161 + }, + { + "epoch": 0.3319867658617727, + "grad_norm": 5.280807767518307, + "learning_rate": 7.793381772378091e-06, + "loss": 16.9641, + "step": 18162 + }, + { + "epoch": 0.33200504505821926, + "grad_norm": 7.904987467500512, + "learning_rate": 7.793136259729963e-06, + "loss": 18.7803, + "step": 18163 + }, + { + "epoch": 0.33202332425466574, + "grad_norm": 5.918237343746448, + "learning_rate": 7.792890737292135e-06, + "loss": 17.1779, + "step": 18164 + }, + { + "epoch": 0.3320416034511123, + "grad_norm": 6.076838668528555, + "learning_rate": 7.792645205065469e-06, + "loss": 17.3053, + "step": 18165 + }, + { + "epoch": 0.3320598826475588, + "grad_norm": 5.958843514612012, + "learning_rate": 7.792399663050822e-06, + "loss": 17.4298, + "step": 18166 + }, + { + "epoch": 0.33207816184400535, + "grad_norm": 6.130864054531944, + "learning_rate": 7.792154111249057e-06, + "loss": 17.1949, + "step": 18167 + }, + { + "epoch": 0.3320964410404519, + "grad_norm": 7.954768819297425, + "learning_rate": 7.791908549661036e-06, + "loss": 18.3735, + "step": 18168 + }, + { + "epoch": 0.33211472023689836, + 
"grad_norm": 8.298811300794295, + "learning_rate": 7.791662978287616e-06, + "loss": 18.0898, + "step": 18169 + }, + { + "epoch": 0.3321329994333449, + "grad_norm": 6.303685131205644, + "learning_rate": 7.791417397129659e-06, + "loss": 17.1811, + "step": 18170 + }, + { + "epoch": 0.33215127862979144, + "grad_norm": 7.206124607693227, + "learning_rate": 7.791171806188027e-06, + "loss": 18.0821, + "step": 18171 + }, + { + "epoch": 0.33216955782623797, + "grad_norm": 8.314122606992065, + "learning_rate": 7.79092620546358e-06, + "loss": 17.6907, + "step": 18172 + }, + { + "epoch": 0.3321878370226845, + "grad_norm": 5.033311332280101, + "learning_rate": 7.790680594957179e-06, + "loss": 16.9447, + "step": 18173 + }, + { + "epoch": 0.332206116219131, + "grad_norm": 6.494315316107332, + "learning_rate": 7.790434974669685e-06, + "loss": 17.2337, + "step": 18174 + }, + { + "epoch": 0.3322243954155775, + "grad_norm": 6.53056235395755, + "learning_rate": 7.790189344601957e-06, + "loss": 17.7898, + "step": 18175 + }, + { + "epoch": 0.33224267461202406, + "grad_norm": 6.62133568231179, + "learning_rate": 7.789943704754859e-06, + "loss": 17.6237, + "step": 18176 + }, + { + "epoch": 0.3322609538084706, + "grad_norm": 7.443861748373182, + "learning_rate": 7.789698055129248e-06, + "loss": 17.912, + "step": 18177 + }, + { + "epoch": 0.33227923300491713, + "grad_norm": 6.973394307184902, + "learning_rate": 7.789452395725991e-06, + "loss": 17.7439, + "step": 18178 + }, + { + "epoch": 0.3322975122013636, + "grad_norm": 6.785497824905413, + "learning_rate": 7.789206726545944e-06, + "loss": 17.5712, + "step": 18179 + }, + { + "epoch": 0.33231579139781015, + "grad_norm": 5.727668255250328, + "learning_rate": 7.788961047589968e-06, + "loss": 17.3711, + "step": 18180 + }, + { + "epoch": 0.3323340705942567, + "grad_norm": 7.234093853885038, + "learning_rate": 7.788715358858927e-06, + "loss": 17.8033, + "step": 18181 + }, + { + "epoch": 0.3323523497907032, + "grad_norm": 7.431557595833951, + "learning_rate": 7.78846966035368e-06, + "loss": 18.0144, + "step": 18182 + }, + { + "epoch": 0.33237062898714975, + "grad_norm": 5.669439493474292, + "learning_rate": 7.78822395207509e-06, + "loss": 17.175, + "step": 18183 + }, + { + "epoch": 0.33238890818359623, + "grad_norm": 5.84385140341315, + "learning_rate": 7.787978234024014e-06, + "loss": 17.4927, + "step": 18184 + }, + { + "epoch": 0.33240718738004277, + "grad_norm": 6.780583032607836, + "learning_rate": 7.78773250620132e-06, + "loss": 17.8106, + "step": 18185 + }, + { + "epoch": 0.3324254665764893, + "grad_norm": 7.255141407721134, + "learning_rate": 7.787486768607864e-06, + "loss": 17.8612, + "step": 18186 + }, + { + "epoch": 0.33244374577293584, + "grad_norm": 6.983175297532281, + "learning_rate": 7.787241021244509e-06, + "loss": 17.8476, + "step": 18187 + }, + { + "epoch": 0.3324620249693823, + "grad_norm": 7.714506912999816, + "learning_rate": 7.786995264112113e-06, + "loss": 17.8431, + "step": 18188 + }, + { + "epoch": 0.33248030416582885, + "grad_norm": 6.635305280111997, + "learning_rate": 7.786749497211545e-06, + "loss": 17.844, + "step": 18189 + }, + { + "epoch": 0.3324985833622754, + "grad_norm": 6.495138931650477, + "learning_rate": 7.78650372054366e-06, + "loss": 17.572, + "step": 18190 + }, + { + "epoch": 0.3325168625587219, + "grad_norm": 6.49733321429841, + "learning_rate": 7.786257934109321e-06, + "loss": 17.6245, + "step": 18191 + }, + { + "epoch": 0.33253514175516846, + "grad_norm": 7.198187102241127, + "learning_rate": 7.78601213790939e-06, + "loss": 
17.7093, + "step": 18192 + }, + { + "epoch": 0.33255342095161494, + "grad_norm": 6.14606092043527, + "learning_rate": 7.785766331944729e-06, + "loss": 17.6105, + "step": 18193 + }, + { + "epoch": 0.3325717001480615, + "grad_norm": 5.960404881106066, + "learning_rate": 7.785520516216196e-06, + "loss": 17.5927, + "step": 18194 + }, + { + "epoch": 0.332589979344508, + "grad_norm": 6.351337550024658, + "learning_rate": 7.785274690724657e-06, + "loss": 17.8266, + "step": 18195 + }, + { + "epoch": 0.33260825854095455, + "grad_norm": 5.717737567289761, + "learning_rate": 7.785028855470973e-06, + "loss": 17.3423, + "step": 18196 + }, + { + "epoch": 0.3326265377374011, + "grad_norm": 8.886182938977878, + "learning_rate": 7.784783010456002e-06, + "loss": 18.6777, + "step": 18197 + }, + { + "epoch": 0.33264481693384756, + "grad_norm": 6.677696799615567, + "learning_rate": 7.784537155680611e-06, + "loss": 17.6191, + "step": 18198 + }, + { + "epoch": 0.3326630961302941, + "grad_norm": 5.338467567289512, + "learning_rate": 7.784291291145657e-06, + "loss": 16.9954, + "step": 18199 + }, + { + "epoch": 0.33268137532674064, + "grad_norm": 5.759579456797714, + "learning_rate": 7.784045416852007e-06, + "loss": 17.2598, + "step": 18200 + }, + { + "epoch": 0.33269965452318717, + "grad_norm": 6.395268915261202, + "learning_rate": 7.783799532800516e-06, + "loss": 17.4145, + "step": 18201 + }, + { + "epoch": 0.3327179337196337, + "grad_norm": 6.0361347411754505, + "learning_rate": 7.78355363899205e-06, + "loss": 17.2654, + "step": 18202 + }, + { + "epoch": 0.3327362129160802, + "grad_norm": 6.208967974543009, + "learning_rate": 7.78330773542747e-06, + "loss": 17.4756, + "step": 18203 + }, + { + "epoch": 0.3327544921125267, + "grad_norm": 7.052348690253904, + "learning_rate": 7.783061822107637e-06, + "loss": 17.6805, + "step": 18204 + }, + { + "epoch": 0.33277277130897326, + "grad_norm": 8.153610445614932, + "learning_rate": 7.782815899033415e-06, + "loss": 18.003, + "step": 18205 + }, + { + "epoch": 0.3327910505054198, + "grad_norm": 6.652723108848424, + "learning_rate": 7.782569966205664e-06, + "loss": 17.6864, + "step": 18206 + }, + { + "epoch": 0.33280932970186633, + "grad_norm": 7.3738920706327615, + "learning_rate": 7.782324023625247e-06, + "loss": 17.7472, + "step": 18207 + }, + { + "epoch": 0.3328276088983128, + "grad_norm": 5.475350667601087, + "learning_rate": 7.782078071293026e-06, + "loss": 17.132, + "step": 18208 + }, + { + "epoch": 0.33284588809475935, + "grad_norm": 6.62703705335512, + "learning_rate": 7.781832109209864e-06, + "loss": 17.2494, + "step": 18209 + }, + { + "epoch": 0.3328641672912059, + "grad_norm": 5.713141076718879, + "learning_rate": 7.78158613737662e-06, + "loss": 17.2371, + "step": 18210 + }, + { + "epoch": 0.3328824464876524, + "grad_norm": 5.889611034039129, + "learning_rate": 7.781340155794159e-06, + "loss": 17.2982, + "step": 18211 + }, + { + "epoch": 0.33290072568409895, + "grad_norm": 6.832460942448049, + "learning_rate": 7.78109416446334e-06, + "loss": 17.4735, + "step": 18212 + }, + { + "epoch": 0.33291900488054543, + "grad_norm": 7.148140644955894, + "learning_rate": 7.78084816338503e-06, + "loss": 17.9011, + "step": 18213 + }, + { + "epoch": 0.33293728407699197, + "grad_norm": 6.981509351000349, + "learning_rate": 7.780602152560089e-06, + "loss": 17.4855, + "step": 18214 + }, + { + "epoch": 0.3329555632734385, + "grad_norm": 5.84717279478008, + "learning_rate": 7.780356131989375e-06, + "loss": 17.1238, + "step": 18215 + }, + { + "epoch": 0.33297384246988504, + 
"grad_norm": 6.5923059057619104, + "learning_rate": 7.780110101673758e-06, + "loss": 17.6541, + "step": 18216 + }, + { + "epoch": 0.3329921216663316, + "grad_norm": 7.385514389434467, + "learning_rate": 7.779864061614094e-06, + "loss": 17.5661, + "step": 18217 + }, + { + "epoch": 0.33301040086277806, + "grad_norm": 6.295521554971102, + "learning_rate": 7.779618011811248e-06, + "loss": 17.6539, + "step": 18218 + }, + { + "epoch": 0.3330286800592246, + "grad_norm": 6.767173193562697, + "learning_rate": 7.779371952266082e-06, + "loss": 17.3043, + "step": 18219 + }, + { + "epoch": 0.3330469592556711, + "grad_norm": 5.589786289988833, + "learning_rate": 7.779125882979458e-06, + "loss": 17.3223, + "step": 18220 + }, + { + "epoch": 0.33306523845211766, + "grad_norm": 7.416346672681761, + "learning_rate": 7.778879803952242e-06, + "loss": 17.8023, + "step": 18221 + }, + { + "epoch": 0.33308351764856414, + "grad_norm": 5.774653161170778, + "learning_rate": 7.77863371518529e-06, + "loss": 17.1647, + "step": 18222 + }, + { + "epoch": 0.3331017968450107, + "grad_norm": 7.831824156077151, + "learning_rate": 7.77838761667947e-06, + "loss": 17.9389, + "step": 18223 + }, + { + "epoch": 0.3331200760414572, + "grad_norm": 5.982720017870738, + "learning_rate": 7.778141508435641e-06, + "loss": 17.2224, + "step": 18224 + }, + { + "epoch": 0.33313835523790375, + "grad_norm": 7.700784768558035, + "learning_rate": 7.777895390454669e-06, + "loss": 17.8543, + "step": 18225 + }, + { + "epoch": 0.3331566344343503, + "grad_norm": 8.116481306875674, + "learning_rate": 7.777649262737412e-06, + "loss": 18.4105, + "step": 18226 + }, + { + "epoch": 0.33317491363079677, + "grad_norm": 6.267324299825966, + "learning_rate": 7.777403125284737e-06, + "loss": 17.1391, + "step": 18227 + }, + { + "epoch": 0.3331931928272433, + "grad_norm": 6.52838322101029, + "learning_rate": 7.777156978097505e-06, + "loss": 17.8091, + "step": 18228 + }, + { + "epoch": 0.33321147202368984, + "grad_norm": 6.199658199303566, + "learning_rate": 7.776910821176578e-06, + "loss": 17.317, + "step": 18229 + }, + { + "epoch": 0.33322975122013637, + "grad_norm": 7.757411540484732, + "learning_rate": 7.77666465452282e-06, + "loss": 18.0858, + "step": 18230 + }, + { + "epoch": 0.3332480304165829, + "grad_norm": 6.168299341975133, + "learning_rate": 7.776418478137095e-06, + "loss": 17.5393, + "step": 18231 + }, + { + "epoch": 0.3332663096130294, + "grad_norm": 6.299672138464206, + "learning_rate": 7.776172292020262e-06, + "loss": 17.2522, + "step": 18232 + }, + { + "epoch": 0.3332845888094759, + "grad_norm": 5.580060240216484, + "learning_rate": 7.775926096173187e-06, + "loss": 17.2081, + "step": 18233 + }, + { + "epoch": 0.33330286800592246, + "grad_norm": 6.557004094769657, + "learning_rate": 7.775679890596731e-06, + "loss": 17.5013, + "step": 18234 + }, + { + "epoch": 0.333321147202369, + "grad_norm": 8.150564436436557, + "learning_rate": 7.77543367529176e-06, + "loss": 17.8301, + "step": 18235 + }, + { + "epoch": 0.33333942639881553, + "grad_norm": 6.586964449226526, + "learning_rate": 7.775187450259132e-06, + "loss": 17.5818, + "step": 18236 + }, + { + "epoch": 0.333357705595262, + "grad_norm": 5.918227388140869, + "learning_rate": 7.774941215499715e-06, + "loss": 17.1627, + "step": 18237 + }, + { + "epoch": 0.33337598479170855, + "grad_norm": 5.814865474293571, + "learning_rate": 7.774694971014366e-06, + "loss": 17.4967, + "step": 18238 + }, + { + "epoch": 0.3333942639881551, + "grad_norm": 6.3666921474352005, + "learning_rate": 7.774448716803957e-06, + 
"loss": 17.9214, + "step": 18239 + }, + { + "epoch": 0.3334125431846016, + "grad_norm": 5.805887642846262, + "learning_rate": 7.774202452869343e-06, + "loss": 17.3668, + "step": 18240 + }, + { + "epoch": 0.33343082238104815, + "grad_norm": 6.024066042154094, + "learning_rate": 7.773956179211392e-06, + "loss": 17.3197, + "step": 18241 + }, + { + "epoch": 0.33344910157749463, + "grad_norm": 6.4865101154431555, + "learning_rate": 7.773709895830964e-06, + "loss": 17.2924, + "step": 18242 + }, + { + "epoch": 0.33346738077394117, + "grad_norm": 6.695215422941599, + "learning_rate": 7.773463602728923e-06, + "loss": 17.669, + "step": 18243 + }, + { + "epoch": 0.3334856599703877, + "grad_norm": 6.694233011803048, + "learning_rate": 7.773217299906134e-06, + "loss": 17.7665, + "step": 18244 + }, + { + "epoch": 0.33350393916683424, + "grad_norm": 5.820866749295414, + "learning_rate": 7.772970987363458e-06, + "loss": 17.1573, + "step": 18245 + }, + { + "epoch": 0.3335222183632808, + "grad_norm": 6.240684066895211, + "learning_rate": 7.772724665101761e-06, + "loss": 17.4504, + "step": 18246 + }, + { + "epoch": 0.33354049755972726, + "grad_norm": 6.125521233582662, + "learning_rate": 7.772478333121904e-06, + "loss": 17.2956, + "step": 18247 + }, + { + "epoch": 0.3335587767561738, + "grad_norm": 7.79943240506785, + "learning_rate": 7.77223199142475e-06, + "loss": 18.0713, + "step": 18248 + }, + { + "epoch": 0.3335770559526203, + "grad_norm": 8.366673337653188, + "learning_rate": 7.771985640011163e-06, + "loss": 18.4496, + "step": 18249 + }, + { + "epoch": 0.33359533514906686, + "grad_norm": 7.069364827523432, + "learning_rate": 7.771739278882009e-06, + "loss": 17.8833, + "step": 18250 + }, + { + "epoch": 0.3336136143455134, + "grad_norm": 5.865046767256241, + "learning_rate": 7.771492908038147e-06, + "loss": 17.2996, + "step": 18251 + }, + { + "epoch": 0.3336318935419599, + "grad_norm": 7.32448449067599, + "learning_rate": 7.771246527480446e-06, + "loss": 17.7859, + "step": 18252 + }, + { + "epoch": 0.3336501727384064, + "grad_norm": 6.180994553510096, + "learning_rate": 7.771000137209763e-06, + "loss": 17.5074, + "step": 18253 + }, + { + "epoch": 0.33366845193485295, + "grad_norm": 6.730369171626773, + "learning_rate": 7.770753737226965e-06, + "loss": 17.6742, + "step": 18254 + }, + { + "epoch": 0.3336867311312995, + "grad_norm": 7.494552997378219, + "learning_rate": 7.77050732753292e-06, + "loss": 17.7578, + "step": 18255 + }, + { + "epoch": 0.33370501032774597, + "grad_norm": 6.277815491673119, + "learning_rate": 7.770260908128481e-06, + "loss": 17.4783, + "step": 18256 + }, + { + "epoch": 0.3337232895241925, + "grad_norm": 6.2061717937115715, + "learning_rate": 7.770014479014523e-06, + "loss": 17.5916, + "step": 18257 + }, + { + "epoch": 0.33374156872063904, + "grad_norm": 7.7996121002910055, + "learning_rate": 7.769768040191904e-06, + "loss": 18.2667, + "step": 18258 + }, + { + "epoch": 0.3337598479170856, + "grad_norm": 8.257618892263123, + "learning_rate": 7.769521591661487e-06, + "loss": 18.329, + "step": 18259 + }, + { + "epoch": 0.3337781271135321, + "grad_norm": 7.268356090039945, + "learning_rate": 7.769275133424135e-06, + "loss": 18.227, + "step": 18260 + }, + { + "epoch": 0.3337964063099786, + "grad_norm": 6.236784550460681, + "learning_rate": 7.769028665480718e-06, + "loss": 17.495, + "step": 18261 + }, + { + "epoch": 0.3338146855064251, + "grad_norm": 5.826446793854041, + "learning_rate": 7.768782187832094e-06, + "loss": 17.0138, + "step": 18262 + }, + { + "epoch": 0.33383296470287166, + 
"grad_norm": 5.045773038751963, + "learning_rate": 7.768535700479128e-06, + "loss": 17.0794, + "step": 18263 + }, + { + "epoch": 0.3338512438993182, + "grad_norm": 6.339256647183979, + "learning_rate": 7.768289203422685e-06, + "loss": 17.3311, + "step": 18264 + }, + { + "epoch": 0.33386952309576473, + "grad_norm": 6.1668381429627415, + "learning_rate": 7.768042696663629e-06, + "loss": 17.2583, + "step": 18265 + }, + { + "epoch": 0.3338878022922112, + "grad_norm": 6.779129137618555, + "learning_rate": 7.767796180202823e-06, + "loss": 17.6344, + "step": 18266 + }, + { + "epoch": 0.33390608148865775, + "grad_norm": 7.8809435520336955, + "learning_rate": 7.767549654041132e-06, + "loss": 17.9554, + "step": 18267 + }, + { + "epoch": 0.3339243606851043, + "grad_norm": 7.234332435835093, + "learning_rate": 7.767303118179422e-06, + "loss": 17.8294, + "step": 18268 + }, + { + "epoch": 0.3339426398815508, + "grad_norm": 5.744768987872191, + "learning_rate": 7.76705657261855e-06, + "loss": 17.2128, + "step": 18269 + }, + { + "epoch": 0.33396091907799735, + "grad_norm": 7.181589881128322, + "learning_rate": 7.766810017359387e-06, + "loss": 17.7517, + "step": 18270 + }, + { + "epoch": 0.33397919827444383, + "grad_norm": 7.6324903882375645, + "learning_rate": 7.766563452402796e-06, + "loss": 18.0289, + "step": 18271 + }, + { + "epoch": 0.33399747747089037, + "grad_norm": 7.045548758291174, + "learning_rate": 7.766316877749641e-06, + "loss": 17.5929, + "step": 18272 + }, + { + "epoch": 0.3340157566673369, + "grad_norm": 5.48986111322997, + "learning_rate": 7.766070293400783e-06, + "loss": 17.0748, + "step": 18273 + }, + { + "epoch": 0.33403403586378344, + "grad_norm": 6.950841627839012, + "learning_rate": 7.765823699357089e-06, + "loss": 17.7548, + "step": 18274 + }, + { + "epoch": 0.33405231506023, + "grad_norm": 7.359007079523373, + "learning_rate": 7.765577095619423e-06, + "loss": 17.6903, + "step": 18275 + }, + { + "epoch": 0.33407059425667646, + "grad_norm": 6.8479912684776725, + "learning_rate": 7.765330482188649e-06, + "loss": 17.1306, + "step": 18276 + }, + { + "epoch": 0.334088873453123, + "grad_norm": 7.270761509348165, + "learning_rate": 7.765083859065631e-06, + "loss": 17.8807, + "step": 18277 + }, + { + "epoch": 0.3341071526495695, + "grad_norm": 6.29190187688376, + "learning_rate": 7.764837226251237e-06, + "loss": 17.6153, + "step": 18278 + }, + { + "epoch": 0.33412543184601606, + "grad_norm": 6.576013848161763, + "learning_rate": 7.764590583746328e-06, + "loss": 17.4124, + "step": 18279 + }, + { + "epoch": 0.3341437110424626, + "grad_norm": 6.887204970856277, + "learning_rate": 7.764343931551765e-06, + "loss": 17.745, + "step": 18280 + }, + { + "epoch": 0.3341619902389091, + "grad_norm": 7.233508854667866, + "learning_rate": 7.76409726966842e-06, + "loss": 18.0759, + "step": 18281 + }, + { + "epoch": 0.3341802694353556, + "grad_norm": 7.782221930944987, + "learning_rate": 7.763850598097154e-06, + "loss": 18.2327, + "step": 18282 + }, + { + "epoch": 0.33419854863180215, + "grad_norm": 5.774946535351223, + "learning_rate": 7.76360391683883e-06, + "loss": 17.1124, + "step": 18283 + }, + { + "epoch": 0.3342168278282487, + "grad_norm": 7.142259606946519, + "learning_rate": 7.763357225894314e-06, + "loss": 17.9635, + "step": 18284 + }, + { + "epoch": 0.3342351070246952, + "grad_norm": 6.552254980874395, + "learning_rate": 7.763110525264471e-06, + "loss": 17.6825, + "step": 18285 + }, + { + "epoch": 0.3342533862211417, + "grad_norm": 7.166106033103709, + "learning_rate": 7.762863814950165e-06, + 
"loss": 17.816, + "step": 18286 + }, + { + "epoch": 0.33427166541758824, + "grad_norm": 6.908900588446828, + "learning_rate": 7.762617094952262e-06, + "loss": 18.2036, + "step": 18287 + }, + { + "epoch": 0.3342899446140348, + "grad_norm": 7.4750743296947375, + "learning_rate": 7.762370365271625e-06, + "loss": 18.0611, + "step": 18288 + }, + { + "epoch": 0.3343082238104813, + "grad_norm": 6.420174060591953, + "learning_rate": 7.76212362590912e-06, + "loss": 17.6022, + "step": 18289 + }, + { + "epoch": 0.3343265030069278, + "grad_norm": 6.725124424115284, + "learning_rate": 7.761876876865612e-06, + "loss": 17.3032, + "step": 18290 + }, + { + "epoch": 0.3343447822033743, + "grad_norm": 6.210333346303373, + "learning_rate": 7.761630118141966e-06, + "loss": 17.2584, + "step": 18291 + }, + { + "epoch": 0.33436306139982086, + "grad_norm": 7.46718203655446, + "learning_rate": 7.761383349739045e-06, + "loss": 18.3899, + "step": 18292 + }, + { + "epoch": 0.3343813405962674, + "grad_norm": 5.809202179301146, + "learning_rate": 7.761136571657714e-06, + "loss": 17.3044, + "step": 18293 + }, + { + "epoch": 0.33439961979271393, + "grad_norm": 7.969665938088734, + "learning_rate": 7.760889783898839e-06, + "loss": 18.0716, + "step": 18294 + }, + { + "epoch": 0.3344178989891604, + "grad_norm": 7.580848556915434, + "learning_rate": 7.760642986463284e-06, + "loss": 18.0002, + "step": 18295 + }, + { + "epoch": 0.33443617818560695, + "grad_norm": 5.325450503699952, + "learning_rate": 7.760396179351919e-06, + "loss": 17.0275, + "step": 18296 + }, + { + "epoch": 0.3344544573820535, + "grad_norm": 7.614931020495053, + "learning_rate": 7.760149362565602e-06, + "loss": 18.0945, + "step": 18297 + }, + { + "epoch": 0.3344727365785, + "grad_norm": 6.624587523393241, + "learning_rate": 7.7599025361052e-06, + "loss": 17.1637, + "step": 18298 + }, + { + "epoch": 0.33449101577494655, + "grad_norm": 6.067403262655608, + "learning_rate": 7.759655699971581e-06, + "loss": 17.3723, + "step": 18299 + }, + { + "epoch": 0.33450929497139303, + "grad_norm": 7.451357832709255, + "learning_rate": 7.759408854165608e-06, + "loss": 17.8072, + "step": 18300 + }, + { + "epoch": 0.33452757416783957, + "grad_norm": 5.248124535174485, + "learning_rate": 7.759161998688145e-06, + "loss": 16.988, + "step": 18301 + }, + { + "epoch": 0.3345458533642861, + "grad_norm": 5.894392508469301, + "learning_rate": 7.758915133540059e-06, + "loss": 17.2724, + "step": 18302 + }, + { + "epoch": 0.33456413256073264, + "grad_norm": 8.490995476710541, + "learning_rate": 7.758668258722217e-06, + "loss": 17.5968, + "step": 18303 + }, + { + "epoch": 0.3345824117571792, + "grad_norm": 6.3621774839835705, + "learning_rate": 7.758421374235481e-06, + "loss": 17.3985, + "step": 18304 + }, + { + "epoch": 0.33460069095362566, + "grad_norm": 6.348004245125481, + "learning_rate": 7.758174480080717e-06, + "loss": 17.4257, + "step": 18305 + }, + { + "epoch": 0.3346189701500722, + "grad_norm": 6.258615994091, + "learning_rate": 7.75792757625879e-06, + "loss": 17.6457, + "step": 18306 + }, + { + "epoch": 0.33463724934651873, + "grad_norm": 6.986781705578218, + "learning_rate": 7.757680662770568e-06, + "loss": 17.6274, + "step": 18307 + }, + { + "epoch": 0.33465552854296526, + "grad_norm": 6.621352092591684, + "learning_rate": 7.757433739616913e-06, + "loss": 17.8456, + "step": 18308 + }, + { + "epoch": 0.3346738077394118, + "grad_norm": 5.42694561819176, + "learning_rate": 7.757186806798693e-06, + "loss": 17.1311, + "step": 18309 + }, + { + "epoch": 0.3346920869358583, + 
"grad_norm": 6.145164943286487, + "learning_rate": 7.756939864316773e-06, + "loss": 17.3195, + "step": 18310 + }, + { + "epoch": 0.3347103661323048, + "grad_norm": 6.99898461330753, + "learning_rate": 7.756692912172017e-06, + "loss": 17.8293, + "step": 18311 + }, + { + "epoch": 0.33472864532875135, + "grad_norm": 7.338612922482191, + "learning_rate": 7.756445950365292e-06, + "loss": 17.5407, + "step": 18312 + }, + { + "epoch": 0.3347469245251979, + "grad_norm": 6.485677018272688, + "learning_rate": 7.756198978897463e-06, + "loss": 17.7522, + "step": 18313 + }, + { + "epoch": 0.3347652037216444, + "grad_norm": 7.882693990131259, + "learning_rate": 7.755951997769395e-06, + "loss": 17.9094, + "step": 18314 + }, + { + "epoch": 0.3347834829180909, + "grad_norm": 6.346007494365597, + "learning_rate": 7.755705006981955e-06, + "loss": 17.3265, + "step": 18315 + }, + { + "epoch": 0.33480176211453744, + "grad_norm": 6.206414249888963, + "learning_rate": 7.75545800653601e-06, + "loss": 17.2567, + "step": 18316 + }, + { + "epoch": 0.334820041310984, + "grad_norm": 6.55249888096095, + "learning_rate": 7.755210996432421e-06, + "loss": 17.4971, + "step": 18317 + }, + { + "epoch": 0.3348383205074305, + "grad_norm": 6.964432634711099, + "learning_rate": 7.754963976672056e-06, + "loss": 17.7342, + "step": 18318 + }, + { + "epoch": 0.33485659970387704, + "grad_norm": 7.916931176316435, + "learning_rate": 7.754716947255784e-06, + "loss": 17.8617, + "step": 18319 + }, + { + "epoch": 0.3348748789003235, + "grad_norm": 6.461639288916398, + "learning_rate": 7.754469908184467e-06, + "loss": 17.347, + "step": 18320 + }, + { + "epoch": 0.33489315809677006, + "grad_norm": 5.057605319358128, + "learning_rate": 7.754222859458973e-06, + "loss": 16.8883, + "step": 18321 + }, + { + "epoch": 0.3349114372932166, + "grad_norm": 6.832876065541013, + "learning_rate": 7.753975801080165e-06, + "loss": 17.5447, + "step": 18322 + }, + { + "epoch": 0.33492971648966313, + "grad_norm": 6.864591411239356, + "learning_rate": 7.753728733048911e-06, + "loss": 17.6954, + "step": 18323 + }, + { + "epoch": 0.3349479956861096, + "grad_norm": 6.001523010916419, + "learning_rate": 7.753481655366077e-06, + "loss": 17.3145, + "step": 18324 + }, + { + "epoch": 0.33496627488255615, + "grad_norm": 5.667306709259325, + "learning_rate": 7.75323456803253e-06, + "loss": 17.0702, + "step": 18325 + }, + { + "epoch": 0.3349845540790027, + "grad_norm": 6.639306807494685, + "learning_rate": 7.752987471049133e-06, + "loss": 17.4848, + "step": 18326 + }, + { + "epoch": 0.3350028332754492, + "grad_norm": 6.2005682970654865, + "learning_rate": 7.752740364416756e-06, + "loss": 17.4231, + "step": 18327 + }, + { + "epoch": 0.33502111247189575, + "grad_norm": 6.852004423139621, + "learning_rate": 7.752493248136262e-06, + "loss": 17.8458, + "step": 18328 + }, + { + "epoch": 0.33503939166834223, + "grad_norm": 6.242489731981976, + "learning_rate": 7.752246122208515e-06, + "loss": 17.3899, + "step": 18329 + }, + { + "epoch": 0.33505767086478877, + "grad_norm": 6.761276698732926, + "learning_rate": 7.751998986634388e-06, + "loss": 17.3753, + "step": 18330 + }, + { + "epoch": 0.3350759500612353, + "grad_norm": 8.4697101881472, + "learning_rate": 7.751751841414742e-06, + "loss": 18.2433, + "step": 18331 + }, + { + "epoch": 0.33509422925768184, + "grad_norm": 5.98340590415751, + "learning_rate": 7.751504686550444e-06, + "loss": 17.3087, + "step": 18332 + }, + { + "epoch": 0.3351125084541284, + "grad_norm": 4.63306568016994, + "learning_rate": 7.75125752204236e-06, + 
"loss": 16.8643, + "step": 18333 + }, + { + "epoch": 0.33513078765057486, + "grad_norm": 6.874276635523743, + "learning_rate": 7.751010347891361e-06, + "loss": 17.6956, + "step": 18334 + }, + { + "epoch": 0.3351490668470214, + "grad_norm": 7.056705825769277, + "learning_rate": 7.750763164098308e-06, + "loss": 18.1054, + "step": 18335 + }, + { + "epoch": 0.33516734604346793, + "grad_norm": 6.022894070645962, + "learning_rate": 7.750515970664066e-06, + "loss": 17.2611, + "step": 18336 + }, + { + "epoch": 0.33518562523991446, + "grad_norm": 6.4601713256358595, + "learning_rate": 7.750268767589507e-06, + "loss": 17.4091, + "step": 18337 + }, + { + "epoch": 0.335203904436361, + "grad_norm": 7.631234410012835, + "learning_rate": 7.750021554875493e-06, + "loss": 18.0535, + "step": 18338 + }, + { + "epoch": 0.3352221836328075, + "grad_norm": 6.5660887452535865, + "learning_rate": 7.749774332522894e-06, + "loss": 17.5306, + "step": 18339 + }, + { + "epoch": 0.335240462829254, + "grad_norm": 6.38797809805494, + "learning_rate": 7.749527100532572e-06, + "loss": 17.5954, + "step": 18340 + }, + { + "epoch": 0.33525874202570055, + "grad_norm": 6.6464765680954505, + "learning_rate": 7.749279858905398e-06, + "loss": 17.9632, + "step": 18341 + }, + { + "epoch": 0.3352770212221471, + "grad_norm": 5.948301317242268, + "learning_rate": 7.749032607642237e-06, + "loss": 17.1922, + "step": 18342 + }, + { + "epoch": 0.3352953004185936, + "grad_norm": 5.775275108449763, + "learning_rate": 7.748785346743955e-06, + "loss": 17.3549, + "step": 18343 + }, + { + "epoch": 0.3353135796150401, + "grad_norm": 5.930169817187332, + "learning_rate": 7.748538076211418e-06, + "loss": 17.362, + "step": 18344 + }, + { + "epoch": 0.33533185881148664, + "grad_norm": 6.230980678229091, + "learning_rate": 7.748290796045493e-06, + "loss": 17.4745, + "step": 18345 + }, + { + "epoch": 0.3353501380079332, + "grad_norm": 6.814774819011009, + "learning_rate": 7.74804350624705e-06, + "loss": 17.7425, + "step": 18346 + }, + { + "epoch": 0.3353684172043797, + "grad_norm": 7.52974247559615, + "learning_rate": 7.74779620681695e-06, + "loss": 17.9699, + "step": 18347 + }, + { + "epoch": 0.33538669640082625, + "grad_norm": 6.217013645338138, + "learning_rate": 7.747548897756063e-06, + "loss": 17.175, + "step": 18348 + }, + { + "epoch": 0.3354049755972727, + "grad_norm": 6.00741862194668, + "learning_rate": 7.747301579065256e-06, + "loss": 17.1926, + "step": 18349 + }, + { + "epoch": 0.33542325479371926, + "grad_norm": 6.8412301633342105, + "learning_rate": 7.747054250745396e-06, + "loss": 17.6055, + "step": 18350 + }, + { + "epoch": 0.3354415339901658, + "grad_norm": 6.638726934444085, + "learning_rate": 7.746806912797349e-06, + "loss": 17.6016, + "step": 18351 + }, + { + "epoch": 0.33545981318661233, + "grad_norm": 7.583313276990292, + "learning_rate": 7.746559565221983e-06, + "loss": 18.0188, + "step": 18352 + }, + { + "epoch": 0.33547809238305887, + "grad_norm": 7.2380370876110645, + "learning_rate": 7.746312208020164e-06, + "loss": 18.0414, + "step": 18353 + }, + { + "epoch": 0.33549637157950535, + "grad_norm": 7.097301550884373, + "learning_rate": 7.746064841192757e-06, + "loss": 17.6619, + "step": 18354 + }, + { + "epoch": 0.3355146507759519, + "grad_norm": 6.277958463243101, + "learning_rate": 7.745817464740633e-06, + "loss": 17.5832, + "step": 18355 + }, + { + "epoch": 0.3355329299723984, + "grad_norm": 6.045685301953929, + "learning_rate": 7.745570078664655e-06, + "loss": 17.2695, + "step": 18356 + }, + { + "epoch": 0.33555120916884495, + 
"grad_norm": 7.749337621853986, + "learning_rate": 7.745322682965693e-06, + "loss": 17.9238, + "step": 18357 + }, + { + "epoch": 0.33556948836529144, + "grad_norm": 7.15648722064878, + "learning_rate": 7.745075277644615e-06, + "loss": 18.0479, + "step": 18358 + }, + { + "epoch": 0.33558776756173797, + "grad_norm": 6.481833282438767, + "learning_rate": 7.744827862702284e-06, + "loss": 17.5936, + "step": 18359 + }, + { + "epoch": 0.3356060467581845, + "grad_norm": 7.27669269267047, + "learning_rate": 7.744580438139571e-06, + "loss": 17.9004, + "step": 18360 + }, + { + "epoch": 0.33562432595463104, + "grad_norm": 6.198573375094083, + "learning_rate": 7.744333003957341e-06, + "loss": 17.412, + "step": 18361 + }, + { + "epoch": 0.3356426051510776, + "grad_norm": 6.3126170267845465, + "learning_rate": 7.744085560156462e-06, + "loss": 17.565, + "step": 18362 + }, + { + "epoch": 0.33566088434752406, + "grad_norm": 6.722386020348753, + "learning_rate": 7.743838106737802e-06, + "loss": 17.2823, + "step": 18363 + }, + { + "epoch": 0.3356791635439706, + "grad_norm": 8.532039182448148, + "learning_rate": 7.743590643702227e-06, + "loss": 18.4358, + "step": 18364 + }, + { + "epoch": 0.33569744274041713, + "grad_norm": 7.019269928101385, + "learning_rate": 7.743343171050604e-06, + "loss": 17.7279, + "step": 18365 + }, + { + "epoch": 0.33571572193686366, + "grad_norm": 6.2363754879825795, + "learning_rate": 7.743095688783803e-06, + "loss": 17.4868, + "step": 18366 + }, + { + "epoch": 0.3357340011333102, + "grad_norm": 6.547608551667183, + "learning_rate": 7.742848196902688e-06, + "loss": 17.6869, + "step": 18367 + }, + { + "epoch": 0.3357522803297567, + "grad_norm": 6.524650132021672, + "learning_rate": 7.742600695408128e-06, + "loss": 17.6394, + "step": 18368 + }, + { + "epoch": 0.3357705595262032, + "grad_norm": 7.580916121783052, + "learning_rate": 7.742353184300992e-06, + "loss": 17.504, + "step": 18369 + }, + { + "epoch": 0.33578883872264975, + "grad_norm": 7.134162781073809, + "learning_rate": 7.742105663582145e-06, + "loss": 17.9606, + "step": 18370 + }, + { + "epoch": 0.3358071179190963, + "grad_norm": 6.742971936651327, + "learning_rate": 7.741858133252456e-06, + "loss": 17.4696, + "step": 18371 + }, + { + "epoch": 0.3358253971155428, + "grad_norm": 5.756909069664118, + "learning_rate": 7.741610593312794e-06, + "loss": 17.2435, + "step": 18372 + }, + { + "epoch": 0.3358436763119893, + "grad_norm": 6.655582882159083, + "learning_rate": 7.741363043764023e-06, + "loss": 17.3407, + "step": 18373 + }, + { + "epoch": 0.33586195550843584, + "grad_norm": 5.649762551970552, + "learning_rate": 7.741115484607011e-06, + "loss": 17.1598, + "step": 18374 + }, + { + "epoch": 0.3358802347048824, + "grad_norm": 5.345426745039483, + "learning_rate": 7.74086791584263e-06, + "loss": 17.0716, + "step": 18375 + }, + { + "epoch": 0.3358985139013289, + "grad_norm": 6.107630936871481, + "learning_rate": 7.740620337471743e-06, + "loss": 17.4788, + "step": 18376 + }, + { + "epoch": 0.33591679309777545, + "grad_norm": 6.792138438895729, + "learning_rate": 7.74037274949522e-06, + "loss": 17.4798, + "step": 18377 + }, + { + "epoch": 0.3359350722942219, + "grad_norm": 5.853056285933311, + "learning_rate": 7.740125151913927e-06, + "loss": 17.2303, + "step": 18378 + }, + { + "epoch": 0.33595335149066846, + "grad_norm": 6.464523229272147, + "learning_rate": 7.739877544728734e-06, + "loss": 17.5476, + "step": 18379 + }, + { + "epoch": 0.335971630687115, + "grad_norm": 8.422027448603725, + "learning_rate": 7.739629927940508e-06, + 
"loss": 18.2603, + "step": 18380 + }, + { + "epoch": 0.33598990988356153, + "grad_norm": 6.288018775709371, + "learning_rate": 7.739382301550117e-06, + "loss": 17.6214, + "step": 18381 + }, + { + "epoch": 0.33600818908000807, + "grad_norm": 6.475994648492222, + "learning_rate": 7.73913466555843e-06, + "loss": 17.4556, + "step": 18382 + }, + { + "epoch": 0.33602646827645455, + "grad_norm": 5.701573680999045, + "learning_rate": 7.738887019966312e-06, + "loss": 17.106, + "step": 18383 + }, + { + "epoch": 0.3360447474729011, + "grad_norm": 7.0780042943270125, + "learning_rate": 7.738639364774633e-06, + "loss": 17.6859, + "step": 18384 + }, + { + "epoch": 0.3360630266693476, + "grad_norm": 7.001881184847415, + "learning_rate": 7.73839169998426e-06, + "loss": 18.0078, + "step": 18385 + }, + { + "epoch": 0.33608130586579416, + "grad_norm": 7.605615146116568, + "learning_rate": 7.738144025596063e-06, + "loss": 18.2703, + "step": 18386 + }, + { + "epoch": 0.3360995850622407, + "grad_norm": 6.939675288723311, + "learning_rate": 7.737896341610908e-06, + "loss": 17.482, + "step": 18387 + }, + { + "epoch": 0.33611786425868717, + "grad_norm": 6.544616756770822, + "learning_rate": 7.737648648029664e-06, + "loss": 17.7247, + "step": 18388 + }, + { + "epoch": 0.3361361434551337, + "grad_norm": 5.560292907012687, + "learning_rate": 7.737400944853201e-06, + "loss": 17.3386, + "step": 18389 + }, + { + "epoch": 0.33615442265158024, + "grad_norm": 6.072849495003731, + "learning_rate": 7.737153232082383e-06, + "loss": 17.3119, + "step": 18390 + }, + { + "epoch": 0.3361727018480268, + "grad_norm": 7.224700545550032, + "learning_rate": 7.73690550971808e-06, + "loss": 17.9829, + "step": 18391 + }, + { + "epoch": 0.33619098104447326, + "grad_norm": 7.312227418371407, + "learning_rate": 7.736657777761164e-06, + "loss": 17.7287, + "step": 18392 + }, + { + "epoch": 0.3362092602409198, + "grad_norm": 8.927086043083467, + "learning_rate": 7.736410036212497e-06, + "loss": 18.3927, + "step": 18393 + }, + { + "epoch": 0.33622753943736633, + "grad_norm": 6.14345139295062, + "learning_rate": 7.73616228507295e-06, + "loss": 17.3107, + "step": 18394 + }, + { + "epoch": 0.33624581863381287, + "grad_norm": 8.08745349602045, + "learning_rate": 7.735914524343393e-06, + "loss": 18.4474, + "step": 18395 + }, + { + "epoch": 0.3362640978302594, + "grad_norm": 7.518355698459249, + "learning_rate": 7.735666754024692e-06, + "loss": 18.0125, + "step": 18396 + }, + { + "epoch": 0.3362823770267059, + "grad_norm": 7.670188487304936, + "learning_rate": 7.735418974117716e-06, + "loss": 18.2755, + "step": 18397 + }, + { + "epoch": 0.3363006562231524, + "grad_norm": 7.363685793543698, + "learning_rate": 7.735171184623336e-06, + "loss": 17.5832, + "step": 18398 + }, + { + "epoch": 0.33631893541959895, + "grad_norm": 7.797323413026644, + "learning_rate": 7.734923385542417e-06, + "loss": 18.2299, + "step": 18399 + }, + { + "epoch": 0.3363372146160455, + "grad_norm": 6.175064421290564, + "learning_rate": 7.734675576875828e-06, + "loss": 17.5104, + "step": 18400 + }, + { + "epoch": 0.336355493812492, + "grad_norm": 6.925918699168115, + "learning_rate": 7.73442775862444e-06, + "loss": 17.8916, + "step": 18401 + }, + { + "epoch": 0.3363737730089385, + "grad_norm": 6.313385351587444, + "learning_rate": 7.734179930789119e-06, + "loss": 17.4044, + "step": 18402 + }, + { + "epoch": 0.33639205220538504, + "grad_norm": 6.600660588623588, + "learning_rate": 7.733932093370735e-06, + "loss": 17.4565, + "step": 18403 + }, + { + "epoch": 0.3364103314018316, + 
"grad_norm": 6.081680885442603, + "learning_rate": 7.733684246370156e-06, + "loss": 17.4435, + "step": 18404 + }, + { + "epoch": 0.3364286105982781, + "grad_norm": 6.066761271916142, + "learning_rate": 7.733436389788252e-06, + "loss": 17.3945, + "step": 18405 + }, + { + "epoch": 0.33644688979472465, + "grad_norm": 5.79555519497916, + "learning_rate": 7.73318852362589e-06, + "loss": 17.3575, + "step": 18406 + }, + { + "epoch": 0.3364651689911711, + "grad_norm": 7.031841822332528, + "learning_rate": 7.732940647883939e-06, + "loss": 17.3679, + "step": 18407 + }, + { + "epoch": 0.33648344818761766, + "grad_norm": 5.652500007778597, + "learning_rate": 7.732692762563267e-06, + "loss": 17.395, + "step": 18408 + }, + { + "epoch": 0.3365017273840642, + "grad_norm": 7.356023526358955, + "learning_rate": 7.732444867664746e-06, + "loss": 18.0134, + "step": 18409 + }, + { + "epoch": 0.33652000658051073, + "grad_norm": 6.5753188549896775, + "learning_rate": 7.732196963189243e-06, + "loss": 17.7415, + "step": 18410 + }, + { + "epoch": 0.33653828577695727, + "grad_norm": 6.340180504691334, + "learning_rate": 7.731949049137627e-06, + "loss": 17.2667, + "step": 18411 + }, + { + "epoch": 0.33655656497340375, + "grad_norm": 6.867459569486875, + "learning_rate": 7.731701125510764e-06, + "loss": 17.7586, + "step": 18412 + }, + { + "epoch": 0.3365748441698503, + "grad_norm": 9.345006651836984, + "learning_rate": 7.731453192309529e-06, + "loss": 18.6728, + "step": 18413 + }, + { + "epoch": 0.3365931233662968, + "grad_norm": 6.909630227126561, + "learning_rate": 7.731205249534785e-06, + "loss": 17.6986, + "step": 18414 + }, + { + "epoch": 0.33661140256274336, + "grad_norm": 7.7838930350249935, + "learning_rate": 7.730957297187403e-06, + "loss": 17.6255, + "step": 18415 + }, + { + "epoch": 0.3366296817591899, + "grad_norm": 6.265011349717121, + "learning_rate": 7.730709335268256e-06, + "loss": 17.4874, + "step": 18416 + }, + { + "epoch": 0.33664796095563637, + "grad_norm": 8.02861059803375, + "learning_rate": 7.730461363778206e-06, + "loss": 18.3071, + "step": 18417 + }, + { + "epoch": 0.3366662401520829, + "grad_norm": 5.596711905591294, + "learning_rate": 7.730213382718129e-06, + "loss": 17.1153, + "step": 18418 + }, + { + "epoch": 0.33668451934852944, + "grad_norm": 7.972988658366918, + "learning_rate": 7.72996539208889e-06, + "loss": 18.1299, + "step": 18419 + }, + { + "epoch": 0.336702798544976, + "grad_norm": 8.120943580975988, + "learning_rate": 7.729717391891358e-06, + "loss": 18.3888, + "step": 18420 + }, + { + "epoch": 0.3367210777414225, + "grad_norm": 6.731044068945436, + "learning_rate": 7.729469382126405e-06, + "loss": 17.5747, + "step": 18421 + }, + { + "epoch": 0.336739356937869, + "grad_norm": 7.289569065858472, + "learning_rate": 7.729221362794897e-06, + "loss": 17.9513, + "step": 18422 + }, + { + "epoch": 0.33675763613431553, + "grad_norm": 5.743911337736639, + "learning_rate": 7.728973333897707e-06, + "loss": 17.2202, + "step": 18423 + }, + { + "epoch": 0.33677591533076207, + "grad_norm": 6.662068868190871, + "learning_rate": 7.728725295435701e-06, + "loss": 17.6026, + "step": 18424 + }, + { + "epoch": 0.3367941945272086, + "grad_norm": 9.093729319320211, + "learning_rate": 7.72847724740975e-06, + "loss": 18.9263, + "step": 18425 + }, + { + "epoch": 0.3368124737236551, + "grad_norm": 6.455217427397297, + "learning_rate": 7.728229189820721e-06, + "loss": 17.7361, + "step": 18426 + }, + { + "epoch": 0.3368307529201016, + "grad_norm": 7.146877314145098, + "learning_rate": 7.72798112266949e-06, + 
"loss": 17.7652, + "step": 18427 + }, + { + "epoch": 0.33684903211654815, + "grad_norm": 7.404027428581642, + "learning_rate": 7.727733045956919e-06, + "loss": 17.6531, + "step": 18428 + }, + { + "epoch": 0.3368673113129947, + "grad_norm": 6.469316020147925, + "learning_rate": 7.72748495968388e-06, + "loss": 17.7893, + "step": 18429 + }, + { + "epoch": 0.3368855905094412, + "grad_norm": 7.740519309697861, + "learning_rate": 7.727236863851243e-06, + "loss": 17.9784, + "step": 18430 + }, + { + "epoch": 0.3369038697058877, + "grad_norm": 7.096828766894045, + "learning_rate": 7.726988758459877e-06, + "loss": 17.7158, + "step": 18431 + }, + { + "epoch": 0.33692214890233424, + "grad_norm": 6.821265096886873, + "learning_rate": 7.726740643510654e-06, + "loss": 17.6382, + "step": 18432 + }, + { + "epoch": 0.3369404280987808, + "grad_norm": 7.026946958908574, + "learning_rate": 7.72649251900444e-06, + "loss": 18.1321, + "step": 18433 + }, + { + "epoch": 0.3369587072952273, + "grad_norm": 6.347502405851839, + "learning_rate": 7.726244384942108e-06, + "loss": 17.4784, + "step": 18434 + }, + { + "epoch": 0.33697698649167385, + "grad_norm": 5.277570150694714, + "learning_rate": 7.725996241324524e-06, + "loss": 17.0442, + "step": 18435 + }, + { + "epoch": 0.3369952656881203, + "grad_norm": 5.521964833824245, + "learning_rate": 7.725748088152561e-06, + "loss": 17.1314, + "step": 18436 + }, + { + "epoch": 0.33701354488456686, + "grad_norm": 7.197662323340944, + "learning_rate": 7.725499925427086e-06, + "loss": 17.6402, + "step": 18437 + }, + { + "epoch": 0.3370318240810134, + "grad_norm": 6.006474924625444, + "learning_rate": 7.725251753148972e-06, + "loss": 17.4828, + "step": 18438 + }, + { + "epoch": 0.33705010327745993, + "grad_norm": 5.059700720043585, + "learning_rate": 7.725003571319086e-06, + "loss": 16.9115, + "step": 18439 + }, + { + "epoch": 0.33706838247390647, + "grad_norm": 6.864063054870559, + "learning_rate": 7.7247553799383e-06, + "loss": 17.577, + "step": 18440 + }, + { + "epoch": 0.33708666167035295, + "grad_norm": 6.52387554693364, + "learning_rate": 7.724507179007484e-06, + "loss": 17.5513, + "step": 18441 + }, + { + "epoch": 0.3371049408667995, + "grad_norm": 5.43570866174775, + "learning_rate": 7.724258968527503e-06, + "loss": 17.2563, + "step": 18442 + }, + { + "epoch": 0.337123220063246, + "grad_norm": 7.3973909691611395, + "learning_rate": 7.724010748499232e-06, + "loss": 17.9174, + "step": 18443 + }, + { + "epoch": 0.33714149925969256, + "grad_norm": 6.778630546710854, + "learning_rate": 7.72376251892354e-06, + "loss": 17.6066, + "step": 18444 + }, + { + "epoch": 0.3371597784561391, + "grad_norm": 5.961436839492804, + "learning_rate": 7.723514279801298e-06, + "loss": 17.3852, + "step": 18445 + }, + { + "epoch": 0.33717805765258557, + "grad_norm": 7.062129788464218, + "learning_rate": 7.723266031133373e-06, + "loss": 17.9826, + "step": 18446 + }, + { + "epoch": 0.3371963368490321, + "grad_norm": 7.432303147374123, + "learning_rate": 7.72301777292064e-06, + "loss": 17.7383, + "step": 18447 + }, + { + "epoch": 0.33721461604547864, + "grad_norm": 7.742647669234327, + "learning_rate": 7.722769505163963e-06, + "loss": 18.1091, + "step": 18448 + }, + { + "epoch": 0.3372328952419252, + "grad_norm": 7.893233484369064, + "learning_rate": 7.722521227864216e-06, + "loss": 18.2455, + "step": 18449 + }, + { + "epoch": 0.3372511744383717, + "grad_norm": 7.142750209709399, + "learning_rate": 7.722272941022268e-06, + "loss": 17.8689, + "step": 18450 + }, + { + "epoch": 0.3372694536348182, + 
"grad_norm": 7.571102108735381, + "learning_rate": 7.72202464463899e-06, + "loss": 18.0523, + "step": 18451 + }, + { + "epoch": 0.33728773283126473, + "grad_norm": 7.237628522781843, + "learning_rate": 7.721776338715252e-06, + "loss": 17.7742, + "step": 18452 + }, + { + "epoch": 0.33730601202771127, + "grad_norm": 8.014417650877423, + "learning_rate": 7.721528023251924e-06, + "loss": 18.1643, + "step": 18453 + }, + { + "epoch": 0.3373242912241578, + "grad_norm": 5.603909557078515, + "learning_rate": 7.721279698249878e-06, + "loss": 17.1089, + "step": 18454 + }, + { + "epoch": 0.33734257042060434, + "grad_norm": 8.166787237695967, + "learning_rate": 7.72103136370998e-06, + "loss": 18.3125, + "step": 18455 + }, + { + "epoch": 0.3373608496170508, + "grad_norm": 7.54873919290871, + "learning_rate": 7.720783019633103e-06, + "loss": 17.8197, + "step": 18456 + }, + { + "epoch": 0.33737912881349735, + "grad_norm": 5.9202994619846, + "learning_rate": 7.720534666020119e-06, + "loss": 17.3548, + "step": 18457 + }, + { + "epoch": 0.3373974080099439, + "grad_norm": 6.260292847986377, + "learning_rate": 7.720286302871899e-06, + "loss": 17.5368, + "step": 18458 + }, + { + "epoch": 0.3374156872063904, + "grad_norm": 5.300475724027984, + "learning_rate": 7.720037930189308e-06, + "loss": 17.0986, + "step": 18459 + }, + { + "epoch": 0.3374339664028369, + "grad_norm": 8.137722655610194, + "learning_rate": 7.719789547973222e-06, + "loss": 18.1161, + "step": 18460 + }, + { + "epoch": 0.33745224559928344, + "grad_norm": 6.534889342544086, + "learning_rate": 7.719541156224509e-06, + "loss": 17.3363, + "step": 18461 + }, + { + "epoch": 0.33747052479573, + "grad_norm": 6.652638240476339, + "learning_rate": 7.71929275494404e-06, + "loss": 17.3188, + "step": 18462 + }, + { + "epoch": 0.3374888039921765, + "grad_norm": 6.130865054693788, + "learning_rate": 7.719044344132687e-06, + "loss": 17.2704, + "step": 18463 + }, + { + "epoch": 0.33750708318862305, + "grad_norm": 6.9481372715060985, + "learning_rate": 7.718795923791318e-06, + "loss": 17.679, + "step": 18464 + }, + { + "epoch": 0.3375253623850695, + "grad_norm": 6.577487798955367, + "learning_rate": 7.718547493920808e-06, + "loss": 17.4429, + "step": 18465 + }, + { + "epoch": 0.33754364158151606, + "grad_norm": 6.712775286732302, + "learning_rate": 7.718299054522023e-06, + "loss": 17.616, + "step": 18466 + }, + { + "epoch": 0.3375619207779626, + "grad_norm": 6.309615343063586, + "learning_rate": 7.718050605595834e-06, + "loss": 17.5656, + "step": 18467 + }, + { + "epoch": 0.33758019997440913, + "grad_norm": 7.976188969781821, + "learning_rate": 7.717802147143116e-06, + "loss": 17.9397, + "step": 18468 + }, + { + "epoch": 0.33759847917085567, + "grad_norm": 6.056843727085684, + "learning_rate": 7.717553679164736e-06, + "loss": 17.6374, + "step": 18469 + }, + { + "epoch": 0.33761675836730215, + "grad_norm": 5.79688689723586, + "learning_rate": 7.717305201661567e-06, + "loss": 17.2079, + "step": 18470 + }, + { + "epoch": 0.3376350375637487, + "grad_norm": 8.007233412783789, + "learning_rate": 7.717056714634478e-06, + "loss": 18.2403, + "step": 18471 + }, + { + "epoch": 0.3376533167601952, + "grad_norm": 8.340269402764084, + "learning_rate": 7.716808218084344e-06, + "loss": 18.3294, + "step": 18472 + }, + { + "epoch": 0.33767159595664176, + "grad_norm": 5.789205177314982, + "learning_rate": 7.716559712012029e-06, + "loss": 17.2842, + "step": 18473 + }, + { + "epoch": 0.3376898751530883, + "grad_norm": 6.961346009129132, + "learning_rate": 7.716311196418409e-06, + 
"loss": 18.0157, + "step": 18474 + }, + { + "epoch": 0.3377081543495348, + "grad_norm": 6.816075453202713, + "learning_rate": 7.716062671304356e-06, + "loss": 17.5105, + "step": 18475 + }, + { + "epoch": 0.3377264335459813, + "grad_norm": 7.1519747347555525, + "learning_rate": 7.715814136670738e-06, + "loss": 17.6979, + "step": 18476 + }, + { + "epoch": 0.33774471274242784, + "grad_norm": 7.397342958436656, + "learning_rate": 7.715565592518426e-06, + "loss": 18.2724, + "step": 18477 + }, + { + "epoch": 0.3377629919388744, + "grad_norm": 7.2484185914721415, + "learning_rate": 7.715317038848294e-06, + "loss": 18.0584, + "step": 18478 + }, + { + "epoch": 0.3377812711353209, + "grad_norm": 6.753753332473047, + "learning_rate": 7.71506847566121e-06, + "loss": 17.6873, + "step": 18479 + }, + { + "epoch": 0.3377995503317674, + "grad_norm": 5.910713944546503, + "learning_rate": 7.714819902958047e-06, + "loss": 17.2845, + "step": 18480 + }, + { + "epoch": 0.33781782952821393, + "grad_norm": 6.040055410360147, + "learning_rate": 7.714571320739674e-06, + "loss": 17.6741, + "step": 18481 + }, + { + "epoch": 0.33783610872466047, + "grad_norm": 6.16224240871509, + "learning_rate": 7.714322729006968e-06, + "loss": 17.4705, + "step": 18482 + }, + { + "epoch": 0.337854387921107, + "grad_norm": 6.786275507486063, + "learning_rate": 7.714074127760793e-06, + "loss": 17.6599, + "step": 18483 + }, + { + "epoch": 0.33787266711755354, + "grad_norm": 5.052748271895253, + "learning_rate": 7.713825517002025e-06, + "loss": 16.891, + "step": 18484 + }, + { + "epoch": 0.337890946314, + "grad_norm": 6.548327809552189, + "learning_rate": 7.713576896731534e-06, + "loss": 17.8533, + "step": 18485 + }, + { + "epoch": 0.33790922551044655, + "grad_norm": 6.809651591047302, + "learning_rate": 7.713328266950192e-06, + "loss": 17.5008, + "step": 18486 + }, + { + "epoch": 0.3379275047068931, + "grad_norm": 6.811358222407411, + "learning_rate": 7.71307962765887e-06, + "loss": 17.4791, + "step": 18487 + }, + { + "epoch": 0.3379457839033396, + "grad_norm": 6.732612926720186, + "learning_rate": 7.712830978858437e-06, + "loss": 17.6251, + "step": 18488 + }, + { + "epoch": 0.33796406309978616, + "grad_norm": 6.0661694502438674, + "learning_rate": 7.71258232054977e-06, + "loss": 17.433, + "step": 18489 + }, + { + "epoch": 0.33798234229623264, + "grad_norm": 7.741720293416946, + "learning_rate": 7.712333652733736e-06, + "loss": 17.7191, + "step": 18490 + }, + { + "epoch": 0.3380006214926792, + "grad_norm": 5.94148528406997, + "learning_rate": 7.712084975411207e-06, + "loss": 17.4314, + "step": 18491 + }, + { + "epoch": 0.3380189006891257, + "grad_norm": 5.503236881276483, + "learning_rate": 7.711836288583056e-06, + "loss": 17.3177, + "step": 18492 + }, + { + "epoch": 0.33803717988557225, + "grad_norm": 6.227555450615774, + "learning_rate": 7.711587592250157e-06, + "loss": 17.5322, + "step": 18493 + }, + { + "epoch": 0.3380554590820187, + "grad_norm": 6.5307639282098044, + "learning_rate": 7.711338886413375e-06, + "loss": 17.6392, + "step": 18494 + }, + { + "epoch": 0.33807373827846526, + "grad_norm": 8.25031259660908, + "learning_rate": 7.711090171073585e-06, + "loss": 18.2677, + "step": 18495 + }, + { + "epoch": 0.3380920174749118, + "grad_norm": 7.919697683464512, + "learning_rate": 7.710841446231662e-06, + "loss": 17.9129, + "step": 18496 + }, + { + "epoch": 0.33811029667135833, + "grad_norm": 6.672298055295145, + "learning_rate": 7.710592711888473e-06, + "loss": 17.546, + "step": 18497 + }, + { + "epoch": 0.33812857586780487, + 
"grad_norm": 5.8147908282243925, + "learning_rate": 7.710343968044893e-06, + "loss": 17.1957, + "step": 18498 + }, + { + "epoch": 0.33814685506425135, + "grad_norm": 7.871496156274192, + "learning_rate": 7.710095214701792e-06, + "loss": 17.8094, + "step": 18499 + }, + { + "epoch": 0.3381651342606979, + "grad_norm": 8.4109188978225, + "learning_rate": 7.709846451860044e-06, + "loss": 18.0162, + "step": 18500 + }, + { + "epoch": 0.3381834134571444, + "grad_norm": 5.2937680106235705, + "learning_rate": 7.709597679520517e-06, + "loss": 17.0771, + "step": 18501 + }, + { + "epoch": 0.33820169265359096, + "grad_norm": 6.379321046963044, + "learning_rate": 7.709348897684087e-06, + "loss": 17.7056, + "step": 18502 + }, + { + "epoch": 0.3382199718500375, + "grad_norm": 7.265112552027738, + "learning_rate": 7.709100106351624e-06, + "loss": 17.5608, + "step": 18503 + }, + { + "epoch": 0.338238251046484, + "grad_norm": 6.226972182198487, + "learning_rate": 7.708851305523999e-06, + "loss": 17.03, + "step": 18504 + }, + { + "epoch": 0.3382565302429305, + "grad_norm": 6.515094573049991, + "learning_rate": 7.708602495202085e-06, + "loss": 17.4419, + "step": 18505 + }, + { + "epoch": 0.33827480943937704, + "grad_norm": 5.828203314056155, + "learning_rate": 7.708353675386756e-06, + "loss": 17.4523, + "step": 18506 + }, + { + "epoch": 0.3382930886358236, + "grad_norm": 7.433264374469968, + "learning_rate": 7.70810484607888e-06, + "loss": 17.8551, + "step": 18507 + }, + { + "epoch": 0.3383113678322701, + "grad_norm": 6.452021201753046, + "learning_rate": 7.707856007279336e-06, + "loss": 17.5515, + "step": 18508 + }, + { + "epoch": 0.3383296470287166, + "grad_norm": 6.927308323353563, + "learning_rate": 7.707607158988989e-06, + "loss": 17.6572, + "step": 18509 + }, + { + "epoch": 0.33834792622516313, + "grad_norm": 7.59364801967737, + "learning_rate": 7.707358301208711e-06, + "loss": 18.0751, + "step": 18510 + }, + { + "epoch": 0.33836620542160967, + "grad_norm": 8.481590889263641, + "learning_rate": 7.70710943393938e-06, + "loss": 18.4459, + "step": 18511 + }, + { + "epoch": 0.3383844846180562, + "grad_norm": 6.218440313046197, + "learning_rate": 7.706860557181865e-06, + "loss": 17.4924, + "step": 18512 + }, + { + "epoch": 0.33840276381450274, + "grad_norm": 7.873215963953774, + "learning_rate": 7.70661167093704e-06, + "loss": 17.8562, + "step": 18513 + }, + { + "epoch": 0.3384210430109492, + "grad_norm": 5.718721767784067, + "learning_rate": 7.706362775205775e-06, + "loss": 17.4298, + "step": 18514 + }, + { + "epoch": 0.33843932220739575, + "grad_norm": 6.855804305720729, + "learning_rate": 7.706113869988942e-06, + "loss": 17.7369, + "step": 18515 + }, + { + "epoch": 0.3384576014038423, + "grad_norm": 6.285208999963029, + "learning_rate": 7.705864955287417e-06, + "loss": 17.4317, + "step": 18516 + }, + { + "epoch": 0.3384758806002888, + "grad_norm": 6.876274856684372, + "learning_rate": 7.705616031102067e-06, + "loss": 17.7482, + "step": 18517 + }, + { + "epoch": 0.33849415979673536, + "grad_norm": 6.551674808949516, + "learning_rate": 7.70536709743377e-06, + "loss": 17.5721, + "step": 18518 + }, + { + "epoch": 0.33851243899318184, + "grad_norm": 7.1792626870622795, + "learning_rate": 7.705118154283395e-06, + "loss": 17.81, + "step": 18519 + }, + { + "epoch": 0.3385307181896284, + "grad_norm": 6.532119534364582, + "learning_rate": 7.704869201651817e-06, + "loss": 17.4447, + "step": 18520 + }, + { + "epoch": 0.3385489973860749, + "grad_norm": 7.836061959256069, + "learning_rate": 7.704620239539907e-06, + 
"loss": 18.0364, + "step": 18521 + }, + { + "epoch": 0.33856727658252145, + "grad_norm": 7.278440436296791, + "learning_rate": 7.704371267948537e-06, + "loss": 17.9914, + "step": 18522 + }, + { + "epoch": 0.338585555778968, + "grad_norm": 9.443688365625592, + "learning_rate": 7.704122286878582e-06, + "loss": 19.0627, + "step": 18523 + }, + { + "epoch": 0.33860383497541446, + "grad_norm": 6.7285697217231775, + "learning_rate": 7.703873296330911e-06, + "loss": 17.7257, + "step": 18524 + }, + { + "epoch": 0.338622114171861, + "grad_norm": 6.832079061969127, + "learning_rate": 7.703624296306398e-06, + "loss": 17.8432, + "step": 18525 + }, + { + "epoch": 0.33864039336830754, + "grad_norm": 7.372634881950305, + "learning_rate": 7.703375286805918e-06, + "loss": 18.1717, + "step": 18526 + }, + { + "epoch": 0.33865867256475407, + "grad_norm": 6.671263012834701, + "learning_rate": 7.703126267830341e-06, + "loss": 17.832, + "step": 18527 + }, + { + "epoch": 0.33867695176120055, + "grad_norm": 8.064692606205229, + "learning_rate": 7.702877239380541e-06, + "loss": 18.3962, + "step": 18528 + }, + { + "epoch": 0.3386952309576471, + "grad_norm": 5.9140910909686655, + "learning_rate": 7.702628201457393e-06, + "loss": 17.543, + "step": 18529 + }, + { + "epoch": 0.3387135101540936, + "grad_norm": 7.536298645706425, + "learning_rate": 7.702379154061766e-06, + "loss": 18.0075, + "step": 18530 + }, + { + "epoch": 0.33873178935054016, + "grad_norm": 6.903730155055247, + "learning_rate": 7.702130097194536e-06, + "loss": 17.737, + "step": 18531 + }, + { + "epoch": 0.3387500685469867, + "grad_norm": 6.892784737799518, + "learning_rate": 7.701881030856573e-06, + "loss": 17.8239, + "step": 18532 + }, + { + "epoch": 0.3387683477434332, + "grad_norm": 7.989731384844175, + "learning_rate": 7.701631955048751e-06, + "loss": 18.1867, + "step": 18533 + }, + { + "epoch": 0.3387866269398797, + "grad_norm": 6.75314577527341, + "learning_rate": 7.701382869771944e-06, + "loss": 17.6316, + "step": 18534 + }, + { + "epoch": 0.33880490613632624, + "grad_norm": 6.425552269052934, + "learning_rate": 7.701133775027026e-06, + "loss": 17.7064, + "step": 18535 + }, + { + "epoch": 0.3388231853327728, + "grad_norm": 6.475145211434733, + "learning_rate": 7.700884670814867e-06, + "loss": 17.5869, + "step": 18536 + }, + { + "epoch": 0.3388414645292193, + "grad_norm": 7.979887064014127, + "learning_rate": 7.700635557136342e-06, + "loss": 17.9246, + "step": 18537 + }, + { + "epoch": 0.3388597437256658, + "grad_norm": 7.1966159183684155, + "learning_rate": 7.700386433992325e-06, + "loss": 17.6055, + "step": 18538 + }, + { + "epoch": 0.33887802292211233, + "grad_norm": 5.8361565872888335, + "learning_rate": 7.700137301383685e-06, + "loss": 17.3603, + "step": 18539 + }, + { + "epoch": 0.33889630211855887, + "grad_norm": 6.800728664016093, + "learning_rate": 7.6998881593113e-06, + "loss": 17.8276, + "step": 18540 + }, + { + "epoch": 0.3389145813150054, + "grad_norm": 6.969027315962583, + "learning_rate": 7.69963900777604e-06, + "loss": 18.0461, + "step": 18541 + }, + { + "epoch": 0.33893286051145194, + "grad_norm": 7.748699637922752, + "learning_rate": 7.699389846778781e-06, + "loss": 18.1258, + "step": 18542 + }, + { + "epoch": 0.3389511397078984, + "grad_norm": 7.299936552345248, + "learning_rate": 7.699140676320394e-06, + "loss": 17.5742, + "step": 18543 + }, + { + "epoch": 0.33896941890434495, + "grad_norm": 5.406616937955294, + "learning_rate": 7.698891496401753e-06, + "loss": 17.1552, + "step": 18544 + }, + { + "epoch": 0.3389876981007915, + 
"grad_norm": 6.105185122653779, + "learning_rate": 7.698642307023732e-06, + "loss": 17.3877, + "step": 18545 + }, + { + "epoch": 0.339005977297238, + "grad_norm": 6.070445457980878, + "learning_rate": 7.698393108187203e-06, + "loss": 17.433, + "step": 18546 + }, + { + "epoch": 0.33902425649368456, + "grad_norm": 7.327904658034851, + "learning_rate": 7.698143899893042e-06, + "loss": 18.2264, + "step": 18547 + }, + { + "epoch": 0.33904253569013104, + "grad_norm": 5.811399519134845, + "learning_rate": 7.697894682142119e-06, + "loss": 17.2927, + "step": 18548 + }, + { + "epoch": 0.3390608148865776, + "grad_norm": 8.371710481814059, + "learning_rate": 7.697645454935311e-06, + "loss": 18.5259, + "step": 18549 + }, + { + "epoch": 0.3390790940830241, + "grad_norm": 6.162236942618617, + "learning_rate": 7.697396218273488e-06, + "loss": 17.4977, + "step": 18550 + }, + { + "epoch": 0.33909737327947065, + "grad_norm": 5.667929160973008, + "learning_rate": 7.697146972157527e-06, + "loss": 17.2238, + "step": 18551 + }, + { + "epoch": 0.3391156524759172, + "grad_norm": 6.3630864987232725, + "learning_rate": 7.696897716588299e-06, + "loss": 17.5051, + "step": 18552 + }, + { + "epoch": 0.33913393167236366, + "grad_norm": 5.547956065392964, + "learning_rate": 7.696648451566678e-06, + "loss": 17.2606, + "step": 18553 + }, + { + "epoch": 0.3391522108688102, + "grad_norm": 5.693365817578991, + "learning_rate": 7.696399177093539e-06, + "loss": 17.2777, + "step": 18554 + }, + { + "epoch": 0.33917049006525674, + "grad_norm": 7.699578961192763, + "learning_rate": 7.696149893169756e-06, + "loss": 18.1077, + "step": 18555 + }, + { + "epoch": 0.33918876926170327, + "grad_norm": 7.370075956908817, + "learning_rate": 7.6959005997962e-06, + "loss": 17.7865, + "step": 18556 + }, + { + "epoch": 0.3392070484581498, + "grad_norm": 6.320802877160347, + "learning_rate": 7.695651296973745e-06, + "loss": 17.6819, + "step": 18557 + }, + { + "epoch": 0.3392253276545963, + "grad_norm": 7.172380059789501, + "learning_rate": 7.695401984703269e-06, + "loss": 17.9064, + "step": 18558 + }, + { + "epoch": 0.3392436068510428, + "grad_norm": 5.8204236789504575, + "learning_rate": 7.695152662985641e-06, + "loss": 17.3202, + "step": 18559 + }, + { + "epoch": 0.33926188604748936, + "grad_norm": 6.671811365467112, + "learning_rate": 7.694903331821736e-06, + "loss": 17.4557, + "step": 18560 + }, + { + "epoch": 0.3392801652439359, + "grad_norm": 5.506696943152934, + "learning_rate": 7.694653991212431e-06, + "loss": 17.1238, + "step": 18561 + }, + { + "epoch": 0.3392984444403824, + "grad_norm": 6.6946185875147926, + "learning_rate": 7.694404641158597e-06, + "loss": 18.0571, + "step": 18562 + }, + { + "epoch": 0.3393167236368289, + "grad_norm": 5.7400999723436295, + "learning_rate": 7.694155281661108e-06, + "loss": 17.1185, + "step": 18563 + }, + { + "epoch": 0.33933500283327545, + "grad_norm": 4.560350280967302, + "learning_rate": 7.69390591272084e-06, + "loss": 16.7462, + "step": 18564 + }, + { + "epoch": 0.339353282029722, + "grad_norm": 6.779489856870191, + "learning_rate": 7.693656534338665e-06, + "loss": 17.9682, + "step": 18565 + }, + { + "epoch": 0.3393715612261685, + "grad_norm": 6.055479736217653, + "learning_rate": 7.693407146515455e-06, + "loss": 17.3872, + "step": 18566 + }, + { + "epoch": 0.339389840422615, + "grad_norm": 7.063314999171396, + "learning_rate": 7.693157749252089e-06, + "loss": 17.8399, + "step": 18567 + }, + { + "epoch": 0.33940811961906153, + "grad_norm": 6.485390388814588, + "learning_rate": 7.692908342549439e-06, + 
"loss": 17.3437, + "step": 18568 + }, + { + "epoch": 0.33942639881550807, + "grad_norm": 7.67912906130451, + "learning_rate": 7.692658926408378e-06, + "loss": 18.3269, + "step": 18569 + }, + { + "epoch": 0.3394446780119546, + "grad_norm": 6.66874415783317, + "learning_rate": 7.692409500829781e-06, + "loss": 17.6165, + "step": 18570 + }, + { + "epoch": 0.33946295720840114, + "grad_norm": 5.525745694311952, + "learning_rate": 7.692160065814522e-06, + "loss": 16.9587, + "step": 18571 + }, + { + "epoch": 0.3394812364048476, + "grad_norm": 6.5776478903528135, + "learning_rate": 7.691910621363479e-06, + "loss": 17.9475, + "step": 18572 + }, + { + "epoch": 0.33949951560129416, + "grad_norm": 6.735479974909261, + "learning_rate": 7.691661167477519e-06, + "loss": 17.3685, + "step": 18573 + }, + { + "epoch": 0.3395177947977407, + "grad_norm": 6.103561192639743, + "learning_rate": 7.691411704157521e-06, + "loss": 17.369, + "step": 18574 + }, + { + "epoch": 0.3395360739941872, + "grad_norm": 5.988450917113393, + "learning_rate": 7.69116223140436e-06, + "loss": 17.2464, + "step": 18575 + }, + { + "epoch": 0.33955435319063376, + "grad_norm": 5.907624695401685, + "learning_rate": 7.690912749218908e-06, + "loss": 17.3719, + "step": 18576 + }, + { + "epoch": 0.33957263238708024, + "grad_norm": 6.418615476685507, + "learning_rate": 7.69066325760204e-06, + "loss": 17.6757, + "step": 18577 + }, + { + "epoch": 0.3395909115835268, + "grad_norm": 6.390374719797928, + "learning_rate": 7.69041375655463e-06, + "loss": 17.4991, + "step": 18578 + }, + { + "epoch": 0.3396091907799733, + "grad_norm": 7.513673674936005, + "learning_rate": 7.690164246077553e-06, + "loss": 17.6994, + "step": 18579 + }, + { + "epoch": 0.33962746997641985, + "grad_norm": 7.370235617724177, + "learning_rate": 7.689914726171685e-06, + "loss": 17.7465, + "step": 18580 + }, + { + "epoch": 0.3396457491728664, + "grad_norm": 5.776963263010866, + "learning_rate": 7.6896651968379e-06, + "loss": 17.1914, + "step": 18581 + }, + { + "epoch": 0.33966402836931286, + "grad_norm": 7.480861992419118, + "learning_rate": 7.689415658077074e-06, + "loss": 18.0238, + "step": 18582 + }, + { + "epoch": 0.3396823075657594, + "grad_norm": 7.51474730627037, + "learning_rate": 7.689166109890073e-06, + "loss": 17.8101, + "step": 18583 + }, + { + "epoch": 0.33970058676220594, + "grad_norm": 5.94084192894617, + "learning_rate": 7.688916552277783e-06, + "loss": 17.2131, + "step": 18584 + }, + { + "epoch": 0.33971886595865247, + "grad_norm": 5.7606416869487616, + "learning_rate": 7.688666985241074e-06, + "loss": 17.5015, + "step": 18585 + }, + { + "epoch": 0.339737145155099, + "grad_norm": 7.528853867532281, + "learning_rate": 7.688417408780817e-06, + "loss": 18.0463, + "step": 18586 + }, + { + "epoch": 0.3397554243515455, + "grad_norm": 6.374259758307279, + "learning_rate": 7.688167822897894e-06, + "loss": 17.3193, + "step": 18587 + }, + { + "epoch": 0.339773703547992, + "grad_norm": 7.187857336463657, + "learning_rate": 7.687918227593174e-06, + "loss": 17.9579, + "step": 18588 + }, + { + "epoch": 0.33979198274443856, + "grad_norm": 6.771650370125248, + "learning_rate": 7.687668622867535e-06, + "loss": 17.7377, + "step": 18589 + }, + { + "epoch": 0.3398102619408851, + "grad_norm": 6.478670704204277, + "learning_rate": 7.687419008721848e-06, + "loss": 17.6423, + "step": 18590 + }, + { + "epoch": 0.33982854113733163, + "grad_norm": 5.801317083485911, + "learning_rate": 7.687169385156994e-06, + "loss": 17.2358, + "step": 18591 + }, + { + "epoch": 0.3398468203337781, + 
"grad_norm": 5.778391106065269, + "learning_rate": 7.686919752173842e-06, + "loss": 17.1861, + "step": 18592 + }, + { + "epoch": 0.33986509953022465, + "grad_norm": 5.4130668068485495, + "learning_rate": 7.686670109773271e-06, + "loss": 17.1547, + "step": 18593 + }, + { + "epoch": 0.3398833787266712, + "grad_norm": 7.179857244364473, + "learning_rate": 7.686420457956153e-06, + "loss": 18.2276, + "step": 18594 + }, + { + "epoch": 0.3399016579231177, + "grad_norm": 7.545135015636312, + "learning_rate": 7.686170796723365e-06, + "loss": 17.9744, + "step": 18595 + }, + { + "epoch": 0.3399199371195642, + "grad_norm": 5.992206441478063, + "learning_rate": 7.685921126075783e-06, + "loss": 17.4739, + "step": 18596 + }, + { + "epoch": 0.33993821631601073, + "grad_norm": 7.753257465418856, + "learning_rate": 7.685671446014281e-06, + "loss": 17.8907, + "step": 18597 + }, + { + "epoch": 0.33995649551245727, + "grad_norm": 5.220488384198819, + "learning_rate": 7.68542175653973e-06, + "loss": 17.2379, + "step": 18598 + }, + { + "epoch": 0.3399747747089038, + "grad_norm": 6.756246510999741, + "learning_rate": 7.68517205765301e-06, + "loss": 17.9904, + "step": 18599 + }, + { + "epoch": 0.33999305390535034, + "grad_norm": 6.6665878497981295, + "learning_rate": 7.684922349354997e-06, + "loss": 17.666, + "step": 18600 + }, + { + "epoch": 0.3400113331017968, + "grad_norm": 6.7635936038448685, + "learning_rate": 7.684672631646561e-06, + "loss": 17.6601, + "step": 18601 + }, + { + "epoch": 0.34002961229824336, + "grad_norm": 6.019484555741808, + "learning_rate": 7.684422904528584e-06, + "loss": 17.4302, + "step": 18602 + }, + { + "epoch": 0.3400478914946899, + "grad_norm": 6.587729455162058, + "learning_rate": 7.684173168001936e-06, + "loss": 17.6979, + "step": 18603 + }, + { + "epoch": 0.3400661706911364, + "grad_norm": 6.695329373211095, + "learning_rate": 7.683923422067492e-06, + "loss": 17.7751, + "step": 18604 + }, + { + "epoch": 0.34008444988758296, + "grad_norm": 5.599427833123826, + "learning_rate": 7.683673666726133e-06, + "loss": 17.3286, + "step": 18605 + }, + { + "epoch": 0.34010272908402944, + "grad_norm": 6.655312439568152, + "learning_rate": 7.683423901978729e-06, + "loss": 17.6809, + "step": 18606 + }, + { + "epoch": 0.340121008280476, + "grad_norm": 5.213666939523555, + "learning_rate": 7.683174127826156e-06, + "loss": 17.0193, + "step": 18607 + }, + { + "epoch": 0.3401392874769225, + "grad_norm": 6.082647708732131, + "learning_rate": 7.682924344269292e-06, + "loss": 17.4358, + "step": 18608 + }, + { + "epoch": 0.34015756667336905, + "grad_norm": 6.728008690159649, + "learning_rate": 7.682674551309008e-06, + "loss": 17.8505, + "step": 18609 + }, + { + "epoch": 0.3401758458698156, + "grad_norm": 6.666065553449126, + "learning_rate": 7.682424748946187e-06, + "loss": 17.8174, + "step": 18610 + }, + { + "epoch": 0.34019412506626207, + "grad_norm": 5.802111281189128, + "learning_rate": 7.682174937181696e-06, + "loss": 17.2524, + "step": 18611 + }, + { + "epoch": 0.3402124042627086, + "grad_norm": 7.474876302335836, + "learning_rate": 7.681925116016417e-06, + "loss": 18.1691, + "step": 18612 + }, + { + "epoch": 0.34023068345915514, + "grad_norm": 6.281967462298568, + "learning_rate": 7.681675285451223e-06, + "loss": 17.3999, + "step": 18613 + }, + { + "epoch": 0.34024896265560167, + "grad_norm": 7.110678994979965, + "learning_rate": 7.68142544548699e-06, + "loss": 17.7144, + "step": 18614 + }, + { + "epoch": 0.3402672418520482, + "grad_norm": 6.398574156336081, + "learning_rate": 
7.681175596124592e-06, + "loss": 17.4233, + "step": 18615 + }, + { + "epoch": 0.3402855210484947, + "grad_norm": 6.289630212886747, + "learning_rate": 7.680925737364908e-06, + "loss": 17.3901, + "step": 18616 + }, + { + "epoch": 0.3403038002449412, + "grad_norm": 5.8380846989527155, + "learning_rate": 7.680675869208811e-06, + "loss": 17.1604, + "step": 18617 + }, + { + "epoch": 0.34032207944138776, + "grad_norm": 5.569289614347202, + "learning_rate": 7.680425991657177e-06, + "loss": 17.0017, + "step": 18618 + }, + { + "epoch": 0.3403403586378343, + "grad_norm": 6.144307323923875, + "learning_rate": 7.680176104710883e-06, + "loss": 17.4247, + "step": 18619 + }, + { + "epoch": 0.34035863783428083, + "grad_norm": 6.459215526872056, + "learning_rate": 7.679926208370807e-06, + "loss": 17.6302, + "step": 18620 + }, + { + "epoch": 0.3403769170307273, + "grad_norm": 5.93392102819098, + "learning_rate": 7.679676302637818e-06, + "loss": 17.2792, + "step": 18621 + }, + { + "epoch": 0.34039519622717385, + "grad_norm": 7.4960631324250215, + "learning_rate": 7.679426387512799e-06, + "loss": 17.7464, + "step": 18622 + }, + { + "epoch": 0.3404134754236204, + "grad_norm": 8.31714979088079, + "learning_rate": 7.679176462996622e-06, + "loss": 18.3638, + "step": 18623 + }, + { + "epoch": 0.3404317546200669, + "grad_norm": 5.6869215349779525, + "learning_rate": 7.678926529090164e-06, + "loss": 17.3644, + "step": 18624 + }, + { + "epoch": 0.34045003381651345, + "grad_norm": 9.132516392454006, + "learning_rate": 7.6786765857943e-06, + "loss": 18.0032, + "step": 18625 + }, + { + "epoch": 0.34046831301295993, + "grad_norm": 6.52480229562743, + "learning_rate": 7.678426633109908e-06, + "loss": 17.6034, + "step": 18626 + }, + { + "epoch": 0.34048659220940647, + "grad_norm": 8.39836950754598, + "learning_rate": 7.678176671037864e-06, + "loss": 18.007, + "step": 18627 + }, + { + "epoch": 0.340504871405853, + "grad_norm": 5.796424602940962, + "learning_rate": 7.677926699579042e-06, + "loss": 17.2748, + "step": 18628 + }, + { + "epoch": 0.34052315060229954, + "grad_norm": 6.627960521855117, + "learning_rate": 7.677676718734319e-06, + "loss": 17.6418, + "step": 18629 + }, + { + "epoch": 0.340541429798746, + "grad_norm": 7.443410950212804, + "learning_rate": 7.677426728504572e-06, + "loss": 18.0215, + "step": 18630 + }, + { + "epoch": 0.34055970899519256, + "grad_norm": 6.474408461785655, + "learning_rate": 7.677176728890677e-06, + "loss": 17.5713, + "step": 18631 + }, + { + "epoch": 0.3405779881916391, + "grad_norm": 7.853435968222706, + "learning_rate": 7.676926719893509e-06, + "loss": 18.0385, + "step": 18632 + }, + { + "epoch": 0.3405962673880856, + "grad_norm": 7.4119833741467795, + "learning_rate": 7.676676701513945e-06, + "loss": 17.9976, + "step": 18633 + }, + { + "epoch": 0.34061454658453216, + "grad_norm": 7.26715653733841, + "learning_rate": 7.676426673752862e-06, + "loss": 18.1636, + "step": 18634 + }, + { + "epoch": 0.34063282578097864, + "grad_norm": 7.406503317562208, + "learning_rate": 7.676176636611137e-06, + "loss": 17.742, + "step": 18635 + }, + { + "epoch": 0.3406511049774252, + "grad_norm": 6.3999819331489585, + "learning_rate": 7.675926590089643e-06, + "loss": 17.6915, + "step": 18636 + }, + { + "epoch": 0.3406693841738717, + "grad_norm": 7.352261573316151, + "learning_rate": 7.675676534189261e-06, + "loss": 17.7272, + "step": 18637 + }, + { + "epoch": 0.34068766337031825, + "grad_norm": 6.6079238737001695, + "learning_rate": 7.675426468910862e-06, + "loss": 17.4509, + "step": 18638 + }, + { + 
"epoch": 0.3407059425667648, + "grad_norm": 5.703601467050324, + "learning_rate": 7.675176394255326e-06, + "loss": 17.4343, + "step": 18639 + }, + { + "epoch": 0.34072422176321127, + "grad_norm": 7.165661530921426, + "learning_rate": 7.67492631022353e-06, + "loss": 17.945, + "step": 18640 + }, + { + "epoch": 0.3407425009596578, + "grad_norm": 5.0921407244045165, + "learning_rate": 7.67467621681635e-06, + "loss": 16.9564, + "step": 18641 + }, + { + "epoch": 0.34076078015610434, + "grad_norm": 7.055686350188954, + "learning_rate": 7.674426114034662e-06, + "loss": 18.0199, + "step": 18642 + }, + { + "epoch": 0.3407790593525509, + "grad_norm": 6.803755903484869, + "learning_rate": 7.67417600187934e-06, + "loss": 17.4164, + "step": 18643 + }, + { + "epoch": 0.3407973385489974, + "grad_norm": 7.2652829283862514, + "learning_rate": 7.673925880351266e-06, + "loss": 17.7704, + "step": 18644 + }, + { + "epoch": 0.3408156177454439, + "grad_norm": 7.275087787321198, + "learning_rate": 7.673675749451312e-06, + "loss": 17.8544, + "step": 18645 + }, + { + "epoch": 0.3408338969418904, + "grad_norm": 6.113747157958731, + "learning_rate": 7.673425609180356e-06, + "loss": 17.4012, + "step": 18646 + }, + { + "epoch": 0.34085217613833696, + "grad_norm": 5.939387750543694, + "learning_rate": 7.673175459539277e-06, + "loss": 17.0685, + "step": 18647 + }, + { + "epoch": 0.3408704553347835, + "grad_norm": 5.721475154394008, + "learning_rate": 7.672925300528949e-06, + "loss": 17.1819, + "step": 18648 + }, + { + "epoch": 0.34088873453123003, + "grad_norm": 6.697699313686008, + "learning_rate": 7.672675132150249e-06, + "loss": 17.3943, + "step": 18649 + }, + { + "epoch": 0.3409070137276765, + "grad_norm": 6.393508358343627, + "learning_rate": 7.672424954404057e-06, + "loss": 17.5696, + "step": 18650 + }, + { + "epoch": 0.34092529292412305, + "grad_norm": 8.07438924970571, + "learning_rate": 7.672174767291246e-06, + "loss": 17.8587, + "step": 18651 + }, + { + "epoch": 0.3409435721205696, + "grad_norm": 6.642235116480529, + "learning_rate": 7.671924570812694e-06, + "loss": 17.6111, + "step": 18652 + }, + { + "epoch": 0.3409618513170161, + "grad_norm": 6.590922537754951, + "learning_rate": 7.671674364969277e-06, + "loss": 17.4407, + "step": 18653 + }, + { + "epoch": 0.34098013051346265, + "grad_norm": 5.966974561777243, + "learning_rate": 7.671424149761878e-06, + "loss": 17.4277, + "step": 18654 + }, + { + "epoch": 0.34099840970990913, + "grad_norm": 7.563680802508155, + "learning_rate": 7.671173925191364e-06, + "loss": 17.7721, + "step": 18655 + }, + { + "epoch": 0.34101668890635567, + "grad_norm": 6.201240177848574, + "learning_rate": 7.670923691258619e-06, + "loss": 17.2041, + "step": 18656 + }, + { + "epoch": 0.3410349681028022, + "grad_norm": 6.99645951902213, + "learning_rate": 7.670673447964518e-06, + "loss": 17.6827, + "step": 18657 + }, + { + "epoch": 0.34105324729924874, + "grad_norm": 6.715923558615538, + "learning_rate": 7.67042319530994e-06, + "loss": 17.2222, + "step": 18658 + }, + { + "epoch": 0.3410715264956953, + "grad_norm": 9.084080103438504, + "learning_rate": 7.670172933295758e-06, + "loss": 18.4638, + "step": 18659 + }, + { + "epoch": 0.34108980569214176, + "grad_norm": 6.348419974733283, + "learning_rate": 7.669922661922853e-06, + "loss": 17.379, + "step": 18660 + }, + { + "epoch": 0.3411080848885883, + "grad_norm": 8.42640356163809, + "learning_rate": 7.6696723811921e-06, + "loss": 18.2419, + "step": 18661 + }, + { + "epoch": 0.34112636408503483, + "grad_norm": 6.340744017100043, + 
"learning_rate": 7.669422091104377e-06, + "loss": 17.2227, + "step": 18662 + }, + { + "epoch": 0.34114464328148136, + "grad_norm": 6.583856869218941, + "learning_rate": 7.669171791660562e-06, + "loss": 17.5243, + "step": 18663 + }, + { + "epoch": 0.34116292247792784, + "grad_norm": 6.765985311311577, + "learning_rate": 7.668921482861531e-06, + "loss": 17.6967, + "step": 18664 + }, + { + "epoch": 0.3411812016743744, + "grad_norm": 7.855367461729398, + "learning_rate": 7.668671164708163e-06, + "loss": 18.0338, + "step": 18665 + }, + { + "epoch": 0.3411994808708209, + "grad_norm": 5.842699478441956, + "learning_rate": 7.668420837201331e-06, + "loss": 17.3604, + "step": 18666 + }, + { + "epoch": 0.34121776006726745, + "grad_norm": 5.647097215030044, + "learning_rate": 7.668170500341918e-06, + "loss": 17.3587, + "step": 18667 + }, + { + "epoch": 0.341236039263714, + "grad_norm": 6.117666474122066, + "learning_rate": 7.6679201541308e-06, + "loss": 17.5845, + "step": 18668 + }, + { + "epoch": 0.34125431846016047, + "grad_norm": 6.599540683177623, + "learning_rate": 7.667669798568852e-06, + "loss": 17.6067, + "step": 18669 + }, + { + "epoch": 0.341272597656607, + "grad_norm": 6.819116057737852, + "learning_rate": 7.667419433656953e-06, + "loss": 17.8113, + "step": 18670 + }, + { + "epoch": 0.34129087685305354, + "grad_norm": 6.718181840431996, + "learning_rate": 7.667169059395978e-06, + "loss": 17.6825, + "step": 18671 + }, + { + "epoch": 0.3413091560495001, + "grad_norm": 6.805742840354403, + "learning_rate": 7.66691867578681e-06, + "loss": 17.7864, + "step": 18672 + }, + { + "epoch": 0.3413274352459466, + "grad_norm": 7.512941050983491, + "learning_rate": 7.666668282830323e-06, + "loss": 18.1333, + "step": 18673 + }, + { + "epoch": 0.3413457144423931, + "grad_norm": 6.473133563595723, + "learning_rate": 7.666417880527395e-06, + "loss": 17.5756, + "step": 18674 + }, + { + "epoch": 0.3413639936388396, + "grad_norm": 6.226849326535687, + "learning_rate": 7.666167468878902e-06, + "loss": 17.576, + "step": 18675 + }, + { + "epoch": 0.34138227283528616, + "grad_norm": 6.436126671583582, + "learning_rate": 7.665917047885724e-06, + "loss": 17.4472, + "step": 18676 + }, + { + "epoch": 0.3414005520317327, + "grad_norm": 5.675453461661069, + "learning_rate": 7.665666617548739e-06, + "loss": 17.3, + "step": 18677 + }, + { + "epoch": 0.34141883122817923, + "grad_norm": 6.821599837399781, + "learning_rate": 7.665416177868825e-06, + "loss": 17.8204, + "step": 18678 + }, + { + "epoch": 0.3414371104246257, + "grad_norm": 7.783969477680731, + "learning_rate": 7.665165728846857e-06, + "loss": 18.1205, + "step": 18679 + }, + { + "epoch": 0.34145538962107225, + "grad_norm": 6.7448408250391045, + "learning_rate": 7.664915270483713e-06, + "loss": 17.6446, + "step": 18680 + }, + { + "epoch": 0.3414736688175188, + "grad_norm": 5.673997915867822, + "learning_rate": 7.664664802780275e-06, + "loss": 17.0912, + "step": 18681 + }, + { + "epoch": 0.3414919480139653, + "grad_norm": 6.27327755016016, + "learning_rate": 7.664414325737417e-06, + "loss": 17.4395, + "step": 18682 + }, + { + "epoch": 0.34151022721041185, + "grad_norm": 7.16947317453992, + "learning_rate": 7.664163839356017e-06, + "loss": 18.1252, + "step": 18683 + }, + { + "epoch": 0.34152850640685833, + "grad_norm": 5.995978383754804, + "learning_rate": 7.663913343636955e-06, + "loss": 17.2703, + "step": 18684 + }, + { + "epoch": 0.34154678560330487, + "grad_norm": 6.606834029652045, + "learning_rate": 7.663662838581107e-06, + "loss": 17.4672, + "step": 18685 + }, 
+ { + "epoch": 0.3415650647997514, + "grad_norm": 5.280484714656908, + "learning_rate": 7.663412324189353e-06, + "loss": 16.9884, + "step": 18686 + }, + { + "epoch": 0.34158334399619794, + "grad_norm": 7.013647159691276, + "learning_rate": 7.663161800462569e-06, + "loss": 17.7756, + "step": 18687 + }, + { + "epoch": 0.3416016231926445, + "grad_norm": 7.220249667939323, + "learning_rate": 7.662911267401634e-06, + "loss": 17.4257, + "step": 18688 + }, + { + "epoch": 0.34161990238909096, + "grad_norm": 6.068767553887193, + "learning_rate": 7.662660725007427e-06, + "loss": 17.4268, + "step": 18689 + }, + { + "epoch": 0.3416381815855375, + "grad_norm": 6.104009893408434, + "learning_rate": 7.662410173280825e-06, + "loss": 17.5837, + "step": 18690 + }, + { + "epoch": 0.34165646078198403, + "grad_norm": 6.684856497049956, + "learning_rate": 7.662159612222706e-06, + "loss": 17.843, + "step": 18691 + }, + { + "epoch": 0.34167473997843056, + "grad_norm": 6.654655871948643, + "learning_rate": 7.661909041833951e-06, + "loss": 17.4879, + "step": 18692 + }, + { + "epoch": 0.3416930191748771, + "grad_norm": 5.581902799086931, + "learning_rate": 7.661658462115431e-06, + "loss": 17.2192, + "step": 18693 + }, + { + "epoch": 0.3417112983713236, + "grad_norm": 6.939167635113642, + "learning_rate": 7.661407873068031e-06, + "loss": 17.7619, + "step": 18694 + }, + { + "epoch": 0.3417295775677701, + "grad_norm": 6.485193863234857, + "learning_rate": 7.661157274692628e-06, + "loss": 17.4223, + "step": 18695 + }, + { + "epoch": 0.34174785676421665, + "grad_norm": 6.644593736895018, + "learning_rate": 7.660906666990102e-06, + "loss": 17.7662, + "step": 18696 + }, + { + "epoch": 0.3417661359606632, + "grad_norm": 5.50455822128651, + "learning_rate": 7.660656049961326e-06, + "loss": 17.0274, + "step": 18697 + }, + { + "epoch": 0.34178441515710967, + "grad_norm": 6.24026387829265, + "learning_rate": 7.66040542360718e-06, + "loss": 17.3114, + "step": 18698 + }, + { + "epoch": 0.3418026943535562, + "grad_norm": 6.941206257786055, + "learning_rate": 7.660154787928546e-06, + "loss": 18.088, + "step": 18699 + }, + { + "epoch": 0.34182097355000274, + "grad_norm": 7.010808293183774, + "learning_rate": 7.659904142926302e-06, + "loss": 17.6137, + "step": 18700 + }, + { + "epoch": 0.3418392527464493, + "grad_norm": 5.953407799406823, + "learning_rate": 7.659653488601322e-06, + "loss": 17.2847, + "step": 18701 + }, + { + "epoch": 0.3418575319428958, + "grad_norm": 6.3305612939514555, + "learning_rate": 7.659402824954488e-06, + "loss": 17.7074, + "step": 18702 + }, + { + "epoch": 0.3418758111393423, + "grad_norm": 6.868297075420189, + "learning_rate": 7.659152151986679e-06, + "loss": 17.529, + "step": 18703 + }, + { + "epoch": 0.3418940903357888, + "grad_norm": 6.883515930230535, + "learning_rate": 7.658901469698771e-06, + "loss": 17.5927, + "step": 18704 + }, + { + "epoch": 0.34191236953223536, + "grad_norm": 5.387613705367663, + "learning_rate": 7.658650778091645e-06, + "loss": 17.0186, + "step": 18705 + }, + { + "epoch": 0.3419306487286819, + "grad_norm": 7.590193402357873, + "learning_rate": 7.658400077166178e-06, + "loss": 18.2854, + "step": 18706 + }, + { + "epoch": 0.34194892792512843, + "grad_norm": 5.918681654641845, + "learning_rate": 7.658149366923249e-06, + "loss": 17.1875, + "step": 18707 + }, + { + "epoch": 0.3419672071215749, + "grad_norm": 6.065267697725173, + "learning_rate": 7.65789864736374e-06, + "loss": 17.3768, + "step": 18708 + }, + { + "epoch": 0.34198548631802145, + "grad_norm": 6.035988272592032, + 
"learning_rate": 7.657647918488523e-06, + "loss": 17.473, + "step": 18709 + }, + { + "epoch": 0.342003765514468, + "grad_norm": 7.538219994187357, + "learning_rate": 7.657397180298483e-06, + "loss": 17.6449, + "step": 18710 + }, + { + "epoch": 0.3420220447109145, + "grad_norm": 8.440697458735569, + "learning_rate": 7.657146432794496e-06, + "loss": 18.4291, + "step": 18711 + }, + { + "epoch": 0.34204032390736105, + "grad_norm": 7.200546142422039, + "learning_rate": 7.65689567597744e-06, + "loss": 17.6517, + "step": 18712 + }, + { + "epoch": 0.34205860310380753, + "grad_norm": 7.796034028720834, + "learning_rate": 7.656644909848198e-06, + "loss": 17.9615, + "step": 18713 + }, + { + "epoch": 0.34207688230025407, + "grad_norm": 5.797347903855568, + "learning_rate": 7.656394134407646e-06, + "loss": 17.3534, + "step": 18714 + }, + { + "epoch": 0.3420951614967006, + "grad_norm": 6.328034515254491, + "learning_rate": 7.656143349656661e-06, + "loss": 17.3153, + "step": 18715 + }, + { + "epoch": 0.34211344069314714, + "grad_norm": 8.291240342910957, + "learning_rate": 7.655892555596124e-06, + "loss": 18.3262, + "step": 18716 + }, + { + "epoch": 0.3421317198895937, + "grad_norm": 7.436553844779456, + "learning_rate": 7.655641752226915e-06, + "loss": 17.8486, + "step": 18717 + }, + { + "epoch": 0.34214999908604016, + "grad_norm": 6.385686728345363, + "learning_rate": 7.655390939549911e-06, + "loss": 17.9412, + "step": 18718 + }, + { + "epoch": 0.3421682782824867, + "grad_norm": 7.330562144678844, + "learning_rate": 7.655140117565995e-06, + "loss": 17.9242, + "step": 18719 + }, + { + "epoch": 0.34218655747893323, + "grad_norm": 7.543387211435891, + "learning_rate": 7.65488928627604e-06, + "loss": 18.0056, + "step": 18720 + }, + { + "epoch": 0.34220483667537976, + "grad_norm": 7.81850260618639, + "learning_rate": 7.65463844568093e-06, + "loss": 17.9146, + "step": 18721 + }, + { + "epoch": 0.3422231158718263, + "grad_norm": 5.5160302544833995, + "learning_rate": 7.654387595781542e-06, + "loss": 17.0875, + "step": 18722 + }, + { + "epoch": 0.3422413950682728, + "grad_norm": 6.8386345834937305, + "learning_rate": 7.654136736578759e-06, + "loss": 17.4698, + "step": 18723 + }, + { + "epoch": 0.3422596742647193, + "grad_norm": 6.12700821599637, + "learning_rate": 7.653885868073454e-06, + "loss": 17.195, + "step": 18724 + }, + { + "epoch": 0.34227795346116585, + "grad_norm": 5.048266219225038, + "learning_rate": 7.653634990266507e-06, + "loss": 17.0995, + "step": 18725 + }, + { + "epoch": 0.3422962326576124, + "grad_norm": 7.152460909063966, + "learning_rate": 7.653384103158802e-06, + "loss": 17.6174, + "step": 18726 + }, + { + "epoch": 0.3423145118540589, + "grad_norm": 5.968102892944309, + "learning_rate": 7.653133206751218e-06, + "loss": 17.3426, + "step": 18727 + }, + { + "epoch": 0.3423327910505054, + "grad_norm": 8.25861894424999, + "learning_rate": 7.65288230104463e-06, + "loss": 18.1985, + "step": 18728 + }, + { + "epoch": 0.34235107024695194, + "grad_norm": 7.8130738972777465, + "learning_rate": 7.652631386039921e-06, + "loss": 17.7325, + "step": 18729 + }, + { + "epoch": 0.3423693494433985, + "grad_norm": 7.035678726252646, + "learning_rate": 7.65238046173797e-06, + "loss": 18.0212, + "step": 18730 + }, + { + "epoch": 0.342387628639845, + "grad_norm": 6.6801113104921805, + "learning_rate": 7.652129528139654e-06, + "loss": 17.6916, + "step": 18731 + }, + { + "epoch": 0.3424059078362915, + "grad_norm": 7.156947819969368, + "learning_rate": 7.651878585245853e-06, + "loss": 18.0317, + "step": 18732 + }, 
+ { + "epoch": 0.342424187032738, + "grad_norm": 7.582022893941064, + "learning_rate": 7.65162763305745e-06, + "loss": 17.6248, + "step": 18733 + }, + { + "epoch": 0.34244246622918456, + "grad_norm": 7.9573283114692295, + "learning_rate": 7.65137667157532e-06, + "loss": 18.3825, + "step": 18734 + }, + { + "epoch": 0.3424607454256311, + "grad_norm": 5.623300634353293, + "learning_rate": 7.651125700800346e-06, + "loss": 17.1096, + "step": 18735 + }, + { + "epoch": 0.34247902462207763, + "grad_norm": 6.6412611985181265, + "learning_rate": 7.650874720733407e-06, + "loss": 17.4568, + "step": 18736 + }, + { + "epoch": 0.3424973038185241, + "grad_norm": 5.04614504841508, + "learning_rate": 7.650623731375381e-06, + "loss": 16.9781, + "step": 18737 + }, + { + "epoch": 0.34251558301497065, + "grad_norm": 6.510875943565755, + "learning_rate": 7.65037273272715e-06, + "loss": 17.628, + "step": 18738 + }, + { + "epoch": 0.3425338622114172, + "grad_norm": 6.4333687956937515, + "learning_rate": 7.650121724789592e-06, + "loss": 17.8449, + "step": 18739 + }, + { + "epoch": 0.3425521414078637, + "grad_norm": 6.963383029694077, + "learning_rate": 7.649870707563588e-06, + "loss": 17.674, + "step": 18740 + }, + { + "epoch": 0.34257042060431026, + "grad_norm": 7.182635781943536, + "learning_rate": 7.649619681050015e-06, + "loss": 17.9128, + "step": 18741 + }, + { + "epoch": 0.34258869980075674, + "grad_norm": 8.654101903838043, + "learning_rate": 7.649368645249757e-06, + "loss": 18.2864, + "step": 18742 + }, + { + "epoch": 0.34260697899720327, + "grad_norm": 6.562410099438556, + "learning_rate": 7.649117600163691e-06, + "loss": 17.5895, + "step": 18743 + }, + { + "epoch": 0.3426252581936498, + "grad_norm": 7.61451345911573, + "learning_rate": 7.648866545792699e-06, + "loss": 17.8557, + "step": 18744 + }, + { + "epoch": 0.34264353739009634, + "grad_norm": 7.150796271708187, + "learning_rate": 7.648615482137658e-06, + "loss": 17.9896, + "step": 18745 + }, + { + "epoch": 0.3426618165865429, + "grad_norm": 6.534132170946157, + "learning_rate": 7.64836440919945e-06, + "loss": 17.8412, + "step": 18746 + }, + { + "epoch": 0.34268009578298936, + "grad_norm": 6.674528048805177, + "learning_rate": 7.648113326978954e-06, + "loss": 17.5252, + "step": 18747 + }, + { + "epoch": 0.3426983749794359, + "grad_norm": 5.492376445780663, + "learning_rate": 7.647862235477053e-06, + "loss": 17.1936, + "step": 18748 + }, + { + "epoch": 0.34271665417588243, + "grad_norm": 5.219124691380864, + "learning_rate": 7.647611134694621e-06, + "loss": 17.003, + "step": 18749 + }, + { + "epoch": 0.34273493337232896, + "grad_norm": 7.492574678745267, + "learning_rate": 7.647360024632544e-06, + "loss": 17.9828, + "step": 18750 + }, + { + "epoch": 0.3427532125687755, + "grad_norm": 5.298951135931565, + "learning_rate": 7.6471089052917e-06, + "loss": 16.8713, + "step": 18751 + }, + { + "epoch": 0.342771491765222, + "grad_norm": 6.2397662614587945, + "learning_rate": 7.646857776672968e-06, + "loss": 17.5835, + "step": 18752 + }, + { + "epoch": 0.3427897709616685, + "grad_norm": 5.97676886984631, + "learning_rate": 7.64660663877723e-06, + "loss": 17.6145, + "step": 18753 + }, + { + "epoch": 0.34280805015811505, + "grad_norm": 5.447429576276824, + "learning_rate": 7.646355491605367e-06, + "loss": 17.1826, + "step": 18754 + }, + { + "epoch": 0.3428263293545616, + "grad_norm": 5.293841771378851, + "learning_rate": 7.646104335158254e-06, + "loss": 17.2044, + "step": 18755 + }, + { + "epoch": 0.3428446085510081, + "grad_norm": 5.762019716466999, + 
"learning_rate": 7.645853169436777e-06, + "loss": 17.3198, + "step": 18756 + }, + { + "epoch": 0.3428628877474546, + "grad_norm": 6.274008540598406, + "learning_rate": 7.645601994441813e-06, + "loss": 17.3941, + "step": 18757 + }, + { + "epoch": 0.34288116694390114, + "grad_norm": 6.880523485854208, + "learning_rate": 7.645350810174245e-06, + "loss": 17.8719, + "step": 18758 + }, + { + "epoch": 0.3428994461403477, + "grad_norm": 8.122983849837068, + "learning_rate": 7.64509961663495e-06, + "loss": 17.9244, + "step": 18759 + }, + { + "epoch": 0.3429177253367942, + "grad_norm": 6.032228247568009, + "learning_rate": 7.644848413824812e-06, + "loss": 17.1918, + "step": 18760 + }, + { + "epoch": 0.34293600453324075, + "grad_norm": 6.7114781849518925, + "learning_rate": 7.64459720174471e-06, + "loss": 17.6641, + "step": 18761 + }, + { + "epoch": 0.3429542837296872, + "grad_norm": 5.099540818065632, + "learning_rate": 7.644345980395524e-06, + "loss": 16.9746, + "step": 18762 + }, + { + "epoch": 0.34297256292613376, + "grad_norm": 7.0862622411338645, + "learning_rate": 7.644094749778134e-06, + "loss": 17.9195, + "step": 18763 + }, + { + "epoch": 0.3429908421225803, + "grad_norm": 6.6467858690198405, + "learning_rate": 7.643843509893423e-06, + "loss": 17.7148, + "step": 18764 + }, + { + "epoch": 0.34300912131902683, + "grad_norm": 6.181743736946362, + "learning_rate": 7.64359226074227e-06, + "loss": 17.5138, + "step": 18765 + }, + { + "epoch": 0.3430274005154733, + "grad_norm": 7.299730676973299, + "learning_rate": 7.643341002325553e-06, + "loss": 17.8965, + "step": 18766 + }, + { + "epoch": 0.34304567971191985, + "grad_norm": 6.007275784693551, + "learning_rate": 7.643089734644157e-06, + "loss": 17.513, + "step": 18767 + }, + { + "epoch": 0.3430639589083664, + "grad_norm": 6.887004519072852, + "learning_rate": 7.64283845769896e-06, + "loss": 17.5114, + "step": 18768 + }, + { + "epoch": 0.3430822381048129, + "grad_norm": 7.213322080699672, + "learning_rate": 7.642587171490846e-06, + "loss": 17.6282, + "step": 18769 + }, + { + "epoch": 0.34310051730125946, + "grad_norm": 7.240584778713403, + "learning_rate": 7.64233587602069e-06, + "loss": 17.7969, + "step": 18770 + }, + { + "epoch": 0.34311879649770594, + "grad_norm": 7.131732296849313, + "learning_rate": 7.642084571289376e-06, + "loss": 17.5967, + "step": 18771 + }, + { + "epoch": 0.34313707569415247, + "grad_norm": 5.8936483822808405, + "learning_rate": 7.641833257297788e-06, + "loss": 17.2392, + "step": 18772 + }, + { + "epoch": 0.343155354890599, + "grad_norm": 7.3028334644621475, + "learning_rate": 7.641581934046802e-06, + "loss": 17.7516, + "step": 18773 + }, + { + "epoch": 0.34317363408704554, + "grad_norm": 6.259976548532674, + "learning_rate": 7.6413306015373e-06, + "loss": 17.2937, + "step": 18774 + }, + { + "epoch": 0.3431919132834921, + "grad_norm": 7.36073248310213, + "learning_rate": 7.641079259770163e-06, + "loss": 17.9214, + "step": 18775 + }, + { + "epoch": 0.34321019247993856, + "grad_norm": 7.563428694144747, + "learning_rate": 7.640827908746274e-06, + "loss": 17.849, + "step": 18776 + }, + { + "epoch": 0.3432284716763851, + "grad_norm": 5.960784738086114, + "learning_rate": 7.640576548466512e-06, + "loss": 17.4236, + "step": 18777 + }, + { + "epoch": 0.34324675087283163, + "grad_norm": 6.341646836969284, + "learning_rate": 7.640325178931757e-06, + "loss": 17.4697, + "step": 18778 + }, + { + "epoch": 0.34326503006927817, + "grad_norm": 6.240868076821658, + "learning_rate": 7.640073800142892e-06, + "loss": 17.0821, + "step": 18779 
+ }, + { + "epoch": 0.3432833092657247, + "grad_norm": 6.089649298515942, + "learning_rate": 7.639822412100798e-06, + "loss": 17.3529, + "step": 18780 + }, + { + "epoch": 0.3433015884621712, + "grad_norm": 5.793944393176278, + "learning_rate": 7.639571014806356e-06, + "loss": 17.3753, + "step": 18781 + }, + { + "epoch": 0.3433198676586177, + "grad_norm": 6.461406824451548, + "learning_rate": 7.639319608260446e-06, + "loss": 17.5282, + "step": 18782 + }, + { + "epoch": 0.34333814685506425, + "grad_norm": 6.998204049448984, + "learning_rate": 7.63906819246395e-06, + "loss": 17.8295, + "step": 18783 + }, + { + "epoch": 0.3433564260515108, + "grad_norm": 8.130001995420912, + "learning_rate": 7.638816767417746e-06, + "loss": 18.1857, + "step": 18784 + }, + { + "epoch": 0.3433747052479573, + "grad_norm": 6.0315541071302485, + "learning_rate": 7.638565333122721e-06, + "loss": 17.1067, + "step": 18785 + }, + { + "epoch": 0.3433929844444038, + "grad_norm": 8.11687880415702, + "learning_rate": 7.638313889579754e-06, + "loss": 18.4706, + "step": 18786 + }, + { + "epoch": 0.34341126364085034, + "grad_norm": 6.595413154594915, + "learning_rate": 7.638062436789726e-06, + "loss": 17.3251, + "step": 18787 + }, + { + "epoch": 0.3434295428372969, + "grad_norm": 6.549313801078654, + "learning_rate": 7.637810974753517e-06, + "loss": 17.6947, + "step": 18788 + }, + { + "epoch": 0.3434478220337434, + "grad_norm": 5.937545933649113, + "learning_rate": 7.637559503472009e-06, + "loss": 17.1814, + "step": 18789 + }, + { + "epoch": 0.34346610123018995, + "grad_norm": 6.059549672670603, + "learning_rate": 7.637308022946084e-06, + "loss": 17.2336, + "step": 18790 + }, + { + "epoch": 0.3434843804266364, + "grad_norm": 6.964066984769655, + "learning_rate": 7.637056533176625e-06, + "loss": 17.7251, + "step": 18791 + }, + { + "epoch": 0.34350265962308296, + "grad_norm": 8.46609148214759, + "learning_rate": 7.636805034164511e-06, + "loss": 18.3873, + "step": 18792 + }, + { + "epoch": 0.3435209388195295, + "grad_norm": 5.574070086308546, + "learning_rate": 7.636553525910621e-06, + "loss": 17.1388, + "step": 18793 + }, + { + "epoch": 0.34353921801597603, + "grad_norm": 7.062336492708427, + "learning_rate": 7.636302008415844e-06, + "loss": 17.7763, + "step": 18794 + }, + { + "epoch": 0.34355749721242257, + "grad_norm": 6.918785307699996, + "learning_rate": 7.636050481681055e-06, + "loss": 17.5145, + "step": 18795 + }, + { + "epoch": 0.34357577640886905, + "grad_norm": 6.681621492836319, + "learning_rate": 7.63579894570714e-06, + "loss": 17.6554, + "step": 18796 + }, + { + "epoch": 0.3435940556053156, + "grad_norm": 8.249625305925028, + "learning_rate": 7.635547400494976e-06, + "loss": 18.4605, + "step": 18797 + }, + { + "epoch": 0.3436123348017621, + "grad_norm": 8.073382903177853, + "learning_rate": 7.635295846045447e-06, + "loss": 18.4458, + "step": 18798 + }, + { + "epoch": 0.34363061399820866, + "grad_norm": 6.880827109514999, + "learning_rate": 7.635044282359437e-06, + "loss": 17.6348, + "step": 18799 + }, + { + "epoch": 0.34364889319465514, + "grad_norm": 6.763693486197934, + "learning_rate": 7.634792709437822e-06, + "loss": 17.4167, + "step": 18800 + }, + { + "epoch": 0.34366717239110167, + "grad_norm": 6.2756764284531386, + "learning_rate": 7.63454112728149e-06, + "loss": 17.4985, + "step": 18801 + }, + { + "epoch": 0.3436854515875482, + "grad_norm": 5.653630459512056, + "learning_rate": 7.634289535891319e-06, + "loss": 17.3694, + "step": 18802 + }, + { + "epoch": 0.34370373078399474, + "grad_norm": 7.489377884641946, 
+ "learning_rate": 7.634037935268191e-06, + "loss": 17.7952, + "step": 18803 + }, + { + "epoch": 0.3437220099804413, + "grad_norm": 5.682485521663398, + "learning_rate": 7.63378632541299e-06, + "loss": 17.2305, + "step": 18804 + }, + { + "epoch": 0.34374028917688776, + "grad_norm": 6.994947420661549, + "learning_rate": 7.633534706326596e-06, + "loss": 17.6213, + "step": 18805 + }, + { + "epoch": 0.3437585683733343, + "grad_norm": 7.111023801184497, + "learning_rate": 7.633283078009892e-06, + "loss": 17.9773, + "step": 18806 + }, + { + "epoch": 0.34377684756978083, + "grad_norm": 9.382723296691937, + "learning_rate": 7.633031440463757e-06, + "loss": 18.3125, + "step": 18807 + }, + { + "epoch": 0.34379512676622737, + "grad_norm": 9.081819864001538, + "learning_rate": 7.632779793689077e-06, + "loss": 18.6075, + "step": 18808 + }, + { + "epoch": 0.3438134059626739, + "grad_norm": 7.903707217962754, + "learning_rate": 7.632528137686732e-06, + "loss": 17.8067, + "step": 18809 + }, + { + "epoch": 0.3438316851591204, + "grad_norm": 6.8143214832862204, + "learning_rate": 7.632276472457604e-06, + "loss": 17.8137, + "step": 18810 + }, + { + "epoch": 0.3438499643555669, + "grad_norm": 8.528808663813097, + "learning_rate": 7.632024798002577e-06, + "loss": 18.0433, + "step": 18811 + }, + { + "epoch": 0.34386824355201345, + "grad_norm": 5.970033498778536, + "learning_rate": 7.631773114322529e-06, + "loss": 17.3655, + "step": 18812 + }, + { + "epoch": 0.34388652274846, + "grad_norm": 7.185904186886038, + "learning_rate": 7.631521421418348e-06, + "loss": 17.7229, + "step": 18813 + }, + { + "epoch": 0.3439048019449065, + "grad_norm": 6.002420221406456, + "learning_rate": 7.63126971929091e-06, + "loss": 17.6412, + "step": 18814 + }, + { + "epoch": 0.343923081141353, + "grad_norm": 5.6499855524419145, + "learning_rate": 7.631018007941101e-06, + "loss": 17.1009, + "step": 18815 + }, + { + "epoch": 0.34394136033779954, + "grad_norm": 5.970341578576657, + "learning_rate": 7.6307662873698e-06, + "loss": 17.3979, + "step": 18816 + }, + { + "epoch": 0.3439596395342461, + "grad_norm": 5.279621444832671, + "learning_rate": 7.630514557577895e-06, + "loss": 17.3045, + "step": 18817 + }, + { + "epoch": 0.3439779187306926, + "grad_norm": 6.6552676311017365, + "learning_rate": 7.630262818566264e-06, + "loss": 17.5556, + "step": 18818 + }, + { + "epoch": 0.34399619792713915, + "grad_norm": 7.680684311602772, + "learning_rate": 7.630011070335788e-06, + "loss": 18.5134, + "step": 18819 + }, + { + "epoch": 0.3440144771235856, + "grad_norm": 5.6916536488339355, + "learning_rate": 7.629759312887353e-06, + "loss": 17.4569, + "step": 18820 + }, + { + "epoch": 0.34403275632003216, + "grad_norm": 7.07473974257135, + "learning_rate": 7.62950754622184e-06, + "loss": 18.1428, + "step": 18821 + }, + { + "epoch": 0.3440510355164787, + "grad_norm": 6.386525174074634, + "learning_rate": 7.62925577034013e-06, + "loss": 17.6783, + "step": 18822 + }, + { + "epoch": 0.34406931471292523, + "grad_norm": 7.169004794316508, + "learning_rate": 7.629003985243108e-06, + "loss": 17.8882, + "step": 18823 + }, + { + "epoch": 0.34408759390937177, + "grad_norm": 6.911734582211646, + "learning_rate": 7.628752190931654e-06, + "loss": 18.1529, + "step": 18824 + }, + { + "epoch": 0.34410587310581825, + "grad_norm": 5.576247646965134, + "learning_rate": 7.628500387406652e-06, + "loss": 17.0755, + "step": 18825 + }, + { + "epoch": 0.3441241523022648, + "grad_norm": 7.021326916468734, + "learning_rate": 7.6282485746689835e-06, + "loss": 17.8135, + "step": 18826 
+ }, + { + "epoch": 0.3441424314987113, + "grad_norm": 5.888912413575095, + "learning_rate": 7.627996752719533e-06, + "loss": 17.1671, + "step": 18827 + }, + { + "epoch": 0.34416071069515786, + "grad_norm": 7.174716073977307, + "learning_rate": 7.627744921559183e-06, + "loss": 17.7786, + "step": 18828 + }, + { + "epoch": 0.3441789898916044, + "grad_norm": 6.440076200999903, + "learning_rate": 7.627493081188813e-06, + "loss": 17.3864, + "step": 18829 + }, + { + "epoch": 0.3441972690880509, + "grad_norm": 6.371314035709741, + "learning_rate": 7.627241231609308e-06, + "loss": 17.3974, + "step": 18830 + }, + { + "epoch": 0.3442155482844974, + "grad_norm": 7.178583371291788, + "learning_rate": 7.626989372821552e-06, + "loss": 17.9502, + "step": 18831 + }, + { + "epoch": 0.34423382748094394, + "grad_norm": 5.43836292811123, + "learning_rate": 7.6267375048264245e-06, + "loss": 17.1827, + "step": 18832 + }, + { + "epoch": 0.3442521066773905, + "grad_norm": 6.061526697036359, + "learning_rate": 7.62648562762481e-06, + "loss": 17.364, + "step": 18833 + }, + { + "epoch": 0.34427038587383696, + "grad_norm": 7.416400119145123, + "learning_rate": 7.626233741217592e-06, + "loss": 17.9628, + "step": 18834 + }, + { + "epoch": 0.3442886650702835, + "grad_norm": 6.155932104665575, + "learning_rate": 7.625981845605652e-06, + "loss": 17.6379, + "step": 18835 + }, + { + "epoch": 0.34430694426673003, + "grad_norm": 6.286729678642646, + "learning_rate": 7.625729940789875e-06, + "loss": 17.0942, + "step": 18836 + }, + { + "epoch": 0.34432522346317657, + "grad_norm": 5.570072005269807, + "learning_rate": 7.625478026771143e-06, + "loss": 17.1133, + "step": 18837 + }, + { + "epoch": 0.3443435026596231, + "grad_norm": 8.294658198108628, + "learning_rate": 7.625226103550334e-06, + "loss": 18.2501, + "step": 18838 + }, + { + "epoch": 0.3443617818560696, + "grad_norm": 6.554547636739679, + "learning_rate": 7.6249741711283385e-06, + "loss": 17.6861, + "step": 18839 + }, + { + "epoch": 0.3443800610525161, + "grad_norm": 5.81957898550504, + "learning_rate": 7.624722229506036e-06, + "loss": 17.3066, + "step": 18840 + }, + { + "epoch": 0.34439834024896265, + "grad_norm": 6.623162130971828, + "learning_rate": 7.624470278684311e-06, + "loss": 17.4684, + "step": 18841 + }, + { + "epoch": 0.3444166194454092, + "grad_norm": 7.694048787608055, + "learning_rate": 7.624218318664044e-06, + "loss": 18.2676, + "step": 18842 + }, + { + "epoch": 0.3444348986418557, + "grad_norm": 6.292099350719881, + "learning_rate": 7.6239663494461195e-06, + "loss": 17.409, + "step": 18843 + }, + { + "epoch": 0.3444531778383022, + "grad_norm": 6.2170305037463836, + "learning_rate": 7.623714371031421e-06, + "loss": 17.2171, + "step": 18844 + }, + { + "epoch": 0.34447145703474874, + "grad_norm": 7.276994272151025, + "learning_rate": 7.623462383420831e-06, + "loss": 17.9421, + "step": 18845 + }, + { + "epoch": 0.3444897362311953, + "grad_norm": 6.682340604025584, + "learning_rate": 7.6232103866152325e-06, + "loss": 17.2774, + "step": 18846 + }, + { + "epoch": 0.3445080154276418, + "grad_norm": 7.655083146438742, + "learning_rate": 7.622958380615511e-06, + "loss": 18.1831, + "step": 18847 + }, + { + "epoch": 0.34452629462408835, + "grad_norm": 6.375371933391231, + "learning_rate": 7.622706365422545e-06, + "loss": 17.7205, + "step": 18848 + }, + { + "epoch": 0.3445445738205348, + "grad_norm": 7.460218703284634, + "learning_rate": 7.622454341037224e-06, + "loss": 18.0615, + "step": 18849 + }, + { + "epoch": 0.34456285301698136, + "grad_norm": 
7.557403389145596, + "learning_rate": 7.622202307460426e-06, + "loss": 17.9129, + "step": 18850 + }, + { + "epoch": 0.3445811322134279, + "grad_norm": 5.463903761896323, + "learning_rate": 7.621950264693039e-06, + "loss": 17.2442, + "step": 18851 + }, + { + "epoch": 0.34459941140987443, + "grad_norm": 5.320197632858207, + "learning_rate": 7.62169821273594e-06, + "loss": 17.2238, + "step": 18852 + }, + { + "epoch": 0.34461769060632097, + "grad_norm": 5.893030144033991, + "learning_rate": 7.621446151590018e-06, + "loss": 17.4173, + "step": 18853 + }, + { + "epoch": 0.34463596980276745, + "grad_norm": 6.252265876898372, + "learning_rate": 7.6211940812561555e-06, + "loss": 17.5848, + "step": 18854 + }, + { + "epoch": 0.344654248999214, + "grad_norm": 7.255444106176171, + "learning_rate": 7.620942001735235e-06, + "loss": 17.8887, + "step": 18855 + }, + { + "epoch": 0.3446725281956605, + "grad_norm": 5.76311644733677, + "learning_rate": 7.620689913028141e-06, + "loss": 17.1523, + "step": 18856 + }, + { + "epoch": 0.34469080739210706, + "grad_norm": 6.070883116721132, + "learning_rate": 7.620437815135753e-06, + "loss": 17.1883, + "step": 18857 + }, + { + "epoch": 0.3447090865885536, + "grad_norm": 6.704219160795553, + "learning_rate": 7.620185708058962e-06, + "loss": 17.6405, + "step": 18858 + }, + { + "epoch": 0.3447273657850001, + "grad_norm": 5.39117974837999, + "learning_rate": 7.619933591798645e-06, + "loss": 17.2863, + "step": 18859 + }, + { + "epoch": 0.3447456449814466, + "grad_norm": 5.623336980396304, + "learning_rate": 7.619681466355688e-06, + "loss": 17.4557, + "step": 18860 + }, + { + "epoch": 0.34476392417789314, + "grad_norm": 6.1063789323148985, + "learning_rate": 7.619429331730977e-06, + "loss": 17.6039, + "step": 18861 + }, + { + "epoch": 0.3447822033743397, + "grad_norm": 7.56351926677025, + "learning_rate": 7.6191771879253905e-06, + "loss": 17.9513, + "step": 18862 + }, + { + "epoch": 0.3448004825707862, + "grad_norm": 6.106402972925933, + "learning_rate": 7.618925034939817e-06, + "loss": 17.3865, + "step": 18863 + }, + { + "epoch": 0.3448187617672327, + "grad_norm": 5.920384771196159, + "learning_rate": 7.618672872775138e-06, + "loss": 17.3956, + "step": 18864 + }, + { + "epoch": 0.34483704096367923, + "grad_norm": 6.670774222241027, + "learning_rate": 7.618420701432238e-06, + "loss": 17.9289, + "step": 18865 + }, + { + "epoch": 0.34485532016012577, + "grad_norm": 7.524320600353264, + "learning_rate": 7.618168520912001e-06, + "loss": 18.0164, + "step": 18866 + }, + { + "epoch": 0.3448735993565723, + "grad_norm": 6.578151056101703, + "learning_rate": 7.617916331215309e-06, + "loss": 17.4418, + "step": 18867 + }, + { + "epoch": 0.3448918785530188, + "grad_norm": 6.111321466298664, + "learning_rate": 7.61766413234305e-06, + "loss": 17.4151, + "step": 18868 + }, + { + "epoch": 0.3449101577494653, + "grad_norm": 6.647655970034263, + "learning_rate": 7.617411924296103e-06, + "loss": 17.8468, + "step": 18869 + }, + { + "epoch": 0.34492843694591185, + "grad_norm": 5.391290134991107, + "learning_rate": 7.617159707075355e-06, + "loss": 17.1188, + "step": 18870 + }, + { + "epoch": 0.3449467161423584, + "grad_norm": 5.836604081616722, + "learning_rate": 7.61690748068169e-06, + "loss": 17.4615, + "step": 18871 + }, + { + "epoch": 0.3449649953388049, + "grad_norm": 5.828869638207708, + "learning_rate": 7.6166552451159914e-06, + "loss": 17.298, + "step": 18872 + }, + { + "epoch": 0.3449832745352514, + "grad_norm": 6.268811339034368, + "learning_rate": 7.6164030003791424e-06, + "loss": 
17.5558, + "step": 18873 + }, + { + "epoch": 0.34500155373169794, + "grad_norm": 7.453674579298899, + "learning_rate": 7.6161507464720285e-06, + "loss": 17.7549, + "step": 18874 + }, + { + "epoch": 0.3450198329281445, + "grad_norm": 6.651998662950934, + "learning_rate": 7.615898483395534e-06, + "loss": 17.8893, + "step": 18875 + }, + { + "epoch": 0.345038112124591, + "grad_norm": 6.128656015172041, + "learning_rate": 7.61564621115054e-06, + "loss": 17.6188, + "step": 18876 + }, + { + "epoch": 0.34505639132103755, + "grad_norm": 6.3756299741129485, + "learning_rate": 7.615393929737935e-06, + "loss": 17.6619, + "step": 18877 + }, + { + "epoch": 0.34507467051748403, + "grad_norm": 5.825326039746997, + "learning_rate": 7.615141639158601e-06, + "loss": 17.3186, + "step": 18878 + }, + { + "epoch": 0.34509294971393056, + "grad_norm": 6.891541289228385, + "learning_rate": 7.614889339413422e-06, + "loss": 17.9987, + "step": 18879 + }, + { + "epoch": 0.3451112289103771, + "grad_norm": 5.671312721659657, + "learning_rate": 7.614637030503282e-06, + "loss": 16.9284, + "step": 18880 + }, + { + "epoch": 0.34512950810682363, + "grad_norm": 6.622144873740716, + "learning_rate": 7.614384712429068e-06, + "loss": 17.5671, + "step": 18881 + }, + { + "epoch": 0.34514778730327017, + "grad_norm": 7.310112596708961, + "learning_rate": 7.614132385191661e-06, + "loss": 17.9785, + "step": 18882 + }, + { + "epoch": 0.34516606649971665, + "grad_norm": 5.69360839671099, + "learning_rate": 7.613880048791948e-06, + "loss": 17.1646, + "step": 18883 + }, + { + "epoch": 0.3451843456961632, + "grad_norm": 6.170253810983289, + "learning_rate": 7.61362770323081e-06, + "loss": 17.4712, + "step": 18884 + }, + { + "epoch": 0.3452026248926097, + "grad_norm": 5.50022870992592, + "learning_rate": 7.613375348509135e-06, + "loss": 17.2666, + "step": 18885 + }, + { + "epoch": 0.34522090408905626, + "grad_norm": 7.188684396091385, + "learning_rate": 7.613122984627808e-06, + "loss": 17.6921, + "step": 18886 + }, + { + "epoch": 0.3452391832855028, + "grad_norm": 7.81632667876437, + "learning_rate": 7.61287061158771e-06, + "loss": 18.0837, + "step": 18887 + }, + { + "epoch": 0.3452574624819493, + "grad_norm": 6.656226293490472, + "learning_rate": 7.612618229389728e-06, + "loss": 17.4341, + "step": 18888 + }, + { + "epoch": 0.3452757416783958, + "grad_norm": 6.449742042322299, + "learning_rate": 7.6123658380347446e-06, + "loss": 17.4351, + "step": 18889 + }, + { + "epoch": 0.34529402087484234, + "grad_norm": 6.712440839298584, + "learning_rate": 7.612113437523646e-06, + "loss": 17.6861, + "step": 18890 + }, + { + "epoch": 0.3453123000712889, + "grad_norm": 8.251373663963818, + "learning_rate": 7.611861027857317e-06, + "loss": 18.1807, + "step": 18891 + }, + { + "epoch": 0.3453305792677354, + "grad_norm": 6.296892881834908, + "learning_rate": 7.61160860903664e-06, + "loss": 17.5629, + "step": 18892 + }, + { + "epoch": 0.3453488584641819, + "grad_norm": 6.264489853355421, + "learning_rate": 7.611356181062503e-06, + "loss": 17.5003, + "step": 18893 + }, + { + "epoch": 0.34536713766062843, + "grad_norm": 6.240038453594531, + "learning_rate": 7.61110374393579e-06, + "loss": 17.3175, + "step": 18894 + }, + { + "epoch": 0.34538541685707497, + "grad_norm": 6.152512085827735, + "learning_rate": 7.610851297657383e-06, + "loss": 17.3498, + "step": 18895 + }, + { + "epoch": 0.3454036960535215, + "grad_norm": 6.005694368651467, + "learning_rate": 7.6105988422281715e-06, + "loss": 17.0651, + "step": 18896 + }, + { + "epoch": 0.34542197524996804, + 
"grad_norm": 5.959610914219258, + "learning_rate": 7.610346377649034e-06, + "loss": 17.4591, + "step": 18897 + }, + { + "epoch": 0.3454402544464145, + "grad_norm": 6.33804627651429, + "learning_rate": 7.610093903920861e-06, + "loss": 17.6828, + "step": 18898 + }, + { + "epoch": 0.34545853364286105, + "grad_norm": 7.439112451385574, + "learning_rate": 7.609841421044537e-06, + "loss": 17.9568, + "step": 18899 + }, + { + "epoch": 0.3454768128393076, + "grad_norm": 7.210065475661961, + "learning_rate": 7.609588929020944e-06, + "loss": 17.7448, + "step": 18900 + }, + { + "epoch": 0.3454950920357541, + "grad_norm": 7.720746780965516, + "learning_rate": 7.609336427850966e-06, + "loss": 17.9802, + "step": 18901 + }, + { + "epoch": 0.3455133712322006, + "grad_norm": 5.498515695973799, + "learning_rate": 7.609083917535491e-06, + "loss": 17.3154, + "step": 18902 + }, + { + "epoch": 0.34553165042864714, + "grad_norm": 5.962879130467125, + "learning_rate": 7.608831398075406e-06, + "loss": 17.3829, + "step": 18903 + }, + { + "epoch": 0.3455499296250937, + "grad_norm": 6.846620968994786, + "learning_rate": 7.608578869471591e-06, + "loss": 17.8339, + "step": 18904 + }, + { + "epoch": 0.3455682088215402, + "grad_norm": 7.6018391661958145, + "learning_rate": 7.6083263317249335e-06, + "loss": 18.1102, + "step": 18905 + }, + { + "epoch": 0.34558648801798675, + "grad_norm": 5.784234635221302, + "learning_rate": 7.6080737848363195e-06, + "loss": 17.2917, + "step": 18906 + }, + { + "epoch": 0.34560476721443323, + "grad_norm": 5.752357111200658, + "learning_rate": 7.6078212288066335e-06, + "loss": 17.2057, + "step": 18907 + }, + { + "epoch": 0.34562304641087976, + "grad_norm": 6.459778009405557, + "learning_rate": 7.60756866363676e-06, + "loss": 17.1771, + "step": 18908 + }, + { + "epoch": 0.3456413256073263, + "grad_norm": 7.0444922758887865, + "learning_rate": 7.6073160893275834e-06, + "loss": 17.8434, + "step": 18909 + }, + { + "epoch": 0.34565960480377284, + "grad_norm": 6.265177353041961, + "learning_rate": 7.6070635058799915e-06, + "loss": 17.1887, + "step": 18910 + }, + { + "epoch": 0.34567788400021937, + "grad_norm": 7.053831506175236, + "learning_rate": 7.606810913294866e-06, + "loss": 17.7498, + "step": 18911 + }, + { + "epoch": 0.34569616319666585, + "grad_norm": 5.4893544285976725, + "learning_rate": 7.606558311573097e-06, + "loss": 17.1869, + "step": 18912 + }, + { + "epoch": 0.3457144423931124, + "grad_norm": 6.470420202776055, + "learning_rate": 7.606305700715567e-06, + "loss": 17.6234, + "step": 18913 + }, + { + "epoch": 0.3457327215895589, + "grad_norm": 5.8681958086487915, + "learning_rate": 7.606053080723161e-06, + "loss": 17.1226, + "step": 18914 + }, + { + "epoch": 0.34575100078600546, + "grad_norm": 7.992891775574533, + "learning_rate": 7.605800451596765e-06, + "loss": 17.9576, + "step": 18915 + }, + { + "epoch": 0.345769279982452, + "grad_norm": 5.892657778749329, + "learning_rate": 7.605547813337264e-06, + "loss": 17.2621, + "step": 18916 + }, + { + "epoch": 0.3457875591788985, + "grad_norm": 5.501337991493749, + "learning_rate": 7.605295165945546e-06, + "loss": 17.1447, + "step": 18917 + }, + { + "epoch": 0.345805838375345, + "grad_norm": 7.913320990572355, + "learning_rate": 7.605042509422493e-06, + "loss": 18.0964, + "step": 18918 + }, + { + "epoch": 0.34582411757179154, + "grad_norm": 6.4001766127926905, + "learning_rate": 7.604789843768993e-06, + "loss": 17.3341, + "step": 18919 + }, + { + "epoch": 0.3458423967682381, + "grad_norm": 7.205033271136617, + "learning_rate": 
7.60453716898593e-06, + "loss": 17.9212, + "step": 18920 + }, + { + "epoch": 0.3458606759646846, + "grad_norm": 5.8284231389009395, + "learning_rate": 7.6042844850741905e-06, + "loss": 17.1386, + "step": 18921 + }, + { + "epoch": 0.3458789551611311, + "grad_norm": 7.97500368082429, + "learning_rate": 7.604031792034659e-06, + "loss": 18.2105, + "step": 18922 + }, + { + "epoch": 0.34589723435757763, + "grad_norm": 6.882976466923342, + "learning_rate": 7.603779089868224e-06, + "loss": 17.7936, + "step": 18923 + }, + { + "epoch": 0.34591551355402417, + "grad_norm": 6.071329614133939, + "learning_rate": 7.603526378575767e-06, + "loss": 17.3471, + "step": 18924 + }, + { + "epoch": 0.3459337927504707, + "grad_norm": 7.596351184938982, + "learning_rate": 7.603273658158177e-06, + "loss": 18.3059, + "step": 18925 + }, + { + "epoch": 0.34595207194691724, + "grad_norm": 7.818145529326444, + "learning_rate": 7.603020928616338e-06, + "loss": 17.9933, + "step": 18926 + }, + { + "epoch": 0.3459703511433637, + "grad_norm": 5.9681859650846, + "learning_rate": 7.602768189951138e-06, + "loss": 17.3911, + "step": 18927 + }, + { + "epoch": 0.34598863033981025, + "grad_norm": 7.141417352971058, + "learning_rate": 7.602515442163461e-06, + "loss": 17.5778, + "step": 18928 + }, + { + "epoch": 0.3460069095362568, + "grad_norm": 5.776967542647376, + "learning_rate": 7.602262685254192e-06, + "loss": 17.2206, + "step": 18929 + }, + { + "epoch": 0.3460251887327033, + "grad_norm": 7.337196629646757, + "learning_rate": 7.602009919224219e-06, + "loss": 18.1074, + "step": 18930 + }, + { + "epoch": 0.34604346792914986, + "grad_norm": 6.295462201743125, + "learning_rate": 7.6017571440744264e-06, + "loss": 17.4926, + "step": 18931 + }, + { + "epoch": 0.34606174712559634, + "grad_norm": 6.889041399251884, + "learning_rate": 7.601504359805702e-06, + "loss": 17.6455, + "step": 18932 + }, + { + "epoch": 0.3460800263220429, + "grad_norm": 8.463787252771924, + "learning_rate": 7.60125156641893e-06, + "loss": 18.1351, + "step": 18933 + }, + { + "epoch": 0.3460983055184894, + "grad_norm": 6.931909202236026, + "learning_rate": 7.600998763914996e-06, + "loss": 17.6481, + "step": 18934 + }, + { + "epoch": 0.34611658471493595, + "grad_norm": 6.67161552163744, + "learning_rate": 7.6007459522947875e-06, + "loss": 17.763, + "step": 18935 + }, + { + "epoch": 0.34613486391138243, + "grad_norm": 6.180953002498059, + "learning_rate": 7.60049313155919e-06, + "loss": 17.3172, + "step": 18936 + }, + { + "epoch": 0.34615314310782896, + "grad_norm": 5.520944988557824, + "learning_rate": 7.600240301709092e-06, + "loss": 17.0942, + "step": 18937 + }, + { + "epoch": 0.3461714223042755, + "grad_norm": 5.072948752035825, + "learning_rate": 7.599987462745375e-06, + "loss": 16.8876, + "step": 18938 + }, + { + "epoch": 0.34618970150072204, + "grad_norm": 8.091342686028737, + "learning_rate": 7.599734614668928e-06, + "loss": 17.9227, + "step": 18939 + }, + { + "epoch": 0.34620798069716857, + "grad_norm": 5.525980838938255, + "learning_rate": 7.599481757480636e-06, + "loss": 17.1579, + "step": 18940 + }, + { + "epoch": 0.34622625989361505, + "grad_norm": 6.154743454988897, + "learning_rate": 7.599228891181389e-06, + "loss": 17.4544, + "step": 18941 + }, + { + "epoch": 0.3462445390900616, + "grad_norm": 7.132020764960109, + "learning_rate": 7.5989760157720675e-06, + "loss": 17.8731, + "step": 18942 + }, + { + "epoch": 0.3462628182865081, + "grad_norm": 5.503747199022232, + "learning_rate": 7.59872313125356e-06, + "loss": 17.073, + "step": 18943 + }, + { + 
"epoch": 0.34628109748295466, + "grad_norm": 6.0161469480476795, + "learning_rate": 7.598470237626756e-06, + "loss": 17.2366, + "step": 18944 + }, + { + "epoch": 0.3462993766794012, + "grad_norm": 6.026055133355524, + "learning_rate": 7.598217334892537e-06, + "loss": 17.3908, + "step": 18945 + }, + { + "epoch": 0.3463176558758477, + "grad_norm": 6.8960372076668035, + "learning_rate": 7.597964423051794e-06, + "loss": 17.6786, + "step": 18946 + }, + { + "epoch": 0.3463359350722942, + "grad_norm": 7.113678456554318, + "learning_rate": 7.5977115021054096e-06, + "loss": 18.041, + "step": 18947 + }, + { + "epoch": 0.34635421426874075, + "grad_norm": 6.122004662433077, + "learning_rate": 7.597458572054272e-06, + "loss": 17.2704, + "step": 18948 + }, + { + "epoch": 0.3463724934651873, + "grad_norm": 8.464978046311984, + "learning_rate": 7.597205632899267e-06, + "loss": 18.6798, + "step": 18949 + }, + { + "epoch": 0.3463907726616338, + "grad_norm": 6.166113285087843, + "learning_rate": 7.596952684641282e-06, + "loss": 17.5786, + "step": 18950 + }, + { + "epoch": 0.3464090518580803, + "grad_norm": 8.464100664878849, + "learning_rate": 7.596699727281205e-06, + "loss": 18.4814, + "step": 18951 + }, + { + "epoch": 0.34642733105452683, + "grad_norm": 6.51298013944387, + "learning_rate": 7.596446760819918e-06, + "loss": 17.5248, + "step": 18952 + }, + { + "epoch": 0.34644561025097337, + "grad_norm": 8.071762299248826, + "learning_rate": 7.596193785258311e-06, + "loss": 18.2478, + "step": 18953 + }, + { + "epoch": 0.3464638894474199, + "grad_norm": 6.861047489719463, + "learning_rate": 7.59594080059727e-06, + "loss": 17.6007, + "step": 18954 + }, + { + "epoch": 0.34648216864386644, + "grad_norm": 7.388227593370416, + "learning_rate": 7.595687806837683e-06, + "loss": 17.9192, + "step": 18955 + }, + { + "epoch": 0.3465004478403129, + "grad_norm": 6.022438807693688, + "learning_rate": 7.595434803980436e-06, + "loss": 17.35, + "step": 18956 + }, + { + "epoch": 0.34651872703675946, + "grad_norm": 6.939677496048242, + "learning_rate": 7.595181792026414e-06, + "loss": 17.8307, + "step": 18957 + }, + { + "epoch": 0.346537006233206, + "grad_norm": 6.9813038396306135, + "learning_rate": 7.594928770976505e-06, + "loss": 17.8424, + "step": 18958 + }, + { + "epoch": 0.3465552854296525, + "grad_norm": 5.442763769373437, + "learning_rate": 7.594675740831597e-06, + "loss": 17.1987, + "step": 18959 + }, + { + "epoch": 0.34657356462609906, + "grad_norm": 6.688560567220221, + "learning_rate": 7.594422701592574e-06, + "loss": 17.663, + "step": 18960 + }, + { + "epoch": 0.34659184382254554, + "grad_norm": 7.418697154261923, + "learning_rate": 7.5941696532603246e-06, + "loss": 17.8478, + "step": 18961 + }, + { + "epoch": 0.3466101230189921, + "grad_norm": 6.555275826485624, + "learning_rate": 7.5939165958357365e-06, + "loss": 17.7269, + "step": 18962 + }, + { + "epoch": 0.3466284022154386, + "grad_norm": 6.872575812728452, + "learning_rate": 7.593663529319695e-06, + "loss": 17.8606, + "step": 18963 + }, + { + "epoch": 0.34664668141188515, + "grad_norm": 6.927386186000394, + "learning_rate": 7.593410453713088e-06, + "loss": 17.8418, + "step": 18964 + }, + { + "epoch": 0.3466649606083317, + "grad_norm": 6.69084749891412, + "learning_rate": 7.593157369016804e-06, + "loss": 17.454, + "step": 18965 + }, + { + "epoch": 0.34668323980477816, + "grad_norm": 6.755837995512869, + "learning_rate": 7.592904275231727e-06, + "loss": 17.8124, + "step": 18966 + }, + { + "epoch": 0.3467015190012247, + "grad_norm": 6.100701960625412, + 
"learning_rate": 7.592651172358746e-06, + "loss": 17.5114, + "step": 18967 + }, + { + "epoch": 0.34671979819767124, + "grad_norm": 6.9029266075070135, + "learning_rate": 7.592398060398749e-06, + "loss": 17.7986, + "step": 18968 + }, + { + "epoch": 0.34673807739411777, + "grad_norm": 6.1624585317135745, + "learning_rate": 7.592144939352619e-06, + "loss": 17.2991, + "step": 18969 + }, + { + "epoch": 0.34675635659056425, + "grad_norm": 5.21458580299733, + "learning_rate": 7.591891809221247e-06, + "loss": 17.201, + "step": 18970 + }, + { + "epoch": 0.3467746357870108, + "grad_norm": 7.3290017937401455, + "learning_rate": 7.591638670005519e-06, + "loss": 18.0177, + "step": 18971 + }, + { + "epoch": 0.3467929149834573, + "grad_norm": 5.548219698809922, + "learning_rate": 7.591385521706324e-06, + "loss": 17.2977, + "step": 18972 + }, + { + "epoch": 0.34681119417990386, + "grad_norm": 6.618569731965456, + "learning_rate": 7.591132364324545e-06, + "loss": 17.7773, + "step": 18973 + }, + { + "epoch": 0.3468294733763504, + "grad_norm": 5.670441696830813, + "learning_rate": 7.5908791978610745e-06, + "loss": 17.1742, + "step": 18974 + }, + { + "epoch": 0.3468477525727969, + "grad_norm": 6.266394248876967, + "learning_rate": 7.590626022316796e-06, + "loss": 17.4635, + "step": 18975 + }, + { + "epoch": 0.3468660317692434, + "grad_norm": 7.1009250742474554, + "learning_rate": 7.590372837692597e-06, + "loss": 17.8548, + "step": 18976 + }, + { + "epoch": 0.34688431096568995, + "grad_norm": 8.017534823453387, + "learning_rate": 7.5901196439893664e-06, + "loss": 18.4484, + "step": 18977 + }, + { + "epoch": 0.3469025901621365, + "grad_norm": 9.423774676795624, + "learning_rate": 7.589866441207993e-06, + "loss": 18.6049, + "step": 18978 + }, + { + "epoch": 0.346920869358583, + "grad_norm": 5.964929523147536, + "learning_rate": 7.589613229349361e-06, + "loss": 17.4383, + "step": 18979 + }, + { + "epoch": 0.3469391485550295, + "grad_norm": 6.44123440510207, + "learning_rate": 7.5893600084143595e-06, + "loss": 17.6667, + "step": 18980 + }, + { + "epoch": 0.34695742775147603, + "grad_norm": 6.673572921483574, + "learning_rate": 7.589106778403876e-06, + "loss": 17.7146, + "step": 18981 + }, + { + "epoch": 0.34697570694792257, + "grad_norm": 5.71994061700808, + "learning_rate": 7.588853539318799e-06, + "loss": 17.4912, + "step": 18982 + }, + { + "epoch": 0.3469939861443691, + "grad_norm": 6.24984709488286, + "learning_rate": 7.588600291160013e-06, + "loss": 17.6651, + "step": 18983 + }, + { + "epoch": 0.34701226534081564, + "grad_norm": 6.899227373441953, + "learning_rate": 7.588347033928408e-06, + "loss": 18.0038, + "step": 18984 + }, + { + "epoch": 0.3470305445372621, + "grad_norm": 7.093099139063351, + "learning_rate": 7.588093767624871e-06, + "loss": 17.9797, + "step": 18985 + }, + { + "epoch": 0.34704882373370866, + "grad_norm": 6.660768228404228, + "learning_rate": 7.587840492250293e-06, + "loss": 17.3602, + "step": 18986 + }, + { + "epoch": 0.3470671029301552, + "grad_norm": 6.04878390469459, + "learning_rate": 7.587587207805555e-06, + "loss": 17.2671, + "step": 18987 + }, + { + "epoch": 0.3470853821266017, + "grad_norm": 8.783789370315844, + "learning_rate": 7.587333914291551e-06, + "loss": 19.0311, + "step": 18988 + }, + { + "epoch": 0.34710366132304826, + "grad_norm": 6.75572416519923, + "learning_rate": 7.587080611709163e-06, + "loss": 18.0498, + "step": 18989 + }, + { + "epoch": 0.34712194051949474, + "grad_norm": 6.152852386853728, + "learning_rate": 7.586827300059285e-06, + "loss": 17.3344, + "step": 
18990 + }, + { + "epoch": 0.3471402197159413, + "grad_norm": 8.027940366040578, + "learning_rate": 7.586573979342799e-06, + "loss": 18.1787, + "step": 18991 + }, + { + "epoch": 0.3471584989123878, + "grad_norm": 7.033464796487611, + "learning_rate": 7.586320649560599e-06, + "loss": 17.2953, + "step": 18992 + }, + { + "epoch": 0.34717677810883435, + "grad_norm": 6.151192848005572, + "learning_rate": 7.586067310713567e-06, + "loss": 17.5295, + "step": 18993 + }, + { + "epoch": 0.3471950573052809, + "grad_norm": 6.775272982301498, + "learning_rate": 7.585813962802595e-06, + "loss": 17.9858, + "step": 18994 + }, + { + "epoch": 0.34721333650172737, + "grad_norm": 7.107263174528969, + "learning_rate": 7.585560605828568e-06, + "loss": 17.8125, + "step": 18995 + }, + { + "epoch": 0.3472316156981739, + "grad_norm": 8.009063603799497, + "learning_rate": 7.585307239792377e-06, + "loss": 18.1853, + "step": 18996 + }, + { + "epoch": 0.34724989489462044, + "grad_norm": 5.769403736100324, + "learning_rate": 7.585053864694907e-06, + "loss": 17.463, + "step": 18997 + }, + { + "epoch": 0.347268174091067, + "grad_norm": 5.6762921224754175, + "learning_rate": 7.5848004805370475e-06, + "loss": 17.2014, + "step": 18998 + }, + { + "epoch": 0.3472864532875135, + "grad_norm": 7.577787481676225, + "learning_rate": 7.584547087319689e-06, + "loss": 18.1574, + "step": 18999 + }, + { + "epoch": 0.34730473248396, + "grad_norm": 5.820216814608337, + "learning_rate": 7.584293685043716e-06, + "loss": 17.3679, + "step": 19000 + }, + { + "epoch": 0.3473230116804065, + "grad_norm": 6.389470657618189, + "learning_rate": 7.584040273710016e-06, + "loss": 17.6025, + "step": 19001 + }, + { + "epoch": 0.34734129087685306, + "grad_norm": 6.825169449048844, + "learning_rate": 7.583786853319479e-06, + "loss": 17.7989, + "step": 19002 + }, + { + "epoch": 0.3473595700732996, + "grad_norm": 5.567763698087035, + "learning_rate": 7.583533423872997e-06, + "loss": 17.2088, + "step": 19003 + }, + { + "epoch": 0.3473778492697461, + "grad_norm": 6.1100989530468, + "learning_rate": 7.583279985371452e-06, + "loss": 17.6809, + "step": 19004 + }, + { + "epoch": 0.3473961284661926, + "grad_norm": 7.535361986395495, + "learning_rate": 7.583026537815734e-06, + "loss": 17.9317, + "step": 19005 + }, + { + "epoch": 0.34741440766263915, + "grad_norm": 7.958814589469808, + "learning_rate": 7.582773081206733e-06, + "loss": 18.0453, + "step": 19006 + }, + { + "epoch": 0.3474326868590857, + "grad_norm": 7.2911889967958015, + "learning_rate": 7.582519615545339e-06, + "loss": 18.1261, + "step": 19007 + }, + { + "epoch": 0.3474509660555322, + "grad_norm": 8.145035509363272, + "learning_rate": 7.582266140832435e-06, + "loss": 18.7788, + "step": 19008 + }, + { + "epoch": 0.3474692452519787, + "grad_norm": 7.024365272537474, + "learning_rate": 7.582012657068912e-06, + "loss": 17.6573, + "step": 19009 + }, + { + "epoch": 0.34748752444842523, + "grad_norm": 5.462961894964153, + "learning_rate": 7.58175916425566e-06, + "loss": 17.2726, + "step": 19010 + }, + { + "epoch": 0.34750580364487177, + "grad_norm": 7.841845760232381, + "learning_rate": 7.581505662393564e-06, + "loss": 18.0179, + "step": 19011 + }, + { + "epoch": 0.3475240828413183, + "grad_norm": 5.8154820249981745, + "learning_rate": 7.581252151483518e-06, + "loss": 17.4179, + "step": 19012 + }, + { + "epoch": 0.34754236203776484, + "grad_norm": 5.71731248489601, + "learning_rate": 7.580998631526406e-06, + "loss": 17.18, + "step": 19013 + }, + { + "epoch": 0.3475606412342113, + "grad_norm": 6.907962212338977, 
+ "learning_rate": 7.580745102523117e-06, + "loss": 17.7363, + "step": 19014 + }, + { + "epoch": 0.34757892043065786, + "grad_norm": 8.04364219464597, + "learning_rate": 7.580491564474542e-06, + "loss": 18.3559, + "step": 19015 + }, + { + "epoch": 0.3475971996271044, + "grad_norm": 7.898696894901157, + "learning_rate": 7.580238017381565e-06, + "loss": 18.4485, + "step": 19016 + }, + { + "epoch": 0.3476154788235509, + "grad_norm": 7.3644667197822535, + "learning_rate": 7.57998446124508e-06, + "loss": 17.9994, + "step": 19017 + }, + { + "epoch": 0.34763375801999746, + "grad_norm": 7.964761199509911, + "learning_rate": 7.579730896065974e-06, + "loss": 18.6357, + "step": 19018 + }, + { + "epoch": 0.34765203721644394, + "grad_norm": 7.69745785077081, + "learning_rate": 7.5794773218451324e-06, + "loss": 17.9703, + "step": 19019 + }, + { + "epoch": 0.3476703164128905, + "grad_norm": 5.963685721172786, + "learning_rate": 7.579223738583448e-06, + "loss": 17.3082, + "step": 19020 + }, + { + "epoch": 0.347688595609337, + "grad_norm": 5.4843169921823245, + "learning_rate": 7.5789701462818075e-06, + "loss": 17.3481, + "step": 19021 + }, + { + "epoch": 0.34770687480578355, + "grad_norm": 7.035984081381331, + "learning_rate": 7.578716544941102e-06, + "loss": 17.8914, + "step": 19022 + }, + { + "epoch": 0.3477251540022301, + "grad_norm": 8.248334072918139, + "learning_rate": 7.578462934562217e-06, + "loss": 17.92, + "step": 19023 + }, + { + "epoch": 0.34774343319867657, + "grad_norm": 6.501565335749314, + "learning_rate": 7.5782093151460435e-06, + "loss": 17.3995, + "step": 19024 + }, + { + "epoch": 0.3477617123951231, + "grad_norm": 7.022827693322409, + "learning_rate": 7.5779556866934715e-06, + "loss": 18.0678, + "step": 19025 + }, + { + "epoch": 0.34777999159156964, + "grad_norm": 8.056792785808573, + "learning_rate": 7.577702049205387e-06, + "loss": 18.0544, + "step": 19026 + }, + { + "epoch": 0.3477982707880162, + "grad_norm": 5.813902440435188, + "learning_rate": 7.577448402682682e-06, + "loss": 17.2232, + "step": 19027 + }, + { + "epoch": 0.3478165499844627, + "grad_norm": 6.1137023407954185, + "learning_rate": 7.577194747126244e-06, + "loss": 17.3333, + "step": 19028 + }, + { + "epoch": 0.3478348291809092, + "grad_norm": 5.989409712986414, + "learning_rate": 7.57694108253696e-06, + "loss": 17.2316, + "step": 19029 + }, + { + "epoch": 0.3478531083773557, + "grad_norm": 5.130493179645548, + "learning_rate": 7.576687408915723e-06, + "loss": 16.9334, + "step": 19030 + }, + { + "epoch": 0.34787138757380226, + "grad_norm": 6.070497004139617, + "learning_rate": 7.576433726263419e-06, + "loss": 17.4544, + "step": 19031 + }, + { + "epoch": 0.3478896667702488, + "grad_norm": 6.206023807432817, + "learning_rate": 7.57618003458094e-06, + "loss": 17.7142, + "step": 19032 + }, + { + "epoch": 0.34790794596669533, + "grad_norm": 6.3894035943645315, + "learning_rate": 7.575926333869172e-06, + "loss": 17.2279, + "step": 19033 + }, + { + "epoch": 0.3479262251631418, + "grad_norm": 4.903387349998527, + "learning_rate": 7.575672624129006e-06, + "loss": 16.7663, + "step": 19034 + }, + { + "epoch": 0.34794450435958835, + "grad_norm": 6.085588517306378, + "learning_rate": 7.575418905361331e-06, + "loss": 17.1808, + "step": 19035 + }, + { + "epoch": 0.3479627835560349, + "grad_norm": 8.329898932990947, + "learning_rate": 7.575165177567036e-06, + "loss": 17.7279, + "step": 19036 + }, + { + "epoch": 0.3479810627524814, + "grad_norm": 5.714696455360687, + "learning_rate": 7.574911440747013e-06, + "loss": 17.2421, + "step": 
19037 + }, + { + "epoch": 0.3479993419489279, + "grad_norm": 7.4415930191479, + "learning_rate": 7.574657694902146e-06, + "loss": 18.241, + "step": 19038 + }, + { + "epoch": 0.34801762114537443, + "grad_norm": 7.682200406043389, + "learning_rate": 7.574403940033326e-06, + "loss": 17.6827, + "step": 19039 + }, + { + "epoch": 0.34803590034182097, + "grad_norm": 7.373737036423542, + "learning_rate": 7.574150176141446e-06, + "loss": 18.2338, + "step": 19040 + }, + { + "epoch": 0.3480541795382675, + "grad_norm": 6.582039258523045, + "learning_rate": 7.573896403227395e-06, + "loss": 17.7226, + "step": 19041 + }, + { + "epoch": 0.34807245873471404, + "grad_norm": 5.455502816831152, + "learning_rate": 7.573642621292056e-06, + "loss": 17.3168, + "step": 19042 + }, + { + "epoch": 0.3480907379311605, + "grad_norm": 10.111280711634576, + "learning_rate": 7.5733888303363235e-06, + "loss": 18.9197, + "step": 19043 + }, + { + "epoch": 0.34810901712760706, + "grad_norm": 7.355693970955116, + "learning_rate": 7.57313503036109e-06, + "loss": 17.8706, + "step": 19044 + }, + { + "epoch": 0.3481272963240536, + "grad_norm": 5.411445701675955, + "learning_rate": 7.572881221367239e-06, + "loss": 17.137, + "step": 19045 + }, + { + "epoch": 0.34814557552050013, + "grad_norm": 7.883532814525666, + "learning_rate": 7.572627403355664e-06, + "loss": 18.134, + "step": 19046 + }, + { + "epoch": 0.34816385471694666, + "grad_norm": 6.0318294601231495, + "learning_rate": 7.572373576327251e-06, + "loss": 17.2574, + "step": 19047 + }, + { + "epoch": 0.34818213391339314, + "grad_norm": 7.86051881596752, + "learning_rate": 7.572119740282893e-06, + "loss": 18.2593, + "step": 19048 + }, + { + "epoch": 0.3482004131098397, + "grad_norm": 5.075490205629728, + "learning_rate": 7.571865895223478e-06, + "loss": 16.8729, + "step": 19049 + }, + { + "epoch": 0.3482186923062862, + "grad_norm": 8.147328538856263, + "learning_rate": 7.5716120411498965e-06, + "loss": 18.4634, + "step": 19050 + }, + { + "epoch": 0.34823697150273275, + "grad_norm": 5.504469117713465, + "learning_rate": 7.571358178063039e-06, + "loss": 17.1509, + "step": 19051 + }, + { + "epoch": 0.3482552506991793, + "grad_norm": 6.705735662361387, + "learning_rate": 7.571104305963792e-06, + "loss": 17.7934, + "step": 19052 + }, + { + "epoch": 0.34827352989562577, + "grad_norm": 5.3881058828175705, + "learning_rate": 7.570850424853049e-06, + "loss": 16.9258, + "step": 19053 + }, + { + "epoch": 0.3482918090920723, + "grad_norm": 6.8951777748095635, + "learning_rate": 7.570596534731699e-06, + "loss": 17.8119, + "step": 19054 + }, + { + "epoch": 0.34831008828851884, + "grad_norm": 5.937914246025563, + "learning_rate": 7.570342635600632e-06, + "loss": 17.2293, + "step": 19055 + }, + { + "epoch": 0.3483283674849654, + "grad_norm": 6.921626001753688, + "learning_rate": 7.570088727460735e-06, + "loss": 17.7745, + "step": 19056 + }, + { + "epoch": 0.3483466466814119, + "grad_norm": 8.85207934715673, + "learning_rate": 7.5698348103129e-06, + "loss": 18.2198, + "step": 19057 + }, + { + "epoch": 0.3483649258778584, + "grad_norm": 5.969737430063275, + "learning_rate": 7.569580884158017e-06, + "loss": 17.1383, + "step": 19058 + }, + { + "epoch": 0.3483832050743049, + "grad_norm": 7.2601443315016585, + "learning_rate": 7.569326948996978e-06, + "loss": 17.7702, + "step": 19059 + }, + { + "epoch": 0.34840148427075146, + "grad_norm": 7.75588757033667, + "learning_rate": 7.569073004830669e-06, + "loss": 17.9613, + "step": 19060 + }, + { + "epoch": 0.348419763467198, + "grad_norm": 
5.469352595144674, + "learning_rate": 7.568819051659983e-06, + "loss": 17.4016, + "step": 19061 + }, + { + "epoch": 0.34843804266364453, + "grad_norm": 6.614436525311249, + "learning_rate": 7.568565089485809e-06, + "loss": 17.7536, + "step": 19062 + }, + { + "epoch": 0.348456321860091, + "grad_norm": 5.400787195704766, + "learning_rate": 7.568311118309035e-06, + "loss": 17.1057, + "step": 19063 + }, + { + "epoch": 0.34847460105653755, + "grad_norm": 6.276006433668197, + "learning_rate": 7.5680571381305555e-06, + "loss": 17.4418, + "step": 19064 + }, + { + "epoch": 0.3484928802529841, + "grad_norm": 5.90012870807546, + "learning_rate": 7.567803148951259e-06, + "loss": 17.2305, + "step": 19065 + }, + { + "epoch": 0.3485111594494306, + "grad_norm": 9.806247923713876, + "learning_rate": 7.5675491507720355e-06, + "loss": 18.9236, + "step": 19066 + }, + { + "epoch": 0.34852943864587715, + "grad_norm": 5.767030138984717, + "learning_rate": 7.567295143593774e-06, + "loss": 17.2684, + "step": 19067 + }, + { + "epoch": 0.34854771784232363, + "grad_norm": 7.542900090242737, + "learning_rate": 7.567041127417367e-06, + "loss": 18.4578, + "step": 19068 + }, + { + "epoch": 0.34856599703877017, + "grad_norm": 5.341792812164293, + "learning_rate": 7.5667871022437025e-06, + "loss": 17.1259, + "step": 19069 + }, + { + "epoch": 0.3485842762352167, + "grad_norm": 7.385116990771949, + "learning_rate": 7.566533068073671e-06, + "loss": 18.0127, + "step": 19070 + }, + { + "epoch": 0.34860255543166324, + "grad_norm": 7.847511809859978, + "learning_rate": 7.566279024908165e-06, + "loss": 18.119, + "step": 19071 + }, + { + "epoch": 0.3486208346281097, + "grad_norm": 6.816524743102691, + "learning_rate": 7.566024972748074e-06, + "loss": 17.5523, + "step": 19072 + }, + { + "epoch": 0.34863911382455626, + "grad_norm": 7.3187033101902355, + "learning_rate": 7.565770911594288e-06, + "loss": 18.1721, + "step": 19073 + }, + { + "epoch": 0.3486573930210028, + "grad_norm": 7.181470743501008, + "learning_rate": 7.565516841447698e-06, + "loss": 17.9119, + "step": 19074 + }, + { + "epoch": 0.34867567221744933, + "grad_norm": 8.020648660161886, + "learning_rate": 7.565262762309194e-06, + "loss": 18.5753, + "step": 19075 + }, + { + "epoch": 0.34869395141389586, + "grad_norm": 7.215914984143746, + "learning_rate": 7.565008674179666e-06, + "loss": 17.8941, + "step": 19076 + }, + { + "epoch": 0.34871223061034234, + "grad_norm": 6.445584663850671, + "learning_rate": 7.564754577060005e-06, + "loss": 17.6232, + "step": 19077 + }, + { + "epoch": 0.3487305098067889, + "grad_norm": 7.515743718445765, + "learning_rate": 7.5645004709511015e-06, + "loss": 17.7808, + "step": 19078 + }, + { + "epoch": 0.3487487890032354, + "grad_norm": 7.9065855217742165, + "learning_rate": 7.564246355853846e-06, + "loss": 17.7412, + "step": 19079 + }, + { + "epoch": 0.34876706819968195, + "grad_norm": 7.175318809610735, + "learning_rate": 7.563992231769131e-06, + "loss": 18.122, + "step": 19080 + }, + { + "epoch": 0.3487853473961285, + "grad_norm": 6.167743180577843, + "learning_rate": 7.563738098697846e-06, + "loss": 17.301, + "step": 19081 + }, + { + "epoch": 0.34880362659257497, + "grad_norm": 7.036348573682763, + "learning_rate": 7.56348395664088e-06, + "loss": 17.7892, + "step": 19082 + }, + { + "epoch": 0.3488219057890215, + "grad_norm": 7.608559451051648, + "learning_rate": 7.563229805599126e-06, + "loss": 18.3402, + "step": 19083 + }, + { + "epoch": 0.34884018498546804, + "grad_norm": 5.978868292873253, + "learning_rate": 7.562975645573473e-06, + 
"loss": 17.487, + "step": 19084 + }, + { + "epoch": 0.3488584641819146, + "grad_norm": 6.081142939739899, + "learning_rate": 7.5627214765648134e-06, + "loss": 17.2917, + "step": 19085 + }, + { + "epoch": 0.3488767433783611, + "grad_norm": 6.354029078628353, + "learning_rate": 7.562467298574039e-06, + "loss": 17.5563, + "step": 19086 + }, + { + "epoch": 0.3488950225748076, + "grad_norm": 6.797968928490002, + "learning_rate": 7.562213111602037e-06, + "loss": 17.316, + "step": 19087 + }, + { + "epoch": 0.3489133017712541, + "grad_norm": 5.105401632683817, + "learning_rate": 7.5619589156497004e-06, + "loss": 17.0003, + "step": 19088 + }, + { + "epoch": 0.34893158096770066, + "grad_norm": 6.572664449949223, + "learning_rate": 7.561704710717921e-06, + "loss": 17.4657, + "step": 19089 + }, + { + "epoch": 0.3489498601641472, + "grad_norm": 6.692513585854958, + "learning_rate": 7.5614504968075895e-06, + "loss": 17.6388, + "step": 19090 + }, + { + "epoch": 0.34896813936059373, + "grad_norm": 6.308701198746419, + "learning_rate": 7.561196273919596e-06, + "loss": 17.6996, + "step": 19091 + }, + { + "epoch": 0.3489864185570402, + "grad_norm": 8.232221482867809, + "learning_rate": 7.560942042054831e-06, + "loss": 18.2663, + "step": 19092 + }, + { + "epoch": 0.34900469775348675, + "grad_norm": 5.985624416470622, + "learning_rate": 7.560687801214186e-06, + "loss": 17.2788, + "step": 19093 + }, + { + "epoch": 0.3490229769499333, + "grad_norm": 8.288715722382547, + "learning_rate": 7.560433551398553e-06, + "loss": 18.4856, + "step": 19094 + }, + { + "epoch": 0.3490412561463798, + "grad_norm": 6.881455031953275, + "learning_rate": 7.560179292608823e-06, + "loss": 17.5496, + "step": 19095 + }, + { + "epoch": 0.34905953534282635, + "grad_norm": 6.606205236103944, + "learning_rate": 7.5599250248458864e-06, + "loss": 17.4679, + "step": 19096 + }, + { + "epoch": 0.34907781453927283, + "grad_norm": 6.94821201997258, + "learning_rate": 7.559670748110634e-06, + "loss": 17.885, + "step": 19097 + }, + { + "epoch": 0.34909609373571937, + "grad_norm": 7.050968024514505, + "learning_rate": 7.559416462403959e-06, + "loss": 18.0848, + "step": 19098 + }, + { + "epoch": 0.3491143729321659, + "grad_norm": 6.783862967584317, + "learning_rate": 7.559162167726751e-06, + "loss": 17.2696, + "step": 19099 + }, + { + "epoch": 0.34913265212861244, + "grad_norm": 6.884680949415629, + "learning_rate": 7.558907864079901e-06, + "loss": 17.5352, + "step": 19100 + }, + { + "epoch": 0.349150931325059, + "grad_norm": 7.496809186050319, + "learning_rate": 7.558653551464301e-06, + "loss": 18.0601, + "step": 19101 + }, + { + "epoch": 0.34916921052150546, + "grad_norm": 7.9261805044850115, + "learning_rate": 7.558399229880843e-06, + "loss": 18.2118, + "step": 19102 + }, + { + "epoch": 0.349187489717952, + "grad_norm": 8.338104394501705, + "learning_rate": 7.558144899330418e-06, + "loss": 18.584, + "step": 19103 + }, + { + "epoch": 0.34920576891439853, + "grad_norm": 6.305445602880384, + "learning_rate": 7.557890559813916e-06, + "loss": 17.5534, + "step": 19104 + }, + { + "epoch": 0.34922404811084506, + "grad_norm": 6.472521795684011, + "learning_rate": 7.557636211332231e-06, + "loss": 17.5505, + "step": 19105 + }, + { + "epoch": 0.34924232730729154, + "grad_norm": 6.9698569968533866, + "learning_rate": 7.557381853886252e-06, + "loss": 17.612, + "step": 19106 + }, + { + "epoch": 0.3492606065037381, + "grad_norm": 6.984919849468305, + "learning_rate": 7.557127487476872e-06, + "loss": 17.6495, + "step": 19107 + }, + { + "epoch": 0.3492788857001846, 
+ "grad_norm": 6.338068856962003, + "learning_rate": 7.556873112104981e-06, + "loss": 17.7788, + "step": 19108 + }, + { + "epoch": 0.34929716489663115, + "grad_norm": 7.984494388826529, + "learning_rate": 7.556618727771473e-06, + "loss": 18.0341, + "step": 19109 + }, + { + "epoch": 0.3493154440930777, + "grad_norm": 5.431516007249899, + "learning_rate": 7.5563643344772385e-06, + "loss": 17.1211, + "step": 19110 + }, + { + "epoch": 0.34933372328952417, + "grad_norm": 6.056252371829921, + "learning_rate": 7.5561099322231665e-06, + "loss": 17.3791, + "step": 19111 + }, + { + "epoch": 0.3493520024859707, + "grad_norm": 7.158640698573915, + "learning_rate": 7.555855521010153e-06, + "loss": 17.7733, + "step": 19112 + }, + { + "epoch": 0.34937028168241724, + "grad_norm": 7.090888065074658, + "learning_rate": 7.555601100839087e-06, + "loss": 17.8602, + "step": 19113 + }, + { + "epoch": 0.3493885608788638, + "grad_norm": 6.705794721948667, + "learning_rate": 7.555346671710861e-06, + "loss": 17.6568, + "step": 19114 + }, + { + "epoch": 0.3494068400753103, + "grad_norm": 6.4504616451705346, + "learning_rate": 7.555092233626367e-06, + "loss": 17.4211, + "step": 19115 + }, + { + "epoch": 0.3494251192717568, + "grad_norm": 7.092298629616406, + "learning_rate": 7.5548377865864955e-06, + "loss": 17.7163, + "step": 19116 + }, + { + "epoch": 0.3494433984682033, + "grad_norm": 7.2102206459298746, + "learning_rate": 7.554583330592141e-06, + "loss": 18.0103, + "step": 19117 + }, + { + "epoch": 0.34946167766464986, + "grad_norm": 7.2977341125012325, + "learning_rate": 7.554328865644193e-06, + "loss": 17.9424, + "step": 19118 + }, + { + "epoch": 0.3494799568610964, + "grad_norm": 6.037901914704069, + "learning_rate": 7.5540743917435435e-06, + "loss": 17.3669, + "step": 19119 + }, + { + "epoch": 0.34949823605754293, + "grad_norm": 6.128530930107559, + "learning_rate": 7.553819908891085e-06, + "loss": 17.3773, + "step": 19120 + }, + { + "epoch": 0.3495165152539894, + "grad_norm": 7.217781933883795, + "learning_rate": 7.55356541708771e-06, + "loss": 18.0618, + "step": 19121 + }, + { + "epoch": 0.34953479445043595, + "grad_norm": 7.532066289037688, + "learning_rate": 7.553310916334307e-06, + "loss": 17.9471, + "step": 19122 + }, + { + "epoch": 0.3495530736468825, + "grad_norm": 7.884880601530532, + "learning_rate": 7.553056406631773e-06, + "loss": 18.0946, + "step": 19123 + }, + { + "epoch": 0.349571352843329, + "grad_norm": 6.906685803731557, + "learning_rate": 7.5528018879809985e-06, + "loss": 17.5602, + "step": 19124 + }, + { + "epoch": 0.34958963203977556, + "grad_norm": 6.31357951210493, + "learning_rate": 7.552547360382873e-06, + "loss": 17.8895, + "step": 19125 + }, + { + "epoch": 0.34960791123622204, + "grad_norm": 5.962814846618306, + "learning_rate": 7.552292823838292e-06, + "loss": 17.2481, + "step": 19126 + }, + { + "epoch": 0.34962619043266857, + "grad_norm": 5.733791137215473, + "learning_rate": 7.552038278348146e-06, + "loss": 17.3406, + "step": 19127 + }, + { + "epoch": 0.3496444696291151, + "grad_norm": 6.127260787737624, + "learning_rate": 7.5517837239133275e-06, + "loss": 17.4258, + "step": 19128 + }, + { + "epoch": 0.34966274882556164, + "grad_norm": 5.498357279762782, + "learning_rate": 7.551529160534727e-06, + "loss": 17.0711, + "step": 19129 + }, + { + "epoch": 0.3496810280220082, + "grad_norm": 7.139696871622537, + "learning_rate": 7.55127458821324e-06, + "loss": 17.6185, + "step": 19130 + }, + { + "epoch": 0.34969930721845466, + "grad_norm": 5.361238809589072, + "learning_rate": 
7.551020006949756e-06, + "loss": 17.1274, + "step": 19131 + }, + { + "epoch": 0.3497175864149012, + "grad_norm": 5.661615351669901, + "learning_rate": 7.5507654167451684e-06, + "loss": 17.1587, + "step": 19132 + }, + { + "epoch": 0.34973586561134773, + "grad_norm": 7.1297421996471515, + "learning_rate": 7.550510817600369e-06, + "loss": 17.9598, + "step": 19133 + }, + { + "epoch": 0.34975414480779426, + "grad_norm": 6.251832945188568, + "learning_rate": 7.5502562095162516e-06, + "loss": 17.431, + "step": 19134 + }, + { + "epoch": 0.3497724240042408, + "grad_norm": 6.8688043884264065, + "learning_rate": 7.550001592493705e-06, + "loss": 17.7732, + "step": 19135 + }, + { + "epoch": 0.3497907032006873, + "grad_norm": 7.889183889188873, + "learning_rate": 7.549746966533627e-06, + "loss": 17.9789, + "step": 19136 + }, + { + "epoch": 0.3498089823971338, + "grad_norm": 5.877786857054396, + "learning_rate": 7.5494923316369075e-06, + "loss": 17.3148, + "step": 19137 + }, + { + "epoch": 0.34982726159358035, + "grad_norm": 6.4262491059760976, + "learning_rate": 7.549237687804436e-06, + "loss": 17.6071, + "step": 19138 + }, + { + "epoch": 0.3498455407900269, + "grad_norm": 8.282359710148388, + "learning_rate": 7.548983035037107e-06, + "loss": 18.281, + "step": 19139 + }, + { + "epoch": 0.34986381998647337, + "grad_norm": 7.108549038253616, + "learning_rate": 7.548728373335816e-06, + "loss": 17.8754, + "step": 19140 + }, + { + "epoch": 0.3498820991829199, + "grad_norm": 8.155913885820201, + "learning_rate": 7.548473702701453e-06, + "loss": 17.8797, + "step": 19141 + }, + { + "epoch": 0.34990037837936644, + "grad_norm": 6.6169587638016925, + "learning_rate": 7.54821902313491e-06, + "loss": 17.6922, + "step": 19142 + }, + { + "epoch": 0.349918657575813, + "grad_norm": 7.043672133519509, + "learning_rate": 7.547964334637081e-06, + "loss": 18.2744, + "step": 19143 + }, + { + "epoch": 0.3499369367722595, + "grad_norm": 9.829544045235695, + "learning_rate": 7.547709637208859e-06, + "loss": 17.7204, + "step": 19144 + }, + { + "epoch": 0.349955215968706, + "grad_norm": 6.093992746556152, + "learning_rate": 7.547454930851135e-06, + "loss": 17.446, + "step": 19145 + }, + { + "epoch": 0.3499734951651525, + "grad_norm": 6.498279843625785, + "learning_rate": 7.5472002155648015e-06, + "loss": 17.5197, + "step": 19146 + }, + { + "epoch": 0.34999177436159906, + "grad_norm": 6.192123225655105, + "learning_rate": 7.5469454913507534e-06, + "loss": 17.5905, + "step": 19147 + }, + { + "epoch": 0.3500100535580456, + "grad_norm": 7.238979418550019, + "learning_rate": 7.546690758209883e-06, + "loss": 17.7479, + "step": 19148 + }, + { + "epoch": 0.35002833275449213, + "grad_norm": 7.023267655072233, + "learning_rate": 7.5464360161430805e-06, + "loss": 17.6625, + "step": 19149 + }, + { + "epoch": 0.3500466119509386, + "grad_norm": 6.64545949456632, + "learning_rate": 7.546181265151241e-06, + "loss": 17.5238, + "step": 19150 + }, + { + "epoch": 0.35006489114738515, + "grad_norm": 7.66921051515604, + "learning_rate": 7.545926505235258e-06, + "loss": 18.1588, + "step": 19151 + }, + { + "epoch": 0.3500831703438317, + "grad_norm": 7.1549883339917955, + "learning_rate": 7.545671736396023e-06, + "loss": 17.8666, + "step": 19152 + }, + { + "epoch": 0.3501014495402782, + "grad_norm": 7.647805578815517, + "learning_rate": 7.545416958634431e-06, + "loss": 18.2531, + "step": 19153 + }, + { + "epoch": 0.35011972873672476, + "grad_norm": 4.906920746033703, + "learning_rate": 7.5451621719513725e-06, + "loss": 16.9204, + "step": 19154 + }, + { + 
"epoch": 0.35013800793317124, + "grad_norm": 6.69855203244943, + "learning_rate": 7.544907376347741e-06, + "loss": 17.762, + "step": 19155 + }, + { + "epoch": 0.35015628712961777, + "grad_norm": 6.688795121293713, + "learning_rate": 7.544652571824429e-06, + "loss": 17.6316, + "step": 19156 + }, + { + "epoch": 0.3501745663260643, + "grad_norm": 7.152484763408261, + "learning_rate": 7.544397758382331e-06, + "loss": 17.6217, + "step": 19157 + }, + { + "epoch": 0.35019284552251084, + "grad_norm": 5.769786048137639, + "learning_rate": 7.544142936022342e-06, + "loss": 17.3694, + "step": 19158 + }, + { + "epoch": 0.3502111247189574, + "grad_norm": 5.942327537010234, + "learning_rate": 7.543888104745352e-06, + "loss": 17.4199, + "step": 19159 + }, + { + "epoch": 0.35022940391540386, + "grad_norm": 8.713755063742017, + "learning_rate": 7.543633264552253e-06, + "loss": 18.8002, + "step": 19160 + }, + { + "epoch": 0.3502476831118504, + "grad_norm": 6.111676468636414, + "learning_rate": 7.5433784154439425e-06, + "loss": 17.347, + "step": 19161 + }, + { + "epoch": 0.35026596230829693, + "grad_norm": 5.696435753875319, + "learning_rate": 7.5431235574213104e-06, + "loss": 17.2735, + "step": 19162 + }, + { + "epoch": 0.35028424150474347, + "grad_norm": 6.897497797903919, + "learning_rate": 7.54286869048525e-06, + "loss": 17.7057, + "step": 19163 + }, + { + "epoch": 0.35030252070119, + "grad_norm": 7.618375172339585, + "learning_rate": 7.542613814636655e-06, + "loss": 18.0204, + "step": 19164 + }, + { + "epoch": 0.3503207998976365, + "grad_norm": 6.664557194664785, + "learning_rate": 7.542358929876421e-06, + "loss": 17.899, + "step": 19165 + }, + { + "epoch": 0.350339079094083, + "grad_norm": 6.527456188713896, + "learning_rate": 7.5421040362054385e-06, + "loss": 17.7848, + "step": 19166 + }, + { + "epoch": 0.35035735829052955, + "grad_norm": 6.481927320564349, + "learning_rate": 7.541849133624603e-06, + "loss": 17.5153, + "step": 19167 + }, + { + "epoch": 0.3503756374869761, + "grad_norm": 6.508228343221474, + "learning_rate": 7.541594222134807e-06, + "loss": 17.4626, + "step": 19168 + }, + { + "epoch": 0.3503939166834226, + "grad_norm": 6.364981374422483, + "learning_rate": 7.541339301736943e-06, + "loss": 17.3569, + "step": 19169 + }, + { + "epoch": 0.3504121958798691, + "grad_norm": 7.083302689962924, + "learning_rate": 7.541084372431904e-06, + "loss": 17.8332, + "step": 19170 + }, + { + "epoch": 0.35043047507631564, + "grad_norm": 7.3664542017646895, + "learning_rate": 7.5408294342205866e-06, + "loss": 17.9209, + "step": 19171 + }, + { + "epoch": 0.3504487542727622, + "grad_norm": 6.621397043851026, + "learning_rate": 7.540574487103882e-06, + "loss": 17.7306, + "step": 19172 + }, + { + "epoch": 0.3504670334692087, + "grad_norm": 6.072460447216706, + "learning_rate": 7.540319531082685e-06, + "loss": 17.151, + "step": 19173 + }, + { + "epoch": 0.3504853126656552, + "grad_norm": 6.006454141057217, + "learning_rate": 7.540064566157887e-06, + "loss": 17.1267, + "step": 19174 + }, + { + "epoch": 0.3505035918621017, + "grad_norm": 5.747981494818808, + "learning_rate": 7.539809592330385e-06, + "loss": 17.1149, + "step": 19175 + }, + { + "epoch": 0.35052187105854826, + "grad_norm": 6.813425080652564, + "learning_rate": 7.539554609601069e-06, + "loss": 17.6495, + "step": 19176 + }, + { + "epoch": 0.3505401502549948, + "grad_norm": 7.750637941850223, + "learning_rate": 7.539299617970834e-06, + "loss": 17.9969, + "step": 19177 + }, + { + "epoch": 0.35055842945144133, + "grad_norm": 7.154887239422486, + 
"learning_rate": 7.539044617440576e-06, + "loss": 17.5975, + "step": 19178 + }, + { + "epoch": 0.3505767086478878, + "grad_norm": 5.696516250028021, + "learning_rate": 7.538789608011185e-06, + "loss": 17.2689, + "step": 19179 + }, + { + "epoch": 0.35059498784433435, + "grad_norm": 6.376758840470761, + "learning_rate": 7.53853458968356e-06, + "loss": 17.5849, + "step": 19180 + }, + { + "epoch": 0.3506132670407809, + "grad_norm": 5.269383120275941, + "learning_rate": 7.538279562458588e-06, + "loss": 17.1573, + "step": 19181 + }, + { + "epoch": 0.3506315462372274, + "grad_norm": 7.144242085682909, + "learning_rate": 7.538024526337169e-06, + "loss": 18.1725, + "step": 19182 + }, + { + "epoch": 0.35064982543367396, + "grad_norm": 6.782469407083449, + "learning_rate": 7.537769481320194e-06, + "loss": 17.6427, + "step": 19183 + }, + { + "epoch": 0.35066810463012044, + "grad_norm": 6.493324414981373, + "learning_rate": 7.537514427408555e-06, + "loss": 17.4828, + "step": 19184 + }, + { + "epoch": 0.35068638382656697, + "grad_norm": 6.67935934419827, + "learning_rate": 7.537259364603149e-06, + "loss": 17.6403, + "step": 19185 + }, + { + "epoch": 0.3507046630230135, + "grad_norm": 6.6564380539045604, + "learning_rate": 7.53700429290487e-06, + "loss": 17.708, + "step": 19186 + }, + { + "epoch": 0.35072294221946004, + "grad_norm": 7.4398656587598895, + "learning_rate": 7.536749212314611e-06, + "loss": 17.5967, + "step": 19187 + }, + { + "epoch": 0.3507412214159066, + "grad_norm": 7.525312261833701, + "learning_rate": 7.5364941228332655e-06, + "loss": 17.6407, + "step": 19188 + }, + { + "epoch": 0.35075950061235306, + "grad_norm": 6.326946912306059, + "learning_rate": 7.536239024461729e-06, + "loss": 17.5081, + "step": 19189 + }, + { + "epoch": 0.3507777798087996, + "grad_norm": 6.086980055709366, + "learning_rate": 7.535983917200895e-06, + "loss": 17.3881, + "step": 19190 + }, + { + "epoch": 0.35079605900524613, + "grad_norm": 5.687678816216126, + "learning_rate": 7.535728801051656e-06, + "loss": 17.2807, + "step": 19191 + }, + { + "epoch": 0.35081433820169267, + "grad_norm": 6.775249668295368, + "learning_rate": 7.5354736760149085e-06, + "loss": 17.5426, + "step": 19192 + }, + { + "epoch": 0.3508326173981392, + "grad_norm": 4.997770310487413, + "learning_rate": 7.535218542091546e-06, + "loss": 16.8495, + "step": 19193 + }, + { + "epoch": 0.3508508965945857, + "grad_norm": 8.67709245403442, + "learning_rate": 7.534963399282462e-06, + "loss": 17.9259, + "step": 19194 + }, + { + "epoch": 0.3508691757910322, + "grad_norm": 7.081679979038991, + "learning_rate": 7.5347082475885515e-06, + "loss": 17.7819, + "step": 19195 + }, + { + "epoch": 0.35088745498747875, + "grad_norm": 6.15128860725821, + "learning_rate": 7.534453087010709e-06, + "loss": 17.4622, + "step": 19196 + }, + { + "epoch": 0.3509057341839253, + "grad_norm": 9.454014416296994, + "learning_rate": 7.534197917549827e-06, + "loss": 18.5688, + "step": 19197 + }, + { + "epoch": 0.3509240133803718, + "grad_norm": 5.653357758714179, + "learning_rate": 7.533942739206802e-06, + "loss": 17.0181, + "step": 19198 + }, + { + "epoch": 0.3509422925768183, + "grad_norm": 8.371572714131837, + "learning_rate": 7.533687551982529e-06, + "loss": 18.4663, + "step": 19199 + }, + { + "epoch": 0.35096057177326484, + "grad_norm": 5.903768986161092, + "learning_rate": 7.533432355877899e-06, + "loss": 17.2126, + "step": 19200 + }, + { + "epoch": 0.3509788509697114, + "grad_norm": 7.8339977387852615, + "learning_rate": 7.533177150893809e-06, + "loss": 17.8432, + "step": 
19201 + }, + { + "epoch": 0.3509971301661579, + "grad_norm": 5.658744341429674, + "learning_rate": 7.532921937031153e-06, + "loss": 17.3167, + "step": 19202 + }, + { + "epoch": 0.35101540936260445, + "grad_norm": 6.03939753372727, + "learning_rate": 7.532666714290826e-06, + "loss": 16.9948, + "step": 19203 + }, + { + "epoch": 0.3510336885590509, + "grad_norm": 6.768835174965798, + "learning_rate": 7.532411482673721e-06, + "loss": 17.5395, + "step": 19204 + }, + { + "epoch": 0.35105196775549746, + "grad_norm": 5.418941345037018, + "learning_rate": 7.532156242180734e-06, + "loss": 17.0491, + "step": 19205 + }, + { + "epoch": 0.351070246951944, + "grad_norm": 6.822958325551641, + "learning_rate": 7.531900992812759e-06, + "loss": 17.5348, + "step": 19206 + }, + { + "epoch": 0.35108852614839053, + "grad_norm": 7.108892069534108, + "learning_rate": 7.531645734570689e-06, + "loss": 17.9473, + "step": 19207 + }, + { + "epoch": 0.351106805344837, + "grad_norm": 6.974757906404995, + "learning_rate": 7.531390467455424e-06, + "loss": 18.0174, + "step": 19208 + }, + { + "epoch": 0.35112508454128355, + "grad_norm": 7.23199396986483, + "learning_rate": 7.531135191467852e-06, + "loss": 17.9606, + "step": 19209 + }, + { + "epoch": 0.3511433637377301, + "grad_norm": 6.282276272652066, + "learning_rate": 7.530879906608873e-06, + "loss": 17.4516, + "step": 19210 + }, + { + "epoch": 0.3511616429341766, + "grad_norm": 6.7981872820744895, + "learning_rate": 7.530624612879378e-06, + "loss": 17.8349, + "step": 19211 + }, + { + "epoch": 0.35117992213062316, + "grad_norm": 7.898551184926161, + "learning_rate": 7.5303693102802635e-06, + "loss": 17.8937, + "step": 19212 + }, + { + "epoch": 0.35119820132706964, + "grad_norm": 5.46829284366765, + "learning_rate": 7.530113998812425e-06, + "loss": 17.1842, + "step": 19213 + }, + { + "epoch": 0.3512164805235162, + "grad_norm": 6.301265062633721, + "learning_rate": 7.529858678476756e-06, + "loss": 17.6048, + "step": 19214 + }, + { + "epoch": 0.3512347597199627, + "grad_norm": 7.420612252023315, + "learning_rate": 7.529603349274152e-06, + "loss": 17.5682, + "step": 19215 + }, + { + "epoch": 0.35125303891640924, + "grad_norm": 6.836806153714951, + "learning_rate": 7.529348011205506e-06, + "loss": 17.6979, + "step": 19216 + }, + { + "epoch": 0.3512713181128558, + "grad_norm": 7.2037295493918085, + "learning_rate": 7.529092664271718e-06, + "loss": 17.8771, + "step": 19217 + }, + { + "epoch": 0.35128959730930226, + "grad_norm": 6.491421191766841, + "learning_rate": 7.528837308473678e-06, + "loss": 17.7311, + "step": 19218 + }, + { + "epoch": 0.3513078765057488, + "grad_norm": 6.726707260941049, + "learning_rate": 7.5285819438122805e-06, + "loss": 17.7142, + "step": 19219 + }, + { + "epoch": 0.35132615570219533, + "grad_norm": 6.129149885948185, + "learning_rate": 7.528326570288427e-06, + "loss": 17.4959, + "step": 19220 + }, + { + "epoch": 0.35134443489864187, + "grad_norm": 5.846540043871445, + "learning_rate": 7.5280711879030055e-06, + "loss": 17.2388, + "step": 19221 + }, + { + "epoch": 0.3513627140950884, + "grad_norm": 8.91938466865475, + "learning_rate": 7.527815796656914e-06, + "loss": 18.7418, + "step": 19222 + }, + { + "epoch": 0.3513809932915349, + "grad_norm": 8.019711184974327, + "learning_rate": 7.527560396551048e-06, + "loss": 18.2656, + "step": 19223 + }, + { + "epoch": 0.3513992724879814, + "grad_norm": 6.768107675353159, + "learning_rate": 7.527304987586301e-06, + "loss": 17.799, + "step": 19224 + }, + { + "epoch": 0.35141755168442795, + "grad_norm": 
5.558738259085692, + "learning_rate": 7.527049569763571e-06, + "loss": 17.122, + "step": 19225 + }, + { + "epoch": 0.3514358308808745, + "grad_norm": 7.115017050290701, + "learning_rate": 7.526794143083751e-06, + "loss": 17.7497, + "step": 19226 + }, + { + "epoch": 0.351454110077321, + "grad_norm": 6.686023231461101, + "learning_rate": 7.526538707547737e-06, + "loss": 17.4914, + "step": 19227 + }, + { + "epoch": 0.3514723892737675, + "grad_norm": 7.005102727356235, + "learning_rate": 7.526283263156424e-06, + "loss": 17.7012, + "step": 19228 + }, + { + "epoch": 0.35149066847021404, + "grad_norm": 6.39245729381603, + "learning_rate": 7.526027809910706e-06, + "loss": 17.5251, + "step": 19229 + }, + { + "epoch": 0.3515089476666606, + "grad_norm": 6.653919114430062, + "learning_rate": 7.525772347811482e-06, + "loss": 17.686, + "step": 19230 + }, + { + "epoch": 0.3515272268631071, + "grad_norm": 6.929301038300203, + "learning_rate": 7.5255168768596444e-06, + "loss": 17.8971, + "step": 19231 + }, + { + "epoch": 0.35154550605955365, + "grad_norm": 5.498475126930915, + "learning_rate": 7.52526139705609e-06, + "loss": 17.2416, + "step": 19232 + }, + { + "epoch": 0.3515637852560001, + "grad_norm": 6.282802389198806, + "learning_rate": 7.525005908401711e-06, + "loss": 17.5791, + "step": 19233 + }, + { + "epoch": 0.35158206445244666, + "grad_norm": 6.470919523886569, + "learning_rate": 7.5247504108974075e-06, + "loss": 17.6202, + "step": 19234 + }, + { + "epoch": 0.3516003436488932, + "grad_norm": 5.620344683425913, + "learning_rate": 7.524494904544072e-06, + "loss": 17.3058, + "step": 19235 + }, + { + "epoch": 0.35161862284533973, + "grad_norm": 5.405100648432202, + "learning_rate": 7.524239389342601e-06, + "loss": 17.2389, + "step": 19236 + }, + { + "epoch": 0.35163690204178627, + "grad_norm": 6.056074402243854, + "learning_rate": 7.523983865293891e-06, + "loss": 17.3652, + "step": 19237 + }, + { + "epoch": 0.35165518123823275, + "grad_norm": 6.845948434940423, + "learning_rate": 7.523728332398834e-06, + "loss": 17.7525, + "step": 19238 + }, + { + "epoch": 0.3516734604346793, + "grad_norm": 5.678162363390758, + "learning_rate": 7.523472790658331e-06, + "loss": 17.2832, + "step": 19239 + }, + { + "epoch": 0.3516917396311258, + "grad_norm": 6.276029721847072, + "learning_rate": 7.523217240073273e-06, + "loss": 17.2178, + "step": 19240 + }, + { + "epoch": 0.35171001882757236, + "grad_norm": 6.771556626423765, + "learning_rate": 7.52296168064456e-06, + "loss": 17.5713, + "step": 19241 + }, + { + "epoch": 0.35172829802401884, + "grad_norm": 6.9140068797528595, + "learning_rate": 7.522706112373083e-06, + "loss": 17.5612, + "step": 19242 + }, + { + "epoch": 0.3517465772204654, + "grad_norm": 5.268949832379009, + "learning_rate": 7.5224505352597395e-06, + "loss": 17.1115, + "step": 19243 + }, + { + "epoch": 0.3517648564169119, + "grad_norm": 8.420649134444037, + "learning_rate": 7.522194949305428e-06, + "loss": 17.0724, + "step": 19244 + }, + { + "epoch": 0.35178313561335844, + "grad_norm": 6.6024701703151845, + "learning_rate": 7.5219393545110406e-06, + "loss": 17.6513, + "step": 19245 + }, + { + "epoch": 0.351801414809805, + "grad_norm": 5.5397313205401115, + "learning_rate": 7.521683750877475e-06, + "loss": 17.143, + "step": 19246 + }, + { + "epoch": 0.35181969400625146, + "grad_norm": 7.0635908472449245, + "learning_rate": 7.521428138405626e-06, + "loss": 17.8985, + "step": 19247 + }, + { + "epoch": 0.351837973202698, + "grad_norm": 6.690937779179455, + "learning_rate": 7.521172517096391e-06, + "loss": 
17.5753, + "step": 19248 + }, + { + "epoch": 0.35185625239914453, + "grad_norm": 5.162150717931989, + "learning_rate": 7.520916886950664e-06, + "loss": 16.9441, + "step": 19249 + }, + { + "epoch": 0.35187453159559107, + "grad_norm": 7.542734166513863, + "learning_rate": 7.520661247969343e-06, + "loss": 17.9402, + "step": 19250 + }, + { + "epoch": 0.3518928107920376, + "grad_norm": 6.2321172952593455, + "learning_rate": 7.520405600153324e-06, + "loss": 17.4276, + "step": 19251 + }, + { + "epoch": 0.3519110899884841, + "grad_norm": 6.189843951485729, + "learning_rate": 7.520149943503501e-06, + "loss": 17.4252, + "step": 19252 + }, + { + "epoch": 0.3519293691849306, + "grad_norm": 6.775092305787856, + "learning_rate": 7.5198942780207705e-06, + "loss": 17.6616, + "step": 19253 + }, + { + "epoch": 0.35194764838137715, + "grad_norm": 5.865178419230509, + "learning_rate": 7.519638603706029e-06, + "loss": 17.066, + "step": 19254 + }, + { + "epoch": 0.3519659275778237, + "grad_norm": 7.121064231289921, + "learning_rate": 7.519382920560175e-06, + "loss": 17.9656, + "step": 19255 + }, + { + "epoch": 0.3519842067742702, + "grad_norm": 7.603032854869226, + "learning_rate": 7.519127228584101e-06, + "loss": 17.9382, + "step": 19256 + }, + { + "epoch": 0.3520024859707167, + "grad_norm": 6.441960309050005, + "learning_rate": 7.5188715277787035e-06, + "loss": 18.0542, + "step": 19257 + }, + { + "epoch": 0.35202076516716324, + "grad_norm": 6.6675001435748245, + "learning_rate": 7.518615818144883e-06, + "loss": 17.1855, + "step": 19258 + }, + { + "epoch": 0.3520390443636098, + "grad_norm": 7.671842857573003, + "learning_rate": 7.51836009968353e-06, + "loss": 17.9289, + "step": 19259 + }, + { + "epoch": 0.3520573235600563, + "grad_norm": 5.406344839963338, + "learning_rate": 7.518104372395545e-06, + "loss": 17.2156, + "step": 19260 + }, + { + "epoch": 0.35207560275650285, + "grad_norm": 7.403033251888348, + "learning_rate": 7.5178486362818215e-06, + "loss": 18.0308, + "step": 19261 + }, + { + "epoch": 0.35209388195294933, + "grad_norm": 7.1430028300022945, + "learning_rate": 7.517592891343258e-06, + "loss": 17.698, + "step": 19262 + }, + { + "epoch": 0.35211216114939586, + "grad_norm": 9.051370413187286, + "learning_rate": 7.517337137580749e-06, + "loss": 18.4366, + "step": 19263 + }, + { + "epoch": 0.3521304403458424, + "grad_norm": 6.552307942762419, + "learning_rate": 7.517081374995192e-06, + "loss": 17.4133, + "step": 19264 + }, + { + "epoch": 0.35214871954228893, + "grad_norm": 6.255774953362444, + "learning_rate": 7.516825603587483e-06, + "loss": 17.5453, + "step": 19265 + }, + { + "epoch": 0.35216699873873547, + "grad_norm": 6.4441058435343725, + "learning_rate": 7.516569823358519e-06, + "loss": 17.49, + "step": 19266 + }, + { + "epoch": 0.35218527793518195, + "grad_norm": 7.242224969692995, + "learning_rate": 7.516314034309197e-06, + "loss": 18.3363, + "step": 19267 + }, + { + "epoch": 0.3522035571316285, + "grad_norm": 6.158568205970019, + "learning_rate": 7.516058236440412e-06, + "loss": 17.394, + "step": 19268 + }, + { + "epoch": 0.352221836328075, + "grad_norm": 7.229432679066626, + "learning_rate": 7.515802429753061e-06, + "loss": 17.947, + "step": 19269 + }, + { + "epoch": 0.35224011552452156, + "grad_norm": 6.939608841008816, + "learning_rate": 7.515546614248039e-06, + "loss": 17.6991, + "step": 19270 + }, + { + "epoch": 0.3522583947209681, + "grad_norm": 5.790529395723205, + "learning_rate": 7.515290789926248e-06, + "loss": 17.3236, + "step": 19271 + }, + { + "epoch": 0.3522766739174146, + 
"grad_norm": 5.226285675220542, + "learning_rate": 7.515034956788579e-06, + "loss": 17.0536, + "step": 19272 + }, + { + "epoch": 0.3522949531138611, + "grad_norm": 6.510252246174526, + "learning_rate": 7.514779114835931e-06, + "loss": 17.729, + "step": 19273 + }, + { + "epoch": 0.35231323231030764, + "grad_norm": 6.164202259826949, + "learning_rate": 7.5145232640692e-06, + "loss": 17.4468, + "step": 19274 + }, + { + "epoch": 0.3523315115067542, + "grad_norm": 5.663787925761229, + "learning_rate": 7.514267404489284e-06, + "loss": 17.3572, + "step": 19275 + }, + { + "epoch": 0.35234979070320066, + "grad_norm": 6.53771164186853, + "learning_rate": 7.514011536097079e-06, + "loss": 17.3409, + "step": 19276 + }, + { + "epoch": 0.3523680698996472, + "grad_norm": 5.923042661465154, + "learning_rate": 7.51375565889348e-06, + "loss": 17.332, + "step": 19277 + }, + { + "epoch": 0.35238634909609373, + "grad_norm": 6.503639029233199, + "learning_rate": 7.513499772879387e-06, + "loss": 17.5271, + "step": 19278 + }, + { + "epoch": 0.35240462829254027, + "grad_norm": 6.303507979073592, + "learning_rate": 7.513243878055696e-06, + "loss": 17.5725, + "step": 19279 + }, + { + "epoch": 0.3524229074889868, + "grad_norm": 6.265970875824671, + "learning_rate": 7.512987974423303e-06, + "loss": 17.4912, + "step": 19280 + }, + { + "epoch": 0.3524411866854333, + "grad_norm": 5.95214388881223, + "learning_rate": 7.512732061983103e-06, + "loss": 17.37, + "step": 19281 + }, + { + "epoch": 0.3524594658818798, + "grad_norm": 6.7964495082100855, + "learning_rate": 7.512476140735998e-06, + "loss": 17.4483, + "step": 19282 + }, + { + "epoch": 0.35247774507832635, + "grad_norm": 5.8637732923041135, + "learning_rate": 7.51222021068288e-06, + "loss": 17.5728, + "step": 19283 + }, + { + "epoch": 0.3524960242747729, + "grad_norm": 16.598796165901515, + "learning_rate": 7.51196427182465e-06, + "loss": 17.1121, + "step": 19284 + }, + { + "epoch": 0.3525143034712194, + "grad_norm": 20.40456920183395, + "learning_rate": 7.5117083241622014e-06, + "loss": 17.6317, + "step": 19285 + }, + { + "epoch": 0.3525325826676659, + "grad_norm": 29.364647554813363, + "learning_rate": 7.511452367696434e-06, + "loss": 17.4675, + "step": 19286 + }, + { + "epoch": 0.35255086186411244, + "grad_norm": 5.421552533264433, + "learning_rate": 7.511196402428244e-06, + "loss": 17.0389, + "step": 19287 + }, + { + "epoch": 0.352569141060559, + "grad_norm": 6.622874636689023, + "learning_rate": 7.510940428358529e-06, + "loss": 17.6747, + "step": 19288 + }, + { + "epoch": 0.3525874202570055, + "grad_norm": 6.4841809629340625, + "learning_rate": 7.510684445488186e-06, + "loss": 17.6812, + "step": 19289 + }, + { + "epoch": 0.35260569945345205, + "grad_norm": 6.109972943276097, + "learning_rate": 7.51042845381811e-06, + "loss": 17.2907, + "step": 19290 + }, + { + "epoch": 0.35262397864989853, + "grad_norm": 5.171756038966166, + "learning_rate": 7.5101724533492025e-06, + "loss": 16.9398, + "step": 19291 + }, + { + "epoch": 0.35264225784634506, + "grad_norm": 6.6507230063221625, + "learning_rate": 7.509916444082357e-06, + "loss": 17.5161, + "step": 19292 + }, + { + "epoch": 0.3526605370427916, + "grad_norm": 5.773007882989047, + "learning_rate": 7.509660426018473e-06, + "loss": 17.1385, + "step": 19293 + }, + { + "epoch": 0.35267881623923814, + "grad_norm": 8.749319481203136, + "learning_rate": 7.509404399158445e-06, + "loss": 18.1071, + "step": 19294 + }, + { + "epoch": 0.35269709543568467, + "grad_norm": 8.197171997068704, + "learning_rate": 7.509148363503174e-06, + 
"loss": 18.0026, + "step": 19295 + }, + { + "epoch": 0.35271537463213115, + "grad_norm": 12.50595201301948, + "learning_rate": 7.508892319053555e-06, + "loss": 17.443, + "step": 19296 + }, + { + "epoch": 0.3527336538285777, + "grad_norm": 6.516132376949015, + "learning_rate": 7.508636265810486e-06, + "loss": 17.3995, + "step": 19297 + }, + { + "epoch": 0.3527519330250242, + "grad_norm": 6.613174558109078, + "learning_rate": 7.508380203774865e-06, + "loss": 17.5949, + "step": 19298 + }, + { + "epoch": 0.35277021222147076, + "grad_norm": 7.868352150229157, + "learning_rate": 7.508124132947589e-06, + "loss": 17.9619, + "step": 19299 + }, + { + "epoch": 0.3527884914179173, + "grad_norm": 7.004765055019461, + "learning_rate": 7.507868053329557e-06, + "loss": 17.6702, + "step": 19300 + }, + { + "epoch": 0.3528067706143638, + "grad_norm": 5.577969945626082, + "learning_rate": 7.507611964921664e-06, + "loss": 17.0379, + "step": 19301 + }, + { + "epoch": 0.3528250498108103, + "grad_norm": 5.206648365877945, + "learning_rate": 7.507355867724807e-06, + "loss": 16.8953, + "step": 19302 + }, + { + "epoch": 0.35284332900725685, + "grad_norm": 7.115165103580579, + "learning_rate": 7.5070997617398875e-06, + "loss": 17.6558, + "step": 19303 + }, + { + "epoch": 0.3528616082037034, + "grad_norm": 8.738741552797054, + "learning_rate": 7.5068436469678e-06, + "loss": 17.2167, + "step": 19304 + }, + { + "epoch": 0.3528798874001499, + "grad_norm": 6.129962169898521, + "learning_rate": 7.506587523409443e-06, + "loss": 17.1931, + "step": 19305 + }, + { + "epoch": 0.3528981665965964, + "grad_norm": 7.6024967870376905, + "learning_rate": 7.506331391065714e-06, + "loss": 17.9997, + "step": 19306 + }, + { + "epoch": 0.35291644579304293, + "grad_norm": 7.450007025171821, + "learning_rate": 7.5060752499375125e-06, + "loss": 17.8514, + "step": 19307 + }, + { + "epoch": 0.35293472498948947, + "grad_norm": 8.420767894245751, + "learning_rate": 7.505819100025733e-06, + "loss": 18.2729, + "step": 19308 + }, + { + "epoch": 0.352953004185936, + "grad_norm": 6.280470968026568, + "learning_rate": 7.5055629413312745e-06, + "loss": 17.4036, + "step": 19309 + }, + { + "epoch": 0.3529712833823825, + "grad_norm": 7.887634315817519, + "learning_rate": 7.505306773855036e-06, + "loss": 18.1748, + "step": 19310 + }, + { + "epoch": 0.352989562578829, + "grad_norm": 6.721258238027715, + "learning_rate": 7.505050597597916e-06, + "loss": 17.559, + "step": 19311 + }, + { + "epoch": 0.35300784177527555, + "grad_norm": 7.314230183760376, + "learning_rate": 7.50479441256081e-06, + "loss": 17.8109, + "step": 19312 + }, + { + "epoch": 0.3530261209717221, + "grad_norm": 6.0105027388851155, + "learning_rate": 7.504538218744617e-06, + "loss": 17.4281, + "step": 19313 + }, + { + "epoch": 0.3530444001681686, + "grad_norm": 6.292850551068649, + "learning_rate": 7.504282016150235e-06, + "loss": 17.6397, + "step": 19314 + }, + { + "epoch": 0.3530626793646151, + "grad_norm": 7.011075822188665, + "learning_rate": 7.504025804778561e-06, + "loss": 17.8955, + "step": 19315 + }, + { + "epoch": 0.35308095856106164, + "grad_norm": 6.904144514818356, + "learning_rate": 7.503769584630495e-06, + "loss": 17.6697, + "step": 19316 + }, + { + "epoch": 0.3530992377575082, + "grad_norm": 7.152090942929883, + "learning_rate": 7.503513355706934e-06, + "loss": 18.0321, + "step": 19317 + }, + { + "epoch": 0.3531175169539547, + "grad_norm": 6.4690979081874485, + "learning_rate": 7.5032571180087756e-06, + "loss": 17.5206, + "step": 19318 + }, + { + "epoch": 0.35313579615040125, 
+ "grad_norm": 6.742357135324228, + "learning_rate": 7.5030008715369175e-06, + "loss": 17.6227, + "step": 19319 + }, + { + "epoch": 0.35315407534684773, + "grad_norm": 6.9498528970034394, + "learning_rate": 7.502744616292259e-06, + "loss": 17.718, + "step": 19320 + }, + { + "epoch": 0.35317235454329426, + "grad_norm": 5.8268840292535256, + "learning_rate": 7.502488352275697e-06, + "loss": 17.4104, + "step": 19321 + }, + { + "epoch": 0.3531906337397408, + "grad_norm": 7.403693368819702, + "learning_rate": 7.502232079488132e-06, + "loss": 17.4577, + "step": 19322 + }, + { + "epoch": 0.35320891293618734, + "grad_norm": 6.124948314523103, + "learning_rate": 7.5019757979304594e-06, + "loss": 17.3621, + "step": 19323 + }, + { + "epoch": 0.35322719213263387, + "grad_norm": 5.397086258924638, + "learning_rate": 7.50171950760358e-06, + "loss": 17.0504, + "step": 19324 + }, + { + "epoch": 0.35324547132908035, + "grad_norm": 6.130934468575067, + "learning_rate": 7.5014632085083905e-06, + "loss": 17.3744, + "step": 19325 + }, + { + "epoch": 0.3532637505255269, + "grad_norm": 5.84133944166528, + "learning_rate": 7.5012069006457876e-06, + "loss": 17.518, + "step": 19326 + }, + { + "epoch": 0.3532820297219734, + "grad_norm": 6.435103065004983, + "learning_rate": 7.500950584016675e-06, + "loss": 17.6368, + "step": 19327 + }, + { + "epoch": 0.35330030891841996, + "grad_norm": 6.361098036967545, + "learning_rate": 7.500694258621946e-06, + "loss": 17.4724, + "step": 19328 + }, + { + "epoch": 0.3533185881148665, + "grad_norm": 6.2022596023931325, + "learning_rate": 7.500437924462498e-06, + "loss": 17.6998, + "step": 19329 + }, + { + "epoch": 0.353336867311313, + "grad_norm": 5.95565398908481, + "learning_rate": 7.500181581539236e-06, + "loss": 17.5403, + "step": 19330 + }, + { + "epoch": 0.3533551465077595, + "grad_norm": 6.4444870534955765, + "learning_rate": 7.4999252298530536e-06, + "loss": 17.6668, + "step": 19331 + }, + { + "epoch": 0.35337342570420605, + "grad_norm": 7.226446417006764, + "learning_rate": 7.4996688694048496e-06, + "loss": 17.999, + "step": 19332 + }, + { + "epoch": 0.3533917049006526, + "grad_norm": 5.130752996598166, + "learning_rate": 7.499412500195522e-06, + "loss": 16.8309, + "step": 19333 + }, + { + "epoch": 0.3534099840970991, + "grad_norm": 6.091716027701422, + "learning_rate": 7.499156122225972e-06, + "loss": 17.1431, + "step": 19334 + }, + { + "epoch": 0.3534282632935456, + "grad_norm": 5.577210815893012, + "learning_rate": 7.498899735497096e-06, + "loss": 17.2686, + "step": 19335 + }, + { + "epoch": 0.35344654248999213, + "grad_norm": 5.420874767043154, + "learning_rate": 7.498643340009793e-06, + "loss": 17.2443, + "step": 19336 + }, + { + "epoch": 0.35346482168643867, + "grad_norm": 7.005754200492961, + "learning_rate": 7.498386935764964e-06, + "loss": 17.7823, + "step": 19337 + }, + { + "epoch": 0.3534831008828852, + "grad_norm": 7.014225587626757, + "learning_rate": 7.498130522763503e-06, + "loss": 17.7229, + "step": 19338 + }, + { + "epoch": 0.35350138007933174, + "grad_norm": 6.815981578344918, + "learning_rate": 7.497874101006312e-06, + "loss": 17.8219, + "step": 19339 + }, + { + "epoch": 0.3535196592757782, + "grad_norm": 6.287199660882772, + "learning_rate": 7.497617670494289e-06, + "loss": 17.5265, + "step": 19340 + }, + { + "epoch": 0.35353793847222476, + "grad_norm": 7.457545237236673, + "learning_rate": 7.497361231228334e-06, + "loss": 18.0378, + "step": 19341 + }, + { + "epoch": 0.3535562176686713, + "grad_norm": 7.807867387794018, + "learning_rate": 
7.497104783209343e-06, + "loss": 18.1411, + "step": 19342 + }, + { + "epoch": 0.3535744968651178, + "grad_norm": 5.939464419722728, + "learning_rate": 7.496848326438218e-06, + "loss": 17.237, + "step": 19343 + }, + { + "epoch": 0.3535927760615643, + "grad_norm": 5.94933347974999, + "learning_rate": 7.496591860915855e-06, + "loss": 17.6885, + "step": 19344 + }, + { + "epoch": 0.35361105525801084, + "grad_norm": 7.62622439160612, + "learning_rate": 7.496335386643155e-06, + "loss": 18.0454, + "step": 19345 + }, + { + "epoch": 0.3536293344544574, + "grad_norm": 7.654420929465917, + "learning_rate": 7.496078903621016e-06, + "loss": 17.9037, + "step": 19346 + }, + { + "epoch": 0.3536476136509039, + "grad_norm": 6.9677134942444, + "learning_rate": 7.495822411850335e-06, + "loss": 17.8292, + "step": 19347 + }, + { + "epoch": 0.35366589284735045, + "grad_norm": 7.397537831439291, + "learning_rate": 7.495565911332015e-06, + "loss": 18.5911, + "step": 19348 + }, + { + "epoch": 0.35368417204379693, + "grad_norm": 6.928067092409122, + "learning_rate": 7.495309402066954e-06, + "loss": 17.9352, + "step": 19349 + }, + { + "epoch": 0.35370245124024347, + "grad_norm": 6.730765795853856, + "learning_rate": 7.495052884056048e-06, + "loss": 17.6145, + "step": 19350 + }, + { + "epoch": 0.35372073043669, + "grad_norm": 8.10738099050842, + "learning_rate": 7.4947963573001995e-06, + "loss": 17.8017, + "step": 19351 + }, + { + "epoch": 0.35373900963313654, + "grad_norm": 7.93727278428228, + "learning_rate": 7.494539821800305e-06, + "loss": 17.7669, + "step": 19352 + }, + { + "epoch": 0.35375728882958307, + "grad_norm": 7.339848526157841, + "learning_rate": 7.494283277557266e-06, + "loss": 17.6792, + "step": 19353 + }, + { + "epoch": 0.35377556802602955, + "grad_norm": 7.201721940676704, + "learning_rate": 7.49402672457198e-06, + "loss": 17.8781, + "step": 19354 + }, + { + "epoch": 0.3537938472224761, + "grad_norm": 6.697310098952394, + "learning_rate": 7.493770162845348e-06, + "loss": 17.5622, + "step": 19355 + }, + { + "epoch": 0.3538121264189226, + "grad_norm": 8.084516537100795, + "learning_rate": 7.493513592378265e-06, + "loss": 18.0973, + "step": 19356 + }, + { + "epoch": 0.35383040561536916, + "grad_norm": 5.1961317299465675, + "learning_rate": 7.493257013171636e-06, + "loss": 17.1162, + "step": 19357 + }, + { + "epoch": 0.3538486848118157, + "grad_norm": 6.731553770253061, + "learning_rate": 7.493000425226358e-06, + "loss": 17.816, + "step": 19358 + }, + { + "epoch": 0.3538669640082622, + "grad_norm": 6.091521486875263, + "learning_rate": 7.492743828543327e-06, + "loss": 17.4561, + "step": 19359 + }, + { + "epoch": 0.3538852432047087, + "grad_norm": 6.8667630201488725, + "learning_rate": 7.492487223123448e-06, + "loss": 17.7971, + "step": 19360 + }, + { + "epoch": 0.35390352240115525, + "grad_norm": 6.19592515380013, + "learning_rate": 7.492230608967614e-06, + "loss": 17.2811, + "step": 19361 + }, + { + "epoch": 0.3539218015976018, + "grad_norm": 5.807384444053767, + "learning_rate": 7.491973986076733e-06, + "loss": 17.3417, + "step": 19362 + }, + { + "epoch": 0.3539400807940483, + "grad_norm": 7.155386757335756, + "learning_rate": 7.491717354451695e-06, + "loss": 17.7106, + "step": 19363 + }, + { + "epoch": 0.3539583599904948, + "grad_norm": 6.538612560506778, + "learning_rate": 7.491460714093406e-06, + "loss": 17.2755, + "step": 19364 + }, + { + "epoch": 0.35397663918694133, + "grad_norm": 6.458033663280672, + "learning_rate": 7.491204065002763e-06, + "loss": 17.4271, + "step": 19365 + }, + { + "epoch": 
0.35399491838338787, + "grad_norm": 5.739066204118389, + "learning_rate": 7.4909474071806665e-06, + "loss": 17.1759, + "step": 19366 + }, + { + "epoch": 0.3540131975798344, + "grad_norm": 6.692072568599566, + "learning_rate": 7.490690740628015e-06, + "loss": 17.6206, + "step": 19367 + }, + { + "epoch": 0.35403147677628094, + "grad_norm": 8.267619576579627, + "learning_rate": 7.4904340653457086e-06, + "loss": 18.2098, + "step": 19368 + }, + { + "epoch": 0.3540497559727274, + "grad_norm": 7.1204361353633825, + "learning_rate": 7.4901773813346465e-06, + "loss": 17.9392, + "step": 19369 + }, + { + "epoch": 0.35406803516917396, + "grad_norm": 6.691643134926981, + "learning_rate": 7.489920688595729e-06, + "loss": 17.7207, + "step": 19370 + }, + { + "epoch": 0.3540863143656205, + "grad_norm": 6.25705543192282, + "learning_rate": 7.489663987129855e-06, + "loss": 17.4774, + "step": 19371 + }, + { + "epoch": 0.354104593562067, + "grad_norm": 7.4853794775293885, + "learning_rate": 7.489407276937927e-06, + "loss": 18.0386, + "step": 19372 + }, + { + "epoch": 0.35412287275851356, + "grad_norm": 7.476864122292723, + "learning_rate": 7.48915055802084e-06, + "loss": 18.1652, + "step": 19373 + }, + { + "epoch": 0.35414115195496004, + "grad_norm": 9.351362439238002, + "learning_rate": 7.488893830379498e-06, + "loss": 18.3186, + "step": 19374 + }, + { + "epoch": 0.3541594311514066, + "grad_norm": 8.284016530529174, + "learning_rate": 7.4886370940147975e-06, + "loss": 18.4558, + "step": 19375 + }, + { + "epoch": 0.3541777103478531, + "grad_norm": 4.6921299243565455, + "learning_rate": 7.4883803489276404e-06, + "loss": 16.9383, + "step": 19376 + }, + { + "epoch": 0.35419598954429965, + "grad_norm": 5.42530176855745, + "learning_rate": 7.4881235951189265e-06, + "loss": 17.0558, + "step": 19377 + }, + { + "epoch": 0.35421426874074613, + "grad_norm": 7.705040512252523, + "learning_rate": 7.487866832589555e-06, + "loss": 17.8432, + "step": 19378 + }, + { + "epoch": 0.35423254793719267, + "grad_norm": 6.561441805437232, + "learning_rate": 7.487610061340427e-06, + "loss": 17.3472, + "step": 19379 + }, + { + "epoch": 0.3542508271336392, + "grad_norm": 5.070628575814306, + "learning_rate": 7.48735328137244e-06, + "loss": 16.7991, + "step": 19380 + }, + { + "epoch": 0.35426910633008574, + "grad_norm": 5.672514503446029, + "learning_rate": 7.487096492686498e-06, + "loss": 16.9956, + "step": 19381 + }, + { + "epoch": 0.3542873855265323, + "grad_norm": 6.737898024418183, + "learning_rate": 7.486839695283497e-06, + "loss": 17.5229, + "step": 19382 + }, + { + "epoch": 0.35430566472297875, + "grad_norm": 6.182176476803175, + "learning_rate": 7.486582889164338e-06, + "loss": 17.5448, + "step": 19383 + }, + { + "epoch": 0.3543239439194253, + "grad_norm": 8.33687576037172, + "learning_rate": 7.486326074329923e-06, + "loss": 18.6737, + "step": 19384 + }, + { + "epoch": 0.3543422231158718, + "grad_norm": 5.595973665793096, + "learning_rate": 7.48606925078115e-06, + "loss": 17.1997, + "step": 19385 + }, + { + "epoch": 0.35436050231231836, + "grad_norm": 5.926973625388309, + "learning_rate": 7.4858124185189215e-06, + "loss": 17.1603, + "step": 19386 + }, + { + "epoch": 0.3543787815087649, + "grad_norm": 5.435840466787997, + "learning_rate": 7.485555577544136e-06, + "loss": 17.1007, + "step": 19387 + }, + { + "epoch": 0.3543970607052114, + "grad_norm": 7.000065925848345, + "learning_rate": 7.4852987278576915e-06, + "loss": 17.7634, + "step": 19388 + }, + { + "epoch": 0.3544153399016579, + "grad_norm": 6.415895711266425, + 
"learning_rate": 7.485041869460493e-06, + "loss": 17.4346, + "step": 19389 + }, + { + "epoch": 0.35443361909810445, + "grad_norm": 6.9250974739440645, + "learning_rate": 7.4847850023534375e-06, + "loss": 17.7694, + "step": 19390 + }, + { + "epoch": 0.354451898294551, + "grad_norm": 6.543538726229283, + "learning_rate": 7.484528126537426e-06, + "loss": 17.6611, + "step": 19391 + }, + { + "epoch": 0.3544701774909975, + "grad_norm": 5.240844858653111, + "learning_rate": 7.484271242013359e-06, + "loss": 16.8667, + "step": 19392 + }, + { + "epoch": 0.354488456687444, + "grad_norm": 6.863256791947771, + "learning_rate": 7.484014348782138e-06, + "loss": 17.4832, + "step": 19393 + }, + { + "epoch": 0.35450673588389053, + "grad_norm": 6.642713304065364, + "learning_rate": 7.483757446844661e-06, + "loss": 17.7014, + "step": 19394 + }, + { + "epoch": 0.35452501508033707, + "grad_norm": 6.849159659362434, + "learning_rate": 7.48350053620183e-06, + "loss": 17.4561, + "step": 19395 + }, + { + "epoch": 0.3545432942767836, + "grad_norm": 5.535302093579434, + "learning_rate": 7.4832436168545466e-06, + "loss": 17.1929, + "step": 19396 + }, + { + "epoch": 0.35456157347323014, + "grad_norm": 6.1383064259059, + "learning_rate": 7.4829866888037065e-06, + "loss": 17.3695, + "step": 19397 + }, + { + "epoch": 0.3545798526696766, + "grad_norm": 6.16842294986674, + "learning_rate": 7.482729752050215e-06, + "loss": 17.5157, + "step": 19398 + }, + { + "epoch": 0.35459813186612316, + "grad_norm": 6.435334878102475, + "learning_rate": 7.4824728065949735e-06, + "loss": 17.6294, + "step": 19399 + }, + { + "epoch": 0.3546164110625697, + "grad_norm": 7.209909555953054, + "learning_rate": 7.482215852438878e-06, + "loss": 17.7161, + "step": 19400 + }, + { + "epoch": 0.3546346902590162, + "grad_norm": 5.889956488909761, + "learning_rate": 7.481958889582832e-06, + "loss": 17.2855, + "step": 19401 + }, + { + "epoch": 0.35465296945546276, + "grad_norm": 7.656774016906944, + "learning_rate": 7.481701918027734e-06, + "loss": 18.104, + "step": 19402 + }, + { + "epoch": 0.35467124865190924, + "grad_norm": 8.024126407579462, + "learning_rate": 7.481444937774488e-06, + "loss": 18.0196, + "step": 19403 + }, + { + "epoch": 0.3546895278483558, + "grad_norm": 7.285299984162822, + "learning_rate": 7.4811879488239915e-06, + "loss": 17.7149, + "step": 19404 + }, + { + "epoch": 0.3547078070448023, + "grad_norm": 6.37127872968481, + "learning_rate": 7.480930951177148e-06, + "loss": 17.4418, + "step": 19405 + }, + { + "epoch": 0.35472608624124885, + "grad_norm": 7.470571138470041, + "learning_rate": 7.480673944834856e-06, + "loss": 17.9877, + "step": 19406 + }, + { + "epoch": 0.3547443654376954, + "grad_norm": 8.185439832267216, + "learning_rate": 7.480416929798016e-06, + "loss": 17.9194, + "step": 19407 + }, + { + "epoch": 0.35476264463414187, + "grad_norm": 5.043994100916689, + "learning_rate": 7.480159906067531e-06, + "loss": 17.0113, + "step": 19408 + }, + { + "epoch": 0.3547809238305884, + "grad_norm": 6.712227558010779, + "learning_rate": 7.479902873644301e-06, + "loss": 17.8045, + "step": 19409 + }, + { + "epoch": 0.35479920302703494, + "grad_norm": 7.727220735557964, + "learning_rate": 7.479645832529225e-06, + "loss": 18.0885, + "step": 19410 + }, + { + "epoch": 0.3548174822234815, + "grad_norm": 7.878353648153295, + "learning_rate": 7.479388782723208e-06, + "loss": 17.9703, + "step": 19411 + }, + { + "epoch": 0.35483576141992795, + "grad_norm": 7.830284306740343, + "learning_rate": 7.479131724227147e-06, + "loss": 17.9463, + "step": 
19412 + }, + { + "epoch": 0.3548540406163745, + "grad_norm": 6.335659283153983, + "learning_rate": 7.4788746570419454e-06, + "loss": 17.2787, + "step": 19413 + }, + { + "epoch": 0.354872319812821, + "grad_norm": 6.30472829977409, + "learning_rate": 7.478617581168503e-06, + "loss": 17.2966, + "step": 19414 + }, + { + "epoch": 0.35489059900926756, + "grad_norm": 6.574696312691399, + "learning_rate": 7.478360496607719e-06, + "loss": 17.8703, + "step": 19415 + }, + { + "epoch": 0.3549088782057141, + "grad_norm": 6.392912622945608, + "learning_rate": 7.478103403360498e-06, + "loss": 17.1885, + "step": 19416 + }, + { + "epoch": 0.3549271574021606, + "grad_norm": 6.15821031640931, + "learning_rate": 7.477846301427741e-06, + "loss": 17.4841, + "step": 19417 + }, + { + "epoch": 0.3549454365986071, + "grad_norm": 5.8032289235700905, + "learning_rate": 7.4775891908103456e-06, + "loss": 17.2369, + "step": 19418 + }, + { + "epoch": 0.35496371579505365, + "grad_norm": 7.8978308302965, + "learning_rate": 7.477332071509217e-06, + "loss": 18.2384, + "step": 19419 + }, + { + "epoch": 0.3549819949915002, + "grad_norm": 7.281175734484118, + "learning_rate": 7.477074943525253e-06, + "loss": 17.8954, + "step": 19420 + }, + { + "epoch": 0.3550002741879467, + "grad_norm": 6.277923838885113, + "learning_rate": 7.476817806859357e-06, + "loss": 17.3499, + "step": 19421 + }, + { + "epoch": 0.3550185533843932, + "grad_norm": 7.200947809816378, + "learning_rate": 7.476560661512429e-06, + "loss": 17.6936, + "step": 19422 + }, + { + "epoch": 0.35503683258083973, + "grad_norm": 7.9194955302842756, + "learning_rate": 7.476303507485371e-06, + "loss": 17.7159, + "step": 19423 + }, + { + "epoch": 0.35505511177728627, + "grad_norm": 5.766785049922964, + "learning_rate": 7.4760463447790844e-06, + "loss": 17.3214, + "step": 19424 + }, + { + "epoch": 0.3550733909737328, + "grad_norm": 7.358756659132915, + "learning_rate": 7.47578917339447e-06, + "loss": 18.048, + "step": 19425 + }, + { + "epoch": 0.35509167017017934, + "grad_norm": 7.4608818189334, + "learning_rate": 7.475531993332429e-06, + "loss": 17.7924, + "step": 19426 + }, + { + "epoch": 0.3551099493666258, + "grad_norm": 6.007732917341111, + "learning_rate": 7.475274804593864e-06, + "loss": 17.2599, + "step": 19427 + }, + { + "epoch": 0.35512822856307236, + "grad_norm": 7.804206020385914, + "learning_rate": 7.475017607179676e-06, + "loss": 18.2085, + "step": 19428 + }, + { + "epoch": 0.3551465077595189, + "grad_norm": 6.8839181908048666, + "learning_rate": 7.474760401090764e-06, + "loss": 17.9312, + "step": 19429 + }, + { + "epoch": 0.35516478695596543, + "grad_norm": 7.304411432808902, + "learning_rate": 7.474503186328033e-06, + "loss": 17.8743, + "step": 19430 + }, + { + "epoch": 0.35518306615241196, + "grad_norm": 7.846221445858971, + "learning_rate": 7.474245962892382e-06, + "loss": 18.1887, + "step": 19431 + }, + { + "epoch": 0.35520134534885844, + "grad_norm": 6.601758236885788, + "learning_rate": 7.4739887307847145e-06, + "loss": 17.3935, + "step": 19432 + }, + { + "epoch": 0.355219624545305, + "grad_norm": 5.638614361581859, + "learning_rate": 7.473731490005931e-06, + "loss": 17.2594, + "step": 19433 + }, + { + "epoch": 0.3552379037417515, + "grad_norm": 8.1558423597002, + "learning_rate": 7.473474240556934e-06, + "loss": 18.1723, + "step": 19434 + }, + { + "epoch": 0.35525618293819805, + "grad_norm": 6.212145101632636, + "learning_rate": 7.473216982438624e-06, + "loss": 17.3857, + "step": 19435 + }, + { + "epoch": 0.3552744621346446, + "grad_norm": 
6.131519220006103, + "learning_rate": 7.472959715651902e-06, + "loss": 17.4338, + "step": 19436 + }, + { + "epoch": 0.35529274133109107, + "grad_norm": 5.871994721306829, + "learning_rate": 7.472702440197672e-06, + "loss": 17.0883, + "step": 19437 + }, + { + "epoch": 0.3553110205275376, + "grad_norm": 6.403552947923738, + "learning_rate": 7.472445156076834e-06, + "loss": 17.5859, + "step": 19438 + }, + { + "epoch": 0.35532929972398414, + "grad_norm": 8.193769561087173, + "learning_rate": 7.47218786329029e-06, + "loss": 18.3522, + "step": 19439 + }, + { + "epoch": 0.3553475789204307, + "grad_norm": 5.705550408018775, + "learning_rate": 7.471930561838943e-06, + "loss": 17.3153, + "step": 19440 + }, + { + "epoch": 0.3553658581168772, + "grad_norm": 5.943444147922853, + "learning_rate": 7.471673251723694e-06, + "loss": 17.3869, + "step": 19441 + }, + { + "epoch": 0.3553841373133237, + "grad_norm": 6.622891918132139, + "learning_rate": 7.471415932945443e-06, + "loss": 17.8957, + "step": 19442 + }, + { + "epoch": 0.3554024165097702, + "grad_norm": 7.742108703211141, + "learning_rate": 7.4711586055050944e-06, + "loss": 18.1735, + "step": 19443 + }, + { + "epoch": 0.35542069570621676, + "grad_norm": 5.842591222976607, + "learning_rate": 7.470901269403551e-06, + "loss": 17.3396, + "step": 19444 + }, + { + "epoch": 0.3554389749026633, + "grad_norm": 5.9068511967150705, + "learning_rate": 7.470643924641712e-06, + "loss": 17.2643, + "step": 19445 + }, + { + "epoch": 0.3554572540991098, + "grad_norm": 6.018712598958013, + "learning_rate": 7.47038657122048e-06, + "loss": 17.3815, + "step": 19446 + }, + { + "epoch": 0.3554755332955563, + "grad_norm": 7.499780665216952, + "learning_rate": 7.470129209140756e-06, + "loss": 18.296, + "step": 19447 + }, + { + "epoch": 0.35549381249200285, + "grad_norm": 9.515142214114233, + "learning_rate": 7.469871838403446e-06, + "loss": 17.596, + "step": 19448 + }, + { + "epoch": 0.3555120916884494, + "grad_norm": 5.112666074246406, + "learning_rate": 7.469614459009449e-06, + "loss": 16.9713, + "step": 19449 + }, + { + "epoch": 0.3555303708848959, + "grad_norm": 7.224288794638123, + "learning_rate": 7.469357070959667e-06, + "loss": 17.7355, + "step": 19450 + }, + { + "epoch": 0.3555486500813424, + "grad_norm": 7.842872194239781, + "learning_rate": 7.469099674255002e-06, + "loss": 17.8648, + "step": 19451 + }, + { + "epoch": 0.35556692927778893, + "grad_norm": 6.537318214223713, + "learning_rate": 7.468842268896359e-06, + "loss": 17.4829, + "step": 19452 + }, + { + "epoch": 0.35558520847423547, + "grad_norm": 5.624756200968691, + "learning_rate": 7.468584854884636e-06, + "loss": 17.1853, + "step": 19453 + }, + { + "epoch": 0.355603487670682, + "grad_norm": 6.1704492959040245, + "learning_rate": 7.468327432220739e-06, + "loss": 17.1741, + "step": 19454 + }, + { + "epoch": 0.35562176686712854, + "grad_norm": 5.6975740937579635, + "learning_rate": 7.468070000905568e-06, + "loss": 17.1753, + "step": 19455 + }, + { + "epoch": 0.355640046063575, + "grad_norm": 7.813950691810959, + "learning_rate": 7.467812560940025e-06, + "loss": 18.2112, + "step": 19456 + }, + { + "epoch": 0.35565832526002156, + "grad_norm": 5.335502955682161, + "learning_rate": 7.467555112325013e-06, + "loss": 17.0313, + "step": 19457 + }, + { + "epoch": 0.3556766044564681, + "grad_norm": 6.446712609176233, + "learning_rate": 7.467297655061437e-06, + "loss": 17.4101, + "step": 19458 + }, + { + "epoch": 0.35569488365291463, + "grad_norm": 6.1874619697485365, + "learning_rate": 7.467040189150194e-06, + "loss": 
17.3346, + "step": 19459 + }, + { + "epoch": 0.35571316284936116, + "grad_norm": 6.018356849341548, + "learning_rate": 7.466782714592191e-06, + "loss": 17.0546, + "step": 19460 + }, + { + "epoch": 0.35573144204580764, + "grad_norm": 5.294735414932873, + "learning_rate": 7.466525231388327e-06, + "loss": 17.1645, + "step": 19461 + }, + { + "epoch": 0.3557497212422542, + "grad_norm": 6.469328490918808, + "learning_rate": 7.4662677395395074e-06, + "loss": 17.5357, + "step": 19462 + }, + { + "epoch": 0.3557680004387007, + "grad_norm": 6.282228377007154, + "learning_rate": 7.466010239046632e-06, + "loss": 17.6503, + "step": 19463 + }, + { + "epoch": 0.35578627963514725, + "grad_norm": 6.608266845938125, + "learning_rate": 7.465752729910607e-06, + "loss": 17.4305, + "step": 19464 + }, + { + "epoch": 0.3558045588315938, + "grad_norm": 6.311324031626701, + "learning_rate": 7.465495212132331e-06, + "loss": 17.7267, + "step": 19465 + }, + { + "epoch": 0.35582283802804027, + "grad_norm": 6.094071624844103, + "learning_rate": 7.465237685712708e-06, + "loss": 17.3744, + "step": 19466 + }, + { + "epoch": 0.3558411172244868, + "grad_norm": 7.621146830460536, + "learning_rate": 7.464980150652642e-06, + "loss": 17.8176, + "step": 19467 + }, + { + "epoch": 0.35585939642093334, + "grad_norm": 7.217791900838364, + "learning_rate": 7.464722606953034e-06, + "loss": 18.1047, + "step": 19468 + }, + { + "epoch": 0.3558776756173799, + "grad_norm": 6.719686606133383, + "learning_rate": 7.4644650546147875e-06, + "loss": 17.65, + "step": 19469 + }, + { + "epoch": 0.3558959548138264, + "grad_norm": 5.859634824966476, + "learning_rate": 7.464207493638803e-06, + "loss": 17.4257, + "step": 19470 + }, + { + "epoch": 0.3559142340102729, + "grad_norm": 7.003694273833317, + "learning_rate": 7.463949924025987e-06, + "loss": 17.9359, + "step": 19471 + }, + { + "epoch": 0.3559325132067194, + "grad_norm": 6.715987847591053, + "learning_rate": 7.463692345777241e-06, + "loss": 17.4042, + "step": 19472 + }, + { + "epoch": 0.35595079240316596, + "grad_norm": 6.6440791916900235, + "learning_rate": 7.463434758893465e-06, + "loss": 17.2459, + "step": 19473 + }, + { + "epoch": 0.3559690715996125, + "grad_norm": 6.703365635526031, + "learning_rate": 7.4631771633755645e-06, + "loss": 17.6477, + "step": 19474 + }, + { + "epoch": 0.35598735079605903, + "grad_norm": 7.42266547463853, + "learning_rate": 7.462919559224442e-06, + "loss": 18.0435, + "step": 19475 + }, + { + "epoch": 0.3560056299925055, + "grad_norm": 6.53594389417586, + "learning_rate": 7.462661946441001e-06, + "loss": 17.4178, + "step": 19476 + }, + { + "epoch": 0.35602390918895205, + "grad_norm": 5.858822877464721, + "learning_rate": 7.462404325026142e-06, + "loss": 17.5212, + "step": 19477 + }, + { + "epoch": 0.3560421883853986, + "grad_norm": 6.326782648560232, + "learning_rate": 7.46214669498077e-06, + "loss": 17.1662, + "step": 19478 + }, + { + "epoch": 0.3560604675818451, + "grad_norm": 7.593968420428665, + "learning_rate": 7.461889056305789e-06, + "loss": 17.7736, + "step": 19479 + }, + { + "epoch": 0.3560787467782916, + "grad_norm": 5.912228012846317, + "learning_rate": 7.4616314090020995e-06, + "loss": 17.0636, + "step": 19480 + }, + { + "epoch": 0.35609702597473814, + "grad_norm": 6.369144309951476, + "learning_rate": 7.461373753070605e-06, + "loss": 17.5882, + "step": 19481 + }, + { + "epoch": 0.35611530517118467, + "grad_norm": 8.272102536758139, + "learning_rate": 7.4611160885122105e-06, + "loss": 17.848, + "step": 19482 + }, + { + "epoch": 0.3561335843676312, + 
"grad_norm": 6.165283653648815, + "learning_rate": 7.460858415327815e-06, + "loss": 17.2829, + "step": 19483 + }, + { + "epoch": 0.35615186356407774, + "grad_norm": 6.53035454580862, + "learning_rate": 7.460600733518326e-06, + "loss": 17.4869, + "step": 19484 + }, + { + "epoch": 0.3561701427605242, + "grad_norm": 6.209285409291938, + "learning_rate": 7.460343043084645e-06, + "loss": 17.4252, + "step": 19485 + }, + { + "epoch": 0.35618842195697076, + "grad_norm": 6.619422600433882, + "learning_rate": 7.460085344027675e-06, + "loss": 17.3904, + "step": 19486 + }, + { + "epoch": 0.3562067011534173, + "grad_norm": 6.954155104690954, + "learning_rate": 7.45982763634832e-06, + "loss": 17.5735, + "step": 19487 + }, + { + "epoch": 0.35622498034986383, + "grad_norm": 8.274983654871239, + "learning_rate": 7.45956992004748e-06, + "loss": 18.6415, + "step": 19488 + }, + { + "epoch": 0.35624325954631036, + "grad_norm": 6.702428815338919, + "learning_rate": 7.459312195126064e-06, + "loss": 17.6304, + "step": 19489 + }, + { + "epoch": 0.35626153874275684, + "grad_norm": 5.848455807410714, + "learning_rate": 7.459054461584971e-06, + "loss": 17.2761, + "step": 19490 + }, + { + "epoch": 0.3562798179392034, + "grad_norm": 6.183570962589923, + "learning_rate": 7.458796719425106e-06, + "loss": 17.489, + "step": 19491 + }, + { + "epoch": 0.3562980971356499, + "grad_norm": 6.0344839279626195, + "learning_rate": 7.458538968647371e-06, + "loss": 17.5043, + "step": 19492 + }, + { + "epoch": 0.35631637633209645, + "grad_norm": 7.7679328374348735, + "learning_rate": 7.45828120925267e-06, + "loss": 18.2713, + "step": 19493 + }, + { + "epoch": 0.356334655528543, + "grad_norm": 7.188328960680923, + "learning_rate": 7.458023441241907e-06, + "loss": 17.872, + "step": 19494 + }, + { + "epoch": 0.35635293472498947, + "grad_norm": 6.708241420470331, + "learning_rate": 7.457765664615986e-06, + "loss": 17.8736, + "step": 19495 + }, + { + "epoch": 0.356371213921436, + "grad_norm": 5.880791325612288, + "learning_rate": 7.4575078793758095e-06, + "loss": 17.4544, + "step": 19496 + }, + { + "epoch": 0.35638949311788254, + "grad_norm": 7.228093828812865, + "learning_rate": 7.4572500855222795e-06, + "loss": 17.7935, + "step": 19497 + }, + { + "epoch": 0.3564077723143291, + "grad_norm": 7.389873482180641, + "learning_rate": 7.456992283056302e-06, + "loss": 17.7376, + "step": 19498 + }, + { + "epoch": 0.3564260515107756, + "grad_norm": 6.079701279278257, + "learning_rate": 7.456734471978782e-06, + "loss": 17.4537, + "step": 19499 + }, + { + "epoch": 0.3564443307072221, + "grad_norm": 6.49756636901589, + "learning_rate": 7.456476652290619e-06, + "loss": 17.8027, + "step": 19500 + }, + { + "epoch": 0.3564626099036686, + "grad_norm": 8.343409896559569, + "learning_rate": 7.456218823992718e-06, + "loss": 18.3613, + "step": 19501 + }, + { + "epoch": 0.35648088910011516, + "grad_norm": 7.275233240696608, + "learning_rate": 7.455960987085982e-06, + "loss": 18.1414, + "step": 19502 + }, + { + "epoch": 0.3564991682965617, + "grad_norm": 5.746313584875896, + "learning_rate": 7.4557031415713185e-06, + "loss": 17.3823, + "step": 19503 + }, + { + "epoch": 0.35651744749300823, + "grad_norm": 7.209214987742282, + "learning_rate": 7.455445287449627e-06, + "loss": 17.9968, + "step": 19504 + }, + { + "epoch": 0.3565357266894547, + "grad_norm": 5.615786379902083, + "learning_rate": 7.455187424721814e-06, + "loss": 17.1986, + "step": 19505 + }, + { + "epoch": 0.35655400588590125, + "grad_norm": 6.919847812923979, + "learning_rate": 7.454929553388781e-06, 
+ "loss": 17.5477, + "step": 19506 + }, + { + "epoch": 0.3565722850823478, + "grad_norm": 6.458199993844874, + "learning_rate": 7.454671673451434e-06, + "loss": 17.3889, + "step": 19507 + }, + { + "epoch": 0.3565905642787943, + "grad_norm": 5.492010412363909, + "learning_rate": 7.454413784910675e-06, + "loss": 17.189, + "step": 19508 + }, + { + "epoch": 0.35660884347524086, + "grad_norm": 5.620456516038945, + "learning_rate": 7.454155887767409e-06, + "loss": 17.3132, + "step": 19509 + }, + { + "epoch": 0.35662712267168734, + "grad_norm": 7.333231458293356, + "learning_rate": 7.453897982022539e-06, + "loss": 17.9381, + "step": 19510 + }, + { + "epoch": 0.35664540186813387, + "grad_norm": 5.895555401955035, + "learning_rate": 7.453640067676971e-06, + "loss": 17.6563, + "step": 19511 + }, + { + "epoch": 0.3566636810645804, + "grad_norm": 6.095911156600389, + "learning_rate": 7.4533821447316045e-06, + "loss": 17.2515, + "step": 19512 + }, + { + "epoch": 0.35668196026102694, + "grad_norm": 5.91007587085715, + "learning_rate": 7.4531242131873505e-06, + "loss": 17.2879, + "step": 19513 + }, + { + "epoch": 0.3567002394574734, + "grad_norm": 7.874624189571801, + "learning_rate": 7.452866273045106e-06, + "loss": 17.926, + "step": 19514 + }, + { + "epoch": 0.35671851865391996, + "grad_norm": 6.741908116251173, + "learning_rate": 7.452608324305779e-06, + "loss": 17.5684, + "step": 19515 + }, + { + "epoch": 0.3567367978503665, + "grad_norm": 5.734303574903024, + "learning_rate": 7.452350366970273e-06, + "loss": 16.9621, + "step": 19516 + }, + { + "epoch": 0.35675507704681303, + "grad_norm": 6.827994662104138, + "learning_rate": 7.452092401039491e-06, + "loss": 17.8539, + "step": 19517 + }, + { + "epoch": 0.35677335624325957, + "grad_norm": 5.554487343322766, + "learning_rate": 7.451834426514339e-06, + "loss": 17.2516, + "step": 19518 + }, + { + "epoch": 0.35679163543970605, + "grad_norm": 8.996661982365572, + "learning_rate": 7.4515764433957195e-06, + "loss": 18.6347, + "step": 19519 + }, + { + "epoch": 0.3568099146361526, + "grad_norm": 5.777592153915319, + "learning_rate": 7.4513184516845376e-06, + "loss": 17.1962, + "step": 19520 + }, + { + "epoch": 0.3568281938325991, + "grad_norm": 6.6244143733216205, + "learning_rate": 7.451060451381696e-06, + "loss": 17.6402, + "step": 19521 + }, + { + "epoch": 0.35684647302904565, + "grad_norm": 5.736463429171136, + "learning_rate": 7.450802442488101e-06, + "loss": 17.448, + "step": 19522 + }, + { + "epoch": 0.3568647522254922, + "grad_norm": 6.225464066752719, + "learning_rate": 7.450544425004657e-06, + "loss": 17.4283, + "step": 19523 + }, + { + "epoch": 0.35688303142193867, + "grad_norm": 5.557820621065601, + "learning_rate": 7.450286398932266e-06, + "loss": 17.2756, + "step": 19524 + }, + { + "epoch": 0.3569013106183852, + "grad_norm": 7.261448883357131, + "learning_rate": 7.450028364271834e-06, + "loss": 17.9538, + "step": 19525 + }, + { + "epoch": 0.35691958981483174, + "grad_norm": 7.418123239235833, + "learning_rate": 7.449770321024265e-06, + "loss": 17.8033, + "step": 19526 + }, + { + "epoch": 0.3569378690112783, + "grad_norm": 6.38774604313727, + "learning_rate": 7.449512269190466e-06, + "loss": 17.5972, + "step": 19527 + }, + { + "epoch": 0.3569561482077248, + "grad_norm": 7.360832041667508, + "learning_rate": 7.449254208771337e-06, + "loss": 18.2713, + "step": 19528 + }, + { + "epoch": 0.3569744274041713, + "grad_norm": 5.408830883075092, + "learning_rate": 7.4489961397677835e-06, + "loss": 17.1253, + "step": 19529 + }, + { + "epoch": 
0.3569927066006178, + "grad_norm": 6.239119904201538, + "learning_rate": 7.4487380621807125e-06, + "loss": 17.3757, + "step": 19530 + }, + { + "epoch": 0.35701098579706436, + "grad_norm": 5.891247481157093, + "learning_rate": 7.4484799760110285e-06, + "loss": 17.1889, + "step": 19531 + }, + { + "epoch": 0.3570292649935109, + "grad_norm": 6.40253611384729, + "learning_rate": 7.448221881259633e-06, + "loss": 17.7897, + "step": 19532 + }, + { + "epoch": 0.35704754418995743, + "grad_norm": 6.509498408766673, + "learning_rate": 7.4479637779274315e-06, + "loss": 17.5238, + "step": 19533 + }, + { + "epoch": 0.3570658233864039, + "grad_norm": 5.435814259320647, + "learning_rate": 7.44770566601533e-06, + "loss": 16.9849, + "step": 19534 + }, + { + "epoch": 0.35708410258285045, + "grad_norm": 6.373180457230543, + "learning_rate": 7.447447545524234e-06, + "loss": 17.5635, + "step": 19535 + }, + { + "epoch": 0.357102381779297, + "grad_norm": 6.064176241219916, + "learning_rate": 7.447189416455045e-06, + "loss": 17.3874, + "step": 19536 + }, + { + "epoch": 0.3571206609757435, + "grad_norm": 7.94021393627077, + "learning_rate": 7.44693127880867e-06, + "loss": 18.064, + "step": 19537 + }, + { + "epoch": 0.35713894017219006, + "grad_norm": 6.717605353286808, + "learning_rate": 7.446673132586013e-06, + "loss": 17.7664, + "step": 19538 + }, + { + "epoch": 0.35715721936863654, + "grad_norm": 5.421782166907727, + "learning_rate": 7.446414977787979e-06, + "loss": 17.1765, + "step": 19539 + }, + { + "epoch": 0.35717549856508307, + "grad_norm": 6.563266448417799, + "learning_rate": 7.446156814415472e-06, + "loss": 17.6065, + "step": 19540 + }, + { + "epoch": 0.3571937777615296, + "grad_norm": 5.986476597498721, + "learning_rate": 7.4458986424694e-06, + "loss": 17.385, + "step": 19541 + }, + { + "epoch": 0.35721205695797614, + "grad_norm": 6.529516511890925, + "learning_rate": 7.445640461950664e-06, + "loss": 17.4231, + "step": 19542 + }, + { + "epoch": 0.3572303361544227, + "grad_norm": 7.215770537126524, + "learning_rate": 7.4453822728601695e-06, + "loss": 17.7901, + "step": 19543 + }, + { + "epoch": 0.35724861535086916, + "grad_norm": 6.153997687868142, + "learning_rate": 7.445124075198824e-06, + "loss": 17.3847, + "step": 19544 + }, + { + "epoch": 0.3572668945473157, + "grad_norm": 6.406699556092609, + "learning_rate": 7.44486586896753e-06, + "loss": 17.5249, + "step": 19545 + }, + { + "epoch": 0.35728517374376223, + "grad_norm": 5.122915638185464, + "learning_rate": 7.444607654167194e-06, + "loss": 17.0643, + "step": 19546 + }, + { + "epoch": 0.35730345294020877, + "grad_norm": 6.14367321722611, + "learning_rate": 7.444349430798718e-06, + "loss": 17.3365, + "step": 19547 + }, + { + "epoch": 0.35732173213665525, + "grad_norm": 6.8760673202196125, + "learning_rate": 7.444091198863012e-06, + "loss": 17.8723, + "step": 19548 + }, + { + "epoch": 0.3573400113331018, + "grad_norm": 9.740963472137814, + "learning_rate": 7.4438329583609785e-06, + "loss": 19.0003, + "step": 19549 + }, + { + "epoch": 0.3573582905295483, + "grad_norm": 6.189180487315558, + "learning_rate": 7.443574709293522e-06, + "loss": 17.4863, + "step": 19550 + }, + { + "epoch": 0.35737656972599485, + "grad_norm": 7.1232218886265555, + "learning_rate": 7.443316451661546e-06, + "loss": 17.9244, + "step": 19551 + }, + { + "epoch": 0.3573948489224414, + "grad_norm": 6.644580298661404, + "learning_rate": 7.4430581854659615e-06, + "loss": 17.6901, + "step": 19552 + }, + { + "epoch": 0.35741312811888787, + "grad_norm": 7.33589492496623, + "learning_rate": 
7.442799910707667e-06, + "loss": 17.77, + "step": 19553 + }, + { + "epoch": 0.3574314073153344, + "grad_norm": 7.316198263494221, + "learning_rate": 7.442541627387572e-06, + "loss": 18.1297, + "step": 19554 + }, + { + "epoch": 0.35744968651178094, + "grad_norm": 7.166057386822801, + "learning_rate": 7.442283335506582e-06, + "loss": 17.8345, + "step": 19555 + }, + { + "epoch": 0.3574679657082275, + "grad_norm": 6.646128805745733, + "learning_rate": 7.442025035065598e-06, + "loss": 17.4866, + "step": 19556 + }, + { + "epoch": 0.357486244904674, + "grad_norm": 6.057776358116597, + "learning_rate": 7.441766726065529e-06, + "loss": 17.4085, + "step": 19557 + }, + { + "epoch": 0.3575045241011205, + "grad_norm": 6.231824837767921, + "learning_rate": 7.441508408507281e-06, + "loss": 17.4139, + "step": 19558 + }, + { + "epoch": 0.357522803297567, + "grad_norm": 6.982239309763197, + "learning_rate": 7.441250082391756e-06, + "loss": 18.093, + "step": 19559 + }, + { + "epoch": 0.35754108249401356, + "grad_norm": 7.365303753528583, + "learning_rate": 7.440991747719863e-06, + "loss": 17.7381, + "step": 19560 + }, + { + "epoch": 0.3575593616904601, + "grad_norm": 6.102411056837014, + "learning_rate": 7.440733404492504e-06, + "loss": 17.3601, + "step": 19561 + }, + { + "epoch": 0.35757764088690663, + "grad_norm": 8.092326201907957, + "learning_rate": 7.4404750527105885e-06, + "loss": 18.417, + "step": 19562 + }, + { + "epoch": 0.3575959200833531, + "grad_norm": 5.923153029325546, + "learning_rate": 7.440216692375017e-06, + "loss": 17.0967, + "step": 19563 + }, + { + "epoch": 0.35761419927979965, + "grad_norm": 6.1980079939452795, + "learning_rate": 7.4399583234867005e-06, + "loss": 17.4764, + "step": 19564 + }, + { + "epoch": 0.3576324784762462, + "grad_norm": 6.10555734087733, + "learning_rate": 7.43969994604654e-06, + "loss": 17.4802, + "step": 19565 + }, + { + "epoch": 0.3576507576726927, + "grad_norm": 5.356375823423826, + "learning_rate": 7.439441560055443e-06, + "loss": 17.0387, + "step": 19566 + }, + { + "epoch": 0.35766903686913926, + "grad_norm": 5.8036224360158135, + "learning_rate": 7.4391831655143155e-06, + "loss": 17.2135, + "step": 19567 + }, + { + "epoch": 0.35768731606558574, + "grad_norm": 6.727786201025457, + "learning_rate": 7.4389247624240635e-06, + "loss": 17.4191, + "step": 19568 + }, + { + "epoch": 0.35770559526203227, + "grad_norm": 6.942808237998604, + "learning_rate": 7.43866635078559e-06, + "loss": 17.7536, + "step": 19569 + }, + { + "epoch": 0.3577238744584788, + "grad_norm": 7.271246304521421, + "learning_rate": 7.438407930599802e-06, + "loss": 17.9878, + "step": 19570 + }, + { + "epoch": 0.35774215365492534, + "grad_norm": 7.343102527902273, + "learning_rate": 7.438149501867609e-06, + "loss": 17.6091, + "step": 19571 + }, + { + "epoch": 0.3577604328513719, + "grad_norm": 7.014358727902585, + "learning_rate": 7.437891064589912e-06, + "loss": 17.4978, + "step": 19572 + }, + { + "epoch": 0.35777871204781836, + "grad_norm": 6.316023574272729, + "learning_rate": 7.437632618767619e-06, + "loss": 17.6849, + "step": 19573 + }, + { + "epoch": 0.3577969912442649, + "grad_norm": 7.1173021736225, + "learning_rate": 7.437374164401632e-06, + "loss": 18.0485, + "step": 19574 + }, + { + "epoch": 0.35781527044071143, + "grad_norm": 6.922034267789044, + "learning_rate": 7.437115701492863e-06, + "loss": 17.6598, + "step": 19575 + }, + { + "epoch": 0.35783354963715797, + "grad_norm": 6.941606087333205, + "learning_rate": 7.436857230042215e-06, + "loss": 17.7257, + "step": 19576 + }, + { + 
"epoch": 0.3578518288336045, + "grad_norm": 5.806717517611175, + "learning_rate": 7.436598750050593e-06, + "loss": 17.2348, + "step": 19577 + }, + { + "epoch": 0.357870108030051, + "grad_norm": 7.204718588209834, + "learning_rate": 7.436340261518904e-06, + "loss": 17.6794, + "step": 19578 + }, + { + "epoch": 0.3578883872264975, + "grad_norm": 5.228357199531201, + "learning_rate": 7.436081764448054e-06, + "loss": 16.9076, + "step": 19579 + }, + { + "epoch": 0.35790666642294405, + "grad_norm": 8.135319780959174, + "learning_rate": 7.4358232588389475e-06, + "loss": 18.3858, + "step": 19580 + }, + { + "epoch": 0.3579249456193906, + "grad_norm": 6.395997239594845, + "learning_rate": 7.435564744692494e-06, + "loss": 17.4644, + "step": 19581 + }, + { + "epoch": 0.35794322481583707, + "grad_norm": 6.621546714164927, + "learning_rate": 7.435306222009597e-06, + "loss": 17.5496, + "step": 19582 + }, + { + "epoch": 0.3579615040122836, + "grad_norm": 7.346892370167702, + "learning_rate": 7.435047690791162e-06, + "loss": 17.5626, + "step": 19583 + }, + { + "epoch": 0.35797978320873014, + "grad_norm": 5.783952019402652, + "learning_rate": 7.434789151038097e-06, + "loss": 17.3614, + "step": 19584 + }, + { + "epoch": 0.3579980624051767, + "grad_norm": 5.760549772703515, + "learning_rate": 7.434530602751307e-06, + "loss": 17.2567, + "step": 19585 + }, + { + "epoch": 0.3580163416016232, + "grad_norm": 6.478047570788055, + "learning_rate": 7.434272045931698e-06, + "loss": 17.4649, + "step": 19586 + }, + { + "epoch": 0.3580346207980697, + "grad_norm": 6.221514341852532, + "learning_rate": 7.434013480580178e-06, + "loss": 17.2979, + "step": 19587 + }, + { + "epoch": 0.3580528999945162, + "grad_norm": 8.038249083522063, + "learning_rate": 7.43375490669765e-06, + "loss": 18.2009, + "step": 19588 + }, + { + "epoch": 0.35807117919096276, + "grad_norm": 7.153169603424881, + "learning_rate": 7.433496324285023e-06, + "loss": 17.8616, + "step": 19589 + }, + { + "epoch": 0.3580894583874093, + "grad_norm": 6.140821138713422, + "learning_rate": 7.433237733343204e-06, + "loss": 17.2862, + "step": 19590 + }, + { + "epoch": 0.35810773758385583, + "grad_norm": 6.303164117025946, + "learning_rate": 7.432979133873096e-06, + "loss": 17.1239, + "step": 19591 + }, + { + "epoch": 0.3581260167803023, + "grad_norm": 7.40032118331771, + "learning_rate": 7.432720525875608e-06, + "loss": 17.7626, + "step": 19592 + }, + { + "epoch": 0.35814429597674885, + "grad_norm": 5.211355925358332, + "learning_rate": 7.432461909351646e-06, + "loss": 17.0628, + "step": 19593 + }, + { + "epoch": 0.3581625751731954, + "grad_norm": 7.288194341296274, + "learning_rate": 7.432203284302115e-06, + "loss": 18.2211, + "step": 19594 + }, + { + "epoch": 0.3581808543696419, + "grad_norm": 7.43978040314632, + "learning_rate": 7.431944650727924e-06, + "loss": 17.9704, + "step": 19595 + }, + { + "epoch": 0.35819913356608846, + "grad_norm": 8.187597270207503, + "learning_rate": 7.431686008629978e-06, + "loss": 18.0001, + "step": 19596 + }, + { + "epoch": 0.35821741276253494, + "grad_norm": 7.247317235545807, + "learning_rate": 7.431427358009182e-06, + "loss": 17.9215, + "step": 19597 + }, + { + "epoch": 0.3582356919589815, + "grad_norm": 7.942615379347667, + "learning_rate": 7.431168698866444e-06, + "loss": 17.7566, + "step": 19598 + }, + { + "epoch": 0.358253971155428, + "grad_norm": 5.567173124767692, + "learning_rate": 7.430910031202673e-06, + "loss": 17.1107, + "step": 19599 + }, + { + "epoch": 0.35827225035187454, + "grad_norm": 6.038390410749337, + 
"learning_rate": 7.430651355018772e-06, + "loss": 17.3829, + "step": 19600 + }, + { + "epoch": 0.3582905295483211, + "grad_norm": 6.185621499809661, + "learning_rate": 7.4303926703156484e-06, + "loss": 17.3401, + "step": 19601 + }, + { + "epoch": 0.35830880874476756, + "grad_norm": 6.998107385390362, + "learning_rate": 7.43013397709421e-06, + "loss": 17.6478, + "step": 19602 + }, + { + "epoch": 0.3583270879412141, + "grad_norm": 6.423917792766361, + "learning_rate": 7.429875275355364e-06, + "loss": 17.5313, + "step": 19603 + }, + { + "epoch": 0.35834536713766063, + "grad_norm": 6.190849407069878, + "learning_rate": 7.429616565100014e-06, + "loss": 17.6826, + "step": 19604 + }, + { + "epoch": 0.35836364633410717, + "grad_norm": 6.864104331432642, + "learning_rate": 7.429357846329069e-06, + "loss": 17.4815, + "step": 19605 + }, + { + "epoch": 0.3583819255305537, + "grad_norm": 5.633235072213729, + "learning_rate": 7.429099119043437e-06, + "loss": 17.101, + "step": 19606 + }, + { + "epoch": 0.3584002047270002, + "grad_norm": 7.173497231752361, + "learning_rate": 7.428840383244023e-06, + "loss": 17.5816, + "step": 19607 + }, + { + "epoch": 0.3584184839234467, + "grad_norm": 6.824575995155842, + "learning_rate": 7.428581638931734e-06, + "loss": 17.7751, + "step": 19608 + }, + { + "epoch": 0.35843676311989325, + "grad_norm": 7.294082566289569, + "learning_rate": 7.428322886107475e-06, + "loss": 17.9937, + "step": 19609 + }, + { + "epoch": 0.3584550423163398, + "grad_norm": 6.336024020206503, + "learning_rate": 7.428064124772158e-06, + "loss": 17.5486, + "step": 19610 + }, + { + "epoch": 0.3584733215127863, + "grad_norm": 6.8384419425680045, + "learning_rate": 7.4278053549266845e-06, + "loss": 17.8061, + "step": 19611 + }, + { + "epoch": 0.3584916007092328, + "grad_norm": 6.8563408704511835, + "learning_rate": 7.427546576571966e-06, + "loss": 17.8507, + "step": 19612 + }, + { + "epoch": 0.35850987990567934, + "grad_norm": 5.672177529612705, + "learning_rate": 7.427287789708907e-06, + "loss": 17.2885, + "step": 19613 + }, + { + "epoch": 0.3585281591021259, + "grad_norm": 6.383419232609466, + "learning_rate": 7.427028994338414e-06, + "loss": 17.3135, + "step": 19614 + }, + { + "epoch": 0.3585464382985724, + "grad_norm": 7.179162938060904, + "learning_rate": 7.426770190461394e-06, + "loss": 17.9906, + "step": 19615 + }, + { + "epoch": 0.3585647174950189, + "grad_norm": 5.885327218747135, + "learning_rate": 7.4265113780787575e-06, + "loss": 17.1926, + "step": 19616 + }, + { + "epoch": 0.3585829966914654, + "grad_norm": 6.841343611008343, + "learning_rate": 7.426252557191409e-06, + "loss": 17.5999, + "step": 19617 + }, + { + "epoch": 0.35860127588791196, + "grad_norm": 5.706515078920045, + "learning_rate": 7.425993727800255e-06, + "loss": 17.1247, + "step": 19618 + }, + { + "epoch": 0.3586195550843585, + "grad_norm": 5.059189919687213, + "learning_rate": 7.425734889906203e-06, + "loss": 16.9361, + "step": 19619 + }, + { + "epoch": 0.35863783428080503, + "grad_norm": 5.500086691594506, + "learning_rate": 7.425476043510161e-06, + "loss": 17.0845, + "step": 19620 + }, + { + "epoch": 0.3586561134772515, + "grad_norm": 6.27450390437792, + "learning_rate": 7.4252171886130365e-06, + "loss": 17.4172, + "step": 19621 + }, + { + "epoch": 0.35867439267369805, + "grad_norm": 5.902742990740609, + "learning_rate": 7.424958325215736e-06, + "loss": 17.596, + "step": 19622 + }, + { + "epoch": 0.3586926718701446, + "grad_norm": 6.644123805087707, + "learning_rate": 7.424699453319166e-06, + "loss": 17.4616, + "step": 
19623 + }, + { + "epoch": 0.3587109510665911, + "grad_norm": 7.793181924074182, + "learning_rate": 7.424440572924236e-06, + "loss": 17.9932, + "step": 19624 + }, + { + "epoch": 0.35872923026303766, + "grad_norm": 9.1000473577842, + "learning_rate": 7.424181684031853e-06, + "loss": 18.4022, + "step": 19625 + }, + { + "epoch": 0.35874750945948414, + "grad_norm": 6.450341745546176, + "learning_rate": 7.423922786642922e-06, + "loss": 17.4903, + "step": 19626 + }, + { + "epoch": 0.3587657886559307, + "grad_norm": 5.945848969936562, + "learning_rate": 7.423663880758354e-06, + "loss": 17.0628, + "step": 19627 + }, + { + "epoch": 0.3587840678523772, + "grad_norm": 5.344817447692296, + "learning_rate": 7.423404966379052e-06, + "loss": 17.0436, + "step": 19628 + }, + { + "epoch": 0.35880234704882374, + "grad_norm": 6.273733286302565, + "learning_rate": 7.4231460435059255e-06, + "loss": 17.3068, + "step": 19629 + }, + { + "epoch": 0.3588206262452703, + "grad_norm": 6.762580780150825, + "learning_rate": 7.422887112139884e-06, + "loss": 17.5117, + "step": 19630 + }, + { + "epoch": 0.35883890544171676, + "grad_norm": 12.948570375745069, + "learning_rate": 7.422628172281834e-06, + "loss": 18.4008, + "step": 19631 + }, + { + "epoch": 0.3588571846381633, + "grad_norm": 7.659120306546429, + "learning_rate": 7.422369223932682e-06, + "loss": 18.1467, + "step": 19632 + }, + { + "epoch": 0.35887546383460983, + "grad_norm": 6.1455289098079176, + "learning_rate": 7.422110267093334e-06, + "loss": 17.3468, + "step": 19633 + }, + { + "epoch": 0.35889374303105637, + "grad_norm": 7.165817662349481, + "learning_rate": 7.421851301764702e-06, + "loss": 17.7761, + "step": 19634 + }, + { + "epoch": 0.3589120222275029, + "grad_norm": 6.38528561482254, + "learning_rate": 7.421592327947691e-06, + "loss": 17.6262, + "step": 19635 + }, + { + "epoch": 0.3589303014239494, + "grad_norm": 8.56793241169269, + "learning_rate": 7.421333345643208e-06, + "loss": 17.9467, + "step": 19636 + }, + { + "epoch": 0.3589485806203959, + "grad_norm": 6.218560703794342, + "learning_rate": 7.4210743548521625e-06, + "loss": 17.5763, + "step": 19637 + }, + { + "epoch": 0.35896685981684245, + "grad_norm": 7.398152881594439, + "learning_rate": 7.4208153555754615e-06, + "loss": 17.7664, + "step": 19638 + }, + { + "epoch": 0.358985139013289, + "grad_norm": 8.148209656499118, + "learning_rate": 7.420556347814012e-06, + "loss": 18.0681, + "step": 19639 + }, + { + "epoch": 0.3590034182097355, + "grad_norm": 5.627743029720429, + "learning_rate": 7.420297331568723e-06, + "loss": 17.3774, + "step": 19640 + }, + { + "epoch": 0.359021697406182, + "grad_norm": 7.478425081380414, + "learning_rate": 7.420038306840503e-06, + "loss": 18.0255, + "step": 19641 + }, + { + "epoch": 0.35903997660262854, + "grad_norm": 6.609427342782146, + "learning_rate": 7.419779273630256e-06, + "loss": 17.5987, + "step": 19642 + }, + { + "epoch": 0.3590582557990751, + "grad_norm": 7.193156920052047, + "learning_rate": 7.419520231938895e-06, + "loss": 17.4684, + "step": 19643 + }, + { + "epoch": 0.3590765349955216, + "grad_norm": 5.748449793544554, + "learning_rate": 7.419261181767326e-06, + "loss": 17.293, + "step": 19644 + }, + { + "epoch": 0.35909481419196815, + "grad_norm": 8.081670635816513, + "learning_rate": 7.419002123116456e-06, + "loss": 18.1125, + "step": 19645 + }, + { + "epoch": 0.35911309338841463, + "grad_norm": 5.235679311443473, + "learning_rate": 7.418743055987192e-06, + "loss": 16.8445, + "step": 19646 + }, + { + "epoch": 0.35913137258486116, + "grad_norm": 
7.671628058202542, + "learning_rate": 7.418483980380444e-06, + "loss": 18.4453, + "step": 19647 + }, + { + "epoch": 0.3591496517813077, + "grad_norm": 5.769573530475302, + "learning_rate": 7.418224896297121e-06, + "loss": 17.2692, + "step": 19648 + }, + { + "epoch": 0.35916793097775424, + "grad_norm": 6.803533960951056, + "learning_rate": 7.417965803738127e-06, + "loss": 17.6479, + "step": 19649 + }, + { + "epoch": 0.3591862101742007, + "grad_norm": 6.928022572209373, + "learning_rate": 7.417706702704375e-06, + "loss": 17.8475, + "step": 19650 + }, + { + "epoch": 0.35920448937064725, + "grad_norm": 6.335909893000433, + "learning_rate": 7.4174475931967705e-06, + "loss": 17.6418, + "step": 19651 + }, + { + "epoch": 0.3592227685670938, + "grad_norm": 9.00018364645467, + "learning_rate": 7.417188475216222e-06, + "loss": 17.9307, + "step": 19652 + }, + { + "epoch": 0.3592410477635403, + "grad_norm": 5.782366549676033, + "learning_rate": 7.416929348763636e-06, + "loss": 17.0915, + "step": 19653 + }, + { + "epoch": 0.35925932695998686, + "grad_norm": 7.1325422603754, + "learning_rate": 7.416670213839924e-06, + "loss": 17.5685, + "step": 19654 + }, + { + "epoch": 0.35927760615643334, + "grad_norm": 6.932598907319226, + "learning_rate": 7.416411070445992e-06, + "loss": 17.8335, + "step": 19655 + }, + { + "epoch": 0.3592958853528799, + "grad_norm": 7.340339314323664, + "learning_rate": 7.416151918582748e-06, + "loss": 17.9561, + "step": 19656 + }, + { + "epoch": 0.3593141645493264, + "grad_norm": 7.09431420271154, + "learning_rate": 7.415892758251102e-06, + "loss": 17.8449, + "step": 19657 + }, + { + "epoch": 0.35933244374577294, + "grad_norm": 6.842800698365112, + "learning_rate": 7.415633589451963e-06, + "loss": 17.6761, + "step": 19658 + }, + { + "epoch": 0.3593507229422195, + "grad_norm": 6.4437469966055225, + "learning_rate": 7.4153744121862356e-06, + "loss": 17.432, + "step": 19659 + }, + { + "epoch": 0.35936900213866596, + "grad_norm": 6.043227159248423, + "learning_rate": 7.4151152264548325e-06, + "loss": 17.367, + "step": 19660 + }, + { + "epoch": 0.3593872813351125, + "grad_norm": 5.698312052736918, + "learning_rate": 7.414856032258657e-06, + "loss": 17.2209, + "step": 19661 + }, + { + "epoch": 0.35940556053155903, + "grad_norm": 5.7025338404511485, + "learning_rate": 7.414596829598624e-06, + "loss": 17.1254, + "step": 19662 + }, + { + "epoch": 0.35942383972800557, + "grad_norm": 7.415967652311182, + "learning_rate": 7.414337618475638e-06, + "loss": 17.956, + "step": 19663 + }, + { + "epoch": 0.3594421189244521, + "grad_norm": 7.879265535384837, + "learning_rate": 7.414078398890607e-06, + "loss": 18.2849, + "step": 19664 + }, + { + "epoch": 0.3594603981208986, + "grad_norm": 6.156486612932872, + "learning_rate": 7.413819170844441e-06, + "loss": 17.5195, + "step": 19665 + }, + { + "epoch": 0.3594786773173451, + "grad_norm": 6.833262405969314, + "learning_rate": 7.4135599343380485e-06, + "loss": 17.6161, + "step": 19666 + }, + { + "epoch": 0.35949695651379165, + "grad_norm": 5.941007952575729, + "learning_rate": 7.413300689372338e-06, + "loss": 17.3599, + "step": 19667 + }, + { + "epoch": 0.3595152357102382, + "grad_norm": 7.484804191904205, + "learning_rate": 7.413041435948218e-06, + "loss": 18.2024, + "step": 19668 + }, + { + "epoch": 0.3595335149066847, + "grad_norm": 5.704438021847672, + "learning_rate": 7.412782174066596e-06, + "loss": 17.3, + "step": 19669 + }, + { + "epoch": 0.3595517941031312, + "grad_norm": 6.185017125210529, + "learning_rate": 7.412522903728383e-06, + "loss": 
17.2473, + "step": 19670 + }, + { + "epoch": 0.35957007329957774, + "grad_norm": 5.671060004639427, + "learning_rate": 7.412263624934486e-06, + "loss": 17.3157, + "step": 19671 + }, + { + "epoch": 0.3595883524960243, + "grad_norm": 6.884825009983396, + "learning_rate": 7.412004337685817e-06, + "loss": 17.9186, + "step": 19672 + }, + { + "epoch": 0.3596066316924708, + "grad_norm": 7.112803035888674, + "learning_rate": 7.411745041983279e-06, + "loss": 17.7299, + "step": 19673 + }, + { + "epoch": 0.35962491088891735, + "grad_norm": 7.537168009725052, + "learning_rate": 7.411485737827784e-06, + "loss": 17.6849, + "step": 19674 + }, + { + "epoch": 0.35964319008536383, + "grad_norm": 8.076727056397893, + "learning_rate": 7.411226425220241e-06, + "loss": 17.8502, + "step": 19675 + }, + { + "epoch": 0.35966146928181036, + "grad_norm": 7.422706306015047, + "learning_rate": 7.41096710416156e-06, + "loss": 17.7832, + "step": 19676 + }, + { + "epoch": 0.3596797484782569, + "grad_norm": 6.264827269575593, + "learning_rate": 7.410707774652648e-06, + "loss": 17.4166, + "step": 19677 + }, + { + "epoch": 0.35969802767470344, + "grad_norm": 6.39577954566566, + "learning_rate": 7.410448436694413e-06, + "loss": 17.6863, + "step": 19678 + }, + { + "epoch": 0.35971630687114997, + "grad_norm": 6.806574691474655, + "learning_rate": 7.410189090287767e-06, + "loss": 17.4876, + "step": 19679 + }, + { + "epoch": 0.35973458606759645, + "grad_norm": 5.805553498268519, + "learning_rate": 7.4099297354336165e-06, + "loss": 17.1755, + "step": 19680 + }, + { + "epoch": 0.359752865264043, + "grad_norm": 6.210606853116422, + "learning_rate": 7.4096703721328724e-06, + "loss": 17.4134, + "step": 19681 + }, + { + "epoch": 0.3597711444604895, + "grad_norm": 7.514337354941543, + "learning_rate": 7.4094110003864425e-06, + "loss": 18.1328, + "step": 19682 + }, + { + "epoch": 0.35978942365693606, + "grad_norm": 7.110195460046373, + "learning_rate": 7.409151620195234e-06, + "loss": 17.6775, + "step": 19683 + }, + { + "epoch": 0.35980770285338254, + "grad_norm": 6.409499865083177, + "learning_rate": 7.4088922315601605e-06, + "loss": 17.4388, + "step": 19684 + }, + { + "epoch": 0.3598259820498291, + "grad_norm": 6.406989787801191, + "learning_rate": 7.408632834482128e-06, + "loss": 17.4169, + "step": 19685 + }, + { + "epoch": 0.3598442612462756, + "grad_norm": 8.533000130720591, + "learning_rate": 7.408373428962048e-06, + "loss": 17.9988, + "step": 19686 + }, + { + "epoch": 0.35986254044272215, + "grad_norm": 7.258823821723709, + "learning_rate": 7.408114015000826e-06, + "loss": 18.2898, + "step": 19687 + }, + { + "epoch": 0.3598808196391687, + "grad_norm": 6.27707598990602, + "learning_rate": 7.407854592599373e-06, + "loss": 17.4093, + "step": 19688 + }, + { + "epoch": 0.35989909883561516, + "grad_norm": 6.565035756992462, + "learning_rate": 7.407595161758601e-06, + "loss": 17.8111, + "step": 19689 + }, + { + "epoch": 0.3599173780320617, + "grad_norm": 7.039274284803448, + "learning_rate": 7.407335722479415e-06, + "loss": 17.7455, + "step": 19690 + }, + { + "epoch": 0.35993565722850823, + "grad_norm": 6.696144712301817, + "learning_rate": 7.407076274762727e-06, + "loss": 17.4365, + "step": 19691 + }, + { + "epoch": 0.35995393642495477, + "grad_norm": 6.206007733573061, + "learning_rate": 7.406816818609445e-06, + "loss": 17.5794, + "step": 19692 + }, + { + "epoch": 0.3599722156214013, + "grad_norm": 8.493781781946268, + "learning_rate": 7.406557354020478e-06, + "loss": 18.0151, + "step": 19693 + }, + { + "epoch": 0.3599904948178478, + 
"grad_norm": 7.626843398616922, + "learning_rate": 7.406297880996738e-06, + "loss": 18.0236, + "step": 19694 + }, + { + "epoch": 0.3600087740142943, + "grad_norm": 6.942237055756598, + "learning_rate": 7.406038399539133e-06, + "loss": 17.555, + "step": 19695 + }, + { + "epoch": 0.36002705321074085, + "grad_norm": 6.615219864669865, + "learning_rate": 7.405778909648571e-06, + "loss": 17.548, + "step": 19696 + }, + { + "epoch": 0.3600453324071874, + "grad_norm": 6.089896410559852, + "learning_rate": 7.405519411325962e-06, + "loss": 17.6716, + "step": 19697 + }, + { + "epoch": 0.3600636116036339, + "grad_norm": 5.7815032990880715, + "learning_rate": 7.405259904572218e-06, + "loss": 17.4445, + "step": 19698 + }, + { + "epoch": 0.3600818908000804, + "grad_norm": 7.686329575038699, + "learning_rate": 7.405000389388246e-06, + "loss": 17.9146, + "step": 19699 + }, + { + "epoch": 0.36010016999652694, + "grad_norm": 9.216626247454519, + "learning_rate": 7.404740865774956e-06, + "loss": 18.9646, + "step": 19700 + }, + { + "epoch": 0.3601184491929735, + "grad_norm": 6.141974855088552, + "learning_rate": 7.404481333733258e-06, + "loss": 17.4394, + "step": 19701 + }, + { + "epoch": 0.36013672838942, + "grad_norm": 6.143262419367804, + "learning_rate": 7.404221793264062e-06, + "loss": 17.4181, + "step": 19702 + }, + { + "epoch": 0.36015500758586655, + "grad_norm": 5.082826702009226, + "learning_rate": 7.403962244368278e-06, + "loss": 16.8467, + "step": 19703 + }, + { + "epoch": 0.36017328678231303, + "grad_norm": 7.543973184689073, + "learning_rate": 7.403702687046813e-06, + "loss": 18.0566, + "step": 19704 + }, + { + "epoch": 0.36019156597875956, + "grad_norm": 6.4485019873172496, + "learning_rate": 7.40344312130058e-06, + "loss": 17.5813, + "step": 19705 + }, + { + "epoch": 0.3602098451752061, + "grad_norm": 6.611479357754433, + "learning_rate": 7.4031835471304865e-06, + "loss": 18.2218, + "step": 19706 + }, + { + "epoch": 0.36022812437165264, + "grad_norm": 6.891696190007479, + "learning_rate": 7.402923964537444e-06, + "loss": 17.6386, + "step": 19707 + }, + { + "epoch": 0.36024640356809917, + "grad_norm": 6.611510491468658, + "learning_rate": 7.4026643735223615e-06, + "loss": 17.9912, + "step": 19708 + }, + { + "epoch": 0.36026468276454565, + "grad_norm": 5.8347357933883925, + "learning_rate": 7.402404774086149e-06, + "loss": 17.4233, + "step": 19709 + }, + { + "epoch": 0.3602829619609922, + "grad_norm": 5.884081020716994, + "learning_rate": 7.402145166229715e-06, + "loss": 17.4612, + "step": 19710 + }, + { + "epoch": 0.3603012411574387, + "grad_norm": 4.938148253713487, + "learning_rate": 7.401885549953972e-06, + "loss": 16.6768, + "step": 19711 + }, + { + "epoch": 0.36031952035388526, + "grad_norm": 6.006304997933859, + "learning_rate": 7.401625925259828e-06, + "loss": 17.3801, + "step": 19712 + }, + { + "epoch": 0.3603377995503318, + "grad_norm": 5.800407938715674, + "learning_rate": 7.401366292148195e-06, + "loss": 17.4753, + "step": 19713 + }, + { + "epoch": 0.3603560787467783, + "grad_norm": 6.698856927273207, + "learning_rate": 7.40110665061998e-06, + "loss": 17.4803, + "step": 19714 + }, + { + "epoch": 0.3603743579432248, + "grad_norm": 5.772585813016475, + "learning_rate": 7.400847000676094e-06, + "loss": 17.2274, + "step": 19715 + }, + { + "epoch": 0.36039263713967135, + "grad_norm": 6.647914607446682, + "learning_rate": 7.400587342317448e-06, + "loss": 17.5091, + "step": 19716 + }, + { + "epoch": 0.3604109163361179, + "grad_norm": 8.63686879543823, + "learning_rate": 7.400327675544953e-06, 
+ "loss": 18.5806, + "step": 19717 + }, + { + "epoch": 0.36042919553256436, + "grad_norm": 7.582196247257612, + "learning_rate": 7.400068000359517e-06, + "loss": 18.1169, + "step": 19718 + }, + { + "epoch": 0.3604474747290109, + "grad_norm": 6.340555182234325, + "learning_rate": 7.39980831676205e-06, + "loss": 17.6335, + "step": 19719 + }, + { + "epoch": 0.36046575392545743, + "grad_norm": 6.6482684547007675, + "learning_rate": 7.399548624753464e-06, + "loss": 17.6152, + "step": 19720 + }, + { + "epoch": 0.36048403312190397, + "grad_norm": 5.909967438297958, + "learning_rate": 7.399288924334669e-06, + "loss": 17.4626, + "step": 19721 + }, + { + "epoch": 0.3605023123183505, + "grad_norm": 6.005151272205654, + "learning_rate": 7.399029215506573e-06, + "loss": 17.4018, + "step": 19722 + }, + { + "epoch": 0.360520591514797, + "grad_norm": 7.12043830715943, + "learning_rate": 7.398769498270089e-06, + "loss": 17.5324, + "step": 19723 + }, + { + "epoch": 0.3605388707112435, + "grad_norm": 5.024518202423369, + "learning_rate": 7.398509772626125e-06, + "loss": 16.9543, + "step": 19724 + }, + { + "epoch": 0.36055714990769006, + "grad_norm": 6.777717028682303, + "learning_rate": 7.398250038575592e-06, + "loss": 17.6355, + "step": 19725 + }, + { + "epoch": 0.3605754291041366, + "grad_norm": 6.549358447587129, + "learning_rate": 7.397990296119402e-06, + "loss": 17.546, + "step": 19726 + }, + { + "epoch": 0.3605937083005831, + "grad_norm": 6.683853120093345, + "learning_rate": 7.397730545258465e-06, + "loss": 17.6049, + "step": 19727 + }, + { + "epoch": 0.3606119874970296, + "grad_norm": 6.851753428347234, + "learning_rate": 7.3974707859936875e-06, + "loss": 17.3464, + "step": 19728 + }, + { + "epoch": 0.36063026669347614, + "grad_norm": 4.364639334883509, + "learning_rate": 7.397211018325985e-06, + "loss": 16.718, + "step": 19729 + }, + { + "epoch": 0.3606485458899227, + "grad_norm": 7.298185805244727, + "learning_rate": 7.396951242256265e-06, + "loss": 18.0385, + "step": 19730 + }, + { + "epoch": 0.3606668250863692, + "grad_norm": 6.818757348557367, + "learning_rate": 7.396691457785441e-06, + "loss": 17.7227, + "step": 19731 + }, + { + "epoch": 0.36068510428281575, + "grad_norm": 6.243456883073716, + "learning_rate": 7.39643166491442e-06, + "loss": 17.4463, + "step": 19732 + }, + { + "epoch": 0.36070338347926223, + "grad_norm": 7.381302393938972, + "learning_rate": 7.396171863644112e-06, + "loss": 17.7082, + "step": 19733 + }, + { + "epoch": 0.36072166267570877, + "grad_norm": 7.6168563506880975, + "learning_rate": 7.395912053975432e-06, + "loss": 18.3822, + "step": 19734 + }, + { + "epoch": 0.3607399418721553, + "grad_norm": 7.06514164437329, + "learning_rate": 7.395652235909287e-06, + "loss": 17.9721, + "step": 19735 + }, + { + "epoch": 0.36075822106860184, + "grad_norm": 6.251879282186458, + "learning_rate": 7.39539240944659e-06, + "loss": 17.3321, + "step": 19736 + }, + { + "epoch": 0.36077650026504837, + "grad_norm": 8.067429593127484, + "learning_rate": 7.3951325745882495e-06, + "loss": 17.7748, + "step": 19737 + }, + { + "epoch": 0.36079477946149485, + "grad_norm": 6.340137944056408, + "learning_rate": 7.394872731335177e-06, + "loss": 17.0545, + "step": 19738 + }, + { + "epoch": 0.3608130586579414, + "grad_norm": 7.021389629665991, + "learning_rate": 7.394612879688285e-06, + "loss": 17.594, + "step": 19739 + }, + { + "epoch": 0.3608313378543879, + "grad_norm": 6.903716597008266, + "learning_rate": 7.3943530196484815e-06, + "loss": 17.6848, + "step": 19740 + }, + { + "epoch": 
0.36084961705083446, + "grad_norm": 7.48213399992343, + "learning_rate": 7.394093151216679e-06, + "loss": 18.2631, + "step": 19741 + }, + { + "epoch": 0.360867896247281, + "grad_norm": 5.734938813390531, + "learning_rate": 7.393833274393786e-06, + "loss": 17.0842, + "step": 19742 + }, + { + "epoch": 0.3608861754437275, + "grad_norm": 6.20768880816114, + "learning_rate": 7.393573389180716e-06, + "loss": 17.5858, + "step": 19743 + }, + { + "epoch": 0.360904454640174, + "grad_norm": 6.778458834230044, + "learning_rate": 7.393313495578381e-06, + "loss": 17.7546, + "step": 19744 + }, + { + "epoch": 0.36092273383662055, + "grad_norm": 5.675230261919206, + "learning_rate": 7.393053593587688e-06, + "loss": 17.2096, + "step": 19745 + }, + { + "epoch": 0.3609410130330671, + "grad_norm": 5.775884309642294, + "learning_rate": 7.392793683209549e-06, + "loss": 17.075, + "step": 19746 + }, + { + "epoch": 0.3609592922295136, + "grad_norm": 6.958690337990445, + "learning_rate": 7.392533764444876e-06, + "loss": 17.7834, + "step": 19747 + }, + { + "epoch": 0.3609775714259601, + "grad_norm": 7.253467290962211, + "learning_rate": 7.392273837294581e-06, + "loss": 17.8554, + "step": 19748 + }, + { + "epoch": 0.36099585062240663, + "grad_norm": 6.391119217486749, + "learning_rate": 7.3920139017595735e-06, + "loss": 17.5906, + "step": 19749 + }, + { + "epoch": 0.36101412981885317, + "grad_norm": 7.451194477337203, + "learning_rate": 7.391753957840765e-06, + "loss": 17.5156, + "step": 19750 + }, + { + "epoch": 0.3610324090152997, + "grad_norm": 7.478802256143311, + "learning_rate": 7.391494005539066e-06, + "loss": 17.6013, + "step": 19751 + }, + { + "epoch": 0.3610506882117462, + "grad_norm": 6.444095772568898, + "learning_rate": 7.391234044855388e-06, + "loss": 17.4123, + "step": 19752 + }, + { + "epoch": 0.3610689674081927, + "grad_norm": 6.233308545837031, + "learning_rate": 7.390974075790643e-06, + "loss": 17.3002, + "step": 19753 + }, + { + "epoch": 0.36108724660463926, + "grad_norm": 6.2841775562864495, + "learning_rate": 7.390714098345739e-06, + "loss": 17.6364, + "step": 19754 + }, + { + "epoch": 0.3611055258010858, + "grad_norm": 6.062349416057556, + "learning_rate": 7.390454112521592e-06, + "loss": 17.3652, + "step": 19755 + }, + { + "epoch": 0.3611238049975323, + "grad_norm": 6.675362618049272, + "learning_rate": 7.3901941183191095e-06, + "loss": 17.7102, + "step": 19756 + }, + { + "epoch": 0.3611420841939788, + "grad_norm": 5.779234254884737, + "learning_rate": 7.389934115739204e-06, + "loss": 17.1497, + "step": 19757 + }, + { + "epoch": 0.36116036339042534, + "grad_norm": 6.523897847285966, + "learning_rate": 7.389674104782789e-06, + "loss": 17.7741, + "step": 19758 + }, + { + "epoch": 0.3611786425868719, + "grad_norm": 6.619671923116873, + "learning_rate": 7.389414085450772e-06, + "loss": 17.6544, + "step": 19759 + }, + { + "epoch": 0.3611969217833184, + "grad_norm": 7.02593384430311, + "learning_rate": 7.389154057744066e-06, + "loss": 17.8876, + "step": 19760 + }, + { + "epoch": 0.36121520097976495, + "grad_norm": 7.397148017211546, + "learning_rate": 7.388894021663581e-06, + "loss": 17.9427, + "step": 19761 + }, + { + "epoch": 0.36123348017621143, + "grad_norm": 6.055158587464948, + "learning_rate": 7.388633977210231e-06, + "loss": 17.3072, + "step": 19762 + }, + { + "epoch": 0.36125175937265797, + "grad_norm": 5.619037949473037, + "learning_rate": 7.388373924384926e-06, + "loss": 17.247, + "step": 19763 + }, + { + "epoch": 0.3612700385691045, + "grad_norm": 6.593007971613619, + "learning_rate": 
7.388113863188579e-06, + "loss": 17.7705, + "step": 19764 + }, + { + "epoch": 0.36128831776555104, + "grad_norm": 6.80874070597616, + "learning_rate": 7.387853793622099e-06, + "loss": 17.6813, + "step": 19765 + }, + { + "epoch": 0.3613065969619976, + "grad_norm": 8.362611850273602, + "learning_rate": 7.387593715686399e-06, + "loss": 18.5448, + "step": 19766 + }, + { + "epoch": 0.36132487615844405, + "grad_norm": 6.786788258346048, + "learning_rate": 7.387333629382388e-06, + "loss": 17.598, + "step": 19767 + }, + { + "epoch": 0.3613431553548906, + "grad_norm": 5.684770298024673, + "learning_rate": 7.387073534710982e-06, + "loss": 17.2738, + "step": 19768 + }, + { + "epoch": 0.3613614345513371, + "grad_norm": 6.338173972597751, + "learning_rate": 7.38681343167309e-06, + "loss": 17.5468, + "step": 19769 + }, + { + "epoch": 0.36137971374778366, + "grad_norm": 8.451045048153572, + "learning_rate": 7.386553320269625e-06, + "loss": 18.3305, + "step": 19770 + }, + { + "epoch": 0.3613979929442302, + "grad_norm": 7.1551800330201125, + "learning_rate": 7.386293200501495e-06, + "loss": 17.7602, + "step": 19771 + }, + { + "epoch": 0.3614162721406767, + "grad_norm": 6.1481840629691025, + "learning_rate": 7.386033072369619e-06, + "loss": 17.4353, + "step": 19772 + }, + { + "epoch": 0.3614345513371232, + "grad_norm": 8.949106858349216, + "learning_rate": 7.3857729358749e-06, + "loss": 17.7564, + "step": 19773 + }, + { + "epoch": 0.36145283053356975, + "grad_norm": 7.304820160434771, + "learning_rate": 7.385512791018255e-06, + "loss": 17.9803, + "step": 19774 + }, + { + "epoch": 0.3614711097300163, + "grad_norm": 7.3603973195622086, + "learning_rate": 7.3852526378005955e-06, + "loss": 18.0031, + "step": 19775 + }, + { + "epoch": 0.3614893889264628, + "grad_norm": 5.999500649438277, + "learning_rate": 7.384992476222832e-06, + "loss": 17.3192, + "step": 19776 + }, + { + "epoch": 0.3615076681229093, + "grad_norm": 7.537500465756095, + "learning_rate": 7.384732306285875e-06, + "loss": 17.9028, + "step": 19777 + }, + { + "epoch": 0.36152594731935583, + "grad_norm": 6.726759715914112, + "learning_rate": 7.384472127990641e-06, + "loss": 17.7602, + "step": 19778 + }, + { + "epoch": 0.36154422651580237, + "grad_norm": 6.548891995168657, + "learning_rate": 7.384211941338038e-06, + "loss": 17.719, + "step": 19779 + }, + { + "epoch": 0.3615625057122489, + "grad_norm": 5.862858606696997, + "learning_rate": 7.383951746328979e-06, + "loss": 17.2742, + "step": 19780 + }, + { + "epoch": 0.36158078490869544, + "grad_norm": 6.781399042218442, + "learning_rate": 7.383691542964376e-06, + "loss": 18.1223, + "step": 19781 + }, + { + "epoch": 0.3615990641051419, + "grad_norm": 6.087743190092235, + "learning_rate": 7.383431331245142e-06, + "loss": 17.3174, + "step": 19782 + }, + { + "epoch": 0.36161734330158846, + "grad_norm": 6.2910179355940254, + "learning_rate": 7.383171111172186e-06, + "loss": 17.2643, + "step": 19783 + }, + { + "epoch": 0.361635622498035, + "grad_norm": 6.218268411467764, + "learning_rate": 7.382910882746424e-06, + "loss": 17.3961, + "step": 19784 + }, + { + "epoch": 0.3616539016944815, + "grad_norm": 5.730710339353957, + "learning_rate": 7.382650645968764e-06, + "loss": 16.9912, + "step": 19785 + }, + { + "epoch": 0.361672180890928, + "grad_norm": 6.022333613509842, + "learning_rate": 7.382390400840123e-06, + "loss": 17.1482, + "step": 19786 + }, + { + "epoch": 0.36169046008737454, + "grad_norm": 7.196930519841331, + "learning_rate": 7.382130147361408e-06, + "loss": 17.9276, + "step": 19787 + }, + { + 
"epoch": 0.3617087392838211, + "grad_norm": 6.0683435637162395, + "learning_rate": 7.381869885533534e-06, + "loss": 17.3092, + "step": 19788 + }, + { + "epoch": 0.3617270184802676, + "grad_norm": 7.343149765349085, + "learning_rate": 7.381609615357414e-06, + "loss": 18.0614, + "step": 19789 + }, + { + "epoch": 0.36174529767671415, + "grad_norm": 5.5653076492380675, + "learning_rate": 7.381349336833958e-06, + "loss": 17.3303, + "step": 19790 + }, + { + "epoch": 0.36176357687316063, + "grad_norm": 5.080857582127793, + "learning_rate": 7.381089049964078e-06, + "loss": 17.013, + "step": 19791 + }, + { + "epoch": 0.36178185606960717, + "grad_norm": 7.169992833963652, + "learning_rate": 7.38082875474869e-06, + "loss": 17.5835, + "step": 19792 + }, + { + "epoch": 0.3618001352660537, + "grad_norm": 6.034150606404791, + "learning_rate": 7.380568451188702e-06, + "loss": 17.4684, + "step": 19793 + }, + { + "epoch": 0.36181841446250024, + "grad_norm": 7.196883464812891, + "learning_rate": 7.3803081392850286e-06, + "loss": 17.6147, + "step": 19794 + }, + { + "epoch": 0.3618366936589468, + "grad_norm": 7.120685145067033, + "learning_rate": 7.380047819038583e-06, + "loss": 17.9046, + "step": 19795 + }, + { + "epoch": 0.36185497285539325, + "grad_norm": 5.37868830159824, + "learning_rate": 7.3797874904502744e-06, + "loss": 17.0191, + "step": 19796 + }, + { + "epoch": 0.3618732520518398, + "grad_norm": 7.297146558207694, + "learning_rate": 7.3795271535210175e-06, + "loss": 17.8612, + "step": 19797 + }, + { + "epoch": 0.3618915312482863, + "grad_norm": 6.835203495703539, + "learning_rate": 7.379266808251725e-06, + "loss": 17.6131, + "step": 19798 + }, + { + "epoch": 0.36190981044473286, + "grad_norm": 6.051211154481783, + "learning_rate": 7.3790064546433096e-06, + "loss": 17.3336, + "step": 19799 + }, + { + "epoch": 0.3619280896411794, + "grad_norm": 9.885438003116352, + "learning_rate": 7.378746092696682e-06, + "loss": 17.6307, + "step": 19800 + }, + { + "epoch": 0.3619463688376259, + "grad_norm": 9.13616426789303, + "learning_rate": 7.378485722412756e-06, + "loss": 17.9851, + "step": 19801 + }, + { + "epoch": 0.3619646480340724, + "grad_norm": 6.680500048478998, + "learning_rate": 7.3782253437924434e-06, + "loss": 17.4634, + "step": 19802 + }, + { + "epoch": 0.36198292723051895, + "grad_norm": 7.965927476562711, + "learning_rate": 7.377964956836658e-06, + "loss": 18.0622, + "step": 19803 + }, + { + "epoch": 0.3620012064269655, + "grad_norm": 6.075774584032624, + "learning_rate": 7.377704561546311e-06, + "loss": 17.4991, + "step": 19804 + }, + { + "epoch": 0.362019485623412, + "grad_norm": 5.394030899585471, + "learning_rate": 7.377444157922318e-06, + "loss": 17.1865, + "step": 19805 + }, + { + "epoch": 0.3620377648198585, + "grad_norm": 6.494119032171042, + "learning_rate": 7.377183745965587e-06, + "loss": 17.2804, + "step": 19806 + }, + { + "epoch": 0.36205604401630503, + "grad_norm": 6.971741388121176, + "learning_rate": 7.3769233256770346e-06, + "loss": 17.7178, + "step": 19807 + }, + { + "epoch": 0.36207432321275157, + "grad_norm": 7.57425777775483, + "learning_rate": 7.3766628970575716e-06, + "loss": 17.7694, + "step": 19808 + }, + { + "epoch": 0.3620926024091981, + "grad_norm": 6.448874817621395, + "learning_rate": 7.3764024601081105e-06, + "loss": 17.3395, + "step": 19809 + }, + { + "epoch": 0.36211088160564464, + "grad_norm": 7.60523362352674, + "learning_rate": 7.376142014829566e-06, + "loss": 17.8962, + "step": 19810 + }, + { + "epoch": 0.3621291608020911, + "grad_norm": 6.5349476392425885, + 
"learning_rate": 7.3758815612228505e-06, + "loss": 17.5655, + "step": 19811 + }, + { + "epoch": 0.36214743999853766, + "grad_norm": 6.597569027456001, + "learning_rate": 7.375621099288875e-06, + "loss": 17.5343, + "step": 19812 + }, + { + "epoch": 0.3621657191949842, + "grad_norm": 8.332972762924195, + "learning_rate": 7.375360629028556e-06, + "loss": 18.2068, + "step": 19813 + }, + { + "epoch": 0.36218399839143073, + "grad_norm": 6.058549509102406, + "learning_rate": 7.375100150442802e-06, + "loss": 17.3782, + "step": 19814 + }, + { + "epoch": 0.36220227758787726, + "grad_norm": 6.656909869088897, + "learning_rate": 7.3748396635325284e-06, + "loss": 17.7636, + "step": 19815 + }, + { + "epoch": 0.36222055678432374, + "grad_norm": 8.63263441180892, + "learning_rate": 7.374579168298648e-06, + "loss": 18.1313, + "step": 19816 + }, + { + "epoch": 0.3622388359807703, + "grad_norm": 6.344480129066718, + "learning_rate": 7.374318664742075e-06, + "loss": 17.5452, + "step": 19817 + }, + { + "epoch": 0.3622571151772168, + "grad_norm": 8.058106029602351, + "learning_rate": 7.374058152863719e-06, + "loss": 18.2145, + "step": 19818 + }, + { + "epoch": 0.36227539437366335, + "grad_norm": 7.686560264536356, + "learning_rate": 7.373797632664497e-06, + "loss": 18.156, + "step": 19819 + }, + { + "epoch": 0.36229367357010983, + "grad_norm": 6.453915937680067, + "learning_rate": 7.373537104145318e-06, + "loss": 17.696, + "step": 19820 + }, + { + "epoch": 0.36231195276655637, + "grad_norm": 6.338557925126497, + "learning_rate": 7.373276567307099e-06, + "loss": 17.6531, + "step": 19821 + }, + { + "epoch": 0.3623302319630029, + "grad_norm": 7.44949599558676, + "learning_rate": 7.373016022150752e-06, + "loss": 17.9402, + "step": 19822 + }, + { + "epoch": 0.36234851115944944, + "grad_norm": 5.511826999001526, + "learning_rate": 7.372755468677188e-06, + "loss": 17.1546, + "step": 19823 + }, + { + "epoch": 0.362366790355896, + "grad_norm": 6.730621061072508, + "learning_rate": 7.372494906887324e-06, + "loss": 17.5534, + "step": 19824 + }, + { + "epoch": 0.36238506955234245, + "grad_norm": 6.597743266117303, + "learning_rate": 7.37223433678207e-06, + "loss": 17.886, + "step": 19825 + }, + { + "epoch": 0.362403348748789, + "grad_norm": 6.424877650774905, + "learning_rate": 7.371973758362341e-06, + "loss": 17.7819, + "step": 19826 + }, + { + "epoch": 0.3624216279452355, + "grad_norm": 6.081115763625361, + "learning_rate": 7.371713171629051e-06, + "loss": 17.4009, + "step": 19827 + }, + { + "epoch": 0.36243990714168206, + "grad_norm": 6.027815519647787, + "learning_rate": 7.371452576583109e-06, + "loss": 17.4071, + "step": 19828 + }, + { + "epoch": 0.3624581863381286, + "grad_norm": 6.908653643836321, + "learning_rate": 7.371191973225433e-06, + "loss": 18.0503, + "step": 19829 + }, + { + "epoch": 0.3624764655345751, + "grad_norm": 9.657029465211973, + "learning_rate": 7.370931361556936e-06, + "loss": 18.0939, + "step": 19830 + }, + { + "epoch": 0.3624947447310216, + "grad_norm": 6.602626637139704, + "learning_rate": 7.37067074157853e-06, + "loss": 17.6102, + "step": 19831 + }, + { + "epoch": 0.36251302392746815, + "grad_norm": 5.74620240840169, + "learning_rate": 7.370410113291129e-06, + "loss": 17.0892, + "step": 19832 + }, + { + "epoch": 0.3625313031239147, + "grad_norm": 5.600716101090447, + "learning_rate": 7.370149476695644e-06, + "loss": 17.1531, + "step": 19833 + }, + { + "epoch": 0.3625495823203612, + "grad_norm": 6.647988825642275, + "learning_rate": 7.3698888317929924e-06, + "loss": 17.6309, + "step": 19834 + 
}, + { + "epoch": 0.3625678615168077, + "grad_norm": 7.462942426129583, + "learning_rate": 7.369628178584087e-06, + "loss": 18.1157, + "step": 19835 + }, + { + "epoch": 0.36258614071325423, + "grad_norm": 6.950999798039686, + "learning_rate": 7.369367517069839e-06, + "loss": 17.818, + "step": 19836 + }, + { + "epoch": 0.36260441990970077, + "grad_norm": 5.928236702927465, + "learning_rate": 7.369106847251164e-06, + "loss": 17.324, + "step": 19837 + }, + { + "epoch": 0.3626226991061473, + "grad_norm": 8.025803946220389, + "learning_rate": 7.368846169128975e-06, + "loss": 17.9005, + "step": 19838 + }, + { + "epoch": 0.36264097830259384, + "grad_norm": 7.188681587313006, + "learning_rate": 7.368585482704186e-06, + "loss": 17.7076, + "step": 19839 + }, + { + "epoch": 0.3626592574990403, + "grad_norm": 5.946788418432133, + "learning_rate": 7.3683247879777094e-06, + "loss": 17.3071, + "step": 19840 + }, + { + "epoch": 0.36267753669548686, + "grad_norm": 8.783183882784503, + "learning_rate": 7.368064084950461e-06, + "loss": 18.1716, + "step": 19841 + }, + { + "epoch": 0.3626958158919334, + "grad_norm": 8.32619851133781, + "learning_rate": 7.367803373623352e-06, + "loss": 18.4285, + "step": 19842 + }, + { + "epoch": 0.36271409508837993, + "grad_norm": 5.768828605822886, + "learning_rate": 7.3675426539973e-06, + "loss": 17.2132, + "step": 19843 + }, + { + "epoch": 0.36273237428482646, + "grad_norm": 7.295614907201202, + "learning_rate": 7.3672819260732155e-06, + "loss": 17.9062, + "step": 19844 + }, + { + "epoch": 0.36275065348127294, + "grad_norm": 7.601003625095559, + "learning_rate": 7.367021189852013e-06, + "loss": 17.755, + "step": 19845 + }, + { + "epoch": 0.3627689326777195, + "grad_norm": 7.25728850681366, + "learning_rate": 7.366760445334607e-06, + "loss": 17.588, + "step": 19846 + }, + { + "epoch": 0.362787211874166, + "grad_norm": 7.435540056191759, + "learning_rate": 7.366499692521909e-06, + "loss": 17.9894, + "step": 19847 + }, + { + "epoch": 0.36280549107061255, + "grad_norm": 6.575627358714186, + "learning_rate": 7.3662389314148375e-06, + "loss": 17.651, + "step": 19848 + }, + { + "epoch": 0.3628237702670591, + "grad_norm": 6.107729087499006, + "learning_rate": 7.3659781620143035e-06, + "loss": 17.3608, + "step": 19849 + }, + { + "epoch": 0.36284204946350557, + "grad_norm": 7.461839777289414, + "learning_rate": 7.36571738432122e-06, + "loss": 17.6875, + "step": 19850 + }, + { + "epoch": 0.3628603286599521, + "grad_norm": 6.015353779103699, + "learning_rate": 7.365456598336504e-06, + "loss": 17.3833, + "step": 19851 + }, + { + "epoch": 0.36287860785639864, + "grad_norm": 7.613494342322678, + "learning_rate": 7.3651958040610675e-06, + "loss": 18.0758, + "step": 19852 + }, + { + "epoch": 0.3628968870528452, + "grad_norm": 8.27991365474283, + "learning_rate": 7.364935001495823e-06, + "loss": 18.2216, + "step": 19853 + }, + { + "epoch": 0.36291516624929165, + "grad_norm": 7.206607014140489, + "learning_rate": 7.364674190641688e-06, + "loss": 17.7533, + "step": 19854 + }, + { + "epoch": 0.3629334454457382, + "grad_norm": 6.2258669349276, + "learning_rate": 7.3644133714995754e-06, + "loss": 17.4918, + "step": 19855 + }, + { + "epoch": 0.3629517246421847, + "grad_norm": 5.830931908101433, + "learning_rate": 7.364152544070399e-06, + "loss": 17.4619, + "step": 19856 + }, + { + "epoch": 0.36297000383863126, + "grad_norm": 6.744025648768919, + "learning_rate": 7.363891708355074e-06, + "loss": 17.8524, + "step": 19857 + }, + { + "epoch": 0.3629882830350778, + "grad_norm": 5.33544559027682, + 
"learning_rate": 7.363630864354513e-06, + "loss": 17.122, + "step": 19858 + }, + { + "epoch": 0.3630065622315243, + "grad_norm": 7.120475255009561, + "learning_rate": 7.363370012069631e-06, + "loss": 17.8644, + "step": 19859 + }, + { + "epoch": 0.3630248414279708, + "grad_norm": 6.415446205178347, + "learning_rate": 7.36310915150134e-06, + "loss": 17.4951, + "step": 19860 + }, + { + "epoch": 0.36304312062441735, + "grad_norm": 5.1713900764061735, + "learning_rate": 7.362848282650559e-06, + "loss": 16.9743, + "step": 19861 + }, + { + "epoch": 0.3630613998208639, + "grad_norm": 6.582510831952927, + "learning_rate": 7.362587405518199e-06, + "loss": 17.458, + "step": 19862 + }, + { + "epoch": 0.3630796790173104, + "grad_norm": 7.14838707725858, + "learning_rate": 7.3623265201051755e-06, + "loss": 17.4343, + "step": 19863 + }, + { + "epoch": 0.3630979582137569, + "grad_norm": 6.030890566137289, + "learning_rate": 7.362065626412402e-06, + "loss": 17.5821, + "step": 19864 + }, + { + "epoch": 0.36311623741020344, + "grad_norm": 6.132200128666458, + "learning_rate": 7.361804724440793e-06, + "loss": 17.5141, + "step": 19865 + }, + { + "epoch": 0.36313451660664997, + "grad_norm": 6.755722703473308, + "learning_rate": 7.361543814191266e-06, + "loss": 17.4266, + "step": 19866 + }, + { + "epoch": 0.3631527958030965, + "grad_norm": 5.901362078986602, + "learning_rate": 7.36128289566473e-06, + "loss": 17.4349, + "step": 19867 + }, + { + "epoch": 0.36317107499954304, + "grad_norm": 7.375293085928717, + "learning_rate": 7.361021968862103e-06, + "loss": 18.1263, + "step": 19868 + }, + { + "epoch": 0.3631893541959895, + "grad_norm": 7.949065423837825, + "learning_rate": 7.3607610337842995e-06, + "loss": 18.0082, + "step": 19869 + }, + { + "epoch": 0.36320763339243606, + "grad_norm": 7.529454143613692, + "learning_rate": 7.360500090432232e-06, + "loss": 18.2168, + "step": 19870 + }, + { + "epoch": 0.3632259125888826, + "grad_norm": 5.57568043384929, + "learning_rate": 7.360239138806818e-06, + "loss": 17.1047, + "step": 19871 + }, + { + "epoch": 0.36324419178532913, + "grad_norm": 5.331238942134828, + "learning_rate": 7.359978178908972e-06, + "loss": 16.9609, + "step": 19872 + }, + { + "epoch": 0.36326247098177566, + "grad_norm": 5.665163282601551, + "learning_rate": 7.359717210739605e-06, + "loss": 17.2756, + "step": 19873 + }, + { + "epoch": 0.36328075017822214, + "grad_norm": 6.414277189172481, + "learning_rate": 7.3594562342996335e-06, + "loss": 17.6562, + "step": 19874 + }, + { + "epoch": 0.3632990293746687, + "grad_norm": 6.986932385311707, + "learning_rate": 7.359195249589974e-06, + "loss": 17.5632, + "step": 19875 + }, + { + "epoch": 0.3633173085711152, + "grad_norm": 6.595780216320281, + "learning_rate": 7.3589342566115406e-06, + "loss": 17.6591, + "step": 19876 + }, + { + "epoch": 0.36333558776756175, + "grad_norm": 9.75605735647986, + "learning_rate": 7.358673255365245e-06, + "loss": 19.0344, + "step": 19877 + }, + { + "epoch": 0.3633538669640083, + "grad_norm": 9.233791121504725, + "learning_rate": 7.358412245852007e-06, + "loss": 18.404, + "step": 19878 + }, + { + "epoch": 0.36337214616045477, + "grad_norm": 7.148687535834478, + "learning_rate": 7.358151228072736e-06, + "loss": 17.9507, + "step": 19879 + }, + { + "epoch": 0.3633904253569013, + "grad_norm": 6.465007259481233, + "learning_rate": 7.357890202028351e-06, + "loss": 17.4709, + "step": 19880 + }, + { + "epoch": 0.36340870455334784, + "grad_norm": 6.286347588352254, + "learning_rate": 7.3576291677197655e-06, + "loss": 17.3946, + "step": 
19881 + }, + { + "epoch": 0.3634269837497944, + "grad_norm": 4.960147903218949, + "learning_rate": 7.357368125147895e-06, + "loss": 16.9839, + "step": 19882 + }, + { + "epoch": 0.3634452629462409, + "grad_norm": 7.9545658929064755, + "learning_rate": 7.3571070743136515e-06, + "loss": 18.2448, + "step": 19883 + }, + { + "epoch": 0.3634635421426874, + "grad_norm": 5.9500038086809495, + "learning_rate": 7.3568460152179545e-06, + "loss": 17.5407, + "step": 19884 + }, + { + "epoch": 0.3634818213391339, + "grad_norm": 8.391662365068228, + "learning_rate": 7.356584947861716e-06, + "loss": 18.2017, + "step": 19885 + }, + { + "epoch": 0.36350010053558046, + "grad_norm": 8.43666846600439, + "learning_rate": 7.356323872245852e-06, + "loss": 18.4896, + "step": 19886 + }, + { + "epoch": 0.363518379732027, + "grad_norm": 6.651142502786835, + "learning_rate": 7.356062788371277e-06, + "loss": 17.5591, + "step": 19887 + }, + { + "epoch": 0.3635366589284735, + "grad_norm": 6.580105254687807, + "learning_rate": 7.355801696238906e-06, + "loss": 17.5632, + "step": 19888 + }, + { + "epoch": 0.36355493812492, + "grad_norm": 6.534458383635875, + "learning_rate": 7.3555405958496555e-06, + "loss": 17.4418, + "step": 19889 + }, + { + "epoch": 0.36357321732136655, + "grad_norm": 7.116823096185072, + "learning_rate": 7.3552794872044385e-06, + "loss": 17.8299, + "step": 19890 + }, + { + "epoch": 0.3635914965178131, + "grad_norm": 6.319830041837158, + "learning_rate": 7.355018370304172e-06, + "loss": 17.7226, + "step": 19891 + }, + { + "epoch": 0.3636097757142596, + "grad_norm": 5.650490274725987, + "learning_rate": 7.35475724514977e-06, + "loss": 17.1894, + "step": 19892 + }, + { + "epoch": 0.3636280549107061, + "grad_norm": 6.31767404409286, + "learning_rate": 7.354496111742149e-06, + "loss": 17.6089, + "step": 19893 + }, + { + "epoch": 0.36364633410715264, + "grad_norm": 7.319743799716542, + "learning_rate": 7.354234970082223e-06, + "loss": 17.6796, + "step": 19894 + }, + { + "epoch": 0.36366461330359917, + "grad_norm": 7.574336442661001, + "learning_rate": 7.353973820170906e-06, + "loss": 17.582, + "step": 19895 + }, + { + "epoch": 0.3636828925000457, + "grad_norm": 6.055400818121726, + "learning_rate": 7.353712662009118e-06, + "loss": 17.562, + "step": 19896 + }, + { + "epoch": 0.36370117169649224, + "grad_norm": 5.941978445605599, + "learning_rate": 7.35345149559777e-06, + "loss": 17.2626, + "step": 19897 + }, + { + "epoch": 0.3637194508929387, + "grad_norm": 5.331945161886824, + "learning_rate": 7.353190320937778e-06, + "loss": 17.1074, + "step": 19898 + }, + { + "epoch": 0.36373773008938526, + "grad_norm": 7.415834593382804, + "learning_rate": 7.352929138030059e-06, + "loss": 18.073, + "step": 19899 + }, + { + "epoch": 0.3637560092858318, + "grad_norm": 7.025386381980766, + "learning_rate": 7.352667946875528e-06, + "loss": 17.3585, + "step": 19900 + }, + { + "epoch": 0.36377428848227833, + "grad_norm": 6.2390589600593245, + "learning_rate": 7.352406747475098e-06, + "loss": 17.484, + "step": 19901 + }, + { + "epoch": 0.36379256767872487, + "grad_norm": 7.228486929245648, + "learning_rate": 7.352145539829688e-06, + "loss": 17.651, + "step": 19902 + }, + { + "epoch": 0.36381084687517135, + "grad_norm": 5.342575900198373, + "learning_rate": 7.351884323940212e-06, + "loss": 17.0671, + "step": 19903 + }, + { + "epoch": 0.3638291260716179, + "grad_norm": 6.952441270837234, + "learning_rate": 7.351623099807587e-06, + "loss": 17.7346, + "step": 19904 + }, + { + "epoch": 0.3638474052680644, + "grad_norm": 
5.874155239855435, + "learning_rate": 7.351361867432725e-06, + "loss": 17.4286, + "step": 19905 + }, + { + "epoch": 0.36386568446451095, + "grad_norm": 6.257066167438005, + "learning_rate": 7.351100626816544e-06, + "loss": 17.5954, + "step": 19906 + }, + { + "epoch": 0.3638839636609575, + "grad_norm": 6.795264780061543, + "learning_rate": 7.350839377959959e-06, + "loss": 17.6907, + "step": 19907 + }, + { + "epoch": 0.36390224285740397, + "grad_norm": 10.194669551376359, + "learning_rate": 7.350578120863887e-06, + "loss": 18.3099, + "step": 19908 + }, + { + "epoch": 0.3639205220538505, + "grad_norm": 5.402891267281715, + "learning_rate": 7.350316855529243e-06, + "loss": 17.2023, + "step": 19909 + }, + { + "epoch": 0.36393880125029704, + "grad_norm": 6.010307659767309, + "learning_rate": 7.35005558195694e-06, + "loss": 17.5061, + "step": 19910 + }, + { + "epoch": 0.3639570804467436, + "grad_norm": 5.967855881697756, + "learning_rate": 7.3497943001478975e-06, + "loss": 17.4228, + "step": 19911 + }, + { + "epoch": 0.3639753596431901, + "grad_norm": 6.897902739331548, + "learning_rate": 7.34953301010303e-06, + "loss": 17.7083, + "step": 19912 + }, + { + "epoch": 0.3639936388396366, + "grad_norm": 7.135653523181321, + "learning_rate": 7.349271711823255e-06, + "loss": 17.8317, + "step": 19913 + }, + { + "epoch": 0.3640119180360831, + "grad_norm": 6.64145514082436, + "learning_rate": 7.3490104053094845e-06, + "loss": 17.4163, + "step": 19914 + }, + { + "epoch": 0.36403019723252966, + "grad_norm": 6.376695234039627, + "learning_rate": 7.348749090562636e-06, + "loss": 17.5942, + "step": 19915 + }, + { + "epoch": 0.3640484764289762, + "grad_norm": 4.790855748747493, + "learning_rate": 7.348487767583625e-06, + "loss": 16.8697, + "step": 19916 + }, + { + "epoch": 0.36406675562542273, + "grad_norm": 8.041839296634185, + "learning_rate": 7.348226436373371e-06, + "loss": 17.9628, + "step": 19917 + }, + { + "epoch": 0.3640850348218692, + "grad_norm": 7.822606379322255, + "learning_rate": 7.347965096932785e-06, + "loss": 18.3822, + "step": 19918 + }, + { + "epoch": 0.36410331401831575, + "grad_norm": 5.845155756889289, + "learning_rate": 7.347703749262787e-06, + "loss": 17.2784, + "step": 19919 + }, + { + "epoch": 0.3641215932147623, + "grad_norm": 6.453666345030061, + "learning_rate": 7.3474423933642895e-06, + "loss": 17.7771, + "step": 19920 + }, + { + "epoch": 0.3641398724112088, + "grad_norm": 6.952256037911246, + "learning_rate": 7.34718102923821e-06, + "loss": 17.9244, + "step": 19921 + }, + { + "epoch": 0.3641581516076553, + "grad_norm": 5.726640790921278, + "learning_rate": 7.3469196568854654e-06, + "loss": 17.2561, + "step": 19922 + }, + { + "epoch": 0.36417643080410184, + "grad_norm": 6.158958516929366, + "learning_rate": 7.346658276306971e-06, + "loss": 17.3897, + "step": 19923 + }, + { + "epoch": 0.36419471000054837, + "grad_norm": 6.7314552207787, + "learning_rate": 7.3463968875036415e-06, + "loss": 17.5779, + "step": 19924 + }, + { + "epoch": 0.3642129891969949, + "grad_norm": 5.953983189035276, + "learning_rate": 7.346135490476396e-06, + "loss": 17.1681, + "step": 19925 + }, + { + "epoch": 0.36423126839344144, + "grad_norm": 7.448260645280754, + "learning_rate": 7.345874085226149e-06, + "loss": 18.071, + "step": 19926 + }, + { + "epoch": 0.3642495475898879, + "grad_norm": 7.135589902163268, + "learning_rate": 7.3456126717538165e-06, + "loss": 17.9099, + "step": 19927 + }, + { + "epoch": 0.36426782678633446, + "grad_norm": 7.811948117017589, + "learning_rate": 7.3453512500603155e-06, + "loss": 
18.1919, + "step": 19928 + }, + { + "epoch": 0.364286105982781, + "grad_norm": 6.4819789518858775, + "learning_rate": 7.345089820146561e-06, + "loss": 17.391, + "step": 19929 + }, + { + "epoch": 0.36430438517922753, + "grad_norm": 6.573790459434949, + "learning_rate": 7.3448283820134714e-06, + "loss": 17.4223, + "step": 19930 + }, + { + "epoch": 0.36432266437567407, + "grad_norm": 7.142233545797336, + "learning_rate": 7.344566935661963e-06, + "loss": 17.9212, + "step": 19931 + }, + { + "epoch": 0.36434094357212055, + "grad_norm": 7.815281670207444, + "learning_rate": 7.344305481092948e-06, + "loss": 18.5594, + "step": 19932 + }, + { + "epoch": 0.3643592227685671, + "grad_norm": 6.389263015084949, + "learning_rate": 7.344044018307347e-06, + "loss": 17.6672, + "step": 19933 + }, + { + "epoch": 0.3643775019650136, + "grad_norm": 6.363778951246994, + "learning_rate": 7.3437825473060756e-06, + "loss": 17.5988, + "step": 19934 + }, + { + "epoch": 0.36439578116146015, + "grad_norm": 7.878011337387179, + "learning_rate": 7.34352106809005e-06, + "loss": 18.1643, + "step": 19935 + }, + { + "epoch": 0.3644140603579067, + "grad_norm": 8.285066118286098, + "learning_rate": 7.343259580660185e-06, + "loss": 18.3539, + "step": 19936 + }, + { + "epoch": 0.36443233955435317, + "grad_norm": 6.443267824682111, + "learning_rate": 7.3429980850173985e-06, + "loss": 17.3013, + "step": 19937 + }, + { + "epoch": 0.3644506187507997, + "grad_norm": 6.650738875390846, + "learning_rate": 7.342736581162608e-06, + "loss": 17.6297, + "step": 19938 + }, + { + "epoch": 0.36446889794724624, + "grad_norm": 8.323867892700868, + "learning_rate": 7.342475069096729e-06, + "loss": 18.2189, + "step": 19939 + }, + { + "epoch": 0.3644871771436928, + "grad_norm": 8.029657663627717, + "learning_rate": 7.342213548820678e-06, + "loss": 17.9917, + "step": 19940 + }, + { + "epoch": 0.3645054563401393, + "grad_norm": 5.902466295141075, + "learning_rate": 7.3419520203353736e-06, + "loss": 17.5369, + "step": 19941 + }, + { + "epoch": 0.3645237355365858, + "grad_norm": 6.6950706084737, + "learning_rate": 7.341690483641727e-06, + "loss": 17.6478, + "step": 19942 + }, + { + "epoch": 0.3645420147330323, + "grad_norm": 7.414295409257129, + "learning_rate": 7.34142893874066e-06, + "loss": 18.0049, + "step": 19943 + }, + { + "epoch": 0.36456029392947886, + "grad_norm": 6.441432619775502, + "learning_rate": 7.341167385633089e-06, + "loss": 17.3596, + "step": 19944 + }, + { + "epoch": 0.3645785731259254, + "grad_norm": 7.565466444403028, + "learning_rate": 7.340905824319928e-06, + "loss": 17.9493, + "step": 19945 + }, + { + "epoch": 0.36459685232237193, + "grad_norm": 6.884218125613176, + "learning_rate": 7.3406442548020965e-06, + "loss": 17.9367, + "step": 19946 + }, + { + "epoch": 0.3646151315188184, + "grad_norm": 8.205436379367864, + "learning_rate": 7.340382677080509e-06, + "loss": 18.2755, + "step": 19947 + }, + { + "epoch": 0.36463341071526495, + "grad_norm": 7.479586387863585, + "learning_rate": 7.3401210911560825e-06, + "loss": 18.084, + "step": 19948 + }, + { + "epoch": 0.3646516899117115, + "grad_norm": 5.456945197338324, + "learning_rate": 7.3398594970297365e-06, + "loss": 17.2475, + "step": 19949 + }, + { + "epoch": 0.364669969108158, + "grad_norm": 7.155232689620144, + "learning_rate": 7.339597894702385e-06, + "loss": 18.057, + "step": 19950 + }, + { + "epoch": 0.36468824830460456, + "grad_norm": 6.315182426164225, + "learning_rate": 7.339336284174946e-06, + "loss": 17.4557, + "step": 19951 + }, + { + "epoch": 0.36470652750105104, + 
"grad_norm": 6.869441070444373, + "learning_rate": 7.339074665448336e-06, + "loss": 17.7521, + "step": 19952 + }, + { + "epoch": 0.36472480669749757, + "grad_norm": 5.832337154707694, + "learning_rate": 7.338813038523473e-06, + "loss": 17.2121, + "step": 19953 + }, + { + "epoch": 0.3647430858939441, + "grad_norm": 7.771324661108997, + "learning_rate": 7.338551403401273e-06, + "loss": 18.1004, + "step": 19954 + }, + { + "epoch": 0.36476136509039064, + "grad_norm": 5.4355066147730575, + "learning_rate": 7.338289760082653e-06, + "loss": 17.1734, + "step": 19955 + }, + { + "epoch": 0.3647796442868371, + "grad_norm": 6.067806744073409, + "learning_rate": 7.33802810856853e-06, + "loss": 17.3221, + "step": 19956 + }, + { + "epoch": 0.36479792348328366, + "grad_norm": 5.627941981217168, + "learning_rate": 7.337766448859822e-06, + "loss": 17.2232, + "step": 19957 + }, + { + "epoch": 0.3648162026797302, + "grad_norm": 7.304743637762822, + "learning_rate": 7.337504780957446e-06, + "loss": 17.9585, + "step": 19958 + }, + { + "epoch": 0.36483448187617673, + "grad_norm": 6.371991764224059, + "learning_rate": 7.337243104862317e-06, + "loss": 17.4396, + "step": 19959 + }, + { + "epoch": 0.36485276107262327, + "grad_norm": 7.272901108269467, + "learning_rate": 7.336981420575354e-06, + "loss": 17.7912, + "step": 19960 + }, + { + "epoch": 0.36487104026906975, + "grad_norm": 6.709934676998213, + "learning_rate": 7.336719728097475e-06, + "loss": 17.6917, + "step": 19961 + }, + { + "epoch": 0.3648893194655163, + "grad_norm": 7.241521777883362, + "learning_rate": 7.336458027429596e-06, + "loss": 17.9274, + "step": 19962 + }, + { + "epoch": 0.3649075986619628, + "grad_norm": 6.401168922310908, + "learning_rate": 7.336196318572635e-06, + "loss": 17.6308, + "step": 19963 + }, + { + "epoch": 0.36492587785840935, + "grad_norm": 5.128963487143801, + "learning_rate": 7.335934601527507e-06, + "loss": 17.2691, + "step": 19964 + }, + { + "epoch": 0.3649441570548559, + "grad_norm": 6.057947574781268, + "learning_rate": 7.335672876295131e-06, + "loss": 17.4541, + "step": 19965 + }, + { + "epoch": 0.36496243625130237, + "grad_norm": 8.199589475868523, + "learning_rate": 7.335411142876425e-06, + "loss": 17.8986, + "step": 19966 + }, + { + "epoch": 0.3649807154477489, + "grad_norm": 5.152101468473128, + "learning_rate": 7.335149401272306e-06, + "loss": 16.9409, + "step": 19967 + }, + { + "epoch": 0.36499899464419544, + "grad_norm": 7.64284625076132, + "learning_rate": 7.33488765148369e-06, + "loss": 17.8299, + "step": 19968 + }, + { + "epoch": 0.365017273840642, + "grad_norm": 6.694127243468344, + "learning_rate": 7.334625893511496e-06, + "loss": 17.6618, + "step": 19969 + }, + { + "epoch": 0.3650355530370885, + "grad_norm": 6.137374494704486, + "learning_rate": 7.33436412735664e-06, + "loss": 17.1838, + "step": 19970 + }, + { + "epoch": 0.365053832233535, + "grad_norm": 6.492198993151892, + "learning_rate": 7.334102353020041e-06, + "loss": 17.8877, + "step": 19971 + }, + { + "epoch": 0.3650721114299815, + "grad_norm": 5.802896208555929, + "learning_rate": 7.333840570502616e-06, + "loss": 17.2948, + "step": 19972 + }, + { + "epoch": 0.36509039062642806, + "grad_norm": 7.132221431844595, + "learning_rate": 7.333578779805282e-06, + "loss": 17.6006, + "step": 19973 + }, + { + "epoch": 0.3651086698228746, + "grad_norm": 6.093304534442225, + "learning_rate": 7.333316980928956e-06, + "loss": 17.3567, + "step": 19974 + }, + { + "epoch": 0.36512694901932113, + "grad_norm": 6.436127130333897, + "learning_rate": 7.333055173874558e-06, + 
"loss": 17.4512, + "step": 19975 + }, + { + "epoch": 0.3651452282157676, + "grad_norm": 6.954979280215868, + "learning_rate": 7.332793358643003e-06, + "loss": 17.6193, + "step": 19976 + }, + { + "epoch": 0.36516350741221415, + "grad_norm": 5.556852294777728, + "learning_rate": 7.33253153523521e-06, + "loss": 17.1742, + "step": 19977 + }, + { + "epoch": 0.3651817866086607, + "grad_norm": 6.905918436637122, + "learning_rate": 7.332269703652098e-06, + "loss": 17.6606, + "step": 19978 + }, + { + "epoch": 0.3652000658051072, + "grad_norm": 7.1322970346039885, + "learning_rate": 7.332007863894582e-06, + "loss": 17.18, + "step": 19979 + }, + { + "epoch": 0.36521834500155376, + "grad_norm": 6.8587204657493555, + "learning_rate": 7.331746015963579e-06, + "loss": 17.7386, + "step": 19980 + }, + { + "epoch": 0.36523662419800024, + "grad_norm": 6.772290370521658, + "learning_rate": 7.331484159860009e-06, + "loss": 17.7342, + "step": 19981 + }, + { + "epoch": 0.3652549033944468, + "grad_norm": 7.231616881412896, + "learning_rate": 7.331222295584791e-06, + "loss": 17.6909, + "step": 19982 + }, + { + "epoch": 0.3652731825908933, + "grad_norm": 7.793296884804323, + "learning_rate": 7.33096042313884e-06, + "loss": 18.1183, + "step": 19983 + }, + { + "epoch": 0.36529146178733984, + "grad_norm": 6.483927942294078, + "learning_rate": 7.330698542523076e-06, + "loss": 17.2938, + "step": 19984 + }, + { + "epoch": 0.3653097409837864, + "grad_norm": 6.057767800886469, + "learning_rate": 7.330436653738415e-06, + "loss": 17.5713, + "step": 19985 + }, + { + "epoch": 0.36532802018023286, + "grad_norm": 6.1906657356421215, + "learning_rate": 7.330174756785777e-06, + "loss": 17.2538, + "step": 19986 + }, + { + "epoch": 0.3653462993766794, + "grad_norm": 7.278490927171888, + "learning_rate": 7.329912851666077e-06, + "loss": 17.6953, + "step": 19987 + }, + { + "epoch": 0.36536457857312593, + "grad_norm": 5.9615259558238, + "learning_rate": 7.329650938380235e-06, + "loss": 17.3261, + "step": 19988 + }, + { + "epoch": 0.36538285776957247, + "grad_norm": 5.550292146699225, + "learning_rate": 7.329389016929169e-06, + "loss": 17.1694, + "step": 19989 + }, + { + "epoch": 0.36540113696601895, + "grad_norm": 6.412002416771385, + "learning_rate": 7.3291270873137964e-06, + "loss": 17.7491, + "step": 19990 + }, + { + "epoch": 0.3654194161624655, + "grad_norm": 6.143702555565244, + "learning_rate": 7.328865149535036e-06, + "loss": 17.4818, + "step": 19991 + }, + { + "epoch": 0.365437695358912, + "grad_norm": 7.844841567040807, + "learning_rate": 7.328603203593805e-06, + "loss": 17.7499, + "step": 19992 + }, + { + "epoch": 0.36545597455535855, + "grad_norm": 8.015703335755985, + "learning_rate": 7.328341249491022e-06, + "loss": 17.7027, + "step": 19993 + }, + { + "epoch": 0.3654742537518051, + "grad_norm": 6.263571151195266, + "learning_rate": 7.3280792872276055e-06, + "loss": 17.3522, + "step": 19994 + }, + { + "epoch": 0.36549253294825157, + "grad_norm": 7.541199669687731, + "learning_rate": 7.327817316804472e-06, + "loss": 17.7727, + "step": 19995 + }, + { + "epoch": 0.3655108121446981, + "grad_norm": 7.378806630387118, + "learning_rate": 7.3275553382225405e-06, + "loss": 17.955, + "step": 19996 + }, + { + "epoch": 0.36552909134114464, + "grad_norm": 6.7678685546807085, + "learning_rate": 7.32729335148273e-06, + "loss": 17.4339, + "step": 19997 + }, + { + "epoch": 0.3655473705375912, + "grad_norm": 7.12057927862782, + "learning_rate": 7.327031356585959e-06, + "loss": 17.3648, + "step": 19998 + }, + { + "epoch": 0.3655656497340377, 
+ "grad_norm": 8.71501300379711, + "learning_rate": 7.326769353533144e-06, + "loss": 17.7721, + "step": 19999 + }, + { + "epoch": 0.3655839289304842, + "grad_norm": 7.299752176009435, + "learning_rate": 7.326507342325206e-06, + "loss": 18.0457, + "step": 20000 + }, + { + "epoch": 0.36560220812693073, + "grad_norm": 7.3599804513870914, + "learning_rate": 7.32624532296306e-06, + "loss": 17.6514, + "step": 20001 + }, + { + "epoch": 0.36562048732337726, + "grad_norm": 5.3486958687936745, + "learning_rate": 7.325983295447626e-06, + "loss": 16.9642, + "step": 20002 + }, + { + "epoch": 0.3656387665198238, + "grad_norm": 6.557372272729643, + "learning_rate": 7.325721259779824e-06, + "loss": 17.619, + "step": 20003 + }, + { + "epoch": 0.36565704571627033, + "grad_norm": 5.416973195391767, + "learning_rate": 7.325459215960569e-06, + "loss": 17.2945, + "step": 20004 + }, + { + "epoch": 0.3656753249127168, + "grad_norm": 5.989822328414677, + "learning_rate": 7.325197163990783e-06, + "loss": 17.1684, + "step": 20005 + }, + { + "epoch": 0.36569360410916335, + "grad_norm": 6.011230896835203, + "learning_rate": 7.32493510387138e-06, + "loss": 17.2764, + "step": 20006 + }, + { + "epoch": 0.3657118833056099, + "grad_norm": 6.343896300084955, + "learning_rate": 7.324673035603283e-06, + "loss": 17.2743, + "step": 20007 + }, + { + "epoch": 0.3657301625020564, + "grad_norm": 8.424063219074782, + "learning_rate": 7.324410959187409e-06, + "loss": 18.5565, + "step": 20008 + }, + { + "epoch": 0.36574844169850296, + "grad_norm": 5.454175634280242, + "learning_rate": 7.324148874624676e-06, + "loss": 17.1443, + "step": 20009 + }, + { + "epoch": 0.36576672089494944, + "grad_norm": 6.962785575902715, + "learning_rate": 7.323886781916003e-06, + "loss": 17.6528, + "step": 20010 + }, + { + "epoch": 0.365785000091396, + "grad_norm": 6.398342119715435, + "learning_rate": 7.323624681062308e-06, + "loss": 17.469, + "step": 20011 + }, + { + "epoch": 0.3658032792878425, + "grad_norm": 6.982471194952962, + "learning_rate": 7.323362572064509e-06, + "loss": 17.7279, + "step": 20012 + }, + { + "epoch": 0.36582155848428904, + "grad_norm": 5.722768016888922, + "learning_rate": 7.323100454923529e-06, + "loss": 17.3035, + "step": 20013 + }, + { + "epoch": 0.3658398376807356, + "grad_norm": 6.207848962254209, + "learning_rate": 7.322838329640281e-06, + "loss": 17.4449, + "step": 20014 + }, + { + "epoch": 0.36585811687718206, + "grad_norm": 5.52371366053853, + "learning_rate": 7.322576196215687e-06, + "loss": 17.0434, + "step": 20015 + }, + { + "epoch": 0.3658763960736286, + "grad_norm": 6.447198465491182, + "learning_rate": 7.322314054650664e-06, + "loss": 17.4525, + "step": 20016 + }, + { + "epoch": 0.36589467527007513, + "grad_norm": 5.772447503822162, + "learning_rate": 7.322051904946134e-06, + "loss": 17.204, + "step": 20017 + }, + { + "epoch": 0.36591295446652167, + "grad_norm": 5.598459571819927, + "learning_rate": 7.321789747103012e-06, + "loss": 17.2012, + "step": 20018 + }, + { + "epoch": 0.3659312336629682, + "grad_norm": 6.772302513544603, + "learning_rate": 7.321527581122218e-06, + "loss": 17.5079, + "step": 20019 + }, + { + "epoch": 0.3659495128594147, + "grad_norm": 8.395492804046095, + "learning_rate": 7.3212654070046715e-06, + "loss": 18.0994, + "step": 20020 + }, + { + "epoch": 0.3659677920558612, + "grad_norm": 5.442244433328405, + "learning_rate": 7.321003224751292e-06, + "loss": 16.9976, + "step": 20021 + }, + { + "epoch": 0.36598607125230775, + "grad_norm": 4.894323867458015, + "learning_rate": 7.320741034362997e-06, 
+ "loss": 16.9411, + "step": 20022 + }, + { + "epoch": 0.3660043504487543, + "grad_norm": 6.376324414241229, + "learning_rate": 7.3204788358407065e-06, + "loss": 17.4495, + "step": 20023 + }, + { + "epoch": 0.36602262964520077, + "grad_norm": 7.014361650828988, + "learning_rate": 7.320216629185339e-06, + "loss": 17.6281, + "step": 20024 + }, + { + "epoch": 0.3660409088416473, + "grad_norm": 6.3606585805402585, + "learning_rate": 7.319954414397813e-06, + "loss": 17.4682, + "step": 20025 + }, + { + "epoch": 0.36605918803809384, + "grad_norm": 6.883518029555075, + "learning_rate": 7.3196921914790485e-06, + "loss": 17.637, + "step": 20026 + }, + { + "epoch": 0.3660774672345404, + "grad_norm": 7.022462850259167, + "learning_rate": 7.319429960429965e-06, + "loss": 17.7141, + "step": 20027 + }, + { + "epoch": 0.3660957464309869, + "grad_norm": 6.239354392444819, + "learning_rate": 7.319167721251479e-06, + "loss": 17.3864, + "step": 20028 + }, + { + "epoch": 0.3661140256274334, + "grad_norm": 4.969791983872103, + "learning_rate": 7.318905473944512e-06, + "loss": 16.8889, + "step": 20029 + }, + { + "epoch": 0.36613230482387993, + "grad_norm": 6.228127877753289, + "learning_rate": 7.318643218509982e-06, + "loss": 17.5305, + "step": 20030 + }, + { + "epoch": 0.36615058402032646, + "grad_norm": 10.772890395687297, + "learning_rate": 7.31838095494881e-06, + "loss": 18.2895, + "step": 20031 + }, + { + "epoch": 0.366168863216773, + "grad_norm": 7.2522352411852555, + "learning_rate": 7.318118683261915e-06, + "loss": 17.7977, + "step": 20032 + }, + { + "epoch": 0.36618714241321954, + "grad_norm": 7.486696173260041, + "learning_rate": 7.317856403450212e-06, + "loss": 18.1596, + "step": 20033 + }, + { + "epoch": 0.366205421609666, + "grad_norm": 6.07141209553588, + "learning_rate": 7.317594115514626e-06, + "loss": 17.4972, + "step": 20034 + }, + { + "epoch": 0.36622370080611255, + "grad_norm": 6.372265499279799, + "learning_rate": 7.317331819456073e-06, + "loss": 17.5198, + "step": 20035 + }, + { + "epoch": 0.3662419800025591, + "grad_norm": 8.42351503298585, + "learning_rate": 7.317069515275472e-06, + "loss": 18.2196, + "step": 20036 + }, + { + "epoch": 0.3662602591990056, + "grad_norm": 7.328440958729051, + "learning_rate": 7.316807202973744e-06, + "loss": 18.0784, + "step": 20037 + }, + { + "epoch": 0.36627853839545216, + "grad_norm": 5.967004691863931, + "learning_rate": 7.316544882551808e-06, + "loss": 17.1939, + "step": 20038 + }, + { + "epoch": 0.36629681759189864, + "grad_norm": 6.792899665896095, + "learning_rate": 7.3162825540105834e-06, + "loss": 17.5523, + "step": 20039 + }, + { + "epoch": 0.3663150967883452, + "grad_norm": 6.016111889403537, + "learning_rate": 7.31602021735099e-06, + "loss": 17.2861, + "step": 20040 + }, + { + "epoch": 0.3663333759847917, + "grad_norm": 5.728117369895846, + "learning_rate": 7.315757872573946e-06, + "loss": 17.3807, + "step": 20041 + }, + { + "epoch": 0.36635165518123824, + "grad_norm": 9.82960673985697, + "learning_rate": 7.315495519680371e-06, + "loss": 19.2907, + "step": 20042 + }, + { + "epoch": 0.3663699343776848, + "grad_norm": 6.578645153903663, + "learning_rate": 7.315233158671186e-06, + "loss": 17.7264, + "step": 20043 + }, + { + "epoch": 0.36638821357413126, + "grad_norm": 7.27288412308479, + "learning_rate": 7.3149707895473096e-06, + "loss": 17.8259, + "step": 20044 + }, + { + "epoch": 0.3664064927705778, + "grad_norm": 7.088161976089492, + "learning_rate": 7.314708412309661e-06, + "loss": 17.7448, + "step": 20045 + }, + { + "epoch": 
0.36642477196702433, + "grad_norm": 9.611423527380586, + "learning_rate": 7.314446026959161e-06, + "loss": 17.7218, + "step": 20046 + }, + { + "epoch": 0.36644305116347087, + "grad_norm": 7.10924466864736, + "learning_rate": 7.314183633496728e-06, + "loss": 17.8319, + "step": 20047 + }, + { + "epoch": 0.3664613303599174, + "grad_norm": 6.762878693982524, + "learning_rate": 7.313921231923282e-06, + "loss": 17.8862, + "step": 20048 + }, + { + "epoch": 0.3664796095563639, + "grad_norm": 7.040128425206224, + "learning_rate": 7.313658822239743e-06, + "loss": 17.5381, + "step": 20049 + }, + { + "epoch": 0.3664978887528104, + "grad_norm": 6.2413025502073864, + "learning_rate": 7.31339640444703e-06, + "loss": 17.2786, + "step": 20050 + }, + { + "epoch": 0.36651616794925695, + "grad_norm": 5.821097701028158, + "learning_rate": 7.313133978546065e-06, + "loss": 17.2855, + "step": 20051 + }, + { + "epoch": 0.3665344471457035, + "grad_norm": 6.420080152777128, + "learning_rate": 7.312871544537765e-06, + "loss": 17.5517, + "step": 20052 + }, + { + "epoch": 0.36655272634215, + "grad_norm": 6.687833941744186, + "learning_rate": 7.312609102423052e-06, + "loss": 17.0203, + "step": 20053 + }, + { + "epoch": 0.3665710055385965, + "grad_norm": 7.4913413588200575, + "learning_rate": 7.312346652202843e-06, + "loss": 18.13, + "step": 20054 + }, + { + "epoch": 0.36658928473504304, + "grad_norm": 6.378464109102114, + "learning_rate": 7.312084193878061e-06, + "loss": 17.5577, + "step": 20055 + }, + { + "epoch": 0.3666075639314896, + "grad_norm": 6.46936764170632, + "learning_rate": 7.311821727449624e-06, + "loss": 17.4391, + "step": 20056 + }, + { + "epoch": 0.3666258431279361, + "grad_norm": 6.938693382391579, + "learning_rate": 7.311559252918452e-06, + "loss": 17.7051, + "step": 20057 + }, + { + "epoch": 0.3666441223243826, + "grad_norm": 6.756617300125149, + "learning_rate": 7.311296770285467e-06, + "loss": 17.748, + "step": 20058 + }, + { + "epoch": 0.36666240152082913, + "grad_norm": 7.063743985939497, + "learning_rate": 7.3110342795515865e-06, + "loss": 17.5138, + "step": 20059 + }, + { + "epoch": 0.36668068071727566, + "grad_norm": 6.830333458443167, + "learning_rate": 7.310771780717729e-06, + "loss": 17.8553, + "step": 20060 + }, + { + "epoch": 0.3666989599137222, + "grad_norm": 7.060236435351416, + "learning_rate": 7.310509273784821e-06, + "loss": 17.798, + "step": 20061 + }, + { + "epoch": 0.36671723911016874, + "grad_norm": 6.933666121958821, + "learning_rate": 7.310246758753776e-06, + "loss": 17.826, + "step": 20062 + }, + { + "epoch": 0.3667355183066152, + "grad_norm": 8.6564575951896, + "learning_rate": 7.309984235625518e-06, + "loss": 18.0475, + "step": 20063 + }, + { + "epoch": 0.36675379750306175, + "grad_norm": 6.021194253401691, + "learning_rate": 7.309721704400965e-06, + "loss": 17.2236, + "step": 20064 + }, + { + "epoch": 0.3667720766995083, + "grad_norm": 8.03573630844235, + "learning_rate": 7.309459165081037e-06, + "loss": 18.3012, + "step": 20065 + }, + { + "epoch": 0.3667903558959548, + "grad_norm": 6.409128167831567, + "learning_rate": 7.309196617666657e-06, + "loss": 17.5102, + "step": 20066 + }, + { + "epoch": 0.36680863509240136, + "grad_norm": 5.691077165759169, + "learning_rate": 7.308934062158741e-06, + "loss": 17.4877, + "step": 20067 + }, + { + "epoch": 0.36682691428884784, + "grad_norm": 7.946724345294653, + "learning_rate": 7.308671498558213e-06, + "loss": 18.0304, + "step": 20068 + }, + { + "epoch": 0.3668451934852944, + "grad_norm": 6.276899429007991, + "learning_rate": 
7.308408926865991e-06, + "loss": 17.5556, + "step": 20069 + }, + { + "epoch": 0.3668634726817409, + "grad_norm": 8.054993608283159, + "learning_rate": 7.308146347082996e-06, + "loss": 18.115, + "step": 20070 + }, + { + "epoch": 0.36688175187818745, + "grad_norm": 6.73741360233731, + "learning_rate": 7.307883759210148e-06, + "loss": 17.7996, + "step": 20071 + }, + { + "epoch": 0.366900031074634, + "grad_norm": 6.637461375419106, + "learning_rate": 7.3076211632483695e-06, + "loss": 17.5056, + "step": 20072 + }, + { + "epoch": 0.36691831027108046, + "grad_norm": 6.408352045887416, + "learning_rate": 7.307358559198578e-06, + "loss": 17.5988, + "step": 20073 + }, + { + "epoch": 0.366936589467527, + "grad_norm": 7.497068300084706, + "learning_rate": 7.307095947061694e-06, + "loss": 17.4475, + "step": 20074 + }, + { + "epoch": 0.36695486866397353, + "grad_norm": 7.083633864362394, + "learning_rate": 7.306833326838641e-06, + "loss": 17.7188, + "step": 20075 + }, + { + "epoch": 0.36697314786042007, + "grad_norm": 5.704502153833018, + "learning_rate": 7.306570698530336e-06, + "loss": 17.2738, + "step": 20076 + }, + { + "epoch": 0.3669914270568666, + "grad_norm": 6.596272202193835, + "learning_rate": 7.306308062137702e-06, + "loss": 17.5896, + "step": 20077 + }, + { + "epoch": 0.3670097062533131, + "grad_norm": 5.400517468672541, + "learning_rate": 7.3060454176616555e-06, + "loss": 17.1224, + "step": 20078 + }, + { + "epoch": 0.3670279854497596, + "grad_norm": 7.2614347856671815, + "learning_rate": 7.3057827651031225e-06, + "loss": 17.918, + "step": 20079 + }, + { + "epoch": 0.36704626464620616, + "grad_norm": 5.905622146665631, + "learning_rate": 7.305520104463019e-06, + "loss": 17.1705, + "step": 20080 + }, + { + "epoch": 0.3670645438426527, + "grad_norm": 5.873681135369336, + "learning_rate": 7.30525743574227e-06, + "loss": 17.1603, + "step": 20081 + }, + { + "epoch": 0.3670828230390992, + "grad_norm": 5.945053141843443, + "learning_rate": 7.304994758941792e-06, + "loss": 17.3761, + "step": 20082 + }, + { + "epoch": 0.3671011022355457, + "grad_norm": 8.528893648065019, + "learning_rate": 7.304732074062508e-06, + "loss": 18.5653, + "step": 20083 + }, + { + "epoch": 0.36711938143199224, + "grad_norm": 6.767456007725406, + "learning_rate": 7.3044693811053395e-06, + "loss": 17.8274, + "step": 20084 + }, + { + "epoch": 0.3671376606284388, + "grad_norm": 7.488586284702607, + "learning_rate": 7.304206680071204e-06, + "loss": 17.8009, + "step": 20085 + }, + { + "epoch": 0.3671559398248853, + "grad_norm": 6.5844851393308925, + "learning_rate": 7.303943970961025e-06, + "loss": 17.4793, + "step": 20086 + }, + { + "epoch": 0.36717421902133185, + "grad_norm": 5.177449408518992, + "learning_rate": 7.303681253775721e-06, + "loss": 17.1093, + "step": 20087 + }, + { + "epoch": 0.36719249821777833, + "grad_norm": 7.252141770205381, + "learning_rate": 7.303418528516214e-06, + "loss": 17.6935, + "step": 20088 + }, + { + "epoch": 0.36721077741422486, + "grad_norm": 5.407830787162609, + "learning_rate": 7.303155795183427e-06, + "loss": 17.032, + "step": 20089 + }, + { + "epoch": 0.3672290566106714, + "grad_norm": 5.407756889535193, + "learning_rate": 7.302893053778277e-06, + "loss": 17.2391, + "step": 20090 + }, + { + "epoch": 0.36724733580711794, + "grad_norm": 5.398190716370125, + "learning_rate": 7.302630304301688e-06, + "loss": 17.0918, + "step": 20091 + }, + { + "epoch": 0.3672656150035644, + "grad_norm": 6.664543300439525, + "learning_rate": 7.302367546754577e-06, + "loss": 17.6589, + "step": 20092 + }, + { + 
"epoch": 0.36728389420001095, + "grad_norm": 7.744316272679232, + "learning_rate": 7.302104781137871e-06, + "loss": 18.0601, + "step": 20093 + }, + { + "epoch": 0.3673021733964575, + "grad_norm": 7.400419416615188, + "learning_rate": 7.3018420074524856e-06, + "loss": 17.8153, + "step": 20094 + }, + { + "epoch": 0.367320452592904, + "grad_norm": 7.730658448004205, + "learning_rate": 7.301579225699344e-06, + "loss": 17.9133, + "step": 20095 + }, + { + "epoch": 0.36733873178935056, + "grad_norm": 7.870582895323564, + "learning_rate": 7.301316435879366e-06, + "loss": 18.2224, + "step": 20096 + }, + { + "epoch": 0.36735701098579704, + "grad_norm": 5.7632077535216215, + "learning_rate": 7.301053637993476e-06, + "loss": 17.2209, + "step": 20097 + }, + { + "epoch": 0.3673752901822436, + "grad_norm": 6.723134070566391, + "learning_rate": 7.30079083204259e-06, + "loss": 17.5405, + "step": 20098 + }, + { + "epoch": 0.3673935693786901, + "grad_norm": 8.821324588463753, + "learning_rate": 7.300528018027634e-06, + "loss": 18.5008, + "step": 20099 + }, + { + "epoch": 0.36741184857513665, + "grad_norm": 6.1028228266027895, + "learning_rate": 7.300265195949526e-06, + "loss": 17.4973, + "step": 20100 + }, + { + "epoch": 0.3674301277715832, + "grad_norm": 7.136781649492445, + "learning_rate": 7.300002365809187e-06, + "loss": 17.7945, + "step": 20101 + }, + { + "epoch": 0.36744840696802966, + "grad_norm": 6.748100371495393, + "learning_rate": 7.299739527607541e-06, + "loss": 17.5044, + "step": 20102 + }, + { + "epoch": 0.3674666861644762, + "grad_norm": 7.662613441268769, + "learning_rate": 7.299476681345508e-06, + "loss": 18.1845, + "step": 20103 + }, + { + "epoch": 0.36748496536092273, + "grad_norm": 6.700378445390445, + "learning_rate": 7.299213827024007e-06, + "loss": 17.564, + "step": 20104 + }, + { + "epoch": 0.36750324455736927, + "grad_norm": 6.7273354272376, + "learning_rate": 7.298950964643961e-06, + "loss": 17.5001, + "step": 20105 + }, + { + "epoch": 0.3675215237538158, + "grad_norm": 8.701715193046008, + "learning_rate": 7.298688094206292e-06, + "loss": 18.1433, + "step": 20106 + }, + { + "epoch": 0.3675398029502623, + "grad_norm": 7.906115467473902, + "learning_rate": 7.298425215711922e-06, + "loss": 17.9331, + "step": 20107 + }, + { + "epoch": 0.3675580821467088, + "grad_norm": 5.709904934285042, + "learning_rate": 7.298162329161769e-06, + "loss": 17.3205, + "step": 20108 + }, + { + "epoch": 0.36757636134315536, + "grad_norm": 6.515815760470419, + "learning_rate": 7.297899434556757e-06, + "loss": 17.5524, + "step": 20109 + }, + { + "epoch": 0.3675946405396019, + "grad_norm": 5.267983022408444, + "learning_rate": 7.297636531897807e-06, + "loss": 16.9202, + "step": 20110 + }, + { + "epoch": 0.3676129197360484, + "grad_norm": 6.826460841783871, + "learning_rate": 7.2973736211858404e-06, + "loss": 17.4843, + "step": 20111 + }, + { + "epoch": 0.3676311989324949, + "grad_norm": 5.052098701405337, + "learning_rate": 7.297110702421779e-06, + "loss": 16.9792, + "step": 20112 + }, + { + "epoch": 0.36764947812894144, + "grad_norm": 6.565128374893936, + "learning_rate": 7.2968477756065446e-06, + "loss": 17.3362, + "step": 20113 + }, + { + "epoch": 0.367667757325388, + "grad_norm": 6.128296352871817, + "learning_rate": 7.296584840741055e-06, + "loss": 17.4375, + "step": 20114 + }, + { + "epoch": 0.3676860365218345, + "grad_norm": 6.125640252298581, + "learning_rate": 7.2963218978262375e-06, + "loss": 17.266, + "step": 20115 + }, + { + "epoch": 0.36770431571828105, + "grad_norm": 6.902902395075871, + 
"learning_rate": 7.296058946863011e-06, + "loss": 17.6244, + "step": 20116 + }, + { + "epoch": 0.36772259491472753, + "grad_norm": 7.64173330540598, + "learning_rate": 7.295795987852297e-06, + "loss": 18.2167, + "step": 20117 + }, + { + "epoch": 0.36774087411117407, + "grad_norm": 6.884629072605797, + "learning_rate": 7.295533020795017e-06, + "loss": 17.5537, + "step": 20118 + }, + { + "epoch": 0.3677591533076206, + "grad_norm": 6.1274511937644185, + "learning_rate": 7.295270045692091e-06, + "loss": 17.3439, + "step": 20119 + }, + { + "epoch": 0.36777743250406714, + "grad_norm": 6.622061460834097, + "learning_rate": 7.295007062544446e-06, + "loss": 17.4974, + "step": 20120 + }, + { + "epoch": 0.36779571170051367, + "grad_norm": 6.366456852787825, + "learning_rate": 7.294744071352999e-06, + "loss": 17.3657, + "step": 20121 + }, + { + "epoch": 0.36781399089696015, + "grad_norm": 6.999359129834391, + "learning_rate": 7.294481072118673e-06, + "loss": 17.7983, + "step": 20122 + }, + { + "epoch": 0.3678322700934067, + "grad_norm": 6.185835948923508, + "learning_rate": 7.2942180648423885e-06, + "loss": 17.4683, + "step": 20123 + }, + { + "epoch": 0.3678505492898532, + "grad_norm": 6.919377538042241, + "learning_rate": 7.293955049525071e-06, + "loss": 17.8182, + "step": 20124 + }, + { + "epoch": 0.36786882848629976, + "grad_norm": 8.598542348260848, + "learning_rate": 7.2936920261676395e-06, + "loss": 18.0788, + "step": 20125 + }, + { + "epoch": 0.36788710768274624, + "grad_norm": 6.762175903862375, + "learning_rate": 7.293428994771017e-06, + "loss": 17.6727, + "step": 20126 + }, + { + "epoch": 0.3679053868791928, + "grad_norm": 6.078292989964342, + "learning_rate": 7.293165955336125e-06, + "loss": 17.3059, + "step": 20127 + }, + { + "epoch": 0.3679236660756393, + "grad_norm": 6.885064048666509, + "learning_rate": 7.292902907863883e-06, + "loss": 17.7859, + "step": 20128 + }, + { + "epoch": 0.36794194527208585, + "grad_norm": 6.8245340079871175, + "learning_rate": 7.292639852355216e-06, + "loss": 17.6658, + "step": 20129 + }, + { + "epoch": 0.3679602244685324, + "grad_norm": 5.562108315960352, + "learning_rate": 7.292376788811047e-06, + "loss": 17.0449, + "step": 20130 + }, + { + "epoch": 0.36797850366497886, + "grad_norm": 7.151082116488049, + "learning_rate": 7.292113717232297e-06, + "loss": 17.8598, + "step": 20131 + }, + { + "epoch": 0.3679967828614254, + "grad_norm": 6.569337289212714, + "learning_rate": 7.291850637619884e-06, + "loss": 17.7569, + "step": 20132 + }, + { + "epoch": 0.36801506205787193, + "grad_norm": 6.592275684741269, + "learning_rate": 7.291587549974734e-06, + "loss": 17.6656, + "step": 20133 + }, + { + "epoch": 0.36803334125431847, + "grad_norm": 7.178208409049657, + "learning_rate": 7.291324454297771e-06, + "loss": 17.6371, + "step": 20134 + }, + { + "epoch": 0.368051620450765, + "grad_norm": 7.485265496700924, + "learning_rate": 7.291061350589913e-06, + "loss": 18.1561, + "step": 20135 + }, + { + "epoch": 0.3680698996472115, + "grad_norm": 7.948896447649479, + "learning_rate": 7.290798238852084e-06, + "loss": 17.9248, + "step": 20136 + }, + { + "epoch": 0.368088178843658, + "grad_norm": 7.168305311563071, + "learning_rate": 7.2905351190852055e-06, + "loss": 17.8386, + "step": 20137 + }, + { + "epoch": 0.36810645804010456, + "grad_norm": 5.664485484315749, + "learning_rate": 7.290271991290201e-06, + "loss": 17.13, + "step": 20138 + }, + { + "epoch": 0.3681247372365511, + "grad_norm": 6.137259040566241, + "learning_rate": 7.290008855467992e-06, + "loss": 17.4592, + "step": 
20139 + }, + { + "epoch": 0.3681430164329976, + "grad_norm": 7.451550629648147, + "learning_rate": 7.289745711619499e-06, + "loss": 18.3272, + "step": 20140 + }, + { + "epoch": 0.3681612956294441, + "grad_norm": 5.842546717482064, + "learning_rate": 7.2894825597456475e-06, + "loss": 17.2294, + "step": 20141 + }, + { + "epoch": 0.36817957482589064, + "grad_norm": 7.217392933405763, + "learning_rate": 7.289219399847358e-06, + "loss": 18.2428, + "step": 20142 + }, + { + "epoch": 0.3681978540223372, + "grad_norm": 7.084181205845155, + "learning_rate": 7.288956231925552e-06, + "loss": 17.5697, + "step": 20143 + }, + { + "epoch": 0.3682161332187837, + "grad_norm": 5.796022375836719, + "learning_rate": 7.288693055981156e-06, + "loss": 17.2733, + "step": 20144 + }, + { + "epoch": 0.36823441241523025, + "grad_norm": 6.625334101804693, + "learning_rate": 7.288429872015087e-06, + "loss": 17.5939, + "step": 20145 + }, + { + "epoch": 0.36825269161167673, + "grad_norm": 6.874146281867213, + "learning_rate": 7.28816668002827e-06, + "loss": 17.5714, + "step": 20146 + }, + { + "epoch": 0.36827097080812327, + "grad_norm": 6.428597716907134, + "learning_rate": 7.287903480021627e-06, + "loss": 17.5581, + "step": 20147 + }, + { + "epoch": 0.3682892500045698, + "grad_norm": 5.285284881012578, + "learning_rate": 7.287640271996082e-06, + "loss": 16.989, + "step": 20148 + }, + { + "epoch": 0.36830752920101634, + "grad_norm": 5.255396001001805, + "learning_rate": 7.287377055952557e-06, + "loss": 16.8976, + "step": 20149 + }, + { + "epoch": 0.3683258083974629, + "grad_norm": 6.133955867592134, + "learning_rate": 7.287113831891972e-06, + "loss": 17.2857, + "step": 20150 + }, + { + "epoch": 0.36834408759390935, + "grad_norm": 5.460204999978745, + "learning_rate": 7.286850599815253e-06, + "loss": 17.1928, + "step": 20151 + }, + { + "epoch": 0.3683623667903559, + "grad_norm": 9.398947934144394, + "learning_rate": 7.286587359723321e-06, + "loss": 17.856, + "step": 20152 + }, + { + "epoch": 0.3683806459868024, + "grad_norm": 6.430024112866958, + "learning_rate": 7.286324111617098e-06, + "loss": 17.398, + "step": 20153 + }, + { + "epoch": 0.36839892518324896, + "grad_norm": 6.933947022277681, + "learning_rate": 7.286060855497508e-06, + "loss": 17.4871, + "step": 20154 + }, + { + "epoch": 0.3684172043796955, + "grad_norm": 6.49910817450385, + "learning_rate": 7.285797591365471e-06, + "loss": 17.3752, + "step": 20155 + }, + { + "epoch": 0.368435483576142, + "grad_norm": 6.501522383444161, + "learning_rate": 7.285534319221914e-06, + "loss": 17.5474, + "step": 20156 + }, + { + "epoch": 0.3684537627725885, + "grad_norm": 6.8706472101460205, + "learning_rate": 7.285271039067758e-06, + "loss": 17.6611, + "step": 20157 + }, + { + "epoch": 0.36847204196903505, + "grad_norm": 11.028480464771858, + "learning_rate": 7.285007750903924e-06, + "loss": 18.1285, + "step": 20158 + }, + { + "epoch": 0.3684903211654816, + "grad_norm": 6.970470342012089, + "learning_rate": 7.284744454731336e-06, + "loss": 17.9104, + "step": 20159 + }, + { + "epoch": 0.36850860036192806, + "grad_norm": 6.760825199052605, + "learning_rate": 7.284481150550917e-06, + "loss": 17.8917, + "step": 20160 + }, + { + "epoch": 0.3685268795583746, + "grad_norm": 6.481066904222357, + "learning_rate": 7.28421783836359e-06, + "loss": 17.732, + "step": 20161 + }, + { + "epoch": 0.36854515875482113, + "grad_norm": 5.683108799358094, + "learning_rate": 7.283954518170279e-06, + "loss": 17.3968, + "step": 20162 + }, + { + "epoch": 0.36856343795126767, + "grad_norm": 
5.7027389137600535, + "learning_rate": 7.283691189971905e-06, + "loss": 17.5697, + "step": 20163 + }, + { + "epoch": 0.3685817171477142, + "grad_norm": 7.586150772960724, + "learning_rate": 7.28342785376939e-06, + "loss": 17.9641, + "step": 20164 + }, + { + "epoch": 0.3685999963441607, + "grad_norm": 7.0170276217612555, + "learning_rate": 7.28316450956366e-06, + "loss": 17.6283, + "step": 20165 + }, + { + "epoch": 0.3686182755406072, + "grad_norm": 8.547247398630253, + "learning_rate": 7.282901157355635e-06, + "loss": 17.8078, + "step": 20166 + }, + { + "epoch": 0.36863655473705376, + "grad_norm": 6.7076971943592865, + "learning_rate": 7.282637797146241e-06, + "loss": 17.8502, + "step": 20167 + }, + { + "epoch": 0.3686548339335003, + "grad_norm": 7.7097343986682505, + "learning_rate": 7.2823744289364e-06, + "loss": 17.678, + "step": 20168 + }, + { + "epoch": 0.36867311312994683, + "grad_norm": 6.749417805144167, + "learning_rate": 7.282111052727033e-06, + "loss": 17.5471, + "step": 20169 + }, + { + "epoch": 0.3686913923263933, + "grad_norm": 5.925547427443137, + "learning_rate": 7.281847668519066e-06, + "loss": 17.0655, + "step": 20170 + }, + { + "epoch": 0.36870967152283984, + "grad_norm": 5.014707464621083, + "learning_rate": 7.28158427631342e-06, + "loss": 16.8923, + "step": 20171 + }, + { + "epoch": 0.3687279507192864, + "grad_norm": 8.34346511944614, + "learning_rate": 7.281320876111021e-06, + "loss": 18.1639, + "step": 20172 + }, + { + "epoch": 0.3687462299157329, + "grad_norm": 6.665248434135376, + "learning_rate": 7.2810574679127886e-06, + "loss": 17.5314, + "step": 20173 + }, + { + "epoch": 0.36876450911217945, + "grad_norm": 6.58588464299712, + "learning_rate": 7.280794051719647e-06, + "loss": 17.7183, + "step": 20174 + }, + { + "epoch": 0.36878278830862593, + "grad_norm": 6.760345052735897, + "learning_rate": 7.280530627532521e-06, + "loss": 17.6791, + "step": 20175 + }, + { + "epoch": 0.36880106750507247, + "grad_norm": 6.586517927225698, + "learning_rate": 7.280267195352335e-06, + "loss": 17.5181, + "step": 20176 + }, + { + "epoch": 0.368819346701519, + "grad_norm": 4.754023736789508, + "learning_rate": 7.280003755180009e-06, + "loss": 16.9561, + "step": 20177 + }, + { + "epoch": 0.36883762589796554, + "grad_norm": 8.604932407798067, + "learning_rate": 7.279740307016468e-06, + "loss": 18.5146, + "step": 20178 + }, + { + "epoch": 0.3688559050944121, + "grad_norm": 5.268657647543221, + "learning_rate": 7.279476850862634e-06, + "loss": 16.9788, + "step": 20179 + }, + { + "epoch": 0.36887418429085855, + "grad_norm": 7.057785945547185, + "learning_rate": 7.2792133867194314e-06, + "loss": 17.814, + "step": 20180 + }, + { + "epoch": 0.3688924634873051, + "grad_norm": 7.236374121449109, + "learning_rate": 7.278949914587784e-06, + "loss": 17.9099, + "step": 20181 + }, + { + "epoch": 0.3689107426837516, + "grad_norm": 6.654765517746662, + "learning_rate": 7.278686434468615e-06, + "loss": 17.8197, + "step": 20182 + }, + { + "epoch": 0.36892902188019816, + "grad_norm": 6.710378083383677, + "learning_rate": 7.278422946362847e-06, + "loss": 17.7071, + "step": 20183 + }, + { + "epoch": 0.3689473010766447, + "grad_norm": 7.2149529704784925, + "learning_rate": 7.2781594502714056e-06, + "loss": 17.7761, + "step": 20184 + }, + { + "epoch": 0.3689655802730912, + "grad_norm": 6.455532537998574, + "learning_rate": 7.277895946195213e-06, + "loss": 17.4167, + "step": 20185 + }, + { + "epoch": 0.3689838594695377, + "grad_norm": 5.737671359493939, + "learning_rate": 7.2776324341351925e-06, + "loss": 
17.2511, + "step": 20186 + }, + { + "epoch": 0.36900213866598425, + "grad_norm": 7.3158062405044495, + "learning_rate": 7.277368914092266e-06, + "loss": 17.8465, + "step": 20187 + }, + { + "epoch": 0.3690204178624308, + "grad_norm": 6.451035583476881, + "learning_rate": 7.277105386067361e-06, + "loss": 17.4541, + "step": 20188 + }, + { + "epoch": 0.3690386970588773, + "grad_norm": 6.621442986884995, + "learning_rate": 7.2768418500614e-06, + "loss": 17.8096, + "step": 20189 + }, + { + "epoch": 0.3690569762553238, + "grad_norm": 6.519777440519619, + "learning_rate": 7.276578306075306e-06, + "loss": 17.4998, + "step": 20190 + }, + { + "epoch": 0.36907525545177033, + "grad_norm": 6.84816196470274, + "learning_rate": 7.276314754110001e-06, + "loss": 17.5994, + "step": 20191 + }, + { + "epoch": 0.36909353464821687, + "grad_norm": 6.731177467542887, + "learning_rate": 7.276051194166409e-06, + "loss": 17.458, + "step": 20192 + }, + { + "epoch": 0.3691118138446634, + "grad_norm": 9.501986421561949, + "learning_rate": 7.275787626245459e-06, + "loss": 18.2521, + "step": 20193 + }, + { + "epoch": 0.3691300930411099, + "grad_norm": 6.501485311118727, + "learning_rate": 7.2755240503480685e-06, + "loss": 17.5143, + "step": 20194 + }, + { + "epoch": 0.3691483722375564, + "grad_norm": 8.309874503389043, + "learning_rate": 7.2752604664751634e-06, + "loss": 18.1989, + "step": 20195 + }, + { + "epoch": 0.36916665143400296, + "grad_norm": 7.4206115730466164, + "learning_rate": 7.274996874627669e-06, + "loss": 17.9945, + "step": 20196 + }, + { + "epoch": 0.3691849306304495, + "grad_norm": 5.68336568933742, + "learning_rate": 7.274733274806507e-06, + "loss": 17.2302, + "step": 20197 + }, + { + "epoch": 0.36920320982689603, + "grad_norm": 5.460141761208392, + "learning_rate": 7.274469667012603e-06, + "loss": 17.1825, + "step": 20198 + }, + { + "epoch": 0.3692214890233425, + "grad_norm": 6.54903313345841, + "learning_rate": 7.274206051246879e-06, + "loss": 17.2857, + "step": 20199 + }, + { + "epoch": 0.36923976821978904, + "grad_norm": 5.60409017180462, + "learning_rate": 7.273942427510262e-06, + "loss": 17.296, + "step": 20200 + }, + { + "epoch": 0.3692580474162356, + "grad_norm": 5.352966850503021, + "learning_rate": 7.273678795803671e-06, + "loss": 16.9111, + "step": 20201 + }, + { + "epoch": 0.3692763266126821, + "grad_norm": 7.657158870126983, + "learning_rate": 7.273415156128037e-06, + "loss": 18.1799, + "step": 20202 + }, + { + "epoch": 0.36929460580912865, + "grad_norm": 6.281812384549631, + "learning_rate": 7.273151508484278e-06, + "loss": 17.1461, + "step": 20203 + }, + { + "epoch": 0.36931288500557513, + "grad_norm": 5.657290687537265, + "learning_rate": 7.2728878528733205e-06, + "loss": 17.268, + "step": 20204 + }, + { + "epoch": 0.36933116420202167, + "grad_norm": 7.615048088581312, + "learning_rate": 7.272624189296088e-06, + "loss": 17.7176, + "step": 20205 + }, + { + "epoch": 0.3693494433984682, + "grad_norm": 8.85994191056542, + "learning_rate": 7.272360517753505e-06, + "loss": 18.8301, + "step": 20206 + }, + { + "epoch": 0.36936772259491474, + "grad_norm": 6.823672506279992, + "learning_rate": 7.272096838246496e-06, + "loss": 17.9613, + "step": 20207 + }, + { + "epoch": 0.3693860017913613, + "grad_norm": 6.95551203175244, + "learning_rate": 7.271833150775984e-06, + "loss": 17.7593, + "step": 20208 + }, + { + "epoch": 0.36940428098780775, + "grad_norm": 5.5484462019423795, + "learning_rate": 7.271569455342895e-06, + "loss": 17.0792, + "step": 20209 + }, + { + "epoch": 0.3694225601842543, + 
"grad_norm": 7.258949293892397, + "learning_rate": 7.271305751948152e-06, + "loss": 17.8223, + "step": 20210 + }, + { + "epoch": 0.3694408393807008, + "grad_norm": 7.2421793358182995, + "learning_rate": 7.2710420405926795e-06, + "loss": 18.0179, + "step": 20211 + }, + { + "epoch": 0.36945911857714736, + "grad_norm": 5.539469075281623, + "learning_rate": 7.270778321277401e-06, + "loss": 17.1364, + "step": 20212 + }, + { + "epoch": 0.3694773977735939, + "grad_norm": 8.741657716263415, + "learning_rate": 7.270514594003243e-06, + "loss": 18.4418, + "step": 20213 + }, + { + "epoch": 0.3694956769700404, + "grad_norm": 6.51638394524961, + "learning_rate": 7.270250858771126e-06, + "loss": 17.5268, + "step": 20214 + }, + { + "epoch": 0.3695139561664869, + "grad_norm": 5.658198226211729, + "learning_rate": 7.2699871155819775e-06, + "loss": 17.0006, + "step": 20215 + }, + { + "epoch": 0.36953223536293345, + "grad_norm": 5.933392965481522, + "learning_rate": 7.269723364436721e-06, + "loss": 17.113, + "step": 20216 + }, + { + "epoch": 0.36955051455938, + "grad_norm": 7.0326731791723684, + "learning_rate": 7.2694596053362834e-06, + "loss": 17.9364, + "step": 20217 + }, + { + "epoch": 0.3695687937558265, + "grad_norm": 7.2282587953716515, + "learning_rate": 7.269195838281585e-06, + "loss": 17.5891, + "step": 20218 + }, + { + "epoch": 0.369587072952273, + "grad_norm": 6.463010505382674, + "learning_rate": 7.268932063273552e-06, + "loss": 17.4782, + "step": 20219 + }, + { + "epoch": 0.36960535214871953, + "grad_norm": 7.062369766509262, + "learning_rate": 7.26866828031311e-06, + "loss": 17.5808, + "step": 20220 + }, + { + "epoch": 0.36962363134516607, + "grad_norm": 7.207672875590722, + "learning_rate": 7.2684044894011805e-06, + "loss": 17.8386, + "step": 20221 + }, + { + "epoch": 0.3696419105416126, + "grad_norm": 4.822325848945059, + "learning_rate": 7.268140690538692e-06, + "loss": 16.8335, + "step": 20222 + }, + { + "epoch": 0.36966018973805914, + "grad_norm": 5.3225809086744755, + "learning_rate": 7.267876883726567e-06, + "loss": 17.1389, + "step": 20223 + }, + { + "epoch": 0.3696784689345056, + "grad_norm": 5.3704522860734105, + "learning_rate": 7.267613068965729e-06, + "loss": 17.0194, + "step": 20224 + }, + { + "epoch": 0.36969674813095216, + "grad_norm": 6.869480950513082, + "learning_rate": 7.267349246257105e-06, + "loss": 18.2319, + "step": 20225 + }, + { + "epoch": 0.3697150273273987, + "grad_norm": 6.397992614038071, + "learning_rate": 7.267085415601618e-06, + "loss": 17.5737, + "step": 20226 + }, + { + "epoch": 0.36973330652384523, + "grad_norm": 6.558547566995884, + "learning_rate": 7.266821577000195e-06, + "loss": 17.7717, + "step": 20227 + }, + { + "epoch": 0.3697515857202917, + "grad_norm": 6.4273194172281585, + "learning_rate": 7.266557730453757e-06, + "loss": 17.454, + "step": 20228 + }, + { + "epoch": 0.36976986491673824, + "grad_norm": 7.414612660435543, + "learning_rate": 7.266293875963232e-06, + "loss": 17.8947, + "step": 20229 + }, + { + "epoch": 0.3697881441131848, + "grad_norm": 6.833536317395566, + "learning_rate": 7.266030013529544e-06, + "loss": 17.7028, + "step": 20230 + }, + { + "epoch": 0.3698064233096313, + "grad_norm": 6.376908683057129, + "learning_rate": 7.265766143153617e-06, + "loss": 17.7087, + "step": 20231 + }, + { + "epoch": 0.36982470250607785, + "grad_norm": 6.2602699004023385, + "learning_rate": 7.265502264836376e-06, + "loss": 17.3338, + "step": 20232 + }, + { + "epoch": 0.36984298170252433, + "grad_norm": 5.588023044502968, + "learning_rate": 
7.265238378578745e-06, + "loss": 17.2379, + "step": 20233 + }, + { + "epoch": 0.36986126089897087, + "grad_norm": 4.989465197818141, + "learning_rate": 7.264974484381653e-06, + "loss": 16.9015, + "step": 20234 + }, + { + "epoch": 0.3698795400954174, + "grad_norm": 6.612200306580514, + "learning_rate": 7.26471058224602e-06, + "loss": 17.569, + "step": 20235 + }, + { + "epoch": 0.36989781929186394, + "grad_norm": 7.340673453322384, + "learning_rate": 7.264446672172772e-06, + "loss": 17.7228, + "step": 20236 + }, + { + "epoch": 0.3699160984883105, + "grad_norm": 5.468142572189995, + "learning_rate": 7.264182754162836e-06, + "loss": 17.0734, + "step": 20237 + }, + { + "epoch": 0.36993437768475695, + "grad_norm": 7.07678047529603, + "learning_rate": 7.263918828217137e-06, + "loss": 17.6607, + "step": 20238 + }, + { + "epoch": 0.3699526568812035, + "grad_norm": 7.675859352430443, + "learning_rate": 7.263654894336598e-06, + "loss": 18.2184, + "step": 20239 + }, + { + "epoch": 0.36997093607765, + "grad_norm": 5.954324255706961, + "learning_rate": 7.263390952522145e-06, + "loss": 17.0799, + "step": 20240 + }, + { + "epoch": 0.36998921527409656, + "grad_norm": 6.029026241074232, + "learning_rate": 7.263127002774703e-06, + "loss": 17.3228, + "step": 20241 + }, + { + "epoch": 0.3700074944705431, + "grad_norm": 5.535211916569911, + "learning_rate": 7.262863045095197e-06, + "loss": 17.2121, + "step": 20242 + }, + { + "epoch": 0.3700257736669896, + "grad_norm": 7.536357040008187, + "learning_rate": 7.262599079484554e-06, + "loss": 18.1031, + "step": 20243 + }, + { + "epoch": 0.3700440528634361, + "grad_norm": 6.40201864864051, + "learning_rate": 7.262335105943696e-06, + "loss": 17.8633, + "step": 20244 + }, + { + "epoch": 0.37006233205988265, + "grad_norm": 7.2861604702502065, + "learning_rate": 7.262071124473551e-06, + "loss": 17.5257, + "step": 20245 + }, + { + "epoch": 0.3700806112563292, + "grad_norm": 6.315008338704994, + "learning_rate": 7.261807135075041e-06, + "loss": 17.5906, + "step": 20246 + }, + { + "epoch": 0.3700988904527757, + "grad_norm": 8.925224656046367, + "learning_rate": 7.261543137749094e-06, + "loss": 19.0537, + "step": 20247 + }, + { + "epoch": 0.3701171696492222, + "grad_norm": 5.802604624840978, + "learning_rate": 7.261279132496636e-06, + "loss": 17.1504, + "step": 20248 + }, + { + "epoch": 0.37013544884566874, + "grad_norm": 8.186105514082048, + "learning_rate": 7.261015119318589e-06, + "loss": 18.017, + "step": 20249 + }, + { + "epoch": 0.37015372804211527, + "grad_norm": 6.326322714453996, + "learning_rate": 7.260751098215881e-06, + "loss": 17.5734, + "step": 20250 + }, + { + "epoch": 0.3701720072385618, + "grad_norm": 7.162270440843001, + "learning_rate": 7.260487069189437e-06, + "loss": 17.9879, + "step": 20251 + }, + { + "epoch": 0.37019028643500834, + "grad_norm": 6.534782206402165, + "learning_rate": 7.260223032240181e-06, + "loss": 17.5544, + "step": 20252 + }, + { + "epoch": 0.3702085656314548, + "grad_norm": 6.059176468142338, + "learning_rate": 7.25995898736904e-06, + "loss": 17.3778, + "step": 20253 + }, + { + "epoch": 0.37022684482790136, + "grad_norm": 5.622668196541799, + "learning_rate": 7.259694934576939e-06, + "loss": 17.4539, + "step": 20254 + }, + { + "epoch": 0.3702451240243479, + "grad_norm": 7.751454172468496, + "learning_rate": 7.259430873864804e-06, + "loss": 17.8333, + "step": 20255 + }, + { + "epoch": 0.37026340322079443, + "grad_norm": 6.797918019734457, + "learning_rate": 7.259166805233559e-06, + "loss": 17.8342, + "step": 20256 + }, + { + "epoch": 
0.37028168241724096, + "grad_norm": 7.335166529034762, + "learning_rate": 7.25890272868413e-06, + "loss": 17.7724, + "step": 20257 + }, + { + "epoch": 0.37029996161368745, + "grad_norm": 5.964816393323727, + "learning_rate": 7.258638644217444e-06, + "loss": 17.3451, + "step": 20258 + }, + { + "epoch": 0.370318240810134, + "grad_norm": 9.362367560733293, + "learning_rate": 7.258374551834425e-06, + "loss": 17.8653, + "step": 20259 + }, + { + "epoch": 0.3703365200065805, + "grad_norm": 6.005702624129726, + "learning_rate": 7.258110451535998e-06, + "loss": 17.2804, + "step": 20260 + }, + { + "epoch": 0.37035479920302705, + "grad_norm": 6.918011032469916, + "learning_rate": 7.257846343323091e-06, + "loss": 17.6388, + "step": 20261 + }, + { + "epoch": 0.37037307839947353, + "grad_norm": 6.683525037046206, + "learning_rate": 7.257582227196629e-06, + "loss": 17.9775, + "step": 20262 + }, + { + "epoch": 0.37039135759592007, + "grad_norm": 6.844561692522498, + "learning_rate": 7.257318103157537e-06, + "loss": 17.5909, + "step": 20263 + }, + { + "epoch": 0.3704096367923666, + "grad_norm": 6.407991545432797, + "learning_rate": 7.25705397120674e-06, + "loss": 17.442, + "step": 20264 + }, + { + "epoch": 0.37042791598881314, + "grad_norm": 7.4644311069466385, + "learning_rate": 7.256789831345166e-06, + "loss": 17.7972, + "step": 20265 + }, + { + "epoch": 0.3704461951852597, + "grad_norm": 6.029558736818221, + "learning_rate": 7.256525683573739e-06, + "loss": 17.319, + "step": 20266 + }, + { + "epoch": 0.37046447438170615, + "grad_norm": 6.868138842885967, + "learning_rate": 7.2562615278933845e-06, + "loss": 17.7122, + "step": 20267 + }, + { + "epoch": 0.3704827535781527, + "grad_norm": 6.01779397169704, + "learning_rate": 7.255997364305028e-06, + "loss": 17.2451, + "step": 20268 + }, + { + "epoch": 0.3705010327745992, + "grad_norm": 6.623586650363315, + "learning_rate": 7.255733192809598e-06, + "loss": 17.6405, + "step": 20269 + }, + { + "epoch": 0.37051931197104576, + "grad_norm": 5.949821133058551, + "learning_rate": 7.2554690134080195e-06, + "loss": 17.2411, + "step": 20270 + }, + { + "epoch": 0.3705375911674923, + "grad_norm": 5.943184825420087, + "learning_rate": 7.255204826101218e-06, + "loss": 17.2515, + "step": 20271 + }, + { + "epoch": 0.3705558703639388, + "grad_norm": 7.694644087135006, + "learning_rate": 7.254940630890119e-06, + "loss": 17.9962, + "step": 20272 + }, + { + "epoch": 0.3705741495603853, + "grad_norm": 7.380378328384874, + "learning_rate": 7.254676427775648e-06, + "loss": 17.4382, + "step": 20273 + }, + { + "epoch": 0.37059242875683185, + "grad_norm": 7.042658148811812, + "learning_rate": 7.254412216758731e-06, + "loss": 17.4113, + "step": 20274 + }, + { + "epoch": 0.3706107079532784, + "grad_norm": 7.525244056436113, + "learning_rate": 7.254147997840297e-06, + "loss": 18.1133, + "step": 20275 + }, + { + "epoch": 0.3706289871497249, + "grad_norm": 7.00185530585026, + "learning_rate": 7.25388377102127e-06, + "loss": 17.8023, + "step": 20276 + }, + { + "epoch": 0.3706472663461714, + "grad_norm": 6.562099782250633, + "learning_rate": 7.253619536302574e-06, + "loss": 17.2893, + "step": 20277 + }, + { + "epoch": 0.37066554554261794, + "grad_norm": 6.358206370988475, + "learning_rate": 7.253355293685137e-06, + "loss": 17.6039, + "step": 20278 + }, + { + "epoch": 0.37068382473906447, + "grad_norm": 6.083152868347161, + "learning_rate": 7.2530910431698876e-06, + "loss": 17.4665, + "step": 20279 + }, + { + "epoch": 0.370702103935511, + "grad_norm": 8.2756852119704, + "learning_rate": 
7.252826784757747e-06, + "loss": 18.5358, + "step": 20280 + }, + { + "epoch": 0.37072038313195754, + "grad_norm": 6.258825911969038, + "learning_rate": 7.252562518449646e-06, + "loss": 17.2119, + "step": 20281 + }, + { + "epoch": 0.370738662328404, + "grad_norm": 7.742175015108623, + "learning_rate": 7.252298244246507e-06, + "loss": 17.6524, + "step": 20282 + }, + { + "epoch": 0.37075694152485056, + "grad_norm": 7.474971190019222, + "learning_rate": 7.252033962149259e-06, + "loss": 17.9957, + "step": 20283 + }, + { + "epoch": 0.3707752207212971, + "grad_norm": 5.816660347635074, + "learning_rate": 7.251769672158828e-06, + "loss": 17.118, + "step": 20284 + }, + { + "epoch": 0.37079349991774363, + "grad_norm": 5.944782867154556, + "learning_rate": 7.25150537427614e-06, + "loss": 17.4711, + "step": 20285 + }, + { + "epoch": 0.37081177911419017, + "grad_norm": 7.211325359244409, + "learning_rate": 7.251241068502121e-06, + "loss": 17.7895, + "step": 20286 + }, + { + "epoch": 0.37083005831063665, + "grad_norm": 7.37474166400793, + "learning_rate": 7.250976754837695e-06, + "loss": 17.4837, + "step": 20287 + }, + { + "epoch": 0.3708483375070832, + "grad_norm": 6.2830496208268, + "learning_rate": 7.250712433283793e-06, + "loss": 17.4363, + "step": 20288 + }, + { + "epoch": 0.3708666167035297, + "grad_norm": 8.058595380782732, + "learning_rate": 7.250448103841339e-06, + "loss": 18.1716, + "step": 20289 + }, + { + "epoch": 0.37088489589997625, + "grad_norm": 5.1949873569851635, + "learning_rate": 7.250183766511259e-06, + "loss": 17.067, + "step": 20290 + }, + { + "epoch": 0.3709031750964228, + "grad_norm": 7.460335858185023, + "learning_rate": 7.249919421294481e-06, + "loss": 17.7925, + "step": 20291 + }, + { + "epoch": 0.37092145429286927, + "grad_norm": 8.332337387334707, + "learning_rate": 7.2496550681919295e-06, + "loss": 18.2483, + "step": 20292 + }, + { + "epoch": 0.3709397334893158, + "grad_norm": 5.447324642704994, + "learning_rate": 7.249390707204533e-06, + "loss": 17.0831, + "step": 20293 + }, + { + "epoch": 0.37095801268576234, + "grad_norm": 6.048356023286317, + "learning_rate": 7.249126338333218e-06, + "loss": 17.4379, + "step": 20294 + }, + { + "epoch": 0.3709762918822089, + "grad_norm": 6.806845187645089, + "learning_rate": 7.2488619615789095e-06, + "loss": 17.7563, + "step": 20295 + }, + { + "epoch": 0.37099457107865536, + "grad_norm": 5.968497371252764, + "learning_rate": 7.248597576942534e-06, + "loss": 17.3476, + "step": 20296 + }, + { + "epoch": 0.3710128502751019, + "grad_norm": 7.537556683206155, + "learning_rate": 7.248333184425021e-06, + "loss": 17.8649, + "step": 20297 + }, + { + "epoch": 0.3710311294715484, + "grad_norm": 6.298372576893895, + "learning_rate": 7.2480687840272935e-06, + "loss": 17.2882, + "step": 20298 + }, + { + "epoch": 0.37104940866799496, + "grad_norm": 6.362553137479139, + "learning_rate": 7.247804375750281e-06, + "loss": 17.6443, + "step": 20299 + }, + { + "epoch": 0.3710676878644415, + "grad_norm": 4.55298761071668, + "learning_rate": 7.2475399595949105e-06, + "loss": 16.6209, + "step": 20300 + }, + { + "epoch": 0.371085967060888, + "grad_norm": 5.750610391656214, + "learning_rate": 7.2472755355621045e-06, + "loss": 17.1992, + "step": 20301 + }, + { + "epoch": 0.3711042462573345, + "grad_norm": 6.369967866465619, + "learning_rate": 7.247011103652794e-06, + "loss": 17.8721, + "step": 20302 + }, + { + "epoch": 0.37112252545378105, + "grad_norm": 7.08208857214891, + "learning_rate": 7.246746663867906e-06, + "loss": 17.9226, + "step": 20303 + }, + { + 
"epoch": 0.3711408046502276, + "grad_norm": 6.477355633600419, + "learning_rate": 7.246482216208365e-06, + "loss": 17.4098, + "step": 20304 + }, + { + "epoch": 0.3711590838466741, + "grad_norm": 6.497567825869114, + "learning_rate": 7.246217760675098e-06, + "loss": 17.623, + "step": 20305 + }, + { + "epoch": 0.3711773630431206, + "grad_norm": 6.477044126668947, + "learning_rate": 7.245953297269033e-06, + "loss": 17.3895, + "step": 20306 + }, + { + "epoch": 0.37119564223956714, + "grad_norm": 5.643879089142931, + "learning_rate": 7.2456888259910975e-06, + "loss": 16.9544, + "step": 20307 + }, + { + "epoch": 0.37121392143601367, + "grad_norm": 6.8606448765339945, + "learning_rate": 7.245424346842217e-06, + "loss": 17.5219, + "step": 20308 + }, + { + "epoch": 0.3712322006324602, + "grad_norm": 7.416814314734445, + "learning_rate": 7.2451598598233184e-06, + "loss": 17.3286, + "step": 20309 + }, + { + "epoch": 0.37125047982890674, + "grad_norm": 7.233693659450961, + "learning_rate": 7.244895364935329e-06, + "loss": 17.6946, + "step": 20310 + }, + { + "epoch": 0.3712687590253532, + "grad_norm": 6.991765959399882, + "learning_rate": 7.244630862179178e-06, + "loss": 17.9366, + "step": 20311 + }, + { + "epoch": 0.37128703822179976, + "grad_norm": 5.725040707089489, + "learning_rate": 7.244366351555789e-06, + "loss": 17.3533, + "step": 20312 + }, + { + "epoch": 0.3713053174182463, + "grad_norm": 5.299920044235636, + "learning_rate": 7.244101833066093e-06, + "loss": 16.9887, + "step": 20313 + }, + { + "epoch": 0.37132359661469283, + "grad_norm": 6.651243925486612, + "learning_rate": 7.243837306711011e-06, + "loss": 17.7768, + "step": 20314 + }, + { + "epoch": 0.37134187581113937, + "grad_norm": 6.195642278522273, + "learning_rate": 7.243572772491476e-06, + "loss": 17.2241, + "step": 20315 + }, + { + "epoch": 0.37136015500758585, + "grad_norm": 7.474750560470644, + "learning_rate": 7.243308230408413e-06, + "loss": 17.6936, + "step": 20316 + }, + { + "epoch": 0.3713784342040324, + "grad_norm": 6.453715122510833, + "learning_rate": 7.243043680462751e-06, + "loss": 17.6156, + "step": 20317 + }, + { + "epoch": 0.3713967134004789, + "grad_norm": 4.693457419653771, + "learning_rate": 7.2427791226554136e-06, + "loss": 16.9029, + "step": 20318 + }, + { + "epoch": 0.37141499259692545, + "grad_norm": 7.411761556224165, + "learning_rate": 7.24251455698733e-06, + "loss": 17.5067, + "step": 20319 + }, + { + "epoch": 0.371433271793372, + "grad_norm": 6.894335782006021, + "learning_rate": 7.242249983459429e-06, + "loss": 17.7946, + "step": 20320 + }, + { + "epoch": 0.37145155098981847, + "grad_norm": 8.265264598175682, + "learning_rate": 7.241985402072634e-06, + "loss": 17.5989, + "step": 20321 + }, + { + "epoch": 0.371469830186265, + "grad_norm": 6.0522965571151515, + "learning_rate": 7.241720812827876e-06, + "loss": 17.4702, + "step": 20322 + }, + { + "epoch": 0.37148810938271154, + "grad_norm": 5.793673229110889, + "learning_rate": 7.241456215726082e-06, + "loss": 17.4635, + "step": 20323 + }, + { + "epoch": 0.3715063885791581, + "grad_norm": 6.144375277382662, + "learning_rate": 7.241191610768177e-06, + "loss": 17.5922, + "step": 20324 + }, + { + "epoch": 0.3715246677756046, + "grad_norm": 5.6056628885740265, + "learning_rate": 7.24092699795509e-06, + "loss": 17.2037, + "step": 20325 + }, + { + "epoch": 0.3715429469720511, + "grad_norm": 5.406300344384662, + "learning_rate": 7.240662377287748e-06, + "loss": 17.1828, + "step": 20326 + }, + { + "epoch": 0.3715612261684976, + "grad_norm": 7.063140556306277, + 
"learning_rate": 7.240397748767081e-06, + "loss": 17.6718, + "step": 20327 + }, + { + "epoch": 0.37157950536494416, + "grad_norm": 7.660404107492753, + "learning_rate": 7.240133112394012e-06, + "loss": 18.1827, + "step": 20328 + }, + { + "epoch": 0.3715977845613907, + "grad_norm": 6.445982472041131, + "learning_rate": 7.239868468169471e-06, + "loss": 17.5548, + "step": 20329 + }, + { + "epoch": 0.3716160637578372, + "grad_norm": 6.112940911948984, + "learning_rate": 7.239603816094387e-06, + "loss": 17.2363, + "step": 20330 + }, + { + "epoch": 0.3716343429542837, + "grad_norm": 5.734139377635783, + "learning_rate": 7.239339156169686e-06, + "loss": 17.1976, + "step": 20331 + }, + { + "epoch": 0.37165262215073025, + "grad_norm": 5.476184692509491, + "learning_rate": 7.239074488396294e-06, + "loss": 17.2108, + "step": 20332 + }, + { + "epoch": 0.3716709013471768, + "grad_norm": 6.127131572197858, + "learning_rate": 7.238809812775139e-06, + "loss": 17.3148, + "step": 20333 + }, + { + "epoch": 0.3716891805436233, + "grad_norm": 7.332164080361569, + "learning_rate": 7.238545129307153e-06, + "loss": 17.7033, + "step": 20334 + }, + { + "epoch": 0.3717074597400698, + "grad_norm": 6.19730544508833, + "learning_rate": 7.2382804379932595e-06, + "loss": 17.3996, + "step": 20335 + }, + { + "epoch": 0.37172573893651634, + "grad_norm": 7.430300612267093, + "learning_rate": 7.238015738834388e-06, + "loss": 18.2496, + "step": 20336 + }, + { + "epoch": 0.3717440181329629, + "grad_norm": 6.067064644869495, + "learning_rate": 7.237751031831464e-06, + "loss": 17.2441, + "step": 20337 + }, + { + "epoch": 0.3717622973294094, + "grad_norm": 6.331459560734785, + "learning_rate": 7.2374863169854175e-06, + "loss": 17.3189, + "step": 20338 + }, + { + "epoch": 0.37178057652585594, + "grad_norm": 5.788864823371587, + "learning_rate": 7.237221594297175e-06, + "loss": 17.0798, + "step": 20339 + }, + { + "epoch": 0.3717988557223024, + "grad_norm": 6.677458964288095, + "learning_rate": 7.236956863767665e-06, + "loss": 17.7383, + "step": 20340 + }, + { + "epoch": 0.37181713491874896, + "grad_norm": 6.999870699390516, + "learning_rate": 7.2366921253978165e-06, + "loss": 17.6734, + "step": 20341 + }, + { + "epoch": 0.3718354141151955, + "grad_norm": 8.180484547018644, + "learning_rate": 7.236427379188556e-06, + "loss": 18.4928, + "step": 20342 + }, + { + "epoch": 0.37185369331164203, + "grad_norm": 7.760925417708203, + "learning_rate": 7.2361626251408105e-06, + "loss": 18.3223, + "step": 20343 + }, + { + "epoch": 0.37187197250808857, + "grad_norm": 6.124893382275461, + "learning_rate": 7.235897863255509e-06, + "loss": 17.5096, + "step": 20344 + }, + { + "epoch": 0.37189025170453505, + "grad_norm": 5.905706033508136, + "learning_rate": 7.23563309353358e-06, + "loss": 17.3197, + "step": 20345 + }, + { + "epoch": 0.3719085309009816, + "grad_norm": 6.6805730546934505, + "learning_rate": 7.235368315975951e-06, + "loss": 17.679, + "step": 20346 + }, + { + "epoch": 0.3719268100974281, + "grad_norm": 6.794605647027376, + "learning_rate": 7.23510353058355e-06, + "loss": 17.8838, + "step": 20347 + }, + { + "epoch": 0.37194508929387465, + "grad_norm": 5.693706186866842, + "learning_rate": 7.234838737357306e-06, + "loss": 17.549, + "step": 20348 + }, + { + "epoch": 0.3719633684903212, + "grad_norm": 7.74429790113914, + "learning_rate": 7.234573936298146e-06, + "loss": 17.9132, + "step": 20349 + }, + { + "epoch": 0.37198164768676767, + "grad_norm": 7.1510684608011825, + "learning_rate": 7.234309127406998e-06, + "loss": 17.5058, + "step": 
20350 + }, + { + "epoch": 0.3719999268832142, + "grad_norm": 5.94800289132471, + "learning_rate": 7.234044310684789e-06, + "loss": 17.368, + "step": 20351 + }, + { + "epoch": 0.37201820607966074, + "grad_norm": 7.601307345453096, + "learning_rate": 7.233779486132451e-06, + "loss": 17.7545, + "step": 20352 + }, + { + "epoch": 0.3720364852761073, + "grad_norm": 5.93708836301738, + "learning_rate": 7.233514653750907e-06, + "loss": 17.4703, + "step": 20353 + }, + { + "epoch": 0.3720547644725538, + "grad_norm": 6.089745149104948, + "learning_rate": 7.23324981354109e-06, + "loss": 17.1858, + "step": 20354 + }, + { + "epoch": 0.3720730436690003, + "grad_norm": 5.428649211988015, + "learning_rate": 7.232984965503925e-06, + "loss": 17.0951, + "step": 20355 + }, + { + "epoch": 0.3720913228654468, + "grad_norm": 7.580011344726705, + "learning_rate": 7.232720109640342e-06, + "loss": 17.7734, + "step": 20356 + }, + { + "epoch": 0.37210960206189336, + "grad_norm": 7.538724121483692, + "learning_rate": 7.232455245951269e-06, + "loss": 17.9085, + "step": 20357 + }, + { + "epoch": 0.3721278812583399, + "grad_norm": 6.5290370401590705, + "learning_rate": 7.232190374437634e-06, + "loss": 17.5595, + "step": 20358 + }, + { + "epoch": 0.37214616045478643, + "grad_norm": 7.6163367269095374, + "learning_rate": 7.231925495100365e-06, + "loss": 18.1394, + "step": 20359 + }, + { + "epoch": 0.3721644396512329, + "grad_norm": 7.053694094974511, + "learning_rate": 7.231660607940391e-06, + "loss": 17.8886, + "step": 20360 + }, + { + "epoch": 0.37218271884767945, + "grad_norm": 5.9060435750609335, + "learning_rate": 7.23139571295864e-06, + "loss": 17.3318, + "step": 20361 + }, + { + "epoch": 0.372200998044126, + "grad_norm": 7.689619564427998, + "learning_rate": 7.231130810156042e-06, + "loss": 17.9838, + "step": 20362 + }, + { + "epoch": 0.3722192772405725, + "grad_norm": 8.4709035566881, + "learning_rate": 7.230865899533522e-06, + "loss": 17.8392, + "step": 20363 + }, + { + "epoch": 0.372237556437019, + "grad_norm": 5.693540796749584, + "learning_rate": 7.230600981092012e-06, + "loss": 17.215, + "step": 20364 + }, + { + "epoch": 0.37225583563346554, + "grad_norm": 5.698863212612251, + "learning_rate": 7.230336054832438e-06, + "loss": 17.0566, + "step": 20365 + }, + { + "epoch": 0.3722741148299121, + "grad_norm": 7.070801136228155, + "learning_rate": 7.230071120755732e-06, + "loss": 17.548, + "step": 20366 + }, + { + "epoch": 0.3722923940263586, + "grad_norm": 8.326772885066012, + "learning_rate": 7.229806178862818e-06, + "loss": 17.5586, + "step": 20367 + }, + { + "epoch": 0.37231067322280514, + "grad_norm": 5.982651802352122, + "learning_rate": 7.229541229154627e-06, + "loss": 17.277, + "step": 20368 + }, + { + "epoch": 0.3723289524192516, + "grad_norm": 6.496205803133922, + "learning_rate": 7.2292762716320886e-06, + "loss": 17.6187, + "step": 20369 + }, + { + "epoch": 0.37234723161569816, + "grad_norm": 6.161779591690141, + "learning_rate": 7.229011306296129e-06, + "loss": 17.5687, + "step": 20370 + }, + { + "epoch": 0.3723655108121447, + "grad_norm": 7.006544113911659, + "learning_rate": 7.2287463331476795e-06, + "loss": 17.7012, + "step": 20371 + }, + { + "epoch": 0.37238379000859123, + "grad_norm": 5.95469046391852, + "learning_rate": 7.228481352187668e-06, + "loss": 17.1852, + "step": 20372 + }, + { + "epoch": 0.37240206920503777, + "grad_norm": 8.285553807565886, + "learning_rate": 7.2282163634170196e-06, + "loss": 17.919, + "step": 20373 + }, + { + "epoch": 0.37242034840148425, + "grad_norm": 6.3174512701254, + 
"learning_rate": 7.2279513668366696e-06, + "loss": 17.2932, + "step": 20374 + }, + { + "epoch": 0.3724386275979308, + "grad_norm": 5.365878806247166, + "learning_rate": 7.2276863624475414e-06, + "loss": 17.0177, + "step": 20375 + }, + { + "epoch": 0.3724569067943773, + "grad_norm": 7.849594451486141, + "learning_rate": 7.227421350250568e-06, + "loss": 18.0845, + "step": 20376 + }, + { + "epoch": 0.37247518599082385, + "grad_norm": 7.173790348339584, + "learning_rate": 7.227156330246674e-06, + "loss": 17.7624, + "step": 20377 + }, + { + "epoch": 0.3724934651872704, + "grad_norm": 6.732504055398928, + "learning_rate": 7.226891302436789e-06, + "loss": 17.5077, + "step": 20378 + }, + { + "epoch": 0.37251174438371687, + "grad_norm": 7.776751079739892, + "learning_rate": 7.226626266821847e-06, + "loss": 17.9832, + "step": 20379 + }, + { + "epoch": 0.3725300235801634, + "grad_norm": 7.164123401789497, + "learning_rate": 7.226361223402771e-06, + "loss": 17.8237, + "step": 20380 + }, + { + "epoch": 0.37254830277660994, + "grad_norm": 5.495777387559704, + "learning_rate": 7.226096172180492e-06, + "loss": 17.0427, + "step": 20381 + }, + { + "epoch": 0.3725665819730565, + "grad_norm": 7.497811476326853, + "learning_rate": 7.225831113155939e-06, + "loss": 17.7845, + "step": 20382 + }, + { + "epoch": 0.372584861169503, + "grad_norm": 6.126258756148357, + "learning_rate": 7.225566046330041e-06, + "loss": 17.292, + "step": 20383 + }, + { + "epoch": 0.3726031403659495, + "grad_norm": 7.5891907319413425, + "learning_rate": 7.225300971703728e-06, + "loss": 17.7541, + "step": 20384 + }, + { + "epoch": 0.37262141956239603, + "grad_norm": 5.681311559330811, + "learning_rate": 7.225035889277928e-06, + "loss": 17.2156, + "step": 20385 + }, + { + "epoch": 0.37263969875884256, + "grad_norm": 5.389113341989961, + "learning_rate": 7.224770799053571e-06, + "loss": 17.2472, + "step": 20386 + }, + { + "epoch": 0.3726579779552891, + "grad_norm": 7.119291474765218, + "learning_rate": 7.224505701031584e-06, + "loss": 17.9081, + "step": 20387 + }, + { + "epoch": 0.37267625715173563, + "grad_norm": 7.550679998764515, + "learning_rate": 7.224240595212898e-06, + "loss": 17.8024, + "step": 20388 + }, + { + "epoch": 0.3726945363481821, + "grad_norm": 7.120317814639238, + "learning_rate": 7.223975481598443e-06, + "loss": 17.6353, + "step": 20389 + }, + { + "epoch": 0.37271281554462865, + "grad_norm": 6.534661234696521, + "learning_rate": 7.223710360189145e-06, + "loss": 17.6243, + "step": 20390 + }, + { + "epoch": 0.3727310947410752, + "grad_norm": 6.674445844875846, + "learning_rate": 7.223445230985936e-06, + "loss": 17.8813, + "step": 20391 + }, + { + "epoch": 0.3727493739375217, + "grad_norm": 6.958357293000667, + "learning_rate": 7.223180093989743e-06, + "loss": 17.6807, + "step": 20392 + }, + { + "epoch": 0.37276765313396826, + "grad_norm": 6.307849016021199, + "learning_rate": 7.2229149492015e-06, + "loss": 17.414, + "step": 20393 + }, + { + "epoch": 0.37278593233041474, + "grad_norm": 9.402225556016122, + "learning_rate": 7.2226497966221295e-06, + "loss": 18.8113, + "step": 20394 + }, + { + "epoch": 0.3728042115268613, + "grad_norm": 6.158146935391387, + "learning_rate": 7.222384636252566e-06, + "loss": 17.4431, + "step": 20395 + }, + { + "epoch": 0.3728224907233078, + "grad_norm": 8.180275466524071, + "learning_rate": 7.2221194680937375e-06, + "loss": 18.4592, + "step": 20396 + }, + { + "epoch": 0.37284076991975434, + "grad_norm": 6.246114860222931, + "learning_rate": 7.221854292146573e-06, + "loss": 17.2941, + "step": 
20397 + }, + { + "epoch": 0.3728590491162008, + "grad_norm": 6.15569543233691, + "learning_rate": 7.221589108412001e-06, + "loss": 17.5038, + "step": 20398 + }, + { + "epoch": 0.37287732831264736, + "grad_norm": 7.643497344732373, + "learning_rate": 7.221323916890952e-06, + "loss": 17.9917, + "step": 20399 + }, + { + "epoch": 0.3728956075090939, + "grad_norm": 6.504358251889061, + "learning_rate": 7.221058717584357e-06, + "loss": 17.5867, + "step": 20400 + }, + { + "epoch": 0.37291388670554043, + "grad_norm": 6.410339312678638, + "learning_rate": 7.2207935104931425e-06, + "loss": 17.5052, + "step": 20401 + }, + { + "epoch": 0.37293216590198697, + "grad_norm": 8.280785292636546, + "learning_rate": 7.22052829561824e-06, + "loss": 17.466, + "step": 20402 + }, + { + "epoch": 0.37295044509843345, + "grad_norm": 6.071215727603237, + "learning_rate": 7.2202630729605794e-06, + "loss": 17.3962, + "step": 20403 + }, + { + "epoch": 0.37296872429488, + "grad_norm": 7.037387699823428, + "learning_rate": 7.219997842521088e-06, + "loss": 18.0676, + "step": 20404 + }, + { + "epoch": 0.3729870034913265, + "grad_norm": 6.362228304268196, + "learning_rate": 7.2197326043006965e-06, + "loss": 17.452, + "step": 20405 + }, + { + "epoch": 0.37300528268777305, + "grad_norm": 4.923762259060005, + "learning_rate": 7.219467358300335e-06, + "loss": 16.9562, + "step": 20406 + }, + { + "epoch": 0.3730235618842196, + "grad_norm": 5.680735934454968, + "learning_rate": 7.219202104520935e-06, + "loss": 17.1766, + "step": 20407 + }, + { + "epoch": 0.37304184108066607, + "grad_norm": 6.010009134758298, + "learning_rate": 7.218936842963422e-06, + "loss": 17.3163, + "step": 20408 + }, + { + "epoch": 0.3730601202771126, + "grad_norm": 6.572640723154287, + "learning_rate": 7.218671573628729e-06, + "loss": 17.5496, + "step": 20409 + }, + { + "epoch": 0.37307839947355914, + "grad_norm": 5.927443830371127, + "learning_rate": 7.218406296517785e-06, + "loss": 17.3753, + "step": 20410 + }, + { + "epoch": 0.3730966786700057, + "grad_norm": 7.44410725698587, + "learning_rate": 7.218141011631518e-06, + "loss": 17.9294, + "step": 20411 + }, + { + "epoch": 0.3731149578664522, + "grad_norm": 5.054380719914831, + "learning_rate": 7.21787571897086e-06, + "loss": 16.8668, + "step": 20412 + }, + { + "epoch": 0.3731332370628987, + "grad_norm": 6.755970480299233, + "learning_rate": 7.21761041853674e-06, + "loss": 17.6978, + "step": 20413 + }, + { + "epoch": 0.37315151625934523, + "grad_norm": 6.60742547686864, + "learning_rate": 7.217345110330088e-06, + "loss": 17.3586, + "step": 20414 + }, + { + "epoch": 0.37316979545579176, + "grad_norm": 6.517868095519433, + "learning_rate": 7.217079794351833e-06, + "loss": 17.6383, + "step": 20415 + }, + { + "epoch": 0.3731880746522383, + "grad_norm": 7.533480740334832, + "learning_rate": 7.216814470602907e-06, + "loss": 18.0985, + "step": 20416 + }, + { + "epoch": 0.37320635384868484, + "grad_norm": 6.71196017452529, + "learning_rate": 7.216549139084239e-06, + "loss": 17.6497, + "step": 20417 + }, + { + "epoch": 0.3732246330451313, + "grad_norm": 5.921607526423101, + "learning_rate": 7.216283799796758e-06, + "loss": 17.4144, + "step": 20418 + }, + { + "epoch": 0.37324291224157785, + "grad_norm": 8.118812396137619, + "learning_rate": 7.216018452741393e-06, + "loss": 18.1792, + "step": 20419 + }, + { + "epoch": 0.3732611914380244, + "grad_norm": 8.533555423039589, + "learning_rate": 7.215753097919078e-06, + "loss": 18.1658, + "step": 20420 + }, + { + "epoch": 0.3732794706344709, + "grad_norm": 
9.005233588612297, + "learning_rate": 7.215487735330739e-06, + "loss": 18.7296, + "step": 20421 + }, + { + "epoch": 0.37329774983091746, + "grad_norm": 7.1531783299493465, + "learning_rate": 7.215222364977309e-06, + "loss": 17.8506, + "step": 20422 + }, + { + "epoch": 0.37331602902736394, + "grad_norm": 6.62261073381144, + "learning_rate": 7.2149569868597156e-06, + "loss": 17.3995, + "step": 20423 + }, + { + "epoch": 0.3733343082238105, + "grad_norm": 7.694517476480957, + "learning_rate": 7.214691600978891e-06, + "loss": 18.037, + "step": 20424 + }, + { + "epoch": 0.373352587420257, + "grad_norm": 5.286063429211789, + "learning_rate": 7.214426207335765e-06, + "loss": 16.9748, + "step": 20425 + }, + { + "epoch": 0.37337086661670355, + "grad_norm": 6.853043960827987, + "learning_rate": 7.2141608059312665e-06, + "loss": 17.6959, + "step": 20426 + }, + { + "epoch": 0.3733891458131501, + "grad_norm": 7.6643730515163275, + "learning_rate": 7.213895396766327e-06, + "loss": 17.8825, + "step": 20427 + }, + { + "epoch": 0.37340742500959656, + "grad_norm": 5.409856853628547, + "learning_rate": 7.213629979841875e-06, + "loss": 16.9521, + "step": 20428 + }, + { + "epoch": 0.3734257042060431, + "grad_norm": 5.500316520841786, + "learning_rate": 7.213364555158843e-06, + "loss": 17.1085, + "step": 20429 + }, + { + "epoch": 0.37344398340248963, + "grad_norm": 6.484131949159568, + "learning_rate": 7.21309912271816e-06, + "loss": 17.4931, + "step": 20430 + }, + { + "epoch": 0.37346226259893617, + "grad_norm": 7.160145232373765, + "learning_rate": 7.212833682520758e-06, + "loss": 17.8472, + "step": 20431 + }, + { + "epoch": 0.37348054179538265, + "grad_norm": 8.808054033558152, + "learning_rate": 7.212568234567563e-06, + "loss": 17.7341, + "step": 20432 + }, + { + "epoch": 0.3734988209918292, + "grad_norm": 7.0031468822222935, + "learning_rate": 7.21230277885951e-06, + "loss": 17.9265, + "step": 20433 + }, + { + "epoch": 0.3735171001882757, + "grad_norm": 6.6477486261601335, + "learning_rate": 7.212037315397528e-06, + "loss": 17.5194, + "step": 20434 + }, + { + "epoch": 0.37353537938472225, + "grad_norm": 6.296791146161914, + "learning_rate": 7.2117718441825475e-06, + "loss": 17.4754, + "step": 20435 + }, + { + "epoch": 0.3735536585811688, + "grad_norm": 5.909369086372829, + "learning_rate": 7.211506365215499e-06, + "loss": 17.3268, + "step": 20436 + }, + { + "epoch": 0.37357193777761527, + "grad_norm": 7.5901947275425625, + "learning_rate": 7.21124087849731e-06, + "loss": 17.8533, + "step": 20437 + }, + { + "epoch": 0.3735902169740618, + "grad_norm": 6.5267304256580365, + "learning_rate": 7.210975384028917e-06, + "loss": 17.5819, + "step": 20438 + }, + { + "epoch": 0.37360849617050834, + "grad_norm": 8.301980468284086, + "learning_rate": 7.210709881811245e-06, + "loss": 18.2158, + "step": 20439 + }, + { + "epoch": 0.3736267753669549, + "grad_norm": 6.438116920575476, + "learning_rate": 7.210444371845227e-06, + "loss": 17.5852, + "step": 20440 + }, + { + "epoch": 0.3736450545634014, + "grad_norm": 5.890955174083961, + "learning_rate": 7.210178854131793e-06, + "loss": 17.1586, + "step": 20441 + }, + { + "epoch": 0.3736633337598479, + "grad_norm": 6.658707114323633, + "learning_rate": 7.2099133286718744e-06, + "loss": 17.5105, + "step": 20442 + }, + { + "epoch": 0.37368161295629443, + "grad_norm": 7.20389531042741, + "learning_rate": 7.209647795466401e-06, + "loss": 17.5541, + "step": 20443 + }, + { + "epoch": 0.37369989215274096, + "grad_norm": 6.692437785833514, + "learning_rate": 7.209382254516304e-06, + 
"loss": 17.5992, + "step": 20444 + }, + { + "epoch": 0.3737181713491875, + "grad_norm": 7.156500761876589, + "learning_rate": 7.209116705822516e-06, + "loss": 17.7442, + "step": 20445 + }, + { + "epoch": 0.37373645054563404, + "grad_norm": 10.273160095679948, + "learning_rate": 7.208851149385963e-06, + "loss": 17.881, + "step": 20446 + }, + { + "epoch": 0.3737547297420805, + "grad_norm": 7.652218591225395, + "learning_rate": 7.208585585207578e-06, + "loss": 18.0469, + "step": 20447 + }, + { + "epoch": 0.37377300893852705, + "grad_norm": 6.983571133024348, + "learning_rate": 7.208320013288295e-06, + "loss": 17.4955, + "step": 20448 + }, + { + "epoch": 0.3737912881349736, + "grad_norm": 8.059806225719282, + "learning_rate": 7.2080544336290395e-06, + "loss": 18.0089, + "step": 20449 + }, + { + "epoch": 0.3738095673314201, + "grad_norm": 7.542634018960026, + "learning_rate": 7.2077888462307456e-06, + "loss": 18.0137, + "step": 20450 + }, + { + "epoch": 0.37382784652786666, + "grad_norm": 5.932502408738489, + "learning_rate": 7.207523251094344e-06, + "loss": 17.358, + "step": 20451 + }, + { + "epoch": 0.37384612572431314, + "grad_norm": 6.534574091013455, + "learning_rate": 7.207257648220763e-06, + "loss": 17.3377, + "step": 20452 + }, + { + "epoch": 0.3738644049207597, + "grad_norm": 5.875549277401524, + "learning_rate": 7.206992037610937e-06, + "loss": 17.3167, + "step": 20453 + }, + { + "epoch": 0.3738826841172062, + "grad_norm": 7.025478727291052, + "learning_rate": 7.206726419265795e-06, + "loss": 17.4492, + "step": 20454 + }, + { + "epoch": 0.37390096331365275, + "grad_norm": 6.612097043680163, + "learning_rate": 7.206460793186268e-06, + "loss": 17.4482, + "step": 20455 + }, + { + "epoch": 0.3739192425100993, + "grad_norm": 6.267549369740469, + "learning_rate": 7.206195159373288e-06, + "loss": 17.2456, + "step": 20456 + }, + { + "epoch": 0.37393752170654576, + "grad_norm": 6.401926259821656, + "learning_rate": 7.205929517827785e-06, + "loss": 17.4843, + "step": 20457 + }, + { + "epoch": 0.3739558009029923, + "grad_norm": 6.560521431492206, + "learning_rate": 7.205663868550693e-06, + "loss": 17.5084, + "step": 20458 + }, + { + "epoch": 0.37397408009943883, + "grad_norm": 5.626893536183918, + "learning_rate": 7.205398211542938e-06, + "loss": 17.1795, + "step": 20459 + }, + { + "epoch": 0.37399235929588537, + "grad_norm": 5.339963679579897, + "learning_rate": 7.205132546805454e-06, + "loss": 17.0069, + "step": 20460 + }, + { + "epoch": 0.3740106384923319, + "grad_norm": 7.4430745967816385, + "learning_rate": 7.204866874339172e-06, + "loss": 17.921, + "step": 20461 + }, + { + "epoch": 0.3740289176887784, + "grad_norm": 5.656506238986444, + "learning_rate": 7.2046011941450225e-06, + "loss": 17.4381, + "step": 20462 + }, + { + "epoch": 0.3740471968852249, + "grad_norm": 6.401459680619209, + "learning_rate": 7.204335506223937e-06, + "loss": 17.2807, + "step": 20463 + }, + { + "epoch": 0.37406547608167146, + "grad_norm": 6.163360997915802, + "learning_rate": 7.204069810576848e-06, + "loss": 17.3161, + "step": 20464 + }, + { + "epoch": 0.374083755278118, + "grad_norm": 6.956249254360118, + "learning_rate": 7.203804107204684e-06, + "loss": 17.3061, + "step": 20465 + }, + { + "epoch": 0.37410203447456447, + "grad_norm": 7.914146130191101, + "learning_rate": 7.203538396108378e-06, + "loss": 18.0533, + "step": 20466 + }, + { + "epoch": 0.374120313671011, + "grad_norm": 6.022921740089152, + "learning_rate": 7.203272677288863e-06, + "loss": 17.1129, + "step": 20467 + }, + { + "epoch": 
0.37413859286745754, + "grad_norm": 5.345696553436056, + "learning_rate": 7.2030069507470665e-06, + "loss": 17.0964, + "step": 20468 + }, + { + "epoch": 0.3741568720639041, + "grad_norm": 5.8767713802306165, + "learning_rate": 7.202741216483923e-06, + "loss": 17.344, + "step": 20469 + }, + { + "epoch": 0.3741751512603506, + "grad_norm": 7.090472714638295, + "learning_rate": 7.202475474500361e-06, + "loss": 17.8596, + "step": 20470 + }, + { + "epoch": 0.3741934304567971, + "grad_norm": 6.851878326223762, + "learning_rate": 7.202209724797316e-06, + "loss": 17.7297, + "step": 20471 + }, + { + "epoch": 0.37421170965324363, + "grad_norm": 6.653875934548672, + "learning_rate": 7.201943967375716e-06, + "loss": 17.8149, + "step": 20472 + }, + { + "epoch": 0.37422998884969016, + "grad_norm": 6.095453224164434, + "learning_rate": 7.201678202236493e-06, + "loss": 17.4917, + "step": 20473 + }, + { + "epoch": 0.3742482680461367, + "grad_norm": 6.747793075429817, + "learning_rate": 7.201412429380579e-06, + "loss": 17.7392, + "step": 20474 + }, + { + "epoch": 0.37426654724258324, + "grad_norm": 5.7077639193355285, + "learning_rate": 7.201146648808906e-06, + "loss": 17.2055, + "step": 20475 + }, + { + "epoch": 0.3742848264390297, + "grad_norm": 5.588325452177898, + "learning_rate": 7.200880860522405e-06, + "loss": 17.1681, + "step": 20476 + }, + { + "epoch": 0.37430310563547625, + "grad_norm": 7.116280070737461, + "learning_rate": 7.2006150645220075e-06, + "loss": 17.9519, + "step": 20477 + }, + { + "epoch": 0.3743213848319228, + "grad_norm": 7.354552839910853, + "learning_rate": 7.200349260808644e-06, + "loss": 17.9828, + "step": 20478 + }, + { + "epoch": 0.3743396640283693, + "grad_norm": 5.0117794482627955, + "learning_rate": 7.200083449383248e-06, + "loss": 17.0739, + "step": 20479 + }, + { + "epoch": 0.37435794322481586, + "grad_norm": 6.110387257820735, + "learning_rate": 7.199817630246751e-06, + "loss": 17.245, + "step": 20480 + }, + { + "epoch": 0.37437622242126234, + "grad_norm": 6.26316331195001, + "learning_rate": 7.1995518034000836e-06, + "loss": 17.5018, + "step": 20481 + }, + { + "epoch": 0.3743945016177089, + "grad_norm": 5.416961702897103, + "learning_rate": 7.199285968844178e-06, + "loss": 17.0941, + "step": 20482 + }, + { + "epoch": 0.3744127808141554, + "grad_norm": 7.296340194253948, + "learning_rate": 7.199020126579966e-06, + "loss": 18.0503, + "step": 20483 + }, + { + "epoch": 0.37443106001060195, + "grad_norm": 8.16897791809317, + "learning_rate": 7.19875427660838e-06, + "loss": 18.1047, + "step": 20484 + }, + { + "epoch": 0.3744493392070485, + "grad_norm": 5.496261649783343, + "learning_rate": 7.1984884189303495e-06, + "loss": 17.1197, + "step": 20485 + }, + { + "epoch": 0.37446761840349496, + "grad_norm": 6.9872561177916594, + "learning_rate": 7.19822255354681e-06, + "loss": 17.7445, + "step": 20486 + }, + { + "epoch": 0.3744858975999415, + "grad_norm": 5.224986596449498, + "learning_rate": 7.197956680458689e-06, + "loss": 16.9281, + "step": 20487 + }, + { + "epoch": 0.37450417679638803, + "grad_norm": 5.579414671273383, + "learning_rate": 7.197690799666921e-06, + "loss": 17.0586, + "step": 20488 + }, + { + "epoch": 0.37452245599283457, + "grad_norm": 6.510955389162127, + "learning_rate": 7.197424911172439e-06, + "loss": 17.3088, + "step": 20489 + }, + { + "epoch": 0.3745407351892811, + "grad_norm": 7.3533702767898, + "learning_rate": 7.197159014976172e-06, + "loss": 17.8623, + "step": 20490 + }, + { + "epoch": 0.3745590143857276, + "grad_norm": 6.424771511305801, + 
"learning_rate": 7.196893111079054e-06, + "loss": 17.471, + "step": 20491 + }, + { + "epoch": 0.3745772935821741, + "grad_norm": 7.165256769964428, + "learning_rate": 7.196627199482015e-06, + "loss": 17.5574, + "step": 20492 + }, + { + "epoch": 0.37459557277862066, + "grad_norm": 6.464879066796956, + "learning_rate": 7.19636128018599e-06, + "loss": 17.5617, + "step": 20493 + }, + { + "epoch": 0.3746138519750672, + "grad_norm": 5.3519671094987675, + "learning_rate": 7.196095353191909e-06, + "loss": 16.9451, + "step": 20494 + }, + { + "epoch": 0.3746321311715137, + "grad_norm": 7.342403173271185, + "learning_rate": 7.195829418500704e-06, + "loss": 17.6891, + "step": 20495 + }, + { + "epoch": 0.3746504103679602, + "grad_norm": 7.397482331241367, + "learning_rate": 7.195563476113306e-06, + "loss": 17.8364, + "step": 20496 + }, + { + "epoch": 0.37466868956440674, + "grad_norm": 7.601953773384147, + "learning_rate": 7.19529752603065e-06, + "loss": 17.7663, + "step": 20497 + }, + { + "epoch": 0.3746869687608533, + "grad_norm": 6.155741447949556, + "learning_rate": 7.195031568253667e-06, + "loss": 17.4188, + "step": 20498 + }, + { + "epoch": 0.3747052479572998, + "grad_norm": 8.528566994652131, + "learning_rate": 7.194765602783288e-06, + "loss": 18.6777, + "step": 20499 + }, + { + "epoch": 0.3747235271537463, + "grad_norm": 5.999514942833209, + "learning_rate": 7.194499629620446e-06, + "loss": 17.3963, + "step": 20500 + }, + { + "epoch": 0.37474180635019283, + "grad_norm": 5.988950183033299, + "learning_rate": 7.194233648766073e-06, + "loss": 17.3092, + "step": 20501 + }, + { + "epoch": 0.37476008554663937, + "grad_norm": 6.778376306879646, + "learning_rate": 7.193967660221103e-06, + "loss": 17.6975, + "step": 20502 + }, + { + "epoch": 0.3747783647430859, + "grad_norm": 6.825540751643079, + "learning_rate": 7.1937016639864665e-06, + "loss": 17.9962, + "step": 20503 + }, + { + "epoch": 0.37479664393953244, + "grad_norm": 6.92133841962337, + "learning_rate": 7.193435660063095e-06, + "loss": 17.7729, + "step": 20504 + }, + { + "epoch": 0.3748149231359789, + "grad_norm": 6.629127129797609, + "learning_rate": 7.193169648451921e-06, + "loss": 17.5761, + "step": 20505 + }, + { + "epoch": 0.37483320233242545, + "grad_norm": 6.247274377170819, + "learning_rate": 7.192903629153879e-06, + "loss": 17.6489, + "step": 20506 + }, + { + "epoch": 0.374851481528872, + "grad_norm": 4.864723505686175, + "learning_rate": 7.192637602169901e-06, + "loss": 16.8646, + "step": 20507 + }, + { + "epoch": 0.3748697607253185, + "grad_norm": 7.351974906030962, + "learning_rate": 7.192371567500917e-06, + "loss": 17.8657, + "step": 20508 + }, + { + "epoch": 0.37488803992176506, + "grad_norm": 6.279121183760727, + "learning_rate": 7.192105525147861e-06, + "loss": 17.553, + "step": 20509 + }, + { + "epoch": 0.37490631911821154, + "grad_norm": 6.4870410422967, + "learning_rate": 7.191839475111666e-06, + "loss": 17.7006, + "step": 20510 + }, + { + "epoch": 0.3749245983146581, + "grad_norm": 6.529975507119786, + "learning_rate": 7.191573417393264e-06, + "loss": 17.4445, + "step": 20511 + }, + { + "epoch": 0.3749428775111046, + "grad_norm": 5.882872064621326, + "learning_rate": 7.191307351993586e-06, + "loss": 17.4472, + "step": 20512 + }, + { + "epoch": 0.37496115670755115, + "grad_norm": 6.89285895607024, + "learning_rate": 7.191041278913566e-06, + "loss": 18.0626, + "step": 20513 + }, + { + "epoch": 0.3749794359039977, + "grad_norm": 7.402257841543211, + "learning_rate": 7.190775198154139e-06, + "loss": 17.7354, + "step": 20514 + }, 
+ { + "epoch": 0.37499771510044416, + "grad_norm": 6.9256213267346975, + "learning_rate": 7.190509109716232e-06, + "loss": 17.7617, + "step": 20515 + }, + { + "epoch": 0.3750159942968907, + "grad_norm": 6.055825591041182, + "learning_rate": 7.190243013600782e-06, + "loss": 16.9858, + "step": 20516 + }, + { + "epoch": 0.37503427349333723, + "grad_norm": 6.9973535582198245, + "learning_rate": 7.189976909808721e-06, + "loss": 17.9019, + "step": 20517 + }, + { + "epoch": 0.37505255268978377, + "grad_norm": 5.49296416870087, + "learning_rate": 7.189710798340981e-06, + "loss": 17.0977, + "step": 20518 + }, + { + "epoch": 0.3750708318862303, + "grad_norm": 5.5940893216151295, + "learning_rate": 7.189444679198492e-06, + "loss": 17.1064, + "step": 20519 + }, + { + "epoch": 0.3750891110826768, + "grad_norm": 6.803714787433599, + "learning_rate": 7.189178552382192e-06, + "loss": 17.8292, + "step": 20520 + }, + { + "epoch": 0.3751073902791233, + "grad_norm": 6.706811302961145, + "learning_rate": 7.18891241789301e-06, + "loss": 17.8327, + "step": 20521 + }, + { + "epoch": 0.37512566947556986, + "grad_norm": 6.087035417363352, + "learning_rate": 7.188646275731881e-06, + "loss": 17.5479, + "step": 20522 + }, + { + "epoch": 0.3751439486720164, + "grad_norm": 5.825170628716262, + "learning_rate": 7.188380125899736e-06, + "loss": 17.5318, + "step": 20523 + }, + { + "epoch": 0.3751622278684629, + "grad_norm": 7.386143292146687, + "learning_rate": 7.188113968397508e-06, + "loss": 17.8499, + "step": 20524 + }, + { + "epoch": 0.3751805070649094, + "grad_norm": 5.670855151232415, + "learning_rate": 7.1878478032261314e-06, + "loss": 17.154, + "step": 20525 + }, + { + "epoch": 0.37519878626135594, + "grad_norm": 6.059875690237442, + "learning_rate": 7.187581630386538e-06, + "loss": 17.3713, + "step": 20526 + }, + { + "epoch": 0.3752170654578025, + "grad_norm": 5.732884623881086, + "learning_rate": 7.187315449879659e-06, + "loss": 17.1756, + "step": 20527 + }, + { + "epoch": 0.375235344654249, + "grad_norm": 7.308732349846579, + "learning_rate": 7.187049261706431e-06, + "loss": 18.1432, + "step": 20528 + }, + { + "epoch": 0.37525362385069555, + "grad_norm": 6.459090289358423, + "learning_rate": 7.186783065867785e-06, + "loss": 17.3665, + "step": 20529 + }, + { + "epoch": 0.37527190304714203, + "grad_norm": 5.737021132272651, + "learning_rate": 7.1865168623646546e-06, + "loss": 17.4221, + "step": 20530 + }, + { + "epoch": 0.37529018224358857, + "grad_norm": 6.279016688668446, + "learning_rate": 7.186250651197971e-06, + "loss": 17.5875, + "step": 20531 + }, + { + "epoch": 0.3753084614400351, + "grad_norm": 7.238675373326088, + "learning_rate": 7.185984432368669e-06, + "loss": 17.5531, + "step": 20532 + }, + { + "epoch": 0.37532674063648164, + "grad_norm": 5.654569050953152, + "learning_rate": 7.185718205877681e-06, + "loss": 17.1147, + "step": 20533 + }, + { + "epoch": 0.3753450198329281, + "grad_norm": 6.4257274957213575, + "learning_rate": 7.1854519717259416e-06, + "loss": 17.4075, + "step": 20534 + }, + { + "epoch": 0.37536329902937465, + "grad_norm": 6.637054571580879, + "learning_rate": 7.185185729914383e-06, + "loss": 17.5743, + "step": 20535 + }, + { + "epoch": 0.3753815782258212, + "grad_norm": 7.637741798668585, + "learning_rate": 7.184919480443936e-06, + "loss": 17.9811, + "step": 20536 + }, + { + "epoch": 0.3753998574222677, + "grad_norm": 6.407290892107951, + "learning_rate": 7.184653223315535e-06, + "loss": 17.5998, + "step": 20537 + }, + { + "epoch": 0.37541813661871426, + "grad_norm": 
7.749589762691165, + "learning_rate": 7.184386958530117e-06, + "loss": 17.7453, + "step": 20538 + }, + { + "epoch": 0.37543641581516074, + "grad_norm": 6.905829872121333, + "learning_rate": 7.184120686088612e-06, + "loss": 17.4916, + "step": 20539 + }, + { + "epoch": 0.3754546950116073, + "grad_norm": 5.7683542321228, + "learning_rate": 7.183854405991952e-06, + "loss": 17.1817, + "step": 20540 + }, + { + "epoch": 0.3754729742080538, + "grad_norm": 5.981327578864603, + "learning_rate": 7.183588118241072e-06, + "loss": 17.3861, + "step": 20541 + }, + { + "epoch": 0.37549125340450035, + "grad_norm": 7.5470328785879515, + "learning_rate": 7.183321822836906e-06, + "loss": 18.2527, + "step": 20542 + }, + { + "epoch": 0.3755095326009469, + "grad_norm": 7.341944203848078, + "learning_rate": 7.183055519780385e-06, + "loss": 18.0454, + "step": 20543 + }, + { + "epoch": 0.37552781179739336, + "grad_norm": 7.321986932501018, + "learning_rate": 7.182789209072445e-06, + "loss": 17.8002, + "step": 20544 + }, + { + "epoch": 0.3755460909938399, + "grad_norm": 7.018625649196589, + "learning_rate": 7.182522890714018e-06, + "loss": 17.887, + "step": 20545 + }, + { + "epoch": 0.37556437019028643, + "grad_norm": 6.703391618043084, + "learning_rate": 7.182256564706039e-06, + "loss": 17.6745, + "step": 20546 + }, + { + "epoch": 0.37558264938673297, + "grad_norm": 7.104778415264163, + "learning_rate": 7.181990231049437e-06, + "loss": 17.6805, + "step": 20547 + }, + { + "epoch": 0.3756009285831795, + "grad_norm": 8.684152435477817, + "learning_rate": 7.181723889745151e-06, + "loss": 18.46, + "step": 20548 + }, + { + "epoch": 0.375619207779626, + "grad_norm": 7.3817123782854175, + "learning_rate": 7.181457540794112e-06, + "loss": 17.6573, + "step": 20549 + }, + { + "epoch": 0.3756374869760725, + "grad_norm": 6.750336302395451, + "learning_rate": 7.181191184197254e-06, + "loss": 17.623, + "step": 20550 + }, + { + "epoch": 0.37565576617251906, + "grad_norm": 5.881837910867297, + "learning_rate": 7.180924819955508e-06, + "loss": 17.1445, + "step": 20551 + }, + { + "epoch": 0.3756740453689656, + "grad_norm": 7.534997164159258, + "learning_rate": 7.180658448069811e-06, + "loss": 17.9065, + "step": 20552 + }, + { + "epoch": 0.37569232456541213, + "grad_norm": 9.022394311527165, + "learning_rate": 7.180392068541095e-06, + "loss": 18.0997, + "step": 20553 + }, + { + "epoch": 0.3757106037618586, + "grad_norm": 7.275141752031852, + "learning_rate": 7.180125681370296e-06, + "loss": 17.596, + "step": 20554 + }, + { + "epoch": 0.37572888295830514, + "grad_norm": 6.640547035363546, + "learning_rate": 7.1798592865583425e-06, + "loss": 17.6874, + "step": 20555 + }, + { + "epoch": 0.3757471621547517, + "grad_norm": 5.768911845931946, + "learning_rate": 7.179592884106174e-06, + "loss": 17.4228, + "step": 20556 + }, + { + "epoch": 0.3757654413511982, + "grad_norm": 6.99321745048219, + "learning_rate": 7.179326474014721e-06, + "loss": 17.7309, + "step": 20557 + }, + { + "epoch": 0.37578372054764475, + "grad_norm": 7.839414507814109, + "learning_rate": 7.179060056284917e-06, + "loss": 17.9429, + "step": 20558 + }, + { + "epoch": 0.37580199974409123, + "grad_norm": 5.920105089765995, + "learning_rate": 7.178793630917696e-06, + "loss": 17.2724, + "step": 20559 + }, + { + "epoch": 0.37582027894053777, + "grad_norm": 5.438035983080217, + "learning_rate": 7.178527197913994e-06, + "loss": 17.1341, + "step": 20560 + }, + { + "epoch": 0.3758385581369843, + "grad_norm": 6.888576926240088, + "learning_rate": 7.178260757274742e-06, + "loss": 
17.8441, + "step": 20561 + }, + { + "epoch": 0.37585683733343084, + "grad_norm": 6.94751405345727, + "learning_rate": 7.177994309000876e-06, + "loss": 17.1709, + "step": 20562 + }, + { + "epoch": 0.3758751165298774, + "grad_norm": 7.568423208404413, + "learning_rate": 7.1777278530933295e-06, + "loss": 17.9601, + "step": 20563 + }, + { + "epoch": 0.37589339572632385, + "grad_norm": 7.295671286163825, + "learning_rate": 7.177461389553033e-06, + "loss": 17.7052, + "step": 20564 + }, + { + "epoch": 0.3759116749227704, + "grad_norm": 5.163917432288961, + "learning_rate": 7.177194918380926e-06, + "loss": 17.0495, + "step": 20565 + }, + { + "epoch": 0.3759299541192169, + "grad_norm": 6.35829451976066, + "learning_rate": 7.176928439577939e-06, + "loss": 17.3866, + "step": 20566 + }, + { + "epoch": 0.37594823331566346, + "grad_norm": 5.945944096813592, + "learning_rate": 7.176661953145007e-06, + "loss": 17.2316, + "step": 20567 + }, + { + "epoch": 0.37596651251210994, + "grad_norm": 6.3620505848576006, + "learning_rate": 7.176395459083063e-06, + "loss": 17.3621, + "step": 20568 + }, + { + "epoch": 0.3759847917085565, + "grad_norm": 8.249472346758372, + "learning_rate": 7.1761289573930425e-06, + "loss": 18.04, + "step": 20569 + }, + { + "epoch": 0.376003070905003, + "grad_norm": 7.075767540520091, + "learning_rate": 7.1758624480758776e-06, + "loss": 17.421, + "step": 20570 + }, + { + "epoch": 0.37602135010144955, + "grad_norm": 5.894654845925681, + "learning_rate": 7.175595931132505e-06, + "loss": 17.2072, + "step": 20571 + }, + { + "epoch": 0.3760396292978961, + "grad_norm": 7.297821368762594, + "learning_rate": 7.175329406563858e-06, + "loss": 17.847, + "step": 20572 + }, + { + "epoch": 0.37605790849434256, + "grad_norm": 7.334149966670582, + "learning_rate": 7.175062874370868e-06, + "loss": 17.9298, + "step": 20573 + }, + { + "epoch": 0.3760761876907891, + "grad_norm": 6.938856531852848, + "learning_rate": 7.174796334554473e-06, + "loss": 17.8027, + "step": 20574 + }, + { + "epoch": 0.37609446688723563, + "grad_norm": 5.923019276067725, + "learning_rate": 7.174529787115605e-06, + "loss": 17.2943, + "step": 20575 + }, + { + "epoch": 0.37611274608368217, + "grad_norm": 6.37713460699952, + "learning_rate": 7.174263232055198e-06, + "loss": 17.4922, + "step": 20576 + }, + { + "epoch": 0.3761310252801287, + "grad_norm": 6.8539348980493395, + "learning_rate": 7.1739966693741894e-06, + "loss": 17.7022, + "step": 20577 + }, + { + "epoch": 0.3761493044765752, + "grad_norm": 7.476638742288511, + "learning_rate": 7.1737300990735085e-06, + "loss": 17.9302, + "step": 20578 + }, + { + "epoch": 0.3761675836730217, + "grad_norm": 5.942880873420569, + "learning_rate": 7.173463521154094e-06, + "loss": 17.3077, + "step": 20579 + }, + { + "epoch": 0.37618586286946826, + "grad_norm": 8.375990830522591, + "learning_rate": 7.173196935616877e-06, + "loss": 18.302, + "step": 20580 + }, + { + "epoch": 0.3762041420659148, + "grad_norm": 6.2401076669671, + "learning_rate": 7.172930342462795e-06, + "loss": 17.3708, + "step": 20581 + }, + { + "epoch": 0.37622242126236133, + "grad_norm": 5.788733961449134, + "learning_rate": 7.17266374169278e-06, + "loss": 17.364, + "step": 20582 + }, + { + "epoch": 0.3762407004588078, + "grad_norm": 6.731599080527994, + "learning_rate": 7.172397133307767e-06, + "loss": 17.6133, + "step": 20583 + }, + { + "epoch": 0.37625897965525434, + "grad_norm": 5.956999934736685, + "learning_rate": 7.172130517308691e-06, + "loss": 17.4725, + "step": 20584 + }, + { + "epoch": 0.3762772588517009, + 
"grad_norm": 7.831714054322711, + "learning_rate": 7.171863893696485e-06, + "loss": 17.9058, + "step": 20585 + }, + { + "epoch": 0.3762955380481474, + "grad_norm": 6.823454541730952, + "learning_rate": 7.171597262472085e-06, + "loss": 17.7316, + "step": 20586 + }, + { + "epoch": 0.37631381724459395, + "grad_norm": 5.638160627563298, + "learning_rate": 7.171330623636426e-06, + "loss": 17.3128, + "step": 20587 + }, + { + "epoch": 0.37633209644104043, + "grad_norm": 7.103100444250907, + "learning_rate": 7.17106397719044e-06, + "loss": 17.8114, + "step": 20588 + }, + { + "epoch": 0.37635037563748697, + "grad_norm": 5.456728586507418, + "learning_rate": 7.170797323135065e-06, + "loss": 17.0995, + "step": 20589 + }, + { + "epoch": 0.3763686548339335, + "grad_norm": 5.971389395298871, + "learning_rate": 7.170530661471232e-06, + "loss": 17.4495, + "step": 20590 + }, + { + "epoch": 0.37638693403038004, + "grad_norm": 6.927627408702738, + "learning_rate": 7.170263992199878e-06, + "loss": 17.7794, + "step": 20591 + }, + { + "epoch": 0.3764052132268266, + "grad_norm": 6.10809545916464, + "learning_rate": 7.169997315321936e-06, + "loss": 17.6278, + "step": 20592 + }, + { + "epoch": 0.37642349242327305, + "grad_norm": 7.840387278475167, + "learning_rate": 7.169730630838344e-06, + "loss": 17.4599, + "step": 20593 + }, + { + "epoch": 0.3764417716197196, + "grad_norm": 5.808072367256486, + "learning_rate": 7.169463938750033e-06, + "loss": 17.2614, + "step": 20594 + }, + { + "epoch": 0.3764600508161661, + "grad_norm": 6.6618112247851675, + "learning_rate": 7.169197239057939e-06, + "loss": 17.4821, + "step": 20595 + }, + { + "epoch": 0.37647833001261266, + "grad_norm": 6.159097442050367, + "learning_rate": 7.168930531762998e-06, + "loss": 17.3987, + "step": 20596 + }, + { + "epoch": 0.3764966092090592, + "grad_norm": 5.5933978247039855, + "learning_rate": 7.1686638168661425e-06, + "loss": 17.1157, + "step": 20597 + }, + { + "epoch": 0.3765148884055057, + "grad_norm": 6.900925113958627, + "learning_rate": 7.168397094368309e-06, + "loss": 17.7539, + "step": 20598 + }, + { + "epoch": 0.3765331676019522, + "grad_norm": 6.619261728696811, + "learning_rate": 7.168130364270431e-06, + "loss": 17.4448, + "step": 20599 + }, + { + "epoch": 0.37655144679839875, + "grad_norm": 7.609053670754561, + "learning_rate": 7.167863626573446e-06, + "loss": 18.3832, + "step": 20600 + }, + { + "epoch": 0.3765697259948453, + "grad_norm": 5.803236237706797, + "learning_rate": 7.167596881278285e-06, + "loss": 17.4427, + "step": 20601 + }, + { + "epoch": 0.37658800519129176, + "grad_norm": 7.273994144546687, + "learning_rate": 7.167330128385886e-06, + "loss": 17.775, + "step": 20602 + }, + { + "epoch": 0.3766062843877383, + "grad_norm": 7.463503272074474, + "learning_rate": 7.167063367897184e-06, + "loss": 17.6611, + "step": 20603 + }, + { + "epoch": 0.37662456358418483, + "grad_norm": 7.0457114727194945, + "learning_rate": 7.1667965998131124e-06, + "loss": 17.7985, + "step": 20604 + }, + { + "epoch": 0.37664284278063137, + "grad_norm": 6.895245411245836, + "learning_rate": 7.166529824134606e-06, + "loss": 17.6429, + "step": 20605 + }, + { + "epoch": 0.3766611219770779, + "grad_norm": 4.420823489714812, + "learning_rate": 7.1662630408626e-06, + "loss": 16.8286, + "step": 20606 + }, + { + "epoch": 0.3766794011735244, + "grad_norm": 5.763056319838112, + "learning_rate": 7.165996249998033e-06, + "loss": 17.3129, + "step": 20607 + }, + { + "epoch": 0.3766976803699709, + "grad_norm": 6.765938780753487, + "learning_rate": 
7.165729451541834e-06, + "loss": 17.5902, + "step": 20608 + }, + { + "epoch": 0.37671595956641746, + "grad_norm": 6.233005257901531, + "learning_rate": 7.165462645494943e-06, + "loss": 17.1538, + "step": 20609 + }, + { + "epoch": 0.376734238762864, + "grad_norm": 6.994044793039864, + "learning_rate": 7.165195831858293e-06, + "loss": 17.6151, + "step": 20610 + }, + { + "epoch": 0.37675251795931053, + "grad_norm": 4.5691847886265275, + "learning_rate": 7.164929010632818e-06, + "loss": 16.8158, + "step": 20611 + }, + { + "epoch": 0.376770797155757, + "grad_norm": 5.113949836479145, + "learning_rate": 7.164662181819456e-06, + "loss": 16.9211, + "step": 20612 + }, + { + "epoch": 0.37678907635220354, + "grad_norm": 6.731286088024672, + "learning_rate": 7.164395345419141e-06, + "loss": 17.5905, + "step": 20613 + }, + { + "epoch": 0.3768073555486501, + "grad_norm": 5.779281914613035, + "learning_rate": 7.164128501432808e-06, + "loss": 17.4421, + "step": 20614 + }, + { + "epoch": 0.3768256347450966, + "grad_norm": 7.339046452482347, + "learning_rate": 7.163861649861392e-06, + "loss": 18.3369, + "step": 20615 + }, + { + "epoch": 0.37684391394154315, + "grad_norm": 6.884621453446428, + "learning_rate": 7.163594790705829e-06, + "loss": 17.8277, + "step": 20616 + }, + { + "epoch": 0.37686219313798963, + "grad_norm": 5.655586210170619, + "learning_rate": 7.163327923967055e-06, + "loss": 17.1877, + "step": 20617 + }, + { + "epoch": 0.37688047233443617, + "grad_norm": 8.771523681859211, + "learning_rate": 7.163061049646003e-06, + "loss": 18.0978, + "step": 20618 + }, + { + "epoch": 0.3768987515308827, + "grad_norm": 5.9379482109307595, + "learning_rate": 7.1627941677436085e-06, + "loss": 17.5278, + "step": 20619 + }, + { + "epoch": 0.37691703072732924, + "grad_norm": 5.382141824677348, + "learning_rate": 7.162527278260811e-06, + "loss": 17.233, + "step": 20620 + }, + { + "epoch": 0.3769353099237758, + "grad_norm": 6.069812859379751, + "learning_rate": 7.162260381198541e-06, + "loss": 17.3415, + "step": 20621 + }, + { + "epoch": 0.37695358912022225, + "grad_norm": 5.784579558492788, + "learning_rate": 7.161993476557737e-06, + "loss": 17.3677, + "step": 20622 + }, + { + "epoch": 0.3769718683166688, + "grad_norm": 7.2001587613957065, + "learning_rate": 7.161726564339333e-06, + "loss": 18.0532, + "step": 20623 + }, + { + "epoch": 0.3769901475131153, + "grad_norm": 6.8242809603122145, + "learning_rate": 7.161459644544265e-06, + "loss": 17.3681, + "step": 20624 + }, + { + "epoch": 0.37700842670956186, + "grad_norm": 7.612123828849303, + "learning_rate": 7.161192717173469e-06, + "loss": 18.4676, + "step": 20625 + }, + { + "epoch": 0.3770267059060084, + "grad_norm": 5.312600056096935, + "learning_rate": 7.16092578222788e-06, + "loss": 17.2113, + "step": 20626 + }, + { + "epoch": 0.3770449851024549, + "grad_norm": 7.309212073447616, + "learning_rate": 7.160658839708433e-06, + "loss": 17.8747, + "step": 20627 + }, + { + "epoch": 0.3770632642989014, + "grad_norm": 7.697322528872857, + "learning_rate": 7.1603918896160655e-06, + "loss": 18.2026, + "step": 20628 + }, + { + "epoch": 0.37708154349534795, + "grad_norm": 9.588027842999113, + "learning_rate": 7.160124931951711e-06, + "loss": 18.5526, + "step": 20629 + }, + { + "epoch": 0.3770998226917945, + "grad_norm": 5.977328879269785, + "learning_rate": 7.1598579667163045e-06, + "loss": 17.5158, + "step": 20630 + }, + { + "epoch": 0.377118101888241, + "grad_norm": 6.09204584799851, + "learning_rate": 7.1595909939107864e-06, + "loss": 17.3506, + "step": 20631 + }, + { + 
"epoch": 0.3771363810846875, + "grad_norm": 6.83840505088421, + "learning_rate": 7.159324013536086e-06, + "loss": 17.6528, + "step": 20632 + }, + { + "epoch": 0.37715466028113404, + "grad_norm": 5.7778449402272765, + "learning_rate": 7.159057025593145e-06, + "loss": 17.2363, + "step": 20633 + }, + { + "epoch": 0.37717293947758057, + "grad_norm": 6.64526238744069, + "learning_rate": 7.158790030082896e-06, + "loss": 17.5401, + "step": 20634 + }, + { + "epoch": 0.3771912186740271, + "grad_norm": 5.803148609951093, + "learning_rate": 7.158523027006275e-06, + "loss": 17.439, + "step": 20635 + }, + { + "epoch": 0.3772094978704736, + "grad_norm": 6.60449995898493, + "learning_rate": 7.158256016364218e-06, + "loss": 17.4628, + "step": 20636 + }, + { + "epoch": 0.3772277770669201, + "grad_norm": 5.7326802198899705, + "learning_rate": 7.157988998157659e-06, + "loss": 17.0555, + "step": 20637 + }, + { + "epoch": 0.37724605626336666, + "grad_norm": 7.460395265804034, + "learning_rate": 7.157721972387539e-06, + "loss": 17.85, + "step": 20638 + }, + { + "epoch": 0.3772643354598132, + "grad_norm": 6.627173442440888, + "learning_rate": 7.157454939054788e-06, + "loss": 17.5353, + "step": 20639 + }, + { + "epoch": 0.37728261465625973, + "grad_norm": 5.914285399030155, + "learning_rate": 7.157187898160347e-06, + "loss": 17.3655, + "step": 20640 + }, + { + "epoch": 0.3773008938527062, + "grad_norm": 5.620032876235361, + "learning_rate": 7.156920849705149e-06, + "loss": 17.2645, + "step": 20641 + }, + { + "epoch": 0.37731917304915275, + "grad_norm": 7.513128525274539, + "learning_rate": 7.156653793690129e-06, + "loss": 17.9416, + "step": 20642 + }, + { + "epoch": 0.3773374522455993, + "grad_norm": 6.12097987736912, + "learning_rate": 7.156386730116227e-06, + "loss": 17.4376, + "step": 20643 + }, + { + "epoch": 0.3773557314420458, + "grad_norm": 6.206280891072118, + "learning_rate": 7.156119658984375e-06, + "loss": 17.4708, + "step": 20644 + }, + { + "epoch": 0.37737401063849235, + "grad_norm": 6.309603134178015, + "learning_rate": 7.155852580295513e-06, + "loss": 17.4303, + "step": 20645 + }, + { + "epoch": 0.37739228983493883, + "grad_norm": 6.091201990564179, + "learning_rate": 7.155585494050571e-06, + "loss": 17.1456, + "step": 20646 + }, + { + "epoch": 0.37741056903138537, + "grad_norm": 6.393127162720256, + "learning_rate": 7.15531840025049e-06, + "loss": 17.6871, + "step": 20647 + }, + { + "epoch": 0.3774288482278319, + "grad_norm": 5.684010274567867, + "learning_rate": 7.155051298896207e-06, + "loss": 17.1029, + "step": 20648 + }, + { + "epoch": 0.37744712742427844, + "grad_norm": 5.859678024083746, + "learning_rate": 7.154784189988655e-06, + "loss": 17.3521, + "step": 20649 + }, + { + "epoch": 0.377465406620725, + "grad_norm": 6.493662822507453, + "learning_rate": 7.154517073528772e-06, + "loss": 17.4507, + "step": 20650 + }, + { + "epoch": 0.37748368581717145, + "grad_norm": 7.461636475787436, + "learning_rate": 7.154249949517493e-06, + "loss": 18.1048, + "step": 20651 + }, + { + "epoch": 0.377501965013618, + "grad_norm": 8.337347370485924, + "learning_rate": 7.153982817955755e-06, + "loss": 18.4426, + "step": 20652 + }, + { + "epoch": 0.3775202442100645, + "grad_norm": 7.217298307508891, + "learning_rate": 7.153715678844494e-06, + "loss": 17.9003, + "step": 20653 + }, + { + "epoch": 0.37753852340651106, + "grad_norm": 6.72731521926569, + "learning_rate": 7.153448532184646e-06, + "loss": 17.7426, + "step": 20654 + }, + { + "epoch": 0.3775568026029576, + "grad_norm": 7.578537547101339, + 
"learning_rate": 7.153181377977148e-06, + "loss": 17.8402, + "step": 20655 + }, + { + "epoch": 0.3775750817994041, + "grad_norm": 6.645035765757099, + "learning_rate": 7.152914216222937e-06, + "loss": 17.6881, + "step": 20656 + }, + { + "epoch": 0.3775933609958506, + "grad_norm": 7.351151228534585, + "learning_rate": 7.152647046922947e-06, + "loss": 18.1416, + "step": 20657 + }, + { + "epoch": 0.37761164019229715, + "grad_norm": 6.366196253922736, + "learning_rate": 7.152379870078119e-06, + "loss": 17.3985, + "step": 20658 + }, + { + "epoch": 0.3776299193887437, + "grad_norm": 7.008910741742748, + "learning_rate": 7.152112685689383e-06, + "loss": 17.9082, + "step": 20659 + }, + { + "epoch": 0.3776481985851902, + "grad_norm": 6.443878939319213, + "learning_rate": 7.151845493757679e-06, + "loss": 17.4953, + "step": 20660 + }, + { + "epoch": 0.3776664777816367, + "grad_norm": 8.356011214369085, + "learning_rate": 7.151578294283944e-06, + "loss": 18.0463, + "step": 20661 + }, + { + "epoch": 0.37768475697808324, + "grad_norm": 5.906777639110561, + "learning_rate": 7.151311087269115e-06, + "loss": 17.2742, + "step": 20662 + }, + { + "epoch": 0.37770303617452977, + "grad_norm": 7.20076130382401, + "learning_rate": 7.151043872714126e-06, + "loss": 17.7422, + "step": 20663 + }, + { + "epoch": 0.3777213153709763, + "grad_norm": 6.3545204646164954, + "learning_rate": 7.150776650619915e-06, + "loss": 17.4427, + "step": 20664 + }, + { + "epoch": 0.37773959456742284, + "grad_norm": 6.162254158058019, + "learning_rate": 7.15050942098742e-06, + "loss": 17.161, + "step": 20665 + }, + { + "epoch": 0.3777578737638693, + "grad_norm": 7.150157393780649, + "learning_rate": 7.150242183817574e-06, + "loss": 18.031, + "step": 20666 + }, + { + "epoch": 0.37777615296031586, + "grad_norm": 6.03951850120098, + "learning_rate": 7.149974939111316e-06, + "loss": 17.2216, + "step": 20667 + }, + { + "epoch": 0.3777944321567624, + "grad_norm": 8.161372667305617, + "learning_rate": 7.149707686869582e-06, + "loss": 18.3367, + "step": 20668 + }, + { + "epoch": 0.37781271135320893, + "grad_norm": 6.55974120489108, + "learning_rate": 7.149440427093311e-06, + "loss": 17.6477, + "step": 20669 + }, + { + "epoch": 0.3778309905496554, + "grad_norm": 5.8649574848977215, + "learning_rate": 7.149173159783437e-06, + "loss": 17.6777, + "step": 20670 + }, + { + "epoch": 0.37784926974610195, + "grad_norm": 7.815085048357885, + "learning_rate": 7.148905884940898e-06, + "loss": 17.8094, + "step": 20671 + }, + { + "epoch": 0.3778675489425485, + "grad_norm": 5.209538297226656, + "learning_rate": 7.14863860256663e-06, + "loss": 17.1547, + "step": 20672 + }, + { + "epoch": 0.377885828138995, + "grad_norm": 6.423224718314327, + "learning_rate": 7.1483713126615685e-06, + "loss": 17.8088, + "step": 20673 + }, + { + "epoch": 0.37790410733544155, + "grad_norm": 5.678146266476477, + "learning_rate": 7.148104015226653e-06, + "loss": 17.3624, + "step": 20674 + }, + { + "epoch": 0.37792238653188803, + "grad_norm": 6.250913610206943, + "learning_rate": 7.14783671026282e-06, + "loss": 17.5371, + "step": 20675 + }, + { + "epoch": 0.37794066572833457, + "grad_norm": 7.075915479019987, + "learning_rate": 7.147569397771006e-06, + "loss": 17.9878, + "step": 20676 + }, + { + "epoch": 0.3779589449247811, + "grad_norm": 5.2718456219478655, + "learning_rate": 7.1473020777521474e-06, + "loss": 16.95, + "step": 20677 + }, + { + "epoch": 0.37797722412122764, + "grad_norm": 6.692447361100275, + "learning_rate": 7.1470347502071804e-06, + "loss": 17.5211, + "step": 20678 
+ }, + { + "epoch": 0.3779955033176742, + "grad_norm": 5.869065536809518, + "learning_rate": 7.146767415137044e-06, + "loss": 17.4061, + "step": 20679 + }, + { + "epoch": 0.37801378251412066, + "grad_norm": 5.385633023882873, + "learning_rate": 7.146500072542672e-06, + "loss": 17.283, + "step": 20680 + }, + { + "epoch": 0.3780320617105672, + "grad_norm": 6.259513688434857, + "learning_rate": 7.146232722425006e-06, + "loss": 17.4676, + "step": 20681 + }, + { + "epoch": 0.3780503409070137, + "grad_norm": 7.565017404182663, + "learning_rate": 7.145965364784979e-06, + "loss": 17.7473, + "step": 20682 + }, + { + "epoch": 0.37806862010346026, + "grad_norm": 6.2425939611643475, + "learning_rate": 7.145697999623531e-06, + "loss": 17.3411, + "step": 20683 + }, + { + "epoch": 0.3780868992999068, + "grad_norm": 6.515403647706122, + "learning_rate": 7.145430626941596e-06, + "loss": 17.5335, + "step": 20684 + }, + { + "epoch": 0.3781051784963533, + "grad_norm": 5.913250439289875, + "learning_rate": 7.145163246740114e-06, + "loss": 17.2398, + "step": 20685 + }, + { + "epoch": 0.3781234576927998, + "grad_norm": 6.178161015518975, + "learning_rate": 7.144895859020022e-06, + "loss": 17.678, + "step": 20686 + }, + { + "epoch": 0.37814173688924635, + "grad_norm": 6.245587199046832, + "learning_rate": 7.144628463782254e-06, + "loss": 17.3644, + "step": 20687 + }, + { + "epoch": 0.3781600160856929, + "grad_norm": 9.519394063770232, + "learning_rate": 7.14436106102775e-06, + "loss": 18.4064, + "step": 20688 + }, + { + "epoch": 0.3781782952821394, + "grad_norm": 8.162139299355768, + "learning_rate": 7.144093650757448e-06, + "loss": 18.1407, + "step": 20689 + }, + { + "epoch": 0.3781965744785859, + "grad_norm": 8.207625117832002, + "learning_rate": 7.1438262329722816e-06, + "loss": 17.5477, + "step": 20690 + }, + { + "epoch": 0.37821485367503244, + "grad_norm": 6.423680900033887, + "learning_rate": 7.143558807673191e-06, + "loss": 17.316, + "step": 20691 + }, + { + "epoch": 0.37823313287147897, + "grad_norm": 6.368076140486672, + "learning_rate": 7.143291374861113e-06, + "loss": 17.5855, + "step": 20692 + }, + { + "epoch": 0.3782514120679255, + "grad_norm": 6.50904391012658, + "learning_rate": 7.143023934536986e-06, + "loss": 17.2793, + "step": 20693 + }, + { + "epoch": 0.37826969126437204, + "grad_norm": 7.412766496640174, + "learning_rate": 7.142756486701744e-06, + "loss": 17.9571, + "step": 20694 + }, + { + "epoch": 0.3782879704608185, + "grad_norm": 6.464074321628694, + "learning_rate": 7.142489031356328e-06, + "loss": 17.6333, + "step": 20695 + }, + { + "epoch": 0.37830624965726506, + "grad_norm": 6.848667173751901, + "learning_rate": 7.1422215685016725e-06, + "loss": 17.7922, + "step": 20696 + }, + { + "epoch": 0.3783245288537116, + "grad_norm": 6.335291254994485, + "learning_rate": 7.141954098138717e-06, + "loss": 17.5243, + "step": 20697 + }, + { + "epoch": 0.37834280805015813, + "grad_norm": 5.87701609881068, + "learning_rate": 7.1416866202683975e-06, + "loss": 17.2739, + "step": 20698 + }, + { + "epoch": 0.37836108724660467, + "grad_norm": 6.276542507763416, + "learning_rate": 7.141419134891654e-06, + "loss": 17.4585, + "step": 20699 + }, + { + "epoch": 0.37837936644305115, + "grad_norm": 5.95125091657514, + "learning_rate": 7.141151642009421e-06, + "loss": 17.2629, + "step": 20700 + }, + { + "epoch": 0.3783976456394977, + "grad_norm": 6.099762326585936, + "learning_rate": 7.1408841416226375e-06, + "loss": 17.2621, + "step": 20701 + }, + { + "epoch": 0.3784159248359442, + "grad_norm": 6.472944595990493, 
+ "learning_rate": 7.14061663373224e-06, + "loss": 17.6128, + "step": 20702 + }, + { + "epoch": 0.37843420403239075, + "grad_norm": 7.422113554229637, + "learning_rate": 7.1403491183391695e-06, + "loss": 17.966, + "step": 20703 + }, + { + "epoch": 0.37845248322883723, + "grad_norm": 6.347877429952918, + "learning_rate": 7.140081595444359e-06, + "loss": 17.2928, + "step": 20704 + }, + { + "epoch": 0.37847076242528377, + "grad_norm": 6.016001465431868, + "learning_rate": 7.139814065048748e-06, + "loss": 17.2871, + "step": 20705 + }, + { + "epoch": 0.3784890416217303, + "grad_norm": 6.682169672155171, + "learning_rate": 7.139546527153275e-06, + "loss": 17.435, + "step": 20706 + }, + { + "epoch": 0.37850732081817684, + "grad_norm": 6.188439116903879, + "learning_rate": 7.139278981758878e-06, + "loss": 17.6462, + "step": 20707 + }, + { + "epoch": 0.3785256000146234, + "grad_norm": 6.600287770503797, + "learning_rate": 7.139011428866493e-06, + "loss": 17.3779, + "step": 20708 + }, + { + "epoch": 0.37854387921106986, + "grad_norm": 5.474955385579385, + "learning_rate": 7.1387438684770585e-06, + "loss": 16.9861, + "step": 20709 + }, + { + "epoch": 0.3785621584075164, + "grad_norm": 5.5985919635142745, + "learning_rate": 7.138476300591511e-06, + "loss": 17.2531, + "step": 20710 + }, + { + "epoch": 0.3785804376039629, + "grad_norm": 5.726156526690359, + "learning_rate": 7.138208725210791e-06, + "loss": 17.3002, + "step": 20711 + }, + { + "epoch": 0.37859871680040946, + "grad_norm": 6.581332163686492, + "learning_rate": 7.137941142335836e-06, + "loss": 17.4097, + "step": 20712 + }, + { + "epoch": 0.378616995996856, + "grad_norm": 7.045607385744825, + "learning_rate": 7.137673551967581e-06, + "loss": 17.8435, + "step": 20713 + }, + { + "epoch": 0.3786352751933025, + "grad_norm": 6.638400931802244, + "learning_rate": 7.1374059541069665e-06, + "loss": 17.755, + "step": 20714 + }, + { + "epoch": 0.378653554389749, + "grad_norm": 7.596801101096576, + "learning_rate": 7.1371383487549296e-06, + "loss": 17.7562, + "step": 20715 + }, + { + "epoch": 0.37867183358619555, + "grad_norm": 9.420613093821757, + "learning_rate": 7.136870735912408e-06, + "loss": 18.0275, + "step": 20716 + }, + { + "epoch": 0.3786901127826421, + "grad_norm": 6.237802438989488, + "learning_rate": 7.13660311558034e-06, + "loss": 17.5701, + "step": 20717 + }, + { + "epoch": 0.3787083919790886, + "grad_norm": 8.624445529734015, + "learning_rate": 7.136335487759664e-06, + "loss": 18.3155, + "step": 20718 + }, + { + "epoch": 0.3787266711755351, + "grad_norm": 7.878952553669444, + "learning_rate": 7.136067852451316e-06, + "loss": 18.0405, + "step": 20719 + }, + { + "epoch": 0.37874495037198164, + "grad_norm": 4.998626687964603, + "learning_rate": 7.135800209656238e-06, + "loss": 16.9398, + "step": 20720 + }, + { + "epoch": 0.3787632295684282, + "grad_norm": 6.383723286774549, + "learning_rate": 7.135532559375364e-06, + "loss": 17.6559, + "step": 20721 + }, + { + "epoch": 0.3787815087648747, + "grad_norm": 8.815173822229411, + "learning_rate": 7.135264901609633e-06, + "loss": 18.904, + "step": 20722 + }, + { + "epoch": 0.37879978796132124, + "grad_norm": 7.22992635093191, + "learning_rate": 7.134997236359984e-06, + "loss": 17.6525, + "step": 20723 + }, + { + "epoch": 0.3788180671577677, + "grad_norm": 5.9283624355310165, + "learning_rate": 7.134729563627356e-06, + "loss": 17.2961, + "step": 20724 + }, + { + "epoch": 0.37883634635421426, + "grad_norm": 7.719535222571975, + "learning_rate": 7.134461883412686e-06, + "loss": 17.9128, + "step": 
20725 + }, + { + "epoch": 0.3788546255506608, + "grad_norm": 7.733402061159149, + "learning_rate": 7.134194195716912e-06, + "loss": 17.5171, + "step": 20726 + }, + { + "epoch": 0.37887290474710733, + "grad_norm": 6.445478746516123, + "learning_rate": 7.133926500540973e-06, + "loss": 17.4731, + "step": 20727 + }, + { + "epoch": 0.37889118394355387, + "grad_norm": 6.5167505852322565, + "learning_rate": 7.133658797885806e-06, + "loss": 17.68, + "step": 20728 + }, + { + "epoch": 0.37890946314000035, + "grad_norm": 5.421027084675409, + "learning_rate": 7.1333910877523505e-06, + "loss": 17.1143, + "step": 20729 + }, + { + "epoch": 0.3789277423364469, + "grad_norm": 6.159280335343325, + "learning_rate": 7.1331233701415445e-06, + "loss": 17.4021, + "step": 20730 + }, + { + "epoch": 0.3789460215328934, + "grad_norm": 6.331667450781991, + "learning_rate": 7.132855645054326e-06, + "loss": 17.3254, + "step": 20731 + }, + { + "epoch": 0.37896430072933995, + "grad_norm": 5.962973632759887, + "learning_rate": 7.1325879124916335e-06, + "loss": 17.3799, + "step": 20732 + }, + { + "epoch": 0.3789825799257865, + "grad_norm": 9.026509699935183, + "learning_rate": 7.1323201724544054e-06, + "loss": 17.743, + "step": 20733 + }, + { + "epoch": 0.37900085912223297, + "grad_norm": 7.870083216701158, + "learning_rate": 7.13205242494358e-06, + "loss": 17.9164, + "step": 20734 + }, + { + "epoch": 0.3790191383186795, + "grad_norm": 6.315996277779612, + "learning_rate": 7.131784669960097e-06, + "loss": 17.4389, + "step": 20735 + }, + { + "epoch": 0.37903741751512604, + "grad_norm": 6.230132327519647, + "learning_rate": 7.1315169075048925e-06, + "loss": 17.5464, + "step": 20736 + }, + { + "epoch": 0.3790556967115726, + "grad_norm": 6.512243512904325, + "learning_rate": 7.131249137578906e-06, + "loss": 17.4722, + "step": 20737 + }, + { + "epoch": 0.37907397590801906, + "grad_norm": 6.23420620772002, + "learning_rate": 7.130981360183078e-06, + "loss": 17.5325, + "step": 20738 + }, + { + "epoch": 0.3790922551044656, + "grad_norm": 6.393638833110737, + "learning_rate": 7.130713575318343e-06, + "loss": 17.6389, + "step": 20739 + }, + { + "epoch": 0.3791105343009121, + "grad_norm": 6.95356601842442, + "learning_rate": 7.130445782985643e-06, + "loss": 17.6849, + "step": 20740 + }, + { + "epoch": 0.37912881349735866, + "grad_norm": 7.321391171174789, + "learning_rate": 7.1301779831859155e-06, + "loss": 17.7158, + "step": 20741 + }, + { + "epoch": 0.3791470926938052, + "grad_norm": 6.270581262878128, + "learning_rate": 7.129910175920098e-06, + "loss": 17.5712, + "step": 20742 + }, + { + "epoch": 0.3791653718902517, + "grad_norm": 5.7544699683778076, + "learning_rate": 7.129642361189131e-06, + "loss": 17.1531, + "step": 20743 + }, + { + "epoch": 0.3791836510866982, + "grad_norm": 5.94292761915387, + "learning_rate": 7.129374538993952e-06, + "loss": 17.3732, + "step": 20744 + }, + { + "epoch": 0.37920193028314475, + "grad_norm": 6.793645401089376, + "learning_rate": 7.129106709335502e-06, + "loss": 17.7743, + "step": 20745 + }, + { + "epoch": 0.3792202094795913, + "grad_norm": 5.669400652861999, + "learning_rate": 7.128838872214714e-06, + "loss": 17.4094, + "step": 20746 + }, + { + "epoch": 0.3792384886760378, + "grad_norm": 6.0989341256543135, + "learning_rate": 7.128571027632533e-06, + "loss": 17.5634, + "step": 20747 + }, + { + "epoch": 0.3792567678724843, + "grad_norm": 6.06441229343946, + "learning_rate": 7.128303175589894e-06, + "loss": 17.439, + "step": 20748 + }, + { + "epoch": 0.37927504706893084, + "grad_norm": 
6.35104589498538, + "learning_rate": 7.128035316087738e-06, + "loss": 17.6118, + "step": 20749 + }, + { + "epoch": 0.3792933262653774, + "grad_norm": 7.39298736376242, + "learning_rate": 7.127767449127003e-06, + "loss": 17.9437, + "step": 20750 + }, + { + "epoch": 0.3793116054618239, + "grad_norm": 9.08367253886659, + "learning_rate": 7.127499574708626e-06, + "loss": 18.5506, + "step": 20751 + }, + { + "epoch": 0.37932988465827044, + "grad_norm": 6.919256850079278, + "learning_rate": 7.12723169283355e-06, + "loss": 17.8666, + "step": 20752 + }, + { + "epoch": 0.3793481638547169, + "grad_norm": 7.263917032680766, + "learning_rate": 7.126963803502711e-06, + "loss": 17.9848, + "step": 20753 + }, + { + "epoch": 0.37936644305116346, + "grad_norm": 6.694937443856486, + "learning_rate": 7.126695906717047e-06, + "loss": 17.6934, + "step": 20754 + }, + { + "epoch": 0.37938472224761, + "grad_norm": 7.969832870115532, + "learning_rate": 7.1264280024775e-06, + "loss": 18.1185, + "step": 20755 + }, + { + "epoch": 0.37940300144405653, + "grad_norm": 6.4036621065899, + "learning_rate": 7.126160090785006e-06, + "loss": 17.5468, + "step": 20756 + }, + { + "epoch": 0.37942128064050307, + "grad_norm": 5.827606480174566, + "learning_rate": 7.125892171640506e-06, + "loss": 17.217, + "step": 20757 + }, + { + "epoch": 0.37943955983694955, + "grad_norm": 5.506119830132652, + "learning_rate": 7.12562424504494e-06, + "loss": 17.0894, + "step": 20758 + }, + { + "epoch": 0.3794578390333961, + "grad_norm": 7.0339410972391745, + "learning_rate": 7.125356310999243e-06, + "loss": 17.8281, + "step": 20759 + }, + { + "epoch": 0.3794761182298426, + "grad_norm": 6.1191833339710815, + "learning_rate": 7.125088369504357e-06, + "loss": 17.2776, + "step": 20760 + }, + { + "epoch": 0.37949439742628915, + "grad_norm": 6.140600884159622, + "learning_rate": 7.124820420561221e-06, + "loss": 17.3743, + "step": 20761 + }, + { + "epoch": 0.3795126766227357, + "grad_norm": 7.652709346580124, + "learning_rate": 7.124552464170775e-06, + "loss": 18.0969, + "step": 20762 + }, + { + "epoch": 0.37953095581918217, + "grad_norm": 5.975453658052, + "learning_rate": 7.124284500333955e-06, + "loss": 17.5479, + "step": 20763 + }, + { + "epoch": 0.3795492350156287, + "grad_norm": 5.893502341608432, + "learning_rate": 7.124016529051703e-06, + "loss": 17.3345, + "step": 20764 + }, + { + "epoch": 0.37956751421207524, + "grad_norm": 6.661394297806554, + "learning_rate": 7.1237485503249585e-06, + "loss": 17.5253, + "step": 20765 + }, + { + "epoch": 0.3795857934085218, + "grad_norm": 5.5381015433825596, + "learning_rate": 7.123480564154659e-06, + "loss": 17.033, + "step": 20766 + }, + { + "epoch": 0.3796040726049683, + "grad_norm": 6.238925559423806, + "learning_rate": 7.123212570541743e-06, + "loss": 17.5199, + "step": 20767 + }, + { + "epoch": 0.3796223518014148, + "grad_norm": 6.2780918759583795, + "learning_rate": 7.122944569487153e-06, + "loss": 17.4589, + "step": 20768 + }, + { + "epoch": 0.37964063099786133, + "grad_norm": 5.659419884527585, + "learning_rate": 7.122676560991826e-06, + "loss": 17.1041, + "step": 20769 + }, + { + "epoch": 0.37965891019430786, + "grad_norm": 7.404834297493874, + "learning_rate": 7.1224085450567e-06, + "loss": 17.856, + "step": 20770 + }, + { + "epoch": 0.3796771893907544, + "grad_norm": 6.210689993677177, + "learning_rate": 7.122140521682719e-06, + "loss": 17.309, + "step": 20771 + }, + { + "epoch": 0.3796954685872009, + "grad_norm": 4.809070659389534, + "learning_rate": 7.121872490870818e-06, + "loss": 16.861, + 
"step": 20772 + }, + { + "epoch": 0.3797137477836474, + "grad_norm": 7.589711016124053, + "learning_rate": 7.1216044526219375e-06, + "loss": 17.6736, + "step": 20773 + }, + { + "epoch": 0.37973202698009395, + "grad_norm": 6.303364912652323, + "learning_rate": 7.121336406937018e-06, + "loss": 17.6306, + "step": 20774 + }, + { + "epoch": 0.3797503061765405, + "grad_norm": 7.443107831532645, + "learning_rate": 7.121068353817e-06, + "loss": 18.2774, + "step": 20775 + }, + { + "epoch": 0.379768585372987, + "grad_norm": 6.809614136229891, + "learning_rate": 7.120800293262821e-06, + "loss": 17.7834, + "step": 20776 + }, + { + "epoch": 0.3797868645694335, + "grad_norm": 5.856678594502468, + "learning_rate": 7.1205322252754206e-06, + "loss": 17.2173, + "step": 20777 + }, + { + "epoch": 0.37980514376588004, + "grad_norm": 7.222293962955577, + "learning_rate": 7.120264149855738e-06, + "loss": 17.7268, + "step": 20778 + }, + { + "epoch": 0.3798234229623266, + "grad_norm": 5.724796100158445, + "learning_rate": 7.119996067004714e-06, + "loss": 17.3644, + "step": 20779 + }, + { + "epoch": 0.3798417021587731, + "grad_norm": 5.224426821291531, + "learning_rate": 7.119727976723289e-06, + "loss": 17.0199, + "step": 20780 + }, + { + "epoch": 0.37985998135521964, + "grad_norm": 6.378530739003206, + "learning_rate": 7.119459879012399e-06, + "loss": 17.5581, + "step": 20781 + }, + { + "epoch": 0.3798782605516661, + "grad_norm": 7.493453941456023, + "learning_rate": 7.119191773872988e-06, + "loss": 17.8533, + "step": 20782 + }, + { + "epoch": 0.37989653974811266, + "grad_norm": 7.181188667677754, + "learning_rate": 7.118923661305992e-06, + "loss": 17.936, + "step": 20783 + }, + { + "epoch": 0.3799148189445592, + "grad_norm": 6.825187861916647, + "learning_rate": 7.118655541312354e-06, + "loss": 17.8693, + "step": 20784 + }, + { + "epoch": 0.37993309814100573, + "grad_norm": 6.078822640548004, + "learning_rate": 7.118387413893011e-06, + "loss": 17.4699, + "step": 20785 + }, + { + "epoch": 0.37995137733745227, + "grad_norm": 6.949376025687311, + "learning_rate": 7.118119279048905e-06, + "loss": 17.6847, + "step": 20786 + }, + { + "epoch": 0.37996965653389875, + "grad_norm": 6.2332728126778605, + "learning_rate": 7.117851136780974e-06, + "loss": 17.6239, + "step": 20787 + }, + { + "epoch": 0.3799879357303453, + "grad_norm": 10.07927838175929, + "learning_rate": 7.1175829870901595e-06, + "loss": 18.2738, + "step": 20788 + }, + { + "epoch": 0.3800062149267918, + "grad_norm": 6.965522917367423, + "learning_rate": 7.1173148299774e-06, + "loss": 17.8364, + "step": 20789 + }, + { + "epoch": 0.38002449412323835, + "grad_norm": 6.24263074851901, + "learning_rate": 7.117046665443635e-06, + "loss": 17.5051, + "step": 20790 + }, + { + "epoch": 0.3800427733196849, + "grad_norm": 6.529845780698912, + "learning_rate": 7.1167784934898044e-06, + "loss": 17.8506, + "step": 20791 + }, + { + "epoch": 0.38006105251613137, + "grad_norm": 6.713118413498703, + "learning_rate": 7.116510314116851e-06, + "loss": 17.42, + "step": 20792 + }, + { + "epoch": 0.3800793317125779, + "grad_norm": 5.7589875249438425, + "learning_rate": 7.116242127325712e-06, + "loss": 17.3183, + "step": 20793 + }, + { + "epoch": 0.38009761090902444, + "grad_norm": 5.604574003598077, + "learning_rate": 7.115973933117328e-06, + "loss": 17.4576, + "step": 20794 + }, + { + "epoch": 0.380115890105471, + "grad_norm": 5.863492297018321, + "learning_rate": 7.115705731492638e-06, + "loss": 17.2632, + "step": 20795 + }, + { + "epoch": 0.3801341693019175, + "grad_norm": 
7.712852658319658, + "learning_rate": 7.115437522452584e-06, + "loss": 17.8007, + "step": 20796 + }, + { + "epoch": 0.380152448498364, + "grad_norm": 5.439655829759279, + "learning_rate": 7.115169305998104e-06, + "loss": 16.9575, + "step": 20797 + }, + { + "epoch": 0.38017072769481053, + "grad_norm": 6.5774090196801, + "learning_rate": 7.1149010821301404e-06, + "loss": 17.4319, + "step": 20798 + }, + { + "epoch": 0.38018900689125706, + "grad_norm": 6.539854924193836, + "learning_rate": 7.1146328508496325e-06, + "loss": 17.3212, + "step": 20799 + }, + { + "epoch": 0.3802072860877036, + "grad_norm": 6.136157464951033, + "learning_rate": 7.1143646121575185e-06, + "loss": 17.4212, + "step": 20800 + }, + { + "epoch": 0.38022556528415014, + "grad_norm": 5.321499503310928, + "learning_rate": 7.1140963660547394e-06, + "loss": 17.2692, + "step": 20801 + }, + { + "epoch": 0.3802438444805966, + "grad_norm": 5.402814100503582, + "learning_rate": 7.113828112542237e-06, + "loss": 17.0028, + "step": 20802 + }, + { + "epoch": 0.38026212367704315, + "grad_norm": 7.2308965458430885, + "learning_rate": 7.1135598516209515e-06, + "loss": 17.8786, + "step": 20803 + }, + { + "epoch": 0.3802804028734897, + "grad_norm": 9.61500681048284, + "learning_rate": 7.113291583291821e-06, + "loss": 18.4204, + "step": 20804 + }, + { + "epoch": 0.3802986820699362, + "grad_norm": 6.106425624782197, + "learning_rate": 7.113023307555786e-06, + "loss": 17.2703, + "step": 20805 + }, + { + "epoch": 0.3803169612663827, + "grad_norm": 6.99745351777501, + "learning_rate": 7.1127550244137885e-06, + "loss": 17.6546, + "step": 20806 + }, + { + "epoch": 0.38033524046282924, + "grad_norm": 6.538338269757374, + "learning_rate": 7.112486733866769e-06, + "loss": 17.8167, + "step": 20807 + }, + { + "epoch": 0.3803535196592758, + "grad_norm": 7.364576068447406, + "learning_rate": 7.112218435915667e-06, + "loss": 17.8228, + "step": 20808 + }, + { + "epoch": 0.3803717988557223, + "grad_norm": 6.757236656425216, + "learning_rate": 7.111950130561421e-06, + "loss": 17.7835, + "step": 20809 + }, + { + "epoch": 0.38039007805216885, + "grad_norm": 5.933350921832651, + "learning_rate": 7.111681817804974e-06, + "loss": 17.1217, + "step": 20810 + }, + { + "epoch": 0.3804083572486153, + "grad_norm": 6.232631490358939, + "learning_rate": 7.111413497647265e-06, + "loss": 17.4871, + "step": 20811 + }, + { + "epoch": 0.38042663644506186, + "grad_norm": 6.275613415622698, + "learning_rate": 7.1111451700892355e-06, + "loss": 17.231, + "step": 20812 + }, + { + "epoch": 0.3804449156415084, + "grad_norm": 6.23796317998585, + "learning_rate": 7.110876835131824e-06, + "loss": 17.502, + "step": 20813 + }, + { + "epoch": 0.38046319483795493, + "grad_norm": 6.2602286933265905, + "learning_rate": 7.110608492775974e-06, + "loss": 17.3774, + "step": 20814 + }, + { + "epoch": 0.38048147403440147, + "grad_norm": 6.931666805961042, + "learning_rate": 7.1103401430226225e-06, + "loss": 17.7166, + "step": 20815 + }, + { + "epoch": 0.38049975323084795, + "grad_norm": 6.035905670956372, + "learning_rate": 7.1100717858727145e-06, + "loss": 17.2214, + "step": 20816 + }, + { + "epoch": 0.3805180324272945, + "grad_norm": 5.7084629547077235, + "learning_rate": 7.109803421327187e-06, + "loss": 17.2142, + "step": 20817 + }, + { + "epoch": 0.380536311623741, + "grad_norm": 8.142998894952107, + "learning_rate": 7.1095350493869795e-06, + "loss": 18.3317, + "step": 20818 + }, + { + "epoch": 0.38055459082018755, + "grad_norm": 6.762787633424514, + "learning_rate": 7.109266670053036e-06, + 
"loss": 17.8856, + "step": 20819 + }, + { + "epoch": 0.3805728700166341, + "grad_norm": 5.078895769171451, + "learning_rate": 7.108998283326298e-06, + "loss": 17.0001, + "step": 20820 + }, + { + "epoch": 0.38059114921308057, + "grad_norm": 6.911115253979119, + "learning_rate": 7.1087298892077015e-06, + "loss": 17.8457, + "step": 20821 + }, + { + "epoch": 0.3806094284095271, + "grad_norm": 6.12260812313701, + "learning_rate": 7.108461487698192e-06, + "loss": 17.577, + "step": 20822 + }, + { + "epoch": 0.38062770760597364, + "grad_norm": 6.335144243529556, + "learning_rate": 7.108193078798704e-06, + "loss": 17.1269, + "step": 20823 + }, + { + "epoch": 0.3806459868024202, + "grad_norm": 6.537416389310044, + "learning_rate": 7.107924662510186e-06, + "loss": 17.5166, + "step": 20824 + }, + { + "epoch": 0.3806642659988667, + "grad_norm": 7.255772304525292, + "learning_rate": 7.107656238833573e-06, + "loss": 17.5818, + "step": 20825 + }, + { + "epoch": 0.3806825451953132, + "grad_norm": 6.587597096182242, + "learning_rate": 7.10738780776981e-06, + "loss": 17.718, + "step": 20826 + }, + { + "epoch": 0.38070082439175973, + "grad_norm": 6.450749813985822, + "learning_rate": 7.107119369319834e-06, + "loss": 17.4572, + "step": 20827 + }, + { + "epoch": 0.38071910358820626, + "grad_norm": 9.730322542962446, + "learning_rate": 7.106850923484587e-06, + "loss": 18.7782, + "step": 20828 + }, + { + "epoch": 0.3807373827846528, + "grad_norm": 6.702658270530379, + "learning_rate": 7.106582470265011e-06, + "loss": 17.5062, + "step": 20829 + }, + { + "epoch": 0.38075566198109934, + "grad_norm": 6.741417519344889, + "learning_rate": 7.1063140096620455e-06, + "loss": 17.8569, + "step": 20830 + }, + { + "epoch": 0.3807739411775458, + "grad_norm": 6.835549574482659, + "learning_rate": 7.106045541676634e-06, + "loss": 17.7487, + "step": 20831 + }, + { + "epoch": 0.38079222037399235, + "grad_norm": 5.835097889779242, + "learning_rate": 7.105777066309714e-06, + "loss": 17.3014, + "step": 20832 + }, + { + "epoch": 0.3808104995704389, + "grad_norm": 8.640088946506342, + "learning_rate": 7.105508583562227e-06, + "loss": 18.1967, + "step": 20833 + }, + { + "epoch": 0.3808287787668854, + "grad_norm": 6.4034437865262674, + "learning_rate": 7.105240093435118e-06, + "loss": 17.1212, + "step": 20834 + }, + { + "epoch": 0.38084705796333196, + "grad_norm": 5.855337970033611, + "learning_rate": 7.104971595929324e-06, + "loss": 17.2914, + "step": 20835 + }, + { + "epoch": 0.38086533715977844, + "grad_norm": 6.656582309527312, + "learning_rate": 7.1047030910457874e-06, + "loss": 17.8303, + "step": 20836 + }, + { + "epoch": 0.380883616356225, + "grad_norm": 6.657019482079074, + "learning_rate": 7.104434578785448e-06, + "loss": 17.9462, + "step": 20837 + }, + { + "epoch": 0.3809018955526715, + "grad_norm": 6.17664984070476, + "learning_rate": 7.104166059149249e-06, + "loss": 17.4421, + "step": 20838 + }, + { + "epoch": 0.38092017474911805, + "grad_norm": 5.880778338538877, + "learning_rate": 7.10389753213813e-06, + "loss": 17.3208, + "step": 20839 + }, + { + "epoch": 0.3809384539455645, + "grad_norm": 4.631100250135552, + "learning_rate": 7.103628997753033e-06, + "loss": 16.809, + "step": 20840 + }, + { + "epoch": 0.38095673314201106, + "grad_norm": 7.10246331981957, + "learning_rate": 7.1033604559948985e-06, + "loss": 17.8819, + "step": 20841 + }, + { + "epoch": 0.3809750123384576, + "grad_norm": 6.2676708374018135, + "learning_rate": 7.103091906864669e-06, + "loss": 17.5006, + "step": 20842 + }, + { + "epoch": 0.38099329153490413, 
+ "grad_norm": 6.668340103982635, + "learning_rate": 7.102823350363283e-06, + "loss": 17.5777, + "step": 20843 + }, + { + "epoch": 0.38101157073135067, + "grad_norm": 6.365695726242811, + "learning_rate": 7.102554786491687e-06, + "loss": 17.5829, + "step": 20844 + }, + { + "epoch": 0.38102984992779715, + "grad_norm": 6.769414572402143, + "learning_rate": 7.102286215250815e-06, + "loss": 17.6714, + "step": 20845 + }, + { + "epoch": 0.3810481291242437, + "grad_norm": 6.226691924059086, + "learning_rate": 7.102017636641615e-06, + "loss": 17.8598, + "step": 20846 + }, + { + "epoch": 0.3810664083206902, + "grad_norm": 5.4387356711989865, + "learning_rate": 7.101749050665024e-06, + "loss": 17.2436, + "step": 20847 + }, + { + "epoch": 0.38108468751713676, + "grad_norm": 6.852695570490196, + "learning_rate": 7.101480457321987e-06, + "loss": 17.6438, + "step": 20848 + }, + { + "epoch": 0.3811029667135833, + "grad_norm": 6.629804302980361, + "learning_rate": 7.101211856613442e-06, + "loss": 17.5632, + "step": 20849 + }, + { + "epoch": 0.38112124591002977, + "grad_norm": 6.657351809764172, + "learning_rate": 7.100943248540331e-06, + "loss": 17.7071, + "step": 20850 + }, + { + "epoch": 0.3811395251064763, + "grad_norm": 7.011585648378357, + "learning_rate": 7.100674633103597e-06, + "loss": 17.6329, + "step": 20851 + }, + { + "epoch": 0.38115780430292284, + "grad_norm": 7.160553696981467, + "learning_rate": 7.100406010304182e-06, + "loss": 17.6871, + "step": 20852 + }, + { + "epoch": 0.3811760834993694, + "grad_norm": 7.141567162756561, + "learning_rate": 7.100137380143025e-06, + "loss": 18.0261, + "step": 20853 + }, + { + "epoch": 0.3811943626958159, + "grad_norm": 6.259878955116255, + "learning_rate": 7.099868742621069e-06, + "loss": 17.3837, + "step": 20854 + }, + { + "epoch": 0.3812126418922624, + "grad_norm": 5.492887614629066, + "learning_rate": 7.099600097739254e-06, + "loss": 17.3764, + "step": 20855 + }, + { + "epoch": 0.38123092108870893, + "grad_norm": 7.144588748872126, + "learning_rate": 7.099331445498524e-06, + "loss": 17.9633, + "step": 20856 + }, + { + "epoch": 0.38124920028515547, + "grad_norm": 7.159360110593641, + "learning_rate": 7.0990627858998195e-06, + "loss": 17.9482, + "step": 20857 + }, + { + "epoch": 0.381267479481602, + "grad_norm": 7.675817932055794, + "learning_rate": 7.098794118944083e-06, + "loss": 18.113, + "step": 20858 + }, + { + "epoch": 0.38128575867804854, + "grad_norm": 5.633699805040702, + "learning_rate": 7.098525444632253e-06, + "loss": 17.3153, + "step": 20859 + }, + { + "epoch": 0.381304037874495, + "grad_norm": 6.470043418287717, + "learning_rate": 7.0982567629652744e-06, + "loss": 17.4714, + "step": 20860 + }, + { + "epoch": 0.38132231707094155, + "grad_norm": 6.768151848349427, + "learning_rate": 7.0979880739440876e-06, + "loss": 17.6138, + "step": 20861 + }, + { + "epoch": 0.3813405962673881, + "grad_norm": 6.104392527320515, + "learning_rate": 7.0977193775696366e-06, + "loss": 17.4135, + "step": 20862 + }, + { + "epoch": 0.3813588754638346, + "grad_norm": 5.745199755207529, + "learning_rate": 7.097450673842859e-06, + "loss": 17.2317, + "step": 20863 + }, + { + "epoch": 0.38137715466028116, + "grad_norm": 6.5674161376538756, + "learning_rate": 7.097181962764699e-06, + "loss": 17.8981, + "step": 20864 + }, + { + "epoch": 0.38139543385672764, + "grad_norm": 7.043972020853318, + "learning_rate": 7.0969132443361e-06, + "loss": 17.7503, + "step": 20865 + }, + { + "epoch": 0.3814137130531742, + "grad_norm": 6.367059956679154, + "learning_rate": 
7.096644518558e-06, + "loss": 17.639, + "step": 20866 + }, + { + "epoch": 0.3814319922496207, + "grad_norm": 6.702691715465437, + "learning_rate": 7.096375785431344e-06, + "loss": 17.6277, + "step": 20867 + }, + { + "epoch": 0.38145027144606725, + "grad_norm": 6.207568160627037, + "learning_rate": 7.096107044957072e-06, + "loss": 17.6149, + "step": 20868 + }, + { + "epoch": 0.3814685506425138, + "grad_norm": 6.595650471552085, + "learning_rate": 7.095838297136127e-06, + "loss": 17.7398, + "step": 20869 + }, + { + "epoch": 0.38148682983896026, + "grad_norm": 6.134431622447001, + "learning_rate": 7.095569541969451e-06, + "loss": 17.3198, + "step": 20870 + }, + { + "epoch": 0.3815051090354068, + "grad_norm": 6.07535366359923, + "learning_rate": 7.095300779457986e-06, + "loss": 17.5319, + "step": 20871 + }, + { + "epoch": 0.38152338823185333, + "grad_norm": 5.8705506077538585, + "learning_rate": 7.0950320096026735e-06, + "loss": 17.4445, + "step": 20872 + }, + { + "epoch": 0.38154166742829987, + "grad_norm": 6.168112308933465, + "learning_rate": 7.094763232404454e-06, + "loss": 17.3161, + "step": 20873 + }, + { + "epoch": 0.38155994662474635, + "grad_norm": 5.920086473077339, + "learning_rate": 7.094494447864272e-06, + "loss": 17.2226, + "step": 20874 + }, + { + "epoch": 0.3815782258211929, + "grad_norm": 6.572426053992683, + "learning_rate": 7.094225655983069e-06, + "loss": 17.3873, + "step": 20875 + }, + { + "epoch": 0.3815965050176394, + "grad_norm": 7.497605754510795, + "learning_rate": 7.093956856761788e-06, + "loss": 17.387, + "step": 20876 + }, + { + "epoch": 0.38161478421408596, + "grad_norm": 7.672030592204335, + "learning_rate": 7.0936880502013685e-06, + "loss": 17.9081, + "step": 20877 + }, + { + "epoch": 0.3816330634105325, + "grad_norm": 4.961913623652882, + "learning_rate": 7.093419236302753e-06, + "loss": 16.7968, + "step": 20878 + }, + { + "epoch": 0.38165134260697897, + "grad_norm": 5.825915175452671, + "learning_rate": 7.093150415066887e-06, + "loss": 17.1826, + "step": 20879 + }, + { + "epoch": 0.3816696218034255, + "grad_norm": 5.778189478281201, + "learning_rate": 7.09288158649471e-06, + "loss": 17.192, + "step": 20880 + }, + { + "epoch": 0.38168790099987204, + "grad_norm": 6.757521273809351, + "learning_rate": 7.092612750587164e-06, + "loss": 17.7086, + "step": 20881 + }, + { + "epoch": 0.3817061801963186, + "grad_norm": 7.279928892138955, + "learning_rate": 7.092343907345191e-06, + "loss": 18.1617, + "step": 20882 + }, + { + "epoch": 0.3817244593927651, + "grad_norm": 6.173932016935703, + "learning_rate": 7.092075056769735e-06, + "loss": 17.9202, + "step": 20883 + }, + { + "epoch": 0.3817427385892116, + "grad_norm": 6.879067491651368, + "learning_rate": 7.0918061988617386e-06, + "loss": 17.9197, + "step": 20884 + }, + { + "epoch": 0.38176101778565813, + "grad_norm": 6.638190201616912, + "learning_rate": 7.091537333622142e-06, + "loss": 17.7333, + "step": 20885 + }, + { + "epoch": 0.38177929698210467, + "grad_norm": 5.592561299929222, + "learning_rate": 7.0912684610518876e-06, + "loss": 17.1227, + "step": 20886 + }, + { + "epoch": 0.3817975761785512, + "grad_norm": 6.838690555546561, + "learning_rate": 7.090999581151919e-06, + "loss": 17.5876, + "step": 20887 + }, + { + "epoch": 0.38181585537499774, + "grad_norm": 7.727658466566001, + "learning_rate": 7.09073069392318e-06, + "loss": 17.8524, + "step": 20888 + }, + { + "epoch": 0.3818341345714442, + "grad_norm": 7.9605758252771865, + "learning_rate": 7.09046179936661e-06, + "loss": 18.4245, + "step": 20889 + }, + { + 
"epoch": 0.38185241376789075, + "grad_norm": 4.929522106376273, + "learning_rate": 7.090192897483154e-06, + "loss": 16.8717, + "step": 20890 + }, + { + "epoch": 0.3818706929643373, + "grad_norm": 7.145315426100771, + "learning_rate": 7.089923988273752e-06, + "loss": 17.7391, + "step": 20891 + }, + { + "epoch": 0.3818889721607838, + "grad_norm": 5.938780832852384, + "learning_rate": 7.089655071739347e-06, + "loss": 17.4467, + "step": 20892 + }, + { + "epoch": 0.38190725135723036, + "grad_norm": 6.16574190594442, + "learning_rate": 7.089386147880885e-06, + "loss": 17.5801, + "step": 20893 + }, + { + "epoch": 0.38192553055367684, + "grad_norm": 6.957619290412913, + "learning_rate": 7.089117216699304e-06, + "loss": 17.8385, + "step": 20894 + }, + { + "epoch": 0.3819438097501234, + "grad_norm": 5.634910750663889, + "learning_rate": 7.088848278195548e-06, + "loss": 17.4732, + "step": 20895 + }, + { + "epoch": 0.3819620889465699, + "grad_norm": 12.784091532845126, + "learning_rate": 7.088579332370561e-06, + "loss": 18.0073, + "step": 20896 + }, + { + "epoch": 0.38198036814301645, + "grad_norm": 5.366099207301944, + "learning_rate": 7.088310379225285e-06, + "loss": 17.1493, + "step": 20897 + }, + { + "epoch": 0.381998647339463, + "grad_norm": 7.263435265105196, + "learning_rate": 7.088041418760662e-06, + "loss": 17.5792, + "step": 20898 + }, + { + "epoch": 0.38201692653590946, + "grad_norm": 5.88373008069427, + "learning_rate": 7.087772450977634e-06, + "loss": 17.4534, + "step": 20899 + }, + { + "epoch": 0.382035205732356, + "grad_norm": 7.455630310783019, + "learning_rate": 7.087503475877145e-06, + "loss": 17.9119, + "step": 20900 + }, + { + "epoch": 0.38205348492880253, + "grad_norm": 5.745101603164724, + "learning_rate": 7.087234493460139e-06, + "loss": 17.0636, + "step": 20901 + }, + { + "epoch": 0.38207176412524907, + "grad_norm": 6.147402314200255, + "learning_rate": 7.086965503727556e-06, + "loss": 17.4028, + "step": 20902 + }, + { + "epoch": 0.3820900433216956, + "grad_norm": 7.93139180122912, + "learning_rate": 7.086696506680342e-06, + "loss": 18.0001, + "step": 20903 + }, + { + "epoch": 0.3821083225181421, + "grad_norm": 5.751434835443962, + "learning_rate": 7.086427502319435e-06, + "loss": 17.4825, + "step": 20904 + }, + { + "epoch": 0.3821266017145886, + "grad_norm": 10.527463834833005, + "learning_rate": 7.0861584906457805e-06, + "loss": 18.3152, + "step": 20905 + }, + { + "epoch": 0.38214488091103516, + "grad_norm": 6.088116082350334, + "learning_rate": 7.085889471660323e-06, + "loss": 17.3969, + "step": 20906 + }, + { + "epoch": 0.3821631601074817, + "grad_norm": 5.788142043169116, + "learning_rate": 7.085620445364005e-06, + "loss": 17.5298, + "step": 20907 + }, + { + "epoch": 0.38218143930392817, + "grad_norm": 7.462126406792685, + "learning_rate": 7.085351411757766e-06, + "loss": 18.252, + "step": 20908 + }, + { + "epoch": 0.3821997185003747, + "grad_norm": 6.409991001895903, + "learning_rate": 7.085082370842553e-06, + "loss": 17.4893, + "step": 20909 + }, + { + "epoch": 0.38221799769682124, + "grad_norm": 6.865954809362901, + "learning_rate": 7.084813322619306e-06, + "loss": 17.9921, + "step": 20910 + }, + { + "epoch": 0.3822362768932678, + "grad_norm": 6.856510409482297, + "learning_rate": 7.08454426708897e-06, + "loss": 17.4194, + "step": 20911 + }, + { + "epoch": 0.3822545560897143, + "grad_norm": 4.615319540814306, + "learning_rate": 7.0842752042524865e-06, + "loss": 16.9605, + "step": 20912 + }, + { + "epoch": 0.3822728352861608, + "grad_norm": 5.526644847407925, + 
"learning_rate": 7.084006134110799e-06, + "loss": 17.1899, + "step": 20913 + }, + { + "epoch": 0.38229111448260733, + "grad_norm": 8.99895679572845, + "learning_rate": 7.0837370566648525e-06, + "loss": 17.4682, + "step": 20914 + }, + { + "epoch": 0.38230939367905387, + "grad_norm": 6.301254732162111, + "learning_rate": 7.0834679719155876e-06, + "loss": 17.1955, + "step": 20915 + }, + { + "epoch": 0.3823276728755004, + "grad_norm": 7.629743795603142, + "learning_rate": 7.083198879863947e-06, + "loss": 17.5275, + "step": 20916 + }, + { + "epoch": 0.38234595207194694, + "grad_norm": 6.202724535045338, + "learning_rate": 7.082929780510877e-06, + "loss": 17.3454, + "step": 20917 + }, + { + "epoch": 0.3823642312683934, + "grad_norm": 6.982099796758858, + "learning_rate": 7.0826606738573175e-06, + "loss": 17.5226, + "step": 20918 + }, + { + "epoch": 0.38238251046483995, + "grad_norm": 6.245180649736514, + "learning_rate": 7.082391559904213e-06, + "loss": 17.4011, + "step": 20919 + }, + { + "epoch": 0.3824007896612865, + "grad_norm": 6.604345590638832, + "learning_rate": 7.082122438652508e-06, + "loss": 17.5027, + "step": 20920 + }, + { + "epoch": 0.382419068857733, + "grad_norm": 5.691258786852814, + "learning_rate": 7.081853310103145e-06, + "loss": 17.1142, + "step": 20921 + }, + { + "epoch": 0.38243734805417956, + "grad_norm": 6.314341060201153, + "learning_rate": 7.081584174257066e-06, + "loss": 17.4148, + "step": 20922 + }, + { + "epoch": 0.38245562725062604, + "grad_norm": 6.277514237526559, + "learning_rate": 7.081315031115214e-06, + "loss": 17.5835, + "step": 20923 + }, + { + "epoch": 0.3824739064470726, + "grad_norm": 5.968445440806513, + "learning_rate": 7.081045880678534e-06, + "loss": 17.3819, + "step": 20924 + }, + { + "epoch": 0.3824921856435191, + "grad_norm": 5.913309017293552, + "learning_rate": 7.08077672294797e-06, + "loss": 17.2318, + "step": 20925 + }, + { + "epoch": 0.38251046483996565, + "grad_norm": 7.212968971941925, + "learning_rate": 7.080507557924463e-06, + "loss": 17.874, + "step": 20926 + }, + { + "epoch": 0.3825287440364122, + "grad_norm": 5.659362249790801, + "learning_rate": 7.080238385608958e-06, + "loss": 17.3396, + "step": 20927 + }, + { + "epoch": 0.38254702323285866, + "grad_norm": 7.303117907501463, + "learning_rate": 7.079969206002397e-06, + "loss": 17.5264, + "step": 20928 + }, + { + "epoch": 0.3825653024293052, + "grad_norm": 7.148331468273093, + "learning_rate": 7.079700019105725e-06, + "loss": 18.0166, + "step": 20929 + }, + { + "epoch": 0.38258358162575173, + "grad_norm": 6.2101605807832945, + "learning_rate": 7.079430824919885e-06, + "loss": 17.5685, + "step": 20930 + }, + { + "epoch": 0.38260186082219827, + "grad_norm": 6.126608292160563, + "learning_rate": 7.0791616234458215e-06, + "loss": 17.3098, + "step": 20931 + }, + { + "epoch": 0.3826201400186448, + "grad_norm": 6.2424096754175515, + "learning_rate": 7.078892414684475e-06, + "loss": 17.2135, + "step": 20932 + }, + { + "epoch": 0.3826384192150913, + "grad_norm": 7.893451554677613, + "learning_rate": 7.078623198636792e-06, + "loss": 17.8848, + "step": 20933 + }, + { + "epoch": 0.3826566984115378, + "grad_norm": 6.27278983088745, + "learning_rate": 7.078353975303716e-06, + "loss": 17.4218, + "step": 20934 + }, + { + "epoch": 0.38267497760798436, + "grad_norm": 7.5959714124403614, + "learning_rate": 7.078084744686189e-06, + "loss": 18.019, + "step": 20935 + }, + { + "epoch": 0.3826932568044309, + "grad_norm": 6.751028770933, + "learning_rate": 7.077815506785154e-06, + "loss": 17.5426, + "step": 
20936 + }, + { + "epoch": 0.38271153600087743, + "grad_norm": 5.352088787704339, + "learning_rate": 7.077546261601556e-06, + "loss": 17.0635, + "step": 20937 + }, + { + "epoch": 0.3827298151973239, + "grad_norm": 6.524017412712978, + "learning_rate": 7.077277009136341e-06, + "loss": 17.6072, + "step": 20938 + }, + { + "epoch": 0.38274809439377044, + "grad_norm": 5.923411217835149, + "learning_rate": 7.077007749390448e-06, + "loss": 17.1601, + "step": 20939 + }, + { + "epoch": 0.382766373590217, + "grad_norm": 6.382600647904093, + "learning_rate": 7.076738482364825e-06, + "loss": 17.797, + "step": 20940 + }, + { + "epoch": 0.3827846527866635, + "grad_norm": 7.349333360894832, + "learning_rate": 7.076469208060412e-06, + "loss": 18.4183, + "step": 20941 + }, + { + "epoch": 0.38280293198311, + "grad_norm": 7.886549704741756, + "learning_rate": 7.076199926478155e-06, + "loss": 17.9815, + "step": 20942 + }, + { + "epoch": 0.38282121117955653, + "grad_norm": 6.920208388927554, + "learning_rate": 7.075930637618998e-06, + "loss": 17.764, + "step": 20943 + }, + { + "epoch": 0.38283949037600307, + "grad_norm": 8.381040262324833, + "learning_rate": 7.075661341483884e-06, + "loss": 18.1572, + "step": 20944 + }, + { + "epoch": 0.3828577695724496, + "grad_norm": 6.618339656691645, + "learning_rate": 7.0753920380737564e-06, + "loss": 17.5145, + "step": 20945 + }, + { + "epoch": 0.38287604876889614, + "grad_norm": 6.021925121301376, + "learning_rate": 7.075122727389561e-06, + "loss": 17.5195, + "step": 20946 + }, + { + "epoch": 0.3828943279653426, + "grad_norm": 7.302842556870371, + "learning_rate": 7.07485340943224e-06, + "loss": 17.9682, + "step": 20947 + }, + { + "epoch": 0.38291260716178915, + "grad_norm": 6.476085190963245, + "learning_rate": 7.074584084202739e-06, + "loss": 17.7174, + "step": 20948 + }, + { + "epoch": 0.3829308863582357, + "grad_norm": 7.060712497037543, + "learning_rate": 7.074314751702e-06, + "loss": 17.7902, + "step": 20949 + }, + { + "epoch": 0.3829491655546822, + "grad_norm": 7.439936953729566, + "learning_rate": 7.0740454119309655e-06, + "loss": 17.8011, + "step": 20950 + }, + { + "epoch": 0.38296744475112876, + "grad_norm": 5.748107972841419, + "learning_rate": 7.073776064890584e-06, + "loss": 17.2943, + "step": 20951 + }, + { + "epoch": 0.38298572394757524, + "grad_norm": 7.592641996812257, + "learning_rate": 7.073506710581798e-06, + "loss": 18.1216, + "step": 20952 + }, + { + "epoch": 0.3830040031440218, + "grad_norm": 5.639567552391562, + "learning_rate": 7.073237349005551e-06, + "loss": 17.2927, + "step": 20953 + }, + { + "epoch": 0.3830222823404683, + "grad_norm": 6.2018678368437605, + "learning_rate": 7.072967980162785e-06, + "loss": 17.395, + "step": 20954 + }, + { + "epoch": 0.38304056153691485, + "grad_norm": 5.873436213169519, + "learning_rate": 7.072698604054448e-06, + "loss": 17.2147, + "step": 20955 + }, + { + "epoch": 0.3830588407333614, + "grad_norm": 6.504260340309082, + "learning_rate": 7.07242922068148e-06, + "loss": 17.9237, + "step": 20956 + }, + { + "epoch": 0.38307711992980786, + "grad_norm": 6.911863760797188, + "learning_rate": 7.072159830044829e-06, + "loss": 17.8015, + "step": 20957 + }, + { + "epoch": 0.3830953991262544, + "grad_norm": 6.0197496330229265, + "learning_rate": 7.071890432145438e-06, + "loss": 17.5761, + "step": 20958 + }, + { + "epoch": 0.38311367832270093, + "grad_norm": 5.974001537225322, + "learning_rate": 7.07162102698425e-06, + "loss": 17.3282, + "step": 20959 + }, + { + "epoch": 0.38313195751914747, + "grad_norm": 
7.8089125046639625, + "learning_rate": 7.071351614562211e-06, + "loss": 18.4462, + "step": 20960 + }, + { + "epoch": 0.383150236715594, + "grad_norm": 6.679126497544561, + "learning_rate": 7.071082194880263e-06, + "loss": 17.5977, + "step": 20961 + }, + { + "epoch": 0.3831685159120405, + "grad_norm": 7.571452136221873, + "learning_rate": 7.070812767939353e-06, + "loss": 17.9026, + "step": 20962 + }, + { + "epoch": 0.383186795108487, + "grad_norm": 5.488059508087825, + "learning_rate": 7.0705433337404235e-06, + "loss": 17.1057, + "step": 20963 + }, + { + "epoch": 0.38320507430493356, + "grad_norm": 5.754784046064573, + "learning_rate": 7.070273892284418e-06, + "loss": 17.1884, + "step": 20964 + }, + { + "epoch": 0.3832233535013801, + "grad_norm": 6.562704278530298, + "learning_rate": 7.0700044435722845e-06, + "loss": 17.3791, + "step": 20965 + }, + { + "epoch": 0.38324163269782663, + "grad_norm": 6.10723289146391, + "learning_rate": 7.069734987604964e-06, + "loss": 17.1413, + "step": 20966 + }, + { + "epoch": 0.3832599118942731, + "grad_norm": 9.181396417149502, + "learning_rate": 7.069465524383401e-06, + "loss": 17.8588, + "step": 20967 + }, + { + "epoch": 0.38327819109071964, + "grad_norm": 5.816902113994662, + "learning_rate": 7.069196053908541e-06, + "loss": 17.29, + "step": 20968 + }, + { + "epoch": 0.3832964702871662, + "grad_norm": 7.247253740811549, + "learning_rate": 7.0689265761813295e-06, + "loss": 17.9448, + "step": 20969 + }, + { + "epoch": 0.3833147494836127, + "grad_norm": 5.846369869770649, + "learning_rate": 7.0686570912027095e-06, + "loss": 17.5586, + "step": 20970 + }, + { + "epoch": 0.38333302868005925, + "grad_norm": 6.9528831305859, + "learning_rate": 7.068387598973626e-06, + "loss": 17.7971, + "step": 20971 + }, + { + "epoch": 0.38335130787650573, + "grad_norm": 5.561074919618863, + "learning_rate": 7.068118099495023e-06, + "loss": 16.9145, + "step": 20972 + }, + { + "epoch": 0.38336958707295227, + "grad_norm": 6.119739518076784, + "learning_rate": 7.067848592767845e-06, + "loss": 17.3776, + "step": 20973 + }, + { + "epoch": 0.3833878662693988, + "grad_norm": 7.926793988714482, + "learning_rate": 7.0675790787930384e-06, + "loss": 18.1583, + "step": 20974 + }, + { + "epoch": 0.38340614546584534, + "grad_norm": 7.415874693286228, + "learning_rate": 7.067309557571546e-06, + "loss": 17.8907, + "step": 20975 + }, + { + "epoch": 0.3834244246622918, + "grad_norm": 5.336722379137145, + "learning_rate": 7.067040029104314e-06, + "loss": 16.8849, + "step": 20976 + }, + { + "epoch": 0.38344270385873835, + "grad_norm": 6.731954946186777, + "learning_rate": 7.066770493392284e-06, + "loss": 17.6902, + "step": 20977 + }, + { + "epoch": 0.3834609830551849, + "grad_norm": 6.256213226265865, + "learning_rate": 7.066500950436404e-06, + "loss": 17.5907, + "step": 20978 + }, + { + "epoch": 0.3834792622516314, + "grad_norm": 6.945786522373672, + "learning_rate": 7.066231400237619e-06, + "loss": 17.4622, + "step": 20979 + }, + { + "epoch": 0.38349754144807796, + "grad_norm": 5.882857721699971, + "learning_rate": 7.06596184279687e-06, + "loss": 17.3546, + "step": 20980 + }, + { + "epoch": 0.38351582064452444, + "grad_norm": 5.737339459003288, + "learning_rate": 7.065692278115105e-06, + "loss": 17.4939, + "step": 20981 + }, + { + "epoch": 0.383534099840971, + "grad_norm": 6.914059546544488, + "learning_rate": 7.065422706193266e-06, + "loss": 17.869, + "step": 20982 + }, + { + "epoch": 0.3835523790374175, + "grad_norm": 7.15039442679996, + "learning_rate": 7.065153127032303e-06, + "loss": 
17.7811, + "step": 20983 + }, + { + "epoch": 0.38357065823386405, + "grad_norm": 7.5436196844894905, + "learning_rate": 7.064883540633155e-06, + "loss": 17.8848, + "step": 20984 + }, + { + "epoch": 0.3835889374303106, + "grad_norm": 8.22823033911563, + "learning_rate": 7.06461394699677e-06, + "loss": 18.5603, + "step": 20985 + }, + { + "epoch": 0.38360721662675706, + "grad_norm": 6.363131251137945, + "learning_rate": 7.064344346124092e-06, + "loss": 17.3607, + "step": 20986 + }, + { + "epoch": 0.3836254958232036, + "grad_norm": 5.632763651606355, + "learning_rate": 7.064074738016067e-06, + "loss": 17.3302, + "step": 20987 + }, + { + "epoch": 0.38364377501965014, + "grad_norm": 6.683524758785473, + "learning_rate": 7.063805122673638e-06, + "loss": 17.463, + "step": 20988 + }, + { + "epoch": 0.38366205421609667, + "grad_norm": 5.601327133997698, + "learning_rate": 7.0635355000977525e-06, + "loss": 17.3258, + "step": 20989 + }, + { + "epoch": 0.3836803334125432, + "grad_norm": 5.834606755959688, + "learning_rate": 7.063265870289353e-06, + "loss": 17.2437, + "step": 20990 + }, + { + "epoch": 0.3836986126089897, + "grad_norm": 7.335505367654194, + "learning_rate": 7.062996233249385e-06, + "loss": 17.736, + "step": 20991 + }, + { + "epoch": 0.3837168918054362, + "grad_norm": 7.605727190558551, + "learning_rate": 7.062726588978795e-06, + "loss": 17.6628, + "step": 20992 + }, + { + "epoch": 0.38373517100188276, + "grad_norm": 6.677220834372723, + "learning_rate": 7.062456937478529e-06, + "loss": 17.5839, + "step": 20993 + }, + { + "epoch": 0.3837534501983293, + "grad_norm": 5.9987708403078175, + "learning_rate": 7.062187278749528e-06, + "loss": 17.1523, + "step": 20994 + }, + { + "epoch": 0.38377172939477583, + "grad_norm": 5.824464208607177, + "learning_rate": 7.0619176127927416e-06, + "loss": 17.3179, + "step": 20995 + }, + { + "epoch": 0.3837900085912223, + "grad_norm": 6.46492619817191, + "learning_rate": 7.0616479396091105e-06, + "loss": 17.6594, + "step": 20996 + }, + { + "epoch": 0.38380828778766884, + "grad_norm": 7.486001052481146, + "learning_rate": 7.061378259199585e-06, + "loss": 18.256, + "step": 20997 + }, + { + "epoch": 0.3838265669841154, + "grad_norm": 6.76620453305171, + "learning_rate": 7.061108571565105e-06, + "loss": 17.6748, + "step": 20998 + }, + { + "epoch": 0.3838448461805619, + "grad_norm": 6.107454163429668, + "learning_rate": 7.06083887670662e-06, + "loss": 17.2595, + "step": 20999 + }, + { + "epoch": 0.38386312537700845, + "grad_norm": 6.861673884222232, + "learning_rate": 7.060569174625074e-06, + "loss": 17.6715, + "step": 21000 + }, + { + "epoch": 0.38388140457345493, + "grad_norm": 7.388862690006889, + "learning_rate": 7.060299465321409e-06, + "loss": 17.999, + "step": 21001 + }, + { + "epoch": 0.38389968376990147, + "grad_norm": 6.952039206444979, + "learning_rate": 7.060029748796575e-06, + "loss": 17.8205, + "step": 21002 + }, + { + "epoch": 0.383917962966348, + "grad_norm": 8.046804842311497, + "learning_rate": 7.059760025051517e-06, + "loss": 17.9271, + "step": 21003 + }, + { + "epoch": 0.38393624216279454, + "grad_norm": 6.753743435534955, + "learning_rate": 7.059490294087178e-06, + "loss": 17.7938, + "step": 21004 + }, + { + "epoch": 0.3839545213592411, + "grad_norm": 7.518158013897047, + "learning_rate": 7.059220555904503e-06, + "loss": 17.8388, + "step": 21005 + }, + { + "epoch": 0.38397280055568755, + "grad_norm": 6.211908699222352, + "learning_rate": 7.058950810504439e-06, + "loss": 17.3581, + "step": 21006 + }, + { + "epoch": 0.3839910797521341, + 
"grad_norm": 7.248228724588168, + "learning_rate": 7.058681057887932e-06, + "loss": 17.9582, + "step": 21007 + }, + { + "epoch": 0.3840093589485806, + "grad_norm": 6.761188584964818, + "learning_rate": 7.058411298055925e-06, + "loss": 17.8539, + "step": 21008 + }, + { + "epoch": 0.38402763814502716, + "grad_norm": 6.668129533612861, + "learning_rate": 7.058141531009366e-06, + "loss": 17.4327, + "step": 21009 + }, + { + "epoch": 0.38404591734147364, + "grad_norm": 6.720147588197332, + "learning_rate": 7.057871756749199e-06, + "loss": 17.5705, + "step": 21010 + }, + { + "epoch": 0.3840641965379202, + "grad_norm": 6.025626867685157, + "learning_rate": 7.057601975276372e-06, + "loss": 17.2125, + "step": 21011 + }, + { + "epoch": 0.3840824757343667, + "grad_norm": 6.418059717440586, + "learning_rate": 7.057332186591827e-06, + "loss": 17.2908, + "step": 21012 + }, + { + "epoch": 0.38410075493081325, + "grad_norm": 5.208297016391685, + "learning_rate": 7.057062390696511e-06, + "loss": 16.9275, + "step": 21013 + }, + { + "epoch": 0.3841190341272598, + "grad_norm": 6.7694717826112605, + "learning_rate": 7.05679258759137e-06, + "loss": 17.4841, + "step": 21014 + }, + { + "epoch": 0.38413731332370626, + "grad_norm": 5.318957700949014, + "learning_rate": 7.056522777277349e-06, + "loss": 16.9798, + "step": 21015 + }, + { + "epoch": 0.3841555925201528, + "grad_norm": 6.8537494940230745, + "learning_rate": 7.056252959755396e-06, + "loss": 17.4746, + "step": 21016 + }, + { + "epoch": 0.38417387171659934, + "grad_norm": 6.9875700824701035, + "learning_rate": 7.055983135026454e-06, + "loss": 17.7363, + "step": 21017 + }, + { + "epoch": 0.38419215091304587, + "grad_norm": 6.606386917732638, + "learning_rate": 7.055713303091467e-06, + "loss": 17.5111, + "step": 21018 + }, + { + "epoch": 0.3842104301094924, + "grad_norm": 5.8560429944783605, + "learning_rate": 7.055443463951386e-06, + "loss": 17.2994, + "step": 21019 + }, + { + "epoch": 0.3842287093059389, + "grad_norm": 5.899752883657581, + "learning_rate": 7.0551736176071525e-06, + "loss": 17.3462, + "step": 21020 + }, + { + "epoch": 0.3842469885023854, + "grad_norm": 5.719587758213765, + "learning_rate": 7.054903764059716e-06, + "loss": 17.4546, + "step": 21021 + }, + { + "epoch": 0.38426526769883196, + "grad_norm": 6.360485085785115, + "learning_rate": 7.054633903310018e-06, + "loss": 17.416, + "step": 21022 + }, + { + "epoch": 0.3842835468952785, + "grad_norm": 6.2388700986330194, + "learning_rate": 7.054364035359007e-06, + "loss": 17.3875, + "step": 21023 + }, + { + "epoch": 0.38430182609172503, + "grad_norm": 7.730507789397206, + "learning_rate": 7.05409416020763e-06, + "loss": 18.2943, + "step": 21024 + }, + { + "epoch": 0.3843201052881715, + "grad_norm": 7.365054977760105, + "learning_rate": 7.053824277856829e-06, + "loss": 18.2068, + "step": 21025 + }, + { + "epoch": 0.38433838448461805, + "grad_norm": 7.222826076162248, + "learning_rate": 7.053554388307553e-06, + "loss": 17.8555, + "step": 21026 + }, + { + "epoch": 0.3843566636810646, + "grad_norm": 7.403992404066004, + "learning_rate": 7.0532844915607464e-06, + "loss": 17.6077, + "step": 21027 + }, + { + "epoch": 0.3843749428775111, + "grad_norm": 6.646387237157337, + "learning_rate": 7.053014587617357e-06, + "loss": 17.6762, + "step": 21028 + }, + { + "epoch": 0.38439322207395765, + "grad_norm": 5.383995280193755, + "learning_rate": 7.052744676478329e-06, + "loss": 17.0154, + "step": 21029 + }, + { + "epoch": 0.38441150127040413, + "grad_norm": 5.231586236215032, + "learning_rate": 
7.05247475814461e-06, + "loss": 17.0177, + "step": 21030 + }, + { + "epoch": 0.38442978046685067, + "grad_norm": 6.022003660024721, + "learning_rate": 7.0522048326171446e-06, + "loss": 17.5111, + "step": 21031 + }, + { + "epoch": 0.3844480596632972, + "grad_norm": 6.961967428506571, + "learning_rate": 7.051934899896877e-06, + "loss": 17.5252, + "step": 21032 + }, + { + "epoch": 0.38446633885974374, + "grad_norm": 6.121851983504221, + "learning_rate": 7.051664959984757e-06, + "loss": 17.6957, + "step": 21033 + }, + { + "epoch": 0.3844846180561903, + "grad_norm": 7.502628993346572, + "learning_rate": 7.051395012881732e-06, + "loss": 18.1055, + "step": 21034 + }, + { + "epoch": 0.38450289725263675, + "grad_norm": 6.937053189562557, + "learning_rate": 7.051125058588744e-06, + "loss": 17.5169, + "step": 21035 + }, + { + "epoch": 0.3845211764490833, + "grad_norm": 6.425997547010063, + "learning_rate": 7.0508550971067395e-06, + "loss": 17.5367, + "step": 21036 + }, + { + "epoch": 0.3845394556455298, + "grad_norm": 7.543885421665368, + "learning_rate": 7.050585128436664e-06, + "loss": 17.9488, + "step": 21037 + }, + { + "epoch": 0.38455773484197636, + "grad_norm": 5.393873116929426, + "learning_rate": 7.05031515257947e-06, + "loss": 17.1293, + "step": 21038 + }, + { + "epoch": 0.3845760140384229, + "grad_norm": 7.6006476187830305, + "learning_rate": 7.0500451695360974e-06, + "loss": 18.0129, + "step": 21039 + }, + { + "epoch": 0.3845942932348694, + "grad_norm": 6.896454748473229, + "learning_rate": 7.049775179307494e-06, + "loss": 17.8704, + "step": 21040 + }, + { + "epoch": 0.3846125724313159, + "grad_norm": 7.847156974161075, + "learning_rate": 7.049505181894607e-06, + "loss": 17.9775, + "step": 21041 + }, + { + "epoch": 0.38463085162776245, + "grad_norm": 7.167280215081267, + "learning_rate": 7.049235177298381e-06, + "loss": 17.628, + "step": 21042 + }, + { + "epoch": 0.384649130824209, + "grad_norm": 4.947757102027752, + "learning_rate": 7.048965165519764e-06, + "loss": 16.8362, + "step": 21043 + }, + { + "epoch": 0.38466741002065546, + "grad_norm": 6.1773599780632, + "learning_rate": 7.048695146559703e-06, + "loss": 17.6304, + "step": 21044 + }, + { + "epoch": 0.384685689217102, + "grad_norm": 7.392166611133998, + "learning_rate": 7.048425120419142e-06, + "loss": 17.799, + "step": 21045 + }, + { + "epoch": 0.38470396841354854, + "grad_norm": 7.947934466412802, + "learning_rate": 7.04815508709903e-06, + "loss": 17.9185, + "step": 21046 + }, + { + "epoch": 0.38472224760999507, + "grad_norm": 6.3371784457134375, + "learning_rate": 7.047885046600311e-06, + "loss": 17.5585, + "step": 21047 + }, + { + "epoch": 0.3847405268064416, + "grad_norm": 6.35844089344265, + "learning_rate": 7.047614998923934e-06, + "loss": 17.6056, + "step": 21048 + }, + { + "epoch": 0.3847588060028881, + "grad_norm": 6.371209181819641, + "learning_rate": 7.047344944070843e-06, + "loss": 17.772, + "step": 21049 + }, + { + "epoch": 0.3847770851993346, + "grad_norm": 9.318432427827792, + "learning_rate": 7.047074882041986e-06, + "loss": 18.2345, + "step": 21050 + }, + { + "epoch": 0.38479536439578116, + "grad_norm": 6.422751603195068, + "learning_rate": 7.046804812838308e-06, + "loss": 17.4365, + "step": 21051 + }, + { + "epoch": 0.3848136435922277, + "grad_norm": 5.859817860410559, + "learning_rate": 7.046534736460758e-06, + "loss": 17.2159, + "step": 21052 + }, + { + "epoch": 0.38483192278867423, + "grad_norm": 7.197963911762869, + "learning_rate": 7.046264652910282e-06, + "loss": 17.7393, + "step": 21053 + }, + { + "epoch": 
0.3848502019851207, + "grad_norm": 7.672084740455577, + "learning_rate": 7.045994562187825e-06, + "loss": 17.8146, + "step": 21054 + }, + { + "epoch": 0.38486848118156725, + "grad_norm": 6.565158908516528, + "learning_rate": 7.045724464294335e-06, + "loss": 17.7477, + "step": 21055 + }, + { + "epoch": 0.3848867603780138, + "grad_norm": 7.8717063409463055, + "learning_rate": 7.045454359230757e-06, + "loss": 18.0792, + "step": 21056 + }, + { + "epoch": 0.3849050395744603, + "grad_norm": 5.79232056568044, + "learning_rate": 7.04518424699804e-06, + "loss": 17.4529, + "step": 21057 + }, + { + "epoch": 0.38492331877090685, + "grad_norm": 6.33561194468226, + "learning_rate": 7.044914127597131e-06, + "loss": 17.6643, + "step": 21058 + }, + { + "epoch": 0.38494159796735333, + "grad_norm": 6.785048526549523, + "learning_rate": 7.044644001028973e-06, + "loss": 17.7654, + "step": 21059 + }, + { + "epoch": 0.38495987716379987, + "grad_norm": 8.698879900998087, + "learning_rate": 7.044373867294516e-06, + "loss": 18.3851, + "step": 21060 + }, + { + "epoch": 0.3849781563602464, + "grad_norm": 6.081322171523658, + "learning_rate": 7.044103726394706e-06, + "loss": 17.4652, + "step": 21061 + }, + { + "epoch": 0.38499643555669294, + "grad_norm": 7.096132129346076, + "learning_rate": 7.04383357833049e-06, + "loss": 17.8868, + "step": 21062 + }, + { + "epoch": 0.3850147147531395, + "grad_norm": 6.499055407332533, + "learning_rate": 7.043563423102815e-06, + "loss": 17.4621, + "step": 21063 + }, + { + "epoch": 0.38503299394958596, + "grad_norm": 5.897318297174493, + "learning_rate": 7.043293260712627e-06, + "loss": 17.4509, + "step": 21064 + }, + { + "epoch": 0.3850512731460325, + "grad_norm": 7.0679735364353835, + "learning_rate": 7.043023091160875e-06, + "loss": 17.8111, + "step": 21065 + }, + { + "epoch": 0.385069552342479, + "grad_norm": 5.8669612247424086, + "learning_rate": 7.042752914448502e-06, + "loss": 17.293, + "step": 21066 + }, + { + "epoch": 0.38508783153892556, + "grad_norm": 7.397229059389791, + "learning_rate": 7.042482730576459e-06, + "loss": 17.7818, + "step": 21067 + }, + { + "epoch": 0.3851061107353721, + "grad_norm": 6.583045133857047, + "learning_rate": 7.04221253954569e-06, + "loss": 17.7877, + "step": 21068 + }, + { + "epoch": 0.3851243899318186, + "grad_norm": 6.318748984029918, + "learning_rate": 7.041942341357144e-06, + "loss": 17.4063, + "step": 21069 + }, + { + "epoch": 0.3851426691282651, + "grad_norm": 6.166478127521871, + "learning_rate": 7.0416721360117666e-06, + "loss": 17.7328, + "step": 21070 + }, + { + "epoch": 0.38516094832471165, + "grad_norm": 5.73594155656501, + "learning_rate": 7.041401923510505e-06, + "loss": 17.2638, + "step": 21071 + }, + { + "epoch": 0.3851792275211582, + "grad_norm": 7.858864102035168, + "learning_rate": 7.0411317038543095e-06, + "loss": 17.9374, + "step": 21072 + }, + { + "epoch": 0.3851975067176047, + "grad_norm": 5.148517394924511, + "learning_rate": 7.040861477044122e-06, + "loss": 16.8221, + "step": 21073 + }, + { + "epoch": 0.3852157859140512, + "grad_norm": 7.215845543892201, + "learning_rate": 7.040591243080893e-06, + "loss": 18.0409, + "step": 21074 + }, + { + "epoch": 0.38523406511049774, + "grad_norm": 7.545392211958562, + "learning_rate": 7.040321001965569e-06, + "loss": 17.8653, + "step": 21075 + }, + { + "epoch": 0.38525234430694427, + "grad_norm": 6.430969382890963, + "learning_rate": 7.040050753699097e-06, + "loss": 17.3215, + "step": 21076 + }, + { + "epoch": 0.3852706235033908, + "grad_norm": 6.494384680228185, + "learning_rate": 
7.039780498282422e-06, + "loss": 17.5879, + "step": 21077 + }, + { + "epoch": 0.3852889026998373, + "grad_norm": 6.95596397269451, + "learning_rate": 7.039510235716496e-06, + "loss": 17.7674, + "step": 21078 + }, + { + "epoch": 0.3853071818962838, + "grad_norm": 5.629432299225228, + "learning_rate": 7.039239966002264e-06, + "loss": 17.2038, + "step": 21079 + }, + { + "epoch": 0.38532546109273036, + "grad_norm": 6.775178413315715, + "learning_rate": 7.038969689140671e-06, + "loss": 17.683, + "step": 21080 + }, + { + "epoch": 0.3853437402891769, + "grad_norm": 6.248519359035783, + "learning_rate": 7.038699405132668e-06, + "loss": 17.2174, + "step": 21081 + }, + { + "epoch": 0.38536201948562343, + "grad_norm": 6.120305207682711, + "learning_rate": 7.0384291139791975e-06, + "loss": 17.4106, + "step": 21082 + }, + { + "epoch": 0.3853802986820699, + "grad_norm": 5.493229917477497, + "learning_rate": 7.038158815681213e-06, + "loss": 17.1191, + "step": 21083 + }, + { + "epoch": 0.38539857787851645, + "grad_norm": 7.91059832809418, + "learning_rate": 7.037888510239657e-06, + "loss": 18.056, + "step": 21084 + }, + { + "epoch": 0.385416857074963, + "grad_norm": 5.522442157148008, + "learning_rate": 7.037618197655479e-06, + "loss": 17.2022, + "step": 21085 + }, + { + "epoch": 0.3854351362714095, + "grad_norm": 6.181231408716889, + "learning_rate": 7.037347877929626e-06, + "loss": 17.4109, + "step": 21086 + }, + { + "epoch": 0.38545341546785605, + "grad_norm": 6.892857205373257, + "learning_rate": 7.037077551063045e-06, + "loss": 17.3751, + "step": 21087 + }, + { + "epoch": 0.38547169466430253, + "grad_norm": 5.573036233274179, + "learning_rate": 7.036807217056685e-06, + "loss": 17.1242, + "step": 21088 + }, + { + "epoch": 0.38548997386074907, + "grad_norm": 5.633173271976434, + "learning_rate": 7.036536875911492e-06, + "loss": 16.9751, + "step": 21089 + }, + { + "epoch": 0.3855082530571956, + "grad_norm": 7.079818367249521, + "learning_rate": 7.036266527628415e-06, + "loss": 18.0067, + "step": 21090 + }, + { + "epoch": 0.38552653225364214, + "grad_norm": 6.997568015876288, + "learning_rate": 7.035996172208398e-06, + "loss": 17.6373, + "step": 21091 + }, + { + "epoch": 0.3855448114500887, + "grad_norm": 7.057284878058664, + "learning_rate": 7.035725809652392e-06, + "loss": 17.9884, + "step": 21092 + }, + { + "epoch": 0.38556309064653516, + "grad_norm": 6.205975896431654, + "learning_rate": 7.035455439961345e-06, + "loss": 17.2431, + "step": 21093 + }, + { + "epoch": 0.3855813698429817, + "grad_norm": 10.777434619190368, + "learning_rate": 7.035185063136203e-06, + "loss": 17.7487, + "step": 21094 + }, + { + "epoch": 0.3855996490394282, + "grad_norm": 7.723381474482577, + "learning_rate": 7.034914679177913e-06, + "loss": 17.8022, + "step": 21095 + }, + { + "epoch": 0.38561792823587476, + "grad_norm": 6.358376912339185, + "learning_rate": 7.034644288087424e-06, + "loss": 17.643, + "step": 21096 + }, + { + "epoch": 0.3856362074323213, + "grad_norm": 7.280527488684475, + "learning_rate": 7.034373889865683e-06, + "loss": 17.6818, + "step": 21097 + }, + { + "epoch": 0.3856544866287678, + "grad_norm": 7.806657770907371, + "learning_rate": 7.034103484513639e-06, + "loss": 18.6375, + "step": 21098 + }, + { + "epoch": 0.3856727658252143, + "grad_norm": 6.957910146235624, + "learning_rate": 7.033833072032238e-06, + "loss": 17.8476, + "step": 21099 + }, + { + "epoch": 0.38569104502166085, + "grad_norm": 8.754076732392846, + "learning_rate": 7.033562652422428e-06, + "loss": 18.5968, + "step": 21100 + }, + { + 
"epoch": 0.3857093242181074, + "grad_norm": 7.531241236820884, + "learning_rate": 7.033292225685159e-06, + "loss": 17.4104, + "step": 21101 + }, + { + "epoch": 0.3857276034145539, + "grad_norm": 5.7801749191855345, + "learning_rate": 7.0330217918213765e-06, + "loss": 17.1987, + "step": 21102 + }, + { + "epoch": 0.3857458826110004, + "grad_norm": 5.485070074298556, + "learning_rate": 7.03275135083203e-06, + "loss": 17.1562, + "step": 21103 + }, + { + "epoch": 0.38576416180744694, + "grad_norm": 6.9288162598212315, + "learning_rate": 7.032480902718064e-06, + "loss": 17.8123, + "step": 21104 + }, + { + "epoch": 0.3857824410038935, + "grad_norm": 6.468959941191855, + "learning_rate": 7.03221044748043e-06, + "loss": 17.5832, + "step": 21105 + }, + { + "epoch": 0.38580072020034, + "grad_norm": 7.959014596218402, + "learning_rate": 7.0319399851200754e-06, + "loss": 17.4631, + "step": 21106 + }, + { + "epoch": 0.38581899939678654, + "grad_norm": 6.044343220531988, + "learning_rate": 7.0316695156379475e-06, + "loss": 17.3057, + "step": 21107 + }, + { + "epoch": 0.385837278593233, + "grad_norm": 8.2256849637106, + "learning_rate": 7.031399039034994e-06, + "loss": 18.4641, + "step": 21108 + }, + { + "epoch": 0.38585555778967956, + "grad_norm": 5.699225958830676, + "learning_rate": 7.031128555312161e-06, + "loss": 17.1867, + "step": 21109 + }, + { + "epoch": 0.3858738369861261, + "grad_norm": 6.672498573674858, + "learning_rate": 7.030858064470402e-06, + "loss": 17.3648, + "step": 21110 + }, + { + "epoch": 0.38589211618257263, + "grad_norm": 4.735959575691783, + "learning_rate": 7.03058756651066e-06, + "loss": 16.7964, + "step": 21111 + }, + { + "epoch": 0.3859103953790191, + "grad_norm": 7.371837255640524, + "learning_rate": 7.030317061433884e-06, + "loss": 17.9339, + "step": 21112 + }, + { + "epoch": 0.38592867457546565, + "grad_norm": 6.87418183915726, + "learning_rate": 7.0300465492410256e-06, + "loss": 17.7446, + "step": 21113 + }, + { + "epoch": 0.3859469537719122, + "grad_norm": 5.61363960267249, + "learning_rate": 7.029776029933027e-06, + "loss": 17.2526, + "step": 21114 + }, + { + "epoch": 0.3859652329683587, + "grad_norm": 8.675659705115105, + "learning_rate": 7.029505503510842e-06, + "loss": 18.4315, + "step": 21115 + }, + { + "epoch": 0.38598351216480525, + "grad_norm": 6.314659400689751, + "learning_rate": 7.029234969975415e-06, + "loss": 17.6072, + "step": 21116 + }, + { + "epoch": 0.38600179136125173, + "grad_norm": 7.281348543329399, + "learning_rate": 7.028964429327697e-06, + "loss": 17.6171, + "step": 21117 + }, + { + "epoch": 0.38602007055769827, + "grad_norm": 6.886107038104054, + "learning_rate": 7.028693881568632e-06, + "loss": 17.8502, + "step": 21118 + }, + { + "epoch": 0.3860383497541448, + "grad_norm": 6.362316523667407, + "learning_rate": 7.028423326699173e-06, + "loss": 17.5793, + "step": 21119 + }, + { + "epoch": 0.38605662895059134, + "grad_norm": 7.459611734937078, + "learning_rate": 7.028152764720265e-06, + "loss": 17.6337, + "step": 21120 + }, + { + "epoch": 0.3860749081470379, + "grad_norm": 5.5907760913335265, + "learning_rate": 7.027882195632861e-06, + "loss": 17.2374, + "step": 21121 + }, + { + "epoch": 0.38609318734348436, + "grad_norm": 5.973703927348647, + "learning_rate": 7.027611619437902e-06, + "loss": 17.1285, + "step": 21122 + }, + { + "epoch": 0.3861114665399309, + "grad_norm": 4.924313880101816, + "learning_rate": 7.02734103613634e-06, + "loss": 16.8294, + "step": 21123 + }, + { + "epoch": 0.3861297457363774, + "grad_norm": 7.181231802780088, + 
"learning_rate": 7.027070445729127e-06, + "loss": 17.681, + "step": 21124 + }, + { + "epoch": 0.38614802493282396, + "grad_norm": 6.990090253560784, + "learning_rate": 7.026799848217206e-06, + "loss": 17.8761, + "step": 21125 + }, + { + "epoch": 0.3861663041292705, + "grad_norm": 6.184005663463678, + "learning_rate": 7.026529243601528e-06, + "loss": 17.3393, + "step": 21126 + }, + { + "epoch": 0.386184583325717, + "grad_norm": 6.267227339306514, + "learning_rate": 7.02625863188304e-06, + "loss": 17.6497, + "step": 21127 + }, + { + "epoch": 0.3862028625221635, + "grad_norm": 6.75662883580309, + "learning_rate": 7.025988013062691e-06, + "loss": 17.5729, + "step": 21128 + }, + { + "epoch": 0.38622114171861005, + "grad_norm": 9.296670103182908, + "learning_rate": 7.025717387141431e-06, + "loss": 18.15, + "step": 21129 + }, + { + "epoch": 0.3862394209150566, + "grad_norm": 4.852905765168992, + "learning_rate": 7.025446754120206e-06, + "loss": 16.86, + "step": 21130 + }, + { + "epoch": 0.3862577001115031, + "grad_norm": 5.890617171437006, + "learning_rate": 7.0251761139999674e-06, + "loss": 17.1843, + "step": 21131 + }, + { + "epoch": 0.3862759793079496, + "grad_norm": 7.53836892579514, + "learning_rate": 7.024905466781662e-06, + "loss": 17.8045, + "step": 21132 + }, + { + "epoch": 0.38629425850439614, + "grad_norm": 7.142241216227472, + "learning_rate": 7.0246348124662375e-06, + "loss": 17.5758, + "step": 21133 + }, + { + "epoch": 0.3863125377008427, + "grad_norm": 5.6274541869633925, + "learning_rate": 7.024364151054646e-06, + "loss": 17.1649, + "step": 21134 + }, + { + "epoch": 0.3863308168972892, + "grad_norm": 5.0354058954229926, + "learning_rate": 7.024093482547831e-06, + "loss": 16.9671, + "step": 21135 + }, + { + "epoch": 0.38634909609373574, + "grad_norm": 6.676414643799981, + "learning_rate": 7.023822806946745e-06, + "loss": 17.4365, + "step": 21136 + }, + { + "epoch": 0.3863673752901822, + "grad_norm": 6.334598866258384, + "learning_rate": 7.023552124252335e-06, + "loss": 17.5582, + "step": 21137 + }, + { + "epoch": 0.38638565448662876, + "grad_norm": 5.145320711120825, + "learning_rate": 7.023281434465553e-06, + "loss": 16.9881, + "step": 21138 + }, + { + "epoch": 0.3864039336830753, + "grad_norm": 8.21784938964477, + "learning_rate": 7.0230107375873435e-06, + "loss": 17.9658, + "step": 21139 + }, + { + "epoch": 0.38642221287952183, + "grad_norm": 6.194250296436442, + "learning_rate": 7.022740033618657e-06, + "loss": 17.2713, + "step": 21140 + }, + { + "epoch": 0.38644049207596837, + "grad_norm": 7.4350060971970615, + "learning_rate": 7.0224693225604415e-06, + "loss": 17.8246, + "step": 21141 + }, + { + "epoch": 0.38645877127241485, + "grad_norm": 6.433020845599466, + "learning_rate": 7.022198604413647e-06, + "loss": 17.5145, + "step": 21142 + }, + { + "epoch": 0.3864770504688614, + "grad_norm": 7.612530599796842, + "learning_rate": 7.0219278791792225e-06, + "loss": 17.9481, + "step": 21143 + }, + { + "epoch": 0.3864953296653079, + "grad_norm": 6.7636431475299235, + "learning_rate": 7.021657146858115e-06, + "loss": 17.6575, + "step": 21144 + }, + { + "epoch": 0.38651360886175445, + "grad_norm": 7.789390469904421, + "learning_rate": 7.021386407451276e-06, + "loss": 17.4399, + "step": 21145 + }, + { + "epoch": 0.38653188805820093, + "grad_norm": 5.859613362592341, + "learning_rate": 7.021115660959652e-06, + "loss": 17.052, + "step": 21146 + }, + { + "epoch": 0.38655016725464747, + "grad_norm": 7.707885355718643, + "learning_rate": 7.020844907384193e-06, + "loss": 17.7728, + "step": 
21147 + }, + { + "epoch": 0.386568446451094, + "grad_norm": 6.576837103363616, + "learning_rate": 7.020574146725849e-06, + "loss": 17.473, + "step": 21148 + }, + { + "epoch": 0.38658672564754054, + "grad_norm": 6.165204627767476, + "learning_rate": 7.020303378985568e-06, + "loss": 17.5712, + "step": 21149 + }, + { + "epoch": 0.3866050048439871, + "grad_norm": 6.642705530824343, + "learning_rate": 7.020032604164297e-06, + "loss": 17.3808, + "step": 21150 + }, + { + "epoch": 0.38662328404043356, + "grad_norm": 4.789453489011865, + "learning_rate": 7.019761822262988e-06, + "loss": 16.6734, + "step": 21151 + }, + { + "epoch": 0.3866415632368801, + "grad_norm": 7.416221651627426, + "learning_rate": 7.019491033282591e-06, + "loss": 17.8973, + "step": 21152 + }, + { + "epoch": 0.38665984243332663, + "grad_norm": 6.346482425134587, + "learning_rate": 7.019220237224051e-06, + "loss": 17.6587, + "step": 21153 + }, + { + "epoch": 0.38667812162977316, + "grad_norm": 5.579443119528855, + "learning_rate": 7.01894943408832e-06, + "loss": 16.9855, + "step": 21154 + }, + { + "epoch": 0.3866964008262197, + "grad_norm": 6.875463484691944, + "learning_rate": 7.018678623876346e-06, + "loss": 17.9065, + "step": 21155 + }, + { + "epoch": 0.3867146800226662, + "grad_norm": 8.57757477707898, + "learning_rate": 7.0184078065890785e-06, + "loss": 17.675, + "step": 21156 + }, + { + "epoch": 0.3867329592191127, + "grad_norm": 8.57415771607685, + "learning_rate": 7.018136982227467e-06, + "loss": 18.3249, + "step": 21157 + }, + { + "epoch": 0.38675123841555925, + "grad_norm": 8.106472151018053, + "learning_rate": 7.017866150792461e-06, + "loss": 18.364, + "step": 21158 + }, + { + "epoch": 0.3867695176120058, + "grad_norm": 5.573386292208885, + "learning_rate": 7.017595312285008e-06, + "loss": 16.8987, + "step": 21159 + }, + { + "epoch": 0.3867877968084523, + "grad_norm": 7.469690580880193, + "learning_rate": 7.0173244667060606e-06, + "loss": 18.009, + "step": 21160 + }, + { + "epoch": 0.3868060760048988, + "grad_norm": 7.218461469271216, + "learning_rate": 7.017053614056564e-06, + "loss": 18.0056, + "step": 21161 + }, + { + "epoch": 0.38682435520134534, + "grad_norm": 6.64697296731165, + "learning_rate": 7.016782754337471e-06, + "loss": 17.7236, + "step": 21162 + }, + { + "epoch": 0.3868426343977919, + "grad_norm": 6.469455702339826, + "learning_rate": 7.016511887549729e-06, + "loss": 17.52, + "step": 21163 + }, + { + "epoch": 0.3868609135942384, + "grad_norm": 5.620907433957624, + "learning_rate": 7.016241013694287e-06, + "loss": 17.2523, + "step": 21164 + }, + { + "epoch": 0.38687919279068494, + "grad_norm": 5.6795585345674375, + "learning_rate": 7.015970132772097e-06, + "loss": 17.3294, + "step": 21165 + }, + { + "epoch": 0.3868974719871314, + "grad_norm": 7.020711561287148, + "learning_rate": 7.015699244784104e-06, + "loss": 17.7057, + "step": 21166 + }, + { + "epoch": 0.38691575118357796, + "grad_norm": 5.718392094795572, + "learning_rate": 7.015428349731261e-06, + "loss": 17.165, + "step": 21167 + }, + { + "epoch": 0.3869340303800245, + "grad_norm": 6.738196681250109, + "learning_rate": 7.015157447614518e-06, + "loss": 17.3594, + "step": 21168 + }, + { + "epoch": 0.38695230957647103, + "grad_norm": 6.514408171224374, + "learning_rate": 7.014886538434822e-06, + "loss": 17.7885, + "step": 21169 + }, + { + "epoch": 0.38697058877291757, + "grad_norm": 6.240929938726908, + "learning_rate": 7.014615622193124e-06, + "loss": 17.3936, + "step": 21170 + }, + { + "epoch": 0.38698886796936405, + "grad_norm": 6.488269955690865, 
+ "learning_rate": 7.0143446988903714e-06, + "loss": 17.4412, + "step": 21171 + }, + { + "epoch": 0.3870071471658106, + "grad_norm": 7.510448804413826, + "learning_rate": 7.014073768527517e-06, + "loss": 17.868, + "step": 21172 + }, + { + "epoch": 0.3870254263622571, + "grad_norm": 6.568964597754895, + "learning_rate": 7.013802831105508e-06, + "loss": 17.8103, + "step": 21173 + }, + { + "epoch": 0.38704370555870365, + "grad_norm": 6.347534864591465, + "learning_rate": 7.0135318866252955e-06, + "loss": 17.3994, + "step": 21174 + }, + { + "epoch": 0.3870619847551502, + "grad_norm": 6.076323680890524, + "learning_rate": 7.0132609350878285e-06, + "loss": 17.266, + "step": 21175 + }, + { + "epoch": 0.38708026395159667, + "grad_norm": 6.32945252994887, + "learning_rate": 7.012989976494057e-06, + "loss": 17.3789, + "step": 21176 + }, + { + "epoch": 0.3870985431480432, + "grad_norm": 5.935308236977482, + "learning_rate": 7.012719010844928e-06, + "loss": 17.4507, + "step": 21177 + }, + { + "epoch": 0.38711682234448974, + "grad_norm": 6.854667051747896, + "learning_rate": 7.012448038141396e-06, + "loss": 17.7286, + "step": 21178 + }, + { + "epoch": 0.3871351015409363, + "grad_norm": 6.164314235251316, + "learning_rate": 7.012177058384408e-06, + "loss": 17.3948, + "step": 21179 + }, + { + "epoch": 0.38715338073738276, + "grad_norm": 6.860808195260509, + "learning_rate": 7.0119060715749146e-06, + "loss": 17.8231, + "step": 21180 + }, + { + "epoch": 0.3871716599338293, + "grad_norm": 5.204002291292977, + "learning_rate": 7.011635077713863e-06, + "loss": 17.0888, + "step": 21181 + }, + { + "epoch": 0.38718993913027583, + "grad_norm": 5.552994605069955, + "learning_rate": 7.0113640768022055e-06, + "loss": 17.0881, + "step": 21182 + }, + { + "epoch": 0.38720821832672236, + "grad_norm": 7.288976439343021, + "learning_rate": 7.0110930688408915e-06, + "loss": 17.6469, + "step": 21183 + }, + { + "epoch": 0.3872264975231689, + "grad_norm": 7.121803750697378, + "learning_rate": 7.010822053830872e-06, + "loss": 17.7485, + "step": 21184 + }, + { + "epoch": 0.3872447767196154, + "grad_norm": 7.171010355826255, + "learning_rate": 7.010551031773094e-06, + "loss": 18.1696, + "step": 21185 + }, + { + "epoch": 0.3872630559160619, + "grad_norm": 6.215532642912796, + "learning_rate": 7.01028000266851e-06, + "loss": 17.5473, + "step": 21186 + }, + { + "epoch": 0.38728133511250845, + "grad_norm": 6.009141313573754, + "learning_rate": 7.010008966518069e-06, + "loss": 17.534, + "step": 21187 + }, + { + "epoch": 0.387299614308955, + "grad_norm": 7.382995024123719, + "learning_rate": 7.009737923322722e-06, + "loss": 17.8126, + "step": 21188 + }, + { + "epoch": 0.3873178935054015, + "grad_norm": 7.008409613622167, + "learning_rate": 7.009466873083416e-06, + "loss": 17.8726, + "step": 21189 + }, + { + "epoch": 0.387336172701848, + "grad_norm": 6.326699647289833, + "learning_rate": 7.009195815801105e-06, + "loss": 17.2901, + "step": 21190 + }, + { + "epoch": 0.38735445189829454, + "grad_norm": 6.816441338042101, + "learning_rate": 7.008924751476734e-06, + "loss": 17.8958, + "step": 21191 + }, + { + "epoch": 0.3873727310947411, + "grad_norm": 6.4113341488070965, + "learning_rate": 7.0086536801112595e-06, + "loss": 17.5528, + "step": 21192 + }, + { + "epoch": 0.3873910102911876, + "grad_norm": 6.098204645303003, + "learning_rate": 7.008382601705626e-06, + "loss": 17.3523, + "step": 21193 + }, + { + "epoch": 0.38740928948763415, + "grad_norm": 5.224449357427487, + "learning_rate": 7.008111516260787e-06, + "loss": 17.1505, + "step": 
21194 + }, + { + "epoch": 0.3874275686840806, + "grad_norm": 7.138159137774924, + "learning_rate": 7.007840423777691e-06, + "loss": 18.0406, + "step": 21195 + }, + { + "epoch": 0.38744584788052716, + "grad_norm": 7.110189495810454, + "learning_rate": 7.0075693242572885e-06, + "loss": 17.7023, + "step": 21196 + }, + { + "epoch": 0.3874641270769737, + "grad_norm": 8.82591569452853, + "learning_rate": 7.007298217700529e-06, + "loss": 18.3154, + "step": 21197 + }, + { + "epoch": 0.38748240627342023, + "grad_norm": 9.124803495482377, + "learning_rate": 7.0070271041083635e-06, + "loss": 18.4117, + "step": 21198 + }, + { + "epoch": 0.38750068546986677, + "grad_norm": 6.888477199115898, + "learning_rate": 7.006755983481744e-06, + "loss": 17.9521, + "step": 21199 + }, + { + "epoch": 0.38751896466631325, + "grad_norm": 7.585323345702857, + "learning_rate": 7.006484855821617e-06, + "loss": 17.995, + "step": 21200 + }, + { + "epoch": 0.3875372438627598, + "grad_norm": 5.733917607894738, + "learning_rate": 7.0062137211289346e-06, + "loss": 17.2265, + "step": 21201 + }, + { + "epoch": 0.3875555230592063, + "grad_norm": 7.162694575830329, + "learning_rate": 7.0059425794046485e-06, + "loss": 17.1872, + "step": 21202 + }, + { + "epoch": 0.38757380225565286, + "grad_norm": 6.972112316308997, + "learning_rate": 7.005671430649708e-06, + "loss": 17.7391, + "step": 21203 + }, + { + "epoch": 0.3875920814520994, + "grad_norm": 5.907092817545116, + "learning_rate": 7.005400274865062e-06, + "loss": 17.129, + "step": 21204 + }, + { + "epoch": 0.38761036064854587, + "grad_norm": 6.014288258255214, + "learning_rate": 7.005129112051662e-06, + "loss": 17.1242, + "step": 21205 + }, + { + "epoch": 0.3876286398449924, + "grad_norm": 7.994252068301885, + "learning_rate": 7.004857942210459e-06, + "loss": 18.3423, + "step": 21206 + }, + { + "epoch": 0.38764691904143894, + "grad_norm": 5.43324849083137, + "learning_rate": 7.004586765342403e-06, + "loss": 17.2776, + "step": 21207 + }, + { + "epoch": 0.3876651982378855, + "grad_norm": 6.869744228197584, + "learning_rate": 7.004315581448444e-06, + "loss": 17.8097, + "step": 21208 + }, + { + "epoch": 0.387683477434332, + "grad_norm": 8.093794571431726, + "learning_rate": 7.004044390529532e-06, + "loss": 18.0262, + "step": 21209 + }, + { + "epoch": 0.3877017566307785, + "grad_norm": 6.808434764031609, + "learning_rate": 7.00377319258662e-06, + "loss": 18.0233, + "step": 21210 + }, + { + "epoch": 0.38772003582722503, + "grad_norm": 6.4851077505687025, + "learning_rate": 7.003501987620655e-06, + "loss": 17.6019, + "step": 21211 + }, + { + "epoch": 0.38773831502367156, + "grad_norm": 5.779791405467095, + "learning_rate": 7.003230775632591e-06, + "loss": 17.2765, + "step": 21212 + }, + { + "epoch": 0.3877565942201181, + "grad_norm": 5.9871841761264095, + "learning_rate": 7.002959556623376e-06, + "loss": 17.2801, + "step": 21213 + }, + { + "epoch": 0.3877748734165646, + "grad_norm": 8.093114902296954, + "learning_rate": 7.002688330593963e-06, + "loss": 18.6881, + "step": 21214 + }, + { + "epoch": 0.3877931526130111, + "grad_norm": 9.376799662022735, + "learning_rate": 7.0024170975453e-06, + "loss": 19.07, + "step": 21215 + }, + { + "epoch": 0.38781143180945765, + "grad_norm": 8.387256154251714, + "learning_rate": 7.002145857478338e-06, + "loss": 18.239, + "step": 21216 + }, + { + "epoch": 0.3878297110059042, + "grad_norm": 7.62922656084555, + "learning_rate": 7.00187461039403e-06, + "loss": 18.0096, + "step": 21217 + }, + { + "epoch": 0.3878479902023507, + "grad_norm": 
5.985392864732103, + "learning_rate": 7.001603356293325e-06, + "loss": 17.3189, + "step": 21218 + }, + { + "epoch": 0.3878662693987972, + "grad_norm": 7.650112208586723, + "learning_rate": 7.001332095177173e-06, + "loss": 18.0754, + "step": 21219 + }, + { + "epoch": 0.38788454859524374, + "grad_norm": 6.035053943900861, + "learning_rate": 7.001060827046527e-06, + "loss": 17.51, + "step": 21220 + }, + { + "epoch": 0.3879028277916903, + "grad_norm": 6.216438855753022, + "learning_rate": 7.0007895519023364e-06, + "loss": 17.2839, + "step": 21221 + }, + { + "epoch": 0.3879211069881368, + "grad_norm": 5.130572402721616, + "learning_rate": 7.000518269745551e-06, + "loss": 17.2857, + "step": 21222 + }, + { + "epoch": 0.38793938618458335, + "grad_norm": 5.96959733512535, + "learning_rate": 7.000246980577121e-06, + "loss": 17.3203, + "step": 21223 + }, + { + "epoch": 0.3879576653810298, + "grad_norm": 6.889450543395383, + "learning_rate": 6.999975684398002e-06, + "loss": 17.8088, + "step": 21224 + }, + { + "epoch": 0.38797594457747636, + "grad_norm": 6.029957558576468, + "learning_rate": 6.9997043812091415e-06, + "loss": 17.6056, + "step": 21225 + }, + { + "epoch": 0.3879942237739229, + "grad_norm": 6.906724005829455, + "learning_rate": 6.99943307101149e-06, + "loss": 17.6793, + "step": 21226 + }, + { + "epoch": 0.38801250297036943, + "grad_norm": 5.340520529071106, + "learning_rate": 6.999161753805999e-06, + "loss": 17.1273, + "step": 21227 + }, + { + "epoch": 0.38803078216681597, + "grad_norm": 6.47837350521822, + "learning_rate": 6.998890429593619e-06, + "loss": 17.6201, + "step": 21228 + }, + { + "epoch": 0.38804906136326245, + "grad_norm": 6.268382853033523, + "learning_rate": 6.9986190983753015e-06, + "loss": 17.4448, + "step": 21229 + }, + { + "epoch": 0.388067340559709, + "grad_norm": 5.7812184180378745, + "learning_rate": 6.9983477601519975e-06, + "loss": 17.2134, + "step": 21230 + }, + { + "epoch": 0.3880856197561555, + "grad_norm": 7.476410771269091, + "learning_rate": 6.9980764149246594e-06, + "loss": 17.9308, + "step": 21231 + }, + { + "epoch": 0.38810389895260206, + "grad_norm": 7.042216423096148, + "learning_rate": 6.997805062694235e-06, + "loss": 17.8468, + "step": 21232 + }, + { + "epoch": 0.3881221781490486, + "grad_norm": 7.155437551364444, + "learning_rate": 6.997533703461679e-06, + "loss": 17.9161, + "step": 21233 + }, + { + "epoch": 0.38814045734549507, + "grad_norm": 7.6242148961471905, + "learning_rate": 6.99726233722794e-06, + "loss": 17.8064, + "step": 21234 + }, + { + "epoch": 0.3881587365419416, + "grad_norm": 6.409742456396555, + "learning_rate": 6.996990963993971e-06, + "loss": 17.5401, + "step": 21235 + }, + { + "epoch": 0.38817701573838814, + "grad_norm": 4.970076298313822, + "learning_rate": 6.9967195837607184e-06, + "loss": 16.9293, + "step": 21236 + }, + { + "epoch": 0.3881952949348347, + "grad_norm": 6.927960706911375, + "learning_rate": 6.99644819652914e-06, + "loss": 17.711, + "step": 21237 + }, + { + "epoch": 0.3882135741312812, + "grad_norm": 7.612483565007718, + "learning_rate": 6.996176802300183e-06, + "loss": 17.8934, + "step": 21238 + }, + { + "epoch": 0.3882318533277277, + "grad_norm": 7.882187425633395, + "learning_rate": 6.9959054010748e-06, + "loss": 17.8482, + "step": 21239 + }, + { + "epoch": 0.38825013252417423, + "grad_norm": 9.240745684312687, + "learning_rate": 6.9956339928539406e-06, + "loss": 18.8445, + "step": 21240 + }, + { + "epoch": 0.38826841172062077, + "grad_norm": 7.712105683195305, + "learning_rate": 6.995362577638558e-06, + "loss": 
17.8008, + "step": 21241 + }, + { + "epoch": 0.3882866909170673, + "grad_norm": 10.050560138755184, + "learning_rate": 6.995091155429603e-06, + "loss": 18.0613, + "step": 21242 + }, + { + "epoch": 0.38830497011351384, + "grad_norm": 6.018862137503063, + "learning_rate": 6.994819726228026e-06, + "loss": 17.3277, + "step": 21243 + }, + { + "epoch": 0.3883232493099603, + "grad_norm": 6.199811183230868, + "learning_rate": 6.994548290034779e-06, + "loss": 17.444, + "step": 21244 + }, + { + "epoch": 0.38834152850640685, + "grad_norm": 6.255313750780701, + "learning_rate": 6.9942768468508134e-06, + "loss": 17.5624, + "step": 21245 + }, + { + "epoch": 0.3883598077028534, + "grad_norm": 5.6463831568429645, + "learning_rate": 6.994005396677082e-06, + "loss": 16.9848, + "step": 21246 + }, + { + "epoch": 0.3883780868992999, + "grad_norm": 6.679686702760674, + "learning_rate": 6.993733939514534e-06, + "loss": 17.4929, + "step": 21247 + }, + { + "epoch": 0.3883963660957464, + "grad_norm": 6.9273344663011285, + "learning_rate": 6.993462475364121e-06, + "loss": 17.7167, + "step": 21248 + }, + { + "epoch": 0.38841464529219294, + "grad_norm": 6.811534633973879, + "learning_rate": 6.993191004226795e-06, + "loss": 17.6195, + "step": 21249 + }, + { + "epoch": 0.3884329244886395, + "grad_norm": 6.014366310528886, + "learning_rate": 6.992919526103507e-06, + "loss": 17.5811, + "step": 21250 + }, + { + "epoch": 0.388451203685086, + "grad_norm": 6.22316949754385, + "learning_rate": 6.992648040995209e-06, + "loss": 17.4077, + "step": 21251 + }, + { + "epoch": 0.38846948288153255, + "grad_norm": 5.558124965916242, + "learning_rate": 6.9923765489028535e-06, + "loss": 17.3147, + "step": 21252 + }, + { + "epoch": 0.388487762077979, + "grad_norm": 6.249392869860253, + "learning_rate": 6.9921050498273915e-06, + "loss": 17.476, + "step": 21253 + }, + { + "epoch": 0.38850604127442556, + "grad_norm": 7.6245737784902055, + "learning_rate": 6.9918335437697725e-06, + "loss": 17.9288, + "step": 21254 + }, + { + "epoch": 0.3885243204708721, + "grad_norm": 6.749996569070155, + "learning_rate": 6.99156203073095e-06, + "loss": 17.5503, + "step": 21255 + }, + { + "epoch": 0.38854259966731863, + "grad_norm": 6.65555231658176, + "learning_rate": 6.991290510711877e-06, + "loss": 17.3065, + "step": 21256 + }, + { + "epoch": 0.38856087886376517, + "grad_norm": 5.966298807431138, + "learning_rate": 6.991018983713502e-06, + "loss": 17.3712, + "step": 21257 + }, + { + "epoch": 0.38857915806021165, + "grad_norm": 6.940957382615158, + "learning_rate": 6.990747449736779e-06, + "loss": 17.5398, + "step": 21258 + }, + { + "epoch": 0.3885974372566582, + "grad_norm": 7.656777098521687, + "learning_rate": 6.990475908782659e-06, + "loss": 17.7731, + "step": 21259 + }, + { + "epoch": 0.3886157164531047, + "grad_norm": 5.286748892777709, + "learning_rate": 6.990204360852093e-06, + "loss": 17.0508, + "step": 21260 + }, + { + "epoch": 0.38863399564955126, + "grad_norm": 5.743004171762552, + "learning_rate": 6.989932805946033e-06, + "loss": 17.3825, + "step": 21261 + }, + { + "epoch": 0.3886522748459978, + "grad_norm": 7.176283377532258, + "learning_rate": 6.989661244065433e-06, + "loss": 17.9225, + "step": 21262 + }, + { + "epoch": 0.38867055404244427, + "grad_norm": 5.632309989099968, + "learning_rate": 6.989389675211241e-06, + "loss": 17.1852, + "step": 21263 + }, + { + "epoch": 0.3886888332388908, + "grad_norm": 6.291288617650993, + "learning_rate": 6.989118099384412e-06, + "loss": 17.4721, + "step": 21264 + }, + { + "epoch": 0.38870711243533734, + 
"grad_norm": 5.919406410920219, + "learning_rate": 6.9888465165858974e-06, + "loss": 17.304, + "step": 21265 + }, + { + "epoch": 0.3887253916317839, + "grad_norm": 7.682794152487566, + "learning_rate": 6.988574926816647e-06, + "loss": 18.1165, + "step": 21266 + }, + { + "epoch": 0.3887436708282304, + "grad_norm": 5.4366959645772654, + "learning_rate": 6.988303330077615e-06, + "loss": 17.1581, + "step": 21267 + }, + { + "epoch": 0.3887619500246769, + "grad_norm": 7.521380821342717, + "learning_rate": 6.988031726369751e-06, + "loss": 17.8559, + "step": 21268 + }, + { + "epoch": 0.38878022922112343, + "grad_norm": 5.808104123312755, + "learning_rate": 6.987760115694009e-06, + "loss": 17.1664, + "step": 21269 + }, + { + "epoch": 0.38879850841756997, + "grad_norm": 5.958179556614933, + "learning_rate": 6.987488498051341e-06, + "loss": 17.3527, + "step": 21270 + }, + { + "epoch": 0.3888167876140165, + "grad_norm": 7.133793472041586, + "learning_rate": 6.987216873442697e-06, + "loss": 17.6617, + "step": 21271 + }, + { + "epoch": 0.38883506681046304, + "grad_norm": 8.031273334361943, + "learning_rate": 6.986945241869032e-06, + "loss": 18.4279, + "step": 21272 + }, + { + "epoch": 0.3888533460069095, + "grad_norm": 8.503223922463164, + "learning_rate": 6.986673603331295e-06, + "loss": 17.3892, + "step": 21273 + }, + { + "epoch": 0.38887162520335605, + "grad_norm": 5.965962258022024, + "learning_rate": 6.9864019578304395e-06, + "loss": 17.3279, + "step": 21274 + }, + { + "epoch": 0.3888899043998026, + "grad_norm": 6.595645195422536, + "learning_rate": 6.9861303053674175e-06, + "loss": 17.6504, + "step": 21275 + }, + { + "epoch": 0.3889081835962491, + "grad_norm": 7.262360912825414, + "learning_rate": 6.985858645943182e-06, + "loss": 17.438, + "step": 21276 + }, + { + "epoch": 0.38892646279269566, + "grad_norm": 6.147591248388254, + "learning_rate": 6.9855869795586826e-06, + "loss": 17.2427, + "step": 21277 + }, + { + "epoch": 0.38894474198914214, + "grad_norm": 5.633766962180324, + "learning_rate": 6.9853153062148746e-06, + "loss": 17.1198, + "step": 21278 + }, + { + "epoch": 0.3889630211855887, + "grad_norm": 6.725508579829761, + "learning_rate": 6.9850436259127096e-06, + "loss": 17.775, + "step": 21279 + }, + { + "epoch": 0.3889813003820352, + "grad_norm": 5.49034085767427, + "learning_rate": 6.984771938653138e-06, + "loss": 17.1014, + "step": 21280 + }, + { + "epoch": 0.38899957957848175, + "grad_norm": 7.925168542674946, + "learning_rate": 6.984500244437112e-06, + "loss": 18.0358, + "step": 21281 + }, + { + "epoch": 0.3890178587749282, + "grad_norm": 8.082796156974405, + "learning_rate": 6.9842285432655845e-06, + "loss": 18.4971, + "step": 21282 + }, + { + "epoch": 0.38903613797137476, + "grad_norm": 7.126770318695678, + "learning_rate": 6.9839568351395095e-06, + "loss": 17.8173, + "step": 21283 + }, + { + "epoch": 0.3890544171678213, + "grad_norm": 5.307411454487401, + "learning_rate": 6.983685120059838e-06, + "loss": 17.0083, + "step": 21284 + }, + { + "epoch": 0.38907269636426783, + "grad_norm": 6.385024001756688, + "learning_rate": 6.983413398027522e-06, + "loss": 17.6907, + "step": 21285 + }, + { + "epoch": 0.38909097556071437, + "grad_norm": 7.627134498652095, + "learning_rate": 6.983141669043514e-06, + "loss": 17.709, + "step": 21286 + }, + { + "epoch": 0.38910925475716085, + "grad_norm": 7.2880895484751695, + "learning_rate": 6.982869933108766e-06, + "loss": 17.7053, + "step": 21287 + }, + { + "epoch": 0.3891275339536074, + "grad_norm": 5.2131421028496865, + "learning_rate": 
6.982598190224233e-06, + "loss": 17.0961, + "step": 21288 + }, + { + "epoch": 0.3891458131500539, + "grad_norm": 5.955602767712263, + "learning_rate": 6.982326440390863e-06, + "loss": 17.3809, + "step": 21289 + }, + { + "epoch": 0.38916409234650046, + "grad_norm": 6.879215689190057, + "learning_rate": 6.982054683609613e-06, + "loss": 17.786, + "step": 21290 + }, + { + "epoch": 0.389182371542947, + "grad_norm": 6.564078461155945, + "learning_rate": 6.98178291988143e-06, + "loss": 17.4708, + "step": 21291 + }, + { + "epoch": 0.38920065073939347, + "grad_norm": 7.3157794080072795, + "learning_rate": 6.981511149207272e-06, + "loss": 17.9549, + "step": 21292 + }, + { + "epoch": 0.38921892993584, + "grad_norm": 7.74486846707566, + "learning_rate": 6.981239371588091e-06, + "loss": 18.1465, + "step": 21293 + }, + { + "epoch": 0.38923720913228654, + "grad_norm": 6.72366683820151, + "learning_rate": 6.980967587024836e-06, + "loss": 17.9271, + "step": 21294 + }, + { + "epoch": 0.3892554883287331, + "grad_norm": 5.436869165037432, + "learning_rate": 6.980695795518462e-06, + "loss": 17.1761, + "step": 21295 + }, + { + "epoch": 0.3892737675251796, + "grad_norm": 6.483646507250865, + "learning_rate": 6.980423997069921e-06, + "loss": 17.6019, + "step": 21296 + }, + { + "epoch": 0.3892920467216261, + "grad_norm": 5.810803759790037, + "learning_rate": 6.980152191680165e-06, + "loss": 17.2279, + "step": 21297 + }, + { + "epoch": 0.38931032591807263, + "grad_norm": 8.216575660666162, + "learning_rate": 6.979880379350148e-06, + "loss": 18.4252, + "step": 21298 + }, + { + "epoch": 0.38932860511451917, + "grad_norm": 6.958361073056182, + "learning_rate": 6.979608560080822e-06, + "loss": 17.8733, + "step": 21299 + }, + { + "epoch": 0.3893468843109657, + "grad_norm": 6.095786815181945, + "learning_rate": 6.979336733873139e-06, + "loss": 17.3995, + "step": 21300 + }, + { + "epoch": 0.38936516350741224, + "grad_norm": 6.236943758539881, + "learning_rate": 6.9790649007280544e-06, + "loss": 17.507, + "step": 21301 + }, + { + "epoch": 0.3893834427038587, + "grad_norm": 7.616428036504893, + "learning_rate": 6.978793060646517e-06, + "loss": 17.8563, + "step": 21302 + }, + { + "epoch": 0.38940172190030525, + "grad_norm": 7.625061378673789, + "learning_rate": 6.9785212136294835e-06, + "loss": 18.0646, + "step": 21303 + }, + { + "epoch": 0.3894200010967518, + "grad_norm": 7.409375462993235, + "learning_rate": 6.978249359677903e-06, + "loss": 17.8779, + "step": 21304 + }, + { + "epoch": 0.3894382802931983, + "grad_norm": 6.573970239933467, + "learning_rate": 6.977977498792732e-06, + "loss": 17.907, + "step": 21305 + }, + { + "epoch": 0.38945655948964486, + "grad_norm": 8.660433654350012, + "learning_rate": 6.97770563097492e-06, + "loss": 18.4487, + "step": 21306 + }, + { + "epoch": 0.38947483868609134, + "grad_norm": 8.063613108737854, + "learning_rate": 6.977433756225422e-06, + "loss": 18.5591, + "step": 21307 + }, + { + "epoch": 0.3894931178825379, + "grad_norm": 11.406531230982122, + "learning_rate": 6.9771618745451905e-06, + "loss": 18.5781, + "step": 21308 + }, + { + "epoch": 0.3895113970789844, + "grad_norm": 6.6462230803461875, + "learning_rate": 6.976889985935178e-06, + "loss": 17.8404, + "step": 21309 + }, + { + "epoch": 0.38952967627543095, + "grad_norm": 7.66327701944068, + "learning_rate": 6.976618090396339e-06, + "loss": 18.0304, + "step": 21310 + }, + { + "epoch": 0.3895479554718775, + "grad_norm": 6.727846858984811, + "learning_rate": 6.976346187929623e-06, + "loss": 17.8418, + "step": 21311 + }, + { + 
"epoch": 0.38956623466832396, + "grad_norm": 8.182694833695164, + "learning_rate": 6.976074278535986e-06, + "loss": 18.2975, + "step": 21312 + }, + { + "epoch": 0.3895845138647705, + "grad_norm": 6.926322785438558, + "learning_rate": 6.97580236221638e-06, + "loss": 17.9299, + "step": 21313 + }, + { + "epoch": 0.38960279306121703, + "grad_norm": 8.550408556159267, + "learning_rate": 6.975530438971759e-06, + "loss": 18.1579, + "step": 21314 + }, + { + "epoch": 0.38962107225766357, + "grad_norm": 6.021824361613114, + "learning_rate": 6.975258508803073e-06, + "loss": 17.5287, + "step": 21315 + }, + { + "epoch": 0.38963935145411005, + "grad_norm": 6.385797779918753, + "learning_rate": 6.974986571711279e-06, + "loss": 17.7935, + "step": 21316 + }, + { + "epoch": 0.3896576306505566, + "grad_norm": 5.637782107208922, + "learning_rate": 6.9747146276973285e-06, + "loss": 17.0898, + "step": 21317 + }, + { + "epoch": 0.3896759098470031, + "grad_norm": 5.861024354538965, + "learning_rate": 6.9744426767621745e-06, + "loss": 17.3149, + "step": 21318 + }, + { + "epoch": 0.38969418904344966, + "grad_norm": 7.489401160585184, + "learning_rate": 6.97417071890677e-06, + "loss": 18.1764, + "step": 21319 + }, + { + "epoch": 0.3897124682398962, + "grad_norm": 6.662047481926622, + "learning_rate": 6.973898754132068e-06, + "loss": 17.5862, + "step": 21320 + }, + { + "epoch": 0.3897307474363427, + "grad_norm": 6.698633197460809, + "learning_rate": 6.9736267824390235e-06, + "loss": 17.4519, + "step": 21321 + }, + { + "epoch": 0.3897490266327892, + "grad_norm": 5.485635347009831, + "learning_rate": 6.973354803828587e-06, + "loss": 17.1254, + "step": 21322 + }, + { + "epoch": 0.38976730582923574, + "grad_norm": 7.2791840642705035, + "learning_rate": 6.973082818301713e-06, + "loss": 17.8611, + "step": 21323 + }, + { + "epoch": 0.3897855850256823, + "grad_norm": 6.988091609826114, + "learning_rate": 6.972810825859357e-06, + "loss": 17.9001, + "step": 21324 + }, + { + "epoch": 0.3898038642221288, + "grad_norm": 7.631402227675237, + "learning_rate": 6.972538826502468e-06, + "loss": 17.6979, + "step": 21325 + }, + { + "epoch": 0.3898221434185753, + "grad_norm": 6.259318480862302, + "learning_rate": 6.972266820232002e-06, + "loss": 17.4767, + "step": 21326 + }, + { + "epoch": 0.38984042261502183, + "grad_norm": 6.440930814892935, + "learning_rate": 6.971994807048913e-06, + "loss": 17.3391, + "step": 21327 + }, + { + "epoch": 0.38985870181146837, + "grad_norm": 7.124678437374745, + "learning_rate": 6.971722786954153e-06, + "loss": 17.4527, + "step": 21328 + }, + { + "epoch": 0.3898769810079149, + "grad_norm": 6.218475416507136, + "learning_rate": 6.971450759948675e-06, + "loss": 17.5786, + "step": 21329 + }, + { + "epoch": 0.38989526020436144, + "grad_norm": 6.110012833418725, + "learning_rate": 6.971178726033434e-06, + "loss": 17.4619, + "step": 21330 + }, + { + "epoch": 0.3899135394008079, + "grad_norm": 5.964802041121985, + "learning_rate": 6.970906685209382e-06, + "loss": 17.4164, + "step": 21331 + }, + { + "epoch": 0.38993181859725445, + "grad_norm": 8.95644489526695, + "learning_rate": 6.9706346374774725e-06, + "loss": 18.7085, + "step": 21332 + }, + { + "epoch": 0.389950097793701, + "grad_norm": 6.611856409956911, + "learning_rate": 6.970362582838661e-06, + "loss": 17.3819, + "step": 21333 + }, + { + "epoch": 0.3899683769901475, + "grad_norm": 8.3973359875057, + "learning_rate": 6.9700905212938995e-06, + "loss": 18.4749, + "step": 21334 + }, + { + "epoch": 0.38998665618659406, + "grad_norm": 7.78691454636137, + 
"learning_rate": 6.969818452844141e-06, + "loss": 17.9705, + "step": 21335 + }, + { + "epoch": 0.39000493538304054, + "grad_norm": 7.820378788277538, + "learning_rate": 6.96954637749034e-06, + "loss": 18.2859, + "step": 21336 + }, + { + "epoch": 0.3900232145794871, + "grad_norm": 7.308608328921452, + "learning_rate": 6.969274295233449e-06, + "loss": 17.9314, + "step": 21337 + }, + { + "epoch": 0.3900414937759336, + "grad_norm": 5.854718803924013, + "learning_rate": 6.969002206074425e-06, + "loss": 17.4876, + "step": 21338 + }, + { + "epoch": 0.39005977297238015, + "grad_norm": 7.528831837381876, + "learning_rate": 6.968730110014217e-06, + "loss": 17.8921, + "step": 21339 + }, + { + "epoch": 0.3900780521688267, + "grad_norm": 5.982696355296637, + "learning_rate": 6.968458007053781e-06, + "loss": 17.5032, + "step": 21340 + }, + { + "epoch": 0.39009633136527316, + "grad_norm": 5.648254364233851, + "learning_rate": 6.968185897194071e-06, + "loss": 17.1825, + "step": 21341 + }, + { + "epoch": 0.3901146105617197, + "grad_norm": 7.828135749879042, + "learning_rate": 6.96791378043604e-06, + "loss": 17.9979, + "step": 21342 + }, + { + "epoch": 0.39013288975816623, + "grad_norm": 5.338550953651234, + "learning_rate": 6.967641656780641e-06, + "loss": 16.9122, + "step": 21343 + }, + { + "epoch": 0.39015116895461277, + "grad_norm": 6.0180414000488165, + "learning_rate": 6.9673695262288295e-06, + "loss": 17.5922, + "step": 21344 + }, + { + "epoch": 0.3901694481510593, + "grad_norm": 6.810479249087184, + "learning_rate": 6.967097388781558e-06, + "loss": 17.3728, + "step": 21345 + }, + { + "epoch": 0.3901877273475058, + "grad_norm": 6.766345601358699, + "learning_rate": 6.9668252444397825e-06, + "loss": 17.5683, + "step": 21346 + }, + { + "epoch": 0.3902060065439523, + "grad_norm": 5.059032446363334, + "learning_rate": 6.966553093204455e-06, + "loss": 17.015, + "step": 21347 + }, + { + "epoch": 0.39022428574039886, + "grad_norm": 6.346645110894162, + "learning_rate": 6.966280935076529e-06, + "loss": 17.3645, + "step": 21348 + }, + { + "epoch": 0.3902425649368454, + "grad_norm": 7.672018816807479, + "learning_rate": 6.966008770056959e-06, + "loss": 18.1503, + "step": 21349 + }, + { + "epoch": 0.3902608441332919, + "grad_norm": 7.026915302430959, + "learning_rate": 6.965736598146698e-06, + "loss": 18.068, + "step": 21350 + }, + { + "epoch": 0.3902791233297384, + "grad_norm": 6.5049983056414495, + "learning_rate": 6.9654644193467e-06, + "loss": 17.5476, + "step": 21351 + }, + { + "epoch": 0.39029740252618494, + "grad_norm": 8.315180938519537, + "learning_rate": 6.965192233657923e-06, + "loss": 17.9425, + "step": 21352 + }, + { + "epoch": 0.3903156817226315, + "grad_norm": 6.170959138669452, + "learning_rate": 6.964920041081315e-06, + "loss": 17.2836, + "step": 21353 + }, + { + "epoch": 0.390333960919078, + "grad_norm": 6.832621635727959, + "learning_rate": 6.964647841617834e-06, + "loss": 18.0796, + "step": 21354 + }, + { + "epoch": 0.3903522401155245, + "grad_norm": 6.560517745910997, + "learning_rate": 6.964375635268432e-06, + "loss": 17.8781, + "step": 21355 + }, + { + "epoch": 0.39037051931197103, + "grad_norm": 5.484001868073417, + "learning_rate": 6.964103422034065e-06, + "loss": 17.0136, + "step": 21356 + }, + { + "epoch": 0.39038879850841757, + "grad_norm": 5.99839690517561, + "learning_rate": 6.963831201915685e-06, + "loss": 17.2651, + "step": 21357 + }, + { + "epoch": 0.3904070777048641, + "grad_norm": 6.872484539048409, + "learning_rate": 6.963558974914248e-06, + "loss": 17.5205, + "step": 21358 + 
}, + { + "epoch": 0.39042535690131064, + "grad_norm": 5.506546562774649, + "learning_rate": 6.963286741030706e-06, + "loss": 16.9392, + "step": 21359 + }, + { + "epoch": 0.3904436360977571, + "grad_norm": 6.476657809631774, + "learning_rate": 6.963014500266015e-06, + "loss": 17.3503, + "step": 21360 + }, + { + "epoch": 0.39046191529420365, + "grad_norm": 5.417844657813247, + "learning_rate": 6.962742252621128e-06, + "loss": 17.1027, + "step": 21361 + }, + { + "epoch": 0.3904801944906502, + "grad_norm": 6.559373381981851, + "learning_rate": 6.962469998097001e-06, + "loss": 17.842, + "step": 21362 + }, + { + "epoch": 0.3904984736870967, + "grad_norm": 7.3015999342370455, + "learning_rate": 6.962197736694585e-06, + "loss": 17.8833, + "step": 21363 + }, + { + "epoch": 0.39051675288354326, + "grad_norm": 5.2950704084144, + "learning_rate": 6.961925468414838e-06, + "loss": 17.1602, + "step": 21364 + }, + { + "epoch": 0.39053503207998974, + "grad_norm": 6.701239897971625, + "learning_rate": 6.9616531932587115e-06, + "loss": 17.4443, + "step": 21365 + }, + { + "epoch": 0.3905533112764363, + "grad_norm": 6.224332576366943, + "learning_rate": 6.961380911227161e-06, + "loss": 17.3525, + "step": 21366 + }, + { + "epoch": 0.3905715904728828, + "grad_norm": 6.300876541429538, + "learning_rate": 6.961108622321141e-06, + "loss": 17.5501, + "step": 21367 + }, + { + "epoch": 0.39058986966932935, + "grad_norm": 4.867193414597315, + "learning_rate": 6.960836326541605e-06, + "loss": 16.8688, + "step": 21368 + }, + { + "epoch": 0.3906081488657759, + "grad_norm": 6.014989360490721, + "learning_rate": 6.960564023889508e-06, + "loss": 17.4641, + "step": 21369 + }, + { + "epoch": 0.39062642806222236, + "grad_norm": 6.288902481815039, + "learning_rate": 6.960291714365804e-06, + "loss": 17.2468, + "step": 21370 + }, + { + "epoch": 0.3906447072586689, + "grad_norm": 7.155918954696706, + "learning_rate": 6.960019397971448e-06, + "loss": 17.9502, + "step": 21371 + }, + { + "epoch": 0.39066298645511544, + "grad_norm": 8.354754346094992, + "learning_rate": 6.9597470747073936e-06, + "loss": 18.3131, + "step": 21372 + }, + { + "epoch": 0.39068126565156197, + "grad_norm": 5.953499741152448, + "learning_rate": 6.959474744574596e-06, + "loss": 16.9842, + "step": 21373 + }, + { + "epoch": 0.3906995448480085, + "grad_norm": 6.550937126367873, + "learning_rate": 6.95920240757401e-06, + "loss": 17.5067, + "step": 21374 + }, + { + "epoch": 0.390717824044455, + "grad_norm": 6.852875182419491, + "learning_rate": 6.958930063706588e-06, + "loss": 17.9154, + "step": 21375 + }, + { + "epoch": 0.3907361032409015, + "grad_norm": 6.767881163629693, + "learning_rate": 6.958657712973289e-06, + "loss": 17.5089, + "step": 21376 + }, + { + "epoch": 0.39075438243734806, + "grad_norm": 6.7261822161174045, + "learning_rate": 6.958385355375062e-06, + "loss": 17.4858, + "step": 21377 + }, + { + "epoch": 0.3907726616337946, + "grad_norm": 5.7532822623251345, + "learning_rate": 6.958112990912865e-06, + "loss": 17.4299, + "step": 21378 + }, + { + "epoch": 0.39079094083024113, + "grad_norm": 6.290265203103681, + "learning_rate": 6.957840619587653e-06, + "loss": 17.6446, + "step": 21379 + }, + { + "epoch": 0.3908092200266876, + "grad_norm": 7.53805699223157, + "learning_rate": 6.957568241400378e-06, + "loss": 18.0118, + "step": 21380 + }, + { + "epoch": 0.39082749922313414, + "grad_norm": 6.14553882754755, + "learning_rate": 6.957295856351997e-06, + "loss": 17.5025, + "step": 21381 + }, + { + "epoch": 0.3908457784195807, + "grad_norm": 6.223444201479475, 
+ "learning_rate": 6.957023464443462e-06, + "loss": 17.4475, + "step": 21382 + }, + { + "epoch": 0.3908640576160272, + "grad_norm": 6.902839466662411, + "learning_rate": 6.956751065675732e-06, + "loss": 17.7503, + "step": 21383 + }, + { + "epoch": 0.3908823368124737, + "grad_norm": 6.001556486376833, + "learning_rate": 6.956478660049759e-06, + "loss": 17.3742, + "step": 21384 + }, + { + "epoch": 0.39090061600892023, + "grad_norm": 7.430794110392538, + "learning_rate": 6.956206247566497e-06, + "loss": 18.3194, + "step": 21385 + }, + { + "epoch": 0.39091889520536677, + "grad_norm": 6.880564037341758, + "learning_rate": 6.955933828226903e-06, + "loss": 17.5664, + "step": 21386 + }, + { + "epoch": 0.3909371744018133, + "grad_norm": 6.151128109271224, + "learning_rate": 6.95566140203193e-06, + "loss": 17.5719, + "step": 21387 + }, + { + "epoch": 0.39095545359825984, + "grad_norm": 6.9677245026585215, + "learning_rate": 6.955388968982533e-06, + "loss": 17.7612, + "step": 21388 + }, + { + "epoch": 0.3909737327947063, + "grad_norm": 6.084702929661392, + "learning_rate": 6.955116529079668e-06, + "loss": 17.3963, + "step": 21389 + }, + { + "epoch": 0.39099201199115285, + "grad_norm": 5.680522535403873, + "learning_rate": 6.9548440823242894e-06, + "loss": 17.2712, + "step": 21390 + }, + { + "epoch": 0.3910102911875994, + "grad_norm": 6.214114351366993, + "learning_rate": 6.954571628717352e-06, + "loss": 17.3423, + "step": 21391 + }, + { + "epoch": 0.3910285703840459, + "grad_norm": 6.662398299688071, + "learning_rate": 6.95429916825981e-06, + "loss": 17.4247, + "step": 21392 + }, + { + "epoch": 0.39104684958049246, + "grad_norm": 8.062051943231458, + "learning_rate": 6.9540267009526195e-06, + "loss": 18.1852, + "step": 21393 + }, + { + "epoch": 0.39106512877693894, + "grad_norm": 6.170123012231771, + "learning_rate": 6.953754226796735e-06, + "loss": 17.4669, + "step": 21394 + }, + { + "epoch": 0.3910834079733855, + "grad_norm": 6.779457287130032, + "learning_rate": 6.9534817457931106e-06, + "loss": 17.3266, + "step": 21395 + }, + { + "epoch": 0.391101687169832, + "grad_norm": 6.6416770898103845, + "learning_rate": 6.953209257942703e-06, + "loss": 17.7326, + "step": 21396 + }, + { + "epoch": 0.39111996636627855, + "grad_norm": 5.499987273354564, + "learning_rate": 6.9529367632464675e-06, + "loss": 17.1806, + "step": 21397 + }, + { + "epoch": 0.3911382455627251, + "grad_norm": 6.564647982097723, + "learning_rate": 6.952664261705357e-06, + "loss": 17.5584, + "step": 21398 + }, + { + "epoch": 0.39115652475917156, + "grad_norm": 4.950783792128595, + "learning_rate": 6.9523917533203264e-06, + "loss": 16.7491, + "step": 21399 + }, + { + "epoch": 0.3911748039556181, + "grad_norm": 5.412107465896945, + "learning_rate": 6.952119238092334e-06, + "loss": 17.1481, + "step": 21400 + }, + { + "epoch": 0.39119308315206464, + "grad_norm": 6.919772189678604, + "learning_rate": 6.951846716022333e-06, + "loss": 17.8896, + "step": 21401 + }, + { + "epoch": 0.39121136234851117, + "grad_norm": 6.4788537182646175, + "learning_rate": 6.9515741871112765e-06, + "loss": 17.4922, + "step": 21402 + }, + { + "epoch": 0.3912296415449577, + "grad_norm": 6.40365381631644, + "learning_rate": 6.951301651360125e-06, + "loss": 17.3484, + "step": 21403 + }, + { + "epoch": 0.3912479207414042, + "grad_norm": 10.072362100586338, + "learning_rate": 6.951029108769828e-06, + "loss": 17.8765, + "step": 21404 + }, + { + "epoch": 0.3912661999378507, + "grad_norm": 7.4249881377671265, + "learning_rate": 6.950756559341344e-06, + "loss": 17.793, + 
"step": 21405 + }, + { + "epoch": 0.39128447913429726, + "grad_norm": 5.574785088089333, + "learning_rate": 6.950484003075627e-06, + "loss": 17.0611, + "step": 21406 + }, + { + "epoch": 0.3913027583307438, + "grad_norm": 8.11320467579111, + "learning_rate": 6.950211439973635e-06, + "loss": 17.8801, + "step": 21407 + }, + { + "epoch": 0.39132103752719033, + "grad_norm": 5.957545539376082, + "learning_rate": 6.949938870036319e-06, + "loss": 17.5233, + "step": 21408 + }, + { + "epoch": 0.3913393167236368, + "grad_norm": 8.042419256128051, + "learning_rate": 6.949666293264636e-06, + "loss": 18.249, + "step": 21409 + }, + { + "epoch": 0.39135759592008335, + "grad_norm": 6.69497399498759, + "learning_rate": 6.949393709659545e-06, + "loss": 17.7397, + "step": 21410 + }, + { + "epoch": 0.3913758751165299, + "grad_norm": 5.444913876067578, + "learning_rate": 6.949121119221996e-06, + "loss": 17.1343, + "step": 21411 + }, + { + "epoch": 0.3913941543129764, + "grad_norm": 6.117132649027419, + "learning_rate": 6.948848521952946e-06, + "loss": 17.2501, + "step": 21412 + }, + { + "epoch": 0.39141243350942295, + "grad_norm": 7.002488487919857, + "learning_rate": 6.948575917853353e-06, + "loss": 17.3118, + "step": 21413 + }, + { + "epoch": 0.39143071270586943, + "grad_norm": 7.05743563768558, + "learning_rate": 6.948303306924169e-06, + "loss": 17.9495, + "step": 21414 + }, + { + "epoch": 0.39144899190231597, + "grad_norm": 6.8819713184902245, + "learning_rate": 6.9480306891663506e-06, + "loss": 17.7145, + "step": 21415 + }, + { + "epoch": 0.3914672710987625, + "grad_norm": 7.144076480747118, + "learning_rate": 6.947758064580854e-06, + "loss": 17.8562, + "step": 21416 + }, + { + "epoch": 0.39148555029520904, + "grad_norm": 5.910437964592411, + "learning_rate": 6.947485433168637e-06, + "loss": 17.4718, + "step": 21417 + }, + { + "epoch": 0.3915038294916555, + "grad_norm": 7.346846741473479, + "learning_rate": 6.947212794930649e-06, + "loss": 17.8076, + "step": 21418 + }, + { + "epoch": 0.39152210868810206, + "grad_norm": 5.877024921883323, + "learning_rate": 6.94694014986785e-06, + "loss": 17.3449, + "step": 21419 + }, + { + "epoch": 0.3915403878845486, + "grad_norm": 7.500077927696738, + "learning_rate": 6.946667497981195e-06, + "loss": 17.7087, + "step": 21420 + }, + { + "epoch": 0.3915586670809951, + "grad_norm": 6.33079279859324, + "learning_rate": 6.946394839271641e-06, + "loss": 17.1952, + "step": 21421 + }, + { + "epoch": 0.39157694627744166, + "grad_norm": 7.604093867593265, + "learning_rate": 6.946122173740139e-06, + "loss": 18.1463, + "step": 21422 + }, + { + "epoch": 0.39159522547388814, + "grad_norm": 6.961778367265984, + "learning_rate": 6.945849501387649e-06, + "loss": 17.9901, + "step": 21423 + }, + { + "epoch": 0.3916135046703347, + "grad_norm": 8.235848893700775, + "learning_rate": 6.945576822215127e-06, + "loss": 18.8584, + "step": 21424 + }, + { + "epoch": 0.3916317838667812, + "grad_norm": 7.0501785748067345, + "learning_rate": 6.945304136223525e-06, + "loss": 17.8166, + "step": 21425 + }, + { + "epoch": 0.39165006306322775, + "grad_norm": 7.336789474080342, + "learning_rate": 6.945031443413801e-06, + "loss": 18.0062, + "step": 21426 + }, + { + "epoch": 0.3916683422596743, + "grad_norm": 8.57694652541582, + "learning_rate": 6.944758743786912e-06, + "loss": 18.2202, + "step": 21427 + }, + { + "epoch": 0.39168662145612076, + "grad_norm": 6.263094385257538, + "learning_rate": 6.944486037343812e-06, + "loss": 17.2668, + "step": 21428 + }, + { + "epoch": 0.3917049006525673, + "grad_norm": 
5.130306232587203, + "learning_rate": 6.944213324085456e-06, + "loss": 16.9621, + "step": 21429 + }, + { + "epoch": 0.39172317984901384, + "grad_norm": 6.770502139624869, + "learning_rate": 6.943940604012801e-06, + "loss": 17.6095, + "step": 21430 + }, + { + "epoch": 0.39174145904546037, + "grad_norm": 6.802158937988266, + "learning_rate": 6.943667877126803e-06, + "loss": 17.7199, + "step": 21431 + }, + { + "epoch": 0.3917597382419069, + "grad_norm": 9.665343205258099, + "learning_rate": 6.943395143428418e-06, + "loss": 17.8363, + "step": 21432 + }, + { + "epoch": 0.3917780174383534, + "grad_norm": 4.94746228459059, + "learning_rate": 6.943122402918603e-06, + "loss": 16.8839, + "step": 21433 + }, + { + "epoch": 0.3917962966347999, + "grad_norm": 7.560672697460262, + "learning_rate": 6.942849655598312e-06, + "loss": 17.9669, + "step": 21434 + }, + { + "epoch": 0.39181457583124646, + "grad_norm": 5.195320813019755, + "learning_rate": 6.942576901468501e-06, + "loss": 17.116, + "step": 21435 + }, + { + "epoch": 0.391832855027693, + "grad_norm": 5.082459973250335, + "learning_rate": 6.942304140530125e-06, + "loss": 16.927, + "step": 21436 + }, + { + "epoch": 0.39185113422413953, + "grad_norm": 5.687533246753722, + "learning_rate": 6.942031372784143e-06, + "loss": 17.1209, + "step": 21437 + }, + { + "epoch": 0.391869413420586, + "grad_norm": 6.92106101088448, + "learning_rate": 6.94175859823151e-06, + "loss": 17.5381, + "step": 21438 + }, + { + "epoch": 0.39188769261703255, + "grad_norm": 7.247306842169459, + "learning_rate": 6.9414858168731814e-06, + "loss": 17.9658, + "step": 21439 + }, + { + "epoch": 0.3919059718134791, + "grad_norm": 6.322222866411631, + "learning_rate": 6.941213028710113e-06, + "loss": 17.4772, + "step": 21440 + }, + { + "epoch": 0.3919242510099256, + "grad_norm": 6.506007597978849, + "learning_rate": 6.940940233743262e-06, + "loss": 17.5034, + "step": 21441 + }, + { + "epoch": 0.39194253020637215, + "grad_norm": 6.427767140365889, + "learning_rate": 6.9406674319735835e-06, + "loss": 17.938, + "step": 21442 + }, + { + "epoch": 0.39196080940281863, + "grad_norm": 8.099055649396236, + "learning_rate": 6.940394623402033e-06, + "loss": 17.9091, + "step": 21443 + }, + { + "epoch": 0.39197908859926517, + "grad_norm": 5.871450044502728, + "learning_rate": 6.940121808029569e-06, + "loss": 17.2447, + "step": 21444 + }, + { + "epoch": 0.3919973677957117, + "grad_norm": 6.802340027753673, + "learning_rate": 6.9398489858571475e-06, + "loss": 17.7163, + "step": 21445 + }, + { + "epoch": 0.39201564699215824, + "grad_norm": 6.690952033889844, + "learning_rate": 6.939576156885722e-06, + "loss": 17.4323, + "step": 21446 + }, + { + "epoch": 0.3920339261886048, + "grad_norm": 6.167617635270627, + "learning_rate": 6.9393033211162505e-06, + "loss": 17.3473, + "step": 21447 + }, + { + "epoch": 0.39205220538505126, + "grad_norm": 5.846650263525677, + "learning_rate": 6.939030478549691e-06, + "loss": 17.3274, + "step": 21448 + }, + { + "epoch": 0.3920704845814978, + "grad_norm": 5.950837418407083, + "learning_rate": 6.938757629186996e-06, + "loss": 17.0843, + "step": 21449 + }, + { + "epoch": 0.3920887637779443, + "grad_norm": 5.836430906477098, + "learning_rate": 6.938484773029123e-06, + "loss": 17.4089, + "step": 21450 + }, + { + "epoch": 0.39210704297439086, + "grad_norm": 6.608261446021528, + "learning_rate": 6.938211910077031e-06, + "loss": 17.6756, + "step": 21451 + }, + { + "epoch": 0.39212532217083734, + "grad_norm": 6.531429575224675, + "learning_rate": 6.937939040331674e-06, + "loss": 
17.2479, + "step": 21452 + }, + { + "epoch": 0.3921436013672839, + "grad_norm": 6.041658607655284, + "learning_rate": 6.937666163794008e-06, + "loss": 17.2461, + "step": 21453 + }, + { + "epoch": 0.3921618805637304, + "grad_norm": 9.557082172843252, + "learning_rate": 6.9373932804649915e-06, + "loss": 18.5878, + "step": 21454 + }, + { + "epoch": 0.39218015976017695, + "grad_norm": 6.205925750888624, + "learning_rate": 6.937120390345579e-06, + "loss": 17.3433, + "step": 21455 + }, + { + "epoch": 0.3921984389566235, + "grad_norm": 5.817668641710071, + "learning_rate": 6.936847493436727e-06, + "loss": 17.1638, + "step": 21456 + }, + { + "epoch": 0.39221671815306997, + "grad_norm": 5.751541819348251, + "learning_rate": 6.9365745897393935e-06, + "loss": 17.2232, + "step": 21457 + }, + { + "epoch": 0.3922349973495165, + "grad_norm": 5.882162509912774, + "learning_rate": 6.936301679254533e-06, + "loss": 16.9619, + "step": 21458 + }, + { + "epoch": 0.39225327654596304, + "grad_norm": 6.3118147297515215, + "learning_rate": 6.9360287619831035e-06, + "loss": 17.4475, + "step": 21459 + }, + { + "epoch": 0.3922715557424096, + "grad_norm": 5.410386490362895, + "learning_rate": 6.9357558379260615e-06, + "loss": 16.971, + "step": 21460 + }, + { + "epoch": 0.3922898349388561, + "grad_norm": 5.477369075142672, + "learning_rate": 6.9354829070843635e-06, + "loss": 17.1283, + "step": 21461 + }, + { + "epoch": 0.3923081141353026, + "grad_norm": 6.588263923847018, + "learning_rate": 6.935209969458967e-06, + "loss": 17.66, + "step": 21462 + }, + { + "epoch": 0.3923263933317491, + "grad_norm": 6.770909018164611, + "learning_rate": 6.934937025050826e-06, + "loss": 17.7483, + "step": 21463 + }, + { + "epoch": 0.39234467252819566, + "grad_norm": 6.5732233956474415, + "learning_rate": 6.9346640738608975e-06, + "loss": 17.818, + "step": 21464 + }, + { + "epoch": 0.3923629517246422, + "grad_norm": 6.378638997720948, + "learning_rate": 6.934391115890142e-06, + "loss": 17.5235, + "step": 21465 + }, + { + "epoch": 0.39238123092108873, + "grad_norm": 5.609027096527029, + "learning_rate": 6.934118151139512e-06, + "loss": 17.1316, + "step": 21466 + }, + { + "epoch": 0.3923995101175352, + "grad_norm": 8.59040276509835, + "learning_rate": 6.933845179609966e-06, + "loss": 18.5035, + "step": 21467 + }, + { + "epoch": 0.39241778931398175, + "grad_norm": 6.018861877900512, + "learning_rate": 6.933572201302459e-06, + "loss": 17.3577, + "step": 21468 + }, + { + "epoch": 0.3924360685104283, + "grad_norm": 7.895633672450876, + "learning_rate": 6.933299216217952e-06, + "loss": 18.2459, + "step": 21469 + }, + { + "epoch": 0.3924543477068748, + "grad_norm": 7.084441441369362, + "learning_rate": 6.933026224357397e-06, + "loss": 17.7863, + "step": 21470 + }, + { + "epoch": 0.39247262690332135, + "grad_norm": 5.888197528605958, + "learning_rate": 6.932753225721753e-06, + "loss": 17.2933, + "step": 21471 + }, + { + "epoch": 0.39249090609976783, + "grad_norm": 5.328045823701662, + "learning_rate": 6.9324802203119766e-06, + "loss": 17.3001, + "step": 21472 + }, + { + "epoch": 0.39250918529621437, + "grad_norm": 5.873985335509079, + "learning_rate": 6.9322072081290245e-06, + "loss": 17.1989, + "step": 21473 + }, + { + "epoch": 0.3925274644926609, + "grad_norm": 6.382507901772117, + "learning_rate": 6.931934189173854e-06, + "loss": 17.2451, + "step": 21474 + }, + { + "epoch": 0.39254574368910744, + "grad_norm": 6.416150137299243, + "learning_rate": 6.931661163447423e-06, + "loss": 17.5899, + "step": 21475 + }, + { + "epoch": 0.392564022885554, + 
"grad_norm": 5.733098397335905, + "learning_rate": 6.931388130950688e-06, + "loss": 17.3838, + "step": 21476 + }, + { + "epoch": 0.39258230208200046, + "grad_norm": 5.390450152159793, + "learning_rate": 6.931115091684603e-06, + "loss": 17.1822, + "step": 21477 + }, + { + "epoch": 0.392600581278447, + "grad_norm": 7.565431469379208, + "learning_rate": 6.930842045650127e-06, + "loss": 17.9168, + "step": 21478 + }, + { + "epoch": 0.3926188604748935, + "grad_norm": 6.791175985526804, + "learning_rate": 6.930568992848219e-06, + "loss": 17.561, + "step": 21479 + }, + { + "epoch": 0.39263713967134006, + "grad_norm": 6.845110080529336, + "learning_rate": 6.930295933279833e-06, + "loss": 17.6659, + "step": 21480 + }, + { + "epoch": 0.3926554188677866, + "grad_norm": 5.9062726952214835, + "learning_rate": 6.930022866945928e-06, + "loss": 17.3578, + "step": 21481 + }, + { + "epoch": 0.3926736980642331, + "grad_norm": 9.224183488796932, + "learning_rate": 6.929749793847459e-06, + "loss": 17.6501, + "step": 21482 + }, + { + "epoch": 0.3926919772606796, + "grad_norm": 6.297355400920313, + "learning_rate": 6.929476713985386e-06, + "loss": 17.4091, + "step": 21483 + }, + { + "epoch": 0.39271025645712615, + "grad_norm": 6.22445551869341, + "learning_rate": 6.9292036273606635e-06, + "loss": 17.293, + "step": 21484 + }, + { + "epoch": 0.3927285356535727, + "grad_norm": 6.79711374166879, + "learning_rate": 6.92893053397425e-06, + "loss": 17.6671, + "step": 21485 + }, + { + "epoch": 0.39274681485001917, + "grad_norm": 7.058928683656009, + "learning_rate": 6.928657433827102e-06, + "loss": 17.8986, + "step": 21486 + }, + { + "epoch": 0.3927650940464657, + "grad_norm": 5.999522263824738, + "learning_rate": 6.928384326920178e-06, + "loss": 17.5663, + "step": 21487 + }, + { + "epoch": 0.39278337324291224, + "grad_norm": 5.60797011067639, + "learning_rate": 6.928111213254434e-06, + "loss": 17.1847, + "step": 21488 + }, + { + "epoch": 0.3928016524393588, + "grad_norm": 6.880346185719323, + "learning_rate": 6.927838092830826e-06, + "loss": 17.7063, + "step": 21489 + }, + { + "epoch": 0.3928199316358053, + "grad_norm": 6.67807673595115, + "learning_rate": 6.927564965650315e-06, + "loss": 17.7538, + "step": 21490 + }, + { + "epoch": 0.3928382108322518, + "grad_norm": 6.885612102038492, + "learning_rate": 6.927291831713855e-06, + "loss": 17.6307, + "step": 21491 + }, + { + "epoch": 0.3928564900286983, + "grad_norm": 5.380259452494266, + "learning_rate": 6.927018691022403e-06, + "loss": 17.0805, + "step": 21492 + }, + { + "epoch": 0.39287476922514486, + "grad_norm": 5.43083297474748, + "learning_rate": 6.926745543576921e-06, + "loss": 17.1407, + "step": 21493 + }, + { + "epoch": 0.3928930484215914, + "grad_norm": 7.756199643110793, + "learning_rate": 6.926472389378361e-06, + "loss": 17.8113, + "step": 21494 + }, + { + "epoch": 0.39291132761803793, + "grad_norm": 10.660610872866012, + "learning_rate": 6.926199228427681e-06, + "loss": 17.8452, + "step": 21495 + }, + { + "epoch": 0.3929296068144844, + "grad_norm": 8.26578635522386, + "learning_rate": 6.925926060725843e-06, + "loss": 18.2691, + "step": 21496 + }, + { + "epoch": 0.39294788601093095, + "grad_norm": 6.998510645103447, + "learning_rate": 6.925652886273799e-06, + "loss": 17.9226, + "step": 21497 + }, + { + "epoch": 0.3929661652073775, + "grad_norm": 6.001831030454143, + "learning_rate": 6.9253797050725104e-06, + "loss": 17.4689, + "step": 21498 + }, + { + "epoch": 0.392984444403824, + "grad_norm": 7.423516301738987, + "learning_rate": 6.925106517122932e-06, + 
"loss": 18.0992, + "step": 21499 + }, + { + "epoch": 0.39300272360027055, + "grad_norm": 6.370552452597878, + "learning_rate": 6.9248333224260226e-06, + "loss": 17.4909, + "step": 21500 + }, + { + "epoch": 0.39302100279671703, + "grad_norm": 5.862008907473447, + "learning_rate": 6.924560120982739e-06, + "loss": 17.4139, + "step": 21501 + }, + { + "epoch": 0.39303928199316357, + "grad_norm": 5.935590540687427, + "learning_rate": 6.924286912794039e-06, + "loss": 17.1046, + "step": 21502 + }, + { + "epoch": 0.3930575611896101, + "grad_norm": 6.926847413856296, + "learning_rate": 6.924013697860882e-06, + "loss": 17.7685, + "step": 21503 + }, + { + "epoch": 0.39307584038605664, + "grad_norm": 5.970307707596561, + "learning_rate": 6.923740476184222e-06, + "loss": 17.6556, + "step": 21504 + }, + { + "epoch": 0.3930941195825032, + "grad_norm": 6.713674199500309, + "learning_rate": 6.923467247765019e-06, + "loss": 17.6955, + "step": 21505 + }, + { + "epoch": 0.39311239877894966, + "grad_norm": 5.577756592376602, + "learning_rate": 6.923194012604231e-06, + "loss": 17.3995, + "step": 21506 + }, + { + "epoch": 0.3931306779753962, + "grad_norm": 5.9016469407663825, + "learning_rate": 6.9229207707028146e-06, + "loss": 17.5544, + "step": 21507 + }, + { + "epoch": 0.39314895717184273, + "grad_norm": 6.434933392640182, + "learning_rate": 6.922647522061727e-06, + "loss": 17.6404, + "step": 21508 + }, + { + "epoch": 0.39316723636828926, + "grad_norm": 6.4413342933589925, + "learning_rate": 6.922374266681927e-06, + "loss": 17.9014, + "step": 21509 + }, + { + "epoch": 0.3931855155647358, + "grad_norm": 6.065567085535461, + "learning_rate": 6.922101004564373e-06, + "loss": 17.1599, + "step": 21510 + }, + { + "epoch": 0.3932037947611823, + "grad_norm": 7.44199599360796, + "learning_rate": 6.92182773571002e-06, + "loss": 17.7583, + "step": 21511 + }, + { + "epoch": 0.3932220739576288, + "grad_norm": 7.304037294401231, + "learning_rate": 6.92155446011983e-06, + "loss": 17.6521, + "step": 21512 + }, + { + "epoch": 0.39324035315407535, + "grad_norm": 6.205163333011176, + "learning_rate": 6.9212811777947565e-06, + "loss": 17.4399, + "step": 21513 + }, + { + "epoch": 0.3932586323505219, + "grad_norm": 6.609599701956432, + "learning_rate": 6.92100788873576e-06, + "loss": 17.4695, + "step": 21514 + }, + { + "epoch": 0.3932769115469684, + "grad_norm": 6.584113271674233, + "learning_rate": 6.920734592943796e-06, + "loss": 17.5762, + "step": 21515 + }, + { + "epoch": 0.3932951907434149, + "grad_norm": 6.800658262301761, + "learning_rate": 6.920461290419825e-06, + "loss": 17.6977, + "step": 21516 + }, + { + "epoch": 0.39331346993986144, + "grad_norm": 6.457193982312407, + "learning_rate": 6.920187981164804e-06, + "loss": 17.6557, + "step": 21517 + }, + { + "epoch": 0.393331749136308, + "grad_norm": 7.853754503596599, + "learning_rate": 6.919914665179691e-06, + "loss": 18.1926, + "step": 21518 + }, + { + "epoch": 0.3933500283327545, + "grad_norm": 6.6955447511823225, + "learning_rate": 6.919641342465444e-06, + "loss": 17.4948, + "step": 21519 + }, + { + "epoch": 0.393368307529201, + "grad_norm": 5.966810488350211, + "learning_rate": 6.91936801302302e-06, + "loss": 17.3947, + "step": 21520 + }, + { + "epoch": 0.3933865867256475, + "grad_norm": 6.077602715754855, + "learning_rate": 6.919094676853378e-06, + "loss": 17.0745, + "step": 21521 + }, + { + "epoch": 0.39340486592209406, + "grad_norm": 5.584685044400404, + "learning_rate": 6.918821333957475e-06, + "loss": 17.0942, + "step": 21522 + }, + { + "epoch": 0.3934231451185406, 
+ "grad_norm": 7.001524006806588, + "learning_rate": 6.91854798433627e-06, + "loss": 17.9249, + "step": 21523 + }, + { + "epoch": 0.39344142431498713, + "grad_norm": 6.599745661110747, + "learning_rate": 6.918274627990722e-06, + "loss": 17.7185, + "step": 21524 + }, + { + "epoch": 0.3934597035114336, + "grad_norm": 8.165704272755427, + "learning_rate": 6.918001264921786e-06, + "loss": 18.4602, + "step": 21525 + }, + { + "epoch": 0.39347798270788015, + "grad_norm": 6.1992574624140095, + "learning_rate": 6.917727895130423e-06, + "loss": 17.4023, + "step": 21526 + }, + { + "epoch": 0.3934962619043267, + "grad_norm": 7.073284774296896, + "learning_rate": 6.917454518617589e-06, + "loss": 17.572, + "step": 21527 + }, + { + "epoch": 0.3935145411007732, + "grad_norm": 7.292081190745441, + "learning_rate": 6.917181135384246e-06, + "loss": 17.8471, + "step": 21528 + }, + { + "epoch": 0.39353282029721975, + "grad_norm": 7.072267248965862, + "learning_rate": 6.9169077454313475e-06, + "loss": 17.8776, + "step": 21529 + }, + { + "epoch": 0.39355109949366623, + "grad_norm": 5.322622559874622, + "learning_rate": 6.916634348759853e-06, + "loss": 17.1781, + "step": 21530 + }, + { + "epoch": 0.39356937869011277, + "grad_norm": 7.077310316001486, + "learning_rate": 6.916360945370722e-06, + "loss": 17.94, + "step": 21531 + }, + { + "epoch": 0.3935876578865593, + "grad_norm": 6.440467751651259, + "learning_rate": 6.916087535264913e-06, + "loss": 17.45, + "step": 21532 + }, + { + "epoch": 0.39360593708300584, + "grad_norm": 7.40705158499139, + "learning_rate": 6.915814118443383e-06, + "loss": 17.6429, + "step": 21533 + }, + { + "epoch": 0.3936242162794524, + "grad_norm": 5.276780521040269, + "learning_rate": 6.915540694907092e-06, + "loss": 16.977, + "step": 21534 + }, + { + "epoch": 0.39364249547589886, + "grad_norm": 7.254487865106271, + "learning_rate": 6.9152672646569955e-06, + "loss": 17.8337, + "step": 21535 + }, + { + "epoch": 0.3936607746723454, + "grad_norm": 5.914857845336416, + "learning_rate": 6.914993827694053e-06, + "loss": 17.2768, + "step": 21536 + }, + { + "epoch": 0.39367905386879193, + "grad_norm": 5.750367364362483, + "learning_rate": 6.9147203840192246e-06, + "loss": 17.3195, + "step": 21537 + }, + { + "epoch": 0.39369733306523846, + "grad_norm": 6.5440410320419105, + "learning_rate": 6.914446933633467e-06, + "loss": 17.4559, + "step": 21538 + }, + { + "epoch": 0.393715612261685, + "grad_norm": 5.75955858533619, + "learning_rate": 6.914173476537739e-06, + "loss": 17.282, + "step": 21539 + }, + { + "epoch": 0.3937338914581315, + "grad_norm": 7.3803077544723585, + "learning_rate": 6.913900012732999e-06, + "loss": 17.4063, + "step": 21540 + }, + { + "epoch": 0.393752170654578, + "grad_norm": 6.739488790118494, + "learning_rate": 6.913626542220205e-06, + "loss": 17.7722, + "step": 21541 + }, + { + "epoch": 0.39377044985102455, + "grad_norm": 5.871982181584669, + "learning_rate": 6.913353065000317e-06, + "loss": 16.9883, + "step": 21542 + }, + { + "epoch": 0.3937887290474711, + "grad_norm": 6.849382124331771, + "learning_rate": 6.913079581074293e-06, + "loss": 17.9869, + "step": 21543 + }, + { + "epoch": 0.3938070082439176, + "grad_norm": 5.594692598085732, + "learning_rate": 6.91280609044309e-06, + "loss": 17.0854, + "step": 21544 + }, + { + "epoch": 0.3938252874403641, + "grad_norm": 5.758206562655694, + "learning_rate": 6.912532593107667e-06, + "loss": 17.2974, + "step": 21545 + }, + { + "epoch": 0.39384356663681064, + "grad_norm": 5.677117188934752, + "learning_rate": 6.912259089068984e-06, + 
"loss": 17.1663, + "step": 21546 + }, + { + "epoch": 0.3938618458332572, + "grad_norm": 6.861772832897799, + "learning_rate": 6.911985578327999e-06, + "loss": 17.7252, + "step": 21547 + }, + { + "epoch": 0.3938801250297037, + "grad_norm": 7.841222096859772, + "learning_rate": 6.911712060885672e-06, + "loss": 17.629, + "step": 21548 + }, + { + "epoch": 0.39389840422615024, + "grad_norm": 7.624450085180011, + "learning_rate": 6.9114385367429585e-06, + "loss": 18.033, + "step": 21549 + }, + { + "epoch": 0.3939166834225967, + "grad_norm": 7.868388483174876, + "learning_rate": 6.911165005900817e-06, + "loss": 18.2352, + "step": 21550 + }, + { + "epoch": 0.39393496261904326, + "grad_norm": 7.593997323363449, + "learning_rate": 6.91089146836021e-06, + "loss": 18.1603, + "step": 21551 + }, + { + "epoch": 0.3939532418154898, + "grad_norm": 9.515579700243576, + "learning_rate": 6.910617924122094e-06, + "loss": 17.5219, + "step": 21552 + }, + { + "epoch": 0.39397152101193633, + "grad_norm": 6.462431585095287, + "learning_rate": 6.9103443731874286e-06, + "loss": 17.7226, + "step": 21553 + }, + { + "epoch": 0.3939898002083828, + "grad_norm": 6.071070491291092, + "learning_rate": 6.9100708155571705e-06, + "loss": 17.3667, + "step": 21554 + }, + { + "epoch": 0.39400807940482935, + "grad_norm": 7.148145689259566, + "learning_rate": 6.909797251232282e-06, + "loss": 17.588, + "step": 21555 + }, + { + "epoch": 0.3940263586012759, + "grad_norm": 6.939347550435989, + "learning_rate": 6.9095236802137174e-06, + "loss": 17.5267, + "step": 21556 + }, + { + "epoch": 0.3940446377977224, + "grad_norm": 5.705199918354399, + "learning_rate": 6.909250102502439e-06, + "loss": 17.3792, + "step": 21557 + }, + { + "epoch": 0.39406291699416895, + "grad_norm": 7.28107304514705, + "learning_rate": 6.908976518099405e-06, + "loss": 18.0734, + "step": 21558 + }, + { + "epoch": 0.39408119619061543, + "grad_norm": 8.205423068008912, + "learning_rate": 6.908702927005574e-06, + "loss": 18.3236, + "step": 21559 + }, + { + "epoch": 0.39409947538706197, + "grad_norm": 6.7778303874948715, + "learning_rate": 6.9084293292219055e-06, + "loss": 17.8698, + "step": 21560 + }, + { + "epoch": 0.3941177545835085, + "grad_norm": 5.884320065261258, + "learning_rate": 6.908155724749357e-06, + "loss": 17.2822, + "step": 21561 + }, + { + "epoch": 0.39413603377995504, + "grad_norm": 6.104454965401443, + "learning_rate": 6.907882113588889e-06, + "loss": 17.4723, + "step": 21562 + }, + { + "epoch": 0.3941543129764016, + "grad_norm": 5.783308774319683, + "learning_rate": 6.907608495741458e-06, + "loss": 17.4832, + "step": 21563 + }, + { + "epoch": 0.39417259217284806, + "grad_norm": 6.2913806012946605, + "learning_rate": 6.907334871208024e-06, + "loss": 17.5201, + "step": 21564 + }, + { + "epoch": 0.3941908713692946, + "grad_norm": 5.327093358403174, + "learning_rate": 6.907061239989551e-06, + "loss": 16.931, + "step": 21565 + }, + { + "epoch": 0.39420915056574113, + "grad_norm": 5.1719985979508865, + "learning_rate": 6.9067876020869905e-06, + "loss": 17.1255, + "step": 21566 + }, + { + "epoch": 0.39422742976218766, + "grad_norm": 6.54416253618085, + "learning_rate": 6.906513957501306e-06, + "loss": 17.6631, + "step": 21567 + }, + { + "epoch": 0.3942457089586342, + "grad_norm": 6.253670207973528, + "learning_rate": 6.9062403062334545e-06, + "loss": 17.1095, + "step": 21568 + }, + { + "epoch": 0.3942639881550807, + "grad_norm": 7.244370332448828, + "learning_rate": 6.905966648284398e-06, + "loss": 17.801, + "step": 21569 + }, + { + "epoch": 
0.3942822673515272, + "grad_norm": 6.038511238306077, + "learning_rate": 6.905692983655092e-06, + "loss": 17.4237, + "step": 21570 + }, + { + "epoch": 0.39430054654797375, + "grad_norm": 7.0794140402264265, + "learning_rate": 6.905419312346499e-06, + "loss": 17.909, + "step": 21571 + }, + { + "epoch": 0.3943188257444203, + "grad_norm": 6.083436443068745, + "learning_rate": 6.905145634359576e-06, + "loss": 17.4752, + "step": 21572 + }, + { + "epoch": 0.3943371049408668, + "grad_norm": 7.731608014735897, + "learning_rate": 6.904871949695282e-06, + "loss": 18.1243, + "step": 21573 + }, + { + "epoch": 0.3943553841373133, + "grad_norm": 6.637765956525025, + "learning_rate": 6.9045982583545775e-06, + "loss": 17.7536, + "step": 21574 + }, + { + "epoch": 0.39437366333375984, + "grad_norm": 6.532880660508335, + "learning_rate": 6.904324560338422e-06, + "loss": 17.4753, + "step": 21575 + }, + { + "epoch": 0.3943919425302064, + "grad_norm": 6.107409359039475, + "learning_rate": 6.904050855647775e-06, + "loss": 17.5301, + "step": 21576 + }, + { + "epoch": 0.3944102217266529, + "grad_norm": 5.78441930579743, + "learning_rate": 6.903777144283593e-06, + "loss": 17.3339, + "step": 21577 + }, + { + "epoch": 0.39442850092309945, + "grad_norm": 5.36158213128983, + "learning_rate": 6.9035034262468385e-06, + "loss": 17.2054, + "step": 21578 + }, + { + "epoch": 0.3944467801195459, + "grad_norm": 7.98591769113503, + "learning_rate": 6.903229701538469e-06, + "loss": 17.8269, + "step": 21579 + }, + { + "epoch": 0.39446505931599246, + "grad_norm": 6.972481701306595, + "learning_rate": 6.902955970159446e-06, + "loss": 17.9411, + "step": 21580 + }, + { + "epoch": 0.394483338512439, + "grad_norm": 7.447622898864084, + "learning_rate": 6.902682232110727e-06, + "loss": 17.9744, + "step": 21581 + }, + { + "epoch": 0.39450161770888553, + "grad_norm": 6.104928288760058, + "learning_rate": 6.90240848739327e-06, + "loss": 17.5479, + "step": 21582 + }, + { + "epoch": 0.39451989690533207, + "grad_norm": 6.141051315020905, + "learning_rate": 6.9021347360080385e-06, + "loss": 17.4445, + "step": 21583 + }, + { + "epoch": 0.39453817610177855, + "grad_norm": 5.794997374883734, + "learning_rate": 6.901860977955989e-06, + "loss": 17.3963, + "step": 21584 + }, + { + "epoch": 0.3945564552982251, + "grad_norm": 6.731518293898058, + "learning_rate": 6.901587213238081e-06, + "loss": 17.715, + "step": 21585 + }, + { + "epoch": 0.3945747344946716, + "grad_norm": 7.191380738474616, + "learning_rate": 6.901313441855275e-06, + "loss": 17.5182, + "step": 21586 + }, + { + "epoch": 0.39459301369111816, + "grad_norm": 6.365568178045943, + "learning_rate": 6.901039663808531e-06, + "loss": 17.5003, + "step": 21587 + }, + { + "epoch": 0.39461129288756464, + "grad_norm": 7.978823305473955, + "learning_rate": 6.9007658790988086e-06, + "loss": 17.7458, + "step": 21588 + }, + { + "epoch": 0.39462957208401117, + "grad_norm": 6.411446773921178, + "learning_rate": 6.900492087727065e-06, + "loss": 17.4705, + "step": 21589 + }, + { + "epoch": 0.3946478512804577, + "grad_norm": 7.511496973229927, + "learning_rate": 6.900218289694262e-06, + "loss": 18.0328, + "step": 21590 + }, + { + "epoch": 0.39466613047690424, + "grad_norm": 6.437665494260655, + "learning_rate": 6.8999444850013604e-06, + "loss": 17.6575, + "step": 21591 + }, + { + "epoch": 0.3946844096733508, + "grad_norm": 9.144353950375423, + "learning_rate": 6.899670673649317e-06, + "loss": 18.3534, + "step": 21592 + }, + { + "epoch": 0.39470268886979726, + "grad_norm": 6.14218081702421, + 
"learning_rate": 6.8993968556390945e-06, + "loss": 17.2321, + "step": 21593 + }, + { + "epoch": 0.3947209680662438, + "grad_norm": 5.523745245072925, + "learning_rate": 6.899123030971648e-06, + "loss": 16.9665, + "step": 21594 + }, + { + "epoch": 0.39473924726269033, + "grad_norm": 5.152206424640226, + "learning_rate": 6.8988491996479414e-06, + "loss": 17.0409, + "step": 21595 + }, + { + "epoch": 0.39475752645913686, + "grad_norm": 6.574388933840838, + "learning_rate": 6.8985753616689335e-06, + "loss": 17.6122, + "step": 21596 + }, + { + "epoch": 0.3947758056555834, + "grad_norm": 5.76128631986071, + "learning_rate": 6.898301517035584e-06, + "loss": 17.3483, + "step": 21597 + }, + { + "epoch": 0.3947940848520299, + "grad_norm": 6.052356447756394, + "learning_rate": 6.8980276657488505e-06, + "loss": 17.416, + "step": 21598 + }, + { + "epoch": 0.3948123640484764, + "grad_norm": 6.493268264946741, + "learning_rate": 6.897753807809696e-06, + "loss": 17.8602, + "step": 21599 + }, + { + "epoch": 0.39483064324492295, + "grad_norm": 6.095529945679339, + "learning_rate": 6.897479943219079e-06, + "loss": 17.2012, + "step": 21600 + }, + { + "epoch": 0.3948489224413695, + "grad_norm": 6.111414826188851, + "learning_rate": 6.89720607197796e-06, + "loss": 17.449, + "step": 21601 + }, + { + "epoch": 0.394867201637816, + "grad_norm": 7.318563016598272, + "learning_rate": 6.896932194087298e-06, + "loss": 17.6215, + "step": 21602 + }, + { + "epoch": 0.3948854808342625, + "grad_norm": 6.858298435815988, + "learning_rate": 6.896658309548053e-06, + "loss": 17.624, + "step": 21603 + }, + { + "epoch": 0.39490376003070904, + "grad_norm": 6.4539344725539065, + "learning_rate": 6.896384418361185e-06, + "loss": 17.7222, + "step": 21604 + }, + { + "epoch": 0.3949220392271556, + "grad_norm": 6.983623354502389, + "learning_rate": 6.896110520527655e-06, + "loss": 17.5526, + "step": 21605 + }, + { + "epoch": 0.3949403184236021, + "grad_norm": 8.105170382444221, + "learning_rate": 6.8958366160484214e-06, + "loss": 18.1537, + "step": 21606 + }, + { + "epoch": 0.39495859762004865, + "grad_norm": 6.996364276421232, + "learning_rate": 6.895562704924446e-06, + "loss": 18.1652, + "step": 21607 + }, + { + "epoch": 0.3949768768164951, + "grad_norm": 6.259866267692265, + "learning_rate": 6.895288787156687e-06, + "loss": 17.491, + "step": 21608 + }, + { + "epoch": 0.39499515601294166, + "grad_norm": 5.608185112770575, + "learning_rate": 6.895014862746103e-06, + "loss": 17.0852, + "step": 21609 + }, + { + "epoch": 0.3950134352093882, + "grad_norm": 7.386344062264315, + "learning_rate": 6.89474093169366e-06, + "loss": 17.7781, + "step": 21610 + }, + { + "epoch": 0.39503171440583473, + "grad_norm": 7.345399211896186, + "learning_rate": 6.894466994000313e-06, + "loss": 18.0912, + "step": 21611 + }, + { + "epoch": 0.39504999360228127, + "grad_norm": 5.4158628072361, + "learning_rate": 6.894193049667024e-06, + "loss": 17.2701, + "step": 21612 + }, + { + "epoch": 0.39506827279872775, + "grad_norm": 5.333877593585367, + "learning_rate": 6.893919098694752e-06, + "loss": 17.0123, + "step": 21613 + }, + { + "epoch": 0.3950865519951743, + "grad_norm": 8.665147609340739, + "learning_rate": 6.893645141084458e-06, + "loss": 18.3743, + "step": 21614 + }, + { + "epoch": 0.3951048311916208, + "grad_norm": 7.752494504262045, + "learning_rate": 6.893371176837103e-06, + "loss": 18.1383, + "step": 21615 + }, + { + "epoch": 0.39512311038806736, + "grad_norm": 5.804033550700176, + "learning_rate": 6.893097205953645e-06, + "loss": 17.3385, + "step": 21616 
+ }, + { + "epoch": 0.3951413895845139, + "grad_norm": 5.224038404367742, + "learning_rate": 6.8928232284350474e-06, + "loss": 16.9664, + "step": 21617 + }, + { + "epoch": 0.39515966878096037, + "grad_norm": 6.568917726489629, + "learning_rate": 6.892549244282267e-06, + "loss": 17.9612, + "step": 21618 + }, + { + "epoch": 0.3951779479774069, + "grad_norm": 5.9351522721466585, + "learning_rate": 6.8922752534962656e-06, + "loss": 17.5031, + "step": 21619 + }, + { + "epoch": 0.39519622717385344, + "grad_norm": 6.6544735398237975, + "learning_rate": 6.892001256078005e-06, + "loss": 17.6356, + "step": 21620 + }, + { + "epoch": 0.3952145063703, + "grad_norm": 7.363304186122785, + "learning_rate": 6.891727252028444e-06, + "loss": 17.6523, + "step": 21621 + }, + { + "epoch": 0.39523278556674646, + "grad_norm": 6.160753133854578, + "learning_rate": 6.8914532413485415e-06, + "loss": 17.2715, + "step": 21622 + }, + { + "epoch": 0.395251064763193, + "grad_norm": 7.278316791103219, + "learning_rate": 6.89117922403926e-06, + "loss": 17.9876, + "step": 21623 + }, + { + "epoch": 0.39526934395963953, + "grad_norm": 6.2495401952620275, + "learning_rate": 6.8909052001015616e-06, + "loss": 17.3047, + "step": 21624 + }, + { + "epoch": 0.39528762315608607, + "grad_norm": 8.317036432905699, + "learning_rate": 6.8906311695364035e-06, + "loss": 18.1389, + "step": 21625 + }, + { + "epoch": 0.3953059023525326, + "grad_norm": 6.982178598956792, + "learning_rate": 6.890357132344746e-06, + "loss": 17.6884, + "step": 21626 + }, + { + "epoch": 0.3953241815489791, + "grad_norm": 7.886219479599872, + "learning_rate": 6.890083088527551e-06, + "loss": 17.9141, + "step": 21627 + }, + { + "epoch": 0.3953424607454256, + "grad_norm": 6.750399332128435, + "learning_rate": 6.8898090380857795e-06, + "loss": 17.7167, + "step": 21628 + }, + { + "epoch": 0.39536073994187215, + "grad_norm": 5.841837478245158, + "learning_rate": 6.889534981020392e-06, + "loss": 17.3937, + "step": 21629 + }, + { + "epoch": 0.3953790191383187, + "grad_norm": 5.332917163119414, + "learning_rate": 6.889260917332347e-06, + "loss": 17.2659, + "step": 21630 + }, + { + "epoch": 0.3953972983347652, + "grad_norm": 6.827078039101366, + "learning_rate": 6.888986847022607e-06, + "loss": 17.7501, + "step": 21631 + }, + { + "epoch": 0.3954155775312117, + "grad_norm": 6.815880046059335, + "learning_rate": 6.888712770092132e-06, + "loss": 17.9065, + "step": 21632 + }, + { + "epoch": 0.39543385672765824, + "grad_norm": 5.806367709665783, + "learning_rate": 6.8884386865418825e-06, + "loss": 17.2579, + "step": 21633 + }, + { + "epoch": 0.3954521359241048, + "grad_norm": 6.373527468823904, + "learning_rate": 6.888164596372819e-06, + "loss": 17.1608, + "step": 21634 + }, + { + "epoch": 0.3954704151205513, + "grad_norm": 7.91456275985367, + "learning_rate": 6.887890499585903e-06, + "loss": 18.3949, + "step": 21635 + }, + { + "epoch": 0.39548869431699785, + "grad_norm": 6.545420292366348, + "learning_rate": 6.887616396182094e-06, + "loss": 17.4711, + "step": 21636 + }, + { + "epoch": 0.3955069735134443, + "grad_norm": 6.150967724446916, + "learning_rate": 6.887342286162354e-06, + "loss": 17.3862, + "step": 21637 + }, + { + "epoch": 0.39552525270989086, + "grad_norm": 6.086142113223638, + "learning_rate": 6.887068169527643e-06, + "loss": 17.2927, + "step": 21638 + }, + { + "epoch": 0.3955435319063374, + "grad_norm": 6.752372891295724, + "learning_rate": 6.886794046278922e-06, + "loss": 17.7975, + "step": 21639 + }, + { + "epoch": 0.39556181110278393, + "grad_norm": 
5.734532996593663, + "learning_rate": 6.886519916417152e-06, + "loss": 17.3136, + "step": 21640 + }, + { + "epoch": 0.39558009029923047, + "grad_norm": 6.179620272208543, + "learning_rate": 6.886245779943291e-06, + "loss": 17.3563, + "step": 21641 + }, + { + "epoch": 0.39559836949567695, + "grad_norm": 6.746739964031116, + "learning_rate": 6.885971636858304e-06, + "loss": 17.4968, + "step": 21642 + }, + { + "epoch": 0.3956166486921235, + "grad_norm": 5.582131068210862, + "learning_rate": 6.88569748716315e-06, + "loss": 17.0617, + "step": 21643 + }, + { + "epoch": 0.39563492788857, + "grad_norm": 7.099942387867566, + "learning_rate": 6.8854233308587905e-06, + "loss": 17.7158, + "step": 21644 + }, + { + "epoch": 0.39565320708501656, + "grad_norm": 7.8912448083419, + "learning_rate": 6.885149167946185e-06, + "loss": 18.0542, + "step": 21645 + }, + { + "epoch": 0.3956714862814631, + "grad_norm": 5.404480522763143, + "learning_rate": 6.884874998426296e-06, + "loss": 16.9571, + "step": 21646 + }, + { + "epoch": 0.39568976547790957, + "grad_norm": 6.247336627791287, + "learning_rate": 6.8846008223000825e-06, + "loss": 17.5024, + "step": 21647 + }, + { + "epoch": 0.3957080446743561, + "grad_norm": 7.941220192674957, + "learning_rate": 6.884326639568508e-06, + "loss": 17.8973, + "step": 21648 + }, + { + "epoch": 0.39572632387080264, + "grad_norm": 6.379053972758212, + "learning_rate": 6.88405245023253e-06, + "loss": 17.4956, + "step": 21649 + }, + { + "epoch": 0.3957446030672492, + "grad_norm": 8.387081659392715, + "learning_rate": 6.883778254293113e-06, + "loss": 18.1086, + "step": 21650 + }, + { + "epoch": 0.3957628822636957, + "grad_norm": 7.3948425362543375, + "learning_rate": 6.883504051751217e-06, + "loss": 17.8278, + "step": 21651 + }, + { + "epoch": 0.3957811614601422, + "grad_norm": 8.063472087585946, + "learning_rate": 6.883229842607804e-06, + "loss": 18.4844, + "step": 21652 + }, + { + "epoch": 0.39579944065658873, + "grad_norm": 6.357610352214988, + "learning_rate": 6.882955626863832e-06, + "loss": 17.4236, + "step": 21653 + }, + { + "epoch": 0.39581771985303527, + "grad_norm": 5.873895105905679, + "learning_rate": 6.882681404520263e-06, + "loss": 17.1778, + "step": 21654 + }, + { + "epoch": 0.3958359990494818, + "grad_norm": 7.428820280037442, + "learning_rate": 6.882407175578061e-06, + "loss": 18.0671, + "step": 21655 + }, + { + "epoch": 0.3958542782459283, + "grad_norm": 7.706670152117786, + "learning_rate": 6.8821329400381844e-06, + "loss": 17.9579, + "step": 21656 + }, + { + "epoch": 0.3958725574423748, + "grad_norm": 7.5884015572154855, + "learning_rate": 6.881858697901596e-06, + "loss": 17.7861, + "step": 21657 + }, + { + "epoch": 0.39589083663882135, + "grad_norm": 7.288277868186172, + "learning_rate": 6.881584449169254e-06, + "loss": 18.0603, + "step": 21658 + }, + { + "epoch": 0.3959091158352679, + "grad_norm": 5.498976681355198, + "learning_rate": 6.881310193842123e-06, + "loss": 17.179, + "step": 21659 + }, + { + "epoch": 0.3959273950317144, + "grad_norm": 6.68694624285585, + "learning_rate": 6.881035931921164e-06, + "loss": 17.604, + "step": 21660 + }, + { + "epoch": 0.3959456742281609, + "grad_norm": 5.453455096321566, + "learning_rate": 6.880761663407336e-06, + "loss": 17.2405, + "step": 21661 + }, + { + "epoch": 0.39596395342460744, + "grad_norm": 6.035074401662799, + "learning_rate": 6.880487388301603e-06, + "loss": 17.5, + "step": 21662 + }, + { + "epoch": 0.395982232621054, + "grad_norm": 9.092492814031782, + "learning_rate": 6.880213106604923e-06, + "loss": 17.4662, 
+ "step": 21663 + }, + { + "epoch": 0.3960005118175005, + "grad_norm": 5.664062192085136, + "learning_rate": 6.87993881831826e-06, + "loss": 17.2395, + "step": 21664 + }, + { + "epoch": 0.39601879101394705, + "grad_norm": 7.052725853189261, + "learning_rate": 6.879664523442575e-06, + "loss": 17.7064, + "step": 21665 + }, + { + "epoch": 0.3960370702103935, + "grad_norm": 7.638767286611265, + "learning_rate": 6.87939022197883e-06, + "loss": 18.2496, + "step": 21666 + }, + { + "epoch": 0.39605534940684006, + "grad_norm": 5.9105272706982594, + "learning_rate": 6.879115913927984e-06, + "loss": 17.2275, + "step": 21667 + }, + { + "epoch": 0.3960736286032866, + "grad_norm": 5.679793639716042, + "learning_rate": 6.878841599290998e-06, + "loss": 17.4539, + "step": 21668 + }, + { + "epoch": 0.39609190779973313, + "grad_norm": 6.795324756701742, + "learning_rate": 6.878567278068838e-06, + "loss": 17.7713, + "step": 21669 + }, + { + "epoch": 0.39611018699617967, + "grad_norm": 5.698620339964075, + "learning_rate": 6.8782929502624615e-06, + "loss": 17.2789, + "step": 21670 + }, + { + "epoch": 0.39612846619262615, + "grad_norm": 6.147221977014783, + "learning_rate": 6.878018615872832e-06, + "loss": 17.3817, + "step": 21671 + }, + { + "epoch": 0.3961467453890727, + "grad_norm": 6.531815043610964, + "learning_rate": 6.877744274900911e-06, + "loss": 17.4807, + "step": 21672 + }, + { + "epoch": 0.3961650245855192, + "grad_norm": 6.115114026166975, + "learning_rate": 6.8774699273476576e-06, + "loss": 17.2749, + "step": 21673 + }, + { + "epoch": 0.39618330378196576, + "grad_norm": 7.012860703434272, + "learning_rate": 6.877195573214035e-06, + "loss": 17.7193, + "step": 21674 + }, + { + "epoch": 0.3962015829784123, + "grad_norm": 6.41914177435871, + "learning_rate": 6.876921212501004e-06, + "loss": 17.3767, + "step": 21675 + }, + { + "epoch": 0.3962198621748588, + "grad_norm": 6.946961562007794, + "learning_rate": 6.876646845209529e-06, + "loss": 17.9227, + "step": 21676 + }, + { + "epoch": 0.3962381413713053, + "grad_norm": 6.488254765236719, + "learning_rate": 6.876372471340569e-06, + "loss": 17.7707, + "step": 21677 + }, + { + "epoch": 0.39625642056775184, + "grad_norm": 7.564070027824658, + "learning_rate": 6.876098090895086e-06, + "loss": 18.2453, + "step": 21678 + }, + { + "epoch": 0.3962746997641984, + "grad_norm": 6.780838371917999, + "learning_rate": 6.875823703874043e-06, + "loss": 17.5928, + "step": 21679 + }, + { + "epoch": 0.3962929789606449, + "grad_norm": 7.090326527895478, + "learning_rate": 6.8755493102784e-06, + "loss": 17.6829, + "step": 21680 + }, + { + "epoch": 0.3963112581570914, + "grad_norm": 6.033272197405425, + "learning_rate": 6.875274910109117e-06, + "loss": 17.3839, + "step": 21681 + }, + { + "epoch": 0.39632953735353793, + "grad_norm": 5.981024208762855, + "learning_rate": 6.875000503367162e-06, + "loss": 17.2835, + "step": 21682 + }, + { + "epoch": 0.39634781654998447, + "grad_norm": 6.067834244801076, + "learning_rate": 6.874726090053492e-06, + "loss": 17.361, + "step": 21683 + }, + { + "epoch": 0.396366095746431, + "grad_norm": 24.24281158832486, + "learning_rate": 6.874451670169069e-06, + "loss": 17.6624, + "step": 21684 + }, + { + "epoch": 0.39638437494287754, + "grad_norm": 5.5553463901806985, + "learning_rate": 6.874177243714856e-06, + "loss": 17.2165, + "step": 21685 + }, + { + "epoch": 0.396402654139324, + "grad_norm": 6.062006121777724, + "learning_rate": 6.8739028106918135e-06, + "loss": 17.4607, + "step": 21686 + }, + { + "epoch": 0.39642093333577055, + "grad_norm": 
7.17781354196199, + "learning_rate": 6.873628371100906e-06, + "loss": 18.1326, + "step": 21687 + }, + { + "epoch": 0.3964392125322171, + "grad_norm": 7.301042531092424, + "learning_rate": 6.873353924943091e-06, + "loss": 17.8408, + "step": 21688 + }, + { + "epoch": 0.3964574917286636, + "grad_norm": 7.357280041350993, + "learning_rate": 6.873079472219335e-06, + "loss": 18.2311, + "step": 21689 + }, + { + "epoch": 0.3964757709251101, + "grad_norm": 5.448759267322615, + "learning_rate": 6.8728050129305975e-06, + "loss": 17.1483, + "step": 21690 + }, + { + "epoch": 0.39649405012155664, + "grad_norm": 5.594364101224168, + "learning_rate": 6.872530547077841e-06, + "loss": 17.3468, + "step": 21691 + }, + { + "epoch": 0.3965123293180032, + "grad_norm": 5.505243172890195, + "learning_rate": 6.872256074662027e-06, + "loss": 17.0655, + "step": 21692 + }, + { + "epoch": 0.3965306085144497, + "grad_norm": 5.557656220302946, + "learning_rate": 6.8719815956841196e-06, + "loss": 17.3279, + "step": 21693 + }, + { + "epoch": 0.39654888771089625, + "grad_norm": 6.88965028193292, + "learning_rate": 6.8717071101450785e-06, + "loss": 17.6607, + "step": 21694 + }, + { + "epoch": 0.3965671669073427, + "grad_norm": 5.57957657299959, + "learning_rate": 6.871432618045864e-06, + "loss": 17.1442, + "step": 21695 + }, + { + "epoch": 0.39658544610378926, + "grad_norm": 6.508591444589206, + "learning_rate": 6.871158119387442e-06, + "loss": 17.5329, + "step": 21696 + }, + { + "epoch": 0.3966037253002358, + "grad_norm": 7.373474480849557, + "learning_rate": 6.870883614170774e-06, + "loss": 17.3962, + "step": 21697 + }, + { + "epoch": 0.39662200449668233, + "grad_norm": 5.911328579089407, + "learning_rate": 6.8706091023968215e-06, + "loss": 17.3305, + "step": 21698 + }, + { + "epoch": 0.39664028369312887, + "grad_norm": 5.494601161598914, + "learning_rate": 6.870334584066546e-06, + "loss": 17.2719, + "step": 21699 + }, + { + "epoch": 0.39665856288957535, + "grad_norm": 7.0876504129624385, + "learning_rate": 6.87006005918091e-06, + "loss": 17.8883, + "step": 21700 + }, + { + "epoch": 0.3966768420860219, + "grad_norm": 6.69949522211074, + "learning_rate": 6.8697855277408756e-06, + "loss": 17.6359, + "step": 21701 + }, + { + "epoch": 0.3966951212824684, + "grad_norm": 7.254559317539576, + "learning_rate": 6.869510989747404e-06, + "loss": 17.7826, + "step": 21702 + }, + { + "epoch": 0.39671340047891496, + "grad_norm": 7.387419531232121, + "learning_rate": 6.869236445201462e-06, + "loss": 17.9149, + "step": 21703 + }, + { + "epoch": 0.3967316796753615, + "grad_norm": 6.903547161426229, + "learning_rate": 6.8689618941040045e-06, + "loss": 17.7069, + "step": 21704 + }, + { + "epoch": 0.396749958871808, + "grad_norm": 6.6084817588078755, + "learning_rate": 6.868687336455999e-06, + "loss": 17.6637, + "step": 21705 + }, + { + "epoch": 0.3967682380682545, + "grad_norm": 6.407653100612558, + "learning_rate": 6.868412772258407e-06, + "loss": 17.4011, + "step": 21706 + }, + { + "epoch": 0.39678651726470104, + "grad_norm": 6.483539091650716, + "learning_rate": 6.868138201512191e-06, + "loss": 17.5027, + "step": 21707 + }, + { + "epoch": 0.3968047964611476, + "grad_norm": 6.656166325296128, + "learning_rate": 6.867863624218313e-06, + "loss": 17.9308, + "step": 21708 + }, + { + "epoch": 0.3968230756575941, + "grad_norm": 6.37100398672029, + "learning_rate": 6.867589040377734e-06, + "loss": 17.5063, + "step": 21709 + }, + { + "epoch": 0.3968413548540406, + "grad_norm": 6.419081622356355, + "learning_rate": 6.867314449991418e-06, + "loss": 
17.6277, + "step": 21710 + }, + { + "epoch": 0.39685963405048713, + "grad_norm": 7.351393233610686, + "learning_rate": 6.867039853060326e-06, + "loss": 17.8415, + "step": 21711 + }, + { + "epoch": 0.39687791324693367, + "grad_norm": 8.340268986995127, + "learning_rate": 6.866765249585422e-06, + "loss": 18.6467, + "step": 21712 + }, + { + "epoch": 0.3968961924433802, + "grad_norm": 5.347399172394467, + "learning_rate": 6.866490639567667e-06, + "loss": 16.9904, + "step": 21713 + }, + { + "epoch": 0.39691447163982674, + "grad_norm": 6.333002298780255, + "learning_rate": 6.8662160230080254e-06, + "loss": 17.5916, + "step": 21714 + }, + { + "epoch": 0.3969327508362732, + "grad_norm": 6.659012769856985, + "learning_rate": 6.8659413999074574e-06, + "loss": 17.5251, + "step": 21715 + }, + { + "epoch": 0.39695103003271975, + "grad_norm": 6.221801363568825, + "learning_rate": 6.865666770266928e-06, + "loss": 17.5295, + "step": 21716 + }, + { + "epoch": 0.3969693092291663, + "grad_norm": 7.381124587032561, + "learning_rate": 6.865392134087398e-06, + "loss": 17.8972, + "step": 21717 + }, + { + "epoch": 0.3969875884256128, + "grad_norm": 7.784792840951576, + "learning_rate": 6.86511749136983e-06, + "loss": 18.134, + "step": 21718 + }, + { + "epoch": 0.39700586762205936, + "grad_norm": 5.986191625320928, + "learning_rate": 6.864842842115187e-06, + "loss": 17.208, + "step": 21719 + }, + { + "epoch": 0.39702414681850584, + "grad_norm": 6.727676440293558, + "learning_rate": 6.864568186324432e-06, + "loss": 17.936, + "step": 21720 + }, + { + "epoch": 0.3970424260149524, + "grad_norm": 5.727698637854074, + "learning_rate": 6.864293523998529e-06, + "loss": 17.2057, + "step": 21721 + }, + { + "epoch": 0.3970607052113989, + "grad_norm": 6.279105869482758, + "learning_rate": 6.864018855138436e-06, + "loss": 17.5633, + "step": 21722 + }, + { + "epoch": 0.39707898440784545, + "grad_norm": 6.476167616880967, + "learning_rate": 6.86374417974512e-06, + "loss": 17.5121, + "step": 21723 + }, + { + "epoch": 0.39709726360429193, + "grad_norm": 7.9602930994660746, + "learning_rate": 6.8634694978195436e-06, + "loss": 18.1208, + "step": 21724 + }, + { + "epoch": 0.39711554280073846, + "grad_norm": 7.379102487241958, + "learning_rate": 6.863194809362666e-06, + "loss": 17.898, + "step": 21725 + }, + { + "epoch": 0.397133821997185, + "grad_norm": 8.96666518078975, + "learning_rate": 6.862920114375453e-06, + "loss": 18.7539, + "step": 21726 + }, + { + "epoch": 0.39715210119363153, + "grad_norm": 6.3219931753442, + "learning_rate": 6.862645412858867e-06, + "loss": 17.4428, + "step": 21727 + }, + { + "epoch": 0.39717038039007807, + "grad_norm": 6.353760273395351, + "learning_rate": 6.862370704813871e-06, + "loss": 17.3138, + "step": 21728 + }, + { + "epoch": 0.39718865958652455, + "grad_norm": 5.657058150834073, + "learning_rate": 6.862095990241426e-06, + "loss": 17.1173, + "step": 21729 + }, + { + "epoch": 0.3972069387829711, + "grad_norm": 6.049000545032745, + "learning_rate": 6.861821269142498e-06, + "loss": 17.4315, + "step": 21730 + }, + { + "epoch": 0.3972252179794176, + "grad_norm": 8.969556896900917, + "learning_rate": 6.8615465415180475e-06, + "loss": 18.502, + "step": 21731 + }, + { + "epoch": 0.39724349717586416, + "grad_norm": 6.173650364088976, + "learning_rate": 6.861271807369037e-06, + "loss": 17.4822, + "step": 21732 + }, + { + "epoch": 0.3972617763723107, + "grad_norm": 6.580220131505144, + "learning_rate": 6.86099706669643e-06, + "loss": 17.5246, + "step": 21733 + }, + { + "epoch": 0.3972800555687572, + 
"grad_norm": 6.1567155291020494, + "learning_rate": 6.8607223195011915e-06, + "loss": 17.4647, + "step": 21734 + }, + { + "epoch": 0.3972983347652037, + "grad_norm": 7.324932987615221, + "learning_rate": 6.860447565784281e-06, + "loss": 18.156, + "step": 21735 + }, + { + "epoch": 0.39731661396165024, + "grad_norm": 8.103877960545196, + "learning_rate": 6.8601728055466635e-06, + "loss": 18.2347, + "step": 21736 + }, + { + "epoch": 0.3973348931580968, + "grad_norm": 6.70610024045635, + "learning_rate": 6.859898038789301e-06, + "loss": 17.7807, + "step": 21737 + }, + { + "epoch": 0.3973531723545433, + "grad_norm": 5.7562338435786415, + "learning_rate": 6.85962326551316e-06, + "loss": 17.2446, + "step": 21738 + }, + { + "epoch": 0.3973714515509898, + "grad_norm": 7.755936289396021, + "learning_rate": 6.8593484857192e-06, + "loss": 17.9505, + "step": 21739 + }, + { + "epoch": 0.39738973074743633, + "grad_norm": 6.438809062253919, + "learning_rate": 6.859073699408383e-06, + "loss": 17.6234, + "step": 21740 + }, + { + "epoch": 0.39740800994388287, + "grad_norm": 6.267715065425261, + "learning_rate": 6.858798906581675e-06, + "loss": 17.6766, + "step": 21741 + }, + { + "epoch": 0.3974262891403294, + "grad_norm": 7.4280055700738306, + "learning_rate": 6.858524107240039e-06, + "loss": 17.7466, + "step": 21742 + }, + { + "epoch": 0.39744456833677594, + "grad_norm": 6.236803296639654, + "learning_rate": 6.858249301384437e-06, + "loss": 17.3597, + "step": 21743 + }, + { + "epoch": 0.3974628475332224, + "grad_norm": 6.489969263565319, + "learning_rate": 6.8579744890158305e-06, + "loss": 17.6892, + "step": 21744 + }, + { + "epoch": 0.39748112672966895, + "grad_norm": 5.842214122340492, + "learning_rate": 6.857699670135186e-06, + "loss": 17.2405, + "step": 21745 + }, + { + "epoch": 0.3974994059261155, + "grad_norm": 7.86180178100099, + "learning_rate": 6.857424844743465e-06, + "loss": 18.4659, + "step": 21746 + }, + { + "epoch": 0.397517685122562, + "grad_norm": 7.684834211311984, + "learning_rate": 6.857150012841633e-06, + "loss": 18.1623, + "step": 21747 + }, + { + "epoch": 0.39753596431900856, + "grad_norm": 4.927592761924374, + "learning_rate": 6.8568751744306505e-06, + "loss": 16.9394, + "step": 21748 + }, + { + "epoch": 0.39755424351545504, + "grad_norm": 6.420163071271816, + "learning_rate": 6.85660032951148e-06, + "loss": 17.5458, + "step": 21749 + }, + { + "epoch": 0.3975725227119016, + "grad_norm": 6.5515169109406415, + "learning_rate": 6.856325478085087e-06, + "loss": 17.6797, + "step": 21750 + }, + { + "epoch": 0.3975908019083481, + "grad_norm": 6.616783364994676, + "learning_rate": 6.856050620152435e-06, + "loss": 17.6536, + "step": 21751 + }, + { + "epoch": 0.39760908110479465, + "grad_norm": 6.223348051198621, + "learning_rate": 6.8557757557144874e-06, + "loss": 17.3968, + "step": 21752 + }, + { + "epoch": 0.3976273603012412, + "grad_norm": 6.569699690737651, + "learning_rate": 6.855500884772206e-06, + "loss": 17.5949, + "step": 21753 + }, + { + "epoch": 0.39764563949768766, + "grad_norm": 6.85853425702792, + "learning_rate": 6.855226007326554e-06, + "loss": 17.9053, + "step": 21754 + }, + { + "epoch": 0.3976639186941342, + "grad_norm": 6.80916540211321, + "learning_rate": 6.854951123378497e-06, + "loss": 17.6419, + "step": 21755 + }, + { + "epoch": 0.39768219789058074, + "grad_norm": 5.705231847810381, + "learning_rate": 6.854676232928997e-06, + "loss": 17.3183, + "step": 21756 + }, + { + "epoch": 0.39770047708702727, + "grad_norm": 6.397163156391924, + "learning_rate": 
6.854401335979019e-06, + "loss": 17.4293, + "step": 21757 + }, + { + "epoch": 0.39771875628347375, + "grad_norm": 7.026852961160377, + "learning_rate": 6.854126432529523e-06, + "loss": 17.9778, + "step": 21758 + }, + { + "epoch": 0.3977370354799203, + "grad_norm": 6.489656750077679, + "learning_rate": 6.853851522581476e-06, + "loss": 17.6702, + "step": 21759 + }, + { + "epoch": 0.3977553146763668, + "grad_norm": 5.046174196276279, + "learning_rate": 6.85357660613584e-06, + "loss": 16.9933, + "step": 21760 + }, + { + "epoch": 0.39777359387281336, + "grad_norm": 5.231458690561238, + "learning_rate": 6.853301683193579e-06, + "loss": 17.0756, + "step": 21761 + }, + { + "epoch": 0.3977918730692599, + "grad_norm": 5.865646842255912, + "learning_rate": 6.853026753755656e-06, + "loss": 17.4636, + "step": 21762 + }, + { + "epoch": 0.3978101522657064, + "grad_norm": 5.577294626414456, + "learning_rate": 6.852751817823035e-06, + "loss": 17.1527, + "step": 21763 + }, + { + "epoch": 0.3978284314621529, + "grad_norm": 6.030721027048702, + "learning_rate": 6.85247687539668e-06, + "loss": 17.4301, + "step": 21764 + }, + { + "epoch": 0.39784671065859945, + "grad_norm": 5.51079709212565, + "learning_rate": 6.8522019264775544e-06, + "loss": 17.3503, + "step": 21765 + }, + { + "epoch": 0.397864989855046, + "grad_norm": 6.2641449921534775, + "learning_rate": 6.851926971066623e-06, + "loss": 17.3794, + "step": 21766 + }, + { + "epoch": 0.3978832690514925, + "grad_norm": 6.393330466474008, + "learning_rate": 6.851652009164846e-06, + "loss": 17.3104, + "step": 21767 + }, + { + "epoch": 0.397901548247939, + "grad_norm": 5.537558818735297, + "learning_rate": 6.851377040773189e-06, + "loss": 17.3696, + "step": 21768 + }, + { + "epoch": 0.39791982744438553, + "grad_norm": 6.64380040405659, + "learning_rate": 6.851102065892618e-06, + "loss": 17.5596, + "step": 21769 + }, + { + "epoch": 0.39793810664083207, + "grad_norm": 6.084923088989286, + "learning_rate": 6.850827084524094e-06, + "loss": 17.4808, + "step": 21770 + }, + { + "epoch": 0.3979563858372786, + "grad_norm": 6.786046481175895, + "learning_rate": 6.850552096668583e-06, + "loss": 16.8464, + "step": 21771 + }, + { + "epoch": 0.39797466503372514, + "grad_norm": 7.921359138492886, + "learning_rate": 6.850277102327047e-06, + "loss": 17.8138, + "step": 21772 + }, + { + "epoch": 0.3979929442301716, + "grad_norm": 6.3057469313954275, + "learning_rate": 6.8500021015004495e-06, + "loss": 17.6966, + "step": 21773 + }, + { + "epoch": 0.39801122342661815, + "grad_norm": 5.978307516896791, + "learning_rate": 6.849727094189755e-06, + "loss": 17.4407, + "step": 21774 + }, + { + "epoch": 0.3980295026230647, + "grad_norm": 5.361668479899914, + "learning_rate": 6.849452080395928e-06, + "loss": 16.9982, + "step": 21775 + }, + { + "epoch": 0.3980477818195112, + "grad_norm": 5.436084862090296, + "learning_rate": 6.849177060119931e-06, + "loss": 17.0593, + "step": 21776 + }, + { + "epoch": 0.39806606101595776, + "grad_norm": 7.219011796924425, + "learning_rate": 6.84890203336273e-06, + "loss": 17.4976, + "step": 21777 + }, + { + "epoch": 0.39808434021240424, + "grad_norm": 6.542009008742553, + "learning_rate": 6.848627000125288e-06, + "loss": 17.3236, + "step": 21778 + }, + { + "epoch": 0.3981026194088508, + "grad_norm": 6.471159757398091, + "learning_rate": 6.8483519604085695e-06, + "loss": 17.8908, + "step": 21779 + }, + { + "epoch": 0.3981208986052973, + "grad_norm": 8.547507575517846, + "learning_rate": 6.848076914213536e-06, + "loss": 18.24, + "step": 21780 + }, + { + 
"epoch": 0.39813917780174385, + "grad_norm": 5.05408154870597, + "learning_rate": 6.847801861541154e-06, + "loss": 16.8201, + "step": 21781 + }, + { + "epoch": 0.3981574569981904, + "grad_norm": 6.572162405922015, + "learning_rate": 6.847526802392386e-06, + "loss": 17.6547, + "step": 21782 + }, + { + "epoch": 0.39817573619463686, + "grad_norm": 5.963744775318282, + "learning_rate": 6.847251736768199e-06, + "loss": 17.384, + "step": 21783 + }, + { + "epoch": 0.3981940153910834, + "grad_norm": 5.2809965346776515, + "learning_rate": 6.846976664669553e-06, + "loss": 17.0352, + "step": 21784 + }, + { + "epoch": 0.39821229458752994, + "grad_norm": 7.468870171175749, + "learning_rate": 6.846701586097415e-06, + "loss": 17.8976, + "step": 21785 + }, + { + "epoch": 0.39823057378397647, + "grad_norm": 5.501253658388204, + "learning_rate": 6.846426501052748e-06, + "loss": 17.2988, + "step": 21786 + }, + { + "epoch": 0.398248852980423, + "grad_norm": 8.552318000196248, + "learning_rate": 6.846151409536516e-06, + "loss": 18.2101, + "step": 21787 + }, + { + "epoch": 0.3982671321768695, + "grad_norm": 5.161676492978516, + "learning_rate": 6.845876311549684e-06, + "loss": 17.0767, + "step": 21788 + }, + { + "epoch": 0.398285411373316, + "grad_norm": 6.1466095472310425, + "learning_rate": 6.845601207093215e-06, + "loss": 17.4433, + "step": 21789 + }, + { + "epoch": 0.39830369056976256, + "grad_norm": 6.246326853145241, + "learning_rate": 6.845326096168074e-06, + "loss": 17.5201, + "step": 21790 + }, + { + "epoch": 0.3983219697662091, + "grad_norm": 7.085188300824559, + "learning_rate": 6.8450509787752255e-06, + "loss": 18.0179, + "step": 21791 + }, + { + "epoch": 0.3983402489626556, + "grad_norm": 6.9875426997890075, + "learning_rate": 6.844775854915633e-06, + "loss": 17.6406, + "step": 21792 + }, + { + "epoch": 0.3983585281591021, + "grad_norm": 4.239942086341225, + "learning_rate": 6.8445007245902625e-06, + "loss": 16.7594, + "step": 21793 + }, + { + "epoch": 0.39837680735554865, + "grad_norm": 6.171533553468372, + "learning_rate": 6.844225587800077e-06, + "loss": 17.6613, + "step": 21794 + }, + { + "epoch": 0.3983950865519952, + "grad_norm": 6.560016173062775, + "learning_rate": 6.843950444546039e-06, + "loss": 17.5954, + "step": 21795 + }, + { + "epoch": 0.3984133657484417, + "grad_norm": 6.237759203957921, + "learning_rate": 6.843675294829115e-06, + "loss": 17.7816, + "step": 21796 + }, + { + "epoch": 0.3984316449448882, + "grad_norm": 7.600823146795452, + "learning_rate": 6.843400138650271e-06, + "loss": 17.7881, + "step": 21797 + }, + { + "epoch": 0.39844992414133473, + "grad_norm": 8.109419437101522, + "learning_rate": 6.843124976010469e-06, + "loss": 17.6862, + "step": 21798 + }, + { + "epoch": 0.39846820333778127, + "grad_norm": 6.655250879466462, + "learning_rate": 6.842849806910672e-06, + "loss": 17.8075, + "step": 21799 + }, + { + "epoch": 0.3984864825342278, + "grad_norm": 5.3213564636102735, + "learning_rate": 6.8425746313518485e-06, + "loss": 17.0101, + "step": 21800 + }, + { + "epoch": 0.39850476173067434, + "grad_norm": 8.147329827383535, + "learning_rate": 6.842299449334959e-06, + "loss": 17.7536, + "step": 21801 + }, + { + "epoch": 0.3985230409271208, + "grad_norm": 5.804868824451375, + "learning_rate": 6.842024260860971e-06, + "loss": 17.4383, + "step": 21802 + }, + { + "epoch": 0.39854132012356736, + "grad_norm": 6.1724928884144665, + "learning_rate": 6.841749065930847e-06, + "loss": 17.2015, + "step": 21803 + }, + { + "epoch": 0.3985595993200139, + "grad_norm": 6.715634459644148, + 
"learning_rate": 6.841473864545553e-06, + "loss": 17.6493, + "step": 21804 + }, + { + "epoch": 0.3985778785164604, + "grad_norm": 6.679022305429887, + "learning_rate": 6.841198656706053e-06, + "loss": 17.8111, + "step": 21805 + }, + { + "epoch": 0.39859615771290696, + "grad_norm": 7.257315299953628, + "learning_rate": 6.840923442413311e-06, + "loss": 17.1689, + "step": 21806 + }, + { + "epoch": 0.39861443690935344, + "grad_norm": 8.573541639910738, + "learning_rate": 6.8406482216682934e-06, + "loss": 18.0391, + "step": 21807 + }, + { + "epoch": 0.3986327161058, + "grad_norm": 6.4849479840258555, + "learning_rate": 6.840372994471961e-06, + "loss": 17.541, + "step": 21808 + }, + { + "epoch": 0.3986509953022465, + "grad_norm": 8.349189848762084, + "learning_rate": 6.8400977608252814e-06, + "loss": 18.3481, + "step": 21809 + }, + { + "epoch": 0.39866927449869305, + "grad_norm": 10.121523300115816, + "learning_rate": 6.839822520729221e-06, + "loss": 18.6284, + "step": 21810 + }, + { + "epoch": 0.3986875536951396, + "grad_norm": 7.817159093429263, + "learning_rate": 6.839547274184741e-06, + "loss": 17.7238, + "step": 21811 + }, + { + "epoch": 0.39870583289158606, + "grad_norm": 7.011889955934559, + "learning_rate": 6.8392720211928075e-06, + "loss": 17.7431, + "step": 21812 + }, + { + "epoch": 0.3987241120880326, + "grad_norm": 6.156123959449216, + "learning_rate": 6.838996761754384e-06, + "loss": 17.685, + "step": 21813 + }, + { + "epoch": 0.39874239128447914, + "grad_norm": 6.015380396421697, + "learning_rate": 6.838721495870438e-06, + "loss": 17.2928, + "step": 21814 + }, + { + "epoch": 0.39876067048092567, + "grad_norm": 5.2348778933493145, + "learning_rate": 6.838446223541933e-06, + "loss": 17.0707, + "step": 21815 + }, + { + "epoch": 0.3987789496773722, + "grad_norm": 5.51539712070171, + "learning_rate": 6.838170944769833e-06, + "loss": 17.2077, + "step": 21816 + }, + { + "epoch": 0.3987972288738187, + "grad_norm": 6.562161452212529, + "learning_rate": 6.837895659555103e-06, + "loss": 17.5551, + "step": 21817 + }, + { + "epoch": 0.3988155080702652, + "grad_norm": 5.654587977776979, + "learning_rate": 6.837620367898708e-06, + "loss": 17.2969, + "step": 21818 + }, + { + "epoch": 0.39883378726671176, + "grad_norm": 7.475892527216948, + "learning_rate": 6.837345069801613e-06, + "loss": 17.2382, + "step": 21819 + }, + { + "epoch": 0.3988520664631583, + "grad_norm": 7.995946912514331, + "learning_rate": 6.837069765264783e-06, + "loss": 18.3482, + "step": 21820 + }, + { + "epoch": 0.39887034565960483, + "grad_norm": 7.497912190028149, + "learning_rate": 6.8367944542891854e-06, + "loss": 18.0156, + "step": 21821 + }, + { + "epoch": 0.3988886248560513, + "grad_norm": 6.730666641172929, + "learning_rate": 6.836519136875779e-06, + "loss": 17.6704, + "step": 21822 + }, + { + "epoch": 0.39890690405249785, + "grad_norm": 6.453252477793417, + "learning_rate": 6.836243813025534e-06, + "loss": 17.8398, + "step": 21823 + }, + { + "epoch": 0.3989251832489444, + "grad_norm": 6.75137426284157, + "learning_rate": 6.835968482739415e-06, + "loss": 17.5854, + "step": 21824 + }, + { + "epoch": 0.3989434624453909, + "grad_norm": 8.37417003563625, + "learning_rate": 6.835693146018384e-06, + "loss": 18.5165, + "step": 21825 + }, + { + "epoch": 0.3989617416418374, + "grad_norm": 6.2616493045157124, + "learning_rate": 6.8354178028634084e-06, + "loss": 17.6568, + "step": 21826 + }, + { + "epoch": 0.39898002083828393, + "grad_norm": 8.738659647010948, + "learning_rate": 6.8351424532754515e-06, + "loss": 18.6223, + "step": 
21827 + }, + { + "epoch": 0.39899830003473047, + "grad_norm": 6.4634686275494095, + "learning_rate": 6.834867097255482e-06, + "loss": 17.3887, + "step": 21828 + }, + { + "epoch": 0.399016579231177, + "grad_norm": 6.354428864865202, + "learning_rate": 6.834591734804461e-06, + "loss": 17.5772, + "step": 21829 + }, + { + "epoch": 0.39903485842762354, + "grad_norm": 7.608988584982392, + "learning_rate": 6.834316365923355e-06, + "loss": 18.2136, + "step": 21830 + }, + { + "epoch": 0.39905313762407, + "grad_norm": 5.989923457364409, + "learning_rate": 6.834040990613129e-06, + "loss": 17.2518, + "step": 21831 + }, + { + "epoch": 0.39907141682051656, + "grad_norm": 7.747319358810052, + "learning_rate": 6.83376560887475e-06, + "loss": 18.2965, + "step": 21832 + }, + { + "epoch": 0.3990896960169631, + "grad_norm": 7.6421741538121095, + "learning_rate": 6.8334902207091804e-06, + "loss": 17.9195, + "step": 21833 + }, + { + "epoch": 0.3991079752134096, + "grad_norm": 6.326266384956976, + "learning_rate": 6.833214826117387e-06, + "loss": 17.6251, + "step": 21834 + }, + { + "epoch": 0.39912625440985616, + "grad_norm": 5.960369971268348, + "learning_rate": 6.832939425100336e-06, + "loss": 17.3625, + "step": 21835 + }, + { + "epoch": 0.39914453360630264, + "grad_norm": 5.766202353274833, + "learning_rate": 6.832664017658988e-06, + "loss": 17.0969, + "step": 21836 + }, + { + "epoch": 0.3991628128027492, + "grad_norm": 7.035487552730544, + "learning_rate": 6.832388603794315e-06, + "loss": 17.8479, + "step": 21837 + }, + { + "epoch": 0.3991810919991957, + "grad_norm": 5.930590917364918, + "learning_rate": 6.832113183507278e-06, + "loss": 17.346, + "step": 21838 + }, + { + "epoch": 0.39919937119564225, + "grad_norm": 5.197521251504096, + "learning_rate": 6.831837756798842e-06, + "loss": 16.8392, + "step": 21839 + }, + { + "epoch": 0.3992176503920888, + "grad_norm": 5.994831294123699, + "learning_rate": 6.831562323669976e-06, + "loss": 17.6677, + "step": 21840 + }, + { + "epoch": 0.39923592958853527, + "grad_norm": 8.895594948925588, + "learning_rate": 6.831286884121642e-06, + "loss": 17.7985, + "step": 21841 + }, + { + "epoch": 0.3992542087849818, + "grad_norm": 6.367047764293951, + "learning_rate": 6.831011438154805e-06, + "loss": 17.4813, + "step": 21842 + }, + { + "epoch": 0.39927248798142834, + "grad_norm": 6.123505316100374, + "learning_rate": 6.8307359857704324e-06, + "loss": 17.084, + "step": 21843 + }, + { + "epoch": 0.3992907671778749, + "grad_norm": 4.732171505471359, + "learning_rate": 6.8304605269694904e-06, + "loss": 16.8162, + "step": 21844 + }, + { + "epoch": 0.3993090463743214, + "grad_norm": 6.73229843461191, + "learning_rate": 6.8301850617529405e-06, + "loss": 17.8678, + "step": 21845 + }, + { + "epoch": 0.3993273255707679, + "grad_norm": 7.1852536067024175, + "learning_rate": 6.829909590121752e-06, + "loss": 18.1702, + "step": 21846 + }, + { + "epoch": 0.3993456047672144, + "grad_norm": 7.180394346522405, + "learning_rate": 6.82963411207689e-06, + "loss": 17.9493, + "step": 21847 + }, + { + "epoch": 0.39936388396366096, + "grad_norm": 4.71416536056585, + "learning_rate": 6.82935862761932e-06, + "loss": 16.8741, + "step": 21848 + }, + { + "epoch": 0.3993821631601075, + "grad_norm": 6.544736596933013, + "learning_rate": 6.8290831367500055e-06, + "loss": 17.6977, + "step": 21849 + }, + { + "epoch": 0.39940044235655403, + "grad_norm": 5.202792661730387, + "learning_rate": 6.828807639469914e-06, + "loss": 17.0273, + "step": 21850 + }, + { + "epoch": 0.3994187215530005, + "grad_norm": 
5.843070617820656, + "learning_rate": 6.828532135780008e-06, + "loss": 17.3643, + "step": 21851 + }, + { + "epoch": 0.39943700074944705, + "grad_norm": 7.029189764478648, + "learning_rate": 6.8282566256812584e-06, + "loss": 17.6621, + "step": 21852 + }, + { + "epoch": 0.3994552799458936, + "grad_norm": 10.042292797677177, + "learning_rate": 6.827981109174627e-06, + "loss": 18.8844, + "step": 21853 + }, + { + "epoch": 0.3994735591423401, + "grad_norm": 5.584993858822853, + "learning_rate": 6.82770558626108e-06, + "loss": 17.2078, + "step": 21854 + }, + { + "epoch": 0.39949183833878665, + "grad_norm": 7.134501801572942, + "learning_rate": 6.8274300569415845e-06, + "loss": 17.7828, + "step": 21855 + }, + { + "epoch": 0.39951011753523313, + "grad_norm": 6.869150866445629, + "learning_rate": 6.827154521217104e-06, + "loss": 17.4381, + "step": 21856 + }, + { + "epoch": 0.39952839673167967, + "grad_norm": 6.065259526390333, + "learning_rate": 6.826878979088607e-06, + "loss": 17.2889, + "step": 21857 + }, + { + "epoch": 0.3995466759281262, + "grad_norm": 7.516231241164341, + "learning_rate": 6.826603430557056e-06, + "loss": 18.2564, + "step": 21858 + }, + { + "epoch": 0.39956495512457274, + "grad_norm": 7.000265027217838, + "learning_rate": 6.826327875623419e-06, + "loss": 17.8884, + "step": 21859 + }, + { + "epoch": 0.3995832343210192, + "grad_norm": 5.977487675097315, + "learning_rate": 6.826052314288662e-06, + "loss": 17.355, + "step": 21860 + }, + { + "epoch": 0.39960151351746576, + "grad_norm": 6.090132413359348, + "learning_rate": 6.8257767465537496e-06, + "loss": 17.4178, + "step": 21861 + }, + { + "epoch": 0.3996197927139123, + "grad_norm": 6.309985701706487, + "learning_rate": 6.825501172419649e-06, + "loss": 17.4804, + "step": 21862 + }, + { + "epoch": 0.3996380719103588, + "grad_norm": 6.169446371465011, + "learning_rate": 6.825225591887323e-06, + "loss": 17.4456, + "step": 21863 + }, + { + "epoch": 0.39965635110680536, + "grad_norm": 5.208395078947912, + "learning_rate": 6.824950004957741e-06, + "loss": 16.8539, + "step": 21864 + }, + { + "epoch": 0.39967463030325184, + "grad_norm": 6.798308654497195, + "learning_rate": 6.824674411631868e-06, + "loss": 17.6094, + "step": 21865 + }, + { + "epoch": 0.3996929094996984, + "grad_norm": 6.468971865610881, + "learning_rate": 6.82439881191067e-06, + "loss": 17.5193, + "step": 21866 + }, + { + "epoch": 0.3997111886961449, + "grad_norm": 6.129154578278078, + "learning_rate": 6.824123205795111e-06, + "loss": 17.3764, + "step": 21867 + }, + { + "epoch": 0.39972946789259145, + "grad_norm": 6.4687796845339545, + "learning_rate": 6.823847593286159e-06, + "loss": 17.591, + "step": 21868 + }, + { + "epoch": 0.399747747089038, + "grad_norm": 7.197323925496892, + "learning_rate": 6.8235719743847795e-06, + "loss": 17.6638, + "step": 21869 + }, + { + "epoch": 0.39976602628548447, + "grad_norm": 5.736916691002986, + "learning_rate": 6.823296349091939e-06, + "loss": 17.1347, + "step": 21870 + }, + { + "epoch": 0.399784305481931, + "grad_norm": 7.0601501110054645, + "learning_rate": 6.823020717408603e-06, + "loss": 17.9473, + "step": 21871 + }, + { + "epoch": 0.39980258467837754, + "grad_norm": 7.433267916188017, + "learning_rate": 6.822745079335736e-06, + "loss": 17.7632, + "step": 21872 + }, + { + "epoch": 0.3998208638748241, + "grad_norm": 5.870480293815363, + "learning_rate": 6.822469434874307e-06, + "loss": 17.1841, + "step": 21873 + }, + { + "epoch": 0.3998391430712706, + "grad_norm": 6.783657703056416, + "learning_rate": 6.8221937840252805e-06, + 
"loss": 17.1817, + "step": 21874 + }, + { + "epoch": 0.3998574222677171, + "grad_norm": 8.472302679485521, + "learning_rate": 6.8219181267896225e-06, + "loss": 18.1618, + "step": 21875 + }, + { + "epoch": 0.3998757014641636, + "grad_norm": 7.146008847435934, + "learning_rate": 6.821642463168301e-06, + "loss": 18.0327, + "step": 21876 + }, + { + "epoch": 0.39989398066061016, + "grad_norm": 6.834847710285364, + "learning_rate": 6.821366793162279e-06, + "loss": 17.6565, + "step": 21877 + }, + { + "epoch": 0.3999122598570567, + "grad_norm": 5.498396193659899, + "learning_rate": 6.8210911167725256e-06, + "loss": 17.1138, + "step": 21878 + }, + { + "epoch": 0.39993053905350323, + "grad_norm": 6.8552746884165146, + "learning_rate": 6.820815434000007e-06, + "loss": 17.6906, + "step": 21879 + }, + { + "epoch": 0.3999488182499497, + "grad_norm": 6.639894723167643, + "learning_rate": 6.8205397448456865e-06, + "loss": 17.4614, + "step": 21880 + }, + { + "epoch": 0.39996709744639625, + "grad_norm": 6.37637902059711, + "learning_rate": 6.820264049310531e-06, + "loss": 17.404, + "step": 21881 + }, + { + "epoch": 0.3999853766428428, + "grad_norm": 6.069914179531985, + "learning_rate": 6.8199883473955094e-06, + "loss": 17.3451, + "step": 21882 + }, + { + "epoch": 0.4000036558392893, + "grad_norm": 7.420894879857651, + "learning_rate": 6.819712639101588e-06, + "loss": 18.2089, + "step": 21883 + }, + { + "epoch": 0.40002193503573585, + "grad_norm": 8.62891725858039, + "learning_rate": 6.8194369244297294e-06, + "loss": 18.5057, + "step": 21884 + }, + { + "epoch": 0.40004021423218233, + "grad_norm": 6.593069105903263, + "learning_rate": 6.819161203380903e-06, + "loss": 17.5398, + "step": 21885 + }, + { + "epoch": 0.40005849342862887, + "grad_norm": 7.625733575239709, + "learning_rate": 6.818885475956075e-06, + "loss": 18.1123, + "step": 21886 + }, + { + "epoch": 0.4000767726250754, + "grad_norm": 6.542194823489286, + "learning_rate": 6.81860974215621e-06, + "loss": 17.9121, + "step": 21887 + }, + { + "epoch": 0.40009505182152194, + "grad_norm": 5.649682944877462, + "learning_rate": 6.8183340019822774e-06, + "loss": 17.164, + "step": 21888 + }, + { + "epoch": 0.4001133310179685, + "grad_norm": 8.18719447903527, + "learning_rate": 6.81805825543524e-06, + "loss": 18.4074, + "step": 21889 + }, + { + "epoch": 0.40013161021441496, + "grad_norm": 5.971442482873152, + "learning_rate": 6.817782502516068e-06, + "loss": 17.4632, + "step": 21890 + }, + { + "epoch": 0.4001498894108615, + "grad_norm": 7.544173298075092, + "learning_rate": 6.817506743225725e-06, + "loss": 18.1639, + "step": 21891 + }, + { + "epoch": 0.40016816860730803, + "grad_norm": 6.585524425248248, + "learning_rate": 6.817230977565179e-06, + "loss": 17.309, + "step": 21892 + }, + { + "epoch": 0.40018644780375456, + "grad_norm": 6.47608704865308, + "learning_rate": 6.816955205535397e-06, + "loss": 17.5814, + "step": 21893 + }, + { + "epoch": 0.40020472700020104, + "grad_norm": 5.466854866379673, + "learning_rate": 6.8166794271373425e-06, + "loss": 17.2101, + "step": 21894 + }, + { + "epoch": 0.4002230061966476, + "grad_norm": 5.8153603545612915, + "learning_rate": 6.816403642371985e-06, + "loss": 17.5636, + "step": 21895 + }, + { + "epoch": 0.4002412853930941, + "grad_norm": 6.9351026441863315, + "learning_rate": 6.816127851240291e-06, + "loss": 17.5025, + "step": 21896 + }, + { + "epoch": 0.40025956458954065, + "grad_norm": 7.636826122033852, + "learning_rate": 6.815852053743227e-06, + "loss": 18.0502, + "step": 21897 + }, + { + "epoch": 
0.4002778437859872, + "grad_norm": 6.464902345553054, + "learning_rate": 6.815576249881758e-06, + "loss": 17.5888, + "step": 21898 + }, + { + "epoch": 0.40029612298243367, + "grad_norm": 6.749419499798225, + "learning_rate": 6.815300439656852e-06, + "loss": 17.5443, + "step": 21899 + }, + { + "epoch": 0.4003144021788802, + "grad_norm": 6.58827908461623, + "learning_rate": 6.815024623069476e-06, + "loss": 17.5441, + "step": 21900 + }, + { + "epoch": 0.40033268137532674, + "grad_norm": 5.761502253496959, + "learning_rate": 6.8147488001205965e-06, + "loss": 17.4418, + "step": 21901 + }, + { + "epoch": 0.4003509605717733, + "grad_norm": 6.863697161354168, + "learning_rate": 6.814472970811179e-06, + "loss": 17.8328, + "step": 21902 + }, + { + "epoch": 0.4003692397682198, + "grad_norm": 5.95862440492216, + "learning_rate": 6.814197135142191e-06, + "loss": 17.1633, + "step": 21903 + }, + { + "epoch": 0.4003875189646663, + "grad_norm": 6.644524654873513, + "learning_rate": 6.8139212931145995e-06, + "loss": 17.3927, + "step": 21904 + }, + { + "epoch": 0.4004057981611128, + "grad_norm": 6.860794251018615, + "learning_rate": 6.813645444729372e-06, + "loss": 18.0471, + "step": 21905 + }, + { + "epoch": 0.40042407735755936, + "grad_norm": 6.001006930176777, + "learning_rate": 6.813369589987474e-06, + "loss": 17.3834, + "step": 21906 + }, + { + "epoch": 0.4004423565540059, + "grad_norm": 6.18695976509492, + "learning_rate": 6.813093728889874e-06, + "loss": 17.2937, + "step": 21907 + }, + { + "epoch": 0.40046063575045243, + "grad_norm": 6.4642207578544495, + "learning_rate": 6.812817861437536e-06, + "loss": 17.6252, + "step": 21908 + }, + { + "epoch": 0.4004789149468989, + "grad_norm": 6.632278544468539, + "learning_rate": 6.8125419876314295e-06, + "loss": 17.5642, + "step": 21909 + }, + { + "epoch": 0.40049719414334545, + "grad_norm": 7.109454895608768, + "learning_rate": 6.812266107472522e-06, + "loss": 17.7552, + "step": 21910 + }, + { + "epoch": 0.400515473339792, + "grad_norm": 8.508668659805597, + "learning_rate": 6.811990220961779e-06, + "loss": 18.43, + "step": 21911 + }, + { + "epoch": 0.4005337525362385, + "grad_norm": 7.185601574581772, + "learning_rate": 6.811714328100165e-06, + "loss": 17.9087, + "step": 21912 + }, + { + "epoch": 0.40055203173268505, + "grad_norm": 5.920070425212109, + "learning_rate": 6.81143842888865e-06, + "loss": 17.1764, + "step": 21913 + }, + { + "epoch": 0.40057031092913153, + "grad_norm": 6.966815940783827, + "learning_rate": 6.811162523328203e-06, + "loss": 17.8286, + "step": 21914 + }, + { + "epoch": 0.40058859012557807, + "grad_norm": 6.586401880593733, + "learning_rate": 6.810886611419787e-06, + "loss": 17.8632, + "step": 21915 + }, + { + "epoch": 0.4006068693220246, + "grad_norm": 6.063754430848439, + "learning_rate": 6.81061069316437e-06, + "loss": 17.4085, + "step": 21916 + }, + { + "epoch": 0.40062514851847114, + "grad_norm": 6.32509247933547, + "learning_rate": 6.810334768562921e-06, + "loss": 17.5364, + "step": 21917 + }, + { + "epoch": 0.4006434277149177, + "grad_norm": 6.724629586613398, + "learning_rate": 6.8100588376164036e-06, + "loss": 17.5292, + "step": 21918 + }, + { + "epoch": 0.40066170691136416, + "grad_norm": 7.667001702775765, + "learning_rate": 6.809782900325789e-06, + "loss": 18.004, + "step": 21919 + }, + { + "epoch": 0.4006799861078107, + "grad_norm": 7.118537808752815, + "learning_rate": 6.809506956692041e-06, + "loss": 17.9814, + "step": 21920 + }, + { + "epoch": 0.40069826530425723, + "grad_norm": 6.9903226744488745, + "learning_rate": 
6.809231006716131e-06, + "loss": 17.4455, + "step": 21921 + }, + { + "epoch": 0.40071654450070376, + "grad_norm": 6.728524686486218, + "learning_rate": 6.808955050399018e-06, + "loss": 17.4874, + "step": 21922 + }, + { + "epoch": 0.4007348236971503, + "grad_norm": 7.310463906228096, + "learning_rate": 6.808679087741679e-06, + "loss": 18.2281, + "step": 21923 + }, + { + "epoch": 0.4007531028935968, + "grad_norm": 7.433994389988249, + "learning_rate": 6.808403118745076e-06, + "loss": 17.8506, + "step": 21924 + }, + { + "epoch": 0.4007713820900433, + "grad_norm": 6.082764194422455, + "learning_rate": 6.808127143410177e-06, + "loss": 17.3403, + "step": 21925 + }, + { + "epoch": 0.40078966128648985, + "grad_norm": 6.904402641846164, + "learning_rate": 6.8078511617379485e-06, + "loss": 17.7018, + "step": 21926 + }, + { + "epoch": 0.4008079404829364, + "grad_norm": 7.1843567484424575, + "learning_rate": 6.8075751737293575e-06, + "loss": 18.1391, + "step": 21927 + }, + { + "epoch": 0.40082621967938287, + "grad_norm": 8.693140802240887, + "learning_rate": 6.8072991793853734e-06, + "loss": 17.9093, + "step": 21928 + }, + { + "epoch": 0.4008444988758294, + "grad_norm": 7.323779640948805, + "learning_rate": 6.807023178706964e-06, + "loss": 18.4544, + "step": 21929 + }, + { + "epoch": 0.40086277807227594, + "grad_norm": 8.73760270559256, + "learning_rate": 6.8067471716950935e-06, + "loss": 18.1625, + "step": 21930 + }, + { + "epoch": 0.4008810572687225, + "grad_norm": 8.111036553412072, + "learning_rate": 6.8064711583507315e-06, + "loss": 18.7604, + "step": 21931 + }, + { + "epoch": 0.400899336465169, + "grad_norm": 7.337649671092504, + "learning_rate": 6.806195138674845e-06, + "loss": 17.4618, + "step": 21932 + }, + { + "epoch": 0.4009176156616155, + "grad_norm": 6.14522667941267, + "learning_rate": 6.8059191126684e-06, + "loss": 17.2946, + "step": 21933 + }, + { + "epoch": 0.400935894858062, + "grad_norm": 7.186178180742369, + "learning_rate": 6.805643080332366e-06, + "loss": 17.9071, + "step": 21934 + }, + { + "epoch": 0.40095417405450856, + "grad_norm": 7.029971897302851, + "learning_rate": 6.80536704166771e-06, + "loss": 17.6502, + "step": 21935 + }, + { + "epoch": 0.4009724532509551, + "grad_norm": 9.513560196022905, + "learning_rate": 6.805090996675399e-06, + "loss": 18.5135, + "step": 21936 + }, + { + "epoch": 0.40099073244740163, + "grad_norm": 7.605591732838555, + "learning_rate": 6.804814945356401e-06, + "loss": 17.6754, + "step": 21937 + }, + { + "epoch": 0.4010090116438481, + "grad_norm": 6.2261058022499824, + "learning_rate": 6.804538887711684e-06, + "loss": 17.4833, + "step": 21938 + }, + { + "epoch": 0.40102729084029465, + "grad_norm": 6.843279889744721, + "learning_rate": 6.804262823742214e-06, + "loss": 18.0529, + "step": 21939 + }, + { + "epoch": 0.4010455700367412, + "grad_norm": 6.811844571700934, + "learning_rate": 6.803986753448956e-06, + "loss": 17.7461, + "step": 21940 + }, + { + "epoch": 0.4010638492331877, + "grad_norm": 6.2373444016166015, + "learning_rate": 6.803710676832887e-06, + "loss": 17.7564, + "step": 21941 + }, + { + "epoch": 0.40108212842963425, + "grad_norm": 7.451427159131176, + "learning_rate": 6.803434593894965e-06, + "loss": 18.0171, + "step": 21942 + }, + { + "epoch": 0.40110040762608073, + "grad_norm": 7.006701312109924, + "learning_rate": 6.803158504636162e-06, + "loss": 17.8858, + "step": 21943 + }, + { + "epoch": 0.40111868682252727, + "grad_norm": 5.912711778513443, + "learning_rate": 6.8028824090574455e-06, + "loss": 17.4637, + "step": 21944 + }, + { + 
"epoch": 0.4011369660189738, + "grad_norm": 5.719679315294928, + "learning_rate": 6.802606307159782e-06, + "loss": 17.2627, + "step": 21945 + }, + { + "epoch": 0.40115524521542034, + "grad_norm": 6.126713852624104, + "learning_rate": 6.802330198944138e-06, + "loss": 17.4433, + "step": 21946 + }, + { + "epoch": 0.4011735244118669, + "grad_norm": 5.778026223407568, + "learning_rate": 6.802054084411486e-06, + "loss": 17.0654, + "step": 21947 + }, + { + "epoch": 0.40119180360831336, + "grad_norm": 6.839335613346102, + "learning_rate": 6.80177796356279e-06, + "loss": 17.4037, + "step": 21948 + }, + { + "epoch": 0.4012100828047599, + "grad_norm": 6.688918250827921, + "learning_rate": 6.801501836399017e-06, + "loss": 17.4062, + "step": 21949 + }, + { + "epoch": 0.40122836200120643, + "grad_norm": 5.431568159693095, + "learning_rate": 6.801225702921138e-06, + "loss": 17.3574, + "step": 21950 + }, + { + "epoch": 0.40124664119765296, + "grad_norm": 6.668323124251579, + "learning_rate": 6.800949563130119e-06, + "loss": 17.6149, + "step": 21951 + }, + { + "epoch": 0.4012649203940995, + "grad_norm": 6.999382372145549, + "learning_rate": 6.8006734170269284e-06, + "loss": 17.7897, + "step": 21952 + }, + { + "epoch": 0.401283199590546, + "grad_norm": 6.190362614469207, + "learning_rate": 6.800397264612533e-06, + "loss": 17.9581, + "step": 21953 + }, + { + "epoch": 0.4013014787869925, + "grad_norm": 5.57772871319978, + "learning_rate": 6.8001211058879e-06, + "loss": 17.3105, + "step": 21954 + }, + { + "epoch": 0.40131975798343905, + "grad_norm": 5.700006419530701, + "learning_rate": 6.799844940854002e-06, + "loss": 17.1836, + "step": 21955 + }, + { + "epoch": 0.4013380371798856, + "grad_norm": 6.858696783415418, + "learning_rate": 6.799568769511802e-06, + "loss": 17.7469, + "step": 21956 + }, + { + "epoch": 0.4013563163763321, + "grad_norm": 6.797137546460257, + "learning_rate": 6.7992925918622696e-06, + "loss": 17.7856, + "step": 21957 + }, + { + "epoch": 0.4013745955727786, + "grad_norm": 5.138139047181146, + "learning_rate": 6.799016407906372e-06, + "loss": 16.875, + "step": 21958 + }, + { + "epoch": 0.40139287476922514, + "grad_norm": 7.769990713000648, + "learning_rate": 6.79874021764508e-06, + "loss": 17.2655, + "step": 21959 + }, + { + "epoch": 0.4014111539656717, + "grad_norm": 5.793944453016079, + "learning_rate": 6.7984640210793586e-06, + "loss": 17.2418, + "step": 21960 + }, + { + "epoch": 0.4014294331621182, + "grad_norm": 6.6119078654034515, + "learning_rate": 6.798187818210176e-06, + "loss": 17.6804, + "step": 21961 + }, + { + "epoch": 0.4014477123585647, + "grad_norm": 6.897665729366046, + "learning_rate": 6.797911609038503e-06, + "loss": 17.5899, + "step": 21962 + }, + { + "epoch": 0.4014659915550112, + "grad_norm": 6.964162749019687, + "learning_rate": 6.797635393565304e-06, + "loss": 17.151, + "step": 21963 + }, + { + "epoch": 0.40148427075145776, + "grad_norm": 5.771019717862557, + "learning_rate": 6.797359171791549e-06, + "loss": 17.306, + "step": 21964 + }, + { + "epoch": 0.4015025499479043, + "grad_norm": 6.331485271180629, + "learning_rate": 6.797082943718207e-06, + "loss": 17.2906, + "step": 21965 + }, + { + "epoch": 0.40152082914435083, + "grad_norm": 5.79659720268587, + "learning_rate": 6.796806709346246e-06, + "loss": 17.4377, + "step": 21966 + }, + { + "epoch": 0.4015391083407973, + "grad_norm": 7.211064151600302, + "learning_rate": 6.796530468676632e-06, + "loss": 17.4476, + "step": 21967 + }, + { + "epoch": 0.40155738753724385, + "grad_norm": 7.455823922053474, + 
"learning_rate": 6.796254221710335e-06, + "loss": 17.9749, + "step": 21968 + }, + { + "epoch": 0.4015756667336904, + "grad_norm": 5.516866774663918, + "learning_rate": 6.795977968448323e-06, + "loss": 17.2624, + "step": 21969 + }, + { + "epoch": 0.4015939459301369, + "grad_norm": 5.234600543069211, + "learning_rate": 6.795701708891563e-06, + "loss": 17.0726, + "step": 21970 + }, + { + "epoch": 0.40161222512658346, + "grad_norm": 5.393485636893945, + "learning_rate": 6.795425443041026e-06, + "loss": 16.8985, + "step": 21971 + }, + { + "epoch": 0.40163050432302994, + "grad_norm": 7.261848924659315, + "learning_rate": 6.795149170897677e-06, + "loss": 18.2308, + "step": 21972 + }, + { + "epoch": 0.40164878351947647, + "grad_norm": 5.681941304243241, + "learning_rate": 6.794872892462487e-06, + "loss": 17.3867, + "step": 21973 + }, + { + "epoch": 0.401667062715923, + "grad_norm": 6.784338920543298, + "learning_rate": 6.794596607736423e-06, + "loss": 17.6111, + "step": 21974 + }, + { + "epoch": 0.40168534191236954, + "grad_norm": 6.381183413292857, + "learning_rate": 6.794320316720453e-06, + "loss": 17.5785, + "step": 21975 + }, + { + "epoch": 0.4017036211088161, + "grad_norm": 6.472831603511827, + "learning_rate": 6.794044019415547e-06, + "loss": 17.6216, + "step": 21976 + }, + { + "epoch": 0.40172190030526256, + "grad_norm": 8.033889329602548, + "learning_rate": 6.793767715822672e-06, + "loss": 17.621, + "step": 21977 + }, + { + "epoch": 0.4017401795017091, + "grad_norm": 7.652435122747328, + "learning_rate": 6.793491405942797e-06, + "loss": 18.1178, + "step": 21978 + }, + { + "epoch": 0.40175845869815563, + "grad_norm": 5.7795189774144795, + "learning_rate": 6.7932150897768914e-06, + "loss": 17.304, + "step": 21979 + }, + { + "epoch": 0.40177673789460217, + "grad_norm": 7.5296212022012545, + "learning_rate": 6.792938767325921e-06, + "loss": 18.2468, + "step": 21980 + }, + { + "epoch": 0.4017950170910487, + "grad_norm": 7.349797483946474, + "learning_rate": 6.792662438590854e-06, + "loss": 17.8214, + "step": 21981 + }, + { + "epoch": 0.4018132962874952, + "grad_norm": 7.531457348741944, + "learning_rate": 6.792386103572663e-06, + "loss": 17.9705, + "step": 21982 + }, + { + "epoch": 0.4018315754839417, + "grad_norm": 5.983560340242937, + "learning_rate": 6.792109762272315e-06, + "loss": 17.3376, + "step": 21983 + }, + { + "epoch": 0.40184985468038825, + "grad_norm": 8.365451107347186, + "learning_rate": 6.791833414690776e-06, + "loss": 18.2873, + "step": 21984 + }, + { + "epoch": 0.4018681338768348, + "grad_norm": 6.06164443050326, + "learning_rate": 6.791557060829017e-06, + "loss": 17.4937, + "step": 21985 + }, + { + "epoch": 0.4018864130732813, + "grad_norm": 5.389488352470851, + "learning_rate": 6.791280700688006e-06, + "loss": 17.1742, + "step": 21986 + }, + { + "epoch": 0.4019046922697278, + "grad_norm": 6.035099688578071, + "learning_rate": 6.7910043342687124e-06, + "loss": 17.2362, + "step": 21987 + }, + { + "epoch": 0.40192297146617434, + "grad_norm": 6.240104426224746, + "learning_rate": 6.790727961572103e-06, + "loss": 17.3403, + "step": 21988 + }, + { + "epoch": 0.4019412506626209, + "grad_norm": 4.771364297154467, + "learning_rate": 6.790451582599148e-06, + "loss": 16.7517, + "step": 21989 + }, + { + "epoch": 0.4019595298590674, + "grad_norm": 6.414069119573712, + "learning_rate": 6.790175197350814e-06, + "loss": 17.4858, + "step": 21990 + }, + { + "epoch": 0.40197780905551395, + "grad_norm": 5.394892539709728, + "learning_rate": 6.789898805828074e-06, + "loss": 16.9978, + "step": 
21991 + }, + { + "epoch": 0.4019960882519604, + "grad_norm": 5.559454598292766, + "learning_rate": 6.789622408031893e-06, + "loss": 17.1492, + "step": 21992 + }, + { + "epoch": 0.40201436744840696, + "grad_norm": 7.353267904120058, + "learning_rate": 6.7893460039632404e-06, + "loss": 18.0896, + "step": 21993 + }, + { + "epoch": 0.4020326466448535, + "grad_norm": 6.918933038129437, + "learning_rate": 6.789069593623085e-06, + "loss": 17.9874, + "step": 21994 + }, + { + "epoch": 0.40205092584130003, + "grad_norm": 5.892259725242188, + "learning_rate": 6.788793177012396e-06, + "loss": 17.4803, + "step": 21995 + }, + { + "epoch": 0.4020692050377465, + "grad_norm": 6.42627628804412, + "learning_rate": 6.788516754132142e-06, + "loss": 17.584, + "step": 21996 + }, + { + "epoch": 0.40208748423419305, + "grad_norm": 6.86353923249927, + "learning_rate": 6.788240324983293e-06, + "loss": 17.9212, + "step": 21997 + }, + { + "epoch": 0.4021057634306396, + "grad_norm": 8.310368469293014, + "learning_rate": 6.7879638895668165e-06, + "loss": 17.9964, + "step": 21998 + }, + { + "epoch": 0.4021240426270861, + "grad_norm": 6.562962555960447, + "learning_rate": 6.787687447883682e-06, + "loss": 17.6401, + "step": 21999 + }, + { + "epoch": 0.40214232182353266, + "grad_norm": 7.473633228695363, + "learning_rate": 6.787410999934857e-06, + "loss": 18.0085, + "step": 22000 + }, + { + "epoch": 0.40216060101997914, + "grad_norm": 5.169612281976841, + "learning_rate": 6.787134545721312e-06, + "loss": 16.9001, + "step": 22001 + }, + { + "epoch": 0.40217888021642567, + "grad_norm": 5.74281798225418, + "learning_rate": 6.786858085244015e-06, + "loss": 17.1053, + "step": 22002 + }, + { + "epoch": 0.4021971594128722, + "grad_norm": 6.11722418767506, + "learning_rate": 6.786581618503936e-06, + "loss": 17.4146, + "step": 22003 + }, + { + "epoch": 0.40221543860931874, + "grad_norm": 6.453345918976262, + "learning_rate": 6.786305145502043e-06, + "loss": 17.4624, + "step": 22004 + }, + { + "epoch": 0.4022337178057653, + "grad_norm": 5.904459705572975, + "learning_rate": 6.786028666239306e-06, + "loss": 17.1647, + "step": 22005 + }, + { + "epoch": 0.40225199700221176, + "grad_norm": 7.64038538335007, + "learning_rate": 6.785752180716694e-06, + "loss": 17.9963, + "step": 22006 + }, + { + "epoch": 0.4022702761986583, + "grad_norm": 6.0691043780964105, + "learning_rate": 6.785475688935176e-06, + "loss": 17.3344, + "step": 22007 + }, + { + "epoch": 0.40228855539510483, + "grad_norm": 7.882165262024196, + "learning_rate": 6.785199190895719e-06, + "loss": 18.2275, + "step": 22008 + }, + { + "epoch": 0.40230683459155137, + "grad_norm": 6.959207615504242, + "learning_rate": 6.784922686599295e-06, + "loss": 17.6494, + "step": 22009 + }, + { + "epoch": 0.4023251137879979, + "grad_norm": 6.859123166489989, + "learning_rate": 6.7846461760468714e-06, + "loss": 17.4516, + "step": 22010 + }, + { + "epoch": 0.4023433929844444, + "grad_norm": 7.235208318542025, + "learning_rate": 6.784369659239418e-06, + "loss": 17.4774, + "step": 22011 + }, + { + "epoch": 0.4023616721808909, + "grad_norm": 5.988047756592985, + "learning_rate": 6.784093136177903e-06, + "loss": 17.3887, + "step": 22012 + }, + { + "epoch": 0.40237995137733745, + "grad_norm": 6.956947575994767, + "learning_rate": 6.783816606863296e-06, + "loss": 17.7987, + "step": 22013 + }, + { + "epoch": 0.402398230573784, + "grad_norm": 7.0798289717982765, + "learning_rate": 6.783540071296568e-06, + "loss": 17.8944, + "step": 22014 + }, + { + "epoch": 0.4024165097702305, + "grad_norm": 
6.891298784286834, + "learning_rate": 6.783263529478686e-06, + "loss": 17.8773, + "step": 22015 + }, + { + "epoch": 0.402434788966677, + "grad_norm": 5.762533932607662, + "learning_rate": 6.782986981410621e-06, + "loss": 17.1087, + "step": 22016 + }, + { + "epoch": 0.40245306816312354, + "grad_norm": 8.80927131911007, + "learning_rate": 6.782710427093341e-06, + "loss": 18.2545, + "step": 22017 + }, + { + "epoch": 0.4024713473595701, + "grad_norm": 5.843840912648421, + "learning_rate": 6.782433866527815e-06, + "loss": 17.2093, + "step": 22018 + }, + { + "epoch": 0.4024896265560166, + "grad_norm": 8.975227642255565, + "learning_rate": 6.782157299715013e-06, + "loss": 18.5843, + "step": 22019 + }, + { + "epoch": 0.40250790575246315, + "grad_norm": 6.92250061229194, + "learning_rate": 6.781880726655905e-06, + "loss": 17.6077, + "step": 22020 + }, + { + "epoch": 0.4025261849489096, + "grad_norm": 6.820599787468956, + "learning_rate": 6.7816041473514606e-06, + "loss": 17.688, + "step": 22021 + }, + { + "epoch": 0.40254446414535616, + "grad_norm": 6.747177678529493, + "learning_rate": 6.781327561802645e-06, + "loss": 17.5352, + "step": 22022 + }, + { + "epoch": 0.4025627433418027, + "grad_norm": 5.259387240651166, + "learning_rate": 6.781050970010433e-06, + "loss": 17.1437, + "step": 22023 + }, + { + "epoch": 0.40258102253824923, + "grad_norm": 6.830641097745921, + "learning_rate": 6.780774371975794e-06, + "loss": 17.8107, + "step": 22024 + }, + { + "epoch": 0.40259930173469577, + "grad_norm": 6.203491879441497, + "learning_rate": 6.780497767699692e-06, + "loss": 17.7428, + "step": 22025 + }, + { + "epoch": 0.40261758093114225, + "grad_norm": 7.405202023530535, + "learning_rate": 6.780221157183101e-06, + "loss": 17.8754, + "step": 22026 + }, + { + "epoch": 0.4026358601275888, + "grad_norm": 6.102518656227493, + "learning_rate": 6.779944540426988e-06, + "loss": 17.5551, + "step": 22027 + }, + { + "epoch": 0.4026541393240353, + "grad_norm": 6.421375108809336, + "learning_rate": 6.7796679174323265e-06, + "loss": 17.4135, + "step": 22028 + }, + { + "epoch": 0.40267241852048186, + "grad_norm": 5.960106085602953, + "learning_rate": 6.7793912882000815e-06, + "loss": 17.4722, + "step": 22029 + }, + { + "epoch": 0.40269069771692834, + "grad_norm": 7.350397311128247, + "learning_rate": 6.779114652731224e-06, + "loss": 17.739, + "step": 22030 + }, + { + "epoch": 0.40270897691337487, + "grad_norm": 7.4286504026203595, + "learning_rate": 6.778838011026726e-06, + "loss": 18.2491, + "step": 22031 + }, + { + "epoch": 0.4027272561098214, + "grad_norm": 6.304389322318602, + "learning_rate": 6.778561363087555e-06, + "loss": 17.5771, + "step": 22032 + }, + { + "epoch": 0.40274553530626794, + "grad_norm": 6.152669886805366, + "learning_rate": 6.778284708914679e-06, + "loss": 17.4633, + "step": 22033 + }, + { + "epoch": 0.4027638145027145, + "grad_norm": 6.005907317098, + "learning_rate": 6.778008048509071e-06, + "loss": 17.727, + "step": 22034 + }, + { + "epoch": 0.40278209369916096, + "grad_norm": 6.711324495143864, + "learning_rate": 6.7777313818716974e-06, + "loss": 17.7124, + "step": 22035 + }, + { + "epoch": 0.4028003728956075, + "grad_norm": 6.923061489646611, + "learning_rate": 6.77745470900353e-06, + "loss": 18.0881, + "step": 22036 + }, + { + "epoch": 0.40281865209205403, + "grad_norm": 6.470315405504754, + "learning_rate": 6.777178029905539e-06, + "loss": 17.5918, + "step": 22037 + }, + { + "epoch": 0.40283693128850057, + "grad_norm": 6.143189444036186, + "learning_rate": 6.776901344578694e-06, + "loss": 
17.4019, + "step": 22038 + }, + { + "epoch": 0.4028552104849471, + "grad_norm": 6.553847836721387, + "learning_rate": 6.776624653023962e-06, + "loss": 17.5724, + "step": 22039 + }, + { + "epoch": 0.4028734896813936, + "grad_norm": 7.314977848349203, + "learning_rate": 6.776347955242315e-06, + "loss": 17.931, + "step": 22040 + }, + { + "epoch": 0.4028917688778401, + "grad_norm": 7.257845640885772, + "learning_rate": 6.776071251234724e-06, + "loss": 18.0227, + "step": 22041 + }, + { + "epoch": 0.40291004807428665, + "grad_norm": 7.347759081709609, + "learning_rate": 6.7757945410021565e-06, + "loss": 17.9248, + "step": 22042 + }, + { + "epoch": 0.4029283272707332, + "grad_norm": 7.469395251740494, + "learning_rate": 6.775517824545583e-06, + "loss": 17.7553, + "step": 22043 + }, + { + "epoch": 0.4029466064671797, + "grad_norm": 6.61346924708778, + "learning_rate": 6.775241101865975e-06, + "loss": 17.6877, + "step": 22044 + }, + { + "epoch": 0.4029648856636262, + "grad_norm": 9.551444592270448, + "learning_rate": 6.774964372964299e-06, + "loss": 18.2672, + "step": 22045 + }, + { + "epoch": 0.40298316486007274, + "grad_norm": 5.8401528513055885, + "learning_rate": 6.7746876378415286e-06, + "loss": 17.421, + "step": 22046 + }, + { + "epoch": 0.4030014440565193, + "grad_norm": 6.567797173097257, + "learning_rate": 6.77441089649863e-06, + "loss": 17.3372, + "step": 22047 + }, + { + "epoch": 0.4030197232529658, + "grad_norm": 6.330795420549813, + "learning_rate": 6.774134148936578e-06, + "loss": 17.6241, + "step": 22048 + }, + { + "epoch": 0.40303800244941235, + "grad_norm": 4.981431999872036, + "learning_rate": 6.773857395156337e-06, + "loss": 16.9476, + "step": 22049 + }, + { + "epoch": 0.4030562816458588, + "grad_norm": 7.565433370051487, + "learning_rate": 6.7735806351588805e-06, + "loss": 18.1421, + "step": 22050 + }, + { + "epoch": 0.40307456084230536, + "grad_norm": 7.49806402746438, + "learning_rate": 6.773303868945178e-06, + "loss": 18.1754, + "step": 22051 + }, + { + "epoch": 0.4030928400387519, + "grad_norm": 7.030163964983321, + "learning_rate": 6.773027096516201e-06, + "loss": 17.6668, + "step": 22052 + }, + { + "epoch": 0.40311111923519843, + "grad_norm": 7.069973587782146, + "learning_rate": 6.772750317872916e-06, + "loss": 17.7277, + "step": 22053 + }, + { + "epoch": 0.40312939843164497, + "grad_norm": 6.569088556625905, + "learning_rate": 6.772473533016294e-06, + "loss": 17.4102, + "step": 22054 + }, + { + "epoch": 0.40314767762809145, + "grad_norm": 6.114960161641345, + "learning_rate": 6.772196741947308e-06, + "loss": 17.4188, + "step": 22055 + }, + { + "epoch": 0.403165956824538, + "grad_norm": 6.1939415525572254, + "learning_rate": 6.771919944666926e-06, + "loss": 17.467, + "step": 22056 + }, + { + "epoch": 0.4031842360209845, + "grad_norm": 6.43608017716079, + "learning_rate": 6.771643141176118e-06, + "loss": 17.6242, + "step": 22057 + }, + { + "epoch": 0.40320251521743106, + "grad_norm": 6.264800078105397, + "learning_rate": 6.771366331475854e-06, + "loss": 17.4917, + "step": 22058 + }, + { + "epoch": 0.4032207944138776, + "grad_norm": 6.575052766783038, + "learning_rate": 6.771089515567105e-06, + "loss": 17.8639, + "step": 22059 + }, + { + "epoch": 0.4032390736103241, + "grad_norm": 7.931746975453965, + "learning_rate": 6.770812693450841e-06, + "loss": 18.1557, + "step": 22060 + }, + { + "epoch": 0.4032573528067706, + "grad_norm": 4.861686580927656, + "learning_rate": 6.770535865128033e-06, + "loss": 16.9002, + "step": 22061 + }, + { + "epoch": 0.40327563200321714, + 
"grad_norm": 6.135266402479794, + "learning_rate": 6.7702590305996485e-06, + "loss": 17.1294, + "step": 22062 + }, + { + "epoch": 0.4032939111996637, + "grad_norm": 6.025703055757253, + "learning_rate": 6.769982189866662e-06, + "loss": 17.4299, + "step": 22063 + }, + { + "epoch": 0.40331219039611016, + "grad_norm": 5.818448677965238, + "learning_rate": 6.7697053429300395e-06, + "loss": 17.1443, + "step": 22064 + }, + { + "epoch": 0.4033304695925567, + "grad_norm": 6.500125203523118, + "learning_rate": 6.769428489790755e-06, + "loss": 17.5327, + "step": 22065 + }, + { + "epoch": 0.40334874878900323, + "grad_norm": 6.401033609103858, + "learning_rate": 6.7691516304497775e-06, + "loss": 17.3931, + "step": 22066 + }, + { + "epoch": 0.40336702798544977, + "grad_norm": 7.247341161607492, + "learning_rate": 6.768874764908074e-06, + "loss": 17.8451, + "step": 22067 + }, + { + "epoch": 0.4033853071818963, + "grad_norm": 9.345615282623246, + "learning_rate": 6.7685978931666204e-06, + "loss": 18.7439, + "step": 22068 + }, + { + "epoch": 0.4034035863783428, + "grad_norm": 6.891614580638836, + "learning_rate": 6.768321015226385e-06, + "loss": 17.4206, + "step": 22069 + }, + { + "epoch": 0.4034218655747893, + "grad_norm": 4.931124718854705, + "learning_rate": 6.768044131088337e-06, + "loss": 17.0932, + "step": 22070 + }, + { + "epoch": 0.40344014477123585, + "grad_norm": 6.823202517333984, + "learning_rate": 6.767767240753448e-06, + "loss": 17.6827, + "step": 22071 + }, + { + "epoch": 0.4034584239676824, + "grad_norm": 5.133011928741689, + "learning_rate": 6.767490344222687e-06, + "loss": 16.9361, + "step": 22072 + }, + { + "epoch": 0.4034767031641289, + "grad_norm": 6.51980297676806, + "learning_rate": 6.767213441497028e-06, + "loss": 17.5075, + "step": 22073 + }, + { + "epoch": 0.4034949823605754, + "grad_norm": 6.3817933788838435, + "learning_rate": 6.766936532577438e-06, + "loss": 17.2291, + "step": 22074 + }, + { + "epoch": 0.40351326155702194, + "grad_norm": 6.940379698871074, + "learning_rate": 6.766659617464889e-06, + "loss": 17.7432, + "step": 22075 + }, + { + "epoch": 0.4035315407534685, + "grad_norm": 7.381287030269214, + "learning_rate": 6.766382696160351e-06, + "loss": 17.8526, + "step": 22076 + }, + { + "epoch": 0.403549819949915, + "grad_norm": 7.3041508553010015, + "learning_rate": 6.766105768664795e-06, + "loss": 17.9687, + "step": 22077 + }, + { + "epoch": 0.40356809914636155, + "grad_norm": 6.578276686090134, + "learning_rate": 6.765828834979191e-06, + "loss": 17.6986, + "step": 22078 + }, + { + "epoch": 0.403586378342808, + "grad_norm": 6.286582031636894, + "learning_rate": 6.765551895104512e-06, + "loss": 17.4138, + "step": 22079 + }, + { + "epoch": 0.40360465753925456, + "grad_norm": 6.793606781535274, + "learning_rate": 6.765274949041726e-06, + "loss": 17.7358, + "step": 22080 + }, + { + "epoch": 0.4036229367357011, + "grad_norm": 8.42867156786083, + "learning_rate": 6.764997996791803e-06, + "loss": 17.849, + "step": 22081 + }, + { + "epoch": 0.40364121593214763, + "grad_norm": 6.2925320194200465, + "learning_rate": 6.764721038355716e-06, + "loss": 17.3358, + "step": 22082 + }, + { + "epoch": 0.40365949512859417, + "grad_norm": 6.380862232199877, + "learning_rate": 6.764444073734436e-06, + "loss": 17.5864, + "step": 22083 + }, + { + "epoch": 0.40367777432504065, + "grad_norm": 5.93191903512372, + "learning_rate": 6.764167102928932e-06, + "loss": 17.2688, + "step": 22084 + }, + { + "epoch": 0.4036960535214872, + "grad_norm": 6.916394760447434, + "learning_rate": 
6.7638901259401755e-06, + "loss": 17.6485, + "step": 22085 + }, + { + "epoch": 0.4037143327179337, + "grad_norm": 7.711118810005729, + "learning_rate": 6.763613142769137e-06, + "loss": 18.2313, + "step": 22086 + }, + { + "epoch": 0.40373261191438026, + "grad_norm": 7.187299408147425, + "learning_rate": 6.763336153416787e-06, + "loss": 17.5883, + "step": 22087 + }, + { + "epoch": 0.4037508911108268, + "grad_norm": 6.224547537223937, + "learning_rate": 6.763059157884098e-06, + "loss": 17.4435, + "step": 22088 + }, + { + "epoch": 0.4037691703072733, + "grad_norm": 5.817792774023097, + "learning_rate": 6.762782156172037e-06, + "loss": 17.4436, + "step": 22089 + }, + { + "epoch": 0.4037874495037198, + "grad_norm": 7.816523837537163, + "learning_rate": 6.76250514828158e-06, + "loss": 17.8937, + "step": 22090 + }, + { + "epoch": 0.40380572870016634, + "grad_norm": 6.207199183236863, + "learning_rate": 6.762228134213695e-06, + "loss": 17.5285, + "step": 22091 + }, + { + "epoch": 0.4038240078966129, + "grad_norm": 6.839655499113275, + "learning_rate": 6.761951113969353e-06, + "loss": 17.7867, + "step": 22092 + }, + { + "epoch": 0.4038422870930594, + "grad_norm": 7.786225975050639, + "learning_rate": 6.761674087549526e-06, + "loss": 18.0969, + "step": 22093 + }, + { + "epoch": 0.4038605662895059, + "grad_norm": 6.049391074637899, + "learning_rate": 6.761397054955182e-06, + "loss": 17.4082, + "step": 22094 + }, + { + "epoch": 0.40387884548595243, + "grad_norm": 6.449817757699847, + "learning_rate": 6.761120016187296e-06, + "loss": 17.5026, + "step": 22095 + }, + { + "epoch": 0.40389712468239897, + "grad_norm": 6.807746451894597, + "learning_rate": 6.760842971246837e-06, + "loss": 17.4275, + "step": 22096 + }, + { + "epoch": 0.4039154038788455, + "grad_norm": 5.916084772017239, + "learning_rate": 6.760565920134776e-06, + "loss": 17.326, + "step": 22097 + }, + { + "epoch": 0.403933683075292, + "grad_norm": 7.6902640515359915, + "learning_rate": 6.760288862852085e-06, + "loss": 17.8501, + "step": 22098 + }, + { + "epoch": 0.4039519622717385, + "grad_norm": 7.595915409993594, + "learning_rate": 6.760011799399732e-06, + "loss": 18.0188, + "step": 22099 + }, + { + "epoch": 0.40397024146818505, + "grad_norm": 5.399474979429258, + "learning_rate": 6.759734729778693e-06, + "loss": 17.0223, + "step": 22100 + }, + { + "epoch": 0.4039885206646316, + "grad_norm": 6.034056826555642, + "learning_rate": 6.759457653989936e-06, + "loss": 17.1478, + "step": 22101 + }, + { + "epoch": 0.4040067998610781, + "grad_norm": 5.237464031625341, + "learning_rate": 6.759180572034432e-06, + "loss": 17.1195, + "step": 22102 + }, + { + "epoch": 0.4040250790575246, + "grad_norm": 6.949943876542296, + "learning_rate": 6.758903483913152e-06, + "loss": 17.529, + "step": 22103 + }, + { + "epoch": 0.40404335825397114, + "grad_norm": 5.775795031745005, + "learning_rate": 6.758626389627068e-06, + "loss": 17.3392, + "step": 22104 + }, + { + "epoch": 0.4040616374504177, + "grad_norm": 6.04868043272047, + "learning_rate": 6.7583492891771516e-06, + "loss": 17.0395, + "step": 22105 + }, + { + "epoch": 0.4040799166468642, + "grad_norm": 6.370321786337064, + "learning_rate": 6.758072182564374e-06, + "loss": 17.5069, + "step": 22106 + }, + { + "epoch": 0.40409819584331075, + "grad_norm": 6.813322830272538, + "learning_rate": 6.757795069789706e-06, + "loss": 17.2654, + "step": 22107 + }, + { + "epoch": 0.40411647503975723, + "grad_norm": 6.144323791285378, + "learning_rate": 6.757517950854118e-06, + "loss": 17.619, + "step": 22108 + }, + { + 
"epoch": 0.40413475423620376, + "grad_norm": 5.941140413558077, + "learning_rate": 6.757240825758582e-06, + "loss": 17.5066, + "step": 22109 + }, + { + "epoch": 0.4041530334326503, + "grad_norm": 6.320242546936558, + "learning_rate": 6.756963694504071e-06, + "loss": 17.3604, + "step": 22110 + }, + { + "epoch": 0.40417131262909683, + "grad_norm": 6.601216207740825, + "learning_rate": 6.756686557091554e-06, + "loss": 17.4847, + "step": 22111 + }, + { + "epoch": 0.40418959182554337, + "grad_norm": 6.657458883317337, + "learning_rate": 6.756409413522002e-06, + "loss": 17.4861, + "step": 22112 + }, + { + "epoch": 0.40420787102198985, + "grad_norm": 7.012513906515419, + "learning_rate": 6.7561322637963865e-06, + "loss": 17.7998, + "step": 22113 + }, + { + "epoch": 0.4042261502184364, + "grad_norm": 5.949843811360443, + "learning_rate": 6.755855107915683e-06, + "loss": 17.0085, + "step": 22114 + }, + { + "epoch": 0.4042444294148829, + "grad_norm": 5.812895215797421, + "learning_rate": 6.755577945880858e-06, + "loss": 17.3518, + "step": 22115 + }, + { + "epoch": 0.40426270861132946, + "grad_norm": 5.973497965673985, + "learning_rate": 6.755300777692885e-06, + "loss": 17.3914, + "step": 22116 + }, + { + "epoch": 0.404280987807776, + "grad_norm": 5.411971787295952, + "learning_rate": 6.755023603352735e-06, + "loss": 17.1019, + "step": 22117 + }, + { + "epoch": 0.4042992670042225, + "grad_norm": 5.909867899448887, + "learning_rate": 6.754746422861379e-06, + "loss": 17.1723, + "step": 22118 + }, + { + "epoch": 0.404317546200669, + "grad_norm": 7.245289313835801, + "learning_rate": 6.754469236219789e-06, + "loss": 17.984, + "step": 22119 + }, + { + "epoch": 0.40433582539711554, + "grad_norm": 6.789279055749452, + "learning_rate": 6.754192043428938e-06, + "loss": 17.5433, + "step": 22120 + }, + { + "epoch": 0.4043541045935621, + "grad_norm": 5.120970386248282, + "learning_rate": 6.753914844489795e-06, + "loss": 16.965, + "step": 22121 + }, + { + "epoch": 0.4043723837900086, + "grad_norm": 6.566986376265424, + "learning_rate": 6.753637639403332e-06, + "loss": 17.2193, + "step": 22122 + }, + { + "epoch": 0.4043906629864551, + "grad_norm": 6.546714540317617, + "learning_rate": 6.753360428170523e-06, + "loss": 17.2887, + "step": 22123 + }, + { + "epoch": 0.40440894218290163, + "grad_norm": 8.074619644270074, + "learning_rate": 6.753083210792337e-06, + "loss": 17.7437, + "step": 22124 + }, + { + "epoch": 0.40442722137934817, + "grad_norm": 6.365788857589704, + "learning_rate": 6.752805987269746e-06, + "loss": 17.5323, + "step": 22125 + }, + { + "epoch": 0.4044455005757947, + "grad_norm": 5.925189280272979, + "learning_rate": 6.752528757603722e-06, + "loss": 17.4087, + "step": 22126 + }, + { + "epoch": 0.40446377977224124, + "grad_norm": 7.873084083845511, + "learning_rate": 6.752251521795236e-06, + "loss": 18.2124, + "step": 22127 + }, + { + "epoch": 0.4044820589686877, + "grad_norm": 6.146300131736766, + "learning_rate": 6.751974279845264e-06, + "loss": 17.4707, + "step": 22128 + }, + { + "epoch": 0.40450033816513425, + "grad_norm": 5.7523359568742265, + "learning_rate": 6.751697031754772e-06, + "loss": 17.4216, + "step": 22129 + }, + { + "epoch": 0.4045186173615808, + "grad_norm": 6.698152489926536, + "learning_rate": 6.751419777524734e-06, + "loss": 17.41, + "step": 22130 + }, + { + "epoch": 0.4045368965580273, + "grad_norm": 6.921879800943967, + "learning_rate": 6.7511425171561205e-06, + "loss": 17.8194, + "step": 22131 + }, + { + "epoch": 0.4045551757544738, + "grad_norm": 6.662011338095347, + 
"learning_rate": 6.750865250649906e-06, + "loss": 17.5918, + "step": 22132 + }, + { + "epoch": 0.40457345495092034, + "grad_norm": 6.260882181315775, + "learning_rate": 6.75058797800706e-06, + "loss": 17.3202, + "step": 22133 + }, + { + "epoch": 0.4045917341473669, + "grad_norm": 5.924199633577894, + "learning_rate": 6.750310699228555e-06, + "loss": 17.4094, + "step": 22134 + }, + { + "epoch": 0.4046100133438134, + "grad_norm": 6.184279968253344, + "learning_rate": 6.750033414315363e-06, + "loss": 17.2393, + "step": 22135 + }, + { + "epoch": 0.40462829254025995, + "grad_norm": 9.379302029088581, + "learning_rate": 6.749756123268456e-06, + "loss": 18.2848, + "step": 22136 + }, + { + "epoch": 0.40464657173670643, + "grad_norm": 6.544399335645791, + "learning_rate": 6.749478826088806e-06, + "loss": 17.6162, + "step": 22137 + }, + { + "epoch": 0.40466485093315296, + "grad_norm": 5.786205780710364, + "learning_rate": 6.749201522777385e-06, + "loss": 17.2665, + "step": 22138 + }, + { + "epoch": 0.4046831301295995, + "grad_norm": 8.181619392120167, + "learning_rate": 6.748924213335163e-06, + "loss": 18.2582, + "step": 22139 + }, + { + "epoch": 0.40470140932604604, + "grad_norm": 6.457207279081686, + "learning_rate": 6.7486468977631126e-06, + "loss": 17.1033, + "step": 22140 + }, + { + "epoch": 0.40471968852249257, + "grad_norm": 7.855967774278722, + "learning_rate": 6.748369576062208e-06, + "loss": 18.189, + "step": 22141 + }, + { + "epoch": 0.40473796771893905, + "grad_norm": 4.969703936150911, + "learning_rate": 6.74809224823342e-06, + "loss": 16.9735, + "step": 22142 + }, + { + "epoch": 0.4047562469153856, + "grad_norm": 7.140640634446646, + "learning_rate": 6.74781491427772e-06, + "loss": 17.6187, + "step": 22143 + }, + { + "epoch": 0.4047745261118321, + "grad_norm": 6.037265655891585, + "learning_rate": 6.74753757419608e-06, + "loss": 17.2544, + "step": 22144 + }, + { + "epoch": 0.40479280530827866, + "grad_norm": 7.297147796514037, + "learning_rate": 6.747260227989473e-06, + "loss": 17.9993, + "step": 22145 + }, + { + "epoch": 0.4048110845047252, + "grad_norm": 9.024807169459095, + "learning_rate": 6.7469828756588694e-06, + "loss": 18.2871, + "step": 22146 + }, + { + "epoch": 0.4048293637011717, + "grad_norm": 5.0270602565412945, + "learning_rate": 6.746705517205244e-06, + "loss": 16.9534, + "step": 22147 + }, + { + "epoch": 0.4048476428976182, + "grad_norm": 5.270923813606672, + "learning_rate": 6.746428152629567e-06, + "loss": 16.99, + "step": 22148 + }, + { + "epoch": 0.40486592209406475, + "grad_norm": 6.167460161072971, + "learning_rate": 6.746150781932809e-06, + "loss": 17.4296, + "step": 22149 + }, + { + "epoch": 0.4048842012905113, + "grad_norm": 7.3482352376413695, + "learning_rate": 6.745873405115946e-06, + "loss": 18.2224, + "step": 22150 + }, + { + "epoch": 0.4049024804869578, + "grad_norm": 6.671407230482707, + "learning_rate": 6.7455960221799475e-06, + "loss": 17.0869, + "step": 22151 + }, + { + "epoch": 0.4049207596834043, + "grad_norm": 7.619510605552885, + "learning_rate": 6.745318633125788e-06, + "loss": 17.9083, + "step": 22152 + }, + { + "epoch": 0.40493903887985083, + "grad_norm": 7.018422401100924, + "learning_rate": 6.745041237954437e-06, + "loss": 17.8694, + "step": 22153 + }, + { + "epoch": 0.40495731807629737, + "grad_norm": 5.185636416302049, + "learning_rate": 6.744763836666866e-06, + "loss": 16.768, + "step": 22154 + }, + { + "epoch": 0.4049755972727439, + "grad_norm": 5.460020134601508, + "learning_rate": 6.7444864292640525e-06, + "loss": 17.1994, + "step": 
22155 + }, + { + "epoch": 0.40499387646919044, + "grad_norm": 5.8555374893900485, + "learning_rate": 6.744209015746963e-06, + "loss": 17.2155, + "step": 22156 + }, + { + "epoch": 0.4050121556656369, + "grad_norm": 5.443443031534633, + "learning_rate": 6.743931596116573e-06, + "loss": 17.1019, + "step": 22157 + }, + { + "epoch": 0.40503043486208345, + "grad_norm": 6.232530723447787, + "learning_rate": 6.743654170373855e-06, + "loss": 17.3289, + "step": 22158 + }, + { + "epoch": 0.40504871405853, + "grad_norm": 6.257351557302948, + "learning_rate": 6.743376738519779e-06, + "loss": 17.3444, + "step": 22159 + }, + { + "epoch": 0.4050669932549765, + "grad_norm": 6.7702966690245034, + "learning_rate": 6.743099300555319e-06, + "loss": 17.714, + "step": 22160 + }, + { + "epoch": 0.40508527245142306, + "grad_norm": 7.751807796514337, + "learning_rate": 6.742821856481448e-06, + "loss": 17.7319, + "step": 22161 + }, + { + "epoch": 0.40510355164786954, + "grad_norm": 6.509454448143292, + "learning_rate": 6.742544406299137e-06, + "loss": 17.506, + "step": 22162 + }, + { + "epoch": 0.4051218308443161, + "grad_norm": 7.400758878878089, + "learning_rate": 6.742266950009359e-06, + "loss": 17.9424, + "step": 22163 + }, + { + "epoch": 0.4051401100407626, + "grad_norm": 5.882275896544902, + "learning_rate": 6.741989487613087e-06, + "loss": 17.4201, + "step": 22164 + }, + { + "epoch": 0.40515838923720915, + "grad_norm": 7.6409091347877975, + "learning_rate": 6.741712019111293e-06, + "loss": 17.9477, + "step": 22165 + }, + { + "epoch": 0.40517666843365563, + "grad_norm": 6.4758461795965205, + "learning_rate": 6.74143454450495e-06, + "loss": 17.6987, + "step": 22166 + }, + { + "epoch": 0.40519494763010216, + "grad_norm": 6.2413095213566905, + "learning_rate": 6.741157063795028e-06, + "loss": 17.3144, + "step": 22167 + }, + { + "epoch": 0.4052132268265487, + "grad_norm": 6.9565224842103675, + "learning_rate": 6.740879576982505e-06, + "loss": 17.714, + "step": 22168 + }, + { + "epoch": 0.40523150602299524, + "grad_norm": 6.0106981750372634, + "learning_rate": 6.740602084068349e-06, + "loss": 17.2819, + "step": 22169 + }, + { + "epoch": 0.40524978521944177, + "grad_norm": 5.607878065581783, + "learning_rate": 6.740324585053532e-06, + "loss": 16.992, + "step": 22170 + }, + { + "epoch": 0.40526806441588825, + "grad_norm": 6.215494327541819, + "learning_rate": 6.740047079939028e-06, + "loss": 17.2968, + "step": 22171 + }, + { + "epoch": 0.4052863436123348, + "grad_norm": 6.622060494217935, + "learning_rate": 6.7397695687258115e-06, + "loss": 17.5319, + "step": 22172 + }, + { + "epoch": 0.4053046228087813, + "grad_norm": 6.28169817228459, + "learning_rate": 6.7394920514148535e-06, + "loss": 17.4997, + "step": 22173 + }, + { + "epoch": 0.40532290200522786, + "grad_norm": 6.7644607074438605, + "learning_rate": 6.739214528007126e-06, + "loss": 17.8697, + "step": 22174 + }, + { + "epoch": 0.4053411812016744, + "grad_norm": 5.951257001626463, + "learning_rate": 6.738936998503603e-06, + "loss": 17.2977, + "step": 22175 + }, + { + "epoch": 0.4053594603981209, + "grad_norm": 5.930150434365645, + "learning_rate": 6.738659462905257e-06, + "loss": 17.2478, + "step": 22176 + }, + { + "epoch": 0.4053777395945674, + "grad_norm": 5.6784804525390715, + "learning_rate": 6.738381921213061e-06, + "loss": 17.4433, + "step": 22177 + }, + { + "epoch": 0.40539601879101395, + "grad_norm": 6.64308669008464, + "learning_rate": 6.738104373427986e-06, + "loss": 17.3754, + "step": 22178 + }, + { + "epoch": 0.4054142979874605, + "grad_norm": 
5.741868228596022, + "learning_rate": 6.737826819551008e-06, + "loss": 17.1142, + "step": 22179 + }, + { + "epoch": 0.405432577183907, + "grad_norm": 5.997290038820609, + "learning_rate": 6.737549259583096e-06, + "loss": 17.3825, + "step": 22180 + }, + { + "epoch": 0.4054508563803535, + "grad_norm": 5.844866164401639, + "learning_rate": 6.7372716935252235e-06, + "loss": 17.267, + "step": 22181 + }, + { + "epoch": 0.40546913557680003, + "grad_norm": 6.673007568731624, + "learning_rate": 6.7369941213783664e-06, + "loss": 17.7447, + "step": 22182 + }, + { + "epoch": 0.40548741477324657, + "grad_norm": 6.79963815163332, + "learning_rate": 6.736716543143496e-06, + "loss": 17.7124, + "step": 22183 + }, + { + "epoch": 0.4055056939696931, + "grad_norm": 6.457810547830599, + "learning_rate": 6.736438958821584e-06, + "loss": 17.3048, + "step": 22184 + }, + { + "epoch": 0.40552397316613964, + "grad_norm": 6.999599851720819, + "learning_rate": 6.736161368413604e-06, + "loss": 17.8615, + "step": 22185 + }, + { + "epoch": 0.4055422523625861, + "grad_norm": 5.936760149888992, + "learning_rate": 6.735883771920528e-06, + "loss": 17.3027, + "step": 22186 + }, + { + "epoch": 0.40556053155903266, + "grad_norm": 6.800880808213196, + "learning_rate": 6.7356061693433314e-06, + "loss": 17.8916, + "step": 22187 + }, + { + "epoch": 0.4055788107554792, + "grad_norm": 6.662290234587748, + "learning_rate": 6.7353285606829855e-06, + "loss": 17.6892, + "step": 22188 + }, + { + "epoch": 0.4055970899519257, + "grad_norm": 6.0424886929919355, + "learning_rate": 6.7350509459404644e-06, + "loss": 17.3845, + "step": 22189 + }, + { + "epoch": 0.40561536914837226, + "grad_norm": 8.329936546732132, + "learning_rate": 6.734773325116739e-06, + "loss": 18.0189, + "step": 22190 + }, + { + "epoch": 0.40563364834481874, + "grad_norm": 6.792306073485588, + "learning_rate": 6.734495698212784e-06, + "loss": 17.8954, + "step": 22191 + }, + { + "epoch": 0.4056519275412653, + "grad_norm": 5.311830982957135, + "learning_rate": 6.734218065229572e-06, + "loss": 17.1164, + "step": 22192 + }, + { + "epoch": 0.4056702067377118, + "grad_norm": 6.775960255611034, + "learning_rate": 6.7339404261680775e-06, + "loss": 17.197, + "step": 22193 + }, + { + "epoch": 0.40568848593415835, + "grad_norm": 7.348697310052713, + "learning_rate": 6.733662781029271e-06, + "loss": 17.8498, + "step": 22194 + }, + { + "epoch": 0.4057067651306049, + "grad_norm": 8.793370791530828, + "learning_rate": 6.733385129814126e-06, + "loss": 17.9595, + "step": 22195 + }, + { + "epoch": 0.40572504432705137, + "grad_norm": 7.5304583369701, + "learning_rate": 6.733107472523618e-06, + "loss": 17.9314, + "step": 22196 + }, + { + "epoch": 0.4057433235234979, + "grad_norm": 7.494023003913464, + "learning_rate": 6.732829809158719e-06, + "loss": 17.8105, + "step": 22197 + }, + { + "epoch": 0.40576160271994444, + "grad_norm": 6.0353004471521405, + "learning_rate": 6.7325521397204005e-06, + "loss": 17.3592, + "step": 22198 + }, + { + "epoch": 0.40577988191639097, + "grad_norm": 6.462822894306157, + "learning_rate": 6.732274464209637e-06, + "loss": 17.5024, + "step": 22199 + }, + { + "epoch": 0.40579816111283745, + "grad_norm": 7.389732610475244, + "learning_rate": 6.731996782627404e-06, + "loss": 17.8692, + "step": 22200 + }, + { + "epoch": 0.405816440309284, + "grad_norm": 7.098776795984079, + "learning_rate": 6.731719094974671e-06, + "loss": 17.963, + "step": 22201 + }, + { + "epoch": 0.4058347195057305, + "grad_norm": 7.033700437195739, + "learning_rate": 6.7314414012524135e-06, + 
"loss": 17.4693, + "step": 22202 + }, + { + "epoch": 0.40585299870217706, + "grad_norm": 5.605984450205299, + "learning_rate": 6.731163701461603e-06, + "loss": 17.1704, + "step": 22203 + }, + { + "epoch": 0.4058712778986236, + "grad_norm": 6.494532560138076, + "learning_rate": 6.730885995603215e-06, + "loss": 17.3485, + "step": 22204 + }, + { + "epoch": 0.4058895570950701, + "grad_norm": 6.795673206135465, + "learning_rate": 6.730608283678222e-06, + "loss": 17.7579, + "step": 22205 + }, + { + "epoch": 0.4059078362915166, + "grad_norm": 6.104057564248977, + "learning_rate": 6.730330565687596e-06, + "loss": 17.2064, + "step": 22206 + }, + { + "epoch": 0.40592611548796315, + "grad_norm": 7.248129920360663, + "learning_rate": 6.730052841632313e-06, + "loss": 17.7974, + "step": 22207 + }, + { + "epoch": 0.4059443946844097, + "grad_norm": 7.279123128963967, + "learning_rate": 6.729775111513342e-06, + "loss": 17.781, + "step": 22208 + }, + { + "epoch": 0.4059626738808562, + "grad_norm": 6.649230320363981, + "learning_rate": 6.729497375331662e-06, + "loss": 17.9351, + "step": 22209 + }, + { + "epoch": 0.4059809530773027, + "grad_norm": 5.495569677274187, + "learning_rate": 6.729219633088244e-06, + "loss": 17.0028, + "step": 22210 + }, + { + "epoch": 0.40599923227374923, + "grad_norm": 6.623289284082819, + "learning_rate": 6.72894188478406e-06, + "loss": 17.7299, + "step": 22211 + }, + { + "epoch": 0.40601751147019577, + "grad_norm": 7.003139447383208, + "learning_rate": 6.728664130420085e-06, + "loss": 17.71, + "step": 22212 + }, + { + "epoch": 0.4060357906666423, + "grad_norm": 5.921916466668357, + "learning_rate": 6.728386369997292e-06, + "loss": 17.101, + "step": 22213 + }, + { + "epoch": 0.40605406986308884, + "grad_norm": 5.181627189879955, + "learning_rate": 6.728108603516655e-06, + "loss": 17.0198, + "step": 22214 + }, + { + "epoch": 0.4060723490595353, + "grad_norm": 8.467760984784013, + "learning_rate": 6.727830830979148e-06, + "loss": 18.6448, + "step": 22215 + }, + { + "epoch": 0.40609062825598186, + "grad_norm": 6.226974569968773, + "learning_rate": 6.727553052385742e-06, + "loss": 17.2704, + "step": 22216 + }, + { + "epoch": 0.4061089074524284, + "grad_norm": 7.620899612529509, + "learning_rate": 6.727275267737414e-06, + "loss": 17.6003, + "step": 22217 + }, + { + "epoch": 0.4061271866488749, + "grad_norm": 5.428206503269267, + "learning_rate": 6.726997477035137e-06, + "loss": 17.1095, + "step": 22218 + }, + { + "epoch": 0.40614546584532146, + "grad_norm": 6.234512034152487, + "learning_rate": 6.7267196802798814e-06, + "loss": 17.4033, + "step": 22219 + }, + { + "epoch": 0.40616374504176794, + "grad_norm": 5.91543801171165, + "learning_rate": 6.726441877472625e-06, + "loss": 17.2918, + "step": 22220 + }, + { + "epoch": 0.4061820242382145, + "grad_norm": 5.962861017427411, + "learning_rate": 6.726164068614338e-06, + "loss": 17.1846, + "step": 22221 + }, + { + "epoch": 0.406200303434661, + "grad_norm": 5.507652938256123, + "learning_rate": 6.725886253705996e-06, + "loss": 17.1938, + "step": 22222 + }, + { + "epoch": 0.40621858263110755, + "grad_norm": 6.847664016963516, + "learning_rate": 6.7256084327485735e-06, + "loss": 17.7769, + "step": 22223 + }, + { + "epoch": 0.4062368618275541, + "grad_norm": 6.7659342386056025, + "learning_rate": 6.725330605743043e-06, + "loss": 17.3308, + "step": 22224 + }, + { + "epoch": 0.40625514102400057, + "grad_norm": 7.855577634776607, + "learning_rate": 6.725052772690379e-06, + "loss": 17.9193, + "step": 22225 + }, + { + "epoch": 0.4062734202204471, + 
"grad_norm": 10.47053549414686, + "learning_rate": 6.7247749335915526e-06, + "loss": 18.4262, + "step": 22226 + }, + { + "epoch": 0.40629169941689364, + "grad_norm": 6.279469691492723, + "learning_rate": 6.724497088447541e-06, + "loss": 17.613, + "step": 22227 + }, + { + "epoch": 0.4063099786133402, + "grad_norm": 6.6329844426406765, + "learning_rate": 6.724219237259318e-06, + "loss": 17.5052, + "step": 22228 + }, + { + "epoch": 0.4063282578097867, + "grad_norm": 5.967539116521782, + "learning_rate": 6.723941380027854e-06, + "loss": 17.4117, + "step": 22229 + }, + { + "epoch": 0.4063465370062332, + "grad_norm": 6.275821818703162, + "learning_rate": 6.723663516754126e-06, + "loss": 17.66, + "step": 22230 + }, + { + "epoch": 0.4063648162026797, + "grad_norm": 6.574200769598269, + "learning_rate": 6.723385647439108e-06, + "loss": 17.7173, + "step": 22231 + }, + { + "epoch": 0.40638309539912626, + "grad_norm": 8.875882950778484, + "learning_rate": 6.7231077720837714e-06, + "loss": 18.4791, + "step": 22232 + }, + { + "epoch": 0.4064013745955728, + "grad_norm": 7.896599020986848, + "learning_rate": 6.722829890689092e-06, + "loss": 18.1513, + "step": 22233 + }, + { + "epoch": 0.4064196537920193, + "grad_norm": 6.937869112786421, + "learning_rate": 6.722552003256043e-06, + "loss": 17.5613, + "step": 22234 + }, + { + "epoch": 0.4064379329884658, + "grad_norm": 8.10140539035616, + "learning_rate": 6.722274109785599e-06, + "loss": 18.0932, + "step": 22235 + }, + { + "epoch": 0.40645621218491235, + "grad_norm": 5.71491763157779, + "learning_rate": 6.721996210278734e-06, + "loss": 17.2551, + "step": 22236 + }, + { + "epoch": 0.4064744913813589, + "grad_norm": 5.936790365388158, + "learning_rate": 6.721718304736421e-06, + "loss": 17.4498, + "step": 22237 + }, + { + "epoch": 0.4064927705778054, + "grad_norm": 6.032687222202416, + "learning_rate": 6.721440393159636e-06, + "loss": 17.2518, + "step": 22238 + }, + { + "epoch": 0.4065110497742519, + "grad_norm": 6.356822217101505, + "learning_rate": 6.721162475549351e-06, + "loss": 17.4824, + "step": 22239 + }, + { + "epoch": 0.40652932897069843, + "grad_norm": 6.64672910711208, + "learning_rate": 6.72088455190654e-06, + "loss": 17.798, + "step": 22240 + }, + { + "epoch": 0.40654760816714497, + "grad_norm": 6.381776877968177, + "learning_rate": 6.720606622232179e-06, + "loss": 17.3612, + "step": 22241 + }, + { + "epoch": 0.4065658873635915, + "grad_norm": 6.884852765220903, + "learning_rate": 6.720328686527242e-06, + "loss": 17.6881, + "step": 22242 + }, + { + "epoch": 0.40658416656003804, + "grad_norm": 6.519872071151057, + "learning_rate": 6.720050744792701e-06, + "loss": 17.3681, + "step": 22243 + }, + { + "epoch": 0.4066024457564845, + "grad_norm": 6.4910420545005465, + "learning_rate": 6.719772797029531e-06, + "loss": 17.542, + "step": 22244 + }, + { + "epoch": 0.40662072495293106, + "grad_norm": 6.679339522996079, + "learning_rate": 6.719494843238707e-06, + "loss": 17.6046, + "step": 22245 + }, + { + "epoch": 0.4066390041493776, + "grad_norm": 6.380391764448618, + "learning_rate": 6.7192168834212036e-06, + "loss": 17.5827, + "step": 22246 + }, + { + "epoch": 0.4066572833458241, + "grad_norm": 8.933926730953658, + "learning_rate": 6.718938917577993e-06, + "loss": 18.9389, + "step": 22247 + }, + { + "epoch": 0.40667556254227066, + "grad_norm": 6.058289307292574, + "learning_rate": 6.718660945710052e-06, + "loss": 17.395, + "step": 22248 + }, + { + "epoch": 0.40669384173871714, + "grad_norm": 6.329572252191765, + "learning_rate": 6.718382967818352e-06, + 
"loss": 17.6975, + "step": 22249 + }, + { + "epoch": 0.4067121209351637, + "grad_norm": 7.076438856369958, + "learning_rate": 6.718104983903869e-06, + "loss": 17.8666, + "step": 22250 + }, + { + "epoch": 0.4067304001316102, + "grad_norm": 6.23451601461986, + "learning_rate": 6.717826993967578e-06, + "loss": 17.6653, + "step": 22251 + }, + { + "epoch": 0.40674867932805675, + "grad_norm": 6.995557174581337, + "learning_rate": 6.717548998010454e-06, + "loss": 17.8749, + "step": 22252 + }, + { + "epoch": 0.4067669585245033, + "grad_norm": 7.147895900000048, + "learning_rate": 6.717270996033467e-06, + "loss": 17.6818, + "step": 22253 + }, + { + "epoch": 0.40678523772094977, + "grad_norm": 5.875860858663323, + "learning_rate": 6.716992988037594e-06, + "loss": 17.5146, + "step": 22254 + }, + { + "epoch": 0.4068035169173963, + "grad_norm": 7.237372281884785, + "learning_rate": 6.7167149740238125e-06, + "loss": 17.839, + "step": 22255 + }, + { + "epoch": 0.40682179611384284, + "grad_norm": 7.264890108518054, + "learning_rate": 6.716436953993092e-06, + "loss": 17.7052, + "step": 22256 + }, + { + "epoch": 0.4068400753102894, + "grad_norm": 6.724171135001746, + "learning_rate": 6.716158927946408e-06, + "loss": 17.6778, + "step": 22257 + }, + { + "epoch": 0.4068583545067359, + "grad_norm": 5.578468251230984, + "learning_rate": 6.715880895884738e-06, + "loss": 17.4131, + "step": 22258 + }, + { + "epoch": 0.4068766337031824, + "grad_norm": 6.560338080352419, + "learning_rate": 6.715602857809052e-06, + "loss": 17.6146, + "step": 22259 + }, + { + "epoch": 0.4068949128996289, + "grad_norm": 6.9180291916264975, + "learning_rate": 6.715324813720329e-06, + "loss": 17.9467, + "step": 22260 + }, + { + "epoch": 0.40691319209607546, + "grad_norm": 5.622453941909864, + "learning_rate": 6.715046763619541e-06, + "loss": 17.2836, + "step": 22261 + }, + { + "epoch": 0.406931471292522, + "grad_norm": 8.176080449579668, + "learning_rate": 6.714768707507662e-06, + "loss": 18.2418, + "step": 22262 + }, + { + "epoch": 0.40694975048896853, + "grad_norm": 6.274976231881443, + "learning_rate": 6.714490645385667e-06, + "loss": 17.7055, + "step": 22263 + }, + { + "epoch": 0.406968029685415, + "grad_norm": 7.577819589305502, + "learning_rate": 6.714212577254533e-06, + "loss": 18.304, + "step": 22264 + }, + { + "epoch": 0.40698630888186155, + "grad_norm": 6.573095639871201, + "learning_rate": 6.713934503115232e-06, + "loss": 17.6393, + "step": 22265 + }, + { + "epoch": 0.4070045880783081, + "grad_norm": 6.295120292715528, + "learning_rate": 6.713656422968739e-06, + "loss": 17.264, + "step": 22266 + }, + { + "epoch": 0.4070228672747546, + "grad_norm": 6.889377350472357, + "learning_rate": 6.7133783368160275e-06, + "loss": 17.6572, + "step": 22267 + }, + { + "epoch": 0.4070411464712011, + "grad_norm": 6.40510907772885, + "learning_rate": 6.713100244658075e-06, + "loss": 17.3039, + "step": 22268 + }, + { + "epoch": 0.40705942566764763, + "grad_norm": 7.383304609218675, + "learning_rate": 6.7128221464958565e-06, + "loss": 18.0031, + "step": 22269 + }, + { + "epoch": 0.40707770486409417, + "grad_norm": 6.5984351382322215, + "learning_rate": 6.712544042330342e-06, + "loss": 17.3374, + "step": 22270 + }, + { + "epoch": 0.4070959840605407, + "grad_norm": 6.941330714665212, + "learning_rate": 6.7122659321625115e-06, + "loss": 17.5744, + "step": 22271 + }, + { + "epoch": 0.40711426325698724, + "grad_norm": 5.798958432628722, + "learning_rate": 6.711987815993335e-06, + "loss": 17.1436, + "step": 22272 + }, + { + "epoch": 0.4071325424534337, 
+ "grad_norm": 7.03506626909677, + "learning_rate": 6.711709693823793e-06, + "loss": 17.9521, + "step": 22273 + }, + { + "epoch": 0.40715082164988026, + "grad_norm": 6.350586760113581, + "learning_rate": 6.7114315656548554e-06, + "loss": 17.3257, + "step": 22274 + }, + { + "epoch": 0.4071691008463268, + "grad_norm": 5.637832901288479, + "learning_rate": 6.711153431487498e-06, + "loss": 17.2569, + "step": 22275 + }, + { + "epoch": 0.40718738004277333, + "grad_norm": 5.1169365619830165, + "learning_rate": 6.710875291322697e-06, + "loss": 16.9187, + "step": 22276 + }, + { + "epoch": 0.40720565923921986, + "grad_norm": 6.370177226031568, + "learning_rate": 6.710597145161427e-06, + "loss": 17.438, + "step": 22277 + }, + { + "epoch": 0.40722393843566634, + "grad_norm": 5.952175061029821, + "learning_rate": 6.710318993004662e-06, + "loss": 17.1577, + "step": 22278 + }, + { + "epoch": 0.4072422176321129, + "grad_norm": 6.831569909064983, + "learning_rate": 6.710040834853377e-06, + "loss": 17.7127, + "step": 22279 + }, + { + "epoch": 0.4072604968285594, + "grad_norm": 6.590500632536857, + "learning_rate": 6.709762670708548e-06, + "loss": 17.4051, + "step": 22280 + }, + { + "epoch": 0.40727877602500595, + "grad_norm": 7.026313355604036, + "learning_rate": 6.709484500571148e-06, + "loss": 17.7467, + "step": 22281 + }, + { + "epoch": 0.4072970552214525, + "grad_norm": 5.977094423814589, + "learning_rate": 6.709206324442154e-06, + "loss": 17.2317, + "step": 22282 + }, + { + "epoch": 0.40731533441789897, + "grad_norm": 5.483432737920128, + "learning_rate": 6.708928142322542e-06, + "loss": 17.2138, + "step": 22283 + }, + { + "epoch": 0.4073336136143455, + "grad_norm": 6.767125319549047, + "learning_rate": 6.708649954213282e-06, + "loss": 17.7343, + "step": 22284 + }, + { + "epoch": 0.40735189281079204, + "grad_norm": 6.409230829732946, + "learning_rate": 6.708371760115354e-06, + "loss": 17.5576, + "step": 22285 + }, + { + "epoch": 0.4073701720072386, + "grad_norm": 6.469226127075871, + "learning_rate": 6.7080935600297306e-06, + "loss": 17.5401, + "step": 22286 + }, + { + "epoch": 0.4073884512036851, + "grad_norm": 6.399617496700894, + "learning_rate": 6.7078153539573874e-06, + "loss": 17.5286, + "step": 22287 + }, + { + "epoch": 0.4074067304001316, + "grad_norm": 6.639113903558887, + "learning_rate": 6.7075371418993e-06, + "loss": 17.3282, + "step": 22288 + }, + { + "epoch": 0.4074250095965781, + "grad_norm": 6.1714005240139596, + "learning_rate": 6.707258923856442e-06, + "loss": 17.3113, + "step": 22289 + }, + { + "epoch": 0.40744328879302466, + "grad_norm": 8.751944703567593, + "learning_rate": 6.706980699829791e-06, + "loss": 18.5986, + "step": 22290 + }, + { + "epoch": 0.4074615679894712, + "grad_norm": 8.630337043098338, + "learning_rate": 6.70670246982032e-06, + "loss": 18.6933, + "step": 22291 + }, + { + "epoch": 0.40747984718591773, + "grad_norm": 6.414471102860324, + "learning_rate": 6.7064242338290055e-06, + "loss": 17.7053, + "step": 22292 + }, + { + "epoch": 0.4074981263823642, + "grad_norm": 5.758931406556247, + "learning_rate": 6.706145991856823e-06, + "loss": 17.2899, + "step": 22293 + }, + { + "epoch": 0.40751640557881075, + "grad_norm": 5.454292115298666, + "learning_rate": 6.705867743904744e-06, + "loss": 17.2698, + "step": 22294 + }, + { + "epoch": 0.4075346847752573, + "grad_norm": 6.140112711077134, + "learning_rate": 6.705589489973748e-06, + "loss": 17.2706, + "step": 22295 + }, + { + "epoch": 0.4075529639717038, + "grad_norm": 6.569895549115014, + "learning_rate": 
6.705311230064809e-06, + "loss": 17.3748, + "step": 22296 + }, + { + "epoch": 0.40757124316815035, + "grad_norm": 5.985081258725076, + "learning_rate": 6.705032964178903e-06, + "loss": 17.5517, + "step": 22297 + }, + { + "epoch": 0.40758952236459683, + "grad_norm": 8.285743999725959, + "learning_rate": 6.704754692317004e-06, + "loss": 18.3401, + "step": 22298 + }, + { + "epoch": 0.40760780156104337, + "grad_norm": 6.897872490167506, + "learning_rate": 6.7044764144800865e-06, + "loss": 17.6131, + "step": 22299 + }, + { + "epoch": 0.4076260807574899, + "grad_norm": 6.236834655809541, + "learning_rate": 6.704198130669128e-06, + "loss": 17.6524, + "step": 22300 + }, + { + "epoch": 0.40764435995393644, + "grad_norm": 6.979269261389349, + "learning_rate": 6.703919840885104e-06, + "loss": 17.826, + "step": 22301 + }, + { + "epoch": 0.4076626391503829, + "grad_norm": 7.847914823017599, + "learning_rate": 6.703641545128987e-06, + "loss": 17.8131, + "step": 22302 + }, + { + "epoch": 0.40768091834682946, + "grad_norm": 7.6746039941179545, + "learning_rate": 6.703363243401755e-06, + "loss": 18.1945, + "step": 22303 + }, + { + "epoch": 0.407699197543276, + "grad_norm": 7.115684433826195, + "learning_rate": 6.703084935704383e-06, + "loss": 18.0043, + "step": 22304 + }, + { + "epoch": 0.40771747673972253, + "grad_norm": 5.602538289522343, + "learning_rate": 6.7028066220378455e-06, + "loss": 17.1549, + "step": 22305 + }, + { + "epoch": 0.40773575593616906, + "grad_norm": 5.5303830787105435, + "learning_rate": 6.702528302403118e-06, + "loss": 17.3422, + "step": 22306 + }, + { + "epoch": 0.40775403513261554, + "grad_norm": 6.849153816525019, + "learning_rate": 6.702249976801179e-06, + "loss": 17.395, + "step": 22307 + }, + { + "epoch": 0.4077723143290621, + "grad_norm": 6.642799615662462, + "learning_rate": 6.701971645232998e-06, + "loss": 17.6234, + "step": 22308 + }, + { + "epoch": 0.4077905935255086, + "grad_norm": 7.963889722521995, + "learning_rate": 6.701693307699556e-06, + "loss": 18.139, + "step": 22309 + }, + { + "epoch": 0.40780887272195515, + "grad_norm": 5.621074242739525, + "learning_rate": 6.701414964201828e-06, + "loss": 17.2245, + "step": 22310 + }, + { + "epoch": 0.4078271519184017, + "grad_norm": 5.369137339102429, + "learning_rate": 6.701136614740786e-06, + "loss": 17.2033, + "step": 22311 + }, + { + "epoch": 0.40784543111484817, + "grad_norm": 6.241584735197689, + "learning_rate": 6.700858259317409e-06, + "loss": 17.3039, + "step": 22312 + }, + { + "epoch": 0.4078637103112947, + "grad_norm": 6.024766009967673, + "learning_rate": 6.700579897932669e-06, + "loss": 17.3971, + "step": 22313 + }, + { + "epoch": 0.40788198950774124, + "grad_norm": 5.472173140584315, + "learning_rate": 6.700301530587547e-06, + "loss": 16.9745, + "step": 22314 + }, + { + "epoch": 0.4079002687041878, + "grad_norm": 6.249329870649448, + "learning_rate": 6.700023157283014e-06, + "loss": 17.3968, + "step": 22315 + }, + { + "epoch": 0.4079185479006343, + "grad_norm": 6.131460890280038, + "learning_rate": 6.6997447780200484e-06, + "loss": 17.4808, + "step": 22316 + }, + { + "epoch": 0.4079368270970808, + "grad_norm": 6.929633668418552, + "learning_rate": 6.699466392799624e-06, + "loss": 17.867, + "step": 22317 + }, + { + "epoch": 0.4079551062935273, + "grad_norm": 7.152533941069182, + "learning_rate": 6.6991880016227185e-06, + "loss": 17.9286, + "step": 22318 + }, + { + "epoch": 0.40797338548997386, + "grad_norm": 5.60451819455061, + "learning_rate": 6.698909604490304e-06, + "loss": 17.3974, + "step": 22319 + }, + { + 
"epoch": 0.4079916646864204, + "grad_norm": 5.387246173205216, + "learning_rate": 6.698631201403362e-06, + "loss": 17.1566, + "step": 22320 + }, + { + "epoch": 0.40800994388286693, + "grad_norm": 7.480777182546839, + "learning_rate": 6.698352792362863e-06, + "loss": 17.9302, + "step": 22321 + }, + { + "epoch": 0.4080282230793134, + "grad_norm": 5.409482611641295, + "learning_rate": 6.698074377369786e-06, + "loss": 17.2391, + "step": 22322 + }, + { + "epoch": 0.40804650227575995, + "grad_norm": 6.5794473185928055, + "learning_rate": 6.697795956425104e-06, + "loss": 17.4969, + "step": 22323 + }, + { + "epoch": 0.4080647814722065, + "grad_norm": 6.755483181362662, + "learning_rate": 6.6975175295297964e-06, + "loss": 17.7137, + "step": 22324 + }, + { + "epoch": 0.408083060668653, + "grad_norm": 7.968078334194961, + "learning_rate": 6.697239096684837e-06, + "loss": 18.395, + "step": 22325 + }, + { + "epoch": 0.40810133986509955, + "grad_norm": 5.974473819363313, + "learning_rate": 6.696960657891199e-06, + "loss": 17.447, + "step": 22326 + }, + { + "epoch": 0.40811961906154604, + "grad_norm": 6.771331882821818, + "learning_rate": 6.696682213149864e-06, + "loss": 17.6519, + "step": 22327 + }, + { + "epoch": 0.40813789825799257, + "grad_norm": 6.071063538193178, + "learning_rate": 6.696403762461805e-06, + "loss": 17.2803, + "step": 22328 + }, + { + "epoch": 0.4081561774544391, + "grad_norm": 5.61989192197024, + "learning_rate": 6.6961253058279975e-06, + "loss": 17.1686, + "step": 22329 + }, + { + "epoch": 0.40817445665088564, + "grad_norm": 7.3972282820148205, + "learning_rate": 6.695846843249418e-06, + "loss": 17.6974, + "step": 22330 + }, + { + "epoch": 0.4081927358473322, + "grad_norm": 6.140066467892387, + "learning_rate": 6.695568374727042e-06, + "loss": 17.3713, + "step": 22331 + }, + { + "epoch": 0.40821101504377866, + "grad_norm": 7.578357768826225, + "learning_rate": 6.695289900261847e-06, + "loss": 17.9596, + "step": 22332 + }, + { + "epoch": 0.4082292942402252, + "grad_norm": 6.018563344980862, + "learning_rate": 6.6950114198548065e-06, + "loss": 17.2115, + "step": 22333 + }, + { + "epoch": 0.40824757343667173, + "grad_norm": 5.8043348744124135, + "learning_rate": 6.694732933506899e-06, + "loss": 17.4287, + "step": 22334 + }, + { + "epoch": 0.40826585263311826, + "grad_norm": 6.321735976651865, + "learning_rate": 6.694454441219099e-06, + "loss": 17.6359, + "step": 22335 + }, + { + "epoch": 0.40828413182956474, + "grad_norm": 5.996166355033848, + "learning_rate": 6.694175942992385e-06, + "loss": 17.3165, + "step": 22336 + }, + { + "epoch": 0.4083024110260113, + "grad_norm": 6.517665764543585, + "learning_rate": 6.69389743882773e-06, + "loss": 17.792, + "step": 22337 + }, + { + "epoch": 0.4083206902224578, + "grad_norm": 6.617217406232964, + "learning_rate": 6.693618928726112e-06, + "loss": 17.7017, + "step": 22338 + }, + { + "epoch": 0.40833896941890435, + "grad_norm": 6.2426880409963506, + "learning_rate": 6.693340412688506e-06, + "loss": 17.3849, + "step": 22339 + }, + { + "epoch": 0.4083572486153509, + "grad_norm": 6.739043792869497, + "learning_rate": 6.6930618907158885e-06, + "loss": 17.6292, + "step": 22340 + }, + { + "epoch": 0.40837552781179737, + "grad_norm": 6.193059522850531, + "learning_rate": 6.692783362809237e-06, + "loss": 17.6269, + "step": 22341 + }, + { + "epoch": 0.4083938070082439, + "grad_norm": 8.52446269237601, + "learning_rate": 6.692504828969526e-06, + "loss": 18.5218, + "step": 22342 + }, + { + "epoch": 0.40841208620469044, + "grad_norm": 8.91671047362675, + 
"learning_rate": 6.692226289197732e-06, + "loss": 18.3849, + "step": 22343 + }, + { + "epoch": 0.408430365401137, + "grad_norm": 7.340285727156134, + "learning_rate": 6.691947743494834e-06, + "loss": 18.0694, + "step": 22344 + }, + { + "epoch": 0.4084486445975835, + "grad_norm": 7.706295893171392, + "learning_rate": 6.691669191861803e-06, + "loss": 17.5781, + "step": 22345 + }, + { + "epoch": 0.40846692379403, + "grad_norm": 5.679200024921107, + "learning_rate": 6.691390634299619e-06, + "loss": 17.2387, + "step": 22346 + }, + { + "epoch": 0.4084852029904765, + "grad_norm": 6.6261268333057055, + "learning_rate": 6.691112070809258e-06, + "loss": 17.3913, + "step": 22347 + }, + { + "epoch": 0.40850348218692306, + "grad_norm": 6.510059106698121, + "learning_rate": 6.690833501391697e-06, + "loss": 17.3602, + "step": 22348 + }, + { + "epoch": 0.4085217613833696, + "grad_norm": 6.95710940300285, + "learning_rate": 6.69055492604791e-06, + "loss": 17.7947, + "step": 22349 + }, + { + "epoch": 0.40854004057981613, + "grad_norm": 7.624081240706674, + "learning_rate": 6.690276344778875e-06, + "loss": 17.8705, + "step": 22350 + }, + { + "epoch": 0.4085583197762626, + "grad_norm": 7.093745915585537, + "learning_rate": 6.689997757585568e-06, + "loss": 17.3084, + "step": 22351 + }, + { + "epoch": 0.40857659897270915, + "grad_norm": 5.625715074646376, + "learning_rate": 6.689719164468967e-06, + "loss": 17.3643, + "step": 22352 + }, + { + "epoch": 0.4085948781691557, + "grad_norm": 6.754054896581014, + "learning_rate": 6.689440565430044e-06, + "loss": 17.7577, + "step": 22353 + }, + { + "epoch": 0.4086131573656022, + "grad_norm": 6.159552515619195, + "learning_rate": 6.68916196046978e-06, + "loss": 17.4922, + "step": 22354 + }, + { + "epoch": 0.40863143656204876, + "grad_norm": 6.114677035172412, + "learning_rate": 6.688883349589151e-06, + "loss": 17.4217, + "step": 22355 + }, + { + "epoch": 0.40864971575849524, + "grad_norm": 6.560570286473523, + "learning_rate": 6.688604732789131e-06, + "loss": 17.772, + "step": 22356 + }, + { + "epoch": 0.40866799495494177, + "grad_norm": 5.437434771663608, + "learning_rate": 6.6883261100706985e-06, + "loss": 17.0448, + "step": 22357 + }, + { + "epoch": 0.4086862741513883, + "grad_norm": 6.938271909855448, + "learning_rate": 6.6880474814348285e-06, + "loss": 17.8065, + "step": 22358 + }, + { + "epoch": 0.40870455334783484, + "grad_norm": 7.007961642145968, + "learning_rate": 6.687768846882501e-06, + "loss": 17.8035, + "step": 22359 + }, + { + "epoch": 0.4087228325442814, + "grad_norm": 6.780869384648125, + "learning_rate": 6.687490206414689e-06, + "loss": 17.6165, + "step": 22360 + }, + { + "epoch": 0.40874111174072786, + "grad_norm": 6.877968688405431, + "learning_rate": 6.687211560032368e-06, + "loss": 17.798, + "step": 22361 + }, + { + "epoch": 0.4087593909371744, + "grad_norm": 5.986241171382858, + "learning_rate": 6.686932907736518e-06, + "loss": 17.3656, + "step": 22362 + }, + { + "epoch": 0.40877767013362093, + "grad_norm": 6.660632585050558, + "learning_rate": 6.686654249528116e-06, + "loss": 17.5422, + "step": 22363 + }, + { + "epoch": 0.40879594933006747, + "grad_norm": 6.241903161658502, + "learning_rate": 6.686375585408137e-06, + "loss": 17.5906, + "step": 22364 + }, + { + "epoch": 0.408814228526514, + "grad_norm": 5.598750220896844, + "learning_rate": 6.686096915377557e-06, + "loss": 17.164, + "step": 22365 + }, + { + "epoch": 0.4088325077229605, + "grad_norm": 6.348137366423458, + "learning_rate": 6.685818239437355e-06, + "loss": 17.6996, + "step": 22366 + 
}, + { + "epoch": 0.408850786919407, + "grad_norm": 6.0425301995955, + "learning_rate": 6.685539557588504e-06, + "loss": 17.452, + "step": 22367 + }, + { + "epoch": 0.40886906611585355, + "grad_norm": 6.012375619513796, + "learning_rate": 6.685260869831984e-06, + "loss": 17.445, + "step": 22368 + }, + { + "epoch": 0.4088873453123001, + "grad_norm": 6.472431702978844, + "learning_rate": 6.684982176168773e-06, + "loss": 17.4102, + "step": 22369 + }, + { + "epoch": 0.40890562450874657, + "grad_norm": 6.952900079004711, + "learning_rate": 6.684703476599844e-06, + "loss": 17.6931, + "step": 22370 + }, + { + "epoch": 0.4089239037051931, + "grad_norm": 6.195808589104972, + "learning_rate": 6.684424771126176e-06, + "loss": 17.4166, + "step": 22371 + }, + { + "epoch": 0.40894218290163964, + "grad_norm": 6.543064768324731, + "learning_rate": 6.684146059748743e-06, + "loss": 17.9098, + "step": 22372 + }, + { + "epoch": 0.4089604620980862, + "grad_norm": 4.7435375951410075, + "learning_rate": 6.683867342468528e-06, + "loss": 16.8005, + "step": 22373 + }, + { + "epoch": 0.4089787412945327, + "grad_norm": 5.4370720825517616, + "learning_rate": 6.683588619286501e-06, + "loss": 17.1992, + "step": 22374 + }, + { + "epoch": 0.4089970204909792, + "grad_norm": 6.171659769049901, + "learning_rate": 6.683309890203643e-06, + "loss": 17.4659, + "step": 22375 + }, + { + "epoch": 0.4090152996874257, + "grad_norm": 5.8467812409448845, + "learning_rate": 6.683031155220931e-06, + "loss": 17.288, + "step": 22376 + }, + { + "epoch": 0.40903357888387226, + "grad_norm": 7.396621978798048, + "learning_rate": 6.682752414339339e-06, + "loss": 18.0265, + "step": 22377 + }, + { + "epoch": 0.4090518580803188, + "grad_norm": 6.049778369575566, + "learning_rate": 6.682473667559847e-06, + "loss": 17.7492, + "step": 22378 + }, + { + "epoch": 0.40907013727676533, + "grad_norm": 6.371293358168965, + "learning_rate": 6.682194914883431e-06, + "loss": 17.5876, + "step": 22379 + }, + { + "epoch": 0.4090884164732118, + "grad_norm": 6.525303404567865, + "learning_rate": 6.681916156311068e-06, + "loss": 17.7739, + "step": 22380 + }, + { + "epoch": 0.40910669566965835, + "grad_norm": 7.041370823694498, + "learning_rate": 6.681637391843732e-06, + "loss": 17.8203, + "step": 22381 + }, + { + "epoch": 0.4091249748661049, + "grad_norm": 6.3008761005745155, + "learning_rate": 6.681358621482405e-06, + "loss": 17.5252, + "step": 22382 + }, + { + "epoch": 0.4091432540625514, + "grad_norm": 7.765356271583535, + "learning_rate": 6.6810798452280635e-06, + "loss": 18.2771, + "step": 22383 + }, + { + "epoch": 0.40916153325899796, + "grad_norm": 6.365913360250412, + "learning_rate": 6.680801063081681e-06, + "loss": 17.4948, + "step": 22384 + }, + { + "epoch": 0.40917981245544444, + "grad_norm": 6.318444715530758, + "learning_rate": 6.6805222750442366e-06, + "loss": 17.4903, + "step": 22385 + }, + { + "epoch": 0.40919809165189097, + "grad_norm": 6.2602657409691345, + "learning_rate": 6.680243481116708e-06, + "loss": 17.344, + "step": 22386 + }, + { + "epoch": 0.4092163708483375, + "grad_norm": 7.076340057322319, + "learning_rate": 6.679964681300073e-06, + "loss": 17.7203, + "step": 22387 + }, + { + "epoch": 0.40923465004478404, + "grad_norm": 7.4483993347309765, + "learning_rate": 6.679685875595305e-06, + "loss": 18.0464, + "step": 22388 + }, + { + "epoch": 0.4092529292412306, + "grad_norm": 8.177442978394508, + "learning_rate": 6.679407064003386e-06, + "loss": 18.2687, + "step": 22389 + }, + { + "epoch": 0.40927120843767706, + "grad_norm": 
5.67031421466407, + "learning_rate": 6.6791282465252895e-06, + "loss": 17.2061, + "step": 22390 + }, + { + "epoch": 0.4092894876341236, + "grad_norm": 6.073479551777875, + "learning_rate": 6.678849423161995e-06, + "loss": 17.29, + "step": 22391 + }, + { + "epoch": 0.40930776683057013, + "grad_norm": 5.908413856599679, + "learning_rate": 6.678570593914478e-06, + "loss": 17.412, + "step": 22392 + }, + { + "epoch": 0.40932604602701667, + "grad_norm": 6.850784777996676, + "learning_rate": 6.678291758783719e-06, + "loss": 17.8967, + "step": 22393 + }, + { + "epoch": 0.4093443252234632, + "grad_norm": 7.8328601566665, + "learning_rate": 6.6780129177706895e-06, + "loss": 18.6999, + "step": 22394 + }, + { + "epoch": 0.4093626044199097, + "grad_norm": 6.272933520527842, + "learning_rate": 6.677734070876373e-06, + "loss": 17.6513, + "step": 22395 + }, + { + "epoch": 0.4093808836163562, + "grad_norm": 6.514965334561195, + "learning_rate": 6.677455218101743e-06, + "loss": 17.4764, + "step": 22396 + }, + { + "epoch": 0.40939916281280275, + "grad_norm": 6.005791253197453, + "learning_rate": 6.67717635944778e-06, + "loss": 17.0802, + "step": 22397 + }, + { + "epoch": 0.4094174420092493, + "grad_norm": 6.7383995685433895, + "learning_rate": 6.676897494915457e-06, + "loss": 17.8608, + "step": 22398 + }, + { + "epoch": 0.4094357212056958, + "grad_norm": 8.845025797527725, + "learning_rate": 6.6766186245057544e-06, + "loss": 17.5709, + "step": 22399 + }, + { + "epoch": 0.4094540004021423, + "grad_norm": 6.539898475467322, + "learning_rate": 6.67633974821965e-06, + "loss": 17.4487, + "step": 22400 + }, + { + "epoch": 0.40947227959858884, + "grad_norm": 6.476897413372039, + "learning_rate": 6.67606086605812e-06, + "loss": 17.3881, + "step": 22401 + }, + { + "epoch": 0.4094905587950354, + "grad_norm": 6.139891906179132, + "learning_rate": 6.675781978022141e-06, + "loss": 17.5398, + "step": 22402 + }, + { + "epoch": 0.4095088379914819, + "grad_norm": 6.82874764899981, + "learning_rate": 6.675503084112692e-06, + "loss": 17.5812, + "step": 22403 + }, + { + "epoch": 0.4095271171879284, + "grad_norm": 5.798182208057855, + "learning_rate": 6.675224184330751e-06, + "loss": 17.6063, + "step": 22404 + }, + { + "epoch": 0.4095453963843749, + "grad_norm": 6.5577458900205485, + "learning_rate": 6.674945278677294e-06, + "loss": 17.4989, + "step": 22405 + }, + { + "epoch": 0.40956367558082146, + "grad_norm": 6.039979097365193, + "learning_rate": 6.674666367153299e-06, + "loss": 17.5457, + "step": 22406 + }, + { + "epoch": 0.409581954777268, + "grad_norm": 7.492943735372451, + "learning_rate": 6.674387449759744e-06, + "loss": 18.4872, + "step": 22407 + }, + { + "epoch": 0.40960023397371453, + "grad_norm": 5.967778949376407, + "learning_rate": 6.674108526497605e-06, + "loss": 17.2267, + "step": 22408 + }, + { + "epoch": 0.409618513170161, + "grad_norm": 6.551757154325815, + "learning_rate": 6.673829597367862e-06, + "loss": 17.5845, + "step": 22409 + }, + { + "epoch": 0.40963679236660755, + "grad_norm": 6.43448251059287, + "learning_rate": 6.673550662371491e-06, + "loss": 17.5492, + "step": 22410 + }, + { + "epoch": 0.4096550715630541, + "grad_norm": 5.390735848441046, + "learning_rate": 6.673271721509471e-06, + "loss": 17.1281, + "step": 22411 + }, + { + "epoch": 0.4096733507595006, + "grad_norm": 5.562757320291179, + "learning_rate": 6.672992774782779e-06, + "loss": 17.3111, + "step": 22412 + }, + { + "epoch": 0.40969162995594716, + "grad_norm": 7.848481641711909, + "learning_rate": 6.672713822192392e-06, + "loss": 17.7411, + 
"step": 22413 + }, + { + "epoch": 0.40970990915239364, + "grad_norm": 5.705962222544973, + "learning_rate": 6.672434863739288e-06, + "loss": 17.0998, + "step": 22414 + }, + { + "epoch": 0.40972818834884017, + "grad_norm": 8.514607143462618, + "learning_rate": 6.672155899424445e-06, + "loss": 18.5722, + "step": 22415 + }, + { + "epoch": 0.4097464675452867, + "grad_norm": 5.8988134022637535, + "learning_rate": 6.6718769292488406e-06, + "loss": 17.197, + "step": 22416 + }, + { + "epoch": 0.40976474674173324, + "grad_norm": 6.690551317855109, + "learning_rate": 6.6715979532134535e-06, + "loss": 17.8692, + "step": 22417 + }, + { + "epoch": 0.4097830259381798, + "grad_norm": 7.3224953502855, + "learning_rate": 6.67131897131926e-06, + "loss": 18.1417, + "step": 22418 + }, + { + "epoch": 0.40980130513462626, + "grad_norm": 7.578450397318651, + "learning_rate": 6.671039983567238e-06, + "loss": 17.9822, + "step": 22419 + }, + { + "epoch": 0.4098195843310728, + "grad_norm": 7.047372506277028, + "learning_rate": 6.670760989958366e-06, + "loss": 17.5138, + "step": 22420 + }, + { + "epoch": 0.40983786352751933, + "grad_norm": 6.500958699879609, + "learning_rate": 6.670481990493621e-06, + "loss": 17.5217, + "step": 22421 + }, + { + "epoch": 0.40985614272396587, + "grad_norm": 6.591173975816295, + "learning_rate": 6.670202985173983e-06, + "loss": 17.4485, + "step": 22422 + }, + { + "epoch": 0.4098744219204124, + "grad_norm": 6.497177345976398, + "learning_rate": 6.669923974000429e-06, + "loss": 17.5386, + "step": 22423 + }, + { + "epoch": 0.4098927011168589, + "grad_norm": 5.355490015264795, + "learning_rate": 6.669644956973935e-06, + "loss": 17.0498, + "step": 22424 + }, + { + "epoch": 0.4099109803133054, + "grad_norm": 6.149706652710731, + "learning_rate": 6.6693659340954804e-06, + "loss": 17.5281, + "step": 22425 + }, + { + "epoch": 0.40992925950975195, + "grad_norm": 5.630134258395935, + "learning_rate": 6.669086905366043e-06, + "loss": 17.379, + "step": 22426 + }, + { + "epoch": 0.4099475387061985, + "grad_norm": 6.22113460738467, + "learning_rate": 6.668807870786601e-06, + "loss": 17.5218, + "step": 22427 + }, + { + "epoch": 0.409965817902645, + "grad_norm": 6.225699203355149, + "learning_rate": 6.668528830358134e-06, + "loss": 17.3983, + "step": 22428 + }, + { + "epoch": 0.4099840970990915, + "grad_norm": 6.5146956328387775, + "learning_rate": 6.668249784081616e-06, + "loss": 17.6844, + "step": 22429 + }, + { + "epoch": 0.41000237629553804, + "grad_norm": 7.210863668792879, + "learning_rate": 6.667970731958029e-06, + "loss": 17.667, + "step": 22430 + }, + { + "epoch": 0.4100206554919846, + "grad_norm": 7.243398059648909, + "learning_rate": 6.667691673988348e-06, + "loss": 17.4272, + "step": 22431 + }, + { + "epoch": 0.4100389346884311, + "grad_norm": 5.536042701374809, + "learning_rate": 6.667412610173552e-06, + "loss": 17.3022, + "step": 22432 + }, + { + "epoch": 0.41005721388487765, + "grad_norm": 6.944375066789045, + "learning_rate": 6.667133540514621e-06, + "loss": 17.4673, + "step": 22433 + }, + { + "epoch": 0.4100754930813241, + "grad_norm": 6.800971125614902, + "learning_rate": 6.66685446501253e-06, + "loss": 17.7474, + "step": 22434 + }, + { + "epoch": 0.41009377227777066, + "grad_norm": 5.855007406131806, + "learning_rate": 6.666575383668259e-06, + "loss": 17.2631, + "step": 22435 + }, + { + "epoch": 0.4101120514742172, + "grad_norm": 7.458053534102518, + "learning_rate": 6.666296296482787e-06, + "loss": 17.7256, + "step": 22436 + }, + { + "epoch": 0.41013033067066373, + "grad_norm": 
6.04020474663933, + "learning_rate": 6.66601720345709e-06, + "loss": 17.214, + "step": 22437 + }, + { + "epoch": 0.4101486098671102, + "grad_norm": 7.315070036328204, + "learning_rate": 6.665738104592149e-06, + "loss": 17.8686, + "step": 22438 + }, + { + "epoch": 0.41016688906355675, + "grad_norm": 6.991834659225314, + "learning_rate": 6.66545899988894e-06, + "loss": 17.6729, + "step": 22439 + }, + { + "epoch": 0.4101851682600033, + "grad_norm": 6.1356000894297935, + "learning_rate": 6.665179889348438e-06, + "loss": 17.457, + "step": 22440 + }, + { + "epoch": 0.4102034474564498, + "grad_norm": 7.530821124138062, + "learning_rate": 6.66490077297163e-06, + "loss": 18.1345, + "step": 22441 + }, + { + "epoch": 0.41022172665289636, + "grad_norm": 7.91404525806891, + "learning_rate": 6.664621650759487e-06, + "loss": 18.0275, + "step": 22442 + }, + { + "epoch": 0.41024000584934284, + "grad_norm": 6.957079326907108, + "learning_rate": 6.66434252271299e-06, + "loss": 17.6925, + "step": 22443 + }, + { + "epoch": 0.4102582850457894, + "grad_norm": 6.195271870605417, + "learning_rate": 6.664063388833116e-06, + "loss": 17.381, + "step": 22444 + }, + { + "epoch": 0.4102765642422359, + "grad_norm": 6.692512343516527, + "learning_rate": 6.663784249120846e-06, + "loss": 17.422, + "step": 22445 + }, + { + "epoch": 0.41029484343868244, + "grad_norm": 4.662301471960729, + "learning_rate": 6.663505103577155e-06, + "loss": 16.8443, + "step": 22446 + }, + { + "epoch": 0.410313122635129, + "grad_norm": 6.161177716814195, + "learning_rate": 6.663225952203023e-06, + "loss": 17.5143, + "step": 22447 + }, + { + "epoch": 0.41033140183157546, + "grad_norm": 7.114737601813586, + "learning_rate": 6.66294679499943e-06, + "loss": 17.5671, + "step": 22448 + }, + { + "epoch": 0.410349681028022, + "grad_norm": 6.490208520600908, + "learning_rate": 6.662667631967351e-06, + "loss": 17.5853, + "step": 22449 + }, + { + "epoch": 0.41036796022446853, + "grad_norm": 6.788196061136019, + "learning_rate": 6.6623884631077664e-06, + "loss": 17.6763, + "step": 22450 + }, + { + "epoch": 0.41038623942091507, + "grad_norm": 7.399119123919406, + "learning_rate": 6.6621092884216555e-06, + "loss": 18.0581, + "step": 22451 + }, + { + "epoch": 0.4104045186173616, + "grad_norm": 5.616390064467137, + "learning_rate": 6.661830107909996e-06, + "loss": 17.32, + "step": 22452 + }, + { + "epoch": 0.4104227978138081, + "grad_norm": 8.262323097739829, + "learning_rate": 6.661550921573764e-06, + "loss": 18.1139, + "step": 22453 + }, + { + "epoch": 0.4104410770102546, + "grad_norm": 6.9899293270499285, + "learning_rate": 6.661271729413942e-06, + "loss": 17.7583, + "step": 22454 + }, + { + "epoch": 0.41045935620670115, + "grad_norm": 7.651297782205858, + "learning_rate": 6.660992531431507e-06, + "loss": 18.151, + "step": 22455 + }, + { + "epoch": 0.4104776354031477, + "grad_norm": 6.046370442855944, + "learning_rate": 6.660713327627437e-06, + "loss": 17.24, + "step": 22456 + }, + { + "epoch": 0.4104959145995942, + "grad_norm": 6.8742298685763, + "learning_rate": 6.66043411800271e-06, + "loss": 17.7414, + "step": 22457 + }, + { + "epoch": 0.4105141937960407, + "grad_norm": 8.311854084271925, + "learning_rate": 6.660154902558304e-06, + "loss": 18.2518, + "step": 22458 + }, + { + "epoch": 0.41053247299248724, + "grad_norm": 7.01890204473356, + "learning_rate": 6.6598756812952026e-06, + "loss": 17.697, + "step": 22459 + }, + { + "epoch": 0.4105507521889338, + "grad_norm": 6.646090362366263, + "learning_rate": 6.65959645421438e-06, + "loss": 17.7471, + "step": 
22460 + }, + { + "epoch": 0.4105690313853803, + "grad_norm": 5.416482398601893, + "learning_rate": 6.659317221316815e-06, + "loss": 17.3344, + "step": 22461 + }, + { + "epoch": 0.41058731058182685, + "grad_norm": 5.019278069723802, + "learning_rate": 6.659037982603488e-06, + "loss": 16.9551, + "step": 22462 + }, + { + "epoch": 0.4106055897782733, + "grad_norm": 6.676420183258173, + "learning_rate": 6.658758738075376e-06, + "loss": 17.7794, + "step": 22463 + }, + { + "epoch": 0.41062386897471986, + "grad_norm": 6.048997414497438, + "learning_rate": 6.658479487733459e-06, + "loss": 17.308, + "step": 22464 + }, + { + "epoch": 0.4106421481711664, + "grad_norm": 7.98950980278614, + "learning_rate": 6.6582002315787155e-06, + "loss": 17.9655, + "step": 22465 + }, + { + "epoch": 0.41066042736761293, + "grad_norm": 5.991122565262289, + "learning_rate": 6.657920969612124e-06, + "loss": 17.2028, + "step": 22466 + }, + { + "epoch": 0.41067870656405947, + "grad_norm": 7.535391185851954, + "learning_rate": 6.657641701834663e-06, + "loss": 18.0067, + "step": 22467 + }, + { + "epoch": 0.41069698576050595, + "grad_norm": 7.395234063393955, + "learning_rate": 6.657362428247311e-06, + "loss": 17.7825, + "step": 22468 + }, + { + "epoch": 0.4107152649569525, + "grad_norm": 6.148243444602376, + "learning_rate": 6.65708314885105e-06, + "loss": 17.6456, + "step": 22469 + }, + { + "epoch": 0.410733544153399, + "grad_norm": 6.940659798109537, + "learning_rate": 6.656803863646855e-06, + "loss": 17.3144, + "step": 22470 + }, + { + "epoch": 0.41075182334984556, + "grad_norm": 5.984508152192915, + "learning_rate": 6.656524572635705e-06, + "loss": 17.3747, + "step": 22471 + }, + { + "epoch": 0.41077010254629204, + "grad_norm": 8.984030284838962, + "learning_rate": 6.65624527581858e-06, + "loss": 18.2754, + "step": 22472 + }, + { + "epoch": 0.4107883817427386, + "grad_norm": 6.874324901779238, + "learning_rate": 6.655965973196461e-06, + "loss": 17.8326, + "step": 22473 + }, + { + "epoch": 0.4108066609391851, + "grad_norm": 6.649152355038819, + "learning_rate": 6.655686664770324e-06, + "loss": 17.6961, + "step": 22474 + }, + { + "epoch": 0.41082494013563164, + "grad_norm": 8.136475477998427, + "learning_rate": 6.6554073505411495e-06, + "loss": 17.9943, + "step": 22475 + }, + { + "epoch": 0.4108432193320782, + "grad_norm": 6.43012584151685, + "learning_rate": 6.655128030509915e-06, + "loss": 17.5833, + "step": 22476 + }, + { + "epoch": 0.41086149852852466, + "grad_norm": 7.342883675959253, + "learning_rate": 6.654848704677601e-06, + "loss": 17.7023, + "step": 22477 + }, + { + "epoch": 0.4108797777249712, + "grad_norm": 8.305732709692458, + "learning_rate": 6.654569373045185e-06, + "loss": 18.3501, + "step": 22478 + }, + { + "epoch": 0.41089805692141773, + "grad_norm": 5.804968513432319, + "learning_rate": 6.654290035613649e-06, + "loss": 17.3626, + "step": 22479 + }, + { + "epoch": 0.41091633611786427, + "grad_norm": 6.065199581638211, + "learning_rate": 6.654010692383967e-06, + "loss": 17.1871, + "step": 22480 + }, + { + "epoch": 0.4109346153143108, + "grad_norm": 5.798473532906206, + "learning_rate": 6.653731343357123e-06, + "loss": 17.3126, + "step": 22481 + }, + { + "epoch": 0.4109528945107573, + "grad_norm": 6.415907022619494, + "learning_rate": 6.653451988534094e-06, + "loss": 17.5288, + "step": 22482 + }, + { + "epoch": 0.4109711737072038, + "grad_norm": 7.033875247971466, + "learning_rate": 6.6531726279158595e-06, + "loss": 17.7119, + "step": 22483 + }, + { + "epoch": 0.41098945290365035, + "grad_norm": 
8.652061811686448, + "learning_rate": 6.652893261503398e-06, + "loss": 18.2471, + "step": 22484 + }, + { + "epoch": 0.4110077321000969, + "grad_norm": 6.00480941005815, + "learning_rate": 6.6526138892976875e-06, + "loss": 17.4825, + "step": 22485 + }, + { + "epoch": 0.4110260112965434, + "grad_norm": 6.899236062989019, + "learning_rate": 6.652334511299712e-06, + "loss": 17.4574, + "step": 22486 + }, + { + "epoch": 0.4110442904929899, + "grad_norm": 6.592482875960514, + "learning_rate": 6.652055127510445e-06, + "loss": 17.6955, + "step": 22487 + }, + { + "epoch": 0.41106256968943644, + "grad_norm": 5.799762215987938, + "learning_rate": 6.651775737930869e-06, + "loss": 17.2804, + "step": 22488 + }, + { + "epoch": 0.411080848885883, + "grad_norm": 6.8970588756919256, + "learning_rate": 6.651496342561962e-06, + "loss": 17.3843, + "step": 22489 + }, + { + "epoch": 0.4110991280823295, + "grad_norm": 6.318700315418928, + "learning_rate": 6.651216941404703e-06, + "loss": 17.7247, + "step": 22490 + }, + { + "epoch": 0.41111740727877605, + "grad_norm": 6.513451357168261, + "learning_rate": 6.650937534460074e-06, + "loss": 17.484, + "step": 22491 + }, + { + "epoch": 0.41113568647522253, + "grad_norm": 6.453364609413719, + "learning_rate": 6.65065812172905e-06, + "loss": 17.474, + "step": 22492 + }, + { + "epoch": 0.41115396567166906, + "grad_norm": 5.901676580688798, + "learning_rate": 6.650378703212614e-06, + "loss": 17.3427, + "step": 22493 + }, + { + "epoch": 0.4111722448681156, + "grad_norm": 6.035202696120195, + "learning_rate": 6.650099278911742e-06, + "loss": 17.5816, + "step": 22494 + }, + { + "epoch": 0.41119052406456214, + "grad_norm": 6.328909776131016, + "learning_rate": 6.649819848827417e-06, + "loss": 17.407, + "step": 22495 + }, + { + "epoch": 0.41120880326100867, + "grad_norm": 5.54156689266959, + "learning_rate": 6.649540412960616e-06, + "loss": 17.1902, + "step": 22496 + }, + { + "epoch": 0.41122708245745515, + "grad_norm": 6.867040320968979, + "learning_rate": 6.649260971312319e-06, + "loss": 17.8289, + "step": 22497 + }, + { + "epoch": 0.4112453616539017, + "grad_norm": 5.781974445936039, + "learning_rate": 6.648981523883506e-06, + "loss": 17.2794, + "step": 22498 + }, + { + "epoch": 0.4112636408503482, + "grad_norm": 6.866986400203878, + "learning_rate": 6.6487020706751535e-06, + "loss": 17.5703, + "step": 22499 + }, + { + "epoch": 0.41128192004679476, + "grad_norm": 6.25314561124634, + "learning_rate": 6.648422611688247e-06, + "loss": 17.3703, + "step": 22500 + }, + { + "epoch": 0.4113001992432413, + "grad_norm": 5.527027959992802, + "learning_rate": 6.64814314692376e-06, + "loss": 17.0699, + "step": 22501 + }, + { + "epoch": 0.4113184784396878, + "grad_norm": 9.991418748524842, + "learning_rate": 6.6478636763826745e-06, + "loss": 17.6793, + "step": 22502 + }, + { + "epoch": 0.4113367576361343, + "grad_norm": 6.23830929250894, + "learning_rate": 6.647584200065971e-06, + "loss": 17.2719, + "step": 22503 + }, + { + "epoch": 0.41135503683258084, + "grad_norm": 7.572286135394841, + "learning_rate": 6.647304717974626e-06, + "loss": 18.0421, + "step": 22504 + }, + { + "epoch": 0.4113733160290274, + "grad_norm": 5.261262791016685, + "learning_rate": 6.647025230109622e-06, + "loss": 17.0682, + "step": 22505 + }, + { + "epoch": 0.41139159522547386, + "grad_norm": 6.678915311011433, + "learning_rate": 6.646745736471936e-06, + "loss": 17.4591, + "step": 22506 + }, + { + "epoch": 0.4114098744219204, + "grad_norm": 5.753913399849667, + "learning_rate": 6.646466237062551e-06, + "loss": 
17.1814, + "step": 22507 + }, + { + "epoch": 0.41142815361836693, + "grad_norm": 9.14217943399806, + "learning_rate": 6.646186731882444e-06, + "loss": 18.6355, + "step": 22508 + }, + { + "epoch": 0.41144643281481347, + "grad_norm": 6.604336182941888, + "learning_rate": 6.645907220932595e-06, + "loss": 17.7191, + "step": 22509 + }, + { + "epoch": 0.41146471201126, + "grad_norm": 5.813419040298178, + "learning_rate": 6.645627704213985e-06, + "loss": 17.2996, + "step": 22510 + }, + { + "epoch": 0.4114829912077065, + "grad_norm": 6.552131724035272, + "learning_rate": 6.645348181727594e-06, + "loss": 17.4184, + "step": 22511 + }, + { + "epoch": 0.411501270404153, + "grad_norm": 6.598285296345405, + "learning_rate": 6.645068653474396e-06, + "loss": 17.5236, + "step": 22512 + }, + { + "epoch": 0.41151954960059955, + "grad_norm": 5.676660695960994, + "learning_rate": 6.644789119455377e-06, + "loss": 16.8865, + "step": 22513 + }, + { + "epoch": 0.4115378287970461, + "grad_norm": 5.061233180440918, + "learning_rate": 6.644509579671517e-06, + "loss": 16.8625, + "step": 22514 + }, + { + "epoch": 0.4115561079934926, + "grad_norm": 6.514822594679821, + "learning_rate": 6.644230034123792e-06, + "loss": 17.4267, + "step": 22515 + }, + { + "epoch": 0.4115743871899391, + "grad_norm": 4.932665565234066, + "learning_rate": 6.643950482813184e-06, + "loss": 17.12, + "step": 22516 + }, + { + "epoch": 0.41159266638638564, + "grad_norm": 6.643103736082339, + "learning_rate": 6.643670925740672e-06, + "loss": 17.9247, + "step": 22517 + }, + { + "epoch": 0.4116109455828322, + "grad_norm": 6.605073545749215, + "learning_rate": 6.643391362907235e-06, + "loss": 17.5981, + "step": 22518 + }, + { + "epoch": 0.4116292247792787, + "grad_norm": 7.392426403764373, + "learning_rate": 6.643111794313855e-06, + "loss": 17.9218, + "step": 22519 + }, + { + "epoch": 0.41164750397572525, + "grad_norm": 6.84728674931774, + "learning_rate": 6.6428322199615106e-06, + "loss": 17.656, + "step": 22520 + }, + { + "epoch": 0.41166578317217173, + "grad_norm": 5.647548514640339, + "learning_rate": 6.64255263985118e-06, + "loss": 17.2701, + "step": 22521 + }, + { + "epoch": 0.41168406236861826, + "grad_norm": 5.552168435205575, + "learning_rate": 6.642273053983848e-06, + "loss": 17.0794, + "step": 22522 + }, + { + "epoch": 0.4117023415650648, + "grad_norm": 6.78696796718912, + "learning_rate": 6.64199346236049e-06, + "loss": 17.6392, + "step": 22523 + }, + { + "epoch": 0.41172062076151134, + "grad_norm": 7.092229779419543, + "learning_rate": 6.641713864982088e-06, + "loss": 17.7869, + "step": 22524 + }, + { + "epoch": 0.41173889995795787, + "grad_norm": 5.618666480432428, + "learning_rate": 6.641434261849621e-06, + "loss": 17.4101, + "step": 22525 + }, + { + "epoch": 0.41175717915440435, + "grad_norm": 6.476638868176701, + "learning_rate": 6.641154652964068e-06, + "loss": 17.6316, + "step": 22526 + }, + { + "epoch": 0.4117754583508509, + "grad_norm": 7.067195271851774, + "learning_rate": 6.640875038326411e-06, + "loss": 18.0322, + "step": 22527 + }, + { + "epoch": 0.4117937375472974, + "grad_norm": 6.82609681223036, + "learning_rate": 6.640595417937631e-06, + "loss": 17.7255, + "step": 22528 + }, + { + "epoch": 0.41181201674374396, + "grad_norm": 7.708298233999794, + "learning_rate": 6.640315791798705e-06, + "loss": 17.8175, + "step": 22529 + }, + { + "epoch": 0.4118302959401905, + "grad_norm": 5.429983974169377, + "learning_rate": 6.640036159910614e-06, + "loss": 17.2022, + "step": 22530 + }, + { + "epoch": 0.411848575136637, + "grad_norm": 
6.477489049801571, + "learning_rate": 6.639756522274341e-06, + "loss": 17.4828, + "step": 22531 + }, + { + "epoch": 0.4118668543330835, + "grad_norm": 7.084888439645354, + "learning_rate": 6.639476878890862e-06, + "loss": 17.7816, + "step": 22532 + }, + { + "epoch": 0.41188513352953005, + "grad_norm": 6.6279879840863645, + "learning_rate": 6.639197229761158e-06, + "loss": 17.7472, + "step": 22533 + }, + { + "epoch": 0.4119034127259766, + "grad_norm": 6.007976655677513, + "learning_rate": 6.638917574886211e-06, + "loss": 17.5309, + "step": 22534 + }, + { + "epoch": 0.4119216919224231, + "grad_norm": 6.343015233689347, + "learning_rate": 6.6386379142669996e-06, + "loss": 17.784, + "step": 22535 + }, + { + "epoch": 0.4119399711188696, + "grad_norm": 6.820796339191304, + "learning_rate": 6.638358247904505e-06, + "loss": 17.9703, + "step": 22536 + }, + { + "epoch": 0.41195825031531613, + "grad_norm": 5.85890992128528, + "learning_rate": 6.638078575799707e-06, + "loss": 17.3764, + "step": 22537 + }, + { + "epoch": 0.41197652951176267, + "grad_norm": 7.074606037217186, + "learning_rate": 6.637798897953585e-06, + "loss": 17.8454, + "step": 22538 + }, + { + "epoch": 0.4119948087082092, + "grad_norm": 5.784132235908096, + "learning_rate": 6.637519214367121e-06, + "loss": 17.1954, + "step": 22539 + }, + { + "epoch": 0.4120130879046557, + "grad_norm": 6.025932167870897, + "learning_rate": 6.637239525041293e-06, + "loss": 17.3466, + "step": 22540 + }, + { + "epoch": 0.4120313671011022, + "grad_norm": 6.169035862579544, + "learning_rate": 6.636959829977083e-06, + "loss": 17.4327, + "step": 22541 + }, + { + "epoch": 0.41204964629754876, + "grad_norm": 6.650060216978216, + "learning_rate": 6.636680129175472e-06, + "loss": 17.3783, + "step": 22542 + }, + { + "epoch": 0.4120679254939953, + "grad_norm": 5.154164921642721, + "learning_rate": 6.636400422637439e-06, + "loss": 16.9665, + "step": 22543 + }, + { + "epoch": 0.4120862046904418, + "grad_norm": 7.4621241910548815, + "learning_rate": 6.636120710363964e-06, + "loss": 18.1499, + "step": 22544 + }, + { + "epoch": 0.4121044838868883, + "grad_norm": 7.432794839233043, + "learning_rate": 6.635840992356026e-06, + "loss": 17.9092, + "step": 22545 + }, + { + "epoch": 0.41212276308333484, + "grad_norm": 7.170814548989489, + "learning_rate": 6.63556126861461e-06, + "loss": 17.5366, + "step": 22546 + }, + { + "epoch": 0.4121410422797814, + "grad_norm": 5.518909409711679, + "learning_rate": 6.635281539140692e-06, + "loss": 17.0978, + "step": 22547 + }, + { + "epoch": 0.4121593214762279, + "grad_norm": 6.302062730524471, + "learning_rate": 6.635001803935255e-06, + "loss": 17.6605, + "step": 22548 + }, + { + "epoch": 0.41217760067267445, + "grad_norm": 7.471207997707816, + "learning_rate": 6.634722062999278e-06, + "loss": 17.9399, + "step": 22549 + }, + { + "epoch": 0.41219587986912093, + "grad_norm": 5.324470003932659, + "learning_rate": 6.634442316333742e-06, + "loss": 17.2086, + "step": 22550 + }, + { + "epoch": 0.41221415906556746, + "grad_norm": 5.9771071974674905, + "learning_rate": 6.634162563939628e-06, + "loss": 17.3712, + "step": 22551 + }, + { + "epoch": 0.412232438262014, + "grad_norm": 6.7774724499917225, + "learning_rate": 6.633882805817917e-06, + "loss": 17.7359, + "step": 22552 + }, + { + "epoch": 0.41225071745846054, + "grad_norm": 7.551931887991059, + "learning_rate": 6.6336030419695866e-06, + "loss": 17.8439, + "step": 22553 + }, + { + "epoch": 0.41226899665490707, + "grad_norm": 5.753257938800097, + "learning_rate": 6.63332327239562e-06, + "loss": 
17.4086, + "step": 22554 + }, + { + "epoch": 0.41228727585135355, + "grad_norm": 5.866038870768499, + "learning_rate": 6.633043497096998e-06, + "loss": 17.457, + "step": 22555 + }, + { + "epoch": 0.4123055550478001, + "grad_norm": 6.0536502638895, + "learning_rate": 6.632763716074699e-06, + "loss": 17.304, + "step": 22556 + }, + { + "epoch": 0.4123238342442466, + "grad_norm": 7.563518144366697, + "learning_rate": 6.632483929329705e-06, + "loss": 17.8347, + "step": 22557 + }, + { + "epoch": 0.41234211344069316, + "grad_norm": 8.167575832029081, + "learning_rate": 6.6322041368629965e-06, + "loss": 18.2212, + "step": 22558 + }, + { + "epoch": 0.4123603926371397, + "grad_norm": 6.886400060823418, + "learning_rate": 6.631924338675555e-06, + "loss": 18.0066, + "step": 22559 + }, + { + "epoch": 0.4123786718335862, + "grad_norm": 5.423171002706399, + "learning_rate": 6.63164453476836e-06, + "loss": 17.2012, + "step": 22560 + }, + { + "epoch": 0.4123969510300327, + "grad_norm": 6.1445295548696315, + "learning_rate": 6.631364725142392e-06, + "loss": 17.5894, + "step": 22561 + }, + { + "epoch": 0.41241523022647925, + "grad_norm": 7.212889865114428, + "learning_rate": 6.631084909798632e-06, + "loss": 17.8984, + "step": 22562 + }, + { + "epoch": 0.4124335094229258, + "grad_norm": 9.16653149833562, + "learning_rate": 6.630805088738061e-06, + "loss": 17.6354, + "step": 22563 + }, + { + "epoch": 0.4124517886193723, + "grad_norm": 6.325290511361483, + "learning_rate": 6.63052526196166e-06, + "loss": 17.5876, + "step": 22564 + }, + { + "epoch": 0.4124700678158188, + "grad_norm": 5.763074934803377, + "learning_rate": 6.63024542947041e-06, + "loss": 17.3363, + "step": 22565 + }, + { + "epoch": 0.41248834701226533, + "grad_norm": 5.958746031648419, + "learning_rate": 6.629965591265292e-06, + "loss": 17.223, + "step": 22566 + }, + { + "epoch": 0.41250662620871187, + "grad_norm": 6.515658349181106, + "learning_rate": 6.629685747347283e-06, + "loss": 17.6717, + "step": 22567 + }, + { + "epoch": 0.4125249054051584, + "grad_norm": 4.946023024333121, + "learning_rate": 6.629405897717368e-06, + "loss": 16.9446, + "step": 22568 + }, + { + "epoch": 0.41254318460160494, + "grad_norm": 5.283187874761963, + "learning_rate": 6.629126042376528e-06, + "loss": 17.1899, + "step": 22569 + }, + { + "epoch": 0.4125614637980514, + "grad_norm": 7.248605819013886, + "learning_rate": 6.628846181325742e-06, + "loss": 17.7481, + "step": 22570 + }, + { + "epoch": 0.41257974299449796, + "grad_norm": 7.405447399579446, + "learning_rate": 6.628566314565992e-06, + "loss": 17.8948, + "step": 22571 + }, + { + "epoch": 0.4125980221909445, + "grad_norm": 6.6967074034598175, + "learning_rate": 6.628286442098256e-06, + "loss": 17.7341, + "step": 22572 + }, + { + "epoch": 0.412616301387391, + "grad_norm": 5.734497313651601, + "learning_rate": 6.62800656392352e-06, + "loss": 17.2537, + "step": 22573 + }, + { + "epoch": 0.4126345805838375, + "grad_norm": 7.758596269655583, + "learning_rate": 6.627726680042762e-06, + "loss": 17.9411, + "step": 22574 + }, + { + "epoch": 0.41265285978028404, + "grad_norm": 5.201702593488365, + "learning_rate": 6.6274467904569615e-06, + "loss": 16.9402, + "step": 22575 + }, + { + "epoch": 0.4126711389767306, + "grad_norm": 5.9675413553645065, + "learning_rate": 6.627166895167103e-06, + "loss": 17.3452, + "step": 22576 + }, + { + "epoch": 0.4126894181731771, + "grad_norm": 7.291423129238422, + "learning_rate": 6.626886994174165e-06, + "loss": 17.779, + "step": 22577 + }, + { + "epoch": 0.41270769736962365, + "grad_norm": 
5.968892468616462, + "learning_rate": 6.626607087479129e-06, + "loss": 17.5132, + "step": 22578 + }, + { + "epoch": 0.41272597656607013, + "grad_norm": 7.170439939026732, + "learning_rate": 6.6263271750829775e-06, + "loss": 17.7694, + "step": 22579 + }, + { + "epoch": 0.41274425576251667, + "grad_norm": 7.318670095737872, + "learning_rate": 6.626047256986688e-06, + "loss": 17.9435, + "step": 22580 + }, + { + "epoch": 0.4127625349589632, + "grad_norm": 6.036756291847952, + "learning_rate": 6.625767333191247e-06, + "loss": 17.398, + "step": 22581 + }, + { + "epoch": 0.41278081415540974, + "grad_norm": 6.053125609980945, + "learning_rate": 6.62548740369763e-06, + "loss": 17.3572, + "step": 22582 + }, + { + "epoch": 0.41279909335185627, + "grad_norm": 6.199717892438728, + "learning_rate": 6.625207468506822e-06, + "loss": 17.5492, + "step": 22583 + }, + { + "epoch": 0.41281737254830275, + "grad_norm": 6.698844190195537, + "learning_rate": 6.624927527619803e-06, + "loss": 17.5677, + "step": 22584 + }, + { + "epoch": 0.4128356517447493, + "grad_norm": 5.988876906425944, + "learning_rate": 6.624647581037553e-06, + "loss": 17.344, + "step": 22585 + }, + { + "epoch": 0.4128539309411958, + "grad_norm": 6.045429597912229, + "learning_rate": 6.624367628761056e-06, + "loss": 17.4643, + "step": 22586 + }, + { + "epoch": 0.41287221013764236, + "grad_norm": 6.42182730756578, + "learning_rate": 6.624087670791291e-06, + "loss": 17.6234, + "step": 22587 + }, + { + "epoch": 0.4128904893340889, + "grad_norm": 6.986066200367678, + "learning_rate": 6.623807707129237e-06, + "loss": 17.4393, + "step": 22588 + }, + { + "epoch": 0.4129087685305354, + "grad_norm": 5.657003922026771, + "learning_rate": 6.623527737775881e-06, + "loss": 17.2285, + "step": 22589 + }, + { + "epoch": 0.4129270477269819, + "grad_norm": 6.25354650988634, + "learning_rate": 6.623247762732199e-06, + "loss": 17.6109, + "step": 22590 + }, + { + "epoch": 0.41294532692342845, + "grad_norm": 6.1299618904758315, + "learning_rate": 6.622967781999175e-06, + "loss": 17.2798, + "step": 22591 + }, + { + "epoch": 0.412963606119875, + "grad_norm": 6.8638793367227775, + "learning_rate": 6.622687795577792e-06, + "loss": 17.8593, + "step": 22592 + }, + { + "epoch": 0.4129818853163215, + "grad_norm": 6.796283468696494, + "learning_rate": 6.622407803469027e-06, + "loss": 17.9025, + "step": 22593 + }, + { + "epoch": 0.413000164512768, + "grad_norm": 7.761046709983375, + "learning_rate": 6.622127805673863e-06, + "loss": 17.8826, + "step": 22594 + }, + { + "epoch": 0.41301844370921453, + "grad_norm": 5.004190494024626, + "learning_rate": 6.621847802193282e-06, + "loss": 17.0119, + "step": 22595 + }, + { + "epoch": 0.41303672290566107, + "grad_norm": 7.200378132739967, + "learning_rate": 6.621567793028265e-06, + "loss": 17.9817, + "step": 22596 + }, + { + "epoch": 0.4130550021021076, + "grad_norm": 5.330772260455991, + "learning_rate": 6.621287778179795e-06, + "loss": 17.1643, + "step": 22597 + }, + { + "epoch": 0.41307328129855414, + "grad_norm": 6.535466779561626, + "learning_rate": 6.621007757648852e-06, + "loss": 17.729, + "step": 22598 + }, + { + "epoch": 0.4130915604950006, + "grad_norm": 5.811170757742771, + "learning_rate": 6.620727731436416e-06, + "loss": 17.0422, + "step": 22599 + }, + { + "epoch": 0.41310983969144716, + "grad_norm": 6.824860401716977, + "learning_rate": 6.62044769954347e-06, + "loss": 17.7706, + "step": 22600 + }, + { + "epoch": 0.4131281188878937, + "grad_norm": 5.996010384567158, + "learning_rate": 6.620167661970998e-06, + "loss": 
17.5312, + "step": 22601 + }, + { + "epoch": 0.4131463980843402, + "grad_norm": 7.0419128847561, + "learning_rate": 6.619887618719977e-06, + "loss": 17.9146, + "step": 22602 + }, + { + "epoch": 0.41316467728078676, + "grad_norm": 6.673918239797463, + "learning_rate": 6.61960756979139e-06, + "loss": 17.6691, + "step": 22603 + }, + { + "epoch": 0.41318295647723324, + "grad_norm": 5.993621475464369, + "learning_rate": 6.619327515186219e-06, + "loss": 17.352, + "step": 22604 + }, + { + "epoch": 0.4132012356736798, + "grad_norm": 6.092149993554768, + "learning_rate": 6.619047454905446e-06, + "loss": 17.5204, + "step": 22605 + }, + { + "epoch": 0.4132195148701263, + "grad_norm": 6.303515551780367, + "learning_rate": 6.618767388950052e-06, + "loss": 17.4342, + "step": 22606 + }, + { + "epoch": 0.41323779406657285, + "grad_norm": 7.357594247325871, + "learning_rate": 6.6184873173210194e-06, + "loss": 17.9901, + "step": 22607 + }, + { + "epoch": 0.41325607326301933, + "grad_norm": 8.321893973483117, + "learning_rate": 6.61820724001933e-06, + "loss": 18.4359, + "step": 22608 + }, + { + "epoch": 0.41327435245946587, + "grad_norm": 5.0807070169529585, + "learning_rate": 6.6179271570459625e-06, + "loss": 17.0118, + "step": 22609 + }, + { + "epoch": 0.4132926316559124, + "grad_norm": 7.203675890097184, + "learning_rate": 6.617647068401902e-06, + "loss": 18.179, + "step": 22610 + }, + { + "epoch": 0.41331091085235894, + "grad_norm": 5.735684038570406, + "learning_rate": 6.61736697408813e-06, + "loss": 17.46, + "step": 22611 + }, + { + "epoch": 0.4133291900488055, + "grad_norm": 6.064406790187848, + "learning_rate": 6.6170868741056235e-06, + "loss": 17.3494, + "step": 22612 + }, + { + "epoch": 0.41334746924525195, + "grad_norm": 6.664329183395076, + "learning_rate": 6.616806768455371e-06, + "loss": 17.6179, + "step": 22613 + }, + { + "epoch": 0.4133657484416985, + "grad_norm": 6.3572287918374055, + "learning_rate": 6.61652665713835e-06, + "loss": 17.4921, + "step": 22614 + }, + { + "epoch": 0.413384027638145, + "grad_norm": 6.907970743755561, + "learning_rate": 6.616246540155544e-06, + "loss": 17.6579, + "step": 22615 + }, + { + "epoch": 0.41340230683459156, + "grad_norm": 6.736745296231206, + "learning_rate": 6.615966417507933e-06, + "loss": 17.8245, + "step": 22616 + }, + { + "epoch": 0.4134205860310381, + "grad_norm": 6.102058842223797, + "learning_rate": 6.615686289196501e-06, + "loss": 17.5284, + "step": 22617 + }, + { + "epoch": 0.4134388652274846, + "grad_norm": 6.820874290130761, + "learning_rate": 6.615406155222228e-06, + "loss": 17.5447, + "step": 22618 + }, + { + "epoch": 0.4134571444239311, + "grad_norm": 6.451908386244689, + "learning_rate": 6.615126015586097e-06, + "loss": 17.7669, + "step": 22619 + }, + { + "epoch": 0.41347542362037765, + "grad_norm": 8.368993765242212, + "learning_rate": 6.614845870289089e-06, + "loss": 18.1534, + "step": 22620 + }, + { + "epoch": 0.4134937028168242, + "grad_norm": 6.6094965163326735, + "learning_rate": 6.614565719332187e-06, + "loss": 17.5565, + "step": 22621 + }, + { + "epoch": 0.4135119820132707, + "grad_norm": 8.729927958239205, + "learning_rate": 6.614285562716372e-06, + "loss": 18.5272, + "step": 22622 + }, + { + "epoch": 0.4135302612097172, + "grad_norm": 7.685832120660212, + "learning_rate": 6.614005400442625e-06, + "loss": 17.6718, + "step": 22623 + }, + { + "epoch": 0.41354854040616373, + "grad_norm": 5.114941706725837, + "learning_rate": 6.613725232511931e-06, + "loss": 17.1031, + "step": 22624 + }, + { + "epoch": 0.41356681960261027, + 
"grad_norm": 7.073382172132256, + "learning_rate": 6.613445058925271e-06, + "loss": 17.626, + "step": 22625 + }, + { + "epoch": 0.4135850987990568, + "grad_norm": 6.521550957369006, + "learning_rate": 6.613164879683622e-06, + "loss": 17.5834, + "step": 22626 + }, + { + "epoch": 0.41360337799550334, + "grad_norm": 5.709561474608981, + "learning_rate": 6.612884694787973e-06, + "loss": 17.4551, + "step": 22627 + }, + { + "epoch": 0.4136216571919498, + "grad_norm": 6.014032259728497, + "learning_rate": 6.612604504239304e-06, + "loss": 17.3407, + "step": 22628 + }, + { + "epoch": 0.41363993638839636, + "grad_norm": 6.536297970602517, + "learning_rate": 6.612324308038595e-06, + "loss": 17.4338, + "step": 22629 + }, + { + "epoch": 0.4136582155848429, + "grad_norm": 6.439188938365965, + "learning_rate": 6.612044106186829e-06, + "loss": 17.4706, + "step": 22630 + }, + { + "epoch": 0.4136764947812894, + "grad_norm": 6.860891132068323, + "learning_rate": 6.611763898684989e-06, + "loss": 17.84, + "step": 22631 + }, + { + "epoch": 0.41369477397773596, + "grad_norm": 6.780814567751688, + "learning_rate": 6.611483685534054e-06, + "loss": 17.4697, + "step": 22632 + }, + { + "epoch": 0.41371305317418244, + "grad_norm": 7.741936908099513, + "learning_rate": 6.61120346673501e-06, + "loss": 18.2421, + "step": 22633 + }, + { + "epoch": 0.413731332370629, + "grad_norm": 8.460448600785817, + "learning_rate": 6.610923242288838e-06, + "loss": 18.2521, + "step": 22634 + }, + { + "epoch": 0.4137496115670755, + "grad_norm": 9.804278378568787, + "learning_rate": 6.6106430121965206e-06, + "loss": 18.8881, + "step": 22635 + }, + { + "epoch": 0.41376789076352205, + "grad_norm": 6.469584170545846, + "learning_rate": 6.610362776459038e-06, + "loss": 17.5042, + "step": 22636 + }, + { + "epoch": 0.4137861699599686, + "grad_norm": 6.491743775339879, + "learning_rate": 6.610082535077373e-06, + "loss": 17.6091, + "step": 22637 + }, + { + "epoch": 0.41380444915641507, + "grad_norm": 6.723904337174401, + "learning_rate": 6.6098022880525114e-06, + "loss": 17.8345, + "step": 22638 + }, + { + "epoch": 0.4138227283528616, + "grad_norm": 6.348440816127864, + "learning_rate": 6.609522035385429e-06, + "loss": 17.4658, + "step": 22639 + }, + { + "epoch": 0.41384100754930814, + "grad_norm": 6.6049482488077915, + "learning_rate": 6.6092417770771135e-06, + "loss": 17.7139, + "step": 22640 + }, + { + "epoch": 0.4138592867457547, + "grad_norm": 7.664834529279627, + "learning_rate": 6.608961513128544e-06, + "loss": 18.1336, + "step": 22641 + }, + { + "epoch": 0.41387756594220115, + "grad_norm": 8.67237503839013, + "learning_rate": 6.608681243540706e-06, + "loss": 18.5005, + "step": 22642 + }, + { + "epoch": 0.4138958451386477, + "grad_norm": 6.90125030437421, + "learning_rate": 6.608400968314578e-06, + "loss": 17.5234, + "step": 22643 + }, + { + "epoch": 0.4139141243350942, + "grad_norm": 6.710162254357177, + "learning_rate": 6.608120687451144e-06, + "loss": 17.7885, + "step": 22644 + }, + { + "epoch": 0.41393240353154076, + "grad_norm": 7.94811464687793, + "learning_rate": 6.607840400951387e-06, + "loss": 18.3258, + "step": 22645 + }, + { + "epoch": 0.4139506827279873, + "grad_norm": 6.864843794923079, + "learning_rate": 6.60756010881629e-06, + "loss": 17.9661, + "step": 22646 + }, + { + "epoch": 0.4139689619244338, + "grad_norm": 5.348178509871798, + "learning_rate": 6.607279811046834e-06, + "loss": 17.0711, + "step": 22647 + }, + { + "epoch": 0.4139872411208803, + "grad_norm": 5.843528393840664, + "learning_rate": 6.6069995076440004e-06, + 
"loss": 17.3442, + "step": 22648 + }, + { + "epoch": 0.41400552031732685, + "grad_norm": 7.0493975084425164, + "learning_rate": 6.606719198608775e-06, + "loss": 17.751, + "step": 22649 + }, + { + "epoch": 0.4140237995137734, + "grad_norm": 6.850352787803978, + "learning_rate": 6.606438883942136e-06, + "loss": 17.8465, + "step": 22650 + }, + { + "epoch": 0.4140420787102199, + "grad_norm": 6.794192267071566, + "learning_rate": 6.606158563645069e-06, + "loss": 18.0543, + "step": 22651 + }, + { + "epoch": 0.4140603579066664, + "grad_norm": 5.6260671342242246, + "learning_rate": 6.605878237718557e-06, + "loss": 17.2493, + "step": 22652 + }, + { + "epoch": 0.41407863710311293, + "grad_norm": 5.133847299703579, + "learning_rate": 6.605597906163579e-06, + "loss": 17.0291, + "step": 22653 + }, + { + "epoch": 0.41409691629955947, + "grad_norm": 6.321677314254794, + "learning_rate": 6.60531756898112e-06, + "loss": 17.601, + "step": 22654 + }, + { + "epoch": 0.414115195496006, + "grad_norm": 7.713653231749315, + "learning_rate": 6.605037226172164e-06, + "loss": 18.0653, + "step": 22655 + }, + { + "epoch": 0.41413347469245254, + "grad_norm": 7.827450559057984, + "learning_rate": 6.60475687773769e-06, + "loss": 18.1411, + "step": 22656 + }, + { + "epoch": 0.414151753888899, + "grad_norm": 7.096833323457897, + "learning_rate": 6.604476523678682e-06, + "loss": 17.9774, + "step": 22657 + }, + { + "epoch": 0.41417003308534556, + "grad_norm": 8.250085366544415, + "learning_rate": 6.604196163996124e-06, + "loss": 18.4161, + "step": 22658 + }, + { + "epoch": 0.4141883122817921, + "grad_norm": 6.781771733149326, + "learning_rate": 6.603915798690999e-06, + "loss": 17.7022, + "step": 22659 + }, + { + "epoch": 0.41420659147823863, + "grad_norm": 6.757840342237743, + "learning_rate": 6.603635427764286e-06, + "loss": 17.8375, + "step": 22660 + }, + { + "epoch": 0.41422487067468516, + "grad_norm": 6.5419938792567, + "learning_rate": 6.603355051216971e-06, + "loss": 17.7905, + "step": 22661 + }, + { + "epoch": 0.41424314987113164, + "grad_norm": 5.537596873236283, + "learning_rate": 6.603074669050036e-06, + "loss": 17.2593, + "step": 22662 + }, + { + "epoch": 0.4142614290675782, + "grad_norm": 7.3491561295609324, + "learning_rate": 6.602794281264462e-06, + "loss": 17.7703, + "step": 22663 + }, + { + "epoch": 0.4142797082640247, + "grad_norm": 6.3830916393239745, + "learning_rate": 6.602513887861235e-06, + "loss": 17.7261, + "step": 22664 + }, + { + "epoch": 0.41429798746047125, + "grad_norm": 4.750540918325534, + "learning_rate": 6.6022334888413345e-06, + "loss": 16.8368, + "step": 22665 + }, + { + "epoch": 0.4143162666569178, + "grad_norm": 6.487918736243572, + "learning_rate": 6.601953084205745e-06, + "loss": 17.3947, + "step": 22666 + }, + { + "epoch": 0.41433454585336427, + "grad_norm": 8.488969122297817, + "learning_rate": 6.601672673955449e-06, + "loss": 17.4989, + "step": 22667 + }, + { + "epoch": 0.4143528250498108, + "grad_norm": 5.959678194243318, + "learning_rate": 6.601392258091429e-06, + "loss": 17.2292, + "step": 22668 + }, + { + "epoch": 0.41437110424625734, + "grad_norm": 6.752733173416814, + "learning_rate": 6.60111183661467e-06, + "loss": 17.5716, + "step": 22669 + }, + { + "epoch": 0.4143893834427039, + "grad_norm": 7.461818859216035, + "learning_rate": 6.600831409526152e-06, + "loss": 18.3179, + "step": 22670 + }, + { + "epoch": 0.4144076626391504, + "grad_norm": 6.52799862030602, + "learning_rate": 6.6005509768268575e-06, + "loss": 17.829, + "step": 22671 + }, + { + "epoch": 0.4144259418355969, + 
"grad_norm": 8.700369849679246, + "learning_rate": 6.60027053851777e-06, + "loss": 18.6796, + "step": 22672 + }, + { + "epoch": 0.4144442210320434, + "grad_norm": 6.722078435566233, + "learning_rate": 6.599990094599875e-06, + "loss": 17.6187, + "step": 22673 + }, + { + "epoch": 0.41446250022848996, + "grad_norm": 8.761588404165707, + "learning_rate": 6.599709645074154e-06, + "loss": 18.3351, + "step": 22674 + }, + { + "epoch": 0.4144807794249365, + "grad_norm": 5.366673880496607, + "learning_rate": 6.599429189941589e-06, + "loss": 16.8164, + "step": 22675 + }, + { + "epoch": 0.414499058621383, + "grad_norm": 5.554265745940292, + "learning_rate": 6.599148729203162e-06, + "loss": 17.0912, + "step": 22676 + }, + { + "epoch": 0.4145173378178295, + "grad_norm": 8.969641571116686, + "learning_rate": 6.598868262859859e-06, + "loss": 18.5551, + "step": 22677 + }, + { + "epoch": 0.41453561701427605, + "grad_norm": 5.718663842826723, + "learning_rate": 6.598587790912661e-06, + "loss": 17.3056, + "step": 22678 + }, + { + "epoch": 0.4145538962107226, + "grad_norm": 5.6854851969374405, + "learning_rate": 6.598307313362552e-06, + "loss": 17.4263, + "step": 22679 + }, + { + "epoch": 0.4145721754071691, + "grad_norm": 7.126189282650898, + "learning_rate": 6.598026830210513e-06, + "loss": 17.6173, + "step": 22680 + }, + { + "epoch": 0.4145904546036156, + "grad_norm": 6.867647480876624, + "learning_rate": 6.597746341457531e-06, + "loss": 17.8448, + "step": 22681 + }, + { + "epoch": 0.41460873380006213, + "grad_norm": 7.245180556532616, + "learning_rate": 6.597465847104585e-06, + "loss": 17.9264, + "step": 22682 + }, + { + "epoch": 0.41462701299650867, + "grad_norm": 5.698173578017259, + "learning_rate": 6.597185347152661e-06, + "loss": 17.2355, + "step": 22683 + }, + { + "epoch": 0.4146452921929552, + "grad_norm": 7.678229885503245, + "learning_rate": 6.596904841602741e-06, + "loss": 18.1914, + "step": 22684 + }, + { + "epoch": 0.41466357138940174, + "grad_norm": 6.946669557539084, + "learning_rate": 6.596624330455805e-06, + "loss": 17.7522, + "step": 22685 + }, + { + "epoch": 0.4146818505858482, + "grad_norm": 8.112722763877004, + "learning_rate": 6.596343813712843e-06, + "loss": 18.1864, + "step": 22686 + }, + { + "epoch": 0.41470012978229476, + "grad_norm": 7.077645673920443, + "learning_rate": 6.5960632913748334e-06, + "loss": 17.761, + "step": 22687 + }, + { + "epoch": 0.4147184089787413, + "grad_norm": 5.948136834264524, + "learning_rate": 6.595782763442759e-06, + "loss": 17.4198, + "step": 22688 + }, + { + "epoch": 0.41473668817518783, + "grad_norm": 5.479383944276968, + "learning_rate": 6.595502229917608e-06, + "loss": 17.3015, + "step": 22689 + }, + { + "epoch": 0.41475496737163436, + "grad_norm": 5.630273377241858, + "learning_rate": 6.595221690800356e-06, + "loss": 17.2719, + "step": 22690 + }, + { + "epoch": 0.41477324656808084, + "grad_norm": 7.345218275703665, + "learning_rate": 6.594941146091993e-06, + "loss": 18.4604, + "step": 22691 + }, + { + "epoch": 0.4147915257645274, + "grad_norm": 5.511659026344551, + "learning_rate": 6.594660595793498e-06, + "loss": 17.2525, + "step": 22692 + }, + { + "epoch": 0.4148098049609739, + "grad_norm": 5.954239280317081, + "learning_rate": 6.5943800399058586e-06, + "loss": 17.4721, + "step": 22693 + }, + { + "epoch": 0.41482808415742045, + "grad_norm": 8.52838192229829, + "learning_rate": 6.594099478430052e-06, + "loss": 18.3876, + "step": 22694 + }, + { + "epoch": 0.414846363353867, + "grad_norm": 5.610287645105502, + "learning_rate": 6.593818911367067e-06, 
+ "loss": 17.1563, + "step": 22695 + }, + { + "epoch": 0.41486464255031347, + "grad_norm": 5.801369980537789, + "learning_rate": 6.593538338717885e-06, + "loss": 17.0791, + "step": 22696 + }, + { + "epoch": 0.41488292174676, + "grad_norm": 5.399986477203266, + "learning_rate": 6.59325776048349e-06, + "loss": 17.1426, + "step": 22697 + }, + { + "epoch": 0.41490120094320654, + "grad_norm": 5.841316509387635, + "learning_rate": 6.5929771766648646e-06, + "loss": 17.4869, + "step": 22698 + }, + { + "epoch": 0.4149194801396531, + "grad_norm": 7.118676761819252, + "learning_rate": 6.59269658726299e-06, + "loss": 18.0046, + "step": 22699 + }, + { + "epoch": 0.4149377593360996, + "grad_norm": 6.532402113897623, + "learning_rate": 6.592415992278855e-06, + "loss": 17.5097, + "step": 22700 + }, + { + "epoch": 0.4149560385325461, + "grad_norm": 5.9490599045365, + "learning_rate": 6.59213539171344e-06, + "loss": 17.435, + "step": 22701 + }, + { + "epoch": 0.4149743177289926, + "grad_norm": 6.041935462363598, + "learning_rate": 6.591854785567727e-06, + "loss": 17.2972, + "step": 22702 + }, + { + "epoch": 0.41499259692543916, + "grad_norm": 7.19247230647167, + "learning_rate": 6.591574173842702e-06, + "loss": 17.942, + "step": 22703 + }, + { + "epoch": 0.4150108761218857, + "grad_norm": 6.734515529317652, + "learning_rate": 6.591293556539348e-06, + "loss": 17.7786, + "step": 22704 + }, + { + "epoch": 0.41502915531833223, + "grad_norm": 6.395805860776399, + "learning_rate": 6.591012933658647e-06, + "loss": 17.2242, + "step": 22705 + }, + { + "epoch": 0.4150474345147787, + "grad_norm": 7.439591270775899, + "learning_rate": 6.5907323052015846e-06, + "loss": 18.0942, + "step": 22706 + }, + { + "epoch": 0.41506571371122525, + "grad_norm": 7.104558951426564, + "learning_rate": 6.590451671169143e-06, + "loss": 18.0514, + "step": 22707 + }, + { + "epoch": 0.4150839929076718, + "grad_norm": 6.619138140788436, + "learning_rate": 6.590171031562307e-06, + "loss": 17.6459, + "step": 22708 + }, + { + "epoch": 0.4151022721041183, + "grad_norm": 7.68805210518015, + "learning_rate": 6.589890386382058e-06, + "loss": 18.1722, + "step": 22709 + }, + { + "epoch": 0.4151205513005648, + "grad_norm": 6.3781541007056, + "learning_rate": 6.589609735629383e-06, + "loss": 17.4187, + "step": 22710 + }, + { + "epoch": 0.41513883049701134, + "grad_norm": 6.485791575507058, + "learning_rate": 6.589329079305265e-06, + "loss": 17.3851, + "step": 22711 + }, + { + "epoch": 0.41515710969345787, + "grad_norm": 7.265329608831831, + "learning_rate": 6.589048417410683e-06, + "loss": 18.2836, + "step": 22712 + }, + { + "epoch": 0.4151753888899044, + "grad_norm": 6.106993705205944, + "learning_rate": 6.5887677499466255e-06, + "loss": 17.276, + "step": 22713 + }, + { + "epoch": 0.41519366808635094, + "grad_norm": 8.007759577422881, + "learning_rate": 6.588487076914076e-06, + "loss": 17.994, + "step": 22714 + }, + { + "epoch": 0.4152119472827974, + "grad_norm": 6.874602017566855, + "learning_rate": 6.588206398314017e-06, + "loss": 17.7814, + "step": 22715 + }, + { + "epoch": 0.41523022647924396, + "grad_norm": 7.192935282340736, + "learning_rate": 6.58792571414743e-06, + "loss": 17.6344, + "step": 22716 + }, + { + "epoch": 0.4152485056756905, + "grad_norm": 7.48865060419624, + "learning_rate": 6.587645024415304e-06, + "loss": 17.8313, + "step": 22717 + }, + { + "epoch": 0.41526678487213703, + "grad_norm": 6.263113228871017, + "learning_rate": 6.587364329118619e-06, + "loss": 17.3444, + "step": 22718 + }, + { + "epoch": 0.41528506406858356, + 
"grad_norm": 7.208517383989723, + "learning_rate": 6.587083628258358e-06, + "loss": 17.7908, + "step": 22719 + }, + { + "epoch": 0.41530334326503004, + "grad_norm": 8.204107080778781, + "learning_rate": 6.586802921835509e-06, + "loss": 18.3743, + "step": 22720 + }, + { + "epoch": 0.4153216224614766, + "grad_norm": 6.759058520351196, + "learning_rate": 6.586522209851053e-06, + "loss": 17.6838, + "step": 22721 + }, + { + "epoch": 0.4153399016579231, + "grad_norm": 5.367233973973414, + "learning_rate": 6.586241492305974e-06, + "loss": 17.1537, + "step": 22722 + }, + { + "epoch": 0.41535818085436965, + "grad_norm": 5.641765597733447, + "learning_rate": 6.585960769201256e-06, + "loss": 17.2739, + "step": 22723 + }, + { + "epoch": 0.4153764600508162, + "grad_norm": 5.603372240086101, + "learning_rate": 6.585680040537884e-06, + "loss": 17.1906, + "step": 22724 + }, + { + "epoch": 0.41539473924726267, + "grad_norm": 6.509496739115115, + "learning_rate": 6.58539930631684e-06, + "loss": 17.7058, + "step": 22725 + }, + { + "epoch": 0.4154130184437092, + "grad_norm": 7.650496191049718, + "learning_rate": 6.585118566539108e-06, + "loss": 17.7613, + "step": 22726 + }, + { + "epoch": 0.41543129764015574, + "grad_norm": 6.774446900572114, + "learning_rate": 6.584837821205675e-06, + "loss": 18.0853, + "step": 22727 + }, + { + "epoch": 0.4154495768366023, + "grad_norm": 8.229439411922518, + "learning_rate": 6.584557070317523e-06, + "loss": 18.2243, + "step": 22728 + }, + { + "epoch": 0.4154678560330488, + "grad_norm": 5.692487977891022, + "learning_rate": 6.584276313875635e-06, + "loss": 17.2809, + "step": 22729 + }, + { + "epoch": 0.4154861352294953, + "grad_norm": 7.541651090569733, + "learning_rate": 6.583995551880996e-06, + "loss": 17.8468, + "step": 22730 + }, + { + "epoch": 0.4155044144259418, + "grad_norm": 7.593822362366229, + "learning_rate": 6.58371478433459e-06, + "loss": 17.8409, + "step": 22731 + }, + { + "epoch": 0.41552269362238836, + "grad_norm": 6.341984989300643, + "learning_rate": 6.5834340112374015e-06, + "loss": 17.7052, + "step": 22732 + }, + { + "epoch": 0.4155409728188349, + "grad_norm": 6.75870241976922, + "learning_rate": 6.583153232590415e-06, + "loss": 17.8381, + "step": 22733 + }, + { + "epoch": 0.41555925201528143, + "grad_norm": 6.017906222199273, + "learning_rate": 6.5828724483946124e-06, + "loss": 17.5057, + "step": 22734 + }, + { + "epoch": 0.4155775312117279, + "grad_norm": 6.575012576757038, + "learning_rate": 6.58259165865098e-06, + "loss": 17.4874, + "step": 22735 + }, + { + "epoch": 0.41559581040817445, + "grad_norm": 6.8646238780668005, + "learning_rate": 6.582310863360501e-06, + "loss": 17.6847, + "step": 22736 + }, + { + "epoch": 0.415614089604621, + "grad_norm": 6.004205502592466, + "learning_rate": 6.58203006252416e-06, + "loss": 17.261, + "step": 22737 + }, + { + "epoch": 0.4156323688010675, + "grad_norm": 6.6996546292894354, + "learning_rate": 6.581749256142941e-06, + "loss": 17.5514, + "step": 22738 + }, + { + "epoch": 0.41565064799751406, + "grad_norm": 5.002377555622047, + "learning_rate": 6.581468444217827e-06, + "loss": 16.9799, + "step": 22739 + }, + { + "epoch": 0.41566892719396054, + "grad_norm": 5.442555686252805, + "learning_rate": 6.581187626749803e-06, + "loss": 17.1699, + "step": 22740 + }, + { + "epoch": 0.41568720639040707, + "grad_norm": 5.511346881169742, + "learning_rate": 6.580906803739855e-06, + "loss": 17.1863, + "step": 22741 + }, + { + "epoch": 0.4157054855868536, + "grad_norm": 5.8617931785889565, + "learning_rate": 6.580625975188966e-06, 
+ "loss": 17.1286, + "step": 22742 + }, + { + "epoch": 0.41572376478330014, + "grad_norm": 6.571820235000502, + "learning_rate": 6.58034514109812e-06, + "loss": 17.4238, + "step": 22743 + }, + { + "epoch": 0.4157420439797466, + "grad_norm": 7.297712380885935, + "learning_rate": 6.5800643014683e-06, + "loss": 17.8347, + "step": 22744 + }, + { + "epoch": 0.41576032317619316, + "grad_norm": 7.361179552002683, + "learning_rate": 6.579783456300494e-06, + "loss": 17.7443, + "step": 22745 + }, + { + "epoch": 0.4157786023726397, + "grad_norm": 5.652602800604153, + "learning_rate": 6.579502605595682e-06, + "loss": 17.3332, + "step": 22746 + }, + { + "epoch": 0.41579688156908623, + "grad_norm": 6.973782360010165, + "learning_rate": 6.579221749354851e-06, + "loss": 17.2906, + "step": 22747 + }, + { + "epoch": 0.41581516076553277, + "grad_norm": 5.04874986374777, + "learning_rate": 6.578940887578985e-06, + "loss": 16.7602, + "step": 22748 + }, + { + "epoch": 0.41583343996197925, + "grad_norm": 6.617958056138184, + "learning_rate": 6.578660020269069e-06, + "loss": 17.5798, + "step": 22749 + }, + { + "epoch": 0.4158517191584258, + "grad_norm": 5.884115906716348, + "learning_rate": 6.578379147426085e-06, + "loss": 17.076, + "step": 22750 + }, + { + "epoch": 0.4158699983548723, + "grad_norm": 5.746617761475577, + "learning_rate": 6.5780982690510195e-06, + "loss": 17.3955, + "step": 22751 + }, + { + "epoch": 0.41588827755131885, + "grad_norm": 5.915119519887098, + "learning_rate": 6.577817385144858e-06, + "loss": 17.3731, + "step": 22752 + }, + { + "epoch": 0.4159065567477654, + "grad_norm": 5.217123786820856, + "learning_rate": 6.577536495708582e-06, + "loss": 16.9362, + "step": 22753 + }, + { + "epoch": 0.41592483594421187, + "grad_norm": 7.274446557520727, + "learning_rate": 6.577255600743178e-06, + "loss": 17.6814, + "step": 22754 + }, + { + "epoch": 0.4159431151406584, + "grad_norm": 7.1672588174974035, + "learning_rate": 6.57697470024963e-06, + "loss": 17.6601, + "step": 22755 + }, + { + "epoch": 0.41596139433710494, + "grad_norm": 5.915564077256306, + "learning_rate": 6.5766937942289236e-06, + "loss": 17.3083, + "step": 22756 + }, + { + "epoch": 0.4159796735335515, + "grad_norm": 6.479994222430836, + "learning_rate": 6.5764128826820404e-06, + "loss": 17.5307, + "step": 22757 + }, + { + "epoch": 0.415997952729998, + "grad_norm": 7.006907757023098, + "learning_rate": 6.5761319656099665e-06, + "loss": 17.5083, + "step": 22758 + }, + { + "epoch": 0.4160162319264445, + "grad_norm": 6.909314029790856, + "learning_rate": 6.575851043013688e-06, + "loss": 17.5981, + "step": 22759 + }, + { + "epoch": 0.416034511122891, + "grad_norm": 8.133590289608975, + "learning_rate": 6.575570114894189e-06, + "loss": 18.3469, + "step": 22760 + }, + { + "epoch": 0.41605279031933756, + "grad_norm": 7.532132414079868, + "learning_rate": 6.575289181252452e-06, + "loss": 17.8717, + "step": 22761 + }, + { + "epoch": 0.4160710695157841, + "grad_norm": 8.058429272216127, + "learning_rate": 6.575008242089463e-06, + "loss": 17.4815, + "step": 22762 + }, + { + "epoch": 0.41608934871223063, + "grad_norm": 6.538101077384175, + "learning_rate": 6.574727297406208e-06, + "loss": 17.3078, + "step": 22763 + }, + { + "epoch": 0.4161076279086771, + "grad_norm": 7.684220028388074, + "learning_rate": 6.5744463472036705e-06, + "loss": 17.8021, + "step": 22764 + }, + { + "epoch": 0.41612590710512365, + "grad_norm": 6.244223985643519, + "learning_rate": 6.574165391482834e-06, + "loss": 17.3872, + "step": 22765 + }, + { + "epoch": 
0.4161441863015702, + "grad_norm": 6.987439625975591, + "learning_rate": 6.573884430244686e-06, + "loss": 17.4608, + "step": 22766 + }, + { + "epoch": 0.4161624654980167, + "grad_norm": 6.891764479283446, + "learning_rate": 6.573603463490208e-06, + "loss": 17.8429, + "step": 22767 + }, + { + "epoch": 0.41618074469446326, + "grad_norm": 7.666691658856379, + "learning_rate": 6.573322491220387e-06, + "loss": 17.587, + "step": 22768 + }, + { + "epoch": 0.41619902389090974, + "grad_norm": 5.806290877069214, + "learning_rate": 6.573041513436208e-06, + "loss": 17.4055, + "step": 22769 + }, + { + "epoch": 0.41621730308735627, + "grad_norm": 5.16293829467992, + "learning_rate": 6.572760530138654e-06, + "loss": 17.1096, + "step": 22770 + }, + { + "epoch": 0.4162355822838028, + "grad_norm": 7.348799653268203, + "learning_rate": 6.572479541328711e-06, + "loss": 17.7816, + "step": 22771 + }, + { + "epoch": 0.41625386148024934, + "grad_norm": 6.755542268569418, + "learning_rate": 6.5721985470073635e-06, + "loss": 17.3061, + "step": 22772 + }, + { + "epoch": 0.4162721406766959, + "grad_norm": 7.217781189562275, + "learning_rate": 6.571917547175598e-06, + "loss": 18.0527, + "step": 22773 + }, + { + "epoch": 0.41629041987314236, + "grad_norm": 6.833506095546124, + "learning_rate": 6.571636541834396e-06, + "loss": 17.7029, + "step": 22774 + }, + { + "epoch": 0.4163086990695889, + "grad_norm": 5.849803002947453, + "learning_rate": 6.571355530984746e-06, + "loss": 17.3694, + "step": 22775 + }, + { + "epoch": 0.41632697826603543, + "grad_norm": 5.765323378988717, + "learning_rate": 6.571074514627629e-06, + "loss": 17.3174, + "step": 22776 + }, + { + "epoch": 0.41634525746248197, + "grad_norm": 6.690528133298416, + "learning_rate": 6.570793492764033e-06, + "loss": 17.779, + "step": 22777 + }, + { + "epoch": 0.41636353665892845, + "grad_norm": 5.990251398528859, + "learning_rate": 6.570512465394943e-06, + "loss": 17.3365, + "step": 22778 + }, + { + "epoch": 0.416381815855375, + "grad_norm": 7.25207053808171, + "learning_rate": 6.570231432521344e-06, + "loss": 17.7794, + "step": 22779 + }, + { + "epoch": 0.4164000950518215, + "grad_norm": 5.545128358764208, + "learning_rate": 6.5699503941442176e-06, + "loss": 17.218, + "step": 22780 + }, + { + "epoch": 0.41641837424826805, + "grad_norm": 7.852032475184119, + "learning_rate": 6.569669350264553e-06, + "loss": 17.7779, + "step": 22781 + }, + { + "epoch": 0.4164366534447146, + "grad_norm": 7.142927567560632, + "learning_rate": 6.569388300883332e-06, + "loss": 17.912, + "step": 22782 + }, + { + "epoch": 0.41645493264116107, + "grad_norm": 8.292464851180304, + "learning_rate": 6.569107246001542e-06, + "loss": 18.0269, + "step": 22783 + }, + { + "epoch": 0.4164732118376076, + "grad_norm": 7.474122331540772, + "learning_rate": 6.568826185620169e-06, + "loss": 17.2395, + "step": 22784 + }, + { + "epoch": 0.41649149103405414, + "grad_norm": 7.301032832584391, + "learning_rate": 6.568545119740193e-06, + "loss": 17.9761, + "step": 22785 + }, + { + "epoch": 0.4165097702305007, + "grad_norm": 6.912074852409992, + "learning_rate": 6.568264048362605e-06, + "loss": 17.6828, + "step": 22786 + }, + { + "epoch": 0.4165280494269472, + "grad_norm": 5.650365942879607, + "learning_rate": 6.567982971488387e-06, + "loss": 17.1882, + "step": 22787 + }, + { + "epoch": 0.4165463286233937, + "grad_norm": 6.1543212301739105, + "learning_rate": 6.5677018891185255e-06, + "loss": 17.501, + "step": 22788 + }, + { + "epoch": 0.4165646078198402, + "grad_norm": 5.830334058663455, + "learning_rate": 
6.567420801254003e-06, + "loss": 17.3876, + "step": 22789 + }, + { + "epoch": 0.41658288701628676, + "grad_norm": 6.832920466203908, + "learning_rate": 6.567139707895808e-06, + "loss": 17.5439, + "step": 22790 + }, + { + "epoch": 0.4166011662127333, + "grad_norm": 7.110551234129028, + "learning_rate": 6.566858609044924e-06, + "loss": 17.7816, + "step": 22791 + }, + { + "epoch": 0.41661944540917983, + "grad_norm": 7.744163628442694, + "learning_rate": 6.5665775047023365e-06, + "loss": 18.228, + "step": 22792 + }, + { + "epoch": 0.4166377246056263, + "grad_norm": 8.667665539047851, + "learning_rate": 6.566296394869032e-06, + "loss": 18.431, + "step": 22793 + }, + { + "epoch": 0.41665600380207285, + "grad_norm": 6.7923573073427415, + "learning_rate": 6.566015279545991e-06, + "loss": 17.6889, + "step": 22794 + }, + { + "epoch": 0.4166742829985194, + "grad_norm": 6.236571850196767, + "learning_rate": 6.565734158734205e-06, + "loss": 17.5213, + "step": 22795 + }, + { + "epoch": 0.4166925621949659, + "grad_norm": 6.12475517112729, + "learning_rate": 6.565453032434657e-06, + "loss": 17.6715, + "step": 22796 + }, + { + "epoch": 0.41671084139141246, + "grad_norm": 5.582975984442605, + "learning_rate": 6.5651719006483304e-06, + "loss": 17.1853, + "step": 22797 + }, + { + "epoch": 0.41672912058785894, + "grad_norm": 7.714019679154534, + "learning_rate": 6.564890763376212e-06, + "loss": 18.1422, + "step": 22798 + }, + { + "epoch": 0.4167473997843055, + "grad_norm": 6.2442493922056475, + "learning_rate": 6.564609620619289e-06, + "loss": 17.3394, + "step": 22799 + }, + { + "epoch": 0.416765678980752, + "grad_norm": 6.482635908781921, + "learning_rate": 6.564328472378545e-06, + "loss": 17.7275, + "step": 22800 + }, + { + "epoch": 0.41678395817719854, + "grad_norm": 5.819707570598144, + "learning_rate": 6.564047318654965e-06, + "loss": 17.1874, + "step": 22801 + }, + { + "epoch": 0.4168022373736451, + "grad_norm": 7.5473504574752734, + "learning_rate": 6.563766159449534e-06, + "loss": 18.2899, + "step": 22802 + }, + { + "epoch": 0.41682051657009156, + "grad_norm": 9.299562735722224, + "learning_rate": 6.563484994763238e-06, + "loss": 18.1879, + "step": 22803 + }, + { + "epoch": 0.4168387957665381, + "grad_norm": 6.709706486727593, + "learning_rate": 6.563203824597064e-06, + "loss": 17.5875, + "step": 22804 + }, + { + "epoch": 0.41685707496298463, + "grad_norm": 6.495214829325099, + "learning_rate": 6.562922648951997e-06, + "loss": 17.5701, + "step": 22805 + }, + { + "epoch": 0.41687535415943117, + "grad_norm": 6.4480004868956415, + "learning_rate": 6.562641467829021e-06, + "loss": 17.1504, + "step": 22806 + }, + { + "epoch": 0.4168936333558777, + "grad_norm": 6.590807356068329, + "learning_rate": 6.562360281229121e-06, + "loss": 17.6114, + "step": 22807 + }, + { + "epoch": 0.4169119125523242, + "grad_norm": 5.205104114154562, + "learning_rate": 6.562079089153285e-06, + "loss": 16.862, + "step": 22808 + }, + { + "epoch": 0.4169301917487707, + "grad_norm": 7.197480642816911, + "learning_rate": 6.561797891602496e-06, + "loss": 18.1306, + "step": 22809 + }, + { + "epoch": 0.41694847094521725, + "grad_norm": 8.322559946414186, + "learning_rate": 6.561516688577743e-06, + "loss": 18.0837, + "step": 22810 + }, + { + "epoch": 0.4169667501416638, + "grad_norm": 6.383365600979941, + "learning_rate": 6.561235480080008e-06, + "loss": 17.2967, + "step": 22811 + }, + { + "epoch": 0.41698502933811027, + "grad_norm": 5.639384376893032, + "learning_rate": 6.560954266110278e-06, + "loss": 17.1241, + "step": 22812 + }, + { + 
"epoch": 0.4170033085345568, + "grad_norm": 7.4142106095428435, + "learning_rate": 6.560673046669539e-06, + "loss": 17.7709, + "step": 22813 + }, + { + "epoch": 0.41702158773100334, + "grad_norm": 5.281954945023397, + "learning_rate": 6.560391821758778e-06, + "loss": 16.9594, + "step": 22814 + }, + { + "epoch": 0.4170398669274499, + "grad_norm": 8.812066905483043, + "learning_rate": 6.560110591378978e-06, + "loss": 18.5953, + "step": 22815 + }, + { + "epoch": 0.4170581461238964, + "grad_norm": 6.876947558740086, + "learning_rate": 6.559829355531125e-06, + "loss": 18.0454, + "step": 22816 + }, + { + "epoch": 0.4170764253203429, + "grad_norm": 7.917584726064076, + "learning_rate": 6.5595481142162055e-06, + "loss": 17.8833, + "step": 22817 + }, + { + "epoch": 0.4170947045167894, + "grad_norm": 5.738162967416068, + "learning_rate": 6.559266867435207e-06, + "loss": 17.3356, + "step": 22818 + }, + { + "epoch": 0.41711298371323596, + "grad_norm": 6.922606770731424, + "learning_rate": 6.558985615189112e-06, + "loss": 17.8734, + "step": 22819 + }, + { + "epoch": 0.4171312629096825, + "grad_norm": 8.63541636951145, + "learning_rate": 6.5587043574789065e-06, + "loss": 18.5934, + "step": 22820 + }, + { + "epoch": 0.41714954210612903, + "grad_norm": 7.578627984517951, + "learning_rate": 6.55842309430558e-06, + "loss": 17.95, + "step": 22821 + }, + { + "epoch": 0.4171678213025755, + "grad_norm": 7.858886689256134, + "learning_rate": 6.558141825670114e-06, + "loss": 18.0093, + "step": 22822 + }, + { + "epoch": 0.41718610049902205, + "grad_norm": 7.001485042822444, + "learning_rate": 6.5578605515734964e-06, + "loss": 17.706, + "step": 22823 + }, + { + "epoch": 0.4172043796954686, + "grad_norm": 6.834582829360226, + "learning_rate": 6.557579272016714e-06, + "loss": 17.5111, + "step": 22824 + }, + { + "epoch": 0.4172226588919151, + "grad_norm": 8.267023838670658, + "learning_rate": 6.55729798700075e-06, + "loss": 18.0063, + "step": 22825 + }, + { + "epoch": 0.41724093808836166, + "grad_norm": 6.703518452516018, + "learning_rate": 6.557016696526592e-06, + "loss": 18.0998, + "step": 22826 + }, + { + "epoch": 0.41725921728480814, + "grad_norm": 8.303254247812571, + "learning_rate": 6.556735400595225e-06, + "loss": 18.0793, + "step": 22827 + }, + { + "epoch": 0.4172774964812547, + "grad_norm": 6.049871087523901, + "learning_rate": 6.556454099207638e-06, + "loss": 17.2825, + "step": 22828 + }, + { + "epoch": 0.4172957756777012, + "grad_norm": 7.0593863672495205, + "learning_rate": 6.5561727923648124e-06, + "loss": 17.8252, + "step": 22829 + }, + { + "epoch": 0.41731405487414774, + "grad_norm": 7.749978588804397, + "learning_rate": 6.555891480067736e-06, + "loss": 18.0291, + "step": 22830 + }, + { + "epoch": 0.4173323340705943, + "grad_norm": 6.377548209145335, + "learning_rate": 6.5556101623173966e-06, + "loss": 17.8959, + "step": 22831 + }, + { + "epoch": 0.41735061326704076, + "grad_norm": 7.416872017395103, + "learning_rate": 6.555328839114776e-06, + "loss": 18.2488, + "step": 22832 + }, + { + "epoch": 0.4173688924634873, + "grad_norm": 5.627472921197626, + "learning_rate": 6.555047510460866e-06, + "loss": 17.2994, + "step": 22833 + }, + { + "epoch": 0.41738717165993383, + "grad_norm": 7.142209636209356, + "learning_rate": 6.554766176356646e-06, + "loss": 17.8598, + "step": 22834 + }, + { + "epoch": 0.41740545085638037, + "grad_norm": 6.156923175015818, + "learning_rate": 6.554484836803108e-06, + "loss": 17.411, + "step": 22835 + }, + { + "epoch": 0.4174237300528269, + "grad_norm": 7.262796757388549, + 
"learning_rate": 6.554203491801235e-06, + "loss": 17.9443, + "step": 22836 + }, + { + "epoch": 0.4174420092492734, + "grad_norm": 6.31230909615951, + "learning_rate": 6.553922141352012e-06, + "loss": 17.5397, + "step": 22837 + }, + { + "epoch": 0.4174602884457199, + "grad_norm": 5.799852278081229, + "learning_rate": 6.55364078545643e-06, + "loss": 17.0805, + "step": 22838 + }, + { + "epoch": 0.41747856764216645, + "grad_norm": 6.628159449081681, + "learning_rate": 6.553359424115468e-06, + "loss": 17.6692, + "step": 22839 + }, + { + "epoch": 0.417496846838613, + "grad_norm": 6.820717034896563, + "learning_rate": 6.553078057330118e-06, + "loss": 17.6101, + "step": 22840 + }, + { + "epoch": 0.4175151260350595, + "grad_norm": 6.089442215659412, + "learning_rate": 6.552796685101364e-06, + "loss": 17.4498, + "step": 22841 + }, + { + "epoch": 0.417533405231506, + "grad_norm": 6.499586675908634, + "learning_rate": 6.552515307430194e-06, + "loss": 17.801, + "step": 22842 + }, + { + "epoch": 0.41755168442795254, + "grad_norm": 5.901833702082617, + "learning_rate": 6.55223392431759e-06, + "loss": 17.4328, + "step": 22843 + }, + { + "epoch": 0.4175699636243991, + "grad_norm": 8.705715413606889, + "learning_rate": 6.551952535764541e-06, + "loss": 18.5282, + "step": 22844 + }, + { + "epoch": 0.4175882428208456, + "grad_norm": 6.320404062671469, + "learning_rate": 6.5516711417720355e-06, + "loss": 17.5892, + "step": 22845 + }, + { + "epoch": 0.4176065220172921, + "grad_norm": 7.178860213931443, + "learning_rate": 6.551389742341055e-06, + "loss": 17.9891, + "step": 22846 + }, + { + "epoch": 0.41762480121373863, + "grad_norm": 7.656582368173924, + "learning_rate": 6.551108337472589e-06, + "loss": 18.0129, + "step": 22847 + }, + { + "epoch": 0.41764308041018516, + "grad_norm": 6.109532819932893, + "learning_rate": 6.550826927167623e-06, + "loss": 17.4247, + "step": 22848 + }, + { + "epoch": 0.4176613596066317, + "grad_norm": 6.092443474453851, + "learning_rate": 6.5505455114271424e-06, + "loss": 17.3379, + "step": 22849 + }, + { + "epoch": 0.41767963880307823, + "grad_norm": 6.133743134607131, + "learning_rate": 6.550264090252134e-06, + "loss": 17.7763, + "step": 22850 + }, + { + "epoch": 0.4176979179995247, + "grad_norm": 8.763945002107015, + "learning_rate": 6.549982663643586e-06, + "loss": 18.1602, + "step": 22851 + }, + { + "epoch": 0.41771619719597125, + "grad_norm": 6.210400200575394, + "learning_rate": 6.549701231602484e-06, + "loss": 17.2319, + "step": 22852 + }, + { + "epoch": 0.4177344763924178, + "grad_norm": 6.661533010185124, + "learning_rate": 6.54941979412981e-06, + "loss": 17.7869, + "step": 22853 + }, + { + "epoch": 0.4177527555888643, + "grad_norm": 6.898743035864414, + "learning_rate": 6.549138351226556e-06, + "loss": 17.7566, + "step": 22854 + }, + { + "epoch": 0.41777103478531086, + "grad_norm": 6.548996978112739, + "learning_rate": 6.548856902893708e-06, + "loss": 17.5209, + "step": 22855 + }, + { + "epoch": 0.41778931398175734, + "grad_norm": 7.238878738760886, + "learning_rate": 6.5485754491322494e-06, + "loss": 17.9588, + "step": 22856 + }, + { + "epoch": 0.4178075931782039, + "grad_norm": 6.436757042608861, + "learning_rate": 6.548293989943168e-06, + "loss": 17.5837, + "step": 22857 + }, + { + "epoch": 0.4178258723746504, + "grad_norm": 5.661168070102067, + "learning_rate": 6.5480125253274505e-06, + "loss": 17.3673, + "step": 22858 + }, + { + "epoch": 0.41784415157109694, + "grad_norm": 6.605295512255456, + "learning_rate": 6.547731055286085e-06, + "loss": 17.3872, + "step": 22859 
+ }, + { + "epoch": 0.4178624307675435, + "grad_norm": 7.909887497474025, + "learning_rate": 6.5474495798200555e-06, + "loss": 18.1409, + "step": 22860 + }, + { + "epoch": 0.41788070996398996, + "grad_norm": 6.512755013831273, + "learning_rate": 6.5471680989303495e-06, + "loss": 17.5697, + "step": 22861 + }, + { + "epoch": 0.4178989891604365, + "grad_norm": 5.761497015408156, + "learning_rate": 6.546886612617953e-06, + "loss": 17.1833, + "step": 22862 + }, + { + "epoch": 0.41791726835688303, + "grad_norm": 5.362426299524855, + "learning_rate": 6.546605120883854e-06, + "loss": 17.2209, + "step": 22863 + }, + { + "epoch": 0.41793554755332957, + "grad_norm": 5.837311405647309, + "learning_rate": 6.546323623729038e-06, + "loss": 17.453, + "step": 22864 + }, + { + "epoch": 0.4179538267497761, + "grad_norm": 7.939985383396437, + "learning_rate": 6.546042121154492e-06, + "loss": 17.9857, + "step": 22865 + }, + { + "epoch": 0.4179721059462226, + "grad_norm": 6.930859774128972, + "learning_rate": 6.545760613161202e-06, + "loss": 17.5704, + "step": 22866 + }, + { + "epoch": 0.4179903851426691, + "grad_norm": 6.103396396617472, + "learning_rate": 6.545479099750156e-06, + "loss": 17.1374, + "step": 22867 + }, + { + "epoch": 0.41800866433911565, + "grad_norm": 5.99368123074367, + "learning_rate": 6.545197580922339e-06, + "loss": 17.5591, + "step": 22868 + }, + { + "epoch": 0.4180269435355622, + "grad_norm": 6.182348943063241, + "learning_rate": 6.54491605667874e-06, + "loss": 17.5682, + "step": 22869 + }, + { + "epoch": 0.4180452227320087, + "grad_norm": 6.729086066286519, + "learning_rate": 6.544634527020343e-06, + "loss": 17.3765, + "step": 22870 + }, + { + "epoch": 0.4180635019284552, + "grad_norm": 7.225283574756662, + "learning_rate": 6.5443529919481355e-06, + "loss": 17.8946, + "step": 22871 + }, + { + "epoch": 0.41808178112490174, + "grad_norm": 7.6669910653718825, + "learning_rate": 6.5440714514631056e-06, + "loss": 18.1302, + "step": 22872 + }, + { + "epoch": 0.4181000603213483, + "grad_norm": 5.850875944637185, + "learning_rate": 6.54378990556624e-06, + "loss": 17.4011, + "step": 22873 + }, + { + "epoch": 0.4181183395177948, + "grad_norm": 7.086829435337445, + "learning_rate": 6.5435083542585235e-06, + "loss": 17.5344, + "step": 22874 + }, + { + "epoch": 0.41813661871424135, + "grad_norm": 7.1525906763665885, + "learning_rate": 6.543226797540945e-06, + "loss": 18.6293, + "step": 22875 + }, + { + "epoch": 0.41815489791068783, + "grad_norm": 7.118951815704408, + "learning_rate": 6.542945235414489e-06, + "loss": 17.6044, + "step": 22876 + }, + { + "epoch": 0.41817317710713436, + "grad_norm": 6.265518363771976, + "learning_rate": 6.542663667880145e-06, + "loss": 17.3717, + "step": 22877 + }, + { + "epoch": 0.4181914563035809, + "grad_norm": 6.87913448403044, + "learning_rate": 6.5423820949388995e-06, + "loss": 17.4798, + "step": 22878 + }, + { + "epoch": 0.41820973550002744, + "grad_norm": 5.81084269600853, + "learning_rate": 6.542100516591737e-06, + "loss": 17.2091, + "step": 22879 + }, + { + "epoch": 0.4182280146964739, + "grad_norm": 6.3975723906987625, + "learning_rate": 6.541818932839646e-06, + "loss": 17.7207, + "step": 22880 + }, + { + "epoch": 0.41824629389292045, + "grad_norm": 6.452800317594072, + "learning_rate": 6.541537343683615e-06, + "loss": 17.413, + "step": 22881 + }, + { + "epoch": 0.418264573089367, + "grad_norm": 6.346871107329755, + "learning_rate": 6.541255749124629e-06, + "loss": 17.3771, + "step": 22882 + }, + { + "epoch": 0.4182828522858135, + "grad_norm": 
6.923827081749899, + "learning_rate": 6.5409741491636746e-06, + "loss": 17.3506, + "step": 22883 + }, + { + "epoch": 0.41830113148226006, + "grad_norm": 7.067361672916809, + "learning_rate": 6.54069254380174e-06, + "loss": 17.9107, + "step": 22884 + }, + { + "epoch": 0.41831941067870654, + "grad_norm": 6.980080056759994, + "learning_rate": 6.54041093303981e-06, + "loss": 17.8602, + "step": 22885 + }, + { + "epoch": 0.4183376898751531, + "grad_norm": 7.087277520644656, + "learning_rate": 6.540129316878876e-06, + "loss": 17.9097, + "step": 22886 + }, + { + "epoch": 0.4183559690715996, + "grad_norm": 5.600155285651116, + "learning_rate": 6.539847695319922e-06, + "loss": 17.1198, + "step": 22887 + }, + { + "epoch": 0.41837424826804614, + "grad_norm": 6.921820599430229, + "learning_rate": 6.539566068363934e-06, + "loss": 17.7032, + "step": 22888 + }, + { + "epoch": 0.4183925274644927, + "grad_norm": 7.571194767346009, + "learning_rate": 6.539284436011901e-06, + "loss": 17.7819, + "step": 22889 + }, + { + "epoch": 0.41841080666093916, + "grad_norm": 6.197019217095146, + "learning_rate": 6.539002798264811e-06, + "loss": 17.2801, + "step": 22890 + }, + { + "epoch": 0.4184290858573857, + "grad_norm": 6.59662714682823, + "learning_rate": 6.5387211551236485e-06, + "loss": 17.7627, + "step": 22891 + }, + { + "epoch": 0.41844736505383223, + "grad_norm": 5.962347021512397, + "learning_rate": 6.538439506589401e-06, + "loss": 17.2985, + "step": 22892 + }, + { + "epoch": 0.41846564425027877, + "grad_norm": 6.084208894677302, + "learning_rate": 6.538157852663059e-06, + "loss": 17.3511, + "step": 22893 + }, + { + "epoch": 0.4184839234467253, + "grad_norm": 8.792003026134429, + "learning_rate": 6.537876193345605e-06, + "loss": 18.112, + "step": 22894 + }, + { + "epoch": 0.4185022026431718, + "grad_norm": 5.730695756311766, + "learning_rate": 6.537594528638028e-06, + "loss": 17.2505, + "step": 22895 + }, + { + "epoch": 0.4185204818396183, + "grad_norm": 5.973897008972709, + "learning_rate": 6.537312858541317e-06, + "loss": 17.3319, + "step": 22896 + }, + { + "epoch": 0.41853876103606485, + "grad_norm": 5.26638274908141, + "learning_rate": 6.537031183056459e-06, + "loss": 17.4098, + "step": 22897 + }, + { + "epoch": 0.4185570402325114, + "grad_norm": 5.63597513759987, + "learning_rate": 6.536749502184437e-06, + "loss": 17.3671, + "step": 22898 + }, + { + "epoch": 0.4185753194289579, + "grad_norm": 6.58035156249633, + "learning_rate": 6.536467815926243e-06, + "loss": 17.7771, + "step": 22899 + }, + { + "epoch": 0.4185935986254044, + "grad_norm": 7.699676029301902, + "learning_rate": 6.5361861242828635e-06, + "loss": 18.0079, + "step": 22900 + }, + { + "epoch": 0.41861187782185094, + "grad_norm": 5.7441541297691785, + "learning_rate": 6.535904427255284e-06, + "loss": 17.2305, + "step": 22901 + }, + { + "epoch": 0.4186301570182975, + "grad_norm": 7.929346601580015, + "learning_rate": 6.535622724844492e-06, + "loss": 17.9838, + "step": 22902 + }, + { + "epoch": 0.418648436214744, + "grad_norm": 5.295553888626519, + "learning_rate": 6.535341017051477e-06, + "loss": 17.1963, + "step": 22903 + }, + { + "epoch": 0.41866671541119055, + "grad_norm": 9.134796239102027, + "learning_rate": 6.535059303877224e-06, + "loss": 17.7302, + "step": 22904 + }, + { + "epoch": 0.41868499460763703, + "grad_norm": 7.049232928319126, + "learning_rate": 6.534777585322722e-06, + "loss": 17.7632, + "step": 22905 + }, + { + "epoch": 0.41870327380408356, + "grad_norm": 6.862135989837325, + "learning_rate": 6.5344958613889575e-06, + "loss": 
17.8258, + "step": 22906 + }, + { + "epoch": 0.4187215530005301, + "grad_norm": 7.14700917026873, + "learning_rate": 6.534214132076918e-06, + "loss": 17.7554, + "step": 22907 + }, + { + "epoch": 0.41873983219697664, + "grad_norm": 6.623554884593093, + "learning_rate": 6.533932397387591e-06, + "loss": 17.637, + "step": 22908 + }, + { + "epoch": 0.41875811139342317, + "grad_norm": 7.292976780208033, + "learning_rate": 6.533650657321965e-06, + "loss": 18.3185, + "step": 22909 + }, + { + "epoch": 0.41877639058986965, + "grad_norm": 6.2192238774027775, + "learning_rate": 6.5333689118810265e-06, + "loss": 17.295, + "step": 22910 + }, + { + "epoch": 0.4187946697863162, + "grad_norm": 6.987670920154048, + "learning_rate": 6.533087161065762e-06, + "loss": 17.6673, + "step": 22911 + }, + { + "epoch": 0.4188129489827627, + "grad_norm": 6.243765544688094, + "learning_rate": 6.5328054048771594e-06, + "loss": 17.4971, + "step": 22912 + }, + { + "epoch": 0.41883122817920926, + "grad_norm": 7.625280996094554, + "learning_rate": 6.5325236433162084e-06, + "loss": 18.003, + "step": 22913 + }, + { + "epoch": 0.41884950737565574, + "grad_norm": 6.994930904578712, + "learning_rate": 6.5322418763838954e-06, + "loss": 17.5114, + "step": 22914 + }, + { + "epoch": 0.4188677865721023, + "grad_norm": 7.214030486165409, + "learning_rate": 6.531960104081206e-06, + "loss": 17.4588, + "step": 22915 + }, + { + "epoch": 0.4188860657685488, + "grad_norm": 6.766063991825144, + "learning_rate": 6.53167832640913e-06, + "loss": 17.5853, + "step": 22916 + }, + { + "epoch": 0.41890434496499535, + "grad_norm": 5.607349715613173, + "learning_rate": 6.531396543368653e-06, + "loss": 17.0989, + "step": 22917 + }, + { + "epoch": 0.4189226241614419, + "grad_norm": 6.568315322497222, + "learning_rate": 6.531114754960767e-06, + "loss": 17.6328, + "step": 22918 + }, + { + "epoch": 0.41894090335788836, + "grad_norm": 6.6660130541892455, + "learning_rate": 6.5308329611864555e-06, + "loss": 17.5281, + "step": 22919 + }, + { + "epoch": 0.4189591825543349, + "grad_norm": 5.385308447069871, + "learning_rate": 6.5305511620467065e-06, + "loss": 17.34, + "step": 22920 + }, + { + "epoch": 0.41897746175078143, + "grad_norm": 5.628795621941321, + "learning_rate": 6.530269357542509e-06, + "loss": 16.9445, + "step": 22921 + }, + { + "epoch": 0.41899574094722797, + "grad_norm": 6.669207417037738, + "learning_rate": 6.52998754767485e-06, + "loss": 17.6181, + "step": 22922 + }, + { + "epoch": 0.4190140201436745, + "grad_norm": 7.998858964954297, + "learning_rate": 6.529705732444716e-06, + "loss": 17.9669, + "step": 22923 + }, + { + "epoch": 0.419032299340121, + "grad_norm": 8.60196555676147, + "learning_rate": 6.529423911853099e-06, + "loss": 18.0825, + "step": 22924 + }, + { + "epoch": 0.4190505785365675, + "grad_norm": 6.788464748694883, + "learning_rate": 6.529142085900981e-06, + "loss": 17.6837, + "step": 22925 + }, + { + "epoch": 0.41906885773301406, + "grad_norm": 6.576786036431059, + "learning_rate": 6.528860254589356e-06, + "loss": 17.4223, + "step": 22926 + }, + { + "epoch": 0.4190871369294606, + "grad_norm": 6.463190432849364, + "learning_rate": 6.528578417919206e-06, + "loss": 17.5257, + "step": 22927 + }, + { + "epoch": 0.4191054161259071, + "grad_norm": 6.992534174268173, + "learning_rate": 6.528296575891523e-06, + "loss": 17.8828, + "step": 22928 + }, + { + "epoch": 0.4191236953223536, + "grad_norm": 7.223467350509907, + "learning_rate": 6.5280147285072915e-06, + "loss": 17.7576, + "step": 22929 + }, + { + "epoch": 0.41914197451880014, + 
"grad_norm": 6.330170602348665, + "learning_rate": 6.527732875767501e-06, + "loss": 17.417, + "step": 22930 + }, + { + "epoch": 0.4191602537152467, + "grad_norm": 6.005615070281294, + "learning_rate": 6.527451017673141e-06, + "loss": 17.3151, + "step": 22931 + }, + { + "epoch": 0.4191785329116932, + "grad_norm": 6.5658764577434, + "learning_rate": 6.527169154225196e-06, + "loss": 17.2052, + "step": 22932 + }, + { + "epoch": 0.41919681210813975, + "grad_norm": 7.586881698945212, + "learning_rate": 6.526887285424657e-06, + "loss": 18.1262, + "step": 22933 + }, + { + "epoch": 0.41921509130458623, + "grad_norm": 8.520250697467581, + "learning_rate": 6.526605411272509e-06, + "loss": 18.5186, + "step": 22934 + }, + { + "epoch": 0.41923337050103276, + "grad_norm": 6.0149379766618445, + "learning_rate": 6.5263235317697425e-06, + "loss": 17.394, + "step": 22935 + }, + { + "epoch": 0.4192516496974793, + "grad_norm": 7.677557024289154, + "learning_rate": 6.526041646917344e-06, + "loss": 18.3367, + "step": 22936 + }, + { + "epoch": 0.41926992889392584, + "grad_norm": 8.267497217504378, + "learning_rate": 6.525759756716302e-06, + "loss": 18.0889, + "step": 22937 + }, + { + "epoch": 0.41928820809037237, + "grad_norm": 6.730750142506911, + "learning_rate": 6.525477861167606e-06, + "loss": 17.9213, + "step": 22938 + }, + { + "epoch": 0.41930648728681885, + "grad_norm": 5.9702144330438385, + "learning_rate": 6.52519596027224e-06, + "loss": 17.4348, + "step": 22939 + }, + { + "epoch": 0.4193247664832654, + "grad_norm": 6.446330953569598, + "learning_rate": 6.524914054031195e-06, + "loss": 17.8092, + "step": 22940 + }, + { + "epoch": 0.4193430456797119, + "grad_norm": 6.5551284079364756, + "learning_rate": 6.52463214244546e-06, + "loss": 17.7906, + "step": 22941 + }, + { + "epoch": 0.41936132487615846, + "grad_norm": 5.762021855641645, + "learning_rate": 6.524350225516022e-06, + "loss": 17.2061, + "step": 22942 + }, + { + "epoch": 0.419379604072605, + "grad_norm": 7.679490353746419, + "learning_rate": 6.5240683032438665e-06, + "loss": 17.8234, + "step": 22943 + }, + { + "epoch": 0.4193978832690515, + "grad_norm": 5.921225542284149, + "learning_rate": 6.5237863756299845e-06, + "loss": 17.2429, + "step": 22944 + }, + { + "epoch": 0.419416162465498, + "grad_norm": 7.252687262905585, + "learning_rate": 6.523504442675366e-06, + "loss": 17.5584, + "step": 22945 + }, + { + "epoch": 0.41943444166194455, + "grad_norm": 5.835753477697853, + "learning_rate": 6.523222504380994e-06, + "loss": 17.2518, + "step": 22946 + }, + { + "epoch": 0.4194527208583911, + "grad_norm": 5.739807876177038, + "learning_rate": 6.522940560747859e-06, + "loss": 17.1743, + "step": 22947 + }, + { + "epoch": 0.41947100005483756, + "grad_norm": 7.819202206090441, + "learning_rate": 6.5226586117769504e-06, + "loss": 18.5037, + "step": 22948 + }, + { + "epoch": 0.4194892792512841, + "grad_norm": 6.0028511634436015, + "learning_rate": 6.522376657469256e-06, + "loss": 17.5147, + "step": 22949 + }, + { + "epoch": 0.41950755844773063, + "grad_norm": 5.319944123196091, + "learning_rate": 6.522094697825763e-06, + "loss": 16.9886, + "step": 22950 + }, + { + "epoch": 0.41952583764417717, + "grad_norm": 5.881423393874906, + "learning_rate": 6.52181273284746e-06, + "loss": 17.3217, + "step": 22951 + }, + { + "epoch": 0.4195441168406237, + "grad_norm": 5.815097478620257, + "learning_rate": 6.521530762535336e-06, + "loss": 17.3631, + "step": 22952 + }, + { + "epoch": 0.4195623960370702, + "grad_norm": 5.740822827179228, + "learning_rate": 
6.521248786890377e-06, + "loss": 17.2605, + "step": 22953 + }, + { + "epoch": 0.4195806752335167, + "grad_norm": 6.358788451252137, + "learning_rate": 6.5209668059135755e-06, + "loss": 17.606, + "step": 22954 + }, + { + "epoch": 0.41959895442996326, + "grad_norm": 5.87558205935959, + "learning_rate": 6.520684819605917e-06, + "loss": 17.2332, + "step": 22955 + }, + { + "epoch": 0.4196172336264098, + "grad_norm": 6.093464300381178, + "learning_rate": 6.520402827968389e-06, + "loss": 17.5618, + "step": 22956 + }, + { + "epoch": 0.4196355128228563, + "grad_norm": 5.730160457463107, + "learning_rate": 6.5201208310019815e-06, + "loss": 17.323, + "step": 22957 + }, + { + "epoch": 0.4196537920193028, + "grad_norm": 6.346543923820535, + "learning_rate": 6.51983882870768e-06, + "loss": 17.4396, + "step": 22958 + }, + { + "epoch": 0.41967207121574934, + "grad_norm": 7.25686662683186, + "learning_rate": 6.519556821086479e-06, + "loss": 17.8619, + "step": 22959 + }, + { + "epoch": 0.4196903504121959, + "grad_norm": 6.531134559914157, + "learning_rate": 6.519274808139362e-06, + "loss": 17.317, + "step": 22960 + }, + { + "epoch": 0.4197086296086424, + "grad_norm": 17.641543513914314, + "learning_rate": 6.5189927898673174e-06, + "loss": 18.4942, + "step": 22961 + }, + { + "epoch": 0.41972690880508895, + "grad_norm": 7.338485644450107, + "learning_rate": 6.518710766271337e-06, + "loss": 17.8047, + "step": 22962 + }, + { + "epoch": 0.41974518800153543, + "grad_norm": 7.69381003070154, + "learning_rate": 6.518428737352406e-06, + "loss": 18.1808, + "step": 22963 + }, + { + "epoch": 0.41976346719798197, + "grad_norm": 5.71042703009896, + "learning_rate": 6.518146703111513e-06, + "loss": 17.1054, + "step": 22964 + }, + { + "epoch": 0.4197817463944285, + "grad_norm": 5.663902545869665, + "learning_rate": 6.517864663549649e-06, + "loss": 17.2465, + "step": 22965 + }, + { + "epoch": 0.41980002559087504, + "grad_norm": 6.541940914774427, + "learning_rate": 6.5175826186678e-06, + "loss": 17.487, + "step": 22966 + }, + { + "epoch": 0.4198183047873216, + "grad_norm": 5.950104287109007, + "learning_rate": 6.517300568466956e-06, + "loss": 17.3955, + "step": 22967 + }, + { + "epoch": 0.41983658398376805, + "grad_norm": 6.896729789912742, + "learning_rate": 6.517018512948106e-06, + "loss": 17.9357, + "step": 22968 + }, + { + "epoch": 0.4198548631802146, + "grad_norm": 5.469552240937213, + "learning_rate": 6.516736452112238e-06, + "loss": 17.0821, + "step": 22969 + }, + { + "epoch": 0.4198731423766611, + "grad_norm": 4.997359057882777, + "learning_rate": 6.51645438596034e-06, + "loss": 16.9295, + "step": 22970 + }, + { + "epoch": 0.41989142157310766, + "grad_norm": 5.3222105377112765, + "learning_rate": 6.516172314493399e-06, + "loss": 17.0438, + "step": 22971 + }, + { + "epoch": 0.4199097007695542, + "grad_norm": 5.246128332099832, + "learning_rate": 6.515890237712408e-06, + "loss": 16.9717, + "step": 22972 + }, + { + "epoch": 0.4199279799660007, + "grad_norm": 5.515928106494591, + "learning_rate": 6.515608155618353e-06, + "loss": 17.2534, + "step": 22973 + }, + { + "epoch": 0.4199462591624472, + "grad_norm": 8.869645870466277, + "learning_rate": 6.515326068212222e-06, + "loss": 17.8627, + "step": 22974 + }, + { + "epoch": 0.41996453835889375, + "grad_norm": 6.34706392084448, + "learning_rate": 6.515043975495005e-06, + "loss": 17.3602, + "step": 22975 + }, + { + "epoch": 0.4199828175553403, + "grad_norm": 9.963344186945106, + "learning_rate": 6.5147618774676905e-06, + "loss": 18.176, + "step": 22976 + }, + { + "epoch": 
0.4200010967517868, + "grad_norm": 5.851374040972541, + "learning_rate": 6.514479774131266e-06, + "loss": 17.4657, + "step": 22977 + }, + { + "epoch": 0.4200193759482333, + "grad_norm": 4.702057656452562, + "learning_rate": 6.514197665486723e-06, + "loss": 16.7217, + "step": 22978 + }, + { + "epoch": 0.42003765514467983, + "grad_norm": 5.870093732208084, + "learning_rate": 6.513915551535047e-06, + "loss": 17.2361, + "step": 22979 + }, + { + "epoch": 0.42005593434112637, + "grad_norm": 5.632922764412533, + "learning_rate": 6.513633432277229e-06, + "loss": 17.2576, + "step": 22980 + }, + { + "epoch": 0.4200742135375729, + "grad_norm": 8.060190828372876, + "learning_rate": 6.513351307714257e-06, + "loss": 18.1932, + "step": 22981 + }, + { + "epoch": 0.4200924927340194, + "grad_norm": 6.025354070894363, + "learning_rate": 6.51306917784712e-06, + "loss": 17.4459, + "step": 22982 + }, + { + "epoch": 0.4201107719304659, + "grad_norm": 6.708815475795794, + "learning_rate": 6.512787042676808e-06, + "loss": 17.6669, + "step": 22983 + }, + { + "epoch": 0.42012905112691246, + "grad_norm": 7.122797819201618, + "learning_rate": 6.512504902204309e-06, + "loss": 17.6847, + "step": 22984 + }, + { + "epoch": 0.420147330323359, + "grad_norm": 5.589616164174937, + "learning_rate": 6.512222756430609e-06, + "loss": 17.0379, + "step": 22985 + }, + { + "epoch": 0.4201656095198055, + "grad_norm": 6.83522881316481, + "learning_rate": 6.5119406053567e-06, + "loss": 17.559, + "step": 22986 + }, + { + "epoch": 0.420183888716252, + "grad_norm": 7.503206816650065, + "learning_rate": 6.511658448983572e-06, + "loss": 17.8493, + "step": 22987 + }, + { + "epoch": 0.42020216791269854, + "grad_norm": 6.789844118966189, + "learning_rate": 6.511376287312212e-06, + "loss": 17.7367, + "step": 22988 + }, + { + "epoch": 0.4202204471091451, + "grad_norm": 7.627172290430774, + "learning_rate": 6.511094120343608e-06, + "loss": 17.6574, + "step": 22989 + }, + { + "epoch": 0.4202387263055916, + "grad_norm": 7.249181799702012, + "learning_rate": 6.510811948078751e-06, + "loss": 17.883, + "step": 22990 + }, + { + "epoch": 0.42025700550203815, + "grad_norm": 6.714112096450881, + "learning_rate": 6.51052977051863e-06, + "loss": 17.7014, + "step": 22991 + }, + { + "epoch": 0.42027528469848463, + "grad_norm": 6.294913210124984, + "learning_rate": 6.510247587664231e-06, + "loss": 17.3123, + "step": 22992 + }, + { + "epoch": 0.42029356389493117, + "grad_norm": 7.10322660420361, + "learning_rate": 6.509965399516547e-06, + "loss": 17.5675, + "step": 22993 + }, + { + "epoch": 0.4203118430913777, + "grad_norm": 9.548721430594611, + "learning_rate": 6.509683206076565e-06, + "loss": 18.3707, + "step": 22994 + }, + { + "epoch": 0.42033012228782424, + "grad_norm": 5.962714972520607, + "learning_rate": 6.509401007345275e-06, + "loss": 17.2836, + "step": 22995 + }, + { + "epoch": 0.4203484014842708, + "grad_norm": 6.639399571622893, + "learning_rate": 6.509118803323664e-06, + "loss": 17.5949, + "step": 22996 + }, + { + "epoch": 0.42036668068071725, + "grad_norm": 7.976306646430064, + "learning_rate": 6.508836594012724e-06, + "loss": 18.7225, + "step": 22997 + }, + { + "epoch": 0.4203849598771638, + "grad_norm": 6.963961481132685, + "learning_rate": 6.508554379413441e-06, + "loss": 17.7807, + "step": 22998 + }, + { + "epoch": 0.4204032390736103, + "grad_norm": 6.820626992447245, + "learning_rate": 6.508272159526807e-06, + "loss": 17.661, + "step": 22999 + }, + { + "epoch": 0.42042151827005686, + "grad_norm": 5.6887619432817855, + "learning_rate": 
6.507989934353811e-06, + "loss": 17.4962, + "step": 23000 + }, + { + "epoch": 0.4204397974665034, + "grad_norm": 6.520621347198876, + "learning_rate": 6.507707703895441e-06, + "loss": 17.683, + "step": 23001 + }, + { + "epoch": 0.4204580766629499, + "grad_norm": 7.230178250681007, + "learning_rate": 6.507425468152684e-06, + "loss": 17.8178, + "step": 23002 + }, + { + "epoch": 0.4204763558593964, + "grad_norm": 7.004311385823891, + "learning_rate": 6.5071432271265325e-06, + "loss": 17.6068, + "step": 23003 + }, + { + "epoch": 0.42049463505584295, + "grad_norm": 6.563694562402713, + "learning_rate": 6.506860980817975e-06, + "loss": 17.8427, + "step": 23004 + }, + { + "epoch": 0.4205129142522895, + "grad_norm": 5.298064693816009, + "learning_rate": 6.506578729228002e-06, + "loss": 17.1916, + "step": 23005 + }, + { + "epoch": 0.420531193448736, + "grad_norm": 5.726445908275986, + "learning_rate": 6.5062964723575984e-06, + "loss": 17.3356, + "step": 23006 + }, + { + "epoch": 0.4205494726451825, + "grad_norm": 7.583442717564308, + "learning_rate": 6.506014210207758e-06, + "loss": 18.0529, + "step": 23007 + }, + { + "epoch": 0.42056775184162903, + "grad_norm": 6.949887057451262, + "learning_rate": 6.505731942779469e-06, + "loss": 17.7024, + "step": 23008 + }, + { + "epoch": 0.42058603103807557, + "grad_norm": 9.40189290605422, + "learning_rate": 6.505449670073719e-06, + "loss": 18.596, + "step": 23009 + }, + { + "epoch": 0.4206043102345221, + "grad_norm": 6.785372381138408, + "learning_rate": 6.505167392091499e-06, + "loss": 17.7004, + "step": 23010 + }, + { + "epoch": 0.42062258943096864, + "grad_norm": 5.254013375883744, + "learning_rate": 6.5048851088338e-06, + "loss": 17.0435, + "step": 23011 + }, + { + "epoch": 0.4206408686274151, + "grad_norm": 6.722683697860286, + "learning_rate": 6.5046028203016056e-06, + "loss": 17.5095, + "step": 23012 + }, + { + "epoch": 0.42065914782386166, + "grad_norm": 11.684252960700595, + "learning_rate": 6.50432052649591e-06, + "loss": 19.2351, + "step": 23013 + }, + { + "epoch": 0.4206774270203082, + "grad_norm": 5.700003385395421, + "learning_rate": 6.504038227417703e-06, + "loss": 17.2616, + "step": 23014 + }, + { + "epoch": 0.42069570621675473, + "grad_norm": 6.3408082485463115, + "learning_rate": 6.503755923067972e-06, + "loss": 17.5155, + "step": 23015 + }, + { + "epoch": 0.4207139854132012, + "grad_norm": 6.399662701364488, + "learning_rate": 6.5034736134477064e-06, + "loss": 17.48, + "step": 23016 + }, + { + "epoch": 0.42073226460964774, + "grad_norm": 7.063894437669993, + "learning_rate": 6.503191298557895e-06, + "loss": 17.5484, + "step": 23017 + }, + { + "epoch": 0.4207505438060943, + "grad_norm": 5.456594985238224, + "learning_rate": 6.502908978399531e-06, + "loss": 17.2491, + "step": 23018 + }, + { + "epoch": 0.4207688230025408, + "grad_norm": 6.251009222255237, + "learning_rate": 6.502626652973601e-06, + "loss": 17.478, + "step": 23019 + }, + { + "epoch": 0.42078710219898735, + "grad_norm": 5.105701868192694, + "learning_rate": 6.502344322281093e-06, + "loss": 16.9507, + "step": 23020 + }, + { + "epoch": 0.42080538139543383, + "grad_norm": 5.973386292605992, + "learning_rate": 6.502061986323001e-06, + "loss": 17.4321, + "step": 23021 + }, + { + "epoch": 0.42082366059188037, + "grad_norm": 6.355501402620701, + "learning_rate": 6.50177964510031e-06, + "loss": 17.5487, + "step": 23022 + }, + { + "epoch": 0.4208419397883269, + "grad_norm": 6.529065987468653, + "learning_rate": 6.501497298614012e-06, + "loss": 17.4421, + "step": 23023 + }, + { + 
"epoch": 0.42086021898477344, + "grad_norm": 4.734227154551184, + "learning_rate": 6.501214946865099e-06, + "loss": 16.8256, + "step": 23024 + }, + { + "epoch": 0.42087849818122, + "grad_norm": 6.475746937751447, + "learning_rate": 6.500932589854554e-06, + "loss": 17.6178, + "step": 23025 + }, + { + "epoch": 0.42089677737766645, + "grad_norm": 7.898971635939524, + "learning_rate": 6.500650227583373e-06, + "loss": 17.9597, + "step": 23026 + }, + { + "epoch": 0.420915056574113, + "grad_norm": 5.868163200725381, + "learning_rate": 6.500367860052542e-06, + "loss": 17.55, + "step": 23027 + }, + { + "epoch": 0.4209333357705595, + "grad_norm": 6.305679871062329, + "learning_rate": 6.500085487263054e-06, + "loss": 17.4224, + "step": 23028 + }, + { + "epoch": 0.42095161496700606, + "grad_norm": 7.460606064130908, + "learning_rate": 6.4998031092158945e-06, + "loss": 17.7353, + "step": 23029 + }, + { + "epoch": 0.4209698941634526, + "grad_norm": 7.432347668733622, + "learning_rate": 6.4995207259120545e-06, + "loss": 18.1434, + "step": 23030 + }, + { + "epoch": 0.4209881733598991, + "grad_norm": 7.0641466773516255, + "learning_rate": 6.499238337352526e-06, + "loss": 17.4237, + "step": 23031 + }, + { + "epoch": 0.4210064525563456, + "grad_norm": 5.934801043033059, + "learning_rate": 6.498955943538296e-06, + "loss": 17.3854, + "step": 23032 + }, + { + "epoch": 0.42102473175279215, + "grad_norm": 5.9483563809520374, + "learning_rate": 6.498673544470357e-06, + "loss": 17.0176, + "step": 23033 + }, + { + "epoch": 0.4210430109492387, + "grad_norm": 6.40693209554785, + "learning_rate": 6.498391140149697e-06, + "loss": 17.5485, + "step": 23034 + }, + { + "epoch": 0.4210612901456852, + "grad_norm": 6.269989729490269, + "learning_rate": 6.498108730577305e-06, + "loss": 17.344, + "step": 23035 + }, + { + "epoch": 0.4210795693421317, + "grad_norm": 5.74404053994302, + "learning_rate": 6.4978263157541724e-06, + "loss": 17.3322, + "step": 23036 + }, + { + "epoch": 0.42109784853857823, + "grad_norm": 6.126397048386426, + "learning_rate": 6.497543895681289e-06, + "loss": 17.4026, + "step": 23037 + }, + { + "epoch": 0.42111612773502477, + "grad_norm": 7.4906858377145245, + "learning_rate": 6.497261470359645e-06, + "loss": 17.9141, + "step": 23038 + }, + { + "epoch": 0.4211344069314713, + "grad_norm": 7.246093139301408, + "learning_rate": 6.496979039790228e-06, + "loss": 17.6428, + "step": 23039 + }, + { + "epoch": 0.42115268612791784, + "grad_norm": 5.9149978582310725, + "learning_rate": 6.496696603974029e-06, + "loss": 17.2904, + "step": 23040 + }, + { + "epoch": 0.4211709653243643, + "grad_norm": 5.73176407449862, + "learning_rate": 6.496414162912039e-06, + "loss": 17.5765, + "step": 23041 + }, + { + "epoch": 0.42118924452081086, + "grad_norm": 6.743509820661, + "learning_rate": 6.496131716605247e-06, + "loss": 17.6755, + "step": 23042 + }, + { + "epoch": 0.4212075237172574, + "grad_norm": 21.24521701980779, + "learning_rate": 6.495849265054645e-06, + "loss": 17.8667, + "step": 23043 + }, + { + "epoch": 0.42122580291370393, + "grad_norm": 7.545617373943087, + "learning_rate": 6.495566808261218e-06, + "loss": 17.9976, + "step": 23044 + }, + { + "epoch": 0.42124408211015046, + "grad_norm": 7.0451922621308105, + "learning_rate": 6.4952843462259605e-06, + "loss": 17.7555, + "step": 23045 + }, + { + "epoch": 0.42126236130659694, + "grad_norm": 6.393711946691229, + "learning_rate": 6.495001878949862e-06, + "loss": 17.5035, + "step": 23046 + }, + { + "epoch": 0.4212806405030435, + "grad_norm": 7.00282923224376, + 
"learning_rate": 6.4947194064339106e-06, + "loss": 17.6665, + "step": 23047 + }, + { + "epoch": 0.42129891969949, + "grad_norm": 5.760467812965505, + "learning_rate": 6.494436928679098e-06, + "loss": 17.2732, + "step": 23048 + }, + { + "epoch": 0.42131719889593655, + "grad_norm": 7.456025998569738, + "learning_rate": 6.494154445686413e-06, + "loss": 17.6858, + "step": 23049 + }, + { + "epoch": 0.42133547809238303, + "grad_norm": 5.678786638497476, + "learning_rate": 6.493871957456847e-06, + "loss": 17.263, + "step": 23050 + }, + { + "epoch": 0.42135375728882957, + "grad_norm": 6.27811710201321, + "learning_rate": 6.493589463991389e-06, + "loss": 17.6338, + "step": 23051 + }, + { + "epoch": 0.4213720364852761, + "grad_norm": 6.141284749832784, + "learning_rate": 6.4933069652910286e-06, + "loss": 17.4214, + "step": 23052 + }, + { + "epoch": 0.42139031568172264, + "grad_norm": 6.329698450530063, + "learning_rate": 6.4930244613567585e-06, + "loss": 17.258, + "step": 23053 + }, + { + "epoch": 0.4214085948781692, + "grad_norm": 5.062037776340806, + "learning_rate": 6.492741952189566e-06, + "loss": 16.6702, + "step": 23054 + }, + { + "epoch": 0.42142687407461565, + "grad_norm": 6.427858729614545, + "learning_rate": 6.492459437790444e-06, + "loss": 17.6709, + "step": 23055 + }, + { + "epoch": 0.4214451532710622, + "grad_norm": 8.082825206280688, + "learning_rate": 6.49217691816038e-06, + "loss": 17.9099, + "step": 23056 + }, + { + "epoch": 0.4214634324675087, + "grad_norm": 6.148529649176768, + "learning_rate": 6.4918943933003654e-06, + "loss": 17.223, + "step": 23057 + }, + { + "epoch": 0.42148171166395526, + "grad_norm": 6.107731647693613, + "learning_rate": 6.49161186321139e-06, + "loss": 17.5471, + "step": 23058 + }, + { + "epoch": 0.4214999908604018, + "grad_norm": 6.598753730395351, + "learning_rate": 6.491329327894447e-06, + "loss": 17.6031, + "step": 23059 + }, + { + "epoch": 0.4215182700568483, + "grad_norm": 6.269006757437165, + "learning_rate": 6.4910467873505215e-06, + "loss": 17.3866, + "step": 23060 + }, + { + "epoch": 0.4215365492532948, + "grad_norm": 8.63377623937528, + "learning_rate": 6.490764241580607e-06, + "loss": 18.8411, + "step": 23061 + }, + { + "epoch": 0.42155482844974135, + "grad_norm": 6.131700349978271, + "learning_rate": 6.490481690585694e-06, + "loss": 17.1781, + "step": 23062 + }, + { + "epoch": 0.4215731076461879, + "grad_norm": 5.9149467235687485, + "learning_rate": 6.49019913436677e-06, + "loss": 17.239, + "step": 23063 + }, + { + "epoch": 0.4215913868426344, + "grad_norm": 7.234875253028634, + "learning_rate": 6.489916572924829e-06, + "loss": 17.8849, + "step": 23064 + }, + { + "epoch": 0.4216096660390809, + "grad_norm": 6.330188920281069, + "learning_rate": 6.4896340062608595e-06, + "loss": 17.2995, + "step": 23065 + }, + { + "epoch": 0.42162794523552743, + "grad_norm": 7.635884789761982, + "learning_rate": 6.489351434375852e-06, + "loss": 18.158, + "step": 23066 + }, + { + "epoch": 0.42164622443197397, + "grad_norm": 7.506853994220852, + "learning_rate": 6.4890688572707975e-06, + "loss": 17.9646, + "step": 23067 + }, + { + "epoch": 0.4216645036284205, + "grad_norm": 7.183464164255513, + "learning_rate": 6.488786274946684e-06, + "loss": 17.7851, + "step": 23068 + }, + { + "epoch": 0.42168278282486704, + "grad_norm": 6.9744005775285, + "learning_rate": 6.488503687404506e-06, + "loss": 18.1321, + "step": 23069 + }, + { + "epoch": 0.4217010620213135, + "grad_norm": 5.343039344496768, + "learning_rate": 6.4882210946452515e-06, + "loss": 17.066, + "step": 23070 + 
}, + { + "epoch": 0.42171934121776006, + "grad_norm": 7.2410374871601615, + "learning_rate": 6.48793849666991e-06, + "loss": 17.9285, + "step": 23071 + }, + { + "epoch": 0.4217376204142066, + "grad_norm": 6.736478462705318, + "learning_rate": 6.487655893479473e-06, + "loss": 17.6948, + "step": 23072 + }, + { + "epoch": 0.42175589961065313, + "grad_norm": 8.07099306422754, + "learning_rate": 6.487373285074933e-06, + "loss": 17.7591, + "step": 23073 + }, + { + "epoch": 0.42177417880709966, + "grad_norm": 8.15454074081976, + "learning_rate": 6.487090671457278e-06, + "loss": 17.8959, + "step": 23074 + }, + { + "epoch": 0.42179245800354614, + "grad_norm": 5.813501670173463, + "learning_rate": 6.4868080526274975e-06, + "loss": 17.3495, + "step": 23075 + }, + { + "epoch": 0.4218107371999927, + "grad_norm": 6.051453057483406, + "learning_rate": 6.4865254285865855e-06, + "loss": 17.4222, + "step": 23076 + }, + { + "epoch": 0.4218290163964392, + "grad_norm": 5.7059688922023994, + "learning_rate": 6.4862427993355315e-06, + "loss": 17.3903, + "step": 23077 + }, + { + "epoch": 0.42184729559288575, + "grad_norm": 7.380393933533788, + "learning_rate": 6.485960164875323e-06, + "loss": 17.5945, + "step": 23078 + }, + { + "epoch": 0.4218655747893323, + "grad_norm": 5.822752000654732, + "learning_rate": 6.485677525206955e-06, + "loss": 17.3908, + "step": 23079 + }, + { + "epoch": 0.42188385398577877, + "grad_norm": 6.866617406183222, + "learning_rate": 6.485394880331416e-06, + "loss": 17.6554, + "step": 23080 + }, + { + "epoch": 0.4219021331822253, + "grad_norm": 6.685040144882246, + "learning_rate": 6.485112230249696e-06, + "loss": 17.6405, + "step": 23081 + }, + { + "epoch": 0.42192041237867184, + "grad_norm": 5.7851308625382645, + "learning_rate": 6.484829574962788e-06, + "loss": 17.2046, + "step": 23082 + }, + { + "epoch": 0.4219386915751184, + "grad_norm": 7.830516847175503, + "learning_rate": 6.484546914471681e-06, + "loss": 18.1744, + "step": 23083 + }, + { + "epoch": 0.42195697077156485, + "grad_norm": 7.663114401718743, + "learning_rate": 6.484264248777365e-06, + "loss": 17.722, + "step": 23084 + }, + { + "epoch": 0.4219752499680114, + "grad_norm": 6.8836285566600095, + "learning_rate": 6.48398157788083e-06, + "loss": 17.803, + "step": 23085 + }, + { + "epoch": 0.4219935291644579, + "grad_norm": 6.639189576399866, + "learning_rate": 6.4836989017830705e-06, + "loss": 17.9358, + "step": 23086 + }, + { + "epoch": 0.42201180836090446, + "grad_norm": 6.4209760802856835, + "learning_rate": 6.483416220485076e-06, + "loss": 17.5268, + "step": 23087 + }, + { + "epoch": 0.422030087557351, + "grad_norm": 6.811218033578101, + "learning_rate": 6.4831335339878355e-06, + "loss": 18.1312, + "step": 23088 + }, + { + "epoch": 0.4220483667537975, + "grad_norm": 8.252758684835596, + "learning_rate": 6.4828508422923394e-06, + "loss": 17.8009, + "step": 23089 + }, + { + "epoch": 0.422066645950244, + "grad_norm": 5.580805128115323, + "learning_rate": 6.4825681453995805e-06, + "loss": 17.2021, + "step": 23090 + }, + { + "epoch": 0.42208492514669055, + "grad_norm": 6.77878781847524, + "learning_rate": 6.482285443310549e-06, + "loss": 17.3954, + "step": 23091 + }, + { + "epoch": 0.4221032043431371, + "grad_norm": 5.443725019425856, + "learning_rate": 6.482002736026236e-06, + "loss": 17.1061, + "step": 23092 + }, + { + "epoch": 0.4221214835395836, + "grad_norm": 7.432860429111075, + "learning_rate": 6.481720023547631e-06, + "loss": 17.858, + "step": 23093 + }, + { + "epoch": 0.4221397627360301, + "grad_norm": 
7.115159419646255, + "learning_rate": 6.481437305875727e-06, + "loss": 17.8522, + "step": 23094 + }, + { + "epoch": 0.42215804193247664, + "grad_norm": 6.601983738113477, + "learning_rate": 6.481154583011513e-06, + "loss": 17.8641, + "step": 23095 + }, + { + "epoch": 0.42217632112892317, + "grad_norm": 6.441212479276569, + "learning_rate": 6.4808718549559815e-06, + "loss": 17.6611, + "step": 23096 + }, + { + "epoch": 0.4221946003253697, + "grad_norm": 5.8398323228293, + "learning_rate": 6.480589121710123e-06, + "loss": 17.3993, + "step": 23097 + }, + { + "epoch": 0.42221287952181624, + "grad_norm": 5.15225786937002, + "learning_rate": 6.480306383274926e-06, + "loss": 17.003, + "step": 23098 + }, + { + "epoch": 0.4222311587182627, + "grad_norm": 6.037309898579251, + "learning_rate": 6.480023639651385e-06, + "loss": 17.1447, + "step": 23099 + }, + { + "epoch": 0.42224943791470926, + "grad_norm": 7.347191776281626, + "learning_rate": 6.47974089084049e-06, + "loss": 18.1314, + "step": 23100 + }, + { + "epoch": 0.4222677171111558, + "grad_norm": 7.158394307901473, + "learning_rate": 6.479458136843232e-06, + "loss": 17.8036, + "step": 23101 + }, + { + "epoch": 0.42228599630760233, + "grad_norm": 6.573547552557479, + "learning_rate": 6.479175377660601e-06, + "loss": 17.4141, + "step": 23102 + }, + { + "epoch": 0.42230427550404886, + "grad_norm": 6.198939740718497, + "learning_rate": 6.478892613293586e-06, + "loss": 17.4501, + "step": 23103 + }, + { + "epoch": 0.42232255470049535, + "grad_norm": 6.512008325165434, + "learning_rate": 6.4786098437431845e-06, + "loss": 17.7391, + "step": 23104 + }, + { + "epoch": 0.4223408338969419, + "grad_norm": 7.510655871600087, + "learning_rate": 6.478327069010381e-06, + "loss": 17.8093, + "step": 23105 + }, + { + "epoch": 0.4223591130933884, + "grad_norm": 6.438621615415397, + "learning_rate": 6.478044289096173e-06, + "loss": 17.5805, + "step": 23106 + }, + { + "epoch": 0.42237739228983495, + "grad_norm": 7.336705497786379, + "learning_rate": 6.477761504001545e-06, + "loss": 17.7835, + "step": 23107 + }, + { + "epoch": 0.4223956714862815, + "grad_norm": 5.87458767097228, + "learning_rate": 6.477478713727492e-06, + "loss": 17.4316, + "step": 23108 + }, + { + "epoch": 0.42241395068272797, + "grad_norm": 6.552670239449119, + "learning_rate": 6.477195918275003e-06, + "loss": 17.901, + "step": 23109 + }, + { + "epoch": 0.4224322298791745, + "grad_norm": 6.2558059341346475, + "learning_rate": 6.476913117645073e-06, + "loss": 17.6013, + "step": 23110 + }, + { + "epoch": 0.42245050907562104, + "grad_norm": 6.159191106187636, + "learning_rate": 6.47663031183869e-06, + "loss": 17.4801, + "step": 23111 + }, + { + "epoch": 0.4224687882720676, + "grad_norm": 6.3639036507049935, + "learning_rate": 6.476347500856844e-06, + "loss": 17.7147, + "step": 23112 + }, + { + "epoch": 0.4224870674685141, + "grad_norm": 5.495717279870066, + "learning_rate": 6.476064684700529e-06, + "loss": 17.1045, + "step": 23113 + }, + { + "epoch": 0.4225053466649606, + "grad_norm": 5.334120959304089, + "learning_rate": 6.475781863370738e-06, + "loss": 17.3345, + "step": 23114 + }, + { + "epoch": 0.4225236258614071, + "grad_norm": 7.442688966357905, + "learning_rate": 6.4754990368684565e-06, + "loss": 17.7983, + "step": 23115 + }, + { + "epoch": 0.42254190505785366, + "grad_norm": 7.859035112315611, + "learning_rate": 6.475216205194681e-06, + "loss": 17.8951, + "step": 23116 + }, + { + "epoch": 0.4225601842543002, + "grad_norm": 6.195672053441522, + "learning_rate": 6.474933368350398e-06, + "loss": 
17.2802, + "step": 23117 + }, + { + "epoch": 0.4225784634507467, + "grad_norm": 6.771734234262354, + "learning_rate": 6.4746505263366045e-06, + "loss": 17.7595, + "step": 23118 + }, + { + "epoch": 0.4225967426471932, + "grad_norm": 5.870869540255059, + "learning_rate": 6.4743676791542874e-06, + "loss": 17.2879, + "step": 23119 + }, + { + "epoch": 0.42261502184363975, + "grad_norm": 8.189513975913615, + "learning_rate": 6.474084826804438e-06, + "loss": 17.9759, + "step": 23120 + }, + { + "epoch": 0.4226333010400863, + "grad_norm": 6.303489517903496, + "learning_rate": 6.473801969288052e-06, + "loss": 17.3265, + "step": 23121 + }, + { + "epoch": 0.4226515802365328, + "grad_norm": 7.0375939108654695, + "learning_rate": 6.473519106606117e-06, + "loss": 17.8507, + "step": 23122 + }, + { + "epoch": 0.4226698594329793, + "grad_norm": 6.59593245315403, + "learning_rate": 6.473236238759625e-06, + "loss": 17.6573, + "step": 23123 + }, + { + "epoch": 0.42268813862942584, + "grad_norm": 6.561789534776478, + "learning_rate": 6.472953365749569e-06, + "loss": 17.5769, + "step": 23124 + }, + { + "epoch": 0.42270641782587237, + "grad_norm": 6.425788824945835, + "learning_rate": 6.472670487576937e-06, + "loss": 17.762, + "step": 23125 + }, + { + "epoch": 0.4227246970223189, + "grad_norm": 6.233744604495483, + "learning_rate": 6.4723876042427245e-06, + "loss": 17.5841, + "step": 23126 + }, + { + "epoch": 0.42274297621876544, + "grad_norm": 6.3898368059332755, + "learning_rate": 6.47210471574792e-06, + "loss": 17.5965, + "step": 23127 + }, + { + "epoch": 0.4227612554152119, + "grad_norm": 5.853990276212833, + "learning_rate": 6.471821822093518e-06, + "loss": 17.4561, + "step": 23128 + }, + { + "epoch": 0.42277953461165846, + "grad_norm": 6.057939282826052, + "learning_rate": 6.471538923280507e-06, + "loss": 17.346, + "step": 23129 + }, + { + "epoch": 0.422797813808105, + "grad_norm": 6.8457531698245715, + "learning_rate": 6.471256019309879e-06, + "loss": 17.6576, + "step": 23130 + }, + { + "epoch": 0.42281609300455153, + "grad_norm": 6.489701519716401, + "learning_rate": 6.470973110182629e-06, + "loss": 17.5682, + "step": 23131 + }, + { + "epoch": 0.42283437220099807, + "grad_norm": 6.988440599621172, + "learning_rate": 6.470690195899744e-06, + "loss": 17.9877, + "step": 23132 + }, + { + "epoch": 0.42285265139744455, + "grad_norm": 6.231174844169285, + "learning_rate": 6.470407276462217e-06, + "loss": 17.6179, + "step": 23133 + }, + { + "epoch": 0.4228709305938911, + "grad_norm": 8.562516756371206, + "learning_rate": 6.470124351871041e-06, + "loss": 17.6676, + "step": 23134 + }, + { + "epoch": 0.4228892097903376, + "grad_norm": 6.440482517126097, + "learning_rate": 6.4698414221272066e-06, + "loss": 17.6784, + "step": 23135 + }, + { + "epoch": 0.42290748898678415, + "grad_norm": 6.348356776678403, + "learning_rate": 6.469558487231706e-06, + "loss": 17.545, + "step": 23136 + }, + { + "epoch": 0.4229257681832307, + "grad_norm": 12.258602007158364, + "learning_rate": 6.469275547185529e-06, + "loss": 18.8457, + "step": 23137 + }, + { + "epoch": 0.42294404737967717, + "grad_norm": 7.0072444440524455, + "learning_rate": 6.468992601989671e-06, + "loss": 17.976, + "step": 23138 + }, + { + "epoch": 0.4229623265761237, + "grad_norm": 6.128780314200569, + "learning_rate": 6.468709651645119e-06, + "loss": 17.4697, + "step": 23139 + }, + { + "epoch": 0.42298060577257024, + "grad_norm": 6.223880414362575, + "learning_rate": 6.4684266961528675e-06, + "loss": 17.4181, + "step": 23140 + }, + { + "epoch": 0.4229988849690168, + 
"grad_norm": 5.158827222271919, + "learning_rate": 6.468143735513908e-06, + "loss": 16.8935, + "step": 23141 + }, + { + "epoch": 0.4230171641654633, + "grad_norm": 6.220023788164625, + "learning_rate": 6.467860769729234e-06, + "loss": 17.5723, + "step": 23142 + }, + { + "epoch": 0.4230354433619098, + "grad_norm": 6.199615221312094, + "learning_rate": 6.467577798799834e-06, + "loss": 17.3186, + "step": 23143 + }, + { + "epoch": 0.4230537225583563, + "grad_norm": 5.38133794882142, + "learning_rate": 6.4672948227267e-06, + "loss": 17.0014, + "step": 23144 + }, + { + "epoch": 0.42307200175480286, + "grad_norm": 6.8549333842776585, + "learning_rate": 6.467011841510827e-06, + "loss": 18.0086, + "step": 23145 + }, + { + "epoch": 0.4230902809512494, + "grad_norm": 5.703526497242912, + "learning_rate": 6.466728855153203e-06, + "loss": 17.295, + "step": 23146 + }, + { + "epoch": 0.42310856014769593, + "grad_norm": 7.304360598495538, + "learning_rate": 6.466445863654823e-06, + "loss": 17.7328, + "step": 23147 + }, + { + "epoch": 0.4231268393441424, + "grad_norm": 7.816409379870154, + "learning_rate": 6.466162867016677e-06, + "loss": 17.6701, + "step": 23148 + }, + { + "epoch": 0.42314511854058895, + "grad_norm": 6.000202311919672, + "learning_rate": 6.465879865239757e-06, + "loss": 17.4872, + "step": 23149 + }, + { + "epoch": 0.4231633977370355, + "grad_norm": 7.076186929357036, + "learning_rate": 6.465596858325056e-06, + "loss": 17.9496, + "step": 23150 + }, + { + "epoch": 0.423181676933482, + "grad_norm": 6.291260848980936, + "learning_rate": 6.465313846273566e-06, + "loss": 17.1886, + "step": 23151 + }, + { + "epoch": 0.4231999561299285, + "grad_norm": 4.861121498229915, + "learning_rate": 6.465030829086276e-06, + "loss": 16.8271, + "step": 23152 + }, + { + "epoch": 0.42321823532637504, + "grad_norm": 7.000038974411, + "learning_rate": 6.464747806764181e-06, + "loss": 17.7604, + "step": 23153 + }, + { + "epoch": 0.42323651452282157, + "grad_norm": 6.553291930245339, + "learning_rate": 6.4644647793082725e-06, + "loss": 17.5454, + "step": 23154 + }, + { + "epoch": 0.4232547937192681, + "grad_norm": 6.337761010366317, + "learning_rate": 6.464181746719541e-06, + "loss": 17.578, + "step": 23155 + }, + { + "epoch": 0.42327307291571464, + "grad_norm": 6.479008818263093, + "learning_rate": 6.463898708998981e-06, + "loss": 17.6494, + "step": 23156 + }, + { + "epoch": 0.4232913521121611, + "grad_norm": 6.435850887126168, + "learning_rate": 6.463615666147581e-06, + "loss": 17.4268, + "step": 23157 + }, + { + "epoch": 0.42330963130860766, + "grad_norm": 7.474651685203027, + "learning_rate": 6.463332618166337e-06, + "loss": 17.697, + "step": 23158 + }, + { + "epoch": 0.4233279105050542, + "grad_norm": 7.888308945496125, + "learning_rate": 6.463049565056239e-06, + "loss": 18.1867, + "step": 23159 + }, + { + "epoch": 0.42334618970150073, + "grad_norm": 5.471285991459773, + "learning_rate": 6.462766506818279e-06, + "loss": 17.1011, + "step": 23160 + }, + { + "epoch": 0.42336446889794727, + "grad_norm": 6.5632578482274875, + "learning_rate": 6.462483443453449e-06, + "loss": 17.5202, + "step": 23161 + }, + { + "epoch": 0.42338274809439375, + "grad_norm": 6.3339588163787255, + "learning_rate": 6.4622003749627415e-06, + "loss": 17.4519, + "step": 23162 + }, + { + "epoch": 0.4234010272908403, + "grad_norm": 7.567082923147876, + "learning_rate": 6.461917301347148e-06, + "loss": 17.8977, + "step": 23163 + }, + { + "epoch": 0.4234193064872868, + "grad_norm": 7.367710063713969, + "learning_rate": 6.461634222607662e-06, + 
"loss": 18.1403, + "step": 23164 + }, + { + "epoch": 0.42343758568373335, + "grad_norm": 8.318156710560396, + "learning_rate": 6.461351138745275e-06, + "loss": 18.1321, + "step": 23165 + }, + { + "epoch": 0.4234558648801799, + "grad_norm": 7.474583299570602, + "learning_rate": 6.461068049760978e-06, + "loss": 18.057, + "step": 23166 + }, + { + "epoch": 0.42347414407662637, + "grad_norm": 6.9254902409955275, + "learning_rate": 6.460784955655766e-06, + "loss": 17.8518, + "step": 23167 + }, + { + "epoch": 0.4234924232730729, + "grad_norm": 7.831107012584451, + "learning_rate": 6.4605018564306275e-06, + "loss": 18.1655, + "step": 23168 + }, + { + "epoch": 0.42351070246951944, + "grad_norm": 5.2637812020058234, + "learning_rate": 6.460218752086559e-06, + "loss": 17.1073, + "step": 23169 + }, + { + "epoch": 0.423528981665966, + "grad_norm": 6.165313486475107, + "learning_rate": 6.459935642624549e-06, + "loss": 17.4921, + "step": 23170 + }, + { + "epoch": 0.4235472608624125, + "grad_norm": 5.610198829730439, + "learning_rate": 6.45965252804559e-06, + "loss": 17.2784, + "step": 23171 + }, + { + "epoch": 0.423565540058859, + "grad_norm": 6.1106798370772415, + "learning_rate": 6.459369408350677e-06, + "loss": 17.2397, + "step": 23172 + }, + { + "epoch": 0.4235838192553055, + "grad_norm": 7.268801561247627, + "learning_rate": 6.459086283540802e-06, + "loss": 18.0769, + "step": 23173 + }, + { + "epoch": 0.42360209845175206, + "grad_norm": 5.950001126618713, + "learning_rate": 6.458803153616955e-06, + "loss": 17.5477, + "step": 23174 + }, + { + "epoch": 0.4236203776481986, + "grad_norm": 5.950580894332745, + "learning_rate": 6.45852001858013e-06, + "loss": 17.0636, + "step": 23175 + }, + { + "epoch": 0.42363865684464513, + "grad_norm": 6.151153173015378, + "learning_rate": 6.458236878431317e-06, + "loss": 17.3583, + "step": 23176 + }, + { + "epoch": 0.4236569360410916, + "grad_norm": 6.353086558370284, + "learning_rate": 6.457953733171513e-06, + "loss": 17.2193, + "step": 23177 + }, + { + "epoch": 0.42367521523753815, + "grad_norm": 6.90804130787632, + "learning_rate": 6.457670582801706e-06, + "loss": 17.4209, + "step": 23178 + }, + { + "epoch": 0.4236934944339847, + "grad_norm": 6.916016700328348, + "learning_rate": 6.457387427322889e-06, + "loss": 17.4882, + "step": 23179 + }, + { + "epoch": 0.4237117736304312, + "grad_norm": 6.390139397516349, + "learning_rate": 6.4571042667360585e-06, + "loss": 17.3082, + "step": 23180 + }, + { + "epoch": 0.42373005282687776, + "grad_norm": 7.211340731285922, + "learning_rate": 6.4568211010422025e-06, + "loss": 17.9054, + "step": 23181 + }, + { + "epoch": 0.42374833202332424, + "grad_norm": 7.222845905313327, + "learning_rate": 6.456537930242315e-06, + "loss": 17.7328, + "step": 23182 + }, + { + "epoch": 0.4237666112197708, + "grad_norm": 6.221297555481, + "learning_rate": 6.45625475433739e-06, + "loss": 17.6305, + "step": 23183 + }, + { + "epoch": 0.4237848904162173, + "grad_norm": 6.447643288950181, + "learning_rate": 6.455971573328415e-06, + "loss": 17.6717, + "step": 23184 + }, + { + "epoch": 0.42380316961266384, + "grad_norm": 8.04998433861434, + "learning_rate": 6.4556883872163875e-06, + "loss": 18.1202, + "step": 23185 + }, + { + "epoch": 0.4238214488091103, + "grad_norm": 7.751375134420809, + "learning_rate": 6.4554051960023e-06, + "loss": 18.5387, + "step": 23186 + }, + { + "epoch": 0.42383972800555686, + "grad_norm": 7.467149403090195, + "learning_rate": 6.455121999687143e-06, + "loss": 17.7701, + "step": 23187 + }, + { + "epoch": 0.4238580072020034, + 
"grad_norm": 7.1647614363195515, + "learning_rate": 6.454838798271909e-06, + "loss": 17.3247, + "step": 23188 + }, + { + "epoch": 0.42387628639844993, + "grad_norm": 8.03890099407871, + "learning_rate": 6.45455559175759e-06, + "loss": 18.1342, + "step": 23189 + }, + { + "epoch": 0.42389456559489647, + "grad_norm": 7.077863520526551, + "learning_rate": 6.454272380145183e-06, + "loss": 17.4789, + "step": 23190 + }, + { + "epoch": 0.42391284479134295, + "grad_norm": 7.8605563705078465, + "learning_rate": 6.453989163435676e-06, + "loss": 18.283, + "step": 23191 + }, + { + "epoch": 0.4239311239877895, + "grad_norm": 7.136978425686276, + "learning_rate": 6.453705941630062e-06, + "loss": 17.8003, + "step": 23192 + }, + { + "epoch": 0.423949403184236, + "grad_norm": 5.420422828767463, + "learning_rate": 6.453422714729336e-06, + "loss": 17.055, + "step": 23193 + }, + { + "epoch": 0.42396768238068255, + "grad_norm": 8.036881457389141, + "learning_rate": 6.453139482734489e-06, + "loss": 17.8265, + "step": 23194 + }, + { + "epoch": 0.4239859615771291, + "grad_norm": 6.376078633630916, + "learning_rate": 6.452856245646515e-06, + "loss": 17.5076, + "step": 23195 + }, + { + "epoch": 0.42400424077357557, + "grad_norm": 7.055613800982587, + "learning_rate": 6.4525730034664046e-06, + "loss": 17.6407, + "step": 23196 + }, + { + "epoch": 0.4240225199700221, + "grad_norm": 5.7198785669912136, + "learning_rate": 6.4522897561951536e-06, + "loss": 17.2559, + "step": 23197 + }, + { + "epoch": 0.42404079916646864, + "grad_norm": 7.325935359629535, + "learning_rate": 6.452006503833752e-06, + "loss": 17.8513, + "step": 23198 + }, + { + "epoch": 0.4240590783629152, + "grad_norm": 6.532728512763269, + "learning_rate": 6.451723246383194e-06, + "loss": 17.2482, + "step": 23199 + }, + { + "epoch": 0.4240773575593617, + "grad_norm": 6.1520616411847495, + "learning_rate": 6.451439983844472e-06, + "loss": 17.5248, + "step": 23200 + }, + { + "epoch": 0.4240956367558082, + "grad_norm": 5.030040310422997, + "learning_rate": 6.451156716218579e-06, + "loss": 16.9401, + "step": 23201 + }, + { + "epoch": 0.4241139159522547, + "grad_norm": 6.108694950567766, + "learning_rate": 6.450873443506507e-06, + "loss": 17.6505, + "step": 23202 + }, + { + "epoch": 0.42413219514870126, + "grad_norm": 6.204057434994778, + "learning_rate": 6.450590165709248e-06, + "loss": 17.585, + "step": 23203 + }, + { + "epoch": 0.4241504743451478, + "grad_norm": 9.611802540482145, + "learning_rate": 6.4503068828277994e-06, + "loss": 17.6319, + "step": 23204 + }, + { + "epoch": 0.42416875354159433, + "grad_norm": 5.975477673986726, + "learning_rate": 6.450023594863149e-06, + "loss": 17.2863, + "step": 23205 + }, + { + "epoch": 0.4241870327380408, + "grad_norm": 6.75798405226483, + "learning_rate": 6.449740301816292e-06, + "loss": 17.4903, + "step": 23206 + }, + { + "epoch": 0.42420531193448735, + "grad_norm": 6.041848193441785, + "learning_rate": 6.449457003688222e-06, + "loss": 17.4501, + "step": 23207 + }, + { + "epoch": 0.4242235911309339, + "grad_norm": 5.974743847344579, + "learning_rate": 6.4491737004799305e-06, + "loss": 17.2646, + "step": 23208 + }, + { + "epoch": 0.4242418703273804, + "grad_norm": 6.457713787705749, + "learning_rate": 6.44889039219241e-06, + "loss": 17.4153, + "step": 23209 + }, + { + "epoch": 0.42426014952382696, + "grad_norm": 5.316422577175712, + "learning_rate": 6.448607078826655e-06, + "loss": 17.2499, + "step": 23210 + }, + { + "epoch": 0.42427842872027344, + "grad_norm": 6.224997073142436, + "learning_rate": 
6.448323760383659e-06, + "loss": 17.7254, + "step": 23211 + }, + { + "epoch": 0.42429670791672, + "grad_norm": 6.81697345358101, + "learning_rate": 6.448040436864412e-06, + "loss": 17.4188, + "step": 23212 + }, + { + "epoch": 0.4243149871131665, + "grad_norm": 7.262940284252221, + "learning_rate": 6.447757108269911e-06, + "loss": 17.9658, + "step": 23213 + }, + { + "epoch": 0.42433326630961304, + "grad_norm": 6.028857721532317, + "learning_rate": 6.4474737746011465e-06, + "loss": 17.2204, + "step": 23214 + }, + { + "epoch": 0.4243515455060596, + "grad_norm": 5.812748220060766, + "learning_rate": 6.447190435859111e-06, + "loss": 17.2248, + "step": 23215 + }, + { + "epoch": 0.42436982470250606, + "grad_norm": 7.9293828470095615, + "learning_rate": 6.446907092044799e-06, + "loss": 17.979, + "step": 23216 + }, + { + "epoch": 0.4243881038989526, + "grad_norm": 6.89259415779758, + "learning_rate": 6.446623743159203e-06, + "loss": 17.6296, + "step": 23217 + }, + { + "epoch": 0.42440638309539913, + "grad_norm": 7.8202367995967, + "learning_rate": 6.446340389203317e-06, + "loss": 18.0758, + "step": 23218 + }, + { + "epoch": 0.42442466229184567, + "grad_norm": 5.132927930426186, + "learning_rate": 6.446057030178132e-06, + "loss": 17.0155, + "step": 23219 + }, + { + "epoch": 0.42444294148829215, + "grad_norm": 4.992649609231906, + "learning_rate": 6.445773666084645e-06, + "loss": 16.9942, + "step": 23220 + }, + { + "epoch": 0.4244612206847387, + "grad_norm": 7.880871310450647, + "learning_rate": 6.445490296923844e-06, + "loss": 17.6014, + "step": 23221 + }, + { + "epoch": 0.4244794998811852, + "grad_norm": 6.238049813757726, + "learning_rate": 6.445206922696727e-06, + "loss": 17.4601, + "step": 23222 + }, + { + "epoch": 0.42449777907763175, + "grad_norm": 6.126625090422114, + "learning_rate": 6.444923543404285e-06, + "loss": 17.3324, + "step": 23223 + }, + { + "epoch": 0.4245160582740783, + "grad_norm": 6.958085470379649, + "learning_rate": 6.444640159047511e-06, + "loss": 17.7785, + "step": 23224 + }, + { + "epoch": 0.42453433747052477, + "grad_norm": 6.814057729507913, + "learning_rate": 6.444356769627398e-06, + "loss": 17.8646, + "step": 23225 + }, + { + "epoch": 0.4245526166669713, + "grad_norm": 5.994456734825881, + "learning_rate": 6.4440733751449396e-06, + "loss": 17.3985, + "step": 23226 + }, + { + "epoch": 0.42457089586341784, + "grad_norm": 8.369769332719311, + "learning_rate": 6.443789975601129e-06, + "loss": 17.7328, + "step": 23227 + }, + { + "epoch": 0.4245891750598644, + "grad_norm": 8.509696638553095, + "learning_rate": 6.443506570996962e-06, + "loss": 18.0756, + "step": 23228 + }, + { + "epoch": 0.4246074542563109, + "grad_norm": 5.68080480769699, + "learning_rate": 6.4432231613334295e-06, + "loss": 17.3222, + "step": 23229 + }, + { + "epoch": 0.4246257334527574, + "grad_norm": 5.864578603069478, + "learning_rate": 6.442939746611523e-06, + "loss": 17.4366, + "step": 23230 + }, + { + "epoch": 0.42464401264920393, + "grad_norm": 6.695005158913941, + "learning_rate": 6.44265632683224e-06, + "loss": 17.4122, + "step": 23231 + }, + { + "epoch": 0.42466229184565046, + "grad_norm": 7.886751378773805, + "learning_rate": 6.44237290199657e-06, + "loss": 18.1189, + "step": 23232 + }, + { + "epoch": 0.424680571042097, + "grad_norm": 5.667682538075096, + "learning_rate": 6.4420894721055094e-06, + "loss": 17.2764, + "step": 23233 + }, + { + "epoch": 0.42469885023854353, + "grad_norm": 6.566721020308051, + "learning_rate": 6.44180603716005e-06, + "loss": 17.4361, + "step": 23234 + }, + { + "epoch": 
0.42471712943499, + "grad_norm": 6.160303505709899, + "learning_rate": 6.441522597161185e-06, + "loss": 17.2469, + "step": 23235 + }, + { + "epoch": 0.42473540863143655, + "grad_norm": 6.889576470886445, + "learning_rate": 6.4412391521099084e-06, + "loss": 17.6904, + "step": 23236 + }, + { + "epoch": 0.4247536878278831, + "grad_norm": 5.488787147695676, + "learning_rate": 6.4409557020072145e-06, + "loss": 16.8651, + "step": 23237 + }, + { + "epoch": 0.4247719670243296, + "grad_norm": 8.267265947832813, + "learning_rate": 6.440672246854096e-06, + "loss": 18.3391, + "step": 23238 + }, + { + "epoch": 0.42479024622077616, + "grad_norm": 6.037747717693437, + "learning_rate": 6.4403887866515445e-06, + "loss": 17.3174, + "step": 23239 + }, + { + "epoch": 0.42480852541722264, + "grad_norm": 6.060987718727519, + "learning_rate": 6.440105321400556e-06, + "loss": 17.6085, + "step": 23240 + }, + { + "epoch": 0.4248268046136692, + "grad_norm": 5.91925489394884, + "learning_rate": 6.439821851102124e-06, + "loss": 17.2479, + "step": 23241 + }, + { + "epoch": 0.4248450838101157, + "grad_norm": 5.738825810714, + "learning_rate": 6.439538375757243e-06, + "loss": 17.2108, + "step": 23242 + }, + { + "epoch": 0.42486336300656224, + "grad_norm": 7.070132764479361, + "learning_rate": 6.439254895366902e-06, + "loss": 17.7795, + "step": 23243 + }, + { + "epoch": 0.4248816422030088, + "grad_norm": 5.287897605428229, + "learning_rate": 6.4389714099320975e-06, + "loss": 17.1248, + "step": 23244 + }, + { + "epoch": 0.42489992139945526, + "grad_norm": 7.782209619870981, + "learning_rate": 6.438687919453826e-06, + "loss": 18.2702, + "step": 23245 + }, + { + "epoch": 0.4249182005959018, + "grad_norm": 6.820480185191707, + "learning_rate": 6.438404423933076e-06, + "loss": 17.3446, + "step": 23246 + }, + { + "epoch": 0.42493647979234833, + "grad_norm": 5.73377650808498, + "learning_rate": 6.438120923370843e-06, + "loss": 17.2321, + "step": 23247 + }, + { + "epoch": 0.42495475898879487, + "grad_norm": 7.453244852160705, + "learning_rate": 6.437837417768123e-06, + "loss": 18.1756, + "step": 23248 + }, + { + "epoch": 0.4249730381852414, + "grad_norm": 5.9834094339976, + "learning_rate": 6.437553907125905e-06, + "loss": 17.3006, + "step": 23249 + }, + { + "epoch": 0.4249913173816879, + "grad_norm": 5.715656527767392, + "learning_rate": 6.437270391445186e-06, + "loss": 17.3641, + "step": 23250 + }, + { + "epoch": 0.4250095965781344, + "grad_norm": 5.991435802699906, + "learning_rate": 6.43698687072696e-06, + "loss": 17.5296, + "step": 23251 + }, + { + "epoch": 0.42502787577458095, + "grad_norm": 5.914436881039171, + "learning_rate": 6.436703344972219e-06, + "loss": 17.3627, + "step": 23252 + }, + { + "epoch": 0.4250461549710275, + "grad_norm": 7.27600022634788, + "learning_rate": 6.436419814181958e-06, + "loss": 17.9451, + "step": 23253 + }, + { + "epoch": 0.42506443416747397, + "grad_norm": 6.39585124336612, + "learning_rate": 6.43613627835717e-06, + "loss": 17.7013, + "step": 23254 + }, + { + "epoch": 0.4250827133639205, + "grad_norm": 7.95961009723974, + "learning_rate": 6.435852737498849e-06, + "loss": 17.8516, + "step": 23255 + }, + { + "epoch": 0.42510099256036704, + "grad_norm": 7.026971734875077, + "learning_rate": 6.435569191607989e-06, + "loss": 17.6718, + "step": 23256 + }, + { + "epoch": 0.4251192717568136, + "grad_norm": 6.698563438269797, + "learning_rate": 6.435285640685582e-06, + "loss": 17.7578, + "step": 23257 + }, + { + "epoch": 0.4251375509532601, + "grad_norm": 6.703836671505481, + "learning_rate": 
6.435002084732625e-06, + "loss": 17.5404, + "step": 23258 + }, + { + "epoch": 0.4251558301497066, + "grad_norm": 5.346873724366964, + "learning_rate": 6.4347185237501095e-06, + "loss": 17.004, + "step": 23259 + }, + { + "epoch": 0.42517410934615313, + "grad_norm": 5.8769726319864874, + "learning_rate": 6.4344349577390306e-06, + "loss": 16.9058, + "step": 23260 + }, + { + "epoch": 0.42519238854259966, + "grad_norm": 4.379997502804807, + "learning_rate": 6.434151386700382e-06, + "loss": 16.8215, + "step": 23261 + }, + { + "epoch": 0.4252106677390462, + "grad_norm": 6.778903453990103, + "learning_rate": 6.433867810635156e-06, + "loss": 17.7149, + "step": 23262 + }, + { + "epoch": 0.42522894693549274, + "grad_norm": 8.234971800394172, + "learning_rate": 6.433584229544348e-06, + "loss": 17.7708, + "step": 23263 + }, + { + "epoch": 0.4252472261319392, + "grad_norm": 7.117066939009308, + "learning_rate": 6.4333006434289525e-06, + "loss": 17.7574, + "step": 23264 + }, + { + "epoch": 0.42526550532838575, + "grad_norm": 7.891690304306368, + "learning_rate": 6.433017052289963e-06, + "loss": 18.013, + "step": 23265 + }, + { + "epoch": 0.4252837845248323, + "grad_norm": 6.682338327057211, + "learning_rate": 6.432733456128371e-06, + "loss": 17.53, + "step": 23266 + }, + { + "epoch": 0.4253020637212788, + "grad_norm": 7.742684286834764, + "learning_rate": 6.432449854945174e-06, + "loss": 18.009, + "step": 23267 + }, + { + "epoch": 0.42532034291772536, + "grad_norm": 7.87572476066597, + "learning_rate": 6.4321662487413634e-06, + "loss": 18.0306, + "step": 23268 + }, + { + "epoch": 0.42533862211417184, + "grad_norm": 12.092941117262523, + "learning_rate": 6.431882637517937e-06, + "loss": 18.5198, + "step": 23269 + }, + { + "epoch": 0.4253569013106184, + "grad_norm": 6.577591419740454, + "learning_rate": 6.431599021275885e-06, + "loss": 17.8837, + "step": 23270 + }, + { + "epoch": 0.4253751805070649, + "grad_norm": 5.7233591927392276, + "learning_rate": 6.4313154000162e-06, + "loss": 17.2162, + "step": 23271 + }, + { + "epoch": 0.42539345970351145, + "grad_norm": 5.9142626601897765, + "learning_rate": 6.431031773739882e-06, + "loss": 17.2328, + "step": 23272 + }, + { + "epoch": 0.425411738899958, + "grad_norm": 6.981238667079544, + "learning_rate": 6.430748142447921e-06, + "loss": 17.9035, + "step": 23273 + }, + { + "epoch": 0.42543001809640446, + "grad_norm": 7.6050498953989765, + "learning_rate": 6.430464506141312e-06, + "loss": 18.0571, + "step": 23274 + }, + { + "epoch": 0.425448297292851, + "grad_norm": 6.8033061858957655, + "learning_rate": 6.430180864821048e-06, + "loss": 17.6216, + "step": 23275 + }, + { + "epoch": 0.42546657648929753, + "grad_norm": 8.662456425833138, + "learning_rate": 6.4298972184881255e-06, + "loss": 18.2785, + "step": 23276 + }, + { + "epoch": 0.42548485568574407, + "grad_norm": 5.844497092877724, + "learning_rate": 6.4296135671435365e-06, + "loss": 17.3414, + "step": 23277 + }, + { + "epoch": 0.4255031348821906, + "grad_norm": 7.497452862917624, + "learning_rate": 6.429329910788276e-06, + "loss": 17.8492, + "step": 23278 + }, + { + "epoch": 0.4255214140786371, + "grad_norm": 7.102280082484249, + "learning_rate": 6.429046249423339e-06, + "loss": 17.7186, + "step": 23279 + }, + { + "epoch": 0.4255396932750836, + "grad_norm": 7.334603403675517, + "learning_rate": 6.428762583049718e-06, + "loss": 17.8792, + "step": 23280 + }, + { + "epoch": 0.42555797247153015, + "grad_norm": 5.9367013808119005, + "learning_rate": 6.428478911668408e-06, + "loss": 17.5629, + "step": 23281 + }, + { 
+ "epoch": 0.4255762516679767, + "grad_norm": 6.119055864556667, + "learning_rate": 6.428195235280403e-06, + "loss": 17.2329, + "step": 23282 + }, + { + "epoch": 0.4255945308644232, + "grad_norm": 5.894820412562195, + "learning_rate": 6.4279115538867e-06, + "loss": 17.3982, + "step": 23283 + }, + { + "epoch": 0.4256128100608697, + "grad_norm": 6.780257747702837, + "learning_rate": 6.427627867488289e-06, + "loss": 18.1553, + "step": 23284 + }, + { + "epoch": 0.42563108925731624, + "grad_norm": 6.77157361870574, + "learning_rate": 6.427344176086166e-06, + "loss": 17.9649, + "step": 23285 + }, + { + "epoch": 0.4256493684537628, + "grad_norm": 7.303543310573126, + "learning_rate": 6.427060479681326e-06, + "loss": 17.8533, + "step": 23286 + }, + { + "epoch": 0.4256676476502093, + "grad_norm": 6.705371479411704, + "learning_rate": 6.426776778274763e-06, + "loss": 17.9653, + "step": 23287 + }, + { + "epoch": 0.4256859268466558, + "grad_norm": 5.539826475065183, + "learning_rate": 6.426493071867472e-06, + "loss": 17.0326, + "step": 23288 + }, + { + "epoch": 0.42570420604310233, + "grad_norm": 6.732033702819237, + "learning_rate": 6.426209360460445e-06, + "loss": 17.5677, + "step": 23289 + }, + { + "epoch": 0.42572248523954886, + "grad_norm": 5.35049837540552, + "learning_rate": 6.425925644054679e-06, + "loss": 16.9009, + "step": 23290 + }, + { + "epoch": 0.4257407644359954, + "grad_norm": 6.842504751471586, + "learning_rate": 6.425641922651167e-06, + "loss": 17.8086, + "step": 23291 + }, + { + "epoch": 0.42575904363244194, + "grad_norm": 6.861016117580617, + "learning_rate": 6.425358196250904e-06, + "loss": 17.7915, + "step": 23292 + }, + { + "epoch": 0.4257773228288884, + "grad_norm": 5.9472844534362315, + "learning_rate": 6.4250744648548835e-06, + "loss": 17.2729, + "step": 23293 + }, + { + "epoch": 0.42579560202533495, + "grad_norm": 5.451052307127618, + "learning_rate": 6.4247907284641005e-06, + "loss": 17.1485, + "step": 23294 + }, + { + "epoch": 0.4258138812217815, + "grad_norm": 4.984651170296348, + "learning_rate": 6.424506987079551e-06, + "loss": 16.9197, + "step": 23295 + }, + { + "epoch": 0.425832160418228, + "grad_norm": 7.692813012204413, + "learning_rate": 6.4242232407022274e-06, + "loss": 18.1744, + "step": 23296 + }, + { + "epoch": 0.42585043961467456, + "grad_norm": 6.741708956330376, + "learning_rate": 6.423939489333126e-06, + "loss": 17.6719, + "step": 23297 + }, + { + "epoch": 0.42586871881112104, + "grad_norm": 6.629877763655567, + "learning_rate": 6.423655732973237e-06, + "loss": 17.5006, + "step": 23298 + }, + { + "epoch": 0.4258869980075676, + "grad_norm": 5.856492412922344, + "learning_rate": 6.423371971623562e-06, + "loss": 17.4823, + "step": 23299 + }, + { + "epoch": 0.4259052772040141, + "grad_norm": 5.609827729623355, + "learning_rate": 6.423088205285091e-06, + "loss": 16.9767, + "step": 23300 + }, + { + "epoch": 0.42592355640046065, + "grad_norm": 6.290690897885583, + "learning_rate": 6.422804433958818e-06, + "loss": 17.642, + "step": 23301 + }, + { + "epoch": 0.4259418355969072, + "grad_norm": 5.601023951618471, + "learning_rate": 6.422520657645739e-06, + "loss": 17.2755, + "step": 23302 + }, + { + "epoch": 0.42596011479335366, + "grad_norm": 6.052452128632869, + "learning_rate": 6.422236876346848e-06, + "loss": 17.4081, + "step": 23303 + }, + { + "epoch": 0.4259783939898002, + "grad_norm": 6.388773808185296, + "learning_rate": 6.4219530900631425e-06, + "loss": 17.6369, + "step": 23304 + }, + { + "epoch": 0.42599667318624673, + "grad_norm": 5.870620735000374, + 
"learning_rate": 6.421669298795613e-06, + "loss": 17.2354, + "step": 23305 + }, + { + "epoch": 0.42601495238269327, + "grad_norm": 5.390375085752392, + "learning_rate": 6.4213855025452565e-06, + "loss": 17.1514, + "step": 23306 + }, + { + "epoch": 0.4260332315791398, + "grad_norm": 5.589035895055516, + "learning_rate": 6.421101701313067e-06, + "loss": 17.3054, + "step": 23307 + }, + { + "epoch": 0.4260515107755863, + "grad_norm": 6.425361073746728, + "learning_rate": 6.420817895100039e-06, + "loss": 17.4956, + "step": 23308 + }, + { + "epoch": 0.4260697899720328, + "grad_norm": 6.037017043839415, + "learning_rate": 6.420534083907169e-06, + "loss": 17.4951, + "step": 23309 + }, + { + "epoch": 0.42608806916847936, + "grad_norm": 6.237122949554329, + "learning_rate": 6.4202502677354485e-06, + "loss": 17.4349, + "step": 23310 + }, + { + "epoch": 0.4261063483649259, + "grad_norm": 6.083998987840913, + "learning_rate": 6.419966446585875e-06, + "loss": 17.135, + "step": 23311 + }, + { + "epoch": 0.4261246275613724, + "grad_norm": 6.549137200299183, + "learning_rate": 6.419682620459442e-06, + "loss": 17.6685, + "step": 23312 + }, + { + "epoch": 0.4261429067578189, + "grad_norm": 7.026041518216922, + "learning_rate": 6.419398789357144e-06, + "loss": 17.681, + "step": 23313 + }, + { + "epoch": 0.42616118595426544, + "grad_norm": 8.113965958966368, + "learning_rate": 6.419114953279979e-06, + "loss": 18.2747, + "step": 23314 + }, + { + "epoch": 0.426179465150712, + "grad_norm": 5.402074650973205, + "learning_rate": 6.418831112228937e-06, + "loss": 16.9344, + "step": 23315 + }, + { + "epoch": 0.4261977443471585, + "grad_norm": 6.291374197533516, + "learning_rate": 6.418547266205014e-06, + "loss": 17.5045, + "step": 23316 + }, + { + "epoch": 0.42621602354360505, + "grad_norm": 6.783662187149835, + "learning_rate": 6.418263415209207e-06, + "loss": 17.9824, + "step": 23317 + }, + { + "epoch": 0.42623430274005153, + "grad_norm": 6.520332513714922, + "learning_rate": 6.417979559242512e-06, + "loss": 17.5565, + "step": 23318 + }, + { + "epoch": 0.42625258193649807, + "grad_norm": 6.2588971858846625, + "learning_rate": 6.417695698305919e-06, + "loss": 17.3797, + "step": 23319 + }, + { + "epoch": 0.4262708611329446, + "grad_norm": 6.113436550965818, + "learning_rate": 6.417411832400427e-06, + "loss": 17.4842, + "step": 23320 + }, + { + "epoch": 0.42628914032939114, + "grad_norm": 4.839899465251502, + "learning_rate": 6.417127961527029e-06, + "loss": 16.9643, + "step": 23321 + }, + { + "epoch": 0.4263074195258376, + "grad_norm": 6.913841491735516, + "learning_rate": 6.41684408568672e-06, + "loss": 17.7049, + "step": 23322 + }, + { + "epoch": 0.42632569872228415, + "grad_norm": 6.253215869966576, + "learning_rate": 6.4165602048804964e-06, + "loss": 17.3477, + "step": 23323 + }, + { + "epoch": 0.4263439779187307, + "grad_norm": 6.160457300128599, + "learning_rate": 6.416276319109351e-06, + "loss": 17.4272, + "step": 23324 + }, + { + "epoch": 0.4263622571151772, + "grad_norm": 6.605787098097573, + "learning_rate": 6.415992428374281e-06, + "loss": 17.5389, + "step": 23325 + }, + { + "epoch": 0.42638053631162376, + "grad_norm": 5.904636517213099, + "learning_rate": 6.41570853267628e-06, + "loss": 17.3321, + "step": 23326 + }, + { + "epoch": 0.42639881550807024, + "grad_norm": 5.849282282109855, + "learning_rate": 6.4154246320163435e-06, + "loss": 17.2366, + "step": 23327 + }, + { + "epoch": 0.4264170947045168, + "grad_norm": 5.530342027558702, + "learning_rate": 6.415140726395468e-06, + "loss": 17.1125, + "step": 
23328 + }, + { + "epoch": 0.4264353739009633, + "grad_norm": 6.067278157116494, + "learning_rate": 6.414856815814645e-06, + "loss": 17.5748, + "step": 23329 + }, + { + "epoch": 0.42645365309740985, + "grad_norm": 8.310868332090848, + "learning_rate": 6.414572900274871e-06, + "loss": 18.2586, + "step": 23330 + }, + { + "epoch": 0.4264719322938564, + "grad_norm": 5.3566791051364175, + "learning_rate": 6.414288979777145e-06, + "loss": 17.2356, + "step": 23331 + }, + { + "epoch": 0.42649021149030286, + "grad_norm": 5.551640164710231, + "learning_rate": 6.414005054322456e-06, + "loss": 17.1504, + "step": 23332 + }, + { + "epoch": 0.4265084906867494, + "grad_norm": 8.486077590578063, + "learning_rate": 6.413721123911803e-06, + "loss": 17.9259, + "step": 23333 + }, + { + "epoch": 0.42652676988319593, + "grad_norm": 7.864505064176411, + "learning_rate": 6.41343718854618e-06, + "loss": 17.8304, + "step": 23334 + }, + { + "epoch": 0.42654504907964247, + "grad_norm": 6.703655426624963, + "learning_rate": 6.413153248226583e-06, + "loss": 17.6709, + "step": 23335 + }, + { + "epoch": 0.426563328276089, + "grad_norm": 6.9711473328119515, + "learning_rate": 6.412869302954005e-06, + "loss": 16.8692, + "step": 23336 + }, + { + "epoch": 0.4265816074725355, + "grad_norm": 6.087758990966172, + "learning_rate": 6.412585352729443e-06, + "loss": 17.3385, + "step": 23337 + }, + { + "epoch": 0.426599886668982, + "grad_norm": 6.883960524960858, + "learning_rate": 6.412301397553893e-06, + "loss": 17.7104, + "step": 23338 + }, + { + "epoch": 0.42661816586542856, + "grad_norm": 5.557755489306266, + "learning_rate": 6.412017437428348e-06, + "loss": 17.3387, + "step": 23339 + }, + { + "epoch": 0.4266364450618751, + "grad_norm": 5.728770330626398, + "learning_rate": 6.411733472353805e-06, + "loss": 17.3812, + "step": 23340 + }, + { + "epoch": 0.4266547242583216, + "grad_norm": 5.894866565703461, + "learning_rate": 6.411449502331258e-06, + "loss": 17.2593, + "step": 23341 + }, + { + "epoch": 0.4266730034547681, + "grad_norm": 7.00927155631741, + "learning_rate": 6.411165527361705e-06, + "loss": 17.556, + "step": 23342 + }, + { + "epoch": 0.42669128265121464, + "grad_norm": 7.27315518515396, + "learning_rate": 6.410881547446137e-06, + "loss": 17.6639, + "step": 23343 + }, + { + "epoch": 0.4267095618476612, + "grad_norm": 7.228428613284711, + "learning_rate": 6.410597562585552e-06, + "loss": 18.053, + "step": 23344 + }, + { + "epoch": 0.4267278410441077, + "grad_norm": 6.687136055615292, + "learning_rate": 6.4103135727809465e-06, + "loss": 17.5049, + "step": 23345 + }, + { + "epoch": 0.42674612024055425, + "grad_norm": 6.764895121356559, + "learning_rate": 6.410029578033313e-06, + "loss": 17.7955, + "step": 23346 + }, + { + "epoch": 0.42676439943700073, + "grad_norm": 6.520786108739674, + "learning_rate": 6.4097455783436495e-06, + "loss": 17.4612, + "step": 23347 + }, + { + "epoch": 0.42678267863344727, + "grad_norm": 8.915873416299993, + "learning_rate": 6.409461573712947e-06, + "loss": 18.2511, + "step": 23348 + }, + { + "epoch": 0.4268009578298938, + "grad_norm": 6.260238044671665, + "learning_rate": 6.409177564142207e-06, + "loss": 17.3599, + "step": 23349 + }, + { + "epoch": 0.42681923702634034, + "grad_norm": 8.116313773124174, + "learning_rate": 6.408893549632421e-06, + "loss": 18.0807, + "step": 23350 + }, + { + "epoch": 0.4268375162227869, + "grad_norm": 5.993751166289965, + "learning_rate": 6.408609530184585e-06, + "loss": 17.2234, + "step": 23351 + }, + { + "epoch": 0.42685579541923335, + "grad_norm": 
6.319804179162196, + "learning_rate": 6.4083255057996954e-06, + "loss": 17.6066, + "step": 23352 + }, + { + "epoch": 0.4268740746156799, + "grad_norm": 6.584059094697491, + "learning_rate": 6.408041476478747e-06, + "loss": 17.5112, + "step": 23353 + }, + { + "epoch": 0.4268923538121264, + "grad_norm": 6.5244990030763494, + "learning_rate": 6.407757442222735e-06, + "loss": 17.6516, + "step": 23354 + }, + { + "epoch": 0.42691063300857296, + "grad_norm": 5.883039291502872, + "learning_rate": 6.407473403032656e-06, + "loss": 17.0701, + "step": 23355 + }, + { + "epoch": 0.42692891220501944, + "grad_norm": 6.736911400804352, + "learning_rate": 6.407189358909505e-06, + "loss": 17.6317, + "step": 23356 + }, + { + "epoch": 0.426947191401466, + "grad_norm": 7.641623068033282, + "learning_rate": 6.406905309854275e-06, + "loss": 17.6438, + "step": 23357 + }, + { + "epoch": 0.4269654705979125, + "grad_norm": 6.224456284307733, + "learning_rate": 6.406621255867966e-06, + "loss": 17.3261, + "step": 23358 + }, + { + "epoch": 0.42698374979435905, + "grad_norm": 5.5787801716904655, + "learning_rate": 6.406337196951573e-06, + "loss": 17.2173, + "step": 23359 + }, + { + "epoch": 0.4270020289908056, + "grad_norm": 6.001649194149242, + "learning_rate": 6.406053133106088e-06, + "loss": 17.1758, + "step": 23360 + }, + { + "epoch": 0.42702030818725206, + "grad_norm": 6.336602194065833, + "learning_rate": 6.40576906433251e-06, + "loss": 17.4226, + "step": 23361 + }, + { + "epoch": 0.4270385873836986, + "grad_norm": 5.5915735460026985, + "learning_rate": 6.405484990631831e-06, + "loss": 17.441, + "step": 23362 + }, + { + "epoch": 0.42705686658014513, + "grad_norm": 5.60167724582844, + "learning_rate": 6.405200912005052e-06, + "loss": 17.2534, + "step": 23363 + }, + { + "epoch": 0.42707514577659167, + "grad_norm": 6.76547289832025, + "learning_rate": 6.404916828453165e-06, + "loss": 17.6248, + "step": 23364 + }, + { + "epoch": 0.4270934249730382, + "grad_norm": 6.751197872198745, + "learning_rate": 6.404632739977166e-06, + "loss": 17.728, + "step": 23365 + }, + { + "epoch": 0.4271117041694847, + "grad_norm": 5.4264654123146085, + "learning_rate": 6.40434864657805e-06, + "loss": 17.008, + "step": 23366 + }, + { + "epoch": 0.4271299833659312, + "grad_norm": 5.563911891125588, + "learning_rate": 6.404064548256815e-06, + "loss": 17.1815, + "step": 23367 + }, + { + "epoch": 0.42714826256237776, + "grad_norm": 7.417629212616718, + "learning_rate": 6.403780445014456e-06, + "loss": 17.7103, + "step": 23368 + }, + { + "epoch": 0.4271665417588243, + "grad_norm": 6.1007423515647, + "learning_rate": 6.403496336851969e-06, + "loss": 17.426, + "step": 23369 + }, + { + "epoch": 0.4271848209552708, + "grad_norm": 6.109197967976358, + "learning_rate": 6.403212223770348e-06, + "loss": 17.6285, + "step": 23370 + }, + { + "epoch": 0.4272031001517173, + "grad_norm": 6.768557816439791, + "learning_rate": 6.402928105770588e-06, + "loss": 17.5708, + "step": 23371 + }, + { + "epoch": 0.42722137934816384, + "grad_norm": 8.242561548441238, + "learning_rate": 6.402643982853689e-06, + "loss": 17.5663, + "step": 23372 + }, + { + "epoch": 0.4272396585446104, + "grad_norm": 6.9426266384781385, + "learning_rate": 6.402359855020645e-06, + "loss": 17.9845, + "step": 23373 + }, + { + "epoch": 0.4272579377410569, + "grad_norm": 6.324919899163823, + "learning_rate": 6.402075722272451e-06, + "loss": 17.673, + "step": 23374 + }, + { + "epoch": 0.42727621693750345, + "grad_norm": 5.840632425260436, + "learning_rate": 6.401791584610103e-06, + "loss": 
17.3264, + "step": 23375 + }, + { + "epoch": 0.42729449613394993, + "grad_norm": 5.941959213094946, + "learning_rate": 6.401507442034597e-06, + "loss": 17.3341, + "step": 23376 + }, + { + "epoch": 0.42731277533039647, + "grad_norm": 5.083564261165598, + "learning_rate": 6.401223294546929e-06, + "loss": 17.1065, + "step": 23377 + }, + { + "epoch": 0.427331054526843, + "grad_norm": 7.467611811641787, + "learning_rate": 6.400939142148095e-06, + "loss": 18.23, + "step": 23378 + }, + { + "epoch": 0.42734933372328954, + "grad_norm": 5.464582705786741, + "learning_rate": 6.400654984839091e-06, + "loss": 17.0183, + "step": 23379 + }, + { + "epoch": 0.4273676129197361, + "grad_norm": 7.229755104089995, + "learning_rate": 6.4003708226209116e-06, + "loss": 18.0654, + "step": 23380 + }, + { + "epoch": 0.42738589211618255, + "grad_norm": 6.632513083793697, + "learning_rate": 6.400086655494555e-06, + "loss": 18.0036, + "step": 23381 + }, + { + "epoch": 0.4274041713126291, + "grad_norm": 7.4728082978196815, + "learning_rate": 6.399802483461017e-06, + "loss": 17.9919, + "step": 23382 + }, + { + "epoch": 0.4274224505090756, + "grad_norm": 7.787181386131671, + "learning_rate": 6.399518306521293e-06, + "loss": 17.7279, + "step": 23383 + }, + { + "epoch": 0.42744072970552216, + "grad_norm": 5.880190777567864, + "learning_rate": 6.399234124676376e-06, + "loss": 17.3932, + "step": 23384 + }, + { + "epoch": 0.4274590089019687, + "grad_norm": 6.543591168829177, + "learning_rate": 6.3989499379272665e-06, + "loss": 17.7307, + "step": 23385 + }, + { + "epoch": 0.4274772880984152, + "grad_norm": 7.560626540131132, + "learning_rate": 6.398665746274959e-06, + "loss": 18.2747, + "step": 23386 + }, + { + "epoch": 0.4274955672948617, + "grad_norm": 7.082023980148686, + "learning_rate": 6.398381549720452e-06, + "loss": 17.8332, + "step": 23387 + }, + { + "epoch": 0.42751384649130825, + "grad_norm": 7.786927866663507, + "learning_rate": 6.398097348264736e-06, + "loss": 18.5009, + "step": 23388 + }, + { + "epoch": 0.4275321256877548, + "grad_norm": 5.70308358439869, + "learning_rate": 6.397813141908809e-06, + "loss": 17.2726, + "step": 23389 + }, + { + "epoch": 0.42755040488420126, + "grad_norm": 5.99044632759544, + "learning_rate": 6.3975289306536704e-06, + "loss": 17.4928, + "step": 23390 + }, + { + "epoch": 0.4275686840806478, + "grad_norm": 6.975416516493688, + "learning_rate": 6.397244714500313e-06, + "loss": 17.9719, + "step": 23391 + }, + { + "epoch": 0.42758696327709433, + "grad_norm": 6.339173005272153, + "learning_rate": 6.396960493449735e-06, + "loss": 17.4774, + "step": 23392 + }, + { + "epoch": 0.42760524247354087, + "grad_norm": 6.574409026294816, + "learning_rate": 6.396676267502931e-06, + "loss": 17.6055, + "step": 23393 + }, + { + "epoch": 0.4276235216699874, + "grad_norm": 7.330237821050877, + "learning_rate": 6.396392036660899e-06, + "loss": 17.5929, + "step": 23394 + }, + { + "epoch": 0.4276418008664339, + "grad_norm": 12.230810864601223, + "learning_rate": 6.396107800924634e-06, + "loss": 18.3993, + "step": 23395 + }, + { + "epoch": 0.4276600800628804, + "grad_norm": 7.356162325461706, + "learning_rate": 6.395823560295131e-06, + "loss": 17.8599, + "step": 23396 + }, + { + "epoch": 0.42767835925932696, + "grad_norm": 6.247100254242655, + "learning_rate": 6.3955393147733895e-06, + "loss": 17.4883, + "step": 23397 + }, + { + "epoch": 0.4276966384557735, + "grad_norm": 6.020560638533876, + "learning_rate": 6.395255064360401e-06, + "loss": 17.5034, + "step": 23398 + }, + { + "epoch": 0.42771491765222003, + 
"grad_norm": 5.884288937345549, + "learning_rate": 6.394970809057166e-06, + "loss": 17.1348, + "step": 23399 + }, + { + "epoch": 0.4277331968486665, + "grad_norm": 5.84602641285627, + "learning_rate": 6.394686548864681e-06, + "loss": 17.1399, + "step": 23400 + }, + { + "epoch": 0.42775147604511304, + "grad_norm": 7.170843289748093, + "learning_rate": 6.394402283783938e-06, + "loss": 17.8178, + "step": 23401 + }, + { + "epoch": 0.4277697552415596, + "grad_norm": 6.12561822244916, + "learning_rate": 6.394118013815938e-06, + "loss": 17.493, + "step": 23402 + }, + { + "epoch": 0.4277880344380061, + "grad_norm": 7.458365081773975, + "learning_rate": 6.393833738961672e-06, + "loss": 17.832, + "step": 23403 + }, + { + "epoch": 0.42780631363445265, + "grad_norm": 5.824320554959616, + "learning_rate": 6.3935494592221435e-06, + "loss": 17.1904, + "step": 23404 + }, + { + "epoch": 0.42782459283089913, + "grad_norm": 6.063267217156814, + "learning_rate": 6.3932651745983444e-06, + "loss": 17.5405, + "step": 23405 + }, + { + "epoch": 0.42784287202734567, + "grad_norm": 7.899359840208344, + "learning_rate": 6.39298088509127e-06, + "loss": 17.8857, + "step": 23406 + }, + { + "epoch": 0.4278611512237922, + "grad_norm": 5.7405543562282615, + "learning_rate": 6.3926965907019205e-06, + "loss": 17.3189, + "step": 23407 + }, + { + "epoch": 0.42787943042023874, + "grad_norm": 5.869797000359069, + "learning_rate": 6.3924122914312895e-06, + "loss": 17.2573, + "step": 23408 + }, + { + "epoch": 0.4278977096166853, + "grad_norm": 5.407481655789767, + "learning_rate": 6.392127987280373e-06, + "loss": 17.1029, + "step": 23409 + }, + { + "epoch": 0.42791598881313175, + "grad_norm": 5.415827292402781, + "learning_rate": 6.39184367825017e-06, + "loss": 17.3705, + "step": 23410 + }, + { + "epoch": 0.4279342680095783, + "grad_norm": 6.435835641604202, + "learning_rate": 6.391559364341675e-06, + "loss": 17.6047, + "step": 23411 + }, + { + "epoch": 0.4279525472060248, + "grad_norm": 6.857008590435317, + "learning_rate": 6.391275045555886e-06, + "loss": 18.0624, + "step": 23412 + }, + { + "epoch": 0.42797082640247136, + "grad_norm": 6.466399128208936, + "learning_rate": 6.3909907218937985e-06, + "loss": 17.6225, + "step": 23413 + }, + { + "epoch": 0.4279891055989179, + "grad_norm": 5.721083240196607, + "learning_rate": 6.39070639335641e-06, + "loss": 17.2616, + "step": 23414 + }, + { + "epoch": 0.4280073847953644, + "grad_norm": 7.764525729143078, + "learning_rate": 6.390422059944716e-06, + "loss": 17.9437, + "step": 23415 + }, + { + "epoch": 0.4280256639918109, + "grad_norm": 7.267272213174589, + "learning_rate": 6.390137721659711e-06, + "loss": 17.7327, + "step": 23416 + }, + { + "epoch": 0.42804394318825745, + "grad_norm": 5.616225426549465, + "learning_rate": 6.389853378502395e-06, + "loss": 17.248, + "step": 23417 + }, + { + "epoch": 0.428062222384704, + "grad_norm": 6.208328680412768, + "learning_rate": 6.389569030473765e-06, + "loss": 17.5197, + "step": 23418 + }, + { + "epoch": 0.4280805015811505, + "grad_norm": 6.186402087249811, + "learning_rate": 6.389284677574815e-06, + "loss": 17.6012, + "step": 23419 + }, + { + "epoch": 0.428098780777597, + "grad_norm": 5.891358819072858, + "learning_rate": 6.389000319806543e-06, + "loss": 17.622, + "step": 23420 + }, + { + "epoch": 0.42811705997404353, + "grad_norm": 6.162886707761934, + "learning_rate": 6.388715957169947e-06, + "loss": 17.5174, + "step": 23421 + }, + { + "epoch": 0.42813533917049007, + "grad_norm": 5.778054424604584, + "learning_rate": 6.38843158966602e-06, + 
"loss": 17.3119, + "step": 23422 + }, + { + "epoch": 0.4281536183669366, + "grad_norm": 6.824577888808088, + "learning_rate": 6.3881472172957606e-06, + "loss": 17.694, + "step": 23423 + }, + { + "epoch": 0.4281718975633831, + "grad_norm": 7.722819229871561, + "learning_rate": 6.387862840060166e-06, + "loss": 17.8384, + "step": 23424 + }, + { + "epoch": 0.4281901767598296, + "grad_norm": 5.206806597121236, + "learning_rate": 6.387578457960233e-06, + "loss": 16.9836, + "step": 23425 + }, + { + "epoch": 0.42820845595627616, + "grad_norm": 6.920501065603111, + "learning_rate": 6.3872940709969575e-06, + "loss": 17.7597, + "step": 23426 + }, + { + "epoch": 0.4282267351527227, + "grad_norm": 6.899161944954252, + "learning_rate": 6.387009679171336e-06, + "loss": 17.8887, + "step": 23427 + }, + { + "epoch": 0.42824501434916923, + "grad_norm": 7.294812669355296, + "learning_rate": 6.386725282484369e-06, + "loss": 18.0346, + "step": 23428 + }, + { + "epoch": 0.4282632935456157, + "grad_norm": 6.217190853006324, + "learning_rate": 6.3864408809370484e-06, + "loss": 17.4678, + "step": 23429 + }, + { + "epoch": 0.42828157274206224, + "grad_norm": 6.162584665430932, + "learning_rate": 6.386156474530372e-06, + "loss": 17.5032, + "step": 23430 + }, + { + "epoch": 0.4282998519385088, + "grad_norm": 6.444946592469472, + "learning_rate": 6.385872063265338e-06, + "loss": 17.6911, + "step": 23431 + }, + { + "epoch": 0.4283181311349553, + "grad_norm": 7.7693908600627095, + "learning_rate": 6.385587647142944e-06, + "loss": 17.8688, + "step": 23432 + }, + { + "epoch": 0.42833641033140185, + "grad_norm": 5.589450438592937, + "learning_rate": 6.385303226164183e-06, + "loss": 17.3504, + "step": 23433 + }, + { + "epoch": 0.42835468952784833, + "grad_norm": 6.525097342409039, + "learning_rate": 6.385018800330056e-06, + "loss": 17.321, + "step": 23434 + }, + { + "epoch": 0.42837296872429487, + "grad_norm": 6.754817938260677, + "learning_rate": 6.384734369641558e-06, + "loss": 17.5407, + "step": 23435 + }, + { + "epoch": 0.4283912479207414, + "grad_norm": 5.851656400971432, + "learning_rate": 6.384449934099686e-06, + "loss": 17.4082, + "step": 23436 + }, + { + "epoch": 0.42840952711718794, + "grad_norm": 6.852300756559189, + "learning_rate": 6.384165493705437e-06, + "loss": 17.7443, + "step": 23437 + }, + { + "epoch": 0.4284278063136345, + "grad_norm": 7.375661965669248, + "learning_rate": 6.383881048459808e-06, + "loss": 18.0115, + "step": 23438 + }, + { + "epoch": 0.42844608551008095, + "grad_norm": 5.550086568877446, + "learning_rate": 6.383596598363796e-06, + "loss": 17.2862, + "step": 23439 + }, + { + "epoch": 0.4284643647065275, + "grad_norm": 5.993033378286841, + "learning_rate": 6.383312143418399e-06, + "loss": 17.3555, + "step": 23440 + }, + { + "epoch": 0.428482643902974, + "grad_norm": 5.820840740675721, + "learning_rate": 6.383027683624612e-06, + "loss": 17.1568, + "step": 23441 + }, + { + "epoch": 0.42850092309942056, + "grad_norm": 8.607797647673888, + "learning_rate": 6.382743218983434e-06, + "loss": 18.587, + "step": 23442 + }, + { + "epoch": 0.4285192022958671, + "grad_norm": 5.81046941666761, + "learning_rate": 6.382458749495859e-06, + "loss": 17.13, + "step": 23443 + }, + { + "epoch": 0.4285374814923136, + "grad_norm": 7.615414587016948, + "learning_rate": 6.382174275162887e-06, + "loss": 17.9039, + "step": 23444 + }, + { + "epoch": 0.4285557606887601, + "grad_norm": 6.909462525828415, + "learning_rate": 6.381889795985515e-06, + "loss": 17.6704, + "step": 23445 + }, + { + "epoch": 0.42857403988520665, + 
"grad_norm": 5.694158058507743, + "learning_rate": 6.3816053119647395e-06, + "loss": 17.2406, + "step": 23446 + }, + { + "epoch": 0.4285923190816532, + "grad_norm": 6.002936666070136, + "learning_rate": 6.381320823101556e-06, + "loss": 17.4432, + "step": 23447 + }, + { + "epoch": 0.4286105982780997, + "grad_norm": 6.189362379272837, + "learning_rate": 6.3810363293969615e-06, + "loss": 17.3398, + "step": 23448 + }, + { + "epoch": 0.4286288774745462, + "grad_norm": 5.539747891510892, + "learning_rate": 6.3807518308519575e-06, + "loss": 17.2422, + "step": 23449 + }, + { + "epoch": 0.42864715667099273, + "grad_norm": 6.804636414467735, + "learning_rate": 6.380467327467537e-06, + "loss": 17.7654, + "step": 23450 + }, + { + "epoch": 0.42866543586743927, + "grad_norm": 7.243815335154215, + "learning_rate": 6.380182819244698e-06, + "loss": 17.8922, + "step": 23451 + }, + { + "epoch": 0.4286837150638858, + "grad_norm": 6.036758226415079, + "learning_rate": 6.379898306184438e-06, + "loss": 17.4417, + "step": 23452 + }, + { + "epoch": 0.42870199426033234, + "grad_norm": 6.990790364334782, + "learning_rate": 6.379613788287754e-06, + "loss": 18.0702, + "step": 23453 + }, + { + "epoch": 0.4287202734567788, + "grad_norm": 6.70366083248654, + "learning_rate": 6.379329265555644e-06, + "loss": 17.7388, + "step": 23454 + }, + { + "epoch": 0.42873855265322536, + "grad_norm": 5.785569135151232, + "learning_rate": 6.379044737989104e-06, + "loss": 17.183, + "step": 23455 + }, + { + "epoch": 0.4287568318496719, + "grad_norm": 6.619216894772767, + "learning_rate": 6.378760205589134e-06, + "loss": 17.9898, + "step": 23456 + }, + { + "epoch": 0.42877511104611843, + "grad_norm": 6.623487871309558, + "learning_rate": 6.3784756683567265e-06, + "loss": 17.5719, + "step": 23457 + }, + { + "epoch": 0.42879339024256496, + "grad_norm": 7.345545332429029, + "learning_rate": 6.378191126292881e-06, + "loss": 17.6108, + "step": 23458 + }, + { + "epoch": 0.42881166943901144, + "grad_norm": 6.003816686915262, + "learning_rate": 6.377906579398598e-06, + "loss": 17.4517, + "step": 23459 + }, + { + "epoch": 0.428829948635458, + "grad_norm": 6.564628632479459, + "learning_rate": 6.37762202767487e-06, + "loss": 17.5803, + "step": 23460 + }, + { + "epoch": 0.4288482278319045, + "grad_norm": 5.4720816589631625, + "learning_rate": 6.377337471122698e-06, + "loss": 17.1215, + "step": 23461 + }, + { + "epoch": 0.42886650702835105, + "grad_norm": 6.655273116118058, + "learning_rate": 6.377052909743075e-06, + "loss": 17.5229, + "step": 23462 + }, + { + "epoch": 0.42888478622479753, + "grad_norm": 6.984137184578219, + "learning_rate": 6.376768343537003e-06, + "loss": 17.6818, + "step": 23463 + }, + { + "epoch": 0.42890306542124407, + "grad_norm": 6.047080046835342, + "learning_rate": 6.376483772505477e-06, + "loss": 17.4843, + "step": 23464 + }, + { + "epoch": 0.4289213446176906, + "grad_norm": 5.805838726085733, + "learning_rate": 6.376199196649494e-06, + "loss": 17.2506, + "step": 23465 + }, + { + "epoch": 0.42893962381413714, + "grad_norm": 7.52425932776498, + "learning_rate": 6.375914615970054e-06, + "loss": 18.106, + "step": 23466 + }, + { + "epoch": 0.4289579030105837, + "grad_norm": 5.908963140598852, + "learning_rate": 6.37563003046815e-06, + "loss": 17.5495, + "step": 23467 + }, + { + "epoch": 0.42897618220703015, + "grad_norm": 6.53915540978699, + "learning_rate": 6.3753454401447845e-06, + "loss": 17.4231, + "step": 23468 + }, + { + "epoch": 0.4289944614034767, + "grad_norm": 6.07557074565523, + "learning_rate": 
6.375060845000953e-06, + "loss": 17.3704, + "step": 23469 + }, + { + "epoch": 0.4290127405999232, + "grad_norm": 5.44013386682845, + "learning_rate": 6.37477624503765e-06, + "loss": 17.157, + "step": 23470 + }, + { + "epoch": 0.42903101979636976, + "grad_norm": 8.099400722174494, + "learning_rate": 6.3744916402558775e-06, + "loss": 18.2346, + "step": 23471 + }, + { + "epoch": 0.4290492989928163, + "grad_norm": 6.548503239278486, + "learning_rate": 6.37420703065663e-06, + "loss": 17.7201, + "step": 23472 + }, + { + "epoch": 0.4290675781892628, + "grad_norm": 6.781953401474344, + "learning_rate": 6.373922416240907e-06, + "loss": 17.6232, + "step": 23473 + }, + { + "epoch": 0.4290858573857093, + "grad_norm": 5.94086083175987, + "learning_rate": 6.373637797009706e-06, + "loss": 17.1802, + "step": 23474 + }, + { + "epoch": 0.42910413658215585, + "grad_norm": 6.625007104623523, + "learning_rate": 6.373353172964021e-06, + "loss": 17.666, + "step": 23475 + }, + { + "epoch": 0.4291224157786024, + "grad_norm": 7.0803730896948105, + "learning_rate": 6.3730685441048545e-06, + "loss": 17.8875, + "step": 23476 + }, + { + "epoch": 0.4291406949750489, + "grad_norm": 7.0384334182787285, + "learning_rate": 6.372783910433202e-06, + "loss": 17.8789, + "step": 23477 + }, + { + "epoch": 0.4291589741714954, + "grad_norm": 6.710562453004961, + "learning_rate": 6.37249927195006e-06, + "loss": 17.7144, + "step": 23478 + }, + { + "epoch": 0.42917725336794194, + "grad_norm": 7.008047150991366, + "learning_rate": 6.372214628656427e-06, + "loss": 17.8009, + "step": 23479 + }, + { + "epoch": 0.42919553256438847, + "grad_norm": 5.735284382354732, + "learning_rate": 6.371929980553302e-06, + "loss": 17.407, + "step": 23480 + }, + { + "epoch": 0.429213811760835, + "grad_norm": 6.266421602105894, + "learning_rate": 6.37164532764168e-06, + "loss": 17.5883, + "step": 23481 + }, + { + "epoch": 0.42923209095728154, + "grad_norm": 6.604179934218001, + "learning_rate": 6.37136066992256e-06, + "loss": 17.4246, + "step": 23482 + }, + { + "epoch": 0.429250370153728, + "grad_norm": 6.678183162723747, + "learning_rate": 6.371076007396942e-06, + "loss": 17.809, + "step": 23483 + }, + { + "epoch": 0.42926864935017456, + "grad_norm": 6.645304757768106, + "learning_rate": 6.370791340065819e-06, + "loss": 17.4831, + "step": 23484 + }, + { + "epoch": 0.4292869285466211, + "grad_norm": 7.229561117047641, + "learning_rate": 6.370506667930193e-06, + "loss": 17.8044, + "step": 23485 + }, + { + "epoch": 0.42930520774306763, + "grad_norm": 7.211862745666257, + "learning_rate": 6.370221990991059e-06, + "loss": 17.7365, + "step": 23486 + }, + { + "epoch": 0.42932348693951417, + "grad_norm": 7.6654712734821855, + "learning_rate": 6.3699373092494185e-06, + "loss": 17.8216, + "step": 23487 + }, + { + "epoch": 0.42934176613596065, + "grad_norm": 6.927591734126307, + "learning_rate": 6.369652622706264e-06, + "loss": 17.8755, + "step": 23488 + }, + { + "epoch": 0.4293600453324072, + "grad_norm": 5.89864838012858, + "learning_rate": 6.3693679313625955e-06, + "loss": 17.4478, + "step": 23489 + }, + { + "epoch": 0.4293783245288537, + "grad_norm": 9.105643662289777, + "learning_rate": 6.369083235219413e-06, + "loss": 18.2581, + "step": 23490 + }, + { + "epoch": 0.42939660372530025, + "grad_norm": 6.688581317154519, + "learning_rate": 6.3687985342777115e-06, + "loss": 17.6537, + "step": 23491 + }, + { + "epoch": 0.4294148829217468, + "grad_norm": 6.440538128411903, + "learning_rate": 6.368513828538491e-06, + "loss": 17.3301, + "step": 23492 + }, + { + "epoch": 
0.42943316211819327, + "grad_norm": 6.289178477966556, + "learning_rate": 6.368229118002746e-06, + "loss": 17.5724, + "step": 23493 + }, + { + "epoch": 0.4294514413146398, + "grad_norm": 5.8884013131330795, + "learning_rate": 6.367944402671479e-06, + "loss": 17.3068, + "step": 23494 + }, + { + "epoch": 0.42946972051108634, + "grad_norm": 7.201165273310998, + "learning_rate": 6.367659682545685e-06, + "loss": 18.0724, + "step": 23495 + }, + { + "epoch": 0.4294879997075329, + "grad_norm": 6.013576105633485, + "learning_rate": 6.367374957626362e-06, + "loss": 17.3887, + "step": 23496 + }, + { + "epoch": 0.42950627890397935, + "grad_norm": 5.994183588812937, + "learning_rate": 6.36709022791451e-06, + "loss": 17.4696, + "step": 23497 + }, + { + "epoch": 0.4295245581004259, + "grad_norm": 5.918961348027402, + "learning_rate": 6.366805493411122e-06, + "loss": 17.4055, + "step": 23498 + }, + { + "epoch": 0.4295428372968724, + "grad_norm": 6.664741140326103, + "learning_rate": 6.366520754117201e-06, + "loss": 17.3546, + "step": 23499 + }, + { + "epoch": 0.42956111649331896, + "grad_norm": 6.489303887878604, + "learning_rate": 6.366236010033745e-06, + "loss": 17.6144, + "step": 23500 + }, + { + "epoch": 0.4295793956897655, + "grad_norm": 6.184314106609628, + "learning_rate": 6.365951261161749e-06, + "loss": 17.7604, + "step": 23501 + }, + { + "epoch": 0.429597674886212, + "grad_norm": 7.7819691350396845, + "learning_rate": 6.365666507502213e-06, + "loss": 18.0776, + "step": 23502 + }, + { + "epoch": 0.4296159540826585, + "grad_norm": 7.812641912064223, + "learning_rate": 6.365381749056132e-06, + "loss": 17.8831, + "step": 23503 + }, + { + "epoch": 0.42963423327910505, + "grad_norm": 6.9714881228046695, + "learning_rate": 6.365096985824509e-06, + "loss": 17.7983, + "step": 23504 + }, + { + "epoch": 0.4296525124755516, + "grad_norm": 5.83690403922629, + "learning_rate": 6.364812217808339e-06, + "loss": 17.2189, + "step": 23505 + }, + { + "epoch": 0.4296707916719981, + "grad_norm": 8.227452114088974, + "learning_rate": 6.36452744500862e-06, + "loss": 18.3677, + "step": 23506 + }, + { + "epoch": 0.4296890708684446, + "grad_norm": 5.292856627022733, + "learning_rate": 6.364242667426351e-06, + "loss": 17.0268, + "step": 23507 + }, + { + "epoch": 0.42970735006489114, + "grad_norm": 7.44315320327994, + "learning_rate": 6.3639578850625305e-06, + "loss": 18.1939, + "step": 23508 + }, + { + "epoch": 0.42972562926133767, + "grad_norm": 8.495513373816406, + "learning_rate": 6.363673097918155e-06, + "loss": 18.83, + "step": 23509 + }, + { + "epoch": 0.4297439084577842, + "grad_norm": 5.843429288037855, + "learning_rate": 6.3633883059942246e-06, + "loss": 17.4012, + "step": 23510 + }, + { + "epoch": 0.42976218765423074, + "grad_norm": 5.653508601303307, + "learning_rate": 6.363103509291735e-06, + "loss": 17.3988, + "step": 23511 + }, + { + "epoch": 0.4297804668506772, + "grad_norm": 5.9362537578750905, + "learning_rate": 6.362818707811687e-06, + "loss": 17.495, + "step": 23512 + }, + { + "epoch": 0.42979874604712376, + "grad_norm": 5.418770510451064, + "learning_rate": 6.362533901555078e-06, + "loss": 17.0801, + "step": 23513 + }, + { + "epoch": 0.4298170252435703, + "grad_norm": 7.009895044952832, + "learning_rate": 6.362249090522906e-06, + "loss": 17.8629, + "step": 23514 + }, + { + "epoch": 0.42983530444001683, + "grad_norm": 7.28416435772863, + "learning_rate": 6.361964274716168e-06, + "loss": 18.1369, + "step": 23515 + }, + { + "epoch": 0.42985358363646337, + "grad_norm": 6.379897454566321, + "learning_rate": 
6.361679454135863e-06, + "loss": 17.4548, + "step": 23516 + }, + { + "epoch": 0.42987186283290985, + "grad_norm": 6.540674653398182, + "learning_rate": 6.361394628782991e-06, + "loss": 17.6663, + "step": 23517 + }, + { + "epoch": 0.4298901420293564, + "grad_norm": 6.393240692581243, + "learning_rate": 6.361109798658549e-06, + "loss": 17.5582, + "step": 23518 + }, + { + "epoch": 0.4299084212258029, + "grad_norm": 5.943618125336763, + "learning_rate": 6.360824963763535e-06, + "loss": 17.4567, + "step": 23519 + }, + { + "epoch": 0.42992670042224945, + "grad_norm": 5.73368450021337, + "learning_rate": 6.3605401240989485e-06, + "loss": 17.2038, + "step": 23520 + }, + { + "epoch": 0.429944979618696, + "grad_norm": 6.206420853309497, + "learning_rate": 6.360255279665785e-06, + "loss": 17.5609, + "step": 23521 + }, + { + "epoch": 0.42996325881514247, + "grad_norm": 7.4257023033097305, + "learning_rate": 6.359970430465045e-06, + "loss": 18.1641, + "step": 23522 + }, + { + "epoch": 0.429981538011589, + "grad_norm": 6.678247074651477, + "learning_rate": 6.359685576497727e-06, + "loss": 17.5187, + "step": 23523 + }, + { + "epoch": 0.42999981720803554, + "grad_norm": 5.184782187513316, + "learning_rate": 6.35940071776483e-06, + "loss": 17.003, + "step": 23524 + }, + { + "epoch": 0.4300180964044821, + "grad_norm": 6.271490413621714, + "learning_rate": 6.359115854267351e-06, + "loss": 17.4747, + "step": 23525 + }, + { + "epoch": 0.4300363756009286, + "grad_norm": 7.708380800894812, + "learning_rate": 6.358830986006288e-06, + "loss": 17.8523, + "step": 23526 + }, + { + "epoch": 0.4300546547973751, + "grad_norm": 6.687626777871401, + "learning_rate": 6.358546112982642e-06, + "loss": 17.3331, + "step": 23527 + }, + { + "epoch": 0.4300729339938216, + "grad_norm": 6.3734659973425885, + "learning_rate": 6.358261235197409e-06, + "loss": 17.7526, + "step": 23528 + }, + { + "epoch": 0.43009121319026816, + "grad_norm": 6.961532529100626, + "learning_rate": 6.357976352651588e-06, + "loss": 18.1992, + "step": 23529 + }, + { + "epoch": 0.4301094923867147, + "grad_norm": 7.342318940314222, + "learning_rate": 6.357691465346176e-06, + "loss": 17.9878, + "step": 23530 + }, + { + "epoch": 0.4301277715831612, + "grad_norm": 4.868647472607592, + "learning_rate": 6.357406573282177e-06, + "loss": 16.9925, + "step": 23531 + }, + { + "epoch": 0.4301460507796077, + "grad_norm": 7.720392733134378, + "learning_rate": 6.3571216764605834e-06, + "loss": 18.1204, + "step": 23532 + }, + { + "epoch": 0.43016432997605425, + "grad_norm": 6.0582095173374935, + "learning_rate": 6.356836774882395e-06, + "loss": 17.3617, + "step": 23533 + }, + { + "epoch": 0.4301826091725008, + "grad_norm": 6.294570966416092, + "learning_rate": 6.356551868548614e-06, + "loss": 17.5776, + "step": 23534 + }, + { + "epoch": 0.4302008883689473, + "grad_norm": 6.149519870982751, + "learning_rate": 6.356266957460235e-06, + "loss": 17.7005, + "step": 23535 + }, + { + "epoch": 0.4302191675653938, + "grad_norm": 6.666495115279745, + "learning_rate": 6.355982041618258e-06, + "loss": 17.6561, + "step": 23536 + }, + { + "epoch": 0.43023744676184034, + "grad_norm": 5.5696277194670225, + "learning_rate": 6.355697121023681e-06, + "loss": 17.2011, + "step": 23537 + }, + { + "epoch": 0.43025572595828687, + "grad_norm": 6.57652460180183, + "learning_rate": 6.355412195677505e-06, + "loss": 17.9103, + "step": 23538 + }, + { + "epoch": 0.4302740051547334, + "grad_norm": 5.353515828827387, + "learning_rate": 6.355127265580726e-06, + "loss": 17.2027, + "step": 23539 + }, + { + 
"epoch": 0.43029228435117994, + "grad_norm": 6.7153313542503605, + "learning_rate": 6.354842330734343e-06, + "loss": 17.6947, + "step": 23540 + }, + { + "epoch": 0.4303105635476264, + "grad_norm": 6.61578978784584, + "learning_rate": 6.354557391139356e-06, + "loss": 17.4198, + "step": 23541 + }, + { + "epoch": 0.43032884274407296, + "grad_norm": 7.698702025652604, + "learning_rate": 6.354272446796763e-06, + "loss": 18.6244, + "step": 23542 + }, + { + "epoch": 0.4303471219405195, + "grad_norm": 5.020434350955654, + "learning_rate": 6.353987497707561e-06, + "loss": 17.1536, + "step": 23543 + }, + { + "epoch": 0.43036540113696603, + "grad_norm": 6.580445026362194, + "learning_rate": 6.353702543872752e-06, + "loss": 17.8923, + "step": 23544 + }, + { + "epoch": 0.43038368033341257, + "grad_norm": 6.9063397978375365, + "learning_rate": 6.353417585293333e-06, + "loss": 17.7039, + "step": 23545 + }, + { + "epoch": 0.43040195952985905, + "grad_norm": 5.466611397350951, + "learning_rate": 6.353132621970302e-06, + "loss": 17.0673, + "step": 23546 + }, + { + "epoch": 0.4304202387263056, + "grad_norm": 6.117090436525762, + "learning_rate": 6.352847653904659e-06, + "loss": 17.4691, + "step": 23547 + }, + { + "epoch": 0.4304385179227521, + "grad_norm": 8.191070441242521, + "learning_rate": 6.352562681097402e-06, + "loss": 18.4792, + "step": 23548 + }, + { + "epoch": 0.43045679711919865, + "grad_norm": 5.184905925924164, + "learning_rate": 6.352277703549532e-06, + "loss": 17.1745, + "step": 23549 + }, + { + "epoch": 0.4304750763156452, + "grad_norm": 6.102763577036956, + "learning_rate": 6.351992721262044e-06, + "loss": 17.6115, + "step": 23550 + }, + { + "epoch": 0.43049335551209167, + "grad_norm": 7.700583774581458, + "learning_rate": 6.351707734235939e-06, + "loss": 17.936, + "step": 23551 + }, + { + "epoch": 0.4305116347085382, + "grad_norm": 6.013999561214821, + "learning_rate": 6.351422742472215e-06, + "loss": 17.4293, + "step": 23552 + }, + { + "epoch": 0.43052991390498474, + "grad_norm": 6.4355427076619325, + "learning_rate": 6.351137745971874e-06, + "loss": 17.4397, + "step": 23553 + }, + { + "epoch": 0.4305481931014313, + "grad_norm": 6.155661478665161, + "learning_rate": 6.35085274473591e-06, + "loss": 17.3839, + "step": 23554 + }, + { + "epoch": 0.4305664722978778, + "grad_norm": 6.448929461968095, + "learning_rate": 6.350567738765325e-06, + "loss": 17.6711, + "step": 23555 + }, + { + "epoch": 0.4305847514943243, + "grad_norm": 5.863524683486678, + "learning_rate": 6.350282728061119e-06, + "loss": 17.3323, + "step": 23556 + }, + { + "epoch": 0.4306030306907708, + "grad_norm": 6.371609691187254, + "learning_rate": 6.349997712624287e-06, + "loss": 17.302, + "step": 23557 + }, + { + "epoch": 0.43062130988721736, + "grad_norm": 7.076900386101137, + "learning_rate": 6.34971269245583e-06, + "loss": 17.5653, + "step": 23558 + }, + { + "epoch": 0.4306395890836639, + "grad_norm": 7.148225022313409, + "learning_rate": 6.34942766755675e-06, + "loss": 17.9312, + "step": 23559 + }, + { + "epoch": 0.43065786828011043, + "grad_norm": 7.5029938852357025, + "learning_rate": 6.349142637928041e-06, + "loss": 17.7849, + "step": 23560 + }, + { + "epoch": 0.4306761474765569, + "grad_norm": 5.972651678416425, + "learning_rate": 6.348857603570704e-06, + "loss": 17.5773, + "step": 23561 + }, + { + "epoch": 0.43069442667300345, + "grad_norm": 6.354691840650409, + "learning_rate": 6.3485725644857375e-06, + "loss": 17.5921, + "step": 23562 + }, + { + "epoch": 0.43071270586945, + "grad_norm": 7.019444950097048, + 
"learning_rate": 6.348287520674144e-06, + "loss": 17.8997, + "step": 23563 + }, + { + "epoch": 0.4307309850658965, + "grad_norm": 5.534087623630858, + "learning_rate": 6.3480024721369175e-06, + "loss": 17.2789, + "step": 23564 + }, + { + "epoch": 0.430749264262343, + "grad_norm": 6.998121395021827, + "learning_rate": 6.347717418875059e-06, + "loss": 18.0811, + "step": 23565 + }, + { + "epoch": 0.43076754345878954, + "grad_norm": 6.394144033047483, + "learning_rate": 6.347432360889569e-06, + "loss": 17.8028, + "step": 23566 + }, + { + "epoch": 0.4307858226552361, + "grad_norm": 6.197785823895567, + "learning_rate": 6.3471472981814455e-06, + "loss": 17.3168, + "step": 23567 + }, + { + "epoch": 0.4308041018516826, + "grad_norm": 7.082447720214877, + "learning_rate": 6.346862230751687e-06, + "loss": 18.2345, + "step": 23568 + }, + { + "epoch": 0.43082238104812914, + "grad_norm": 6.853089912572116, + "learning_rate": 6.346577158601295e-06, + "loss": 17.658, + "step": 23569 + }, + { + "epoch": 0.4308406602445756, + "grad_norm": 6.759346800709279, + "learning_rate": 6.346292081731263e-06, + "loss": 17.6147, + "step": 23570 + }, + { + "epoch": 0.43085893944102216, + "grad_norm": 5.888435230387981, + "learning_rate": 6.346007000142597e-06, + "loss": 17.5343, + "step": 23571 + }, + { + "epoch": 0.4308772186374687, + "grad_norm": 6.506146373043827, + "learning_rate": 6.345721913836293e-06, + "loss": 17.7227, + "step": 23572 + }, + { + "epoch": 0.43089549783391523, + "grad_norm": 6.400574972811731, + "learning_rate": 6.345436822813351e-06, + "loss": 17.5505, + "step": 23573 + }, + { + "epoch": 0.43091377703036177, + "grad_norm": 6.925905266755376, + "learning_rate": 6.345151727074769e-06, + "loss": 17.981, + "step": 23574 + }, + { + "epoch": 0.43093205622680825, + "grad_norm": 6.203663754888339, + "learning_rate": 6.344866626621545e-06, + "loss": 17.6513, + "step": 23575 + }, + { + "epoch": 0.4309503354232548, + "grad_norm": 9.25854042975842, + "learning_rate": 6.3445815214546835e-06, + "loss": 17.6923, + "step": 23576 + }, + { + "epoch": 0.4309686146197013, + "grad_norm": 5.340424561620956, + "learning_rate": 6.34429641157518e-06, + "loss": 17.0434, + "step": 23577 + }, + { + "epoch": 0.43098689381614785, + "grad_norm": 7.223076131341303, + "learning_rate": 6.344011296984032e-06, + "loss": 17.7422, + "step": 23578 + }, + { + "epoch": 0.4310051730125944, + "grad_norm": 7.421662114646333, + "learning_rate": 6.343726177682242e-06, + "loss": 18.237, + "step": 23579 + }, + { + "epoch": 0.43102345220904087, + "grad_norm": 6.412888405338289, + "learning_rate": 6.343441053670809e-06, + "loss": 17.768, + "step": 23580 + }, + { + "epoch": 0.4310417314054874, + "grad_norm": 5.686539067838539, + "learning_rate": 6.343155924950731e-06, + "loss": 17.561, + "step": 23581 + }, + { + "epoch": 0.43106001060193394, + "grad_norm": 6.2577164798291305, + "learning_rate": 6.3428707915230084e-06, + "loss": 17.3655, + "step": 23582 + }, + { + "epoch": 0.4310782897983805, + "grad_norm": 6.478114533256353, + "learning_rate": 6.342585653388641e-06, + "loss": 17.7416, + "step": 23583 + }, + { + "epoch": 0.431096568994827, + "grad_norm": 6.864987302960307, + "learning_rate": 6.3423005105486255e-06, + "loss": 17.835, + "step": 23584 + }, + { + "epoch": 0.4311148481912735, + "grad_norm": 7.418753310101508, + "learning_rate": 6.342015363003964e-06, + "loss": 17.3938, + "step": 23585 + }, + { + "epoch": 0.43113312738772, + "grad_norm": 6.927264000389076, + "learning_rate": 6.341730210755656e-06, + "loss": 17.8835, + "step": 23586 + 
}, + { + "epoch": 0.43115140658416656, + "grad_norm": 6.540024975991596, + "learning_rate": 6.3414450538047e-06, + "loss": 17.5499, + "step": 23587 + }, + { + "epoch": 0.4311696857806131, + "grad_norm": 6.128366513281492, + "learning_rate": 6.341159892152094e-06, + "loss": 17.327, + "step": 23588 + }, + { + "epoch": 0.43118796497705963, + "grad_norm": 6.531556646376057, + "learning_rate": 6.340874725798839e-06, + "loss": 17.3907, + "step": 23589 + }, + { + "epoch": 0.4312062441735061, + "grad_norm": 5.874498961371777, + "learning_rate": 6.340589554745936e-06, + "loss": 17.3448, + "step": 23590 + }, + { + "epoch": 0.43122452336995265, + "grad_norm": 6.398861232659166, + "learning_rate": 6.340304378994382e-06, + "loss": 17.3126, + "step": 23591 + }, + { + "epoch": 0.4312428025663992, + "grad_norm": 6.822006102054159, + "learning_rate": 6.340019198545177e-06, + "loss": 17.5016, + "step": 23592 + }, + { + "epoch": 0.4312610817628457, + "grad_norm": 5.416352710818658, + "learning_rate": 6.339734013399323e-06, + "loss": 17.344, + "step": 23593 + }, + { + "epoch": 0.43127936095929226, + "grad_norm": 5.854315251684952, + "learning_rate": 6.339448823557816e-06, + "loss": 17.5095, + "step": 23594 + }, + { + "epoch": 0.43129764015573874, + "grad_norm": 4.862848523644214, + "learning_rate": 6.339163629021656e-06, + "loss": 16.6493, + "step": 23595 + }, + { + "epoch": 0.4313159193521853, + "grad_norm": 7.724796247332725, + "learning_rate": 6.338878429791846e-06, + "loss": 18.062, + "step": 23596 + }, + { + "epoch": 0.4313341985486318, + "grad_norm": 6.703452027022384, + "learning_rate": 6.338593225869382e-06, + "loss": 17.9162, + "step": 23597 + }, + { + "epoch": 0.43135247774507834, + "grad_norm": 6.360853912618733, + "learning_rate": 6.338308017255265e-06, + "loss": 17.6445, + "step": 23598 + }, + { + "epoch": 0.4313707569415248, + "grad_norm": 5.146724697823028, + "learning_rate": 6.338022803950495e-06, + "loss": 16.985, + "step": 23599 + }, + { + "epoch": 0.43138903613797136, + "grad_norm": 6.493735869695229, + "learning_rate": 6.337737585956072e-06, + "loss": 17.4252, + "step": 23600 + }, + { + "epoch": 0.4314073153344179, + "grad_norm": 6.177591743726019, + "learning_rate": 6.337452363272994e-06, + "loss": 17.499, + "step": 23601 + }, + { + "epoch": 0.43142559453086443, + "grad_norm": 5.310687994515556, + "learning_rate": 6.3371671359022595e-06, + "loss": 16.9643, + "step": 23602 + }, + { + "epoch": 0.43144387372731097, + "grad_norm": 6.164661027840266, + "learning_rate": 6.336881903844872e-06, + "loss": 17.3908, + "step": 23603 + }, + { + "epoch": 0.43146215292375745, + "grad_norm": 6.258563344737975, + "learning_rate": 6.33659666710183e-06, + "loss": 17.2719, + "step": 23604 + }, + { + "epoch": 0.431480432120204, + "grad_norm": 6.074955619878194, + "learning_rate": 6.336311425674132e-06, + "loss": 17.3799, + "step": 23605 + }, + { + "epoch": 0.4314987113166505, + "grad_norm": 6.905842635532751, + "learning_rate": 6.336026179562777e-06, + "loss": 17.6053, + "step": 23606 + }, + { + "epoch": 0.43151699051309705, + "grad_norm": 5.6531995983639165, + "learning_rate": 6.335740928768769e-06, + "loss": 17.2047, + "step": 23607 + }, + { + "epoch": 0.4315352697095436, + "grad_norm": 6.341681698187891, + "learning_rate": 6.335455673293102e-06, + "loss": 17.4688, + "step": 23608 + }, + { + "epoch": 0.43155354890599007, + "grad_norm": 6.6960972846629, + "learning_rate": 6.335170413136782e-06, + "loss": 17.6311, + "step": 23609 + }, + { + "epoch": 0.4315718281024366, + "grad_norm": 5.425467928272902, + 
"learning_rate": 6.3348851483008034e-06, + "loss": 17.2715, + "step": 23610 + }, + { + "epoch": 0.43159010729888314, + "grad_norm": 5.786997763853015, + "learning_rate": 6.334599878786169e-06, + "loss": 17.0508, + "step": 23611 + }, + { + "epoch": 0.4316083864953297, + "grad_norm": 7.2793864873006315, + "learning_rate": 6.334314604593877e-06, + "loss": 17.8548, + "step": 23612 + }, + { + "epoch": 0.4316266656917762, + "grad_norm": 7.507989362870152, + "learning_rate": 6.334029325724928e-06, + "loss": 17.5372, + "step": 23613 + }, + { + "epoch": 0.4316449448882227, + "grad_norm": 6.007074653945293, + "learning_rate": 6.333744042180324e-06, + "loss": 17.4673, + "step": 23614 + }, + { + "epoch": 0.43166322408466923, + "grad_norm": 7.066863931780278, + "learning_rate": 6.3334587539610616e-06, + "loss": 17.6358, + "step": 23615 + }, + { + "epoch": 0.43168150328111576, + "grad_norm": 5.730963674864685, + "learning_rate": 6.33317346106814e-06, + "loss": 17.2837, + "step": 23616 + }, + { + "epoch": 0.4316997824775623, + "grad_norm": 6.995700824601071, + "learning_rate": 6.3328881635025645e-06, + "loss": 17.9608, + "step": 23617 + }, + { + "epoch": 0.43171806167400884, + "grad_norm": 7.683474690210917, + "learning_rate": 6.33260286126533e-06, + "loss": 17.9472, + "step": 23618 + }, + { + "epoch": 0.4317363408704553, + "grad_norm": 5.3997313453108005, + "learning_rate": 6.332317554357439e-06, + "loss": 17.3005, + "step": 23619 + }, + { + "epoch": 0.43175462006690185, + "grad_norm": 7.647251536137354, + "learning_rate": 6.332032242779888e-06, + "loss": 18.0896, + "step": 23620 + }, + { + "epoch": 0.4317728992633484, + "grad_norm": 6.0922444223845025, + "learning_rate": 6.3317469265336825e-06, + "loss": 17.2557, + "step": 23621 + }, + { + "epoch": 0.4317911784597949, + "grad_norm": 6.41205927824415, + "learning_rate": 6.331461605619819e-06, + "loss": 17.6117, + "step": 23622 + }, + { + "epoch": 0.43180945765624146, + "grad_norm": 6.974559947009274, + "learning_rate": 6.331176280039297e-06, + "loss": 17.9107, + "step": 23623 + }, + { + "epoch": 0.43182773685268794, + "grad_norm": 6.111808477909253, + "learning_rate": 6.330890949793118e-06, + "loss": 17.6125, + "step": 23624 + }, + { + "epoch": 0.4318460160491345, + "grad_norm": 6.386831922776062, + "learning_rate": 6.330605614882282e-06, + "loss": 17.3546, + "step": 23625 + }, + { + "epoch": 0.431864295245581, + "grad_norm": 5.6752321721912455, + "learning_rate": 6.330320275307788e-06, + "loss": 17.4309, + "step": 23626 + }, + { + "epoch": 0.43188257444202754, + "grad_norm": 6.8700104537621645, + "learning_rate": 6.3300349310706385e-06, + "loss": 17.7857, + "step": 23627 + }, + { + "epoch": 0.4319008536384741, + "grad_norm": 5.587201181943335, + "learning_rate": 6.329749582171831e-06, + "loss": 17.4384, + "step": 23628 + }, + { + "epoch": 0.43191913283492056, + "grad_norm": 7.180760901175978, + "learning_rate": 6.329464228612366e-06, + "loss": 17.8585, + "step": 23629 + }, + { + "epoch": 0.4319374120313671, + "grad_norm": 6.486207732412409, + "learning_rate": 6.329178870393245e-06, + "loss": 17.4598, + "step": 23630 + }, + { + "epoch": 0.43195569122781363, + "grad_norm": 5.825725309102074, + "learning_rate": 6.328893507515469e-06, + "loss": 17.2494, + "step": 23631 + }, + { + "epoch": 0.43197397042426017, + "grad_norm": 6.5699124501434065, + "learning_rate": 6.328608139980035e-06, + "loss": 17.4834, + "step": 23632 + }, + { + "epoch": 0.43199224962070665, + "grad_norm": 5.819090964177129, + "learning_rate": 6.328322767787944e-06, + "loss": 17.1533, + 
"step": 23633 + }, + { + "epoch": 0.4320105288171532, + "grad_norm": 5.410143388116245, + "learning_rate": 6.328037390940196e-06, + "loss": 17.1068, + "step": 23634 + }, + { + "epoch": 0.4320288080135997, + "grad_norm": 5.6812501974864675, + "learning_rate": 6.327752009437795e-06, + "loss": 17.2393, + "step": 23635 + }, + { + "epoch": 0.43204708721004625, + "grad_norm": 5.394884613321816, + "learning_rate": 6.327466623281737e-06, + "loss": 17.105, + "step": 23636 + }, + { + "epoch": 0.4320653664064928, + "grad_norm": 6.279848727586088, + "learning_rate": 6.3271812324730246e-06, + "loss": 17.5912, + "step": 23637 + }, + { + "epoch": 0.43208364560293927, + "grad_norm": 7.0200001625514865, + "learning_rate": 6.326895837012657e-06, + "loss": 17.8785, + "step": 23638 + }, + { + "epoch": 0.4321019247993858, + "grad_norm": 6.6011727904737, + "learning_rate": 6.326610436901633e-06, + "loss": 17.6225, + "step": 23639 + }, + { + "epoch": 0.43212020399583234, + "grad_norm": 9.000078204813386, + "learning_rate": 6.3263250321409565e-06, + "loss": 18.1729, + "step": 23640 + }, + { + "epoch": 0.4321384831922789, + "grad_norm": 6.494816833204111, + "learning_rate": 6.326039622731625e-06, + "loss": 17.5466, + "step": 23641 + }, + { + "epoch": 0.4321567623887254, + "grad_norm": 7.552311891076966, + "learning_rate": 6.325754208674639e-06, + "loss": 18.2418, + "step": 23642 + }, + { + "epoch": 0.4321750415851719, + "grad_norm": 4.660648391971336, + "learning_rate": 6.325468789971e-06, + "loss": 17.0135, + "step": 23643 + }, + { + "epoch": 0.43219332078161843, + "grad_norm": 7.794188448478198, + "learning_rate": 6.325183366621708e-06, + "loss": 18.2133, + "step": 23644 + }, + { + "epoch": 0.43221159997806496, + "grad_norm": 6.958218183860368, + "learning_rate": 6.324897938627764e-06, + "loss": 17.6694, + "step": 23645 + }, + { + "epoch": 0.4322298791745115, + "grad_norm": 5.308723604304931, + "learning_rate": 6.3246125059901675e-06, + "loss": 17.1629, + "step": 23646 + }, + { + "epoch": 0.43224815837095804, + "grad_norm": 6.898854752428574, + "learning_rate": 6.324327068709919e-06, + "loss": 17.6634, + "step": 23647 + }, + { + "epoch": 0.4322664375674045, + "grad_norm": 5.657987460756355, + "learning_rate": 6.3240416267880176e-06, + "loss": 17.4149, + "step": 23648 + }, + { + "epoch": 0.43228471676385105, + "grad_norm": 6.514028981464109, + "learning_rate": 6.323756180225467e-06, + "loss": 17.4828, + "step": 23649 + }, + { + "epoch": 0.4323029959602976, + "grad_norm": 6.3316402606485065, + "learning_rate": 6.323470729023265e-06, + "loss": 17.4596, + "step": 23650 + }, + { + "epoch": 0.4323212751567441, + "grad_norm": 6.1611782907205495, + "learning_rate": 6.323185273182414e-06, + "loss": 17.7437, + "step": 23651 + }, + { + "epoch": 0.43233955435319066, + "grad_norm": 6.204323257004265, + "learning_rate": 6.322899812703912e-06, + "loss": 17.7884, + "step": 23652 + }, + { + "epoch": 0.43235783354963714, + "grad_norm": 6.031031266139402, + "learning_rate": 6.3226143475887615e-06, + "loss": 17.5877, + "step": 23653 + }, + { + "epoch": 0.4323761127460837, + "grad_norm": 6.642578101931615, + "learning_rate": 6.322328877837962e-06, + "loss": 17.8141, + "step": 23654 + }, + { + "epoch": 0.4323943919425302, + "grad_norm": 6.232290290975836, + "learning_rate": 6.322043403452516e-06, + "loss": 17.6875, + "step": 23655 + }, + { + "epoch": 0.43241267113897675, + "grad_norm": 6.447425512472471, + "learning_rate": 6.321757924433423e-06, + "loss": 17.7279, + "step": 23656 + }, + { + "epoch": 0.4324309503354233, + "grad_norm": 
5.546854156608528, + "learning_rate": 6.32147244078168e-06, + "loss": 17.178, + "step": 23657 + }, + { + "epoch": 0.43244922953186976, + "grad_norm": 7.348328477775804, + "learning_rate": 6.321186952498292e-06, + "loss": 18.0173, + "step": 23658 + }, + { + "epoch": 0.4324675087283163, + "grad_norm": 6.889108306857017, + "learning_rate": 6.320901459584261e-06, + "loss": 17.8659, + "step": 23659 + }, + { + "epoch": 0.43248578792476283, + "grad_norm": 6.132608183396902, + "learning_rate": 6.320615962040582e-06, + "loss": 17.4422, + "step": 23660 + }, + { + "epoch": 0.43250406712120937, + "grad_norm": 6.72385031976017, + "learning_rate": 6.32033045986826e-06, + "loss": 17.7272, + "step": 23661 + }, + { + "epoch": 0.4325223463176559, + "grad_norm": 4.912147979234194, + "learning_rate": 6.320044953068292e-06, + "loss": 16.9572, + "step": 23662 + }, + { + "epoch": 0.4325406255141024, + "grad_norm": 6.492503014424679, + "learning_rate": 6.319759441641684e-06, + "loss": 17.6422, + "step": 23663 + }, + { + "epoch": 0.4325589047105489, + "grad_norm": 6.552009588590618, + "learning_rate": 6.319473925589434e-06, + "loss": 17.6427, + "step": 23664 + }, + { + "epoch": 0.43257718390699545, + "grad_norm": 5.223481546395908, + "learning_rate": 6.319188404912539e-06, + "loss": 16.9528, + "step": 23665 + }, + { + "epoch": 0.432595463103442, + "grad_norm": 6.261079903315186, + "learning_rate": 6.3189028796120064e-06, + "loss": 17.7824, + "step": 23666 + }, + { + "epoch": 0.43261374229988847, + "grad_norm": 7.699282425401338, + "learning_rate": 6.318617349688833e-06, + "loss": 18.1409, + "step": 23667 + }, + { + "epoch": 0.432632021496335, + "grad_norm": 6.000838364614353, + "learning_rate": 6.3183318151440185e-06, + "loss": 17.6232, + "step": 23668 + }, + { + "epoch": 0.43265030069278154, + "grad_norm": 7.386893603469021, + "learning_rate": 6.318046275978568e-06, + "loss": 17.883, + "step": 23669 + }, + { + "epoch": 0.4326685798892281, + "grad_norm": 5.803587478337489, + "learning_rate": 6.317760732193476e-06, + "loss": 17.1153, + "step": 23670 + }, + { + "epoch": 0.4326868590856746, + "grad_norm": 5.739835343185507, + "learning_rate": 6.317475183789749e-06, + "loss": 17.4805, + "step": 23671 + }, + { + "epoch": 0.4327051382821211, + "grad_norm": 6.590862740531634, + "learning_rate": 6.317189630768387e-06, + "loss": 17.5096, + "step": 23672 + }, + { + "epoch": 0.43272341747856763, + "grad_norm": 6.227370853172004, + "learning_rate": 6.31690407313039e-06, + "loss": 17.3566, + "step": 23673 + }, + { + "epoch": 0.43274169667501416, + "grad_norm": 7.471506976174249, + "learning_rate": 6.316618510876756e-06, + "loss": 17.8985, + "step": 23674 + }, + { + "epoch": 0.4327599758714607, + "grad_norm": 6.930991009726059, + "learning_rate": 6.316332944008489e-06, + "loss": 17.7353, + "step": 23675 + }, + { + "epoch": 0.43277825506790724, + "grad_norm": 8.710427864066192, + "learning_rate": 6.31604737252659e-06, + "loss": 17.9213, + "step": 23676 + }, + { + "epoch": 0.4327965342643537, + "grad_norm": 7.255891957310312, + "learning_rate": 6.315761796432059e-06, + "loss": 18.1452, + "step": 23677 + }, + { + "epoch": 0.43281481346080025, + "grad_norm": 7.331456921649562, + "learning_rate": 6.315476215725898e-06, + "loss": 18.0464, + "step": 23678 + }, + { + "epoch": 0.4328330926572468, + "grad_norm": 5.865152578344326, + "learning_rate": 6.3151906304091044e-06, + "loss": 17.3125, + "step": 23679 + }, + { + "epoch": 0.4328513718536933, + "grad_norm": 6.2557696130679314, + "learning_rate": 6.314905040482684e-06, + "loss": 
17.2814, + "step": 23680 + }, + { + "epoch": 0.43286965105013986, + "grad_norm": 6.979713538355563, + "learning_rate": 6.314619445947635e-06, + "loss": 17.9718, + "step": 23681 + }, + { + "epoch": 0.43288793024658634, + "grad_norm": 6.949967968444671, + "learning_rate": 6.314333846804958e-06, + "loss": 17.8274, + "step": 23682 + }, + { + "epoch": 0.4329062094430329, + "grad_norm": 7.340115865364658, + "learning_rate": 6.3140482430556575e-06, + "loss": 18.0253, + "step": 23683 + }, + { + "epoch": 0.4329244886394794, + "grad_norm": 6.784555261414443, + "learning_rate": 6.3137626347007285e-06, + "loss": 17.7889, + "step": 23684 + }, + { + "epoch": 0.43294276783592595, + "grad_norm": 7.6079277560130905, + "learning_rate": 6.313477021741177e-06, + "loss": 18.6101, + "step": 23685 + }, + { + "epoch": 0.4329610470323725, + "grad_norm": 7.2347278507334245, + "learning_rate": 6.313191404178003e-06, + "loss": 17.5927, + "step": 23686 + }, + { + "epoch": 0.43297932622881896, + "grad_norm": 6.8709107838501025, + "learning_rate": 6.312905782012208e-06, + "loss": 17.9347, + "step": 23687 + }, + { + "epoch": 0.4329976054252655, + "grad_norm": 6.193055591317017, + "learning_rate": 6.312620155244791e-06, + "loss": 18.3008, + "step": 23688 + }, + { + "epoch": 0.43301588462171203, + "grad_norm": 7.119030086685474, + "learning_rate": 6.312334523876753e-06, + "loss": 17.7394, + "step": 23689 + }, + { + "epoch": 0.43303416381815857, + "grad_norm": 8.173876144565408, + "learning_rate": 6.312048887909098e-06, + "loss": 18.3403, + "step": 23690 + }, + { + "epoch": 0.4330524430146051, + "grad_norm": 4.51053767601419, + "learning_rate": 6.311763247342824e-06, + "loss": 16.9699, + "step": 23691 + }, + { + "epoch": 0.4330707222110516, + "grad_norm": 5.552189046937376, + "learning_rate": 6.311477602178936e-06, + "loss": 17.2013, + "step": 23692 + }, + { + "epoch": 0.4330890014074981, + "grad_norm": 5.192470755949138, + "learning_rate": 6.31119195241843e-06, + "loss": 16.9921, + "step": 23693 + }, + { + "epoch": 0.43310728060394466, + "grad_norm": 5.885915132582066, + "learning_rate": 6.310906298062313e-06, + "loss": 17.48, + "step": 23694 + }, + { + "epoch": 0.4331255598003912, + "grad_norm": 6.116542001462024, + "learning_rate": 6.310620639111581e-06, + "loss": 17.4715, + "step": 23695 + }, + { + "epoch": 0.4331438389968377, + "grad_norm": 7.1573774379069155, + "learning_rate": 6.310334975567238e-06, + "loss": 18.1411, + "step": 23696 + }, + { + "epoch": 0.4331621181932842, + "grad_norm": 6.87937979468646, + "learning_rate": 6.310049307430285e-06, + "loss": 17.7575, + "step": 23697 + }, + { + "epoch": 0.43318039738973074, + "grad_norm": 6.805774397697574, + "learning_rate": 6.309763634701722e-06, + "loss": 17.7559, + "step": 23698 + }, + { + "epoch": 0.4331986765861773, + "grad_norm": 6.354744043821025, + "learning_rate": 6.309477957382551e-06, + "loss": 17.8381, + "step": 23699 + }, + { + "epoch": 0.4332169557826238, + "grad_norm": 6.263618172154704, + "learning_rate": 6.309192275473776e-06, + "loss": 17.4542, + "step": 23700 + }, + { + "epoch": 0.4332352349790703, + "grad_norm": 5.9982633904496945, + "learning_rate": 6.308906588976393e-06, + "loss": 17.245, + "step": 23701 + }, + { + "epoch": 0.43325351417551683, + "grad_norm": 6.403164628163186, + "learning_rate": 6.3086208978914055e-06, + "loss": 17.5769, + "step": 23702 + }, + { + "epoch": 0.43327179337196337, + "grad_norm": 5.2043635896948865, + "learning_rate": 6.3083352022198176e-06, + "loss": 16.9292, + "step": 23703 + }, + { + "epoch": 0.4332900725684099, + 
"grad_norm": 5.8461032049150266, + "learning_rate": 6.308049501962628e-06, + "loss": 17.3624, + "step": 23704 + }, + { + "epoch": 0.43330835176485644, + "grad_norm": 6.958185011318118, + "learning_rate": 6.3077637971208376e-06, + "loss": 17.3023, + "step": 23705 + }, + { + "epoch": 0.4333266309613029, + "grad_norm": 6.964875055633574, + "learning_rate": 6.307478087695448e-06, + "loss": 17.7401, + "step": 23706 + }, + { + "epoch": 0.43334491015774945, + "grad_norm": 7.906333688005333, + "learning_rate": 6.307192373687462e-06, + "loss": 18.0221, + "step": 23707 + }, + { + "epoch": 0.433363189354196, + "grad_norm": 6.303503673044512, + "learning_rate": 6.3069066550978795e-06, + "loss": 17.6923, + "step": 23708 + }, + { + "epoch": 0.4333814685506425, + "grad_norm": 7.393071072362138, + "learning_rate": 6.306620931927702e-06, + "loss": 17.565, + "step": 23709 + }, + { + "epoch": 0.43339974774708906, + "grad_norm": 6.959454472348956, + "learning_rate": 6.306335204177933e-06, + "loss": 17.7927, + "step": 23710 + }, + { + "epoch": 0.43341802694353554, + "grad_norm": 6.3942945657447545, + "learning_rate": 6.306049471849572e-06, + "loss": 17.4261, + "step": 23711 + }, + { + "epoch": 0.4334363061399821, + "grad_norm": 7.0891884242612795, + "learning_rate": 6.305763734943622e-06, + "loss": 18.0542, + "step": 23712 + }, + { + "epoch": 0.4334545853364286, + "grad_norm": 5.988720333390101, + "learning_rate": 6.3054779934610825e-06, + "loss": 17.3924, + "step": 23713 + }, + { + "epoch": 0.43347286453287515, + "grad_norm": 8.290340208252422, + "learning_rate": 6.305192247402956e-06, + "loss": 18.3084, + "step": 23714 + }, + { + "epoch": 0.4334911437293217, + "grad_norm": 5.889952083587758, + "learning_rate": 6.304906496770244e-06, + "loss": 17.2681, + "step": 23715 + }, + { + "epoch": 0.43350942292576816, + "grad_norm": 6.112561906215684, + "learning_rate": 6.304620741563946e-06, + "loss": 17.4354, + "step": 23716 + }, + { + "epoch": 0.4335277021222147, + "grad_norm": 5.97949838288502, + "learning_rate": 6.304334981785067e-06, + "loss": 17.4104, + "step": 23717 + }, + { + "epoch": 0.43354598131866123, + "grad_norm": 6.254784076973419, + "learning_rate": 6.3040492174346095e-06, + "loss": 17.7769, + "step": 23718 + }, + { + "epoch": 0.43356426051510777, + "grad_norm": 4.91749993441896, + "learning_rate": 6.303763448513569e-06, + "loss": 16.818, + "step": 23719 + }, + { + "epoch": 0.4335825397115543, + "grad_norm": 6.556698095861072, + "learning_rate": 6.303477675022952e-06, + "loss": 17.5761, + "step": 23720 + }, + { + "epoch": 0.4336008189080008, + "grad_norm": 7.580466775949854, + "learning_rate": 6.3031918969637595e-06, + "loss": 17.9656, + "step": 23721 + }, + { + "epoch": 0.4336190981044473, + "grad_norm": 7.378613834067667, + "learning_rate": 6.302906114336992e-06, + "loss": 17.9768, + "step": 23722 + }, + { + "epoch": 0.43363737730089386, + "grad_norm": 6.573427134354408, + "learning_rate": 6.302620327143652e-06, + "loss": 17.6758, + "step": 23723 + }, + { + "epoch": 0.4336556564973404, + "grad_norm": 7.381865920222753, + "learning_rate": 6.3023345353847395e-06, + "loss": 17.8383, + "step": 23724 + }, + { + "epoch": 0.4336739356937869, + "grad_norm": 7.178408294525828, + "learning_rate": 6.302048739061258e-06, + "loss": 18.1773, + "step": 23725 + }, + { + "epoch": 0.4336922148902334, + "grad_norm": 6.765000874438852, + "learning_rate": 6.30176293817421e-06, + "loss": 17.7684, + "step": 23726 + }, + { + "epoch": 0.43371049408667994, + "grad_norm": 7.228686473083321, + "learning_rate": 
6.301477132724594e-06, + "loss": 17.7485, + "step": 23727 + }, + { + "epoch": 0.4337287732831265, + "grad_norm": 7.387191203986933, + "learning_rate": 6.301191322713416e-06, + "loss": 17.9596, + "step": 23728 + }, + { + "epoch": 0.433747052479573, + "grad_norm": 6.741546671682292, + "learning_rate": 6.300905508141672e-06, + "loss": 17.6733, + "step": 23729 + }, + { + "epoch": 0.43376533167601955, + "grad_norm": 6.506066753363428, + "learning_rate": 6.30061968901037e-06, + "loss": 17.6092, + "step": 23730 + }, + { + "epoch": 0.43378361087246603, + "grad_norm": 6.6999741709423315, + "learning_rate": 6.300333865320507e-06, + "loss": 17.7757, + "step": 23731 + }, + { + "epoch": 0.43380189006891257, + "grad_norm": 6.947325471826711, + "learning_rate": 6.300048037073089e-06, + "loss": 17.9511, + "step": 23732 + }, + { + "epoch": 0.4338201692653591, + "grad_norm": 5.874680522608939, + "learning_rate": 6.299762204269113e-06, + "loss": 17.4567, + "step": 23733 + }, + { + "epoch": 0.43383844846180564, + "grad_norm": 6.361205809933727, + "learning_rate": 6.299476366909583e-06, + "loss": 17.7777, + "step": 23734 + }, + { + "epoch": 0.4338567276582521, + "grad_norm": 8.225822282790286, + "learning_rate": 6.299190524995503e-06, + "loss": 18.1674, + "step": 23735 + }, + { + "epoch": 0.43387500685469865, + "grad_norm": 6.356940099536296, + "learning_rate": 6.298904678527873e-06, + "loss": 17.5669, + "step": 23736 + }, + { + "epoch": 0.4338932860511452, + "grad_norm": 6.480234310564968, + "learning_rate": 6.2986188275076945e-06, + "loss": 17.5864, + "step": 23737 + }, + { + "epoch": 0.4339115652475917, + "grad_norm": 6.6292184884954555, + "learning_rate": 6.298332971935968e-06, + "loss": 17.2966, + "step": 23738 + }, + { + "epoch": 0.43392984444403826, + "grad_norm": 6.738738912862232, + "learning_rate": 6.298047111813699e-06, + "loss": 18.0916, + "step": 23739 + }, + { + "epoch": 0.43394812364048474, + "grad_norm": 7.26931032448823, + "learning_rate": 6.297761247141886e-06, + "loss": 18.0442, + "step": 23740 + }, + { + "epoch": 0.4339664028369313, + "grad_norm": 5.247154909447839, + "learning_rate": 6.297475377921534e-06, + "loss": 16.9678, + "step": 23741 + }, + { + "epoch": 0.4339846820333778, + "grad_norm": 6.959207063024884, + "learning_rate": 6.297189504153642e-06, + "loss": 17.497, + "step": 23742 + }, + { + "epoch": 0.43400296122982435, + "grad_norm": 6.192873902998268, + "learning_rate": 6.296903625839214e-06, + "loss": 17.2276, + "step": 23743 + }, + { + "epoch": 0.4340212404262709, + "grad_norm": 6.8783072443713555, + "learning_rate": 6.296617742979251e-06, + "loss": 17.8566, + "step": 23744 + }, + { + "epoch": 0.43403951962271736, + "grad_norm": 8.055878202472636, + "learning_rate": 6.296331855574757e-06, + "loss": 18.0506, + "step": 23745 + }, + { + "epoch": 0.4340577988191639, + "grad_norm": 6.284688876324532, + "learning_rate": 6.29604596362673e-06, + "loss": 17.8828, + "step": 23746 + }, + { + "epoch": 0.43407607801561043, + "grad_norm": 5.41551190715901, + "learning_rate": 6.295760067136177e-06, + "loss": 17.1019, + "step": 23747 + }, + { + "epoch": 0.43409435721205697, + "grad_norm": 6.578470511427789, + "learning_rate": 6.295474166104093e-06, + "loss": 17.4602, + "step": 23748 + }, + { + "epoch": 0.4341126364085035, + "grad_norm": 7.0332412085414715, + "learning_rate": 6.295188260531488e-06, + "loss": 17.862, + "step": 23749 + }, + { + "epoch": 0.43413091560495, + "grad_norm": 6.817839618855633, + "learning_rate": 6.294902350419361e-06, + "loss": 17.7536, + "step": 23750 + }, + { + 
"epoch": 0.4341491948013965, + "grad_norm": 6.1622146518976315, + "learning_rate": 6.2946164357687115e-06, + "loss": 17.4708, + "step": 23751 + }, + { + "epoch": 0.43416747399784306, + "grad_norm": 8.014778122356413, + "learning_rate": 6.294330516580545e-06, + "loss": 18.0563, + "step": 23752 + }, + { + "epoch": 0.4341857531942896, + "grad_norm": 6.252125107404907, + "learning_rate": 6.294044592855861e-06, + "loss": 17.3551, + "step": 23753 + }, + { + "epoch": 0.4342040323907361, + "grad_norm": 5.6135930489602375, + "learning_rate": 6.293758664595664e-06, + "loss": 17.2825, + "step": 23754 + }, + { + "epoch": 0.4342223115871826, + "grad_norm": 7.749024116360044, + "learning_rate": 6.2934727318009555e-06, + "loss": 17.9782, + "step": 23755 + }, + { + "epoch": 0.43424059078362914, + "grad_norm": 7.256405670159264, + "learning_rate": 6.293186794472736e-06, + "loss": 18.049, + "step": 23756 + }, + { + "epoch": 0.4342588699800757, + "grad_norm": 7.390234266504027, + "learning_rate": 6.2929008526120106e-06, + "loss": 17.8325, + "step": 23757 + }, + { + "epoch": 0.4342771491765222, + "grad_norm": 6.2910071211717975, + "learning_rate": 6.292614906219778e-06, + "loss": 17.4764, + "step": 23758 + }, + { + "epoch": 0.43429542837296875, + "grad_norm": 6.114919999794618, + "learning_rate": 6.292328955297046e-06, + "loss": 17.6181, + "step": 23759 + }, + { + "epoch": 0.43431370756941523, + "grad_norm": 5.737394877965056, + "learning_rate": 6.292042999844809e-06, + "loss": 17.2908, + "step": 23760 + }, + { + "epoch": 0.43433198676586177, + "grad_norm": 6.138453097840232, + "learning_rate": 6.2917570398640746e-06, + "loss": 17.6269, + "step": 23761 + }, + { + "epoch": 0.4343502659623083, + "grad_norm": 6.406815295454056, + "learning_rate": 6.291471075355845e-06, + "loss": 17.5962, + "step": 23762 + }, + { + "epoch": 0.43436854515875484, + "grad_norm": 7.716533458941187, + "learning_rate": 6.291185106321121e-06, + "loss": 18.1808, + "step": 23763 + }, + { + "epoch": 0.4343868243552014, + "grad_norm": 7.1176732978572765, + "learning_rate": 6.290899132760906e-06, + "loss": 17.9156, + "step": 23764 + }, + { + "epoch": 0.43440510355164785, + "grad_norm": 5.961970929296037, + "learning_rate": 6.2906131546761996e-06, + "loss": 17.4415, + "step": 23765 + }, + { + "epoch": 0.4344233827480944, + "grad_norm": 5.569594998420012, + "learning_rate": 6.290327172068007e-06, + "loss": 17.1737, + "step": 23766 + }, + { + "epoch": 0.4344416619445409, + "grad_norm": 7.591474637343277, + "learning_rate": 6.29004118493733e-06, + "loss": 18.2131, + "step": 23767 + }, + { + "epoch": 0.43445994114098746, + "grad_norm": 13.932519950208663, + "learning_rate": 6.28975519328517e-06, + "loss": 17.9378, + "step": 23768 + }, + { + "epoch": 0.43447822033743394, + "grad_norm": 7.058536196305738, + "learning_rate": 6.289469197112531e-06, + "loss": 17.882, + "step": 23769 + }, + { + "epoch": 0.4344964995338805, + "grad_norm": 6.365668185885612, + "learning_rate": 6.2891831964204116e-06, + "loss": 17.5266, + "step": 23770 + }, + { + "epoch": 0.434514778730327, + "grad_norm": 6.198416145919927, + "learning_rate": 6.28889719120982e-06, + "loss": 17.3559, + "step": 23771 + }, + { + "epoch": 0.43453305792677355, + "grad_norm": 7.714164527855773, + "learning_rate": 6.288611181481754e-06, + "loss": 18.0403, + "step": 23772 + }, + { + "epoch": 0.4345513371232201, + "grad_norm": 6.179672552393671, + "learning_rate": 6.288325167237219e-06, + "loss": 17.4772, + "step": 23773 + }, + { + "epoch": 0.43456961631966656, + "grad_norm": 6.9156131398090235, + 
"learning_rate": 6.2880391484772166e-06, + "loss": 17.4294, + "step": 23774 + }, + { + "epoch": 0.4345878955161131, + "grad_norm": 8.725267812372703, + "learning_rate": 6.287753125202744e-06, + "loss": 18.3998, + "step": 23775 + }, + { + "epoch": 0.43460617471255963, + "grad_norm": 7.209562219500126, + "learning_rate": 6.287467097414815e-06, + "loss": 17.7836, + "step": 23776 + }, + { + "epoch": 0.43462445390900617, + "grad_norm": 6.774363317910752, + "learning_rate": 6.287181065114421e-06, + "loss": 17.3588, + "step": 23777 + }, + { + "epoch": 0.4346427331054527, + "grad_norm": 7.268566592381338, + "learning_rate": 6.286895028302571e-06, + "loss": 17.9112, + "step": 23778 + }, + { + "epoch": 0.4346610123018992, + "grad_norm": 7.904588477619206, + "learning_rate": 6.286608986980265e-06, + "loss": 18.0713, + "step": 23779 + }, + { + "epoch": 0.4346792914983457, + "grad_norm": 6.923864533436848, + "learning_rate": 6.2863229411485064e-06, + "loss": 18.0276, + "step": 23780 + }, + { + "epoch": 0.43469757069479226, + "grad_norm": 6.622258261032361, + "learning_rate": 6.286036890808297e-06, + "loss": 17.4968, + "step": 23781 + }, + { + "epoch": 0.4347158498912388, + "grad_norm": 6.380302552900819, + "learning_rate": 6.28575083596064e-06, + "loss": 17.4397, + "step": 23782 + }, + { + "epoch": 0.43473412908768533, + "grad_norm": 4.829702097401405, + "learning_rate": 6.2854647766065395e-06, + "loss": 16.9652, + "step": 23783 + }, + { + "epoch": 0.4347524082841318, + "grad_norm": 6.179167919777229, + "learning_rate": 6.2851787127469935e-06, + "loss": 17.3528, + "step": 23784 + }, + { + "epoch": 0.43477068748057834, + "grad_norm": 6.818738640862289, + "learning_rate": 6.284892644383009e-06, + "loss": 17.8505, + "step": 23785 + }, + { + "epoch": 0.4347889666770249, + "grad_norm": 6.829626493712859, + "learning_rate": 6.284606571515588e-06, + "loss": 17.664, + "step": 23786 + }, + { + "epoch": 0.4348072458734714, + "grad_norm": 6.360443457777423, + "learning_rate": 6.284320494145732e-06, + "loss": 17.6461, + "step": 23787 + }, + { + "epoch": 0.43482552506991795, + "grad_norm": 6.773893232326095, + "learning_rate": 6.284034412274445e-06, + "loss": 17.6809, + "step": 23788 + }, + { + "epoch": 0.43484380426636443, + "grad_norm": 6.30655400159841, + "learning_rate": 6.283748325902726e-06, + "loss": 17.5792, + "step": 23789 + }, + { + "epoch": 0.43486208346281097, + "grad_norm": 11.005317080610869, + "learning_rate": 6.283462235031583e-06, + "loss": 17.8486, + "step": 23790 + }, + { + "epoch": 0.4348803626592575, + "grad_norm": 7.58798293626356, + "learning_rate": 6.283176139662016e-06, + "loss": 18.1948, + "step": 23791 + }, + { + "epoch": 0.43489864185570404, + "grad_norm": 6.606259008986435, + "learning_rate": 6.282890039795027e-06, + "loss": 17.6139, + "step": 23792 + }, + { + "epoch": 0.4349169210521506, + "grad_norm": 7.7573673955048825, + "learning_rate": 6.28260393543162e-06, + "loss": 17.9213, + "step": 23793 + }, + { + "epoch": 0.43493520024859705, + "grad_norm": 5.020639260069999, + "learning_rate": 6.282317826572799e-06, + "loss": 16.9778, + "step": 23794 + }, + { + "epoch": 0.4349534794450436, + "grad_norm": 5.935445833306718, + "learning_rate": 6.282031713219563e-06, + "loss": 17.347, + "step": 23795 + }, + { + "epoch": 0.4349717586414901, + "grad_norm": 6.553196914098264, + "learning_rate": 6.281745595372919e-06, + "loss": 17.6077, + "step": 23796 + }, + { + "epoch": 0.43499003783793666, + "grad_norm": 6.682440196137849, + "learning_rate": 6.281459473033867e-06, + "loss": 17.7438, + "step": 
23797 + }, + { + "epoch": 0.4350083170343832, + "grad_norm": 5.695638352358544, + "learning_rate": 6.2811733462034105e-06, + "loss": 17.3181, + "step": 23798 + }, + { + "epoch": 0.4350265962308297, + "grad_norm": 6.105545382268568, + "learning_rate": 6.280887214882553e-06, + "loss": 17.5573, + "step": 23799 + }, + { + "epoch": 0.4350448754272762, + "grad_norm": 5.335979083358556, + "learning_rate": 6.280601079072298e-06, + "loss": 17.1656, + "step": 23800 + }, + { + "epoch": 0.43506315462372275, + "grad_norm": 5.198904730747608, + "learning_rate": 6.2803149387736464e-06, + "loss": 17.2512, + "step": 23801 + }, + { + "epoch": 0.4350814338201693, + "grad_norm": 7.323196392213511, + "learning_rate": 6.2800287939876e-06, + "loss": 18.1471, + "step": 23802 + }, + { + "epoch": 0.43509971301661576, + "grad_norm": 8.272485293683951, + "learning_rate": 6.279742644715166e-06, + "loss": 18.2761, + "step": 23803 + }, + { + "epoch": 0.4351179922130623, + "grad_norm": 6.603923717364079, + "learning_rate": 6.279456490957346e-06, + "loss": 17.671, + "step": 23804 + }, + { + "epoch": 0.43513627140950883, + "grad_norm": 5.854976550362765, + "learning_rate": 6.279170332715141e-06, + "loss": 17.2637, + "step": 23805 + }, + { + "epoch": 0.43515455060595537, + "grad_norm": 7.574194909508295, + "learning_rate": 6.2788841699895545e-06, + "loss": 18.2218, + "step": 23806 + }, + { + "epoch": 0.4351728298024019, + "grad_norm": 7.174977581973314, + "learning_rate": 6.278598002781591e-06, + "loss": 17.6843, + "step": 23807 + }, + { + "epoch": 0.4351911089988484, + "grad_norm": 6.157702531852071, + "learning_rate": 6.278311831092251e-06, + "loss": 17.5328, + "step": 23808 + }, + { + "epoch": 0.4352093881952949, + "grad_norm": 6.641045909320454, + "learning_rate": 6.278025654922539e-06, + "loss": 17.6615, + "step": 23809 + }, + { + "epoch": 0.43522766739174146, + "grad_norm": 6.868937306954469, + "learning_rate": 6.2777394742734585e-06, + "loss": 17.6829, + "step": 23810 + }, + { + "epoch": 0.435245946588188, + "grad_norm": 6.527845149836127, + "learning_rate": 6.277453289146013e-06, + "loss": 17.4385, + "step": 23811 + }, + { + "epoch": 0.43526422578463453, + "grad_norm": 6.613864051417975, + "learning_rate": 6.277167099541204e-06, + "loss": 17.6872, + "step": 23812 + }, + { + "epoch": 0.435282504981081, + "grad_norm": 6.637978323084171, + "learning_rate": 6.276880905460034e-06, + "loss": 17.6415, + "step": 23813 + }, + { + "epoch": 0.43530078417752754, + "grad_norm": 6.945530451501778, + "learning_rate": 6.276594706903509e-06, + "loss": 17.9313, + "step": 23814 + }, + { + "epoch": 0.4353190633739741, + "grad_norm": 7.2642954581539145, + "learning_rate": 6.276308503872629e-06, + "loss": 17.8815, + "step": 23815 + }, + { + "epoch": 0.4353373425704206, + "grad_norm": 6.325217827060266, + "learning_rate": 6.2760222963683985e-06, + "loss": 17.6147, + "step": 23816 + }, + { + "epoch": 0.43535562176686715, + "grad_norm": 5.012434633454674, + "learning_rate": 6.2757360843918204e-06, + "loss": 17.1863, + "step": 23817 + }, + { + "epoch": 0.43537390096331363, + "grad_norm": 6.864615915355431, + "learning_rate": 6.2754498679438995e-06, + "loss": 17.6262, + "step": 23818 + }, + { + "epoch": 0.43539218015976017, + "grad_norm": 6.14360434336811, + "learning_rate": 6.275163647025638e-06, + "loss": 17.7933, + "step": 23819 + }, + { + "epoch": 0.4354104593562067, + "grad_norm": 5.812737460701002, + "learning_rate": 6.274877421638036e-06, + "loss": 17.4681, + "step": 23820 + }, + { + "epoch": 0.43542873855265324, + "grad_norm": 
5.59308688335235, + "learning_rate": 6.2745911917821e-06, + "loss": 17.2346, + "step": 23821 + }, + { + "epoch": 0.4354470177490998, + "grad_norm": 6.675402886746384, + "learning_rate": 6.274304957458833e-06, + "loss": 17.7082, + "step": 23822 + }, + { + "epoch": 0.43546529694554625, + "grad_norm": 5.454848945652792, + "learning_rate": 6.274018718669237e-06, + "loss": 17.4234, + "step": 23823 + }, + { + "epoch": 0.4354835761419928, + "grad_norm": 6.667585564411321, + "learning_rate": 6.273732475414317e-06, + "loss": 17.5829, + "step": 23824 + }, + { + "epoch": 0.4355018553384393, + "grad_norm": 7.194824399986449, + "learning_rate": 6.273446227695074e-06, + "loss": 17.6354, + "step": 23825 + }, + { + "epoch": 0.43552013453488586, + "grad_norm": 5.50968680104873, + "learning_rate": 6.273159975512514e-06, + "loss": 17.2895, + "step": 23826 + }, + { + "epoch": 0.4355384137313324, + "grad_norm": 5.744403015190538, + "learning_rate": 6.272873718867638e-06, + "loss": 17.4208, + "step": 23827 + }, + { + "epoch": 0.4355566929277789, + "grad_norm": 6.447591341865886, + "learning_rate": 6.272587457761451e-06, + "loss": 17.4899, + "step": 23828 + }, + { + "epoch": 0.4355749721242254, + "grad_norm": 6.886341073026387, + "learning_rate": 6.272301192194952e-06, + "loss": 17.7471, + "step": 23829 + }, + { + "epoch": 0.43559325132067195, + "grad_norm": 6.4278439276045605, + "learning_rate": 6.272014922169151e-06, + "loss": 17.5109, + "step": 23830 + }, + { + "epoch": 0.4356115305171185, + "grad_norm": 6.051503140524521, + "learning_rate": 6.271728647685047e-06, + "loss": 17.3598, + "step": 23831 + }, + { + "epoch": 0.435629809713565, + "grad_norm": 5.103946563957025, + "learning_rate": 6.271442368743645e-06, + "loss": 16.9667, + "step": 23832 + }, + { + "epoch": 0.4356480889100115, + "grad_norm": 6.8230330003778095, + "learning_rate": 6.271156085345949e-06, + "loss": 17.5054, + "step": 23833 + }, + { + "epoch": 0.43566636810645804, + "grad_norm": 6.123612548656846, + "learning_rate": 6.270869797492958e-06, + "loss": 17.6386, + "step": 23834 + }, + { + "epoch": 0.43568464730290457, + "grad_norm": 6.524032878715726, + "learning_rate": 6.270583505185681e-06, + "loss": 17.4919, + "step": 23835 + }, + { + "epoch": 0.4357029264993511, + "grad_norm": 8.047292619036462, + "learning_rate": 6.270297208425119e-06, + "loss": 18.374, + "step": 23836 + }, + { + "epoch": 0.4357212056957976, + "grad_norm": 5.43341562292553, + "learning_rate": 6.270010907212275e-06, + "loss": 17.1471, + "step": 23837 + }, + { + "epoch": 0.4357394848922441, + "grad_norm": 5.482138443011934, + "learning_rate": 6.269724601548152e-06, + "loss": 17.303, + "step": 23838 + }, + { + "epoch": 0.43575776408869066, + "grad_norm": 5.984226886374791, + "learning_rate": 6.269438291433756e-06, + "loss": 17.2762, + "step": 23839 + }, + { + "epoch": 0.4357760432851372, + "grad_norm": 7.463098994122314, + "learning_rate": 6.269151976870088e-06, + "loss": 17.9539, + "step": 23840 + }, + { + "epoch": 0.43579432248158373, + "grad_norm": 5.454584015390502, + "learning_rate": 6.268865657858153e-06, + "loss": 17.3301, + "step": 23841 + }, + { + "epoch": 0.4358126016780302, + "grad_norm": 6.536505420193189, + "learning_rate": 6.268579334398954e-06, + "loss": 17.5903, + "step": 23842 + }, + { + "epoch": 0.43583088087447674, + "grad_norm": 7.288094103806399, + "learning_rate": 6.268293006493493e-06, + "loss": 17.8827, + "step": 23843 + }, + { + "epoch": 0.4358491600709233, + "grad_norm": 7.773616907676997, + "learning_rate": 6.268006674142777e-06, + "loss": 17.6444, 
+ "step": 23844 + }, + { + "epoch": 0.4358674392673698, + "grad_norm": 7.666929283601415, + "learning_rate": 6.2677203373478075e-06, + "loss": 17.8615, + "step": 23845 + }, + { + "epoch": 0.43588571846381635, + "grad_norm": 5.472988303768162, + "learning_rate": 6.267433996109589e-06, + "loss": 17.2007, + "step": 23846 + }, + { + "epoch": 0.43590399766026283, + "grad_norm": 6.383812497393178, + "learning_rate": 6.267147650429122e-06, + "loss": 17.7053, + "step": 23847 + }, + { + "epoch": 0.43592227685670937, + "grad_norm": 5.908331424186044, + "learning_rate": 6.266861300307412e-06, + "loss": 17.3104, + "step": 23848 + }, + { + "epoch": 0.4359405560531559, + "grad_norm": 7.414114736635272, + "learning_rate": 6.266574945745466e-06, + "loss": 17.7896, + "step": 23849 + }, + { + "epoch": 0.43595883524960244, + "grad_norm": 6.630469700512484, + "learning_rate": 6.266288586744283e-06, + "loss": 17.8471, + "step": 23850 + }, + { + "epoch": 0.435977114446049, + "grad_norm": 6.498282735265571, + "learning_rate": 6.266002223304869e-06, + "loss": 17.6131, + "step": 23851 + }, + { + "epoch": 0.43599539364249545, + "grad_norm": 5.1313976864739645, + "learning_rate": 6.265715855428227e-06, + "loss": 17.0579, + "step": 23852 + }, + { + "epoch": 0.436013672838942, + "grad_norm": 6.155423500859613, + "learning_rate": 6.26542948311536e-06, + "loss": 17.4741, + "step": 23853 + }, + { + "epoch": 0.4360319520353885, + "grad_norm": 9.077812198192008, + "learning_rate": 6.265143106367273e-06, + "loss": 18.3364, + "step": 23854 + }, + { + "epoch": 0.43605023123183506, + "grad_norm": 6.2349520297556404, + "learning_rate": 6.264856725184969e-06, + "loss": 17.7364, + "step": 23855 + }, + { + "epoch": 0.4360685104282816, + "grad_norm": 4.7269954324097725, + "learning_rate": 6.264570339569452e-06, + "loss": 16.9981, + "step": 23856 + }, + { + "epoch": 0.4360867896247281, + "grad_norm": 7.295275529568781, + "learning_rate": 6.264283949521725e-06, + "loss": 17.8293, + "step": 23857 + }, + { + "epoch": 0.4361050688211746, + "grad_norm": 6.532155770873544, + "learning_rate": 6.263997555042793e-06, + "loss": 17.7705, + "step": 23858 + }, + { + "epoch": 0.43612334801762115, + "grad_norm": 8.064467076302083, + "learning_rate": 6.263711156133662e-06, + "loss": 18.0814, + "step": 23859 + }, + { + "epoch": 0.4361416272140677, + "grad_norm": 7.302257298135736, + "learning_rate": 6.263424752795331e-06, + "loss": 17.988, + "step": 23860 + }, + { + "epoch": 0.4361599064105142, + "grad_norm": 6.190354554040243, + "learning_rate": 6.263138345028803e-06, + "loss": 17.4445, + "step": 23861 + }, + { + "epoch": 0.4361781856069607, + "grad_norm": 6.427026740535259, + "learning_rate": 6.2628519328350876e-06, + "loss": 17.4506, + "step": 23862 + }, + { + "epoch": 0.43619646480340724, + "grad_norm": 6.372205394798012, + "learning_rate": 6.262565516215187e-06, + "loss": 17.6951, + "step": 23863 + }, + { + "epoch": 0.43621474399985377, + "grad_norm": 8.06059915581161, + "learning_rate": 6.2622790951701006e-06, + "loss": 17.9144, + "step": 23864 + }, + { + "epoch": 0.4362330231963003, + "grad_norm": 6.949122154778351, + "learning_rate": 6.261992669700838e-06, + "loss": 17.8636, + "step": 23865 + }, + { + "epoch": 0.43625130239274684, + "grad_norm": 6.709881056579819, + "learning_rate": 6.2617062398084e-06, + "loss": 17.7407, + "step": 23866 + }, + { + "epoch": 0.4362695815891933, + "grad_norm": 5.416059978746071, + "learning_rate": 6.26141980549379e-06, + "loss": 17.1129, + "step": 23867 + }, + { + "epoch": 0.43628786078563986, + "grad_norm": 
6.833323550308938, + "learning_rate": 6.261133366758014e-06, + "loss": 17.3961, + "step": 23868 + }, + { + "epoch": 0.4363061399820864, + "grad_norm": 6.178535823968487, + "learning_rate": 6.260846923602076e-06, + "loss": 17.0604, + "step": 23869 + }, + { + "epoch": 0.43632441917853293, + "grad_norm": 5.307022416040542, + "learning_rate": 6.2605604760269755e-06, + "loss": 17.076, + "step": 23870 + }, + { + "epoch": 0.4363426983749794, + "grad_norm": 7.152625980662234, + "learning_rate": 6.260274024033724e-06, + "loss": 17.9072, + "step": 23871 + }, + { + "epoch": 0.43636097757142595, + "grad_norm": 5.831215618894096, + "learning_rate": 6.259987567623318e-06, + "loss": 17.2684, + "step": 23872 + }, + { + "epoch": 0.4363792567678725, + "grad_norm": 7.49625516486939, + "learning_rate": 6.2597011067967674e-06, + "loss": 18.1041, + "step": 23873 + }, + { + "epoch": 0.436397535964319, + "grad_norm": 8.868963467919887, + "learning_rate": 6.259414641555072e-06, + "loss": 18.0595, + "step": 23874 + }, + { + "epoch": 0.43641581516076555, + "grad_norm": 7.660085715575635, + "learning_rate": 6.259128171899238e-06, + "loss": 18.0545, + "step": 23875 + }, + { + "epoch": 0.43643409435721203, + "grad_norm": 5.3908779914879235, + "learning_rate": 6.258841697830271e-06, + "loss": 17.1993, + "step": 23876 + }, + { + "epoch": 0.43645237355365857, + "grad_norm": 5.633074670352394, + "learning_rate": 6.2585552193491715e-06, + "loss": 17.0302, + "step": 23877 + }, + { + "epoch": 0.4364706527501051, + "grad_norm": 6.96628031102791, + "learning_rate": 6.258268736456945e-06, + "loss": 17.6736, + "step": 23878 + }, + { + "epoch": 0.43648893194655164, + "grad_norm": 7.111966315222163, + "learning_rate": 6.257982249154596e-06, + "loss": 17.6331, + "step": 23879 + }, + { + "epoch": 0.4365072111429982, + "grad_norm": 6.989383445396583, + "learning_rate": 6.257695757443128e-06, + "loss": 17.7052, + "step": 23880 + }, + { + "epoch": 0.43652549033944466, + "grad_norm": 6.529517624966005, + "learning_rate": 6.257409261323546e-06, + "loss": 17.5059, + "step": 23881 + }, + { + "epoch": 0.4365437695358912, + "grad_norm": 5.992859896160147, + "learning_rate": 6.257122760796853e-06, + "loss": 17.1186, + "step": 23882 + }, + { + "epoch": 0.4365620487323377, + "grad_norm": 7.781229654134481, + "learning_rate": 6.256836255864054e-06, + "loss": 17.7622, + "step": 23883 + }, + { + "epoch": 0.43658032792878426, + "grad_norm": 6.4979118999299725, + "learning_rate": 6.256549746526154e-06, + "loss": 17.6442, + "step": 23884 + }, + { + "epoch": 0.4365986071252308, + "grad_norm": 8.421286888859038, + "learning_rate": 6.2562632327841545e-06, + "loss": 17.7855, + "step": 23885 + }, + { + "epoch": 0.4366168863216773, + "grad_norm": 9.54913063909957, + "learning_rate": 6.2559767146390626e-06, + "loss": 18.0372, + "step": 23886 + }, + { + "epoch": 0.4366351655181238, + "grad_norm": 6.965151729683479, + "learning_rate": 6.255690192091882e-06, + "loss": 17.5536, + "step": 23887 + }, + { + "epoch": 0.43665344471457035, + "grad_norm": 10.681850626360164, + "learning_rate": 6.255403665143615e-06, + "loss": 17.6952, + "step": 23888 + }, + { + "epoch": 0.4366717239110169, + "grad_norm": 5.258765641273252, + "learning_rate": 6.255117133795266e-06, + "loss": 17.033, + "step": 23889 + }, + { + "epoch": 0.4366900031074634, + "grad_norm": 6.665458575687888, + "learning_rate": 6.254830598047843e-06, + "loss": 17.5997, + "step": 23890 + }, + { + "epoch": 0.4367082823039099, + "grad_norm": 6.035690424981967, + "learning_rate": 6.254544057902347e-06, + "loss": 
17.4735, + "step": 23891 + }, + { + "epoch": 0.43672656150035644, + "grad_norm": 6.474421956015781, + "learning_rate": 6.254257513359781e-06, + "loss": 17.5113, + "step": 23892 + }, + { + "epoch": 0.43674484069680297, + "grad_norm": 8.314695596987976, + "learning_rate": 6.253970964421152e-06, + "loss": 18.2821, + "step": 23893 + }, + { + "epoch": 0.4367631198932495, + "grad_norm": 6.256334127436358, + "learning_rate": 6.253684411087465e-06, + "loss": 17.3606, + "step": 23894 + }, + { + "epoch": 0.43678139908969604, + "grad_norm": 7.6796115714427895, + "learning_rate": 6.253397853359723e-06, + "loss": 18.5128, + "step": 23895 + }, + { + "epoch": 0.4367996782861425, + "grad_norm": 6.795941357264282, + "learning_rate": 6.253111291238929e-06, + "loss": 17.7538, + "step": 23896 + }, + { + "epoch": 0.43681795748258906, + "grad_norm": 7.25199643861381, + "learning_rate": 6.2528247247260885e-06, + "loss": 17.594, + "step": 23897 + }, + { + "epoch": 0.4368362366790356, + "grad_norm": 6.372996474514302, + "learning_rate": 6.252538153822206e-06, + "loss": 17.2522, + "step": 23898 + }, + { + "epoch": 0.43685451587548213, + "grad_norm": 6.026701428622392, + "learning_rate": 6.252251578528287e-06, + "loss": 17.3368, + "step": 23899 + }, + { + "epoch": 0.43687279507192867, + "grad_norm": 4.761477883977416, + "learning_rate": 6.2519649988453345e-06, + "loss": 16.8937, + "step": 23900 + }, + { + "epoch": 0.43689107426837515, + "grad_norm": 6.334667458315501, + "learning_rate": 6.251678414774354e-06, + "loss": 17.3993, + "step": 23901 + }, + { + "epoch": 0.4369093534648217, + "grad_norm": 8.493483046830853, + "learning_rate": 6.251391826316348e-06, + "loss": 18.5209, + "step": 23902 + }, + { + "epoch": 0.4369276326612682, + "grad_norm": 5.988722862784205, + "learning_rate": 6.2511052334723225e-06, + "loss": 17.2655, + "step": 23903 + }, + { + "epoch": 0.43694591185771475, + "grad_norm": 7.315354498610283, + "learning_rate": 6.250818636243283e-06, + "loss": 17.8763, + "step": 23904 + }, + { + "epoch": 0.43696419105416123, + "grad_norm": 6.248714930340277, + "learning_rate": 6.250532034630231e-06, + "loss": 17.2859, + "step": 23905 + }, + { + "epoch": 0.43698247025060777, + "grad_norm": 5.514940662980491, + "learning_rate": 6.250245428634174e-06, + "loss": 17.0152, + "step": 23906 + }, + { + "epoch": 0.4370007494470543, + "grad_norm": 5.657675618127949, + "learning_rate": 6.249958818256115e-06, + "loss": 17.3315, + "step": 23907 + }, + { + "epoch": 0.43701902864350084, + "grad_norm": 6.360635433025995, + "learning_rate": 6.249672203497058e-06, + "loss": 17.7566, + "step": 23908 + }, + { + "epoch": 0.4370373078399474, + "grad_norm": 6.008786704469323, + "learning_rate": 6.249385584358009e-06, + "loss": 17.3999, + "step": 23909 + }, + { + "epoch": 0.43705558703639386, + "grad_norm": 7.33142682619034, + "learning_rate": 6.249098960839972e-06, + "loss": 17.7921, + "step": 23910 + }, + { + "epoch": 0.4370738662328404, + "grad_norm": 6.371861169270015, + "learning_rate": 6.248812332943951e-06, + "loss": 17.5232, + "step": 23911 + }, + { + "epoch": 0.4370921454292869, + "grad_norm": 5.869601600415608, + "learning_rate": 6.248525700670951e-06, + "loss": 17.4915, + "step": 23912 + }, + { + "epoch": 0.43711042462573346, + "grad_norm": 5.525187376485091, + "learning_rate": 6.248239064021977e-06, + "loss": 17.2771, + "step": 23913 + }, + { + "epoch": 0.43712870382218, + "grad_norm": 6.488219510405763, + "learning_rate": 6.247952422998035e-06, + "loss": 17.3501, + "step": 23914 + }, + { + "epoch": 0.4371469830186265, + 
"grad_norm": 5.909557666655518, + "learning_rate": 6.247665777600127e-06, + "loss": 17.2575, + "step": 23915 + }, + { + "epoch": 0.437165262215073, + "grad_norm": 5.06380006176535, + "learning_rate": 6.247379127829257e-06, + "loss": 16.8698, + "step": 23916 + }, + { + "epoch": 0.43718354141151955, + "grad_norm": 5.905539963323596, + "learning_rate": 6.247092473686432e-06, + "loss": 17.1606, + "step": 23917 + }, + { + "epoch": 0.4372018206079661, + "grad_norm": 6.807182451339311, + "learning_rate": 6.246805815172659e-06, + "loss": 17.4176, + "step": 23918 + }, + { + "epoch": 0.4372200998044126, + "grad_norm": 6.221123397001821, + "learning_rate": 6.246519152288937e-06, + "loss": 17.6172, + "step": 23919 + }, + { + "epoch": 0.4372383790008591, + "grad_norm": 6.212224300995334, + "learning_rate": 6.246232485036275e-06, + "loss": 17.3482, + "step": 23920 + }, + { + "epoch": 0.43725665819730564, + "grad_norm": 7.823034045524622, + "learning_rate": 6.2459458134156745e-06, + "loss": 18.1664, + "step": 23921 + }, + { + "epoch": 0.43727493739375217, + "grad_norm": 7.033783182189167, + "learning_rate": 6.2456591374281435e-06, + "loss": 17.8423, + "step": 23922 + }, + { + "epoch": 0.4372932165901987, + "grad_norm": 5.502804655149759, + "learning_rate": 6.245372457074685e-06, + "loss": 17.0922, + "step": 23923 + }, + { + "epoch": 0.43731149578664524, + "grad_norm": 6.576363494862701, + "learning_rate": 6.245085772356304e-06, + "loss": 17.6938, + "step": 23924 + }, + { + "epoch": 0.4373297749830917, + "grad_norm": 6.067144789740839, + "learning_rate": 6.244799083274004e-06, + "loss": 17.3229, + "step": 23925 + }, + { + "epoch": 0.43734805417953826, + "grad_norm": 6.394717174482385, + "learning_rate": 6.244512389828794e-06, + "loss": 17.4762, + "step": 23926 + }, + { + "epoch": 0.4373663333759848, + "grad_norm": 5.516036442517155, + "learning_rate": 6.244225692021675e-06, + "loss": 17.2713, + "step": 23927 + }, + { + "epoch": 0.43738461257243133, + "grad_norm": 8.086540536894024, + "learning_rate": 6.243938989853653e-06, + "loss": 17.7486, + "step": 23928 + }, + { + "epoch": 0.43740289176887787, + "grad_norm": 5.642024582760483, + "learning_rate": 6.2436522833257314e-06, + "loss": 17.1427, + "step": 23929 + }, + { + "epoch": 0.43742117096532435, + "grad_norm": 7.3292519349256535, + "learning_rate": 6.2433655724389175e-06, + "loss": 17.8424, + "step": 23930 + }, + { + "epoch": 0.4374394501617709, + "grad_norm": 5.842609832264661, + "learning_rate": 6.243078857194215e-06, + "loss": 17.1244, + "step": 23931 + }, + { + "epoch": 0.4374577293582174, + "grad_norm": 8.550158140001454, + "learning_rate": 6.24279213759263e-06, + "loss": 18.3504, + "step": 23932 + }, + { + "epoch": 0.43747600855466395, + "grad_norm": 6.4760633544487956, + "learning_rate": 6.242505413635166e-06, + "loss": 17.5759, + "step": 23933 + }, + { + "epoch": 0.4374942877511105, + "grad_norm": 6.731350216811516, + "learning_rate": 6.242218685322826e-06, + "loss": 17.5102, + "step": 23934 + }, + { + "epoch": 0.43751256694755697, + "grad_norm": 5.788240053547558, + "learning_rate": 6.24193195265662e-06, + "loss": 17.1085, + "step": 23935 + }, + { + "epoch": 0.4375308461440035, + "grad_norm": 7.5396417647391445, + "learning_rate": 6.24164521563755e-06, + "loss": 18.0382, + "step": 23936 + }, + { + "epoch": 0.43754912534045004, + "grad_norm": 4.642640229587409, + "learning_rate": 6.241358474266621e-06, + "loss": 16.8847, + "step": 23937 + }, + { + "epoch": 0.4375674045368966, + "grad_norm": 4.759367897182057, + "learning_rate": 
6.241071728544837e-06, + "loss": 16.7752, + "step": 23938 + }, + { + "epoch": 0.43758568373334306, + "grad_norm": 6.08158858127298, + "learning_rate": 6.240784978473206e-06, + "loss": 17.1645, + "step": 23939 + }, + { + "epoch": 0.4376039629297896, + "grad_norm": 5.450517691313575, + "learning_rate": 6.2404982240527305e-06, + "loss": 17.2829, + "step": 23940 + }, + { + "epoch": 0.4376222421262361, + "grad_norm": 7.3670376610069255, + "learning_rate": 6.240211465284416e-06, + "loss": 18.0665, + "step": 23941 + }, + { + "epoch": 0.43764052132268266, + "grad_norm": 6.087789442483752, + "learning_rate": 6.23992470216927e-06, + "loss": 17.3223, + "step": 23942 + }, + { + "epoch": 0.4376588005191292, + "grad_norm": 7.9833410695256575, + "learning_rate": 6.2396379347082925e-06, + "loss": 18.3517, + "step": 23943 + }, + { + "epoch": 0.4376770797155757, + "grad_norm": 5.848633507256055, + "learning_rate": 6.239351162902493e-06, + "loss": 17.4503, + "step": 23944 + }, + { + "epoch": 0.4376953589120222, + "grad_norm": 5.796874455548671, + "learning_rate": 6.239064386752876e-06, + "loss": 17.2623, + "step": 23945 + }, + { + "epoch": 0.43771363810846875, + "grad_norm": 7.67556505670471, + "learning_rate": 6.2387776062604454e-06, + "loss": 18.3013, + "step": 23946 + }, + { + "epoch": 0.4377319173049153, + "grad_norm": 5.207616322576824, + "learning_rate": 6.238490821426206e-06, + "loss": 16.9911, + "step": 23947 + }, + { + "epoch": 0.4377501965013618, + "grad_norm": 6.335479282535727, + "learning_rate": 6.238204032251163e-06, + "loss": 17.1, + "step": 23948 + }, + { + "epoch": 0.4377684756978083, + "grad_norm": 9.483746020749363, + "learning_rate": 6.237917238736325e-06, + "loss": 18.4088, + "step": 23949 + }, + { + "epoch": 0.43778675489425484, + "grad_norm": 6.349133558818311, + "learning_rate": 6.237630440882693e-06, + "loss": 17.7754, + "step": 23950 + }, + { + "epoch": 0.4378050340907014, + "grad_norm": 7.326353212586243, + "learning_rate": 6.237343638691273e-06, + "loss": 17.7348, + "step": 23951 + }, + { + "epoch": 0.4378233132871479, + "grad_norm": 5.381147450195405, + "learning_rate": 6.237056832163072e-06, + "loss": 16.8319, + "step": 23952 + }, + { + "epoch": 0.43784159248359444, + "grad_norm": 7.024108480865005, + "learning_rate": 6.236770021299093e-06, + "loss": 17.6267, + "step": 23953 + }, + { + "epoch": 0.4378598716800409, + "grad_norm": 5.617575326960354, + "learning_rate": 6.236483206100344e-06, + "loss": 17.1481, + "step": 23954 + }, + { + "epoch": 0.43787815087648746, + "grad_norm": 11.712267076978886, + "learning_rate": 6.236196386567828e-06, + "loss": 18.2683, + "step": 23955 + }, + { + "epoch": 0.437896430072934, + "grad_norm": 8.004055374621178, + "learning_rate": 6.23590956270255e-06, + "loss": 18.5959, + "step": 23956 + }, + { + "epoch": 0.43791470926938053, + "grad_norm": 6.343183840251039, + "learning_rate": 6.2356227345055175e-06, + "loss": 17.7577, + "step": 23957 + }, + { + "epoch": 0.43793298846582707, + "grad_norm": 7.599136735195646, + "learning_rate": 6.2353359019777335e-06, + "loss": 17.7117, + "step": 23958 + }, + { + "epoch": 0.43795126766227355, + "grad_norm": 5.9030588525894885, + "learning_rate": 6.235049065120207e-06, + "loss": 17.3532, + "step": 23959 + }, + { + "epoch": 0.4379695468587201, + "grad_norm": 8.139557562651724, + "learning_rate": 6.2347622239339376e-06, + "loss": 17.592, + "step": 23960 + }, + { + "epoch": 0.4379878260551666, + "grad_norm": 6.3013355629707055, + "learning_rate": 6.234475378419934e-06, + "loss": 17.6954, + "step": 23961 + }, + { + 
"epoch": 0.43800610525161315, + "grad_norm": 5.630351907541568, + "learning_rate": 6.234188528579202e-06, + "loss": 17.0036, + "step": 23962 + }, + { + "epoch": 0.4380243844480597, + "grad_norm": 5.78054772288933, + "learning_rate": 6.233901674412748e-06, + "loss": 17.3421, + "step": 23963 + }, + { + "epoch": 0.43804266364450617, + "grad_norm": 5.959559681237487, + "learning_rate": 6.2336148159215735e-06, + "loss": 17.3405, + "step": 23964 + }, + { + "epoch": 0.4380609428409527, + "grad_norm": 6.7847804685119, + "learning_rate": 6.233327953106687e-06, + "loss": 17.6707, + "step": 23965 + }, + { + "epoch": 0.43807922203739924, + "grad_norm": 7.25773349149983, + "learning_rate": 6.233041085969092e-06, + "loss": 17.8941, + "step": 23966 + }, + { + "epoch": 0.4380975012338458, + "grad_norm": 7.789015459486722, + "learning_rate": 6.232754214509796e-06, + "loss": 18.152, + "step": 23967 + }, + { + "epoch": 0.4381157804302923, + "grad_norm": 5.943583740901636, + "learning_rate": 6.232467338729803e-06, + "loss": 17.2675, + "step": 23968 + }, + { + "epoch": 0.4381340596267388, + "grad_norm": 7.1440296579959455, + "learning_rate": 6.232180458630119e-06, + "loss": 17.6182, + "step": 23969 + }, + { + "epoch": 0.43815233882318533, + "grad_norm": 6.2490591830604805, + "learning_rate": 6.231893574211749e-06, + "loss": 17.4543, + "step": 23970 + }, + { + "epoch": 0.43817061801963186, + "grad_norm": 6.5830397561092004, + "learning_rate": 6.231606685475701e-06, + "loss": 17.7279, + "step": 23971 + }, + { + "epoch": 0.4381888972160784, + "grad_norm": 7.437304042880089, + "learning_rate": 6.231319792422977e-06, + "loss": 17.8313, + "step": 23972 + }, + { + "epoch": 0.4382071764125249, + "grad_norm": 6.352752017127395, + "learning_rate": 6.231032895054584e-06, + "loss": 17.6606, + "step": 23973 + }, + { + "epoch": 0.4382254556089714, + "grad_norm": 5.6922561812502535, + "learning_rate": 6.230745993371528e-06, + "loss": 17.383, + "step": 23974 + }, + { + "epoch": 0.43824373480541795, + "grad_norm": 5.680137833276108, + "learning_rate": 6.2304590873748115e-06, + "loss": 17.2924, + "step": 23975 + }, + { + "epoch": 0.4382620140018645, + "grad_norm": 6.695873536494616, + "learning_rate": 6.230172177065445e-06, + "loss": 17.8808, + "step": 23976 + }, + { + "epoch": 0.438280293198311, + "grad_norm": 6.708773107718349, + "learning_rate": 6.229885262444433e-06, + "loss": 17.6513, + "step": 23977 + }, + { + "epoch": 0.4382985723947575, + "grad_norm": 6.676344055924407, + "learning_rate": 6.229598343512777e-06, + "loss": 17.6905, + "step": 23978 + }, + { + "epoch": 0.43831685159120404, + "grad_norm": 6.201313962599765, + "learning_rate": 6.229311420271488e-06, + "loss": 17.547, + "step": 23979 + }, + { + "epoch": 0.4383351307876506, + "grad_norm": 6.183536383194596, + "learning_rate": 6.229024492721567e-06, + "loss": 17.4816, + "step": 23980 + }, + { + "epoch": 0.4383534099840971, + "grad_norm": 5.639793513166326, + "learning_rate": 6.228737560864024e-06, + "loss": 17.3323, + "step": 23981 + }, + { + "epoch": 0.43837168918054364, + "grad_norm": 7.02600896370297, + "learning_rate": 6.22845062469986e-06, + "loss": 17.6568, + "step": 23982 + }, + { + "epoch": 0.4383899683769901, + "grad_norm": 5.201563695186877, + "learning_rate": 6.228163684230084e-06, + "loss": 17.1166, + "step": 23983 + }, + { + "epoch": 0.43840824757343666, + "grad_norm": 6.3069927927281295, + "learning_rate": 6.227876739455702e-06, + "loss": 17.5435, + "step": 23984 + }, + { + "epoch": 0.4384265267698832, + "grad_norm": 6.684435791059988, + 
"learning_rate": 6.227589790377717e-06, + "loss": 17.5835, + "step": 23985 + }, + { + "epoch": 0.43844480596632973, + "grad_norm": 6.139683812962959, + "learning_rate": 6.2273028369971375e-06, + "loss": 17.2079, + "step": 23986 + }, + { + "epoch": 0.43846308516277627, + "grad_norm": 6.744759916293599, + "learning_rate": 6.2270158793149696e-06, + "loss": 17.8278, + "step": 23987 + }, + { + "epoch": 0.43848136435922275, + "grad_norm": 6.103049014534535, + "learning_rate": 6.226728917332215e-06, + "loss": 17.4248, + "step": 23988 + }, + { + "epoch": 0.4384996435556693, + "grad_norm": 6.857580617955428, + "learning_rate": 6.226441951049882e-06, + "loss": 17.6062, + "step": 23989 + }, + { + "epoch": 0.4385179227521158, + "grad_norm": 8.286891018914536, + "learning_rate": 6.226154980468978e-06, + "loss": 18.1121, + "step": 23990 + }, + { + "epoch": 0.43853620194856235, + "grad_norm": 6.281215225087562, + "learning_rate": 6.225868005590506e-06, + "loss": 17.5584, + "step": 23991 + }, + { + "epoch": 0.4385544811450089, + "grad_norm": 7.280953450911573, + "learning_rate": 6.225581026415473e-06, + "loss": 17.7032, + "step": 23992 + }, + { + "epoch": 0.43857276034145537, + "grad_norm": 6.979399398605442, + "learning_rate": 6.225294042944884e-06, + "loss": 17.6486, + "step": 23993 + }, + { + "epoch": 0.4385910395379019, + "grad_norm": 6.638458640357061, + "learning_rate": 6.225007055179748e-06, + "loss": 17.723, + "step": 23994 + }, + { + "epoch": 0.43860931873434844, + "grad_norm": 6.057993657988643, + "learning_rate": 6.224720063121067e-06, + "loss": 17.2632, + "step": 23995 + }, + { + "epoch": 0.438627597930795, + "grad_norm": 6.21037187523833, + "learning_rate": 6.224433066769849e-06, + "loss": 17.6976, + "step": 23996 + }, + { + "epoch": 0.4386458771272415, + "grad_norm": 6.595796145271207, + "learning_rate": 6.224146066127099e-06, + "loss": 17.8447, + "step": 23997 + }, + { + "epoch": 0.438664156323688, + "grad_norm": 8.278190147439984, + "learning_rate": 6.2238590611938234e-06, + "loss": 17.9456, + "step": 23998 + }, + { + "epoch": 0.43868243552013453, + "grad_norm": 5.362890817208312, + "learning_rate": 6.223572051971027e-06, + "loss": 17.3774, + "step": 23999 + }, + { + "epoch": 0.43870071471658106, + "grad_norm": 5.991677612466831, + "learning_rate": 6.223285038459719e-06, + "loss": 17.4217, + "step": 24000 + }, + { + "epoch": 0.4387189939130276, + "grad_norm": 6.0251617144174405, + "learning_rate": 6.222998020660903e-06, + "loss": 17.4952, + "step": 24001 + }, + { + "epoch": 0.43873727310947414, + "grad_norm": 6.764376032096751, + "learning_rate": 6.222710998575583e-06, + "loss": 17.6979, + "step": 24002 + }, + { + "epoch": 0.4387555523059206, + "grad_norm": 5.86926991320708, + "learning_rate": 6.222423972204768e-06, + "loss": 17.1538, + "step": 24003 + }, + { + "epoch": 0.43877383150236715, + "grad_norm": 7.762880804805859, + "learning_rate": 6.222136941549464e-06, + "loss": 17.8571, + "step": 24004 + }, + { + "epoch": 0.4387921106988137, + "grad_norm": 7.059405127136602, + "learning_rate": 6.221849906610674e-06, + "loss": 17.3909, + "step": 24005 + }, + { + "epoch": 0.4388103898952602, + "grad_norm": 6.2189132962688705, + "learning_rate": 6.221562867389408e-06, + "loss": 17.4762, + "step": 24006 + }, + { + "epoch": 0.4388286690917067, + "grad_norm": 7.639833437389585, + "learning_rate": 6.221275823886669e-06, + "loss": 18.0506, + "step": 24007 + }, + { + "epoch": 0.43884694828815324, + "grad_norm": 5.459067405090136, + "learning_rate": 6.220988776103465e-06, + "loss": 17.1166, + "step": 
24008 + }, + { + "epoch": 0.4388652274845998, + "grad_norm": 6.609410233825979, + "learning_rate": 6.220701724040801e-06, + "loss": 17.6673, + "step": 24009 + }, + { + "epoch": 0.4388835066810463, + "grad_norm": 6.431944664243485, + "learning_rate": 6.220414667699682e-06, + "loss": 17.5546, + "step": 24010 + }, + { + "epoch": 0.43890178587749284, + "grad_norm": 6.221324915602247, + "learning_rate": 6.220127607081117e-06, + "loss": 17.1842, + "step": 24011 + }, + { + "epoch": 0.4389200650739393, + "grad_norm": 7.07345777971569, + "learning_rate": 6.219840542186111e-06, + "loss": 17.7193, + "step": 24012 + }, + { + "epoch": 0.43893834427038586, + "grad_norm": 7.0063190363877865, + "learning_rate": 6.219553473015668e-06, + "loss": 17.4253, + "step": 24013 + }, + { + "epoch": 0.4389566234668324, + "grad_norm": 6.2546168603984595, + "learning_rate": 6.219266399570798e-06, + "loss": 17.2838, + "step": 24014 + }, + { + "epoch": 0.43897490266327893, + "grad_norm": 6.258635538417954, + "learning_rate": 6.218979321852503e-06, + "loss": 17.4274, + "step": 24015 + }, + { + "epoch": 0.43899318185972547, + "grad_norm": 7.169503195365026, + "learning_rate": 6.218692239861793e-06, + "loss": 17.9354, + "step": 24016 + }, + { + "epoch": 0.43901146105617195, + "grad_norm": 6.601886440297481, + "learning_rate": 6.218405153599671e-06, + "loss": 17.5558, + "step": 24017 + }, + { + "epoch": 0.4390297402526185, + "grad_norm": 6.373022725769502, + "learning_rate": 6.218118063067147e-06, + "loss": 17.4031, + "step": 24018 + }, + { + "epoch": 0.439048019449065, + "grad_norm": 6.5719079781718115, + "learning_rate": 6.2178309682652235e-06, + "loss": 17.7041, + "step": 24019 + }, + { + "epoch": 0.43906629864551155, + "grad_norm": 7.017985430227371, + "learning_rate": 6.2175438691949065e-06, + "loss": 17.6017, + "step": 24020 + }, + { + "epoch": 0.4390845778419581, + "grad_norm": 6.251060702893441, + "learning_rate": 6.217256765857207e-06, + "loss": 17.2571, + "step": 24021 + }, + { + "epoch": 0.43910285703840457, + "grad_norm": 6.367375870634299, + "learning_rate": 6.216969658253125e-06, + "loss": 17.5228, + "step": 24022 + }, + { + "epoch": 0.4391211362348511, + "grad_norm": 6.377043967704567, + "learning_rate": 6.216682546383672e-06, + "loss": 17.8246, + "step": 24023 + }, + { + "epoch": 0.43913941543129764, + "grad_norm": 7.771306717169053, + "learning_rate": 6.216395430249852e-06, + "loss": 18.4007, + "step": 24024 + }, + { + "epoch": 0.4391576946277442, + "grad_norm": 6.000973498365739, + "learning_rate": 6.216108309852672e-06, + "loss": 17.0641, + "step": 24025 + }, + { + "epoch": 0.4391759738241907, + "grad_norm": 7.008358668382307, + "learning_rate": 6.215821185193137e-06, + "loss": 17.7032, + "step": 24026 + }, + { + "epoch": 0.4391942530206372, + "grad_norm": 6.928903847663157, + "learning_rate": 6.215534056272254e-06, + "loss": 17.4494, + "step": 24027 + }, + { + "epoch": 0.43921253221708373, + "grad_norm": 5.625535482139369, + "learning_rate": 6.215246923091032e-06, + "loss": 17.5746, + "step": 24028 + }, + { + "epoch": 0.43923081141353026, + "grad_norm": 6.9614181128989445, + "learning_rate": 6.214959785650472e-06, + "loss": 18.0524, + "step": 24029 + }, + { + "epoch": 0.4392490906099768, + "grad_norm": 7.409461275741407, + "learning_rate": 6.214672643951584e-06, + "loss": 18.0821, + "step": 24030 + }, + { + "epoch": 0.43926736980642334, + "grad_norm": 6.029300780715063, + "learning_rate": 6.214385497995374e-06, + "loss": 17.2179, + "step": 24031 + }, + { + "epoch": 0.4392856490028698, + "grad_norm": 
6.11037002292352, + "learning_rate": 6.214098347782849e-06, + "loss": 17.4044, + "step": 24032 + }, + { + "epoch": 0.43930392819931635, + "grad_norm": 5.834711278241937, + "learning_rate": 6.213811193315015e-06, + "loss": 17.2023, + "step": 24033 + }, + { + "epoch": 0.4393222073957629, + "grad_norm": 6.200046138205611, + "learning_rate": 6.213524034592875e-06, + "loss": 17.4785, + "step": 24034 + }, + { + "epoch": 0.4393404865922094, + "grad_norm": 8.000951197088606, + "learning_rate": 6.213236871617442e-06, + "loss": 18.1242, + "step": 24035 + }, + { + "epoch": 0.43935876578865596, + "grad_norm": 5.27557532248393, + "learning_rate": 6.212949704389718e-06, + "loss": 17.0796, + "step": 24036 + }, + { + "epoch": 0.43937704498510244, + "grad_norm": 5.58697925314592, + "learning_rate": 6.21266253291071e-06, + "loss": 17.3713, + "step": 24037 + }, + { + "epoch": 0.439395324181549, + "grad_norm": 6.952475108885502, + "learning_rate": 6.212375357181426e-06, + "loss": 17.6035, + "step": 24038 + }, + { + "epoch": 0.4394136033779955, + "grad_norm": 6.9391452999085015, + "learning_rate": 6.21208817720287e-06, + "loss": 17.688, + "step": 24039 + }, + { + "epoch": 0.43943188257444205, + "grad_norm": 6.731781520110059, + "learning_rate": 6.211800992976051e-06, + "loss": 17.5839, + "step": 24040 + }, + { + "epoch": 0.4394501617708885, + "grad_norm": 6.403488127370576, + "learning_rate": 6.211513804501975e-06, + "loss": 17.6752, + "step": 24041 + }, + { + "epoch": 0.43946844096733506, + "grad_norm": 5.8170076131586255, + "learning_rate": 6.211226611781649e-06, + "loss": 17.202, + "step": 24042 + }, + { + "epoch": 0.4394867201637816, + "grad_norm": 6.704862089039464, + "learning_rate": 6.2109394148160774e-06, + "loss": 17.8916, + "step": 24043 + }, + { + "epoch": 0.43950499936022813, + "grad_norm": 6.6130829714901695, + "learning_rate": 6.210652213606269e-06, + "loss": 17.7099, + "step": 24044 + }, + { + "epoch": 0.43952327855667467, + "grad_norm": 5.804196571886088, + "learning_rate": 6.21036500815323e-06, + "loss": 17.314, + "step": 24045 + }, + { + "epoch": 0.43954155775312115, + "grad_norm": 5.870295302515808, + "learning_rate": 6.2100777984579655e-06, + "loss": 17.4325, + "step": 24046 + }, + { + "epoch": 0.4395598369495677, + "grad_norm": 5.406245643499944, + "learning_rate": 6.209790584521483e-06, + "loss": 17.091, + "step": 24047 + }, + { + "epoch": 0.4395781161460142, + "grad_norm": 5.301351690539633, + "learning_rate": 6.20950336634479e-06, + "loss": 16.9648, + "step": 24048 + }, + { + "epoch": 0.43959639534246076, + "grad_norm": 6.142972290332052, + "learning_rate": 6.209216143928895e-06, + "loss": 17.133, + "step": 24049 + }, + { + "epoch": 0.4396146745389073, + "grad_norm": 5.772682067366621, + "learning_rate": 6.208928917274799e-06, + "loss": 17.4193, + "step": 24050 + }, + { + "epoch": 0.43963295373535377, + "grad_norm": 6.561133445600708, + "learning_rate": 6.2086416863835145e-06, + "loss": 17.5559, + "step": 24051 + }, + { + "epoch": 0.4396512329318003, + "grad_norm": 7.225885480856654, + "learning_rate": 6.2083544512560434e-06, + "loss": 17.5924, + "step": 24052 + }, + { + "epoch": 0.43966951212824684, + "grad_norm": 6.301709506262397, + "learning_rate": 6.208067211893396e-06, + "loss": 17.5383, + "step": 24053 + }, + { + "epoch": 0.4396877913246934, + "grad_norm": 6.2750822658107355, + "learning_rate": 6.207779968296578e-06, + "loss": 17.3671, + "step": 24054 + }, + { + "epoch": 0.4397060705211399, + "grad_norm": 5.6883360896097, + "learning_rate": 6.207492720466596e-06, + "loss": 
17.2641, + "step": 24055 + }, + { + "epoch": 0.4397243497175864, + "grad_norm": 5.160428857090913, + "learning_rate": 6.207205468404457e-06, + "loss": 17.0549, + "step": 24056 + }, + { + "epoch": 0.43974262891403293, + "grad_norm": 6.177780461799774, + "learning_rate": 6.206918212111167e-06, + "loss": 17.3782, + "step": 24057 + }, + { + "epoch": 0.43976090811047946, + "grad_norm": 6.555941508063238, + "learning_rate": 6.2066309515877334e-06, + "loss": 17.5858, + "step": 24058 + }, + { + "epoch": 0.439779187306926, + "grad_norm": 7.982207387034825, + "learning_rate": 6.206343686835165e-06, + "loss": 18.1021, + "step": 24059 + }, + { + "epoch": 0.43979746650337254, + "grad_norm": 6.827920589372808, + "learning_rate": 6.206056417854464e-06, + "loss": 17.5228, + "step": 24060 + }, + { + "epoch": 0.439815745699819, + "grad_norm": 5.243004926423027, + "learning_rate": 6.205769144646641e-06, + "loss": 17.1173, + "step": 24061 + }, + { + "epoch": 0.43983402489626555, + "grad_norm": 6.091493836617262, + "learning_rate": 6.205481867212701e-06, + "loss": 17.4518, + "step": 24062 + }, + { + "epoch": 0.4398523040927121, + "grad_norm": 5.630099018895974, + "learning_rate": 6.205194585553653e-06, + "loss": 17.106, + "step": 24063 + }, + { + "epoch": 0.4398705832891586, + "grad_norm": 5.488712327693138, + "learning_rate": 6.204907299670502e-06, + "loss": 17.0807, + "step": 24064 + }, + { + "epoch": 0.43988886248560516, + "grad_norm": 8.329318893114538, + "learning_rate": 6.204620009564255e-06, + "loss": 18.4138, + "step": 24065 + }, + { + "epoch": 0.43990714168205164, + "grad_norm": 6.201264196117154, + "learning_rate": 6.20433271523592e-06, + "loss": 17.3556, + "step": 24066 + }, + { + "epoch": 0.4399254208784982, + "grad_norm": 7.803245184763667, + "learning_rate": 6.204045416686503e-06, + "loss": 18.0564, + "step": 24067 + }, + { + "epoch": 0.4399437000749447, + "grad_norm": 5.848519091575874, + "learning_rate": 6.203758113917011e-06, + "loss": 17.3502, + "step": 24068 + }, + { + "epoch": 0.43996197927139125, + "grad_norm": 5.450613093420574, + "learning_rate": 6.2034708069284525e-06, + "loss": 17.0774, + "step": 24069 + }, + { + "epoch": 0.4399802584678378, + "grad_norm": 7.0861272134943984, + "learning_rate": 6.2031834957218314e-06, + "loss": 17.7915, + "step": 24070 + }, + { + "epoch": 0.43999853766428426, + "grad_norm": 6.2486276449081695, + "learning_rate": 6.202896180298158e-06, + "loss": 17.4001, + "step": 24071 + }, + { + "epoch": 0.4400168168607308, + "grad_norm": 5.976318337781716, + "learning_rate": 6.202608860658438e-06, + "loss": 17.5905, + "step": 24072 + }, + { + "epoch": 0.44003509605717733, + "grad_norm": 4.9373641132000525, + "learning_rate": 6.2023215368036785e-06, + "loss": 16.9522, + "step": 24073 + }, + { + "epoch": 0.44005337525362387, + "grad_norm": 6.556857284850879, + "learning_rate": 6.2020342087348854e-06, + "loss": 17.6768, + "step": 24074 + }, + { + "epoch": 0.44007165445007035, + "grad_norm": 7.061485093126709, + "learning_rate": 6.201746876453066e-06, + "loss": 18.1163, + "step": 24075 + }, + { + "epoch": 0.4400899336465169, + "grad_norm": 6.027018540386245, + "learning_rate": 6.201459539959229e-06, + "loss": 17.4606, + "step": 24076 + }, + { + "epoch": 0.4401082128429634, + "grad_norm": 6.039365159304191, + "learning_rate": 6.2011721992543814e-06, + "loss": 17.695, + "step": 24077 + }, + { + "epoch": 0.44012649203940996, + "grad_norm": 7.1681031907290125, + "learning_rate": 6.200884854339529e-06, + "loss": 17.8596, + "step": 24078 + }, + { + "epoch": 0.4401447712358565, 
+ "grad_norm": 6.082145313149376, + "learning_rate": 6.2005975052156784e-06, + "loss": 17.4135, + "step": 24079 + }, + { + "epoch": 0.44016305043230297, + "grad_norm": 6.230506107679308, + "learning_rate": 6.200310151883838e-06, + "loss": 17.5036, + "step": 24080 + }, + { + "epoch": 0.4401813296287495, + "grad_norm": 5.464460759241678, + "learning_rate": 6.200022794345015e-06, + "loss": 17.2063, + "step": 24081 + }, + { + "epoch": 0.44019960882519604, + "grad_norm": 6.622869756721586, + "learning_rate": 6.199735432600216e-06, + "loss": 17.5457, + "step": 24082 + }, + { + "epoch": 0.4402178880216426, + "grad_norm": 7.554716121718214, + "learning_rate": 6.1994480666504484e-06, + "loss": 17.9225, + "step": 24083 + }, + { + "epoch": 0.4402361672180891, + "grad_norm": 5.31264702707009, + "learning_rate": 6.19916069649672e-06, + "loss": 17.0408, + "step": 24084 + }, + { + "epoch": 0.4402544464145356, + "grad_norm": 5.89945498771339, + "learning_rate": 6.198873322140038e-06, + "loss": 17.3624, + "step": 24085 + }, + { + "epoch": 0.44027272561098213, + "grad_norm": 7.644977464635734, + "learning_rate": 6.198585943581407e-06, + "loss": 17.8964, + "step": 24086 + }, + { + "epoch": 0.44029100480742867, + "grad_norm": 6.783231163989308, + "learning_rate": 6.198298560821838e-06, + "loss": 17.6885, + "step": 24087 + }, + { + "epoch": 0.4403092840038752, + "grad_norm": 5.355605410047858, + "learning_rate": 6.198011173862335e-06, + "loss": 17.2444, + "step": 24088 + }, + { + "epoch": 0.44032756320032174, + "grad_norm": 5.804702837279948, + "learning_rate": 6.197723782703908e-06, + "loss": 17.1333, + "step": 24089 + }, + { + "epoch": 0.4403458423967682, + "grad_norm": 7.146224777024456, + "learning_rate": 6.197436387347564e-06, + "loss": 17.8382, + "step": 24090 + }, + { + "epoch": 0.44036412159321475, + "grad_norm": 7.206326932353275, + "learning_rate": 6.197148987794308e-06, + "loss": 17.8005, + "step": 24091 + }, + { + "epoch": 0.4403824007896613, + "grad_norm": 6.633553428716376, + "learning_rate": 6.196861584045149e-06, + "loss": 17.7012, + "step": 24092 + }, + { + "epoch": 0.4404006799861078, + "grad_norm": 6.150559907538199, + "learning_rate": 6.196574176101093e-06, + "loss": 17.3696, + "step": 24093 + }, + { + "epoch": 0.44041895918255436, + "grad_norm": 5.471955820876253, + "learning_rate": 6.19628676396315e-06, + "loss": 17.1405, + "step": 24094 + }, + { + "epoch": 0.44043723837900084, + "grad_norm": 7.408212062568318, + "learning_rate": 6.195999347632324e-06, + "loss": 17.8337, + "step": 24095 + }, + { + "epoch": 0.4404555175754474, + "grad_norm": 6.785862576964398, + "learning_rate": 6.195711927109626e-06, + "loss": 17.7062, + "step": 24096 + }, + { + "epoch": 0.4404737967718939, + "grad_norm": 6.0664998422025045, + "learning_rate": 6.19542450239606e-06, + "loss": 17.7203, + "step": 24097 + }, + { + "epoch": 0.44049207596834045, + "grad_norm": 6.928755566027269, + "learning_rate": 6.1951370734926355e-06, + "loss": 17.978, + "step": 24098 + }, + { + "epoch": 0.440510355164787, + "grad_norm": 7.80758531994904, + "learning_rate": 6.194849640400359e-06, + "loss": 17.9731, + "step": 24099 + }, + { + "epoch": 0.44052863436123346, + "grad_norm": 6.613035027127089, + "learning_rate": 6.194562203120238e-06, + "loss": 17.6502, + "step": 24100 + }, + { + "epoch": 0.44054691355768, + "grad_norm": 7.56195043778486, + "learning_rate": 6.194274761653281e-06, + "loss": 18.3339, + "step": 24101 + }, + { + "epoch": 0.44056519275412653, + "grad_norm": 6.171567868539247, + "learning_rate": 6.1939873160004935e-06, 
+ "loss": 17.5275, + "step": 24102 + }, + { + "epoch": 0.44058347195057307, + "grad_norm": 8.35353216457608, + "learning_rate": 6.193699866162884e-06, + "loss": 17.7445, + "step": 24103 + }, + { + "epoch": 0.4406017511470196, + "grad_norm": 7.110938906108972, + "learning_rate": 6.193412412141462e-06, + "loss": 17.7643, + "step": 24104 + }, + { + "epoch": 0.4406200303434661, + "grad_norm": 6.212587681886543, + "learning_rate": 6.193124953937232e-06, + "loss": 17.2705, + "step": 24105 + }, + { + "epoch": 0.4406383095399126, + "grad_norm": 8.214394566138274, + "learning_rate": 6.1928374915512024e-06, + "loss": 17.8221, + "step": 24106 + }, + { + "epoch": 0.44065658873635916, + "grad_norm": 5.751694848503822, + "learning_rate": 6.192550024984381e-06, + "loss": 17.581, + "step": 24107 + }, + { + "epoch": 0.4406748679328057, + "grad_norm": 5.798670043369045, + "learning_rate": 6.192262554237774e-06, + "loss": 17.3722, + "step": 24108 + }, + { + "epoch": 0.44069314712925217, + "grad_norm": 5.719614869280944, + "learning_rate": 6.191975079312391e-06, + "loss": 17.3738, + "step": 24109 + }, + { + "epoch": 0.4407114263256987, + "grad_norm": 6.068344991425805, + "learning_rate": 6.1916876002092394e-06, + "loss": 17.2393, + "step": 24110 + }, + { + "epoch": 0.44072970552214524, + "grad_norm": 5.9317090430344095, + "learning_rate": 6.191400116929326e-06, + "loss": 17.3586, + "step": 24111 + }, + { + "epoch": 0.4407479847185918, + "grad_norm": 8.087094429335183, + "learning_rate": 6.191112629473658e-06, + "loss": 17.7309, + "step": 24112 + }, + { + "epoch": 0.4407662639150383, + "grad_norm": 5.378131859592571, + "learning_rate": 6.1908251378432434e-06, + "loss": 17.1388, + "step": 24113 + }, + { + "epoch": 0.4407845431114848, + "grad_norm": 6.349659146169226, + "learning_rate": 6.190537642039092e-06, + "loss": 17.5838, + "step": 24114 + }, + { + "epoch": 0.44080282230793133, + "grad_norm": 6.523370246758639, + "learning_rate": 6.1902501420622066e-06, + "loss": 17.6528, + "step": 24115 + }, + { + "epoch": 0.44082110150437787, + "grad_norm": 7.487310635175428, + "learning_rate": 6.1899626379135995e-06, + "loss": 17.5962, + "step": 24116 + }, + { + "epoch": 0.4408393807008244, + "grad_norm": 7.361289340010666, + "learning_rate": 6.189675129594276e-06, + "loss": 18.0879, + "step": 24117 + }, + { + "epoch": 0.44085765989727094, + "grad_norm": 7.610033682556638, + "learning_rate": 6.189387617105246e-06, + "loss": 17.7131, + "step": 24118 + }, + { + "epoch": 0.4408759390937174, + "grad_norm": 5.350356172600791, + "learning_rate": 6.1891001004475135e-06, + "loss": 17.0352, + "step": 24119 + }, + { + "epoch": 0.44089421829016395, + "grad_norm": 7.939890278819534, + "learning_rate": 6.188812579622089e-06, + "loss": 17.9314, + "step": 24120 + }, + { + "epoch": 0.4409124974866105, + "grad_norm": 6.983534295055088, + "learning_rate": 6.1885250546299805e-06, + "loss": 17.6833, + "step": 24121 + }, + { + "epoch": 0.440930776683057, + "grad_norm": 6.748967627266763, + "learning_rate": 6.188237525472194e-06, + "loss": 17.6455, + "step": 24122 + }, + { + "epoch": 0.44094905587950356, + "grad_norm": 7.317423161030537, + "learning_rate": 6.187949992149737e-06, + "loss": 18.1427, + "step": 24123 + }, + { + "epoch": 0.44096733507595004, + "grad_norm": 7.7601246543196645, + "learning_rate": 6.18766245466362e-06, + "loss": 17.9725, + "step": 24124 + }, + { + "epoch": 0.4409856142723966, + "grad_norm": 6.938177549182238, + "learning_rate": 6.187374913014849e-06, + "loss": 17.5571, + "step": 24125 + }, + { + "epoch": 
0.4410038934688431, + "grad_norm": 6.565980904736275, + "learning_rate": 6.187087367204431e-06, + "loss": 17.5904, + "step": 24126 + }, + { + "epoch": 0.44102217266528965, + "grad_norm": 7.039811303293514, + "learning_rate": 6.186799817233376e-06, + "loss": 17.7007, + "step": 24127 + }, + { + "epoch": 0.4410404518617362, + "grad_norm": 6.225445666144468, + "learning_rate": 6.186512263102691e-06, + "loss": 17.3729, + "step": 24128 + }, + { + "epoch": 0.44105873105818266, + "grad_norm": 4.526645758632178, + "learning_rate": 6.18622470481338e-06, + "loss": 16.7223, + "step": 24129 + }, + { + "epoch": 0.4410770102546292, + "grad_norm": 5.020622148618269, + "learning_rate": 6.1859371423664576e-06, + "loss": 16.7954, + "step": 24130 + }, + { + "epoch": 0.44109528945107573, + "grad_norm": 7.681872407141381, + "learning_rate": 6.185649575762927e-06, + "loss": 18.0012, + "step": 24131 + }, + { + "epoch": 0.44111356864752227, + "grad_norm": 6.0825591877760985, + "learning_rate": 6.1853620050038e-06, + "loss": 17.4602, + "step": 24132 + }, + { + "epoch": 0.4411318478439688, + "grad_norm": 7.331425433942787, + "learning_rate": 6.18507443009008e-06, + "loss": 17.8376, + "step": 24133 + }, + { + "epoch": 0.4411501270404153, + "grad_norm": 7.5145986448707145, + "learning_rate": 6.184786851022776e-06, + "loss": 17.823, + "step": 24134 + }, + { + "epoch": 0.4411684062368618, + "grad_norm": 8.883725417509767, + "learning_rate": 6.184499267802899e-06, + "loss": 18.034, + "step": 24135 + }, + { + "epoch": 0.44118668543330836, + "grad_norm": 5.554931324624878, + "learning_rate": 6.184211680431453e-06, + "loss": 16.9853, + "step": 24136 + }, + { + "epoch": 0.4412049646297549, + "grad_norm": 6.7366515615716205, + "learning_rate": 6.1839240889094494e-06, + "loss": 17.6005, + "step": 24137 + }, + { + "epoch": 0.44122324382620143, + "grad_norm": 7.613902781998414, + "learning_rate": 6.183636493237895e-06, + "loss": 18.2018, + "step": 24138 + }, + { + "epoch": 0.4412415230226479, + "grad_norm": 6.248567696616156, + "learning_rate": 6.1833488934177956e-06, + "loss": 17.35, + "step": 24139 + }, + { + "epoch": 0.44125980221909444, + "grad_norm": 6.427620493390211, + "learning_rate": 6.183061289450162e-06, + "loss": 17.4878, + "step": 24140 + }, + { + "epoch": 0.441278081415541, + "grad_norm": 6.949019104185665, + "learning_rate": 6.182773681336e-06, + "loss": 17.7226, + "step": 24141 + }, + { + "epoch": 0.4412963606119875, + "grad_norm": 6.536886009697157, + "learning_rate": 6.18248606907632e-06, + "loss": 17.4815, + "step": 24142 + }, + { + "epoch": 0.441314639808434, + "grad_norm": 6.913409699567346, + "learning_rate": 6.182198452672129e-06, + "loss": 17.379, + "step": 24143 + }, + { + "epoch": 0.44133291900488053, + "grad_norm": 6.160386367775416, + "learning_rate": 6.181910832124435e-06, + "loss": 17.5737, + "step": 24144 + }, + { + "epoch": 0.44135119820132707, + "grad_norm": 6.536423006612148, + "learning_rate": 6.181623207434246e-06, + "loss": 17.535, + "step": 24145 + }, + { + "epoch": 0.4413694773977736, + "grad_norm": 4.932389707799513, + "learning_rate": 6.1813355786025705e-06, + "loss": 16.9306, + "step": 24146 + }, + { + "epoch": 0.44138775659422014, + "grad_norm": 6.3232867337010665, + "learning_rate": 6.181047945630415e-06, + "loss": 17.3533, + "step": 24147 + }, + { + "epoch": 0.4414060357906666, + "grad_norm": 6.136195358541279, + "learning_rate": 6.18076030851879e-06, + "loss": 17.2286, + "step": 24148 + }, + { + "epoch": 0.44142431498711315, + "grad_norm": 6.138399060310465, + "learning_rate": 
6.180472667268703e-06, + "loss": 17.3879, + "step": 24149 + }, + { + "epoch": 0.4414425941835597, + "grad_norm": 4.486653669905141, + "learning_rate": 6.180185021881161e-06, + "loss": 16.8849, + "step": 24150 + }, + { + "epoch": 0.4414608733800062, + "grad_norm": 5.733280473822919, + "learning_rate": 6.179897372357173e-06, + "loss": 17.1547, + "step": 24151 + }, + { + "epoch": 0.44147915257645276, + "grad_norm": 5.648891036525072, + "learning_rate": 6.179609718697748e-06, + "loss": 17.3554, + "step": 24152 + }, + { + "epoch": 0.44149743177289924, + "grad_norm": 7.292753873653095, + "learning_rate": 6.179322060903892e-06, + "loss": 17.5932, + "step": 24153 + }, + { + "epoch": 0.4415157109693458, + "grad_norm": 6.400696449672667, + "learning_rate": 6.1790343989766155e-06, + "loss": 17.6274, + "step": 24154 + }, + { + "epoch": 0.4415339901657923, + "grad_norm": 6.882805693823392, + "learning_rate": 6.1787467329169245e-06, + "loss": 17.4463, + "step": 24155 + }, + { + "epoch": 0.44155226936223885, + "grad_norm": 6.98579812805766, + "learning_rate": 6.178459062725829e-06, + "loss": 17.4662, + "step": 24156 + }, + { + "epoch": 0.4415705485586854, + "grad_norm": 5.993690162660362, + "learning_rate": 6.178171388404337e-06, + "loss": 17.3647, + "step": 24157 + }, + { + "epoch": 0.44158882775513186, + "grad_norm": 5.645586666406506, + "learning_rate": 6.177883709953457e-06, + "loss": 17.2167, + "step": 24158 + }, + { + "epoch": 0.4416071069515784, + "grad_norm": 6.216929702676079, + "learning_rate": 6.177596027374197e-06, + "loss": 17.6003, + "step": 24159 + }, + { + "epoch": 0.44162538614802493, + "grad_norm": 8.872753679417796, + "learning_rate": 6.177308340667565e-06, + "loss": 18.4974, + "step": 24160 + }, + { + "epoch": 0.44164366534447147, + "grad_norm": 7.430306716374661, + "learning_rate": 6.177020649834567e-06, + "loss": 18.0911, + "step": 24161 + }, + { + "epoch": 0.441661944540918, + "grad_norm": 7.063357979590903, + "learning_rate": 6.176732954876215e-06, + "loss": 17.4794, + "step": 24162 + }, + { + "epoch": 0.4416802237373645, + "grad_norm": 5.1590089963925445, + "learning_rate": 6.1764452557935185e-06, + "loss": 16.9837, + "step": 24163 + }, + { + "epoch": 0.441698502933811, + "grad_norm": 6.164279204045742, + "learning_rate": 6.176157552587481e-06, + "loss": 17.3539, + "step": 24164 + }, + { + "epoch": 0.44171678213025756, + "grad_norm": 9.5639257842599, + "learning_rate": 6.175869845259115e-06, + "loss": 18.3954, + "step": 24165 + }, + { + "epoch": 0.4417350613267041, + "grad_norm": 5.901771352074644, + "learning_rate": 6.175582133809426e-06, + "loss": 17.3821, + "step": 24166 + }, + { + "epoch": 0.44175334052315063, + "grad_norm": 7.029474309152619, + "learning_rate": 6.175294418239424e-06, + "loss": 17.1903, + "step": 24167 + }, + { + "epoch": 0.4417716197195971, + "grad_norm": 7.983708765747276, + "learning_rate": 6.175006698550117e-06, + "loss": 17.8349, + "step": 24168 + }, + { + "epoch": 0.44178989891604364, + "grad_norm": 5.763698744975223, + "learning_rate": 6.174718974742513e-06, + "loss": 17.1935, + "step": 24169 + }, + { + "epoch": 0.4418081781124902, + "grad_norm": 6.64625075210769, + "learning_rate": 6.174431246817621e-06, + "loss": 17.4247, + "step": 24170 + }, + { + "epoch": 0.4418264573089367, + "grad_norm": 6.923610002131327, + "learning_rate": 6.17414351477645e-06, + "loss": 17.6074, + "step": 24171 + }, + { + "epoch": 0.44184473650538325, + "grad_norm": 8.948947891851802, + "learning_rate": 6.173855778620007e-06, + "loss": 18.4662, + "step": 24172 + }, + { + 
"epoch": 0.44186301570182973, + "grad_norm": 7.043904495888951, + "learning_rate": 6.173568038349304e-06, + "loss": 17.8628, + "step": 24173 + }, + { + "epoch": 0.44188129489827627, + "grad_norm": 5.500254369039936, + "learning_rate": 6.173280293965343e-06, + "loss": 16.8723, + "step": 24174 + }, + { + "epoch": 0.4418995740947228, + "grad_norm": 6.490909643950598, + "learning_rate": 6.172992545469139e-06, + "loss": 17.4127, + "step": 24175 + }, + { + "epoch": 0.44191785329116934, + "grad_norm": 5.883672564127929, + "learning_rate": 6.172704792861698e-06, + "loss": 17.1861, + "step": 24176 + }, + { + "epoch": 0.4419361324876158, + "grad_norm": 6.70316592529986, + "learning_rate": 6.172417036144027e-06, + "loss": 17.4034, + "step": 24177 + }, + { + "epoch": 0.44195441168406235, + "grad_norm": 6.224850651602484, + "learning_rate": 6.172129275317137e-06, + "loss": 17.2891, + "step": 24178 + }, + { + "epoch": 0.4419726908805089, + "grad_norm": 7.706510807772626, + "learning_rate": 6.171841510382034e-06, + "loss": 18.1552, + "step": 24179 + }, + { + "epoch": 0.4419909700769554, + "grad_norm": 6.183663533753914, + "learning_rate": 6.17155374133973e-06, + "loss": 17.4231, + "step": 24180 + }, + { + "epoch": 0.44200924927340196, + "grad_norm": 7.01547013991577, + "learning_rate": 6.171265968191231e-06, + "loss": 17.8055, + "step": 24181 + }, + { + "epoch": 0.44202752846984844, + "grad_norm": 7.553437793710265, + "learning_rate": 6.170978190937547e-06, + "loss": 18.0711, + "step": 24182 + }, + { + "epoch": 0.442045807666295, + "grad_norm": 6.4266653424142115, + "learning_rate": 6.170690409579685e-06, + "loss": 17.5941, + "step": 24183 + }, + { + "epoch": 0.4420640868627415, + "grad_norm": 5.947258359679659, + "learning_rate": 6.170402624118655e-06, + "loss": 17.1631, + "step": 24184 + }, + { + "epoch": 0.44208236605918805, + "grad_norm": 5.5195636614023575, + "learning_rate": 6.170114834555466e-06, + "loss": 17.259, + "step": 24185 + }, + { + "epoch": 0.4421006452556346, + "grad_norm": 7.218084105817804, + "learning_rate": 6.1698270408911266e-06, + "loss": 17.7056, + "step": 24186 + }, + { + "epoch": 0.44211892445208106, + "grad_norm": 6.099651480618411, + "learning_rate": 6.169539243126644e-06, + "loss": 17.5093, + "step": 24187 + }, + { + "epoch": 0.4421372036485276, + "grad_norm": 6.663853645288097, + "learning_rate": 6.169251441263028e-06, + "loss": 17.769, + "step": 24188 + }, + { + "epoch": 0.44215548284497413, + "grad_norm": 5.849334981384076, + "learning_rate": 6.168963635301287e-06, + "loss": 17.1934, + "step": 24189 + }, + { + "epoch": 0.44217376204142067, + "grad_norm": 5.993995202711705, + "learning_rate": 6.168675825242431e-06, + "loss": 16.9622, + "step": 24190 + }, + { + "epoch": 0.4421920412378672, + "grad_norm": 5.630983754426648, + "learning_rate": 6.168388011087466e-06, + "loss": 17.2036, + "step": 24191 + }, + { + "epoch": 0.4422103204343137, + "grad_norm": 7.195487849986069, + "learning_rate": 6.168100192837403e-06, + "loss": 18.0233, + "step": 24192 + }, + { + "epoch": 0.4422285996307602, + "grad_norm": 7.121234997380964, + "learning_rate": 6.167812370493249e-06, + "loss": 17.7001, + "step": 24193 + }, + { + "epoch": 0.44224687882720676, + "grad_norm": 7.362998605201447, + "learning_rate": 6.167524544056018e-06, + "loss": 18.134, + "step": 24194 + }, + { + "epoch": 0.4422651580236533, + "grad_norm": 7.772638600808954, + "learning_rate": 6.167236713526711e-06, + "loss": 17.8615, + "step": 24195 + }, + { + "epoch": 0.44228343722009983, + "grad_norm": 7.129707872885745, + 
"learning_rate": 6.166948878906341e-06, + "loss": 17.7359, + "step": 24196 + }, + { + "epoch": 0.4423017164165463, + "grad_norm": 5.58093023938167, + "learning_rate": 6.166661040195917e-06, + "loss": 17.0113, + "step": 24197 + }, + { + "epoch": 0.44231999561299284, + "grad_norm": 5.2716662428945185, + "learning_rate": 6.166373197396448e-06, + "loss": 17.0409, + "step": 24198 + }, + { + "epoch": 0.4423382748094394, + "grad_norm": 6.236117649285141, + "learning_rate": 6.166085350508941e-06, + "loss": 17.4686, + "step": 24199 + }, + { + "epoch": 0.4423565540058859, + "grad_norm": 6.92155220381434, + "learning_rate": 6.165797499534407e-06, + "loss": 17.9482, + "step": 24200 + }, + { + "epoch": 0.44237483320233245, + "grad_norm": 6.806176269382211, + "learning_rate": 6.165509644473855e-06, + "loss": 18.0436, + "step": 24201 + }, + { + "epoch": 0.44239311239877893, + "grad_norm": 6.325650007245598, + "learning_rate": 6.165221785328289e-06, + "loss": 17.4151, + "step": 24202 + }, + { + "epoch": 0.44241139159522547, + "grad_norm": 5.469717470043323, + "learning_rate": 6.164933922098725e-06, + "loss": 17.1739, + "step": 24203 + }, + { + "epoch": 0.442429670791672, + "grad_norm": 7.086379925307295, + "learning_rate": 6.164646054786168e-06, + "loss": 17.9885, + "step": 24204 + }, + { + "epoch": 0.44244794998811854, + "grad_norm": 6.558634207594864, + "learning_rate": 6.164358183391628e-06, + "loss": 17.3538, + "step": 24205 + }, + { + "epoch": 0.4424662291845651, + "grad_norm": 9.546454805546572, + "learning_rate": 6.164070307916113e-06, + "loss": 18.1258, + "step": 24206 + }, + { + "epoch": 0.44248450838101155, + "grad_norm": 7.320380266248527, + "learning_rate": 6.1637824283606314e-06, + "loss": 18.2428, + "step": 24207 + }, + { + "epoch": 0.4425027875774581, + "grad_norm": 7.917832202990051, + "learning_rate": 6.163494544726195e-06, + "loss": 17.5665, + "step": 24208 + }, + { + "epoch": 0.4425210667739046, + "grad_norm": 7.062214357481698, + "learning_rate": 6.163206657013811e-06, + "loss": 17.871, + "step": 24209 + }, + { + "epoch": 0.44253934597035116, + "grad_norm": 7.780263464878502, + "learning_rate": 6.162918765224488e-06, + "loss": 17.4316, + "step": 24210 + }, + { + "epoch": 0.44255762516679764, + "grad_norm": 6.833201342721086, + "learning_rate": 6.162630869359236e-06, + "loss": 17.5612, + "step": 24211 + }, + { + "epoch": 0.4425759043632442, + "grad_norm": 6.497135885491084, + "learning_rate": 6.162342969419064e-06, + "loss": 17.6416, + "step": 24212 + }, + { + "epoch": 0.4425941835596907, + "grad_norm": 5.856626806966802, + "learning_rate": 6.162055065404981e-06, + "loss": 17.2533, + "step": 24213 + }, + { + "epoch": 0.44261246275613725, + "grad_norm": 6.313626717112335, + "learning_rate": 6.161767157317996e-06, + "loss": 17.6512, + "step": 24214 + }, + { + "epoch": 0.4426307419525838, + "grad_norm": 7.22027848710395, + "learning_rate": 6.161479245159115e-06, + "loss": 17.7917, + "step": 24215 + }, + { + "epoch": 0.44264902114903026, + "grad_norm": 6.204356458413, + "learning_rate": 6.161191328929354e-06, + "loss": 17.5948, + "step": 24216 + }, + { + "epoch": 0.4426673003454768, + "grad_norm": 7.258930959726999, + "learning_rate": 6.160903408629716e-06, + "loss": 17.3764, + "step": 24217 + }, + { + "epoch": 0.44268557954192334, + "grad_norm": 8.039318793161836, + "learning_rate": 6.160615484261213e-06, + "loss": 17.8878, + "step": 24218 + }, + { + "epoch": 0.44270385873836987, + "grad_norm": 5.714886397409613, + "learning_rate": 6.160327555824853e-06, + "loss": 17.2314, + "step": 24219 
+ }, + { + "epoch": 0.4427221379348164, + "grad_norm": 5.866988746985408, + "learning_rate": 6.160039623321645e-06, + "loss": 17.38, + "step": 24220 + }, + { + "epoch": 0.4427404171312629, + "grad_norm": 5.424162144011653, + "learning_rate": 6.159751686752601e-06, + "loss": 17.0537, + "step": 24221 + }, + { + "epoch": 0.4427586963277094, + "grad_norm": 6.119947301657276, + "learning_rate": 6.159463746118726e-06, + "loss": 17.5312, + "step": 24222 + }, + { + "epoch": 0.44277697552415596, + "grad_norm": 5.83570647973294, + "learning_rate": 6.159175801421031e-06, + "loss": 17.1355, + "step": 24223 + }, + { + "epoch": 0.4427952547206025, + "grad_norm": 7.079606293471452, + "learning_rate": 6.1588878526605265e-06, + "loss": 17.6112, + "step": 24224 + }, + { + "epoch": 0.44281353391704903, + "grad_norm": 6.662479255800925, + "learning_rate": 6.15859989983822e-06, + "loss": 17.9218, + "step": 24225 + }, + { + "epoch": 0.4428318131134955, + "grad_norm": 5.76213768489833, + "learning_rate": 6.158311942955122e-06, + "loss": 17.2206, + "step": 24226 + }, + { + "epoch": 0.44285009230994204, + "grad_norm": 7.465509608667722, + "learning_rate": 6.1580239820122414e-06, + "loss": 17.8642, + "step": 24227 + }, + { + "epoch": 0.4428683715063886, + "grad_norm": 7.247346089799373, + "learning_rate": 6.157736017010587e-06, + "loss": 17.7723, + "step": 24228 + }, + { + "epoch": 0.4428866507028351, + "grad_norm": 4.697584188159765, + "learning_rate": 6.157448047951166e-06, + "loss": 16.8858, + "step": 24229 + }, + { + "epoch": 0.44290492989928165, + "grad_norm": 8.43424148909306, + "learning_rate": 6.157160074834992e-06, + "loss": 18.319, + "step": 24230 + }, + { + "epoch": 0.44292320909572813, + "grad_norm": 6.109837674523738, + "learning_rate": 6.156872097663073e-06, + "loss": 17.1626, + "step": 24231 + }, + { + "epoch": 0.44294148829217467, + "grad_norm": 6.568676897566294, + "learning_rate": 6.1565841164364185e-06, + "loss": 17.2407, + "step": 24232 + }, + { + "epoch": 0.4429597674886212, + "grad_norm": 5.922361169144793, + "learning_rate": 6.156296131156036e-06, + "loss": 17.0792, + "step": 24233 + }, + { + "epoch": 0.44297804668506774, + "grad_norm": 7.670628458841614, + "learning_rate": 6.156008141822933e-06, + "loss": 18.2302, + "step": 24234 + }, + { + "epoch": 0.4429963258815143, + "grad_norm": 5.4385676906398555, + "learning_rate": 6.155720148438126e-06, + "loss": 17.2284, + "step": 24235 + }, + { + "epoch": 0.44301460507796075, + "grad_norm": 6.069382087305104, + "learning_rate": 6.155432151002618e-06, + "loss": 17.2016, + "step": 24236 + }, + { + "epoch": 0.4430328842744073, + "grad_norm": 5.568902653557678, + "learning_rate": 6.15514414951742e-06, + "loss": 17.0489, + "step": 24237 + }, + { + "epoch": 0.4430511634708538, + "grad_norm": 6.347852184975662, + "learning_rate": 6.154856143983544e-06, + "loss": 17.5024, + "step": 24238 + }, + { + "epoch": 0.44306944266730036, + "grad_norm": 6.4876102673276534, + "learning_rate": 6.154568134401996e-06, + "loss": 17.4501, + "step": 24239 + }, + { + "epoch": 0.4430877218637469, + "grad_norm": 5.946846618681404, + "learning_rate": 6.154280120773787e-06, + "loss": 17.3774, + "step": 24240 + }, + { + "epoch": 0.4431060010601934, + "grad_norm": 6.819805116977419, + "learning_rate": 6.1539921030999276e-06, + "loss": 17.2599, + "step": 24241 + }, + { + "epoch": 0.4431242802566399, + "grad_norm": 6.4322762761306285, + "learning_rate": 6.153704081381424e-06, + "loss": 17.499, + "step": 24242 + }, + { + "epoch": 0.44314255945308645, + "grad_norm": 7.1095276575998, + 
"learning_rate": 6.153416055619289e-06, + "loss": 17.6187, + "step": 24243 + }, + { + "epoch": 0.443160838649533, + "grad_norm": 5.3262334183648665, + "learning_rate": 6.15312802581453e-06, + "loss": 16.8974, + "step": 24244 + }, + { + "epoch": 0.44317911784597946, + "grad_norm": 6.892633490560132, + "learning_rate": 6.152839991968159e-06, + "loss": 17.5396, + "step": 24245 + }, + { + "epoch": 0.443197397042426, + "grad_norm": 6.6684426234079615, + "learning_rate": 6.152551954081183e-06, + "loss": 17.5883, + "step": 24246 + }, + { + "epoch": 0.44321567623887254, + "grad_norm": 5.857402012260117, + "learning_rate": 6.152263912154611e-06, + "loss": 17.3895, + "step": 24247 + }, + { + "epoch": 0.44323395543531907, + "grad_norm": 6.58462638691802, + "learning_rate": 6.151975866189455e-06, + "loss": 17.6756, + "step": 24248 + }, + { + "epoch": 0.4432522346317656, + "grad_norm": 8.599941133848265, + "learning_rate": 6.151687816186725e-06, + "loss": 18.5927, + "step": 24249 + }, + { + "epoch": 0.4432705138282121, + "grad_norm": 6.610459640862656, + "learning_rate": 6.151399762147428e-06, + "loss": 17.3349, + "step": 24250 + }, + { + "epoch": 0.4432887930246586, + "grad_norm": 6.320284833880291, + "learning_rate": 6.151111704072574e-06, + "loss": 17.5565, + "step": 24251 + }, + { + "epoch": 0.44330707222110516, + "grad_norm": 6.754158347300993, + "learning_rate": 6.150823641963174e-06, + "loss": 17.9968, + "step": 24252 + }, + { + "epoch": 0.4433253514175517, + "grad_norm": 6.009291161879157, + "learning_rate": 6.150535575820237e-06, + "loss": 17.4278, + "step": 24253 + }, + { + "epoch": 0.44334363061399823, + "grad_norm": 6.194526641385256, + "learning_rate": 6.150247505644773e-06, + "loss": 17.327, + "step": 24254 + }, + { + "epoch": 0.4433619098104447, + "grad_norm": 8.857128349580567, + "learning_rate": 6.149959431437791e-06, + "loss": 18.4417, + "step": 24255 + }, + { + "epoch": 0.44338018900689125, + "grad_norm": 5.916802344099659, + "learning_rate": 6.149671353200301e-06, + "loss": 17.2905, + "step": 24256 + }, + { + "epoch": 0.4433984682033378, + "grad_norm": 5.3010549313235895, + "learning_rate": 6.149383270933311e-06, + "loss": 17.1912, + "step": 24257 + }, + { + "epoch": 0.4434167473997843, + "grad_norm": 6.769126587055146, + "learning_rate": 6.149095184637834e-06, + "loss": 18.0512, + "step": 24258 + }, + { + "epoch": 0.44343502659623085, + "grad_norm": 6.034267195322498, + "learning_rate": 6.148807094314879e-06, + "loss": 17.3947, + "step": 24259 + }, + { + "epoch": 0.44345330579267733, + "grad_norm": 6.846131522263476, + "learning_rate": 6.148518999965454e-06, + "loss": 17.6969, + "step": 24260 + }, + { + "epoch": 0.44347158498912387, + "grad_norm": 6.654444558424248, + "learning_rate": 6.148230901590568e-06, + "loss": 17.5979, + "step": 24261 + }, + { + "epoch": 0.4434898641855704, + "grad_norm": 5.927856863203363, + "learning_rate": 6.147942799191235e-06, + "loss": 17.3457, + "step": 24262 + }, + { + "epoch": 0.44350814338201694, + "grad_norm": 5.881571836399706, + "learning_rate": 6.147654692768461e-06, + "loss": 17.3193, + "step": 24263 + }, + { + "epoch": 0.4435264225784635, + "grad_norm": 6.040106129984872, + "learning_rate": 6.1473665823232565e-06, + "loss": 17.1444, + "step": 24264 + }, + { + "epoch": 0.44354470177490996, + "grad_norm": 6.189774380879795, + "learning_rate": 6.147078467856632e-06, + "loss": 17.3509, + "step": 24265 + }, + { + "epoch": 0.4435629809713565, + "grad_norm": 6.826063409108439, + "learning_rate": 6.146790349369597e-06, + "loss": 17.5885, + "step": 
24266 + }, + { + "epoch": 0.443581260167803, + "grad_norm": 6.001597727562739, + "learning_rate": 6.146502226863161e-06, + "loss": 17.4777, + "step": 24267 + }, + { + "epoch": 0.44359953936424956, + "grad_norm": 8.585103529418781, + "learning_rate": 6.146214100338335e-06, + "loss": 18.3625, + "step": 24268 + }, + { + "epoch": 0.4436178185606961, + "grad_norm": 6.588201065544275, + "learning_rate": 6.1459259697961275e-06, + "loss": 17.3728, + "step": 24269 + }, + { + "epoch": 0.4436360977571426, + "grad_norm": 6.145629592495774, + "learning_rate": 6.145637835237549e-06, + "loss": 17.6649, + "step": 24270 + }, + { + "epoch": 0.4436543769535891, + "grad_norm": 6.569078665093694, + "learning_rate": 6.145349696663608e-06, + "loss": 17.7227, + "step": 24271 + }, + { + "epoch": 0.44367265615003565, + "grad_norm": 7.128844505822352, + "learning_rate": 6.145061554075318e-06, + "loss": 17.815, + "step": 24272 + }, + { + "epoch": 0.4436909353464822, + "grad_norm": 5.4845654340425956, + "learning_rate": 6.144773407473686e-06, + "loss": 17.1778, + "step": 24273 + }, + { + "epoch": 0.4437092145429287, + "grad_norm": 5.535464992401839, + "learning_rate": 6.144485256859722e-06, + "loss": 17.23, + "step": 24274 + }, + { + "epoch": 0.4437274937393752, + "grad_norm": 5.522928988988442, + "learning_rate": 6.144197102234436e-06, + "loss": 17.0113, + "step": 24275 + }, + { + "epoch": 0.44374577293582174, + "grad_norm": 6.9864170491744995, + "learning_rate": 6.14390894359884e-06, + "loss": 17.6287, + "step": 24276 + }, + { + "epoch": 0.44376405213226827, + "grad_norm": 7.583486048710066, + "learning_rate": 6.143620780953941e-06, + "loss": 17.9127, + "step": 24277 + }, + { + "epoch": 0.4437823313287148, + "grad_norm": 6.465241793642631, + "learning_rate": 6.143332614300751e-06, + "loss": 17.4757, + "step": 24278 + }, + { + "epoch": 0.4438006105251613, + "grad_norm": 7.142216981775892, + "learning_rate": 6.143044443640278e-06, + "loss": 17.6552, + "step": 24279 + }, + { + "epoch": 0.4438188897216078, + "grad_norm": 5.9695328808129435, + "learning_rate": 6.142756268973536e-06, + "loss": 17.2828, + "step": 24280 + }, + { + "epoch": 0.44383716891805436, + "grad_norm": 6.9498493225935025, + "learning_rate": 6.142468090301531e-06, + "loss": 17.6509, + "step": 24281 + }, + { + "epoch": 0.4438554481145009, + "grad_norm": 5.187061746346155, + "learning_rate": 6.142179907625274e-06, + "loss": 16.9073, + "step": 24282 + }, + { + "epoch": 0.44387372731094743, + "grad_norm": 6.568881953882317, + "learning_rate": 6.141891720945776e-06, + "loss": 17.2912, + "step": 24283 + }, + { + "epoch": 0.4438920065073939, + "grad_norm": 10.60555002158264, + "learning_rate": 6.141603530264046e-06, + "loss": 17.8106, + "step": 24284 + }, + { + "epoch": 0.44391028570384045, + "grad_norm": 8.03583234211266, + "learning_rate": 6.141315335581096e-06, + "loss": 17.9401, + "step": 24285 + }, + { + "epoch": 0.443928564900287, + "grad_norm": 7.243956753975113, + "learning_rate": 6.141027136897935e-06, + "loss": 17.7223, + "step": 24286 + }, + { + "epoch": 0.4439468440967335, + "grad_norm": 7.500720564815997, + "learning_rate": 6.140738934215572e-06, + "loss": 17.6781, + "step": 24287 + }, + { + "epoch": 0.44396512329318005, + "grad_norm": 6.193149165372164, + "learning_rate": 6.140450727535018e-06, + "loss": 17.6141, + "step": 24288 + }, + { + "epoch": 0.44398340248962653, + "grad_norm": 7.03293458091303, + "learning_rate": 6.140162516857283e-06, + "loss": 17.5132, + "step": 24289 + }, + { + "epoch": 0.44400168168607307, + "grad_norm": 
7.1255967726294385, + "learning_rate": 6.139874302183379e-06, + "loss": 17.7558, + "step": 24290 + }, + { + "epoch": 0.4440199608825196, + "grad_norm": 6.825648015376803, + "learning_rate": 6.1395860835143125e-06, + "loss": 17.4193, + "step": 24291 + }, + { + "epoch": 0.44403824007896614, + "grad_norm": 7.630204429301696, + "learning_rate": 6.139297860851097e-06, + "loss": 17.326, + "step": 24292 + }, + { + "epoch": 0.4440565192754127, + "grad_norm": 6.944416049690852, + "learning_rate": 6.139009634194739e-06, + "loss": 17.7461, + "step": 24293 + }, + { + "epoch": 0.44407479847185916, + "grad_norm": 6.009494017367086, + "learning_rate": 6.138721403546252e-06, + "loss": 17.4291, + "step": 24294 + }, + { + "epoch": 0.4440930776683057, + "grad_norm": 6.813643389771506, + "learning_rate": 6.1384331689066475e-06, + "loss": 17.8385, + "step": 24295 + }, + { + "epoch": 0.4441113568647522, + "grad_norm": 7.091083405823428, + "learning_rate": 6.138144930276931e-06, + "loss": 17.6301, + "step": 24296 + }, + { + "epoch": 0.44412963606119876, + "grad_norm": 7.7125590736864815, + "learning_rate": 6.137856687658117e-06, + "loss": 17.3912, + "step": 24297 + }, + { + "epoch": 0.4441479152576453, + "grad_norm": 6.410050452714684, + "learning_rate": 6.137568441051214e-06, + "loss": 17.7583, + "step": 24298 + }, + { + "epoch": 0.4441661944540918, + "grad_norm": 6.996499164391479, + "learning_rate": 6.137280190457231e-06, + "loss": 17.683, + "step": 24299 + }, + { + "epoch": 0.4441844736505383, + "grad_norm": 5.371767244130282, + "learning_rate": 6.1369919358771805e-06, + "loss": 17.1344, + "step": 24300 + }, + { + "epoch": 0.44420275284698485, + "grad_norm": 6.341875127461749, + "learning_rate": 6.136703677312071e-06, + "loss": 17.2414, + "step": 24301 + }, + { + "epoch": 0.4442210320434314, + "grad_norm": 6.628741169783307, + "learning_rate": 6.136415414762915e-06, + "loss": 17.8268, + "step": 24302 + }, + { + "epoch": 0.4442393112398779, + "grad_norm": 5.533741679658401, + "learning_rate": 6.13612714823072e-06, + "loss": 17.3195, + "step": 24303 + }, + { + "epoch": 0.4442575904363244, + "grad_norm": 6.661046720425952, + "learning_rate": 6.1358388777165e-06, + "loss": 17.4677, + "step": 24304 + }, + { + "epoch": 0.44427586963277094, + "grad_norm": 5.47171707160309, + "learning_rate": 6.1355506032212635e-06, + "loss": 17.1375, + "step": 24305 + }, + { + "epoch": 0.4442941488292175, + "grad_norm": 6.598453224838951, + "learning_rate": 6.135262324746017e-06, + "loss": 17.418, + "step": 24306 + }, + { + "epoch": 0.444312428025664, + "grad_norm": 6.932034174488093, + "learning_rate": 6.1349740422917785e-06, + "loss": 17.4595, + "step": 24307 + }, + { + "epoch": 0.44433070722211054, + "grad_norm": 6.785210180932653, + "learning_rate": 6.134685755859553e-06, + "loss": 17.5935, + "step": 24308 + }, + { + "epoch": 0.444348986418557, + "grad_norm": 6.757859135595257, + "learning_rate": 6.134397465450353e-06, + "loss": 17.3384, + "step": 24309 + }, + { + "epoch": 0.44436726561500356, + "grad_norm": 6.447510426771404, + "learning_rate": 6.1341091710651866e-06, + "loss": 17.4765, + "step": 24310 + }, + { + "epoch": 0.4443855448114501, + "grad_norm": 7.09644440450274, + "learning_rate": 6.133820872705068e-06, + "loss": 17.3937, + "step": 24311 + }, + { + "epoch": 0.44440382400789663, + "grad_norm": 5.429525403196993, + "learning_rate": 6.133532570371005e-06, + "loss": 17.2287, + "step": 24312 + }, + { + "epoch": 0.4444221032043431, + "grad_norm": 10.01770545589729, + "learning_rate": 6.133244264064007e-06, + "loss": 
17.9477, + "step": 24313 + }, + { + "epoch": 0.44444038240078965, + "grad_norm": 5.171950826862728, + "learning_rate": 6.132955953785089e-06, + "loss": 16.983, + "step": 24314 + }, + { + "epoch": 0.4444586615972362, + "grad_norm": 7.792712040468101, + "learning_rate": 6.132667639535257e-06, + "loss": 17.5489, + "step": 24315 + }, + { + "epoch": 0.4444769407936827, + "grad_norm": 6.560204077919434, + "learning_rate": 6.132379321315522e-06, + "loss": 17.6316, + "step": 24316 + }, + { + "epoch": 0.44449521999012925, + "grad_norm": 5.694203920575719, + "learning_rate": 6.1320909991268984e-06, + "loss": 17.1112, + "step": 24317 + }, + { + "epoch": 0.44451349918657573, + "grad_norm": 5.907759863783654, + "learning_rate": 6.131802672970394e-06, + "loss": 17.4768, + "step": 24318 + }, + { + "epoch": 0.44453177838302227, + "grad_norm": 5.786420166290249, + "learning_rate": 6.131514342847018e-06, + "loss": 17.2686, + "step": 24319 + }, + { + "epoch": 0.4445500575794688, + "grad_norm": 5.972920879570193, + "learning_rate": 6.131226008757781e-06, + "loss": 17.2622, + "step": 24320 + }, + { + "epoch": 0.44456833677591534, + "grad_norm": 7.229629167659487, + "learning_rate": 6.1309376707036986e-06, + "loss": 17.5661, + "step": 24321 + }, + { + "epoch": 0.4445866159723619, + "grad_norm": 8.328817984606994, + "learning_rate": 6.130649328685776e-06, + "loss": 17.6267, + "step": 24322 + }, + { + "epoch": 0.44460489516880836, + "grad_norm": 6.499535407976775, + "learning_rate": 6.130360982705026e-06, + "loss": 17.4157, + "step": 24323 + }, + { + "epoch": 0.4446231743652549, + "grad_norm": 7.7690598525152765, + "learning_rate": 6.130072632762458e-06, + "loss": 18.2608, + "step": 24324 + }, + { + "epoch": 0.4446414535617014, + "grad_norm": 8.653909364164772, + "learning_rate": 6.129784278859083e-06, + "loss": 18.9854, + "step": 24325 + }, + { + "epoch": 0.44465973275814796, + "grad_norm": 6.04688392428587, + "learning_rate": 6.129495920995913e-06, + "loss": 17.3662, + "step": 24326 + }, + { + "epoch": 0.4446780119545945, + "grad_norm": 5.861343213502779, + "learning_rate": 6.129207559173958e-06, + "loss": 17.3296, + "step": 24327 + }, + { + "epoch": 0.444696291151041, + "grad_norm": 7.353721394555597, + "learning_rate": 6.128919193394231e-06, + "loss": 17.9263, + "step": 24328 + }, + { + "epoch": 0.4447145703474875, + "grad_norm": 6.899622820319202, + "learning_rate": 6.128630823657735e-06, + "loss": 17.7727, + "step": 24329 + }, + { + "epoch": 0.44473284954393405, + "grad_norm": 7.335581877620247, + "learning_rate": 6.128342449965488e-06, + "loss": 17.8733, + "step": 24330 + }, + { + "epoch": 0.4447511287403806, + "grad_norm": 5.702965774210911, + "learning_rate": 6.1280540723185e-06, + "loss": 17.1145, + "step": 24331 + }, + { + "epoch": 0.4447694079368271, + "grad_norm": 6.254112132783163, + "learning_rate": 6.127765690717781e-06, + "loss": 17.3912, + "step": 24332 + }, + { + "epoch": 0.4447876871332736, + "grad_norm": 6.160431164341467, + "learning_rate": 6.127477305164339e-06, + "loss": 17.5638, + "step": 24333 + }, + { + "epoch": 0.44480596632972014, + "grad_norm": 4.911584992933277, + "learning_rate": 6.127188915659186e-06, + "loss": 16.855, + "step": 24334 + }, + { + "epoch": 0.4448242455261667, + "grad_norm": 7.166647929728161, + "learning_rate": 6.126900522203336e-06, + "loss": 17.8046, + "step": 24335 + }, + { + "epoch": 0.4448425247226132, + "grad_norm": 7.861798426693862, + "learning_rate": 6.126612124797797e-06, + "loss": 18.479, + "step": 24336 + }, + { + "epoch": 0.44486080391905974, + 
"grad_norm": 6.754642858429004, + "learning_rate": 6.12632372344358e-06, + "loss": 17.7218, + "step": 24337 + }, + { + "epoch": 0.4448790831155062, + "grad_norm": 7.082069116742022, + "learning_rate": 6.126035318141694e-06, + "loss": 17.8935, + "step": 24338 + }, + { + "epoch": 0.44489736231195276, + "grad_norm": 5.984734050673375, + "learning_rate": 6.1257469088931556e-06, + "loss": 17.2154, + "step": 24339 + }, + { + "epoch": 0.4449156415083993, + "grad_norm": 6.255999503580697, + "learning_rate": 6.125458495698971e-06, + "loss": 17.597, + "step": 24340 + }, + { + "epoch": 0.44493392070484583, + "grad_norm": 5.921269898866655, + "learning_rate": 6.12517007856015e-06, + "loss": 17.6401, + "step": 24341 + }, + { + "epoch": 0.44495219990129237, + "grad_norm": 8.369976768902918, + "learning_rate": 6.124881657477707e-06, + "loss": 17.893, + "step": 24342 + }, + { + "epoch": 0.44497047909773885, + "grad_norm": 6.758463596355022, + "learning_rate": 6.124593232452652e-06, + "loss": 17.5642, + "step": 24343 + }, + { + "epoch": 0.4449887582941854, + "grad_norm": 5.48942460446099, + "learning_rate": 6.124304803485994e-06, + "loss": 17.0318, + "step": 24344 + }, + { + "epoch": 0.4450070374906319, + "grad_norm": 7.397430182434983, + "learning_rate": 6.124016370578747e-06, + "loss": 17.5278, + "step": 24345 + }, + { + "epoch": 0.44502531668707845, + "grad_norm": 6.604856253075043, + "learning_rate": 6.123727933731918e-06, + "loss": 17.6773, + "step": 24346 + }, + { + "epoch": 0.44504359588352493, + "grad_norm": 6.016318657053202, + "learning_rate": 6.1234394929465206e-06, + "loss": 17.0158, + "step": 24347 + }, + { + "epoch": 0.44506187507997147, + "grad_norm": 5.993272945402518, + "learning_rate": 6.123151048223565e-06, + "loss": 17.2964, + "step": 24348 + }, + { + "epoch": 0.445080154276418, + "grad_norm": 7.895884841218638, + "learning_rate": 6.1228625995640645e-06, + "loss": 18.2877, + "step": 24349 + }, + { + "epoch": 0.44509843347286454, + "grad_norm": 7.822172859346337, + "learning_rate": 6.122574146969026e-06, + "loss": 17.9766, + "step": 24350 + }, + { + "epoch": 0.4451167126693111, + "grad_norm": 4.894234732054747, + "learning_rate": 6.122285690439464e-06, + "loss": 16.969, + "step": 24351 + }, + { + "epoch": 0.44513499186575756, + "grad_norm": 5.545973586692187, + "learning_rate": 6.121997229976387e-06, + "loss": 17.0296, + "step": 24352 + }, + { + "epoch": 0.4451532710622041, + "grad_norm": 7.603804127291949, + "learning_rate": 6.121708765580807e-06, + "loss": 17.7488, + "step": 24353 + }, + { + "epoch": 0.44517155025865063, + "grad_norm": 6.185939227796631, + "learning_rate": 6.121420297253735e-06, + "loss": 17.534, + "step": 24354 + }, + { + "epoch": 0.44518982945509716, + "grad_norm": 5.910913781325826, + "learning_rate": 6.121131824996183e-06, + "loss": 17.3078, + "step": 24355 + }, + { + "epoch": 0.4452081086515437, + "grad_norm": 5.5531843054562, + "learning_rate": 6.1208433488091604e-06, + "loss": 17.0702, + "step": 24356 + }, + { + "epoch": 0.4452263878479902, + "grad_norm": 6.177239257868246, + "learning_rate": 6.12055486869368e-06, + "loss": 17.5976, + "step": 24357 + }, + { + "epoch": 0.4452446670444367, + "grad_norm": 6.5302201683004935, + "learning_rate": 6.1202663846507505e-06, + "loss": 17.3438, + "step": 24358 + }, + { + "epoch": 0.44526294624088325, + "grad_norm": 6.506877468055951, + "learning_rate": 6.119977896681387e-06, + "loss": 17.9006, + "step": 24359 + }, + { + "epoch": 0.4452812254373298, + "grad_norm": 7.115421182349851, + "learning_rate": 6.1196894047865964e-06, 
+ "loss": 17.5706, + "step": 24360 + }, + { + "epoch": 0.4452995046337763, + "grad_norm": 6.869706757941675, + "learning_rate": 6.119400908967391e-06, + "loss": 17.8241, + "step": 24361 + }, + { + "epoch": 0.4453177838302228, + "grad_norm": 6.170737916919833, + "learning_rate": 6.119112409224783e-06, + "loss": 17.3096, + "step": 24362 + }, + { + "epoch": 0.44533606302666934, + "grad_norm": 6.320784406038538, + "learning_rate": 6.118823905559785e-06, + "loss": 17.5181, + "step": 24363 + }, + { + "epoch": 0.4453543422231159, + "grad_norm": 7.279107281238489, + "learning_rate": 6.1185353979734055e-06, + "loss": 17.5462, + "step": 24364 + }, + { + "epoch": 0.4453726214195624, + "grad_norm": 6.995689565213354, + "learning_rate": 6.118246886466655e-06, + "loss": 17.8895, + "step": 24365 + }, + { + "epoch": 0.44539090061600894, + "grad_norm": 7.393614652143189, + "learning_rate": 6.117958371040548e-06, + "loss": 17.9326, + "step": 24366 + }, + { + "epoch": 0.4454091798124554, + "grad_norm": 7.519159012904411, + "learning_rate": 6.1176698516960916e-06, + "loss": 17.8713, + "step": 24367 + }, + { + "epoch": 0.44542745900890196, + "grad_norm": 5.349614964005685, + "learning_rate": 6.117381328434302e-06, + "loss": 17.0897, + "step": 24368 + }, + { + "epoch": 0.4454457382053485, + "grad_norm": 5.568668906025615, + "learning_rate": 6.117092801256186e-06, + "loss": 17.0726, + "step": 24369 + }, + { + "epoch": 0.44546401740179503, + "grad_norm": 5.97316198049961, + "learning_rate": 6.1168042701627574e-06, + "loss": 17.3952, + "step": 24370 + }, + { + "epoch": 0.44548229659824157, + "grad_norm": 5.999665770267773, + "learning_rate": 6.116515735155026e-06, + "loss": 17.5104, + "step": 24371 + }, + { + "epoch": 0.44550057579468805, + "grad_norm": 7.194786978556543, + "learning_rate": 6.116227196234005e-06, + "loss": 17.5944, + "step": 24372 + }, + { + "epoch": 0.4455188549911346, + "grad_norm": 5.967959370906969, + "learning_rate": 6.115938653400705e-06, + "loss": 17.2163, + "step": 24373 + }, + { + "epoch": 0.4455371341875811, + "grad_norm": 6.690550135980042, + "learning_rate": 6.115650106656134e-06, + "loss": 17.3666, + "step": 24374 + }, + { + "epoch": 0.44555541338402765, + "grad_norm": 5.803152695053935, + "learning_rate": 6.115361556001308e-06, + "loss": 17.4216, + "step": 24375 + }, + { + "epoch": 0.4455736925804742, + "grad_norm": 6.895514055682177, + "learning_rate": 6.1150730014372375e-06, + "loss": 17.557, + "step": 24376 + }, + { + "epoch": 0.44559197177692067, + "grad_norm": 6.313370438085639, + "learning_rate": 6.114784442964932e-06, + "loss": 17.2118, + "step": 24377 + }, + { + "epoch": 0.4456102509733672, + "grad_norm": 6.0538085555126235, + "learning_rate": 6.114495880585404e-06, + "loss": 17.1377, + "step": 24378 + }, + { + "epoch": 0.44562853016981374, + "grad_norm": 6.642174924296055, + "learning_rate": 6.114207314299662e-06, + "loss": 17.888, + "step": 24379 + }, + { + "epoch": 0.4456468093662603, + "grad_norm": 7.1705988045603, + "learning_rate": 6.1139187441087246e-06, + "loss": 17.6493, + "step": 24380 + }, + { + "epoch": 0.44566508856270676, + "grad_norm": 6.5318335551960915, + "learning_rate": 6.113630170013596e-06, + "loss": 17.3605, + "step": 24381 + }, + { + "epoch": 0.4456833677591533, + "grad_norm": 7.349949186536805, + "learning_rate": 6.11334159201529e-06, + "loss": 18.0314, + "step": 24382 + }, + { + "epoch": 0.44570164695559983, + "grad_norm": 6.590783839230621, + "learning_rate": 6.11305301011482e-06, + "loss": 17.5504, + "step": 24383 + }, + { + "epoch": 
0.44571992615204636, + "grad_norm": 6.166497304071057, + "learning_rate": 6.1127644243131945e-06, + "loss": 17.3252, + "step": 24384 + }, + { + "epoch": 0.4457382053484929, + "grad_norm": 7.171042314064051, + "learning_rate": 6.112475834611426e-06, + "loss": 17.6472, + "step": 24385 + }, + { + "epoch": 0.4457564845449394, + "grad_norm": 7.9448358019926735, + "learning_rate": 6.112187241010527e-06, + "loss": 18.3846, + "step": 24386 + }, + { + "epoch": 0.4457747637413859, + "grad_norm": 6.725274435039454, + "learning_rate": 6.111898643511509e-06, + "loss": 17.7046, + "step": 24387 + }, + { + "epoch": 0.44579304293783245, + "grad_norm": 6.3831455941825554, + "learning_rate": 6.111610042115381e-06, + "loss": 17.5592, + "step": 24388 + }, + { + "epoch": 0.445811322134279, + "grad_norm": 5.259620222601401, + "learning_rate": 6.111321436823157e-06, + "loss": 17.1178, + "step": 24389 + }, + { + "epoch": 0.4458296013307255, + "grad_norm": 5.712018163076865, + "learning_rate": 6.11103282763585e-06, + "loss": 17.2385, + "step": 24390 + }, + { + "epoch": 0.445847880527172, + "grad_norm": 6.489322288991205, + "learning_rate": 6.110744214554467e-06, + "loss": 17.3642, + "step": 24391 + }, + { + "epoch": 0.44586615972361854, + "grad_norm": 6.893296290587616, + "learning_rate": 6.110455597580022e-06, + "loss": 17.5147, + "step": 24392 + }, + { + "epoch": 0.4458844389200651, + "grad_norm": 5.634470184937584, + "learning_rate": 6.110166976713525e-06, + "loss": 17.1702, + "step": 24393 + }, + { + "epoch": 0.4459027181165116, + "grad_norm": 7.946888191806957, + "learning_rate": 6.109878351955992e-06, + "loss": 18.0497, + "step": 24394 + }, + { + "epoch": 0.44592099731295815, + "grad_norm": 7.873532003521282, + "learning_rate": 6.10958972330843e-06, + "loss": 18.1097, + "step": 24395 + }, + { + "epoch": 0.4459392765094046, + "grad_norm": 8.331721818258567, + "learning_rate": 6.109301090771853e-06, + "loss": 17.5412, + "step": 24396 + }, + { + "epoch": 0.44595755570585116, + "grad_norm": 6.31519033379773, + "learning_rate": 6.109012454347272e-06, + "loss": 17.3509, + "step": 24397 + }, + { + "epoch": 0.4459758349022977, + "grad_norm": 8.558776793205876, + "learning_rate": 6.108723814035697e-06, + "loss": 18.4301, + "step": 24398 + }, + { + "epoch": 0.44599411409874423, + "grad_norm": 6.507503840989197, + "learning_rate": 6.108435169838143e-06, + "loss": 17.6548, + "step": 24399 + }, + { + "epoch": 0.44601239329519077, + "grad_norm": 5.944197870978429, + "learning_rate": 6.108146521755619e-06, + "loss": 17.2269, + "step": 24400 + }, + { + "epoch": 0.44603067249163725, + "grad_norm": 5.1432332411241815, + "learning_rate": 6.107857869789139e-06, + "loss": 17.158, + "step": 24401 + }, + { + "epoch": 0.4460489516880838, + "grad_norm": 6.723672584918994, + "learning_rate": 6.107569213939712e-06, + "loss": 17.4827, + "step": 24402 + }, + { + "epoch": 0.4460672308845303, + "grad_norm": 8.294100445087008, + "learning_rate": 6.107280554208351e-06, + "loss": 17.8669, + "step": 24403 + }, + { + "epoch": 0.44608551008097685, + "grad_norm": 7.0036918377540776, + "learning_rate": 6.106991890596069e-06, + "loss": 17.6244, + "step": 24404 + }, + { + "epoch": 0.4461037892774234, + "grad_norm": 6.762475181686122, + "learning_rate": 6.106703223103876e-06, + "loss": 17.6889, + "step": 24405 + }, + { + "epoch": 0.44612206847386987, + "grad_norm": 7.7299651463420425, + "learning_rate": 6.106414551732782e-06, + "loss": 17.7296, + "step": 24406 + }, + { + "epoch": 0.4461403476703164, + "grad_norm": 5.927754323053762, + 
"learning_rate": 6.1061258764838025e-06, + "loss": 17.4274, + "step": 24407 + }, + { + "epoch": 0.44615862686676294, + "grad_norm": 7.176967531030489, + "learning_rate": 6.105837197357949e-06, + "loss": 17.7028, + "step": 24408 + }, + { + "epoch": 0.4461769060632095, + "grad_norm": 8.012934018334555, + "learning_rate": 6.105548514356232e-06, + "loss": 18.5327, + "step": 24409 + }, + { + "epoch": 0.446195185259656, + "grad_norm": 7.480270741527926, + "learning_rate": 6.105259827479662e-06, + "loss": 17.9369, + "step": 24410 + }, + { + "epoch": 0.4462134644561025, + "grad_norm": 6.2645173973984996, + "learning_rate": 6.104971136729253e-06, + "loss": 17.4705, + "step": 24411 + }, + { + "epoch": 0.44623174365254903, + "grad_norm": 6.237915343950155, + "learning_rate": 6.104682442106016e-06, + "loss": 17.6889, + "step": 24412 + }, + { + "epoch": 0.44625002284899556, + "grad_norm": 5.41888111081362, + "learning_rate": 6.1043937436109626e-06, + "loss": 17.0488, + "step": 24413 + }, + { + "epoch": 0.4462683020454421, + "grad_norm": 5.957470474484199, + "learning_rate": 6.104105041245106e-06, + "loss": 17.3516, + "step": 24414 + }, + { + "epoch": 0.4462865812418886, + "grad_norm": 5.838828191115028, + "learning_rate": 6.103816335009455e-06, + "loss": 17.174, + "step": 24415 + }, + { + "epoch": 0.4463048604383351, + "grad_norm": 6.069141699449847, + "learning_rate": 6.1035276249050246e-06, + "loss": 17.0595, + "step": 24416 + }, + { + "epoch": 0.44632313963478165, + "grad_norm": 7.87473920273802, + "learning_rate": 6.103238910932825e-06, + "loss": 17.7671, + "step": 24417 + }, + { + "epoch": 0.4463414188312282, + "grad_norm": 6.3488164438989685, + "learning_rate": 6.102950193093871e-06, + "loss": 17.5785, + "step": 24418 + }, + { + "epoch": 0.4463596980276747, + "grad_norm": 5.646912176149879, + "learning_rate": 6.102661471389171e-06, + "loss": 17.1245, + "step": 24419 + }, + { + "epoch": 0.4463779772241212, + "grad_norm": 5.528284778019849, + "learning_rate": 6.1023727458197355e-06, + "loss": 17.166, + "step": 24420 + }, + { + "epoch": 0.44639625642056774, + "grad_norm": 6.485533940654361, + "learning_rate": 6.102084016386583e-06, + "loss": 17.707, + "step": 24421 + }, + { + "epoch": 0.4464145356170143, + "grad_norm": 7.741099090791554, + "learning_rate": 6.101795283090721e-06, + "loss": 18.1211, + "step": 24422 + }, + { + "epoch": 0.4464328148134608, + "grad_norm": 6.9229104341689025, + "learning_rate": 6.101506545933161e-06, + "loss": 17.6524, + "step": 24423 + }, + { + "epoch": 0.44645109400990735, + "grad_norm": 6.377767345759849, + "learning_rate": 6.101217804914917e-06, + "loss": 17.6627, + "step": 24424 + }, + { + "epoch": 0.4464693732063538, + "grad_norm": 5.490576776855622, + "learning_rate": 6.1009290600369995e-06, + "loss": 17.3771, + "step": 24425 + }, + { + "epoch": 0.44648765240280036, + "grad_norm": 5.651945789549618, + "learning_rate": 6.100640311300421e-06, + "loss": 17.0331, + "step": 24426 + }, + { + "epoch": 0.4465059315992469, + "grad_norm": 5.449373409515362, + "learning_rate": 6.100351558706194e-06, + "loss": 17.0476, + "step": 24427 + }, + { + "epoch": 0.44652421079569343, + "grad_norm": 8.799801754872561, + "learning_rate": 6.100062802255331e-06, + "loss": 17.9051, + "step": 24428 + }, + { + "epoch": 0.44654248999213997, + "grad_norm": 5.901880458321972, + "learning_rate": 6.099774041948843e-06, + "loss": 17.3573, + "step": 24429 + }, + { + "epoch": 0.44656076918858645, + "grad_norm": 8.634143831220134, + "learning_rate": 6.099485277787741e-06, + "loss": 18.4925, + "step": 
24430 + }, + { + "epoch": 0.446579048385033, + "grad_norm": 7.463467214231075, + "learning_rate": 6.09919650977304e-06, + "loss": 17.6599, + "step": 24431 + }, + { + "epoch": 0.4465973275814795, + "grad_norm": 5.546747571313554, + "learning_rate": 6.0989077379057516e-06, + "loss": 17.301, + "step": 24432 + }, + { + "epoch": 0.44661560677792606, + "grad_norm": 6.397673045441741, + "learning_rate": 6.098618962186884e-06, + "loss": 17.5818, + "step": 24433 + }, + { + "epoch": 0.4466338859743726, + "grad_norm": 6.2708999992759145, + "learning_rate": 6.098330182617453e-06, + "loss": 17.3073, + "step": 24434 + }, + { + "epoch": 0.44665216517081907, + "grad_norm": 6.074037112045683, + "learning_rate": 6.098041399198473e-06, + "loss": 17.3755, + "step": 24435 + }, + { + "epoch": 0.4466704443672656, + "grad_norm": 5.556667156349534, + "learning_rate": 6.097752611930951e-06, + "loss": 17.13, + "step": 24436 + }, + { + "epoch": 0.44668872356371214, + "grad_norm": 6.359632585710636, + "learning_rate": 6.097463820815901e-06, + "loss": 17.6939, + "step": 24437 + }, + { + "epoch": 0.4467070027601587, + "grad_norm": 6.201687773142437, + "learning_rate": 6.0971750258543346e-06, + "loss": 17.4796, + "step": 24438 + }, + { + "epoch": 0.4467252819566052, + "grad_norm": 5.467661989849664, + "learning_rate": 6.096886227047267e-06, + "loss": 16.8875, + "step": 24439 + }, + { + "epoch": 0.4467435611530517, + "grad_norm": 7.009486146427265, + "learning_rate": 6.0965974243957086e-06, + "loss": 17.646, + "step": 24440 + }, + { + "epoch": 0.44676184034949823, + "grad_norm": 7.013412644793047, + "learning_rate": 6.09630861790067e-06, + "loss": 17.7202, + "step": 24441 + }, + { + "epoch": 0.44678011954594476, + "grad_norm": 6.289208607952947, + "learning_rate": 6.096019807563165e-06, + "loss": 17.4421, + "step": 24442 + }, + { + "epoch": 0.4467983987423913, + "grad_norm": 6.966372039594387, + "learning_rate": 6.0957309933842065e-06, + "loss": 17.7467, + "step": 24443 + }, + { + "epoch": 0.44681667793883784, + "grad_norm": 6.863189672967621, + "learning_rate": 6.0954421753648056e-06, + "loss": 17.4474, + "step": 24444 + }, + { + "epoch": 0.4468349571352843, + "grad_norm": 4.46617239274621, + "learning_rate": 6.095153353505976e-06, + "loss": 16.6909, + "step": 24445 + }, + { + "epoch": 0.44685323633173085, + "grad_norm": 5.825656605819203, + "learning_rate": 6.094864527808727e-06, + "loss": 17.0685, + "step": 24446 + }, + { + "epoch": 0.4468715155281774, + "grad_norm": 4.714162601204519, + "learning_rate": 6.0945756982740725e-06, + "loss": 16.9861, + "step": 24447 + }, + { + "epoch": 0.4468897947246239, + "grad_norm": 6.777702908399464, + "learning_rate": 6.094286864903026e-06, + "loss": 17.6843, + "step": 24448 + }, + { + "epoch": 0.4469080739210704, + "grad_norm": 6.834913335229522, + "learning_rate": 6.0939980276966e-06, + "loss": 17.7261, + "step": 24449 + }, + { + "epoch": 0.44692635311751694, + "grad_norm": 6.745095667864185, + "learning_rate": 6.093709186655805e-06, + "loss": 17.845, + "step": 24450 + }, + { + "epoch": 0.4469446323139635, + "grad_norm": 5.955249358373833, + "learning_rate": 6.093420341781655e-06, + "loss": 17.3205, + "step": 24451 + }, + { + "epoch": 0.44696291151041, + "grad_norm": 7.981867678401039, + "learning_rate": 6.0931314930751606e-06, + "loss": 17.9401, + "step": 24452 + }, + { + "epoch": 0.44698119070685655, + "grad_norm": 7.955055418987271, + "learning_rate": 6.092842640537336e-06, + "loss": 18.6077, + "step": 24453 + }, + { + "epoch": 0.446999469903303, + "grad_norm": 8.27575083597538, 
+ "learning_rate": 6.0925537841691906e-06, + "loss": 18.028, + "step": 24454 + }, + { + "epoch": 0.44701774909974956, + "grad_norm": 6.397660865664258, + "learning_rate": 6.092264923971742e-06, + "loss": 17.3809, + "step": 24455 + }, + { + "epoch": 0.4470360282961961, + "grad_norm": 6.529630593388632, + "learning_rate": 6.091976059945998e-06, + "loss": 17.7047, + "step": 24456 + }, + { + "epoch": 0.44705430749264263, + "grad_norm": 6.15868748906537, + "learning_rate": 6.091687192092972e-06, + "loss": 17.0119, + "step": 24457 + }, + { + "epoch": 0.44707258668908917, + "grad_norm": 7.1100094140139865, + "learning_rate": 6.091398320413679e-06, + "loss": 17.6149, + "step": 24458 + }, + { + "epoch": 0.44709086588553565, + "grad_norm": 7.13538518829046, + "learning_rate": 6.091109444909129e-06, + "loss": 17.9354, + "step": 24459 + }, + { + "epoch": 0.4471091450819822, + "grad_norm": 10.354510507253659, + "learning_rate": 6.090820565580333e-06, + "loss": 18.9472, + "step": 24460 + }, + { + "epoch": 0.4471274242784287, + "grad_norm": 7.783071261002511, + "learning_rate": 6.090531682428306e-06, + "loss": 18.0399, + "step": 24461 + }, + { + "epoch": 0.44714570347487526, + "grad_norm": 6.401935706870166, + "learning_rate": 6.090242795454062e-06, + "loss": 17.436, + "step": 24462 + }, + { + "epoch": 0.4471639826713218, + "grad_norm": 4.558443586842041, + "learning_rate": 6.089953904658612e-06, + "loss": 16.6771, + "step": 24463 + }, + { + "epoch": 0.44718226186776827, + "grad_norm": 6.583926730349258, + "learning_rate": 6.089665010042968e-06, + "loss": 17.4754, + "step": 24464 + }, + { + "epoch": 0.4472005410642148, + "grad_norm": 8.632820674805483, + "learning_rate": 6.089376111608141e-06, + "loss": 18.8808, + "step": 24465 + }, + { + "epoch": 0.44721882026066134, + "grad_norm": 7.053539542181739, + "learning_rate": 6.089087209355147e-06, + "loss": 17.3104, + "step": 24466 + }, + { + "epoch": 0.4472370994571079, + "grad_norm": 7.452025530751659, + "learning_rate": 6.088798303284995e-06, + "loss": 17.9288, + "step": 24467 + }, + { + "epoch": 0.4472553786535544, + "grad_norm": 6.336280300716763, + "learning_rate": 6.088509393398701e-06, + "loss": 17.484, + "step": 24468 + }, + { + "epoch": 0.4472736578500009, + "grad_norm": 6.085966663598267, + "learning_rate": 6.088220479697274e-06, + "loss": 16.9977, + "step": 24469 + }, + { + "epoch": 0.44729193704644743, + "grad_norm": 5.044427867867533, + "learning_rate": 6.087931562181731e-06, + "loss": 16.9515, + "step": 24470 + }, + { + "epoch": 0.44731021624289397, + "grad_norm": 7.232597980795171, + "learning_rate": 6.087642640853081e-06, + "loss": 17.8035, + "step": 24471 + }, + { + "epoch": 0.4473284954393405, + "grad_norm": 5.801893114607026, + "learning_rate": 6.087353715712337e-06, + "loss": 17.2479, + "step": 24472 + }, + { + "epoch": 0.44734677463578704, + "grad_norm": 8.394168635213568, + "learning_rate": 6.087064786760516e-06, + "loss": 18.3203, + "step": 24473 + }, + { + "epoch": 0.4473650538322335, + "grad_norm": 6.320306523985644, + "learning_rate": 6.086775853998623e-06, + "loss": 17.3289, + "step": 24474 + }, + { + "epoch": 0.44738333302868005, + "grad_norm": 8.609223173205471, + "learning_rate": 6.086486917427678e-06, + "loss": 18.4674, + "step": 24475 + }, + { + "epoch": 0.4474016122251266, + "grad_norm": 6.310444867558349, + "learning_rate": 6.086197977048689e-06, + "loss": 17.4766, + "step": 24476 + }, + { + "epoch": 0.4474198914215731, + "grad_norm": 6.807482670978645, + "learning_rate": 6.085909032862671e-06, + "loss": 17.4981, + "step": 
24477 + }, + { + "epoch": 0.44743817061801966, + "grad_norm": 7.996647139645574, + "learning_rate": 6.0856200848706375e-06, + "loss": 17.9355, + "step": 24478 + }, + { + "epoch": 0.44745644981446614, + "grad_norm": 5.510315706969526, + "learning_rate": 6.085331133073596e-06, + "loss": 17.2577, + "step": 24479 + }, + { + "epoch": 0.4474747290109127, + "grad_norm": 6.080993520086858, + "learning_rate": 6.085042177472567e-06, + "loss": 17.5098, + "step": 24480 + }, + { + "epoch": 0.4474930082073592, + "grad_norm": 6.2517235184031215, + "learning_rate": 6.084753218068557e-06, + "loss": 17.4185, + "step": 24481 + }, + { + "epoch": 0.44751128740380575, + "grad_norm": 5.88677226674453, + "learning_rate": 6.084464254862582e-06, + "loss": 17.347, + "step": 24482 + }, + { + "epoch": 0.4475295666002522, + "grad_norm": 5.411578797994535, + "learning_rate": 6.084175287855654e-06, + "loss": 16.9384, + "step": 24483 + }, + { + "epoch": 0.44754784579669876, + "grad_norm": 6.045058578793596, + "learning_rate": 6.0838863170487846e-06, + "loss": 17.1605, + "step": 24484 + }, + { + "epoch": 0.4475661249931453, + "grad_norm": 6.455967636022619, + "learning_rate": 6.083597342442989e-06, + "loss": 17.7157, + "step": 24485 + }, + { + "epoch": 0.44758440418959183, + "grad_norm": 6.179426288420043, + "learning_rate": 6.083308364039279e-06, + "loss": 17.6575, + "step": 24486 + }, + { + "epoch": 0.44760268338603837, + "grad_norm": 5.652262027740546, + "learning_rate": 6.083019381838666e-06, + "loss": 17.0972, + "step": 24487 + }, + { + "epoch": 0.44762096258248485, + "grad_norm": 5.54168902868514, + "learning_rate": 6.082730395842165e-06, + "loss": 16.751, + "step": 24488 + }, + { + "epoch": 0.4476392417789314, + "grad_norm": 5.32191399402457, + "learning_rate": 6.0824414060507865e-06, + "loss": 16.9165, + "step": 24489 + }, + { + "epoch": 0.4476575209753779, + "grad_norm": 5.216734970115658, + "learning_rate": 6.082152412465546e-06, + "loss": 16.8919, + "step": 24490 + }, + { + "epoch": 0.44767580017182446, + "grad_norm": 5.817938937234099, + "learning_rate": 6.0818634150874554e-06, + "loss": 17.2652, + "step": 24491 + }, + { + "epoch": 0.447694079368271, + "grad_norm": 7.522501099547367, + "learning_rate": 6.081574413917527e-06, + "loss": 17.6549, + "step": 24492 + }, + { + "epoch": 0.44771235856471747, + "grad_norm": 5.931479171545933, + "learning_rate": 6.081285408956773e-06, + "loss": 17.3432, + "step": 24493 + }, + { + "epoch": 0.447730637761164, + "grad_norm": 4.41491681837655, + "learning_rate": 6.08099640020621e-06, + "loss": 16.6533, + "step": 24494 + }, + { + "epoch": 0.44774891695761054, + "grad_norm": 5.703115331221978, + "learning_rate": 6.080707387666847e-06, + "loss": 17.5828, + "step": 24495 + }, + { + "epoch": 0.4477671961540571, + "grad_norm": 5.684241655245052, + "learning_rate": 6.080418371339698e-06, + "loss": 17.2495, + "step": 24496 + }, + { + "epoch": 0.4477854753505036, + "grad_norm": 5.922163493167484, + "learning_rate": 6.0801293512257765e-06, + "loss": 17.3535, + "step": 24497 + }, + { + "epoch": 0.4478037545469501, + "grad_norm": 6.7646213945996605, + "learning_rate": 6.079840327326095e-06, + "loss": 17.7324, + "step": 24498 + }, + { + "epoch": 0.44782203374339663, + "grad_norm": 7.056552852321804, + "learning_rate": 6.079551299641667e-06, + "loss": 17.7943, + "step": 24499 + }, + { + "epoch": 0.44784031293984317, + "grad_norm": 6.695332757115799, + "learning_rate": 6.079262268173506e-06, + "loss": 17.8052, + "step": 24500 + }, + { + "epoch": 0.4478585921362897, + "grad_norm": 
6.137749675423967, + "learning_rate": 6.078973232922625e-06, + "loss": 17.4122, + "step": 24501 + }, + { + "epoch": 0.44787687133273624, + "grad_norm": 6.932955082810483, + "learning_rate": 6.078684193890036e-06, + "loss": 17.692, + "step": 24502 + }, + { + "epoch": 0.4478951505291827, + "grad_norm": 7.238481088020135, + "learning_rate": 6.078395151076751e-06, + "loss": 17.6842, + "step": 24503 + }, + { + "epoch": 0.44791342972562925, + "grad_norm": 6.353353648148848, + "learning_rate": 6.078106104483787e-06, + "loss": 17.4092, + "step": 24504 + }, + { + "epoch": 0.4479317089220758, + "grad_norm": 5.394112992320563, + "learning_rate": 6.077817054112153e-06, + "loss": 17.0963, + "step": 24505 + }, + { + "epoch": 0.4479499881185223, + "grad_norm": 6.6213831348714125, + "learning_rate": 6.077527999962863e-06, + "loss": 17.4994, + "step": 24506 + }, + { + "epoch": 0.44796826731496886, + "grad_norm": 6.238180867135258, + "learning_rate": 6.0772389420369315e-06, + "loss": 17.5379, + "step": 24507 + }, + { + "epoch": 0.44798654651141534, + "grad_norm": 6.918233957899457, + "learning_rate": 6.076949880335373e-06, + "loss": 17.6738, + "step": 24508 + }, + { + "epoch": 0.4480048257078619, + "grad_norm": 5.578343451360341, + "learning_rate": 6.0766608148591965e-06, + "loss": 17.0726, + "step": 24509 + }, + { + "epoch": 0.4480231049043084, + "grad_norm": 6.591828819491328, + "learning_rate": 6.0763717456094185e-06, + "loss": 17.4182, + "step": 24510 + }, + { + "epoch": 0.44804138410075495, + "grad_norm": 5.434818719766534, + "learning_rate": 6.0760826725870506e-06, + "loss": 17.2368, + "step": 24511 + }, + { + "epoch": 0.4480596632972015, + "grad_norm": 6.644255578742306, + "learning_rate": 6.075793595793106e-06, + "loss": 17.6142, + "step": 24512 + }, + { + "epoch": 0.44807794249364796, + "grad_norm": 6.334112684827274, + "learning_rate": 6.075504515228597e-06, + "loss": 17.2672, + "step": 24513 + }, + { + "epoch": 0.4480962216900945, + "grad_norm": 5.539732552447868, + "learning_rate": 6.075215430894541e-06, + "loss": 17.2791, + "step": 24514 + }, + { + "epoch": 0.44811450088654103, + "grad_norm": 6.359708444980066, + "learning_rate": 6.074926342791945e-06, + "loss": 17.3534, + "step": 24515 + }, + { + "epoch": 0.44813278008298757, + "grad_norm": 6.455018272419638, + "learning_rate": 6.0746372509218264e-06, + "loss": 17.3637, + "step": 24516 + }, + { + "epoch": 0.44815105927943405, + "grad_norm": 4.894673736300759, + "learning_rate": 6.074348155285198e-06, + "loss": 16.7497, + "step": 24517 + }, + { + "epoch": 0.4481693384758806, + "grad_norm": 6.136956746908434, + "learning_rate": 6.074059055883074e-06, + "loss": 17.667, + "step": 24518 + }, + { + "epoch": 0.4481876176723271, + "grad_norm": 7.35503859221064, + "learning_rate": 6.073769952716465e-06, + "loss": 17.8807, + "step": 24519 + }, + { + "epoch": 0.44820589686877366, + "grad_norm": 7.248474537555424, + "learning_rate": 6.073480845786384e-06, + "loss": 17.9835, + "step": 24520 + }, + { + "epoch": 0.4482241760652202, + "grad_norm": 7.084398277839027, + "learning_rate": 6.073191735093848e-06, + "loss": 18.3454, + "step": 24521 + }, + { + "epoch": 0.4482424552616667, + "grad_norm": 5.520930994534029, + "learning_rate": 6.072902620639867e-06, + "loss": 17.1094, + "step": 24522 + }, + { + "epoch": 0.4482607344581132, + "grad_norm": 6.646691813263984, + "learning_rate": 6.0726135024254555e-06, + "loss": 17.3124, + "step": 24523 + }, + { + "epoch": 0.44827901365455974, + "grad_norm": 6.089075594839369, + "learning_rate": 6.072324380451626e-06, + 
"loss": 17.284, + "step": 24524 + }, + { + "epoch": 0.4482972928510063, + "grad_norm": 5.835320371397225, + "learning_rate": 6.072035254719394e-06, + "loss": 17.2227, + "step": 24525 + }, + { + "epoch": 0.4483155720474528, + "grad_norm": 7.1010148801621185, + "learning_rate": 6.0717461252297706e-06, + "loss": 17.6833, + "step": 24526 + }, + { + "epoch": 0.4483338512438993, + "grad_norm": 7.871774532027448, + "learning_rate": 6.071456991983771e-06, + "loss": 17.5686, + "step": 24527 + }, + { + "epoch": 0.44835213044034583, + "grad_norm": 6.50344425580137, + "learning_rate": 6.071167854982406e-06, + "loss": 17.5485, + "step": 24528 + }, + { + "epoch": 0.44837040963679237, + "grad_norm": 5.7587372362784865, + "learning_rate": 6.070878714226691e-06, + "loss": 17.1437, + "step": 24529 + }, + { + "epoch": 0.4483886888332389, + "grad_norm": 7.974223587236758, + "learning_rate": 6.07058956971764e-06, + "loss": 17.9765, + "step": 24530 + }, + { + "epoch": 0.44840696802968544, + "grad_norm": 5.616838625710783, + "learning_rate": 6.070300421456264e-06, + "loss": 17.3922, + "step": 24531 + }, + { + "epoch": 0.4484252472261319, + "grad_norm": 6.365724697268386, + "learning_rate": 6.070011269443581e-06, + "loss": 17.5502, + "step": 24532 + }, + { + "epoch": 0.44844352642257845, + "grad_norm": 5.381085605556312, + "learning_rate": 6.0697221136805975e-06, + "loss": 17.1453, + "step": 24533 + }, + { + "epoch": 0.448461805619025, + "grad_norm": 7.3249294609347375, + "learning_rate": 6.069432954168333e-06, + "loss": 17.647, + "step": 24534 + }, + { + "epoch": 0.4484800848154715, + "grad_norm": 6.86413334020217, + "learning_rate": 6.069143790907799e-06, + "loss": 17.7492, + "step": 24535 + }, + { + "epoch": 0.44849836401191806, + "grad_norm": 6.857971680791453, + "learning_rate": 6.068854623900008e-06, + "loss": 17.7351, + "step": 24536 + }, + { + "epoch": 0.44851664320836454, + "grad_norm": 6.371194485979414, + "learning_rate": 6.068565453145975e-06, + "loss": 17.5003, + "step": 24537 + }, + { + "epoch": 0.4485349224048111, + "grad_norm": 7.202973564893784, + "learning_rate": 6.068276278646711e-06, + "loss": 17.9177, + "step": 24538 + }, + { + "epoch": 0.4485532016012576, + "grad_norm": 6.680424328765258, + "learning_rate": 6.067987100403233e-06, + "loss": 17.7232, + "step": 24539 + }, + { + "epoch": 0.44857148079770415, + "grad_norm": 6.039204159314783, + "learning_rate": 6.067697918416553e-06, + "loss": 17.2954, + "step": 24540 + }, + { + "epoch": 0.4485897599941507, + "grad_norm": 5.480267338243896, + "learning_rate": 6.067408732687684e-06, + "loss": 17.2093, + "step": 24541 + }, + { + "epoch": 0.44860803919059716, + "grad_norm": 7.426914394730756, + "learning_rate": 6.06711954321764e-06, + "loss": 17.863, + "step": 24542 + }, + { + "epoch": 0.4486263183870437, + "grad_norm": 6.464402359165225, + "learning_rate": 6.066830350007435e-06, + "loss": 17.5623, + "step": 24543 + }, + { + "epoch": 0.44864459758349023, + "grad_norm": 6.824830301997114, + "learning_rate": 6.066541153058081e-06, + "loss": 17.7391, + "step": 24544 + }, + { + "epoch": 0.44866287677993677, + "grad_norm": 7.5960022816052595, + "learning_rate": 6.066251952370594e-06, + "loss": 18.0109, + "step": 24545 + }, + { + "epoch": 0.4486811559763833, + "grad_norm": 5.759280762000812, + "learning_rate": 6.0659627479459856e-06, + "loss": 17.2945, + "step": 24546 + }, + { + "epoch": 0.4486994351728298, + "grad_norm": 6.07739225476777, + "learning_rate": 6.065673539785271e-06, + "loss": 17.3572, + "step": 24547 + }, + { + "epoch": 0.4487177143692763, 
+ "grad_norm": 7.522781197636879, + "learning_rate": 6.065384327889462e-06, + "loss": 17.8289, + "step": 24548 + }, + { + "epoch": 0.44873599356572286, + "grad_norm": 5.416111713084062, + "learning_rate": 6.065095112259575e-06, + "loss": 17.0497, + "step": 24549 + }, + { + "epoch": 0.4487542727621694, + "grad_norm": 6.2261954311888505, + "learning_rate": 6.064805892896621e-06, + "loss": 17.4546, + "step": 24550 + }, + { + "epoch": 0.4487725519586159, + "grad_norm": 6.698559746903378, + "learning_rate": 6.0645166698016145e-06, + "loss": 17.4948, + "step": 24551 + }, + { + "epoch": 0.4487908311550624, + "grad_norm": 5.989076779748443, + "learning_rate": 6.06422744297557e-06, + "loss": 17.2271, + "step": 24552 + }, + { + "epoch": 0.44880911035150894, + "grad_norm": 6.786639717361195, + "learning_rate": 6.063938212419501e-06, + "loss": 17.6503, + "step": 24553 + }, + { + "epoch": 0.4488273895479555, + "grad_norm": 7.987285151832187, + "learning_rate": 6.06364897813442e-06, + "loss": 17.8774, + "step": 24554 + }, + { + "epoch": 0.448845668744402, + "grad_norm": 7.534178762851669, + "learning_rate": 6.063359740121342e-06, + "loss": 18.2498, + "step": 24555 + }, + { + "epoch": 0.4488639479408485, + "grad_norm": 7.818544921555647, + "learning_rate": 6.063070498381281e-06, + "loss": 18.225, + "step": 24556 + }, + { + "epoch": 0.44888222713729503, + "grad_norm": 6.150402645822434, + "learning_rate": 6.0627812529152496e-06, + "loss": 17.2284, + "step": 24557 + }, + { + "epoch": 0.44890050633374157, + "grad_norm": 6.452872580875381, + "learning_rate": 6.062492003724262e-06, + "loss": 17.5387, + "step": 24558 + }, + { + "epoch": 0.4489187855301881, + "grad_norm": 5.609052918892873, + "learning_rate": 6.0622027508093325e-06, + "loss": 17.2485, + "step": 24559 + }, + { + "epoch": 0.44893706472663464, + "grad_norm": 6.278336245370838, + "learning_rate": 6.061913494171474e-06, + "loss": 17.2655, + "step": 24560 + }, + { + "epoch": 0.4489553439230811, + "grad_norm": 8.9152948236935, + "learning_rate": 6.0616242338117005e-06, + "loss": 18.1979, + "step": 24561 + }, + { + "epoch": 0.44897362311952765, + "grad_norm": 6.088876698717248, + "learning_rate": 6.0613349697310275e-06, + "loss": 17.317, + "step": 24562 + }, + { + "epoch": 0.4489919023159742, + "grad_norm": 5.977949774786967, + "learning_rate": 6.061045701930468e-06, + "loss": 17.5265, + "step": 24563 + }, + { + "epoch": 0.4490101815124207, + "grad_norm": 6.291955535729224, + "learning_rate": 6.060756430411033e-06, + "loss": 17.6719, + "step": 24564 + }, + { + "epoch": 0.44902846070886726, + "grad_norm": 5.83042788133474, + "learning_rate": 6.060467155173739e-06, + "loss": 17.2278, + "step": 24565 + }, + { + "epoch": 0.44904673990531374, + "grad_norm": 9.914272324998482, + "learning_rate": 6.0601778762196016e-06, + "loss": 17.3401, + "step": 24566 + }, + { + "epoch": 0.4490650191017603, + "grad_norm": 5.88376955223234, + "learning_rate": 6.059888593549632e-06, + "loss": 17.3663, + "step": 24567 + }, + { + "epoch": 0.4490832982982068, + "grad_norm": 6.75817276328447, + "learning_rate": 6.059599307164845e-06, + "loss": 17.7442, + "step": 24568 + }, + { + "epoch": 0.44910157749465335, + "grad_norm": 5.464164571011008, + "learning_rate": 6.059310017066254e-06, + "loss": 17.2119, + "step": 24569 + }, + { + "epoch": 0.4491198566910999, + "grad_norm": 5.066354716612397, + "learning_rate": 6.059020723254874e-06, + "loss": 17.247, + "step": 24570 + }, + { + "epoch": 0.44913813588754636, + "grad_norm": 6.02637680820274, + "learning_rate": 6.058731425731716e-06, 
+ "loss": 17.4971, + "step": 24571 + }, + { + "epoch": 0.4491564150839929, + "grad_norm": 6.5116680623856835, + "learning_rate": 6.058442124497799e-06, + "loss": 17.6569, + "step": 24572 + }, + { + "epoch": 0.44917469428043943, + "grad_norm": 7.547813129000829, + "learning_rate": 6.058152819554134e-06, + "loss": 17.994, + "step": 24573 + }, + { + "epoch": 0.44919297347688597, + "grad_norm": 5.78438643898109, + "learning_rate": 6.057863510901733e-06, + "loss": 17.3803, + "step": 24574 + }, + { + "epoch": 0.4492112526733325, + "grad_norm": 6.207095392725289, + "learning_rate": 6.057574198541614e-06, + "loss": 17.4389, + "step": 24575 + }, + { + "epoch": 0.449229531869779, + "grad_norm": 7.965840733566035, + "learning_rate": 6.057284882474788e-06, + "loss": 18.28, + "step": 24576 + }, + { + "epoch": 0.4492478110662255, + "grad_norm": 5.2206629645349265, + "learning_rate": 6.056995562702271e-06, + "loss": 17.176, + "step": 24577 + }, + { + "epoch": 0.44926609026267206, + "grad_norm": 6.88168788536299, + "learning_rate": 6.056706239225076e-06, + "loss": 18.1214, + "step": 24578 + }, + { + "epoch": 0.4492843694591186, + "grad_norm": 6.376165550817576, + "learning_rate": 6.056416912044217e-06, + "loss": 17.6409, + "step": 24579 + }, + { + "epoch": 0.44930264865556513, + "grad_norm": 7.4322640736029, + "learning_rate": 6.0561275811607104e-06, + "loss": 17.9083, + "step": 24580 + }, + { + "epoch": 0.4493209278520116, + "grad_norm": 6.9442375255901405, + "learning_rate": 6.055838246575566e-06, + "loss": 17.6621, + "step": 24581 + }, + { + "epoch": 0.44933920704845814, + "grad_norm": 5.6862610391781265, + "learning_rate": 6.055548908289801e-06, + "loss": 17.0563, + "step": 24582 + }, + { + "epoch": 0.4493574862449047, + "grad_norm": 6.955649417451342, + "learning_rate": 6.055259566304429e-06, + "loss": 17.5888, + "step": 24583 + }, + { + "epoch": 0.4493757654413512, + "grad_norm": 6.470692818883817, + "learning_rate": 6.054970220620463e-06, + "loss": 17.3649, + "step": 24584 + }, + { + "epoch": 0.4493940446377977, + "grad_norm": 9.814720207131511, + "learning_rate": 6.054680871238918e-06, + "loss": 18.5805, + "step": 24585 + }, + { + "epoch": 0.44941232383424423, + "grad_norm": 6.234637690888976, + "learning_rate": 6.054391518160808e-06, + "loss": 17.7208, + "step": 24586 + }, + { + "epoch": 0.44943060303069077, + "grad_norm": 5.97628720273616, + "learning_rate": 6.054102161387147e-06, + "loss": 17.4372, + "step": 24587 + }, + { + "epoch": 0.4494488822271373, + "grad_norm": 7.366581984919246, + "learning_rate": 6.053812800918951e-06, + "loss": 17.609, + "step": 24588 + }, + { + "epoch": 0.44946716142358384, + "grad_norm": 6.589630624354999, + "learning_rate": 6.053523436757232e-06, + "loss": 17.6204, + "step": 24589 + }, + { + "epoch": 0.4494854406200303, + "grad_norm": 6.370969219120875, + "learning_rate": 6.053234068903004e-06, + "loss": 17.3953, + "step": 24590 + }, + { + "epoch": 0.44950371981647685, + "grad_norm": 5.747775020938252, + "learning_rate": 6.052944697357283e-06, + "loss": 17.2095, + "step": 24591 + }, + { + "epoch": 0.4495219990129234, + "grad_norm": 5.984339777999843, + "learning_rate": 6.052655322121081e-06, + "loss": 17.479, + "step": 24592 + }, + { + "epoch": 0.4495402782093699, + "grad_norm": 6.84481552137433, + "learning_rate": 6.052365943195413e-06, + "loss": 17.8045, + "step": 24593 + }, + { + "epoch": 0.44955855740581646, + "grad_norm": 7.8522367904267405, + "learning_rate": 6.0520765605812956e-06, + "loss": 18.0193, + "step": 24594 + }, + { + "epoch": 0.44957683660226294, + 
"grad_norm": 5.309549928513325, + "learning_rate": 6.051787174279741e-06, + "loss": 17.0146, + "step": 24595 + }, + { + "epoch": 0.4495951157987095, + "grad_norm": 5.985642368716371, + "learning_rate": 6.051497784291762e-06, + "loss": 17.5111, + "step": 24596 + }, + { + "epoch": 0.449613394995156, + "grad_norm": 6.398311933102849, + "learning_rate": 6.051208390618375e-06, + "loss": 17.4885, + "step": 24597 + }, + { + "epoch": 0.44963167419160255, + "grad_norm": 6.720363386418574, + "learning_rate": 6.050918993260595e-06, + "loss": 17.4864, + "step": 24598 + }, + { + "epoch": 0.4496499533880491, + "grad_norm": 6.057645664109545, + "learning_rate": 6.050629592219434e-06, + "loss": 17.5053, + "step": 24599 + }, + { + "epoch": 0.44966823258449556, + "grad_norm": 6.683413351050948, + "learning_rate": 6.050340187495908e-06, + "loss": 17.4866, + "step": 24600 + }, + { + "epoch": 0.4496865117809421, + "grad_norm": 6.5973144254902, + "learning_rate": 6.05005077909103e-06, + "loss": 17.6612, + "step": 24601 + }, + { + "epoch": 0.44970479097738864, + "grad_norm": 5.997397751800434, + "learning_rate": 6.049761367005815e-06, + "loss": 17.5349, + "step": 24602 + }, + { + "epoch": 0.44972307017383517, + "grad_norm": 6.240040430748273, + "learning_rate": 6.049471951241279e-06, + "loss": 17.4376, + "step": 24603 + }, + { + "epoch": 0.4497413493702817, + "grad_norm": 5.208798792321488, + "learning_rate": 6.049182531798434e-06, + "loss": 17.0191, + "step": 24604 + }, + { + "epoch": 0.4497596285667282, + "grad_norm": 5.919284012950762, + "learning_rate": 6.048893108678295e-06, + "loss": 17.2049, + "step": 24605 + }, + { + "epoch": 0.4497779077631747, + "grad_norm": 6.7964267950890385, + "learning_rate": 6.0486036818818775e-06, + "loss": 17.5385, + "step": 24606 + }, + { + "epoch": 0.44979618695962126, + "grad_norm": 7.811179831176289, + "learning_rate": 6.048314251410193e-06, + "loss": 17.5783, + "step": 24607 + }, + { + "epoch": 0.4498144661560678, + "grad_norm": 6.408143347933946, + "learning_rate": 6.048024817264261e-06, + "loss": 17.7159, + "step": 24608 + }, + { + "epoch": 0.44983274535251433, + "grad_norm": 7.157801644792308, + "learning_rate": 6.047735379445092e-06, + "loss": 17.6406, + "step": 24609 + }, + { + "epoch": 0.4498510245489608, + "grad_norm": 6.714112451459579, + "learning_rate": 6.047445937953701e-06, + "loss": 17.6858, + "step": 24610 + }, + { + "epoch": 0.44986930374540735, + "grad_norm": 5.638619957228446, + "learning_rate": 6.047156492791102e-06, + "loss": 17.0898, + "step": 24611 + }, + { + "epoch": 0.4498875829418539, + "grad_norm": 6.660373547952849, + "learning_rate": 6.046867043958311e-06, + "loss": 18.0357, + "step": 24612 + }, + { + "epoch": 0.4499058621383004, + "grad_norm": 5.914512935480703, + "learning_rate": 6.046577591456343e-06, + "loss": 17.0058, + "step": 24613 + }, + { + "epoch": 0.44992414133474695, + "grad_norm": 6.288417485868926, + "learning_rate": 6.0462881352862115e-06, + "loss": 17.5398, + "step": 24614 + }, + { + "epoch": 0.44994242053119343, + "grad_norm": 5.8650711695290525, + "learning_rate": 6.045998675448927e-06, + "loss": 17.2077, + "step": 24615 + }, + { + "epoch": 0.44996069972763997, + "grad_norm": 5.990436894644584, + "learning_rate": 6.045709211945512e-06, + "loss": 17.223, + "step": 24616 + }, + { + "epoch": 0.4499789789240865, + "grad_norm": 7.495889648121055, + "learning_rate": 6.045419744776976e-06, + "loss": 18.0186, + "step": 24617 + }, + { + "epoch": 0.44999725812053304, + "grad_norm": 8.213885327731585, + "learning_rate": 
6.045130273944334e-06, + "loss": 18.2507, + "step": 24618 + }, + { + "epoch": 0.4500155373169795, + "grad_norm": 6.141209230567598, + "learning_rate": 6.044840799448602e-06, + "loss": 17.2438, + "step": 24619 + }, + { + "epoch": 0.45003381651342605, + "grad_norm": 5.854929778397421, + "learning_rate": 6.044551321290791e-06, + "loss": 17.3367, + "step": 24620 + }, + { + "epoch": 0.4500520957098726, + "grad_norm": 8.045059826317729, + "learning_rate": 6.044261839471921e-06, + "loss": 17.5542, + "step": 24621 + }, + { + "epoch": 0.4500703749063191, + "grad_norm": 5.220343005622515, + "learning_rate": 6.043972353993004e-06, + "loss": 17.014, + "step": 24622 + }, + { + "epoch": 0.45008865410276566, + "grad_norm": 5.407247716345242, + "learning_rate": 6.043682864855053e-06, + "loss": 17.0192, + "step": 24623 + }, + { + "epoch": 0.45010693329921214, + "grad_norm": 6.97528893326854, + "learning_rate": 6.0433933720590845e-06, + "loss": 17.7547, + "step": 24624 + }, + { + "epoch": 0.4501252124956587, + "grad_norm": 6.01521567472642, + "learning_rate": 6.0431038756061135e-06, + "loss": 17.1485, + "step": 24625 + }, + { + "epoch": 0.4501434916921052, + "grad_norm": 5.651430175005524, + "learning_rate": 6.0428143754971526e-06, + "loss": 17.0379, + "step": 24626 + }, + { + "epoch": 0.45016177088855175, + "grad_norm": 6.778124048190056, + "learning_rate": 6.042524871733218e-06, + "loss": 17.4037, + "step": 24627 + }, + { + "epoch": 0.4501800500849983, + "grad_norm": 5.674727814042923, + "learning_rate": 6.042235364315325e-06, + "loss": 17.1581, + "step": 24628 + }, + { + "epoch": 0.45019832928144476, + "grad_norm": 6.423002753043599, + "learning_rate": 6.0419458532444875e-06, + "loss": 17.5022, + "step": 24629 + }, + { + "epoch": 0.4502166084778913, + "grad_norm": 7.302713204386245, + "learning_rate": 6.04165633852172e-06, + "loss": 17.8249, + "step": 24630 + }, + { + "epoch": 0.45023488767433784, + "grad_norm": 7.458820722608766, + "learning_rate": 6.041366820148037e-06, + "loss": 18.0166, + "step": 24631 + }, + { + "epoch": 0.45025316687078437, + "grad_norm": 6.194179028173427, + "learning_rate": 6.0410772981244555e-06, + "loss": 17.6362, + "step": 24632 + }, + { + "epoch": 0.4502714460672309, + "grad_norm": 7.4911200748010245, + "learning_rate": 6.040787772451986e-06, + "loss": 17.8867, + "step": 24633 + }, + { + "epoch": 0.4502897252636774, + "grad_norm": 6.057510729245927, + "learning_rate": 6.040498243131646e-06, + "loss": 17.2536, + "step": 24634 + }, + { + "epoch": 0.4503080044601239, + "grad_norm": 8.547528988042247, + "learning_rate": 6.040208710164451e-06, + "loss": 17.9104, + "step": 24635 + }, + { + "epoch": 0.45032628365657046, + "grad_norm": 6.650799585296418, + "learning_rate": 6.0399191735514154e-06, + "loss": 17.4979, + "step": 24636 + }, + { + "epoch": 0.450344562853017, + "grad_norm": 5.931705650138957, + "learning_rate": 6.039629633293552e-06, + "loss": 17.0645, + "step": 24637 + }, + { + "epoch": 0.45036284204946353, + "grad_norm": 6.823833292013225, + "learning_rate": 6.039340089391876e-06, + "loss": 17.4026, + "step": 24638 + }, + { + "epoch": 0.45038112124591, + "grad_norm": 6.347875714287686, + "learning_rate": 6.039050541847405e-06, + "loss": 17.552, + "step": 24639 + }, + { + "epoch": 0.45039940044235655, + "grad_norm": 6.138020966787515, + "learning_rate": 6.038760990661151e-06, + "loss": 17.2043, + "step": 24640 + }, + { + "epoch": 0.4504176796388031, + "grad_norm": 6.06862788956139, + "learning_rate": 6.03847143583413e-06, + "loss": 17.286, + "step": 24641 + }, + { + 
"epoch": 0.4504359588352496, + "grad_norm": 6.661539874719218, + "learning_rate": 6.038181877367358e-06, + "loss": 17.328, + "step": 24642 + }, + { + "epoch": 0.45045423803169615, + "grad_norm": 6.225700155227202, + "learning_rate": 6.037892315261847e-06, + "loss": 17.5815, + "step": 24643 + }, + { + "epoch": 0.45047251722814263, + "grad_norm": 6.21853811677487, + "learning_rate": 6.037602749518614e-06, + "loss": 17.4411, + "step": 24644 + }, + { + "epoch": 0.45049079642458917, + "grad_norm": 6.902656552265081, + "learning_rate": 6.0373131801386734e-06, + "loss": 17.6042, + "step": 24645 + }, + { + "epoch": 0.4505090756210357, + "grad_norm": 7.846094296385334, + "learning_rate": 6.0370236071230414e-06, + "loss": 17.962, + "step": 24646 + }, + { + "epoch": 0.45052735481748224, + "grad_norm": 5.717781758447652, + "learning_rate": 6.036734030472729e-06, + "loss": 17.1185, + "step": 24647 + }, + { + "epoch": 0.4505456340139288, + "grad_norm": 4.763707573529443, + "learning_rate": 6.036444450188755e-06, + "loss": 16.8422, + "step": 24648 + }, + { + "epoch": 0.45056391321037526, + "grad_norm": 6.732233461299591, + "learning_rate": 6.036154866272135e-06, + "loss": 17.6115, + "step": 24649 + }, + { + "epoch": 0.4505821924068218, + "grad_norm": 5.251545990872237, + "learning_rate": 6.03586527872388e-06, + "loss": 17.0011, + "step": 24650 + }, + { + "epoch": 0.4506004716032683, + "grad_norm": 6.707542053326857, + "learning_rate": 6.035575687545008e-06, + "loss": 17.874, + "step": 24651 + }, + { + "epoch": 0.45061875079971486, + "grad_norm": 5.854115273020799, + "learning_rate": 6.035286092736532e-06, + "loss": 17.3612, + "step": 24652 + }, + { + "epoch": 0.45063702999616134, + "grad_norm": 6.948975364826289, + "learning_rate": 6.0349964942994685e-06, + "loss": 17.805, + "step": 24653 + }, + { + "epoch": 0.4506553091926079, + "grad_norm": 6.999709866000698, + "learning_rate": 6.034706892234833e-06, + "loss": 17.9041, + "step": 24654 + }, + { + "epoch": 0.4506735883890544, + "grad_norm": 7.182306356613054, + "learning_rate": 6.034417286543639e-06, + "loss": 17.9241, + "step": 24655 + }, + { + "epoch": 0.45069186758550095, + "grad_norm": 6.594254229495999, + "learning_rate": 6.034127677226902e-06, + "loss": 17.443, + "step": 24656 + }, + { + "epoch": 0.4507101467819475, + "grad_norm": 5.478485975266605, + "learning_rate": 6.033838064285638e-06, + "loss": 17.2909, + "step": 24657 + }, + { + "epoch": 0.45072842597839397, + "grad_norm": 6.41011908912437, + "learning_rate": 6.03354844772086e-06, + "loss": 17.5707, + "step": 24658 + }, + { + "epoch": 0.4507467051748405, + "grad_norm": 8.414537097297394, + "learning_rate": 6.033258827533586e-06, + "loss": 18.035, + "step": 24659 + }, + { + "epoch": 0.45076498437128704, + "grad_norm": 7.001185991744438, + "learning_rate": 6.032969203724828e-06, + "loss": 17.7875, + "step": 24660 + }, + { + "epoch": 0.45078326356773357, + "grad_norm": 6.089805242115694, + "learning_rate": 6.032679576295603e-06, + "loss": 17.1514, + "step": 24661 + }, + { + "epoch": 0.4508015427641801, + "grad_norm": 5.927690599311975, + "learning_rate": 6.032389945246925e-06, + "loss": 17.3021, + "step": 24662 + }, + { + "epoch": 0.4508198219606266, + "grad_norm": 6.095567075875497, + "learning_rate": 6.032100310579812e-06, + "loss": 17.4224, + "step": 24663 + }, + { + "epoch": 0.4508381011570731, + "grad_norm": 4.9729292296281935, + "learning_rate": 6.031810672295275e-06, + "loss": 16.9422, + "step": 24664 + }, + { + "epoch": 0.45085638035351966, + "grad_norm": 7.196499713694455, + 
"learning_rate": 6.03152103039433e-06, + "loss": 18.2755, + "step": 24665 + }, + { + "epoch": 0.4508746595499662, + "grad_norm": 6.45490234704856, + "learning_rate": 6.0312313848779965e-06, + "loss": 17.451, + "step": 24666 + }, + { + "epoch": 0.45089293874641273, + "grad_norm": 6.236169856996404, + "learning_rate": 6.030941735747285e-06, + "loss": 17.2154, + "step": 24667 + }, + { + "epoch": 0.4509112179428592, + "grad_norm": 6.194245727873289, + "learning_rate": 6.0306520830032124e-06, + "loss": 17.4885, + "step": 24668 + }, + { + "epoch": 0.45092949713930575, + "grad_norm": 6.4940184288310565, + "learning_rate": 6.030362426646793e-06, + "loss": 17.4401, + "step": 24669 + }, + { + "epoch": 0.4509477763357523, + "grad_norm": 6.039570888625534, + "learning_rate": 6.030072766679044e-06, + "loss": 17.329, + "step": 24670 + }, + { + "epoch": 0.4509660555321988, + "grad_norm": 7.073103273252511, + "learning_rate": 6.029783103100978e-06, + "loss": 17.642, + "step": 24671 + }, + { + "epoch": 0.45098433472864535, + "grad_norm": 8.236292636508232, + "learning_rate": 6.029493435913611e-06, + "loss": 18.2369, + "step": 24672 + }, + { + "epoch": 0.45100261392509183, + "grad_norm": 7.841646819805741, + "learning_rate": 6.029203765117961e-06, + "loss": 17.8693, + "step": 24673 + }, + { + "epoch": 0.45102089312153837, + "grad_norm": 5.856147196076327, + "learning_rate": 6.02891409071504e-06, + "loss": 17.5565, + "step": 24674 + }, + { + "epoch": 0.4510391723179849, + "grad_norm": 7.68190603764835, + "learning_rate": 6.028624412705863e-06, + "loss": 17.8626, + "step": 24675 + }, + { + "epoch": 0.45105745151443144, + "grad_norm": 5.343253256839253, + "learning_rate": 6.0283347310914485e-06, + "loss": 17.0762, + "step": 24676 + }, + { + "epoch": 0.451075730710878, + "grad_norm": 5.886742429105178, + "learning_rate": 6.028045045872811e-06, + "loss": 17.1906, + "step": 24677 + }, + { + "epoch": 0.45109400990732446, + "grad_norm": 7.367978120929744, + "learning_rate": 6.027755357050964e-06, + "loss": 17.5718, + "step": 24678 + }, + { + "epoch": 0.451112289103771, + "grad_norm": 7.035319286772534, + "learning_rate": 6.0274656646269215e-06, + "loss": 17.4689, + "step": 24679 + }, + { + "epoch": 0.4511305683002175, + "grad_norm": 5.245110714669578, + "learning_rate": 6.027175968601704e-06, + "loss": 16.8598, + "step": 24680 + }, + { + "epoch": 0.45114884749666406, + "grad_norm": 7.052759396033848, + "learning_rate": 6.026886268976322e-06, + "loss": 17.6692, + "step": 24681 + }, + { + "epoch": 0.4511671266931106, + "grad_norm": 6.680488552848285, + "learning_rate": 6.026596565751794e-06, + "loss": 17.6147, + "step": 24682 + }, + { + "epoch": 0.4511854058895571, + "grad_norm": 8.496915746510703, + "learning_rate": 6.026306858929133e-06, + "loss": 18.4555, + "step": 24683 + }, + { + "epoch": 0.4512036850860036, + "grad_norm": 6.630456798807599, + "learning_rate": 6.026017148509355e-06, + "loss": 17.6023, + "step": 24684 + }, + { + "epoch": 0.45122196428245015, + "grad_norm": 8.436450617612355, + "learning_rate": 6.025727434493477e-06, + "loss": 17.9402, + "step": 24685 + }, + { + "epoch": 0.4512402434788967, + "grad_norm": 6.927448939750945, + "learning_rate": 6.025437716882513e-06, + "loss": 17.8359, + "step": 24686 + }, + { + "epoch": 0.45125852267534317, + "grad_norm": 6.830529479138461, + "learning_rate": 6.02514799567748e-06, + "loss": 17.8449, + "step": 24687 + }, + { + "epoch": 0.4512768018717897, + "grad_norm": 4.461113871469783, + "learning_rate": 6.02485827087939e-06, + "loss": 16.8073, + "step": 24688 + 
}, + { + "epoch": 0.45129508106823624, + "grad_norm": 6.770820637432052, + "learning_rate": 6.024568542489262e-06, + "loss": 17.6159, + "step": 24689 + }, + { + "epoch": 0.4513133602646828, + "grad_norm": 6.644486299362443, + "learning_rate": 6.0242788105081106e-06, + "loss": 17.5468, + "step": 24690 + }, + { + "epoch": 0.4513316394611293, + "grad_norm": 7.095053551366182, + "learning_rate": 6.023989074936951e-06, + "loss": 17.7514, + "step": 24691 + }, + { + "epoch": 0.4513499186575758, + "grad_norm": 5.998084138138065, + "learning_rate": 6.0236993357767955e-06, + "loss": 17.3063, + "step": 24692 + }, + { + "epoch": 0.4513681978540223, + "grad_norm": 7.585781276479587, + "learning_rate": 6.023409593028666e-06, + "loss": 18.1675, + "step": 24693 + }, + { + "epoch": 0.45138647705046886, + "grad_norm": 6.2665781415529365, + "learning_rate": 6.0231198466935745e-06, + "loss": 17.5643, + "step": 24694 + }, + { + "epoch": 0.4514047562469154, + "grad_norm": 7.750558703464514, + "learning_rate": 6.0228300967725365e-06, + "loss": 17.747, + "step": 24695 + }, + { + "epoch": 0.45142303544336193, + "grad_norm": 7.602132480396472, + "learning_rate": 6.022540343266566e-06, + "loss": 17.7959, + "step": 24696 + }, + { + "epoch": 0.4514413146398084, + "grad_norm": 7.7349738302042566, + "learning_rate": 6.022250586176683e-06, + "loss": 17.7687, + "step": 24697 + }, + { + "epoch": 0.45145959383625495, + "grad_norm": 6.355945855864465, + "learning_rate": 6.021960825503897e-06, + "loss": 17.4123, + "step": 24698 + }, + { + "epoch": 0.4514778730327015, + "grad_norm": 6.6578356518514, + "learning_rate": 6.021671061249229e-06, + "loss": 17.4073, + "step": 24699 + }, + { + "epoch": 0.451496152229148, + "grad_norm": 5.713505249244532, + "learning_rate": 6.021381293413693e-06, + "loss": 17.1641, + "step": 24700 + }, + { + "epoch": 0.45151443142559455, + "grad_norm": 5.624758350692372, + "learning_rate": 6.021091521998304e-06, + "loss": 17.1406, + "step": 24701 + }, + { + "epoch": 0.45153271062204103, + "grad_norm": 5.948590414234612, + "learning_rate": 6.020801747004077e-06, + "loss": 17.2232, + "step": 24702 + }, + { + "epoch": 0.45155098981848757, + "grad_norm": 6.155157389616313, + "learning_rate": 6.020511968432029e-06, + "loss": 17.4346, + "step": 24703 + }, + { + "epoch": 0.4515692690149341, + "grad_norm": 5.474975628502764, + "learning_rate": 6.020222186283175e-06, + "loss": 17.1878, + "step": 24704 + }, + { + "epoch": 0.45158754821138064, + "grad_norm": 5.084933908634606, + "learning_rate": 6.019932400558531e-06, + "loss": 17.007, + "step": 24705 + }, + { + "epoch": 0.4516058274078272, + "grad_norm": 5.23240276581489, + "learning_rate": 6.019642611259111e-06, + "loss": 17.048, + "step": 24706 + }, + { + "epoch": 0.45162410660427366, + "grad_norm": 6.451885620662344, + "learning_rate": 6.019352818385934e-06, + "loss": 17.3354, + "step": 24707 + }, + { + "epoch": 0.4516423858007202, + "grad_norm": 8.407424004543389, + "learning_rate": 6.019063021940014e-06, + "loss": 17.9548, + "step": 24708 + }, + { + "epoch": 0.4516606649971667, + "grad_norm": 5.788778329765934, + "learning_rate": 6.018773221922366e-06, + "loss": 17.2667, + "step": 24709 + }, + { + "epoch": 0.45167894419361326, + "grad_norm": 6.064469964372707, + "learning_rate": 6.018483418334006e-06, + "loss": 17.3951, + "step": 24710 + }, + { + "epoch": 0.4516972233900598, + "grad_norm": 8.66837895395117, + "learning_rate": 6.01819361117595e-06, + "loss": 18.1703, + "step": 24711 + }, + { + "epoch": 0.4517155025865063, + "grad_norm": 6.830167778453176, + 
"learning_rate": 6.0179038004492144e-06, + "loss": 17.4709, + "step": 24712 + }, + { + "epoch": 0.4517337817829528, + "grad_norm": 6.965211136666187, + "learning_rate": 6.017613986154813e-06, + "loss": 17.6896, + "step": 24713 + }, + { + "epoch": 0.45175206097939935, + "grad_norm": 6.262331316183803, + "learning_rate": 6.017324168293763e-06, + "loss": 17.2691, + "step": 24714 + }, + { + "epoch": 0.4517703401758459, + "grad_norm": 8.443516784987569, + "learning_rate": 6.017034346867081e-06, + "loss": 17.7252, + "step": 24715 + }, + { + "epoch": 0.4517886193722924, + "grad_norm": 6.064474784741955, + "learning_rate": 6.016744521875782e-06, + "loss": 17.6041, + "step": 24716 + }, + { + "epoch": 0.4518068985687389, + "grad_norm": 7.668158594365066, + "learning_rate": 6.01645469332088e-06, + "loss": 18.2217, + "step": 24717 + }, + { + "epoch": 0.45182517776518544, + "grad_norm": 6.322105330152482, + "learning_rate": 6.016164861203395e-06, + "loss": 17.5224, + "step": 24718 + }, + { + "epoch": 0.451843456961632, + "grad_norm": 7.438910802051069, + "learning_rate": 6.015875025524338e-06, + "loss": 18.0336, + "step": 24719 + }, + { + "epoch": 0.4518617361580785, + "grad_norm": 6.704452739777389, + "learning_rate": 6.015585186284728e-06, + "loss": 17.4795, + "step": 24720 + }, + { + "epoch": 0.451880015354525, + "grad_norm": 6.718555993230533, + "learning_rate": 6.015295343485581e-06, + "loss": 17.8028, + "step": 24721 + }, + { + "epoch": 0.4518982945509715, + "grad_norm": 5.925313793829689, + "learning_rate": 6.015005497127911e-06, + "loss": 17.2797, + "step": 24722 + }, + { + "epoch": 0.45191657374741806, + "grad_norm": 6.746912088623491, + "learning_rate": 6.014715647212736e-06, + "loss": 17.4472, + "step": 24723 + }, + { + "epoch": 0.4519348529438646, + "grad_norm": 6.928342340989377, + "learning_rate": 6.014425793741068e-06, + "loss": 17.8301, + "step": 24724 + }, + { + "epoch": 0.45195313214031113, + "grad_norm": 6.042923310933138, + "learning_rate": 6.014135936713928e-06, + "loss": 17.3002, + "step": 24725 + }, + { + "epoch": 0.4519714113367576, + "grad_norm": 8.578425695372713, + "learning_rate": 6.013846076132329e-06, + "loss": 18.3965, + "step": 24726 + }, + { + "epoch": 0.45198969053320415, + "grad_norm": 6.111551031387784, + "learning_rate": 6.013556211997286e-06, + "loss": 17.0901, + "step": 24727 + }, + { + "epoch": 0.4520079697296507, + "grad_norm": 5.5082290783231045, + "learning_rate": 6.01326634430982e-06, + "loss": 17.04, + "step": 24728 + }, + { + "epoch": 0.4520262489260972, + "grad_norm": 4.90541389879834, + "learning_rate": 6.01297647307094e-06, + "loss": 17.063, + "step": 24729 + }, + { + "epoch": 0.45204452812254375, + "grad_norm": 5.598993418730989, + "learning_rate": 6.012686598281666e-06, + "loss": 17.3291, + "step": 24730 + }, + { + "epoch": 0.45206280731899023, + "grad_norm": 7.057060538631161, + "learning_rate": 6.012396719943014e-06, + "loss": 17.4468, + "step": 24731 + }, + { + "epoch": 0.45208108651543677, + "grad_norm": 6.644628032127668, + "learning_rate": 6.012106838056001e-06, + "loss": 17.9096, + "step": 24732 + }, + { + "epoch": 0.4520993657118833, + "grad_norm": 6.235272183127271, + "learning_rate": 6.011816952621639e-06, + "loss": 17.4477, + "step": 24733 + }, + { + "epoch": 0.45211764490832984, + "grad_norm": 7.395452813459593, + "learning_rate": 6.011527063640946e-06, + "loss": 18.0175, + "step": 24734 + }, + { + "epoch": 0.4521359241047764, + "grad_norm": 6.41941944832454, + "learning_rate": 6.011237171114941e-06, + "loss": 17.355, + "step": 24735 + }, + 
{ + "epoch": 0.45215420330122286, + "grad_norm": 6.074373312847202, + "learning_rate": 6.010947275044635e-06, + "loss": 17.3462, + "step": 24736 + }, + { + "epoch": 0.4521724824976694, + "grad_norm": 7.89500808482335, + "learning_rate": 6.010657375431047e-06, + "loss": 18.4106, + "step": 24737 + }, + { + "epoch": 0.45219076169411593, + "grad_norm": 7.327090196334108, + "learning_rate": 6.010367472275192e-06, + "loss": 17.7476, + "step": 24738 + }, + { + "epoch": 0.45220904089056246, + "grad_norm": 5.418767125407057, + "learning_rate": 6.010077565578088e-06, + "loss": 17.0018, + "step": 24739 + }, + { + "epoch": 0.452227320087009, + "grad_norm": 5.740834123665435, + "learning_rate": 6.009787655340751e-06, + "loss": 17.1866, + "step": 24740 + }, + { + "epoch": 0.4522455992834555, + "grad_norm": 5.682718616324278, + "learning_rate": 6.009497741564194e-06, + "loss": 17.313, + "step": 24741 + }, + { + "epoch": 0.452263878479902, + "grad_norm": 5.373888396178508, + "learning_rate": 6.009207824249435e-06, + "loss": 17.0231, + "step": 24742 + }, + { + "epoch": 0.45228215767634855, + "grad_norm": 6.381700126000964, + "learning_rate": 6.008917903397491e-06, + "loss": 17.6324, + "step": 24743 + }, + { + "epoch": 0.4523004368727951, + "grad_norm": 5.812652938728273, + "learning_rate": 6.008627979009376e-06, + "loss": 17.3316, + "step": 24744 + }, + { + "epoch": 0.4523187160692416, + "grad_norm": 8.068093935386853, + "learning_rate": 6.008338051086109e-06, + "loss": 18.2421, + "step": 24745 + }, + { + "epoch": 0.4523369952656881, + "grad_norm": 6.458507344262246, + "learning_rate": 6.008048119628705e-06, + "loss": 17.3915, + "step": 24746 + }, + { + "epoch": 0.45235527446213464, + "grad_norm": 6.150373670861808, + "learning_rate": 6.007758184638177e-06, + "loss": 17.2885, + "step": 24747 + }, + { + "epoch": 0.4523735536585812, + "grad_norm": 5.755699726914589, + "learning_rate": 6.007468246115545e-06, + "loss": 17.2085, + "step": 24748 + }, + { + "epoch": 0.4523918328550277, + "grad_norm": 7.056788174499082, + "learning_rate": 6.007178304061827e-06, + "loss": 17.987, + "step": 24749 + }, + { + "epoch": 0.45241011205147424, + "grad_norm": 6.359862268820703, + "learning_rate": 6.0068883584780336e-06, + "loss": 17.4381, + "step": 24750 + }, + { + "epoch": 0.4524283912479207, + "grad_norm": 6.187298269703934, + "learning_rate": 6.006598409365185e-06, + "loss": 17.451, + "step": 24751 + }, + { + "epoch": 0.45244667044436726, + "grad_norm": 5.255808732439555, + "learning_rate": 6.006308456724296e-06, + "loss": 17.0127, + "step": 24752 + }, + { + "epoch": 0.4524649496408138, + "grad_norm": 5.954886878936499, + "learning_rate": 6.006018500556383e-06, + "loss": 17.5082, + "step": 24753 + }, + { + "epoch": 0.45248322883726033, + "grad_norm": 7.1927315341034905, + "learning_rate": 6.005728540862462e-06, + "loss": 17.5332, + "step": 24754 + }, + { + "epoch": 0.4525015080337068, + "grad_norm": 7.240682473726163, + "learning_rate": 6.005438577643551e-06, + "loss": 17.4798, + "step": 24755 + }, + { + "epoch": 0.45251978723015335, + "grad_norm": 7.584413677100589, + "learning_rate": 6.005148610900664e-06, + "loss": 18.0481, + "step": 24756 + }, + { + "epoch": 0.4525380664265999, + "grad_norm": 5.44395975670243, + "learning_rate": 6.004858640634819e-06, + "loss": 16.9502, + "step": 24757 + }, + { + "epoch": 0.4525563456230464, + "grad_norm": 6.467104458156888, + "learning_rate": 6.00456866684703e-06, + "loss": 17.4797, + "step": 24758 + }, + { + "epoch": 0.45257462481949295, + "grad_norm": 6.479292435086554, + 
"learning_rate": 6.004278689538319e-06, + "loss": 17.5469, + "step": 24759 + }, + { + "epoch": 0.45259290401593943, + "grad_norm": 6.200094013034941, + "learning_rate": 6.003988708709694e-06, + "loss": 17.6831, + "step": 24760 + }, + { + "epoch": 0.45261118321238597, + "grad_norm": 6.3042271961249305, + "learning_rate": 6.003698724362177e-06, + "loss": 17.4735, + "step": 24761 + }, + { + "epoch": 0.4526294624088325, + "grad_norm": 6.796098419122103, + "learning_rate": 6.003408736496784e-06, + "loss": 17.4488, + "step": 24762 + }, + { + "epoch": 0.45264774160527904, + "grad_norm": 6.787563180152649, + "learning_rate": 6.0031187451145314e-06, + "loss": 17.9, + "step": 24763 + }, + { + "epoch": 0.4526660208017256, + "grad_norm": 5.483334221140463, + "learning_rate": 6.002828750216433e-06, + "loss": 17.3169, + "step": 24764 + }, + { + "epoch": 0.45268429999817206, + "grad_norm": 5.447648707127053, + "learning_rate": 6.002538751803505e-06, + "loss": 17.0144, + "step": 24765 + }, + { + "epoch": 0.4527025791946186, + "grad_norm": 7.920318139622635, + "learning_rate": 6.002248749876769e-06, + "loss": 17.6761, + "step": 24766 + }, + { + "epoch": 0.45272085839106513, + "grad_norm": 6.695960442495997, + "learning_rate": 6.001958744437237e-06, + "loss": 17.4054, + "step": 24767 + }, + { + "epoch": 0.45273913758751166, + "grad_norm": 5.701863915732816, + "learning_rate": 6.001668735485926e-06, + "loss": 17.2778, + "step": 24768 + }, + { + "epoch": 0.4527574167839582, + "grad_norm": 6.932504052318536, + "learning_rate": 6.001378723023854e-06, + "loss": 17.5723, + "step": 24769 + }, + { + "epoch": 0.4527756959804047, + "grad_norm": 7.430116870954375, + "learning_rate": 6.001088707052035e-06, + "loss": 17.4729, + "step": 24770 + }, + { + "epoch": 0.4527939751768512, + "grad_norm": 5.385734016428016, + "learning_rate": 6.000798687571487e-06, + "loss": 17.1351, + "step": 24771 + }, + { + "epoch": 0.45281225437329775, + "grad_norm": 5.242606336630852, + "learning_rate": 6.0005086645832276e-06, + "loss": 16.9974, + "step": 24772 + }, + { + "epoch": 0.4528305335697443, + "grad_norm": 6.5685110889576475, + "learning_rate": 6.000218638088273e-06, + "loss": 17.6213, + "step": 24773 + }, + { + "epoch": 0.4528488127661908, + "grad_norm": 5.716236414320557, + "learning_rate": 5.999928608087637e-06, + "loss": 17.3367, + "step": 24774 + }, + { + "epoch": 0.4528670919626373, + "grad_norm": 8.564552218972441, + "learning_rate": 5.999638574582338e-06, + "loss": 18.2329, + "step": 24775 + }, + { + "epoch": 0.45288537115908384, + "grad_norm": 7.301717290044244, + "learning_rate": 5.999348537573394e-06, + "loss": 17.4011, + "step": 24776 + }, + { + "epoch": 0.4529036503555304, + "grad_norm": 6.38934496240644, + "learning_rate": 5.99905849706182e-06, + "loss": 17.6334, + "step": 24777 + }, + { + "epoch": 0.4529219295519769, + "grad_norm": 5.2947761883209905, + "learning_rate": 5.998768453048632e-06, + "loss": 17.122, + "step": 24778 + }, + { + "epoch": 0.45294020874842345, + "grad_norm": 8.074965858889485, + "learning_rate": 5.998478405534845e-06, + "loss": 17.8879, + "step": 24779 + }, + { + "epoch": 0.4529584879448699, + "grad_norm": 6.699026954328017, + "learning_rate": 5.998188354521481e-06, + "loss": 17.7634, + "step": 24780 + }, + { + "epoch": 0.45297676714131646, + "grad_norm": 6.182307564133946, + "learning_rate": 5.997898300009554e-06, + "loss": 17.4829, + "step": 24781 + }, + { + "epoch": 0.452995046337763, + "grad_norm": 6.368049414384359, + "learning_rate": 5.997608242000078e-06, + "loss": 17.3981, + "step": 
24782 + }, + { + "epoch": 0.45301332553420953, + "grad_norm": 6.191705584338558, + "learning_rate": 5.997318180494071e-06, + "loss": 17.2676, + "step": 24783 + }, + { + "epoch": 0.45303160473065607, + "grad_norm": 5.93275458193394, + "learning_rate": 5.997028115492552e-06, + "loss": 17.4764, + "step": 24784 + }, + { + "epoch": 0.45304988392710255, + "grad_norm": 4.5247068484507444, + "learning_rate": 5.996738046996535e-06, + "loss": 16.7954, + "step": 24785 + }, + { + "epoch": 0.4530681631235491, + "grad_norm": 7.902685526078171, + "learning_rate": 5.99644797500704e-06, + "loss": 17.3129, + "step": 24786 + }, + { + "epoch": 0.4530864423199956, + "grad_norm": 6.06325082023824, + "learning_rate": 5.996157899525078e-06, + "loss": 17.2844, + "step": 24787 + }, + { + "epoch": 0.45310472151644215, + "grad_norm": 7.628112177345854, + "learning_rate": 5.995867820551671e-06, + "loss": 17.9469, + "step": 24788 + }, + { + "epoch": 0.45312300071288864, + "grad_norm": 7.873043759885314, + "learning_rate": 5.995577738087832e-06, + "loss": 17.643, + "step": 24789 + }, + { + "epoch": 0.45314127990933517, + "grad_norm": 6.797267503432025, + "learning_rate": 5.995287652134583e-06, + "loss": 17.6177, + "step": 24790 + }, + { + "epoch": 0.4531595591057817, + "grad_norm": 6.406551669096651, + "learning_rate": 5.994997562692934e-06, + "loss": 17.3904, + "step": 24791 + }, + { + "epoch": 0.45317783830222824, + "grad_norm": 5.9040284706172335, + "learning_rate": 5.994707469763904e-06, + "loss": 17.4223, + "step": 24792 + }, + { + "epoch": 0.4531961174986748, + "grad_norm": 7.130237826587623, + "learning_rate": 5.9944173733485125e-06, + "loss": 17.6801, + "step": 24793 + }, + { + "epoch": 0.45321439669512126, + "grad_norm": 6.8328317591233825, + "learning_rate": 5.994127273447775e-06, + "loss": 17.64, + "step": 24794 + }, + { + "epoch": 0.4532326758915678, + "grad_norm": 8.590765023395658, + "learning_rate": 5.993837170062708e-06, + "loss": 18.3209, + "step": 24795 + }, + { + "epoch": 0.45325095508801433, + "grad_norm": 6.8864481963997015, + "learning_rate": 5.993547063194326e-06, + "loss": 17.5819, + "step": 24796 + }, + { + "epoch": 0.45326923428446086, + "grad_norm": 5.636764255944445, + "learning_rate": 5.993256952843648e-06, + "loss": 17.1017, + "step": 24797 + }, + { + "epoch": 0.4532875134809074, + "grad_norm": 5.544907470650895, + "learning_rate": 5.992966839011691e-06, + "loss": 17.2787, + "step": 24798 + }, + { + "epoch": 0.4533057926773539, + "grad_norm": 6.258601438049628, + "learning_rate": 5.992676721699472e-06, + "loss": 17.5772, + "step": 24799 + }, + { + "epoch": 0.4533240718738004, + "grad_norm": 7.644932221665382, + "learning_rate": 5.992386600908007e-06, + "loss": 17.9644, + "step": 24800 + }, + { + "epoch": 0.45334235107024695, + "grad_norm": 6.548807081424216, + "learning_rate": 5.9920964766383114e-06, + "loss": 17.5698, + "step": 24801 + }, + { + "epoch": 0.4533606302666935, + "grad_norm": 6.928747559619665, + "learning_rate": 5.991806348891406e-06, + "loss": 17.5953, + "step": 24802 + }, + { + "epoch": 0.45337890946314, + "grad_norm": 6.24419831583202, + "learning_rate": 5.991516217668304e-06, + "loss": 17.7199, + "step": 24803 + }, + { + "epoch": 0.4533971886595865, + "grad_norm": 4.78474495283371, + "learning_rate": 5.991226082970025e-06, + "loss": 17.0338, + "step": 24804 + }, + { + "epoch": 0.45341546785603304, + "grad_norm": 5.934566945253231, + "learning_rate": 5.9909359447975845e-06, + "loss": 17.2925, + "step": 24805 + }, + { + "epoch": 0.4534337470524796, + "grad_norm": 
6.538932399172217, + "learning_rate": 5.990645803151998e-06, + "loss": 17.7339, + "step": 24806 + }, + { + "epoch": 0.4534520262489261, + "grad_norm": 7.162529326065977, + "learning_rate": 5.990355658034285e-06, + "loss": 17.5858, + "step": 24807 + }, + { + "epoch": 0.45347030544537265, + "grad_norm": 7.854321443510832, + "learning_rate": 5.990065509445462e-06, + "loss": 18.0197, + "step": 24808 + }, + { + "epoch": 0.4534885846418191, + "grad_norm": 8.937205810721213, + "learning_rate": 5.989775357386544e-06, + "loss": 18.1315, + "step": 24809 + }, + { + "epoch": 0.45350686383826566, + "grad_norm": 5.685135286358327, + "learning_rate": 5.989485201858549e-06, + "loss": 17.2512, + "step": 24810 + }, + { + "epoch": 0.4535251430347122, + "grad_norm": 6.0497262468242425, + "learning_rate": 5.989195042862495e-06, + "loss": 17.3527, + "step": 24811 + }, + { + "epoch": 0.45354342223115873, + "grad_norm": 7.0295077846462934, + "learning_rate": 5.988904880399398e-06, + "loss": 17.764, + "step": 24812 + }, + { + "epoch": 0.45356170142760527, + "grad_norm": 6.518605241306475, + "learning_rate": 5.988614714470276e-06, + "loss": 17.5902, + "step": 24813 + }, + { + "epoch": 0.45357998062405175, + "grad_norm": 5.870952484450353, + "learning_rate": 5.988324545076144e-06, + "loss": 17.538, + "step": 24814 + }, + { + "epoch": 0.4535982598204983, + "grad_norm": 5.238022253591621, + "learning_rate": 5.988034372218021e-06, + "loss": 17.2232, + "step": 24815 + }, + { + "epoch": 0.4536165390169448, + "grad_norm": 6.168754075001521, + "learning_rate": 5.987744195896923e-06, + "loss": 17.3962, + "step": 24816 + }, + { + "epoch": 0.45363481821339136, + "grad_norm": 6.673533910286118, + "learning_rate": 5.987454016113867e-06, + "loss": 17.8792, + "step": 24817 + }, + { + "epoch": 0.4536530974098379, + "grad_norm": 7.751066565956767, + "learning_rate": 5.9871638328698725e-06, + "loss": 18.1194, + "step": 24818 + }, + { + "epoch": 0.45367137660628437, + "grad_norm": 6.507359471953103, + "learning_rate": 5.986873646165951e-06, + "loss": 17.6494, + "step": 24819 + }, + { + "epoch": 0.4536896558027309, + "grad_norm": 8.920332758130165, + "learning_rate": 5.986583456003124e-06, + "loss": 18.2551, + "step": 24820 + }, + { + "epoch": 0.45370793499917744, + "grad_norm": 7.692489007374717, + "learning_rate": 5.98629326238241e-06, + "loss": 18.1621, + "step": 24821 + }, + { + "epoch": 0.453726214195624, + "grad_norm": 6.3461456362686635, + "learning_rate": 5.986003065304822e-06, + "loss": 17.6561, + "step": 24822 + }, + { + "epoch": 0.45374449339207046, + "grad_norm": 7.554931960332167, + "learning_rate": 5.985712864771378e-06, + "loss": 17.7931, + "step": 24823 + }, + { + "epoch": 0.453762772588517, + "grad_norm": 5.62834503111923, + "learning_rate": 5.9854226607830955e-06, + "loss": 17.187, + "step": 24824 + }, + { + "epoch": 0.45378105178496353, + "grad_norm": 7.30912711086053, + "learning_rate": 5.985132453340995e-06, + "loss": 17.793, + "step": 24825 + }, + { + "epoch": 0.45379933098141007, + "grad_norm": 6.51726859739539, + "learning_rate": 5.9848422424460895e-06, + "loss": 17.8442, + "step": 24826 + }, + { + "epoch": 0.4538176101778566, + "grad_norm": 7.22422985826368, + "learning_rate": 5.984552028099396e-06, + "loss": 17.9982, + "step": 24827 + }, + { + "epoch": 0.4538358893743031, + "grad_norm": 7.219433123503709, + "learning_rate": 5.984261810301935e-06, + "loss": 17.3976, + "step": 24828 + }, + { + "epoch": 0.4538541685707496, + "grad_norm": 6.654532646470851, + "learning_rate": 5.98397158905472e-06, + "loss": 
17.8668, + "step": 24829 + }, + { + "epoch": 0.45387244776719615, + "grad_norm": 6.292491359443311, + "learning_rate": 5.983681364358771e-06, + "loss": 17.4518, + "step": 24830 + }, + { + "epoch": 0.4538907269636427, + "grad_norm": 7.276505528291034, + "learning_rate": 5.983391136215104e-06, + "loss": 17.998, + "step": 24831 + }, + { + "epoch": 0.4539090061600892, + "grad_norm": 7.740058123791623, + "learning_rate": 5.983100904624737e-06, + "loss": 18.0715, + "step": 24832 + }, + { + "epoch": 0.4539272853565357, + "grad_norm": 6.70546100657501, + "learning_rate": 5.982810669588685e-06, + "loss": 17.5629, + "step": 24833 + }, + { + "epoch": 0.45394556455298224, + "grad_norm": 6.932532836554571, + "learning_rate": 5.982520431107968e-06, + "loss": 17.6811, + "step": 24834 + }, + { + "epoch": 0.4539638437494288, + "grad_norm": 5.7676651583805025, + "learning_rate": 5.982230189183602e-06, + "loss": 17.3768, + "step": 24835 + }, + { + "epoch": 0.4539821229458753, + "grad_norm": 8.112885332345646, + "learning_rate": 5.981939943816605e-06, + "loss": 17.9777, + "step": 24836 + }, + { + "epoch": 0.45400040214232185, + "grad_norm": 5.899663538561533, + "learning_rate": 5.981649695007993e-06, + "loss": 17.3216, + "step": 24837 + }, + { + "epoch": 0.4540186813387683, + "grad_norm": 7.48592461627228, + "learning_rate": 5.981359442758783e-06, + "loss": 17.8734, + "step": 24838 + }, + { + "epoch": 0.45403696053521486, + "grad_norm": 7.987240402143579, + "learning_rate": 5.981069187069996e-06, + "loss": 18.1016, + "step": 24839 + }, + { + "epoch": 0.4540552397316614, + "grad_norm": 5.7988877873438724, + "learning_rate": 5.980778927942644e-06, + "loss": 17.1847, + "step": 24840 + }, + { + "epoch": 0.45407351892810793, + "grad_norm": 7.3804228309967135, + "learning_rate": 5.980488665377748e-06, + "loss": 17.8344, + "step": 24841 + }, + { + "epoch": 0.45409179812455447, + "grad_norm": 5.739488152069684, + "learning_rate": 5.980198399376325e-06, + "loss": 17.1668, + "step": 24842 + }, + { + "epoch": 0.45411007732100095, + "grad_norm": 6.449068281897217, + "learning_rate": 5.979908129939391e-06, + "loss": 17.4461, + "step": 24843 + }, + { + "epoch": 0.4541283565174475, + "grad_norm": 6.511989432260754, + "learning_rate": 5.979617857067964e-06, + "loss": 17.6251, + "step": 24844 + }, + { + "epoch": 0.454146635713894, + "grad_norm": 7.184755907382845, + "learning_rate": 5.979327580763062e-06, + "loss": 17.7948, + "step": 24845 + }, + { + "epoch": 0.45416491491034056, + "grad_norm": 7.397502154523116, + "learning_rate": 5.979037301025701e-06, + "loss": 17.9996, + "step": 24846 + }, + { + "epoch": 0.4541831941067871, + "grad_norm": 7.44664159389226, + "learning_rate": 5.978747017856898e-06, + "loss": 17.6774, + "step": 24847 + }, + { + "epoch": 0.45420147330323357, + "grad_norm": 6.625820805484883, + "learning_rate": 5.978456731257674e-06, + "loss": 17.3918, + "step": 24848 + }, + { + "epoch": 0.4542197524996801, + "grad_norm": 7.660520868003206, + "learning_rate": 5.978166441229044e-06, + "loss": 17.7056, + "step": 24849 + }, + { + "epoch": 0.45423803169612664, + "grad_norm": 6.622079502102471, + "learning_rate": 5.977876147772025e-06, + "loss": 17.1085, + "step": 24850 + }, + { + "epoch": 0.4542563108925732, + "grad_norm": 7.282386391227921, + "learning_rate": 5.977585850887634e-06, + "loss": 18.1855, + "step": 24851 + }, + { + "epoch": 0.4542745900890197, + "grad_norm": 5.043317870253002, + "learning_rate": 5.97729555057689e-06, + "loss": 16.9436, + "step": 24852 + }, + { + "epoch": 0.4542928692854662, + 
"grad_norm": 6.813848243736479, + "learning_rate": 5.97700524684081e-06, + "loss": 17.655, + "step": 24853 + }, + { + "epoch": 0.45431114848191273, + "grad_norm": 5.891577703227318, + "learning_rate": 5.976714939680412e-06, + "loss": 17.1782, + "step": 24854 + }, + { + "epoch": 0.45432942767835927, + "grad_norm": 7.105356369781019, + "learning_rate": 5.976424629096712e-06, + "loss": 17.4883, + "step": 24855 + }, + { + "epoch": 0.4543477068748058, + "grad_norm": 7.454305661770091, + "learning_rate": 5.976134315090729e-06, + "loss": 17.7958, + "step": 24856 + }, + { + "epoch": 0.4543659860712523, + "grad_norm": 7.516791832633677, + "learning_rate": 5.97584399766348e-06, + "loss": 18.2962, + "step": 24857 + }, + { + "epoch": 0.4543842652676988, + "grad_norm": 7.0906485532158605, + "learning_rate": 5.975553676815982e-06, + "loss": 17.6407, + "step": 24858 + }, + { + "epoch": 0.45440254446414535, + "grad_norm": 7.128028939234638, + "learning_rate": 5.975263352549253e-06, + "loss": 17.8622, + "step": 24859 + }, + { + "epoch": 0.4544208236605919, + "grad_norm": 6.907942211710578, + "learning_rate": 5.97497302486431e-06, + "loss": 17.9482, + "step": 24860 + }, + { + "epoch": 0.4544391028570384, + "grad_norm": 6.368527253496372, + "learning_rate": 5.974682693762172e-06, + "loss": 17.4452, + "step": 24861 + }, + { + "epoch": 0.4544573820534849, + "grad_norm": 8.097922008715956, + "learning_rate": 5.9743923592438555e-06, + "loss": 17.9609, + "step": 24862 + }, + { + "epoch": 0.45447566124993144, + "grad_norm": 6.807453022908108, + "learning_rate": 5.97410202131038e-06, + "loss": 17.8506, + "step": 24863 + }, + { + "epoch": 0.454493940446378, + "grad_norm": 6.664126366238458, + "learning_rate": 5.973811679962759e-06, + "loss": 17.4324, + "step": 24864 + }, + { + "epoch": 0.4545122196428245, + "grad_norm": 4.72076247400394, + "learning_rate": 5.973521335202013e-06, + "loss": 16.7962, + "step": 24865 + }, + { + "epoch": 0.45453049883927105, + "grad_norm": 5.934770211234447, + "learning_rate": 5.97323098702916e-06, + "loss": 17.1794, + "step": 24866 + }, + { + "epoch": 0.4545487780357175, + "grad_norm": 7.472014628053679, + "learning_rate": 5.972940635445217e-06, + "loss": 17.7042, + "step": 24867 + }, + { + "epoch": 0.45456705723216406, + "grad_norm": 5.461609249343249, + "learning_rate": 5.9726502804512e-06, + "loss": 17.0504, + "step": 24868 + }, + { + "epoch": 0.4545853364286106, + "grad_norm": 6.322879836658449, + "learning_rate": 5.97235992204813e-06, + "loss": 17.5128, + "step": 24869 + }, + { + "epoch": 0.45460361562505713, + "grad_norm": 5.059208558620764, + "learning_rate": 5.9720695602370215e-06, + "loss": 16.9362, + "step": 24870 + }, + { + "epoch": 0.45462189482150367, + "grad_norm": 7.565480030308469, + "learning_rate": 5.971779195018894e-06, + "loss": 18.1777, + "step": 24871 + }, + { + "epoch": 0.45464017401795015, + "grad_norm": 7.677649071696698, + "learning_rate": 5.971488826394764e-06, + "loss": 17.9081, + "step": 24872 + }, + { + "epoch": 0.4546584532143967, + "grad_norm": 6.457744832161168, + "learning_rate": 5.971198454365652e-06, + "loss": 17.5226, + "step": 24873 + }, + { + "epoch": 0.4546767324108432, + "grad_norm": 8.486369762666998, + "learning_rate": 5.970908078932571e-06, + "loss": 18.1025, + "step": 24874 + }, + { + "epoch": 0.45469501160728976, + "grad_norm": 7.373906463484746, + "learning_rate": 5.9706177000965434e-06, + "loss": 17.8832, + "step": 24875 + }, + { + "epoch": 0.4547132908037363, + "grad_norm": 7.7853096676110285, + "learning_rate": 5.970327317858584e-06, + 
"loss": 18.1932, + "step": 24876 + }, + { + "epoch": 0.45473157000018277, + "grad_norm": 7.584841019866278, + "learning_rate": 5.970036932219714e-06, + "loss": 18.2716, + "step": 24877 + }, + { + "epoch": 0.4547498491966293, + "grad_norm": 6.319330515581081, + "learning_rate": 5.9697465431809455e-06, + "loss": 17.874, + "step": 24878 + }, + { + "epoch": 0.45476812839307584, + "grad_norm": 6.481658860565889, + "learning_rate": 5.9694561507433e-06, + "loss": 17.3555, + "step": 24879 + }, + { + "epoch": 0.4547864075895224, + "grad_norm": 6.38656394726239, + "learning_rate": 5.969165754907796e-06, + "loss": 17.7083, + "step": 24880 + }, + { + "epoch": 0.4548046867859689, + "grad_norm": 5.605613709880605, + "learning_rate": 5.96887535567545e-06, + "loss": 17.2987, + "step": 24881 + }, + { + "epoch": 0.4548229659824154, + "grad_norm": 6.6044745124806585, + "learning_rate": 5.9685849530472795e-06, + "loss": 17.4623, + "step": 24882 + }, + { + "epoch": 0.45484124517886193, + "grad_norm": 5.850295887623415, + "learning_rate": 5.968294547024303e-06, + "loss": 17.1722, + "step": 24883 + }, + { + "epoch": 0.45485952437530847, + "grad_norm": 6.675313290934436, + "learning_rate": 5.968004137607538e-06, + "loss": 17.9645, + "step": 24884 + }, + { + "epoch": 0.454877803571755, + "grad_norm": 7.694681496036081, + "learning_rate": 5.967713724798003e-06, + "loss": 18.1642, + "step": 24885 + }, + { + "epoch": 0.45489608276820154, + "grad_norm": 7.106606312621698, + "learning_rate": 5.9674233085967145e-06, + "loss": 17.8321, + "step": 24886 + }, + { + "epoch": 0.454914361964648, + "grad_norm": 7.192350868472989, + "learning_rate": 5.967132889004692e-06, + "loss": 17.7427, + "step": 24887 + }, + { + "epoch": 0.45493264116109455, + "grad_norm": 7.930449156621662, + "learning_rate": 5.966842466022952e-06, + "loss": 18.1769, + "step": 24888 + }, + { + "epoch": 0.4549509203575411, + "grad_norm": 5.3265330405979405, + "learning_rate": 5.9665520396525135e-06, + "loss": 16.9592, + "step": 24889 + }, + { + "epoch": 0.4549691995539876, + "grad_norm": 7.141694489175982, + "learning_rate": 5.966261609894395e-06, + "loss": 17.9663, + "step": 24890 + }, + { + "epoch": 0.4549874787504341, + "grad_norm": 7.5992211434718655, + "learning_rate": 5.965971176749612e-06, + "loss": 18.0856, + "step": 24891 + }, + { + "epoch": 0.45500575794688064, + "grad_norm": 6.695374744834487, + "learning_rate": 5.965680740219183e-06, + "loss": 17.4817, + "step": 24892 + }, + { + "epoch": 0.4550240371433272, + "grad_norm": 6.740658620386362, + "learning_rate": 5.965390300304128e-06, + "loss": 17.5219, + "step": 24893 + }, + { + "epoch": 0.4550423163397737, + "grad_norm": 6.3009442508213445, + "learning_rate": 5.965099857005464e-06, + "loss": 17.2256, + "step": 24894 + }, + { + "epoch": 0.45506059553622025, + "grad_norm": 5.805336415765952, + "learning_rate": 5.9648094103242096e-06, + "loss": 17.1404, + "step": 24895 + }, + { + "epoch": 0.4550788747326667, + "grad_norm": 6.654660750961497, + "learning_rate": 5.96451896026138e-06, + "loss": 18.0301, + "step": 24896 + }, + { + "epoch": 0.45509715392911326, + "grad_norm": 6.159094027491954, + "learning_rate": 5.964228506817996e-06, + "loss": 17.8566, + "step": 24897 + }, + { + "epoch": 0.4551154331255598, + "grad_norm": 5.617800758583667, + "learning_rate": 5.963938049995075e-06, + "loss": 17.1717, + "step": 24898 + }, + { + "epoch": 0.45513371232200633, + "grad_norm": 5.684078828624881, + "learning_rate": 5.963647589793634e-06, + "loss": 17.1505, + "step": 24899 + }, + { + "epoch": 
0.45515199151845287, + "grad_norm": 6.164305056396494, + "learning_rate": 5.963357126214692e-06, + "loss": 17.1597, + "step": 24900 + }, + { + "epoch": 0.45517027071489935, + "grad_norm": 5.661822102120397, + "learning_rate": 5.963066659259267e-06, + "loss": 17.461, + "step": 24901 + }, + { + "epoch": 0.4551885499113459, + "grad_norm": 6.6575164760084835, + "learning_rate": 5.962776188928377e-06, + "loss": 17.5022, + "step": 24902 + }, + { + "epoch": 0.4552068291077924, + "grad_norm": 5.634278910422919, + "learning_rate": 5.962485715223041e-06, + "loss": 17.1694, + "step": 24903 + }, + { + "epoch": 0.45522510830423896, + "grad_norm": 6.8679635270014705, + "learning_rate": 5.962195238144275e-06, + "loss": 17.8086, + "step": 24904 + }, + { + "epoch": 0.4552433875006855, + "grad_norm": 6.154759540486805, + "learning_rate": 5.961904757693099e-06, + "loss": 17.3594, + "step": 24905 + }, + { + "epoch": 0.455261666697132, + "grad_norm": 7.605995253946768, + "learning_rate": 5.961614273870528e-06, + "loss": 17.8852, + "step": 24906 + }, + { + "epoch": 0.4552799458935785, + "grad_norm": 6.513969623764981, + "learning_rate": 5.9613237866775845e-06, + "loss": 17.4292, + "step": 24907 + }, + { + "epoch": 0.45529822509002504, + "grad_norm": 5.226876028951783, + "learning_rate": 5.961033296115285e-06, + "loss": 17.0332, + "step": 24908 + }, + { + "epoch": 0.4553165042864716, + "grad_norm": 6.233897614045115, + "learning_rate": 5.960742802184646e-06, + "loss": 17.5373, + "step": 24909 + }, + { + "epoch": 0.4553347834829181, + "grad_norm": 6.6655302509229, + "learning_rate": 5.9604523048866865e-06, + "loss": 17.2439, + "step": 24910 + }, + { + "epoch": 0.4553530626793646, + "grad_norm": 6.8305913291420985, + "learning_rate": 5.960161804222427e-06, + "loss": 17.7038, + "step": 24911 + }, + { + "epoch": 0.45537134187581113, + "grad_norm": 5.805860333489474, + "learning_rate": 5.959871300192882e-06, + "loss": 17.3428, + "step": 24912 + }, + { + "epoch": 0.45538962107225767, + "grad_norm": 6.114688002868169, + "learning_rate": 5.959580792799071e-06, + "loss": 17.515, + "step": 24913 + }, + { + "epoch": 0.4554079002687042, + "grad_norm": 6.3102515482798776, + "learning_rate": 5.959290282042014e-06, + "loss": 17.3935, + "step": 24914 + }, + { + "epoch": 0.45542617946515074, + "grad_norm": 6.762663799426242, + "learning_rate": 5.958999767922726e-06, + "loss": 17.5704, + "step": 24915 + }, + { + "epoch": 0.4554444586615972, + "grad_norm": 6.7378267892211126, + "learning_rate": 5.95870925044223e-06, + "loss": 17.7129, + "step": 24916 + }, + { + "epoch": 0.45546273785804375, + "grad_norm": 6.833504878076066, + "learning_rate": 5.958418729601538e-06, + "loss": 17.692, + "step": 24917 + }, + { + "epoch": 0.4554810170544903, + "grad_norm": 5.306666325362931, + "learning_rate": 5.958128205401674e-06, + "loss": 17.1075, + "step": 24918 + }, + { + "epoch": 0.4554992962509368, + "grad_norm": 5.969029578562306, + "learning_rate": 5.957837677843652e-06, + "loss": 17.4495, + "step": 24919 + }, + { + "epoch": 0.45551757544738336, + "grad_norm": 5.976336859409207, + "learning_rate": 5.957547146928493e-06, + "loss": 17.3549, + "step": 24920 + }, + { + "epoch": 0.45553585464382984, + "grad_norm": 5.806214475317869, + "learning_rate": 5.957256612657215e-06, + "loss": 17.2555, + "step": 24921 + }, + { + "epoch": 0.4555541338402764, + "grad_norm": 8.016121191859519, + "learning_rate": 5.956966075030834e-06, + "loss": 18.102, + "step": 24922 + }, + { + "epoch": 0.4555724130367229, + "grad_norm": 8.230586744775685, + "learning_rate": 
5.956675534050371e-06, + "loss": 18.5998, + "step": 24923 + }, + { + "epoch": 0.45559069223316945, + "grad_norm": 6.598971293214389, + "learning_rate": 5.956384989716842e-06, + "loss": 17.5706, + "step": 24924 + }, + { + "epoch": 0.4556089714296159, + "grad_norm": 6.186971604231284, + "learning_rate": 5.956094442031269e-06, + "loss": 17.494, + "step": 24925 + }, + { + "epoch": 0.45562725062606246, + "grad_norm": 7.136106957598433, + "learning_rate": 5.955803890994667e-06, + "loss": 17.9298, + "step": 24926 + }, + { + "epoch": 0.455645529822509, + "grad_norm": 5.9943177520249, + "learning_rate": 5.9555133366080545e-06, + "loss": 17.3705, + "step": 24927 + }, + { + "epoch": 0.45566380901895553, + "grad_norm": 7.222490292310578, + "learning_rate": 5.95522277887245e-06, + "loss": 17.8616, + "step": 24928 + }, + { + "epoch": 0.45568208821540207, + "grad_norm": 7.608678769348252, + "learning_rate": 5.954932217788875e-06, + "loss": 17.9525, + "step": 24929 + }, + { + "epoch": 0.45570036741184855, + "grad_norm": 7.804162846358708, + "learning_rate": 5.954641653358343e-06, + "loss": 17.8717, + "step": 24930 + }, + { + "epoch": 0.4557186466082951, + "grad_norm": 6.6028374641371625, + "learning_rate": 5.954351085581876e-06, + "loss": 17.573, + "step": 24931 + }, + { + "epoch": 0.4557369258047416, + "grad_norm": 5.98152414111835, + "learning_rate": 5.954060514460492e-06, + "loss": 17.2916, + "step": 24932 + }, + { + "epoch": 0.45575520500118816, + "grad_norm": 6.051014682198928, + "learning_rate": 5.953769939995206e-06, + "loss": 17.3404, + "step": 24933 + }, + { + "epoch": 0.4557734841976347, + "grad_norm": 6.7063809868234925, + "learning_rate": 5.953479362187041e-06, + "loss": 17.6344, + "step": 24934 + }, + { + "epoch": 0.4557917633940812, + "grad_norm": 6.408814761826627, + "learning_rate": 5.953188781037015e-06, + "loss": 17.1016, + "step": 24935 + }, + { + "epoch": 0.4558100425905277, + "grad_norm": 7.052933799014533, + "learning_rate": 5.952898196546144e-06, + "loss": 17.4834, + "step": 24936 + }, + { + "epoch": 0.45582832178697424, + "grad_norm": 7.127083901926689, + "learning_rate": 5.952607608715447e-06, + "loss": 17.5944, + "step": 24937 + }, + { + "epoch": 0.4558466009834208, + "grad_norm": 6.907721348468742, + "learning_rate": 5.952317017545941e-06, + "loss": 17.5721, + "step": 24938 + }, + { + "epoch": 0.4558648801798673, + "grad_norm": 4.945476134036083, + "learning_rate": 5.952026423038651e-06, + "loss": 16.7368, + "step": 24939 + }, + { + "epoch": 0.4558831593763138, + "grad_norm": 7.127582004948006, + "learning_rate": 5.951735825194588e-06, + "loss": 18.0291, + "step": 24940 + }, + { + "epoch": 0.45590143857276033, + "grad_norm": 7.289376816504818, + "learning_rate": 5.951445224014773e-06, + "loss": 17.4337, + "step": 24941 + }, + { + "epoch": 0.45591971776920687, + "grad_norm": 7.195187834258994, + "learning_rate": 5.951154619500227e-06, + "loss": 17.8671, + "step": 24942 + }, + { + "epoch": 0.4559379969656534, + "grad_norm": 6.080345119539842, + "learning_rate": 5.9508640116519656e-06, + "loss": 17.2807, + "step": 24943 + }, + { + "epoch": 0.45595627616209994, + "grad_norm": 7.27655914283795, + "learning_rate": 5.950573400471008e-06, + "loss": 17.7143, + "step": 24944 + }, + { + "epoch": 0.4559745553585464, + "grad_norm": 5.197007266471913, + "learning_rate": 5.950282785958373e-06, + "loss": 16.8949, + "step": 24945 + }, + { + "epoch": 0.45599283455499295, + "grad_norm": 5.502133312801913, + "learning_rate": 5.949992168115081e-06, + "loss": 17.0616, + "step": 24946 + }, + { + 
"epoch": 0.4560111137514395, + "grad_norm": 7.636449685803365, + "learning_rate": 5.949701546942147e-06, + "loss": 17.619, + "step": 24947 + }, + { + "epoch": 0.456029392947886, + "grad_norm": 6.895739600713596, + "learning_rate": 5.949410922440592e-06, + "loss": 17.6776, + "step": 24948 + }, + { + "epoch": 0.45604767214433256, + "grad_norm": 7.892914071186963, + "learning_rate": 5.9491202946114355e-06, + "loss": 17.7504, + "step": 24949 + }, + { + "epoch": 0.45606595134077904, + "grad_norm": 5.7805011601384315, + "learning_rate": 5.948829663455694e-06, + "loss": 17.2404, + "step": 24950 + }, + { + "epoch": 0.4560842305372256, + "grad_norm": 6.332919356755202, + "learning_rate": 5.948539028974385e-06, + "loss": 17.2974, + "step": 24951 + }, + { + "epoch": 0.4561025097336721, + "grad_norm": 9.809507334768115, + "learning_rate": 5.9482483911685316e-06, + "loss": 18.2558, + "step": 24952 + }, + { + "epoch": 0.45612078893011865, + "grad_norm": 6.073160875066537, + "learning_rate": 5.947957750039148e-06, + "loss": 17.3897, + "step": 24953 + }, + { + "epoch": 0.4561390681265652, + "grad_norm": 6.531167093761964, + "learning_rate": 5.947667105587256e-06, + "loss": 17.439, + "step": 24954 + }, + { + "epoch": 0.45615734732301166, + "grad_norm": 7.527818403529953, + "learning_rate": 5.947376457813873e-06, + "loss": 17.7602, + "step": 24955 + }, + { + "epoch": 0.4561756265194582, + "grad_norm": 8.235916091449743, + "learning_rate": 5.947085806720017e-06, + "loss": 17.6098, + "step": 24956 + }, + { + "epoch": 0.45619390571590474, + "grad_norm": 7.187161374228202, + "learning_rate": 5.946795152306708e-06, + "loss": 17.783, + "step": 24957 + }, + { + "epoch": 0.45621218491235127, + "grad_norm": 6.41587946281288, + "learning_rate": 5.946504494574963e-06, + "loss": 17.2936, + "step": 24958 + }, + { + "epoch": 0.45623046410879775, + "grad_norm": 5.618748298564456, + "learning_rate": 5.946213833525805e-06, + "loss": 17.2479, + "step": 24959 + }, + { + "epoch": 0.4562487433052443, + "grad_norm": 5.766928935455178, + "learning_rate": 5.945923169160245e-06, + "loss": 17.3264, + "step": 24960 + }, + { + "epoch": 0.4562670225016908, + "grad_norm": 7.84812429726118, + "learning_rate": 5.945632501479309e-06, + "loss": 17.9254, + "step": 24961 + }, + { + "epoch": 0.45628530169813736, + "grad_norm": 7.181081642174104, + "learning_rate": 5.945341830484012e-06, + "loss": 17.5853, + "step": 24962 + }, + { + "epoch": 0.4563035808945839, + "grad_norm": 6.726035651839546, + "learning_rate": 5.945051156175377e-06, + "loss": 17.7259, + "step": 24963 + }, + { + "epoch": 0.4563218600910304, + "grad_norm": 5.917198199955994, + "learning_rate": 5.944760478554416e-06, + "loss": 17.3732, + "step": 24964 + }, + { + "epoch": 0.4563401392874769, + "grad_norm": 6.127464928487141, + "learning_rate": 5.9444697976221525e-06, + "loss": 17.0653, + "step": 24965 + }, + { + "epoch": 0.45635841848392344, + "grad_norm": 8.204374634512021, + "learning_rate": 5.944179113379606e-06, + "loss": 18.1827, + "step": 24966 + }, + { + "epoch": 0.45637669768037, + "grad_norm": 6.621224850233595, + "learning_rate": 5.943888425827793e-06, + "loss": 17.4072, + "step": 24967 + }, + { + "epoch": 0.4563949768768165, + "grad_norm": 6.47726189934075, + "learning_rate": 5.943597734967732e-06, + "loss": 17.6277, + "step": 24968 + }, + { + "epoch": 0.456413256073263, + "grad_norm": 6.388248961482317, + "learning_rate": 5.943307040800443e-06, + "loss": 17.3201, + "step": 24969 + }, + { + "epoch": 0.45643153526970953, + "grad_norm": 6.423716528385374, + 
"learning_rate": 5.943016343326945e-06, + "loss": 17.679, + "step": 24970 + }, + { + "epoch": 0.45644981446615607, + "grad_norm": 5.6762539725291195, + "learning_rate": 5.942725642548256e-06, + "loss": 17.3484, + "step": 24971 + }, + { + "epoch": 0.4564680936626026, + "grad_norm": 6.9716001442144675, + "learning_rate": 5.942434938465396e-06, + "loss": 17.838, + "step": 24972 + }, + { + "epoch": 0.45648637285904914, + "grad_norm": 5.593409809155962, + "learning_rate": 5.942144231079383e-06, + "loss": 17.1198, + "step": 24973 + }, + { + "epoch": 0.4565046520554956, + "grad_norm": 7.224772400014191, + "learning_rate": 5.941853520391237e-06, + "loss": 17.8676, + "step": 24974 + }, + { + "epoch": 0.45652293125194215, + "grad_norm": 7.4132826230866735, + "learning_rate": 5.941562806401975e-06, + "loss": 18.2126, + "step": 24975 + }, + { + "epoch": 0.4565412104483887, + "grad_norm": 5.8980284137638455, + "learning_rate": 5.941272089112617e-06, + "loss": 17.1877, + "step": 24976 + }, + { + "epoch": 0.4565594896448352, + "grad_norm": 6.153005448015386, + "learning_rate": 5.940981368524184e-06, + "loss": 17.4887, + "step": 24977 + }, + { + "epoch": 0.45657776884128176, + "grad_norm": 5.92254038898274, + "learning_rate": 5.940690644637691e-06, + "loss": 17.3755, + "step": 24978 + }, + { + "epoch": 0.45659604803772824, + "grad_norm": 6.557337023734522, + "learning_rate": 5.940399917454159e-06, + "loss": 17.6272, + "step": 24979 + }, + { + "epoch": 0.4566143272341748, + "grad_norm": 5.132409964638729, + "learning_rate": 5.940109186974609e-06, + "loss": 17.1082, + "step": 24980 + }, + { + "epoch": 0.4566326064306213, + "grad_norm": 4.745751923636569, + "learning_rate": 5.939818453200056e-06, + "loss": 16.8106, + "step": 24981 + }, + { + "epoch": 0.45665088562706785, + "grad_norm": 6.50431922714319, + "learning_rate": 5.939527716131521e-06, + "loss": 17.2606, + "step": 24982 + }, + { + "epoch": 0.4566691648235144, + "grad_norm": 7.071391533159738, + "learning_rate": 5.939236975770022e-06, + "loss": 17.753, + "step": 24983 + }, + { + "epoch": 0.45668744401996086, + "grad_norm": 6.161093548966815, + "learning_rate": 5.938946232116581e-06, + "loss": 17.1366, + "step": 24984 + }, + { + "epoch": 0.4567057232164074, + "grad_norm": 5.539176743544879, + "learning_rate": 5.9386554851722134e-06, + "loss": 17.2145, + "step": 24985 + }, + { + "epoch": 0.45672400241285394, + "grad_norm": 5.763589022567573, + "learning_rate": 5.938364734937941e-06, + "loss": 17.2451, + "step": 24986 + }, + { + "epoch": 0.45674228160930047, + "grad_norm": 6.188618538411882, + "learning_rate": 5.9380739814147805e-06, + "loss": 17.569, + "step": 24987 + }, + { + "epoch": 0.456760560805747, + "grad_norm": 7.275887982283109, + "learning_rate": 5.937783224603753e-06, + "loss": 18.0389, + "step": 24988 + }, + { + "epoch": 0.4567788400021935, + "grad_norm": 5.9095840773301, + "learning_rate": 5.937492464505875e-06, + "loss": 17.1005, + "step": 24989 + }, + { + "epoch": 0.45679711919864, + "grad_norm": 6.170282123101501, + "learning_rate": 5.937201701122171e-06, + "loss": 17.5119, + "step": 24990 + }, + { + "epoch": 0.45681539839508656, + "grad_norm": 6.781463448853069, + "learning_rate": 5.936910934453652e-06, + "loss": 17.6081, + "step": 24991 + }, + { + "epoch": 0.4568336775915331, + "grad_norm": 6.42650410498322, + "learning_rate": 5.9366201645013435e-06, + "loss": 17.6133, + "step": 24992 + }, + { + "epoch": 0.4568519567879796, + "grad_norm": 6.944239694729154, + "learning_rate": 5.936329391266261e-06, + "loss": 17.68, + "step": 24993 + 
}, + { + "epoch": 0.4568702359844261, + "grad_norm": 9.275047584937695, + "learning_rate": 5.936038614749429e-06, + "loss": 18.3214, + "step": 24994 + }, + { + "epoch": 0.45688851518087265, + "grad_norm": 6.422879899323243, + "learning_rate": 5.9357478349518595e-06, + "loss": 17.5967, + "step": 24995 + }, + { + "epoch": 0.4569067943773192, + "grad_norm": 7.40083545125522, + "learning_rate": 5.935457051874575e-06, + "loss": 18.1773, + "step": 24996 + }, + { + "epoch": 0.4569250735737657, + "grad_norm": 7.582751640422174, + "learning_rate": 5.935166265518597e-06, + "loss": 17.8034, + "step": 24997 + }, + { + "epoch": 0.4569433527702122, + "grad_norm": 5.281919500133999, + "learning_rate": 5.934875475884942e-06, + "loss": 17.1753, + "step": 24998 + }, + { + "epoch": 0.45696163196665873, + "grad_norm": 5.054218994977413, + "learning_rate": 5.9345846829746275e-06, + "loss": 16.9593, + "step": 24999 + }, + { + "epoch": 0.45697991116310527, + "grad_norm": 5.185175175409264, + "learning_rate": 5.934293886788676e-06, + "loss": 16.984, + "step": 25000 + }, + { + "epoch": 0.4569981903595518, + "grad_norm": 6.5781400783196835, + "learning_rate": 5.934003087328105e-06, + "loss": 17.9447, + "step": 25001 + }, + { + "epoch": 0.45701646955599834, + "grad_norm": 5.665450611948805, + "learning_rate": 5.933712284593936e-06, + "loss": 17.1155, + "step": 25002 + }, + { + "epoch": 0.4570347487524448, + "grad_norm": 6.668786084884377, + "learning_rate": 5.933421478587184e-06, + "loss": 17.3732, + "step": 25003 + }, + { + "epoch": 0.45705302794889135, + "grad_norm": 5.047739528366748, + "learning_rate": 5.933130669308873e-06, + "loss": 16.9366, + "step": 25004 + }, + { + "epoch": 0.4570713071453379, + "grad_norm": 5.7553338101981835, + "learning_rate": 5.932839856760018e-06, + "loss": 17.3289, + "step": 25005 + }, + { + "epoch": 0.4570895863417844, + "grad_norm": 5.205437676452599, + "learning_rate": 5.932549040941641e-06, + "loss": 16.904, + "step": 25006 + }, + { + "epoch": 0.45710786553823096, + "grad_norm": 6.927558186739496, + "learning_rate": 5.932258221854761e-06, + "loss": 17.8677, + "step": 25007 + }, + { + "epoch": 0.45712614473467744, + "grad_norm": 6.593786348312188, + "learning_rate": 5.931967399500397e-06, + "loss": 17.5772, + "step": 25008 + }, + { + "epoch": 0.457144423931124, + "grad_norm": 8.053694123470054, + "learning_rate": 5.931676573879568e-06, + "loss": 17.9786, + "step": 25009 + }, + { + "epoch": 0.4571627031275705, + "grad_norm": 5.583726033411606, + "learning_rate": 5.931385744993292e-06, + "loss": 17.0944, + "step": 25010 + }, + { + "epoch": 0.45718098232401705, + "grad_norm": 5.1863500794669015, + "learning_rate": 5.931094912842592e-06, + "loss": 16.9153, + "step": 25011 + }, + { + "epoch": 0.4571992615204636, + "grad_norm": 6.834577803973572, + "learning_rate": 5.930804077428484e-06, + "loss": 17.7712, + "step": 25012 + }, + { + "epoch": 0.45721754071691006, + "grad_norm": 5.897365251803732, + "learning_rate": 5.930513238751988e-06, + "loss": 17.4395, + "step": 25013 + }, + { + "epoch": 0.4572358199133566, + "grad_norm": 5.8493365305636535, + "learning_rate": 5.930222396814125e-06, + "loss": 17.4145, + "step": 25014 + }, + { + "epoch": 0.45725409910980314, + "grad_norm": 5.287957136092569, + "learning_rate": 5.929931551615912e-06, + "loss": 17.0397, + "step": 25015 + }, + { + "epoch": 0.45727237830624967, + "grad_norm": 7.513279019333759, + "learning_rate": 5.9296407031583705e-06, + "loss": 18.2377, + "step": 25016 + }, + { + "epoch": 0.4572906575026962, + "grad_norm": 
7.1404263870812175, + "learning_rate": 5.929349851442519e-06, + "loss": 17.5738, + "step": 25017 + }, + { + "epoch": 0.4573089366991427, + "grad_norm": 5.902897723117464, + "learning_rate": 5.929058996469377e-06, + "loss": 17.2764, + "step": 25018 + }, + { + "epoch": 0.4573272158955892, + "grad_norm": 6.950774681367328, + "learning_rate": 5.928768138239962e-06, + "loss": 17.6763, + "step": 25019 + }, + { + "epoch": 0.45734549509203576, + "grad_norm": 5.235636441851553, + "learning_rate": 5.928477276755297e-06, + "loss": 16.9038, + "step": 25020 + }, + { + "epoch": 0.4573637742884823, + "grad_norm": 5.592681693440445, + "learning_rate": 5.9281864120164e-06, + "loss": 17.149, + "step": 25021 + }, + { + "epoch": 0.45738205348492883, + "grad_norm": 6.098654773706037, + "learning_rate": 5.927895544024289e-06, + "loss": 17.6138, + "step": 25022 + }, + { + "epoch": 0.4574003326813753, + "grad_norm": 5.711515865679313, + "learning_rate": 5.927604672779985e-06, + "loss": 17.1243, + "step": 25023 + }, + { + "epoch": 0.45741861187782185, + "grad_norm": 7.063529120006668, + "learning_rate": 5.927313798284507e-06, + "loss": 17.8486, + "step": 25024 + }, + { + "epoch": 0.4574368910742684, + "grad_norm": 7.12987208995607, + "learning_rate": 5.927022920538876e-06, + "loss": 18.1592, + "step": 25025 + }, + { + "epoch": 0.4574551702707149, + "grad_norm": 6.505539588273021, + "learning_rate": 5.926732039544109e-06, + "loss": 17.3972, + "step": 25026 + }, + { + "epoch": 0.4574734494671614, + "grad_norm": 6.7104988310585645, + "learning_rate": 5.926441155301226e-06, + "loss": 17.385, + "step": 25027 + }, + { + "epoch": 0.45749172866360793, + "grad_norm": 6.991219655943665, + "learning_rate": 5.926150267811248e-06, + "loss": 17.8121, + "step": 25028 + }, + { + "epoch": 0.45751000786005447, + "grad_norm": 5.934969451134591, + "learning_rate": 5.9258593770751935e-06, + "loss": 17.486, + "step": 25029 + }, + { + "epoch": 0.457528287056501, + "grad_norm": 9.175652058688627, + "learning_rate": 5.925568483094081e-06, + "loss": 17.9355, + "step": 25030 + }, + { + "epoch": 0.45754656625294754, + "grad_norm": 6.92586047309848, + "learning_rate": 5.925277585868934e-06, + "loss": 17.6992, + "step": 25031 + }, + { + "epoch": 0.457564845449394, + "grad_norm": 7.093206602569276, + "learning_rate": 5.9249866854007685e-06, + "loss": 17.9072, + "step": 25032 + }, + { + "epoch": 0.45758312464584056, + "grad_norm": 5.559411962234882, + "learning_rate": 5.924695781690604e-06, + "loss": 17.046, + "step": 25033 + }, + { + "epoch": 0.4576014038422871, + "grad_norm": 7.940295854282515, + "learning_rate": 5.92440487473946e-06, + "loss": 17.6611, + "step": 25034 + }, + { + "epoch": 0.4576196830387336, + "grad_norm": 9.122628018645774, + "learning_rate": 5.924113964548361e-06, + "loss": 17.8055, + "step": 25035 + }, + { + "epoch": 0.45763796223518016, + "grad_norm": 6.720342112232046, + "learning_rate": 5.923823051118319e-06, + "loss": 17.6522, + "step": 25036 + }, + { + "epoch": 0.45765624143162664, + "grad_norm": 6.209130475710021, + "learning_rate": 5.923532134450358e-06, + "loss": 17.2298, + "step": 25037 + }, + { + "epoch": 0.4576745206280732, + "grad_norm": 4.943226370519743, + "learning_rate": 5.923241214545496e-06, + "loss": 16.8489, + "step": 25038 + }, + { + "epoch": 0.4576927998245197, + "grad_norm": 6.909209484041424, + "learning_rate": 5.9229502914047565e-06, + "loss": 17.4053, + "step": 25039 + }, + { + "epoch": 0.45771107902096625, + "grad_norm": 5.227227420071569, + "learning_rate": 5.922659365029156e-06, + "loss": 
17.0036, + "step": 25040 + }, + { + "epoch": 0.4577293582174128, + "grad_norm": 6.522922057325364, + "learning_rate": 5.922368435419713e-06, + "loss": 17.774, + "step": 25041 + }, + { + "epoch": 0.45774763741385927, + "grad_norm": 6.219062308287128, + "learning_rate": 5.922077502577449e-06, + "loss": 17.3931, + "step": 25042 + }, + { + "epoch": 0.4577659166103058, + "grad_norm": 6.206941750695732, + "learning_rate": 5.921786566503384e-06, + "loss": 17.4327, + "step": 25043 + }, + { + "epoch": 0.45778419580675234, + "grad_norm": 8.247628077256223, + "learning_rate": 5.921495627198537e-06, + "loss": 18.1207, + "step": 25044 + }, + { + "epoch": 0.45780247500319887, + "grad_norm": 5.742348481906552, + "learning_rate": 5.921204684663927e-06, + "loss": 17.5051, + "step": 25045 + }, + { + "epoch": 0.4578207541996454, + "grad_norm": 7.0053822967011214, + "learning_rate": 5.920913738900575e-06, + "loss": 17.9018, + "step": 25046 + }, + { + "epoch": 0.4578390333960919, + "grad_norm": 6.3120630670410005, + "learning_rate": 5.920622789909499e-06, + "loss": 17.2657, + "step": 25047 + }, + { + "epoch": 0.4578573125925384, + "grad_norm": 6.638560846178379, + "learning_rate": 5.920331837691722e-06, + "loss": 17.3365, + "step": 25048 + }, + { + "epoch": 0.45787559178898496, + "grad_norm": 6.913310899836669, + "learning_rate": 5.920040882248261e-06, + "loss": 17.3952, + "step": 25049 + }, + { + "epoch": 0.4578938709854315, + "grad_norm": 5.8329902562283165, + "learning_rate": 5.919749923580137e-06, + "loss": 17.53, + "step": 25050 + }, + { + "epoch": 0.45791215018187803, + "grad_norm": 7.880407501848141, + "learning_rate": 5.919458961688368e-06, + "loss": 17.8751, + "step": 25051 + }, + { + "epoch": 0.4579304293783245, + "grad_norm": 7.8721008769038985, + "learning_rate": 5.919167996573975e-06, + "loss": 17.7918, + "step": 25052 + }, + { + "epoch": 0.45794870857477105, + "grad_norm": 6.212387184352187, + "learning_rate": 5.918877028237982e-06, + "loss": 17.2153, + "step": 25053 + }, + { + "epoch": 0.4579669877712176, + "grad_norm": 7.679853254937747, + "learning_rate": 5.9185860566814005e-06, + "loss": 18.0365, + "step": 25054 + }, + { + "epoch": 0.4579852669676641, + "grad_norm": 5.9041637537375316, + "learning_rate": 5.9182950819052554e-06, + "loss": 16.9869, + "step": 25055 + }, + { + "epoch": 0.45800354616411065, + "grad_norm": 7.22318367184783, + "learning_rate": 5.9180041039105664e-06, + "loss": 18.0856, + "step": 25056 + }, + { + "epoch": 0.45802182536055713, + "grad_norm": 8.581666754043548, + "learning_rate": 5.917713122698352e-06, + "loss": 18.331, + "step": 25057 + }, + { + "epoch": 0.45804010455700367, + "grad_norm": 5.827562596542903, + "learning_rate": 5.9174221382696325e-06, + "loss": 17.3273, + "step": 25058 + }, + { + "epoch": 0.4580583837534502, + "grad_norm": 5.2291750829573775, + "learning_rate": 5.917131150625431e-06, + "loss": 17.1274, + "step": 25059 + }, + { + "epoch": 0.45807666294989674, + "grad_norm": 7.002193119910955, + "learning_rate": 5.91684015976676e-06, + "loss": 17.8088, + "step": 25060 + }, + { + "epoch": 0.4580949421463432, + "grad_norm": 5.064940999586588, + "learning_rate": 5.916549165694646e-06, + "loss": 16.8917, + "step": 25061 + }, + { + "epoch": 0.45811322134278976, + "grad_norm": 6.07487078474585, + "learning_rate": 5.9162581684101065e-06, + "loss": 17.2801, + "step": 25062 + }, + { + "epoch": 0.4581315005392363, + "grad_norm": 6.100885615395288, + "learning_rate": 5.915967167914163e-06, + "loss": 17.3161, + "step": 25063 + }, + { + "epoch": 0.4581497797356828, + 
"grad_norm": 6.365595687647508, + "learning_rate": 5.915676164207833e-06, + "loss": 17.3906, + "step": 25064 + }, + { + "epoch": 0.45816805893212936, + "grad_norm": 8.370558908105483, + "learning_rate": 5.915385157292135e-06, + "loss": 17.9775, + "step": 25065 + }, + { + "epoch": 0.45818633812857584, + "grad_norm": 4.743868565626321, + "learning_rate": 5.915094147168096e-06, + "loss": 16.7813, + "step": 25066 + }, + { + "epoch": 0.4582046173250224, + "grad_norm": 6.470216535288608, + "learning_rate": 5.914803133836729e-06, + "loss": 17.4377, + "step": 25067 + }, + { + "epoch": 0.4582228965214689, + "grad_norm": 5.208946094185995, + "learning_rate": 5.914512117299056e-06, + "loss": 16.9007, + "step": 25068 + }, + { + "epoch": 0.45824117571791545, + "grad_norm": 6.511039458565328, + "learning_rate": 5.914221097556097e-06, + "loss": 17.6099, + "step": 25069 + }, + { + "epoch": 0.458259454914362, + "grad_norm": 6.199890150444937, + "learning_rate": 5.913930074608873e-06, + "loss": 17.3178, + "step": 25070 + }, + { + "epoch": 0.45827773411080847, + "grad_norm": 6.396809693260093, + "learning_rate": 5.913639048458404e-06, + "loss": 17.4528, + "step": 25071 + }, + { + "epoch": 0.458296013307255, + "grad_norm": 5.383985903555381, + "learning_rate": 5.913348019105709e-06, + "loss": 17.1753, + "step": 25072 + }, + { + "epoch": 0.45831429250370154, + "grad_norm": 8.01317385967738, + "learning_rate": 5.913056986551809e-06, + "loss": 18.2772, + "step": 25073 + }, + { + "epoch": 0.4583325717001481, + "grad_norm": 5.055525897880802, + "learning_rate": 5.912765950797723e-06, + "loss": 17.0403, + "step": 25074 + }, + { + "epoch": 0.4583508508965946, + "grad_norm": 5.834932092271731, + "learning_rate": 5.912474911844471e-06, + "loss": 17.349, + "step": 25075 + }, + { + "epoch": 0.4583691300930411, + "grad_norm": 7.3257768903772265, + "learning_rate": 5.912183869693074e-06, + "loss": 17.8751, + "step": 25076 + }, + { + "epoch": 0.4583874092894876, + "grad_norm": 7.538587155687487, + "learning_rate": 5.911892824344554e-06, + "loss": 18.3766, + "step": 25077 + }, + { + "epoch": 0.45840568848593416, + "grad_norm": 7.160764942228313, + "learning_rate": 5.911601775799925e-06, + "loss": 17.8963, + "step": 25078 + }, + { + "epoch": 0.4584239676823807, + "grad_norm": 7.378645955279105, + "learning_rate": 5.911310724060213e-06, + "loss": 17.7232, + "step": 25079 + }, + { + "epoch": 0.45844224687882723, + "grad_norm": 5.049835034722711, + "learning_rate": 5.9110196691264365e-06, + "loss": 17.0266, + "step": 25080 + }, + { + "epoch": 0.4584605260752737, + "grad_norm": 6.34414887518242, + "learning_rate": 5.9107286109996135e-06, + "loss": 17.4845, + "step": 25081 + }, + { + "epoch": 0.45847880527172025, + "grad_norm": 5.977932616193766, + "learning_rate": 5.910437549680766e-06, + "loss": 17.3735, + "step": 25082 + }, + { + "epoch": 0.4584970844681668, + "grad_norm": 6.308358394565755, + "learning_rate": 5.910146485170914e-06, + "loss": 17.6364, + "step": 25083 + }, + { + "epoch": 0.4585153636646133, + "grad_norm": 5.86124418007269, + "learning_rate": 5.9098554174710785e-06, + "loss": 17.3927, + "step": 25084 + }, + { + "epoch": 0.45853364286105985, + "grad_norm": 6.138031504468722, + "learning_rate": 5.909564346582279e-06, + "loss": 17.458, + "step": 25085 + }, + { + "epoch": 0.45855192205750633, + "grad_norm": 5.4553383569489755, + "learning_rate": 5.909273272505534e-06, + "loss": 17.2117, + "step": 25086 + }, + { + "epoch": 0.45857020125395287, + "grad_norm": 7.030659033261773, + "learning_rate": 
5.908982195241865e-06, + "loss": 17.5818, + "step": 25087 + }, + { + "epoch": 0.4585884804503994, + "grad_norm": 5.483053980848989, + "learning_rate": 5.908691114792293e-06, + "loss": 17.0663, + "step": 25088 + }, + { + "epoch": 0.45860675964684594, + "grad_norm": 6.1136181010802275, + "learning_rate": 5.908400031157837e-06, + "loss": 17.1744, + "step": 25089 + }, + { + "epoch": 0.4586250388432925, + "grad_norm": 5.69582017469816, + "learning_rate": 5.908108944339519e-06, + "loss": 17.0884, + "step": 25090 + }, + { + "epoch": 0.45864331803973896, + "grad_norm": 5.736528975665548, + "learning_rate": 5.907817854338357e-06, + "loss": 17.1045, + "step": 25091 + }, + { + "epoch": 0.4586615972361855, + "grad_norm": 7.6264465897051, + "learning_rate": 5.907526761155371e-06, + "loss": 17.871, + "step": 25092 + }, + { + "epoch": 0.458679876432632, + "grad_norm": 6.128675020624806, + "learning_rate": 5.907235664791583e-06, + "loss": 17.5724, + "step": 25093 + }, + { + "epoch": 0.45869815562907856, + "grad_norm": 5.955904531388647, + "learning_rate": 5.906944565248015e-06, + "loss": 17.3476, + "step": 25094 + }, + { + "epoch": 0.45871643482552504, + "grad_norm": 7.08895749649592, + "learning_rate": 5.9066534625256836e-06, + "loss": 17.7101, + "step": 25095 + }, + { + "epoch": 0.4587347140219716, + "grad_norm": 4.708930776386075, + "learning_rate": 5.90636235662561e-06, + "loss": 16.7383, + "step": 25096 + }, + { + "epoch": 0.4587529932184181, + "grad_norm": 7.568027262060807, + "learning_rate": 5.906071247548814e-06, + "loss": 18.0702, + "step": 25097 + }, + { + "epoch": 0.45877127241486465, + "grad_norm": 6.160265851589954, + "learning_rate": 5.90578013529632e-06, + "loss": 17.3222, + "step": 25098 + }, + { + "epoch": 0.4587895516113112, + "grad_norm": 5.221119349192646, + "learning_rate": 5.905489019869142e-06, + "loss": 16.8484, + "step": 25099 + }, + { + "epoch": 0.45880783080775767, + "grad_norm": 5.777141960437828, + "learning_rate": 5.905197901268305e-06, + "loss": 17.3222, + "step": 25100 + }, + { + "epoch": 0.4588261100042042, + "grad_norm": 6.447375505580861, + "learning_rate": 5.9049067794948275e-06, + "loss": 17.2996, + "step": 25101 + }, + { + "epoch": 0.45884438920065074, + "grad_norm": 6.997710347061462, + "learning_rate": 5.904615654549732e-06, + "loss": 17.3454, + "step": 25102 + }, + { + "epoch": 0.4588626683970973, + "grad_norm": 7.18329244941718, + "learning_rate": 5.904324526434035e-06, + "loss": 17.8857, + "step": 25103 + }, + { + "epoch": 0.4588809475935438, + "grad_norm": 6.85829402999007, + "learning_rate": 5.904033395148761e-06, + "loss": 17.6004, + "step": 25104 + }, + { + "epoch": 0.4588992267899903, + "grad_norm": 6.094213590941932, + "learning_rate": 5.903742260694926e-06, + "loss": 17.2283, + "step": 25105 + }, + { + "epoch": 0.4589175059864368, + "grad_norm": 7.3716304976459135, + "learning_rate": 5.903451123073554e-06, + "loss": 17.8677, + "step": 25106 + }, + { + "epoch": 0.45893578518288336, + "grad_norm": 5.238332336507377, + "learning_rate": 5.903159982285663e-06, + "loss": 17.0321, + "step": 25107 + }, + { + "epoch": 0.4589540643793299, + "grad_norm": 6.935188174699001, + "learning_rate": 5.902868838332277e-06, + "loss": 17.7228, + "step": 25108 + }, + { + "epoch": 0.45897234357577643, + "grad_norm": 6.557339988151756, + "learning_rate": 5.9025776912144125e-06, + "loss": 17.7469, + "step": 25109 + }, + { + "epoch": 0.4589906227722229, + "grad_norm": 7.055907107137074, + "learning_rate": 5.902286540933091e-06, + "loss": 17.8525, + "step": 25110 + }, + { + "epoch": 
0.45900890196866945, + "grad_norm": 5.214122725260912, + "learning_rate": 5.901995387489335e-06, + "loss": 17.1292, + "step": 25111 + }, + { + "epoch": 0.459027181165116, + "grad_norm": 6.4066059822303485, + "learning_rate": 5.9017042308841635e-06, + "loss": 17.475, + "step": 25112 + }, + { + "epoch": 0.4590454603615625, + "grad_norm": 6.777180689280732, + "learning_rate": 5.901413071118596e-06, + "loss": 17.4952, + "step": 25113 + }, + { + "epoch": 0.45906373955800905, + "grad_norm": 6.146671576227362, + "learning_rate": 5.901121908193654e-06, + "loss": 17.554, + "step": 25114 + }, + { + "epoch": 0.45908201875445553, + "grad_norm": 6.734242849027381, + "learning_rate": 5.900830742110358e-06, + "loss": 17.8993, + "step": 25115 + }, + { + "epoch": 0.45910029795090207, + "grad_norm": 5.438781339591436, + "learning_rate": 5.900539572869728e-06, + "loss": 17.2216, + "step": 25116 + }, + { + "epoch": 0.4591185771473486, + "grad_norm": 5.51344483745984, + "learning_rate": 5.900248400472786e-06, + "loss": 17.3257, + "step": 25117 + }, + { + "epoch": 0.45913685634379514, + "grad_norm": 6.841229462459413, + "learning_rate": 5.899957224920551e-06, + "loss": 17.4026, + "step": 25118 + }, + { + "epoch": 0.4591551355402417, + "grad_norm": 7.015296903704281, + "learning_rate": 5.899666046214043e-06, + "loss": 17.751, + "step": 25119 + }, + { + "epoch": 0.45917341473668816, + "grad_norm": 6.198221657390064, + "learning_rate": 5.899374864354284e-06, + "loss": 17.2304, + "step": 25120 + }, + { + "epoch": 0.4591916939331347, + "grad_norm": 5.665737824164518, + "learning_rate": 5.899083679342296e-06, + "loss": 17.0227, + "step": 25121 + }, + { + "epoch": 0.45920997312958123, + "grad_norm": 7.453643756704126, + "learning_rate": 5.898792491179096e-06, + "loss": 17.9597, + "step": 25122 + }, + { + "epoch": 0.45922825232602776, + "grad_norm": 8.077937538556672, + "learning_rate": 5.898501299865707e-06, + "loss": 18.109, + "step": 25123 + }, + { + "epoch": 0.4592465315224743, + "grad_norm": 8.610773605697204, + "learning_rate": 5.898210105403147e-06, + "loss": 18.0171, + "step": 25124 + }, + { + "epoch": 0.4592648107189208, + "grad_norm": 6.443037608981733, + "learning_rate": 5.897918907792442e-06, + "loss": 17.3407, + "step": 25125 + }, + { + "epoch": 0.4592830899153673, + "grad_norm": 6.200238922158968, + "learning_rate": 5.897627707034606e-06, + "loss": 17.6508, + "step": 25126 + }, + { + "epoch": 0.45930136911181385, + "grad_norm": 9.187035784712638, + "learning_rate": 5.897336503130664e-06, + "loss": 18.0105, + "step": 25127 + }, + { + "epoch": 0.4593196483082604, + "grad_norm": 7.111063903992415, + "learning_rate": 5.897045296081636e-06, + "loss": 17.4023, + "step": 25128 + }, + { + "epoch": 0.45933792750470687, + "grad_norm": 6.1989517543475054, + "learning_rate": 5.896754085888541e-06, + "loss": 17.3739, + "step": 25129 + }, + { + "epoch": 0.4593562067011534, + "grad_norm": 5.6610742089571335, + "learning_rate": 5.896462872552401e-06, + "loss": 17.3181, + "step": 25130 + }, + { + "epoch": 0.45937448589759994, + "grad_norm": 6.512710182304373, + "learning_rate": 5.896171656074237e-06, + "loss": 17.3586, + "step": 25131 + }, + { + "epoch": 0.4593927650940465, + "grad_norm": 6.210866625513039, + "learning_rate": 5.895880436455068e-06, + "loss": 17.3805, + "step": 25132 + }, + { + "epoch": 0.459411044290493, + "grad_norm": 6.937713023128045, + "learning_rate": 5.895589213695917e-06, + "loss": 17.7345, + "step": 25133 + }, + { + "epoch": 0.4594293234869395, + "grad_norm": 9.909806208021777, + "learning_rate": 
5.895297987797803e-06, + "loss": 17.8982, + "step": 25134 + }, + { + "epoch": 0.459447602683386, + "grad_norm": 5.917635441195128, + "learning_rate": 5.895006758761749e-06, + "loss": 17.294, + "step": 25135 + }, + { + "epoch": 0.45946588187983256, + "grad_norm": 6.053944887018149, + "learning_rate": 5.894715526588771e-06, + "loss": 17.3829, + "step": 25136 + }, + { + "epoch": 0.4594841610762791, + "grad_norm": 6.8268018748677575, + "learning_rate": 5.8944242912798935e-06, + "loss": 17.483, + "step": 25137 + }, + { + "epoch": 0.45950244027272563, + "grad_norm": 5.369844108200249, + "learning_rate": 5.894133052836138e-06, + "loss": 17.2398, + "step": 25138 + }, + { + "epoch": 0.4595207194691721, + "grad_norm": 13.63973965013231, + "learning_rate": 5.8938418112585225e-06, + "loss": 18.634, + "step": 25139 + }, + { + "epoch": 0.45953899866561865, + "grad_norm": 5.121883898150083, + "learning_rate": 5.8935505665480695e-06, + "loss": 17.051, + "step": 25140 + }, + { + "epoch": 0.4595572778620652, + "grad_norm": 6.89128015628205, + "learning_rate": 5.893259318705799e-06, + "loss": 17.4457, + "step": 25141 + }, + { + "epoch": 0.4595755570585117, + "grad_norm": 6.989561441812156, + "learning_rate": 5.892968067732731e-06, + "loss": 17.7903, + "step": 25142 + }, + { + "epoch": 0.45959383625495825, + "grad_norm": 5.770252141612089, + "learning_rate": 5.892676813629889e-06, + "loss": 17.1302, + "step": 25143 + }, + { + "epoch": 0.45961211545140473, + "grad_norm": 6.059587407676246, + "learning_rate": 5.892385556398292e-06, + "loss": 17.501, + "step": 25144 + }, + { + "epoch": 0.45963039464785127, + "grad_norm": 6.491129225958094, + "learning_rate": 5.892094296038961e-06, + "loss": 17.6745, + "step": 25145 + }, + { + "epoch": 0.4596486738442978, + "grad_norm": 7.600274561858066, + "learning_rate": 5.891803032552916e-06, + "loss": 17.8503, + "step": 25146 + }, + { + "epoch": 0.45966695304074434, + "grad_norm": 7.128989380532426, + "learning_rate": 5.89151176594118e-06, + "loss": 17.8868, + "step": 25147 + }, + { + "epoch": 0.4596852322371909, + "grad_norm": 6.210565817815751, + "learning_rate": 5.891220496204772e-06, + "loss": 17.5276, + "step": 25148 + }, + { + "epoch": 0.45970351143363736, + "grad_norm": 5.885705935678427, + "learning_rate": 5.890929223344715e-06, + "loss": 17.3678, + "step": 25149 + }, + { + "epoch": 0.4597217906300839, + "grad_norm": 6.667881055100639, + "learning_rate": 5.890637947362027e-06, + "loss": 17.8547, + "step": 25150 + }, + { + "epoch": 0.45974006982653043, + "grad_norm": 6.99481690001039, + "learning_rate": 5.890346668257729e-06, + "loss": 17.7187, + "step": 25151 + }, + { + "epoch": 0.45975834902297696, + "grad_norm": 7.695289062844708, + "learning_rate": 5.890055386032845e-06, + "loss": 18.2169, + "step": 25152 + }, + { + "epoch": 0.4597766282194235, + "grad_norm": 6.9050078947645925, + "learning_rate": 5.889764100688394e-06, + "loss": 17.721, + "step": 25153 + }, + { + "epoch": 0.45979490741587, + "grad_norm": 6.58402216988812, + "learning_rate": 5.8894728122253965e-06, + "loss": 17.4457, + "step": 25154 + }, + { + "epoch": 0.4598131866123165, + "grad_norm": 6.988560929739352, + "learning_rate": 5.889181520644874e-06, + "loss": 17.2455, + "step": 25155 + }, + { + "epoch": 0.45983146580876305, + "grad_norm": 6.206599477481273, + "learning_rate": 5.888890225947848e-06, + "loss": 17.4213, + "step": 25156 + }, + { + "epoch": 0.4598497450052096, + "grad_norm": 5.6008437350115985, + "learning_rate": 5.888598928135338e-06, + "loss": 17.0786, + "step": 25157 + }, + { + 
"epoch": 0.4598680242016561, + "grad_norm": 7.791893690638781, + "learning_rate": 5.888307627208366e-06, + "loss": 18.0871, + "step": 25158 + }, + { + "epoch": 0.4598863033981026, + "grad_norm": 5.786612455520085, + "learning_rate": 5.888016323167954e-06, + "loss": 17.1307, + "step": 25159 + }, + { + "epoch": 0.45990458259454914, + "grad_norm": 6.026676468863097, + "learning_rate": 5.88772501601512e-06, + "loss": 17.2374, + "step": 25160 + }, + { + "epoch": 0.4599228617909957, + "grad_norm": 9.161378002187389, + "learning_rate": 5.887433705750889e-06, + "loss": 18.0735, + "step": 25161 + }, + { + "epoch": 0.4599411409874422, + "grad_norm": 7.409644025610671, + "learning_rate": 5.887142392376279e-06, + "loss": 17.5688, + "step": 25162 + }, + { + "epoch": 0.4599594201838887, + "grad_norm": 5.614609338578762, + "learning_rate": 5.886851075892311e-06, + "loss": 17.2005, + "step": 25163 + }, + { + "epoch": 0.4599776993803352, + "grad_norm": 6.605912678841178, + "learning_rate": 5.886559756300008e-06, + "loss": 17.5157, + "step": 25164 + }, + { + "epoch": 0.45999597857678176, + "grad_norm": 4.981666970189918, + "learning_rate": 5.886268433600388e-06, + "loss": 16.7706, + "step": 25165 + }, + { + "epoch": 0.4600142577732283, + "grad_norm": 6.723853799327433, + "learning_rate": 5.885977107794477e-06, + "loss": 17.6284, + "step": 25166 + }, + { + "epoch": 0.46003253696967483, + "grad_norm": 4.767315444881347, + "learning_rate": 5.885685778883292e-06, + "loss": 16.9863, + "step": 25167 + }, + { + "epoch": 0.4600508161661213, + "grad_norm": 5.883460762423431, + "learning_rate": 5.885394446867855e-06, + "loss": 17.5723, + "step": 25168 + }, + { + "epoch": 0.46006909536256785, + "grad_norm": 6.7623460547856915, + "learning_rate": 5.885103111749186e-06, + "loss": 17.5193, + "step": 25169 + }, + { + "epoch": 0.4600873745590144, + "grad_norm": 5.578138536650387, + "learning_rate": 5.884811773528309e-06, + "loss": 17.1289, + "step": 25170 + }, + { + "epoch": 0.4601056537554609, + "grad_norm": 7.633891638786964, + "learning_rate": 5.884520432206243e-06, + "loss": 17.5063, + "step": 25171 + }, + { + "epoch": 0.46012393295190746, + "grad_norm": 7.294758059690856, + "learning_rate": 5.88422908778401e-06, + "loss": 17.6058, + "step": 25172 + }, + { + "epoch": 0.46014221214835394, + "grad_norm": 6.53870452695131, + "learning_rate": 5.883937740262631e-06, + "loss": 17.7105, + "step": 25173 + }, + { + "epoch": 0.46016049134480047, + "grad_norm": 7.234411483567285, + "learning_rate": 5.883646389643126e-06, + "loss": 17.8144, + "step": 25174 + }, + { + "epoch": 0.460178770541247, + "grad_norm": 5.87520245327371, + "learning_rate": 5.883355035926518e-06, + "loss": 17.2566, + "step": 25175 + }, + { + "epoch": 0.46019704973769354, + "grad_norm": 7.127310428675387, + "learning_rate": 5.8830636791138265e-06, + "loss": 17.3071, + "step": 25176 + }, + { + "epoch": 0.4602153289341401, + "grad_norm": 7.0579754506303205, + "learning_rate": 5.8827723192060745e-06, + "loss": 17.7469, + "step": 25177 + }, + { + "epoch": 0.46023360813058656, + "grad_norm": 7.4672521690141584, + "learning_rate": 5.882480956204281e-06, + "loss": 18.2015, + "step": 25178 + }, + { + "epoch": 0.4602518873270331, + "grad_norm": 5.607334949849727, + "learning_rate": 5.882189590109468e-06, + "loss": 17.1994, + "step": 25179 + }, + { + "epoch": 0.46027016652347963, + "grad_norm": 6.617843704706007, + "learning_rate": 5.881898220922658e-06, + "loss": 17.7119, + "step": 25180 + }, + { + "epoch": 0.46028844571992616, + "grad_norm": 8.233597757977755, + 
"learning_rate": 5.881606848644872e-06, + "loss": 18.2433, + "step": 25181 + }, + { + "epoch": 0.4603067249163727, + "grad_norm": 6.630516398476976, + "learning_rate": 5.881315473277129e-06, + "loss": 17.459, + "step": 25182 + }, + { + "epoch": 0.4603250041128192, + "grad_norm": 6.928581051265842, + "learning_rate": 5.881024094820451e-06, + "loss": 17.697, + "step": 25183 + }, + { + "epoch": 0.4603432833092657, + "grad_norm": 6.309244438724938, + "learning_rate": 5.880732713275863e-06, + "loss": 17.4451, + "step": 25184 + }, + { + "epoch": 0.46036156250571225, + "grad_norm": 10.785039141572657, + "learning_rate": 5.880441328644381e-06, + "loss": 17.9353, + "step": 25185 + }, + { + "epoch": 0.4603798417021588, + "grad_norm": 5.6893484008183135, + "learning_rate": 5.880149940927029e-06, + "loss": 17.4092, + "step": 25186 + }, + { + "epoch": 0.4603981208986053, + "grad_norm": 12.42222377509774, + "learning_rate": 5.879858550124827e-06, + "loss": 17.9524, + "step": 25187 + }, + { + "epoch": 0.4604164000950518, + "grad_norm": 6.33427230977597, + "learning_rate": 5.879567156238799e-06, + "loss": 17.2989, + "step": 25188 + }, + { + "epoch": 0.46043467929149834, + "grad_norm": 7.729890168576504, + "learning_rate": 5.879275759269963e-06, + "loss": 18.2629, + "step": 25189 + }, + { + "epoch": 0.4604529584879449, + "grad_norm": 7.295201757649203, + "learning_rate": 5.878984359219343e-06, + "loss": 17.7452, + "step": 25190 + }, + { + "epoch": 0.4604712376843914, + "grad_norm": 6.160423920275361, + "learning_rate": 5.878692956087959e-06, + "loss": 17.3426, + "step": 25191 + }, + { + "epoch": 0.46048951688083795, + "grad_norm": 8.42585663647094, + "learning_rate": 5.87840154987683e-06, + "loss": 18.0182, + "step": 25192 + }, + { + "epoch": 0.4605077960772844, + "grad_norm": 7.534976925029885, + "learning_rate": 5.878110140586981e-06, + "loss": 18.3795, + "step": 25193 + }, + { + "epoch": 0.46052607527373096, + "grad_norm": 7.314838726838559, + "learning_rate": 5.877818728219434e-06, + "loss": 17.5525, + "step": 25194 + }, + { + "epoch": 0.4605443544701775, + "grad_norm": 5.671522377539959, + "learning_rate": 5.877527312775207e-06, + "loss": 17.1415, + "step": 25195 + }, + { + "epoch": 0.46056263366662403, + "grad_norm": 6.277055034352918, + "learning_rate": 5.877235894255323e-06, + "loss": 17.2759, + "step": 25196 + }, + { + "epoch": 0.4605809128630705, + "grad_norm": 5.267296296698131, + "learning_rate": 5.876944472660803e-06, + "loss": 17.0471, + "step": 25197 + }, + { + "epoch": 0.46059919205951705, + "grad_norm": 7.532153553065105, + "learning_rate": 5.87665304799267e-06, + "loss": 18.3739, + "step": 25198 + }, + { + "epoch": 0.4606174712559636, + "grad_norm": 6.211914389122407, + "learning_rate": 5.8763616202519435e-06, + "loss": 17.4008, + "step": 25199 + }, + { + "epoch": 0.4606357504524101, + "grad_norm": 4.745888638877521, + "learning_rate": 5.876070189439645e-06, + "loss": 16.9409, + "step": 25200 + }, + { + "epoch": 0.46065402964885666, + "grad_norm": 7.994165483051935, + "learning_rate": 5.875778755556797e-06, + "loss": 18.1199, + "step": 25201 + }, + { + "epoch": 0.46067230884530314, + "grad_norm": 7.676489971327513, + "learning_rate": 5.8754873186044205e-06, + "loss": 17.8826, + "step": 25202 + }, + { + "epoch": 0.46069058804174967, + "grad_norm": 6.803787940231963, + "learning_rate": 5.875195878583536e-06, + "loss": 17.6777, + "step": 25203 + }, + { + "epoch": 0.4607088672381962, + "grad_norm": 6.612945588168071, + "learning_rate": 5.874904435495168e-06, + "loss": 17.4038, + "step": 25204 
+ }, + { + "epoch": 0.46072714643464274, + "grad_norm": 5.49287347466228, + "learning_rate": 5.874612989340334e-06, + "loss": 16.9628, + "step": 25205 + }, + { + "epoch": 0.4607454256310893, + "grad_norm": 6.219369992367084, + "learning_rate": 5.874321540120057e-06, + "loss": 17.4567, + "step": 25206 + }, + { + "epoch": 0.46076370482753576, + "grad_norm": 5.8198794907322515, + "learning_rate": 5.87403008783536e-06, + "loss": 17.366, + "step": 25207 + }, + { + "epoch": 0.4607819840239823, + "grad_norm": 7.891903307016829, + "learning_rate": 5.873738632487265e-06, + "loss": 18.1387, + "step": 25208 + }, + { + "epoch": 0.46080026322042883, + "grad_norm": 5.462459582829687, + "learning_rate": 5.873447174076789e-06, + "loss": 16.8464, + "step": 25209 + }, + { + "epoch": 0.46081854241687537, + "grad_norm": 5.379997424751426, + "learning_rate": 5.873155712604956e-06, + "loss": 16.9951, + "step": 25210 + }, + { + "epoch": 0.4608368216133219, + "grad_norm": 5.334205765095013, + "learning_rate": 5.8728642480727915e-06, + "loss": 17.0456, + "step": 25211 + }, + { + "epoch": 0.4608551008097684, + "grad_norm": 6.22783642163609, + "learning_rate": 5.8725727804813115e-06, + "loss": 17.4138, + "step": 25212 + }, + { + "epoch": 0.4608733800062149, + "grad_norm": 4.8363785858796575, + "learning_rate": 5.87228130983154e-06, + "loss": 16.8598, + "step": 25213 + }, + { + "epoch": 0.46089165920266145, + "grad_norm": 7.366645808860496, + "learning_rate": 5.871989836124498e-06, + "loss": 17.8513, + "step": 25214 + }, + { + "epoch": 0.460909938399108, + "grad_norm": 6.301782432938622, + "learning_rate": 5.871698359361207e-06, + "loss": 17.4138, + "step": 25215 + }, + { + "epoch": 0.4609282175955545, + "grad_norm": 6.958742772704289, + "learning_rate": 5.871406879542688e-06, + "loss": 17.8689, + "step": 25216 + }, + { + "epoch": 0.460946496792001, + "grad_norm": 6.499677068371723, + "learning_rate": 5.871115396669965e-06, + "loss": 17.2869, + "step": 25217 + }, + { + "epoch": 0.46096477598844754, + "grad_norm": 6.879665208187112, + "learning_rate": 5.870823910744059e-06, + "loss": 17.3627, + "step": 25218 + }, + { + "epoch": 0.4609830551848941, + "grad_norm": 7.381367835499502, + "learning_rate": 5.8705324217659886e-06, + "loss": 17.6932, + "step": 25219 + }, + { + "epoch": 0.4610013343813406, + "grad_norm": 5.87604450242848, + "learning_rate": 5.870240929736778e-06, + "loss": 17.2681, + "step": 25220 + }, + { + "epoch": 0.46101961357778715, + "grad_norm": 7.899233988968155, + "learning_rate": 5.869949434657449e-06, + "loss": 17.809, + "step": 25221 + }, + { + "epoch": 0.4610378927742336, + "grad_norm": 5.016263511686514, + "learning_rate": 5.869657936529023e-06, + "loss": 16.9404, + "step": 25222 + }, + { + "epoch": 0.46105617197068016, + "grad_norm": 6.545002506397308, + "learning_rate": 5.869366435352521e-06, + "loss": 17.3206, + "step": 25223 + }, + { + "epoch": 0.4610744511671267, + "grad_norm": 7.149939246215548, + "learning_rate": 5.869074931128964e-06, + "loss": 17.4842, + "step": 25224 + }, + { + "epoch": 0.46109273036357323, + "grad_norm": 6.060316208116345, + "learning_rate": 5.868783423859378e-06, + "loss": 17.4576, + "step": 25225 + }, + { + "epoch": 0.46111100956001977, + "grad_norm": 5.562265651496669, + "learning_rate": 5.868491913544779e-06, + "loss": 17.0999, + "step": 25226 + }, + { + "epoch": 0.46112928875646625, + "grad_norm": 6.356959184287293, + "learning_rate": 5.868200400186191e-06, + "loss": 17.4441, + "step": 25227 + }, + { + "epoch": 0.4611475679529128, + "grad_norm": 6.443925494825827, 
+ "learning_rate": 5.867908883784637e-06, + "loss": 17.124, + "step": 25228 + }, + { + "epoch": 0.4611658471493593, + "grad_norm": 6.061029860344893, + "learning_rate": 5.867617364341137e-06, + "loss": 17.1974, + "step": 25229 + }, + { + "epoch": 0.46118412634580586, + "grad_norm": 5.775275949768164, + "learning_rate": 5.8673258418567134e-06, + "loss": 17.0047, + "step": 25230 + }, + { + "epoch": 0.46120240554225234, + "grad_norm": 5.633981491550323, + "learning_rate": 5.867034316332389e-06, + "loss": 17.2366, + "step": 25231 + }, + { + "epoch": 0.46122068473869887, + "grad_norm": 6.859929030125392, + "learning_rate": 5.8667427877691825e-06, + "loss": 17.5946, + "step": 25232 + }, + { + "epoch": 0.4612389639351454, + "grad_norm": 7.569064890538536, + "learning_rate": 5.86645125616812e-06, + "loss": 17.7965, + "step": 25233 + }, + { + "epoch": 0.46125724313159194, + "grad_norm": 6.642838065807771, + "learning_rate": 5.86615972153022e-06, + "loss": 17.7194, + "step": 25234 + }, + { + "epoch": 0.4612755223280385, + "grad_norm": 6.321309871406881, + "learning_rate": 5.8658681838565065e-06, + "loss": 17.1463, + "step": 25235 + }, + { + "epoch": 0.46129380152448496, + "grad_norm": 6.560191692175135, + "learning_rate": 5.865576643147999e-06, + "loss": 17.7838, + "step": 25236 + }, + { + "epoch": 0.4613120807209315, + "grad_norm": 10.608667819433098, + "learning_rate": 5.8652850994057184e-06, + "loss": 17.9546, + "step": 25237 + }, + { + "epoch": 0.46133035991737803, + "grad_norm": 5.644111811817615, + "learning_rate": 5.8649935526306915e-06, + "loss": 17.4048, + "step": 25238 + }, + { + "epoch": 0.46134863911382457, + "grad_norm": 5.340542725307962, + "learning_rate": 5.864702002823938e-06, + "loss": 17.1213, + "step": 25239 + }, + { + "epoch": 0.4613669183102711, + "grad_norm": 6.584077175225929, + "learning_rate": 5.864410449986478e-06, + "loss": 17.4067, + "step": 25240 + }, + { + "epoch": 0.4613851975067176, + "grad_norm": 7.2018494393832295, + "learning_rate": 5.864118894119333e-06, + "loss": 17.9102, + "step": 25241 + }, + { + "epoch": 0.4614034767031641, + "grad_norm": 5.720497312017708, + "learning_rate": 5.863827335223526e-06, + "loss": 17.1027, + "step": 25242 + }, + { + "epoch": 0.46142175589961065, + "grad_norm": 6.62557114176098, + "learning_rate": 5.863535773300081e-06, + "loss": 17.6402, + "step": 25243 + }, + { + "epoch": 0.4614400350960572, + "grad_norm": 6.843530139692724, + "learning_rate": 5.863244208350017e-06, + "loss": 17.7313, + "step": 25244 + }, + { + "epoch": 0.4614583142925037, + "grad_norm": 6.098550955711443, + "learning_rate": 5.862952640374358e-06, + "loss": 17.4683, + "step": 25245 + }, + { + "epoch": 0.4614765934889502, + "grad_norm": 5.8955470014577465, + "learning_rate": 5.862661069374123e-06, + "loss": 17.3704, + "step": 25246 + }, + { + "epoch": 0.46149487268539674, + "grad_norm": 5.348734500097625, + "learning_rate": 5.862369495350337e-06, + "loss": 16.8836, + "step": 25247 + }, + { + "epoch": 0.4615131518818433, + "grad_norm": 7.734146139843284, + "learning_rate": 5.862077918304021e-06, + "loss": 18.1261, + "step": 25248 + }, + { + "epoch": 0.4615314310782898, + "grad_norm": 6.757837059362166, + "learning_rate": 5.861786338236198e-06, + "loss": 17.9775, + "step": 25249 + }, + { + "epoch": 0.46154971027473635, + "grad_norm": 7.728993264104784, + "learning_rate": 5.861494755147887e-06, + "loss": 18.1239, + "step": 25250 + }, + { + "epoch": 0.4615679894711828, + "grad_norm": 5.6011239945637, + "learning_rate": 5.86120316904011e-06, + "loss": 17.2771, + "step": 
25251 + }, + { + "epoch": 0.46158626866762936, + "grad_norm": 6.037583735673573, + "learning_rate": 5.8609115799138925e-06, + "loss": 17.5366, + "step": 25252 + }, + { + "epoch": 0.4616045478640759, + "grad_norm": 7.21085759175853, + "learning_rate": 5.860619987770256e-06, + "loss": 18.182, + "step": 25253 + }, + { + "epoch": 0.46162282706052243, + "grad_norm": 6.658784762118231, + "learning_rate": 5.86032839261022e-06, + "loss": 17.7426, + "step": 25254 + }, + { + "epoch": 0.46164110625696897, + "grad_norm": 5.8194026285300415, + "learning_rate": 5.860036794434807e-06, + "loss": 17.4987, + "step": 25255 + }, + { + "epoch": 0.46165938545341545, + "grad_norm": 6.869381633538686, + "learning_rate": 5.85974519324504e-06, + "loss": 17.8578, + "step": 25256 + }, + { + "epoch": 0.461677664649862, + "grad_norm": 6.092164871274417, + "learning_rate": 5.8594535890419405e-06, + "loss": 17.3048, + "step": 25257 + }, + { + "epoch": 0.4616959438463085, + "grad_norm": 5.755045555872947, + "learning_rate": 5.859161981826531e-06, + "loss": 17.2976, + "step": 25258 + }, + { + "epoch": 0.46171422304275506, + "grad_norm": 6.588632099795179, + "learning_rate": 5.858870371599833e-06, + "loss": 17.2774, + "step": 25259 + }, + { + "epoch": 0.4617325022392016, + "grad_norm": 6.464079790384099, + "learning_rate": 5.858578758362869e-06, + "loss": 17.6903, + "step": 25260 + }, + { + "epoch": 0.46175078143564807, + "grad_norm": 6.743054818292839, + "learning_rate": 5.858287142116661e-06, + "loss": 17.5171, + "step": 25261 + }, + { + "epoch": 0.4617690606320946, + "grad_norm": 8.15978190782185, + "learning_rate": 5.8579955228622305e-06, + "loss": 17.9251, + "step": 25262 + }, + { + "epoch": 0.46178733982854114, + "grad_norm": 7.962239552499945, + "learning_rate": 5.857703900600602e-06, + "loss": 17.6902, + "step": 25263 + }, + { + "epoch": 0.4618056190249877, + "grad_norm": 6.622267708487954, + "learning_rate": 5.857412275332795e-06, + "loss": 17.5825, + "step": 25264 + }, + { + "epoch": 0.46182389822143416, + "grad_norm": 6.302864673146543, + "learning_rate": 5.8571206470598304e-06, + "loss": 17.3838, + "step": 25265 + }, + { + "epoch": 0.4618421774178807, + "grad_norm": 6.524106147421126, + "learning_rate": 5.856829015782734e-06, + "loss": 17.4938, + "step": 25266 + }, + { + "epoch": 0.46186045661432723, + "grad_norm": 6.8560153214061295, + "learning_rate": 5.856537381502527e-06, + "loss": 17.8109, + "step": 25267 + }, + { + "epoch": 0.46187873581077377, + "grad_norm": 5.7603696737380785, + "learning_rate": 5.85624574422023e-06, + "loss": 17.306, + "step": 25268 + }, + { + "epoch": 0.4618970150072203, + "grad_norm": 6.245738680757659, + "learning_rate": 5.8559541039368654e-06, + "loss": 17.4456, + "step": 25269 + }, + { + "epoch": 0.4619152942036668, + "grad_norm": 9.23885331020808, + "learning_rate": 5.855662460653457e-06, + "loss": 17.7326, + "step": 25270 + }, + { + "epoch": 0.4619335734001133, + "grad_norm": 4.698139770850545, + "learning_rate": 5.855370814371024e-06, + "loss": 16.9017, + "step": 25271 + }, + { + "epoch": 0.46195185259655985, + "grad_norm": 6.815264462425667, + "learning_rate": 5.8550791650905925e-06, + "loss": 17.996, + "step": 25272 + }, + { + "epoch": 0.4619701317930064, + "grad_norm": 5.414622819904023, + "learning_rate": 5.854787512813183e-06, + "loss": 16.9644, + "step": 25273 + }, + { + "epoch": 0.4619884109894529, + "grad_norm": 8.818899491645826, + "learning_rate": 5.854495857539816e-06, + "loss": 18.2126, + "step": 25274 + }, + { + "epoch": 0.4620066901858994, + "grad_norm": 
8.150483084597855, + "learning_rate": 5.854204199271515e-06, + "loss": 17.9843, + "step": 25275 + }, + { + "epoch": 0.46202496938234594, + "grad_norm": 7.73954821080085, + "learning_rate": 5.853912538009303e-06, + "loss": 18.2651, + "step": 25276 + }, + { + "epoch": 0.4620432485787925, + "grad_norm": 6.402496847467016, + "learning_rate": 5.853620873754202e-06, + "loss": 17.48, + "step": 25277 + }, + { + "epoch": 0.462061527775239, + "grad_norm": 6.084215762293935, + "learning_rate": 5.853329206507234e-06, + "loss": 17.6089, + "step": 25278 + }, + { + "epoch": 0.46207980697168555, + "grad_norm": 7.669596020238035, + "learning_rate": 5.85303753626942e-06, + "loss": 18.0464, + "step": 25279 + }, + { + "epoch": 0.462098086168132, + "grad_norm": 7.3662375825774316, + "learning_rate": 5.852745863041786e-06, + "loss": 17.9197, + "step": 25280 + }, + { + "epoch": 0.46211636536457856, + "grad_norm": 7.026272663659752, + "learning_rate": 5.85245418682535e-06, + "loss": 17.8691, + "step": 25281 + }, + { + "epoch": 0.4621346445610251, + "grad_norm": 5.535689058701501, + "learning_rate": 5.852162507621135e-06, + "loss": 17.3449, + "step": 25282 + }, + { + "epoch": 0.46215292375747163, + "grad_norm": 6.74302090668274, + "learning_rate": 5.851870825430165e-06, + "loss": 17.4523, + "step": 25283 + }, + { + "epoch": 0.46217120295391817, + "grad_norm": 6.12401175446677, + "learning_rate": 5.851579140253463e-06, + "loss": 17.1992, + "step": 25284 + }, + { + "epoch": 0.46218948215036465, + "grad_norm": 5.439906872526443, + "learning_rate": 5.851287452092048e-06, + "loss": 17.3654, + "step": 25285 + }, + { + "epoch": 0.4622077613468112, + "grad_norm": 7.301342799027574, + "learning_rate": 5.850995760946946e-06, + "loss": 17.372, + "step": 25286 + }, + { + "epoch": 0.4622260405432577, + "grad_norm": 7.031633964986034, + "learning_rate": 5.850704066819177e-06, + "loss": 17.9206, + "step": 25287 + }, + { + "epoch": 0.46224431973970426, + "grad_norm": 5.336220084573948, + "learning_rate": 5.850412369709764e-06, + "loss": 17.0533, + "step": 25288 + }, + { + "epoch": 0.4622625989361508, + "grad_norm": 5.539355997825748, + "learning_rate": 5.8501206696197296e-06, + "loss": 17.2112, + "step": 25289 + }, + { + "epoch": 0.4622808781325973, + "grad_norm": 7.1189937992224275, + "learning_rate": 5.849828966550098e-06, + "loss": 17.7607, + "step": 25290 + }, + { + "epoch": 0.4622991573290438, + "grad_norm": 6.133941928886113, + "learning_rate": 5.849537260501886e-06, + "loss": 17.5482, + "step": 25291 + }, + { + "epoch": 0.46231743652549034, + "grad_norm": 6.225187836437039, + "learning_rate": 5.849245551476122e-06, + "loss": 17.4509, + "step": 25292 + }, + { + "epoch": 0.4623357157219369, + "grad_norm": 7.658657985499206, + "learning_rate": 5.8489538394738245e-06, + "loss": 17.999, + "step": 25293 + }, + { + "epoch": 0.4623539949183834, + "grad_norm": 5.98802306611786, + "learning_rate": 5.84866212449602e-06, + "loss": 17.285, + "step": 25294 + }, + { + "epoch": 0.4623722741148299, + "grad_norm": 7.570375819435523, + "learning_rate": 5.848370406543727e-06, + "loss": 18.1253, + "step": 25295 + }, + { + "epoch": 0.46239055331127643, + "grad_norm": 8.033185384736198, + "learning_rate": 5.848078685617967e-06, + "loss": 18.0514, + "step": 25296 + }, + { + "epoch": 0.46240883250772297, + "grad_norm": 6.587730052328041, + "learning_rate": 5.847786961719768e-06, + "loss": 17.3455, + "step": 25297 + }, + { + "epoch": 0.4624271117041695, + "grad_norm": 7.186198500662436, + "learning_rate": 5.847495234850148e-06, + "loss": 18.0893, + 
"step": 25298 + }, + { + "epoch": 0.462445390900616, + "grad_norm": 6.613017319394958, + "learning_rate": 5.8472035050101305e-06, + "loss": 17.8159, + "step": 25299 + }, + { + "epoch": 0.4624636700970625, + "grad_norm": 7.119548517905186, + "learning_rate": 5.846911772200738e-06, + "loss": 18.079, + "step": 25300 + }, + { + "epoch": 0.46248194929350905, + "grad_norm": 5.0024120341206295, + "learning_rate": 5.846620036422994e-06, + "loss": 16.9131, + "step": 25301 + }, + { + "epoch": 0.4625002284899556, + "grad_norm": 5.990592914536484, + "learning_rate": 5.84632829767792e-06, + "loss": 17.4368, + "step": 25302 + }, + { + "epoch": 0.4625185076864021, + "grad_norm": 6.52735317668258, + "learning_rate": 5.8460365559665385e-06, + "loss": 17.2994, + "step": 25303 + }, + { + "epoch": 0.4625367868828486, + "grad_norm": 6.455628074021686, + "learning_rate": 5.845744811289874e-06, + "loss": 17.6648, + "step": 25304 + }, + { + "epoch": 0.46255506607929514, + "grad_norm": 7.138957096937389, + "learning_rate": 5.845453063648945e-06, + "loss": 17.9178, + "step": 25305 + }, + { + "epoch": 0.4625733452757417, + "grad_norm": 6.196540200439552, + "learning_rate": 5.845161313044777e-06, + "loss": 17.5285, + "step": 25306 + }, + { + "epoch": 0.4625916244721882, + "grad_norm": 6.210113427208343, + "learning_rate": 5.844869559478392e-06, + "loss": 17.61, + "step": 25307 + }, + { + "epoch": 0.46260990366863475, + "grad_norm": 7.13312957663764, + "learning_rate": 5.844577802950815e-06, + "loss": 17.6366, + "step": 25308 + }, + { + "epoch": 0.46262818286508123, + "grad_norm": 6.990945895824653, + "learning_rate": 5.844286043463063e-06, + "loss": 17.8473, + "step": 25309 + }, + { + "epoch": 0.46264646206152776, + "grad_norm": 5.8883079948583665, + "learning_rate": 5.843994281016161e-06, + "loss": 17.3688, + "step": 25310 + }, + { + "epoch": 0.4626647412579743, + "grad_norm": 6.139783268220673, + "learning_rate": 5.843702515611136e-06, + "loss": 17.429, + "step": 25311 + }, + { + "epoch": 0.46268302045442083, + "grad_norm": 6.756691423272204, + "learning_rate": 5.843410747249004e-06, + "loss": 17.6165, + "step": 25312 + }, + { + "epoch": 0.46270129965086737, + "grad_norm": 6.564538204662522, + "learning_rate": 5.843118975930792e-06, + "loss": 17.2877, + "step": 25313 + }, + { + "epoch": 0.46271957884731385, + "grad_norm": 5.636229527122742, + "learning_rate": 5.8428272016575196e-06, + "loss": 17.2892, + "step": 25314 + }, + { + "epoch": 0.4627378580437604, + "grad_norm": 6.562940087437146, + "learning_rate": 5.8425354244302116e-06, + "loss": 17.2753, + "step": 25315 + }, + { + "epoch": 0.4627561372402069, + "grad_norm": 5.201640249630994, + "learning_rate": 5.84224364424989e-06, + "loss": 17.1833, + "step": 25316 + }, + { + "epoch": 0.46277441643665346, + "grad_norm": 6.451804603219493, + "learning_rate": 5.841951861117578e-06, + "loss": 17.3407, + "step": 25317 + }, + { + "epoch": 0.4627926956331, + "grad_norm": 6.161920593051882, + "learning_rate": 5.8416600750342985e-06, + "loss": 17.5662, + "step": 25318 + }, + { + "epoch": 0.4628109748295465, + "grad_norm": 6.2909873660523905, + "learning_rate": 5.8413682860010715e-06, + "loss": 17.4847, + "step": 25319 + }, + { + "epoch": 0.462829254025993, + "grad_norm": 6.678807603183959, + "learning_rate": 5.841076494018922e-06, + "loss": 17.5152, + "step": 25320 + }, + { + "epoch": 0.46284753322243954, + "grad_norm": 6.201930651209116, + "learning_rate": 5.840784699088873e-06, + "loss": 17.5612, + "step": 25321 + }, + { + "epoch": 0.4628658124188861, + "grad_norm": 
6.736141269907265, + "learning_rate": 5.840492901211949e-06, + "loss": 17.7945, + "step": 25322 + }, + { + "epoch": 0.4628840916153326, + "grad_norm": 6.219596559882062, + "learning_rate": 5.8402011003891665e-06, + "loss": 17.289, + "step": 25323 + }, + { + "epoch": 0.4629023708117791, + "grad_norm": 6.894170696110101, + "learning_rate": 5.839909296621553e-06, + "loss": 17.5829, + "step": 25324 + }, + { + "epoch": 0.46292065000822563, + "grad_norm": 6.866118240618525, + "learning_rate": 5.839617489910132e-06, + "loss": 17.7614, + "step": 25325 + }, + { + "epoch": 0.46293892920467217, + "grad_norm": 7.584770837758176, + "learning_rate": 5.839325680255923e-06, + "loss": 18.1404, + "step": 25326 + }, + { + "epoch": 0.4629572084011187, + "grad_norm": 6.497741617928724, + "learning_rate": 5.839033867659951e-06, + "loss": 17.4433, + "step": 25327 + }, + { + "epoch": 0.46297548759756524, + "grad_norm": 6.794961528294284, + "learning_rate": 5.8387420521232375e-06, + "loss": 17.5179, + "step": 25328 + }, + { + "epoch": 0.4629937667940117, + "grad_norm": 6.43904306414136, + "learning_rate": 5.8384502336468065e-06, + "loss": 17.4582, + "step": 25329 + }, + { + "epoch": 0.46301204599045825, + "grad_norm": 9.384331646384522, + "learning_rate": 5.838158412231679e-06, + "loss": 18.4846, + "step": 25330 + }, + { + "epoch": 0.4630303251869048, + "grad_norm": 7.290277001149687, + "learning_rate": 5.83786658787888e-06, + "loss": 17.895, + "step": 25331 + }, + { + "epoch": 0.4630486043833513, + "grad_norm": 7.414494454667557, + "learning_rate": 5.8375747605894305e-06, + "loss": 17.7033, + "step": 25332 + }, + { + "epoch": 0.4630668835797978, + "grad_norm": 5.793884845650024, + "learning_rate": 5.837282930364355e-06, + "loss": 17.3952, + "step": 25333 + }, + { + "epoch": 0.46308516277624434, + "grad_norm": 7.216531494080016, + "learning_rate": 5.836991097204676e-06, + "loss": 18.011, + "step": 25334 + }, + { + "epoch": 0.4631034419726909, + "grad_norm": 6.921636301413677, + "learning_rate": 5.836699261111416e-06, + "loss": 17.6392, + "step": 25335 + }, + { + "epoch": 0.4631217211691374, + "grad_norm": 6.148023916485857, + "learning_rate": 5.836407422085597e-06, + "loss": 17.5761, + "step": 25336 + }, + { + "epoch": 0.46314000036558395, + "grad_norm": 6.454094203469489, + "learning_rate": 5.836115580128241e-06, + "loss": 17.5949, + "step": 25337 + }, + { + "epoch": 0.46315827956203043, + "grad_norm": 7.478081110918355, + "learning_rate": 5.835823735240374e-06, + "loss": 17.8644, + "step": 25338 + }, + { + "epoch": 0.46317655875847696, + "grad_norm": 6.6794437943755565, + "learning_rate": 5.835531887423018e-06, + "loss": 17.6262, + "step": 25339 + }, + { + "epoch": 0.4631948379549235, + "grad_norm": 6.636371695461813, + "learning_rate": 5.835240036677195e-06, + "loss": 17.4355, + "step": 25340 + }, + { + "epoch": 0.46321311715137004, + "grad_norm": 6.4468585853660105, + "learning_rate": 5.834948183003927e-06, + "loss": 17.5162, + "step": 25341 + }, + { + "epoch": 0.46323139634781657, + "grad_norm": 6.415564959924538, + "learning_rate": 5.8346563264042376e-06, + "loss": 17.4264, + "step": 25342 + }, + { + "epoch": 0.46324967554426305, + "grad_norm": 6.0162155921609255, + "learning_rate": 5.8343644668791525e-06, + "loss": 17.5026, + "step": 25343 + }, + { + "epoch": 0.4632679547407096, + "grad_norm": 5.887120949260308, + "learning_rate": 5.83407260442969e-06, + "loss": 17.2072, + "step": 25344 + }, + { + "epoch": 0.4632862339371561, + "grad_norm": 7.261838044243021, + "learning_rate": 5.833780739056877e-06, + 
"loss": 17.6527, + "step": 25345 + }, + { + "epoch": 0.46330451313360266, + "grad_norm": 8.076103650438975, + "learning_rate": 5.833488870761734e-06, + "loss": 18.1868, + "step": 25346 + }, + { + "epoch": 0.4633227923300492, + "grad_norm": 6.297252061462196, + "learning_rate": 5.833196999545285e-06, + "loss": 17.7289, + "step": 25347 + }, + { + "epoch": 0.4633410715264957, + "grad_norm": 7.313819585494033, + "learning_rate": 5.832905125408553e-06, + "loss": 18.4109, + "step": 25348 + }, + { + "epoch": 0.4633593507229422, + "grad_norm": 7.601253779940887, + "learning_rate": 5.832613248352562e-06, + "loss": 17.9015, + "step": 25349 + }, + { + "epoch": 0.46337762991938874, + "grad_norm": 6.179775210548726, + "learning_rate": 5.832321368378333e-06, + "loss": 17.3932, + "step": 25350 + }, + { + "epoch": 0.4633959091158353, + "grad_norm": 6.334473440110927, + "learning_rate": 5.832029485486888e-06, + "loss": 17.4904, + "step": 25351 + }, + { + "epoch": 0.4634141883122818, + "grad_norm": 6.247934125522594, + "learning_rate": 5.831737599679254e-06, + "loss": 17.3286, + "step": 25352 + }, + { + "epoch": 0.4634324675087283, + "grad_norm": 7.355119546749972, + "learning_rate": 5.831445710956452e-06, + "loss": 18.326, + "step": 25353 + }, + { + "epoch": 0.46345074670517483, + "grad_norm": 6.091327128978012, + "learning_rate": 5.831153819319504e-06, + "loss": 17.6493, + "step": 25354 + }, + { + "epoch": 0.46346902590162137, + "grad_norm": 6.971561419558052, + "learning_rate": 5.830861924769433e-06, + "loss": 17.8274, + "step": 25355 + }, + { + "epoch": 0.4634873050980679, + "grad_norm": 5.619793787453107, + "learning_rate": 5.830570027307265e-06, + "loss": 17.236, + "step": 25356 + }, + { + "epoch": 0.46350558429451444, + "grad_norm": 5.792039180591749, + "learning_rate": 5.83027812693402e-06, + "loss": 17.3111, + "step": 25357 + }, + { + "epoch": 0.4635238634909609, + "grad_norm": 7.829832211260318, + "learning_rate": 5.829986223650722e-06, + "loss": 18.3417, + "step": 25358 + }, + { + "epoch": 0.46354214268740745, + "grad_norm": 6.0999711054193675, + "learning_rate": 5.8296943174583955e-06, + "loss": 17.3464, + "step": 25359 + }, + { + "epoch": 0.463560421883854, + "grad_norm": 6.605675552514442, + "learning_rate": 5.829402408358061e-06, + "loss": 17.3732, + "step": 25360 + }, + { + "epoch": 0.4635787010803005, + "grad_norm": 5.863169034171898, + "learning_rate": 5.829110496350744e-06, + "loss": 17.1284, + "step": 25361 + }, + { + "epoch": 0.46359698027674706, + "grad_norm": 7.091978667450804, + "learning_rate": 5.828818581437467e-06, + "loss": 17.7498, + "step": 25362 + }, + { + "epoch": 0.46361525947319354, + "grad_norm": 6.241537816369766, + "learning_rate": 5.828526663619253e-06, + "loss": 17.4417, + "step": 25363 + }, + { + "epoch": 0.4636335386696401, + "grad_norm": 7.3549456243561, + "learning_rate": 5.8282347428971235e-06, + "loss": 17.5559, + "step": 25364 + }, + { + "epoch": 0.4636518178660866, + "grad_norm": 5.578223423588588, + "learning_rate": 5.8279428192721035e-06, + "loss": 16.9686, + "step": 25365 + }, + { + "epoch": 0.46367009706253315, + "grad_norm": 7.081109685546922, + "learning_rate": 5.8276508927452165e-06, + "loss": 17.5608, + "step": 25366 + }, + { + "epoch": 0.46368837625897963, + "grad_norm": 5.8333112989986144, + "learning_rate": 5.827358963317485e-06, + "loss": 17.3277, + "step": 25367 + }, + { + "epoch": 0.46370665545542616, + "grad_norm": 6.273552577904575, + "learning_rate": 5.827067030989931e-06, + "loss": 17.3467, + "step": 25368 + }, + { + "epoch": 
0.4637249346518727, + "grad_norm": 5.6332370903147675, + "learning_rate": 5.826775095763578e-06, + "loss": 17.3149, + "step": 25369 + }, + { + "epoch": 0.46374321384831924, + "grad_norm": 5.125673178882156, + "learning_rate": 5.826483157639453e-06, + "loss": 16.8984, + "step": 25370 + }, + { + "epoch": 0.46376149304476577, + "grad_norm": 5.542666792038269, + "learning_rate": 5.826191216618574e-06, + "loss": 17.3516, + "step": 25371 + }, + { + "epoch": 0.46377977224121225, + "grad_norm": 5.973093200492799, + "learning_rate": 5.825899272701968e-06, + "loss": 17.3629, + "step": 25372 + }, + { + "epoch": 0.4637980514376588, + "grad_norm": 7.58868573698555, + "learning_rate": 5.825607325890655e-06, + "loss": 17.8242, + "step": 25373 + }, + { + "epoch": 0.4638163306341053, + "grad_norm": 6.26002998800929, + "learning_rate": 5.825315376185662e-06, + "loss": 17.1687, + "step": 25374 + }, + { + "epoch": 0.46383460983055186, + "grad_norm": 5.446282005113881, + "learning_rate": 5.825023423588009e-06, + "loss": 17.0268, + "step": 25375 + }, + { + "epoch": 0.4638528890269984, + "grad_norm": 6.700845822706817, + "learning_rate": 5.82473146809872e-06, + "loss": 17.542, + "step": 25376 + }, + { + "epoch": 0.4638711682234449, + "grad_norm": 5.392900742683065, + "learning_rate": 5.82443950971882e-06, + "loss": 16.9712, + "step": 25377 + }, + { + "epoch": 0.4638894474198914, + "grad_norm": 8.590892151220999, + "learning_rate": 5.824147548449329e-06, + "loss": 18.2489, + "step": 25378 + }, + { + "epoch": 0.46390772661633795, + "grad_norm": 6.090399025769546, + "learning_rate": 5.823855584291274e-06, + "loss": 17.465, + "step": 25379 + }, + { + "epoch": 0.4639260058127845, + "grad_norm": 7.01374334481513, + "learning_rate": 5.823563617245678e-06, + "loss": 17.891, + "step": 25380 + }, + { + "epoch": 0.463944285009231, + "grad_norm": 8.63108118246894, + "learning_rate": 5.8232716473135605e-06, + "loss": 17.4832, + "step": 25381 + }, + { + "epoch": 0.4639625642056775, + "grad_norm": 6.861777262285128, + "learning_rate": 5.8229796744959485e-06, + "loss": 17.8547, + "step": 25382 + }, + { + "epoch": 0.46398084340212403, + "grad_norm": 6.175446626925928, + "learning_rate": 5.822687698793863e-06, + "loss": 17.1716, + "step": 25383 + }, + { + "epoch": 0.46399912259857057, + "grad_norm": 4.995754632381177, + "learning_rate": 5.82239572020833e-06, + "loss": 16.9765, + "step": 25384 + }, + { + "epoch": 0.4640174017950171, + "grad_norm": 6.297137853896752, + "learning_rate": 5.8221037387403715e-06, + "loss": 17.3626, + "step": 25385 + }, + { + "epoch": 0.46403568099146364, + "grad_norm": 5.997894405928749, + "learning_rate": 5.82181175439101e-06, + "loss": 17.3152, + "step": 25386 + }, + { + "epoch": 0.4640539601879101, + "grad_norm": 9.412705800128995, + "learning_rate": 5.821519767161269e-06, + "loss": 17.9353, + "step": 25387 + }, + { + "epoch": 0.46407223938435666, + "grad_norm": 5.846131578774165, + "learning_rate": 5.821227777052173e-06, + "loss": 17.7164, + "step": 25388 + }, + { + "epoch": 0.4640905185808032, + "grad_norm": 6.777301688189443, + "learning_rate": 5.820935784064745e-06, + "loss": 17.3869, + "step": 25389 + }, + { + "epoch": 0.4641087977772497, + "grad_norm": 7.3930174262124675, + "learning_rate": 5.820643788200009e-06, + "loss": 17.6377, + "step": 25390 + }, + { + "epoch": 0.46412707697369626, + "grad_norm": 6.179387443757268, + "learning_rate": 5.8203517894589865e-06, + "loss": 17.3686, + "step": 25391 + }, + { + "epoch": 0.46414535617014274, + "grad_norm": 6.3682285265058525, + "learning_rate": 
5.820059787842702e-06, + "loss": 17.4647, + "step": 25392 + }, + { + "epoch": 0.4641636353665893, + "grad_norm": 6.687257072835672, + "learning_rate": 5.8197677833521805e-06, + "loss": 17.3832, + "step": 25393 + }, + { + "epoch": 0.4641819145630358, + "grad_norm": 8.697887815710882, + "learning_rate": 5.819475775988445e-06, + "loss": 18.3869, + "step": 25394 + }, + { + "epoch": 0.46420019375948235, + "grad_norm": 6.373782285258622, + "learning_rate": 5.819183765752516e-06, + "loss": 17.4581, + "step": 25395 + }, + { + "epoch": 0.4642184729559289, + "grad_norm": 5.93386617840029, + "learning_rate": 5.818891752645418e-06, + "loss": 17.1143, + "step": 25396 + }, + { + "epoch": 0.46423675215237536, + "grad_norm": 5.622582804489323, + "learning_rate": 5.818599736668178e-06, + "loss": 17.1578, + "step": 25397 + }, + { + "epoch": 0.4642550313488219, + "grad_norm": 5.715015822660491, + "learning_rate": 5.8183077178218166e-06, + "loss": 17.1295, + "step": 25398 + }, + { + "epoch": 0.46427331054526844, + "grad_norm": 6.563659242416588, + "learning_rate": 5.8180156961073566e-06, + "loss": 17.4553, + "step": 25399 + }, + { + "epoch": 0.46429158974171497, + "grad_norm": 5.851177216489818, + "learning_rate": 5.817723671525822e-06, + "loss": 17.2711, + "step": 25400 + }, + { + "epoch": 0.46430986893816145, + "grad_norm": 7.7478468610353, + "learning_rate": 5.8174316440782375e-06, + "loss": 17.9985, + "step": 25401 + }, + { + "epoch": 0.464328148134608, + "grad_norm": 7.663742962480571, + "learning_rate": 5.8171396137656265e-06, + "loss": 18.1791, + "step": 25402 + }, + { + "epoch": 0.4643464273310545, + "grad_norm": 5.864716069555697, + "learning_rate": 5.8168475805890125e-06, + "loss": 17.3009, + "step": 25403 + }, + { + "epoch": 0.46436470652750106, + "grad_norm": 7.865864842195836, + "learning_rate": 5.816555544549418e-06, + "loss": 18.2962, + "step": 25404 + }, + { + "epoch": 0.4643829857239476, + "grad_norm": 5.933238752604491, + "learning_rate": 5.8162635056478665e-06, + "loss": 17.2437, + "step": 25405 + }, + { + "epoch": 0.4644012649203941, + "grad_norm": 6.257970487848118, + "learning_rate": 5.815971463885383e-06, + "loss": 17.3387, + "step": 25406 + }, + { + "epoch": 0.4644195441168406, + "grad_norm": 5.873520787476609, + "learning_rate": 5.815679419262989e-06, + "loss": 17.2506, + "step": 25407 + }, + { + "epoch": 0.46443782331328715, + "grad_norm": 5.236429270078546, + "learning_rate": 5.815387371781713e-06, + "loss": 16.9959, + "step": 25408 + }, + { + "epoch": 0.4644561025097337, + "grad_norm": 6.122325410689532, + "learning_rate": 5.815095321442572e-06, + "loss": 17.189, + "step": 25409 + }, + { + "epoch": 0.4644743817061802, + "grad_norm": 6.761716591140318, + "learning_rate": 5.814803268246593e-06, + "loss": 18.0985, + "step": 25410 + }, + { + "epoch": 0.4644926609026267, + "grad_norm": 6.242224172683804, + "learning_rate": 5.8145112121948e-06, + "loss": 17.6028, + "step": 25411 + }, + { + "epoch": 0.46451094009907323, + "grad_norm": 7.912110355549454, + "learning_rate": 5.814219153288215e-06, + "loss": 17.3915, + "step": 25412 + }, + { + "epoch": 0.46452921929551977, + "grad_norm": 7.727862681621994, + "learning_rate": 5.813927091527864e-06, + "loss": 17.9699, + "step": 25413 + }, + { + "epoch": 0.4645474984919663, + "grad_norm": 6.419234879825803, + "learning_rate": 5.813635026914767e-06, + "loss": 17.6021, + "step": 25414 + }, + { + "epoch": 0.46456577768841284, + "grad_norm": 6.38559028325601, + "learning_rate": 5.813342959449951e-06, + "loss": 17.4399, + "step": 25415 + }, + { + 
"epoch": 0.4645840568848593, + "grad_norm": 6.663499737680368, + "learning_rate": 5.813050889134438e-06, + "loss": 17.4291, + "step": 25416 + }, + { + "epoch": 0.46460233608130586, + "grad_norm": 7.055634140752238, + "learning_rate": 5.812758815969253e-06, + "loss": 17.7337, + "step": 25417 + }, + { + "epoch": 0.4646206152777524, + "grad_norm": 5.865997723408139, + "learning_rate": 5.812466739955418e-06, + "loss": 17.0736, + "step": 25418 + }, + { + "epoch": 0.4646388944741989, + "grad_norm": 7.534559974340752, + "learning_rate": 5.8121746610939575e-06, + "loss": 17.673, + "step": 25419 + }, + { + "epoch": 0.46465717367064546, + "grad_norm": 7.6623166898443555, + "learning_rate": 5.811882579385897e-06, + "loss": 18.3083, + "step": 25420 + }, + { + "epoch": 0.46467545286709194, + "grad_norm": 7.165215723988402, + "learning_rate": 5.8115904948322565e-06, + "loss": 17.5709, + "step": 25421 + }, + { + "epoch": 0.4646937320635385, + "grad_norm": 8.324835671437848, + "learning_rate": 5.811298407434064e-06, + "loss": 18.0978, + "step": 25422 + }, + { + "epoch": 0.464712011259985, + "grad_norm": 7.465843154080268, + "learning_rate": 5.811006317192338e-06, + "loss": 18.0381, + "step": 25423 + }, + { + "epoch": 0.46473029045643155, + "grad_norm": 6.404398169454394, + "learning_rate": 5.810714224108107e-06, + "loss": 17.35, + "step": 25424 + }, + { + "epoch": 0.4647485696528781, + "grad_norm": 5.558068167350239, + "learning_rate": 5.810422128182393e-06, + "loss": 17.0357, + "step": 25425 + }, + { + "epoch": 0.46476684884932457, + "grad_norm": 7.94388708563255, + "learning_rate": 5.810130029416221e-06, + "loss": 18.3155, + "step": 25426 + }, + { + "epoch": 0.4647851280457711, + "grad_norm": 10.070031384060563, + "learning_rate": 5.809837927810612e-06, + "loss": 18.129, + "step": 25427 + }, + { + "epoch": 0.46480340724221764, + "grad_norm": 6.813759307518242, + "learning_rate": 5.8095458233665915e-06, + "loss": 17.3762, + "step": 25428 + }, + { + "epoch": 0.46482168643866417, + "grad_norm": 8.24614842268811, + "learning_rate": 5.8092537160851825e-06, + "loss": 18.2599, + "step": 25429 + }, + { + "epoch": 0.4648399656351107, + "grad_norm": 7.886666816792465, + "learning_rate": 5.80896160596741e-06, + "loss": 17.9468, + "step": 25430 + }, + { + "epoch": 0.4648582448315572, + "grad_norm": 6.734079013518263, + "learning_rate": 5.808669493014297e-06, + "loss": 17.51, + "step": 25431 + }, + { + "epoch": 0.4648765240280037, + "grad_norm": 5.77974965198436, + "learning_rate": 5.808377377226868e-06, + "loss": 17.169, + "step": 25432 + }, + { + "epoch": 0.46489480322445026, + "grad_norm": 7.621575056975452, + "learning_rate": 5.808085258606146e-06, + "loss": 17.8084, + "step": 25433 + }, + { + "epoch": 0.4649130824208968, + "grad_norm": 5.757923524112118, + "learning_rate": 5.807793137153156e-06, + "loss": 17.0912, + "step": 25434 + }, + { + "epoch": 0.4649313616173433, + "grad_norm": 5.969163553293595, + "learning_rate": 5.8075010128689226e-06, + "loss": 17.1662, + "step": 25435 + }, + { + "epoch": 0.4649496408137898, + "grad_norm": 7.14085071453925, + "learning_rate": 5.807208885754466e-06, + "loss": 17.5669, + "step": 25436 + }, + { + "epoch": 0.46496792001023635, + "grad_norm": 6.7941607972969935, + "learning_rate": 5.806916755810812e-06, + "loss": 17.5949, + "step": 25437 + }, + { + "epoch": 0.4649861992066829, + "grad_norm": 7.7719566649871785, + "learning_rate": 5.806624623038985e-06, + "loss": 17.5299, + "step": 25438 + }, + { + "epoch": 0.4650044784031294, + "grad_norm": 6.084064617451337, + 
"learning_rate": 5.80633248744001e-06, + "loss": 17.7187, + "step": 25439 + }, + { + "epoch": 0.4650227575995759, + "grad_norm": 6.3385224137895415, + "learning_rate": 5.806040349014908e-06, + "loss": 17.3292, + "step": 25440 + }, + { + "epoch": 0.46504103679602243, + "grad_norm": 5.967159038319389, + "learning_rate": 5.805748207764707e-06, + "loss": 17.1518, + "step": 25441 + }, + { + "epoch": 0.46505931599246897, + "grad_norm": 7.480021498899485, + "learning_rate": 5.805456063690426e-06, + "loss": 17.6674, + "step": 25442 + }, + { + "epoch": 0.4650775951889155, + "grad_norm": 5.934298317671547, + "learning_rate": 5.805163916793092e-06, + "loss": 17.2422, + "step": 25443 + }, + { + "epoch": 0.46509587438536204, + "grad_norm": 5.419061835388019, + "learning_rate": 5.804871767073729e-06, + "loss": 17.2889, + "step": 25444 + }, + { + "epoch": 0.4651141535818085, + "grad_norm": 6.091419538500869, + "learning_rate": 5.804579614533359e-06, + "loss": 17.4997, + "step": 25445 + }, + { + "epoch": 0.46513243277825506, + "grad_norm": 5.920096238098323, + "learning_rate": 5.804287459173008e-06, + "loss": 17.3415, + "step": 25446 + }, + { + "epoch": 0.4651507119747016, + "grad_norm": 8.725667131244787, + "learning_rate": 5.8039953009937e-06, + "loss": 17.7631, + "step": 25447 + }, + { + "epoch": 0.4651689911711481, + "grad_norm": 6.398170107632234, + "learning_rate": 5.803703139996457e-06, + "loss": 17.2332, + "step": 25448 + }, + { + "epoch": 0.46518727036759466, + "grad_norm": 6.594791005756539, + "learning_rate": 5.803410976182306e-06, + "loss": 17.3875, + "step": 25449 + }, + { + "epoch": 0.46520554956404114, + "grad_norm": 5.356000716930894, + "learning_rate": 5.803118809552268e-06, + "loss": 17.138, + "step": 25450 + }, + { + "epoch": 0.4652238287604877, + "grad_norm": 6.044378429430535, + "learning_rate": 5.802826640107367e-06, + "loss": 17.1697, + "step": 25451 + }, + { + "epoch": 0.4652421079569342, + "grad_norm": 7.951520853661495, + "learning_rate": 5.802534467848629e-06, + "loss": 18.0788, + "step": 25452 + }, + { + "epoch": 0.46526038715338075, + "grad_norm": 6.48639673924519, + "learning_rate": 5.80224229277708e-06, + "loss": 17.3391, + "step": 25453 + }, + { + "epoch": 0.4652786663498273, + "grad_norm": 5.80189892277259, + "learning_rate": 5.80195011489374e-06, + "loss": 17.255, + "step": 25454 + }, + { + "epoch": 0.46529694554627377, + "grad_norm": 6.871948405060312, + "learning_rate": 5.801657934199633e-06, + "loss": 17.7307, + "step": 25455 + }, + { + "epoch": 0.4653152247427203, + "grad_norm": 6.067583718378898, + "learning_rate": 5.801365750695786e-06, + "loss": 17.0874, + "step": 25456 + }, + { + "epoch": 0.46533350393916684, + "grad_norm": 6.5609355757583945, + "learning_rate": 5.801073564383219e-06, + "loss": 17.272, + "step": 25457 + }, + { + "epoch": 0.4653517831356134, + "grad_norm": 6.271905588458909, + "learning_rate": 5.800781375262962e-06, + "loss": 17.386, + "step": 25458 + }, + { + "epoch": 0.4653700623320599, + "grad_norm": 7.648823117763026, + "learning_rate": 5.800489183336033e-06, + "loss": 17.7272, + "step": 25459 + }, + { + "epoch": 0.4653883415285064, + "grad_norm": 8.224700023842017, + "learning_rate": 5.800196988603461e-06, + "loss": 17.8575, + "step": 25460 + }, + { + "epoch": 0.4654066207249529, + "grad_norm": 7.975275975421861, + "learning_rate": 5.799904791066266e-06, + "loss": 17.7064, + "step": 25461 + }, + { + "epoch": 0.46542489992139946, + "grad_norm": 7.692745960079071, + "learning_rate": 5.799612590725477e-06, + "loss": 18.0154, + "step": 25462 + }, 
+ { + "epoch": 0.465443179117846, + "grad_norm": 6.198587248863337, + "learning_rate": 5.799320387582113e-06, + "loss": 17.4678, + "step": 25463 + }, + { + "epoch": 0.46546145831429253, + "grad_norm": 5.770454018950131, + "learning_rate": 5.7990281816372e-06, + "loss": 17.1268, + "step": 25464 + }, + { + "epoch": 0.465479737510739, + "grad_norm": 8.128152895873397, + "learning_rate": 5.798735972891764e-06, + "loss": 18.4631, + "step": 25465 + }, + { + "epoch": 0.46549801670718555, + "grad_norm": 4.776212786610156, + "learning_rate": 5.798443761346828e-06, + "loss": 16.8338, + "step": 25466 + }, + { + "epoch": 0.4655162959036321, + "grad_norm": 8.268703185539234, + "learning_rate": 5.798151547003416e-06, + "loss": 17.7133, + "step": 25467 + }, + { + "epoch": 0.4655345751000786, + "grad_norm": 6.109843705524758, + "learning_rate": 5.797859329862551e-06, + "loss": 17.4378, + "step": 25468 + }, + { + "epoch": 0.4655528542965251, + "grad_norm": 6.166562862592454, + "learning_rate": 5.7975671099252575e-06, + "loss": 17.7501, + "step": 25469 + }, + { + "epoch": 0.46557113349297163, + "grad_norm": 5.4298879723126285, + "learning_rate": 5.797274887192562e-06, + "loss": 17.3158, + "step": 25470 + }, + { + "epoch": 0.46558941268941817, + "grad_norm": 6.311308844092306, + "learning_rate": 5.796982661665487e-06, + "loss": 17.1825, + "step": 25471 + }, + { + "epoch": 0.4656076918858647, + "grad_norm": 6.356383502204275, + "learning_rate": 5.796690433345056e-06, + "loss": 17.1621, + "step": 25472 + }, + { + "epoch": 0.46562597108231124, + "grad_norm": 6.694012136081018, + "learning_rate": 5.796398202232295e-06, + "loss": 17.6111, + "step": 25473 + }, + { + "epoch": 0.4656442502787577, + "grad_norm": 5.5338284597810485, + "learning_rate": 5.796105968328227e-06, + "loss": 17.1924, + "step": 25474 + }, + { + "epoch": 0.46566252947520426, + "grad_norm": 6.168919143212222, + "learning_rate": 5.795813731633877e-06, + "loss": 17.3206, + "step": 25475 + }, + { + "epoch": 0.4656808086716508, + "grad_norm": 5.722622995627302, + "learning_rate": 5.795521492150269e-06, + "loss": 17.2056, + "step": 25476 + }, + { + "epoch": 0.46569908786809733, + "grad_norm": 5.637556516954348, + "learning_rate": 5.795229249878427e-06, + "loss": 17.1989, + "step": 25477 + }, + { + "epoch": 0.46571736706454386, + "grad_norm": 6.9322596818191276, + "learning_rate": 5.794937004819374e-06, + "loss": 17.6157, + "step": 25478 + }, + { + "epoch": 0.46573564626099034, + "grad_norm": 6.298532134776038, + "learning_rate": 5.794644756974138e-06, + "loss": 17.4987, + "step": 25479 + }, + { + "epoch": 0.4657539254574369, + "grad_norm": 5.948370314153637, + "learning_rate": 5.7943525063437415e-06, + "loss": 17.3412, + "step": 25480 + }, + { + "epoch": 0.4657722046538834, + "grad_norm": 6.1791203787472515, + "learning_rate": 5.7940602529292065e-06, + "loss": 17.5173, + "step": 25481 + }, + { + "epoch": 0.46579048385032995, + "grad_norm": 7.65046005754977, + "learning_rate": 5.793767996731561e-06, + "loss": 17.9836, + "step": 25482 + }, + { + "epoch": 0.4658087630467765, + "grad_norm": 6.968752098824836, + "learning_rate": 5.793475737751825e-06, + "loss": 17.7423, + "step": 25483 + }, + { + "epoch": 0.46582704224322297, + "grad_norm": 5.743834748829708, + "learning_rate": 5.793183475991028e-06, + "loss": 17.2186, + "step": 25484 + }, + { + "epoch": 0.4658453214396695, + "grad_norm": 6.34696715960195, + "learning_rate": 5.79289121145019e-06, + "loss": 17.4219, + "step": 25485 + }, + { + "epoch": 0.46586360063611604, + "grad_norm": 11.808143119530456, 
+ "learning_rate": 5.792598944130338e-06, + "loss": 17.6976, + "step": 25486 + }, + { + "epoch": 0.4658818798325626, + "grad_norm": 8.46839004191002, + "learning_rate": 5.7923066740324954e-06, + "loss": 18.5328, + "step": 25487 + }, + { + "epoch": 0.4659001590290091, + "grad_norm": 7.399257564711092, + "learning_rate": 5.792014401157686e-06, + "loss": 17.7309, + "step": 25488 + }, + { + "epoch": 0.4659184382254556, + "grad_norm": 7.342365479060027, + "learning_rate": 5.791722125506935e-06, + "loss": 17.8551, + "step": 25489 + }, + { + "epoch": 0.4659367174219021, + "grad_norm": 6.414269926068148, + "learning_rate": 5.791429847081268e-06, + "loss": 17.376, + "step": 25490 + }, + { + "epoch": 0.46595499661834866, + "grad_norm": 6.692070420332744, + "learning_rate": 5.791137565881706e-06, + "loss": 17.1239, + "step": 25491 + }, + { + "epoch": 0.4659732758147952, + "grad_norm": 5.941832870729837, + "learning_rate": 5.790845281909278e-06, + "loss": 17.342, + "step": 25492 + }, + { + "epoch": 0.46599155501124173, + "grad_norm": 7.054093746543174, + "learning_rate": 5.790552995165003e-06, + "loss": 17.82, + "step": 25493 + }, + { + "epoch": 0.4660098342076882, + "grad_norm": 7.1926783141619035, + "learning_rate": 5.790260705649912e-06, + "loss": 17.8253, + "step": 25494 + }, + { + "epoch": 0.46602811340413475, + "grad_norm": 7.290353227204587, + "learning_rate": 5.789968413365022e-06, + "loss": 17.8883, + "step": 25495 + }, + { + "epoch": 0.4660463926005813, + "grad_norm": 8.000022510651304, + "learning_rate": 5.789676118311362e-06, + "loss": 17.6913, + "step": 25496 + }, + { + "epoch": 0.4660646717970278, + "grad_norm": 5.7223660166521855, + "learning_rate": 5.789383820489958e-06, + "loss": 17.2571, + "step": 25497 + }, + { + "epoch": 0.46608295099347435, + "grad_norm": 6.014089842940683, + "learning_rate": 5.7890915199018305e-06, + "loss": 17.3348, + "step": 25498 + }, + { + "epoch": 0.46610123018992083, + "grad_norm": 7.728944553249146, + "learning_rate": 5.788799216548007e-06, + "loss": 18.1525, + "step": 25499 + }, + { + "epoch": 0.46611950938636737, + "grad_norm": 6.740202405766377, + "learning_rate": 5.788506910429509e-06, + "loss": 18.0152, + "step": 25500 + }, + { + "epoch": 0.4661377885828139, + "grad_norm": 5.878783018698597, + "learning_rate": 5.7882146015473635e-06, + "loss": 17.3935, + "step": 25501 + }, + { + "epoch": 0.46615606777926044, + "grad_norm": 5.514175149194307, + "learning_rate": 5.787922289902594e-06, + "loss": 17.1685, + "step": 25502 + }, + { + "epoch": 0.4661743469757069, + "grad_norm": 5.575428012857195, + "learning_rate": 5.787629975496225e-06, + "loss": 17.3216, + "step": 25503 + }, + { + "epoch": 0.46619262617215346, + "grad_norm": 6.837583545419483, + "learning_rate": 5.787337658329283e-06, + "loss": 17.6341, + "step": 25504 + }, + { + "epoch": 0.4662109053686, + "grad_norm": 6.86522844280634, + "learning_rate": 5.787045338402788e-06, + "loss": 17.6928, + "step": 25505 + }, + { + "epoch": 0.46622918456504653, + "grad_norm": 6.6057154665080375, + "learning_rate": 5.7867530157177695e-06, + "loss": 17.4884, + "step": 25506 + }, + { + "epoch": 0.46624746376149306, + "grad_norm": 6.234282463089636, + "learning_rate": 5.786460690275248e-06, + "loss": 17.4245, + "step": 25507 + }, + { + "epoch": 0.46626574295793954, + "grad_norm": 5.2487149707511795, + "learning_rate": 5.786168362076253e-06, + "loss": 17.1315, + "step": 25508 + }, + { + "epoch": 0.4662840221543861, + "grad_norm": 6.327196159722178, + "learning_rate": 5.785876031121804e-06, + "loss": 17.3609, + "step": 
25509 + }, + { + "epoch": 0.4663023013508326, + "grad_norm": 4.98513399112824, + "learning_rate": 5.7855836974129275e-06, + "loss": 16.8159, + "step": 25510 + }, + { + "epoch": 0.46632058054727915, + "grad_norm": 7.033274968593185, + "learning_rate": 5.78529136095065e-06, + "loss": 17.8629, + "step": 25511 + }, + { + "epoch": 0.4663388597437257, + "grad_norm": 6.4682641751182395, + "learning_rate": 5.784999021735994e-06, + "loss": 17.729, + "step": 25512 + }, + { + "epoch": 0.46635713894017217, + "grad_norm": 6.061961259786219, + "learning_rate": 5.7847066797699835e-06, + "loss": 17.3876, + "step": 25513 + }, + { + "epoch": 0.4663754181366187, + "grad_norm": 7.0861948553921135, + "learning_rate": 5.784414335053645e-06, + "loss": 17.6794, + "step": 25514 + }, + { + "epoch": 0.46639369733306524, + "grad_norm": 5.939268965139865, + "learning_rate": 5.7841219875880014e-06, + "loss": 17.4165, + "step": 25515 + }, + { + "epoch": 0.4664119765295118, + "grad_norm": 7.868548777318356, + "learning_rate": 5.783829637374079e-06, + "loss": 17.8753, + "step": 25516 + }, + { + "epoch": 0.4664302557259583, + "grad_norm": 5.904137056078681, + "learning_rate": 5.783537284412901e-06, + "loss": 17.0714, + "step": 25517 + }, + { + "epoch": 0.4664485349224048, + "grad_norm": 8.465949297581712, + "learning_rate": 5.783244928705494e-06, + "loss": 18.3392, + "step": 25518 + }, + { + "epoch": 0.4664668141188513, + "grad_norm": 5.678356745077976, + "learning_rate": 5.782952570252881e-06, + "loss": 17.1089, + "step": 25519 + }, + { + "epoch": 0.46648509331529786, + "grad_norm": 6.723494075443567, + "learning_rate": 5.782660209056087e-06, + "loss": 17.9239, + "step": 25520 + }, + { + "epoch": 0.4665033725117444, + "grad_norm": 6.949571521758994, + "learning_rate": 5.782367845116137e-06, + "loss": 17.7583, + "step": 25521 + }, + { + "epoch": 0.46652165170819093, + "grad_norm": 7.673193173161843, + "learning_rate": 5.782075478434056e-06, + "loss": 18.2279, + "step": 25522 + }, + { + "epoch": 0.4665399309046374, + "grad_norm": 7.222699785340751, + "learning_rate": 5.7817831090108665e-06, + "loss": 17.7451, + "step": 25523 + }, + { + "epoch": 0.46655821010108395, + "grad_norm": 8.3256639385663, + "learning_rate": 5.781490736847597e-06, + "loss": 18.2498, + "step": 25524 + }, + { + "epoch": 0.4665764892975305, + "grad_norm": 6.864595074317109, + "learning_rate": 5.78119836194527e-06, + "loss": 17.7798, + "step": 25525 + }, + { + "epoch": 0.466594768493977, + "grad_norm": 7.300946696061787, + "learning_rate": 5.780905984304911e-06, + "loss": 17.8623, + "step": 25526 + }, + { + "epoch": 0.46661304769042355, + "grad_norm": 7.140533085292159, + "learning_rate": 5.780613603927543e-06, + "loss": 17.791, + "step": 25527 + }, + { + "epoch": 0.46663132688687003, + "grad_norm": 6.553813815456046, + "learning_rate": 5.7803212208141925e-06, + "loss": 17.4941, + "step": 25528 + }, + { + "epoch": 0.46664960608331657, + "grad_norm": 6.372771379056917, + "learning_rate": 5.780028834965884e-06, + "loss": 17.5054, + "step": 25529 + }, + { + "epoch": 0.4666678852797631, + "grad_norm": 6.650607337775036, + "learning_rate": 5.779736446383642e-06, + "loss": 17.7421, + "step": 25530 + }, + { + "epoch": 0.46668616447620964, + "grad_norm": 5.45317082491413, + "learning_rate": 5.7794440550684914e-06, + "loss": 17.1432, + "step": 25531 + }, + { + "epoch": 0.4667044436726562, + "grad_norm": 6.692476524637104, + "learning_rate": 5.779151661021457e-06, + "loss": 17.5515, + "step": 25532 + }, + { + "epoch": 0.46672272286910266, + "grad_norm": 
6.150310827331175, + "learning_rate": 5.778859264243564e-06, + "loss": 17.3746, + "step": 25533 + }, + { + "epoch": 0.4667410020655492, + "grad_norm": 5.5649850611619085, + "learning_rate": 5.778566864735836e-06, + "loss": 17.3589, + "step": 25534 + }, + { + "epoch": 0.46675928126199573, + "grad_norm": 6.7430881717107, + "learning_rate": 5.778274462499301e-06, + "loss": 18.0345, + "step": 25535 + }, + { + "epoch": 0.46677756045844226, + "grad_norm": 6.655482925491027, + "learning_rate": 5.777982057534978e-06, + "loss": 17.6676, + "step": 25536 + }, + { + "epoch": 0.46679583965488874, + "grad_norm": 8.495092585597954, + "learning_rate": 5.777689649843897e-06, + "loss": 18.0385, + "step": 25537 + }, + { + "epoch": 0.4668141188513353, + "grad_norm": 7.124209805019028, + "learning_rate": 5.777397239427081e-06, + "loss": 17.7627, + "step": 25538 + }, + { + "epoch": 0.4668323980477818, + "grad_norm": 6.998044890956389, + "learning_rate": 5.7771048262855565e-06, + "loss": 17.7135, + "step": 25539 + }, + { + "epoch": 0.46685067724422835, + "grad_norm": 6.909892355272215, + "learning_rate": 5.776812410420347e-06, + "loss": 18.0957, + "step": 25540 + }, + { + "epoch": 0.4668689564406749, + "grad_norm": 6.803884410587914, + "learning_rate": 5.7765199918324766e-06, + "loss": 17.3433, + "step": 25541 + }, + { + "epoch": 0.46688723563712137, + "grad_norm": 6.327646757334009, + "learning_rate": 5.77622757052297e-06, + "loss": 17.2302, + "step": 25542 + }, + { + "epoch": 0.4669055148335679, + "grad_norm": 6.224291355273158, + "learning_rate": 5.775935146492855e-06, + "loss": 17.4444, + "step": 25543 + }, + { + "epoch": 0.46692379403001444, + "grad_norm": 6.495749539530074, + "learning_rate": 5.775642719743153e-06, + "loss": 17.742, + "step": 25544 + }, + { + "epoch": 0.466942073226461, + "grad_norm": 5.932850909083503, + "learning_rate": 5.7753502902748915e-06, + "loss": 17.1484, + "step": 25545 + }, + { + "epoch": 0.4669603524229075, + "grad_norm": 6.508198917908852, + "learning_rate": 5.775057858089094e-06, + "loss": 17.3845, + "step": 25546 + }, + { + "epoch": 0.466978631619354, + "grad_norm": 8.117538089044235, + "learning_rate": 5.774765423186786e-06, + "loss": 17.6249, + "step": 25547 + }, + { + "epoch": 0.4669969108158005, + "grad_norm": 6.58849137660269, + "learning_rate": 5.774472985568993e-06, + "loss": 17.6773, + "step": 25548 + }, + { + "epoch": 0.46701519001224706, + "grad_norm": 8.029901195463289, + "learning_rate": 5.7741805452367395e-06, + "loss": 17.898, + "step": 25549 + }, + { + "epoch": 0.4670334692086936, + "grad_norm": 5.626040265528789, + "learning_rate": 5.77388810219105e-06, + "loss": 17.1408, + "step": 25550 + }, + { + "epoch": 0.46705174840514013, + "grad_norm": 7.145792048078933, + "learning_rate": 5.773595656432949e-06, + "loss": 17.8677, + "step": 25551 + }, + { + "epoch": 0.4670700276015866, + "grad_norm": 6.079540984990817, + "learning_rate": 5.773303207963463e-06, + "loss": 17.4458, + "step": 25552 + }, + { + "epoch": 0.46708830679803315, + "grad_norm": 7.3454322016886815, + "learning_rate": 5.773010756783618e-06, + "loss": 17.6739, + "step": 25553 + }, + { + "epoch": 0.4671065859944797, + "grad_norm": 7.135559965269335, + "learning_rate": 5.772718302894436e-06, + "loss": 17.7609, + "step": 25554 + }, + { + "epoch": 0.4671248651909262, + "grad_norm": 6.4889447700450615, + "learning_rate": 5.772425846296942e-06, + "loss": 17.4732, + "step": 25555 + }, + { + "epoch": 0.46714314438737276, + "grad_norm": 6.333431867166509, + "learning_rate": 5.772133386992164e-06, + "loss": 
17.6238, + "step": 25556 + }, + { + "epoch": 0.46716142358381924, + "grad_norm": 6.435345424507448, + "learning_rate": 5.771840924981126e-06, + "loss": 17.1634, + "step": 25557 + }, + { + "epoch": 0.46717970278026577, + "grad_norm": 7.149523412735398, + "learning_rate": 5.771548460264851e-06, + "loss": 17.5551, + "step": 25558 + }, + { + "epoch": 0.4671979819767123, + "grad_norm": 6.234826849885168, + "learning_rate": 5.771255992844367e-06, + "loss": 17.5132, + "step": 25559 + }, + { + "epoch": 0.46721626117315884, + "grad_norm": 8.309589306755823, + "learning_rate": 5.770963522720696e-06, + "loss": 18.2804, + "step": 25560 + }, + { + "epoch": 0.4672345403696054, + "grad_norm": 6.5399304772071725, + "learning_rate": 5.770671049894866e-06, + "loss": 17.3866, + "step": 25561 + }, + { + "epoch": 0.46725281956605186, + "grad_norm": 5.692793393220293, + "learning_rate": 5.7703785743679005e-06, + "loss": 17.2978, + "step": 25562 + }, + { + "epoch": 0.4672710987624984, + "grad_norm": 9.360252230875442, + "learning_rate": 5.770086096140826e-06, + "loss": 18.3529, + "step": 25563 + }, + { + "epoch": 0.46728937795894493, + "grad_norm": 7.343001489445058, + "learning_rate": 5.769793615214665e-06, + "loss": 18.1661, + "step": 25564 + }, + { + "epoch": 0.46730765715539146, + "grad_norm": 5.8445637134500386, + "learning_rate": 5.769501131590445e-06, + "loss": 17.3307, + "step": 25565 + }, + { + "epoch": 0.467325936351838, + "grad_norm": 5.3673205644296695, + "learning_rate": 5.769208645269191e-06, + "loss": 16.9959, + "step": 25566 + }, + { + "epoch": 0.4673442155482845, + "grad_norm": 5.995327681747548, + "learning_rate": 5.768916156251926e-06, + "loss": 17.3419, + "step": 25567 + }, + { + "epoch": 0.467362494744731, + "grad_norm": 5.9037602333119095, + "learning_rate": 5.768623664539677e-06, + "loss": 17.4071, + "step": 25568 + }, + { + "epoch": 0.46738077394117755, + "grad_norm": 7.740053557973855, + "learning_rate": 5.7683311701334675e-06, + "loss": 18.268, + "step": 25569 + }, + { + "epoch": 0.4673990531376241, + "grad_norm": 5.596008801615207, + "learning_rate": 5.7680386730343265e-06, + "loss": 17.4299, + "step": 25570 + }, + { + "epoch": 0.46741733233407057, + "grad_norm": 6.375225596961252, + "learning_rate": 5.767746173243276e-06, + "loss": 17.421, + "step": 25571 + }, + { + "epoch": 0.4674356115305171, + "grad_norm": 6.040953522506135, + "learning_rate": 5.7674536707613404e-06, + "loss": 17.4077, + "step": 25572 + }, + { + "epoch": 0.46745389072696364, + "grad_norm": 5.330775525693102, + "learning_rate": 5.767161165589548e-06, + "loss": 17.1028, + "step": 25573 + }, + { + "epoch": 0.4674721699234102, + "grad_norm": 7.585647974726364, + "learning_rate": 5.766868657728921e-06, + "loss": 17.7483, + "step": 25574 + }, + { + "epoch": 0.4674904491198567, + "grad_norm": 6.346581416172484, + "learning_rate": 5.766576147180486e-06, + "loss": 17.537, + "step": 25575 + }, + { + "epoch": 0.4675087283163032, + "grad_norm": 6.514677848062042, + "learning_rate": 5.766283633945269e-06, + "loss": 17.7364, + "step": 25576 + }, + { + "epoch": 0.4675270075127497, + "grad_norm": 6.8426307403884, + "learning_rate": 5.765991118024294e-06, + "loss": 17.6425, + "step": 25577 + }, + { + "epoch": 0.46754528670919626, + "grad_norm": 6.11212857663012, + "learning_rate": 5.765698599418585e-06, + "loss": 17.4694, + "step": 25578 + }, + { + "epoch": 0.4675635659056428, + "grad_norm": 6.029309951634189, + "learning_rate": 5.76540607812917e-06, + "loss": 17.3648, + "step": 25579 + }, + { + "epoch": 0.46758184510208933, + 
"grad_norm": 7.3512979418722075, + "learning_rate": 5.7651135541570745e-06, + "loss": 17.4534, + "step": 25580 + }, + { + "epoch": 0.4676001242985358, + "grad_norm": 6.478351434879791, + "learning_rate": 5.764821027503321e-06, + "loss": 17.1912, + "step": 25581 + }, + { + "epoch": 0.46761840349498235, + "grad_norm": 6.2906457901331825, + "learning_rate": 5.764528498168935e-06, + "loss": 17.4239, + "step": 25582 + }, + { + "epoch": 0.4676366826914289, + "grad_norm": 5.0143946326574875, + "learning_rate": 5.764235966154945e-06, + "loss": 17.0215, + "step": 25583 + }, + { + "epoch": 0.4676549618878754, + "grad_norm": 6.367645216377919, + "learning_rate": 5.763943431462375e-06, + "loss": 17.4521, + "step": 25584 + }, + { + "epoch": 0.46767324108432196, + "grad_norm": 5.883589161899149, + "learning_rate": 5.763650894092247e-06, + "loss": 17.3561, + "step": 25585 + }, + { + "epoch": 0.46769152028076844, + "grad_norm": 7.519895788858407, + "learning_rate": 5.763358354045591e-06, + "loss": 17.4102, + "step": 25586 + }, + { + "epoch": 0.46770979947721497, + "grad_norm": 7.090251417467326, + "learning_rate": 5.763065811323429e-06, + "loss": 17.4349, + "step": 25587 + }, + { + "epoch": 0.4677280786736615, + "grad_norm": 6.605999878783802, + "learning_rate": 5.762773265926788e-06, + "loss": 17.434, + "step": 25588 + }, + { + "epoch": 0.46774635787010804, + "grad_norm": 5.274989284856472, + "learning_rate": 5.762480717856692e-06, + "loss": 16.9382, + "step": 25589 + }, + { + "epoch": 0.4677646370665546, + "grad_norm": 6.385074581300302, + "learning_rate": 5.762188167114168e-06, + "loss": 17.5914, + "step": 25590 + }, + { + "epoch": 0.46778291626300106, + "grad_norm": 5.097482259909812, + "learning_rate": 5.76189561370024e-06, + "loss": 17.1079, + "step": 25591 + }, + { + "epoch": 0.4678011954594476, + "grad_norm": 6.112364499099624, + "learning_rate": 5.761603057615936e-06, + "loss": 17.45, + "step": 25592 + }, + { + "epoch": 0.46781947465589413, + "grad_norm": 6.6746320042266545, + "learning_rate": 5.761310498862277e-06, + "loss": 17.5868, + "step": 25593 + }, + { + "epoch": 0.46783775385234067, + "grad_norm": 6.196454607512979, + "learning_rate": 5.761017937440293e-06, + "loss": 17.5181, + "step": 25594 + }, + { + "epoch": 0.4678560330487872, + "grad_norm": 7.1611120361721925, + "learning_rate": 5.760725373351006e-06, + "loss": 17.6064, + "step": 25595 + }, + { + "epoch": 0.4678743122452337, + "grad_norm": 6.581741303080483, + "learning_rate": 5.760432806595441e-06, + "loss": 17.7773, + "step": 25596 + }, + { + "epoch": 0.4678925914416802, + "grad_norm": 6.500205512880502, + "learning_rate": 5.760140237174627e-06, + "loss": 17.6136, + "step": 25597 + }, + { + "epoch": 0.46791087063812675, + "grad_norm": 6.2234080923482935, + "learning_rate": 5.759847665089587e-06, + "loss": 17.37, + "step": 25598 + }, + { + "epoch": 0.4679291498345733, + "grad_norm": 5.453934784457371, + "learning_rate": 5.759555090341347e-06, + "loss": 17.2709, + "step": 25599 + }, + { + "epoch": 0.4679474290310198, + "grad_norm": 5.221289181546808, + "learning_rate": 5.759262512930932e-06, + "loss": 17.1167, + "step": 25600 + }, + { + "epoch": 0.4679657082274663, + "grad_norm": 7.9108630108612665, + "learning_rate": 5.7589699328593675e-06, + "loss": 18.2024, + "step": 25601 + }, + { + "epoch": 0.46798398742391284, + "grad_norm": 7.952453852758127, + "learning_rate": 5.758677350127679e-06, + "loss": 17.6896, + "step": 25602 + }, + { + "epoch": 0.4680022666203594, + "grad_norm": 4.938859191373233, + "learning_rate": 
5.758384764736893e-06, + "loss": 17.1023, + "step": 25603 + }, + { + "epoch": 0.4680205458168059, + "grad_norm": 7.937993365598106, + "learning_rate": 5.758092176688035e-06, + "loss": 18.5179, + "step": 25604 + }, + { + "epoch": 0.4680388250132524, + "grad_norm": 6.450681511813594, + "learning_rate": 5.757799585982128e-06, + "loss": 17.8099, + "step": 25605 + }, + { + "epoch": 0.4680571042096989, + "grad_norm": 6.683748311075554, + "learning_rate": 5.7575069926201995e-06, + "loss": 17.5805, + "step": 25606 + }, + { + "epoch": 0.46807538340614546, + "grad_norm": 6.964070926733987, + "learning_rate": 5.757214396603275e-06, + "loss": 18.0081, + "step": 25607 + }, + { + "epoch": 0.468093662602592, + "grad_norm": 6.770428775611666, + "learning_rate": 5.756921797932381e-06, + "loss": 17.1792, + "step": 25608 + }, + { + "epoch": 0.46811194179903853, + "grad_norm": 5.657804233910794, + "learning_rate": 5.756629196608541e-06, + "loss": 17.0043, + "step": 25609 + }, + { + "epoch": 0.468130220995485, + "grad_norm": 5.359336554783504, + "learning_rate": 5.756336592632779e-06, + "loss": 16.9921, + "step": 25610 + }, + { + "epoch": 0.46814850019193155, + "grad_norm": 6.278354616573513, + "learning_rate": 5.756043986006127e-06, + "loss": 17.5775, + "step": 25611 + }, + { + "epoch": 0.4681667793883781, + "grad_norm": 8.526799560984587, + "learning_rate": 5.7557513767296035e-06, + "loss": 17.9176, + "step": 25612 + }, + { + "epoch": 0.4681850585848246, + "grad_norm": 5.399386204186027, + "learning_rate": 5.7554587648042384e-06, + "loss": 17.0852, + "step": 25613 + }, + { + "epoch": 0.46820333778127116, + "grad_norm": 8.148357792618592, + "learning_rate": 5.755166150231055e-06, + "loss": 17.9017, + "step": 25614 + }, + { + "epoch": 0.46822161697771764, + "grad_norm": 5.188454402782272, + "learning_rate": 5.754873533011079e-06, + "loss": 17.1485, + "step": 25615 + }, + { + "epoch": 0.46823989617416417, + "grad_norm": 6.797866173821247, + "learning_rate": 5.754580913145339e-06, + "loss": 17.8525, + "step": 25616 + }, + { + "epoch": 0.4682581753706107, + "grad_norm": 6.606843059171352, + "learning_rate": 5.754288290634856e-06, + "loss": 17.5798, + "step": 25617 + }, + { + "epoch": 0.46827645456705724, + "grad_norm": 6.558975467364231, + "learning_rate": 5.753995665480659e-06, + "loss": 17.4101, + "step": 25618 + }, + { + "epoch": 0.4682947337635038, + "grad_norm": 6.4949767267599166, + "learning_rate": 5.753703037683774e-06, + "loss": 17.6557, + "step": 25619 + }, + { + "epoch": 0.46831301295995026, + "grad_norm": 6.863121463205504, + "learning_rate": 5.753410407245224e-06, + "loss": 17.7778, + "step": 25620 + }, + { + "epoch": 0.4683312921563968, + "grad_norm": 6.479891724331871, + "learning_rate": 5.753117774166036e-06, + "loss": 17.263, + "step": 25621 + }, + { + "epoch": 0.46834957135284333, + "grad_norm": 5.386820293998476, + "learning_rate": 5.7528251384472365e-06, + "loss": 16.9541, + "step": 25622 + }, + { + "epoch": 0.46836785054928987, + "grad_norm": 6.605155191840947, + "learning_rate": 5.752532500089848e-06, + "loss": 17.2899, + "step": 25623 + }, + { + "epoch": 0.4683861297457364, + "grad_norm": 5.857745284694996, + "learning_rate": 5.7522398590948995e-06, + "loss": 17.3025, + "step": 25624 + }, + { + "epoch": 0.4684044089421829, + "grad_norm": 5.650735443579671, + "learning_rate": 5.7519472154634174e-06, + "loss": 17.2117, + "step": 25625 + }, + { + "epoch": 0.4684226881386294, + "grad_norm": 7.130100135051834, + "learning_rate": 5.751654569196423e-06, + "loss": 17.9528, + "step": 25626 + }, + { 
+ "epoch": 0.46844096733507595, + "grad_norm": 8.278536021762902, + "learning_rate": 5.751361920294946e-06, + "loss": 18.1778, + "step": 25627 + }, + { + "epoch": 0.4684592465315225, + "grad_norm": 5.957601076068427, + "learning_rate": 5.751069268760008e-06, + "loss": 17.4728, + "step": 25628 + }, + { + "epoch": 0.468477525727969, + "grad_norm": 6.7556101549333585, + "learning_rate": 5.750776614592641e-06, + "loss": 17.7987, + "step": 25629 + }, + { + "epoch": 0.4684958049244155, + "grad_norm": 6.83338256167171, + "learning_rate": 5.750483957793865e-06, + "loss": 17.4064, + "step": 25630 + }, + { + "epoch": 0.46851408412086204, + "grad_norm": 7.054059252306268, + "learning_rate": 5.750191298364709e-06, + "loss": 17.7911, + "step": 25631 + }, + { + "epoch": 0.4685323633173086, + "grad_norm": 7.719577776442061, + "learning_rate": 5.749898636306197e-06, + "loss": 17.7469, + "step": 25632 + }, + { + "epoch": 0.4685506425137551, + "grad_norm": 5.074989754595142, + "learning_rate": 5.749605971619355e-06, + "loss": 16.9432, + "step": 25633 + }, + { + "epoch": 0.46856892171020165, + "grad_norm": 5.9451183656073985, + "learning_rate": 5.74931330430521e-06, + "loss": 17.2048, + "step": 25634 + }, + { + "epoch": 0.4685872009066481, + "grad_norm": 6.742594603455647, + "learning_rate": 5.749020634364787e-06, + "loss": 17.5849, + "step": 25635 + }, + { + "epoch": 0.46860548010309466, + "grad_norm": 6.11718331539814, + "learning_rate": 5.748727961799111e-06, + "loss": 17.7196, + "step": 25636 + }, + { + "epoch": 0.4686237592995412, + "grad_norm": 6.610508876323689, + "learning_rate": 5.7484352866092075e-06, + "loss": 17.4668, + "step": 25637 + }, + { + "epoch": 0.46864203849598773, + "grad_norm": 7.691559971277722, + "learning_rate": 5.748142608796105e-06, + "loss": 17.7219, + "step": 25638 + }, + { + "epoch": 0.4686603176924342, + "grad_norm": 6.308453846473192, + "learning_rate": 5.747849928360827e-06, + "loss": 17.3177, + "step": 25639 + }, + { + "epoch": 0.46867859688888075, + "grad_norm": 5.393722225054013, + "learning_rate": 5.7475572453044e-06, + "loss": 17.0605, + "step": 25640 + }, + { + "epoch": 0.4686968760853273, + "grad_norm": 9.16376237447455, + "learning_rate": 5.7472645596278495e-06, + "loss": 17.6939, + "step": 25641 + }, + { + "epoch": 0.4687151552817738, + "grad_norm": 7.240996906399217, + "learning_rate": 5.7469718713322024e-06, + "loss": 18.1665, + "step": 25642 + }, + { + "epoch": 0.46873343447822036, + "grad_norm": 7.507610472592406, + "learning_rate": 5.7466791804184815e-06, + "loss": 18.2205, + "step": 25643 + }, + { + "epoch": 0.46875171367466684, + "grad_norm": 5.822709102917425, + "learning_rate": 5.746386486887718e-06, + "loss": 17.2523, + "step": 25644 + }, + { + "epoch": 0.4687699928711134, + "grad_norm": 6.237261321188969, + "learning_rate": 5.746093790740932e-06, + "loss": 17.6036, + "step": 25645 + }, + { + "epoch": 0.4687882720675599, + "grad_norm": 6.15218480557225, + "learning_rate": 5.745801091979153e-06, + "loss": 17.4224, + "step": 25646 + }, + { + "epoch": 0.46880655126400644, + "grad_norm": 6.24742142239045, + "learning_rate": 5.745508390603405e-06, + "loss": 17.301, + "step": 25647 + }, + { + "epoch": 0.468824830460453, + "grad_norm": 6.127466908515292, + "learning_rate": 5.7452156866147155e-06, + "loss": 17.361, + "step": 25648 + }, + { + "epoch": 0.46884310965689946, + "grad_norm": 6.378781447889848, + "learning_rate": 5.744922980014111e-06, + "loss": 17.5905, + "step": 25649 + }, + { + "epoch": 0.468861388853346, + "grad_norm": 6.266970334193923, + 
"learning_rate": 5.744630270802614e-06, + "loss": 17.6054, + "step": 25650 + }, + { + "epoch": 0.46887966804979253, + "grad_norm": 5.714962090082019, + "learning_rate": 5.744337558981253e-06, + "loss": 17.4403, + "step": 25651 + }, + { + "epoch": 0.46889794724623907, + "grad_norm": 6.36557250001768, + "learning_rate": 5.744044844551053e-06, + "loss": 17.1341, + "step": 25652 + }, + { + "epoch": 0.4689162264426856, + "grad_norm": 7.188647325522799, + "learning_rate": 5.743752127513042e-06, + "loss": 17.8325, + "step": 25653 + }, + { + "epoch": 0.4689345056391321, + "grad_norm": 7.547680872870857, + "learning_rate": 5.743459407868245e-06, + "loss": 17.8632, + "step": 25654 + }, + { + "epoch": 0.4689527848355786, + "grad_norm": 6.92933596374259, + "learning_rate": 5.743166685617685e-06, + "loss": 17.7693, + "step": 25655 + }, + { + "epoch": 0.46897106403202515, + "grad_norm": 7.349112223008469, + "learning_rate": 5.742873960762392e-06, + "loss": 18.1488, + "step": 25656 + }, + { + "epoch": 0.4689893432284717, + "grad_norm": 6.8542570673454755, + "learning_rate": 5.7425812333033884e-06, + "loss": 17.6014, + "step": 25657 + }, + { + "epoch": 0.4690076224249182, + "grad_norm": 6.339414208739852, + "learning_rate": 5.742288503241703e-06, + "loss": 17.6208, + "step": 25658 + }, + { + "epoch": 0.4690259016213647, + "grad_norm": 6.584263814777369, + "learning_rate": 5.741995770578362e-06, + "loss": 17.9069, + "step": 25659 + }, + { + "epoch": 0.46904418081781124, + "grad_norm": 5.819636579535961, + "learning_rate": 5.741703035314388e-06, + "loss": 17.5112, + "step": 25660 + }, + { + "epoch": 0.4690624600142578, + "grad_norm": 6.241513009919817, + "learning_rate": 5.74141029745081e-06, + "loss": 17.5247, + "step": 25661 + }, + { + "epoch": 0.4690807392107043, + "grad_norm": 6.614371455467719, + "learning_rate": 5.741117556988653e-06, + "loss": 17.8962, + "step": 25662 + }, + { + "epoch": 0.46909901840715085, + "grad_norm": 5.626675146884744, + "learning_rate": 5.740824813928945e-06, + "loss": 17.2283, + "step": 25663 + }, + { + "epoch": 0.4691172976035973, + "grad_norm": 6.17983942813688, + "learning_rate": 5.740532068272708e-06, + "loss": 17.5033, + "step": 25664 + }, + { + "epoch": 0.46913557680004386, + "grad_norm": 6.929165717542995, + "learning_rate": 5.740239320020972e-06, + "loss": 17.4919, + "step": 25665 + }, + { + "epoch": 0.4691538559964904, + "grad_norm": 7.643820412534104, + "learning_rate": 5.739946569174761e-06, + "loss": 18.0734, + "step": 25666 + }, + { + "epoch": 0.46917213519293693, + "grad_norm": 6.86572819422133, + "learning_rate": 5.739653815735101e-06, + "loss": 17.5621, + "step": 25667 + }, + { + "epoch": 0.46919041438938347, + "grad_norm": 6.634938317137737, + "learning_rate": 5.7393610597030195e-06, + "loss": 17.7162, + "step": 25668 + }, + { + "epoch": 0.46920869358582995, + "grad_norm": 5.9285304036871045, + "learning_rate": 5.739068301079539e-06, + "loss": 17.4825, + "step": 25669 + }, + { + "epoch": 0.4692269727822765, + "grad_norm": 6.362775910391573, + "learning_rate": 5.738775539865692e-06, + "loss": 17.9573, + "step": 25670 + }, + { + "epoch": 0.469245251978723, + "grad_norm": 5.632427385560699, + "learning_rate": 5.738482776062499e-06, + "loss": 17.141, + "step": 25671 + }, + { + "epoch": 0.46926353117516956, + "grad_norm": 5.64359341938006, + "learning_rate": 5.738190009670987e-06, + "loss": 17.3118, + "step": 25672 + }, + { + "epoch": 0.46928181037161604, + "grad_norm": 7.628796705740088, + "learning_rate": 5.737897240692185e-06, + "loss": 18.1154, + "step": 25673 
+ }, + { + "epoch": 0.4693000895680626, + "grad_norm": 6.6720638000211006, + "learning_rate": 5.737604469127116e-06, + "loss": 17.6352, + "step": 25674 + }, + { + "epoch": 0.4693183687645091, + "grad_norm": 7.118285106718248, + "learning_rate": 5.737311694976807e-06, + "loss": 17.8439, + "step": 25675 + }, + { + "epoch": 0.46933664796095564, + "grad_norm": 6.679490191867098, + "learning_rate": 5.737018918242285e-06, + "loss": 17.4833, + "step": 25676 + }, + { + "epoch": 0.4693549271574022, + "grad_norm": 6.56219098604823, + "learning_rate": 5.7367261389245765e-06, + "loss": 17.4497, + "step": 25677 + }, + { + "epoch": 0.46937320635384866, + "grad_norm": 5.93748186058725, + "learning_rate": 5.736433357024706e-06, + "loss": 17.2334, + "step": 25678 + }, + { + "epoch": 0.4693914855502952, + "grad_norm": 5.548583926147259, + "learning_rate": 5.7361405725437005e-06, + "loss": 17.118, + "step": 25679 + }, + { + "epoch": 0.46940976474674173, + "grad_norm": 7.320166665768787, + "learning_rate": 5.735847785482587e-06, + "loss": 17.8548, + "step": 25680 + }, + { + "epoch": 0.46942804394318827, + "grad_norm": 6.010595277612539, + "learning_rate": 5.73555499584239e-06, + "loss": 16.8336, + "step": 25681 + }, + { + "epoch": 0.4694463231396348, + "grad_norm": 5.908111636604885, + "learning_rate": 5.7352622036241356e-06, + "loss": 17.4006, + "step": 25682 + }, + { + "epoch": 0.4694646023360813, + "grad_norm": 8.56743637344712, + "learning_rate": 5.734969408828852e-06, + "loss": 18.1486, + "step": 25683 + }, + { + "epoch": 0.4694828815325278, + "grad_norm": 8.068802025596723, + "learning_rate": 5.734676611457566e-06, + "loss": 18.1027, + "step": 25684 + }, + { + "epoch": 0.46950116072897435, + "grad_norm": 6.568334598477962, + "learning_rate": 5.734383811511301e-06, + "loss": 17.4182, + "step": 25685 + }, + { + "epoch": 0.4695194399254209, + "grad_norm": 6.602268560068675, + "learning_rate": 5.734091008991083e-06, + "loss": 17.3948, + "step": 25686 + }, + { + "epoch": 0.4695377191218674, + "grad_norm": 8.575644626828332, + "learning_rate": 5.733798203897941e-06, + "loss": 18.5066, + "step": 25687 + }, + { + "epoch": 0.4695559983183139, + "grad_norm": 5.9666038254173435, + "learning_rate": 5.7335053962329e-06, + "loss": 17.3666, + "step": 25688 + }, + { + "epoch": 0.46957427751476044, + "grad_norm": 7.61695503473196, + "learning_rate": 5.7332125859969856e-06, + "loss": 17.5837, + "step": 25689 + }, + { + "epoch": 0.469592556711207, + "grad_norm": 6.1250613603963835, + "learning_rate": 5.7329197731912256e-06, + "loss": 17.5394, + "step": 25690 + }, + { + "epoch": 0.4696108359076535, + "grad_norm": 6.446822408970243, + "learning_rate": 5.732626957816645e-06, + "loss": 17.4589, + "step": 25691 + }, + { + "epoch": 0.46962911510410005, + "grad_norm": 5.94007934733352, + "learning_rate": 5.7323341398742706e-06, + "loss": 17.2912, + "step": 25692 + }, + { + "epoch": 0.46964739430054653, + "grad_norm": 6.384094508784263, + "learning_rate": 5.732041319365128e-06, + "loss": 17.5478, + "step": 25693 + }, + { + "epoch": 0.46966567349699306, + "grad_norm": 7.089703335800537, + "learning_rate": 5.731748496290246e-06, + "loss": 17.5673, + "step": 25694 + }, + { + "epoch": 0.4696839526934396, + "grad_norm": 4.857902234470191, + "learning_rate": 5.731455670650647e-06, + "loss": 16.9854, + "step": 25695 + }, + { + "epoch": 0.46970223188988613, + "grad_norm": 6.634609408981984, + "learning_rate": 5.731162842447359e-06, + "loss": 17.2928, + "step": 25696 + }, + { + "epoch": 0.46972051108633267, + "grad_norm": 
6.032974300249462, + "learning_rate": 5.730870011681411e-06, + "loss": 17.3983, + "step": 25697 + }, + { + "epoch": 0.46973879028277915, + "grad_norm": 5.073998822564762, + "learning_rate": 5.730577178353824e-06, + "loss": 16.9713, + "step": 25698 + }, + { + "epoch": 0.4697570694792257, + "grad_norm": 6.252144364596876, + "learning_rate": 5.7302843424656294e-06, + "loss": 17.2702, + "step": 25699 + }, + { + "epoch": 0.4697753486756722, + "grad_norm": 6.534455522100211, + "learning_rate": 5.729991504017851e-06, + "loss": 17.7289, + "step": 25700 + }, + { + "epoch": 0.46979362787211876, + "grad_norm": 7.048492106480271, + "learning_rate": 5.729698663011516e-06, + "loss": 17.7036, + "step": 25701 + }, + { + "epoch": 0.4698119070685653, + "grad_norm": 7.787887934353227, + "learning_rate": 5.729405819447651e-06, + "loss": 17.8927, + "step": 25702 + }, + { + "epoch": 0.4698301862650118, + "grad_norm": 6.843305471009764, + "learning_rate": 5.729112973327281e-06, + "loss": 17.6049, + "step": 25703 + }, + { + "epoch": 0.4698484654614583, + "grad_norm": 6.609894127921328, + "learning_rate": 5.728820124651435e-06, + "loss": 17.4264, + "step": 25704 + }, + { + "epoch": 0.46986674465790484, + "grad_norm": 5.988494520884227, + "learning_rate": 5.728527273421135e-06, + "loss": 17.3569, + "step": 25705 + }, + { + "epoch": 0.4698850238543514, + "grad_norm": 6.828862597846488, + "learning_rate": 5.728234419637411e-06, + "loss": 17.5556, + "step": 25706 + }, + { + "epoch": 0.46990330305079786, + "grad_norm": 7.474874776494301, + "learning_rate": 5.7279415633012895e-06, + "loss": 18.2851, + "step": 25707 + }, + { + "epoch": 0.4699215822472444, + "grad_norm": 7.734436070776567, + "learning_rate": 5.7276487044137964e-06, + "loss": 18.4307, + "step": 25708 + }, + { + "epoch": 0.46993986144369093, + "grad_norm": 7.963822028095857, + "learning_rate": 5.727355842975956e-06, + "loss": 18.0831, + "step": 25709 + }, + { + "epoch": 0.46995814064013747, + "grad_norm": 7.74876160533145, + "learning_rate": 5.7270629789887966e-06, + "loss": 17.859, + "step": 25710 + }, + { + "epoch": 0.469976419836584, + "grad_norm": 5.603722211222619, + "learning_rate": 5.7267701124533455e-06, + "loss": 17.1839, + "step": 25711 + }, + { + "epoch": 0.4699946990330305, + "grad_norm": 7.350472567641458, + "learning_rate": 5.7264772433706284e-06, + "loss": 17.7715, + "step": 25712 + }, + { + "epoch": 0.470012978229477, + "grad_norm": 8.48293326398749, + "learning_rate": 5.726184371741671e-06, + "loss": 17.9233, + "step": 25713 + }, + { + "epoch": 0.47003125742592355, + "grad_norm": 7.3769601066609365, + "learning_rate": 5.7258914975675e-06, + "loss": 18.0175, + "step": 25714 + }, + { + "epoch": 0.4700495366223701, + "grad_norm": 5.47695415709752, + "learning_rate": 5.725598620849144e-06, + "loss": 16.9339, + "step": 25715 + }, + { + "epoch": 0.4700678158188166, + "grad_norm": 6.340040965722052, + "learning_rate": 5.725305741587627e-06, + "loss": 17.6432, + "step": 25716 + }, + { + "epoch": 0.4700860950152631, + "grad_norm": 6.050953636640179, + "learning_rate": 5.725012859783975e-06, + "loss": 17.3825, + "step": 25717 + }, + { + "epoch": 0.47010437421170964, + "grad_norm": 6.8669465037179975, + "learning_rate": 5.724719975439217e-06, + "loss": 17.7668, + "step": 25718 + }, + { + "epoch": 0.4701226534081562, + "grad_norm": 5.122800605776736, + "learning_rate": 5.724427088554379e-06, + "loss": 17.0787, + "step": 25719 + }, + { + "epoch": 0.4701409326046027, + "grad_norm": 6.818098158107216, + "learning_rate": 5.7241341991304855e-06, + "loss": 
17.5973, + "step": 25720 + }, + { + "epoch": 0.47015921180104925, + "grad_norm": 7.719653622645311, + "learning_rate": 5.723841307168565e-06, + "loss": 17.8661, + "step": 25721 + }, + { + "epoch": 0.47017749099749573, + "grad_norm": 7.152130450960114, + "learning_rate": 5.723548412669644e-06, + "loss": 17.8253, + "step": 25722 + }, + { + "epoch": 0.47019577019394226, + "grad_norm": 8.988701786963508, + "learning_rate": 5.723255515634747e-06, + "loss": 17.6978, + "step": 25723 + }, + { + "epoch": 0.4702140493903888, + "grad_norm": 5.22453220394301, + "learning_rate": 5.722962616064903e-06, + "loss": 17.0157, + "step": 25724 + }, + { + "epoch": 0.47023232858683534, + "grad_norm": 5.863474360587279, + "learning_rate": 5.72266971396114e-06, + "loss": 17.3891, + "step": 25725 + }, + { + "epoch": 0.47025060778328187, + "grad_norm": 8.334234473614273, + "learning_rate": 5.72237680932448e-06, + "loss": 18.2978, + "step": 25726 + }, + { + "epoch": 0.47026888697972835, + "grad_norm": 7.895708930511546, + "learning_rate": 5.722083902155952e-06, + "loss": 18.1887, + "step": 25727 + }, + { + "epoch": 0.4702871661761749, + "grad_norm": 5.5779541338091025, + "learning_rate": 5.7217909924565825e-06, + "loss": 17.3675, + "step": 25728 + }, + { + "epoch": 0.4703054453726214, + "grad_norm": 7.120368008290413, + "learning_rate": 5.7214980802274e-06, + "loss": 17.6981, + "step": 25729 + }, + { + "epoch": 0.47032372456906796, + "grad_norm": 6.318338417013921, + "learning_rate": 5.721205165469428e-06, + "loss": 17.3642, + "step": 25730 + }, + { + "epoch": 0.4703420037655145, + "grad_norm": 6.455983042945042, + "learning_rate": 5.7209122481836944e-06, + "loss": 17.5759, + "step": 25731 + }, + { + "epoch": 0.470360282961961, + "grad_norm": 7.351994669905593, + "learning_rate": 5.720619328371226e-06, + "loss": 18.0706, + "step": 25732 + }, + { + "epoch": 0.4703785621584075, + "grad_norm": 6.5837840471987, + "learning_rate": 5.72032640603305e-06, + "loss": 17.7664, + "step": 25733 + }, + { + "epoch": 0.47039684135485405, + "grad_norm": 8.831748245630628, + "learning_rate": 5.720033481170192e-06, + "loss": 18.2569, + "step": 25734 + }, + { + "epoch": 0.4704151205513006, + "grad_norm": 6.8952771330461395, + "learning_rate": 5.719740553783679e-06, + "loss": 17.6438, + "step": 25735 + }, + { + "epoch": 0.4704333997477471, + "grad_norm": 5.684297049286084, + "learning_rate": 5.719447623874539e-06, + "loss": 17.1094, + "step": 25736 + }, + { + "epoch": 0.4704516789441936, + "grad_norm": 6.4742925462084004, + "learning_rate": 5.719154691443795e-06, + "loss": 17.4488, + "step": 25737 + }, + { + "epoch": 0.47046995814064013, + "grad_norm": 4.955174616071838, + "learning_rate": 5.718861756492478e-06, + "loss": 17.0022, + "step": 25738 + }, + { + "epoch": 0.47048823733708667, + "grad_norm": 6.669365353904177, + "learning_rate": 5.718568819021612e-06, + "loss": 17.7681, + "step": 25739 + }, + { + "epoch": 0.4705065165335332, + "grad_norm": 6.576506940733695, + "learning_rate": 5.718275879032226e-06, + "loss": 17.3518, + "step": 25740 + }, + { + "epoch": 0.4705247957299797, + "grad_norm": 6.843285743590159, + "learning_rate": 5.717982936525345e-06, + "loss": 17.7304, + "step": 25741 + }, + { + "epoch": 0.4705430749264262, + "grad_norm": 6.1425211194349005, + "learning_rate": 5.717689991501996e-06, + "loss": 17.572, + "step": 25742 + }, + { + "epoch": 0.47056135412287275, + "grad_norm": 6.259091147614705, + "learning_rate": 5.7173970439632044e-06, + "loss": 17.2392, + "step": 25743 + }, + { + "epoch": 0.4705796333193193, + 
"grad_norm": 6.56324687763438, + "learning_rate": 5.71710409391e-06, + "loss": 17.699, + "step": 25744 + }, + { + "epoch": 0.4705979125157658, + "grad_norm": 6.4870259587971075, + "learning_rate": 5.716811141343407e-06, + "loss": 17.575, + "step": 25745 + }, + { + "epoch": 0.4706161917122123, + "grad_norm": 7.0280958866553185, + "learning_rate": 5.716518186264454e-06, + "loss": 17.7948, + "step": 25746 + }, + { + "epoch": 0.47063447090865884, + "grad_norm": 7.43835303131793, + "learning_rate": 5.716225228674166e-06, + "loss": 17.4661, + "step": 25747 + }, + { + "epoch": 0.4706527501051054, + "grad_norm": 5.680874544795799, + "learning_rate": 5.715932268573572e-06, + "loss": 17.3705, + "step": 25748 + }, + { + "epoch": 0.4706710293015519, + "grad_norm": 11.223624627611429, + "learning_rate": 5.715639305963697e-06, + "loss": 18.3935, + "step": 25749 + }, + { + "epoch": 0.47068930849799845, + "grad_norm": 7.106054349598446, + "learning_rate": 5.715346340845568e-06, + "loss": 17.6383, + "step": 25750 + }, + { + "epoch": 0.47070758769444493, + "grad_norm": 5.927422855884639, + "learning_rate": 5.7150533732202115e-06, + "loss": 17.346, + "step": 25751 + }, + { + "epoch": 0.47072586689089146, + "grad_norm": 6.295244208832737, + "learning_rate": 5.714760403088656e-06, + "loss": 17.5507, + "step": 25752 + }, + { + "epoch": 0.470744146087338, + "grad_norm": 9.004936621510755, + "learning_rate": 5.7144674304519275e-06, + "loss": 17.6309, + "step": 25753 + }, + { + "epoch": 0.47076242528378454, + "grad_norm": 8.922715486165398, + "learning_rate": 5.714174455311052e-06, + "loss": 17.9797, + "step": 25754 + }, + { + "epoch": 0.47078070448023107, + "grad_norm": 7.6902900504639415, + "learning_rate": 5.713881477667057e-06, + "loss": 18.4467, + "step": 25755 + }, + { + "epoch": 0.47079898367667755, + "grad_norm": 10.561340904277566, + "learning_rate": 5.713588497520971e-06, + "loss": 18.5204, + "step": 25756 + }, + { + "epoch": 0.4708172628731241, + "grad_norm": 6.6134355482343175, + "learning_rate": 5.7132955148738174e-06, + "loss": 17.7061, + "step": 25757 + }, + { + "epoch": 0.4708355420695706, + "grad_norm": 6.8893242974843885, + "learning_rate": 5.713002529726626e-06, + "loss": 17.7588, + "step": 25758 + }, + { + "epoch": 0.47085382126601716, + "grad_norm": 7.474456619203395, + "learning_rate": 5.712709542080421e-06, + "loss": 18.0075, + "step": 25759 + }, + { + "epoch": 0.4708721004624637, + "grad_norm": 6.948855013586783, + "learning_rate": 5.712416551936232e-06, + "loss": 17.8254, + "step": 25760 + }, + { + "epoch": 0.4708903796589102, + "grad_norm": 6.50457562766823, + "learning_rate": 5.7121235592950855e-06, + "loss": 17.751, + "step": 25761 + }, + { + "epoch": 0.4709086588553567, + "grad_norm": 5.83894267974012, + "learning_rate": 5.711830564158006e-06, + "loss": 17.2684, + "step": 25762 + }, + { + "epoch": 0.47092693805180325, + "grad_norm": 7.653457693003221, + "learning_rate": 5.711537566526024e-06, + "loss": 17.9138, + "step": 25763 + }, + { + "epoch": 0.4709452172482498, + "grad_norm": 5.1323006430626785, + "learning_rate": 5.711244566400163e-06, + "loss": 16.9562, + "step": 25764 + }, + { + "epoch": 0.4709634964446963, + "grad_norm": 8.25864421776104, + "learning_rate": 5.710951563781452e-06, + "loss": 17.7398, + "step": 25765 + }, + { + "epoch": 0.4709817756411428, + "grad_norm": 7.69456264534389, + "learning_rate": 5.710658558670919e-06, + "loss": 18.1504, + "step": 25766 + }, + { + "epoch": 0.47100005483758933, + "grad_norm": 6.088654055819759, + "learning_rate": 5.710365551069588e-06, 
+ "loss": 17.5746, + "step": 25767 + }, + { + "epoch": 0.47101833403403587, + "grad_norm": 5.194843577707228, + "learning_rate": 5.710072540978487e-06, + "loss": 17.1521, + "step": 25768 + }, + { + "epoch": 0.4710366132304824, + "grad_norm": 6.697547190304205, + "learning_rate": 5.709779528398643e-06, + "loss": 17.759, + "step": 25769 + }, + { + "epoch": 0.47105489242692894, + "grad_norm": 6.429279863825913, + "learning_rate": 5.709486513331085e-06, + "loss": 17.1669, + "step": 25770 + }, + { + "epoch": 0.4710731716233754, + "grad_norm": 6.441112590700953, + "learning_rate": 5.709193495776837e-06, + "loss": 17.7022, + "step": 25771 + }, + { + "epoch": 0.47109145081982196, + "grad_norm": 6.625448028730175, + "learning_rate": 5.708900475736928e-06, + "loss": 17.4705, + "step": 25772 + }, + { + "epoch": 0.4711097300162685, + "grad_norm": 6.305411184269652, + "learning_rate": 5.708607453212385e-06, + "loss": 17.5364, + "step": 25773 + }, + { + "epoch": 0.471128009212715, + "grad_norm": 6.290464556869714, + "learning_rate": 5.708314428204233e-06, + "loss": 17.5009, + "step": 25774 + }, + { + "epoch": 0.4711462884091615, + "grad_norm": 7.906891643381622, + "learning_rate": 5.708021400713501e-06, + "loss": 17.8957, + "step": 25775 + }, + { + "epoch": 0.47116456760560804, + "grad_norm": 6.183705673203319, + "learning_rate": 5.707728370741215e-06, + "loss": 17.5608, + "step": 25776 + }, + { + "epoch": 0.4711828468020546, + "grad_norm": 7.540136204940559, + "learning_rate": 5.707435338288403e-06, + "loss": 18.1934, + "step": 25777 + }, + { + "epoch": 0.4712011259985011, + "grad_norm": 6.791809613177821, + "learning_rate": 5.707142303356092e-06, + "loss": 17.4112, + "step": 25778 + }, + { + "epoch": 0.47121940519494765, + "grad_norm": 5.89670348103306, + "learning_rate": 5.706849265945307e-06, + "loss": 17.3449, + "step": 25779 + }, + { + "epoch": 0.47123768439139413, + "grad_norm": 7.1241957625525085, + "learning_rate": 5.706556226057078e-06, + "loss": 17.7355, + "step": 25780 + }, + { + "epoch": 0.47125596358784066, + "grad_norm": 7.153960642312598, + "learning_rate": 5.706263183692431e-06, + "loss": 17.6717, + "step": 25781 + }, + { + "epoch": 0.4712742427842872, + "grad_norm": 6.438701194873751, + "learning_rate": 5.70597013885239e-06, + "loss": 17.5661, + "step": 25782 + }, + { + "epoch": 0.47129252198073374, + "grad_norm": 7.390245278411702, + "learning_rate": 5.705677091537988e-06, + "loss": 18.0024, + "step": 25783 + }, + { + "epoch": 0.47131080117718027, + "grad_norm": 5.88308197225255, + "learning_rate": 5.705384041750249e-06, + "loss": 17.3385, + "step": 25784 + }, + { + "epoch": 0.47132908037362675, + "grad_norm": 5.560797798888823, + "learning_rate": 5.705090989490199e-06, + "loss": 17.2345, + "step": 25785 + }, + { + "epoch": 0.4713473595700733, + "grad_norm": 6.386049160659695, + "learning_rate": 5.7047979347588665e-06, + "loss": 17.8416, + "step": 25786 + }, + { + "epoch": 0.4713656387665198, + "grad_norm": 6.9627073599178715, + "learning_rate": 5.704504877557279e-06, + "loss": 18.0885, + "step": 25787 + }, + { + "epoch": 0.47138391796296636, + "grad_norm": 6.204843270334688, + "learning_rate": 5.704211817886463e-06, + "loss": 17.7679, + "step": 25788 + }, + { + "epoch": 0.4714021971594129, + "grad_norm": 6.480918566703043, + "learning_rate": 5.703918755747444e-06, + "loss": 17.6984, + "step": 25789 + }, + { + "epoch": 0.4714204763558594, + "grad_norm": 7.127484284389984, + "learning_rate": 5.703625691141252e-06, + "loss": 17.9324, + "step": 25790 + }, + { + "epoch": 
0.4714387555523059, + "grad_norm": 6.2940842817535, + "learning_rate": 5.703332624068914e-06, + "loss": 17.2779, + "step": 25791 + }, + { + "epoch": 0.47145703474875245, + "grad_norm": 6.574987717773518, + "learning_rate": 5.703039554531454e-06, + "loss": 17.7116, + "step": 25792 + }, + { + "epoch": 0.471475313945199, + "grad_norm": 7.253700012881749, + "learning_rate": 5.7027464825299024e-06, + "loss": 17.8144, + "step": 25793 + }, + { + "epoch": 0.4714935931416455, + "grad_norm": 6.939616955610362, + "learning_rate": 5.702453408065287e-06, + "loss": 17.7171, + "step": 25794 + }, + { + "epoch": 0.471511872338092, + "grad_norm": 7.077047832098274, + "learning_rate": 5.702160331138632e-06, + "loss": 17.4779, + "step": 25795 + }, + { + "epoch": 0.47153015153453853, + "grad_norm": 8.067816705056266, + "learning_rate": 5.7018672517509645e-06, + "loss": 18.0141, + "step": 25796 + }, + { + "epoch": 0.47154843073098507, + "grad_norm": 4.89697354386606, + "learning_rate": 5.7015741699033144e-06, + "loss": 16.8199, + "step": 25797 + }, + { + "epoch": 0.4715667099274316, + "grad_norm": 6.599982962770622, + "learning_rate": 5.701281085596709e-06, + "loss": 17.2145, + "step": 25798 + }, + { + "epoch": 0.47158498912387814, + "grad_norm": 6.756677056868582, + "learning_rate": 5.700987998832173e-06, + "loss": 17.5423, + "step": 25799 + }, + { + "epoch": 0.4716032683203246, + "grad_norm": 6.4648199917368325, + "learning_rate": 5.700694909610736e-06, + "loss": 17.7403, + "step": 25800 + }, + { + "epoch": 0.47162154751677116, + "grad_norm": 6.397784271903643, + "learning_rate": 5.700401817933422e-06, + "loss": 17.6628, + "step": 25801 + }, + { + "epoch": 0.4716398267132177, + "grad_norm": 5.149442698610189, + "learning_rate": 5.700108723801262e-06, + "loss": 17.0186, + "step": 25802 + }, + { + "epoch": 0.4716581059096642, + "grad_norm": 7.4748213213920796, + "learning_rate": 5.69981562721528e-06, + "loss": 17.7124, + "step": 25803 + }, + { + "epoch": 0.47167638510611076, + "grad_norm": 7.234924381618311, + "learning_rate": 5.699522528176506e-06, + "loss": 18.0197, + "step": 25804 + }, + { + "epoch": 0.47169466430255724, + "grad_norm": 6.348990477810652, + "learning_rate": 5.699229426685967e-06, + "loss": 17.305, + "step": 25805 + }, + { + "epoch": 0.4717129434990038, + "grad_norm": 5.804665038058139, + "learning_rate": 5.698936322744689e-06, + "loss": 17.1312, + "step": 25806 + }, + { + "epoch": 0.4717312226954503, + "grad_norm": 5.685403952588046, + "learning_rate": 5.6986432163537e-06, + "loss": 17.1033, + "step": 25807 + }, + { + "epoch": 0.47174950189189685, + "grad_norm": 5.6113171001790985, + "learning_rate": 5.698350107514028e-06, + "loss": 17.089, + "step": 25808 + }, + { + "epoch": 0.47176778108834333, + "grad_norm": 6.010913038377717, + "learning_rate": 5.698056996226697e-06, + "loss": 17.4232, + "step": 25809 + }, + { + "epoch": 0.47178606028478987, + "grad_norm": 7.38772038955717, + "learning_rate": 5.6977638824927385e-06, + "loss": 17.7902, + "step": 25810 + }, + { + "epoch": 0.4718043394812364, + "grad_norm": 6.259717543016462, + "learning_rate": 5.697470766313179e-06, + "loss": 17.4974, + "step": 25811 + }, + { + "epoch": 0.47182261867768294, + "grad_norm": 7.24695507568173, + "learning_rate": 5.697177647689044e-06, + "loss": 18.066, + "step": 25812 + }, + { + "epoch": 0.4718408978741295, + "grad_norm": 6.138165958681334, + "learning_rate": 5.696884526621361e-06, + "loss": 17.2708, + "step": 25813 + }, + { + "epoch": 0.47185917707057595, + "grad_norm": 5.908285647364228, + "learning_rate": 
5.696591403111158e-06, + "loss": 17.0526, + "step": 25814 + }, + { + "epoch": 0.4718774562670225, + "grad_norm": 5.9555887823675135, + "learning_rate": 5.6962982771594645e-06, + "loss": 17.3364, + "step": 25815 + }, + { + "epoch": 0.471895735463469, + "grad_norm": 6.547020115876095, + "learning_rate": 5.696005148767305e-06, + "loss": 17.6849, + "step": 25816 + }, + { + "epoch": 0.47191401465991556, + "grad_norm": 5.5296341109417115, + "learning_rate": 5.695712017935707e-06, + "loss": 17.2958, + "step": 25817 + }, + { + "epoch": 0.4719322938563621, + "grad_norm": 5.914017653103802, + "learning_rate": 5.6954188846657e-06, + "loss": 17.355, + "step": 25818 + }, + { + "epoch": 0.4719505730528086, + "grad_norm": 6.347253209844765, + "learning_rate": 5.69512574895831e-06, + "loss": 17.6088, + "step": 25819 + }, + { + "epoch": 0.4719688522492551, + "grad_norm": 6.8219558055636105, + "learning_rate": 5.694832610814565e-06, + "loss": 17.7155, + "step": 25820 + }, + { + "epoch": 0.47198713144570165, + "grad_norm": 6.475568929341647, + "learning_rate": 5.694539470235491e-06, + "loss": 17.4291, + "step": 25821 + }, + { + "epoch": 0.4720054106421482, + "grad_norm": 6.238996419924572, + "learning_rate": 5.694246327222117e-06, + "loss": 17.3621, + "step": 25822 + }, + { + "epoch": 0.4720236898385947, + "grad_norm": 5.822179323273069, + "learning_rate": 5.693953181775469e-06, + "loss": 17.1884, + "step": 25823 + }, + { + "epoch": 0.4720419690350412, + "grad_norm": 7.282504982699104, + "learning_rate": 5.6936600338965755e-06, + "loss": 17.8156, + "step": 25824 + }, + { + "epoch": 0.47206024823148773, + "grad_norm": 6.595031253693095, + "learning_rate": 5.693366883586465e-06, + "loss": 17.5496, + "step": 25825 + }, + { + "epoch": 0.47207852742793427, + "grad_norm": 6.624650194623883, + "learning_rate": 5.693073730846162e-06, + "loss": 17.6315, + "step": 25826 + }, + { + "epoch": 0.4720968066243808, + "grad_norm": 6.576914094424634, + "learning_rate": 5.6927805756766975e-06, + "loss": 17.4683, + "step": 25827 + }, + { + "epoch": 0.47211508582082734, + "grad_norm": 6.415511974670059, + "learning_rate": 5.692487418079096e-06, + "loss": 17.4982, + "step": 25828 + }, + { + "epoch": 0.4721333650172738, + "grad_norm": 6.482107892871176, + "learning_rate": 5.692194258054387e-06, + "loss": 17.8158, + "step": 25829 + }, + { + "epoch": 0.47215164421372036, + "grad_norm": 6.574487703205978, + "learning_rate": 5.691901095603596e-06, + "loss": 17.4369, + "step": 25830 + }, + { + "epoch": 0.4721699234101669, + "grad_norm": 8.795784141479906, + "learning_rate": 5.6916079307277525e-06, + "loss": 17.8158, + "step": 25831 + }, + { + "epoch": 0.4721882026066134, + "grad_norm": 8.248989669523736, + "learning_rate": 5.691314763427883e-06, + "loss": 17.8837, + "step": 25832 + }, + { + "epoch": 0.47220648180305996, + "grad_norm": 6.722758123023115, + "learning_rate": 5.691021593705015e-06, + "loss": 17.8966, + "step": 25833 + }, + { + "epoch": 0.47222476099950644, + "grad_norm": 7.033595405632596, + "learning_rate": 5.690728421560177e-06, + "loss": 17.7698, + "step": 25834 + }, + { + "epoch": 0.472243040195953, + "grad_norm": 5.509576586719086, + "learning_rate": 5.690435246994397e-06, + "loss": 17.2702, + "step": 25835 + }, + { + "epoch": 0.4722613193923995, + "grad_norm": 6.454036418745367, + "learning_rate": 5.6901420700086985e-06, + "loss": 17.6068, + "step": 25836 + }, + { + "epoch": 0.47227959858884605, + "grad_norm": 6.5781206231900065, + "learning_rate": 5.689848890604113e-06, + "loss": 17.5991, + "step": 25837 + }, + { + 
"epoch": 0.4722978777852926, + "grad_norm": 6.2061747849983675, + "learning_rate": 5.689555708781667e-06, + "loss": 17.3686, + "step": 25838 + }, + { + "epoch": 0.47231615698173907, + "grad_norm": 5.537459395375557, + "learning_rate": 5.689262524542389e-06, + "loss": 17.2471, + "step": 25839 + }, + { + "epoch": 0.4723344361781856, + "grad_norm": 6.814166713417278, + "learning_rate": 5.688969337887305e-06, + "loss": 17.7222, + "step": 25840 + }, + { + "epoch": 0.47235271537463214, + "grad_norm": 6.132540921005035, + "learning_rate": 5.688676148817442e-06, + "loss": 17.0324, + "step": 25841 + }, + { + "epoch": 0.4723709945710787, + "grad_norm": 5.9814842542380395, + "learning_rate": 5.688382957333831e-06, + "loss": 17.4957, + "step": 25842 + }, + { + "epoch": 0.47238927376752515, + "grad_norm": 6.635167267324463, + "learning_rate": 5.688089763437498e-06, + "loss": 17.6083, + "step": 25843 + }, + { + "epoch": 0.4724075529639717, + "grad_norm": 6.772207805176339, + "learning_rate": 5.687796567129468e-06, + "loss": 18.0892, + "step": 25844 + }, + { + "epoch": 0.4724258321604182, + "grad_norm": 6.155238518836094, + "learning_rate": 5.687503368410772e-06, + "loss": 17.3286, + "step": 25845 + }, + { + "epoch": 0.47244411135686476, + "grad_norm": 6.557655040142976, + "learning_rate": 5.687210167282435e-06, + "loss": 17.805, + "step": 25846 + }, + { + "epoch": 0.4724623905533113, + "grad_norm": 5.832099023514787, + "learning_rate": 5.686916963745487e-06, + "loss": 17.4085, + "step": 25847 + }, + { + "epoch": 0.4724806697497578, + "grad_norm": 5.806232684173829, + "learning_rate": 5.686623757800954e-06, + "loss": 17.4479, + "step": 25848 + }, + { + "epoch": 0.4724989489462043, + "grad_norm": 8.043928830639969, + "learning_rate": 5.686330549449866e-06, + "loss": 17.8331, + "step": 25849 + }, + { + "epoch": 0.47251722814265085, + "grad_norm": 6.535933902088941, + "learning_rate": 5.6860373386932465e-06, + "loss": 17.6848, + "step": 25850 + }, + { + "epoch": 0.4725355073390974, + "grad_norm": 7.168594583730302, + "learning_rate": 5.685744125532127e-06, + "loss": 17.5545, + "step": 25851 + }, + { + "epoch": 0.4725537865355439, + "grad_norm": 5.374707794732045, + "learning_rate": 5.685450909967533e-06, + "loss": 17.1791, + "step": 25852 + }, + { + "epoch": 0.4725720657319904, + "grad_norm": 6.280479088854206, + "learning_rate": 5.685157692000494e-06, + "loss": 17.7383, + "step": 25853 + }, + { + "epoch": 0.47259034492843693, + "grad_norm": 6.757402852042405, + "learning_rate": 5.6848644716320365e-06, + "loss": 17.4559, + "step": 25854 + }, + { + "epoch": 0.47260862412488347, + "grad_norm": 6.907323659330623, + "learning_rate": 5.684571248863188e-06, + "loss": 17.5365, + "step": 25855 + }, + { + "epoch": 0.47262690332133, + "grad_norm": 6.153775305233588, + "learning_rate": 5.684278023694978e-06, + "loss": 17.3031, + "step": 25856 + }, + { + "epoch": 0.47264518251777654, + "grad_norm": 13.665868608285148, + "learning_rate": 5.683984796128432e-06, + "loss": 18.349, + "step": 25857 + }, + { + "epoch": 0.472663461714223, + "grad_norm": 6.817935297092144, + "learning_rate": 5.683691566164579e-06, + "loss": 17.463, + "step": 25858 + }, + { + "epoch": 0.47268174091066956, + "grad_norm": 6.551114817335246, + "learning_rate": 5.683398333804446e-06, + "loss": 17.8015, + "step": 25859 + }, + { + "epoch": 0.4727000201071161, + "grad_norm": 5.18392331601108, + "learning_rate": 5.683105099049061e-06, + "loss": 17.0122, + "step": 25860 + }, + { + "epoch": 0.47271829930356263, + "grad_norm": 6.0659547084974585, + 
"learning_rate": 5.682811861899452e-06, + "loss": 17.4556, + "step": 25861 + }, + { + "epoch": 0.47273657850000916, + "grad_norm": 6.1119645321856195, + "learning_rate": 5.682518622356647e-06, + "loss": 17.3908, + "step": 25862 + }, + { + "epoch": 0.47275485769645564, + "grad_norm": 7.284642014411717, + "learning_rate": 5.682225380421674e-06, + "loss": 17.9496, + "step": 25863 + }, + { + "epoch": 0.4727731368929022, + "grad_norm": 6.929019893916304, + "learning_rate": 5.681932136095558e-06, + "loss": 17.4113, + "step": 25864 + }, + { + "epoch": 0.4727914160893487, + "grad_norm": 6.6258985334680025, + "learning_rate": 5.681638889379331e-06, + "loss": 17.316, + "step": 25865 + }, + { + "epoch": 0.47280969528579525, + "grad_norm": 5.242997829500244, + "learning_rate": 5.681345640274018e-06, + "loss": 16.9227, + "step": 25866 + }, + { + "epoch": 0.4728279744822418, + "grad_norm": 6.577576620433062, + "learning_rate": 5.681052388780649e-06, + "loss": 17.6209, + "step": 25867 + }, + { + "epoch": 0.47284625367868827, + "grad_norm": 5.71879287588394, + "learning_rate": 5.680759134900249e-06, + "loss": 17.5261, + "step": 25868 + }, + { + "epoch": 0.4728645328751348, + "grad_norm": 6.196560471184433, + "learning_rate": 5.680465878633848e-06, + "loss": 17.5856, + "step": 25869 + }, + { + "epoch": 0.47288281207158134, + "grad_norm": 6.68080064698184, + "learning_rate": 5.680172619982474e-06, + "loss": 17.7189, + "step": 25870 + }, + { + "epoch": 0.4729010912680279, + "grad_norm": 6.161544304228171, + "learning_rate": 5.6798793589471526e-06, + "loss": 17.308, + "step": 25871 + }, + { + "epoch": 0.4729193704644744, + "grad_norm": 8.332024594696001, + "learning_rate": 5.679586095528913e-06, + "loss": 17.9046, + "step": 25872 + }, + { + "epoch": 0.4729376496609209, + "grad_norm": 12.59299099131102, + "learning_rate": 5.679292829728783e-06, + "loss": 18.5357, + "step": 25873 + }, + { + "epoch": 0.4729559288573674, + "grad_norm": 7.905383411209632, + "learning_rate": 5.678999561547791e-06, + "loss": 18.2884, + "step": 25874 + }, + { + "epoch": 0.47297420805381396, + "grad_norm": 6.661492886445078, + "learning_rate": 5.678706290986964e-06, + "loss": 17.5146, + "step": 25875 + }, + { + "epoch": 0.4729924872502605, + "grad_norm": 4.781496507417505, + "learning_rate": 5.678413018047331e-06, + "loss": 16.9854, + "step": 25876 + }, + { + "epoch": 0.473010766446707, + "grad_norm": 7.208361767350122, + "learning_rate": 5.678119742729919e-06, + "loss": 17.4394, + "step": 25877 + }, + { + "epoch": 0.4730290456431535, + "grad_norm": 6.994738151235073, + "learning_rate": 5.6778264650357565e-06, + "loss": 17.588, + "step": 25878 + }, + { + "epoch": 0.47304732483960005, + "grad_norm": 7.306192444508178, + "learning_rate": 5.677533184965871e-06, + "loss": 17.7977, + "step": 25879 + }, + { + "epoch": 0.4730656040360466, + "grad_norm": 6.0981017799010075, + "learning_rate": 5.677239902521291e-06, + "loss": 17.4439, + "step": 25880 + }, + { + "epoch": 0.4730838832324931, + "grad_norm": 6.438982786471811, + "learning_rate": 5.676946617703043e-06, + "loss": 17.5578, + "step": 25881 + }, + { + "epoch": 0.4731021624289396, + "grad_norm": 6.134441006378765, + "learning_rate": 5.676653330512155e-06, + "loss": 17.29, + "step": 25882 + }, + { + "epoch": 0.47312044162538613, + "grad_norm": 6.184028519930327, + "learning_rate": 5.676360040949656e-06, + "loss": 17.6106, + "step": 25883 + }, + { + "epoch": 0.47313872082183267, + "grad_norm": 6.186597448277817, + "learning_rate": 5.676066749016577e-06, + "loss": 17.1757, + "step": 25884 
+ }, + { + "epoch": 0.4731570000182792, + "grad_norm": 6.887298996952183, + "learning_rate": 5.67577345471394e-06, + "loss": 17.5008, + "step": 25885 + }, + { + "epoch": 0.47317527921472574, + "grad_norm": 5.17511996340504, + "learning_rate": 5.6754801580427755e-06, + "loss": 17.1795, + "step": 25886 + }, + { + "epoch": 0.4731935584111722, + "grad_norm": 7.466202467595964, + "learning_rate": 5.6751868590041125e-06, + "loss": 17.8165, + "step": 25887 + }, + { + "epoch": 0.47321183760761876, + "grad_norm": 7.265150142125658, + "learning_rate": 5.6748935575989775e-06, + "loss": 17.9262, + "step": 25888 + }, + { + "epoch": 0.4732301168040653, + "grad_norm": 7.85296419328889, + "learning_rate": 5.6746002538284e-06, + "loss": 18.3895, + "step": 25889 + }, + { + "epoch": 0.47324839600051183, + "grad_norm": 6.851732004003136, + "learning_rate": 5.674306947693406e-06, + "loss": 17.6846, + "step": 25890 + }, + { + "epoch": 0.47326667519695836, + "grad_norm": 6.713183541630558, + "learning_rate": 5.674013639195025e-06, + "loss": 17.7856, + "step": 25891 + }, + { + "epoch": 0.47328495439340484, + "grad_norm": 8.11777700522724, + "learning_rate": 5.6737203283342846e-06, + "loss": 18.2853, + "step": 25892 + }, + { + "epoch": 0.4733032335898514, + "grad_norm": 5.807472787390272, + "learning_rate": 5.673427015112214e-06, + "loss": 17.3082, + "step": 25893 + }, + { + "epoch": 0.4733215127862979, + "grad_norm": 6.708169580046064, + "learning_rate": 5.673133699529841e-06, + "loss": 17.7547, + "step": 25894 + }, + { + "epoch": 0.47333979198274445, + "grad_norm": 5.90589898079177, + "learning_rate": 5.67284038158819e-06, + "loss": 17.3468, + "step": 25895 + }, + { + "epoch": 0.473358071179191, + "grad_norm": 6.762469218667008, + "learning_rate": 5.672547061288292e-06, + "loss": 17.6666, + "step": 25896 + }, + { + "epoch": 0.47337635037563747, + "grad_norm": 6.57452511513548, + "learning_rate": 5.672253738631176e-06, + "loss": 17.4593, + "step": 25897 + }, + { + "epoch": 0.473394629572084, + "grad_norm": 4.857877928913913, + "learning_rate": 5.671960413617869e-06, + "loss": 16.9493, + "step": 25898 + }, + { + "epoch": 0.47341290876853054, + "grad_norm": 6.762618836487802, + "learning_rate": 5.671667086249399e-06, + "loss": 17.3922, + "step": 25899 + }, + { + "epoch": 0.4734311879649771, + "grad_norm": 7.510592587270578, + "learning_rate": 5.671373756526795e-06, + "loss": 17.6653, + "step": 25900 + }, + { + "epoch": 0.4734494671614236, + "grad_norm": 6.607209235175018, + "learning_rate": 5.671080424451083e-06, + "loss": 17.8866, + "step": 25901 + }, + { + "epoch": 0.4734677463578701, + "grad_norm": 5.585064724011794, + "learning_rate": 5.670787090023293e-06, + "loss": 17.0745, + "step": 25902 + }, + { + "epoch": 0.4734860255543166, + "grad_norm": 6.124320979701702, + "learning_rate": 5.670493753244452e-06, + "loss": 17.4572, + "step": 25903 + }, + { + "epoch": 0.47350430475076316, + "grad_norm": 6.563376709963528, + "learning_rate": 5.670200414115589e-06, + "loss": 17.6055, + "step": 25904 + }, + { + "epoch": 0.4735225839472097, + "grad_norm": 6.343559243910293, + "learning_rate": 5.66990707263773e-06, + "loss": 17.398, + "step": 25905 + }, + { + "epoch": 0.47354086314365623, + "grad_norm": 6.588324721187836, + "learning_rate": 5.669613728811907e-06, + "loss": 17.7244, + "step": 25906 + }, + { + "epoch": 0.4735591423401027, + "grad_norm": 7.63767999502871, + "learning_rate": 5.669320382639145e-06, + "loss": 17.9324, + "step": 25907 + }, + { + "epoch": 0.47357742153654925, + "grad_norm": 4.932081138185488, + 
"learning_rate": 5.669027034120474e-06, + "loss": 16.8443, + "step": 25908 + }, + { + "epoch": 0.4735957007329958, + "grad_norm": 6.400489033653264, + "learning_rate": 5.66873368325692e-06, + "loss": 17.6367, + "step": 25909 + }, + { + "epoch": 0.4736139799294423, + "grad_norm": 5.927089896266275, + "learning_rate": 5.6684403300495135e-06, + "loss": 17.129, + "step": 25910 + }, + { + "epoch": 0.4736322591258888, + "grad_norm": 6.13647123586034, + "learning_rate": 5.668146974499282e-06, + "loss": 17.468, + "step": 25911 + }, + { + "epoch": 0.47365053832233533, + "grad_norm": 6.247460530699331, + "learning_rate": 5.667853616607253e-06, + "loss": 17.3167, + "step": 25912 + }, + { + "epoch": 0.47366881751878187, + "grad_norm": 6.628616623180669, + "learning_rate": 5.667560256374455e-06, + "loss": 17.5867, + "step": 25913 + }, + { + "epoch": 0.4736870967152284, + "grad_norm": 6.181169806865533, + "learning_rate": 5.667266893801915e-06, + "loss": 17.2199, + "step": 25914 + }, + { + "epoch": 0.47370537591167494, + "grad_norm": 7.61247354414544, + "learning_rate": 5.6669735288906634e-06, + "loss": 17.9029, + "step": 25915 + }, + { + "epoch": 0.4737236551081214, + "grad_norm": 7.861975335849096, + "learning_rate": 5.666680161641728e-06, + "loss": 17.6708, + "step": 25916 + }, + { + "epoch": 0.47374193430456796, + "grad_norm": 6.866773229149948, + "learning_rate": 5.6663867920561354e-06, + "loss": 17.6993, + "step": 25917 + }, + { + "epoch": 0.4737602135010145, + "grad_norm": 8.542882237612853, + "learning_rate": 5.666093420134916e-06, + "loss": 18.2594, + "step": 25918 + }, + { + "epoch": 0.47377849269746103, + "grad_norm": 6.402801659901995, + "learning_rate": 5.665800045879097e-06, + "loss": 17.5634, + "step": 25919 + }, + { + "epoch": 0.47379677189390756, + "grad_norm": 6.7766241834966525, + "learning_rate": 5.665506669289705e-06, + "loss": 17.5705, + "step": 25920 + }, + { + "epoch": 0.47381505109035404, + "grad_norm": 9.444608585705058, + "learning_rate": 5.66521329036777e-06, + "loss": 18.2186, + "step": 25921 + }, + { + "epoch": 0.4738333302868006, + "grad_norm": 5.785322703644276, + "learning_rate": 5.664919909114322e-06, + "loss": 17.4296, + "step": 25922 + }, + { + "epoch": 0.4738516094832471, + "grad_norm": 6.267172285029286, + "learning_rate": 5.664626525530385e-06, + "loss": 17.4039, + "step": 25923 + }, + { + "epoch": 0.47386988867969365, + "grad_norm": 5.974093653947869, + "learning_rate": 5.664333139616992e-06, + "loss": 17.5757, + "step": 25924 + }, + { + "epoch": 0.4738881678761402, + "grad_norm": 7.500747167628114, + "learning_rate": 5.66403975137517e-06, + "loss": 17.7725, + "step": 25925 + }, + { + "epoch": 0.47390644707258667, + "grad_norm": 5.964027363515949, + "learning_rate": 5.663746360805944e-06, + "loss": 17.081, + "step": 25926 + }, + { + "epoch": 0.4739247262690332, + "grad_norm": 6.290251241856698, + "learning_rate": 5.663452967910345e-06, + "loss": 17.6068, + "step": 25927 + }, + { + "epoch": 0.47394300546547974, + "grad_norm": 5.208218032142328, + "learning_rate": 5.6631595726894e-06, + "loss": 17.0189, + "step": 25928 + }, + { + "epoch": 0.4739612846619263, + "grad_norm": 7.29574622467421, + "learning_rate": 5.662866175144139e-06, + "loss": 17.9344, + "step": 25929 + }, + { + "epoch": 0.4739795638583728, + "grad_norm": 5.434198716830011, + "learning_rate": 5.662572775275591e-06, + "loss": 17.1999, + "step": 25930 + }, + { + "epoch": 0.4739978430548193, + "grad_norm": 6.895067401429106, + "learning_rate": 5.662279373084782e-06, + "loss": 18.0767, + "step": 25931 + 
}, + { + "epoch": 0.4740161222512658, + "grad_norm": 6.052100713760119, + "learning_rate": 5.661985968572741e-06, + "loss": 17.3477, + "step": 25932 + }, + { + "epoch": 0.47403440144771236, + "grad_norm": 7.534649191841099, + "learning_rate": 5.6616925617404965e-06, + "loss": 17.4309, + "step": 25933 + }, + { + "epoch": 0.4740526806441589, + "grad_norm": 5.7560926047448335, + "learning_rate": 5.661399152589077e-06, + "loss": 17.3121, + "step": 25934 + }, + { + "epoch": 0.47407095984060543, + "grad_norm": 5.212578901397439, + "learning_rate": 5.6611057411195115e-06, + "loss": 17.0144, + "step": 25935 + }, + { + "epoch": 0.4740892390370519, + "grad_norm": 6.813388999902722, + "learning_rate": 5.660812327332825e-06, + "loss": 17.7488, + "step": 25936 + }, + { + "epoch": 0.47410751823349845, + "grad_norm": 5.903166788230887, + "learning_rate": 5.6605189112300515e-06, + "loss": 17.2933, + "step": 25937 + }, + { + "epoch": 0.474125797429945, + "grad_norm": 5.312244703909885, + "learning_rate": 5.660225492812216e-06, + "loss": 17.2784, + "step": 25938 + }, + { + "epoch": 0.4741440766263915, + "grad_norm": 7.317861567662505, + "learning_rate": 5.659932072080349e-06, + "loss": 18.0144, + "step": 25939 + }, + { + "epoch": 0.47416235582283806, + "grad_norm": 5.920722899051549, + "learning_rate": 5.6596386490354745e-06, + "loss": 17.3487, + "step": 25940 + }, + { + "epoch": 0.47418063501928454, + "grad_norm": 6.411628454375291, + "learning_rate": 5.659345223678624e-06, + "loss": 17.5412, + "step": 25941 + }, + { + "epoch": 0.47419891421573107, + "grad_norm": 7.406774194138147, + "learning_rate": 5.659051796010828e-06, + "loss": 18.295, + "step": 25942 + }, + { + "epoch": 0.4742171934121776, + "grad_norm": 5.98952482355676, + "learning_rate": 5.658758366033111e-06, + "loss": 17.0943, + "step": 25943 + }, + { + "epoch": 0.47423547260862414, + "grad_norm": 6.0568664036181366, + "learning_rate": 5.6584649337465036e-06, + "loss": 17.3889, + "step": 25944 + }, + { + "epoch": 0.4742537518050706, + "grad_norm": 7.333534487383448, + "learning_rate": 5.6581714991520335e-06, + "loss": 18.02, + "step": 25945 + }, + { + "epoch": 0.47427203100151716, + "grad_norm": 5.345981274064981, + "learning_rate": 5.657878062250729e-06, + "loss": 17.1343, + "step": 25946 + }, + { + "epoch": 0.4742903101979637, + "grad_norm": 7.045236121429757, + "learning_rate": 5.657584623043619e-06, + "loss": 17.6542, + "step": 25947 + }, + { + "epoch": 0.47430858939441023, + "grad_norm": 5.870662473700976, + "learning_rate": 5.657291181531732e-06, + "loss": 17.3836, + "step": 25948 + }, + { + "epoch": 0.47432686859085677, + "grad_norm": 6.906669538816162, + "learning_rate": 5.6569977377160985e-06, + "loss": 17.6594, + "step": 25949 + }, + { + "epoch": 0.47434514778730325, + "grad_norm": 5.133526592293263, + "learning_rate": 5.656704291597742e-06, + "loss": 16.8409, + "step": 25950 + }, + { + "epoch": 0.4743634269837498, + "grad_norm": 6.365411223856171, + "learning_rate": 5.656410843177695e-06, + "loss": 17.318, + "step": 25951 + }, + { + "epoch": 0.4743817061801963, + "grad_norm": 5.788346236899196, + "learning_rate": 5.656117392456986e-06, + "loss": 17.2644, + "step": 25952 + }, + { + "epoch": 0.47439998537664285, + "grad_norm": 6.593894142725526, + "learning_rate": 5.655823939436642e-06, + "loss": 17.4617, + "step": 25953 + }, + { + "epoch": 0.4744182645730894, + "grad_norm": 7.152965042730411, + "learning_rate": 5.655530484117691e-06, + "loss": 17.8426, + "step": 25954 + }, + { + "epoch": 0.47443654376953587, + "grad_norm": 
5.5782092289664575, + "learning_rate": 5.655237026501162e-06, + "loss": 17.3581, + "step": 25955 + }, + { + "epoch": 0.4744548229659824, + "grad_norm": 6.861717928090626, + "learning_rate": 5.654943566588087e-06, + "loss": 17.9868, + "step": 25956 + }, + { + "epoch": 0.47447310216242894, + "grad_norm": 6.025530385063021, + "learning_rate": 5.65465010437949e-06, + "loss": 17.5004, + "step": 25957 + }, + { + "epoch": 0.4744913813588755, + "grad_norm": 5.756300202233745, + "learning_rate": 5.654356639876401e-06, + "loss": 16.9075, + "step": 25958 + }, + { + "epoch": 0.474509660555322, + "grad_norm": 5.906484596240676, + "learning_rate": 5.654063173079849e-06, + "loss": 17.4008, + "step": 25959 + }, + { + "epoch": 0.4745279397517685, + "grad_norm": 6.307032854882647, + "learning_rate": 5.6537697039908616e-06, + "loss": 17.4723, + "step": 25960 + }, + { + "epoch": 0.474546218948215, + "grad_norm": 7.954707887473592, + "learning_rate": 5.653476232610469e-06, + "loss": 18.1062, + "step": 25961 + }, + { + "epoch": 0.47456449814466156, + "grad_norm": 8.200524458621782, + "learning_rate": 5.653182758939698e-06, + "loss": 18.505, + "step": 25962 + }, + { + "epoch": 0.4745827773411081, + "grad_norm": 6.6348159359859515, + "learning_rate": 5.652889282979579e-06, + "loss": 17.3301, + "step": 25963 + }, + { + "epoch": 0.47460105653755463, + "grad_norm": 5.168494641857537, + "learning_rate": 5.652595804731139e-06, + "loss": 17.0669, + "step": 25964 + }, + { + "epoch": 0.4746193357340011, + "grad_norm": 5.690613203882012, + "learning_rate": 5.6523023241954076e-06, + "loss": 17.1961, + "step": 25965 + }, + { + "epoch": 0.47463761493044765, + "grad_norm": 6.898315220190865, + "learning_rate": 5.652008841373413e-06, + "loss": 17.6733, + "step": 25966 + }, + { + "epoch": 0.4746558941268942, + "grad_norm": 6.814659740351716, + "learning_rate": 5.651715356266187e-06, + "loss": 17.678, + "step": 25967 + }, + { + "epoch": 0.4746741733233407, + "grad_norm": 6.526756873649642, + "learning_rate": 5.65142186887475e-06, + "loss": 17.4209, + "step": 25968 + }, + { + "epoch": 0.47469245251978726, + "grad_norm": 6.556606698719368, + "learning_rate": 5.651128379200139e-06, + "loss": 17.5862, + "step": 25969 + }, + { + "epoch": 0.47471073171623374, + "grad_norm": 5.146666597874986, + "learning_rate": 5.650834887243379e-06, + "loss": 16.9093, + "step": 25970 + }, + { + "epoch": 0.47472901091268027, + "grad_norm": 7.757187958135804, + "learning_rate": 5.6505413930055e-06, + "loss": 17.8769, + "step": 25971 + }, + { + "epoch": 0.4747472901091268, + "grad_norm": 6.710453664682444, + "learning_rate": 5.650247896487528e-06, + "loss": 17.8814, + "step": 25972 + }, + { + "epoch": 0.47476556930557334, + "grad_norm": 7.123010685500772, + "learning_rate": 5.649954397690496e-06, + "loss": 17.6994, + "step": 25973 + }, + { + "epoch": 0.4747838485020199, + "grad_norm": 4.969359930302907, + "learning_rate": 5.649660896615428e-06, + "loss": 16.9672, + "step": 25974 + }, + { + "epoch": 0.47480212769846636, + "grad_norm": 6.9708902897066185, + "learning_rate": 5.6493673932633555e-06, + "loss": 17.9988, + "step": 25975 + }, + { + "epoch": 0.4748204068949129, + "grad_norm": 5.975754995200632, + "learning_rate": 5.6490738876353066e-06, + "loss": 17.502, + "step": 25976 + }, + { + "epoch": 0.47483868609135943, + "grad_norm": 5.851913605484984, + "learning_rate": 5.64878037973231e-06, + "loss": 17.3717, + "step": 25977 + }, + { + "epoch": 0.47485696528780597, + "grad_norm": 5.6586834549541125, + "learning_rate": 5.648486869555395e-06, + "loss": 
17.2946, + "step": 25978 + }, + { + "epoch": 0.47487524448425245, + "grad_norm": 7.157862049440766, + "learning_rate": 5.64819335710559e-06, + "loss": 17.7344, + "step": 25979 + }, + { + "epoch": 0.474893523680699, + "grad_norm": 8.014626993561148, + "learning_rate": 5.647899842383923e-06, + "loss": 18.4884, + "step": 25980 + }, + { + "epoch": 0.4749118028771455, + "grad_norm": 6.046323901293027, + "learning_rate": 5.647606325391425e-06, + "loss": 17.1595, + "step": 25981 + }, + { + "epoch": 0.47493008207359205, + "grad_norm": 6.691217919244507, + "learning_rate": 5.647312806129119e-06, + "loss": 17.7766, + "step": 25982 + }, + { + "epoch": 0.4749483612700386, + "grad_norm": 6.359084571375429, + "learning_rate": 5.647019284598041e-06, + "loss": 17.3846, + "step": 25983 + }, + { + "epoch": 0.47496664046648507, + "grad_norm": 6.427892313531851, + "learning_rate": 5.646725760799216e-06, + "loss": 17.4413, + "step": 25984 + }, + { + "epoch": 0.4749849196629316, + "grad_norm": 6.274193033345976, + "learning_rate": 5.646432234733674e-06, + "loss": 17.6532, + "step": 25985 + }, + { + "epoch": 0.47500319885937814, + "grad_norm": 6.768050627752301, + "learning_rate": 5.646138706402442e-06, + "loss": 17.5241, + "step": 25986 + }, + { + "epoch": 0.4750214780558247, + "grad_norm": 5.819678207070442, + "learning_rate": 5.64584517580655e-06, + "loss": 17.5962, + "step": 25987 + }, + { + "epoch": 0.4750397572522712, + "grad_norm": 5.951836988982997, + "learning_rate": 5.645551642947027e-06, + "loss": 17.3161, + "step": 25988 + }, + { + "epoch": 0.4750580364487177, + "grad_norm": 6.789371409648295, + "learning_rate": 5.6452581078249006e-06, + "loss": 17.5762, + "step": 25989 + }, + { + "epoch": 0.4750763156451642, + "grad_norm": 9.374394706643287, + "learning_rate": 5.644964570441202e-06, + "loss": 18.3603, + "step": 25990 + }, + { + "epoch": 0.47509459484161076, + "grad_norm": 7.474719096925224, + "learning_rate": 5.644671030796958e-06, + "loss": 18.0784, + "step": 25991 + }, + { + "epoch": 0.4751128740380573, + "grad_norm": 5.8469241580465985, + "learning_rate": 5.6443774888931964e-06, + "loss": 16.9541, + "step": 25992 + }, + { + "epoch": 0.47513115323450383, + "grad_norm": 6.610767546635071, + "learning_rate": 5.644083944730949e-06, + "loss": 17.4141, + "step": 25993 + }, + { + "epoch": 0.4751494324309503, + "grad_norm": 5.82041686861602, + "learning_rate": 5.643790398311244e-06, + "loss": 17.0751, + "step": 25994 + }, + { + "epoch": 0.47516771162739685, + "grad_norm": 6.541205305517173, + "learning_rate": 5.64349684963511e-06, + "loss": 17.5317, + "step": 25995 + }, + { + "epoch": 0.4751859908238434, + "grad_norm": 5.705654479397359, + "learning_rate": 5.643203298703572e-06, + "loss": 17.2416, + "step": 25996 + }, + { + "epoch": 0.4752042700202899, + "grad_norm": 5.929132218688094, + "learning_rate": 5.642909745517665e-06, + "loss": 17.0134, + "step": 25997 + }, + { + "epoch": 0.47522254921673646, + "grad_norm": 6.505326066339509, + "learning_rate": 5.6426161900784146e-06, + "loss": 17.3633, + "step": 25998 + }, + { + "epoch": 0.47524082841318294, + "grad_norm": 6.685730152464692, + "learning_rate": 5.642322632386851e-06, + "loss": 17.5658, + "step": 25999 + }, + { + "epoch": 0.47525910760962947, + "grad_norm": 6.376676619157217, + "learning_rate": 5.642029072443999e-06, + "loss": 17.6068, + "step": 26000 + }, + { + "epoch": 0.475277386806076, + "grad_norm": 7.857717160114505, + "learning_rate": 5.641735510250895e-06, + "loss": 18.0532, + "step": 26001 + }, + { + "epoch": 0.47529566600252254, + 
"grad_norm": 6.084575919193199, + "learning_rate": 5.641441945808562e-06, + "loss": 17.3179, + "step": 26002 + }, + { + "epoch": 0.4753139451989691, + "grad_norm": 5.884143566256101, + "learning_rate": 5.641148379118031e-06, + "loss": 17.3732, + "step": 26003 + }, + { + "epoch": 0.47533222439541556, + "grad_norm": 7.197660095175341, + "learning_rate": 5.64085481018033e-06, + "loss": 17.9315, + "step": 26004 + }, + { + "epoch": 0.4753505035918621, + "grad_norm": 7.2089115755654625, + "learning_rate": 5.640561238996489e-06, + "loss": 17.9124, + "step": 26005 + }, + { + "epoch": 0.47536878278830863, + "grad_norm": 6.665101417994158, + "learning_rate": 5.640267665567536e-06, + "loss": 17.5239, + "step": 26006 + }, + { + "epoch": 0.47538706198475517, + "grad_norm": 6.039014754649297, + "learning_rate": 5.6399740898944995e-06, + "loss": 17.5545, + "step": 26007 + }, + { + "epoch": 0.4754053411812017, + "grad_norm": 5.889816194995486, + "learning_rate": 5.6396805119784125e-06, + "loss": 17.4227, + "step": 26008 + }, + { + "epoch": 0.4754236203776482, + "grad_norm": 6.266189007191372, + "learning_rate": 5.639386931820298e-06, + "loss": 17.5186, + "step": 26009 + }, + { + "epoch": 0.4754418995740947, + "grad_norm": 5.6734377472976485, + "learning_rate": 5.639093349421187e-06, + "loss": 17.2666, + "step": 26010 + }, + { + "epoch": 0.47546017877054125, + "grad_norm": 5.059160481785002, + "learning_rate": 5.638799764782113e-06, + "loss": 16.9287, + "step": 26011 + }, + { + "epoch": 0.4754784579669878, + "grad_norm": 7.62269200425769, + "learning_rate": 5.6385061779040986e-06, + "loss": 17.7419, + "step": 26012 + }, + { + "epoch": 0.47549673716343427, + "grad_norm": 7.4300084586237745, + "learning_rate": 5.638212588788175e-06, + "loss": 17.9371, + "step": 26013 + }, + { + "epoch": 0.4755150163598808, + "grad_norm": 6.190862055253693, + "learning_rate": 5.6379189974353724e-06, + "loss": 17.4832, + "step": 26014 + }, + { + "epoch": 0.47553329555632734, + "grad_norm": 6.148301697943547, + "learning_rate": 5.63762540384672e-06, + "loss": 17.6687, + "step": 26015 + }, + { + "epoch": 0.4755515747527739, + "grad_norm": 6.783249319787577, + "learning_rate": 5.6373318080232455e-06, + "loss": 17.8364, + "step": 26016 + }, + { + "epoch": 0.4755698539492204, + "grad_norm": 9.257873471346558, + "learning_rate": 5.637038209965977e-06, + "loss": 18.4764, + "step": 26017 + }, + { + "epoch": 0.4755881331456669, + "grad_norm": 6.158810833202474, + "learning_rate": 5.636744609675946e-06, + "loss": 17.564, + "step": 26018 + }, + { + "epoch": 0.4756064123421134, + "grad_norm": 6.401517358591553, + "learning_rate": 5.63645100715418e-06, + "loss": 17.5254, + "step": 26019 + }, + { + "epoch": 0.47562469153855996, + "grad_norm": 5.391545148250154, + "learning_rate": 5.6361574024017085e-06, + "loss": 17.2427, + "step": 26020 + }, + { + "epoch": 0.4756429707350065, + "grad_norm": 7.675663362478772, + "learning_rate": 5.635863795419561e-06, + "loss": 18.3336, + "step": 26021 + }, + { + "epoch": 0.47566124993145303, + "grad_norm": 4.807485419133941, + "learning_rate": 5.6355701862087665e-06, + "loss": 16.82, + "step": 26022 + }, + { + "epoch": 0.4756795291278995, + "grad_norm": 6.581052885289084, + "learning_rate": 5.635276574770352e-06, + "loss": 17.4988, + "step": 26023 + }, + { + "epoch": 0.47569780832434605, + "grad_norm": 5.08216685605317, + "learning_rate": 5.634982961105349e-06, + "loss": 17.0414, + "step": 26024 + }, + { + "epoch": 0.4757160875207926, + "grad_norm": 6.988364270643437, + "learning_rate": 
5.634689345214787e-06, + "loss": 18.0667, + "step": 26025 + }, + { + "epoch": 0.4757343667172391, + "grad_norm": 6.381569064303758, + "learning_rate": 5.634395727099692e-06, + "loss": 17.3598, + "step": 26026 + }, + { + "epoch": 0.47575264591368566, + "grad_norm": 6.380849986204152, + "learning_rate": 5.634102106761095e-06, + "loss": 17.548, + "step": 26027 + }, + { + "epoch": 0.47577092511013214, + "grad_norm": 7.404830013781351, + "learning_rate": 5.633808484200026e-06, + "loss": 17.8752, + "step": 26028 + }, + { + "epoch": 0.4757892043065787, + "grad_norm": 6.500785528836138, + "learning_rate": 5.633514859417514e-06, + "loss": 17.56, + "step": 26029 + }, + { + "epoch": 0.4758074835030252, + "grad_norm": 6.321136405646386, + "learning_rate": 5.633221232414587e-06, + "loss": 17.3529, + "step": 26030 + }, + { + "epoch": 0.47582576269947174, + "grad_norm": 5.165881963417881, + "learning_rate": 5.632927603192274e-06, + "loss": 17.038, + "step": 26031 + }, + { + "epoch": 0.4758440418959183, + "grad_norm": 6.385283406981002, + "learning_rate": 5.632633971751604e-06, + "loss": 17.1728, + "step": 26032 + }, + { + "epoch": 0.47586232109236476, + "grad_norm": 7.2731084059304365, + "learning_rate": 5.632340338093608e-06, + "loss": 17.9146, + "step": 26033 + }, + { + "epoch": 0.4758806002888113, + "grad_norm": 6.42716275932108, + "learning_rate": 5.632046702219314e-06, + "loss": 17.2662, + "step": 26034 + }, + { + "epoch": 0.47589887948525783, + "grad_norm": 6.9941749540297105, + "learning_rate": 5.631753064129751e-06, + "loss": 17.4194, + "step": 26035 + }, + { + "epoch": 0.47591715868170437, + "grad_norm": 6.033991187815937, + "learning_rate": 5.631459423825948e-06, + "loss": 17.294, + "step": 26036 + }, + { + "epoch": 0.4759354378781509, + "grad_norm": 5.936301524659878, + "learning_rate": 5.631165781308934e-06, + "loss": 17.7412, + "step": 26037 + }, + { + "epoch": 0.4759537170745974, + "grad_norm": 6.1020200600307835, + "learning_rate": 5.630872136579739e-06, + "loss": 17.3488, + "step": 26038 + }, + { + "epoch": 0.4759719962710439, + "grad_norm": 6.877421053511342, + "learning_rate": 5.630578489639393e-06, + "loss": 17.6673, + "step": 26039 + }, + { + "epoch": 0.47599027546749045, + "grad_norm": 4.975494633060373, + "learning_rate": 5.6302848404889245e-06, + "loss": 16.8864, + "step": 26040 + }, + { + "epoch": 0.476008554663937, + "grad_norm": 6.525103130084275, + "learning_rate": 5.62999118912936e-06, + "loss": 17.4129, + "step": 26041 + }, + { + "epoch": 0.4760268338603835, + "grad_norm": 5.996496404997624, + "learning_rate": 5.629697535561733e-06, + "loss": 17.3319, + "step": 26042 + }, + { + "epoch": 0.47604511305683, + "grad_norm": 7.351909418351181, + "learning_rate": 5.62940387978707e-06, + "loss": 17.9268, + "step": 26043 + }, + { + "epoch": 0.47606339225327654, + "grad_norm": 5.526687594872396, + "learning_rate": 5.6291102218064005e-06, + "loss": 17.299, + "step": 26044 + }, + { + "epoch": 0.4760816714497231, + "grad_norm": 5.425425747437962, + "learning_rate": 5.628816561620755e-06, + "loss": 16.9579, + "step": 26045 + }, + { + "epoch": 0.4760999506461696, + "grad_norm": 6.642975514835074, + "learning_rate": 5.628522899231163e-06, + "loss": 17.9246, + "step": 26046 + }, + { + "epoch": 0.4761182298426161, + "grad_norm": 5.958625007699751, + "learning_rate": 5.62822923463865e-06, + "loss": 17.1009, + "step": 26047 + }, + { + "epoch": 0.4761365090390626, + "grad_norm": 7.628080431055862, + "learning_rate": 5.62793556784425e-06, + "loss": 18.0556, + "step": 26048 + }, + { + "epoch": 
0.47615478823550916, + "grad_norm": 5.801207100153378, + "learning_rate": 5.6276418988489916e-06, + "loss": 17.2965, + "step": 26049 + }, + { + "epoch": 0.4761730674319557, + "grad_norm": 9.79241349374093, + "learning_rate": 5.627348227653899e-06, + "loss": 18.6229, + "step": 26050 + }, + { + "epoch": 0.47619134662840223, + "grad_norm": 6.781462717629474, + "learning_rate": 5.627054554260008e-06, + "loss": 17.4345, + "step": 26051 + }, + { + "epoch": 0.4762096258248487, + "grad_norm": 5.812752241444554, + "learning_rate": 5.626760878668344e-06, + "loss": 17.3331, + "step": 26052 + }, + { + "epoch": 0.47622790502129525, + "grad_norm": 6.147366690483685, + "learning_rate": 5.62646720087994e-06, + "loss": 17.7013, + "step": 26053 + }, + { + "epoch": 0.4762461842177418, + "grad_norm": 5.122933959756373, + "learning_rate": 5.626173520895821e-06, + "loss": 17.1301, + "step": 26054 + }, + { + "epoch": 0.4762644634141883, + "grad_norm": 7.401273026911979, + "learning_rate": 5.6258798387170165e-06, + "loss": 17.7734, + "step": 26055 + }, + { + "epoch": 0.47628274261063486, + "grad_norm": 4.933972472532199, + "learning_rate": 5.6255861543445615e-06, + "loss": 16.9932, + "step": 26056 + }, + { + "epoch": 0.47630102180708134, + "grad_norm": 4.427933087749209, + "learning_rate": 5.625292467779479e-06, + "loss": 16.7622, + "step": 26057 + }, + { + "epoch": 0.4763193010035279, + "grad_norm": 5.7662417096621965, + "learning_rate": 5.6249987790228e-06, + "loss": 17.4404, + "step": 26058 + }, + { + "epoch": 0.4763375801999744, + "grad_norm": 6.577856752167029, + "learning_rate": 5.624705088075555e-06, + "loss": 17.7029, + "step": 26059 + }, + { + "epoch": 0.47635585939642094, + "grad_norm": 6.67163525678445, + "learning_rate": 5.624411394938772e-06, + "loss": 17.5504, + "step": 26060 + }, + { + "epoch": 0.4763741385928675, + "grad_norm": 6.360925961932458, + "learning_rate": 5.624117699613483e-06, + "loss": 17.6593, + "step": 26061 + }, + { + "epoch": 0.47639241778931396, + "grad_norm": 7.586802719897017, + "learning_rate": 5.6238240021007155e-06, + "loss": 17.9586, + "step": 26062 + }, + { + "epoch": 0.4764106969857605, + "grad_norm": 4.899688214664459, + "learning_rate": 5.6235303024014975e-06, + "loss": 16.7137, + "step": 26063 + }, + { + "epoch": 0.47642897618220703, + "grad_norm": 7.138013485464661, + "learning_rate": 5.623236600516861e-06, + "loss": 18.2766, + "step": 26064 + }, + { + "epoch": 0.47644725537865357, + "grad_norm": 7.056691352433283, + "learning_rate": 5.622942896447834e-06, + "loss": 17.9555, + "step": 26065 + }, + { + "epoch": 0.4764655345751001, + "grad_norm": 6.519716635122897, + "learning_rate": 5.622649190195446e-06, + "loss": 17.7805, + "step": 26066 + }, + { + "epoch": 0.4764838137715466, + "grad_norm": 6.2039302734864465, + "learning_rate": 5.622355481760728e-06, + "loss": 17.3564, + "step": 26067 + }, + { + "epoch": 0.4765020929679931, + "grad_norm": 6.708680452902, + "learning_rate": 5.622061771144706e-06, + "loss": 17.8124, + "step": 26068 + }, + { + "epoch": 0.47652037216443965, + "grad_norm": 6.146022834553187, + "learning_rate": 5.621768058348413e-06, + "loss": 17.3903, + "step": 26069 + }, + { + "epoch": 0.4765386513608862, + "grad_norm": 7.644073505989182, + "learning_rate": 5.621474343372877e-06, + "loss": 17.859, + "step": 26070 + }, + { + "epoch": 0.4765569305573327, + "grad_norm": 6.31873404662728, + "learning_rate": 5.621180626219126e-06, + "loss": 17.6161, + "step": 26071 + }, + { + "epoch": 0.4765752097537792, + "grad_norm": 6.857044687447573, + "learning_rate": 
5.620886906888191e-06, + "loss": 17.5395, + "step": 26072 + }, + { + "epoch": 0.47659348895022574, + "grad_norm": 6.538786504817792, + "learning_rate": 5.620593185381102e-06, + "loss": 17.6519, + "step": 26073 + }, + { + "epoch": 0.4766117681466723, + "grad_norm": 6.460891049638645, + "learning_rate": 5.6202994616988884e-06, + "loss": 17.6651, + "step": 26074 + }, + { + "epoch": 0.4766300473431188, + "grad_norm": 5.771273994807442, + "learning_rate": 5.620005735842577e-06, + "loss": 17.2586, + "step": 26075 + }, + { + "epoch": 0.47664832653956535, + "grad_norm": 6.34127645290423, + "learning_rate": 5.6197120078132005e-06, + "loss": 17.2723, + "step": 26076 + }, + { + "epoch": 0.47666660573601183, + "grad_norm": 7.6002476847563525, + "learning_rate": 5.619418277611788e-06, + "loss": 17.7249, + "step": 26077 + }, + { + "epoch": 0.47668488493245836, + "grad_norm": 5.6028576690747185, + "learning_rate": 5.619124545239366e-06, + "loss": 17.1663, + "step": 26078 + }, + { + "epoch": 0.4767031641289049, + "grad_norm": 6.8009010872796845, + "learning_rate": 5.618830810696968e-06, + "loss": 17.3903, + "step": 26079 + }, + { + "epoch": 0.47672144332535143, + "grad_norm": 6.2757260823554475, + "learning_rate": 5.6185370739856226e-06, + "loss": 17.5399, + "step": 26080 + }, + { + "epoch": 0.4767397225217979, + "grad_norm": 7.007343328296595, + "learning_rate": 5.618243335106357e-06, + "loss": 17.8199, + "step": 26081 + }, + { + "epoch": 0.47675800171824445, + "grad_norm": 7.868802959252958, + "learning_rate": 5.6179495940602005e-06, + "loss": 17.9974, + "step": 26082 + }, + { + "epoch": 0.476776280914691, + "grad_norm": 7.3481225366603, + "learning_rate": 5.617655850848186e-06, + "loss": 18.1017, + "step": 26083 + }, + { + "epoch": 0.4767945601111375, + "grad_norm": 6.664199757996422, + "learning_rate": 5.617362105471342e-06, + "loss": 18.04, + "step": 26084 + }, + { + "epoch": 0.47681283930758406, + "grad_norm": 8.498700739774147, + "learning_rate": 5.617068357930697e-06, + "loss": 17.8875, + "step": 26085 + }, + { + "epoch": 0.47683111850403054, + "grad_norm": 6.243252142764387, + "learning_rate": 5.61677460822728e-06, + "loss": 17.4461, + "step": 26086 + }, + { + "epoch": 0.4768493977004771, + "grad_norm": 6.937977061291332, + "learning_rate": 5.616480856362123e-06, + "loss": 17.905, + "step": 26087 + }, + { + "epoch": 0.4768676768969236, + "grad_norm": 6.865585270736471, + "learning_rate": 5.616187102336252e-06, + "loss": 17.6613, + "step": 26088 + }, + { + "epoch": 0.47688595609337014, + "grad_norm": 4.784125178394753, + "learning_rate": 5.6158933461507e-06, + "loss": 16.9239, + "step": 26089 + }, + { + "epoch": 0.4769042352898167, + "grad_norm": 6.590838836207453, + "learning_rate": 5.615599587806496e-06, + "loss": 17.2997, + "step": 26090 + }, + { + "epoch": 0.47692251448626316, + "grad_norm": 6.671508649687292, + "learning_rate": 5.615305827304668e-06, + "loss": 17.68, + "step": 26091 + }, + { + "epoch": 0.4769407936827097, + "grad_norm": 7.76100921275125, + "learning_rate": 5.615012064646247e-06, + "loss": 18.0952, + "step": 26092 + }, + { + "epoch": 0.47695907287915623, + "grad_norm": 6.088363358254355, + "learning_rate": 5.614718299832262e-06, + "loss": 17.33, + "step": 26093 + }, + { + "epoch": 0.47697735207560277, + "grad_norm": 7.614245279204956, + "learning_rate": 5.614424532863743e-06, + "loss": 17.8937, + "step": 26094 + }, + { + "epoch": 0.4769956312720493, + "grad_norm": 6.900345840051787, + "learning_rate": 5.614130763741717e-06, + "loss": 17.6068, + "step": 26095 + }, + { + 
"epoch": 0.4770139104684958, + "grad_norm": 6.964787476654074, + "learning_rate": 5.613836992467217e-06, + "loss": 17.4893, + "step": 26096 + }, + { + "epoch": 0.4770321896649423, + "grad_norm": 6.002296108303925, + "learning_rate": 5.613543219041273e-06, + "loss": 17.4203, + "step": 26097 + }, + { + "epoch": 0.47705046886138885, + "grad_norm": 6.834447217700692, + "learning_rate": 5.613249443464913e-06, + "loss": 17.9869, + "step": 26098 + }, + { + "epoch": 0.4770687480578354, + "grad_norm": 6.104831198554275, + "learning_rate": 5.612955665739167e-06, + "loss": 17.3504, + "step": 26099 + }, + { + "epoch": 0.4770870272542819, + "grad_norm": 6.449593406950984, + "learning_rate": 5.612661885865063e-06, + "loss": 17.4109, + "step": 26100 + }, + { + "epoch": 0.4771053064507284, + "grad_norm": 7.0224103024352615, + "learning_rate": 5.612368103843634e-06, + "loss": 17.5904, + "step": 26101 + }, + { + "epoch": 0.47712358564717494, + "grad_norm": 6.135187132870457, + "learning_rate": 5.612074319675907e-06, + "loss": 17.4386, + "step": 26102 + }, + { + "epoch": 0.4771418648436215, + "grad_norm": 5.0770678705615495, + "learning_rate": 5.611780533362913e-06, + "loss": 17.1951, + "step": 26103 + }, + { + "epoch": 0.477160144040068, + "grad_norm": 5.778558709472459, + "learning_rate": 5.611486744905681e-06, + "loss": 17.3517, + "step": 26104 + }, + { + "epoch": 0.47717842323651455, + "grad_norm": 6.9569184192284546, + "learning_rate": 5.611192954305241e-06, + "loss": 17.6013, + "step": 26105 + }, + { + "epoch": 0.47719670243296103, + "grad_norm": 5.554985824878484, + "learning_rate": 5.610899161562623e-06, + "loss": 17.3215, + "step": 26106 + }, + { + "epoch": 0.47721498162940756, + "grad_norm": 7.256870840413215, + "learning_rate": 5.6106053666788566e-06, + "loss": 18.0335, + "step": 26107 + }, + { + "epoch": 0.4772332608258541, + "grad_norm": 6.578082303262216, + "learning_rate": 5.610311569654972e-06, + "loss": 17.5233, + "step": 26108 + }, + { + "epoch": 0.47725154002230064, + "grad_norm": 7.0026002953920905, + "learning_rate": 5.610017770491995e-06, + "loss": 17.642, + "step": 26109 + }, + { + "epoch": 0.47726981921874717, + "grad_norm": 8.634285905142995, + "learning_rate": 5.609723969190963e-06, + "loss": 18.8466, + "step": 26110 + }, + { + "epoch": 0.47728809841519365, + "grad_norm": 5.591100147596347, + "learning_rate": 5.6094301657529e-06, + "loss": 16.9774, + "step": 26111 + }, + { + "epoch": 0.4773063776116402, + "grad_norm": 6.155160576115009, + "learning_rate": 5.609136360178837e-06, + "loss": 17.5168, + "step": 26112 + }, + { + "epoch": 0.4773246568080867, + "grad_norm": 7.1575398367811545, + "learning_rate": 5.608842552469802e-06, + "loss": 17.7671, + "step": 26113 + }, + { + "epoch": 0.47734293600453326, + "grad_norm": 7.510841169697521, + "learning_rate": 5.608548742626827e-06, + "loss": 18.2355, + "step": 26114 + }, + { + "epoch": 0.47736121520097974, + "grad_norm": 5.402852436341276, + "learning_rate": 5.608254930650944e-06, + "loss": 17.061, + "step": 26115 + }, + { + "epoch": 0.4773794943974263, + "grad_norm": 5.388457298037819, + "learning_rate": 5.607961116543179e-06, + "loss": 16.9783, + "step": 26116 + }, + { + "epoch": 0.4773977735938728, + "grad_norm": 6.750534929649605, + "learning_rate": 5.607667300304563e-06, + "loss": 17.8664, + "step": 26117 + }, + { + "epoch": 0.47741605279031935, + "grad_norm": 7.515028893326799, + "learning_rate": 5.607373481936126e-06, + "loss": 17.9722, + "step": 26118 + }, + { + "epoch": 0.4774343319867659, + "grad_norm": 6.097286987623301, + 
"learning_rate": 5.607079661438897e-06, + "loss": 17.3201, + "step": 26119 + }, + { + "epoch": 0.47745261118321236, + "grad_norm": 5.582122826765157, + "learning_rate": 5.606785838813907e-06, + "loss": 17.1418, + "step": 26120 + }, + { + "epoch": 0.4774708903796589, + "grad_norm": 5.719784680253939, + "learning_rate": 5.6064920140621846e-06, + "loss": 17.3713, + "step": 26121 + }, + { + "epoch": 0.47748916957610543, + "grad_norm": 5.744226850913215, + "learning_rate": 5.606198187184762e-06, + "loss": 17.2014, + "step": 26122 + }, + { + "epoch": 0.47750744877255197, + "grad_norm": 7.999450040902364, + "learning_rate": 5.605904358182666e-06, + "loss": 18.2115, + "step": 26123 + }, + { + "epoch": 0.4775257279689985, + "grad_norm": 6.8892260325088435, + "learning_rate": 5.605610527056927e-06, + "loss": 18.0052, + "step": 26124 + }, + { + "epoch": 0.477544007165445, + "grad_norm": 6.857778199194525, + "learning_rate": 5.605316693808578e-06, + "loss": 17.5765, + "step": 26125 + }, + { + "epoch": 0.4775622863618915, + "grad_norm": 5.734045895742806, + "learning_rate": 5.6050228584386456e-06, + "loss": 17.4257, + "step": 26126 + }, + { + "epoch": 0.47758056555833805, + "grad_norm": 6.357934785959785, + "learning_rate": 5.604729020948158e-06, + "loss": 17.6335, + "step": 26127 + }, + { + "epoch": 0.4775988447547846, + "grad_norm": 7.308077097943614, + "learning_rate": 5.604435181338151e-06, + "loss": 17.7287, + "step": 26128 + }, + { + "epoch": 0.4776171239512311, + "grad_norm": 8.951314214338932, + "learning_rate": 5.6041413396096515e-06, + "loss": 17.9758, + "step": 26129 + }, + { + "epoch": 0.4776354031476776, + "grad_norm": 5.470556974764741, + "learning_rate": 5.603847495763687e-06, + "loss": 17.1528, + "step": 26130 + }, + { + "epoch": 0.47765368234412414, + "grad_norm": 6.183881165989675, + "learning_rate": 5.60355364980129e-06, + "loss": 17.3334, + "step": 26131 + }, + { + "epoch": 0.4776719615405707, + "grad_norm": 7.066583803623028, + "learning_rate": 5.603259801723489e-06, + "loss": 17.8592, + "step": 26132 + }, + { + "epoch": 0.4776902407370172, + "grad_norm": 6.293945421294758, + "learning_rate": 5.602965951531316e-06, + "loss": 17.4171, + "step": 26133 + }, + { + "epoch": 0.47770851993346375, + "grad_norm": 8.633406434286142, + "learning_rate": 5.6026720992258e-06, + "loss": 17.9421, + "step": 26134 + }, + { + "epoch": 0.47772679912991023, + "grad_norm": 5.846218605556748, + "learning_rate": 5.602378244807969e-06, + "loss": 17.2044, + "step": 26135 + }, + { + "epoch": 0.47774507832635676, + "grad_norm": 5.629178104367023, + "learning_rate": 5.602084388278856e-06, + "loss": 17.2794, + "step": 26136 + }, + { + "epoch": 0.4777633575228033, + "grad_norm": 6.874864594145727, + "learning_rate": 5.601790529639488e-06, + "loss": 17.5492, + "step": 26137 + }, + { + "epoch": 0.47778163671924984, + "grad_norm": 8.096302984276473, + "learning_rate": 5.601496668890898e-06, + "loss": 17.9595, + "step": 26138 + }, + { + "epoch": 0.47779991591569637, + "grad_norm": 6.4229967012341795, + "learning_rate": 5.601202806034114e-06, + "loss": 17.5692, + "step": 26139 + }, + { + "epoch": 0.47781819511214285, + "grad_norm": 7.275263196865297, + "learning_rate": 5.600908941070167e-06, + "loss": 17.7041, + "step": 26140 + }, + { + "epoch": 0.4778364743085894, + "grad_norm": 6.078711589615375, + "learning_rate": 5.6006150740000835e-06, + "loss": 17.4678, + "step": 26141 + }, + { + "epoch": 0.4778547535050359, + "grad_norm": 5.8877548028571685, + "learning_rate": 5.600321204824899e-06, + "loss": 17.2552, + 
"step": 26142 + }, + { + "epoch": 0.47787303270148246, + "grad_norm": 6.159760499766907, + "learning_rate": 5.60002733354564e-06, + "loss": 17.4516, + "step": 26143 + }, + { + "epoch": 0.477891311897929, + "grad_norm": 6.668883774809692, + "learning_rate": 5.5997334601633365e-06, + "loss": 17.5266, + "step": 26144 + }, + { + "epoch": 0.4779095910943755, + "grad_norm": 6.346232467794673, + "learning_rate": 5.5994395846790194e-06, + "loss": 17.2999, + "step": 26145 + }, + { + "epoch": 0.477927870290822, + "grad_norm": 6.279314627877693, + "learning_rate": 5.599145707093719e-06, + "loss": 17.4918, + "step": 26146 + }, + { + "epoch": 0.47794614948726855, + "grad_norm": 6.055620613960243, + "learning_rate": 5.598851827408466e-06, + "loss": 17.2214, + "step": 26147 + }, + { + "epoch": 0.4779644286837151, + "grad_norm": 8.800387241080244, + "learning_rate": 5.598557945624288e-06, + "loss": 17.9734, + "step": 26148 + }, + { + "epoch": 0.47798270788016156, + "grad_norm": 5.879055359319588, + "learning_rate": 5.598264061742217e-06, + "loss": 17.4847, + "step": 26149 + }, + { + "epoch": 0.4780009870766081, + "grad_norm": 7.840857060702316, + "learning_rate": 5.597970175763281e-06, + "loss": 17.9681, + "step": 26150 + }, + { + "epoch": 0.47801926627305463, + "grad_norm": 7.390125956957079, + "learning_rate": 5.5976762876885114e-06, + "loss": 17.9516, + "step": 26151 + }, + { + "epoch": 0.47803754546950117, + "grad_norm": 7.886241989784929, + "learning_rate": 5.59738239751894e-06, + "loss": 18.1274, + "step": 26152 + }, + { + "epoch": 0.4780558246659477, + "grad_norm": 7.6358127734958305, + "learning_rate": 5.597088505255596e-06, + "loss": 18.33, + "step": 26153 + }, + { + "epoch": 0.4780741038623942, + "grad_norm": 5.8727765713036115, + "learning_rate": 5.596794610899507e-06, + "loss": 17.2083, + "step": 26154 + }, + { + "epoch": 0.4780923830588407, + "grad_norm": 5.719505322466552, + "learning_rate": 5.596500714451703e-06, + "loss": 17.1283, + "step": 26155 + }, + { + "epoch": 0.47811066225528726, + "grad_norm": 5.249643386733936, + "learning_rate": 5.596206815913217e-06, + "loss": 17.0474, + "step": 26156 + }, + { + "epoch": 0.4781289414517338, + "grad_norm": 5.9512499632323586, + "learning_rate": 5.595912915285079e-06, + "loss": 17.1633, + "step": 26157 + }, + { + "epoch": 0.4781472206481803, + "grad_norm": 5.539861525542652, + "learning_rate": 5.595619012568318e-06, + "loss": 17.0582, + "step": 26158 + }, + { + "epoch": 0.4781654998446268, + "grad_norm": 6.746955695472212, + "learning_rate": 5.595325107763963e-06, + "loss": 17.6486, + "step": 26159 + }, + { + "epoch": 0.47818377904107334, + "grad_norm": 6.372113047626275, + "learning_rate": 5.595031200873045e-06, + "loss": 17.3573, + "step": 26160 + }, + { + "epoch": 0.4782020582375199, + "grad_norm": 5.8737005821916535, + "learning_rate": 5.594737291896594e-06, + "loss": 17.3061, + "step": 26161 + }, + { + "epoch": 0.4782203374339664, + "grad_norm": 6.239354686610514, + "learning_rate": 5.594443380835642e-06, + "loss": 17.6471, + "step": 26162 + }, + { + "epoch": 0.47823861663041295, + "grad_norm": 6.4618147944618105, + "learning_rate": 5.5941494676912165e-06, + "loss": 17.5302, + "step": 26163 + }, + { + "epoch": 0.47825689582685943, + "grad_norm": 6.8845883561397265, + "learning_rate": 5.593855552464348e-06, + "loss": 17.5376, + "step": 26164 + }, + { + "epoch": 0.47827517502330597, + "grad_norm": 6.808300698691769, + "learning_rate": 5.593561635156068e-06, + "loss": 17.4821, + "step": 26165 + }, + { + "epoch": 0.4782934542197525, + 
"grad_norm": 5.980524909712036, + "learning_rate": 5.593267715767406e-06, + "loss": 17.0913, + "step": 26166 + }, + { + "epoch": 0.47831173341619904, + "grad_norm": 5.126226771227641, + "learning_rate": 5.592973794299393e-06, + "loss": 17.0242, + "step": 26167 + }, + { + "epoch": 0.47833001261264557, + "grad_norm": 6.136243881740379, + "learning_rate": 5.592679870753057e-06, + "loss": 17.3082, + "step": 26168 + }, + { + "epoch": 0.47834829180909205, + "grad_norm": 6.093848300755434, + "learning_rate": 5.59238594512943e-06, + "loss": 17.2224, + "step": 26169 + }, + { + "epoch": 0.4783665710055386, + "grad_norm": 5.297080451154436, + "learning_rate": 5.592092017429543e-06, + "loss": 17.2881, + "step": 26170 + }, + { + "epoch": 0.4783848502019851, + "grad_norm": 6.05727917693309, + "learning_rate": 5.5917980876544235e-06, + "loss": 17.2438, + "step": 26171 + }, + { + "epoch": 0.47840312939843166, + "grad_norm": 7.863517824378537, + "learning_rate": 5.591504155805103e-06, + "loss": 18.2442, + "step": 26172 + }, + { + "epoch": 0.4784214085948782, + "grad_norm": 8.058677058472782, + "learning_rate": 5.591210221882611e-06, + "loss": 17.993, + "step": 26173 + }, + { + "epoch": 0.4784396877913247, + "grad_norm": 7.495887265653726, + "learning_rate": 5.590916285887979e-06, + "loss": 18.1109, + "step": 26174 + }, + { + "epoch": 0.4784579669877712, + "grad_norm": 6.267948127689959, + "learning_rate": 5.590622347822238e-06, + "loss": 17.4014, + "step": 26175 + }, + { + "epoch": 0.47847624618421775, + "grad_norm": 6.949861333830357, + "learning_rate": 5.590328407686415e-06, + "loss": 17.9126, + "step": 26176 + }, + { + "epoch": 0.4784945253806643, + "grad_norm": 9.55325244105705, + "learning_rate": 5.590034465481544e-06, + "loss": 17.4078, + "step": 26177 + }, + { + "epoch": 0.4785128045771108, + "grad_norm": 7.621456997317617, + "learning_rate": 5.589740521208652e-06, + "loss": 17.6216, + "step": 26178 + }, + { + "epoch": 0.4785310837735573, + "grad_norm": 6.386817809825003, + "learning_rate": 5.589446574868771e-06, + "loss": 17.2495, + "step": 26179 + }, + { + "epoch": 0.47854936297000383, + "grad_norm": 5.700295189066984, + "learning_rate": 5.589152626462933e-06, + "loss": 17.2569, + "step": 26180 + }, + { + "epoch": 0.47856764216645037, + "grad_norm": 5.936096719381702, + "learning_rate": 5.588858675992164e-06, + "loss": 17.2734, + "step": 26181 + }, + { + "epoch": 0.4785859213628969, + "grad_norm": 6.098904815202431, + "learning_rate": 5.5885647234574946e-06, + "loss": 17.3035, + "step": 26182 + }, + { + "epoch": 0.4786042005593434, + "grad_norm": 7.148858141497193, + "learning_rate": 5.588270768859959e-06, + "loss": 17.6, + "step": 26183 + }, + { + "epoch": 0.4786224797557899, + "grad_norm": 6.168080790795707, + "learning_rate": 5.587976812200587e-06, + "loss": 17.3124, + "step": 26184 + }, + { + "epoch": 0.47864075895223646, + "grad_norm": 5.496299195925315, + "learning_rate": 5.587682853480405e-06, + "loss": 17.1208, + "step": 26185 + }, + { + "epoch": 0.478659038148683, + "grad_norm": 6.916940082572477, + "learning_rate": 5.587388892700446e-06, + "loss": 17.675, + "step": 26186 + }, + { + "epoch": 0.4786773173451295, + "grad_norm": 6.065550357992969, + "learning_rate": 5.587094929861741e-06, + "loss": 17.3024, + "step": 26187 + }, + { + "epoch": 0.478695596541576, + "grad_norm": 5.804851435310755, + "learning_rate": 5.586800964965318e-06, + "loss": 17.29, + "step": 26188 + }, + { + "epoch": 0.47871387573802254, + "grad_norm": 6.033602700131579, + "learning_rate": 5.586506998012209e-06, + 
"loss": 17.2692, + "step": 26189 + }, + { + "epoch": 0.4787321549344691, + "grad_norm": 6.663829582698504, + "learning_rate": 5.586213029003443e-06, + "loss": 17.7129, + "step": 26190 + }, + { + "epoch": 0.4787504341309156, + "grad_norm": 6.951364507725383, + "learning_rate": 5.5859190579400526e-06, + "loss": 17.7884, + "step": 26191 + }, + { + "epoch": 0.47876871332736215, + "grad_norm": 9.055546153627796, + "learning_rate": 5.585625084823066e-06, + "loss": 18.6385, + "step": 26192 + }, + { + "epoch": 0.47878699252380863, + "grad_norm": 6.9846995043625855, + "learning_rate": 5.5853311096535145e-06, + "loss": 17.5807, + "step": 26193 + }, + { + "epoch": 0.47880527172025517, + "grad_norm": 6.732692477602404, + "learning_rate": 5.585037132432429e-06, + "loss": 17.4702, + "step": 26194 + }, + { + "epoch": 0.4788235509167017, + "grad_norm": 5.727818276585686, + "learning_rate": 5.584743153160837e-06, + "loss": 17.4588, + "step": 26195 + }, + { + "epoch": 0.47884183011314824, + "grad_norm": 8.451719454867723, + "learning_rate": 5.584449171839772e-06, + "loss": 18.1792, + "step": 26196 + }, + { + "epoch": 0.4788601093095948, + "grad_norm": 6.900812623289093, + "learning_rate": 5.584155188470264e-06, + "loss": 17.6951, + "step": 26197 + }, + { + "epoch": 0.47887838850604125, + "grad_norm": 6.976842411357199, + "learning_rate": 5.583861203053344e-06, + "loss": 17.7989, + "step": 26198 + }, + { + "epoch": 0.4788966677024878, + "grad_norm": 5.910355358761967, + "learning_rate": 5.583567215590039e-06, + "loss": 17.2434, + "step": 26199 + }, + { + "epoch": 0.4789149468989343, + "grad_norm": 6.175191662204462, + "learning_rate": 5.583273226081381e-06, + "loss": 17.4369, + "step": 26200 + }, + { + "epoch": 0.47893322609538086, + "grad_norm": 6.404927060162966, + "learning_rate": 5.5829792345284025e-06, + "loss": 17.2073, + "step": 26201 + }, + { + "epoch": 0.4789515052918274, + "grad_norm": 7.6688649647097415, + "learning_rate": 5.5826852409321316e-06, + "loss": 18.2065, + "step": 26202 + }, + { + "epoch": 0.4789697844882739, + "grad_norm": 7.757099773467878, + "learning_rate": 5.5823912452936e-06, + "loss": 18.0641, + "step": 26203 + }, + { + "epoch": 0.4789880636847204, + "grad_norm": 5.8128483224136795, + "learning_rate": 5.582097247613838e-06, + "loss": 17.2436, + "step": 26204 + }, + { + "epoch": 0.47900634288116695, + "grad_norm": 6.405755405230303, + "learning_rate": 5.581803247893876e-06, + "loss": 17.6274, + "step": 26205 + }, + { + "epoch": 0.4790246220776135, + "grad_norm": 8.570545696432772, + "learning_rate": 5.581509246134742e-06, + "loss": 18.6291, + "step": 26206 + }, + { + "epoch": 0.47904290127406, + "grad_norm": 6.156591387375951, + "learning_rate": 5.58121524233747e-06, + "loss": 17.24, + "step": 26207 + }, + { + "epoch": 0.4790611804705065, + "grad_norm": 6.623992225506808, + "learning_rate": 5.58092123650309e-06, + "loss": 17.5872, + "step": 26208 + }, + { + "epoch": 0.47907945966695303, + "grad_norm": 5.216852011450072, + "learning_rate": 5.580627228632629e-06, + "loss": 16.8767, + "step": 26209 + }, + { + "epoch": 0.47909773886339957, + "grad_norm": 7.510683482527872, + "learning_rate": 5.580333218727121e-06, + "loss": 17.9773, + "step": 26210 + }, + { + "epoch": 0.4791160180598461, + "grad_norm": 5.75592348346135, + "learning_rate": 5.580039206787597e-06, + "loss": 17.1502, + "step": 26211 + }, + { + "epoch": 0.47913429725629264, + "grad_norm": 5.907665321789651, + "learning_rate": 5.579745192815085e-06, + "loss": 17.0974, + "step": 26212 + }, + { + "epoch": 0.4791525764527391, 
+ "grad_norm": 6.292182669194808, + "learning_rate": 5.579451176810615e-06, + "loss": 17.2361, + "step": 26213 + }, + { + "epoch": 0.47917085564918566, + "grad_norm": 7.204820704388497, + "learning_rate": 5.5791571587752195e-06, + "loss": 18.0221, + "step": 26214 + }, + { + "epoch": 0.4791891348456322, + "grad_norm": 5.484157167412335, + "learning_rate": 5.578863138709929e-06, + "loss": 17.1521, + "step": 26215 + }, + { + "epoch": 0.4792074140420787, + "grad_norm": 5.744855348578453, + "learning_rate": 5.578569116615773e-06, + "loss": 17.4221, + "step": 26216 + }, + { + "epoch": 0.4792256932385252, + "grad_norm": 7.220814378367754, + "learning_rate": 5.578275092493783e-06, + "loss": 17.7878, + "step": 26217 + }, + { + "epoch": 0.47924397243497174, + "grad_norm": 6.328796412840449, + "learning_rate": 5.577981066344988e-06, + "loss": 17.4949, + "step": 26218 + }, + { + "epoch": 0.4792622516314183, + "grad_norm": 6.5617144161473515, + "learning_rate": 5.577687038170421e-06, + "loss": 17.2807, + "step": 26219 + }, + { + "epoch": 0.4792805308278648, + "grad_norm": 6.495003736669981, + "learning_rate": 5.5773930079711105e-06, + "loss": 17.3757, + "step": 26220 + }, + { + "epoch": 0.47929881002431135, + "grad_norm": 6.865996755807059, + "learning_rate": 5.5770989757480865e-06, + "loss": 17.6212, + "step": 26221 + }, + { + "epoch": 0.47931708922075783, + "grad_norm": 6.020183196247202, + "learning_rate": 5.576804941502382e-06, + "loss": 17.2654, + "step": 26222 + }, + { + "epoch": 0.47933536841720437, + "grad_norm": 6.099741301700175, + "learning_rate": 5.576510905235025e-06, + "loss": 17.3339, + "step": 26223 + }, + { + "epoch": 0.4793536476136509, + "grad_norm": 5.4067937833161235, + "learning_rate": 5.576216866947048e-06, + "loss": 17.0633, + "step": 26224 + }, + { + "epoch": 0.47937192681009744, + "grad_norm": 7.247404031681719, + "learning_rate": 5.575922826639483e-06, + "loss": 17.6743, + "step": 26225 + }, + { + "epoch": 0.479390206006544, + "grad_norm": 6.605198334954851, + "learning_rate": 5.575628784313356e-06, + "loss": 17.6374, + "step": 26226 + }, + { + "epoch": 0.47940848520299045, + "grad_norm": 7.6240696437546935, + "learning_rate": 5.575334739969699e-06, + "loss": 18.268, + "step": 26227 + }, + { + "epoch": 0.479426764399437, + "grad_norm": 7.378431079986668, + "learning_rate": 5.5750406936095445e-06, + "loss": 17.9383, + "step": 26228 + }, + { + "epoch": 0.4794450435958835, + "grad_norm": 5.394760685278859, + "learning_rate": 5.574746645233924e-06, + "loss": 17.1321, + "step": 26229 + }, + { + "epoch": 0.47946332279233006, + "grad_norm": 5.730823542829997, + "learning_rate": 5.574452594843865e-06, + "loss": 17.4686, + "step": 26230 + }, + { + "epoch": 0.4794816019887766, + "grad_norm": 6.081860419800946, + "learning_rate": 5.5741585424404e-06, + "loss": 17.7798, + "step": 26231 + }, + { + "epoch": 0.4794998811852231, + "grad_norm": 6.530243472618639, + "learning_rate": 5.57386448802456e-06, + "loss": 17.6541, + "step": 26232 + }, + { + "epoch": 0.4795181603816696, + "grad_norm": 6.6227555503740385, + "learning_rate": 5.573570431597373e-06, + "loss": 17.4864, + "step": 26233 + }, + { + "epoch": 0.47953643957811615, + "grad_norm": 4.923635154528669, + "learning_rate": 5.573276373159872e-06, + "loss": 16.9674, + "step": 26234 + }, + { + "epoch": 0.4795547187745627, + "grad_norm": 5.603616071046356, + "learning_rate": 5.572982312713087e-06, + "loss": 17.2972, + "step": 26235 + }, + { + "epoch": 0.4795729979710092, + "grad_norm": 6.893350260820897, + "learning_rate": 
5.572688250258048e-06, + "loss": 17.7171, + "step": 26236 + }, + { + "epoch": 0.4795912771674557, + "grad_norm": 5.620651305897151, + "learning_rate": 5.572394185795787e-06, + "loss": 17.3509, + "step": 26237 + }, + { + "epoch": 0.47960955636390223, + "grad_norm": 6.51119153220209, + "learning_rate": 5.572100119327335e-06, + "loss": 17.7156, + "step": 26238 + }, + { + "epoch": 0.47962783556034877, + "grad_norm": 5.026318614404321, + "learning_rate": 5.571806050853722e-06, + "loss": 17.0207, + "step": 26239 + }, + { + "epoch": 0.4796461147567953, + "grad_norm": 5.384751585291172, + "learning_rate": 5.571511980375977e-06, + "loss": 17.1038, + "step": 26240 + }, + { + "epoch": 0.47966439395324184, + "grad_norm": 6.795563613774651, + "learning_rate": 5.5712179078951325e-06, + "loss": 17.2896, + "step": 26241 + }, + { + "epoch": 0.4796826731496883, + "grad_norm": 6.604792047533484, + "learning_rate": 5.5709238334122194e-06, + "loss": 17.6891, + "step": 26242 + }, + { + "epoch": 0.47970095234613486, + "grad_norm": 5.920415462727743, + "learning_rate": 5.570629756928267e-06, + "loss": 17.2459, + "step": 26243 + }, + { + "epoch": 0.4797192315425814, + "grad_norm": 7.0716277174869555, + "learning_rate": 5.570335678444308e-06, + "loss": 17.948, + "step": 26244 + }, + { + "epoch": 0.47973751073902793, + "grad_norm": 7.241241562813814, + "learning_rate": 5.57004159796137e-06, + "loss": 17.8781, + "step": 26245 + }, + { + "epoch": 0.47975578993547446, + "grad_norm": 6.244955681893017, + "learning_rate": 5.569747515480487e-06, + "loss": 17.6744, + "step": 26246 + }, + { + "epoch": 0.47977406913192094, + "grad_norm": 5.550364666379726, + "learning_rate": 5.569453431002687e-06, + "loss": 17.098, + "step": 26247 + }, + { + "epoch": 0.4797923483283675, + "grad_norm": 6.5128953084245085, + "learning_rate": 5.569159344529004e-06, + "loss": 17.5295, + "step": 26248 + }, + { + "epoch": 0.479810627524814, + "grad_norm": 6.53858048588471, + "learning_rate": 5.568865256060466e-06, + "loss": 17.5188, + "step": 26249 + }, + { + "epoch": 0.47982890672126055, + "grad_norm": 5.346609906075889, + "learning_rate": 5.568571165598104e-06, + "loss": 17.0783, + "step": 26250 + }, + { + "epoch": 0.47984718591770703, + "grad_norm": 7.49948746741147, + "learning_rate": 5.56827707314295e-06, + "loss": 18.0951, + "step": 26251 + }, + { + "epoch": 0.47986546511415357, + "grad_norm": 9.32404418977323, + "learning_rate": 5.567982978696035e-06, + "loss": 19.117, + "step": 26252 + }, + { + "epoch": 0.4798837443106001, + "grad_norm": 6.0266094390333045, + "learning_rate": 5.5676888822583884e-06, + "loss": 17.4078, + "step": 26253 + }, + { + "epoch": 0.47990202350704664, + "grad_norm": 6.420340507834126, + "learning_rate": 5.567394783831041e-06, + "loss": 17.7002, + "step": 26254 + }, + { + "epoch": 0.4799203027034932, + "grad_norm": 6.72038526407255, + "learning_rate": 5.567100683415025e-06, + "loss": 17.562, + "step": 26255 + }, + { + "epoch": 0.47993858189993965, + "grad_norm": 6.125386102652748, + "learning_rate": 5.56680658101137e-06, + "loss": 17.4068, + "step": 26256 + }, + { + "epoch": 0.4799568610963862, + "grad_norm": 7.086812921516251, + "learning_rate": 5.566512476621106e-06, + "loss": 17.5719, + "step": 26257 + }, + { + "epoch": 0.4799751402928327, + "grad_norm": 5.5893929247853675, + "learning_rate": 5.5662183702452665e-06, + "loss": 17.3398, + "step": 26258 + }, + { + "epoch": 0.47999341948927926, + "grad_norm": 5.584706944612587, + "learning_rate": 5.5659242618848785e-06, + "loss": 16.944, + "step": 26259 + }, + { + 
"epoch": 0.4800116986857258, + "grad_norm": 5.61440570770011, + "learning_rate": 5.565630151540978e-06, + "loss": 17.2652, + "step": 26260 + }, + { + "epoch": 0.4800299778821723, + "grad_norm": 7.016142058233312, + "learning_rate": 5.5653360392145914e-06, + "loss": 18.0383, + "step": 26261 + }, + { + "epoch": 0.4800482570786188, + "grad_norm": 6.160932323324327, + "learning_rate": 5.5650419249067514e-06, + "loss": 17.2671, + "step": 26262 + }, + { + "epoch": 0.48006653627506535, + "grad_norm": 8.763301565907854, + "learning_rate": 5.564747808618488e-06, + "loss": 18.2968, + "step": 26263 + }, + { + "epoch": 0.4800848154715119, + "grad_norm": 5.798588579926526, + "learning_rate": 5.564453690350833e-06, + "loss": 17.0508, + "step": 26264 + }, + { + "epoch": 0.4801030946679584, + "grad_norm": 5.1590241961317655, + "learning_rate": 5.564159570104817e-06, + "loss": 16.9717, + "step": 26265 + }, + { + "epoch": 0.4801213738644049, + "grad_norm": 6.837219230012546, + "learning_rate": 5.56386544788147e-06, + "loss": 17.3461, + "step": 26266 + }, + { + "epoch": 0.48013965306085143, + "grad_norm": 6.810932047800776, + "learning_rate": 5.563571323681825e-06, + "loss": 17.9143, + "step": 26267 + }, + { + "epoch": 0.48015793225729797, + "grad_norm": 5.903412998805857, + "learning_rate": 5.5632771975069085e-06, + "loss": 17.2265, + "step": 26268 + }, + { + "epoch": 0.4801762114537445, + "grad_norm": 7.445933863773643, + "learning_rate": 5.562983069357757e-06, + "loss": 17.8089, + "step": 26269 + }, + { + "epoch": 0.48019449065019104, + "grad_norm": 5.859780627223239, + "learning_rate": 5.562688939235398e-06, + "loss": 17.1932, + "step": 26270 + }, + { + "epoch": 0.4802127698466375, + "grad_norm": 7.763470278693377, + "learning_rate": 5.562394807140863e-06, + "loss": 17.9588, + "step": 26271 + }, + { + "epoch": 0.48023104904308406, + "grad_norm": 7.187215510049689, + "learning_rate": 5.5621006730751825e-06, + "loss": 17.8856, + "step": 26272 + }, + { + "epoch": 0.4802493282395306, + "grad_norm": 6.174061876007868, + "learning_rate": 5.561806537039388e-06, + "loss": 17.5122, + "step": 26273 + }, + { + "epoch": 0.48026760743597713, + "grad_norm": 4.817893880664546, + "learning_rate": 5.561512399034511e-06, + "loss": 16.8486, + "step": 26274 + }, + { + "epoch": 0.48028588663242366, + "grad_norm": 5.405630905271116, + "learning_rate": 5.5612182590615815e-06, + "loss": 17.0686, + "step": 26275 + }, + { + "epoch": 0.48030416582887014, + "grad_norm": 5.733040557614441, + "learning_rate": 5.56092411712163e-06, + "loss": 17.444, + "step": 26276 + }, + { + "epoch": 0.4803224450253167, + "grad_norm": 7.022229118587633, + "learning_rate": 5.560629973215688e-06, + "loss": 17.797, + "step": 26277 + }, + { + "epoch": 0.4803407242217632, + "grad_norm": 7.0566013658705575, + "learning_rate": 5.5603358273447886e-06, + "loss": 17.8137, + "step": 26278 + }, + { + "epoch": 0.48035900341820975, + "grad_norm": 6.799484733560736, + "learning_rate": 5.560041679509959e-06, + "loss": 17.6116, + "step": 26279 + }, + { + "epoch": 0.4803772826146563, + "grad_norm": 4.995210153192633, + "learning_rate": 5.559747529712234e-06, + "loss": 16.7933, + "step": 26280 + }, + { + "epoch": 0.48039556181110277, + "grad_norm": 11.211654346671791, + "learning_rate": 5.559453377952641e-06, + "loss": 17.997, + "step": 26281 + }, + { + "epoch": 0.4804138410075493, + "grad_norm": 5.8848111032914066, + "learning_rate": 5.55915922423221e-06, + "loss": 17.2252, + "step": 26282 + }, + { + "epoch": 0.48043212020399584, + "grad_norm": 5.2625305885375075, + 
"learning_rate": 5.558865068551978e-06, + "loss": 17.0976, + "step": 26283 + }, + { + "epoch": 0.4804503994004424, + "grad_norm": 6.16618166177873, + "learning_rate": 5.558570910912971e-06, + "loss": 17.6266, + "step": 26284 + }, + { + "epoch": 0.48046867859688885, + "grad_norm": 6.671207237377224, + "learning_rate": 5.558276751316222e-06, + "loss": 17.9212, + "step": 26285 + }, + { + "epoch": 0.4804869577933354, + "grad_norm": 6.5691155807881945, + "learning_rate": 5.55798258976276e-06, + "loss": 17.658, + "step": 26286 + }, + { + "epoch": 0.4805052369897819, + "grad_norm": 5.148630452808944, + "learning_rate": 5.557688426253619e-06, + "loss": 16.9711, + "step": 26287 + }, + { + "epoch": 0.48052351618622846, + "grad_norm": 6.101104961735287, + "learning_rate": 5.557394260789828e-06, + "loss": 17.5516, + "step": 26288 + }, + { + "epoch": 0.480541795382675, + "grad_norm": 6.030352061449858, + "learning_rate": 5.557100093372418e-06, + "loss": 17.1955, + "step": 26289 + }, + { + "epoch": 0.4805600745791215, + "grad_norm": 6.568361226260854, + "learning_rate": 5.556805924002421e-06, + "loss": 17.5732, + "step": 26290 + }, + { + "epoch": 0.480578353775568, + "grad_norm": 7.811923646920016, + "learning_rate": 5.5565117526808675e-06, + "loss": 18.1357, + "step": 26291 + }, + { + "epoch": 0.48059663297201455, + "grad_norm": 6.420016928709315, + "learning_rate": 5.556217579408789e-06, + "loss": 17.7828, + "step": 26292 + }, + { + "epoch": 0.4806149121684611, + "grad_norm": 7.340951696991889, + "learning_rate": 5.555923404187216e-06, + "loss": 18.1184, + "step": 26293 + }, + { + "epoch": 0.4806331913649076, + "grad_norm": 6.170264613948992, + "learning_rate": 5.5556292270171796e-06, + "loss": 17.3478, + "step": 26294 + }, + { + "epoch": 0.4806514705613541, + "grad_norm": 7.3686494293047, + "learning_rate": 5.5553350478997105e-06, + "loss": 18.1051, + "step": 26295 + }, + { + "epoch": 0.48066974975780064, + "grad_norm": 6.20315733126409, + "learning_rate": 5.55504086683584e-06, + "loss": 17.4688, + "step": 26296 + }, + { + "epoch": 0.48068802895424717, + "grad_norm": 6.279347818895089, + "learning_rate": 5.5547466838265995e-06, + "loss": 17.2298, + "step": 26297 + }, + { + "epoch": 0.4807063081506937, + "grad_norm": 7.263587220888498, + "learning_rate": 5.554452498873022e-06, + "loss": 17.6596, + "step": 26298 + }, + { + "epoch": 0.48072458734714024, + "grad_norm": 5.879391009640084, + "learning_rate": 5.5541583119761345e-06, + "loss": 17.3132, + "step": 26299 + }, + { + "epoch": 0.4807428665435867, + "grad_norm": 5.67632039881883, + "learning_rate": 5.55386412313697e-06, + "loss": 17.0772, + "step": 26300 + }, + { + "epoch": 0.48076114574003326, + "grad_norm": 5.4965637625749135, + "learning_rate": 5.553569932356561e-06, + "loss": 17.4225, + "step": 26301 + }, + { + "epoch": 0.4807794249364798, + "grad_norm": 7.718166568010055, + "learning_rate": 5.553275739635938e-06, + "loss": 17.6912, + "step": 26302 + }, + { + "epoch": 0.48079770413292633, + "grad_norm": 7.331301042963287, + "learning_rate": 5.55298154497613e-06, + "loss": 17.9998, + "step": 26303 + }, + { + "epoch": 0.48081598332937286, + "grad_norm": 7.30181352093863, + "learning_rate": 5.552687348378171e-06, + "loss": 17.9985, + "step": 26304 + }, + { + "epoch": 0.48083426252581934, + "grad_norm": 6.1782655559328425, + "learning_rate": 5.552393149843089e-06, + "loss": 17.6233, + "step": 26305 + }, + { + "epoch": 0.4808525417222659, + "grad_norm": 6.840889730160085, + "learning_rate": 5.552098949371918e-06, + "loss": 17.8455, + "step": 26306 + 
}, + { + "epoch": 0.4808708209187124, + "grad_norm": 6.504445684199956, + "learning_rate": 5.551804746965689e-06, + "loss": 17.5115, + "step": 26307 + }, + { + "epoch": 0.48088910011515895, + "grad_norm": 6.329447353544942, + "learning_rate": 5.551510542625433e-06, + "loss": 17.6087, + "step": 26308 + }, + { + "epoch": 0.4809073793116055, + "grad_norm": 5.539137613769063, + "learning_rate": 5.551216336352176e-06, + "loss": 17.0746, + "step": 26309 + }, + { + "epoch": 0.48092565850805197, + "grad_norm": 6.315593296203523, + "learning_rate": 5.550922128146957e-06, + "loss": 17.6492, + "step": 26310 + }, + { + "epoch": 0.4809439377044985, + "grad_norm": 6.286549505811321, + "learning_rate": 5.550627918010804e-06, + "loss": 17.3756, + "step": 26311 + }, + { + "epoch": 0.48096221690094504, + "grad_norm": 7.438101299101056, + "learning_rate": 5.550333705944747e-06, + "loss": 17.9737, + "step": 26312 + }, + { + "epoch": 0.4809804960973916, + "grad_norm": 5.016704469711039, + "learning_rate": 5.550039491949818e-06, + "loss": 16.9689, + "step": 26313 + }, + { + "epoch": 0.4809987752938381, + "grad_norm": 5.832696566777145, + "learning_rate": 5.549745276027047e-06, + "loss": 17.6528, + "step": 26314 + }, + { + "epoch": 0.4810170544902846, + "grad_norm": 5.482277567660579, + "learning_rate": 5.54945105817747e-06, + "loss": 16.9582, + "step": 26315 + }, + { + "epoch": 0.4810353336867311, + "grad_norm": 6.803837274527475, + "learning_rate": 5.5491568384021125e-06, + "loss": 17.9034, + "step": 26316 + }, + { + "epoch": 0.48105361288317766, + "grad_norm": 6.9583309770867885, + "learning_rate": 5.548862616702008e-06, + "loss": 17.8081, + "step": 26317 + }, + { + "epoch": 0.4810718920796242, + "grad_norm": 8.33771885405574, + "learning_rate": 5.548568393078188e-06, + "loss": 18.432, + "step": 26318 + }, + { + "epoch": 0.4810901712760707, + "grad_norm": 5.976827166786, + "learning_rate": 5.548274167531682e-06, + "loss": 17.3933, + "step": 26319 + }, + { + "epoch": 0.4811084504725172, + "grad_norm": 7.0006852407067495, + "learning_rate": 5.547979940063524e-06, + "loss": 17.5429, + "step": 26320 + }, + { + "epoch": 0.48112672966896375, + "grad_norm": 5.994212461893175, + "learning_rate": 5.547685710674744e-06, + "loss": 17.2477, + "step": 26321 + }, + { + "epoch": 0.4811450088654103, + "grad_norm": 5.40182894286935, + "learning_rate": 5.547391479366372e-06, + "loss": 17.0529, + "step": 26322 + }, + { + "epoch": 0.4811632880618568, + "grad_norm": 6.683537961834204, + "learning_rate": 5.547097246139441e-06, + "loss": 17.5788, + "step": 26323 + }, + { + "epoch": 0.4811815672583033, + "grad_norm": 6.985561446856629, + "learning_rate": 5.546803010994982e-06, + "loss": 17.7265, + "step": 26324 + }, + { + "epoch": 0.48119984645474984, + "grad_norm": 6.762442212134518, + "learning_rate": 5.546508773934026e-06, + "loss": 17.568, + "step": 26325 + }, + { + "epoch": 0.48121812565119637, + "grad_norm": 6.166687649428789, + "learning_rate": 5.5462145349576046e-06, + "loss": 17.3994, + "step": 26326 + }, + { + "epoch": 0.4812364048476429, + "grad_norm": 5.78866726007588, + "learning_rate": 5.545920294066747e-06, + "loss": 17.1952, + "step": 26327 + }, + { + "epoch": 0.48125468404408944, + "grad_norm": 7.8089709936525855, + "learning_rate": 5.545626051262486e-06, + "loss": 17.8199, + "step": 26328 + }, + { + "epoch": 0.4812729632405359, + "grad_norm": 5.730696785860018, + "learning_rate": 5.545331806545855e-06, + "loss": 17.2917, + "step": 26329 + }, + { + "epoch": 0.48129124243698246, + "grad_norm": 6.010027176161514, + 
"learning_rate": 5.545037559917883e-06, + "loss": 17.3959, + "step": 26330 + }, + { + "epoch": 0.481309521633429, + "grad_norm": 6.660744577614647, + "learning_rate": 5.5447433113796e-06, + "loss": 17.3296, + "step": 26331 + }, + { + "epoch": 0.48132780082987553, + "grad_norm": 6.7999284135408775, + "learning_rate": 5.54444906093204e-06, + "loss": 17.5348, + "step": 26332 + }, + { + "epoch": 0.48134608002632207, + "grad_norm": 6.717869793306183, + "learning_rate": 5.544154808576235e-06, + "loss": 17.8168, + "step": 26333 + }, + { + "epoch": 0.48136435922276855, + "grad_norm": 7.7918479956045354, + "learning_rate": 5.543860554313212e-06, + "loss": 17.5697, + "step": 26334 + }, + { + "epoch": 0.4813826384192151, + "grad_norm": 5.3385532128671915, + "learning_rate": 5.543566298144005e-06, + "loss": 17.0516, + "step": 26335 + }, + { + "epoch": 0.4814009176156616, + "grad_norm": 5.446983480464271, + "learning_rate": 5.543272040069646e-06, + "loss": 17.1035, + "step": 26336 + }, + { + "epoch": 0.48141919681210815, + "grad_norm": 7.361994189029218, + "learning_rate": 5.542977780091166e-06, + "loss": 17.4346, + "step": 26337 + }, + { + "epoch": 0.4814374760085547, + "grad_norm": 5.831175749993607, + "learning_rate": 5.542683518209596e-06, + "loss": 17.2801, + "step": 26338 + }, + { + "epoch": 0.48145575520500117, + "grad_norm": 5.320647097754033, + "learning_rate": 5.5423892544259685e-06, + "loss": 17.0893, + "step": 26339 + }, + { + "epoch": 0.4814740344014477, + "grad_norm": 4.948055325539974, + "learning_rate": 5.542094988741311e-06, + "loss": 16.8273, + "step": 26340 + }, + { + "epoch": 0.48149231359789424, + "grad_norm": 6.744933085245579, + "learning_rate": 5.541800721156658e-06, + "loss": 17.8702, + "step": 26341 + }, + { + "epoch": 0.4815105927943408, + "grad_norm": 6.903769133505602, + "learning_rate": 5.541506451673043e-06, + "loss": 17.9442, + "step": 26342 + }, + { + "epoch": 0.4815288719907873, + "grad_norm": 5.061867908918671, + "learning_rate": 5.541212180291493e-06, + "loss": 17.1038, + "step": 26343 + }, + { + "epoch": 0.4815471511872338, + "grad_norm": 7.448512656050445, + "learning_rate": 5.540917907013041e-06, + "loss": 18.2131, + "step": 26344 + }, + { + "epoch": 0.4815654303836803, + "grad_norm": 6.354973975214358, + "learning_rate": 5.54062363183872e-06, + "loss": 17.4036, + "step": 26345 + }, + { + "epoch": 0.48158370958012686, + "grad_norm": 6.487779165859003, + "learning_rate": 5.540329354769559e-06, + "loss": 17.4813, + "step": 26346 + }, + { + "epoch": 0.4816019887765734, + "grad_norm": 6.461643148634538, + "learning_rate": 5.540035075806591e-06, + "loss": 17.2318, + "step": 26347 + }, + { + "epoch": 0.48162026797301993, + "grad_norm": 6.7665501526774685, + "learning_rate": 5.539740794950846e-06, + "loss": 17.7662, + "step": 26348 + }, + { + "epoch": 0.4816385471694664, + "grad_norm": 7.327919960970281, + "learning_rate": 5.539446512203358e-06, + "loss": 17.7267, + "step": 26349 + }, + { + "epoch": 0.48165682636591295, + "grad_norm": 7.199502002435817, + "learning_rate": 5.5391522275651555e-06, + "loss": 17.7265, + "step": 26350 + }, + { + "epoch": 0.4816751055623595, + "grad_norm": 6.1694956664444325, + "learning_rate": 5.538857941037272e-06, + "loss": 17.4255, + "step": 26351 + }, + { + "epoch": 0.481693384758806, + "grad_norm": 7.261349513143847, + "learning_rate": 5.538563652620738e-06, + "loss": 18.0629, + "step": 26352 + }, + { + "epoch": 0.4817116639552525, + "grad_norm": 7.2562196600869715, + "learning_rate": 5.538269362316585e-06, + "loss": 17.7093, + "step": 
26353 + }, + { + "epoch": 0.48172994315169904, + "grad_norm": 7.815595500800447, + "learning_rate": 5.537975070125844e-06, + "loss": 18.2068, + "step": 26354 + }, + { + "epoch": 0.48174822234814557, + "grad_norm": 8.912954167386891, + "learning_rate": 5.537680776049547e-06, + "loss": 17.9742, + "step": 26355 + }, + { + "epoch": 0.4817665015445921, + "grad_norm": 5.379099899561392, + "learning_rate": 5.537386480088728e-06, + "loss": 17.3032, + "step": 26356 + }, + { + "epoch": 0.48178478074103864, + "grad_norm": 5.833575098529954, + "learning_rate": 5.537092182244414e-06, + "loss": 17.1715, + "step": 26357 + }, + { + "epoch": 0.4818030599374851, + "grad_norm": 5.944524551931755, + "learning_rate": 5.536797882517639e-06, + "loss": 17.4551, + "step": 26358 + }, + { + "epoch": 0.48182133913393166, + "grad_norm": 4.406158916859859, + "learning_rate": 5.5365035809094315e-06, + "loss": 16.7808, + "step": 26359 + }, + { + "epoch": 0.4818396183303782, + "grad_norm": 6.482350089546045, + "learning_rate": 5.536209277420829e-06, + "loss": 17.5317, + "step": 26360 + }, + { + "epoch": 0.48185789752682473, + "grad_norm": 8.447406297336544, + "learning_rate": 5.5359149720528586e-06, + "loss": 17.653, + "step": 26361 + }, + { + "epoch": 0.48187617672327127, + "grad_norm": 7.80788670441757, + "learning_rate": 5.535620664806551e-06, + "loss": 18.283, + "step": 26362 + }, + { + "epoch": 0.48189445591971775, + "grad_norm": 5.911683566350858, + "learning_rate": 5.535326355682942e-06, + "loss": 17.5112, + "step": 26363 + }, + { + "epoch": 0.4819127351161643, + "grad_norm": 6.646164979700029, + "learning_rate": 5.5350320446830585e-06, + "loss": 17.9519, + "step": 26364 + }, + { + "epoch": 0.4819310143126108, + "grad_norm": 5.971546840920929, + "learning_rate": 5.534737731807935e-06, + "loss": 17.2452, + "step": 26365 + }, + { + "epoch": 0.48194929350905735, + "grad_norm": 6.922578573049142, + "learning_rate": 5.534443417058602e-06, + "loss": 17.9595, + "step": 26366 + }, + { + "epoch": 0.4819675727055039, + "grad_norm": 7.729067852830623, + "learning_rate": 5.534149100436092e-06, + "loss": 17.5947, + "step": 26367 + }, + { + "epoch": 0.48198585190195037, + "grad_norm": 5.921488170399348, + "learning_rate": 5.533854781941435e-06, + "loss": 17.4843, + "step": 26368 + }, + { + "epoch": 0.4820041310983969, + "grad_norm": 6.099905150420779, + "learning_rate": 5.533560461575663e-06, + "loss": 17.4936, + "step": 26369 + }, + { + "epoch": 0.48202241029484344, + "grad_norm": 5.2078788116215, + "learning_rate": 5.533266139339809e-06, + "loss": 17.0594, + "step": 26370 + }, + { + "epoch": 0.48204068949129, + "grad_norm": 5.344458387455696, + "learning_rate": 5.5329718152349036e-06, + "loss": 17.1222, + "step": 26371 + }, + { + "epoch": 0.4820589686877365, + "grad_norm": 5.759422879294768, + "learning_rate": 5.532677489261976e-06, + "loss": 17.1989, + "step": 26372 + }, + { + "epoch": 0.482077247884183, + "grad_norm": 6.27210077383447, + "learning_rate": 5.532383161422061e-06, + "loss": 17.4011, + "step": 26373 + }, + { + "epoch": 0.4820955270806295, + "grad_norm": 6.975288594122639, + "learning_rate": 5.532088831716191e-06, + "loss": 17.5764, + "step": 26374 + }, + { + "epoch": 0.48211380627707606, + "grad_norm": 7.2013189700389875, + "learning_rate": 5.531794500145394e-06, + "loss": 17.7035, + "step": 26375 + }, + { + "epoch": 0.4821320854735226, + "grad_norm": 5.93673221939514, + "learning_rate": 5.531500166710704e-06, + "loss": 17.3782, + "step": 26376 + }, + { + "epoch": 0.48215036466996913, + "grad_norm": 
6.2951549386612635, + "learning_rate": 5.5312058314131515e-06, + "loss": 17.537, + "step": 26377 + }, + { + "epoch": 0.4821686438664156, + "grad_norm": 5.095930937214379, + "learning_rate": 5.530911494253769e-06, + "loss": 17.0357, + "step": 26378 + }, + { + "epoch": 0.48218692306286215, + "grad_norm": 6.595069307076094, + "learning_rate": 5.530617155233588e-06, + "loss": 17.6362, + "step": 26379 + }, + { + "epoch": 0.4822052022593087, + "grad_norm": 5.227657714040226, + "learning_rate": 5.530322814353641e-06, + "loss": 16.9308, + "step": 26380 + }, + { + "epoch": 0.4822234814557552, + "grad_norm": 5.635523305027515, + "learning_rate": 5.530028471614955e-06, + "loss": 17.263, + "step": 26381 + }, + { + "epoch": 0.48224176065220176, + "grad_norm": 5.44696137512229, + "learning_rate": 5.529734127018568e-06, + "loss": 17.0705, + "step": 26382 + }, + { + "epoch": 0.48226003984864824, + "grad_norm": 5.997484849082562, + "learning_rate": 5.529439780565509e-06, + "loss": 17.4066, + "step": 26383 + }, + { + "epoch": 0.48227831904509477, + "grad_norm": 6.628561715389882, + "learning_rate": 5.529145432256809e-06, + "loss": 17.7799, + "step": 26384 + }, + { + "epoch": 0.4822965982415413, + "grad_norm": 6.430481647033819, + "learning_rate": 5.5288510820935005e-06, + "loss": 17.3636, + "step": 26385 + }, + { + "epoch": 0.48231487743798784, + "grad_norm": 7.03515482701866, + "learning_rate": 5.528556730076613e-06, + "loss": 17.7708, + "step": 26386 + }, + { + "epoch": 0.4823331566344343, + "grad_norm": 6.0235610521984135, + "learning_rate": 5.528262376207182e-06, + "loss": 17.2265, + "step": 26387 + }, + { + "epoch": 0.48235143583088086, + "grad_norm": 6.569044577328092, + "learning_rate": 5.527968020486237e-06, + "loss": 17.2592, + "step": 26388 + }, + { + "epoch": 0.4823697150273274, + "grad_norm": 6.5548474387297855, + "learning_rate": 5.52767366291481e-06, + "loss": 17.3231, + "step": 26389 + }, + { + "epoch": 0.48238799422377393, + "grad_norm": 6.309354915714875, + "learning_rate": 5.527379303493932e-06, + "loss": 17.197, + "step": 26390 + }, + { + "epoch": 0.48240627342022047, + "grad_norm": 6.195226704317913, + "learning_rate": 5.527084942224635e-06, + "loss": 17.8131, + "step": 26391 + }, + { + "epoch": 0.48242455261666695, + "grad_norm": 5.912727609028651, + "learning_rate": 5.526790579107951e-06, + "loss": 17.1175, + "step": 26392 + }, + { + "epoch": 0.4824428318131135, + "grad_norm": 7.107358197025265, + "learning_rate": 5.526496214144912e-06, + "loss": 17.9344, + "step": 26393 + }, + { + "epoch": 0.48246111100956, + "grad_norm": 7.684102386305336, + "learning_rate": 5.526201847336551e-06, + "loss": 17.9892, + "step": 26394 + }, + { + "epoch": 0.48247939020600655, + "grad_norm": 6.474374355161299, + "learning_rate": 5.525907478683895e-06, + "loss": 17.4138, + "step": 26395 + }, + { + "epoch": 0.4824976694024531, + "grad_norm": 5.991150146906878, + "learning_rate": 5.525613108187982e-06, + "loss": 17.1133, + "step": 26396 + }, + { + "epoch": 0.48251594859889957, + "grad_norm": 7.153046771873008, + "learning_rate": 5.5253187358498385e-06, + "loss": 17.7382, + "step": 26397 + }, + { + "epoch": 0.4825342277953461, + "grad_norm": 5.927026542319743, + "learning_rate": 5.5250243616705005e-06, + "loss": 17.0766, + "step": 26398 + }, + { + "epoch": 0.48255250699179264, + "grad_norm": 6.171271561810203, + "learning_rate": 5.524729985650996e-06, + "loss": 17.6482, + "step": 26399 + }, + { + "epoch": 0.4825707861882392, + "grad_norm": 6.893844430349866, + "learning_rate": 5.524435607792358e-06, + "loss": 
17.6515, + "step": 26400 + }, + { + "epoch": 0.4825890653846857, + "grad_norm": 7.439381565053576, + "learning_rate": 5.524141228095621e-06, + "loss": 17.8956, + "step": 26401 + }, + { + "epoch": 0.4826073445811322, + "grad_norm": 7.373664106598826, + "learning_rate": 5.5238468465618135e-06, + "loss": 17.9442, + "step": 26402 + }, + { + "epoch": 0.4826256237775787, + "grad_norm": 6.9939627799868695, + "learning_rate": 5.5235524631919664e-06, + "loss": 17.7618, + "step": 26403 + }, + { + "epoch": 0.48264390297402526, + "grad_norm": 6.217449746692742, + "learning_rate": 5.523258077987116e-06, + "loss": 17.2366, + "step": 26404 + }, + { + "epoch": 0.4826621821704718, + "grad_norm": 6.475407706996417, + "learning_rate": 5.5229636909482895e-06, + "loss": 17.453, + "step": 26405 + }, + { + "epoch": 0.48268046136691833, + "grad_norm": 6.771248412325351, + "learning_rate": 5.522669302076522e-06, + "loss": 17.6682, + "step": 26406 + }, + { + "epoch": 0.4826987405633648, + "grad_norm": 7.932638380024133, + "learning_rate": 5.522374911372843e-06, + "loss": 17.7972, + "step": 26407 + }, + { + "epoch": 0.48271701975981135, + "grad_norm": 5.398758511775816, + "learning_rate": 5.522080518838286e-06, + "loss": 17.0619, + "step": 26408 + }, + { + "epoch": 0.4827352989562579, + "grad_norm": 7.134467064725874, + "learning_rate": 5.521786124473881e-06, + "loss": 17.676, + "step": 26409 + }, + { + "epoch": 0.4827535781527044, + "grad_norm": 5.467138954842728, + "learning_rate": 5.521491728280661e-06, + "loss": 17.079, + "step": 26410 + }, + { + "epoch": 0.48277185734915096, + "grad_norm": 7.448593383359493, + "learning_rate": 5.52119733025966e-06, + "loss": 17.7377, + "step": 26411 + }, + { + "epoch": 0.48279013654559744, + "grad_norm": 6.706712332785757, + "learning_rate": 5.5209029304119055e-06, + "loss": 17.542, + "step": 26412 + }, + { + "epoch": 0.482808415742044, + "grad_norm": 8.079782595318413, + "learning_rate": 5.520608528738431e-06, + "loss": 18.13, + "step": 26413 + }, + { + "epoch": 0.4828266949384905, + "grad_norm": 6.180117578392903, + "learning_rate": 5.520314125240269e-06, + "loss": 17.2135, + "step": 26414 + }, + { + "epoch": 0.48284497413493704, + "grad_norm": 6.413793734003815, + "learning_rate": 5.520019719918454e-06, + "loss": 17.4822, + "step": 26415 + }, + { + "epoch": 0.4828632533313836, + "grad_norm": 5.416052383810696, + "learning_rate": 5.519725312774012e-06, + "loss": 16.9619, + "step": 26416 + }, + { + "epoch": 0.48288153252783006, + "grad_norm": 8.515404032131901, + "learning_rate": 5.519430903807979e-06, + "loss": 18.2006, + "step": 26417 + }, + { + "epoch": 0.4828998117242766, + "grad_norm": 6.718324835533906, + "learning_rate": 5.519136493021385e-06, + "loss": 17.7451, + "step": 26418 + }, + { + "epoch": 0.48291809092072313, + "grad_norm": 7.557157321472496, + "learning_rate": 5.518842080415263e-06, + "loss": 18.0379, + "step": 26419 + }, + { + "epoch": 0.48293637011716967, + "grad_norm": 5.491356103339672, + "learning_rate": 5.518547665990644e-06, + "loss": 16.8837, + "step": 26420 + }, + { + "epoch": 0.48295464931361615, + "grad_norm": 6.039783839028251, + "learning_rate": 5.518253249748562e-06, + "loss": 17.4438, + "step": 26421 + }, + { + "epoch": 0.4829729285100627, + "grad_norm": 7.234507615296453, + "learning_rate": 5.517958831690047e-06, + "loss": 17.5323, + "step": 26422 + }, + { + "epoch": 0.4829912077065092, + "grad_norm": 7.147886885993875, + "learning_rate": 5.517664411816129e-06, + "loss": 17.9488, + "step": 26423 + }, + { + "epoch": 0.48300948690295575, + 
"grad_norm": 6.7792975085443254, + "learning_rate": 5.517369990127844e-06, + "loss": 17.9162, + "step": 26424 + }, + { + "epoch": 0.4830277660994023, + "grad_norm": 6.366299688558848, + "learning_rate": 5.517075566626223e-06, + "loss": 17.3968, + "step": 26425 + }, + { + "epoch": 0.48304604529584877, + "grad_norm": 6.671544908143495, + "learning_rate": 5.516781141312296e-06, + "loss": 17.6329, + "step": 26426 + }, + { + "epoch": 0.4830643244922953, + "grad_norm": 6.147532588853179, + "learning_rate": 5.516486714187095e-06, + "loss": 17.3035, + "step": 26427 + }, + { + "epoch": 0.48308260368874184, + "grad_norm": 6.5213750406529005, + "learning_rate": 5.516192285251654e-06, + "loss": 17.7173, + "step": 26428 + }, + { + "epoch": 0.4831008828851884, + "grad_norm": 6.299153225964926, + "learning_rate": 5.515897854507004e-06, + "loss": 17.5168, + "step": 26429 + }, + { + "epoch": 0.4831191620816349, + "grad_norm": 6.884270250099057, + "learning_rate": 5.5156034219541765e-06, + "loss": 17.6303, + "step": 26430 + }, + { + "epoch": 0.4831374412780814, + "grad_norm": 6.791941452825686, + "learning_rate": 5.515308987594204e-06, + "loss": 17.6293, + "step": 26431 + }, + { + "epoch": 0.4831557204745279, + "grad_norm": 5.425015410045171, + "learning_rate": 5.515014551428117e-06, + "loss": 16.9689, + "step": 26432 + }, + { + "epoch": 0.48317399967097446, + "grad_norm": 6.327286698870973, + "learning_rate": 5.514720113456949e-06, + "loss": 17.4335, + "step": 26433 + }, + { + "epoch": 0.483192278867421, + "grad_norm": 5.751877844138184, + "learning_rate": 5.514425673681732e-06, + "loss": 17.1089, + "step": 26434 + }, + { + "epoch": 0.48321055806386753, + "grad_norm": 7.826855071819306, + "learning_rate": 5.514131232103498e-06, + "loss": 17.887, + "step": 26435 + }, + { + "epoch": 0.483228837260314, + "grad_norm": 5.554974774813623, + "learning_rate": 5.513836788723279e-06, + "loss": 17.195, + "step": 26436 + }, + { + "epoch": 0.48324711645676055, + "grad_norm": 6.460077041217687, + "learning_rate": 5.513542343542105e-06, + "loss": 17.619, + "step": 26437 + }, + { + "epoch": 0.4832653956532071, + "grad_norm": 6.559002295310229, + "learning_rate": 5.513247896561011e-06, + "loss": 17.7913, + "step": 26438 + }, + { + "epoch": 0.4832836748496536, + "grad_norm": 6.33195986367585, + "learning_rate": 5.5129534477810285e-06, + "loss": 17.4724, + "step": 26439 + }, + { + "epoch": 0.48330195404610016, + "grad_norm": 5.912952227299503, + "learning_rate": 5.512658997203187e-06, + "loss": 17.3697, + "step": 26440 + }, + { + "epoch": 0.48332023324254664, + "grad_norm": 5.983003719290201, + "learning_rate": 5.51236454482852e-06, + "loss": 17.4935, + "step": 26441 + }, + { + "epoch": 0.4833385124389932, + "grad_norm": 6.084286934451618, + "learning_rate": 5.5120700906580614e-06, + "loss": 17.2255, + "step": 26442 + }, + { + "epoch": 0.4833567916354397, + "grad_norm": 5.918171952761315, + "learning_rate": 5.5117756346928406e-06, + "loss": 17.3408, + "step": 26443 + }, + { + "epoch": 0.48337507083188624, + "grad_norm": 7.875499809776562, + "learning_rate": 5.51148117693389e-06, + "loss": 17.8208, + "step": 26444 + }, + { + "epoch": 0.4833933500283328, + "grad_norm": 5.9383213103735315, + "learning_rate": 5.511186717382244e-06, + "loss": 17.3032, + "step": 26445 + }, + { + "epoch": 0.48341162922477926, + "grad_norm": 9.484592910129143, + "learning_rate": 5.510892256038932e-06, + "loss": 18.4924, + "step": 26446 + }, + { + "epoch": 0.4834299084212258, + "grad_norm": 6.985124629907305, + "learning_rate": 5.510597792904987e-06, 
+ "loss": 17.5429, + "step": 26447 + }, + { + "epoch": 0.48344818761767233, + "grad_norm": 5.5846440055655755, + "learning_rate": 5.51030332798144e-06, + "loss": 17.1491, + "step": 26448 + }, + { + "epoch": 0.48346646681411887, + "grad_norm": 5.508266345734633, + "learning_rate": 5.510008861269325e-06, + "loss": 17.0911, + "step": 26449 + }, + { + "epoch": 0.4834847460105654, + "grad_norm": 6.573095767583956, + "learning_rate": 5.509714392769674e-06, + "loss": 17.2324, + "step": 26450 + }, + { + "epoch": 0.4835030252070119, + "grad_norm": 7.010458442249363, + "learning_rate": 5.509419922483516e-06, + "loss": 17.8, + "step": 26451 + }, + { + "epoch": 0.4835213044034584, + "grad_norm": 6.951120102039227, + "learning_rate": 5.509125450411888e-06, + "loss": 17.402, + "step": 26452 + }, + { + "epoch": 0.48353958359990495, + "grad_norm": 6.355362812699377, + "learning_rate": 5.508830976555819e-06, + "loss": 17.0563, + "step": 26453 + }, + { + "epoch": 0.4835578627963515, + "grad_norm": 6.308735858351048, + "learning_rate": 5.5085365009163394e-06, + "loss": 17.3058, + "step": 26454 + }, + { + "epoch": 0.48357614199279797, + "grad_norm": 6.79619632781194, + "learning_rate": 5.508242023494486e-06, + "loss": 17.7997, + "step": 26455 + }, + { + "epoch": 0.4835944211892445, + "grad_norm": 6.67685624025841, + "learning_rate": 5.507947544291288e-06, + "loss": 17.1824, + "step": 26456 + }, + { + "epoch": 0.48361270038569104, + "grad_norm": 7.292173648165463, + "learning_rate": 5.507653063307777e-06, + "loss": 17.9333, + "step": 26457 + }, + { + "epoch": 0.4836309795821376, + "grad_norm": 6.360337230182274, + "learning_rate": 5.507358580544986e-06, + "loss": 17.4501, + "step": 26458 + }, + { + "epoch": 0.4836492587785841, + "grad_norm": 8.908950701616417, + "learning_rate": 5.507064096003947e-06, + "loss": 18.7293, + "step": 26459 + }, + { + "epoch": 0.4836675379750306, + "grad_norm": 6.081792855704345, + "learning_rate": 5.506769609685694e-06, + "loss": 17.4807, + "step": 26460 + }, + { + "epoch": 0.48368581717147713, + "grad_norm": 5.310649674090544, + "learning_rate": 5.506475121591256e-06, + "loss": 16.9886, + "step": 26461 + }, + { + "epoch": 0.48370409636792366, + "grad_norm": 6.463605555093732, + "learning_rate": 5.5061806317216675e-06, + "loss": 17.5928, + "step": 26462 + }, + { + "epoch": 0.4837223755643702, + "grad_norm": 6.645141703076254, + "learning_rate": 5.505886140077959e-06, + "loss": 17.6156, + "step": 26463 + }, + { + "epoch": 0.48374065476081674, + "grad_norm": 7.919740673887846, + "learning_rate": 5.505591646661163e-06, + "loss": 17.6663, + "step": 26464 + }, + { + "epoch": 0.4837589339572632, + "grad_norm": 6.380795144980014, + "learning_rate": 5.505297151472314e-06, + "loss": 17.497, + "step": 26465 + }, + { + "epoch": 0.48377721315370975, + "grad_norm": 5.354570423059798, + "learning_rate": 5.505002654512442e-06, + "loss": 17.2725, + "step": 26466 + }, + { + "epoch": 0.4837954923501563, + "grad_norm": 7.920450163534305, + "learning_rate": 5.504708155782579e-06, + "loss": 18.361, + "step": 26467 + }, + { + "epoch": 0.4838137715466028, + "grad_norm": 6.797506938434176, + "learning_rate": 5.504413655283757e-06, + "loss": 17.4863, + "step": 26468 + }, + { + "epoch": 0.48383205074304936, + "grad_norm": 6.1863490212084455, + "learning_rate": 5.504119153017009e-06, + "loss": 17.4748, + "step": 26469 + }, + { + "epoch": 0.48385032993949584, + "grad_norm": 9.94859428382597, + "learning_rate": 5.503824648983369e-06, + "loss": 18.8714, + "step": 26470 + }, + { + "epoch": 0.4838686091359424, + 
"grad_norm": 5.984985549829618, + "learning_rate": 5.503530143183865e-06, + "loss": 17.3481, + "step": 26471 + }, + { + "epoch": 0.4838868883323889, + "grad_norm": 7.570580355804501, + "learning_rate": 5.5032356356195325e-06, + "loss": 18.2996, + "step": 26472 + }, + { + "epoch": 0.48390516752883544, + "grad_norm": 8.095710540796654, + "learning_rate": 5.502941126291402e-06, + "loss": 17.7676, + "step": 26473 + }, + { + "epoch": 0.483923446725282, + "grad_norm": 6.442904414187398, + "learning_rate": 5.502646615200509e-06, + "loss": 17.41, + "step": 26474 + }, + { + "epoch": 0.48394172592172846, + "grad_norm": 5.8999603205240385, + "learning_rate": 5.502352102347881e-06, + "loss": 17.0339, + "step": 26475 + }, + { + "epoch": 0.483960005118175, + "grad_norm": 7.594679173460728, + "learning_rate": 5.502057587734553e-06, + "loss": 17.767, + "step": 26476 + }, + { + "epoch": 0.48397828431462153, + "grad_norm": 6.339421754426708, + "learning_rate": 5.501763071361557e-06, + "loss": 17.576, + "step": 26477 + }, + { + "epoch": 0.48399656351106807, + "grad_norm": 5.493731792588541, + "learning_rate": 5.501468553229924e-06, + "loss": 17.3342, + "step": 26478 + }, + { + "epoch": 0.4840148427075146, + "grad_norm": 5.890005557295245, + "learning_rate": 5.501174033340687e-06, + "loss": 17.1531, + "step": 26479 + }, + { + "epoch": 0.4840331219039611, + "grad_norm": 8.516081426871658, + "learning_rate": 5.500879511694881e-06, + "loss": 18.3053, + "step": 26480 + }, + { + "epoch": 0.4840514011004076, + "grad_norm": 6.46748852795383, + "learning_rate": 5.500584988293534e-06, + "loss": 17.4105, + "step": 26481 + }, + { + "epoch": 0.48406968029685415, + "grad_norm": 6.58473814722474, + "learning_rate": 5.50029046313768e-06, + "loss": 17.5878, + "step": 26482 + }, + { + "epoch": 0.4840879594933007, + "grad_norm": 5.820451167813538, + "learning_rate": 5.49999593622835e-06, + "loss": 17.3362, + "step": 26483 + }, + { + "epoch": 0.4841062386897472, + "grad_norm": 6.398707813036087, + "learning_rate": 5.499701407566581e-06, + "loss": 17.45, + "step": 26484 + }, + { + "epoch": 0.4841245178861937, + "grad_norm": 6.657717437355664, + "learning_rate": 5.499406877153401e-06, + "loss": 17.7112, + "step": 26485 + }, + { + "epoch": 0.48414279708264024, + "grad_norm": 5.654215096790829, + "learning_rate": 5.49911234498984e-06, + "loss": 17.0652, + "step": 26486 + }, + { + "epoch": 0.4841610762790868, + "grad_norm": 6.9519255804161, + "learning_rate": 5.498817811076938e-06, + "loss": 17.8553, + "step": 26487 + }, + { + "epoch": 0.4841793554755333, + "grad_norm": 6.242247399787906, + "learning_rate": 5.49852327541572e-06, + "loss": 17.5432, + "step": 26488 + }, + { + "epoch": 0.4841976346719798, + "grad_norm": 6.713974957489091, + "learning_rate": 5.498228738007222e-06, + "loss": 17.3966, + "step": 26489 + }, + { + "epoch": 0.48421591386842633, + "grad_norm": 6.439268771521275, + "learning_rate": 5.497934198852475e-06, + "loss": 17.7379, + "step": 26490 + }, + { + "epoch": 0.48423419306487286, + "grad_norm": 6.713643240167457, + "learning_rate": 5.497639657952513e-06, + "loss": 17.6121, + "step": 26491 + }, + { + "epoch": 0.4842524722613194, + "grad_norm": 6.863243618818315, + "learning_rate": 5.497345115308366e-06, + "loss": 17.7486, + "step": 26492 + }, + { + "epoch": 0.48427075145776594, + "grad_norm": 6.463231846157784, + "learning_rate": 5.497050570921067e-06, + "loss": 17.3952, + "step": 26493 + }, + { + "epoch": 0.4842890306542124, + "grad_norm": 7.895602202068241, + "learning_rate": 5.4967560247916516e-06, + "loss": 
17.902, + "step": 26494 + }, + { + "epoch": 0.48430730985065895, + "grad_norm": 7.699725467897229, + "learning_rate": 5.496461476921147e-06, + "loss": 18.0572, + "step": 26495 + }, + { + "epoch": 0.4843255890471055, + "grad_norm": 6.638513230121819, + "learning_rate": 5.4961669273105875e-06, + "loss": 17.6035, + "step": 26496 + }, + { + "epoch": 0.484343868243552, + "grad_norm": 6.281047227787639, + "learning_rate": 5.495872375961008e-06, + "loss": 16.9826, + "step": 26497 + }, + { + "epoch": 0.48436214743999856, + "grad_norm": 6.55216091310133, + "learning_rate": 5.495577822873439e-06, + "loss": 17.9349, + "step": 26498 + }, + { + "epoch": 0.48438042663644504, + "grad_norm": 5.6852815409141435, + "learning_rate": 5.495283268048912e-06, + "loss": 16.9378, + "step": 26499 + }, + { + "epoch": 0.4843987058328916, + "grad_norm": 7.084329263200316, + "learning_rate": 5.494988711488458e-06, + "loss": 17.6849, + "step": 26500 + }, + { + "epoch": 0.4844169850293381, + "grad_norm": 4.411131277470196, + "learning_rate": 5.4946941531931146e-06, + "loss": 16.6473, + "step": 26501 + }, + { + "epoch": 0.48443526422578465, + "grad_norm": 6.360299954623239, + "learning_rate": 5.49439959316391e-06, + "loss": 17.5655, + "step": 26502 + }, + { + "epoch": 0.4844535434222312, + "grad_norm": 7.005715812781231, + "learning_rate": 5.494105031401877e-06, + "loss": 17.8182, + "step": 26503 + }, + { + "epoch": 0.48447182261867766, + "grad_norm": 7.065328420310809, + "learning_rate": 5.49381046790805e-06, + "loss": 17.8284, + "step": 26504 + }, + { + "epoch": 0.4844901018151242, + "grad_norm": 7.66959744448296, + "learning_rate": 5.493515902683459e-06, + "loss": 18.0271, + "step": 26505 + }, + { + "epoch": 0.48450838101157073, + "grad_norm": 6.852720664283129, + "learning_rate": 5.493221335729139e-06, + "loss": 17.7273, + "step": 26506 + }, + { + "epoch": 0.48452666020801727, + "grad_norm": 6.1968679770201005, + "learning_rate": 5.49292676704612e-06, + "loss": 17.4665, + "step": 26507 + }, + { + "epoch": 0.4845449394044638, + "grad_norm": 6.000632562630498, + "learning_rate": 5.492632196635436e-06, + "loss": 17.4382, + "step": 26508 + }, + { + "epoch": 0.4845632186009103, + "grad_norm": 6.07520775388825, + "learning_rate": 5.492337624498117e-06, + "loss": 17.476, + "step": 26509 + }, + { + "epoch": 0.4845814977973568, + "grad_norm": 6.671538107506655, + "learning_rate": 5.4920430506351995e-06, + "loss": 17.4663, + "step": 26510 + }, + { + "epoch": 0.48459977699380336, + "grad_norm": 5.957206393544445, + "learning_rate": 5.491748475047714e-06, + "loss": 17.3816, + "step": 26511 + }, + { + "epoch": 0.4846180561902499, + "grad_norm": 5.550194755608161, + "learning_rate": 5.491453897736692e-06, + "loss": 17.0009, + "step": 26512 + }, + { + "epoch": 0.4846363353866964, + "grad_norm": 6.1428089729205695, + "learning_rate": 5.491159318703165e-06, + "loss": 17.1129, + "step": 26513 + }, + { + "epoch": 0.4846546145831429, + "grad_norm": 6.285517326065865, + "learning_rate": 5.490864737948169e-06, + "loss": 17.4101, + "step": 26514 + }, + { + "epoch": 0.48467289377958944, + "grad_norm": 6.65362113295435, + "learning_rate": 5.4905701554727365e-06, + "loss": 17.5512, + "step": 26515 + }, + { + "epoch": 0.484691172976036, + "grad_norm": 6.620467063218983, + "learning_rate": 5.490275571277896e-06, + "loss": 17.8127, + "step": 26516 + }, + { + "epoch": 0.4847094521724825, + "grad_norm": 5.786860733968187, + "learning_rate": 5.489980985364682e-06, + "loss": 17.1823, + "step": 26517 + }, + { + "epoch": 0.48472773136892905, + 
"grad_norm": 6.009540133094905, + "learning_rate": 5.4896863977341275e-06, + "loss": 17.1732, + "step": 26518 + }, + { + "epoch": 0.48474601056537553, + "grad_norm": 5.942866457705427, + "learning_rate": 5.489391808387265e-06, + "loss": 17.2876, + "step": 26519 + }, + { + "epoch": 0.48476428976182206, + "grad_norm": 6.147859989217325, + "learning_rate": 5.489097217325127e-06, + "loss": 17.4091, + "step": 26520 + }, + { + "epoch": 0.4847825689582686, + "grad_norm": 5.839743379545554, + "learning_rate": 5.4888026245487444e-06, + "loss": 17.1816, + "step": 26521 + }, + { + "epoch": 0.48480084815471514, + "grad_norm": 6.987820665477988, + "learning_rate": 5.488508030059152e-06, + "loss": 17.4251, + "step": 26522 + }, + { + "epoch": 0.4848191273511616, + "grad_norm": 6.279484115563279, + "learning_rate": 5.488213433857381e-06, + "loss": 17.4336, + "step": 26523 + }, + { + "epoch": 0.48483740654760815, + "grad_norm": 7.064761281317023, + "learning_rate": 5.487918835944465e-06, + "loss": 17.8791, + "step": 26524 + }, + { + "epoch": 0.4848556857440547, + "grad_norm": 8.05195396523413, + "learning_rate": 5.487624236321435e-06, + "loss": 17.53, + "step": 26525 + }, + { + "epoch": 0.4848739649405012, + "grad_norm": 7.037036320561439, + "learning_rate": 5.487329634989325e-06, + "loss": 17.7999, + "step": 26526 + }, + { + "epoch": 0.48489224413694776, + "grad_norm": 5.150846998602031, + "learning_rate": 5.487035031949165e-06, + "loss": 16.9384, + "step": 26527 + }, + { + "epoch": 0.48491052333339424, + "grad_norm": 5.853581615381309, + "learning_rate": 5.486740427201991e-06, + "loss": 17.1325, + "step": 26528 + }, + { + "epoch": 0.4849288025298408, + "grad_norm": 7.924993206481882, + "learning_rate": 5.486445820748835e-06, + "loss": 17.6764, + "step": 26529 + }, + { + "epoch": 0.4849470817262873, + "grad_norm": 7.8504844587924, + "learning_rate": 5.486151212590728e-06, + "loss": 18.0291, + "step": 26530 + }, + { + "epoch": 0.48496536092273385, + "grad_norm": 5.809219210938385, + "learning_rate": 5.485856602728702e-06, + "loss": 17.1463, + "step": 26531 + }, + { + "epoch": 0.4849836401191804, + "grad_norm": 8.660822147117912, + "learning_rate": 5.485561991163791e-06, + "loss": 18.2508, + "step": 26532 + }, + { + "epoch": 0.48500191931562686, + "grad_norm": 6.271816895138902, + "learning_rate": 5.485267377897029e-06, + "loss": 17.5188, + "step": 26533 + }, + { + "epoch": 0.4850201985120734, + "grad_norm": 7.230829243055291, + "learning_rate": 5.484972762929446e-06, + "loss": 17.5166, + "step": 26534 + }, + { + "epoch": 0.48503847770851993, + "grad_norm": 5.104340002457307, + "learning_rate": 5.484678146262075e-06, + "loss": 17.0811, + "step": 26535 + }, + { + "epoch": 0.48505675690496647, + "grad_norm": 6.016658967529224, + "learning_rate": 5.484383527895949e-06, + "loss": 17.5314, + "step": 26536 + }, + { + "epoch": 0.485075036101413, + "grad_norm": 6.387333265667369, + "learning_rate": 5.484088907832102e-06, + "loss": 17.7089, + "step": 26537 + }, + { + "epoch": 0.4850933152978595, + "grad_norm": 7.233007439047168, + "learning_rate": 5.483794286071565e-06, + "loss": 17.8259, + "step": 26538 + }, + { + "epoch": 0.485111594494306, + "grad_norm": 6.07590553003209, + "learning_rate": 5.483499662615371e-06, + "loss": 17.429, + "step": 26539 + }, + { + "epoch": 0.48512987369075256, + "grad_norm": 6.550549145304181, + "learning_rate": 5.483205037464552e-06, + "loss": 17.4891, + "step": 26540 + }, + { + "epoch": 0.4851481528871991, + "grad_norm": 5.359384545009174, + "learning_rate": 5.4829104106201415e-06, + 
"loss": 17.0303, + "step": 26541 + }, + { + "epoch": 0.4851664320836456, + "grad_norm": 5.337846074159136, + "learning_rate": 5.4826157820831715e-06, + "loss": 17.0801, + "step": 26542 + }, + { + "epoch": 0.4851847112800921, + "grad_norm": 6.631213815437908, + "learning_rate": 5.482321151854677e-06, + "loss": 17.6556, + "step": 26543 + }, + { + "epoch": 0.48520299047653864, + "grad_norm": 7.722108684275236, + "learning_rate": 5.482026519935687e-06, + "loss": 17.1154, + "step": 26544 + }, + { + "epoch": 0.4852212696729852, + "grad_norm": 6.857963631684011, + "learning_rate": 5.481731886327235e-06, + "loss": 17.8986, + "step": 26545 + }, + { + "epoch": 0.4852395488694317, + "grad_norm": 5.79207267534587, + "learning_rate": 5.481437251030357e-06, + "loss": 17.3041, + "step": 26546 + }, + { + "epoch": 0.48525782806587825, + "grad_norm": 6.3787164053305245, + "learning_rate": 5.4811426140460825e-06, + "loss": 17.3586, + "step": 26547 + }, + { + "epoch": 0.48527610726232473, + "grad_norm": 8.393542094603822, + "learning_rate": 5.480847975375444e-06, + "loss": 18.3381, + "step": 26548 + }, + { + "epoch": 0.48529438645877127, + "grad_norm": 6.574024373888554, + "learning_rate": 5.480553335019475e-06, + "loss": 17.5777, + "step": 26549 + }, + { + "epoch": 0.4853126656552178, + "grad_norm": 5.98329693173344, + "learning_rate": 5.4802586929792086e-06, + "loss": 17.3316, + "step": 26550 + }, + { + "epoch": 0.48533094485166434, + "grad_norm": 6.985402588071238, + "learning_rate": 5.479964049255677e-06, + "loss": 17.6135, + "step": 26551 + }, + { + "epoch": 0.48534922404811087, + "grad_norm": 6.4466575683269935, + "learning_rate": 5.479669403849913e-06, + "loss": 17.6279, + "step": 26552 + }, + { + "epoch": 0.48536750324455735, + "grad_norm": 5.189915395214789, + "learning_rate": 5.4793747567629504e-06, + "loss": 17.1822, + "step": 26553 + }, + { + "epoch": 0.4853857824410039, + "grad_norm": 6.067557612326647, + "learning_rate": 5.47908010799582e-06, + "loss": 17.5608, + "step": 26554 + }, + { + "epoch": 0.4854040616374504, + "grad_norm": 7.594750565730956, + "learning_rate": 5.478785457549555e-06, + "loss": 17.7572, + "step": 26555 + }, + { + "epoch": 0.48542234083389696, + "grad_norm": 6.435276649579518, + "learning_rate": 5.478490805425191e-06, + "loss": 17.6635, + "step": 26556 + }, + { + "epoch": 0.48544062003034344, + "grad_norm": 7.11158491200244, + "learning_rate": 5.4781961516237555e-06, + "loss": 17.7047, + "step": 26557 + }, + { + "epoch": 0.48545889922679, + "grad_norm": 4.733950175664281, + "learning_rate": 5.477901496146285e-06, + "loss": 16.7589, + "step": 26558 + }, + { + "epoch": 0.4854771784232365, + "grad_norm": 7.0080832196589276, + "learning_rate": 5.47760683899381e-06, + "loss": 17.9338, + "step": 26559 + }, + { + "epoch": 0.48549545761968305, + "grad_norm": 5.0607686591002965, + "learning_rate": 5.477312180167366e-06, + "loss": 16.995, + "step": 26560 + }, + { + "epoch": 0.4855137368161296, + "grad_norm": 6.137308209246509, + "learning_rate": 5.4770175196679845e-06, + "loss": 17.2181, + "step": 26561 + }, + { + "epoch": 0.48553201601257606, + "grad_norm": 6.7271861283140755, + "learning_rate": 5.4767228574966965e-06, + "loss": 17.5663, + "step": 26562 + }, + { + "epoch": 0.4855502952090226, + "grad_norm": 5.148819457273331, + "learning_rate": 5.476428193654537e-06, + "loss": 16.822, + "step": 26563 + }, + { + "epoch": 0.48556857440546913, + "grad_norm": 7.003527994582009, + "learning_rate": 5.4761335281425375e-06, + "loss": 17.6496, + "step": 26564 + }, + { + "epoch": 
0.48558685360191567, + "grad_norm": 8.40255245863197, + "learning_rate": 5.475838860961732e-06, + "loss": 17.9476, + "step": 26565 + }, + { + "epoch": 0.4856051327983622, + "grad_norm": 6.653719698463278, + "learning_rate": 5.475544192113152e-06, + "loss": 17.3842, + "step": 26566 + }, + { + "epoch": 0.4856234119948087, + "grad_norm": 5.443807941247819, + "learning_rate": 5.475249521597833e-06, + "loss": 16.907, + "step": 26567 + }, + { + "epoch": 0.4856416911912552, + "grad_norm": 5.86720973065785, + "learning_rate": 5.474954849416802e-06, + "loss": 17.2419, + "step": 26568 + }, + { + "epoch": 0.48565997038770176, + "grad_norm": 6.2895047620896065, + "learning_rate": 5.474660175571098e-06, + "loss": 17.3104, + "step": 26569 + }, + { + "epoch": 0.4856782495841483, + "grad_norm": 7.6818301270678155, + "learning_rate": 5.4743655000617515e-06, + "loss": 18.2199, + "step": 26570 + }, + { + "epoch": 0.4856965287805948, + "grad_norm": 7.084022234762375, + "learning_rate": 5.474070822889795e-06, + "loss": 18.0019, + "step": 26571 + }, + { + "epoch": 0.4857148079770413, + "grad_norm": 6.868505220168283, + "learning_rate": 5.47377614405626e-06, + "loss": 17.9504, + "step": 26572 + }, + { + "epoch": 0.48573308717348784, + "grad_norm": 6.6229483247928735, + "learning_rate": 5.47348146356218e-06, + "loss": 17.7776, + "step": 26573 + }, + { + "epoch": 0.4857513663699344, + "grad_norm": 6.440645746533691, + "learning_rate": 5.473186781408591e-06, + "loss": 17.5428, + "step": 26574 + }, + { + "epoch": 0.4857696455663809, + "grad_norm": 7.515916814424151, + "learning_rate": 5.4728920975965214e-06, + "loss": 17.715, + "step": 26575 + }, + { + "epoch": 0.48578792476282745, + "grad_norm": 6.886688046334952, + "learning_rate": 5.472597412127008e-06, + "loss": 17.5188, + "step": 26576 + }, + { + "epoch": 0.48580620395927393, + "grad_norm": 6.25423326557669, + "learning_rate": 5.47230272500108e-06, + "loss": 17.1894, + "step": 26577 + }, + { + "epoch": 0.48582448315572047, + "grad_norm": 5.149576858402395, + "learning_rate": 5.472008036219772e-06, + "loss": 17.0235, + "step": 26578 + }, + { + "epoch": 0.485842762352167, + "grad_norm": 5.221973513778705, + "learning_rate": 5.471713345784118e-06, + "loss": 16.9643, + "step": 26579 + }, + { + "epoch": 0.48586104154861354, + "grad_norm": 5.60671239177601, + "learning_rate": 5.471418653695149e-06, + "loss": 17.1331, + "step": 26580 + }, + { + "epoch": 0.4858793207450601, + "grad_norm": 8.106985399703962, + "learning_rate": 5.471123959953898e-06, + "loss": 17.6359, + "step": 26581 + }, + { + "epoch": 0.48589759994150655, + "grad_norm": 7.437369590931693, + "learning_rate": 5.4708292645613995e-06, + "loss": 17.9801, + "step": 26582 + }, + { + "epoch": 0.4859158791379531, + "grad_norm": 5.063156408596951, + "learning_rate": 5.470534567518684e-06, + "loss": 16.9018, + "step": 26583 + }, + { + "epoch": 0.4859341583343996, + "grad_norm": 9.493058215283343, + "learning_rate": 5.470239868826788e-06, + "loss": 17.5201, + "step": 26584 + }, + { + "epoch": 0.48595243753084616, + "grad_norm": 6.557149756489445, + "learning_rate": 5.46994516848674e-06, + "loss": 17.3902, + "step": 26585 + }, + { + "epoch": 0.4859707167272927, + "grad_norm": 7.111068340274383, + "learning_rate": 5.469650466499574e-06, + "loss": 17.8025, + "step": 26586 + }, + { + "epoch": 0.4859889959237392, + "grad_norm": 7.015342299448457, + "learning_rate": 5.469355762866327e-06, + "loss": 17.7311, + "step": 26587 + }, + { + "epoch": 0.4860072751201857, + "grad_norm": 5.7164801045758695, + "learning_rate": 
5.469061057588027e-06, + "loss": 17.3174, + "step": 26588 + }, + { + "epoch": 0.48602555431663225, + "grad_norm": 5.642670381011369, + "learning_rate": 5.468766350665709e-06, + "loss": 17.0686, + "step": 26589 + }, + { + "epoch": 0.4860438335130788, + "grad_norm": 8.514006608723344, + "learning_rate": 5.468471642100404e-06, + "loss": 17.354, + "step": 26590 + }, + { + "epoch": 0.48606211270952526, + "grad_norm": 6.285525586919754, + "learning_rate": 5.468176931893149e-06, + "loss": 17.5392, + "step": 26591 + }, + { + "epoch": 0.4860803919059718, + "grad_norm": 4.87026401636559, + "learning_rate": 5.467882220044973e-06, + "loss": 17.0176, + "step": 26592 + }, + { + "epoch": 0.48609867110241833, + "grad_norm": 6.604639662487142, + "learning_rate": 5.467587506556911e-06, + "loss": 17.5402, + "step": 26593 + }, + { + "epoch": 0.48611695029886487, + "grad_norm": 7.691647893381333, + "learning_rate": 5.467292791429997e-06, + "loss": 17.6207, + "step": 26594 + }, + { + "epoch": 0.4861352294953114, + "grad_norm": 5.898843312344089, + "learning_rate": 5.466998074665259e-06, + "loss": 17.2536, + "step": 26595 + }, + { + "epoch": 0.4861535086917579, + "grad_norm": 7.573039342632927, + "learning_rate": 5.466703356263734e-06, + "loss": 17.5114, + "step": 26596 + }, + { + "epoch": 0.4861717878882044, + "grad_norm": 5.101249641526701, + "learning_rate": 5.466408636226455e-06, + "loss": 16.9151, + "step": 26597 + }, + { + "epoch": 0.48619006708465096, + "grad_norm": 5.8520661688430575, + "learning_rate": 5.466113914554455e-06, + "loss": 17.3912, + "step": 26598 + }, + { + "epoch": 0.4862083462810975, + "grad_norm": 6.020245283050912, + "learning_rate": 5.465819191248766e-06, + "loss": 17.3466, + "step": 26599 + }, + { + "epoch": 0.486226625477544, + "grad_norm": 5.594927094598268, + "learning_rate": 5.465524466310419e-06, + "loss": 17.1538, + "step": 26600 + }, + { + "epoch": 0.4862449046739905, + "grad_norm": 7.47649543567741, + "learning_rate": 5.465229739740452e-06, + "loss": 18.611, + "step": 26601 + }, + { + "epoch": 0.48626318387043704, + "grad_norm": 5.2141971011979855, + "learning_rate": 5.464935011539894e-06, + "loss": 17.0901, + "step": 26602 + }, + { + "epoch": 0.4862814630668836, + "grad_norm": 6.845905298465474, + "learning_rate": 5.464640281709779e-06, + "loss": 17.6799, + "step": 26603 + }, + { + "epoch": 0.4862997422633301, + "grad_norm": 6.394076981508338, + "learning_rate": 5.46434555025114e-06, + "loss": 17.6783, + "step": 26604 + }, + { + "epoch": 0.48631802145977665, + "grad_norm": 5.506820170598393, + "learning_rate": 5.464050817165009e-06, + "loss": 16.9771, + "step": 26605 + }, + { + "epoch": 0.48633630065622313, + "grad_norm": 7.344541839171006, + "learning_rate": 5.463756082452423e-06, + "loss": 17.163, + "step": 26606 + }, + { + "epoch": 0.48635457985266967, + "grad_norm": 6.527649923861331, + "learning_rate": 5.463461346114409e-06, + "loss": 17.3594, + "step": 26607 + }, + { + "epoch": 0.4863728590491162, + "grad_norm": 5.226281276789873, + "learning_rate": 5.463166608152005e-06, + "loss": 17.0518, + "step": 26608 + }, + { + "epoch": 0.48639113824556274, + "grad_norm": 5.9010886221186505, + "learning_rate": 5.462871868566242e-06, + "loss": 17.4303, + "step": 26609 + }, + { + "epoch": 0.4864094174420093, + "grad_norm": 7.05978247832892, + "learning_rate": 5.462577127358154e-06, + "loss": 17.7988, + "step": 26610 + }, + { + "epoch": 0.48642769663845575, + "grad_norm": 7.197769770156895, + "learning_rate": 5.462282384528773e-06, + "loss": 18.1668, + "step": 26611 + }, + { + 
"epoch": 0.4864459758349023, + "grad_norm": 7.142218245417265, + "learning_rate": 5.461987640079132e-06, + "loss": 17.8786, + "step": 26612 + }, + { + "epoch": 0.4864642550313488, + "grad_norm": 6.449349589955589, + "learning_rate": 5.461692894010263e-06, + "loss": 17.7673, + "step": 26613 + }, + { + "epoch": 0.48648253422779536, + "grad_norm": 6.426401988205395, + "learning_rate": 5.461398146323202e-06, + "loss": 17.8083, + "step": 26614 + }, + { + "epoch": 0.4865008134242419, + "grad_norm": 7.282501321645925, + "learning_rate": 5.461103397018981e-06, + "loss": 17.8547, + "step": 26615 + }, + { + "epoch": 0.4865190926206884, + "grad_norm": 6.3285525513507555, + "learning_rate": 5.4608086460986324e-06, + "loss": 17.5695, + "step": 26616 + }, + { + "epoch": 0.4865373718171349, + "grad_norm": 6.616748535093236, + "learning_rate": 5.460513893563189e-06, + "loss": 17.4545, + "step": 26617 + }, + { + "epoch": 0.48655565101358145, + "grad_norm": 6.015220408534, + "learning_rate": 5.460219139413684e-06, + "loss": 17.2147, + "step": 26618 + }, + { + "epoch": 0.486573930210028, + "grad_norm": 6.001736263614366, + "learning_rate": 5.4599243836511516e-06, + "loss": 17.3096, + "step": 26619 + }, + { + "epoch": 0.4865922094064745, + "grad_norm": 7.029744272317311, + "learning_rate": 5.459629626276624e-06, + "loss": 17.556, + "step": 26620 + }, + { + "epoch": 0.486610488602921, + "grad_norm": 6.365931427236614, + "learning_rate": 5.459334867291134e-06, + "loss": 17.6441, + "step": 26621 + }, + { + "epoch": 0.48662876779936753, + "grad_norm": 7.774243933733844, + "learning_rate": 5.459040106695716e-06, + "loss": 18.2137, + "step": 26622 + }, + { + "epoch": 0.48664704699581407, + "grad_norm": 5.9603038412986376, + "learning_rate": 5.4587453444914e-06, + "loss": 17.4149, + "step": 26623 + }, + { + "epoch": 0.4866653261922606, + "grad_norm": 5.659576186668394, + "learning_rate": 5.458450580679223e-06, + "loss": 17.3379, + "step": 26624 + }, + { + "epoch": 0.4866836053887071, + "grad_norm": 6.196018730642399, + "learning_rate": 5.458155815260219e-06, + "loss": 17.4753, + "step": 26625 + }, + { + "epoch": 0.4867018845851536, + "grad_norm": 7.024416450972192, + "learning_rate": 5.457861048235416e-06, + "loss": 17.7739, + "step": 26626 + }, + { + "epoch": 0.48672016378160016, + "grad_norm": 5.400957036917106, + "learning_rate": 5.457566279605848e-06, + "loss": 17.0448, + "step": 26627 + }, + { + "epoch": 0.4867384429780467, + "grad_norm": 5.8155318177455575, + "learning_rate": 5.4572715093725515e-06, + "loss": 17.4156, + "step": 26628 + }, + { + "epoch": 0.48675672217449323, + "grad_norm": 5.562979741031682, + "learning_rate": 5.45697673753656e-06, + "loss": 17.1216, + "step": 26629 + }, + { + "epoch": 0.4867750013709397, + "grad_norm": 7.12242779394732, + "learning_rate": 5.456681964098902e-06, + "loss": 17.4697, + "step": 26630 + }, + { + "epoch": 0.48679328056738624, + "grad_norm": 5.889235737164289, + "learning_rate": 5.4563871890606145e-06, + "loss": 17.4843, + "step": 26631 + }, + { + "epoch": 0.4868115597638328, + "grad_norm": 6.642818958598797, + "learning_rate": 5.45609241242273e-06, + "loss": 17.4714, + "step": 26632 + }, + { + "epoch": 0.4868298389602793, + "grad_norm": 5.585845806917995, + "learning_rate": 5.45579763418628e-06, + "loss": 17.255, + "step": 26633 + }, + { + "epoch": 0.48684811815672585, + "grad_norm": 5.393327362257041, + "learning_rate": 5.455502854352299e-06, + "loss": 17.2974, + "step": 26634 + }, + { + "epoch": 0.48686639735317233, + "grad_norm": 7.314095707053459, + 
"learning_rate": 5.455208072921821e-06, + "loss": 17.8221, + "step": 26635 + }, + { + "epoch": 0.48688467654961887, + "grad_norm": 7.0350601960001535, + "learning_rate": 5.454913289895878e-06, + "loss": 17.6477, + "step": 26636 + }, + { + "epoch": 0.4869029557460654, + "grad_norm": 6.931914639525556, + "learning_rate": 5.454618505275503e-06, + "loss": 17.7597, + "step": 26637 + }, + { + "epoch": 0.48692123494251194, + "grad_norm": 7.275927073570928, + "learning_rate": 5.454323719061729e-06, + "loss": 17.9571, + "step": 26638 + }, + { + "epoch": 0.4869395141389585, + "grad_norm": 5.102907014372395, + "learning_rate": 5.454028931255592e-06, + "loss": 16.9075, + "step": 26639 + }, + { + "epoch": 0.48695779333540495, + "grad_norm": 5.730347467741487, + "learning_rate": 5.45373414185812e-06, + "loss": 17.3043, + "step": 26640 + }, + { + "epoch": 0.4869760725318515, + "grad_norm": 7.3836310643565914, + "learning_rate": 5.453439350870351e-06, + "loss": 17.4446, + "step": 26641 + }, + { + "epoch": 0.486994351728298, + "grad_norm": 5.020498286082509, + "learning_rate": 5.453144558293315e-06, + "loss": 16.9975, + "step": 26642 + }, + { + "epoch": 0.48701263092474456, + "grad_norm": 8.013904751478776, + "learning_rate": 5.452849764128049e-06, + "loss": 18.1751, + "step": 26643 + }, + { + "epoch": 0.4870309101211911, + "grad_norm": 7.2824823067803175, + "learning_rate": 5.452554968375583e-06, + "loss": 17.982, + "step": 26644 + }, + { + "epoch": 0.4870491893176376, + "grad_norm": 6.719829505022645, + "learning_rate": 5.452260171036949e-06, + "loss": 17.6701, + "step": 26645 + }, + { + "epoch": 0.4870674685140841, + "grad_norm": 5.584415576101518, + "learning_rate": 5.451965372113185e-06, + "loss": 17.0621, + "step": 26646 + }, + { + "epoch": 0.48708574771053065, + "grad_norm": 6.707432748749226, + "learning_rate": 5.451670571605321e-06, + "loss": 17.4253, + "step": 26647 + }, + { + "epoch": 0.4871040269069772, + "grad_norm": 7.062764180772722, + "learning_rate": 5.45137576951439e-06, + "loss": 17.8118, + "step": 26648 + }, + { + "epoch": 0.4871223061034237, + "grad_norm": 9.202990816780765, + "learning_rate": 5.451080965841427e-06, + "loss": 17.9016, + "step": 26649 + }, + { + "epoch": 0.4871405852998702, + "grad_norm": 6.32233053160526, + "learning_rate": 5.450786160587463e-06, + "loss": 17.4882, + "step": 26650 + }, + { + "epoch": 0.48715886449631673, + "grad_norm": 6.069334385436446, + "learning_rate": 5.450491353753533e-06, + "loss": 17.2714, + "step": 26651 + }, + { + "epoch": 0.48717714369276327, + "grad_norm": 6.147846392541621, + "learning_rate": 5.450196545340671e-06, + "loss": 17.4155, + "step": 26652 + }, + { + "epoch": 0.4871954228892098, + "grad_norm": 6.07501178327118, + "learning_rate": 5.4499017353499095e-06, + "loss": 17.3031, + "step": 26653 + }, + { + "epoch": 0.48721370208565634, + "grad_norm": 4.661875821745856, + "learning_rate": 5.449606923782279e-06, + "loss": 16.7665, + "step": 26654 + }, + { + "epoch": 0.4872319812821028, + "grad_norm": 6.967972881934136, + "learning_rate": 5.449312110638817e-06, + "loss": 17.7271, + "step": 26655 + }, + { + "epoch": 0.48725026047854936, + "grad_norm": 8.872333170762872, + "learning_rate": 5.449017295920556e-06, + "loss": 17.9727, + "step": 26656 + }, + { + "epoch": 0.4872685396749959, + "grad_norm": 6.082674160655565, + "learning_rate": 5.448722479628528e-06, + "loss": 17.541, + "step": 26657 + }, + { + "epoch": 0.48728681887144243, + "grad_norm": 5.8644745001181775, + "learning_rate": 5.448427661763766e-06, + "loss": 17.274, + "step": 26658 
+ }, + { + "epoch": 0.4873050980678889, + "grad_norm": 8.562532643994132, + "learning_rate": 5.448132842327302e-06, + "loss": 18.3781, + "step": 26659 + }, + { + "epoch": 0.48732337726433544, + "grad_norm": 7.318551635687446, + "learning_rate": 5.447838021320173e-06, + "loss": 17.994, + "step": 26660 + }, + { + "epoch": 0.487341656460782, + "grad_norm": 7.048067486365951, + "learning_rate": 5.447543198743411e-06, + "loss": 17.6905, + "step": 26661 + }, + { + "epoch": 0.4873599356572285, + "grad_norm": 7.088445178297393, + "learning_rate": 5.447248374598049e-06, + "loss": 17.5954, + "step": 26662 + }, + { + "epoch": 0.48737821485367505, + "grad_norm": 6.056738429756884, + "learning_rate": 5.44695354888512e-06, + "loss": 17.2938, + "step": 26663 + }, + { + "epoch": 0.48739649405012153, + "grad_norm": 5.597497715489482, + "learning_rate": 5.446658721605657e-06, + "loss": 17.4139, + "step": 26664 + }, + { + "epoch": 0.48741477324656807, + "grad_norm": 7.8833620968787725, + "learning_rate": 5.446363892760694e-06, + "loss": 18.2431, + "step": 26665 + }, + { + "epoch": 0.4874330524430146, + "grad_norm": 5.829542433580899, + "learning_rate": 5.446069062351265e-06, + "loss": 17.4375, + "step": 26666 + }, + { + "epoch": 0.48745133163946114, + "grad_norm": 7.326167912127498, + "learning_rate": 5.445774230378402e-06, + "loss": 18.1475, + "step": 26667 + }, + { + "epoch": 0.4874696108359077, + "grad_norm": 8.159616676986518, + "learning_rate": 5.44547939684314e-06, + "loss": 17.8864, + "step": 26668 + }, + { + "epoch": 0.48748789003235415, + "grad_norm": 6.733878280865458, + "learning_rate": 5.445184561746511e-06, + "loss": 17.5365, + "step": 26669 + }, + { + "epoch": 0.4875061692288007, + "grad_norm": 6.150416170308892, + "learning_rate": 5.444889725089548e-06, + "loss": 17.3496, + "step": 26670 + }, + { + "epoch": 0.4875244484252472, + "grad_norm": 5.852591058993894, + "learning_rate": 5.444594886873286e-06, + "loss": 17.3277, + "step": 26671 + }, + { + "epoch": 0.48754272762169376, + "grad_norm": 7.050355774784577, + "learning_rate": 5.444300047098756e-06, + "loss": 17.8252, + "step": 26672 + }, + { + "epoch": 0.4875610068181403, + "grad_norm": 5.101136321964566, + "learning_rate": 5.4440052057669934e-06, + "loss": 16.9262, + "step": 26673 + }, + { + "epoch": 0.4875792860145868, + "grad_norm": 6.171134819997972, + "learning_rate": 5.443710362879033e-06, + "loss": 17.3672, + "step": 26674 + }, + { + "epoch": 0.4875975652110333, + "grad_norm": 8.424747184849217, + "learning_rate": 5.443415518435905e-06, + "loss": 18.4778, + "step": 26675 + }, + { + "epoch": 0.48761584440747985, + "grad_norm": 6.663080208183747, + "learning_rate": 5.443120672438643e-06, + "loss": 17.5707, + "step": 26676 + }, + { + "epoch": 0.4876341236039264, + "grad_norm": 5.795464634344631, + "learning_rate": 5.442825824888283e-06, + "loss": 17.2108, + "step": 26677 + }, + { + "epoch": 0.4876524028003729, + "grad_norm": 5.469798270017111, + "learning_rate": 5.442530975785855e-06, + "loss": 17.1255, + "step": 26678 + }, + { + "epoch": 0.4876706819968194, + "grad_norm": 7.341864107316959, + "learning_rate": 5.442236125132397e-06, + "loss": 17.7743, + "step": 26679 + }, + { + "epoch": 0.48768896119326594, + "grad_norm": 6.446259241788843, + "learning_rate": 5.441941272928939e-06, + "loss": 17.5795, + "step": 26680 + }, + { + "epoch": 0.48770724038971247, + "grad_norm": 6.32346501154039, + "learning_rate": 5.441646419176514e-06, + "loss": 17.399, + "step": 26681 + }, + { + "epoch": 0.487725519586159, + "grad_norm": 7.253957061840832, + 
"learning_rate": 5.441351563876157e-06, + "loss": 17.7322, + "step": 26682 + }, + { + "epoch": 0.48774379878260554, + "grad_norm": 8.251096577384583, + "learning_rate": 5.441056707028901e-06, + "loss": 17.5844, + "step": 26683 + }, + { + "epoch": 0.487762077979052, + "grad_norm": 8.309287226384034, + "learning_rate": 5.440761848635781e-06, + "loss": 18.3265, + "step": 26684 + }, + { + "epoch": 0.48778035717549856, + "grad_norm": 6.478423372830788, + "learning_rate": 5.440466988697828e-06, + "loss": 17.7607, + "step": 26685 + }, + { + "epoch": 0.4877986363719451, + "grad_norm": 6.46679214652241, + "learning_rate": 5.440172127216076e-06, + "loss": 17.3941, + "step": 26686 + }, + { + "epoch": 0.48781691556839163, + "grad_norm": 6.592282836976066, + "learning_rate": 5.43987726419156e-06, + "loss": 17.6032, + "step": 26687 + }, + { + "epoch": 0.48783519476483816, + "grad_norm": 6.687011106745191, + "learning_rate": 5.439582399625311e-06, + "loss": 17.6042, + "step": 26688 + }, + { + "epoch": 0.48785347396128464, + "grad_norm": 6.168841621335481, + "learning_rate": 5.439287533518364e-06, + "loss": 17.363, + "step": 26689 + }, + { + "epoch": 0.4878717531577312, + "grad_norm": 5.826934054760655, + "learning_rate": 5.438992665871753e-06, + "loss": 17.2163, + "step": 26690 + }, + { + "epoch": 0.4878900323541777, + "grad_norm": 6.508074954201721, + "learning_rate": 5.438697796686511e-06, + "loss": 17.7818, + "step": 26691 + }, + { + "epoch": 0.48790831155062425, + "grad_norm": 5.347730389877579, + "learning_rate": 5.438402925963669e-06, + "loss": 17.0873, + "step": 26692 + }, + { + "epoch": 0.48792659074707073, + "grad_norm": 5.735453219002497, + "learning_rate": 5.4381080537042655e-06, + "loss": 17.0578, + "step": 26693 + }, + { + "epoch": 0.48794486994351727, + "grad_norm": 5.871442657524678, + "learning_rate": 5.437813179909332e-06, + "loss": 17.3047, + "step": 26694 + }, + { + "epoch": 0.4879631491399638, + "grad_norm": 6.4683578382875435, + "learning_rate": 5.437518304579898e-06, + "loss": 17.4914, + "step": 26695 + }, + { + "epoch": 0.48798142833641034, + "grad_norm": 5.744793784154497, + "learning_rate": 5.437223427717001e-06, + "loss": 17.2997, + "step": 26696 + }, + { + "epoch": 0.4879997075328569, + "grad_norm": 5.867712906536801, + "learning_rate": 5.436928549321675e-06, + "loss": 16.928, + "step": 26697 + }, + { + "epoch": 0.48801798672930335, + "grad_norm": 6.391154608621728, + "learning_rate": 5.436633669394953e-06, + "loss": 17.5694, + "step": 26698 + }, + { + "epoch": 0.4880362659257499, + "grad_norm": 5.955561226736121, + "learning_rate": 5.436338787937867e-06, + "loss": 17.1313, + "step": 26699 + }, + { + "epoch": 0.4880545451221964, + "grad_norm": 5.836660560356295, + "learning_rate": 5.43604390495145e-06, + "loss": 17.2105, + "step": 26700 + }, + { + "epoch": 0.48807282431864296, + "grad_norm": 6.2484085719067295, + "learning_rate": 5.43574902043674e-06, + "loss": 17.5134, + "step": 26701 + }, + { + "epoch": 0.4880911035150895, + "grad_norm": 7.114590802442527, + "learning_rate": 5.435454134394765e-06, + "loss": 17.5665, + "step": 26702 + }, + { + "epoch": 0.488109382711536, + "grad_norm": 7.373815281828023, + "learning_rate": 5.4351592468265625e-06, + "loss": 17.5261, + "step": 26703 + }, + { + "epoch": 0.4881276619079825, + "grad_norm": 8.493552962876162, + "learning_rate": 5.434864357733164e-06, + "loss": 18.0959, + "step": 26704 + }, + { + "epoch": 0.48814594110442905, + "grad_norm": 6.985858148092461, + "learning_rate": 5.434569467115604e-06, + "loss": 17.6583, + "step": 
26705 + }, + { + "epoch": 0.4881642203008756, + "grad_norm": 6.05518898261649, + "learning_rate": 5.434274574974915e-06, + "loss": 17.3173, + "step": 26706 + }, + { + "epoch": 0.4881824994973221, + "grad_norm": 7.075867296689358, + "learning_rate": 5.433979681312131e-06, + "loss": 17.7777, + "step": 26707 + }, + { + "epoch": 0.4882007786937686, + "grad_norm": 7.078355166872703, + "learning_rate": 5.433684786128287e-06, + "loss": 18.022, + "step": 26708 + }, + { + "epoch": 0.48821905789021514, + "grad_norm": 6.155883790054182, + "learning_rate": 5.433389889424416e-06, + "loss": 17.5531, + "step": 26709 + }, + { + "epoch": 0.48823733708666167, + "grad_norm": 7.2286511400033175, + "learning_rate": 5.433094991201549e-06, + "loss": 17.9021, + "step": 26710 + }, + { + "epoch": 0.4882556162831082, + "grad_norm": 4.98670601372043, + "learning_rate": 5.432800091460723e-06, + "loss": 16.9237, + "step": 26711 + }, + { + "epoch": 0.48827389547955474, + "grad_norm": 7.6114791374891695, + "learning_rate": 5.43250519020297e-06, + "loss": 18.0662, + "step": 26712 + }, + { + "epoch": 0.4882921746760012, + "grad_norm": 7.510468745586594, + "learning_rate": 5.432210287429324e-06, + "loss": 17.7714, + "step": 26713 + }, + { + "epoch": 0.48831045387244776, + "grad_norm": 6.251969574222383, + "learning_rate": 5.4319153831408176e-06, + "loss": 17.5097, + "step": 26714 + }, + { + "epoch": 0.4883287330688943, + "grad_norm": 7.079435065952679, + "learning_rate": 5.4316204773384865e-06, + "loss": 17.6973, + "step": 26715 + }, + { + "epoch": 0.48834701226534083, + "grad_norm": 7.955138314761039, + "learning_rate": 5.431325570023362e-06, + "loss": 17.8779, + "step": 26716 + }, + { + "epoch": 0.48836529146178737, + "grad_norm": 6.933085599949644, + "learning_rate": 5.431030661196481e-06, + "loss": 17.819, + "step": 26717 + }, + { + "epoch": 0.48838357065823385, + "grad_norm": 6.230553472893436, + "learning_rate": 5.430735750858872e-06, + "loss": 17.508, + "step": 26718 + }, + { + "epoch": 0.4884018498546804, + "grad_norm": 5.705069411832746, + "learning_rate": 5.430440839011572e-06, + "loss": 17.1437, + "step": 26719 + }, + { + "epoch": 0.4884201290511269, + "grad_norm": 7.904679814923992, + "learning_rate": 5.430145925655616e-06, + "loss": 18.4188, + "step": 26720 + }, + { + "epoch": 0.48843840824757345, + "grad_norm": 5.547515061694185, + "learning_rate": 5.4298510107920345e-06, + "loss": 17.1844, + "step": 26721 + }, + { + "epoch": 0.48845668744402, + "grad_norm": 5.882950277228873, + "learning_rate": 5.429556094421863e-06, + "loss": 17.1431, + "step": 26722 + }, + { + "epoch": 0.48847496664046647, + "grad_norm": 5.959155236936194, + "learning_rate": 5.429261176546134e-06, + "loss": 17.1748, + "step": 26723 + }, + { + "epoch": 0.488493245836913, + "grad_norm": 6.085907969524742, + "learning_rate": 5.428966257165882e-06, + "loss": 17.637, + "step": 26724 + }, + { + "epoch": 0.48851152503335954, + "grad_norm": 5.442096106554661, + "learning_rate": 5.428671336282142e-06, + "loss": 17.1573, + "step": 26725 + }, + { + "epoch": 0.4885298042298061, + "grad_norm": 6.044868232271571, + "learning_rate": 5.428376413895945e-06, + "loss": 17.719, + "step": 26726 + }, + { + "epoch": 0.48854808342625256, + "grad_norm": 6.206093193676755, + "learning_rate": 5.428081490008325e-06, + "loss": 17.208, + "step": 26727 + }, + { + "epoch": 0.4885663626226991, + "grad_norm": 4.799014605211223, + "learning_rate": 5.427786564620318e-06, + "loss": 16.8918, + "step": 26728 + }, + { + "epoch": 0.4885846418191456, + "grad_norm": 
6.832359469861618, + "learning_rate": 5.4274916377329556e-06, + "loss": 17.8295, + "step": 26729 + }, + { + "epoch": 0.48860292101559216, + "grad_norm": 6.656490306847209, + "learning_rate": 5.427196709347272e-06, + "loss": 17.855, + "step": 26730 + }, + { + "epoch": 0.4886212002120387, + "grad_norm": 5.792625739089165, + "learning_rate": 5.4269017794643006e-06, + "loss": 17.1727, + "step": 26731 + }, + { + "epoch": 0.4886394794084852, + "grad_norm": 6.971258159023458, + "learning_rate": 5.426606848085076e-06, + "loss": 17.5696, + "step": 26732 + }, + { + "epoch": 0.4886577586049317, + "grad_norm": 5.25932462163262, + "learning_rate": 5.426311915210633e-06, + "loss": 16.901, + "step": 26733 + }, + { + "epoch": 0.48867603780137825, + "grad_norm": 7.260817596924251, + "learning_rate": 5.426016980842002e-06, + "loss": 18.1982, + "step": 26734 + }, + { + "epoch": 0.4886943169978248, + "grad_norm": 8.153119422459952, + "learning_rate": 5.425722044980217e-06, + "loss": 18.3199, + "step": 26735 + }, + { + "epoch": 0.4887125961942713, + "grad_norm": 6.048124673936369, + "learning_rate": 5.425427107626316e-06, + "loss": 17.2398, + "step": 26736 + }, + { + "epoch": 0.4887308753907178, + "grad_norm": 7.214994033609668, + "learning_rate": 5.425132168781328e-06, + "loss": 17.6843, + "step": 26737 + }, + { + "epoch": 0.48874915458716434, + "grad_norm": 6.063501555656492, + "learning_rate": 5.424837228446289e-06, + "loss": 17.4596, + "step": 26738 + }, + { + "epoch": 0.48876743378361087, + "grad_norm": 5.529805401716335, + "learning_rate": 5.424542286622234e-06, + "loss": 17.1915, + "step": 26739 + }, + { + "epoch": 0.4887857129800574, + "grad_norm": 5.316192857659175, + "learning_rate": 5.424247343310193e-06, + "loss": 17.0905, + "step": 26740 + }, + { + "epoch": 0.48880399217650394, + "grad_norm": 7.14134986856988, + "learning_rate": 5.423952398511202e-06, + "loss": 17.7756, + "step": 26741 + }, + { + "epoch": 0.4888222713729504, + "grad_norm": 5.6608215942570155, + "learning_rate": 5.4236574522262955e-06, + "loss": 17.1622, + "step": 26742 + }, + { + "epoch": 0.48884055056939696, + "grad_norm": 7.754954075097137, + "learning_rate": 5.4233625044565075e-06, + "loss": 17.5948, + "step": 26743 + }, + { + "epoch": 0.4888588297658435, + "grad_norm": 6.304006626992727, + "learning_rate": 5.4230675552028686e-06, + "loss": 17.4356, + "step": 26744 + }, + { + "epoch": 0.48887710896229003, + "grad_norm": 7.6598036781548435, + "learning_rate": 5.4227726044664154e-06, + "loss": 18.1548, + "step": 26745 + }, + { + "epoch": 0.48889538815873657, + "grad_norm": 5.043567632997474, + "learning_rate": 5.4224776522481815e-06, + "loss": 16.9123, + "step": 26746 + }, + { + "epoch": 0.48891366735518305, + "grad_norm": 7.4474356120256795, + "learning_rate": 5.4221826985491984e-06, + "loss": 17.4138, + "step": 26747 + }, + { + "epoch": 0.4889319465516296, + "grad_norm": 6.580050828900662, + "learning_rate": 5.421887743370503e-06, + "loss": 17.678, + "step": 26748 + }, + { + "epoch": 0.4889502257480761, + "grad_norm": 5.914436484098127, + "learning_rate": 5.421592786713128e-06, + "loss": 17.3221, + "step": 26749 + }, + { + "epoch": 0.48896850494452265, + "grad_norm": 7.040745101047383, + "learning_rate": 5.421297828578106e-06, + "loss": 18.0544, + "step": 26750 + }, + { + "epoch": 0.4889867841409692, + "grad_norm": 5.439251177141843, + "learning_rate": 5.4210028689664715e-06, + "loss": 17.0645, + "step": 26751 + }, + { + "epoch": 0.48900506333741567, + "grad_norm": 7.458356758409796, + "learning_rate": 5.420707907879259e-06, + 
"loss": 17.536, + "step": 26752 + }, + { + "epoch": 0.4890233425338622, + "grad_norm": 6.508491627841003, + "learning_rate": 5.420412945317502e-06, + "loss": 17.4326, + "step": 26753 + }, + { + "epoch": 0.48904162173030874, + "grad_norm": 6.51150504386778, + "learning_rate": 5.420117981282234e-06, + "loss": 17.5608, + "step": 26754 + }, + { + "epoch": 0.4890599009267553, + "grad_norm": 6.116798502026182, + "learning_rate": 5.419823015774488e-06, + "loss": 17.3379, + "step": 26755 + }, + { + "epoch": 0.4890781801232018, + "grad_norm": 7.176310260618286, + "learning_rate": 5.419528048795301e-06, + "loss": 17.9868, + "step": 26756 + }, + { + "epoch": 0.4890964593196483, + "grad_norm": 6.910287149539001, + "learning_rate": 5.419233080345702e-06, + "loss": 17.8103, + "step": 26757 + }, + { + "epoch": 0.4891147385160948, + "grad_norm": 6.065923872504756, + "learning_rate": 5.418938110426729e-06, + "loss": 17.4823, + "step": 26758 + }, + { + "epoch": 0.48913301771254136, + "grad_norm": 5.272226781807781, + "learning_rate": 5.4186431390394124e-06, + "loss": 16.9866, + "step": 26759 + }, + { + "epoch": 0.4891512969089879, + "grad_norm": 6.766457150853006, + "learning_rate": 5.41834816618479e-06, + "loss": 17.6785, + "step": 26760 + }, + { + "epoch": 0.4891695761054344, + "grad_norm": 5.990333341937009, + "learning_rate": 5.418053191863893e-06, + "loss": 17.3857, + "step": 26761 + }, + { + "epoch": 0.4891878553018809, + "grad_norm": 4.845768868473213, + "learning_rate": 5.417758216077756e-06, + "loss": 16.8998, + "step": 26762 + }, + { + "epoch": 0.48920613449832745, + "grad_norm": 6.316455854819885, + "learning_rate": 5.417463238827413e-06, + "loss": 17.5497, + "step": 26763 + }, + { + "epoch": 0.489224413694774, + "grad_norm": 5.425386887917177, + "learning_rate": 5.417168260113896e-06, + "loss": 16.943, + "step": 26764 + }, + { + "epoch": 0.4892426928912205, + "grad_norm": 6.336866161571187, + "learning_rate": 5.416873279938241e-06, + "loss": 17.459, + "step": 26765 + }, + { + "epoch": 0.489260972087667, + "grad_norm": 7.594505821619773, + "learning_rate": 5.4165782983014825e-06, + "loss": 17.9707, + "step": 26766 + }, + { + "epoch": 0.48927925128411354, + "grad_norm": 6.511043350662532, + "learning_rate": 5.416283315204652e-06, + "loss": 17.4195, + "step": 26767 + }, + { + "epoch": 0.4892975304805601, + "grad_norm": 6.222490785112991, + "learning_rate": 5.415988330648785e-06, + "loss": 17.5351, + "step": 26768 + }, + { + "epoch": 0.4893158096770066, + "grad_norm": 6.5235654266904755, + "learning_rate": 5.415693344634916e-06, + "loss": 17.4953, + "step": 26769 + }, + { + "epoch": 0.48933408887345314, + "grad_norm": 7.537106258040323, + "learning_rate": 5.415398357164078e-06, + "loss": 17.7087, + "step": 26770 + }, + { + "epoch": 0.4893523680698996, + "grad_norm": 6.006885768011372, + "learning_rate": 5.4151033682373035e-06, + "loss": 17.4608, + "step": 26771 + }, + { + "epoch": 0.48937064726634616, + "grad_norm": 5.3133142659177635, + "learning_rate": 5.414808377855626e-06, + "loss": 17.1791, + "step": 26772 + }, + { + "epoch": 0.4893889264627927, + "grad_norm": 6.612993465288934, + "learning_rate": 5.414513386020084e-06, + "loss": 18.1677, + "step": 26773 + }, + { + "epoch": 0.48940720565923923, + "grad_norm": 6.834958480775478, + "learning_rate": 5.414218392731708e-06, + "loss": 17.8332, + "step": 26774 + }, + { + "epoch": 0.48942548485568577, + "grad_norm": 6.685691737836291, + "learning_rate": 5.413923397991532e-06, + "loss": 17.0545, + "step": 26775 + }, + { + "epoch": 0.48944376405213225, + 
"grad_norm": 6.097640583030748, + "learning_rate": 5.413628401800591e-06, + "loss": 17.3799, + "step": 26776 + }, + { + "epoch": 0.4894620432485788, + "grad_norm": 6.762595586009303, + "learning_rate": 5.413333404159917e-06, + "loss": 17.6671, + "step": 26777 + }, + { + "epoch": 0.4894803224450253, + "grad_norm": 5.749003583807096, + "learning_rate": 5.413038405070547e-06, + "loss": 17.1119, + "step": 26778 + }, + { + "epoch": 0.48949860164147185, + "grad_norm": 5.646286438711893, + "learning_rate": 5.412743404533512e-06, + "loss": 17.493, + "step": 26779 + }, + { + "epoch": 0.4895168808379184, + "grad_norm": 7.391000562632388, + "learning_rate": 5.412448402549848e-06, + "loss": 17.8051, + "step": 26780 + }, + { + "epoch": 0.48953516003436487, + "grad_norm": 5.408967309528288, + "learning_rate": 5.4121533991205875e-06, + "loss": 17.2397, + "step": 26781 + }, + { + "epoch": 0.4895534392308114, + "grad_norm": 6.601831321266045, + "learning_rate": 5.411858394246765e-06, + "loss": 17.4203, + "step": 26782 + }, + { + "epoch": 0.48957171842725794, + "grad_norm": 5.480369766172453, + "learning_rate": 5.411563387929415e-06, + "loss": 17.2981, + "step": 26783 + }, + { + "epoch": 0.4895899976237045, + "grad_norm": 6.8605239151113695, + "learning_rate": 5.411268380169572e-06, + "loss": 17.771, + "step": 26784 + }, + { + "epoch": 0.489608276820151, + "grad_norm": 6.873494468844328, + "learning_rate": 5.410973370968268e-06, + "loss": 17.6408, + "step": 26785 + }, + { + "epoch": 0.4896265560165975, + "grad_norm": 5.889293413290619, + "learning_rate": 5.410678360326537e-06, + "loss": 17.2507, + "step": 26786 + }, + { + "epoch": 0.489644835213044, + "grad_norm": 5.892544846947069, + "learning_rate": 5.410383348245416e-06, + "loss": 17.0728, + "step": 26787 + }, + { + "epoch": 0.48966311440949056, + "grad_norm": 7.835531562358622, + "learning_rate": 5.4100883347259355e-06, + "loss": 18.115, + "step": 26788 + }, + { + "epoch": 0.4896813936059371, + "grad_norm": 6.599834990120285, + "learning_rate": 5.4097933197691325e-06, + "loss": 17.5977, + "step": 26789 + }, + { + "epoch": 0.48969967280238363, + "grad_norm": 5.751483476441311, + "learning_rate": 5.409498303376038e-06, + "loss": 17.186, + "step": 26790 + }, + { + "epoch": 0.4897179519988301, + "grad_norm": 5.943528801227008, + "learning_rate": 5.409203285547687e-06, + "loss": 17.4003, + "step": 26791 + }, + { + "epoch": 0.48973623119527665, + "grad_norm": 6.631362999406427, + "learning_rate": 5.408908266285116e-06, + "loss": 17.3434, + "step": 26792 + }, + { + "epoch": 0.4897545103917232, + "grad_norm": 8.261633001616998, + "learning_rate": 5.408613245589354e-06, + "loss": 18.0013, + "step": 26793 + }, + { + "epoch": 0.4897727895881697, + "grad_norm": 9.316514163925088, + "learning_rate": 5.408318223461441e-06, + "loss": 18.3173, + "step": 26794 + }, + { + "epoch": 0.4897910687846162, + "grad_norm": 5.632237732045882, + "learning_rate": 5.408023199902407e-06, + "loss": 17.2808, + "step": 26795 + }, + { + "epoch": 0.48980934798106274, + "grad_norm": 7.090079701011441, + "learning_rate": 5.407728174913287e-06, + "loss": 17.7669, + "step": 26796 + }, + { + "epoch": 0.4898276271775093, + "grad_norm": 7.082274922460304, + "learning_rate": 5.407433148495115e-06, + "loss": 17.6108, + "step": 26797 + }, + { + "epoch": 0.4898459063739558, + "grad_norm": 5.921376319212949, + "learning_rate": 5.407138120648926e-06, + "loss": 17.4497, + "step": 26798 + }, + { + "epoch": 0.48986418557040234, + "grad_norm": 5.088669060802675, + "learning_rate": 5.406843091375752e-06, + 
"loss": 17.1594, + "step": 26799 + }, + { + "epoch": 0.4898824647668488, + "grad_norm": 9.112998331090383, + "learning_rate": 5.406548060676629e-06, + "loss": 18.656, + "step": 26800 + }, + { + "epoch": 0.48990074396329536, + "grad_norm": 6.64795311345859, + "learning_rate": 5.40625302855259e-06, + "loss": 17.4863, + "step": 26801 + }, + { + "epoch": 0.4899190231597419, + "grad_norm": 6.893498065757439, + "learning_rate": 5.40595799500467e-06, + "loss": 17.5541, + "step": 26802 + }, + { + "epoch": 0.48993730235618843, + "grad_norm": 8.16370549613025, + "learning_rate": 5.405662960033902e-06, + "loss": 18.6281, + "step": 26803 + }, + { + "epoch": 0.48995558155263497, + "grad_norm": 6.470659436684596, + "learning_rate": 5.405367923641319e-06, + "loss": 17.524, + "step": 26804 + }, + { + "epoch": 0.48997386074908145, + "grad_norm": 6.170261979075298, + "learning_rate": 5.40507288582796e-06, + "loss": 17.555, + "step": 26805 + }, + { + "epoch": 0.489992139945528, + "grad_norm": 4.9576388587498315, + "learning_rate": 5.404777846594853e-06, + "loss": 16.8701, + "step": 26806 + }, + { + "epoch": 0.4900104191419745, + "grad_norm": 6.438231315488346, + "learning_rate": 5.4044828059430355e-06, + "loss": 17.6076, + "step": 26807 + }, + { + "epoch": 0.49002869833842105, + "grad_norm": 7.5781883995260255, + "learning_rate": 5.4041877638735405e-06, + "loss": 17.563, + "step": 26808 + }, + { + "epoch": 0.4900469775348676, + "grad_norm": 6.578265249848708, + "learning_rate": 5.403892720387404e-06, + "loss": 17.3995, + "step": 26809 + }, + { + "epoch": 0.49006525673131407, + "grad_norm": 5.285911452760043, + "learning_rate": 5.403597675485657e-06, + "loss": 17.0243, + "step": 26810 + }, + { + "epoch": 0.4900835359277606, + "grad_norm": 5.795312727202839, + "learning_rate": 5.403302629169336e-06, + "loss": 17.2022, + "step": 26811 + }, + { + "epoch": 0.49010181512420714, + "grad_norm": 9.374189870011783, + "learning_rate": 5.403007581439475e-06, + "loss": 17.405, + "step": 26812 + }, + { + "epoch": 0.4901200943206537, + "grad_norm": 7.492876513342417, + "learning_rate": 5.4027125322971045e-06, + "loss": 18.029, + "step": 26813 + }, + { + "epoch": 0.4901383735171002, + "grad_norm": 5.4262779633071165, + "learning_rate": 5.4024174817432624e-06, + "loss": 17.2035, + "step": 26814 + }, + { + "epoch": 0.4901566527135467, + "grad_norm": 6.46063539353455, + "learning_rate": 5.402122429778985e-06, + "loss": 17.7318, + "step": 26815 + }, + { + "epoch": 0.49017493190999323, + "grad_norm": 6.131022203123187, + "learning_rate": 5.401827376405301e-06, + "loss": 17.5596, + "step": 26816 + }, + { + "epoch": 0.49019321110643976, + "grad_norm": 6.43573483444271, + "learning_rate": 5.401532321623247e-06, + "loss": 17.4997, + "step": 26817 + }, + { + "epoch": 0.4902114903028863, + "grad_norm": 6.267315419116112, + "learning_rate": 5.401237265433857e-06, + "loss": 17.2701, + "step": 26818 + }, + { + "epoch": 0.49022976949933283, + "grad_norm": 8.349651440043182, + "learning_rate": 5.400942207838166e-06, + "loss": 18.0184, + "step": 26819 + }, + { + "epoch": 0.4902480486957793, + "grad_norm": 5.849351097855939, + "learning_rate": 5.400647148837207e-06, + "loss": 17.2519, + "step": 26820 + }, + { + "epoch": 0.49026632789222585, + "grad_norm": 5.557082938829403, + "learning_rate": 5.400352088432014e-06, + "loss": 17.2449, + "step": 26821 + }, + { + "epoch": 0.4902846070886724, + "grad_norm": 7.659280914258292, + "learning_rate": 5.400057026623622e-06, + "loss": 17.6655, + "step": 26822 + }, + { + "epoch": 0.4903028862851189, + 
"grad_norm": 6.293562110702888, + "learning_rate": 5.399761963413065e-06, + "loss": 17.3693, + "step": 26823 + }, + { + "epoch": 0.49032116548156546, + "grad_norm": 5.320283229261041, + "learning_rate": 5.399466898801377e-06, + "loss": 17.1344, + "step": 26824 + }, + { + "epoch": 0.49033944467801194, + "grad_norm": 5.3788484838259, + "learning_rate": 5.3991718327895925e-06, + "loss": 17.0159, + "step": 26825 + }, + { + "epoch": 0.4903577238744585, + "grad_norm": 7.665377172028155, + "learning_rate": 5.398876765378744e-06, + "loss": 18.0574, + "step": 26826 + }, + { + "epoch": 0.490376003070905, + "grad_norm": 6.049932660681751, + "learning_rate": 5.398581696569868e-06, + "loss": 17.2714, + "step": 26827 + }, + { + "epoch": 0.49039428226735154, + "grad_norm": 6.2361116173369044, + "learning_rate": 5.398286626363996e-06, + "loss": 17.4638, + "step": 26828 + }, + { + "epoch": 0.490412561463798, + "grad_norm": 6.112522394228724, + "learning_rate": 5.397991554762167e-06, + "loss": 17.3001, + "step": 26829 + }, + { + "epoch": 0.49043084066024456, + "grad_norm": 5.640500088782503, + "learning_rate": 5.3976964817654106e-06, + "loss": 17.0323, + "step": 26830 + }, + { + "epoch": 0.4904491198566911, + "grad_norm": 7.223058265261146, + "learning_rate": 5.397401407374763e-06, + "loss": 17.7531, + "step": 26831 + }, + { + "epoch": 0.49046739905313763, + "grad_norm": 10.137905839670566, + "learning_rate": 5.3971063315912575e-06, + "loss": 17.808, + "step": 26832 + }, + { + "epoch": 0.49048567824958417, + "grad_norm": 5.570050306643148, + "learning_rate": 5.396811254415929e-06, + "loss": 17.0955, + "step": 26833 + }, + { + "epoch": 0.49050395744603065, + "grad_norm": 6.471424733456603, + "learning_rate": 5.396516175849812e-06, + "loss": 17.4727, + "step": 26834 + }, + { + "epoch": 0.4905222366424772, + "grad_norm": 7.28869555481653, + "learning_rate": 5.396221095893938e-06, + "loss": 18.055, + "step": 26835 + }, + { + "epoch": 0.4905405158389237, + "grad_norm": 6.113599979603523, + "learning_rate": 5.395926014549347e-06, + "loss": 17.2481, + "step": 26836 + }, + { + "epoch": 0.49055879503537025, + "grad_norm": 6.477612647925058, + "learning_rate": 5.395630931817066e-06, + "loss": 17.579, + "step": 26837 + }, + { + "epoch": 0.4905770742318168, + "grad_norm": 5.570082626655907, + "learning_rate": 5.3953358476981355e-06, + "loss": 17.0483, + "step": 26838 + }, + { + "epoch": 0.49059535342826327, + "grad_norm": 6.5939035359793685, + "learning_rate": 5.395040762193587e-06, + "loss": 17.5565, + "step": 26839 + }, + { + "epoch": 0.4906136326247098, + "grad_norm": 5.889521432332619, + "learning_rate": 5.394745675304453e-06, + "loss": 17.3782, + "step": 26840 + }, + { + "epoch": 0.49063191182115634, + "grad_norm": 5.810556861786461, + "learning_rate": 5.394450587031771e-06, + "loss": 17.1271, + "step": 26841 + }, + { + "epoch": 0.4906501910176029, + "grad_norm": 7.603444045410603, + "learning_rate": 5.3941554973765745e-06, + "loss": 18.1476, + "step": 26842 + }, + { + "epoch": 0.4906684702140494, + "grad_norm": 8.524801879786166, + "learning_rate": 5.393860406339897e-06, + "loss": 18.5666, + "step": 26843 + }, + { + "epoch": 0.4906867494104959, + "grad_norm": 6.091587868190396, + "learning_rate": 5.3935653139227714e-06, + "loss": 17.3453, + "step": 26844 + }, + { + "epoch": 0.49070502860694243, + "grad_norm": 5.373096735902344, + "learning_rate": 5.393270220126235e-06, + "loss": 17.1939, + "step": 26845 + }, + { + "epoch": 0.49072330780338896, + "grad_norm": 6.959521133381509, + "learning_rate": 
5.39297512495132e-06, + "loss": 17.8511, + "step": 26846 + }, + { + "epoch": 0.4907415869998355, + "grad_norm": 5.851506965114323, + "learning_rate": 5.392680028399062e-06, + "loss": 17.2155, + "step": 26847 + }, + { + "epoch": 0.49075986619628204, + "grad_norm": 7.532761371224967, + "learning_rate": 5.392384930470493e-06, + "loss": 17.6946, + "step": 26848 + }, + { + "epoch": 0.4907781453927285, + "grad_norm": 6.0838953034197365, + "learning_rate": 5.39208983116665e-06, + "loss": 17.4837, + "step": 26849 + }, + { + "epoch": 0.49079642458917505, + "grad_norm": 7.050171855548302, + "learning_rate": 5.391794730488567e-06, + "loss": 18.0847, + "step": 26850 + }, + { + "epoch": 0.4908147037856216, + "grad_norm": 5.740280096755192, + "learning_rate": 5.3914996284372756e-06, + "loss": 17.4159, + "step": 26851 + }, + { + "epoch": 0.4908329829820681, + "grad_norm": 7.279768457602669, + "learning_rate": 5.391204525013814e-06, + "loss": 17.7201, + "step": 26852 + }, + { + "epoch": 0.49085126217851466, + "grad_norm": 6.894222463107142, + "learning_rate": 5.390909420219213e-06, + "loss": 17.1891, + "step": 26853 + }, + { + "epoch": 0.49086954137496114, + "grad_norm": 5.797836351655732, + "learning_rate": 5.390614314054509e-06, + "loss": 17.4896, + "step": 26854 + }, + { + "epoch": 0.4908878205714077, + "grad_norm": 6.95220083501087, + "learning_rate": 5.390319206520734e-06, + "loss": 17.8335, + "step": 26855 + }, + { + "epoch": 0.4909060997678542, + "grad_norm": 6.383801639281826, + "learning_rate": 5.3900240976189275e-06, + "loss": 17.5907, + "step": 26856 + }, + { + "epoch": 0.49092437896430074, + "grad_norm": 6.0585700664970075, + "learning_rate": 5.389728987350118e-06, + "loss": 17.3967, + "step": 26857 + }, + { + "epoch": 0.4909426581607473, + "grad_norm": 5.4990854133711515, + "learning_rate": 5.389433875715343e-06, + "loss": 17.2939, + "step": 26858 + }, + { + "epoch": 0.49096093735719376, + "grad_norm": 6.031399634538206, + "learning_rate": 5.389138762715634e-06, + "loss": 17.4824, + "step": 26859 + }, + { + "epoch": 0.4909792165536403, + "grad_norm": 5.960467649815992, + "learning_rate": 5.38884364835203e-06, + "loss": 17.5735, + "step": 26860 + }, + { + "epoch": 0.49099749575008683, + "grad_norm": 6.363918885303285, + "learning_rate": 5.388548532625562e-06, + "loss": 17.4231, + "step": 26861 + }, + { + "epoch": 0.49101577494653337, + "grad_norm": 6.799854083715129, + "learning_rate": 5.388253415537266e-06, + "loss": 17.46, + "step": 26862 + }, + { + "epoch": 0.49103405414297985, + "grad_norm": 5.419690651476267, + "learning_rate": 5.387958297088174e-06, + "loss": 17.1368, + "step": 26863 + }, + { + "epoch": 0.4910523333394264, + "grad_norm": 5.6065718145011365, + "learning_rate": 5.387663177279322e-06, + "loss": 17.1557, + "step": 26864 + }, + { + "epoch": 0.4910706125358729, + "grad_norm": 7.392491276085907, + "learning_rate": 5.3873680561117444e-06, + "loss": 18.1115, + "step": 26865 + }, + { + "epoch": 0.49108889173231945, + "grad_norm": 5.540634482165595, + "learning_rate": 5.387072933586476e-06, + "loss": 17.0831, + "step": 26866 + }, + { + "epoch": 0.491107170928766, + "grad_norm": 6.49316476778211, + "learning_rate": 5.386777809704549e-06, + "loss": 17.6583, + "step": 26867 + }, + { + "epoch": 0.49112545012521247, + "grad_norm": 5.272607120352341, + "learning_rate": 5.386482684467e-06, + "loss": 17.3372, + "step": 26868 + }, + { + "epoch": 0.491143729321659, + "grad_norm": 7.280203647564744, + "learning_rate": 5.386187557874864e-06, + "loss": 17.0905, + "step": 26869 + }, + { + 
"epoch": 0.49116200851810554, + "grad_norm": 6.901638087373839, + "learning_rate": 5.385892429929174e-06, + "loss": 17.4576, + "step": 26870 + }, + { + "epoch": 0.4911802877145521, + "grad_norm": 6.4241817384312006, + "learning_rate": 5.385597300630964e-06, + "loss": 17.5416, + "step": 26871 + }, + { + "epoch": 0.4911985669109986, + "grad_norm": 7.21240344430058, + "learning_rate": 5.385302169981267e-06, + "loss": 18.199, + "step": 26872 + }, + { + "epoch": 0.4912168461074451, + "grad_norm": 6.461777851863109, + "learning_rate": 5.385007037981122e-06, + "loss": 17.4993, + "step": 26873 + }, + { + "epoch": 0.49123512530389163, + "grad_norm": 6.32818213034626, + "learning_rate": 5.38471190463156e-06, + "loss": 17.6206, + "step": 26874 + }, + { + "epoch": 0.49125340450033816, + "grad_norm": 6.541881916707459, + "learning_rate": 5.384416769933616e-06, + "loss": 17.6468, + "step": 26875 + }, + { + "epoch": 0.4912716836967847, + "grad_norm": 5.788408433951197, + "learning_rate": 5.384121633888324e-06, + "loss": 17.363, + "step": 26876 + }, + { + "epoch": 0.49128996289323124, + "grad_norm": 7.48705751623344, + "learning_rate": 5.38382649649672e-06, + "loss": 17.7163, + "step": 26877 + }, + { + "epoch": 0.4913082420896777, + "grad_norm": 6.054534722697527, + "learning_rate": 5.383531357759837e-06, + "loss": 17.2928, + "step": 26878 + }, + { + "epoch": 0.49132652128612425, + "grad_norm": 5.686697007101629, + "learning_rate": 5.383236217678709e-06, + "loss": 16.9443, + "step": 26879 + }, + { + "epoch": 0.4913448004825708, + "grad_norm": 5.154853867221452, + "learning_rate": 5.382941076254372e-06, + "loss": 17.0009, + "step": 26880 + }, + { + "epoch": 0.4913630796790173, + "grad_norm": 9.232364566283492, + "learning_rate": 5.38264593348786e-06, + "loss": 18.3791, + "step": 26881 + }, + { + "epoch": 0.49138135887546386, + "grad_norm": 5.6116698874649895, + "learning_rate": 5.382350789380207e-06, + "loss": 17.5436, + "step": 26882 + }, + { + "epoch": 0.49139963807191034, + "grad_norm": 5.797979642885264, + "learning_rate": 5.3820556439324476e-06, + "loss": 17.2975, + "step": 26883 + }, + { + "epoch": 0.4914179172683569, + "grad_norm": 6.644032793815194, + "learning_rate": 5.381760497145617e-06, + "loss": 17.4729, + "step": 26884 + }, + { + "epoch": 0.4914361964648034, + "grad_norm": 6.307802392799059, + "learning_rate": 5.381465349020749e-06, + "loss": 17.5992, + "step": 26885 + }, + { + "epoch": 0.49145447566124995, + "grad_norm": 6.2488141686992265, + "learning_rate": 5.381170199558876e-06, + "loss": 17.6261, + "step": 26886 + }, + { + "epoch": 0.4914727548576965, + "grad_norm": 6.195354067715704, + "learning_rate": 5.380875048761038e-06, + "loss": 17.6299, + "step": 26887 + }, + { + "epoch": 0.49149103405414296, + "grad_norm": 7.809291432483063, + "learning_rate": 5.380579896628263e-06, + "loss": 18.0773, + "step": 26888 + }, + { + "epoch": 0.4915093132505895, + "grad_norm": 7.261728012854263, + "learning_rate": 5.38028474316159e-06, + "loss": 18.1113, + "step": 26889 + }, + { + "epoch": 0.49152759244703603, + "grad_norm": 6.174152541252177, + "learning_rate": 5.379989588362052e-06, + "loss": 17.3604, + "step": 26890 + }, + { + "epoch": 0.49154587164348257, + "grad_norm": 7.053717253465543, + "learning_rate": 5.379694432230682e-06, + "loss": 17.4238, + "step": 26891 + }, + { + "epoch": 0.4915641508399291, + "grad_norm": 6.163030691150242, + "learning_rate": 5.379399274768517e-06, + "loss": 17.7023, + "step": 26892 + }, + { + "epoch": 0.4915824300363756, + "grad_norm": 7.085142398701097, + 
"learning_rate": 5.37910411597659e-06, + "loss": 18.0284, + "step": 26893 + }, + { + "epoch": 0.4916007092328221, + "grad_norm": 7.466436844686998, + "learning_rate": 5.378808955855937e-06, + "loss": 18.1061, + "step": 26894 + }, + { + "epoch": 0.49161898842926866, + "grad_norm": 6.054243917307078, + "learning_rate": 5.378513794407591e-06, + "loss": 17.3603, + "step": 26895 + }, + { + "epoch": 0.4916372676257152, + "grad_norm": 6.703002079725329, + "learning_rate": 5.378218631632585e-06, + "loss": 17.7708, + "step": 26896 + }, + { + "epoch": 0.49165554682216167, + "grad_norm": 7.644869572406129, + "learning_rate": 5.377923467531958e-06, + "loss": 18.1343, + "step": 26897 + }, + { + "epoch": 0.4916738260186082, + "grad_norm": 6.135790011801075, + "learning_rate": 5.3776283021067435e-06, + "loss": 17.4194, + "step": 26898 + }, + { + "epoch": 0.49169210521505474, + "grad_norm": 5.248274801099542, + "learning_rate": 5.37733313535797e-06, + "loss": 16.8469, + "step": 26899 + }, + { + "epoch": 0.4917103844115013, + "grad_norm": 5.1752921146963855, + "learning_rate": 5.3770379672866805e-06, + "loss": 16.9588, + "step": 26900 + }, + { + "epoch": 0.4917286636079478, + "grad_norm": 6.878734534908685, + "learning_rate": 5.376742797893905e-06, + "loss": 17.795, + "step": 26901 + }, + { + "epoch": 0.4917469428043943, + "grad_norm": 6.765103844627631, + "learning_rate": 5.376447627180677e-06, + "loss": 17.5713, + "step": 26902 + }, + { + "epoch": 0.49176522200084083, + "grad_norm": 7.373925014002523, + "learning_rate": 5.376152455148034e-06, + "loss": 17.9319, + "step": 26903 + }, + { + "epoch": 0.49178350119728736, + "grad_norm": 6.974871017734401, + "learning_rate": 5.375857281797008e-06, + "loss": 17.7886, + "step": 26904 + }, + { + "epoch": 0.4918017803937339, + "grad_norm": 7.047464959961935, + "learning_rate": 5.375562107128636e-06, + "loss": 17.8694, + "step": 26905 + }, + { + "epoch": 0.49182005959018044, + "grad_norm": 6.203225308572396, + "learning_rate": 5.375266931143951e-06, + "loss": 17.5135, + "step": 26906 + }, + { + "epoch": 0.4918383387866269, + "grad_norm": 5.6146931985286495, + "learning_rate": 5.374971753843988e-06, + "loss": 17.0993, + "step": 26907 + }, + { + "epoch": 0.49185661798307345, + "grad_norm": 5.788173869878248, + "learning_rate": 5.374676575229782e-06, + "loss": 17.3109, + "step": 26908 + }, + { + "epoch": 0.49187489717952, + "grad_norm": 6.580879383156743, + "learning_rate": 5.374381395302367e-06, + "loss": 17.4365, + "step": 26909 + }, + { + "epoch": 0.4918931763759665, + "grad_norm": 5.662605690686466, + "learning_rate": 5.374086214062777e-06, + "loss": 17.2884, + "step": 26910 + }, + { + "epoch": 0.49191145557241306, + "grad_norm": 7.698035315986826, + "learning_rate": 5.373791031512048e-06, + "loss": 18.2263, + "step": 26911 + }, + { + "epoch": 0.49192973476885954, + "grad_norm": 6.156024129877999, + "learning_rate": 5.373495847651214e-06, + "loss": 17.47, + "step": 26912 + }, + { + "epoch": 0.4919480139653061, + "grad_norm": 6.802171153383905, + "learning_rate": 5.373200662481308e-06, + "loss": 17.6954, + "step": 26913 + }, + { + "epoch": 0.4919662931617526, + "grad_norm": 6.636026493710453, + "learning_rate": 5.372905476003368e-06, + "loss": 17.6593, + "step": 26914 + }, + { + "epoch": 0.49198457235819915, + "grad_norm": 6.484582377149246, + "learning_rate": 5.372610288218426e-06, + "loss": 17.4478, + "step": 26915 + }, + { + "epoch": 0.4920028515546457, + "grad_norm": 6.15595223056846, + "learning_rate": 5.3723150991275175e-06, + "loss": 17.363, + "step": 26916 
+ }, + { + "epoch": 0.49202113075109216, + "grad_norm": 7.695587024505982, + "learning_rate": 5.372019908731676e-06, + "loss": 18.4037, + "step": 26917 + }, + { + "epoch": 0.4920394099475387, + "grad_norm": 6.798574774437424, + "learning_rate": 5.371724717031938e-06, + "loss": 17.4892, + "step": 26918 + }, + { + "epoch": 0.49205768914398523, + "grad_norm": 6.815633024814039, + "learning_rate": 5.371429524029337e-06, + "loss": 17.726, + "step": 26919 + }, + { + "epoch": 0.49207596834043177, + "grad_norm": 6.759571646412667, + "learning_rate": 5.3711343297249065e-06, + "loss": 17.4205, + "step": 26920 + }, + { + "epoch": 0.4920942475368783, + "grad_norm": 7.2400862585694945, + "learning_rate": 5.370839134119683e-06, + "loss": 17.8043, + "step": 26921 + }, + { + "epoch": 0.4921125267333248, + "grad_norm": 7.084659912512047, + "learning_rate": 5.370543937214701e-06, + "loss": 17.9527, + "step": 26922 + }, + { + "epoch": 0.4921308059297713, + "grad_norm": 6.862495073162513, + "learning_rate": 5.370248739010995e-06, + "loss": 17.8831, + "step": 26923 + }, + { + "epoch": 0.49214908512621786, + "grad_norm": 5.968072786265508, + "learning_rate": 5.369953539509598e-06, + "loss": 17.5994, + "step": 26924 + }, + { + "epoch": 0.4921673643226644, + "grad_norm": 6.376872240610247, + "learning_rate": 5.369658338711547e-06, + "loss": 17.3389, + "step": 26925 + }, + { + "epoch": 0.4921856435191109, + "grad_norm": 6.258884787702053, + "learning_rate": 5.369363136617875e-06, + "loss": 17.4756, + "step": 26926 + }, + { + "epoch": 0.4922039227155574, + "grad_norm": 5.857006205686026, + "learning_rate": 5.369067933229617e-06, + "loss": 17.5055, + "step": 26927 + }, + { + "epoch": 0.49222220191200394, + "grad_norm": 6.134324532429734, + "learning_rate": 5.368772728547809e-06, + "loss": 17.4956, + "step": 26928 + }, + { + "epoch": 0.4922404811084505, + "grad_norm": 6.484942194754365, + "learning_rate": 5.368477522573484e-06, + "loss": 17.6194, + "step": 26929 + }, + { + "epoch": 0.492258760304897, + "grad_norm": 5.237759670925472, + "learning_rate": 5.368182315307677e-06, + "loss": 16.8716, + "step": 26930 + }, + { + "epoch": 0.4922770395013435, + "grad_norm": 6.164913411336027, + "learning_rate": 5.367887106751423e-06, + "loss": 17.584, + "step": 26931 + }, + { + "epoch": 0.49229531869779003, + "grad_norm": 4.863338566559338, + "learning_rate": 5.367591896905757e-06, + "loss": 16.9111, + "step": 26932 + }, + { + "epoch": 0.49231359789423657, + "grad_norm": 6.055456709587119, + "learning_rate": 5.367296685771713e-06, + "loss": 17.3858, + "step": 26933 + }, + { + "epoch": 0.4923318770906831, + "grad_norm": 4.91780885037892, + "learning_rate": 5.367001473350327e-06, + "loss": 16.8793, + "step": 26934 + }, + { + "epoch": 0.49235015628712964, + "grad_norm": 7.4618920387291565, + "learning_rate": 5.366706259642631e-06, + "loss": 17.7985, + "step": 26935 + }, + { + "epoch": 0.4923684354835761, + "grad_norm": 6.8229652616455425, + "learning_rate": 5.366411044649663e-06, + "loss": 17.5554, + "step": 26936 + }, + { + "epoch": 0.49238671468002265, + "grad_norm": 6.244515700441109, + "learning_rate": 5.366115828372454e-06, + "loss": 17.2898, + "step": 26937 + }, + { + "epoch": 0.4924049938764692, + "grad_norm": 5.985505017798252, + "learning_rate": 5.365820610812042e-06, + "loss": 17.4043, + "step": 26938 + }, + { + "epoch": 0.4924232730729157, + "grad_norm": 5.535320221329392, + "learning_rate": 5.365525391969463e-06, + "loss": 17.3785, + "step": 26939 + }, + { + "epoch": 0.49244155226936226, + "grad_norm": 
7.899752924980689, + "learning_rate": 5.365230171845746e-06, + "loss": 18.0108, + "step": 26940 + }, + { + "epoch": 0.49245983146580874, + "grad_norm": 6.514524779167289, + "learning_rate": 5.364934950441929e-06, + "loss": 17.4425, + "step": 26941 + }, + { + "epoch": 0.4924781106622553, + "grad_norm": 6.0603258145956, + "learning_rate": 5.364639727759047e-06, + "loss": 17.1233, + "step": 26942 + }, + { + "epoch": 0.4924963898587018, + "grad_norm": 6.512603450804955, + "learning_rate": 5.364344503798136e-06, + "loss": 17.5404, + "step": 26943 + }, + { + "epoch": 0.49251466905514835, + "grad_norm": 8.761079644741475, + "learning_rate": 5.3640492785602285e-06, + "loss": 17.5778, + "step": 26944 + }, + { + "epoch": 0.4925329482515949, + "grad_norm": 6.417406889031707, + "learning_rate": 5.363754052046357e-06, + "loss": 17.3514, + "step": 26945 + }, + { + "epoch": 0.49255122744804136, + "grad_norm": 5.894893632438456, + "learning_rate": 5.3634588242575625e-06, + "loss": 17.3609, + "step": 26946 + }, + { + "epoch": 0.4925695066444879, + "grad_norm": 6.449317455126374, + "learning_rate": 5.363163595194876e-06, + "loss": 17.5889, + "step": 26947 + }, + { + "epoch": 0.49258778584093443, + "grad_norm": 7.35069890110503, + "learning_rate": 5.362868364859332e-06, + "loss": 17.8098, + "step": 26948 + }, + { + "epoch": 0.49260606503738097, + "grad_norm": 7.126230057637257, + "learning_rate": 5.362573133251965e-06, + "loss": 17.7459, + "step": 26949 + }, + { + "epoch": 0.4926243442338275, + "grad_norm": 6.235345778518111, + "learning_rate": 5.362277900373811e-06, + "loss": 17.3931, + "step": 26950 + }, + { + "epoch": 0.492642623430274, + "grad_norm": 6.933436583356739, + "learning_rate": 5.361982666225905e-06, + "loss": 17.5829, + "step": 26951 + }, + { + "epoch": 0.4926609026267205, + "grad_norm": 7.389122953462225, + "learning_rate": 5.361687430809282e-06, + "loss": 18.1202, + "step": 26952 + }, + { + "epoch": 0.49267918182316706, + "grad_norm": 7.1692490912883144, + "learning_rate": 5.361392194124973e-06, + "loss": 17.9091, + "step": 26953 + }, + { + "epoch": 0.4926974610196136, + "grad_norm": 8.591531879847942, + "learning_rate": 5.361096956174018e-06, + "loss": 17.9691, + "step": 26954 + }, + { + "epoch": 0.4927157402160601, + "grad_norm": 6.527397017102309, + "learning_rate": 5.360801716957449e-06, + "loss": 17.7767, + "step": 26955 + }, + { + "epoch": 0.4927340194125066, + "grad_norm": 6.639954028183422, + "learning_rate": 5.360506476476303e-06, + "loss": 17.6142, + "step": 26956 + }, + { + "epoch": 0.49275229860895314, + "grad_norm": 6.465569336463757, + "learning_rate": 5.360211234731612e-06, + "loss": 17.6567, + "step": 26957 + }, + { + "epoch": 0.4927705778053997, + "grad_norm": 7.161281066801649, + "learning_rate": 5.359915991724409e-06, + "loss": 18.3439, + "step": 26958 + }, + { + "epoch": 0.4927888570018462, + "grad_norm": 5.810592162886852, + "learning_rate": 5.359620747455734e-06, + "loss": 17.2661, + "step": 26959 + }, + { + "epoch": 0.49280713619829275, + "grad_norm": 6.639986391388215, + "learning_rate": 5.359325501926621e-06, + "loss": 17.945, + "step": 26960 + }, + { + "epoch": 0.49282541539473923, + "grad_norm": 5.685433823811523, + "learning_rate": 5.359030255138101e-06, + "loss": 17.2936, + "step": 26961 + }, + { + "epoch": 0.49284369459118577, + "grad_norm": 5.729852340163938, + "learning_rate": 5.358735007091212e-06, + "loss": 17.2307, + "step": 26962 + }, + { + "epoch": 0.4928619737876323, + "grad_norm": 6.187144348372903, + "learning_rate": 5.358439757786989e-06, + "loss": 
17.2237, + "step": 26963 + }, + { + "epoch": 0.49288025298407884, + "grad_norm": 4.332550610332634, + "learning_rate": 5.358144507226464e-06, + "loss": 16.6847, + "step": 26964 + }, + { + "epoch": 0.4928985321805253, + "grad_norm": 4.863894359925461, + "learning_rate": 5.357849255410674e-06, + "loss": 16.7782, + "step": 26965 + }, + { + "epoch": 0.49291681137697185, + "grad_norm": 6.182878907863092, + "learning_rate": 5.357554002340654e-06, + "loss": 17.2337, + "step": 26966 + }, + { + "epoch": 0.4929350905734184, + "grad_norm": 5.526272701074921, + "learning_rate": 5.357258748017437e-06, + "loss": 17.3214, + "step": 26967 + }, + { + "epoch": 0.4929533697698649, + "grad_norm": 7.613455418716324, + "learning_rate": 5.3569634924420595e-06, + "loss": 18.3149, + "step": 26968 + }, + { + "epoch": 0.49297164896631146, + "grad_norm": 6.67763283935045, + "learning_rate": 5.356668235615557e-06, + "loss": 17.8365, + "step": 26969 + }, + { + "epoch": 0.49298992816275794, + "grad_norm": 5.913927222305072, + "learning_rate": 5.356372977538964e-06, + "loss": 17.6074, + "step": 26970 + }, + { + "epoch": 0.4930082073592045, + "grad_norm": 8.135030482059443, + "learning_rate": 5.356077718213313e-06, + "loss": 18.0704, + "step": 26971 + }, + { + "epoch": 0.493026486555651, + "grad_norm": 5.8973378186105725, + "learning_rate": 5.35578245763964e-06, + "loss": 17.1145, + "step": 26972 + }, + { + "epoch": 0.49304476575209755, + "grad_norm": 14.303282257554455, + "learning_rate": 5.35548719581898e-06, + "loss": 17.8257, + "step": 26973 + }, + { + "epoch": 0.4930630449485441, + "grad_norm": 5.7261448376667765, + "learning_rate": 5.35519193275237e-06, + "loss": 17.0845, + "step": 26974 + }, + { + "epoch": 0.49308132414499056, + "grad_norm": 5.512857713231104, + "learning_rate": 5.354896668440843e-06, + "loss": 17.2451, + "step": 26975 + }, + { + "epoch": 0.4930996033414371, + "grad_norm": 5.209464948503143, + "learning_rate": 5.354601402885434e-06, + "loss": 16.8178, + "step": 26976 + }, + { + "epoch": 0.49311788253788363, + "grad_norm": 6.831800009675839, + "learning_rate": 5.3543061360871774e-06, + "loss": 17.8316, + "step": 26977 + }, + { + "epoch": 0.49313616173433017, + "grad_norm": 6.310170617315658, + "learning_rate": 5.3540108680471085e-06, + "loss": 17.6085, + "step": 26978 + }, + { + "epoch": 0.4931544409307767, + "grad_norm": 7.249291216101069, + "learning_rate": 5.353715598766262e-06, + "loss": 18.0408, + "step": 26979 + }, + { + "epoch": 0.4931727201272232, + "grad_norm": 5.835547523451604, + "learning_rate": 5.353420328245673e-06, + "loss": 17.244, + "step": 26980 + }, + { + "epoch": 0.4931909993236697, + "grad_norm": 6.747052172227247, + "learning_rate": 5.353125056486377e-06, + "loss": 17.5503, + "step": 26981 + }, + { + "epoch": 0.49320927852011626, + "grad_norm": 5.787707208088663, + "learning_rate": 5.352829783489407e-06, + "loss": 17.3794, + "step": 26982 + }, + { + "epoch": 0.4932275577165628, + "grad_norm": 7.077291862170868, + "learning_rate": 5.3525345092558e-06, + "loss": 17.6171, + "step": 26983 + }, + { + "epoch": 0.49324583691300933, + "grad_norm": 8.164761075820138, + "learning_rate": 5.352239233786592e-06, + "loss": 17.6073, + "step": 26984 + }, + { + "epoch": 0.4932641161094558, + "grad_norm": 7.161564110869427, + "learning_rate": 5.351943957082813e-06, + "loss": 17.7869, + "step": 26985 + }, + { + "epoch": 0.49328239530590234, + "grad_norm": 7.7619675751750625, + "learning_rate": 5.351648679145502e-06, + "loss": 18.0181, + "step": 26986 + }, + { + "epoch": 0.4933006745023489, + 
"grad_norm": 5.668140917595582, + "learning_rate": 5.351353399975694e-06, + "loss": 17.3054, + "step": 26987 + }, + { + "epoch": 0.4933189536987954, + "grad_norm": 6.3856548032983405, + "learning_rate": 5.351058119574423e-06, + "loss": 17.4187, + "step": 26988 + }, + { + "epoch": 0.49333723289524195, + "grad_norm": 5.526415424467501, + "learning_rate": 5.3507628379427225e-06, + "loss": 17.179, + "step": 26989 + }, + { + "epoch": 0.49335551209168843, + "grad_norm": 6.346497051201486, + "learning_rate": 5.3504675550816286e-06, + "loss": 17.575, + "step": 26990 + }, + { + "epoch": 0.49337379128813497, + "grad_norm": 6.980587862419724, + "learning_rate": 5.350172270992177e-06, + "loss": 17.946, + "step": 26991 + }, + { + "epoch": 0.4933920704845815, + "grad_norm": 7.803761180025331, + "learning_rate": 5.3498769856754e-06, + "loss": 17.7162, + "step": 26992 + }, + { + "epoch": 0.49341034968102804, + "grad_norm": 6.661552136135507, + "learning_rate": 5.349581699132337e-06, + "loss": 17.7463, + "step": 26993 + }, + { + "epoch": 0.4934286288774746, + "grad_norm": 5.6460867041461125, + "learning_rate": 5.34928641136402e-06, + "loss": 17.1434, + "step": 26994 + }, + { + "epoch": 0.49344690807392105, + "grad_norm": 6.079734936911856, + "learning_rate": 5.348991122371484e-06, + "loss": 17.5019, + "step": 26995 + }, + { + "epoch": 0.4934651872703676, + "grad_norm": 5.589034062581712, + "learning_rate": 5.3486958321557645e-06, + "loss": 17.1942, + "step": 26996 + }, + { + "epoch": 0.4934834664668141, + "grad_norm": 6.288894942471698, + "learning_rate": 5.348400540717896e-06, + "loss": 17.3222, + "step": 26997 + }, + { + "epoch": 0.49350174566326066, + "grad_norm": 5.079433526195749, + "learning_rate": 5.3481052480589154e-06, + "loss": 16.911, + "step": 26998 + }, + { + "epoch": 0.49352002485970714, + "grad_norm": 5.801648085214462, + "learning_rate": 5.347809954179853e-06, + "loss": 17.2667, + "step": 26999 + }, + { + "epoch": 0.4935383040561537, + "grad_norm": 6.783307260324162, + "learning_rate": 5.34751465908175e-06, + "loss": 17.6292, + "step": 27000 + }, + { + "epoch": 0.4935565832526002, + "grad_norm": 7.3165225948765995, + "learning_rate": 5.347219362765637e-06, + "loss": 17.8569, + "step": 27001 + }, + { + "epoch": 0.49357486244904675, + "grad_norm": 6.571744864750892, + "learning_rate": 5.34692406523255e-06, + "loss": 17.5595, + "step": 27002 + }, + { + "epoch": 0.4935931416454933, + "grad_norm": 6.7247155088644375, + "learning_rate": 5.3466287664835255e-06, + "loss": 17.9137, + "step": 27003 + }, + { + "epoch": 0.49361142084193976, + "grad_norm": 5.6204778914317535, + "learning_rate": 5.3463334665195954e-06, + "loss": 17.0609, + "step": 27004 + }, + { + "epoch": 0.4936297000383863, + "grad_norm": 6.321485899260976, + "learning_rate": 5.346038165341798e-06, + "loss": 17.4114, + "step": 27005 + }, + { + "epoch": 0.49364797923483283, + "grad_norm": 5.684925732587549, + "learning_rate": 5.3457428629511665e-06, + "loss": 17.2745, + "step": 27006 + }, + { + "epoch": 0.49366625843127937, + "grad_norm": 6.471030352199631, + "learning_rate": 5.345447559348736e-06, + "loss": 17.2311, + "step": 27007 + }, + { + "epoch": 0.4936845376277259, + "grad_norm": 6.386545510890274, + "learning_rate": 5.345152254535542e-06, + "loss": 17.4082, + "step": 27008 + }, + { + "epoch": 0.4937028168241724, + "grad_norm": 6.72417967649249, + "learning_rate": 5.344856948512619e-06, + "loss": 17.8282, + "step": 27009 + }, + { + "epoch": 0.4937210960206189, + "grad_norm": 6.224589284089164, + "learning_rate": 
5.344561641281003e-06, + "loss": 17.2626, + "step": 27010 + }, + { + "epoch": 0.49373937521706546, + "grad_norm": 6.127257620504393, + "learning_rate": 5.344266332841728e-06, + "loss": 17.2126, + "step": 27011 + }, + { + "epoch": 0.493757654413512, + "grad_norm": 7.429251140891445, + "learning_rate": 5.343971023195829e-06, + "loss": 18.1963, + "step": 27012 + }, + { + "epoch": 0.49377593360995853, + "grad_norm": 6.348298199733702, + "learning_rate": 5.343675712344342e-06, + "loss": 17.7133, + "step": 27013 + }, + { + "epoch": 0.493794212806405, + "grad_norm": 6.316016192402913, + "learning_rate": 5.343380400288301e-06, + "loss": 17.4648, + "step": 27014 + }, + { + "epoch": 0.49381249200285154, + "grad_norm": 7.458585796933602, + "learning_rate": 5.343085087028742e-06, + "loss": 18.0657, + "step": 27015 + }, + { + "epoch": 0.4938307711992981, + "grad_norm": 6.686006564372484, + "learning_rate": 5.3427897725667e-06, + "loss": 17.5928, + "step": 27016 + }, + { + "epoch": 0.4938490503957446, + "grad_norm": 7.845928481052563, + "learning_rate": 5.342494456903208e-06, + "loss": 18.0915, + "step": 27017 + }, + { + "epoch": 0.49386732959219115, + "grad_norm": 7.656002154062699, + "learning_rate": 5.342199140039303e-06, + "loss": 17.3364, + "step": 27018 + }, + { + "epoch": 0.49388560878863763, + "grad_norm": 5.391532742953372, + "learning_rate": 5.3419038219760215e-06, + "loss": 17.1496, + "step": 27019 + }, + { + "epoch": 0.49390388798508417, + "grad_norm": 5.359108971593141, + "learning_rate": 5.341608502714395e-06, + "loss": 17.1226, + "step": 27020 + }, + { + "epoch": 0.4939221671815307, + "grad_norm": 7.004731176418986, + "learning_rate": 5.34131318225546e-06, + "loss": 17.8174, + "step": 27021 + }, + { + "epoch": 0.49394044637797724, + "grad_norm": 5.484664541987583, + "learning_rate": 5.341017860600253e-06, + "loss": 17.1402, + "step": 27022 + }, + { + "epoch": 0.4939587255744238, + "grad_norm": 6.371429794989805, + "learning_rate": 5.340722537749807e-06, + "loss": 17.494, + "step": 27023 + }, + { + "epoch": 0.49397700477087025, + "grad_norm": 7.01453052158817, + "learning_rate": 5.340427213705159e-06, + "loss": 17.7144, + "step": 27024 + }, + { + "epoch": 0.4939952839673168, + "grad_norm": 6.078159804931989, + "learning_rate": 5.340131888467344e-06, + "loss": 17.478, + "step": 27025 + }, + { + "epoch": 0.4940135631637633, + "grad_norm": 6.5826178827557875, + "learning_rate": 5.3398365620373935e-06, + "loss": 17.4035, + "step": 27026 + }, + { + "epoch": 0.49403184236020986, + "grad_norm": 6.41085703611692, + "learning_rate": 5.339541234416348e-06, + "loss": 17.4888, + "step": 27027 + }, + { + "epoch": 0.4940501215566564, + "grad_norm": 6.471313237406635, + "learning_rate": 5.3392459056052385e-06, + "loss": 17.6197, + "step": 27028 + }, + { + "epoch": 0.4940684007531029, + "grad_norm": 6.759620568912845, + "learning_rate": 5.338950575605103e-06, + "loss": 17.5151, + "step": 27029 + }, + { + "epoch": 0.4940866799495494, + "grad_norm": 5.726145059844037, + "learning_rate": 5.338655244416974e-06, + "loss": 17.1632, + "step": 27030 + }, + { + "epoch": 0.49410495914599595, + "grad_norm": 7.367143100038594, + "learning_rate": 5.3383599120418865e-06, + "loss": 17.8241, + "step": 27031 + }, + { + "epoch": 0.4941232383424425, + "grad_norm": 8.401285293444744, + "learning_rate": 5.338064578480879e-06, + "loss": 18.5157, + "step": 27032 + }, + { + "epoch": 0.49414151753888896, + "grad_norm": 7.261683111609722, + "learning_rate": 5.3377692437349835e-06, + "loss": 17.8886, + "step": 27033 + }, + { + 
"epoch": 0.4941597967353355, + "grad_norm": 6.532766534634735, + "learning_rate": 5.3374739078052366e-06, + "loss": 17.487, + "step": 27034 + }, + { + "epoch": 0.49417807593178203, + "grad_norm": 5.855344901640332, + "learning_rate": 5.3371785706926725e-06, + "loss": 17.1961, + "step": 27035 + }, + { + "epoch": 0.49419635512822857, + "grad_norm": 6.000774744402495, + "learning_rate": 5.336883232398326e-06, + "loss": 17.4634, + "step": 27036 + }, + { + "epoch": 0.4942146343246751, + "grad_norm": 6.4939483382731416, + "learning_rate": 5.336587892923235e-06, + "loss": 17.3718, + "step": 27037 + }, + { + "epoch": 0.4942329135211216, + "grad_norm": 8.90333019290219, + "learning_rate": 5.336292552268432e-06, + "loss": 18.2913, + "step": 27038 + }, + { + "epoch": 0.4942511927175681, + "grad_norm": 5.384420330075554, + "learning_rate": 5.335997210434953e-06, + "loss": 17.1865, + "step": 27039 + }, + { + "epoch": 0.49426947191401466, + "grad_norm": 6.191513432394301, + "learning_rate": 5.33570186742383e-06, + "loss": 17.3732, + "step": 27040 + }, + { + "epoch": 0.4942877511104612, + "grad_norm": 6.0282047564718555, + "learning_rate": 5.335406523236104e-06, + "loss": 17.5049, + "step": 27041 + }, + { + "epoch": 0.49430603030690773, + "grad_norm": 7.34642269136078, + "learning_rate": 5.3351111778728075e-06, + "loss": 17.7227, + "step": 27042 + }, + { + "epoch": 0.4943243095033542, + "grad_norm": 6.347294744552038, + "learning_rate": 5.334815831334975e-06, + "loss": 17.4325, + "step": 27043 + }, + { + "epoch": 0.49434258869980074, + "grad_norm": 5.738275868115558, + "learning_rate": 5.334520483623641e-06, + "loss": 17.1608, + "step": 27044 + }, + { + "epoch": 0.4943608678962473, + "grad_norm": 6.983004953804565, + "learning_rate": 5.334225134739841e-06, + "loss": 17.4832, + "step": 27045 + }, + { + "epoch": 0.4943791470926938, + "grad_norm": 5.8536804263533, + "learning_rate": 5.333929784684614e-06, + "loss": 17.2471, + "step": 27046 + }, + { + "epoch": 0.49439742628914035, + "grad_norm": 6.337725509820828, + "learning_rate": 5.333634433458989e-06, + "loss": 17.3965, + "step": 27047 + }, + { + "epoch": 0.49441570548558683, + "grad_norm": 5.4616529150627215, + "learning_rate": 5.333339081064006e-06, + "loss": 17.2323, + "step": 27048 + }, + { + "epoch": 0.49443398468203337, + "grad_norm": 6.1375526015468305, + "learning_rate": 5.3330437275006985e-06, + "loss": 17.3927, + "step": 27049 + }, + { + "epoch": 0.4944522638784799, + "grad_norm": 6.460811066663535, + "learning_rate": 5.3327483727701015e-06, + "loss": 17.4909, + "step": 27050 + }, + { + "epoch": 0.49447054307492644, + "grad_norm": 7.0460329858769954, + "learning_rate": 5.3324530168732495e-06, + "loss": 17.8015, + "step": 27051 + }, + { + "epoch": 0.494488822271373, + "grad_norm": 6.81539654948131, + "learning_rate": 5.332157659811179e-06, + "loss": 17.6895, + "step": 27052 + }, + { + "epoch": 0.49450710146781945, + "grad_norm": 5.905972951436398, + "learning_rate": 5.331862301584925e-06, + "loss": 17.2996, + "step": 27053 + }, + { + "epoch": 0.494525380664266, + "grad_norm": 6.84792720692525, + "learning_rate": 5.331566942195522e-06, + "loss": 17.8233, + "step": 27054 + }, + { + "epoch": 0.4945436598607125, + "grad_norm": 6.546110597837149, + "learning_rate": 5.331271581644005e-06, + "loss": 17.1846, + "step": 27055 + }, + { + "epoch": 0.49456193905715906, + "grad_norm": 6.034470999865652, + "learning_rate": 5.3309762199314115e-06, + "loss": 17.1926, + "step": 27056 + }, + { + "epoch": 0.4945802182536056, + "grad_norm": 6.678728078849492, + 
"learning_rate": 5.330680857058774e-06, + "loss": 17.3395, + "step": 27057 + }, + { + "epoch": 0.4945984974500521, + "grad_norm": 5.890798850145908, + "learning_rate": 5.330385493027128e-06, + "loss": 17.2562, + "step": 27058 + }, + { + "epoch": 0.4946167766464986, + "grad_norm": 5.659396926385291, + "learning_rate": 5.330090127837511e-06, + "loss": 17.1656, + "step": 27059 + }, + { + "epoch": 0.49463505584294515, + "grad_norm": 6.4122559835855, + "learning_rate": 5.329794761490957e-06, + "loss": 17.6316, + "step": 27060 + }, + { + "epoch": 0.4946533350393917, + "grad_norm": 7.444983281994059, + "learning_rate": 5.329499393988501e-06, + "loss": 17.8522, + "step": 27061 + }, + { + "epoch": 0.4946716142358382, + "grad_norm": 5.868080111913281, + "learning_rate": 5.3292040253311774e-06, + "loss": 17.0549, + "step": 27062 + }, + { + "epoch": 0.4946898934322847, + "grad_norm": 6.43927321499535, + "learning_rate": 5.328908655520022e-06, + "loss": 17.5224, + "step": 27063 + }, + { + "epoch": 0.49470817262873124, + "grad_norm": 6.275055806201355, + "learning_rate": 5.32861328455607e-06, + "loss": 17.5626, + "step": 27064 + }, + { + "epoch": 0.49472645182517777, + "grad_norm": 5.874244766501496, + "learning_rate": 5.328317912440358e-06, + "loss": 17.3449, + "step": 27065 + }, + { + "epoch": 0.4947447310216243, + "grad_norm": 6.002735749726299, + "learning_rate": 5.328022539173919e-06, + "loss": 17.4166, + "step": 27066 + }, + { + "epoch": 0.4947630102180708, + "grad_norm": 7.078313016909611, + "learning_rate": 5.327727164757791e-06, + "loss": 17.8288, + "step": 27067 + }, + { + "epoch": 0.4947812894145173, + "grad_norm": 7.211479667554601, + "learning_rate": 5.3274317891930075e-06, + "loss": 17.7986, + "step": 27068 + }, + { + "epoch": 0.49479956861096386, + "grad_norm": 6.49780210498228, + "learning_rate": 5.327136412480603e-06, + "loss": 17.5079, + "step": 27069 + }, + { + "epoch": 0.4948178478074104, + "grad_norm": 4.962388526502773, + "learning_rate": 5.3268410346216146e-06, + "loss": 17.0274, + "step": 27070 + }, + { + "epoch": 0.49483612700385693, + "grad_norm": 8.588909027113303, + "learning_rate": 5.326545655617077e-06, + "loss": 18.572, + "step": 27071 + }, + { + "epoch": 0.4948544062003034, + "grad_norm": 7.909936946394288, + "learning_rate": 5.326250275468023e-06, + "loss": 17.8187, + "step": 27072 + }, + { + "epoch": 0.49487268539674995, + "grad_norm": 7.79164215084259, + "learning_rate": 5.325954894175491e-06, + "loss": 18.0844, + "step": 27073 + }, + { + "epoch": 0.4948909645931965, + "grad_norm": 6.507166316716315, + "learning_rate": 5.325659511740518e-06, + "loss": 17.4294, + "step": 27074 + }, + { + "epoch": 0.494909243789643, + "grad_norm": 7.045042200629096, + "learning_rate": 5.325364128164134e-06, + "loss": 17.4291, + "step": 27075 + }, + { + "epoch": 0.49492752298608955, + "grad_norm": 6.5913797952025615, + "learning_rate": 5.325068743447378e-06, + "loss": 17.5557, + "step": 27076 + }, + { + "epoch": 0.49494580218253603, + "grad_norm": 7.157139359265984, + "learning_rate": 5.324773357591284e-06, + "loss": 17.8728, + "step": 27077 + }, + { + "epoch": 0.49496408137898257, + "grad_norm": 6.286609264768648, + "learning_rate": 5.324477970596887e-06, + "loss": 17.2988, + "step": 27078 + }, + { + "epoch": 0.4949823605754291, + "grad_norm": 6.139125567440642, + "learning_rate": 5.324182582465224e-06, + "loss": 17.442, + "step": 27079 + }, + { + "epoch": 0.49500063977187564, + "grad_norm": 5.286657223187777, + "learning_rate": 5.323887193197328e-06, + "loss": 17.0214, + "step": 27080 + 
}, + { + "epoch": 0.4950189189683222, + "grad_norm": 6.0313338055455175, + "learning_rate": 5.323591802794237e-06, + "loss": 17.1108, + "step": 27081 + }, + { + "epoch": 0.49503719816476865, + "grad_norm": 6.663827894606453, + "learning_rate": 5.323296411256983e-06, + "loss": 17.822, + "step": 27082 + }, + { + "epoch": 0.4950554773612152, + "grad_norm": 5.749880403608699, + "learning_rate": 5.323001018586604e-06, + "loss": 17.1773, + "step": 27083 + }, + { + "epoch": 0.4950737565576617, + "grad_norm": 5.834371956386388, + "learning_rate": 5.322705624784136e-06, + "loss": 17.2421, + "step": 27084 + }, + { + "epoch": 0.49509203575410826, + "grad_norm": 6.237005739814264, + "learning_rate": 5.32241022985061e-06, + "loss": 17.2866, + "step": 27085 + }, + { + "epoch": 0.4951103149505548, + "grad_norm": 5.823408136601172, + "learning_rate": 5.322114833787066e-06, + "loss": 17.2565, + "step": 27086 + }, + { + "epoch": 0.4951285941470013, + "grad_norm": 7.479932089479094, + "learning_rate": 5.321819436594536e-06, + "loss": 17.7947, + "step": 27087 + }, + { + "epoch": 0.4951468733434478, + "grad_norm": 6.271746162433983, + "learning_rate": 5.321524038274058e-06, + "loss": 17.3551, + "step": 27088 + }, + { + "epoch": 0.49516515253989435, + "grad_norm": 5.516386190837085, + "learning_rate": 5.321228638826667e-06, + "loss": 17.2787, + "step": 27089 + }, + { + "epoch": 0.4951834317363409, + "grad_norm": 6.51806980194292, + "learning_rate": 5.320933238253394e-06, + "loss": 17.2764, + "step": 27090 + }, + { + "epoch": 0.4952017109327874, + "grad_norm": 6.505095705765514, + "learning_rate": 5.320637836555282e-06, + "loss": 17.3872, + "step": 27091 + }, + { + "epoch": 0.4952199901292339, + "grad_norm": 6.008812418764353, + "learning_rate": 5.32034243373336e-06, + "loss": 17.4496, + "step": 27092 + }, + { + "epoch": 0.49523826932568044, + "grad_norm": 6.132994346673277, + "learning_rate": 5.320047029788665e-06, + "loss": 17.3415, + "step": 27093 + }, + { + "epoch": 0.49525654852212697, + "grad_norm": 6.76828084705253, + "learning_rate": 5.319751624722235e-06, + "loss": 17.7391, + "step": 27094 + }, + { + "epoch": 0.4952748277185735, + "grad_norm": 4.852920399836946, + "learning_rate": 5.319456218535102e-06, + "loss": 16.8498, + "step": 27095 + }, + { + "epoch": 0.49529310691502004, + "grad_norm": 8.37845213462456, + "learning_rate": 5.3191608112283026e-06, + "loss": 18.22, + "step": 27096 + }, + { + "epoch": 0.4953113861114665, + "grad_norm": 5.510644493946002, + "learning_rate": 5.318865402802872e-06, + "loss": 17.079, + "step": 27097 + }, + { + "epoch": 0.49532966530791306, + "grad_norm": 5.823363752835767, + "learning_rate": 5.318569993259848e-06, + "loss": 17.3107, + "step": 27098 + }, + { + "epoch": 0.4953479445043596, + "grad_norm": 5.763510154128105, + "learning_rate": 5.31827458260026e-06, + "loss": 17.5461, + "step": 27099 + }, + { + "epoch": 0.49536622370080613, + "grad_norm": 6.918820810921189, + "learning_rate": 5.317979170825149e-06, + "loss": 17.4873, + "step": 27100 + }, + { + "epoch": 0.4953845028972526, + "grad_norm": 7.989744572692749, + "learning_rate": 5.31768375793555e-06, + "loss": 18.4143, + "step": 27101 + }, + { + "epoch": 0.49540278209369915, + "grad_norm": 7.214879095416795, + "learning_rate": 5.317388343932497e-06, + "loss": 17.8851, + "step": 27102 + }, + { + "epoch": 0.4954210612901457, + "grad_norm": 5.8630533633346245, + "learning_rate": 5.3170929288170235e-06, + "loss": 17.2316, + "step": 27103 + }, + { + "epoch": 0.4954393404865922, + "grad_norm": 5.838223225066707, + 
"learning_rate": 5.316797512590166e-06, + "loss": 17.0889, + "step": 27104 + }, + { + "epoch": 0.49545761968303875, + "grad_norm": 8.490065189147296, + "learning_rate": 5.316502095252964e-06, + "loss": 17.939, + "step": 27105 + }, + { + "epoch": 0.49547589887948523, + "grad_norm": 6.947018162341639, + "learning_rate": 5.316206676806448e-06, + "loss": 17.8263, + "step": 27106 + }, + { + "epoch": 0.49549417807593177, + "grad_norm": 6.530436899887638, + "learning_rate": 5.315911257251655e-06, + "loss": 17.5459, + "step": 27107 + }, + { + "epoch": 0.4955124572723783, + "grad_norm": 5.139972165119779, + "learning_rate": 5.31561583658962e-06, + "loss": 16.8613, + "step": 27108 + }, + { + "epoch": 0.49553073646882484, + "grad_norm": 5.007554461914014, + "learning_rate": 5.315320414821379e-06, + "loss": 16.9549, + "step": 27109 + }, + { + "epoch": 0.4955490156652714, + "grad_norm": 6.531020566255451, + "learning_rate": 5.3150249919479676e-06, + "loss": 17.5633, + "step": 27110 + }, + { + "epoch": 0.49556729486171786, + "grad_norm": 6.7268024899273655, + "learning_rate": 5.314729567970421e-06, + "loss": 17.2107, + "step": 27111 + }, + { + "epoch": 0.4955855740581644, + "grad_norm": 5.763513281309919, + "learning_rate": 5.314434142889776e-06, + "loss": 17.3185, + "step": 27112 + }, + { + "epoch": 0.4956038532546109, + "grad_norm": 5.735780970376321, + "learning_rate": 5.314138716707063e-06, + "loss": 16.6206, + "step": 27113 + }, + { + "epoch": 0.49562213245105746, + "grad_norm": 6.094915498236935, + "learning_rate": 5.313843289423324e-06, + "loss": 17.5662, + "step": 27114 + }, + { + "epoch": 0.495640411647504, + "grad_norm": 7.193393884445647, + "learning_rate": 5.313547861039592e-06, + "loss": 17.6936, + "step": 27115 + }, + { + "epoch": 0.4956586908439505, + "grad_norm": 6.116919446868163, + "learning_rate": 5.3132524315569e-06, + "loss": 17.6923, + "step": 27116 + }, + { + "epoch": 0.495676970040397, + "grad_norm": 6.071783068354307, + "learning_rate": 5.3129570009762864e-06, + "loss": 17.4997, + "step": 27117 + }, + { + "epoch": 0.49569524923684355, + "grad_norm": 7.36498956045436, + "learning_rate": 5.312661569298784e-06, + "loss": 17.9898, + "step": 27118 + }, + { + "epoch": 0.4957135284332901, + "grad_norm": 6.513731500749691, + "learning_rate": 5.312366136525433e-06, + "loss": 17.376, + "step": 27119 + }, + { + "epoch": 0.4957318076297366, + "grad_norm": 5.943945414772952, + "learning_rate": 5.312070702657264e-06, + "loss": 16.9633, + "step": 27120 + }, + { + "epoch": 0.4957500868261831, + "grad_norm": 7.317799352604877, + "learning_rate": 5.311775267695314e-06, + "loss": 17.541, + "step": 27121 + }, + { + "epoch": 0.49576836602262964, + "grad_norm": 6.678806429324938, + "learning_rate": 5.311479831640619e-06, + "loss": 17.6554, + "step": 27122 + }, + { + "epoch": 0.49578664521907617, + "grad_norm": 6.422061144880449, + "learning_rate": 5.311184394494214e-06, + "loss": 17.6898, + "step": 27123 + }, + { + "epoch": 0.4958049244155227, + "grad_norm": 7.257743893993381, + "learning_rate": 5.310888956257135e-06, + "loss": 17.7341, + "step": 27124 + }, + { + "epoch": 0.49582320361196924, + "grad_norm": 5.938111724825104, + "learning_rate": 5.310593516930418e-06, + "loss": 17.3035, + "step": 27125 + }, + { + "epoch": 0.4958414828084157, + "grad_norm": 6.100282441446354, + "learning_rate": 5.310298076515096e-06, + "loss": 17.5533, + "step": 27126 + }, + { + "epoch": 0.49585976200486226, + "grad_norm": 6.40958138518579, + "learning_rate": 5.310002635012207e-06, + "loss": 17.5581, + "step": 27127 + 
}, + { + "epoch": 0.4958780412013088, + "grad_norm": 6.904282606317481, + "learning_rate": 5.309707192422786e-06, + "loss": 17.818, + "step": 27128 + }, + { + "epoch": 0.49589632039775533, + "grad_norm": 8.793520044325492, + "learning_rate": 5.309411748747869e-06, + "loss": 17.8181, + "step": 27129 + }, + { + "epoch": 0.49591459959420187, + "grad_norm": 6.398017389891622, + "learning_rate": 5.309116303988488e-06, + "loss": 17.5039, + "step": 27130 + }, + { + "epoch": 0.49593287879064835, + "grad_norm": 7.251971439581628, + "learning_rate": 5.308820858145682e-06, + "loss": 17.5048, + "step": 27131 + }, + { + "epoch": 0.4959511579870949, + "grad_norm": 6.404215997284142, + "learning_rate": 5.308525411220488e-06, + "loss": 17.4336, + "step": 27132 + }, + { + "epoch": 0.4959694371835414, + "grad_norm": 7.729486686990656, + "learning_rate": 5.3082299632139375e-06, + "loss": 17.9189, + "step": 27133 + }, + { + "epoch": 0.49598771637998795, + "grad_norm": 5.949653232636323, + "learning_rate": 5.307934514127068e-06, + "loss": 17.5042, + "step": 27134 + }, + { + "epoch": 0.49600599557643443, + "grad_norm": 5.5871038029272935, + "learning_rate": 5.3076390639609146e-06, + "loss": 17.4189, + "step": 27135 + }, + { + "epoch": 0.49602427477288097, + "grad_norm": 6.966548112460395, + "learning_rate": 5.307343612716512e-06, + "loss": 17.5541, + "step": 27136 + }, + { + "epoch": 0.4960425539693275, + "grad_norm": 7.007021036075989, + "learning_rate": 5.307048160394899e-06, + "loss": 17.6978, + "step": 27137 + }, + { + "epoch": 0.49606083316577404, + "grad_norm": 8.397875547905214, + "learning_rate": 5.306752706997107e-06, + "loss": 18.4, + "step": 27138 + }, + { + "epoch": 0.4960791123622206, + "grad_norm": 6.045377634488545, + "learning_rate": 5.306457252524176e-06, + "loss": 17.4589, + "step": 27139 + }, + { + "epoch": 0.49609739155866706, + "grad_norm": 6.331855818214131, + "learning_rate": 5.306161796977134e-06, + "loss": 17.2416, + "step": 27140 + }, + { + "epoch": 0.4961156707551136, + "grad_norm": 7.340035417544372, + "learning_rate": 5.305866340357024e-06, + "loss": 17.6641, + "step": 27141 + }, + { + "epoch": 0.4961339499515601, + "grad_norm": 6.171788505845319, + "learning_rate": 5.30557088266488e-06, + "loss": 17.1048, + "step": 27142 + }, + { + "epoch": 0.49615222914800666, + "grad_norm": 5.457816041538035, + "learning_rate": 5.305275423901737e-06, + "loss": 17.3584, + "step": 27143 + }, + { + "epoch": 0.4961705083444532, + "grad_norm": 7.613004108036592, + "learning_rate": 5.304979964068628e-06, + "loss": 18.1483, + "step": 27144 + }, + { + "epoch": 0.4961887875408997, + "grad_norm": 5.481668594229304, + "learning_rate": 5.3046845031665915e-06, + "loss": 17.0934, + "step": 27145 + }, + { + "epoch": 0.4962070667373462, + "grad_norm": 7.39994442611417, + "learning_rate": 5.304389041196664e-06, + "loss": 17.4173, + "step": 27146 + }, + { + "epoch": 0.49622534593379275, + "grad_norm": 5.7025844735581455, + "learning_rate": 5.304093578159877e-06, + "loss": 17.2323, + "step": 27147 + }, + { + "epoch": 0.4962436251302393, + "grad_norm": 7.30143375176874, + "learning_rate": 5.303798114057269e-06, + "loss": 18.0775, + "step": 27148 + }, + { + "epoch": 0.4962619043266858, + "grad_norm": 5.696227811299683, + "learning_rate": 5.3035026488898754e-06, + "loss": 17.4022, + "step": 27149 + }, + { + "epoch": 0.4962801835231323, + "grad_norm": 6.530643656227143, + "learning_rate": 5.30320718265873e-06, + "loss": 17.6637, + "step": 27150 + }, + { + "epoch": 0.49629846271957884, + "grad_norm": 6.822978267413106, 
+ "learning_rate": 5.302911715364871e-06, + "loss": 17.8557, + "step": 27151 + }, + { + "epoch": 0.4963167419160254, + "grad_norm": 7.268598916926087, + "learning_rate": 5.3026162470093335e-06, + "loss": 17.7663, + "step": 27152 + }, + { + "epoch": 0.4963350211124719, + "grad_norm": 7.921854843630647, + "learning_rate": 5.30232077759315e-06, + "loss": 18.2357, + "step": 27153 + }, + { + "epoch": 0.49635330030891844, + "grad_norm": 6.040172703825464, + "learning_rate": 5.302025307117361e-06, + "loss": 17.5834, + "step": 27154 + }, + { + "epoch": 0.4963715795053649, + "grad_norm": 6.33443303987812, + "learning_rate": 5.301729835582998e-06, + "loss": 17.5087, + "step": 27155 + }, + { + "epoch": 0.49638985870181146, + "grad_norm": 6.399351837295014, + "learning_rate": 5.301434362991099e-06, + "loss": 17.5478, + "step": 27156 + }, + { + "epoch": 0.496408137898258, + "grad_norm": 7.275944016496439, + "learning_rate": 5.301138889342698e-06, + "loss": 18.0614, + "step": 27157 + }, + { + "epoch": 0.49642641709470453, + "grad_norm": 7.654295357271992, + "learning_rate": 5.300843414638831e-06, + "loss": 17.9039, + "step": 27158 + }, + { + "epoch": 0.49644469629115107, + "grad_norm": 6.381527232148551, + "learning_rate": 5.3005479388805335e-06, + "loss": 17.747, + "step": 27159 + }, + { + "epoch": 0.49646297548759755, + "grad_norm": 7.172154294659397, + "learning_rate": 5.300252462068845e-06, + "loss": 17.5675, + "step": 27160 + }, + { + "epoch": 0.4964812546840441, + "grad_norm": 5.899237087590126, + "learning_rate": 5.299956984204794e-06, + "loss": 17.3641, + "step": 27161 + }, + { + "epoch": 0.4964995338804906, + "grad_norm": 6.293315909695069, + "learning_rate": 5.299661505289421e-06, + "loss": 17.1642, + "step": 27162 + }, + { + "epoch": 0.49651781307693715, + "grad_norm": 7.239253128344299, + "learning_rate": 5.29936602532376e-06, + "loss": 17.3627, + "step": 27163 + }, + { + "epoch": 0.4965360922733837, + "grad_norm": 7.336105727193535, + "learning_rate": 5.299070544308847e-06, + "loss": 18.1346, + "step": 27164 + }, + { + "epoch": 0.49655437146983017, + "grad_norm": 6.574231614732913, + "learning_rate": 5.298775062245719e-06, + "loss": 17.4154, + "step": 27165 + }, + { + "epoch": 0.4965726506662767, + "grad_norm": 4.892042150079644, + "learning_rate": 5.298479579135409e-06, + "loss": 17.0709, + "step": 27166 + }, + { + "epoch": 0.49659092986272324, + "grad_norm": 6.696837834447044, + "learning_rate": 5.2981840949789546e-06, + "loss": 17.3769, + "step": 27167 + }, + { + "epoch": 0.4966092090591698, + "grad_norm": 7.3244109929235295, + "learning_rate": 5.297888609777391e-06, + "loss": 17.7439, + "step": 27168 + }, + { + "epoch": 0.49662748825561626, + "grad_norm": 5.75513126188906, + "learning_rate": 5.2975931235317525e-06, + "loss": 17.3198, + "step": 27169 + }, + { + "epoch": 0.4966457674520628, + "grad_norm": 6.675387476127053, + "learning_rate": 5.297297636243077e-06, + "loss": 17.27, + "step": 27170 + }, + { + "epoch": 0.4966640466485093, + "grad_norm": 6.81580139447009, + "learning_rate": 5.2970021479124e-06, + "loss": 17.6338, + "step": 27171 + }, + { + "epoch": 0.49668232584495586, + "grad_norm": 6.732773707828087, + "learning_rate": 5.296706658540753e-06, + "loss": 17.3781, + "step": 27172 + }, + { + "epoch": 0.4967006050414024, + "grad_norm": 5.441919668441327, + "learning_rate": 5.296411168129177e-06, + "loss": 17.0593, + "step": 27173 + }, + { + "epoch": 0.4967188842378489, + "grad_norm": 6.315453747522237, + "learning_rate": 5.296115676678707e-06, + "loss": 17.4357, + "step": 27174 
+ }, + { + "epoch": 0.4967371634342954, + "grad_norm": 7.056538431335537, + "learning_rate": 5.2958201841903754e-06, + "loss": 17.6652, + "step": 27175 + }, + { + "epoch": 0.49675544263074195, + "grad_norm": 5.821744196774932, + "learning_rate": 5.295524690665221e-06, + "loss": 17.2166, + "step": 27176 + }, + { + "epoch": 0.4967737218271885, + "grad_norm": 7.502599944919671, + "learning_rate": 5.295229196104277e-06, + "loss": 18.0384, + "step": 27177 + }, + { + "epoch": 0.496792001023635, + "grad_norm": 5.019815368437581, + "learning_rate": 5.2949337005085795e-06, + "loss": 17.0715, + "step": 27178 + }, + { + "epoch": 0.4968102802200815, + "grad_norm": 6.0498780514001815, + "learning_rate": 5.294638203879167e-06, + "loss": 17.3159, + "step": 27179 + }, + { + "epoch": 0.49682855941652804, + "grad_norm": 6.728427255070438, + "learning_rate": 5.294342706217072e-06, + "loss": 18.0954, + "step": 27180 + }, + { + "epoch": 0.4968468386129746, + "grad_norm": 6.383029542631247, + "learning_rate": 5.294047207523332e-06, + "loss": 17.4091, + "step": 27181 + }, + { + "epoch": 0.4968651178094211, + "grad_norm": 5.8332534112216745, + "learning_rate": 5.293751707798981e-06, + "loss": 17.1511, + "step": 27182 + }, + { + "epoch": 0.49688339700586764, + "grad_norm": 5.88775996750707, + "learning_rate": 5.293456207045056e-06, + "loss": 17.2777, + "step": 27183 + }, + { + "epoch": 0.4969016762023141, + "grad_norm": 7.75262316026169, + "learning_rate": 5.293160705262594e-06, + "loss": 17.8964, + "step": 27184 + }, + { + "epoch": 0.49691995539876066, + "grad_norm": 6.520555094380181, + "learning_rate": 5.292865202452628e-06, + "loss": 17.6149, + "step": 27185 + }, + { + "epoch": 0.4969382345952072, + "grad_norm": 6.358301365429968, + "learning_rate": 5.2925696986161935e-06, + "loss": 17.1833, + "step": 27186 + }, + { + "epoch": 0.49695651379165373, + "grad_norm": 5.566572725779698, + "learning_rate": 5.2922741937543294e-06, + "loss": 17.2679, + "step": 27187 + }, + { + "epoch": 0.49697479298810027, + "grad_norm": 6.276243680080803, + "learning_rate": 5.2919786878680705e-06, + "loss": 17.5834, + "step": 27188 + }, + { + "epoch": 0.49699307218454675, + "grad_norm": 5.914734083121124, + "learning_rate": 5.29168318095845e-06, + "loss": 17.2533, + "step": 27189 + }, + { + "epoch": 0.4970113513809933, + "grad_norm": 7.3116436347415945, + "learning_rate": 5.291387673026505e-06, + "loss": 17.3719, + "step": 27190 + }, + { + "epoch": 0.4970296305774398, + "grad_norm": 6.664224589792451, + "learning_rate": 5.291092164073273e-06, + "loss": 17.7188, + "step": 27191 + }, + { + "epoch": 0.49704790977388635, + "grad_norm": 7.468512860219787, + "learning_rate": 5.290796654099787e-06, + "loss": 17.5344, + "step": 27192 + }, + { + "epoch": 0.4970661889703329, + "grad_norm": 5.7942198892057695, + "learning_rate": 5.2905011431070845e-06, + "loss": 17.0086, + "step": 27193 + }, + { + "epoch": 0.49708446816677937, + "grad_norm": 7.135569698803995, + "learning_rate": 5.2902056310962005e-06, + "loss": 17.7024, + "step": 27194 + }, + { + "epoch": 0.4971027473632259, + "grad_norm": 8.383877195301391, + "learning_rate": 5.28991011806817e-06, + "loss": 17.6119, + "step": 27195 + }, + { + "epoch": 0.49712102655967244, + "grad_norm": 6.165860670145469, + "learning_rate": 5.2896146040240305e-06, + "loss": 17.3282, + "step": 27196 + }, + { + "epoch": 0.497139305756119, + "grad_norm": 7.3823915368426185, + "learning_rate": 5.289319088964817e-06, + "loss": 17.7051, + "step": 27197 + }, + { + "epoch": 0.4971575849525655, + "grad_norm": 
5.654115886268558, + "learning_rate": 5.289023572891567e-06, + "loss": 17.1707, + "step": 27198 + }, + { + "epoch": 0.497175864149012, + "grad_norm": 7.867968063707022, + "learning_rate": 5.288728055805311e-06, + "loss": 18.0214, + "step": 27199 + }, + { + "epoch": 0.49719414334545853, + "grad_norm": 7.252312509773188, + "learning_rate": 5.28843253770709e-06, + "loss": 17.5912, + "step": 27200 + }, + { + "epoch": 0.49721242254190506, + "grad_norm": 6.8954169769120055, + "learning_rate": 5.288137018597939e-06, + "loss": 17.8047, + "step": 27201 + }, + { + "epoch": 0.4972307017383516, + "grad_norm": 7.892652262769014, + "learning_rate": 5.287841498478892e-06, + "loss": 17.9997, + "step": 27202 + }, + { + "epoch": 0.4972489809347981, + "grad_norm": 5.391683692320803, + "learning_rate": 5.287545977350985e-06, + "loss": 17.2399, + "step": 27203 + }, + { + "epoch": 0.4972672601312446, + "grad_norm": 7.162900747616963, + "learning_rate": 5.287250455215254e-06, + "loss": 17.7604, + "step": 27204 + }, + { + "epoch": 0.49728553932769115, + "grad_norm": 6.745080042105251, + "learning_rate": 5.2869549320727355e-06, + "loss": 17.527, + "step": 27205 + }, + { + "epoch": 0.4973038185241377, + "grad_norm": 7.4739556135528, + "learning_rate": 5.286659407924465e-06, + "loss": 17.9076, + "step": 27206 + }, + { + "epoch": 0.4973220977205842, + "grad_norm": 6.454025515691937, + "learning_rate": 5.286363882771478e-06, + "loss": 17.8487, + "step": 27207 + }, + { + "epoch": 0.4973403769170307, + "grad_norm": 5.179353665563951, + "learning_rate": 5.2860683566148105e-06, + "loss": 17.1066, + "step": 27208 + }, + { + "epoch": 0.49735865611347724, + "grad_norm": 5.0208671925141815, + "learning_rate": 5.285772829455499e-06, + "loss": 17.0062, + "step": 27209 + }, + { + "epoch": 0.4973769353099238, + "grad_norm": 6.331156785180926, + "learning_rate": 5.285477301294577e-06, + "loss": 17.5236, + "step": 27210 + }, + { + "epoch": 0.4973952145063703, + "grad_norm": 6.954132012161971, + "learning_rate": 5.2851817721330835e-06, + "loss": 17.7691, + "step": 27211 + }, + { + "epoch": 0.49741349370281684, + "grad_norm": 7.436982095131251, + "learning_rate": 5.284886241972051e-06, + "loss": 17.9956, + "step": 27212 + }, + { + "epoch": 0.4974317728992633, + "grad_norm": 6.021238119244414, + "learning_rate": 5.284590710812519e-06, + "loss": 17.1315, + "step": 27213 + }, + { + "epoch": 0.49745005209570986, + "grad_norm": 5.331703064694421, + "learning_rate": 5.284295178655518e-06, + "loss": 16.984, + "step": 27214 + }, + { + "epoch": 0.4974683312921564, + "grad_norm": 6.639442860072865, + "learning_rate": 5.283999645502091e-06, + "loss": 17.5802, + "step": 27215 + }, + { + "epoch": 0.49748661048860293, + "grad_norm": 6.296819913586566, + "learning_rate": 5.283704111353267e-06, + "loss": 17.7537, + "step": 27216 + }, + { + "epoch": 0.49750488968504947, + "grad_norm": 5.949604943756755, + "learning_rate": 5.283408576210085e-06, + "loss": 17.375, + "step": 27217 + }, + { + "epoch": 0.49752316888149595, + "grad_norm": 5.874507368310843, + "learning_rate": 5.283113040073581e-06, + "loss": 17.2806, + "step": 27218 + }, + { + "epoch": 0.4975414480779425, + "grad_norm": 5.321096736482824, + "learning_rate": 5.282817502944791e-06, + "loss": 16.9946, + "step": 27219 + }, + { + "epoch": 0.497559727274389, + "grad_norm": 4.867627458878209, + "learning_rate": 5.28252196482475e-06, + "loss": 16.8596, + "step": 27220 + }, + { + "epoch": 0.49757800647083555, + "grad_norm": 5.657588405964617, + "learning_rate": 5.282226425714494e-06, + "loss": 
17.1736, + "step": 27221 + }, + { + "epoch": 0.4975962856672821, + "grad_norm": 7.688874444755954, + "learning_rate": 5.281930885615059e-06, + "loss": 17.4771, + "step": 27222 + }, + { + "epoch": 0.49761456486372857, + "grad_norm": 7.27819314632887, + "learning_rate": 5.28163534452748e-06, + "loss": 18.0137, + "step": 27223 + }, + { + "epoch": 0.4976328440601751, + "grad_norm": 7.227627704917986, + "learning_rate": 5.281339802452794e-06, + "loss": 17.845, + "step": 27224 + }, + { + "epoch": 0.49765112325662164, + "grad_norm": 6.586557711148001, + "learning_rate": 5.281044259392038e-06, + "loss": 17.5847, + "step": 27225 + }, + { + "epoch": 0.4976694024530682, + "grad_norm": 6.729547602989797, + "learning_rate": 5.280748715346242e-06, + "loss": 17.7012, + "step": 27226 + }, + { + "epoch": 0.4976876816495147, + "grad_norm": 6.039794973030868, + "learning_rate": 5.28045317031645e-06, + "loss": 17.4433, + "step": 27227 + }, + { + "epoch": 0.4977059608459612, + "grad_norm": 5.757563907346949, + "learning_rate": 5.280157624303692e-06, + "loss": 17.0479, + "step": 27228 + }, + { + "epoch": 0.49772424004240773, + "grad_norm": 7.145382119795324, + "learning_rate": 5.279862077309007e-06, + "loss": 17.6854, + "step": 27229 + }, + { + "epoch": 0.49774251923885426, + "grad_norm": 6.90161787296418, + "learning_rate": 5.27956652933343e-06, + "loss": 17.8546, + "step": 27230 + }, + { + "epoch": 0.4977607984353008, + "grad_norm": 5.636887504816268, + "learning_rate": 5.279270980377994e-06, + "loss": 17.2021, + "step": 27231 + }, + { + "epoch": 0.49777907763174734, + "grad_norm": 7.027245822017659, + "learning_rate": 5.27897543044374e-06, + "loss": 17.7732, + "step": 27232 + }, + { + "epoch": 0.4977973568281938, + "grad_norm": 6.361708975918521, + "learning_rate": 5.278679879531701e-06, + "loss": 17.2399, + "step": 27233 + }, + { + "epoch": 0.49781563602464035, + "grad_norm": 6.389524564784658, + "learning_rate": 5.278384327642912e-06, + "loss": 17.7114, + "step": 27234 + }, + { + "epoch": 0.4978339152210869, + "grad_norm": 6.159050623032668, + "learning_rate": 5.278088774778412e-06, + "loss": 17.4076, + "step": 27235 + }, + { + "epoch": 0.4978521944175334, + "grad_norm": 7.697747232418964, + "learning_rate": 5.277793220939233e-06, + "loss": 18.5743, + "step": 27236 + }, + { + "epoch": 0.4978704736139799, + "grad_norm": 6.099365168059452, + "learning_rate": 5.277497666126413e-06, + "loss": 17.5432, + "step": 27237 + }, + { + "epoch": 0.49788875281042644, + "grad_norm": 7.462158905698103, + "learning_rate": 5.277202110340989e-06, + "loss": 18.1144, + "step": 27238 + }, + { + "epoch": 0.497907032006873, + "grad_norm": 5.544124810711395, + "learning_rate": 5.276906553583996e-06, + "loss": 17.2428, + "step": 27239 + }, + { + "epoch": 0.4979253112033195, + "grad_norm": 7.329429172628634, + "learning_rate": 5.276610995856468e-06, + "loss": 17.5655, + "step": 27240 + }, + { + "epoch": 0.49794359039976605, + "grad_norm": 5.533087384747263, + "learning_rate": 5.276315437159443e-06, + "loss": 17.2499, + "step": 27241 + }, + { + "epoch": 0.4979618695962125, + "grad_norm": 6.646939876218621, + "learning_rate": 5.2760198774939565e-06, + "loss": 17.5805, + "step": 27242 + }, + { + "epoch": 0.49798014879265906, + "grad_norm": 7.168023518725268, + "learning_rate": 5.275724316861045e-06, + "loss": 18.0732, + "step": 27243 + }, + { + "epoch": 0.4979984279891056, + "grad_norm": 7.253311223750868, + "learning_rate": 5.275428755261742e-06, + "loss": 17.8587, + "step": 27244 + }, + { + "epoch": 0.49801670718555213, + 
"grad_norm": 6.635024711834854, + "learning_rate": 5.275133192697086e-06, + "loss": 17.6585, + "step": 27245 + }, + { + "epoch": 0.49803498638199867, + "grad_norm": 5.967339873264225, + "learning_rate": 5.274837629168112e-06, + "loss": 17.2748, + "step": 27246 + }, + { + "epoch": 0.49805326557844515, + "grad_norm": 5.594154402885207, + "learning_rate": 5.274542064675857e-06, + "loss": 17.3248, + "step": 27247 + }, + { + "epoch": 0.4980715447748917, + "grad_norm": 6.088278735186422, + "learning_rate": 5.274246499221355e-06, + "loss": 17.6099, + "step": 27248 + }, + { + "epoch": 0.4980898239713382, + "grad_norm": 5.448986816313254, + "learning_rate": 5.273950932805641e-06, + "loss": 17.2054, + "step": 27249 + }, + { + "epoch": 0.49810810316778475, + "grad_norm": 6.941485267383951, + "learning_rate": 5.273655365429756e-06, + "loss": 17.5758, + "step": 27250 + }, + { + "epoch": 0.4981263823642313, + "grad_norm": 7.093893261235679, + "learning_rate": 5.273359797094731e-06, + "loss": 17.6851, + "step": 27251 + }, + { + "epoch": 0.49814466156067777, + "grad_norm": 6.0737254445571365, + "learning_rate": 5.273064227801604e-06, + "loss": 17.2736, + "step": 27252 + }, + { + "epoch": 0.4981629407571243, + "grad_norm": 6.223150827510893, + "learning_rate": 5.272768657551411e-06, + "loss": 17.375, + "step": 27253 + }, + { + "epoch": 0.49818121995357084, + "grad_norm": 6.778711264125992, + "learning_rate": 5.272473086345187e-06, + "loss": 17.4212, + "step": 27254 + }, + { + "epoch": 0.4981994991500174, + "grad_norm": 5.4685402624857025, + "learning_rate": 5.272177514183967e-06, + "loss": 17.1399, + "step": 27255 + }, + { + "epoch": 0.4982177783464639, + "grad_norm": 5.971388206290414, + "learning_rate": 5.271881941068792e-06, + "loss": 17.3266, + "step": 27256 + }, + { + "epoch": 0.4982360575429104, + "grad_norm": 5.697162623851452, + "learning_rate": 5.271586367000692e-06, + "loss": 17.3548, + "step": 27257 + }, + { + "epoch": 0.49825433673935693, + "grad_norm": 6.088845435619034, + "learning_rate": 5.271290791980704e-06, + "loss": 17.3779, + "step": 27258 + }, + { + "epoch": 0.49827261593580346, + "grad_norm": 5.853067754879511, + "learning_rate": 5.270995216009867e-06, + "loss": 17.323, + "step": 27259 + }, + { + "epoch": 0.49829089513225, + "grad_norm": 6.57190388598174, + "learning_rate": 5.2706996390892166e-06, + "loss": 17.6874, + "step": 27260 + }, + { + "epoch": 0.49830917432869654, + "grad_norm": 5.603114077511451, + "learning_rate": 5.270404061219786e-06, + "loss": 17.0561, + "step": 27261 + }, + { + "epoch": 0.498327453525143, + "grad_norm": 6.171017351257926, + "learning_rate": 5.270108482402612e-06, + "loss": 17.3007, + "step": 27262 + }, + { + "epoch": 0.49834573272158955, + "grad_norm": 6.350938266900552, + "learning_rate": 5.269812902638733e-06, + "loss": 17.8039, + "step": 27263 + }, + { + "epoch": 0.4983640119180361, + "grad_norm": 5.562161501121355, + "learning_rate": 5.2695173219291805e-06, + "loss": 17.1186, + "step": 27264 + }, + { + "epoch": 0.4983822911144826, + "grad_norm": 5.4989385623513565, + "learning_rate": 5.269221740274996e-06, + "loss": 17.3501, + "step": 27265 + }, + { + "epoch": 0.49840057031092916, + "grad_norm": 6.814186356177413, + "learning_rate": 5.268926157677211e-06, + "loss": 17.681, + "step": 27266 + }, + { + "epoch": 0.49841884950737564, + "grad_norm": 6.32709050709167, + "learning_rate": 5.268630574136864e-06, + "loss": 17.4067, + "step": 27267 + }, + { + "epoch": 0.4984371287038222, + "grad_norm": 6.0456350719203, + "learning_rate": 5.268334989654988e-06, + 
"loss": 17.6149, + "step": 27268 + }, + { + "epoch": 0.4984554079002687, + "grad_norm": 5.299140660827133, + "learning_rate": 5.268039404232624e-06, + "loss": 17.1577, + "step": 27269 + }, + { + "epoch": 0.49847368709671525, + "grad_norm": 5.4418613551047965, + "learning_rate": 5.267743817870805e-06, + "loss": 17.1168, + "step": 27270 + }, + { + "epoch": 0.4984919662931617, + "grad_norm": 7.413473483414581, + "learning_rate": 5.267448230570565e-06, + "loss": 17.9511, + "step": 27271 + }, + { + "epoch": 0.49851024548960826, + "grad_norm": 6.447941037758599, + "learning_rate": 5.267152642332943e-06, + "loss": 17.6765, + "step": 27272 + }, + { + "epoch": 0.4985285246860548, + "grad_norm": 8.662117696687469, + "learning_rate": 5.266857053158975e-06, + "loss": 18.0849, + "step": 27273 + }, + { + "epoch": 0.49854680388250133, + "grad_norm": 4.784470459934603, + "learning_rate": 5.2665614630496965e-06, + "loss": 16.8672, + "step": 27274 + }, + { + "epoch": 0.49856508307894787, + "grad_norm": 7.212260733700813, + "learning_rate": 5.2662658720061424e-06, + "loss": 17.6389, + "step": 27275 + }, + { + "epoch": 0.49858336227539435, + "grad_norm": 6.082691345885144, + "learning_rate": 5.265970280029349e-06, + "loss": 17.5206, + "step": 27276 + }, + { + "epoch": 0.4986016414718409, + "grad_norm": 6.361255605534515, + "learning_rate": 5.265674687120354e-06, + "loss": 17.3961, + "step": 27277 + }, + { + "epoch": 0.4986199206682874, + "grad_norm": 7.882584150565434, + "learning_rate": 5.265379093280191e-06, + "loss": 18.135, + "step": 27278 + }, + { + "epoch": 0.49863819986473396, + "grad_norm": 6.803867565955744, + "learning_rate": 5.265083498509898e-06, + "loss": 17.8072, + "step": 27279 + }, + { + "epoch": 0.4986564790611805, + "grad_norm": 6.734315518552827, + "learning_rate": 5.26478790281051e-06, + "loss": 17.3493, + "step": 27280 + }, + { + "epoch": 0.49867475825762697, + "grad_norm": 5.94951606986175, + "learning_rate": 5.264492306183063e-06, + "loss": 17.4249, + "step": 27281 + }, + { + "epoch": 0.4986930374540735, + "grad_norm": 7.496664659887621, + "learning_rate": 5.264196708628595e-06, + "loss": 17.6521, + "step": 27282 + }, + { + "epoch": 0.49871131665052004, + "grad_norm": 5.084969949782511, + "learning_rate": 5.26390111014814e-06, + "loss": 16.9039, + "step": 27283 + }, + { + "epoch": 0.4987295958469666, + "grad_norm": 7.350533788688811, + "learning_rate": 5.263605510742734e-06, + "loss": 17.7732, + "step": 27284 + }, + { + "epoch": 0.4987478750434131, + "grad_norm": 5.92753706762238, + "learning_rate": 5.263309910413412e-06, + "loss": 17.1627, + "step": 27285 + }, + { + "epoch": 0.4987661542398596, + "grad_norm": 8.021800227382037, + "learning_rate": 5.263014309161214e-06, + "loss": 18.1532, + "step": 27286 + }, + { + "epoch": 0.49878443343630613, + "grad_norm": 6.557198343835686, + "learning_rate": 5.262718706987172e-06, + "loss": 17.678, + "step": 27287 + }, + { + "epoch": 0.49880271263275267, + "grad_norm": 6.704476399119188, + "learning_rate": 5.262423103892327e-06, + "loss": 17.7313, + "step": 27288 + }, + { + "epoch": 0.4988209918291992, + "grad_norm": 8.38404822779253, + "learning_rate": 5.262127499877708e-06, + "loss": 18.4438, + "step": 27289 + }, + { + "epoch": 0.49883927102564574, + "grad_norm": 6.1932640372868715, + "learning_rate": 5.261831894944356e-06, + "loss": 17.103, + "step": 27290 + }, + { + "epoch": 0.4988575502220922, + "grad_norm": 6.626956857118013, + "learning_rate": 5.261536289093308e-06, + "loss": 17.4115, + "step": 27291 + }, + { + "epoch": 0.49887582941853875, 
+ "grad_norm": 5.9304365371352965, + "learning_rate": 5.261240682325595e-06, + "loss": 17.4598, + "step": 27292 + }, + { + "epoch": 0.4988941086149853, + "grad_norm": 7.531357573936381, + "learning_rate": 5.260945074642257e-06, + "loss": 18.1339, + "step": 27293 + }, + { + "epoch": 0.4989123878114318, + "grad_norm": 6.873920083080513, + "learning_rate": 5.26064946604433e-06, + "loss": 17.749, + "step": 27294 + }, + { + "epoch": 0.49893066700787836, + "grad_norm": 5.743669766851879, + "learning_rate": 5.260353856532848e-06, + "loss": 17.2678, + "step": 27295 + }, + { + "epoch": 0.49894894620432484, + "grad_norm": 6.520042487393192, + "learning_rate": 5.260058246108849e-06, + "loss": 17.7258, + "step": 27296 + }, + { + "epoch": 0.4989672254007714, + "grad_norm": 6.582487933064542, + "learning_rate": 5.259762634773369e-06, + "loss": 17.6029, + "step": 27297 + }, + { + "epoch": 0.4989855045972179, + "grad_norm": 6.714649967128315, + "learning_rate": 5.259467022527443e-06, + "loss": 17.6026, + "step": 27298 + }, + { + "epoch": 0.49900378379366445, + "grad_norm": 6.081654655488484, + "learning_rate": 5.259171409372107e-06, + "loss": 17.3182, + "step": 27299 + }, + { + "epoch": 0.499022062990111, + "grad_norm": 5.695502752793096, + "learning_rate": 5.258875795308398e-06, + "loss": 17.404, + "step": 27300 + }, + { + "epoch": 0.49904034218655746, + "grad_norm": 5.562535014051476, + "learning_rate": 5.258580180337353e-06, + "loss": 17.538, + "step": 27301 + }, + { + "epoch": 0.499058621383004, + "grad_norm": 5.945981359883965, + "learning_rate": 5.258284564460006e-06, + "loss": 17.2567, + "step": 27302 + }, + { + "epoch": 0.49907690057945053, + "grad_norm": 8.16154827655044, + "learning_rate": 5.2579889476773936e-06, + "loss": 18.235, + "step": 27303 + }, + { + "epoch": 0.49909517977589707, + "grad_norm": 7.341600633851559, + "learning_rate": 5.257693329990552e-06, + "loss": 18.1669, + "step": 27304 + }, + { + "epoch": 0.49911345897234355, + "grad_norm": 5.323704261626477, + "learning_rate": 5.257397711400519e-06, + "loss": 17.2324, + "step": 27305 + }, + { + "epoch": 0.4991317381687901, + "grad_norm": 5.860128598907489, + "learning_rate": 5.2571020919083294e-06, + "loss": 17.3605, + "step": 27306 + }, + { + "epoch": 0.4991500173652366, + "grad_norm": 4.738935459635471, + "learning_rate": 5.256806471515018e-06, + "loss": 16.9251, + "step": 27307 + }, + { + "epoch": 0.49916829656168316, + "grad_norm": 6.472101664606162, + "learning_rate": 5.2565108502216225e-06, + "loss": 17.2316, + "step": 27308 + }, + { + "epoch": 0.4991865757581297, + "grad_norm": 7.973558479732446, + "learning_rate": 5.256215228029179e-06, + "loss": 18.1083, + "step": 27309 + }, + { + "epoch": 0.49920485495457617, + "grad_norm": 4.894069680357392, + "learning_rate": 5.255919604938723e-06, + "loss": 16.889, + "step": 27310 + }, + { + "epoch": 0.4992231341510227, + "grad_norm": 5.8727078370170185, + "learning_rate": 5.255623980951292e-06, + "loss": 17.3891, + "step": 27311 + }, + { + "epoch": 0.49924141334746924, + "grad_norm": 6.632408257021762, + "learning_rate": 5.2553283560679205e-06, + "loss": 17.6655, + "step": 27312 + }, + { + "epoch": 0.4992596925439158, + "grad_norm": 6.405787045312934, + "learning_rate": 5.255032730289644e-06, + "loss": 17.5536, + "step": 27313 + }, + { + "epoch": 0.4992779717403623, + "grad_norm": 7.02220431101913, + "learning_rate": 5.254737103617502e-06, + "loss": 17.9237, + "step": 27314 + }, + { + "epoch": 0.4992962509368088, + "grad_norm": 6.160457284452359, + "learning_rate": 5.254441476052529e-06, 
+ "loss": 17.2988, + "step": 27315 + }, + { + "epoch": 0.49931453013325533, + "grad_norm": 6.7947684550883745, + "learning_rate": 5.254145847595758e-06, + "loss": 17.7638, + "step": 27316 + }, + { + "epoch": 0.49933280932970187, + "grad_norm": 6.777068010215756, + "learning_rate": 5.253850218248228e-06, + "loss": 17.8532, + "step": 27317 + }, + { + "epoch": 0.4993510885261484, + "grad_norm": 7.243763457221405, + "learning_rate": 5.2535545880109775e-06, + "loss": 17.7889, + "step": 27318 + }, + { + "epoch": 0.49936936772259494, + "grad_norm": 5.3918795999495, + "learning_rate": 5.25325895688504e-06, + "loss": 17.0649, + "step": 27319 + }, + { + "epoch": 0.4993876469190414, + "grad_norm": 5.413404390198723, + "learning_rate": 5.25296332487145e-06, + "loss": 17.1639, + "step": 27320 + }, + { + "epoch": 0.49940592611548795, + "grad_norm": 5.72906473494947, + "learning_rate": 5.252667691971247e-06, + "loss": 17.2953, + "step": 27321 + }, + { + "epoch": 0.4994242053119345, + "grad_norm": 5.193161058333442, + "learning_rate": 5.252372058185465e-06, + "loss": 17.0099, + "step": 27322 + }, + { + "epoch": 0.499442484508381, + "grad_norm": 7.039314401305383, + "learning_rate": 5.25207642351514e-06, + "loss": 17.5882, + "step": 27323 + }, + { + "epoch": 0.49946076370482756, + "grad_norm": 8.025612850124404, + "learning_rate": 5.2517807879613105e-06, + "loss": 17.9741, + "step": 27324 + }, + { + "epoch": 0.49947904290127404, + "grad_norm": 5.330340399919986, + "learning_rate": 5.251485151525011e-06, + "loss": 16.956, + "step": 27325 + }, + { + "epoch": 0.4994973220977206, + "grad_norm": 7.545316536692668, + "learning_rate": 5.251189514207276e-06, + "loss": 18.2837, + "step": 27326 + }, + { + "epoch": 0.4995156012941671, + "grad_norm": 6.0748830026656515, + "learning_rate": 5.250893876009146e-06, + "loss": 17.2829, + "step": 27327 + }, + { + "epoch": 0.49953388049061365, + "grad_norm": 5.65142999078361, + "learning_rate": 5.2505982369316525e-06, + "loss": 17.2648, + "step": 27328 + }, + { + "epoch": 0.4995521596870602, + "grad_norm": 6.562824523907876, + "learning_rate": 5.250302596975836e-06, + "loss": 17.387, + "step": 27329 + }, + { + "epoch": 0.49957043888350666, + "grad_norm": 6.622548580334553, + "learning_rate": 5.25000695614273e-06, + "loss": 17.3598, + "step": 27330 + }, + { + "epoch": 0.4995887180799532, + "grad_norm": 7.550808300661673, + "learning_rate": 5.24971131443337e-06, + "loss": 18.1236, + "step": 27331 + }, + { + "epoch": 0.49960699727639973, + "grad_norm": 5.670013093144835, + "learning_rate": 5.2494156718487955e-06, + "loss": 17.1048, + "step": 27332 + }, + { + "epoch": 0.49962527647284627, + "grad_norm": 4.981376117498401, + "learning_rate": 5.249120028390039e-06, + "loss": 17.1096, + "step": 27333 + }, + { + "epoch": 0.4996435556692928, + "grad_norm": 7.2170066049910115, + "learning_rate": 5.2488243840581395e-06, + "loss": 17.6629, + "step": 27334 + }, + { + "epoch": 0.4996618348657393, + "grad_norm": 6.390773456586503, + "learning_rate": 5.248528738854132e-06, + "loss": 17.352, + "step": 27335 + }, + { + "epoch": 0.4996801140621858, + "grad_norm": 5.998306482182172, + "learning_rate": 5.248233092779053e-06, + "loss": 17.4651, + "step": 27336 + }, + { + "epoch": 0.49969839325863236, + "grad_norm": 5.664445245901735, + "learning_rate": 5.247937445833937e-06, + "loss": 17.337, + "step": 27337 + }, + { + "epoch": 0.4997166724550789, + "grad_norm": 7.0876287427138704, + "learning_rate": 5.247641798019824e-06, + "loss": 17.7832, + "step": 27338 + }, + { + "epoch": 0.49973495165152537, 
+ "grad_norm": 7.7887961983554534, + "learning_rate": 5.247346149337746e-06, + "loss": 17.8716, + "step": 27339 + }, + { + "epoch": 0.4997532308479719, + "grad_norm": 6.913824805197927, + "learning_rate": 5.247050499788742e-06, + "loss": 17.7465, + "step": 27340 + }, + { + "epoch": 0.49977151004441844, + "grad_norm": 6.70493938546642, + "learning_rate": 5.246754849373848e-06, + "loss": 17.6515, + "step": 27341 + }, + { + "epoch": 0.499789789240865, + "grad_norm": 7.240895239359535, + "learning_rate": 5.246459198094098e-06, + "loss": 17.7322, + "step": 27342 + }, + { + "epoch": 0.4998080684373115, + "grad_norm": 7.6092188518292, + "learning_rate": 5.246163545950532e-06, + "loss": 17.7282, + "step": 27343 + }, + { + "epoch": 0.499826347633758, + "grad_norm": 8.32095780022321, + "learning_rate": 5.245867892944183e-06, + "loss": 18.1291, + "step": 27344 + }, + { + "epoch": 0.49984462683020453, + "grad_norm": 5.387775197233404, + "learning_rate": 5.245572239076089e-06, + "loss": 17.1777, + "step": 27345 + }, + { + "epoch": 0.49986290602665107, + "grad_norm": 5.71156203244884, + "learning_rate": 5.245276584347285e-06, + "loss": 17.2225, + "step": 27346 + }, + { + "epoch": 0.4998811852230976, + "grad_norm": 5.826846297576714, + "learning_rate": 5.2449809287588086e-06, + "loss": 16.9412, + "step": 27347 + }, + { + "epoch": 0.49989946441954414, + "grad_norm": 5.894183395356666, + "learning_rate": 5.2446852723116945e-06, + "loss": 17.2698, + "step": 27348 + }, + { + "epoch": 0.4999177436159906, + "grad_norm": 7.939433203270985, + "learning_rate": 5.24438961500698e-06, + "loss": 17.8702, + "step": 27349 + }, + { + "epoch": 0.49993602281243715, + "grad_norm": 7.17360554657337, + "learning_rate": 5.244093956845701e-06, + "loss": 17.8907, + "step": 27350 + }, + { + "epoch": 0.4999543020088837, + "grad_norm": 7.083243086783728, + "learning_rate": 5.2437982978288935e-06, + "loss": 17.8503, + "step": 27351 + }, + { + "epoch": 0.4999725812053302, + "grad_norm": 6.267525803147073, + "learning_rate": 5.2435026379575945e-06, + "loss": 17.5359, + "step": 27352 + }, + { + "epoch": 0.49999086040177676, + "grad_norm": 5.68135934114697, + "learning_rate": 5.243206977232841e-06, + "loss": 17.2727, + "step": 27353 + }, + { + "epoch": 0.5000091395982232, + "grad_norm": 6.0711168002188565, + "learning_rate": 5.242911315655667e-06, + "loss": 17.3524, + "step": 27354 + }, + { + "epoch": 0.5000274187946698, + "grad_norm": 8.019120929226196, + "learning_rate": 5.24261565322711e-06, + "loss": 18.4906, + "step": 27355 + }, + { + "epoch": 0.5000456979911163, + "grad_norm": 6.1078030558693905, + "learning_rate": 5.242319989948207e-06, + "loss": 17.3165, + "step": 27356 + }, + { + "epoch": 0.5000639771875628, + "grad_norm": 6.110272340357689, + "learning_rate": 5.242024325819993e-06, + "loss": 17.1111, + "step": 27357 + }, + { + "epoch": 0.5000822563840094, + "grad_norm": 6.387111648612196, + "learning_rate": 5.241728660843504e-06, + "loss": 17.4546, + "step": 27358 + }, + { + "epoch": 0.5001005355804559, + "grad_norm": 6.958135010754308, + "learning_rate": 5.241432995019776e-06, + "loss": 17.8441, + "step": 27359 + }, + { + "epoch": 0.5001188147769025, + "grad_norm": 5.9604794133716466, + "learning_rate": 5.241137328349849e-06, + "loss": 17.4998, + "step": 27360 + }, + { + "epoch": 0.5001370939733489, + "grad_norm": 7.191712253563262, + "learning_rate": 5.240841660834756e-06, + "loss": 17.6892, + "step": 27361 + }, + { + "epoch": 0.5001553731697954, + "grad_norm": 8.089367139206132, + "learning_rate": 5.240545992475533e-06, + 
"loss": 17.9973, + "step": 27362 + }, + { + "epoch": 0.500173652366242, + "grad_norm": 6.500798907545879, + "learning_rate": 5.240250323273217e-06, + "loss": 17.9538, + "step": 27363 + }, + { + "epoch": 0.5001919315626885, + "grad_norm": 7.202789932386763, + "learning_rate": 5.239954653228845e-06, + "loss": 17.4273, + "step": 27364 + }, + { + "epoch": 0.5002102107591351, + "grad_norm": 6.917364605155819, + "learning_rate": 5.239658982343453e-06, + "loss": 17.7761, + "step": 27365 + }, + { + "epoch": 0.5002284899555816, + "grad_norm": 6.394389360957413, + "learning_rate": 5.239363310618076e-06, + "loss": 17.4169, + "step": 27366 + }, + { + "epoch": 0.500246769152028, + "grad_norm": 5.216270305462959, + "learning_rate": 5.239067638053752e-06, + "loss": 16.9941, + "step": 27367 + }, + { + "epoch": 0.5002650483484746, + "grad_norm": 5.8359567468384945, + "learning_rate": 5.238771964651517e-06, + "loss": 17.0607, + "step": 27368 + }, + { + "epoch": 0.5002833275449211, + "grad_norm": 7.552960228153321, + "learning_rate": 5.238476290412407e-06, + "loss": 18.4582, + "step": 27369 + }, + { + "epoch": 0.5003016067413677, + "grad_norm": 6.996073524475762, + "learning_rate": 5.238180615337459e-06, + "loss": 17.414, + "step": 27370 + }, + { + "epoch": 0.5003198859378142, + "grad_norm": 5.943166499036538, + "learning_rate": 5.237884939427707e-06, + "loss": 17.4045, + "step": 27371 + }, + { + "epoch": 0.5003381651342607, + "grad_norm": 6.822133846742642, + "learning_rate": 5.237589262684188e-06, + "loss": 17.3984, + "step": 27372 + }, + { + "epoch": 0.5003564443307073, + "grad_norm": 4.917184385497025, + "learning_rate": 5.237293585107942e-06, + "loss": 16.8352, + "step": 27373 + }, + { + "epoch": 0.5003747235271537, + "grad_norm": 5.67334271032968, + "learning_rate": 5.236997906700002e-06, + "loss": 17.2446, + "step": 27374 + }, + { + "epoch": 0.5003930027236003, + "grad_norm": 5.8042370374908785, + "learning_rate": 5.236702227461404e-06, + "loss": 17.2292, + "step": 27375 + }, + { + "epoch": 0.5004112819200468, + "grad_norm": 7.816240683140112, + "learning_rate": 5.236406547393185e-06, + "loss": 18.1633, + "step": 27376 + }, + { + "epoch": 0.5004295611164933, + "grad_norm": 4.794110991390027, + "learning_rate": 5.236110866496383e-06, + "loss": 16.9117, + "step": 27377 + }, + { + "epoch": 0.5004478403129399, + "grad_norm": 6.997077513138833, + "learning_rate": 5.2358151847720315e-06, + "loss": 17.6769, + "step": 27378 + }, + { + "epoch": 0.5004661195093864, + "grad_norm": 7.3662656254532335, + "learning_rate": 5.2355195022211695e-06, + "loss": 17.7404, + "step": 27379 + }, + { + "epoch": 0.500484398705833, + "grad_norm": 6.483880158315783, + "learning_rate": 5.235223818844832e-06, + "loss": 17.8944, + "step": 27380 + }, + { + "epoch": 0.5005026779022794, + "grad_norm": 7.234928813027564, + "learning_rate": 5.234928134644054e-06, + "loss": 17.5484, + "step": 27381 + }, + { + "epoch": 0.5005209570987259, + "grad_norm": 7.261600480159182, + "learning_rate": 5.234632449619873e-06, + "loss": 17.8835, + "step": 27382 + }, + { + "epoch": 0.5005392362951725, + "grad_norm": 6.238666607560452, + "learning_rate": 5.234336763773326e-06, + "loss": 17.2874, + "step": 27383 + }, + { + "epoch": 0.500557515491619, + "grad_norm": 6.396814720909468, + "learning_rate": 5.234041077105451e-06, + "loss": 17.2575, + "step": 27384 + }, + { + "epoch": 0.5005757946880656, + "grad_norm": 6.363811787678032, + "learning_rate": 5.233745389617281e-06, + "loss": 17.7276, + "step": 27385 + }, + { + "epoch": 0.500594073884512, + 
"grad_norm": 5.8440359777593365, + "learning_rate": 5.233449701309853e-06, + "loss": 17.3754, + "step": 27386 + }, + { + "epoch": 0.5006123530809585, + "grad_norm": 6.120363833446666, + "learning_rate": 5.233154012184205e-06, + "loss": 17.3495, + "step": 27387 + }, + { + "epoch": 0.5006306322774051, + "grad_norm": 7.57091183675726, + "learning_rate": 5.232858322241373e-06, + "loss": 17.618, + "step": 27388 + }, + { + "epoch": 0.5006489114738516, + "grad_norm": 5.809761818613197, + "learning_rate": 5.232562631482392e-06, + "loss": 17.3301, + "step": 27389 + }, + { + "epoch": 0.5006671906702981, + "grad_norm": 5.935096577307627, + "learning_rate": 5.232266939908298e-06, + "loss": 17.3575, + "step": 27390 + }, + { + "epoch": 0.5006854698667447, + "grad_norm": 5.683414427115721, + "learning_rate": 5.23197124752013e-06, + "loss": 17.4249, + "step": 27391 + }, + { + "epoch": 0.5007037490631912, + "grad_norm": 7.672901328723398, + "learning_rate": 5.231675554318923e-06, + "loss": 18.2358, + "step": 27392 + }, + { + "epoch": 0.5007220282596377, + "grad_norm": 5.51855032814968, + "learning_rate": 5.2313798603057135e-06, + "loss": 17.2993, + "step": 27393 + }, + { + "epoch": 0.5007403074560842, + "grad_norm": 4.902807267884883, + "learning_rate": 5.2310841654815355e-06, + "loss": 17.0361, + "step": 27394 + }, + { + "epoch": 0.5007585866525307, + "grad_norm": 5.574659021420579, + "learning_rate": 5.23078846984743e-06, + "loss": 17.4636, + "step": 27395 + }, + { + "epoch": 0.5007768658489773, + "grad_norm": 6.5940863560363825, + "learning_rate": 5.2304927734044295e-06, + "loss": 17.5887, + "step": 27396 + }, + { + "epoch": 0.5007951450454238, + "grad_norm": 5.592720218805488, + "learning_rate": 5.2301970761535725e-06, + "loss": 17.3221, + "step": 27397 + }, + { + "epoch": 0.5008134242418704, + "grad_norm": 5.166934862750008, + "learning_rate": 5.229901378095895e-06, + "loss": 17.1445, + "step": 27398 + }, + { + "epoch": 0.5008317034383168, + "grad_norm": 7.109736810296239, + "learning_rate": 5.229605679232432e-06, + "loss": 17.6091, + "step": 27399 + }, + { + "epoch": 0.5008499826347633, + "grad_norm": 6.148882070907928, + "learning_rate": 5.229309979564221e-06, + "loss": 17.6021, + "step": 27400 + }, + { + "epoch": 0.5008682618312099, + "grad_norm": 7.179005321709902, + "learning_rate": 5.2290142790923e-06, + "loss": 17.991, + "step": 27401 + }, + { + "epoch": 0.5008865410276564, + "grad_norm": 5.461564279100281, + "learning_rate": 5.228718577817703e-06, + "loss": 17.2314, + "step": 27402 + }, + { + "epoch": 0.500904820224103, + "grad_norm": 6.55343778634408, + "learning_rate": 5.228422875741467e-06, + "loss": 17.3584, + "step": 27403 + }, + { + "epoch": 0.5009230994205495, + "grad_norm": 5.462835394014223, + "learning_rate": 5.228127172864627e-06, + "loss": 17.1289, + "step": 27404 + }, + { + "epoch": 0.500941378616996, + "grad_norm": 6.905041544785173, + "learning_rate": 5.227831469188225e-06, + "loss": 17.823, + "step": 27405 + }, + { + "epoch": 0.5009596578134425, + "grad_norm": 7.102553762331364, + "learning_rate": 5.227535764713291e-06, + "loss": 17.7111, + "step": 27406 + }, + { + "epoch": 0.500977937009889, + "grad_norm": 7.158427242021332, + "learning_rate": 5.227240059440865e-06, + "loss": 17.4464, + "step": 27407 + }, + { + "epoch": 0.5009962162063356, + "grad_norm": 7.132729504524786, + "learning_rate": 5.2269443533719814e-06, + "loss": 17.7173, + "step": 27408 + }, + { + "epoch": 0.5010144954027821, + "grad_norm": 7.428577864623479, + "learning_rate": 5.226648646507677e-06, + "loss": 
18.198, + "step": 27409 + }, + { + "epoch": 0.5010327745992286, + "grad_norm": 6.282317811470806, + "learning_rate": 5.2263529388489885e-06, + "loss": 17.3669, + "step": 27410 + }, + { + "epoch": 0.5010510537956752, + "grad_norm": 6.973185316094404, + "learning_rate": 5.226057230396953e-06, + "loss": 17.2822, + "step": 27411 + }, + { + "epoch": 0.5010693329921216, + "grad_norm": 5.760768121814484, + "learning_rate": 5.225761521152608e-06, + "loss": 17.1108, + "step": 27412 + }, + { + "epoch": 0.5010876121885682, + "grad_norm": 6.267662872351457, + "learning_rate": 5.225465811116988e-06, + "loss": 17.8989, + "step": 27413 + }, + { + "epoch": 0.5011058913850147, + "grad_norm": 6.161751745976709, + "learning_rate": 5.225170100291129e-06, + "loss": 17.6454, + "step": 27414 + }, + { + "epoch": 0.5011241705814612, + "grad_norm": 6.946668774645795, + "learning_rate": 5.22487438867607e-06, + "loss": 17.4902, + "step": 27415 + }, + { + "epoch": 0.5011424497779078, + "grad_norm": 7.8009518378293246, + "learning_rate": 5.224578676272844e-06, + "loss": 18.0471, + "step": 27416 + }, + { + "epoch": 0.5011607289743543, + "grad_norm": 8.100631348561828, + "learning_rate": 5.2242829630824885e-06, + "loss": 18.1581, + "step": 27417 + }, + { + "epoch": 0.5011790081708009, + "grad_norm": 7.944855927452899, + "learning_rate": 5.223987249106042e-06, + "loss": 18.2589, + "step": 27418 + }, + { + "epoch": 0.5011972873672473, + "grad_norm": 5.1268255941988405, + "learning_rate": 5.2236915343445404e-06, + "loss": 17.0776, + "step": 27419 + }, + { + "epoch": 0.5012155665636938, + "grad_norm": 7.138204920686909, + "learning_rate": 5.223395818799019e-06, + "loss": 18.2867, + "step": 27420 + }, + { + "epoch": 0.5012338457601404, + "grad_norm": 7.551431483059387, + "learning_rate": 5.223100102470513e-06, + "loss": 17.6356, + "step": 27421 + }, + { + "epoch": 0.5012521249565869, + "grad_norm": 6.532834147764727, + "learning_rate": 5.222804385360062e-06, + "loss": 17.6956, + "step": 27422 + }, + { + "epoch": 0.5012704041530335, + "grad_norm": 7.167663155300319, + "learning_rate": 5.222508667468701e-06, + "loss": 17.2327, + "step": 27423 + }, + { + "epoch": 0.50128868334948, + "grad_norm": 6.731071205187084, + "learning_rate": 5.222212948797466e-06, + "loss": 17.6541, + "step": 27424 + }, + { + "epoch": 0.5013069625459264, + "grad_norm": 5.914110127603212, + "learning_rate": 5.221917229347395e-06, + "loss": 17.4401, + "step": 27425 + }, + { + "epoch": 0.501325241742373, + "grad_norm": 6.690376471128805, + "learning_rate": 5.221621509119521e-06, + "loss": 17.3074, + "step": 27426 + }, + { + "epoch": 0.5013435209388195, + "grad_norm": 6.600748489199835, + "learning_rate": 5.221325788114884e-06, + "loss": 17.7168, + "step": 27427 + }, + { + "epoch": 0.5013618001352661, + "grad_norm": 6.278334865841477, + "learning_rate": 5.22103006633452e-06, + "loss": 17.583, + "step": 27428 + }, + { + "epoch": 0.5013800793317126, + "grad_norm": 6.644632845121036, + "learning_rate": 5.220734343779465e-06, + "loss": 17.7029, + "step": 27429 + }, + { + "epoch": 0.5013983585281591, + "grad_norm": 7.3252409032340005, + "learning_rate": 5.220438620450754e-06, + "loss": 17.3549, + "step": 27430 + }, + { + "epoch": 0.5014166377246057, + "grad_norm": 5.853935514389421, + "learning_rate": 5.220142896349424e-06, + "loss": 17.3847, + "step": 27431 + }, + { + "epoch": 0.5014349169210521, + "grad_norm": 4.588873686307592, + "learning_rate": 5.219847171476515e-06, + "loss": 16.8352, + "step": 27432 + }, + { + "epoch": 0.5014531961174987, + "grad_norm": 
6.392448853003408, + "learning_rate": 5.2195514458330585e-06, + "loss": 17.4353, + "step": 27433 + }, + { + "epoch": 0.5014714753139452, + "grad_norm": 7.0629340071061835, + "learning_rate": 5.219255719420095e-06, + "loss": 18.0061, + "step": 27434 + }, + { + "epoch": 0.5014897545103917, + "grad_norm": 9.441199465210085, + "learning_rate": 5.218959992238658e-06, + "loss": 17.6992, + "step": 27435 + }, + { + "epoch": 0.5015080337068383, + "grad_norm": 6.942934271494722, + "learning_rate": 5.218664264289786e-06, + "loss": 17.6394, + "step": 27436 + }, + { + "epoch": 0.5015263129032848, + "grad_norm": 6.086372515573157, + "learning_rate": 5.2183685355745126e-06, + "loss": 17.388, + "step": 27437 + }, + { + "epoch": 0.5015445920997313, + "grad_norm": 6.790859367633346, + "learning_rate": 5.218072806093879e-06, + "loss": 17.5098, + "step": 27438 + }, + { + "epoch": 0.5015628712961778, + "grad_norm": 6.377041254405387, + "learning_rate": 5.217777075848918e-06, + "loss": 17.4578, + "step": 27439 + }, + { + "epoch": 0.5015811504926243, + "grad_norm": 8.609211270087672, + "learning_rate": 5.217481344840667e-06, + "loss": 17.4983, + "step": 27440 + }, + { + "epoch": 0.5015994296890709, + "grad_norm": 5.416409304891662, + "learning_rate": 5.217185613070164e-06, + "loss": 17.1505, + "step": 27441 + }, + { + "epoch": 0.5016177088855174, + "grad_norm": 6.184904764132652, + "learning_rate": 5.2168898805384424e-06, + "loss": 17.5776, + "step": 27442 + }, + { + "epoch": 0.501635988081964, + "grad_norm": 6.135180400266057, + "learning_rate": 5.216594147246543e-06, + "loss": 17.5401, + "step": 27443 + }, + { + "epoch": 0.5016542672784104, + "grad_norm": 7.028843924673126, + "learning_rate": 5.216298413195497e-06, + "loss": 17.506, + "step": 27444 + }, + { + "epoch": 0.5016725464748569, + "grad_norm": 5.641817449703513, + "learning_rate": 5.216002678386346e-06, + "loss": 17.2133, + "step": 27445 + }, + { + "epoch": 0.5016908256713035, + "grad_norm": 5.1560586955278405, + "learning_rate": 5.215706942820124e-06, + "loss": 17.0432, + "step": 27446 + }, + { + "epoch": 0.50170910486775, + "grad_norm": 6.549285818228365, + "learning_rate": 5.215411206497868e-06, + "loss": 17.5745, + "step": 27447 + }, + { + "epoch": 0.5017273840641966, + "grad_norm": 7.7095342333496815, + "learning_rate": 5.215115469420614e-06, + "loss": 18.2654, + "step": 27448 + }, + { + "epoch": 0.5017456632606431, + "grad_norm": 5.941606628883112, + "learning_rate": 5.214819731589398e-06, + "loss": 17.1361, + "step": 27449 + }, + { + "epoch": 0.5017639424570896, + "grad_norm": 7.8957610329637165, + "learning_rate": 5.214523993005259e-06, + "loss": 17.9532, + "step": 27450 + }, + { + "epoch": 0.5017822216535361, + "grad_norm": 6.372576546619049, + "learning_rate": 5.214228253669232e-06, + "loss": 17.3834, + "step": 27451 + }, + { + "epoch": 0.5018005008499826, + "grad_norm": 7.6097126215648245, + "learning_rate": 5.213932513582353e-06, + "loss": 18.1436, + "step": 27452 + }, + { + "epoch": 0.5018187800464292, + "grad_norm": 5.852300587894222, + "learning_rate": 5.2136367727456595e-06, + "loss": 17.1411, + "step": 27453 + }, + { + "epoch": 0.5018370592428757, + "grad_norm": 9.691714048624561, + "learning_rate": 5.2133410311601875e-06, + "loss": 18.9537, + "step": 27454 + }, + { + "epoch": 0.5018553384393222, + "grad_norm": 7.197165582650849, + "learning_rate": 5.2130452888269725e-06, + "loss": 17.9285, + "step": 27455 + }, + { + "epoch": 0.5018736176357688, + "grad_norm": 6.790190706934791, + "learning_rate": 5.212749545747053e-06, + "loss": 
17.5413, + "step": 27456 + }, + { + "epoch": 0.5018918968322152, + "grad_norm": 5.920475628504475, + "learning_rate": 5.212453801921467e-06, + "loss": 17.2972, + "step": 27457 + }, + { + "epoch": 0.5019101760286617, + "grad_norm": 5.7450883295921695, + "learning_rate": 5.2121580573512456e-06, + "loss": 17.2121, + "step": 27458 + }, + { + "epoch": 0.5019284552251083, + "grad_norm": 6.129887306500934, + "learning_rate": 5.21186231203743e-06, + "loss": 17.3798, + "step": 27459 + }, + { + "epoch": 0.5019467344215548, + "grad_norm": 6.389799105005755, + "learning_rate": 5.2115665659810555e-06, + "loss": 17.4598, + "step": 27460 + }, + { + "epoch": 0.5019650136180014, + "grad_norm": 7.487780545050386, + "learning_rate": 5.211270819183159e-06, + "loss": 17.9528, + "step": 27461 + }, + { + "epoch": 0.5019832928144479, + "grad_norm": 6.545697454607325, + "learning_rate": 5.210975071644776e-06, + "loss": 17.2926, + "step": 27462 + }, + { + "epoch": 0.5020015720108943, + "grad_norm": 5.724842236748844, + "learning_rate": 5.210679323366943e-06, + "loss": 17.2105, + "step": 27463 + }, + { + "epoch": 0.5020198512073409, + "grad_norm": 5.7749935514895725, + "learning_rate": 5.210383574350698e-06, + "loss": 17.0421, + "step": 27464 + }, + { + "epoch": 0.5020381304037874, + "grad_norm": 5.319072944307387, + "learning_rate": 5.210087824597076e-06, + "loss": 16.9669, + "step": 27465 + }, + { + "epoch": 0.502056409600234, + "grad_norm": 6.916192023352253, + "learning_rate": 5.209792074107116e-06, + "loss": 17.661, + "step": 27466 + }, + { + "epoch": 0.5020746887966805, + "grad_norm": 6.52839170697784, + "learning_rate": 5.209496322881852e-06, + "loss": 17.5856, + "step": 27467 + }, + { + "epoch": 0.502092967993127, + "grad_norm": 5.7575835279806, + "learning_rate": 5.209200570922322e-06, + "loss": 17.2502, + "step": 27468 + }, + { + "epoch": 0.5021112471895736, + "grad_norm": 5.21937368770072, + "learning_rate": 5.208904818229561e-06, + "loss": 17.2321, + "step": 27469 + }, + { + "epoch": 0.50212952638602, + "grad_norm": 8.122590314842842, + "learning_rate": 5.2086090648046096e-06, + "loss": 18.061, + "step": 27470 + }, + { + "epoch": 0.5021478055824666, + "grad_norm": 5.781673897216666, + "learning_rate": 5.2083133106484986e-06, + "loss": 17.1307, + "step": 27471 + }, + { + "epoch": 0.5021660847789131, + "grad_norm": 6.9870143769618895, + "learning_rate": 5.208017555762268e-06, + "loss": 17.7875, + "step": 27472 + }, + { + "epoch": 0.5021843639753596, + "grad_norm": 6.684202125132718, + "learning_rate": 5.207721800146954e-06, + "loss": 17.4909, + "step": 27473 + }, + { + "epoch": 0.5022026431718062, + "grad_norm": 6.565595420450179, + "learning_rate": 5.2074260438035954e-06, + "loss": 17.5612, + "step": 27474 + }, + { + "epoch": 0.5022209223682527, + "grad_norm": 6.897436635906595, + "learning_rate": 5.207130286733224e-06, + "loss": 17.6166, + "step": 27475 + }, + { + "epoch": 0.5022392015646993, + "grad_norm": 6.051510521759804, + "learning_rate": 5.206834528936878e-06, + "loss": 17.5767, + "step": 27476 + }, + { + "epoch": 0.5022574807611457, + "grad_norm": 4.660172711916454, + "learning_rate": 5.206538770415598e-06, + "loss": 16.8226, + "step": 27477 + }, + { + "epoch": 0.5022757599575922, + "grad_norm": 5.502986954221638, + "learning_rate": 5.206243011170415e-06, + "loss": 17.023, + "step": 27478 + }, + { + "epoch": 0.5022940391540388, + "grad_norm": 5.978417104720057, + "learning_rate": 5.205947251202369e-06, + "loss": 17.5388, + "step": 27479 + }, + { + "epoch": 0.5023123183504853, + "grad_norm": 
6.8248904751761765, + "learning_rate": 5.205651490512496e-06, + "loss": 17.613, + "step": 27480 + }, + { + "epoch": 0.5023305975469319, + "grad_norm": 4.8724367239266, + "learning_rate": 5.205355729101833e-06, + "loss": 17.0065, + "step": 27481 + }, + { + "epoch": 0.5023488767433784, + "grad_norm": 5.961520441985069, + "learning_rate": 5.205059966971415e-06, + "loss": 17.6608, + "step": 27482 + }, + { + "epoch": 0.5023671559398248, + "grad_norm": 5.884050484409216, + "learning_rate": 5.204764204122279e-06, + "loss": 17.1868, + "step": 27483 + }, + { + "epoch": 0.5023854351362714, + "grad_norm": 6.253560070917388, + "learning_rate": 5.2044684405554645e-06, + "loss": 17.4443, + "step": 27484 + }, + { + "epoch": 0.5024037143327179, + "grad_norm": 8.408789749844162, + "learning_rate": 5.204172676272003e-06, + "loss": 18.3138, + "step": 27485 + }, + { + "epoch": 0.5024219935291645, + "grad_norm": 6.558372316727946, + "learning_rate": 5.203876911272936e-06, + "loss": 17.6935, + "step": 27486 + }, + { + "epoch": 0.502440272725611, + "grad_norm": 6.158160751081932, + "learning_rate": 5.203581145559298e-06, + "loss": 17.2859, + "step": 27487 + }, + { + "epoch": 0.5024585519220575, + "grad_norm": 6.208609058868916, + "learning_rate": 5.2032853791321255e-06, + "loss": 17.3936, + "step": 27488 + }, + { + "epoch": 0.502476831118504, + "grad_norm": 7.848355301758456, + "learning_rate": 5.202989611992455e-06, + "loss": 17.9317, + "step": 27489 + }, + { + "epoch": 0.5024951103149505, + "grad_norm": 6.685016959708192, + "learning_rate": 5.202693844141322e-06, + "loss": 17.299, + "step": 27490 + }, + { + "epoch": 0.5025133895113971, + "grad_norm": 5.288702752183807, + "learning_rate": 5.202398075579767e-06, + "loss": 17.035, + "step": 27491 + }, + { + "epoch": 0.5025316687078436, + "grad_norm": 8.106273503780569, + "learning_rate": 5.202102306308825e-06, + "loss": 17.9173, + "step": 27492 + }, + { + "epoch": 0.5025499479042901, + "grad_norm": 6.0398167216408885, + "learning_rate": 5.2018065363295304e-06, + "loss": 17.4087, + "step": 27493 + }, + { + "epoch": 0.5025682271007367, + "grad_norm": 7.557718648557547, + "learning_rate": 5.201510765642922e-06, + "loss": 17.7971, + "step": 27494 + }, + { + "epoch": 0.5025865062971832, + "grad_norm": 6.054882255550186, + "learning_rate": 5.201214994250034e-06, + "loss": 17.3972, + "step": 27495 + }, + { + "epoch": 0.5026047854936297, + "grad_norm": 7.194471640590093, + "learning_rate": 5.200919222151908e-06, + "loss": 17.9227, + "step": 27496 + }, + { + "epoch": 0.5026230646900762, + "grad_norm": 7.479360695694727, + "learning_rate": 5.200623449349575e-06, + "loss": 18.0845, + "step": 27497 + }, + { + "epoch": 0.5026413438865227, + "grad_norm": 6.683254102833924, + "learning_rate": 5.200327675844076e-06, + "loss": 17.4318, + "step": 27498 + }, + { + "epoch": 0.5026596230829693, + "grad_norm": 6.748717926325936, + "learning_rate": 5.200031901636444e-06, + "loss": 17.6456, + "step": 27499 + }, + { + "epoch": 0.5026779022794158, + "grad_norm": 6.117365829160261, + "learning_rate": 5.199736126727719e-06, + "loss": 17.3438, + "step": 27500 + }, + { + "epoch": 0.5026961814758624, + "grad_norm": 6.642969667594831, + "learning_rate": 5.199440351118936e-06, + "loss": 17.4521, + "step": 27501 + }, + { + "epoch": 0.5027144606723088, + "grad_norm": 6.546327008119256, + "learning_rate": 5.199144574811132e-06, + "loss": 17.5044, + "step": 27502 + }, + { + "epoch": 0.5027327398687553, + "grad_norm": 5.349614452347511, + "learning_rate": 5.198848797805343e-06, + "loss": 16.9952, + 
"step": 27503 + }, + { + "epoch": 0.5027510190652019, + "grad_norm": 6.806322501820001, + "learning_rate": 5.198553020102606e-06, + "loss": 17.7437, + "step": 27504 + }, + { + "epoch": 0.5027692982616484, + "grad_norm": 6.504506895365117, + "learning_rate": 5.198257241703959e-06, + "loss": 17.6166, + "step": 27505 + }, + { + "epoch": 0.502787577458095, + "grad_norm": 6.8308968273401645, + "learning_rate": 5.1979614626104365e-06, + "loss": 17.7568, + "step": 27506 + }, + { + "epoch": 0.5028058566545415, + "grad_norm": 6.600709319039463, + "learning_rate": 5.197665682823076e-06, + "loss": 17.5428, + "step": 27507 + }, + { + "epoch": 0.502824135850988, + "grad_norm": 6.2470534123693895, + "learning_rate": 5.197369902342916e-06, + "loss": 17.3073, + "step": 27508 + }, + { + "epoch": 0.5028424150474345, + "grad_norm": 6.976116280348958, + "learning_rate": 5.197074121170991e-06, + "loss": 17.7634, + "step": 27509 + }, + { + "epoch": 0.502860694243881, + "grad_norm": 6.1875155221474625, + "learning_rate": 5.196778339308338e-06, + "loss": 17.3329, + "step": 27510 + }, + { + "epoch": 0.5028789734403276, + "grad_norm": 6.46088522994846, + "learning_rate": 5.196482556755994e-06, + "loss": 17.599, + "step": 27511 + }, + { + "epoch": 0.5028972526367741, + "grad_norm": 8.997033569978639, + "learning_rate": 5.196186773514995e-06, + "loss": 18.3507, + "step": 27512 + }, + { + "epoch": 0.5029155318332206, + "grad_norm": 5.922053130112032, + "learning_rate": 5.19589098958638e-06, + "loss": 17.4276, + "step": 27513 + }, + { + "epoch": 0.5029338110296672, + "grad_norm": 5.7468085657305625, + "learning_rate": 5.195595204971182e-06, + "loss": 17.3735, + "step": 27514 + }, + { + "epoch": 0.5029520902261136, + "grad_norm": 6.405522064361517, + "learning_rate": 5.195299419670442e-06, + "loss": 17.8147, + "step": 27515 + }, + { + "epoch": 0.5029703694225602, + "grad_norm": 7.037740565033126, + "learning_rate": 5.195003633685194e-06, + "loss": 17.6155, + "step": 27516 + }, + { + "epoch": 0.5029886486190067, + "grad_norm": 5.7461921632343325, + "learning_rate": 5.194707847016474e-06, + "loss": 17.2436, + "step": 27517 + }, + { + "epoch": 0.5030069278154532, + "grad_norm": 6.768562562646901, + "learning_rate": 5.19441205966532e-06, + "loss": 17.821, + "step": 27518 + }, + { + "epoch": 0.5030252070118998, + "grad_norm": 6.994355393123101, + "learning_rate": 5.194116271632769e-06, + "loss": 17.6248, + "step": 27519 + }, + { + "epoch": 0.5030434862083463, + "grad_norm": 5.945406372738656, + "learning_rate": 5.193820482919858e-06, + "loss": 17.2435, + "step": 27520 + }, + { + "epoch": 0.5030617654047929, + "grad_norm": 5.974668663293852, + "learning_rate": 5.193524693527623e-06, + "loss": 17.3625, + "step": 27521 + }, + { + "epoch": 0.5030800446012393, + "grad_norm": 6.9194982401268526, + "learning_rate": 5.1932289034571e-06, + "loss": 18.2391, + "step": 27522 + }, + { + "epoch": 0.5030983237976858, + "grad_norm": 7.315381405321791, + "learning_rate": 5.192933112709326e-06, + "loss": 17.9562, + "step": 27523 + }, + { + "epoch": 0.5031166029941324, + "grad_norm": 6.594120925526298, + "learning_rate": 5.1926373212853385e-06, + "loss": 17.5046, + "step": 27524 + }, + { + "epoch": 0.5031348821905789, + "grad_norm": 7.304940661604103, + "learning_rate": 5.192341529186175e-06, + "loss": 17.7933, + "step": 27525 + }, + { + "epoch": 0.5031531613870254, + "grad_norm": 5.483380648253276, + "learning_rate": 5.19204573641287e-06, + "loss": 17.1572, + "step": 27526 + }, + { + "epoch": 0.503171440583472, + "grad_norm": 
6.890729723949929, + "learning_rate": 5.191749942966462e-06, + "loss": 17.8273, + "step": 27527 + }, + { + "epoch": 0.5031897197799184, + "grad_norm": 5.600074806366154, + "learning_rate": 5.191454148847986e-06, + "loss": 17.2443, + "step": 27528 + }, + { + "epoch": 0.503207998976365, + "grad_norm": 5.512225238396698, + "learning_rate": 5.191158354058482e-06, + "loss": 17.1353, + "step": 27529 + }, + { + "epoch": 0.5032262781728115, + "grad_norm": 6.097103260232165, + "learning_rate": 5.190862558598983e-06, + "loss": 17.3884, + "step": 27530 + }, + { + "epoch": 0.503244557369258, + "grad_norm": 5.2372114164909735, + "learning_rate": 5.190566762470527e-06, + "loss": 16.9618, + "step": 27531 + }, + { + "epoch": 0.5032628365657046, + "grad_norm": 7.533783967663297, + "learning_rate": 5.190270965674152e-06, + "loss": 18.1905, + "step": 27532 + }, + { + "epoch": 0.5032811157621511, + "grad_norm": 5.798360449902278, + "learning_rate": 5.189975168210893e-06, + "loss": 17.3989, + "step": 27533 + }, + { + "epoch": 0.5032993949585977, + "grad_norm": 6.640908870126543, + "learning_rate": 5.189679370081789e-06, + "loss": 17.4577, + "step": 27534 + }, + { + "epoch": 0.5033176741550441, + "grad_norm": 5.080109637563325, + "learning_rate": 5.189383571287872e-06, + "loss": 16.9935, + "step": 27535 + }, + { + "epoch": 0.5033359533514906, + "grad_norm": 6.912073437888923, + "learning_rate": 5.189087771830186e-06, + "loss": 17.6632, + "step": 27536 + }, + { + "epoch": 0.5033542325479372, + "grad_norm": 6.575914356282273, + "learning_rate": 5.188791971709761e-06, + "loss": 17.4028, + "step": 27537 + }, + { + "epoch": 0.5033725117443837, + "grad_norm": 5.540881853619029, + "learning_rate": 5.188496170927637e-06, + "loss": 17.2044, + "step": 27538 + }, + { + "epoch": 0.5033907909408303, + "grad_norm": 7.219808193319258, + "learning_rate": 5.1882003694848515e-06, + "loss": 17.7435, + "step": 27539 + }, + { + "epoch": 0.5034090701372768, + "grad_norm": 7.271662511375215, + "learning_rate": 5.187904567382439e-06, + "loss": 18.1021, + "step": 27540 + }, + { + "epoch": 0.5034273493337232, + "grad_norm": 6.789408787706083, + "learning_rate": 5.187608764621437e-06, + "loss": 17.6288, + "step": 27541 + }, + { + "epoch": 0.5034456285301698, + "grad_norm": 6.929807585433291, + "learning_rate": 5.187312961202882e-06, + "loss": 17.5508, + "step": 27542 + }, + { + "epoch": 0.5034639077266163, + "grad_norm": 5.757440779569821, + "learning_rate": 5.187017157127815e-06, + "loss": 17.2604, + "step": 27543 + }, + { + "epoch": 0.5034821869230629, + "grad_norm": 6.736364591735982, + "learning_rate": 5.186721352397265e-06, + "loss": 17.5449, + "step": 27544 + }, + { + "epoch": 0.5035004661195094, + "grad_norm": 5.680884503028648, + "learning_rate": 5.186425547012275e-06, + "loss": 17.0263, + "step": 27545 + }, + { + "epoch": 0.5035187453159559, + "grad_norm": 4.685044570264193, + "learning_rate": 5.18612974097388e-06, + "loss": 16.8804, + "step": 27546 + }, + { + "epoch": 0.5035370245124025, + "grad_norm": 7.224721385841569, + "learning_rate": 5.185833934283114e-06, + "loss": 17.9704, + "step": 27547 + }, + { + "epoch": 0.5035553037088489, + "grad_norm": 6.933162705042278, + "learning_rate": 5.185538126941019e-06, + "loss": 17.9275, + "step": 27548 + }, + { + "epoch": 0.5035735829052955, + "grad_norm": 5.864375061142779, + "learning_rate": 5.1852423189486256e-06, + "loss": 17.6488, + "step": 27549 + }, + { + "epoch": 0.503591862101742, + "grad_norm": 7.11665122116013, + "learning_rate": 5.184946510306977e-06, + "loss": 17.5661, + 
"step": 27550 + }, + { + "epoch": 0.5036101412981885, + "grad_norm": 6.304113181307043, + "learning_rate": 5.184650701017105e-06, + "loss": 17.5131, + "step": 27551 + }, + { + "epoch": 0.5036284204946351, + "grad_norm": 5.737618692572437, + "learning_rate": 5.18435489108005e-06, + "loss": 16.9711, + "step": 27552 + }, + { + "epoch": 0.5036466996910816, + "grad_norm": 5.700244822219272, + "learning_rate": 5.184059080496846e-06, + "loss": 17.1295, + "step": 27553 + }, + { + "epoch": 0.5036649788875281, + "grad_norm": 5.862370704788984, + "learning_rate": 5.183763269268531e-06, + "loss": 17.1625, + "step": 27554 + }, + { + "epoch": 0.5036832580839746, + "grad_norm": 5.909596797048763, + "learning_rate": 5.183467457396142e-06, + "loss": 17.1662, + "step": 27555 + }, + { + "epoch": 0.5037015372804211, + "grad_norm": 8.056279847860345, + "learning_rate": 5.183171644880714e-06, + "loss": 17.7257, + "step": 27556 + }, + { + "epoch": 0.5037198164768677, + "grad_norm": 6.257912896267873, + "learning_rate": 5.182875831723288e-06, + "loss": 17.5899, + "step": 27557 + }, + { + "epoch": 0.5037380956733142, + "grad_norm": 7.619659666477824, + "learning_rate": 5.1825800179248964e-06, + "loss": 17.6511, + "step": 27558 + }, + { + "epoch": 0.5037563748697608, + "grad_norm": 8.25092517858032, + "learning_rate": 5.182284203486577e-06, + "loss": 18.1767, + "step": 27559 + }, + { + "epoch": 0.5037746540662073, + "grad_norm": 5.824274287405943, + "learning_rate": 5.1819883884093705e-06, + "loss": 17.31, + "step": 27560 + }, + { + "epoch": 0.5037929332626537, + "grad_norm": 6.923456020475565, + "learning_rate": 5.181692572694308e-06, + "loss": 17.7274, + "step": 27561 + }, + { + "epoch": 0.5038112124591003, + "grad_norm": 7.702178478700586, + "learning_rate": 5.181396756342428e-06, + "loss": 17.6612, + "step": 27562 + }, + { + "epoch": 0.5038294916555468, + "grad_norm": 5.526224845757836, + "learning_rate": 5.18110093935477e-06, + "loss": 17.2442, + "step": 27563 + }, + { + "epoch": 0.5038477708519934, + "grad_norm": 5.925765686166306, + "learning_rate": 5.180805121732367e-06, + "loss": 17.246, + "step": 27564 + }, + { + "epoch": 0.5038660500484399, + "grad_norm": 6.314698857761973, + "learning_rate": 5.18050930347626e-06, + "loss": 17.4263, + "step": 27565 + }, + { + "epoch": 0.5038843292448864, + "grad_norm": 6.400760567150828, + "learning_rate": 5.180213484587482e-06, + "loss": 17.4009, + "step": 27566 + }, + { + "epoch": 0.5039026084413329, + "grad_norm": 6.066075711125857, + "learning_rate": 5.179917665067072e-06, + "loss": 17.5616, + "step": 27567 + }, + { + "epoch": 0.5039208876377794, + "grad_norm": 6.74359468662774, + "learning_rate": 5.179621844916065e-06, + "loss": 17.6614, + "step": 27568 + }, + { + "epoch": 0.503939166834226, + "grad_norm": 6.5324995219543736, + "learning_rate": 5.1793260241355e-06, + "loss": 17.2444, + "step": 27569 + }, + { + "epoch": 0.5039574460306725, + "grad_norm": 6.916770101727999, + "learning_rate": 5.179030202726414e-06, + "loss": 18.1773, + "step": 27570 + }, + { + "epoch": 0.503975725227119, + "grad_norm": 5.983715099934033, + "learning_rate": 5.17873438068984e-06, + "loss": 17.2402, + "step": 27571 + }, + { + "epoch": 0.5039940044235656, + "grad_norm": 5.256466208380804, + "learning_rate": 5.178438558026819e-06, + "loss": 17.3271, + "step": 27572 + }, + { + "epoch": 0.504012283620012, + "grad_norm": 4.865510599169104, + "learning_rate": 5.178142734738386e-06, + "loss": 16.9278, + "step": 27573 + }, + { + "epoch": 0.5040305628164586, + "grad_norm": 6.009582623410873, + 
"learning_rate": 5.17784691082558e-06, + "loss": 17.3797, + "step": 27574 + }, + { + "epoch": 0.5040488420129051, + "grad_norm": 6.029082880362614, + "learning_rate": 5.177551086289434e-06, + "loss": 17.3101, + "step": 27575 + }, + { + "epoch": 0.5040671212093516, + "grad_norm": 6.645481189205907, + "learning_rate": 5.177255261130987e-06, + "loss": 17.4678, + "step": 27576 + }, + { + "epoch": 0.5040854004057982, + "grad_norm": 6.868423411372092, + "learning_rate": 5.1769594353512765e-06, + "loss": 17.8212, + "step": 27577 + }, + { + "epoch": 0.5041036796022447, + "grad_norm": 6.7974656322543865, + "learning_rate": 5.1766636089513375e-06, + "loss": 17.5799, + "step": 27578 + }, + { + "epoch": 0.5041219587986913, + "grad_norm": 9.202097818861546, + "learning_rate": 5.176367781932209e-06, + "loss": 17.9019, + "step": 27579 + }, + { + "epoch": 0.5041402379951377, + "grad_norm": 6.546695558542694, + "learning_rate": 5.176071954294926e-06, + "loss": 17.2939, + "step": 27580 + }, + { + "epoch": 0.5041585171915842, + "grad_norm": 4.114361230779656, + "learning_rate": 5.175776126040526e-06, + "loss": 16.5922, + "step": 27581 + }, + { + "epoch": 0.5041767963880308, + "grad_norm": 5.446068971978248, + "learning_rate": 5.175480297170047e-06, + "loss": 17.1501, + "step": 27582 + }, + { + "epoch": 0.5041950755844773, + "grad_norm": 6.528465725908221, + "learning_rate": 5.1751844676845234e-06, + "loss": 17.6874, + "step": 27583 + }, + { + "epoch": 0.5042133547809239, + "grad_norm": 6.263069703194509, + "learning_rate": 5.174888637584995e-06, + "loss": 17.3593, + "step": 27584 + }, + { + "epoch": 0.5042316339773704, + "grad_norm": 7.089100425270701, + "learning_rate": 5.174592806872495e-06, + "loss": 17.7325, + "step": 27585 + }, + { + "epoch": 0.5042499131738168, + "grad_norm": 6.952406788040247, + "learning_rate": 5.174296975548063e-06, + "loss": 17.8632, + "step": 27586 + }, + { + "epoch": 0.5042681923702634, + "grad_norm": 6.702582100217042, + "learning_rate": 5.1740011436127355e-06, + "loss": 17.5542, + "step": 27587 + }, + { + "epoch": 0.5042864715667099, + "grad_norm": 6.415982961407396, + "learning_rate": 5.1737053110675505e-06, + "loss": 17.8005, + "step": 27588 + }, + { + "epoch": 0.5043047507631565, + "grad_norm": 6.657096792187528, + "learning_rate": 5.173409477913543e-06, + "loss": 17.6757, + "step": 27589 + }, + { + "epoch": 0.504323029959603, + "grad_norm": 7.168338115829753, + "learning_rate": 5.173113644151748e-06, + "loss": 17.7773, + "step": 27590 + }, + { + "epoch": 0.5043413091560495, + "grad_norm": 4.892969233631025, + "learning_rate": 5.172817809783207e-06, + "loss": 16.8434, + "step": 27591 + }, + { + "epoch": 0.5043595883524961, + "grad_norm": 5.473425059303431, + "learning_rate": 5.172521974808954e-06, + "loss": 17.2269, + "step": 27592 + }, + { + "epoch": 0.5043778675489425, + "grad_norm": 6.62752555391626, + "learning_rate": 5.1722261392300265e-06, + "loss": 17.6366, + "step": 27593 + }, + { + "epoch": 0.504396146745389, + "grad_norm": 5.984820671045974, + "learning_rate": 5.171930303047461e-06, + "loss": 17.1321, + "step": 27594 + }, + { + "epoch": 0.5044144259418356, + "grad_norm": 7.448722269027972, + "learning_rate": 5.171634466262294e-06, + "loss": 17.9497, + "step": 27595 + }, + { + "epoch": 0.5044327051382821, + "grad_norm": 5.5853500569532955, + "learning_rate": 5.171338628875564e-06, + "loss": 17.2253, + "step": 27596 + }, + { + "epoch": 0.5044509843347287, + "grad_norm": 6.771563707319088, + "learning_rate": 5.171042790888308e-06, + "loss": 17.549, + "step": 27597 + 
}, + { + "epoch": 0.5044692635311752, + "grad_norm": 5.871899978894757, + "learning_rate": 5.170746952301559e-06, + "loss": 17.4473, + "step": 27598 + }, + { + "epoch": 0.5044875427276216, + "grad_norm": 6.274713350608566, + "learning_rate": 5.170451113116359e-06, + "loss": 17.7429, + "step": 27599 + }, + { + "epoch": 0.5045058219240682, + "grad_norm": 7.110533040292356, + "learning_rate": 5.170155273333743e-06, + "loss": 17.9841, + "step": 27600 + }, + { + "epoch": 0.5045241011205147, + "grad_norm": 7.803695683487732, + "learning_rate": 5.169859432954747e-06, + "loss": 18.2242, + "step": 27601 + }, + { + "epoch": 0.5045423803169613, + "grad_norm": 5.916541655408195, + "learning_rate": 5.169563591980409e-06, + "loss": 17.0129, + "step": 27602 + }, + { + "epoch": 0.5045606595134078, + "grad_norm": 6.182438632464134, + "learning_rate": 5.169267750411763e-06, + "loss": 17.5817, + "step": 27603 + }, + { + "epoch": 0.5045789387098543, + "grad_norm": 5.900076900146689, + "learning_rate": 5.1689719082498494e-06, + "loss": 17.128, + "step": 27604 + }, + { + "epoch": 0.5045972179063009, + "grad_norm": 7.172817506058974, + "learning_rate": 5.168676065495705e-06, + "loss": 17.5771, + "step": 27605 + }, + { + "epoch": 0.5046154971027473, + "grad_norm": 7.6274939751249615, + "learning_rate": 5.168380222150364e-06, + "loss": 17.8769, + "step": 27606 + }, + { + "epoch": 0.5046337762991939, + "grad_norm": 5.404809349830576, + "learning_rate": 5.1680843782148656e-06, + "loss": 17.0407, + "step": 27607 + }, + { + "epoch": 0.5046520554956404, + "grad_norm": 7.394237278623086, + "learning_rate": 5.167788533690247e-06, + "loss": 17.8788, + "step": 27608 + }, + { + "epoch": 0.5046703346920869, + "grad_norm": 7.492467063448779, + "learning_rate": 5.167492688577543e-06, + "loss": 17.9748, + "step": 27609 + }, + { + "epoch": 0.5046886138885335, + "grad_norm": 6.8790718888237, + "learning_rate": 5.167196842877792e-06, + "loss": 17.5047, + "step": 27610 + }, + { + "epoch": 0.50470689308498, + "grad_norm": 6.597736836157785, + "learning_rate": 5.1669009965920305e-06, + "loss": 17.8067, + "step": 27611 + }, + { + "epoch": 0.5047251722814265, + "grad_norm": 5.16417093993006, + "learning_rate": 5.166605149721296e-06, + "loss": 16.864, + "step": 27612 + }, + { + "epoch": 0.504743451477873, + "grad_norm": 6.790951932401917, + "learning_rate": 5.166309302266624e-06, + "loss": 17.5408, + "step": 27613 + }, + { + "epoch": 0.5047617306743195, + "grad_norm": 7.320218145959628, + "learning_rate": 5.166013454229053e-06, + "loss": 17.9132, + "step": 27614 + }, + { + "epoch": 0.5047800098707661, + "grad_norm": 5.540957011762305, + "learning_rate": 5.16571760560962e-06, + "loss": 17.3871, + "step": 27615 + }, + { + "epoch": 0.5047982890672126, + "grad_norm": 7.778922745046559, + "learning_rate": 5.16542175640936e-06, + "loss": 18.0185, + "step": 27616 + }, + { + "epoch": 0.5048165682636592, + "grad_norm": 6.193097886338422, + "learning_rate": 5.16512590662931e-06, + "loss": 17.3566, + "step": 27617 + }, + { + "epoch": 0.5048348474601057, + "grad_norm": 5.811583558003684, + "learning_rate": 5.164830056270509e-06, + "loss": 17.2262, + "step": 27618 + }, + { + "epoch": 0.5048531266565521, + "grad_norm": 7.898633495034506, + "learning_rate": 5.164534205333995e-06, + "loss": 18.3214, + "step": 27619 + }, + { + "epoch": 0.5048714058529987, + "grad_norm": 8.030837492772608, + "learning_rate": 5.1642383538208005e-06, + "loss": 18.3775, + "step": 27620 + }, + { + "epoch": 0.5048896850494452, + "grad_norm": 5.723929271515542, + 
"learning_rate": 5.163942501731966e-06, + "loss": 17.4789, + "step": 27621 + }, + { + "epoch": 0.5049079642458918, + "grad_norm": 5.230468112841281, + "learning_rate": 5.163646649068527e-06, + "loss": 16.9984, + "step": 27622 + }, + { + "epoch": 0.5049262434423383, + "grad_norm": 6.128025454370273, + "learning_rate": 5.16335079583152e-06, + "loss": 17.399, + "step": 27623 + }, + { + "epoch": 0.5049445226387848, + "grad_norm": 5.866331204761387, + "learning_rate": 5.163054942021983e-06, + "loss": 17.6067, + "step": 27624 + }, + { + "epoch": 0.5049628018352313, + "grad_norm": 5.760854215968586, + "learning_rate": 5.162759087640953e-06, + "loss": 17.3237, + "step": 27625 + }, + { + "epoch": 0.5049810810316778, + "grad_norm": 7.015165463723453, + "learning_rate": 5.162463232689465e-06, + "loss": 18.1369, + "step": 27626 + }, + { + "epoch": 0.5049993602281244, + "grad_norm": 6.416469001773678, + "learning_rate": 5.162167377168559e-06, + "loss": 17.7597, + "step": 27627 + }, + { + "epoch": 0.5050176394245709, + "grad_norm": 6.994677546386976, + "learning_rate": 5.1618715210792704e-06, + "loss": 18.0055, + "step": 27628 + }, + { + "epoch": 0.5050359186210174, + "grad_norm": 7.063157435887147, + "learning_rate": 5.161575664422637e-06, + "loss": 17.8699, + "step": 27629 + }, + { + "epoch": 0.505054197817464, + "grad_norm": 7.2920732637401215, + "learning_rate": 5.161279807199692e-06, + "loss": 18.198, + "step": 27630 + }, + { + "epoch": 0.5050724770139104, + "grad_norm": 5.055173979027419, + "learning_rate": 5.160983949411478e-06, + "loss": 16.9205, + "step": 27631 + }, + { + "epoch": 0.505090756210357, + "grad_norm": 5.647088359036415, + "learning_rate": 5.1606880910590285e-06, + "loss": 17.2178, + "step": 27632 + }, + { + "epoch": 0.5051090354068035, + "grad_norm": 5.928155576463627, + "learning_rate": 5.160392232143381e-06, + "loss": 17.5289, + "step": 27633 + }, + { + "epoch": 0.50512731460325, + "grad_norm": 6.396122099132778, + "learning_rate": 5.160096372665573e-06, + "loss": 17.3092, + "step": 27634 + }, + { + "epoch": 0.5051455937996966, + "grad_norm": 5.2811810268772446, + "learning_rate": 5.1598005126266395e-06, + "loss": 17.0258, + "step": 27635 + }, + { + "epoch": 0.5051638729961431, + "grad_norm": 6.033285483416313, + "learning_rate": 5.159504652027621e-06, + "loss": 17.4595, + "step": 27636 + }, + { + "epoch": 0.5051821521925897, + "grad_norm": 6.461835547461277, + "learning_rate": 5.159208790869552e-06, + "loss": 17.6768, + "step": 27637 + }, + { + "epoch": 0.5052004313890361, + "grad_norm": 6.103894956407447, + "learning_rate": 5.158912929153469e-06, + "loss": 17.0627, + "step": 27638 + }, + { + "epoch": 0.5052187105854826, + "grad_norm": 5.478906843384525, + "learning_rate": 5.158617066880411e-06, + "loss": 16.9255, + "step": 27639 + }, + { + "epoch": 0.5052369897819292, + "grad_norm": 7.640017289504977, + "learning_rate": 5.158321204051414e-06, + "loss": 17.9303, + "step": 27640 + }, + { + "epoch": 0.5052552689783757, + "grad_norm": 8.514597003385521, + "learning_rate": 5.158025340667514e-06, + "loss": 18.0154, + "step": 27641 + }, + { + "epoch": 0.5052735481748223, + "grad_norm": 5.810975657165897, + "learning_rate": 5.157729476729749e-06, + "loss": 17.0998, + "step": 27642 + }, + { + "epoch": 0.5052918273712688, + "grad_norm": 5.553983838062384, + "learning_rate": 5.1574336122391575e-06, + "loss": 17.1545, + "step": 27643 + }, + { + "epoch": 0.5053101065677152, + "grad_norm": 5.185153497954882, + "learning_rate": 5.157137747196773e-06, + "loss": 17.1105, + "step": 27644 + }, + 
{ + "epoch": 0.5053283857641618, + "grad_norm": 7.312979692958259, + "learning_rate": 5.156841881603635e-06, + "loss": 17.5566, + "step": 27645 + }, + { + "epoch": 0.5053466649606083, + "grad_norm": 7.308497903257434, + "learning_rate": 5.156546015460782e-06, + "loss": 18.2654, + "step": 27646 + }, + { + "epoch": 0.5053649441570549, + "grad_norm": 6.652847675639543, + "learning_rate": 5.156250148769247e-06, + "loss": 17.6146, + "step": 27647 + }, + { + "epoch": 0.5053832233535014, + "grad_norm": 6.041361608402879, + "learning_rate": 5.155954281530066e-06, + "loss": 17.3519, + "step": 27648 + }, + { + "epoch": 0.5054015025499479, + "grad_norm": 6.277524567147896, + "learning_rate": 5.155658413744281e-06, + "loss": 17.147, + "step": 27649 + }, + { + "epoch": 0.5054197817463945, + "grad_norm": 5.30187731898524, + "learning_rate": 5.155362545412928e-06, + "loss": 17.1637, + "step": 27650 + }, + { + "epoch": 0.5054380609428409, + "grad_norm": 6.06369297579374, + "learning_rate": 5.1550666765370416e-06, + "loss": 17.3441, + "step": 27651 + }, + { + "epoch": 0.5054563401392875, + "grad_norm": 6.125803243534805, + "learning_rate": 5.15477080711766e-06, + "loss": 17.4234, + "step": 27652 + }, + { + "epoch": 0.505474619335734, + "grad_norm": 6.902841414799843, + "learning_rate": 5.1544749371558214e-06, + "loss": 17.7937, + "step": 27653 + }, + { + "epoch": 0.5054928985321805, + "grad_norm": 9.121673690437543, + "learning_rate": 5.154179066652559e-06, + "loss": 17.8539, + "step": 27654 + }, + { + "epoch": 0.5055111777286271, + "grad_norm": 5.513348411451003, + "learning_rate": 5.153883195608914e-06, + "loss": 17.23, + "step": 27655 + }, + { + "epoch": 0.5055294569250736, + "grad_norm": 7.644708440959603, + "learning_rate": 5.153587324025921e-06, + "loss": 18.2627, + "step": 27656 + }, + { + "epoch": 0.5055477361215202, + "grad_norm": 5.872963130275385, + "learning_rate": 5.153291451904621e-06, + "loss": 17.2168, + "step": 27657 + }, + { + "epoch": 0.5055660153179666, + "grad_norm": 6.047861589930542, + "learning_rate": 5.1529955792460425e-06, + "loss": 17.3844, + "step": 27658 + }, + { + "epoch": 0.5055842945144131, + "grad_norm": 6.697298712603379, + "learning_rate": 5.1526997060512305e-06, + "loss": 17.7653, + "step": 27659 + }, + { + "epoch": 0.5056025737108597, + "grad_norm": 5.831644306619482, + "learning_rate": 5.1524038323212215e-06, + "loss": 17.4758, + "step": 27660 + }, + { + "epoch": 0.5056208529073062, + "grad_norm": 7.811983398180582, + "learning_rate": 5.1521079580570464e-06, + "loss": 17.9186, + "step": 27661 + }, + { + "epoch": 0.5056391321037527, + "grad_norm": 8.365875514911632, + "learning_rate": 5.151812083259747e-06, + "loss": 18.0911, + "step": 27662 + }, + { + "epoch": 0.5056574113001993, + "grad_norm": 7.142597196886949, + "learning_rate": 5.151516207930361e-06, + "loss": 17.9224, + "step": 27663 + }, + { + "epoch": 0.5056756904966457, + "grad_norm": 7.144160869364189, + "learning_rate": 5.151220332069923e-06, + "loss": 17.8455, + "step": 27664 + }, + { + "epoch": 0.5056939696930923, + "grad_norm": 5.225125562203611, + "learning_rate": 5.15092445567947e-06, + "loss": 17.0537, + "step": 27665 + }, + { + "epoch": 0.5057122488895388, + "grad_norm": 6.164546195115452, + "learning_rate": 5.1506285787600405e-06, + "loss": 17.3785, + "step": 27666 + }, + { + "epoch": 0.5057305280859853, + "grad_norm": 5.844485109282541, + "learning_rate": 5.150332701312672e-06, + "loss": 17.5462, + "step": 27667 + }, + { + "epoch": 0.5057488072824319, + "grad_norm": 5.7195059066051215, + 
"learning_rate": 5.150036823338399e-06, + "loss": 17.2419, + "step": 27668 + }, + { + "epoch": 0.5057670864788784, + "grad_norm": 7.8681767607495745, + "learning_rate": 5.1497409448382605e-06, + "loss": 17.7197, + "step": 27669 + }, + { + "epoch": 0.505785365675325, + "grad_norm": 3.9105518559744006, + "learning_rate": 5.149445065813294e-06, + "loss": 16.5977, + "step": 27670 + }, + { + "epoch": 0.5058036448717714, + "grad_norm": 6.170131995520045, + "learning_rate": 5.1491491862645325e-06, + "loss": 17.2897, + "step": 27671 + }, + { + "epoch": 0.5058219240682179, + "grad_norm": 5.1358430281321175, + "learning_rate": 5.148853306193018e-06, + "loss": 17.7214, + "step": 27672 + }, + { + "epoch": 0.5058402032646645, + "grad_norm": 5.856023897165747, + "learning_rate": 5.148557425599786e-06, + "loss": 17.4302, + "step": 27673 + }, + { + "epoch": 0.505858482461111, + "grad_norm": 6.09834731008439, + "learning_rate": 5.148261544485873e-06, + "loss": 17.6248, + "step": 27674 + }, + { + "epoch": 0.5058767616575576, + "grad_norm": 5.32953497411772, + "learning_rate": 5.1479656628523166e-06, + "loss": 16.9663, + "step": 27675 + }, + { + "epoch": 0.505895040854004, + "grad_norm": 5.791288210493317, + "learning_rate": 5.147669780700151e-06, + "loss": 17.4501, + "step": 27676 + }, + { + "epoch": 0.5059133200504505, + "grad_norm": 4.674975822255367, + "learning_rate": 5.147373898030419e-06, + "loss": 17.5492, + "step": 27677 + }, + { + "epoch": 0.5059315992468971, + "grad_norm": 6.4795540493638315, + "learning_rate": 5.147078014844152e-06, + "loss": 17.4952, + "step": 27678 + }, + { + "epoch": 0.5059498784433436, + "grad_norm": 5.87615287630539, + "learning_rate": 5.14678213114239e-06, + "loss": 17.3138, + "step": 27679 + }, + { + "epoch": 0.5059681576397902, + "grad_norm": 6.823351620249338, + "learning_rate": 5.146486246926169e-06, + "loss": 17.7274, + "step": 27680 + }, + { + "epoch": 0.5059864368362367, + "grad_norm": 6.636215937774113, + "learning_rate": 5.1461903621965256e-06, + "loss": 17.8749, + "step": 27681 + }, + { + "epoch": 0.5060047160326832, + "grad_norm": 5.570020480201589, + "learning_rate": 5.145894476954499e-06, + "loss": 17.1821, + "step": 27682 + }, + { + "epoch": 0.5060229952291297, + "grad_norm": 6.308932897503492, + "learning_rate": 5.145598591201124e-06, + "loss": 17.7802, + "step": 27683 + }, + { + "epoch": 0.5060412744255762, + "grad_norm": 7.644739061584473, + "learning_rate": 5.14530270493744e-06, + "loss": 17.6652, + "step": 27684 + }, + { + "epoch": 0.5060595536220228, + "grad_norm": 5.5404416303762725, + "learning_rate": 5.145006818164482e-06, + "loss": 17.2558, + "step": 27685 + }, + { + "epoch": 0.5060778328184693, + "grad_norm": 5.951095898646831, + "learning_rate": 5.1447109308832865e-06, + "loss": 17.2144, + "step": 27686 + }, + { + "epoch": 0.5060961120149158, + "grad_norm": 5.955982864622808, + "learning_rate": 5.144415043094892e-06, + "loss": 17.585, + "step": 27687 + }, + { + "epoch": 0.5061143912113624, + "grad_norm": 6.180952227977285, + "learning_rate": 5.144119154800338e-06, + "loss": 17.5469, + "step": 27688 + }, + { + "epoch": 0.5061326704078088, + "grad_norm": 7.116905280800295, + "learning_rate": 5.143823266000657e-06, + "loss": 17.9486, + "step": 27689 + }, + { + "epoch": 0.5061509496042554, + "grad_norm": 6.0854189530902625, + "learning_rate": 5.143527376696886e-06, + "loss": 17.3287, + "step": 27690 + }, + { + "epoch": 0.5061692288007019, + "grad_norm": 5.9056177128777305, + "learning_rate": 5.1432314868900675e-06, + "loss": 17.273, + "step": 27691 + 
}, + { + "epoch": 0.5061875079971484, + "grad_norm": 7.1257202352464475, + "learning_rate": 5.1429355965812335e-06, + "loss": 17.7816, + "step": 27692 + }, + { + "epoch": 0.506205787193595, + "grad_norm": 4.9526104789537975, + "learning_rate": 5.142639705771422e-06, + "loss": 16.9168, + "step": 27693 + }, + { + "epoch": 0.5062240663900415, + "grad_norm": 6.925540441649807, + "learning_rate": 5.142343814461671e-06, + "loss": 18.026, + "step": 27694 + }, + { + "epoch": 0.5062423455864881, + "grad_norm": 5.057242785752675, + "learning_rate": 5.1420479226530176e-06, + "loss": 16.9973, + "step": 27695 + }, + { + "epoch": 0.5062606247829345, + "grad_norm": 5.298186320413527, + "learning_rate": 5.141752030346499e-06, + "loss": 17.0309, + "step": 27696 + }, + { + "epoch": 0.506278903979381, + "grad_norm": 6.590194470556828, + "learning_rate": 5.141456137543151e-06, + "loss": 17.6833, + "step": 27697 + }, + { + "epoch": 0.5062971831758276, + "grad_norm": 6.984955925175951, + "learning_rate": 5.141160244244011e-06, + "loss": 17.9011, + "step": 27698 + }, + { + "epoch": 0.5063154623722741, + "grad_norm": 4.831846667844356, + "learning_rate": 5.140864350450117e-06, + "loss": 16.9374, + "step": 27699 + }, + { + "epoch": 0.5063337415687207, + "grad_norm": 6.681990776830355, + "learning_rate": 5.140568456162507e-06, + "loss": 17.773, + "step": 27700 + }, + { + "epoch": 0.5063520207651672, + "grad_norm": 7.553439780878484, + "learning_rate": 5.1402725613822165e-06, + "loss": 18.0232, + "step": 27701 + }, + { + "epoch": 0.5063702999616136, + "grad_norm": 7.438364030658903, + "learning_rate": 5.139976666110283e-06, + "loss": 17.9461, + "step": 27702 + }, + { + "epoch": 0.5063885791580602, + "grad_norm": 5.842131301653922, + "learning_rate": 5.139680770347741e-06, + "loss": 17.4337, + "step": 27703 + }, + { + "epoch": 0.5064068583545067, + "grad_norm": 6.235689119762629, + "learning_rate": 5.139384874095631e-06, + "loss": 17.5769, + "step": 27704 + }, + { + "epoch": 0.5064251375509533, + "grad_norm": 6.5957393003374305, + "learning_rate": 5.139088977354991e-06, + "loss": 17.4528, + "step": 27705 + }, + { + "epoch": 0.5064434167473998, + "grad_norm": 8.876219451068485, + "learning_rate": 5.138793080126855e-06, + "loss": 17.9884, + "step": 27706 + }, + { + "epoch": 0.5064616959438463, + "grad_norm": 6.536162880783229, + "learning_rate": 5.138497182412261e-06, + "loss": 17.8436, + "step": 27707 + }, + { + "epoch": 0.5064799751402929, + "grad_norm": 5.346922379373697, + "learning_rate": 5.138201284212246e-06, + "loss": 17.1215, + "step": 27708 + }, + { + "epoch": 0.5064982543367393, + "grad_norm": 6.920789085254473, + "learning_rate": 5.13790538552785e-06, + "loss": 18.0832, + "step": 27709 + }, + { + "epoch": 0.5065165335331859, + "grad_norm": 5.38868080758509, + "learning_rate": 5.137609486360105e-06, + "loss": 17.1127, + "step": 27710 + }, + { + "epoch": 0.5065348127296324, + "grad_norm": 7.3354816946597525, + "learning_rate": 5.137313586710051e-06, + "loss": 17.5267, + "step": 27711 + }, + { + "epoch": 0.5065530919260789, + "grad_norm": 5.713671445638332, + "learning_rate": 5.137017686578724e-06, + "loss": 17.0097, + "step": 27712 + }, + { + "epoch": 0.5065713711225255, + "grad_norm": 7.486510099193616, + "learning_rate": 5.136721785967165e-06, + "loss": 17.7021, + "step": 27713 + }, + { + "epoch": 0.506589650318972, + "grad_norm": 5.746439996288859, + "learning_rate": 5.136425884876405e-06, + "loss": 17.2224, + "step": 27714 + }, + { + "epoch": 0.5066079295154186, + "grad_norm": 7.3379061579999965, + 
"learning_rate": 5.136129983307486e-06, + "loss": 17.9784, + "step": 27715 + }, + { + "epoch": 0.506626208711865, + "grad_norm": 6.008121010187761, + "learning_rate": 5.135834081261443e-06, + "loss": 17.6975, + "step": 27716 + }, + { + "epoch": 0.5066444879083115, + "grad_norm": 7.21445062747721, + "learning_rate": 5.135538178739311e-06, + "loss": 17.6537, + "step": 27717 + }, + { + "epoch": 0.5066627671047581, + "grad_norm": 5.362687134513164, + "learning_rate": 5.135242275742132e-06, + "loss": 17.1599, + "step": 27718 + }, + { + "epoch": 0.5066810463012046, + "grad_norm": 7.813650131621309, + "learning_rate": 5.13494637227094e-06, + "loss": 18.3504, + "step": 27719 + }, + { + "epoch": 0.5066993254976512, + "grad_norm": 6.092689226364962, + "learning_rate": 5.134650468326773e-06, + "loss": 17.5533, + "step": 27720 + }, + { + "epoch": 0.5067176046940977, + "grad_norm": 7.587971222214896, + "learning_rate": 5.134354563910667e-06, + "loss": 18.0693, + "step": 27721 + }, + { + "epoch": 0.5067358838905441, + "grad_norm": 6.330113621846811, + "learning_rate": 5.134058659023661e-06, + "loss": 17.2376, + "step": 27722 + }, + { + "epoch": 0.5067541630869907, + "grad_norm": 6.32979980405475, + "learning_rate": 5.133762753666789e-06, + "loss": 17.7372, + "step": 27723 + }, + { + "epoch": 0.5067724422834372, + "grad_norm": 5.110404489032517, + "learning_rate": 5.1334668478410925e-06, + "loss": 16.9661, + "step": 27724 + }, + { + "epoch": 0.5067907214798838, + "grad_norm": 6.623456977007643, + "learning_rate": 5.133170941547604e-06, + "loss": 17.2745, + "step": 27725 + }, + { + "epoch": 0.5068090006763303, + "grad_norm": 8.008242050881773, + "learning_rate": 5.132875034787365e-06, + "loss": 18.2314, + "step": 27726 + }, + { + "epoch": 0.5068272798727768, + "grad_norm": 5.7253805568384895, + "learning_rate": 5.132579127561409e-06, + "loss": 17.1657, + "step": 27727 + }, + { + "epoch": 0.5068455590692234, + "grad_norm": 9.219274905737294, + "learning_rate": 5.132283219870775e-06, + "loss": 18.0161, + "step": 27728 + }, + { + "epoch": 0.5068638382656698, + "grad_norm": 7.70803399074323, + "learning_rate": 5.1319873117165005e-06, + "loss": 17.7997, + "step": 27729 + }, + { + "epoch": 0.5068821174621163, + "grad_norm": 6.875985284577539, + "learning_rate": 5.131691403099621e-06, + "loss": 17.5223, + "step": 27730 + }, + { + "epoch": 0.5069003966585629, + "grad_norm": 5.026856926452195, + "learning_rate": 5.1313954940211755e-06, + "loss": 17.085, + "step": 27731 + }, + { + "epoch": 0.5069186758550094, + "grad_norm": 6.312322182507129, + "learning_rate": 5.1310995844822e-06, + "loss": 17.629, + "step": 27732 + }, + { + "epoch": 0.506936955051456, + "grad_norm": 5.544649978169023, + "learning_rate": 5.130803674483732e-06, + "loss": 17.2875, + "step": 27733 + }, + { + "epoch": 0.5069552342479025, + "grad_norm": 6.679378183058156, + "learning_rate": 5.130507764026808e-06, + "loss": 17.8269, + "step": 27734 + }, + { + "epoch": 0.5069735134443489, + "grad_norm": 6.886195116724657, + "learning_rate": 5.130211853112463e-06, + "loss": 17.8946, + "step": 27735 + }, + { + "epoch": 0.5069917926407955, + "grad_norm": 5.442584452922059, + "learning_rate": 5.129915941741741e-06, + "loss": 17.1328, + "step": 27736 + }, + { + "epoch": 0.507010071837242, + "grad_norm": 6.613305630433648, + "learning_rate": 5.129620029915674e-06, + "loss": 17.8886, + "step": 27737 + }, + { + "epoch": 0.5070283510336886, + "grad_norm": 6.267162436401069, + "learning_rate": 5.1293241176353e-06, + "loss": 17.4957, + "step": 27738 + }, + { + 
"epoch": 0.5070466302301351, + "grad_norm": 5.707869374733119, + "learning_rate": 5.129028204901654e-06, + "loss": 17.1091, + "step": 27739 + }, + { + "epoch": 0.5070649094265816, + "grad_norm": 6.481475256922889, + "learning_rate": 5.128732291715777e-06, + "loss": 17.533, + "step": 27740 + }, + { + "epoch": 0.5070831886230281, + "grad_norm": 5.59205507786949, + "learning_rate": 5.128436378078704e-06, + "loss": 17.2637, + "step": 27741 + }, + { + "epoch": 0.5071014678194746, + "grad_norm": 8.04507234687692, + "learning_rate": 5.128140463991473e-06, + "loss": 18.3353, + "step": 27742 + }, + { + "epoch": 0.5071197470159212, + "grad_norm": 5.860938477511177, + "learning_rate": 5.127844549455122e-06, + "loss": 17.3962, + "step": 27743 + }, + { + "epoch": 0.5071380262123677, + "grad_norm": 6.096136693855123, + "learning_rate": 5.127548634470685e-06, + "loss": 17.3681, + "step": 27744 + }, + { + "epoch": 0.5071563054088142, + "grad_norm": 6.991027932920232, + "learning_rate": 5.127252719039202e-06, + "loss": 17.618, + "step": 27745 + }, + { + "epoch": 0.5071745846052608, + "grad_norm": 6.212866849705913, + "learning_rate": 5.126956803161709e-06, + "loss": 17.4926, + "step": 27746 + }, + { + "epoch": 0.5071928638017072, + "grad_norm": 5.892368860593251, + "learning_rate": 5.126660886839244e-06, + "loss": 17.3287, + "step": 27747 + }, + { + "epoch": 0.5072111429981538, + "grad_norm": 7.190833850718565, + "learning_rate": 5.126364970072843e-06, + "loss": 17.5176, + "step": 27748 + }, + { + "epoch": 0.5072294221946003, + "grad_norm": 7.364418141602431, + "learning_rate": 5.126069052863542e-06, + "loss": 17.8325, + "step": 27749 + }, + { + "epoch": 0.5072477013910468, + "grad_norm": 8.195818727530257, + "learning_rate": 5.125773135212383e-06, + "loss": 18.4358, + "step": 27750 + }, + { + "epoch": 0.5072659805874934, + "grad_norm": 6.889646480412214, + "learning_rate": 5.125477217120399e-06, + "loss": 17.9033, + "step": 27751 + }, + { + "epoch": 0.5072842597839399, + "grad_norm": 5.628395766056375, + "learning_rate": 5.125181298588629e-06, + "loss": 17.3565, + "step": 27752 + }, + { + "epoch": 0.5073025389803865, + "grad_norm": 4.957846213430797, + "learning_rate": 5.124885379618107e-06, + "loss": 16.8323, + "step": 27753 + }, + { + "epoch": 0.5073208181768329, + "grad_norm": 8.043322179811897, + "learning_rate": 5.124589460209875e-06, + "loss": 17.9384, + "step": 27754 + }, + { + "epoch": 0.5073390973732794, + "grad_norm": 5.643776303822953, + "learning_rate": 5.124293540364966e-06, + "loss": 17.4568, + "step": 27755 + }, + { + "epoch": 0.507357376569726, + "grad_norm": 6.823357145909125, + "learning_rate": 5.12399762008442e-06, + "loss": 17.6151, + "step": 27756 + }, + { + "epoch": 0.5073756557661725, + "grad_norm": 6.848050043748216, + "learning_rate": 5.1237016993692726e-06, + "loss": 17.5411, + "step": 27757 + }, + { + "epoch": 0.5073939349626191, + "grad_norm": 6.818873049251554, + "learning_rate": 5.123405778220562e-06, + "loss": 17.5898, + "step": 27758 + }, + { + "epoch": 0.5074122141590656, + "grad_norm": 6.891246948640802, + "learning_rate": 5.123109856639325e-06, + "loss": 17.724, + "step": 27759 + }, + { + "epoch": 0.507430493355512, + "grad_norm": 5.22646842897986, + "learning_rate": 5.122813934626598e-06, + "loss": 17.3879, + "step": 27760 + }, + { + "epoch": 0.5074487725519586, + "grad_norm": 6.241375253225352, + "learning_rate": 5.122518012183419e-06, + "loss": 17.4592, + "step": 27761 + }, + { + "epoch": 0.5074670517484051, + "grad_norm": 7.011943657573289, + "learning_rate": 
5.1222220893108245e-06, + "loss": 17.7635, + "step": 27762 + }, + { + "epoch": 0.5074853309448517, + "grad_norm": 8.340489642847533, + "learning_rate": 5.121926166009854e-06, + "loss": 17.9439, + "step": 27763 + }, + { + "epoch": 0.5075036101412982, + "grad_norm": 7.020032119133576, + "learning_rate": 5.1216302422815415e-06, + "loss": 18.0195, + "step": 27764 + }, + { + "epoch": 0.5075218893377447, + "grad_norm": 7.319892816728759, + "learning_rate": 5.121334318126925e-06, + "loss": 17.8734, + "step": 27765 + }, + { + "epoch": 0.5075401685341913, + "grad_norm": 6.401731890036808, + "learning_rate": 5.121038393547043e-06, + "loss": 17.5819, + "step": 27766 + }, + { + "epoch": 0.5075584477306377, + "grad_norm": 4.869051610432103, + "learning_rate": 5.120742468542932e-06, + "loss": 17.0804, + "step": 27767 + }, + { + "epoch": 0.5075767269270843, + "grad_norm": 7.001114839435052, + "learning_rate": 5.120446543115629e-06, + "loss": 17.6885, + "step": 27768 + }, + { + "epoch": 0.5075950061235308, + "grad_norm": 5.985950871215604, + "learning_rate": 5.12015061726617e-06, + "loss": 17.2141, + "step": 27769 + }, + { + "epoch": 0.5076132853199773, + "grad_norm": 6.203530235471378, + "learning_rate": 5.119854690995597e-06, + "loss": 17.4852, + "step": 27770 + }, + { + "epoch": 0.5076315645164239, + "grad_norm": 6.345810166297624, + "learning_rate": 5.11955876430494e-06, + "loss": 17.5605, + "step": 27771 + }, + { + "epoch": 0.5076498437128704, + "grad_norm": 7.017718859447296, + "learning_rate": 5.119262837195241e-06, + "loss": 18.0204, + "step": 27772 + }, + { + "epoch": 0.507668122909317, + "grad_norm": 5.915548862643262, + "learning_rate": 5.118966909667536e-06, + "loss": 17.4062, + "step": 27773 + }, + { + "epoch": 0.5076864021057634, + "grad_norm": 8.459241006330435, + "learning_rate": 5.118670981722864e-06, + "loss": 18.7702, + "step": 27774 + }, + { + "epoch": 0.5077046813022099, + "grad_norm": 5.26648824627864, + "learning_rate": 5.11837505336226e-06, + "loss": 17.0453, + "step": 27775 + }, + { + "epoch": 0.5077229604986565, + "grad_norm": 6.546008264375168, + "learning_rate": 5.11807912458676e-06, + "loss": 17.4485, + "step": 27776 + }, + { + "epoch": 0.507741239695103, + "grad_norm": 6.065952069195552, + "learning_rate": 5.117783195397405e-06, + "loss": 17.2764, + "step": 27777 + }, + { + "epoch": 0.5077595188915496, + "grad_norm": 6.844432593900314, + "learning_rate": 5.117487265795229e-06, + "loss": 17.4527, + "step": 27778 + }, + { + "epoch": 0.5077777980879961, + "grad_norm": 5.670818472573281, + "learning_rate": 5.11719133578127e-06, + "loss": 17.3438, + "step": 27779 + }, + { + "epoch": 0.5077960772844425, + "grad_norm": 7.971437219958533, + "learning_rate": 5.1168954053565655e-06, + "loss": 18.0089, + "step": 27780 + }, + { + "epoch": 0.5078143564808891, + "grad_norm": 6.268244555676373, + "learning_rate": 5.116599474522153e-06, + "loss": 17.5396, + "step": 27781 + }, + { + "epoch": 0.5078326356773356, + "grad_norm": 6.215729356350821, + "learning_rate": 5.11630354327907e-06, + "loss": 17.5865, + "step": 27782 + }, + { + "epoch": 0.5078509148737822, + "grad_norm": 6.288194553442732, + "learning_rate": 5.116007611628353e-06, + "loss": 17.5758, + "step": 27783 + }, + { + "epoch": 0.5078691940702287, + "grad_norm": 6.14978246789447, + "learning_rate": 5.115711679571038e-06, + "loss": 17.4051, + "step": 27784 + }, + { + "epoch": 0.5078874732666752, + "grad_norm": 6.612922377398286, + "learning_rate": 5.115415747108166e-06, + "loss": 17.4096, + "step": 27785 + }, + { + "epoch": 
0.5079057524631218, + "grad_norm": 6.47498327904835, + "learning_rate": 5.11511981424077e-06, + "loss": 17.2669, + "step": 27786 + }, + { + "epoch": 0.5079240316595682, + "grad_norm": 7.595785899509144, + "learning_rate": 5.114823880969889e-06, + "loss": 17.6638, + "step": 27787 + }, + { + "epoch": 0.5079423108560148, + "grad_norm": 6.23259983046272, + "learning_rate": 5.114527947296563e-06, + "loss": 17.4832, + "step": 27788 + }, + { + "epoch": 0.5079605900524613, + "grad_norm": 7.519498838984317, + "learning_rate": 5.114232013221823e-06, + "loss": 17.7527, + "step": 27789 + }, + { + "epoch": 0.5079788692489078, + "grad_norm": 7.11893307367405, + "learning_rate": 5.1139360787467104e-06, + "loss": 17.8125, + "step": 27790 + }, + { + "epoch": 0.5079971484453544, + "grad_norm": 7.572962276243854, + "learning_rate": 5.113640143872264e-06, + "loss": 17.6688, + "step": 27791 + }, + { + "epoch": 0.5080154276418009, + "grad_norm": 5.910023689579945, + "learning_rate": 5.1133442085995165e-06, + "loss": 17.5413, + "step": 27792 + }, + { + "epoch": 0.5080337068382474, + "grad_norm": 6.737547699899249, + "learning_rate": 5.113048272929508e-06, + "loss": 17.8257, + "step": 27793 + }, + { + "epoch": 0.5080519860346939, + "grad_norm": 6.569180814096426, + "learning_rate": 5.112752336863275e-06, + "loss": 17.3771, + "step": 27794 + }, + { + "epoch": 0.5080702652311404, + "grad_norm": 6.509063902180465, + "learning_rate": 5.112456400401855e-06, + "loss": 17.4601, + "step": 27795 + }, + { + "epoch": 0.508088544427587, + "grad_norm": 6.216367487588847, + "learning_rate": 5.112160463546285e-06, + "loss": 17.2877, + "step": 27796 + }, + { + "epoch": 0.5081068236240335, + "grad_norm": 5.924333410370937, + "learning_rate": 5.111864526297603e-06, + "loss": 17.0676, + "step": 27797 + }, + { + "epoch": 0.50812510282048, + "grad_norm": 6.347197197891492, + "learning_rate": 5.111568588656845e-06, + "loss": 17.7076, + "step": 27798 + }, + { + "epoch": 0.5081433820169265, + "grad_norm": 6.201500369875943, + "learning_rate": 5.111272650625049e-06, + "loss": 17.4921, + "step": 27799 + }, + { + "epoch": 0.508161661213373, + "grad_norm": 5.864006941659548, + "learning_rate": 5.110976712203251e-06, + "loss": 17.397, + "step": 27800 + }, + { + "epoch": 0.5081799404098196, + "grad_norm": 6.527750122750077, + "learning_rate": 5.110680773392491e-06, + "loss": 17.7126, + "step": 27801 + }, + { + "epoch": 0.5081982196062661, + "grad_norm": 5.978759562287268, + "learning_rate": 5.110384834193804e-06, + "loss": 17.5569, + "step": 27802 + }, + { + "epoch": 0.5082164988027126, + "grad_norm": 5.653941790257011, + "learning_rate": 5.110088894608226e-06, + "loss": 17.2645, + "step": 27803 + }, + { + "epoch": 0.5082347779991592, + "grad_norm": 7.111886366714862, + "learning_rate": 5.109792954636796e-06, + "loss": 17.7092, + "step": 27804 + }, + { + "epoch": 0.5082530571956057, + "grad_norm": 5.887329362261316, + "learning_rate": 5.109497014280555e-06, + "loss": 17.5025, + "step": 27805 + }, + { + "epoch": 0.5082713363920522, + "grad_norm": 6.316006213197096, + "learning_rate": 5.1092010735405325e-06, + "loss": 17.4514, + "step": 27806 + }, + { + "epoch": 0.5082896155884987, + "grad_norm": 5.425393787878941, + "learning_rate": 5.108905132417772e-06, + "loss": 17.1413, + "step": 27807 + }, + { + "epoch": 0.5083078947849452, + "grad_norm": 6.179089818058358, + "learning_rate": 5.1086091909133075e-06, + "loss": 17.3279, + "step": 27808 + }, + { + "epoch": 0.5083261739813918, + "grad_norm": 5.75494897087057, + "learning_rate": 
5.108313249028177e-06, + "loss": 17.2535, + "step": 27809 + }, + { + "epoch": 0.5083444531778383, + "grad_norm": 7.413869531581902, + "learning_rate": 5.108017306763417e-06, + "loss": 17.927, + "step": 27810 + }, + { + "epoch": 0.5083627323742849, + "grad_norm": 6.531126158633522, + "learning_rate": 5.107721364120067e-06, + "loss": 17.4851, + "step": 27811 + }, + { + "epoch": 0.5083810115707313, + "grad_norm": 7.495061218472021, + "learning_rate": 5.107425421099163e-06, + "loss": 18.0959, + "step": 27812 + }, + { + "epoch": 0.5083992907671778, + "grad_norm": 7.345337631125079, + "learning_rate": 5.107129477701743e-06, + "loss": 17.8673, + "step": 27813 + }, + { + "epoch": 0.5084175699636244, + "grad_norm": 5.881337538549764, + "learning_rate": 5.106833533928842e-06, + "loss": 17.3385, + "step": 27814 + }, + { + "epoch": 0.5084358491600709, + "grad_norm": 6.806397927155145, + "learning_rate": 5.106537589781501e-06, + "loss": 17.442, + "step": 27815 + }, + { + "epoch": 0.5084541283565175, + "grad_norm": 7.2959632017956695, + "learning_rate": 5.106241645260754e-06, + "loss": 17.8208, + "step": 27816 + }, + { + "epoch": 0.508472407552964, + "grad_norm": 5.475619954715328, + "learning_rate": 5.105945700367636e-06, + "loss": 16.8526, + "step": 27817 + }, + { + "epoch": 0.5084906867494104, + "grad_norm": 8.234228650583185, + "learning_rate": 5.105649755103191e-06, + "loss": 18.1657, + "step": 27818 + }, + { + "epoch": 0.508508965945857, + "grad_norm": 8.355068089071333, + "learning_rate": 5.1053538094684515e-06, + "loss": 17.6617, + "step": 27819 + }, + { + "epoch": 0.5085272451423035, + "grad_norm": 6.819178272566638, + "learning_rate": 5.105057863464458e-06, + "loss": 17.7844, + "step": 27820 + }, + { + "epoch": 0.5085455243387501, + "grad_norm": 6.923034621610501, + "learning_rate": 5.104761917092243e-06, + "loss": 17.6915, + "step": 27821 + }, + { + "epoch": 0.5085638035351966, + "grad_norm": 6.931761091764257, + "learning_rate": 5.104465970352848e-06, + "loss": 17.6312, + "step": 27822 + }, + { + "epoch": 0.5085820827316431, + "grad_norm": 5.682705060024975, + "learning_rate": 5.1041700232473095e-06, + "loss": 17.3104, + "step": 27823 + }, + { + "epoch": 0.5086003619280897, + "grad_norm": 6.657548656800469, + "learning_rate": 5.103874075776663e-06, + "loss": 17.6736, + "step": 27824 + }, + { + "epoch": 0.5086186411245361, + "grad_norm": 7.483931887010073, + "learning_rate": 5.103578127941946e-06, + "loss": 17.8158, + "step": 27825 + }, + { + "epoch": 0.5086369203209827, + "grad_norm": 6.676413809934157, + "learning_rate": 5.103282179744198e-06, + "loss": 17.5908, + "step": 27826 + }, + { + "epoch": 0.5086551995174292, + "grad_norm": 7.886278071909366, + "learning_rate": 5.102986231184455e-06, + "loss": 17.9176, + "step": 27827 + }, + { + "epoch": 0.5086734787138757, + "grad_norm": 7.839605698319246, + "learning_rate": 5.102690282263754e-06, + "loss": 17.8862, + "step": 27828 + }, + { + "epoch": 0.5086917579103223, + "grad_norm": 4.9164327604959315, + "learning_rate": 5.102394332983132e-06, + "loss": 17.0036, + "step": 27829 + }, + { + "epoch": 0.5087100371067688, + "grad_norm": 6.177818813364183, + "learning_rate": 5.1020983833436264e-06, + "loss": 17.1324, + "step": 27830 + }, + { + "epoch": 0.5087283163032154, + "grad_norm": 8.576030724323914, + "learning_rate": 5.101802433346275e-06, + "loss": 17.9203, + "step": 27831 + }, + { + "epoch": 0.5087465954996618, + "grad_norm": 7.2500642547772225, + "learning_rate": 5.101506482992118e-06, + "loss": 18.0675, + "step": 27832 + }, + { + "epoch": 
0.5087648746961083, + "grad_norm": 6.52184673889023, + "learning_rate": 5.101210532282187e-06, + "loss": 17.4957, + "step": 27833 + }, + { + "epoch": 0.5087831538925549, + "grad_norm": 6.68907586183205, + "learning_rate": 5.100914581217521e-06, + "loss": 17.5087, + "step": 27834 + }, + { + "epoch": 0.5088014330890014, + "grad_norm": 6.575152285909569, + "learning_rate": 5.1006186297991574e-06, + "loss": 17.392, + "step": 27835 + }, + { + "epoch": 0.508819712285448, + "grad_norm": 11.465419141079181, + "learning_rate": 5.100322678028138e-06, + "loss": 19.344, + "step": 27836 + }, + { + "epoch": 0.5088379914818945, + "grad_norm": 6.067945112157284, + "learning_rate": 5.100026725905494e-06, + "loss": 17.4425, + "step": 27837 + }, + { + "epoch": 0.5088562706783409, + "grad_norm": 5.802755459897126, + "learning_rate": 5.099730773432266e-06, + "loss": 17.0646, + "step": 27838 + }, + { + "epoch": 0.5088745498747875, + "grad_norm": 6.309037106321797, + "learning_rate": 5.099434820609488e-06, + "loss": 17.2151, + "step": 27839 + }, + { + "epoch": 0.508892829071234, + "grad_norm": 6.163295659381445, + "learning_rate": 5.099138867438201e-06, + "loss": 17.281, + "step": 27840 + }, + { + "epoch": 0.5089111082676806, + "grad_norm": 8.354820996571528, + "learning_rate": 5.098842913919442e-06, + "loss": 18.0639, + "step": 27841 + }, + { + "epoch": 0.5089293874641271, + "grad_norm": 7.64688041787203, + "learning_rate": 5.098546960054246e-06, + "loss": 17.6232, + "step": 27842 + }, + { + "epoch": 0.5089476666605736, + "grad_norm": 6.7545021547324, + "learning_rate": 5.098251005843652e-06, + "loss": 17.6928, + "step": 27843 + }, + { + "epoch": 0.5089659458570202, + "grad_norm": 5.143487498292757, + "learning_rate": 5.097955051288696e-06, + "loss": 17.1643, + "step": 27844 + }, + { + "epoch": 0.5089842250534666, + "grad_norm": 5.53427588550187, + "learning_rate": 5.097659096390416e-06, + "loss": 17.2131, + "step": 27845 + }, + { + "epoch": 0.5090025042499132, + "grad_norm": 6.30860754506615, + "learning_rate": 5.09736314114985e-06, + "loss": 17.7132, + "step": 27846 + }, + { + "epoch": 0.5090207834463597, + "grad_norm": 8.563713722008549, + "learning_rate": 5.0970671855680344e-06, + "loss": 17.6117, + "step": 27847 + }, + { + "epoch": 0.5090390626428062, + "grad_norm": 7.1524516519094306, + "learning_rate": 5.096771229646007e-06, + "loss": 17.6969, + "step": 27848 + }, + { + "epoch": 0.5090573418392528, + "grad_norm": 5.634445277772277, + "learning_rate": 5.0964752733848035e-06, + "loss": 17.332, + "step": 27849 + }, + { + "epoch": 0.5090756210356993, + "grad_norm": 10.846335040821502, + "learning_rate": 5.096179316785464e-06, + "loss": 18.1933, + "step": 27850 + }, + { + "epoch": 0.5090939002321458, + "grad_norm": 5.813532846858103, + "learning_rate": 5.095883359849024e-06, + "loss": 17.3308, + "step": 27851 + }, + { + "epoch": 0.5091121794285923, + "grad_norm": 6.807114437097919, + "learning_rate": 5.095587402576521e-06, + "loss": 17.7462, + "step": 27852 + }, + { + "epoch": 0.5091304586250388, + "grad_norm": 5.423405840084081, + "learning_rate": 5.095291444968993e-06, + "loss": 17.0539, + "step": 27853 + }, + { + "epoch": 0.5091487378214854, + "grad_norm": 8.369617524169517, + "learning_rate": 5.094995487027475e-06, + "loss": 17.9208, + "step": 27854 + }, + { + "epoch": 0.5091670170179319, + "grad_norm": 5.734025767614059, + "learning_rate": 5.094699528753008e-06, + "loss": 17.2728, + "step": 27855 + }, + { + "epoch": 0.5091852962143785, + "grad_norm": 7.37603727772448, + "learning_rate": 
5.094403570146626e-06, + "loss": 17.5804, + "step": 27856 + }, + { + "epoch": 0.509203575410825, + "grad_norm": 5.523727202934449, + "learning_rate": 5.0941076112093694e-06, + "loss": 17.057, + "step": 27857 + }, + { + "epoch": 0.5092218546072714, + "grad_norm": 6.575651794090517, + "learning_rate": 5.093811651942272e-06, + "loss": 17.824, + "step": 27858 + }, + { + "epoch": 0.509240133803718, + "grad_norm": 7.08990373527558, + "learning_rate": 5.093515692346373e-06, + "loss": 17.8624, + "step": 27859 + }, + { + "epoch": 0.5092584130001645, + "grad_norm": 6.252322738583567, + "learning_rate": 5.093219732422712e-06, + "loss": 17.2543, + "step": 27860 + }, + { + "epoch": 0.5092766921966111, + "grad_norm": 7.274000395594801, + "learning_rate": 5.092923772172322e-06, + "loss": 17.5227, + "step": 27861 + }, + { + "epoch": 0.5092949713930576, + "grad_norm": 7.363211891902213, + "learning_rate": 5.092627811596241e-06, + "loss": 17.73, + "step": 27862 + }, + { + "epoch": 0.509313250589504, + "grad_norm": 7.440880837814436, + "learning_rate": 5.0923318506955086e-06, + "loss": 17.8423, + "step": 27863 + }, + { + "epoch": 0.5093315297859506, + "grad_norm": 5.093079881812766, + "learning_rate": 5.092035889471163e-06, + "loss": 17.0842, + "step": 27864 + }, + { + "epoch": 0.5093498089823971, + "grad_norm": 6.865865467142303, + "learning_rate": 5.091739927924239e-06, + "loss": 17.9005, + "step": 27865 + }, + { + "epoch": 0.5093680881788436, + "grad_norm": 9.030087018564162, + "learning_rate": 5.0914439660557726e-06, + "loss": 18.2221, + "step": 27866 + }, + { + "epoch": 0.5093863673752902, + "grad_norm": 6.753652388079484, + "learning_rate": 5.0911480038668036e-06, + "loss": 17.7345, + "step": 27867 + }, + { + "epoch": 0.5094046465717367, + "grad_norm": 7.056262798115523, + "learning_rate": 5.090852041358369e-06, + "loss": 17.5722, + "step": 27868 + }, + { + "epoch": 0.5094229257681833, + "grad_norm": 6.356815609070242, + "learning_rate": 5.0905560785315065e-06, + "loss": 17.3165, + "step": 27869 + }, + { + "epoch": 0.5094412049646297, + "grad_norm": 6.512300669371463, + "learning_rate": 5.090260115387253e-06, + "loss": 17.2421, + "step": 27870 + }, + { + "epoch": 0.5094594841610762, + "grad_norm": 8.32693455713865, + "learning_rate": 5.089964151926644e-06, + "loss": 18.4941, + "step": 27871 + }, + { + "epoch": 0.5094777633575228, + "grad_norm": 5.997102400857258, + "learning_rate": 5.089668188150719e-06, + "loss": 17.2529, + "step": 27872 + }, + { + "epoch": 0.5094960425539693, + "grad_norm": 6.3895848858132105, + "learning_rate": 5.089372224060516e-06, + "loss": 17.4529, + "step": 27873 + }, + { + "epoch": 0.5095143217504159, + "grad_norm": 8.13476791743883, + "learning_rate": 5.089076259657071e-06, + "loss": 18.2045, + "step": 27874 + }, + { + "epoch": 0.5095326009468624, + "grad_norm": 5.361835074480378, + "learning_rate": 5.088780294941421e-06, + "loss": 17.0467, + "step": 27875 + }, + { + "epoch": 0.5095508801433088, + "grad_norm": 6.250313742437329, + "learning_rate": 5.088484329914602e-06, + "loss": 17.5089, + "step": 27876 + }, + { + "epoch": 0.5095691593397554, + "grad_norm": 6.288506411961067, + "learning_rate": 5.0881883645776565e-06, + "loss": 17.7503, + "step": 27877 + }, + { + "epoch": 0.5095874385362019, + "grad_norm": 6.757143820103346, + "learning_rate": 5.087892398931616e-06, + "loss": 17.5151, + "step": 27878 + }, + { + "epoch": 0.5096057177326485, + "grad_norm": 8.195882530412849, + "learning_rate": 5.087596432977521e-06, + "loss": 17.9789, + "step": 27879 + }, + { + "epoch": 
0.509623996929095, + "grad_norm": 7.861485717426822, + "learning_rate": 5.087300466716407e-06, + "loss": 17.7657, + "step": 27880 + }, + { + "epoch": 0.5096422761255415, + "grad_norm": 6.36459832657322, + "learning_rate": 5.087004500149314e-06, + "loss": 17.5838, + "step": 27881 + }, + { + "epoch": 0.5096605553219881, + "grad_norm": 5.4643637952944415, + "learning_rate": 5.086708533277277e-06, + "loss": 17.012, + "step": 27882 + }, + { + "epoch": 0.5096788345184345, + "grad_norm": 5.439422791490859, + "learning_rate": 5.086412566101334e-06, + "loss": 17.1715, + "step": 27883 + }, + { + "epoch": 0.5096971137148811, + "grad_norm": 6.050986215713007, + "learning_rate": 5.086116598622522e-06, + "loss": 17.3001, + "step": 27884 + }, + { + "epoch": 0.5097153929113276, + "grad_norm": 7.2932790019798235, + "learning_rate": 5.08582063084188e-06, + "loss": 17.6906, + "step": 27885 + }, + { + "epoch": 0.5097336721077741, + "grad_norm": 6.866716266787876, + "learning_rate": 5.085524662760444e-06, + "loss": 17.6706, + "step": 27886 + }, + { + "epoch": 0.5097519513042207, + "grad_norm": 6.237292491224625, + "learning_rate": 5.085228694379251e-06, + "loss": 17.4173, + "step": 27887 + }, + { + "epoch": 0.5097702305006672, + "grad_norm": 9.178059618595595, + "learning_rate": 5.0849327256993394e-06, + "loss": 18.0714, + "step": 27888 + }, + { + "epoch": 0.5097885096971138, + "grad_norm": 6.494638280082455, + "learning_rate": 5.084636756721744e-06, + "loss": 17.8371, + "step": 27889 + }, + { + "epoch": 0.5098067888935602, + "grad_norm": 8.03154022239092, + "learning_rate": 5.084340787447506e-06, + "loss": 18.1014, + "step": 27890 + }, + { + "epoch": 0.5098250680900067, + "grad_norm": 8.497256806005296, + "learning_rate": 5.084044817877663e-06, + "loss": 17.6061, + "step": 27891 + }, + { + "epoch": 0.5098433472864533, + "grad_norm": 6.4522589549367675, + "learning_rate": 5.083748848013247e-06, + "loss": 17.4162, + "step": 27892 + }, + { + "epoch": 0.5098616264828998, + "grad_norm": 6.124167891150617, + "learning_rate": 5.083452877855298e-06, + "loss": 17.3322, + "step": 27893 + }, + { + "epoch": 0.5098799056793464, + "grad_norm": 6.070776568095202, + "learning_rate": 5.083156907404855e-06, + "loss": 17.1881, + "step": 27894 + }, + { + "epoch": 0.5098981848757929, + "grad_norm": 6.869976134582212, + "learning_rate": 5.0828609366629556e-06, + "loss": 17.7099, + "step": 27895 + }, + { + "epoch": 0.5099164640722393, + "grad_norm": 6.624390682490424, + "learning_rate": 5.082564965630634e-06, + "loss": 17.6734, + "step": 27896 + }, + { + "epoch": 0.5099347432686859, + "grad_norm": 6.250022675612942, + "learning_rate": 5.0822689943089294e-06, + "loss": 17.5516, + "step": 27897 + }, + { + "epoch": 0.5099530224651324, + "grad_norm": 6.681294668058565, + "learning_rate": 5.0819730226988805e-06, + "loss": 17.3986, + "step": 27898 + }, + { + "epoch": 0.509971301661579, + "grad_norm": 8.381762746719547, + "learning_rate": 5.081677050801522e-06, + "loss": 18.1535, + "step": 27899 + }, + { + "epoch": 0.5099895808580255, + "grad_norm": 6.578829196788574, + "learning_rate": 5.081381078617893e-06, + "loss": 17.7318, + "step": 27900 + }, + { + "epoch": 0.510007860054472, + "grad_norm": 7.0951003053675334, + "learning_rate": 5.0810851061490315e-06, + "loss": 17.6282, + "step": 27901 + }, + { + "epoch": 0.5100261392509186, + "grad_norm": 5.103586841876317, + "learning_rate": 5.080789133395973e-06, + "loss": 17.1156, + "step": 27902 + }, + { + "epoch": 0.510044418447365, + "grad_norm": 7.460144193023806, + "learning_rate": 
5.080493160359754e-06, + "loss": 17.6798, + "step": 27903 + }, + { + "epoch": 0.5100626976438116, + "grad_norm": 7.090248826311705, + "learning_rate": 5.080197187041415e-06, + "loss": 17.0903, + "step": 27904 + }, + { + "epoch": 0.5100809768402581, + "grad_norm": 6.8042642548949885, + "learning_rate": 5.079901213441992e-06, + "loss": 17.5889, + "step": 27905 + }, + { + "epoch": 0.5100992560367046, + "grad_norm": 6.165145144153961, + "learning_rate": 5.079605239562522e-06, + "loss": 16.9309, + "step": 27906 + }, + { + "epoch": 0.5101175352331512, + "grad_norm": 6.504810586479322, + "learning_rate": 5.079309265404042e-06, + "loss": 17.6423, + "step": 27907 + }, + { + "epoch": 0.5101358144295977, + "grad_norm": 5.552455567232766, + "learning_rate": 5.079013290967589e-06, + "loss": 17.2008, + "step": 27908 + }, + { + "epoch": 0.5101540936260442, + "grad_norm": 7.036312304543531, + "learning_rate": 5.078717316254202e-06, + "loss": 17.0784, + "step": 27909 + }, + { + "epoch": 0.5101723728224907, + "grad_norm": 6.5684040882431285, + "learning_rate": 5.078421341264919e-06, + "loss": 17.5405, + "step": 27910 + }, + { + "epoch": 0.5101906520189372, + "grad_norm": 6.3598182509281775, + "learning_rate": 5.078125366000775e-06, + "loss": 17.3858, + "step": 27911 + }, + { + "epoch": 0.5102089312153838, + "grad_norm": 6.950337943610434, + "learning_rate": 5.077829390462809e-06, + "loss": 17.6734, + "step": 27912 + }, + { + "epoch": 0.5102272104118303, + "grad_norm": 6.392436753818627, + "learning_rate": 5.077533414652056e-06, + "loss": 17.6908, + "step": 27913 + }, + { + "epoch": 0.5102454896082769, + "grad_norm": 6.37787231520368, + "learning_rate": 5.077237438569557e-06, + "loss": 17.3778, + "step": 27914 + }, + { + "epoch": 0.5102637688047233, + "grad_norm": 5.703003146742271, + "learning_rate": 5.076941462216347e-06, + "loss": 17.2388, + "step": 27915 + }, + { + "epoch": 0.5102820480011698, + "grad_norm": 5.423843335529613, + "learning_rate": 5.076645485593462e-06, + "loss": 17.035, + "step": 27916 + }, + { + "epoch": 0.5103003271976164, + "grad_norm": 8.167982766498403, + "learning_rate": 5.076349508701943e-06, + "loss": 18.0702, + "step": 27917 + }, + { + "epoch": 0.5103186063940629, + "grad_norm": 6.480664865267102, + "learning_rate": 5.076053531542826e-06, + "loss": 17.2405, + "step": 27918 + }, + { + "epoch": 0.5103368855905095, + "grad_norm": 6.1631597582761195, + "learning_rate": 5.075757554117148e-06, + "loss": 17.2679, + "step": 27919 + }, + { + "epoch": 0.510355164786956, + "grad_norm": 7.117681822102847, + "learning_rate": 5.075461576425946e-06, + "loss": 17.6283, + "step": 27920 + }, + { + "epoch": 0.5103734439834025, + "grad_norm": 5.114716886469767, + "learning_rate": 5.075165598470257e-06, + "loss": 16.8313, + "step": 27921 + }, + { + "epoch": 0.510391723179849, + "grad_norm": 7.345253415797447, + "learning_rate": 5.07486962025112e-06, + "loss": 17.9692, + "step": 27922 + }, + { + "epoch": 0.5104100023762955, + "grad_norm": 5.800808293392524, + "learning_rate": 5.07457364176957e-06, + "loss": 17.2526, + "step": 27923 + }, + { + "epoch": 0.5104282815727421, + "grad_norm": 6.642269203657916, + "learning_rate": 5.0742776630266475e-06, + "loss": 17.4973, + "step": 27924 + }, + { + "epoch": 0.5104465607691886, + "grad_norm": 6.6416064789316875, + "learning_rate": 5.073981684023388e-06, + "loss": 17.5644, + "step": 27925 + }, + { + "epoch": 0.5104648399656351, + "grad_norm": 6.927755807980413, + "learning_rate": 5.073685704760828e-06, + "loss": 17.3729, + "step": 27926 + }, + { + "epoch": 
0.5104831191620817, + "grad_norm": 6.033087824890685, + "learning_rate": 5.073389725240006e-06, + "loss": 17.3631, + "step": 27927 + }, + { + "epoch": 0.5105013983585281, + "grad_norm": 5.796741823708957, + "learning_rate": 5.073093745461961e-06, + "loss": 17.2888, + "step": 27928 + }, + { + "epoch": 0.5105196775549747, + "grad_norm": 6.489427966014684, + "learning_rate": 5.072797765427729e-06, + "loss": 17.6568, + "step": 27929 + }, + { + "epoch": 0.5105379567514212, + "grad_norm": 5.861282253986606, + "learning_rate": 5.072501785138345e-06, + "loss": 17.0678, + "step": 27930 + }, + { + "epoch": 0.5105562359478677, + "grad_norm": 6.958764261470338, + "learning_rate": 5.07220580459485e-06, + "loss": 17.4967, + "step": 27931 + }, + { + "epoch": 0.5105745151443143, + "grad_norm": 7.584696063777194, + "learning_rate": 5.07190982379828e-06, + "loss": 17.7249, + "step": 27932 + }, + { + "epoch": 0.5105927943407608, + "grad_norm": 5.35595648986422, + "learning_rate": 5.071613842749672e-06, + "loss": 17.1567, + "step": 27933 + }, + { + "epoch": 0.5106110735372072, + "grad_norm": 6.061092903293324, + "learning_rate": 5.071317861450063e-06, + "loss": 17.5192, + "step": 27934 + }, + { + "epoch": 0.5106293527336538, + "grad_norm": 5.365330897197779, + "learning_rate": 5.0710218799004906e-06, + "loss": 17.0581, + "step": 27935 + }, + { + "epoch": 0.5106476319301003, + "grad_norm": 5.838573082680509, + "learning_rate": 5.070725898101995e-06, + "loss": 17.5544, + "step": 27936 + }, + { + "epoch": 0.5106659111265469, + "grad_norm": 8.596949097713447, + "learning_rate": 5.070429916055609e-06, + "loss": 17.6689, + "step": 27937 + }, + { + "epoch": 0.5106841903229934, + "grad_norm": 8.097615772411954, + "learning_rate": 5.070133933762373e-06, + "loss": 18.4352, + "step": 27938 + }, + { + "epoch": 0.5107024695194399, + "grad_norm": 6.61105770284266, + "learning_rate": 5.069837951223324e-06, + "loss": 17.6067, + "step": 27939 + }, + { + "epoch": 0.5107207487158865, + "grad_norm": 7.06885343616716, + "learning_rate": 5.069541968439498e-06, + "loss": 17.7823, + "step": 27940 + }, + { + "epoch": 0.5107390279123329, + "grad_norm": 7.714779778663963, + "learning_rate": 5.069245985411935e-06, + "loss": 17.3728, + "step": 27941 + }, + { + "epoch": 0.5107573071087795, + "grad_norm": 5.374890110205253, + "learning_rate": 5.06895000214167e-06, + "loss": 17.0761, + "step": 27942 + }, + { + "epoch": 0.510775586305226, + "grad_norm": 6.779089121408302, + "learning_rate": 5.068654018629743e-06, + "loss": 17.2467, + "step": 27943 + }, + { + "epoch": 0.5107938655016725, + "grad_norm": 6.6904896821342055, + "learning_rate": 5.068358034877187e-06, + "loss": 17.8552, + "step": 27944 + }, + { + "epoch": 0.5108121446981191, + "grad_norm": 7.544460945651414, + "learning_rate": 5.068062050885042e-06, + "loss": 18.0192, + "step": 27945 + }, + { + "epoch": 0.5108304238945656, + "grad_norm": 7.3778038152276055, + "learning_rate": 5.0677660666543486e-06, + "loss": 17.8556, + "step": 27946 + }, + { + "epoch": 0.5108487030910122, + "grad_norm": 6.147904216098145, + "learning_rate": 5.067470082186138e-06, + "loss": 17.287, + "step": 27947 + }, + { + "epoch": 0.5108669822874586, + "grad_norm": 6.036334583470327, + "learning_rate": 5.067174097481451e-06, + "loss": 17.5303, + "step": 27948 + }, + { + "epoch": 0.5108852614839051, + "grad_norm": 6.3812375777208175, + "learning_rate": 5.0668781125413235e-06, + "loss": 17.2747, + "step": 27949 + }, + { + "epoch": 0.5109035406803517, + "grad_norm": 7.0846058593199945, + "learning_rate": 
5.066582127366797e-06, + "loss": 17.9406, + "step": 27950 + }, + { + "epoch": 0.5109218198767982, + "grad_norm": 5.995581704388138, + "learning_rate": 5.0662861419589045e-06, + "loss": 17.5552, + "step": 27951 + }, + { + "epoch": 0.5109400990732448, + "grad_norm": 8.079681787989854, + "learning_rate": 5.065990156318686e-06, + "loss": 17.5316, + "step": 27952 + }, + { + "epoch": 0.5109583782696913, + "grad_norm": 6.394805775513728, + "learning_rate": 5.065694170447175e-06, + "loss": 17.3691, + "step": 27953 + }, + { + "epoch": 0.5109766574661377, + "grad_norm": 9.04358904856608, + "learning_rate": 5.065398184345413e-06, + "loss": 18.2654, + "step": 27954 + }, + { + "epoch": 0.5109949366625843, + "grad_norm": 5.919043342764798, + "learning_rate": 5.0651021980144366e-06, + "loss": 17.2662, + "step": 27955 + }, + { + "epoch": 0.5110132158590308, + "grad_norm": 6.8232770058589205, + "learning_rate": 5.064806211455282e-06, + "loss": 17.3929, + "step": 27956 + }, + { + "epoch": 0.5110314950554774, + "grad_norm": 6.3081512598405, + "learning_rate": 5.0645102246689885e-06, + "loss": 17.3551, + "step": 27957 + }, + { + "epoch": 0.5110497742519239, + "grad_norm": 7.207245941402502, + "learning_rate": 5.064214237656591e-06, + "loss": 17.8506, + "step": 27958 + }, + { + "epoch": 0.5110680534483704, + "grad_norm": 6.225357130711153, + "learning_rate": 5.063918250419128e-06, + "loss": 17.3574, + "step": 27959 + }, + { + "epoch": 0.511086332644817, + "grad_norm": 6.2018937939581145, + "learning_rate": 5.063622262957638e-06, + "loss": 17.3057, + "step": 27960 + }, + { + "epoch": 0.5111046118412634, + "grad_norm": 6.3001218579894065, + "learning_rate": 5.063326275273157e-06, + "loss": 17.3587, + "step": 27961 + }, + { + "epoch": 0.51112289103771, + "grad_norm": 6.048242051160997, + "learning_rate": 5.063030287366723e-06, + "loss": 17.4052, + "step": 27962 + }, + { + "epoch": 0.5111411702341565, + "grad_norm": 6.06499117173175, + "learning_rate": 5.062734299239372e-06, + "loss": 17.3983, + "step": 27963 + }, + { + "epoch": 0.511159449430603, + "grad_norm": 5.785315620005533, + "learning_rate": 5.0624383108921445e-06, + "loss": 17.4715, + "step": 27964 + }, + { + "epoch": 0.5111777286270496, + "grad_norm": 6.231404459331465, + "learning_rate": 5.0621423223260765e-06, + "loss": 17.6666, + "step": 27965 + }, + { + "epoch": 0.5111960078234961, + "grad_norm": 6.198098887849262, + "learning_rate": 5.061846333542204e-06, + "loss": 17.3928, + "step": 27966 + }, + { + "epoch": 0.5112142870199426, + "grad_norm": 6.256331047675148, + "learning_rate": 5.061550344541566e-06, + "loss": 17.4104, + "step": 27967 + }, + { + "epoch": 0.5112325662163891, + "grad_norm": 6.070585188329285, + "learning_rate": 5.061254355325198e-06, + "loss": 17.4089, + "step": 27968 + }, + { + "epoch": 0.5112508454128356, + "grad_norm": 5.648920168004417, + "learning_rate": 5.06095836589414e-06, + "loss": 17.2043, + "step": 27969 + }, + { + "epoch": 0.5112691246092822, + "grad_norm": 7.33495141010944, + "learning_rate": 5.060662376249429e-06, + "loss": 17.926, + "step": 27970 + }, + { + "epoch": 0.5112874038057287, + "grad_norm": 6.145325637030012, + "learning_rate": 5.0603663863921e-06, + "loss": 17.5622, + "step": 27971 + }, + { + "epoch": 0.5113056830021753, + "grad_norm": 7.409558314805561, + "learning_rate": 5.060070396323192e-06, + "loss": 17.6996, + "step": 27972 + }, + { + "epoch": 0.5113239621986218, + "grad_norm": 6.537365255117251, + "learning_rate": 5.059774406043744e-06, + "loss": 17.6237, + "step": 27973 + }, + { + "epoch": 
0.5113422413950682, + "grad_norm": 7.200750731269138, + "learning_rate": 5.059478415554792e-06, + "loss": 17.9639, + "step": 27974 + }, + { + "epoch": 0.5113605205915148, + "grad_norm": 6.421165575310187, + "learning_rate": 5.059182424857371e-06, + "loss": 17.411, + "step": 27975 + }, + { + "epoch": 0.5113787997879613, + "grad_norm": 5.277040088558584, + "learning_rate": 5.058886433952521e-06, + "loss": 17.0703, + "step": 27976 + }, + { + "epoch": 0.5113970789844079, + "grad_norm": 6.936336264939123, + "learning_rate": 5.0585904428412824e-06, + "loss": 17.7754, + "step": 27977 + }, + { + "epoch": 0.5114153581808544, + "grad_norm": 6.855191478628995, + "learning_rate": 5.058294451524687e-06, + "loss": 17.6812, + "step": 27978 + }, + { + "epoch": 0.5114336373773009, + "grad_norm": 6.050774172441768, + "learning_rate": 5.057998460003775e-06, + "loss": 17.4065, + "step": 27979 + }, + { + "epoch": 0.5114519165737474, + "grad_norm": 6.863536531195133, + "learning_rate": 5.057702468279583e-06, + "loss": 17.816, + "step": 27980 + }, + { + "epoch": 0.5114701957701939, + "grad_norm": 5.223811086374738, + "learning_rate": 5.0574064763531495e-06, + "loss": 16.9572, + "step": 27981 + }, + { + "epoch": 0.5114884749666405, + "grad_norm": 6.796207410027425, + "learning_rate": 5.05711048422551e-06, + "loss": 17.7347, + "step": 27982 + }, + { + "epoch": 0.511506754163087, + "grad_norm": 6.777248411355415, + "learning_rate": 5.056814491897705e-06, + "loss": 17.7146, + "step": 27983 + }, + { + "epoch": 0.5115250333595335, + "grad_norm": 6.3883882989031235, + "learning_rate": 5.056518499370768e-06, + "loss": 17.5044, + "step": 27984 + }, + { + "epoch": 0.5115433125559801, + "grad_norm": 4.973791532447568, + "learning_rate": 5.056222506645741e-06, + "loss": 17.2173, + "step": 27985 + }, + { + "epoch": 0.5115615917524265, + "grad_norm": 7.69910450381834, + "learning_rate": 5.055926513723657e-06, + "loss": 18.153, + "step": 27986 + }, + { + "epoch": 0.5115798709488731, + "grad_norm": 6.213555465878709, + "learning_rate": 5.055630520605557e-06, + "loss": 17.3883, + "step": 27987 + }, + { + "epoch": 0.5115981501453196, + "grad_norm": 6.102399203148746, + "learning_rate": 5.055334527292477e-06, + "loss": 17.4289, + "step": 27988 + }, + { + "epoch": 0.5116164293417661, + "grad_norm": 5.704482747869875, + "learning_rate": 5.055038533785451e-06, + "loss": 17.3771, + "step": 27989 + }, + { + "epoch": 0.5116347085382127, + "grad_norm": 7.720373131835971, + "learning_rate": 5.054742540085523e-06, + "loss": 17.9982, + "step": 27990 + }, + { + "epoch": 0.5116529877346592, + "grad_norm": 5.988599421806966, + "learning_rate": 5.054446546193728e-06, + "loss": 17.4281, + "step": 27991 + }, + { + "epoch": 0.5116712669311058, + "grad_norm": 6.250533702797249, + "learning_rate": 5.0541505521111e-06, + "loss": 17.2882, + "step": 27992 + }, + { + "epoch": 0.5116895461275522, + "grad_norm": 5.697124985304141, + "learning_rate": 5.05385455783868e-06, + "loss": 17.2617, + "step": 27993 + }, + { + "epoch": 0.5117078253239987, + "grad_norm": 8.295971595841486, + "learning_rate": 5.053558563377503e-06, + "loss": 18.3817, + "step": 27994 + }, + { + "epoch": 0.5117261045204453, + "grad_norm": 7.837309151991971, + "learning_rate": 5.053262568728609e-06, + "loss": 17.877, + "step": 27995 + }, + { + "epoch": 0.5117443837168918, + "grad_norm": 6.498182727342684, + "learning_rate": 5.052966573893036e-06, + "loss": 17.6076, + "step": 27996 + }, + { + "epoch": 0.5117626629133384, + "grad_norm": 7.2434474881906, + "learning_rate": 
5.052670578871818e-06, + "loss": 17.802, + "step": 27997 + }, + { + "epoch": 0.5117809421097849, + "grad_norm": 5.303841371811925, + "learning_rate": 5.052374583665994e-06, + "loss": 17.1082, + "step": 27998 + }, + { + "epoch": 0.5117992213062313, + "grad_norm": 5.851803379670491, + "learning_rate": 5.052078588276602e-06, + "loss": 17.4313, + "step": 27999 + }, + { + "epoch": 0.5118175005026779, + "grad_norm": 8.61954880884424, + "learning_rate": 5.051782592704679e-06, + "loss": 18.0882, + "step": 28000 + }, + { + "epoch": 0.5118357796991244, + "grad_norm": 6.047523950834138, + "learning_rate": 5.051486596951264e-06, + "loss": 17.4648, + "step": 28001 + }, + { + "epoch": 0.5118540588955709, + "grad_norm": 6.244999563783573, + "learning_rate": 5.051190601017391e-06, + "loss": 17.2835, + "step": 28002 + }, + { + "epoch": 0.5118723380920175, + "grad_norm": 9.220879058723916, + "learning_rate": 5.0508946049041e-06, + "loss": 18.5216, + "step": 28003 + }, + { + "epoch": 0.511890617288464, + "grad_norm": 5.911686970152043, + "learning_rate": 5.050598608612427e-06, + "loss": 17.3586, + "step": 28004 + }, + { + "epoch": 0.5119088964849106, + "grad_norm": 5.395181574700731, + "learning_rate": 5.0503026121434115e-06, + "loss": 17.2908, + "step": 28005 + }, + { + "epoch": 0.511927175681357, + "grad_norm": 7.199897142317298, + "learning_rate": 5.05000661549809e-06, + "loss": 17.8668, + "step": 28006 + }, + { + "epoch": 0.5119454548778035, + "grad_norm": 5.713902850634102, + "learning_rate": 5.049710618677499e-06, + "loss": 17.3038, + "step": 28007 + }, + { + "epoch": 0.5119637340742501, + "grad_norm": 5.945746328317656, + "learning_rate": 5.049414621682677e-06, + "loss": 17.2434, + "step": 28008 + }, + { + "epoch": 0.5119820132706966, + "grad_norm": 6.800970587928478, + "learning_rate": 5.049118624514659e-06, + "loss": 17.7604, + "step": 28009 + }, + { + "epoch": 0.5120002924671432, + "grad_norm": 5.014364988281461, + "learning_rate": 5.048822627174487e-06, + "loss": 17.2556, + "step": 28010 + }, + { + "epoch": 0.5120185716635897, + "grad_norm": 5.646783329337935, + "learning_rate": 5.048526629663194e-06, + "loss": 17.1364, + "step": 28011 + }, + { + "epoch": 0.5120368508600361, + "grad_norm": 5.238593346776299, + "learning_rate": 5.0482306319818205e-06, + "loss": 17.0546, + "step": 28012 + }, + { + "epoch": 0.5120551300564827, + "grad_norm": 5.867916078013198, + "learning_rate": 5.047934634131403e-06, + "loss": 17.257, + "step": 28013 + }, + { + "epoch": 0.5120734092529292, + "grad_norm": 6.349823261825295, + "learning_rate": 5.047638636112978e-06, + "loss": 17.6975, + "step": 28014 + }, + { + "epoch": 0.5120916884493758, + "grad_norm": 8.712592750857338, + "learning_rate": 5.047342637927586e-06, + "loss": 18.8455, + "step": 28015 + }, + { + "epoch": 0.5121099676458223, + "grad_norm": 6.66391801475898, + "learning_rate": 5.047046639576258e-06, + "loss": 17.4625, + "step": 28016 + }, + { + "epoch": 0.5121282468422688, + "grad_norm": 6.62869295930528, + "learning_rate": 5.046750641060038e-06, + "loss": 17.7247, + "step": 28017 + }, + { + "epoch": 0.5121465260387154, + "grad_norm": 5.373166233411081, + "learning_rate": 5.046454642379962e-06, + "loss": 17.1545, + "step": 28018 + }, + { + "epoch": 0.5121648052351618, + "grad_norm": 6.501967979661488, + "learning_rate": 5.0461586435370656e-06, + "loss": 17.5092, + "step": 28019 + }, + { + "epoch": 0.5121830844316084, + "grad_norm": 6.33903576693256, + "learning_rate": 5.045862644532388e-06, + "loss": 17.8611, + "step": 28020 + }, + { + "epoch": 
0.5122013636280549, + "grad_norm": 5.994603276352138, + "learning_rate": 5.045566645366963e-06, + "loss": 17.3117, + "step": 28021 + }, + { + "epoch": 0.5122196428245014, + "grad_norm": 5.630383901243195, + "learning_rate": 5.045270646041834e-06, + "loss": 17.291, + "step": 28022 + }, + { + "epoch": 0.512237922020948, + "grad_norm": 5.532876931544379, + "learning_rate": 5.044974646558034e-06, + "loss": 17.0049, + "step": 28023 + }, + { + "epoch": 0.5122562012173945, + "grad_norm": 5.008223902516464, + "learning_rate": 5.044678646916602e-06, + "loss": 17.0178, + "step": 28024 + }, + { + "epoch": 0.512274480413841, + "grad_norm": 6.615996521253628, + "learning_rate": 5.044382647118574e-06, + "loss": 17.6848, + "step": 28025 + }, + { + "epoch": 0.5122927596102875, + "grad_norm": 6.074262935974503, + "learning_rate": 5.044086647164991e-06, + "loss": 17.2916, + "step": 28026 + }, + { + "epoch": 0.512311038806734, + "grad_norm": 5.6487417065577405, + "learning_rate": 5.043790647056886e-06, + "loss": 17.1785, + "step": 28027 + }, + { + "epoch": 0.5123293180031806, + "grad_norm": 6.049154975666557, + "learning_rate": 5.043494646795299e-06, + "loss": 17.1928, + "step": 28028 + }, + { + "epoch": 0.5123475971996271, + "grad_norm": 5.1185673294355, + "learning_rate": 5.043198646381269e-06, + "loss": 16.8799, + "step": 28029 + }, + { + "epoch": 0.5123658763960737, + "grad_norm": 5.770846082516518, + "learning_rate": 5.042902645815829e-06, + "loss": 17.1606, + "step": 28030 + }, + { + "epoch": 0.5123841555925202, + "grad_norm": 6.910684220598062, + "learning_rate": 5.04260664510002e-06, + "loss": 17.4591, + "step": 28031 + }, + { + "epoch": 0.5124024347889666, + "grad_norm": 6.628891492238753, + "learning_rate": 5.042310644234878e-06, + "loss": 17.422, + "step": 28032 + }, + { + "epoch": 0.5124207139854132, + "grad_norm": 5.3739276066214705, + "learning_rate": 5.042014643221442e-06, + "loss": 17.0926, + "step": 28033 + }, + { + "epoch": 0.5124389931818597, + "grad_norm": 4.709496993209792, + "learning_rate": 5.0417186420607475e-06, + "loss": 16.9556, + "step": 28034 + }, + { + "epoch": 0.5124572723783063, + "grad_norm": 6.570254315115292, + "learning_rate": 5.041422640753831e-06, + "loss": 17.3542, + "step": 28035 + }, + { + "epoch": 0.5124755515747528, + "grad_norm": 5.702979363851849, + "learning_rate": 5.041126639301736e-06, + "loss": 17.0444, + "step": 28036 + }, + { + "epoch": 0.5124938307711993, + "grad_norm": 6.134215999578505, + "learning_rate": 5.040830637705493e-06, + "loss": 17.3444, + "step": 28037 + }, + { + "epoch": 0.5125121099676458, + "grad_norm": 5.53583089154324, + "learning_rate": 5.040534635966141e-06, + "loss": 17.1026, + "step": 28038 + }, + { + "epoch": 0.5125303891640923, + "grad_norm": 6.692634199078798, + "learning_rate": 5.040238634084721e-06, + "loss": 17.1582, + "step": 28039 + }, + { + "epoch": 0.5125486683605389, + "grad_norm": 9.112706744573707, + "learning_rate": 5.0399426320622665e-06, + "loss": 17.7881, + "step": 28040 + }, + { + "epoch": 0.5125669475569854, + "grad_norm": 5.436373246965209, + "learning_rate": 5.039646629899817e-06, + "loss": 17.1567, + "step": 28041 + }, + { + "epoch": 0.5125852267534319, + "grad_norm": 6.351125943732603, + "learning_rate": 5.0393506275984094e-06, + "loss": 17.621, + "step": 28042 + }, + { + "epoch": 0.5126035059498785, + "grad_norm": 5.806286890615243, + "learning_rate": 5.039054625159081e-06, + "loss": 17.3372, + "step": 28043 + }, + { + "epoch": 0.512621785146325, + "grad_norm": 5.160793728004603, + "learning_rate": 
5.03875862258287e-06, + "loss": 16.8742, + "step": 28044 + }, + { + "epoch": 0.5126400643427715, + "grad_norm": 6.267331369127202, + "learning_rate": 5.038462619870814e-06, + "loss": 17.3996, + "step": 28045 + }, + { + "epoch": 0.512658343539218, + "grad_norm": 6.085634046898352, + "learning_rate": 5.03816661702395e-06, + "loss": 17.5692, + "step": 28046 + }, + { + "epoch": 0.5126766227356645, + "grad_norm": 6.61253265012242, + "learning_rate": 5.037870614043314e-06, + "loss": 17.3804, + "step": 28047 + }, + { + "epoch": 0.5126949019321111, + "grad_norm": 4.96611423998561, + "learning_rate": 5.037574610929945e-06, + "loss": 16.8542, + "step": 28048 + }, + { + "epoch": 0.5127131811285576, + "grad_norm": 6.320321578904927, + "learning_rate": 5.03727860768488e-06, + "loss": 17.5675, + "step": 28049 + }, + { + "epoch": 0.5127314603250042, + "grad_norm": 7.157330893384386, + "learning_rate": 5.036982604309159e-06, + "loss": 17.466, + "step": 28050 + }, + { + "epoch": 0.5127497395214506, + "grad_norm": 5.3084310590091635, + "learning_rate": 5.036686600803815e-06, + "loss": 17.1899, + "step": 28051 + }, + { + "epoch": 0.5127680187178971, + "grad_norm": 7.373330017990298, + "learning_rate": 5.036390597169888e-06, + "loss": 17.9253, + "step": 28052 + }, + { + "epoch": 0.5127862979143437, + "grad_norm": 6.65378399298599, + "learning_rate": 5.036094593408415e-06, + "loss": 17.6431, + "step": 28053 + }, + { + "epoch": 0.5128045771107902, + "grad_norm": 5.858154925957364, + "learning_rate": 5.035798589520434e-06, + "loss": 17.3389, + "step": 28054 + }, + { + "epoch": 0.5128228563072368, + "grad_norm": 5.330596774222364, + "learning_rate": 5.035502585506981e-06, + "loss": 17.1273, + "step": 28055 + }, + { + "epoch": 0.5128411355036833, + "grad_norm": 6.735683133722009, + "learning_rate": 5.035206581369097e-06, + "loss": 17.3526, + "step": 28056 + }, + { + "epoch": 0.5128594147001297, + "grad_norm": 5.310218738370323, + "learning_rate": 5.034910577107814e-06, + "loss": 17.2729, + "step": 28057 + }, + { + "epoch": 0.5128776938965763, + "grad_norm": 5.661281842404663, + "learning_rate": 5.034614572724175e-06, + "loss": 17.3244, + "step": 28058 + }, + { + "epoch": 0.5128959730930228, + "grad_norm": 5.7079937216885845, + "learning_rate": 5.034318568219213e-06, + "loss": 17.4455, + "step": 28059 + }, + { + "epoch": 0.5129142522894694, + "grad_norm": 5.33893828694486, + "learning_rate": 5.0340225635939685e-06, + "loss": 17.1936, + "step": 28060 + }, + { + "epoch": 0.5129325314859159, + "grad_norm": 6.991607716437704, + "learning_rate": 5.033726558849479e-06, + "loss": 17.6457, + "step": 28061 + }, + { + "epoch": 0.5129508106823624, + "grad_norm": 7.173764685392336, + "learning_rate": 5.0334305539867774e-06, + "loss": 18.1706, + "step": 28062 + }, + { + "epoch": 0.512969089878809, + "grad_norm": 7.119662542488564, + "learning_rate": 5.033134549006906e-06, + "loss": 17.7013, + "step": 28063 + }, + { + "epoch": 0.5129873690752554, + "grad_norm": 7.890981854441498, + "learning_rate": 5.032838543910903e-06, + "loss": 18.2298, + "step": 28064 + }, + { + "epoch": 0.513005648271702, + "grad_norm": 6.941025290307531, + "learning_rate": 5.032542538699803e-06, + "loss": 17.8069, + "step": 28065 + }, + { + "epoch": 0.5130239274681485, + "grad_norm": 7.707036799576286, + "learning_rate": 5.032246533374643e-06, + "loss": 17.4969, + "step": 28066 + }, + { + "epoch": 0.513042206664595, + "grad_norm": 5.8355286680754626, + "learning_rate": 5.031950527936462e-06, + "loss": 17.2879, + "step": 28067 + }, + { + "epoch": 
0.5130604858610416, + "grad_norm": 6.097625669021527, + "learning_rate": 5.031654522386297e-06, + "loss": 17.4816, + "step": 28068 + }, + { + "epoch": 0.5130787650574881, + "grad_norm": 6.391443302246504, + "learning_rate": 5.031358516725185e-06, + "loss": 17.5693, + "step": 28069 + }, + { + "epoch": 0.5130970442539345, + "grad_norm": 5.853587011187509, + "learning_rate": 5.031062510954166e-06, + "loss": 17.2966, + "step": 28070 + }, + { + "epoch": 0.5131153234503811, + "grad_norm": 6.888218201777283, + "learning_rate": 5.030766505074275e-06, + "loss": 17.6219, + "step": 28071 + }, + { + "epoch": 0.5131336026468276, + "grad_norm": 7.078410786984801, + "learning_rate": 5.030470499086549e-06, + "loss": 17.7244, + "step": 28072 + }, + { + "epoch": 0.5131518818432742, + "grad_norm": 5.892402288292222, + "learning_rate": 5.030174492992027e-06, + "loss": 17.2036, + "step": 28073 + }, + { + "epoch": 0.5131701610397207, + "grad_norm": 6.045014744014676, + "learning_rate": 5.029878486791748e-06, + "loss": 17.318, + "step": 28074 + }, + { + "epoch": 0.5131884402361672, + "grad_norm": 5.76539178992317, + "learning_rate": 5.029582480486744e-06, + "loss": 17.2788, + "step": 28075 + }, + { + "epoch": 0.5132067194326138, + "grad_norm": 6.317937925272435, + "learning_rate": 5.029286474078058e-06, + "loss": 17.3094, + "step": 28076 + }, + { + "epoch": 0.5132249986290602, + "grad_norm": 5.9402884217697665, + "learning_rate": 5.028990467566727e-06, + "loss": 17.1369, + "step": 28077 + }, + { + "epoch": 0.5132432778255068, + "grad_norm": 6.658072244458772, + "learning_rate": 5.028694460953785e-06, + "loss": 17.6295, + "step": 28078 + }, + { + "epoch": 0.5132615570219533, + "grad_norm": 6.548053989903735, + "learning_rate": 5.028398454240271e-06, + "loss": 17.6661, + "step": 28079 + }, + { + "epoch": 0.5132798362183998, + "grad_norm": 7.370262231792316, + "learning_rate": 5.0281024474272225e-06, + "loss": 18.1196, + "step": 28080 + }, + { + "epoch": 0.5132981154148464, + "grad_norm": 5.794905417242664, + "learning_rate": 5.027806440515679e-06, + "loss": 17.0295, + "step": 28081 + }, + { + "epoch": 0.5133163946112929, + "grad_norm": 6.559785798742597, + "learning_rate": 5.027510433506676e-06, + "loss": 17.4465, + "step": 28082 + }, + { + "epoch": 0.5133346738077394, + "grad_norm": 6.161346157356899, + "learning_rate": 5.02721442640125e-06, + "loss": 17.6543, + "step": 28083 + }, + { + "epoch": 0.5133529530041859, + "grad_norm": 4.745825669070633, + "learning_rate": 5.026918419200442e-06, + "loss": 16.9587, + "step": 28084 + }, + { + "epoch": 0.5133712322006324, + "grad_norm": 7.6563757111778985, + "learning_rate": 5.0266224119052855e-06, + "loss": 18.0366, + "step": 28085 + }, + { + "epoch": 0.513389511397079, + "grad_norm": 7.5577163342264315, + "learning_rate": 5.026326404516821e-06, + "loss": 18.04, + "step": 28086 + }, + { + "epoch": 0.5134077905935255, + "grad_norm": 5.61846200718234, + "learning_rate": 5.0260303970360835e-06, + "loss": 17.259, + "step": 28087 + }, + { + "epoch": 0.5134260697899721, + "grad_norm": 7.114267771998251, + "learning_rate": 5.025734389464113e-06, + "loss": 17.5694, + "step": 28088 + }, + { + "epoch": 0.5134443489864186, + "grad_norm": 5.122244468492323, + "learning_rate": 5.025438381801943e-06, + "loss": 16.9749, + "step": 28089 + }, + { + "epoch": 0.513462628182865, + "grad_norm": 8.060971334515285, + "learning_rate": 5.0251423740506155e-06, + "loss": 18.7725, + "step": 28090 + }, + { + "epoch": 0.5134809073793116, + "grad_norm": 6.072602028199104, + "learning_rate": 
5.024846366211168e-06, + "loss": 17.5009, + "step": 28091 + }, + { + "epoch": 0.5134991865757581, + "grad_norm": 7.245636367424293, + "learning_rate": 5.024550358284634e-06, + "loss": 17.753, + "step": 28092 + }, + { + "epoch": 0.5135174657722047, + "grad_norm": 6.0389132423800636, + "learning_rate": 5.024254350272054e-06, + "loss": 17.379, + "step": 28093 + }, + { + "epoch": 0.5135357449686512, + "grad_norm": 5.406577581521393, + "learning_rate": 5.023958342174463e-06, + "loss": 17.1855, + "step": 28094 + }, + { + "epoch": 0.5135540241650977, + "grad_norm": 5.828234473307503, + "learning_rate": 5.0236623339929025e-06, + "loss": 17.0713, + "step": 28095 + }, + { + "epoch": 0.5135723033615442, + "grad_norm": 4.6051927541259134, + "learning_rate": 5.023366325728406e-06, + "loss": 16.7456, + "step": 28096 + }, + { + "epoch": 0.5135905825579907, + "grad_norm": 6.234438137695905, + "learning_rate": 5.023070317382013e-06, + "loss": 17.4698, + "step": 28097 + }, + { + "epoch": 0.5136088617544373, + "grad_norm": 6.745582056480888, + "learning_rate": 5.0227743089547594e-06, + "loss": 17.3086, + "step": 28098 + }, + { + "epoch": 0.5136271409508838, + "grad_norm": 5.385427269965797, + "learning_rate": 5.022478300447685e-06, + "loss": 17.2957, + "step": 28099 + }, + { + "epoch": 0.5136454201473303, + "grad_norm": 8.091292903650018, + "learning_rate": 5.022182291861826e-06, + "loss": 17.9107, + "step": 28100 + }, + { + "epoch": 0.5136636993437769, + "grad_norm": 6.26059130775774, + "learning_rate": 5.021886283198221e-06, + "loss": 17.4496, + "step": 28101 + }, + { + "epoch": 0.5136819785402233, + "grad_norm": 6.137892618674586, + "learning_rate": 5.021590274457906e-06, + "loss": 17.6514, + "step": 28102 + }, + { + "epoch": 0.5137002577366699, + "grad_norm": 7.384613148894318, + "learning_rate": 5.0212942656419175e-06, + "loss": 17.8735, + "step": 28103 + }, + { + "epoch": 0.5137185369331164, + "grad_norm": 6.224373819966781, + "learning_rate": 5.020998256751295e-06, + "loss": 17.6275, + "step": 28104 + }, + { + "epoch": 0.5137368161295629, + "grad_norm": 6.561541920810067, + "learning_rate": 5.020702247787076e-06, + "loss": 17.4027, + "step": 28105 + }, + { + "epoch": 0.5137550953260095, + "grad_norm": 6.348724462220238, + "learning_rate": 5.020406238750297e-06, + "loss": 17.2009, + "step": 28106 + }, + { + "epoch": 0.513773374522456, + "grad_norm": 5.6274317742546, + "learning_rate": 5.020110229641997e-06, + "loss": 17.1539, + "step": 28107 + }, + { + "epoch": 0.5137916537189026, + "grad_norm": 5.402416735629333, + "learning_rate": 5.01981422046321e-06, + "loss": 16.9529, + "step": 28108 + }, + { + "epoch": 0.513809932915349, + "grad_norm": 7.337176309987599, + "learning_rate": 5.019518211214978e-06, + "loss": 17.723, + "step": 28109 + }, + { + "epoch": 0.5138282121117955, + "grad_norm": 6.865097560876604, + "learning_rate": 5.019222201898336e-06, + "loss": 17.5339, + "step": 28110 + }, + { + "epoch": 0.5138464913082421, + "grad_norm": 6.23455616923348, + "learning_rate": 5.0189261925143214e-06, + "loss": 17.2609, + "step": 28111 + }, + { + "epoch": 0.5138647705046886, + "grad_norm": 6.809861678289743, + "learning_rate": 5.018630183063972e-06, + "loss": 17.4722, + "step": 28112 + }, + { + "epoch": 0.5138830497011352, + "grad_norm": 6.1133301907662245, + "learning_rate": 5.018334173548326e-06, + "loss": 17.4801, + "step": 28113 + }, + { + "epoch": 0.5139013288975817, + "grad_norm": 11.552232517083285, + "learning_rate": 5.018038163968419e-06, + "loss": 18.6332, + "step": 28114 + }, + { + "epoch": 
0.5139196080940281, + "grad_norm": 6.335392764099568, + "learning_rate": 5.0177421543252925e-06, + "loss": 17.5472, + "step": 28115 + }, + { + "epoch": 0.5139378872904747, + "grad_norm": 5.9829507015151036, + "learning_rate": 5.017446144619978e-06, + "loss": 17.6113, + "step": 28116 + }, + { + "epoch": 0.5139561664869212, + "grad_norm": 5.410154458145175, + "learning_rate": 5.017150134853518e-06, + "loss": 17.0227, + "step": 28117 + }, + { + "epoch": 0.5139744456833678, + "grad_norm": 5.83075024866596, + "learning_rate": 5.016854125026949e-06, + "loss": 17.3264, + "step": 28118 + }, + { + "epoch": 0.5139927248798143, + "grad_norm": 5.460150176460602, + "learning_rate": 5.016558115141308e-06, + "loss": 17.1501, + "step": 28119 + }, + { + "epoch": 0.5140110040762608, + "grad_norm": 7.476150179442159, + "learning_rate": 5.01626210519763e-06, + "loss": 18.2651, + "step": 28120 + }, + { + "epoch": 0.5140292832727074, + "grad_norm": 8.293272410674218, + "learning_rate": 5.015966095196956e-06, + "loss": 18.1733, + "step": 28121 + }, + { + "epoch": 0.5140475624691538, + "grad_norm": 8.932934328206157, + "learning_rate": 5.015670085140323e-06, + "loss": 18.0572, + "step": 28122 + }, + { + "epoch": 0.5140658416656004, + "grad_norm": 6.376393939232121, + "learning_rate": 5.0153740750287665e-06, + "loss": 17.6159, + "step": 28123 + }, + { + "epoch": 0.5140841208620469, + "grad_norm": 4.975699133719373, + "learning_rate": 5.015078064863325e-06, + "loss": 16.8964, + "step": 28124 + }, + { + "epoch": 0.5141024000584934, + "grad_norm": 5.524051630978833, + "learning_rate": 5.014782054645037e-06, + "loss": 17.229, + "step": 28125 + }, + { + "epoch": 0.51412067925494, + "grad_norm": 6.733322352927828, + "learning_rate": 5.014486044374939e-06, + "loss": 17.7211, + "step": 28126 + }, + { + "epoch": 0.5141389584513865, + "grad_norm": 6.145482932736864, + "learning_rate": 5.014190034054068e-06, + "loss": 17.4748, + "step": 28127 + }, + { + "epoch": 0.514157237647833, + "grad_norm": 10.969824636140316, + "learning_rate": 5.013894023683463e-06, + "loss": 18.5218, + "step": 28128 + }, + { + "epoch": 0.5141755168442795, + "grad_norm": 6.207129862453142, + "learning_rate": 5.013598013264162e-06, + "loss": 17.3163, + "step": 28129 + }, + { + "epoch": 0.514193796040726, + "grad_norm": 8.738833173940085, + "learning_rate": 5.013302002797198e-06, + "loss": 18.1539, + "step": 28130 + }, + { + "epoch": 0.5142120752371726, + "grad_norm": 6.69535504124619, + "learning_rate": 5.013005992283613e-06, + "loss": 17.2844, + "step": 28131 + }, + { + "epoch": 0.5142303544336191, + "grad_norm": 6.870859011575733, + "learning_rate": 5.012709981724443e-06, + "loss": 17.4839, + "step": 28132 + }, + { + "epoch": 0.5142486336300657, + "grad_norm": 5.880878576065578, + "learning_rate": 5.012413971120726e-06, + "loss": 17.4608, + "step": 28133 + }, + { + "epoch": 0.5142669128265122, + "grad_norm": 7.412049525932334, + "learning_rate": 5.0121179604735005e-06, + "loss": 17.7907, + "step": 28134 + }, + { + "epoch": 0.5142851920229586, + "grad_norm": 5.182481876257259, + "learning_rate": 5.011821949783799e-06, + "loss": 17.1516, + "step": 28135 + }, + { + "epoch": 0.5143034712194052, + "grad_norm": 7.256991660617841, + "learning_rate": 5.011525939052666e-06, + "loss": 17.9211, + "step": 28136 + }, + { + "epoch": 0.5143217504158517, + "grad_norm": 6.612943637546306, + "learning_rate": 5.011229928281134e-06, + "loss": 17.7423, + "step": 28137 + }, + { + "epoch": 0.5143400296122982, + "grad_norm": 6.236537944236401, + "learning_rate": 
5.010933917470243e-06, + "loss": 17.4842, + "step": 28138 + }, + { + "epoch": 0.5143583088087448, + "grad_norm": 6.912204897544808, + "learning_rate": 5.0106379066210285e-06, + "loss": 17.5455, + "step": 28139 + }, + { + "epoch": 0.5143765880051913, + "grad_norm": 5.305372202814259, + "learning_rate": 5.010341895734529e-06, + "loss": 17.1269, + "step": 28140 + }, + { + "epoch": 0.5143948672016379, + "grad_norm": 5.715634971383142, + "learning_rate": 5.010045884811783e-06, + "loss": 17.015, + "step": 28141 + }, + { + "epoch": 0.5144131463980843, + "grad_norm": 8.049630120347942, + "learning_rate": 5.009749873853827e-06, + "loss": 18.0077, + "step": 28142 + }, + { + "epoch": 0.5144314255945308, + "grad_norm": 5.197981675803811, + "learning_rate": 5.009453862861697e-06, + "loss": 16.8769, + "step": 28143 + }, + { + "epoch": 0.5144497047909774, + "grad_norm": 6.366521019383396, + "learning_rate": 5.009157851836434e-06, + "loss": 17.6728, + "step": 28144 + }, + { + "epoch": 0.5144679839874239, + "grad_norm": 4.922210672212978, + "learning_rate": 5.008861840779072e-06, + "loss": 16.7652, + "step": 28145 + }, + { + "epoch": 0.5144862631838705, + "grad_norm": 6.174499497399777, + "learning_rate": 5.008565829690652e-06, + "loss": 17.2349, + "step": 28146 + }, + { + "epoch": 0.514504542380317, + "grad_norm": 6.432555622617168, + "learning_rate": 5.0082698185722076e-06, + "loss": 17.6648, + "step": 28147 + }, + { + "epoch": 0.5145228215767634, + "grad_norm": 6.826354523350087, + "learning_rate": 5.007973807424778e-06, + "loss": 17.4025, + "step": 28148 + }, + { + "epoch": 0.51454110077321, + "grad_norm": 5.885592308033598, + "learning_rate": 5.007677796249402e-06, + "loss": 17.3885, + "step": 28149 + }, + { + "epoch": 0.5145593799696565, + "grad_norm": 5.243712760308505, + "learning_rate": 5.007381785047116e-06, + "loss": 17.0312, + "step": 28150 + }, + { + "epoch": 0.5145776591661031, + "grad_norm": 6.797253314513713, + "learning_rate": 5.007085773818958e-06, + "loss": 17.6553, + "step": 28151 + }, + { + "epoch": 0.5145959383625496, + "grad_norm": 5.585097237420028, + "learning_rate": 5.006789762565964e-06, + "loss": 17.2602, + "step": 28152 + }, + { + "epoch": 0.5146142175589961, + "grad_norm": 6.464299392483597, + "learning_rate": 5.006493751289172e-06, + "loss": 17.3206, + "step": 28153 + }, + { + "epoch": 0.5146324967554426, + "grad_norm": 5.9594289792473, + "learning_rate": 5.006197739989621e-06, + "loss": 17.4366, + "step": 28154 + }, + { + "epoch": 0.5146507759518891, + "grad_norm": 5.19228241604465, + "learning_rate": 5.005901728668346e-06, + "loss": 16.9556, + "step": 28155 + }, + { + "epoch": 0.5146690551483357, + "grad_norm": 5.6669730647734475, + "learning_rate": 5.005605717326388e-06, + "loss": 17.3045, + "step": 28156 + }, + { + "epoch": 0.5146873343447822, + "grad_norm": 4.970170824432851, + "learning_rate": 5.005309705964782e-06, + "loss": 16.9029, + "step": 28157 + }, + { + "epoch": 0.5147056135412287, + "grad_norm": 6.8288527551437115, + "learning_rate": 5.005013694584565e-06, + "loss": 17.6727, + "step": 28158 + }, + { + "epoch": 0.5147238927376753, + "grad_norm": 6.65212379126175, + "learning_rate": 5.004717683186775e-06, + "loss": 17.6678, + "step": 28159 + }, + { + "epoch": 0.5147421719341218, + "grad_norm": 6.49324925634759, + "learning_rate": 5.004421671772453e-06, + "loss": 17.4102, + "step": 28160 + }, + { + "epoch": 0.5147604511305683, + "grad_norm": 6.6435775706349505, + "learning_rate": 5.00412566034263e-06, + "loss": 17.6286, + "step": 28161 + }, + { + "epoch": 
0.5147787303270148, + "grad_norm": 5.318601132790432, + "learning_rate": 5.003829648898347e-06, + "loss": 17.1369, + "step": 28162 + }, + { + "epoch": 0.5147970095234613, + "grad_norm": 5.851364989000181, + "learning_rate": 5.003533637440643e-06, + "loss": 17.1015, + "step": 28163 + }, + { + "epoch": 0.5148152887199079, + "grad_norm": 5.97345678470819, + "learning_rate": 5.003237625970554e-06, + "loss": 17.3515, + "step": 28164 + }, + { + "epoch": 0.5148335679163544, + "grad_norm": 4.319079791050368, + "learning_rate": 5.002941614489117e-06, + "loss": 16.7136, + "step": 28165 + }, + { + "epoch": 0.514851847112801, + "grad_norm": 6.030998116732727, + "learning_rate": 5.0026456029973705e-06, + "loss": 17.2514, + "step": 28166 + }, + { + "epoch": 0.5148701263092474, + "grad_norm": 7.039669797362035, + "learning_rate": 5.002349591496349e-06, + "loss": 17.6927, + "step": 28167 + }, + { + "epoch": 0.5148884055056939, + "grad_norm": 7.865756664491041, + "learning_rate": 5.002053579987095e-06, + "loss": 17.9537, + "step": 28168 + }, + { + "epoch": 0.5149066847021405, + "grad_norm": 6.962113674781745, + "learning_rate": 5.001757568470642e-06, + "loss": 17.5078, + "step": 28169 + }, + { + "epoch": 0.514924963898587, + "grad_norm": 6.504396415023693, + "learning_rate": 5.00146155694803e-06, + "loss": 17.4157, + "step": 28170 + }, + { + "epoch": 0.5149432430950336, + "grad_norm": 5.863818599382739, + "learning_rate": 5.001165545420293e-06, + "loss": 17.2288, + "step": 28171 + }, + { + "epoch": 0.5149615222914801, + "grad_norm": 6.319205856870767, + "learning_rate": 5.0008695338884725e-06, + "loss": 17.4342, + "step": 28172 + }, + { + "epoch": 0.5149798014879265, + "grad_norm": 5.943042318046097, + "learning_rate": 5.000573522353604e-06, + "loss": 17.356, + "step": 28173 + }, + { + "epoch": 0.5149980806843731, + "grad_norm": 6.129856488068772, + "learning_rate": 5.000277510816728e-06, + "loss": 17.3474, + "step": 28174 + }, + { + "epoch": 0.5150163598808196, + "grad_norm": 5.945116040970834, + "learning_rate": 4.999981499278876e-06, + "loss": 17.1864, + "step": 28175 + }, + { + "epoch": 0.5150346390772662, + "grad_norm": 8.265559736040654, + "learning_rate": 4.9996854877410905e-06, + "loss": 17.8946, + "step": 28176 + }, + { + "epoch": 0.5150529182737127, + "grad_norm": 6.437334872176177, + "learning_rate": 4.999389476204406e-06, + "loss": 17.5352, + "step": 28177 + }, + { + "epoch": 0.5150711974701592, + "grad_norm": 5.419875494669513, + "learning_rate": 4.999093464669863e-06, + "loss": 16.9175, + "step": 28178 + }, + { + "epoch": 0.5150894766666058, + "grad_norm": 6.770252063586818, + "learning_rate": 4.998797453138496e-06, + "loss": 17.6959, + "step": 28179 + }, + { + "epoch": 0.5151077558630522, + "grad_norm": 5.81401931216123, + "learning_rate": 4.998501441611343e-06, + "loss": 17.1223, + "step": 28180 + }, + { + "epoch": 0.5151260350594988, + "grad_norm": 6.229354007716394, + "learning_rate": 4.998205430089445e-06, + "loss": 17.4315, + "step": 28181 + }, + { + "epoch": 0.5151443142559453, + "grad_norm": 6.00189037519563, + "learning_rate": 4.9979094185738344e-06, + "loss": 17.3048, + "step": 28182 + }, + { + "epoch": 0.5151625934523918, + "grad_norm": 6.072229788591644, + "learning_rate": 4.997613407065552e-06, + "loss": 17.2411, + "step": 28183 + }, + { + "epoch": 0.5151808726488384, + "grad_norm": 4.97116775481753, + "learning_rate": 4.997317395565635e-06, + "loss": 16.9966, + "step": 28184 + }, + { + "epoch": 0.5151991518452849, + "grad_norm": 5.731876597442955, + "learning_rate": 
4.9970213840751185e-06, + "loss": 17.3688, + "step": 28185 + }, + { + "epoch": 0.5152174310417315, + "grad_norm": 6.139715199829413, + "learning_rate": 4.996725372595044e-06, + "loss": 17.1598, + "step": 28186 + }, + { + "epoch": 0.5152357102381779, + "grad_norm": 6.436362192283496, + "learning_rate": 4.996429361126447e-06, + "loss": 17.4544, + "step": 28187 + }, + { + "epoch": 0.5152539894346244, + "grad_norm": 7.375691130758968, + "learning_rate": 4.996133349670362e-06, + "loss": 17.7593, + "step": 28188 + }, + { + "epoch": 0.515272268631071, + "grad_norm": 6.085832382220902, + "learning_rate": 4.995837338227832e-06, + "loss": 17.4433, + "step": 28189 + }, + { + "epoch": 0.5152905478275175, + "grad_norm": 6.663826732315967, + "learning_rate": 4.9955413267998905e-06, + "loss": 17.5097, + "step": 28190 + }, + { + "epoch": 0.5153088270239641, + "grad_norm": 5.015545658554212, + "learning_rate": 4.995245315387575e-06, + "loss": 16.7537, + "step": 28191 + }, + { + "epoch": 0.5153271062204106, + "grad_norm": 5.838505174851261, + "learning_rate": 4.994949303991928e-06, + "loss": 17.252, + "step": 28192 + }, + { + "epoch": 0.515345385416857, + "grad_norm": 8.249928955669725, + "learning_rate": 4.9946532926139805e-06, + "loss": 18.2775, + "step": 28193 + }, + { + "epoch": 0.5153636646133036, + "grad_norm": 6.7158528747227955, + "learning_rate": 4.994357281254772e-06, + "loss": 17.6138, + "step": 28194 + }, + { + "epoch": 0.5153819438097501, + "grad_norm": 5.824144166676193, + "learning_rate": 4.994061269915343e-06, + "loss": 17.2477, + "step": 28195 + }, + { + "epoch": 0.5154002230061967, + "grad_norm": 5.463913434287644, + "learning_rate": 4.993765258596728e-06, + "loss": 17.3145, + "step": 28196 + }, + { + "epoch": 0.5154185022026432, + "grad_norm": 6.420409486988014, + "learning_rate": 4.993469247299964e-06, + "loss": 17.47, + "step": 28197 + }, + { + "epoch": 0.5154367813990897, + "grad_norm": 5.579108004881442, + "learning_rate": 4.993173236026091e-06, + "loss": 17.1566, + "step": 28198 + }, + { + "epoch": 0.5154550605955363, + "grad_norm": 6.457916538417718, + "learning_rate": 4.9928772247761435e-06, + "loss": 17.6163, + "step": 28199 + }, + { + "epoch": 0.5154733397919827, + "grad_norm": 6.571864744552564, + "learning_rate": 4.992581213551163e-06, + "loss": 17.651, + "step": 28200 + }, + { + "epoch": 0.5154916189884293, + "grad_norm": 6.43539111382244, + "learning_rate": 4.992285202352184e-06, + "loss": 17.5781, + "step": 28201 + }, + { + "epoch": 0.5155098981848758, + "grad_norm": 5.1435792108081895, + "learning_rate": 4.9919891911802445e-06, + "loss": 17.0905, + "step": 28202 + }, + { + "epoch": 0.5155281773813223, + "grad_norm": 7.134187654591497, + "learning_rate": 4.991693180036382e-06, + "loss": 17.9438, + "step": 28203 + }, + { + "epoch": 0.5155464565777689, + "grad_norm": 6.490637936095741, + "learning_rate": 4.9913971689216355e-06, + "loss": 17.351, + "step": 28204 + }, + { + "epoch": 0.5155647357742154, + "grad_norm": 7.956565135064702, + "learning_rate": 4.991101157837038e-06, + "loss": 17.9674, + "step": 28205 + }, + { + "epoch": 0.5155830149706618, + "grad_norm": 6.678808826906036, + "learning_rate": 4.990805146783633e-06, + "loss": 17.649, + "step": 28206 + }, + { + "epoch": 0.5156012941671084, + "grad_norm": 6.412975551902454, + "learning_rate": 4.990509135762455e-06, + "loss": 17.5338, + "step": 28207 + }, + { + "epoch": 0.5156195733635549, + "grad_norm": 8.295048085068535, + "learning_rate": 4.9902131247745395e-06, + "loss": 17.9047, + "step": 28208 + }, + { + "epoch": 
0.5156378525600015, + "grad_norm": 6.91658108633665, + "learning_rate": 4.989917113820928e-06, + "loss": 17.6383, + "step": 28209 + }, + { + "epoch": 0.515656131756448, + "grad_norm": 5.576102802165591, + "learning_rate": 4.989621102902658e-06, + "loss": 17.3871, + "step": 28210 + }, + { + "epoch": 0.5156744109528945, + "grad_norm": 8.235191717878612, + "learning_rate": 4.9893250920207606e-06, + "loss": 18.031, + "step": 28211 + }, + { + "epoch": 0.515692690149341, + "grad_norm": 5.481930958457845, + "learning_rate": 4.98902908117628e-06, + "loss": 17.0455, + "step": 28212 + }, + { + "epoch": 0.5157109693457875, + "grad_norm": 6.837430098374462, + "learning_rate": 4.988733070370251e-06, + "loss": 17.7349, + "step": 28213 + }, + { + "epoch": 0.5157292485422341, + "grad_norm": 7.129773801985469, + "learning_rate": 4.988437059603713e-06, + "loss": 17.821, + "step": 28214 + }, + { + "epoch": 0.5157475277386806, + "grad_norm": 8.401615798316545, + "learning_rate": 4.988141048877703e-06, + "loss": 18.8932, + "step": 28215 + }, + { + "epoch": 0.5157658069351271, + "grad_norm": 6.682211248851416, + "learning_rate": 4.987845038193254e-06, + "loss": 17.6356, + "step": 28216 + }, + { + "epoch": 0.5157840861315737, + "grad_norm": 6.583499921389675, + "learning_rate": 4.987549027551409e-06, + "loss": 17.3783, + "step": 28217 + }, + { + "epoch": 0.5158023653280202, + "grad_norm": 5.521369248951098, + "learning_rate": 4.987253016953205e-06, + "loss": 17.1592, + "step": 28218 + }, + { + "epoch": 0.5158206445244667, + "grad_norm": 6.762331827717847, + "learning_rate": 4.986957006399675e-06, + "loss": 17.2717, + "step": 28219 + }, + { + "epoch": 0.5158389237209132, + "grad_norm": 6.7441140402099, + "learning_rate": 4.986660995891862e-06, + "loss": 17.633, + "step": 28220 + }, + { + "epoch": 0.5158572029173597, + "grad_norm": 7.605188296975337, + "learning_rate": 4.986364985430801e-06, + "loss": 17.9664, + "step": 28221 + }, + { + "epoch": 0.5158754821138063, + "grad_norm": 5.954533950344529, + "learning_rate": 4.986068975017527e-06, + "loss": 17.1513, + "step": 28222 + }, + { + "epoch": 0.5158937613102528, + "grad_norm": 5.19384164719807, + "learning_rate": 4.985772964653083e-06, + "loss": 17.0377, + "step": 28223 + }, + { + "epoch": 0.5159120405066994, + "grad_norm": 5.709840000751817, + "learning_rate": 4.985476954338504e-06, + "loss": 17.0539, + "step": 28224 + }, + { + "epoch": 0.5159303197031458, + "grad_norm": 5.467003263646111, + "learning_rate": 4.985180944074824e-06, + "loss": 17.1586, + "step": 28225 + }, + { + "epoch": 0.5159485988995923, + "grad_norm": 6.818836571595896, + "learning_rate": 4.984884933863085e-06, + "loss": 17.7478, + "step": 28226 + }, + { + "epoch": 0.5159668780960389, + "grad_norm": 6.905771052252174, + "learning_rate": 4.984588923704323e-06, + "loss": 18.1019, + "step": 28227 + }, + { + "epoch": 0.5159851572924854, + "grad_norm": 5.661404374163345, + "learning_rate": 4.984292913599575e-06, + "loss": 17.0274, + "step": 28228 + }, + { + "epoch": 0.516003436488932, + "grad_norm": 7.014521349377153, + "learning_rate": 4.983996903549881e-06, + "loss": 17.7912, + "step": 28229 + }, + { + "epoch": 0.5160217156853785, + "grad_norm": 6.232023919122138, + "learning_rate": 4.983700893556273e-06, + "loss": 17.6686, + "step": 28230 + }, + { + "epoch": 0.516039994881825, + "grad_norm": 7.890911013477252, + "learning_rate": 4.983404883619794e-06, + "loss": 17.769, + "step": 28231 + }, + { + "epoch": 0.5160582740782715, + "grad_norm": 6.872587574323388, + "learning_rate": 
4.98310887374148e-06, + "loss": 17.5923, + "step": 28232 + }, + { + "epoch": 0.516076553274718, + "grad_norm": 5.201490169639113, + "learning_rate": 4.982812863922366e-06, + "loss": 17.0947, + "step": 28233 + }, + { + "epoch": 0.5160948324711646, + "grad_norm": 5.763744767295851, + "learning_rate": 4.982516854163494e-06, + "loss": 17.3743, + "step": 28234 + }, + { + "epoch": 0.5161131116676111, + "grad_norm": 6.914334473628715, + "learning_rate": 4.982220844465897e-06, + "loss": 18.0751, + "step": 28235 + }, + { + "epoch": 0.5161313908640576, + "grad_norm": 6.620543844060205, + "learning_rate": 4.981924834830614e-06, + "loss": 17.7356, + "step": 28236 + }, + { + "epoch": 0.5161496700605042, + "grad_norm": 6.557555013743278, + "learning_rate": 4.9816288252586844e-06, + "loss": 17.4938, + "step": 28237 + }, + { + "epoch": 0.5161679492569506, + "grad_norm": 6.9004998332954015, + "learning_rate": 4.981332815751144e-06, + "loss": 17.7065, + "step": 28238 + }, + { + "epoch": 0.5161862284533972, + "grad_norm": 6.3165920019418875, + "learning_rate": 4.98103680630903e-06, + "loss": 17.3589, + "step": 28239 + }, + { + "epoch": 0.5162045076498437, + "grad_norm": 6.509015542500228, + "learning_rate": 4.98074079693338e-06, + "loss": 17.6352, + "step": 28240 + }, + { + "epoch": 0.5162227868462902, + "grad_norm": 7.113184527318696, + "learning_rate": 4.980444787625233e-06, + "loss": 17.6092, + "step": 28241 + }, + { + "epoch": 0.5162410660427368, + "grad_norm": 6.14557656788362, + "learning_rate": 4.980148778385623e-06, + "loss": 17.1574, + "step": 28242 + }, + { + "epoch": 0.5162593452391833, + "grad_norm": 7.213181570738609, + "learning_rate": 4.9798527692155915e-06, + "loss": 18.0375, + "step": 28243 + }, + { + "epoch": 0.5162776244356299, + "grad_norm": 5.639323384051671, + "learning_rate": 4.9795567601161735e-06, + "loss": 17.1832, + "step": 28244 + }, + { + "epoch": 0.5162959036320763, + "grad_norm": 6.137465459690317, + "learning_rate": 4.979260751088409e-06, + "loss": 17.5277, + "step": 28245 + }, + { + "epoch": 0.5163141828285228, + "grad_norm": 7.096317856002897, + "learning_rate": 4.9789647421333335e-06, + "loss": 17.6459, + "step": 28246 + }, + { + "epoch": 0.5163324620249694, + "grad_norm": 6.791427737996549, + "learning_rate": 4.978668733251982e-06, + "loss": 17.8373, + "step": 28247 + }, + { + "epoch": 0.5163507412214159, + "grad_norm": 10.550594804003643, + "learning_rate": 4.978372724445397e-06, + "loss": 18.8802, + "step": 28248 + }, + { + "epoch": 0.5163690204178625, + "grad_norm": 5.836382127207683, + "learning_rate": 4.978076715714614e-06, + "loss": 17.6371, + "step": 28249 + }, + { + "epoch": 0.516387299614309, + "grad_norm": 6.107905509674293, + "learning_rate": 4.977780707060668e-06, + "loss": 17.2012, + "step": 28250 + }, + { + "epoch": 0.5164055788107554, + "grad_norm": 6.893817051738435, + "learning_rate": 4.977484698484602e-06, + "loss": 17.5091, + "step": 28251 + }, + { + "epoch": 0.516423858007202, + "grad_norm": 5.507606081739157, + "learning_rate": 4.9771886899874485e-06, + "loss": 17.087, + "step": 28252 + }, + { + "epoch": 0.5164421372036485, + "grad_norm": 5.895304799896382, + "learning_rate": 4.976892681570246e-06, + "loss": 17.3784, + "step": 28253 + }, + { + "epoch": 0.5164604164000951, + "grad_norm": 6.530205478059318, + "learning_rate": 4.9765966732340335e-06, + "loss": 17.5561, + "step": 28254 + }, + { + "epoch": 0.5164786955965416, + "grad_norm": 6.872070323979689, + "learning_rate": 4.9763006649798485e-06, + "loss": 17.7354, + "step": 28255 + }, + { + "epoch": 
0.5164969747929881, + "grad_norm": 6.304183298980531, + "learning_rate": 4.976004656808725e-06, + "loss": 17.3701, + "step": 28256 + }, + { + "epoch": 0.5165152539894347, + "grad_norm": 6.000271605623386, + "learning_rate": 4.975708648721705e-06, + "loss": 17.3946, + "step": 28257 + }, + { + "epoch": 0.5165335331858811, + "grad_norm": 6.491954323026376, + "learning_rate": 4.975412640719825e-06, + "loss": 17.6466, + "step": 28258 + }, + { + "epoch": 0.5165518123823277, + "grad_norm": 5.061908246575599, + "learning_rate": 4.975116632804119e-06, + "loss": 16.9296, + "step": 28259 + }, + { + "epoch": 0.5165700915787742, + "grad_norm": 6.976247778399507, + "learning_rate": 4.974820624975629e-06, + "loss": 17.6008, + "step": 28260 + }, + { + "epoch": 0.5165883707752207, + "grad_norm": 6.742763888324338, + "learning_rate": 4.974524617235389e-06, + "loss": 17.6102, + "step": 28261 + }, + { + "epoch": 0.5166066499716673, + "grad_norm": 6.6387729698208275, + "learning_rate": 4.974228609584438e-06, + "loss": 17.6382, + "step": 28262 + }, + { + "epoch": 0.5166249291681138, + "grad_norm": 6.2449695980180415, + "learning_rate": 4.973932602023816e-06, + "loss": 17.5073, + "step": 28263 + }, + { + "epoch": 0.5166432083645603, + "grad_norm": 6.450636685086195, + "learning_rate": 4.973636594554555e-06, + "loss": 17.3292, + "step": 28264 + }, + { + "epoch": 0.5166614875610068, + "grad_norm": 7.9278652694344975, + "learning_rate": 4.973340587177698e-06, + "loss": 18.0148, + "step": 28265 + }, + { + "epoch": 0.5166797667574533, + "grad_norm": 7.312880169912325, + "learning_rate": 4.9730445798942784e-06, + "loss": 17.8211, + "step": 28266 + }, + { + "epoch": 0.5166980459538999, + "grad_norm": 5.976868121343258, + "learning_rate": 4.972748572705334e-06, + "loss": 17.2071, + "step": 28267 + }, + { + "epoch": 0.5167163251503464, + "grad_norm": 6.468444778454015, + "learning_rate": 4.972452565611906e-06, + "loss": 17.235, + "step": 28268 + }, + { + "epoch": 0.516734604346793, + "grad_norm": 5.1413800935468235, + "learning_rate": 4.9721565586150295e-06, + "loss": 16.8773, + "step": 28269 + }, + { + "epoch": 0.5167528835432394, + "grad_norm": 7.270722378199894, + "learning_rate": 4.971860551715739e-06, + "loss": 17.472, + "step": 28270 + }, + { + "epoch": 0.5167711627396859, + "grad_norm": 7.904071788550602, + "learning_rate": 4.971564544915077e-06, + "loss": 18.0134, + "step": 28271 + }, + { + "epoch": 0.5167894419361325, + "grad_norm": 5.7469197328987995, + "learning_rate": 4.971268538214079e-06, + "loss": 17.2567, + "step": 28272 + }, + { + "epoch": 0.516807721132579, + "grad_norm": 5.030550165373328, + "learning_rate": 4.97097253161378e-06, + "loss": 16.8883, + "step": 28273 + }, + { + "epoch": 0.5168260003290255, + "grad_norm": 6.871964276897015, + "learning_rate": 4.970676525115223e-06, + "loss": 17.7691, + "step": 28274 + }, + { + "epoch": 0.5168442795254721, + "grad_norm": 4.606991548838814, + "learning_rate": 4.970380518719439e-06, + "loss": 16.7543, + "step": 28275 + }, + { + "epoch": 0.5168625587219186, + "grad_norm": 5.56013897016377, + "learning_rate": 4.97008451242747e-06, + "loss": 17.3125, + "step": 28276 + }, + { + "epoch": 0.5168808379183651, + "grad_norm": 6.780150570995449, + "learning_rate": 4.969788506240354e-06, + "loss": 17.4703, + "step": 28277 + }, + { + "epoch": 0.5168991171148116, + "grad_norm": 6.311010318448981, + "learning_rate": 4.9694925001591235e-06, + "loss": 17.3542, + "step": 28278 + }, + { + "epoch": 0.5169173963112581, + "grad_norm": 6.854886493991408, + "learning_rate": 
4.969196494184822e-06, + "loss": 17.5825, + "step": 28279 + }, + { + "epoch": 0.5169356755077047, + "grad_norm": 8.301282717597386, + "learning_rate": 4.968900488318483e-06, + "loss": 18.3161, + "step": 28280 + }, + { + "epoch": 0.5169539547041512, + "grad_norm": 6.235898453284987, + "learning_rate": 4.968604482561143e-06, + "loss": 17.5212, + "step": 28281 + }, + { + "epoch": 0.5169722339005978, + "grad_norm": 6.341026571008973, + "learning_rate": 4.968308476913845e-06, + "loss": 17.5553, + "step": 28282 + }, + { + "epoch": 0.5169905130970442, + "grad_norm": 7.778073061671004, + "learning_rate": 4.968012471377623e-06, + "loss": 18.2273, + "step": 28283 + }, + { + "epoch": 0.5170087922934907, + "grad_norm": 5.256761677275139, + "learning_rate": 4.967716465953512e-06, + "loss": 17.1107, + "step": 28284 + }, + { + "epoch": 0.5170270714899373, + "grad_norm": 5.232657715473453, + "learning_rate": 4.967420460642553e-06, + "loss": 17.0836, + "step": 28285 + }, + { + "epoch": 0.5170453506863838, + "grad_norm": 6.372387967753305, + "learning_rate": 4.967124455445783e-06, + "loss": 17.3634, + "step": 28286 + }, + { + "epoch": 0.5170636298828304, + "grad_norm": 7.721831486024882, + "learning_rate": 4.966828450364238e-06, + "loss": 17.7065, + "step": 28287 + }, + { + "epoch": 0.5170819090792769, + "grad_norm": 6.690076463553113, + "learning_rate": 4.966532445398958e-06, + "loss": 17.6673, + "step": 28288 + }, + { + "epoch": 0.5171001882757233, + "grad_norm": 7.281341350508655, + "learning_rate": 4.966236440550977e-06, + "loss": 17.7145, + "step": 28289 + }, + { + "epoch": 0.5171184674721699, + "grad_norm": 5.559529328863653, + "learning_rate": 4.965940435821334e-06, + "loss": 17.0352, + "step": 28290 + }, + { + "epoch": 0.5171367466686164, + "grad_norm": 6.753979170901499, + "learning_rate": 4.965644431211069e-06, + "loss": 17.2227, + "step": 28291 + }, + { + "epoch": 0.517155025865063, + "grad_norm": 6.501264889097688, + "learning_rate": 4.9653484267212145e-06, + "loss": 17.5962, + "step": 28292 + }, + { + "epoch": 0.5171733050615095, + "grad_norm": 8.685964218239112, + "learning_rate": 4.965052422352814e-06, + "loss": 18.3715, + "step": 28293 + }, + { + "epoch": 0.517191584257956, + "grad_norm": 6.520037121411414, + "learning_rate": 4.9647564181069e-06, + "loss": 17.9027, + "step": 28294 + }, + { + "epoch": 0.5172098634544026, + "grad_norm": 5.006900722549799, + "learning_rate": 4.9644604139845106e-06, + "loss": 17.0317, + "step": 28295 + }, + { + "epoch": 0.517228142650849, + "grad_norm": 6.79245027887876, + "learning_rate": 4.964164409986687e-06, + "loss": 17.8067, + "step": 28296 + }, + { + "epoch": 0.5172464218472956, + "grad_norm": 6.425147652690195, + "learning_rate": 4.963868406114463e-06, + "loss": 17.0715, + "step": 28297 + }, + { + "epoch": 0.5172647010437421, + "grad_norm": 7.592396400924471, + "learning_rate": 4.963572402368877e-06, + "loss": 17.6252, + "step": 28298 + }, + { + "epoch": 0.5172829802401886, + "grad_norm": 4.9459803310968, + "learning_rate": 4.9632763987509656e-06, + "loss": 16.8748, + "step": 28299 + }, + { + "epoch": 0.5173012594366352, + "grad_norm": 5.637228125645112, + "learning_rate": 4.962980395261769e-06, + "loss": 17.0424, + "step": 28300 + }, + { + "epoch": 0.5173195386330817, + "grad_norm": 6.5955010881627985, + "learning_rate": 4.96268439190232e-06, + "loss": 17.5066, + "step": 28301 + }, + { + "epoch": 0.5173378178295283, + "grad_norm": 7.152268273930854, + "learning_rate": 4.962388388673661e-06, + "loss": 17.9691, + "step": 28302 + }, + { + "epoch": 
0.5173560970259747, + "grad_norm": 6.301281888975413, + "learning_rate": 4.962092385576828e-06, + "loss": 17.3616, + "step": 28303 + }, + { + "epoch": 0.5173743762224212, + "grad_norm": 7.623465819388862, + "learning_rate": 4.961796382612857e-06, + "loss": 18.2055, + "step": 28304 + }, + { + "epoch": 0.5173926554188678, + "grad_norm": 6.844232173095886, + "learning_rate": 4.961500379782787e-06, + "loss": 17.559, + "step": 28305 + }, + { + "epoch": 0.5174109346153143, + "grad_norm": 5.103228395683297, + "learning_rate": 4.961204377087654e-06, + "loss": 17.0642, + "step": 28306 + }, + { + "epoch": 0.5174292138117609, + "grad_norm": 5.862556345814915, + "learning_rate": 4.9609083745284955e-06, + "loss": 17.403, + "step": 28307 + }, + { + "epoch": 0.5174474930082074, + "grad_norm": 6.84776664844964, + "learning_rate": 4.960612372106352e-06, + "loss": 17.7825, + "step": 28308 + }, + { + "epoch": 0.5174657722046538, + "grad_norm": 7.289848890162367, + "learning_rate": 4.9603163698222565e-06, + "loss": 17.8208, + "step": 28309 + }, + { + "epoch": 0.5174840514011004, + "grad_norm": 5.779740391739088, + "learning_rate": 4.960020367677251e-06, + "loss": 17.2748, + "step": 28310 + }, + { + "epoch": 0.5175023305975469, + "grad_norm": 6.937638054976089, + "learning_rate": 4.959724365672369e-06, + "loss": 17.5835, + "step": 28311 + }, + { + "epoch": 0.5175206097939935, + "grad_norm": 5.821591776952739, + "learning_rate": 4.95942836380865e-06, + "loss": 17.0297, + "step": 28312 + }, + { + "epoch": 0.51753888899044, + "grad_norm": 7.870391038015023, + "learning_rate": 4.959132362087131e-06, + "loss": 17.5907, + "step": 28313 + }, + { + "epoch": 0.5175571681868865, + "grad_norm": 5.963559402433707, + "learning_rate": 4.958836360508851e-06, + "loss": 17.0072, + "step": 28314 + }, + { + "epoch": 0.517575447383333, + "grad_norm": 6.138604551563362, + "learning_rate": 4.958540359074843e-06, + "loss": 17.4719, + "step": 28315 + }, + { + "epoch": 0.5175937265797795, + "grad_norm": 5.55085953010945, + "learning_rate": 4.958244357786149e-06, + "loss": 17.0418, + "step": 28316 + }, + { + "epoch": 0.5176120057762261, + "grad_norm": 6.216162786051773, + "learning_rate": 4.957948356643806e-06, + "loss": 17.5679, + "step": 28317 + }, + { + "epoch": 0.5176302849726726, + "grad_norm": 6.1428693500892, + "learning_rate": 4.9576523556488485e-06, + "loss": 17.5301, + "step": 28318 + }, + { + "epoch": 0.5176485641691191, + "grad_norm": 5.828606178745438, + "learning_rate": 4.957356354802318e-06, + "loss": 17.1704, + "step": 28319 + }, + { + "epoch": 0.5176668433655657, + "grad_norm": 7.199583396032562, + "learning_rate": 4.957060354105247e-06, + "loss": 17.832, + "step": 28320 + }, + { + "epoch": 0.5176851225620122, + "grad_norm": 6.743918145778942, + "learning_rate": 4.956764353558677e-06, + "loss": 17.7326, + "step": 28321 + }, + { + "epoch": 0.5177034017584587, + "grad_norm": 6.830053726994922, + "learning_rate": 4.956468353163646e-06, + "loss": 17.6931, + "step": 28322 + }, + { + "epoch": 0.5177216809549052, + "grad_norm": 5.917704872181462, + "learning_rate": 4.956172352921186e-06, + "loss": 17.2226, + "step": 28323 + }, + { + "epoch": 0.5177399601513517, + "grad_norm": 5.256594909885979, + "learning_rate": 4.955876352832342e-06, + "loss": 17.0001, + "step": 28324 + }, + { + "epoch": 0.5177582393477983, + "grad_norm": 5.406433740918442, + "learning_rate": 4.955580352898145e-06, + "loss": 17.2431, + "step": 28325 + }, + { + "epoch": 0.5177765185442448, + "grad_norm": 5.8918516094395965, + "learning_rate": 
4.955284353119635e-06, + "loss": 17.2941, + "step": 28326 + }, + { + "epoch": 0.5177947977406914, + "grad_norm": 5.17406305535719, + "learning_rate": 4.954988353497851e-06, + "loss": 16.8681, + "step": 28327 + }, + { + "epoch": 0.5178130769371379, + "grad_norm": 5.742955895787697, + "learning_rate": 4.954692354033829e-06, + "loss": 17.2397, + "step": 28328 + }, + { + "epoch": 0.5178313561335843, + "grad_norm": 7.038128444420002, + "learning_rate": 4.954396354728604e-06, + "loss": 17.6647, + "step": 28329 + }, + { + "epoch": 0.5178496353300309, + "grad_norm": 7.029265376873107, + "learning_rate": 4.954100355583217e-06, + "loss": 18.0109, + "step": 28330 + }, + { + "epoch": 0.5178679145264774, + "grad_norm": 5.6742373272957884, + "learning_rate": 4.953804356598706e-06, + "loss": 17.1585, + "step": 28331 + }, + { + "epoch": 0.517886193722924, + "grad_norm": 5.766574209184498, + "learning_rate": 4.953508357776104e-06, + "loss": 17.4039, + "step": 28332 + }, + { + "epoch": 0.5179044729193705, + "grad_norm": 6.3053895933968525, + "learning_rate": 4.953212359116453e-06, + "loss": 17.4309, + "step": 28333 + }, + { + "epoch": 0.517922752115817, + "grad_norm": 7.950056890711074, + "learning_rate": 4.9529163606207884e-06, + "loss": 17.8308, + "step": 28334 + }, + { + "epoch": 0.5179410313122635, + "grad_norm": 7.35631073458575, + "learning_rate": 4.952620362290146e-06, + "loss": 17.8463, + "step": 28335 + }, + { + "epoch": 0.51795931050871, + "grad_norm": 5.043105007380679, + "learning_rate": 4.952324364125567e-06, + "loss": 16.6986, + "step": 28336 + }, + { + "epoch": 0.5179775897051566, + "grad_norm": 4.6970956307817815, + "learning_rate": 4.952028366128086e-06, + "loss": 16.8207, + "step": 28337 + }, + { + "epoch": 0.5179958689016031, + "grad_norm": 7.161634770000542, + "learning_rate": 4.951732368298743e-06, + "loss": 17.5895, + "step": 28338 + }, + { + "epoch": 0.5180141480980496, + "grad_norm": 6.896067591252059, + "learning_rate": 4.951436370638572e-06, + "loss": 17.2049, + "step": 28339 + }, + { + "epoch": 0.5180324272944962, + "grad_norm": 5.907005539784071, + "learning_rate": 4.951140373148613e-06, + "loss": 17.4387, + "step": 28340 + }, + { + "epoch": 0.5180507064909426, + "grad_norm": 6.669907739938093, + "learning_rate": 4.950844375829903e-06, + "loss": 17.532, + "step": 28341 + }, + { + "epoch": 0.5180689856873891, + "grad_norm": 6.386297552721186, + "learning_rate": 4.9505483786834804e-06, + "loss": 17.4779, + "step": 28342 + }, + { + "epoch": 0.5180872648838357, + "grad_norm": 6.555596657950641, + "learning_rate": 4.950252381710379e-06, + "loss": 17.1734, + "step": 28343 + }, + { + "epoch": 0.5181055440802822, + "grad_norm": 8.248701041644166, + "learning_rate": 4.94995638491164e-06, + "loss": 18.5298, + "step": 28344 + }, + { + "epoch": 0.5181238232767288, + "grad_norm": 5.926592105592358, + "learning_rate": 4.9496603882883005e-06, + "loss": 17.32, + "step": 28345 + }, + { + "epoch": 0.5181421024731753, + "grad_norm": 7.876904087450707, + "learning_rate": 4.949364391841395e-06, + "loss": 17.803, + "step": 28346 + }, + { + "epoch": 0.5181603816696218, + "grad_norm": 8.87604005395616, + "learning_rate": 4.9490683955719645e-06, + "loss": 18.6382, + "step": 28347 + }, + { + "epoch": 0.5181786608660683, + "grad_norm": 6.327237224683834, + "learning_rate": 4.948772399481044e-06, + "loss": 17.1739, + "step": 28348 + }, + { + "epoch": 0.5181969400625148, + "grad_norm": 7.09737098314194, + "learning_rate": 4.9484764035696705e-06, + "loss": 17.6734, + "step": 28349 + }, + { + "epoch": 
0.5182152192589614, + "grad_norm": 12.50251472700511, + "learning_rate": 4.9481804078388854e-06, + "loss": 17.6391, + "step": 28350 + }, + { + "epoch": 0.5182334984554079, + "grad_norm": 5.556826248447641, + "learning_rate": 4.94788441228972e-06, + "loss": 17.31, + "step": 28351 + }, + { + "epoch": 0.5182517776518544, + "grad_norm": 6.169112154080561, + "learning_rate": 4.9475884169232195e-06, + "loss": 17.3714, + "step": 28352 + }, + { + "epoch": 0.518270056848301, + "grad_norm": 6.149369658046421, + "learning_rate": 4.947292421740415e-06, + "loss": 17.459, + "step": 28353 + }, + { + "epoch": 0.5182883360447474, + "grad_norm": 7.25027977926675, + "learning_rate": 4.9469964267423445e-06, + "loss": 17.6783, + "step": 28354 + }, + { + "epoch": 0.518306615241194, + "grad_norm": 5.705919113560786, + "learning_rate": 4.946700431930049e-06, + "loss": 17.2594, + "step": 28355 + }, + { + "epoch": 0.5183248944376405, + "grad_norm": 7.103057322642434, + "learning_rate": 4.946404437304565e-06, + "loss": 17.7569, + "step": 28356 + }, + { + "epoch": 0.518343173634087, + "grad_norm": 6.324290224428657, + "learning_rate": 4.946108442866925e-06, + "loss": 17.4654, + "step": 28357 + }, + { + "epoch": 0.5183614528305336, + "grad_norm": 5.6742389865285245, + "learning_rate": 4.945812448618173e-06, + "loss": 17.2231, + "step": 28358 + }, + { + "epoch": 0.5183797320269801, + "grad_norm": 6.67351229223035, + "learning_rate": 4.945516454559343e-06, + "loss": 17.6726, + "step": 28359 + }, + { + "epoch": 0.5183980112234267, + "grad_norm": 5.985127165617485, + "learning_rate": 4.945220460691473e-06, + "loss": 17.2501, + "step": 28360 + }, + { + "epoch": 0.5184162904198731, + "grad_norm": 6.790971164288386, + "learning_rate": 4.944924467015601e-06, + "loss": 17.6443, + "step": 28361 + }, + { + "epoch": 0.5184345696163196, + "grad_norm": 5.156384856233661, + "learning_rate": 4.944628473532763e-06, + "loss": 17.082, + "step": 28362 + }, + { + "epoch": 0.5184528488127662, + "grad_norm": 5.804276280583565, + "learning_rate": 4.9443324802439975e-06, + "loss": 17.3728, + "step": 28363 + }, + { + "epoch": 0.5184711280092127, + "grad_norm": 6.887342527283665, + "learning_rate": 4.944036487150343e-06, + "loss": 18.011, + "step": 28364 + }, + { + "epoch": 0.5184894072056593, + "grad_norm": 4.909795921075503, + "learning_rate": 4.943740494252835e-06, + "loss": 16.7328, + "step": 28365 + }, + { + "epoch": 0.5185076864021058, + "grad_norm": 5.793876673440153, + "learning_rate": 4.94344450155251e-06, + "loss": 17.0952, + "step": 28366 + }, + { + "epoch": 0.5185259655985522, + "grad_norm": 5.643399042905578, + "learning_rate": 4.94314850905041e-06, + "loss": 17.1263, + "step": 28367 + }, + { + "epoch": 0.5185442447949988, + "grad_norm": 6.337080276201196, + "learning_rate": 4.942852516747567e-06, + "loss": 17.6168, + "step": 28368 + }, + { + "epoch": 0.5185625239914453, + "grad_norm": 4.43384728185298, + "learning_rate": 4.942556524645023e-06, + "loss": 16.618, + "step": 28369 + }, + { + "epoch": 0.5185808031878919, + "grad_norm": 6.31907727904341, + "learning_rate": 4.942260532743813e-06, + "loss": 17.5743, + "step": 28370 + }, + { + "epoch": 0.5185990823843384, + "grad_norm": 6.269384186412205, + "learning_rate": 4.9419645410449735e-06, + "loss": 17.3451, + "step": 28371 + }, + { + "epoch": 0.5186173615807849, + "grad_norm": 6.723354654080379, + "learning_rate": 4.9416685495495454e-06, + "loss": 17.5516, + "step": 28372 + }, + { + "epoch": 0.5186356407772315, + "grad_norm": 5.644872574713539, + "learning_rate": 
4.941372558258564e-06, + "loss": 17.238, + "step": 28373 + }, + { + "epoch": 0.5186539199736779, + "grad_norm": 8.335516295278675, + "learning_rate": 4.941076567173064e-06, + "loss": 17.8968, + "step": 28374 + }, + { + "epoch": 0.5186721991701245, + "grad_norm": 7.392830132433686, + "learning_rate": 4.940780576294087e-06, + "loss": 17.701, + "step": 28375 + }, + { + "epoch": 0.518690478366571, + "grad_norm": 5.7589566603408615, + "learning_rate": 4.94048458562267e-06, + "loss": 17.3986, + "step": 28376 + }, + { + "epoch": 0.5187087575630175, + "grad_norm": 6.828373995474195, + "learning_rate": 4.940188595159848e-06, + "loss": 17.8689, + "step": 28377 + }, + { + "epoch": 0.5187270367594641, + "grad_norm": 6.074302009423401, + "learning_rate": 4.939892604906661e-06, + "loss": 17.3894, + "step": 28378 + }, + { + "epoch": 0.5187453159559106, + "grad_norm": 5.395728480787896, + "learning_rate": 4.939596614864144e-06, + "loss": 17.1342, + "step": 28379 + }, + { + "epoch": 0.5187635951523571, + "grad_norm": 7.098541162613564, + "learning_rate": 4.9393006250333345e-06, + "loss": 17.6606, + "step": 28380 + }, + { + "epoch": 0.5187818743488036, + "grad_norm": 6.19940468455215, + "learning_rate": 4.939004635415274e-06, + "loss": 17.1753, + "step": 28381 + }, + { + "epoch": 0.5188001535452501, + "grad_norm": 5.574690490203115, + "learning_rate": 4.938708646010994e-06, + "loss": 17.4, + "step": 28382 + }, + { + "epoch": 0.5188184327416967, + "grad_norm": 7.557797584270141, + "learning_rate": 4.9384126568215374e-06, + "loss": 18.0713, + "step": 28383 + }, + { + "epoch": 0.5188367119381432, + "grad_norm": 5.5071146457900335, + "learning_rate": 4.938116667847938e-06, + "loss": 17.1174, + "step": 28384 + }, + { + "epoch": 0.5188549911345898, + "grad_norm": 6.239175399354861, + "learning_rate": 4.937820679091233e-06, + "loss": 17.6445, + "step": 28385 + }, + { + "epoch": 0.5188732703310363, + "grad_norm": 7.55498510327374, + "learning_rate": 4.937524690552464e-06, + "loss": 17.7631, + "step": 28386 + }, + { + "epoch": 0.5188915495274827, + "grad_norm": 6.593389107555483, + "learning_rate": 4.937228702232665e-06, + "loss": 17.4847, + "step": 28387 + }, + { + "epoch": 0.5189098287239293, + "grad_norm": 7.227918407171986, + "learning_rate": 4.9369327141328715e-06, + "loss": 18.1397, + "step": 28388 + }, + { + "epoch": 0.5189281079203758, + "grad_norm": 7.193422353039811, + "learning_rate": 4.936636726254125e-06, + "loss": 17.5931, + "step": 28389 + }, + { + "epoch": 0.5189463871168224, + "grad_norm": 6.521969771777463, + "learning_rate": 4.936340738597462e-06, + "loss": 17.5764, + "step": 28390 + }, + { + "epoch": 0.5189646663132689, + "grad_norm": 6.147235511859382, + "learning_rate": 4.936044751163917e-06, + "loss": 17.5021, + "step": 28391 + }, + { + "epoch": 0.5189829455097154, + "grad_norm": 4.990153548976036, + "learning_rate": 4.9357487639545324e-06, + "loss": 16.7748, + "step": 28392 + }, + { + "epoch": 0.519001224706162, + "grad_norm": 5.926882449078139, + "learning_rate": 4.935452776970341e-06, + "loss": 17.1332, + "step": 28393 + }, + { + "epoch": 0.5190195039026084, + "grad_norm": 8.228062422646142, + "learning_rate": 4.935156790212381e-06, + "loss": 18.3323, + "step": 28394 + }, + { + "epoch": 0.519037783099055, + "grad_norm": 7.999383086747716, + "learning_rate": 4.934860803681693e-06, + "loss": 18.1071, + "step": 28395 + }, + { + "epoch": 0.5190560622955015, + "grad_norm": 5.932543343354322, + "learning_rate": 4.934564817379312e-06, + "loss": 17.1878, + "step": 28396 + }, + { + "epoch": 
0.519074341491948, + "grad_norm": 5.780339840906645, + "learning_rate": 4.934268831306274e-06, + "loss": 17.0861, + "step": 28397 + }, + { + "epoch": 0.5190926206883946, + "grad_norm": 5.877545939180377, + "learning_rate": 4.9339728454636194e-06, + "loss": 17.6133, + "step": 28398 + }, + { + "epoch": 0.519110899884841, + "grad_norm": 6.929590337227377, + "learning_rate": 4.933676859852383e-06, + "loss": 18.037, + "step": 28399 + }, + { + "epoch": 0.5191291790812876, + "grad_norm": 6.550692562746869, + "learning_rate": 4.933380874473605e-06, + "loss": 17.5974, + "step": 28400 + }, + { + "epoch": 0.5191474582777341, + "grad_norm": 7.504334160162672, + "learning_rate": 4.933084889328322e-06, + "loss": 17.6812, + "step": 28401 + }, + { + "epoch": 0.5191657374741806, + "grad_norm": 6.277629082262044, + "learning_rate": 4.932788904417568e-06, + "loss": 17.4072, + "step": 28402 + }, + { + "epoch": 0.5191840166706272, + "grad_norm": 5.911238000245223, + "learning_rate": 4.932492919742384e-06, + "loss": 17.0953, + "step": 28403 + }, + { + "epoch": 0.5192022958670737, + "grad_norm": 5.204696671213171, + "learning_rate": 4.932196935303808e-06, + "loss": 17.13, + "step": 28404 + }, + { + "epoch": 0.5192205750635203, + "grad_norm": 5.964727338725323, + "learning_rate": 4.931900951102873e-06, + "loss": 17.5791, + "step": 28405 + }, + { + "epoch": 0.5192388542599667, + "grad_norm": 6.512404584723284, + "learning_rate": 4.931604967140622e-06, + "loss": 17.5078, + "step": 28406 + }, + { + "epoch": 0.5192571334564132, + "grad_norm": 4.910562497753882, + "learning_rate": 4.9313089834180885e-06, + "loss": 16.6523, + "step": 28407 + }, + { + "epoch": 0.5192754126528598, + "grad_norm": 5.419494529033941, + "learning_rate": 4.9310129999363095e-06, + "loss": 17.4113, + "step": 28408 + }, + { + "epoch": 0.5192936918493063, + "grad_norm": 7.950816500233542, + "learning_rate": 4.930717016696327e-06, + "loss": 17.9876, + "step": 28409 + }, + { + "epoch": 0.5193119710457528, + "grad_norm": 5.0251182567233466, + "learning_rate": 4.930421033699175e-06, + "loss": 16.789, + "step": 28410 + }, + { + "epoch": 0.5193302502421994, + "grad_norm": 7.994435891045464, + "learning_rate": 4.930125050945889e-06, + "loss": 18.0434, + "step": 28411 + }, + { + "epoch": 0.5193485294386458, + "grad_norm": 6.410436598654899, + "learning_rate": 4.929829068437509e-06, + "loss": 17.6376, + "step": 28412 + }, + { + "epoch": 0.5193668086350924, + "grad_norm": 5.663290499919785, + "learning_rate": 4.929533086175072e-06, + "loss": 17.2351, + "step": 28413 + }, + { + "epoch": 0.5193850878315389, + "grad_norm": 8.776582529292156, + "learning_rate": 4.9292371041596175e-06, + "loss": 18.4499, + "step": 28414 + }, + { + "epoch": 0.5194033670279854, + "grad_norm": 7.041122472572309, + "learning_rate": 4.928941122392181e-06, + "loss": 17.6232, + "step": 28415 + }, + { + "epoch": 0.519421646224432, + "grad_norm": 5.731336055102409, + "learning_rate": 4.928645140873797e-06, + "loss": 17.2532, + "step": 28416 + }, + { + "epoch": 0.5194399254208785, + "grad_norm": 5.400588933769152, + "learning_rate": 4.928349159605506e-06, + "loss": 17.0085, + "step": 28417 + }, + { + "epoch": 0.5194582046173251, + "grad_norm": 7.707068384184171, + "learning_rate": 4.928053178588347e-06, + "loss": 17.8172, + "step": 28418 + }, + { + "epoch": 0.5194764838137715, + "grad_norm": 7.437717160223488, + "learning_rate": 4.9277571978233526e-06, + "loss": 17.7059, + "step": 28419 + }, + { + "epoch": 0.519494763010218, + "grad_norm": 5.959425894062813, + "learning_rate": 
4.927461217311566e-06, + "loss": 17.3244, + "step": 28420 + }, + { + "epoch": 0.5195130422066646, + "grad_norm": 5.688353393018982, + "learning_rate": 4.92716523705402e-06, + "loss": 17.136, + "step": 28421 + }, + { + "epoch": 0.5195313214031111, + "grad_norm": 7.921413404049765, + "learning_rate": 4.926869257051752e-06, + "loss": 18.2318, + "step": 28422 + }, + { + "epoch": 0.5195496005995577, + "grad_norm": 6.218524229888885, + "learning_rate": 4.926573277305804e-06, + "loss": 17.5586, + "step": 28423 + }, + { + "epoch": 0.5195678797960042, + "grad_norm": 5.7370631366777385, + "learning_rate": 4.926277297817209e-06, + "loss": 17.2098, + "step": 28424 + }, + { + "epoch": 0.5195861589924506, + "grad_norm": 6.801426371435264, + "learning_rate": 4.925981318587005e-06, + "loss": 17.5821, + "step": 28425 + }, + { + "epoch": 0.5196044381888972, + "grad_norm": 6.884217335038451, + "learning_rate": 4.9256853396162304e-06, + "loss": 17.5537, + "step": 28426 + }, + { + "epoch": 0.5196227173853437, + "grad_norm": 6.938959073811555, + "learning_rate": 4.925389360905924e-06, + "loss": 17.7079, + "step": 28427 + }, + { + "epoch": 0.5196409965817903, + "grad_norm": 6.3840040994419835, + "learning_rate": 4.925093382457118e-06, + "loss": 17.6047, + "step": 28428 + }, + { + "epoch": 0.5196592757782368, + "grad_norm": 5.493911569407914, + "learning_rate": 4.924797404270854e-06, + "loss": 17.1116, + "step": 28429 + }, + { + "epoch": 0.5196775549746833, + "grad_norm": 6.806618236692073, + "learning_rate": 4.92450142634817e-06, + "loss": 17.8762, + "step": 28430 + }, + { + "epoch": 0.5196958341711299, + "grad_norm": 6.596538830081444, + "learning_rate": 4.924205448690101e-06, + "loss": 17.4036, + "step": 28431 + }, + { + "epoch": 0.5197141133675763, + "grad_norm": 6.1625061284571006, + "learning_rate": 4.923909471297687e-06, + "loss": 17.5077, + "step": 28432 + }, + { + "epoch": 0.5197323925640229, + "grad_norm": 5.321676442797134, + "learning_rate": 4.923613494171962e-06, + "loss": 17.0904, + "step": 28433 + }, + { + "epoch": 0.5197506717604694, + "grad_norm": 13.33556481310796, + "learning_rate": 4.923317517313965e-06, + "loss": 19.3512, + "step": 28434 + }, + { + "epoch": 0.5197689509569159, + "grad_norm": 6.584603473348195, + "learning_rate": 4.923021540724735e-06, + "loss": 17.5803, + "step": 28435 + }, + { + "epoch": 0.5197872301533625, + "grad_norm": 7.97873784956204, + "learning_rate": 4.9227255644053056e-06, + "loss": 17.7348, + "step": 28436 + }, + { + "epoch": 0.519805509349809, + "grad_norm": 5.609333406609034, + "learning_rate": 4.9224295883567185e-06, + "loss": 17.2715, + "step": 28437 + }, + { + "epoch": 0.5198237885462555, + "grad_norm": 6.283377946211638, + "learning_rate": 4.922133612580009e-06, + "loss": 17.4622, + "step": 28438 + }, + { + "epoch": 0.519842067742702, + "grad_norm": 7.13110081138636, + "learning_rate": 4.921837637076212e-06, + "loss": 17.4589, + "step": 28439 + }, + { + "epoch": 0.5198603469391485, + "grad_norm": 8.620049344265254, + "learning_rate": 4.921541661846369e-06, + "loss": 18.0009, + "step": 28440 + }, + { + "epoch": 0.5198786261355951, + "grad_norm": 5.458156654294497, + "learning_rate": 4.921245686891517e-06, + "loss": 17.1189, + "step": 28441 + }, + { + "epoch": 0.5198969053320416, + "grad_norm": 5.461605257200562, + "learning_rate": 4.92094971221269e-06, + "loss": 17.2679, + "step": 28442 + }, + { + "epoch": 0.5199151845284882, + "grad_norm": 6.077526445575021, + "learning_rate": 4.920653737810927e-06, + "loss": 17.7665, + "step": 28443 + }, + { + "epoch": 
0.5199334637249347, + "grad_norm": 7.17578423592594, + "learning_rate": 4.920357763687265e-06, + "loss": 17.5324, + "step": 28444 + }, + { + "epoch": 0.5199517429213811, + "grad_norm": 5.584351044297349, + "learning_rate": 4.920061789842745e-06, + "loss": 17.3489, + "step": 28445 + }, + { + "epoch": 0.5199700221178277, + "grad_norm": 6.857244895720754, + "learning_rate": 4.9197658162784015e-06, + "loss": 17.819, + "step": 28446 + }, + { + "epoch": 0.5199883013142742, + "grad_norm": 6.717729242088374, + "learning_rate": 4.919469842995269e-06, + "loss": 17.7314, + "step": 28447 + }, + { + "epoch": 0.5200065805107208, + "grad_norm": 6.559535453075689, + "learning_rate": 4.91917386999439e-06, + "loss": 17.2234, + "step": 28448 + }, + { + "epoch": 0.5200248597071673, + "grad_norm": 7.06374343637183, + "learning_rate": 4.9188778972767996e-06, + "loss": 17.3341, + "step": 28449 + }, + { + "epoch": 0.5200431389036138, + "grad_norm": 5.7664473779447, + "learning_rate": 4.918581924843534e-06, + "loss": 17.1368, + "step": 28450 + }, + { + "epoch": 0.5200614181000603, + "grad_norm": 5.871158813757223, + "learning_rate": 4.9182859526956324e-06, + "loss": 17.2258, + "step": 28451 + }, + { + "epoch": 0.5200796972965068, + "grad_norm": 5.98428605017562, + "learning_rate": 4.917989980834132e-06, + "loss": 17.5082, + "step": 28452 + }, + { + "epoch": 0.5200979764929534, + "grad_norm": 6.733955259138203, + "learning_rate": 4.917694009260067e-06, + "loss": 17.4531, + "step": 28453 + }, + { + "epoch": 0.5201162556893999, + "grad_norm": 6.591630512321672, + "learning_rate": 4.91739803797448e-06, + "loss": 17.7313, + "step": 28454 + }, + { + "epoch": 0.5201345348858464, + "grad_norm": 7.072901599462475, + "learning_rate": 4.9171020669784065e-06, + "loss": 17.7049, + "step": 28455 + }, + { + "epoch": 0.520152814082293, + "grad_norm": 5.093333802295993, + "learning_rate": 4.9168060962728795e-06, + "loss": 17.0145, + "step": 28456 + }, + { + "epoch": 0.5201710932787394, + "grad_norm": 6.398399630976842, + "learning_rate": 4.916510125858942e-06, + "loss": 17.7248, + "step": 28457 + }, + { + "epoch": 0.520189372475186, + "grad_norm": 7.420138996746665, + "learning_rate": 4.91621415573763e-06, + "loss": 17.7913, + "step": 28458 + }, + { + "epoch": 0.5202076516716325, + "grad_norm": 6.666231696793718, + "learning_rate": 4.915918185909978e-06, + "loss": 17.4272, + "step": 28459 + }, + { + "epoch": 0.520225930868079, + "grad_norm": 6.8041289241111835, + "learning_rate": 4.915622216377028e-06, + "loss": 17.7081, + "step": 28460 + }, + { + "epoch": 0.5202442100645256, + "grad_norm": 5.951706159791261, + "learning_rate": 4.915326247139812e-06, + "loss": 17.4126, + "step": 28461 + }, + { + "epoch": 0.5202624892609721, + "grad_norm": 5.490469716778039, + "learning_rate": 4.9150302781993715e-06, + "loss": 17.169, + "step": 28462 + }, + { + "epoch": 0.5202807684574187, + "grad_norm": 7.478933155531026, + "learning_rate": 4.914734309556744e-06, + "loss": 17.8273, + "step": 28463 + }, + { + "epoch": 0.5202990476538651, + "grad_norm": 5.376059672743149, + "learning_rate": 4.914438341212963e-06, + "loss": 17.1553, + "step": 28464 + }, + { + "epoch": 0.5203173268503116, + "grad_norm": 6.858406977413497, + "learning_rate": 4.91414237316907e-06, + "loss": 17.5945, + "step": 28465 + }, + { + "epoch": 0.5203356060467582, + "grad_norm": 6.077786951193615, + "learning_rate": 4.9138464054261e-06, + "loss": 17.3616, + "step": 28466 + }, + { + "epoch": 0.5203538852432047, + "grad_norm": 6.812812721856576, + "learning_rate": 
4.913550437985089e-06, + "loss": 17.8304, + "step": 28467 + }, + { + "epoch": 0.5203721644396513, + "grad_norm": 6.2142037331805025, + "learning_rate": 4.913254470847079e-06, + "loss": 17.566, + "step": 28468 + }, + { + "epoch": 0.5203904436360978, + "grad_norm": 7.039103235915634, + "learning_rate": 4.912958504013104e-06, + "loss": 17.8699, + "step": 28469 + }, + { + "epoch": 0.5204087228325442, + "grad_norm": 7.681828737673268, + "learning_rate": 4.9126625374842e-06, + "loss": 18.2374, + "step": 28470 + }, + { + "epoch": 0.5204270020289908, + "grad_norm": 13.307296266008041, + "learning_rate": 4.912366571261408e-06, + "loss": 17.6047, + "step": 28471 + }, + { + "epoch": 0.5204452812254373, + "grad_norm": 7.231720035665516, + "learning_rate": 4.912070605345764e-06, + "loss": 18.2681, + "step": 28472 + }, + { + "epoch": 0.5204635604218839, + "grad_norm": 6.602924443524286, + "learning_rate": 4.911774639738303e-06, + "loss": 17.5021, + "step": 28473 + }, + { + "epoch": 0.5204818396183304, + "grad_norm": 7.883868020494509, + "learning_rate": 4.911478674440066e-06, + "loss": 18.2676, + "step": 28474 + }, + { + "epoch": 0.5205001188147769, + "grad_norm": 6.159561162809547, + "learning_rate": 4.911182709452086e-06, + "loss": 17.2755, + "step": 28475 + }, + { + "epoch": 0.5205183980112235, + "grad_norm": 5.514469339714249, + "learning_rate": 4.910886744775405e-06, + "loss": 17.2943, + "step": 28476 + }, + { + "epoch": 0.5205366772076699, + "grad_norm": 6.324586802480088, + "learning_rate": 4.910590780411058e-06, + "loss": 17.3471, + "step": 28477 + }, + { + "epoch": 0.5205549564041164, + "grad_norm": 6.5277273182511175, + "learning_rate": 4.910294816360081e-06, + "loss": 17.519, + "step": 28478 + }, + { + "epoch": 0.520573235600563, + "grad_norm": 6.6522125621361115, + "learning_rate": 4.909998852623516e-06, + "loss": 17.6845, + "step": 28479 + }, + { + "epoch": 0.5205915147970095, + "grad_norm": 5.550169158965691, + "learning_rate": 4.9097028892023955e-06, + "loss": 17.0065, + "step": 28480 + }, + { + "epoch": 0.5206097939934561, + "grad_norm": 5.80747317245644, + "learning_rate": 4.909406926097758e-06, + "loss": 17.1229, + "step": 28481 + }, + { + "epoch": 0.5206280731899026, + "grad_norm": 6.619435338361883, + "learning_rate": 4.909110963310642e-06, + "loss": 17.6486, + "step": 28482 + }, + { + "epoch": 0.520646352386349, + "grad_norm": 7.424707776529631, + "learning_rate": 4.908815000842085e-06, + "loss": 17.9898, + "step": 28483 + }, + { + "epoch": 0.5206646315827956, + "grad_norm": 6.1872392938948035, + "learning_rate": 4.908519038693122e-06, + "loss": 17.5199, + "step": 28484 + }, + { + "epoch": 0.5206829107792421, + "grad_norm": 7.727224028795232, + "learning_rate": 4.908223076864792e-06, + "loss": 17.9094, + "step": 28485 + }, + { + "epoch": 0.5207011899756887, + "grad_norm": 5.480193590507775, + "learning_rate": 4.907927115358133e-06, + "loss": 17.1923, + "step": 28486 + }, + { + "epoch": 0.5207194691721352, + "grad_norm": 6.266386773671442, + "learning_rate": 4.907631154174181e-06, + "loss": 17.7293, + "step": 28487 + }, + { + "epoch": 0.5207377483685817, + "grad_norm": 7.352862448510087, + "learning_rate": 4.9073351933139744e-06, + "loss": 17.8787, + "step": 28488 + }, + { + "epoch": 0.5207560275650283, + "grad_norm": 6.142563812239842, + "learning_rate": 4.9070392327785484e-06, + "loss": 17.1949, + "step": 28489 + }, + { + "epoch": 0.5207743067614747, + "grad_norm": 5.701055702495349, + "learning_rate": 4.906743272568942e-06, + "loss": 16.9904, + "step": 28490 + }, + { + "epoch": 
0.5207925859579213, + "grad_norm": 5.679934440740015, + "learning_rate": 4.906447312686195e-06, + "loss": 17.0599, + "step": 28491 + }, + { + "epoch": 0.5208108651543678, + "grad_norm": 6.686150930820088, + "learning_rate": 4.906151353131339e-06, + "loss": 17.4449, + "step": 28492 + }, + { + "epoch": 0.5208291443508143, + "grad_norm": 6.696901803619023, + "learning_rate": 4.905855393905415e-06, + "loss": 17.3549, + "step": 28493 + }, + { + "epoch": 0.5208474235472609, + "grad_norm": 6.195181244263033, + "learning_rate": 4.905559435009462e-06, + "loss": 17.5175, + "step": 28494 + }, + { + "epoch": 0.5208657027437074, + "grad_norm": 6.59450788418369, + "learning_rate": 4.905263476444511e-06, + "loss": 17.1538, + "step": 28495 + }, + { + "epoch": 0.520883981940154, + "grad_norm": 7.408468682901097, + "learning_rate": 4.904967518211607e-06, + "loss": 17.9047, + "step": 28496 + }, + { + "epoch": 0.5209022611366004, + "grad_norm": 6.412189614504112, + "learning_rate": 4.904671560311782e-06, + "loss": 17.4363, + "step": 28497 + }, + { + "epoch": 0.5209205403330469, + "grad_norm": 6.7656715290992455, + "learning_rate": 4.904375602746074e-06, + "loss": 17.9125, + "step": 28498 + }, + { + "epoch": 0.5209388195294935, + "grad_norm": 6.4924780465885945, + "learning_rate": 4.904079645515523e-06, + "loss": 17.2634, + "step": 28499 + }, + { + "epoch": 0.52095709872594, + "grad_norm": 6.285596636750341, + "learning_rate": 4.9037836886211645e-06, + "loss": 17.5109, + "step": 28500 + }, + { + "epoch": 0.5209753779223866, + "grad_norm": 6.7154283657399025, + "learning_rate": 4.903487732064034e-06, + "loss": 17.6013, + "step": 28501 + }, + { + "epoch": 0.520993657118833, + "grad_norm": 6.548987268343913, + "learning_rate": 4.903191775845171e-06, + "loss": 17.78, + "step": 28502 + }, + { + "epoch": 0.5210119363152795, + "grad_norm": 5.626496958756712, + "learning_rate": 4.9028958199656145e-06, + "loss": 17.1568, + "step": 28503 + }, + { + "epoch": 0.5210302155117261, + "grad_norm": 6.430132484776041, + "learning_rate": 4.902599864426397e-06, + "loss": 17.5608, + "step": 28504 + }, + { + "epoch": 0.5210484947081726, + "grad_norm": 6.821738962030881, + "learning_rate": 4.902303909228561e-06, + "loss": 17.402, + "step": 28505 + }, + { + "epoch": 0.5210667739046192, + "grad_norm": 7.8250467364491145, + "learning_rate": 4.902007954373139e-06, + "loss": 18.2186, + "step": 28506 + }, + { + "epoch": 0.5210850531010657, + "grad_norm": 7.19755452109373, + "learning_rate": 4.901711999861172e-06, + "loss": 18.0846, + "step": 28507 + }, + { + "epoch": 0.5211033322975122, + "grad_norm": 8.471044242296147, + "learning_rate": 4.901416045693697e-06, + "loss": 18.6619, + "step": 28508 + }, + { + "epoch": 0.5211216114939587, + "grad_norm": 7.159979382803887, + "learning_rate": 4.901120091871747e-06, + "loss": 18.2145, + "step": 28509 + }, + { + "epoch": 0.5211398906904052, + "grad_norm": 8.182553049643214, + "learning_rate": 4.9008241383963655e-06, + "loss": 17.5352, + "step": 28510 + }, + { + "epoch": 0.5211581698868518, + "grad_norm": 7.407998109603498, + "learning_rate": 4.900528185268586e-06, + "loss": 18.4521, + "step": 28511 + }, + { + "epoch": 0.5211764490832983, + "grad_norm": 6.260601819204853, + "learning_rate": 4.900232232489445e-06, + "loss": 17.4589, + "step": 28512 + }, + { + "epoch": 0.5211947282797448, + "grad_norm": 5.3650323782935825, + "learning_rate": 4.899936280059983e-06, + "loss": 17.1716, + "step": 28513 + }, + { + "epoch": 0.5212130074761914, + "grad_norm": 5.452966579857079, + "learning_rate": 
4.899640327981237e-06, + "loss": 17.1096, + "step": 28514 + }, + { + "epoch": 0.5212312866726379, + "grad_norm": 6.8104556465352815, + "learning_rate": 4.899344376254239e-06, + "loss": 17.8224, + "step": 28515 + }, + { + "epoch": 0.5212495658690844, + "grad_norm": 7.543379328766232, + "learning_rate": 4.899048424880033e-06, + "loss": 18.0693, + "step": 28516 + }, + { + "epoch": 0.5212678450655309, + "grad_norm": 5.315562135630225, + "learning_rate": 4.898752473859654e-06, + "loss": 17.234, + "step": 28517 + }, + { + "epoch": 0.5212861242619774, + "grad_norm": 7.1049195512248735, + "learning_rate": 4.898456523194136e-06, + "loss": 17.4867, + "step": 28518 + }, + { + "epoch": 0.521304403458424, + "grad_norm": 7.272050415416022, + "learning_rate": 4.898160572884522e-06, + "loss": 17.7739, + "step": 28519 + }, + { + "epoch": 0.5213226826548705, + "grad_norm": 8.401022379990874, + "learning_rate": 4.897864622931845e-06, + "loss": 18.3357, + "step": 28520 + }, + { + "epoch": 0.5213409618513171, + "grad_norm": 8.378815935319116, + "learning_rate": 4.897568673337143e-06, + "loss": 17.6349, + "step": 28521 + }, + { + "epoch": 0.5213592410477635, + "grad_norm": 7.1869115979672165, + "learning_rate": 4.897272724101456e-06, + "loss": 17.6035, + "step": 28522 + }, + { + "epoch": 0.52137752024421, + "grad_norm": 6.674780567610482, + "learning_rate": 4.8969767752258165e-06, + "loss": 17.3377, + "step": 28523 + }, + { + "epoch": 0.5213957994406566, + "grad_norm": 5.367316091649678, + "learning_rate": 4.896680826711267e-06, + "loss": 17.2292, + "step": 28524 + }, + { + "epoch": 0.5214140786371031, + "grad_norm": 6.794283867337464, + "learning_rate": 4.896384878558841e-06, + "loss": 17.968, + "step": 28525 + }, + { + "epoch": 0.5214323578335497, + "grad_norm": 6.77990135169107, + "learning_rate": 4.896088930769576e-06, + "loss": 17.4489, + "step": 28526 + }, + { + "epoch": 0.5214506370299962, + "grad_norm": 8.045400185883338, + "learning_rate": 4.895792983344512e-06, + "loss": 17.8203, + "step": 28527 + }, + { + "epoch": 0.5214689162264426, + "grad_norm": 4.902725783251568, + "learning_rate": 4.895497036284685e-06, + "loss": 16.9511, + "step": 28528 + }, + { + "epoch": 0.5214871954228892, + "grad_norm": 5.70237133267022, + "learning_rate": 4.89520108959113e-06, + "loss": 17.1397, + "step": 28529 + }, + { + "epoch": 0.5215054746193357, + "grad_norm": 6.144524863815836, + "learning_rate": 4.894905143264887e-06, + "loss": 17.3851, + "step": 28530 + }, + { + "epoch": 0.5215237538157823, + "grad_norm": 5.899723755150293, + "learning_rate": 4.8946091973069935e-06, + "loss": 17.5855, + "step": 28531 + }, + { + "epoch": 0.5215420330122288, + "grad_norm": 5.603984848018888, + "learning_rate": 4.894313251718483e-06, + "loss": 17.1923, + "step": 28532 + }, + { + "epoch": 0.5215603122086753, + "grad_norm": 6.277892233137457, + "learning_rate": 4.894017306500397e-06, + "loss": 17.2843, + "step": 28533 + }, + { + "epoch": 0.5215785914051219, + "grad_norm": 5.936613727806103, + "learning_rate": 4.893721361653771e-06, + "loss": 17.5003, + "step": 28534 + }, + { + "epoch": 0.5215968706015683, + "grad_norm": 7.027533804174499, + "learning_rate": 4.893425417179641e-06, + "loss": 17.649, + "step": 28535 + }, + { + "epoch": 0.5216151497980149, + "grad_norm": 6.102147359780212, + "learning_rate": 4.893129473079048e-06, + "loss": 17.1189, + "step": 28536 + }, + { + "epoch": 0.5216334289944614, + "grad_norm": 6.998842791815334, + "learning_rate": 4.892833529353025e-06, + "loss": 17.8941, + "step": 28537 + }, + { + "epoch": 
0.5216517081909079, + "grad_norm": 5.774873116056154, + "learning_rate": 4.892537586002613e-06, + "loss": 17.2359, + "step": 28538 + }, + { + "epoch": 0.5216699873873545, + "grad_norm": 5.992399578919574, + "learning_rate": 4.8922416430288465e-06, + "loss": 17.6229, + "step": 28539 + }, + { + "epoch": 0.521688266583801, + "grad_norm": 5.756647339843333, + "learning_rate": 4.891945700432762e-06, + "loss": 17.437, + "step": 28540 + }, + { + "epoch": 0.5217065457802476, + "grad_norm": 7.315163324355285, + "learning_rate": 4.8916497582154015e-06, + "loss": 17.5926, + "step": 28541 + }, + { + "epoch": 0.521724824976694, + "grad_norm": 6.881487766683682, + "learning_rate": 4.891353816377798e-06, + "loss": 17.6835, + "step": 28542 + }, + { + "epoch": 0.5217431041731405, + "grad_norm": 5.535896958764953, + "learning_rate": 4.891057874920989e-06, + "loss": 17.2321, + "step": 28543 + }, + { + "epoch": 0.5217613833695871, + "grad_norm": 7.536160248991853, + "learning_rate": 4.890761933846014e-06, + "loss": 18.0044, + "step": 28544 + }, + { + "epoch": 0.5217796625660336, + "grad_norm": 6.413927128110615, + "learning_rate": 4.890465993153909e-06, + "loss": 17.4872, + "step": 28545 + }, + { + "epoch": 0.5217979417624801, + "grad_norm": 7.2266632095213605, + "learning_rate": 4.8901700528457094e-06, + "loss": 17.9273, + "step": 28546 + }, + { + "epoch": 0.5218162209589267, + "grad_norm": 5.267232293352199, + "learning_rate": 4.889874112922457e-06, + "loss": 17.1158, + "step": 28547 + }, + { + "epoch": 0.5218345001553731, + "grad_norm": 5.643571977378045, + "learning_rate": 4.889578173385184e-06, + "loss": 17.1832, + "step": 28548 + }, + { + "epoch": 0.5218527793518197, + "grad_norm": 6.05130260401698, + "learning_rate": 4.889282234234929e-06, + "loss": 17.6666, + "step": 28549 + }, + { + "epoch": 0.5218710585482662, + "grad_norm": 7.328747357004344, + "learning_rate": 4.8889862954727325e-06, + "loss": 17.2468, + "step": 28550 + }, + { + "epoch": 0.5218893377447127, + "grad_norm": 5.400184968760882, + "learning_rate": 4.888690357099628e-06, + "loss": 17.0291, + "step": 28551 + }, + { + "epoch": 0.5219076169411593, + "grad_norm": 7.448530732505496, + "learning_rate": 4.888394419116656e-06, + "loss": 18.0796, + "step": 28552 + }, + { + "epoch": 0.5219258961376058, + "grad_norm": 7.59018770133678, + "learning_rate": 4.88809848152485e-06, + "loss": 18.0727, + "step": 28553 + }, + { + "epoch": 0.5219441753340524, + "grad_norm": 8.839733041001011, + "learning_rate": 4.887802544325249e-06, + "loss": 18.1013, + "step": 28554 + }, + { + "epoch": 0.5219624545304988, + "grad_norm": 5.991160633309579, + "learning_rate": 4.887506607518892e-06, + "loss": 17.3717, + "step": 28555 + }, + { + "epoch": 0.5219807337269453, + "grad_norm": 5.253203731855516, + "learning_rate": 4.887210671106814e-06, + "loss": 16.9025, + "step": 28556 + }, + { + "epoch": 0.5219990129233919, + "grad_norm": 6.76671894110189, + "learning_rate": 4.886914735090053e-06, + "loss": 17.3081, + "step": 28557 + }, + { + "epoch": 0.5220172921198384, + "grad_norm": 7.6298661703180715, + "learning_rate": 4.886618799469644e-06, + "loss": 17.8484, + "step": 28558 + }, + { + "epoch": 0.522035571316285, + "grad_norm": 7.2541321687389235, + "learning_rate": 4.88632286424663e-06, + "loss": 17.596, + "step": 28559 + }, + { + "epoch": 0.5220538505127315, + "grad_norm": 6.760886726101878, + "learning_rate": 4.886026929422041e-06, + "loss": 17.3026, + "step": 28560 + }, + { + "epoch": 0.5220721297091779, + "grad_norm": 5.676519567750497, + "learning_rate": 
4.885730994996919e-06, + "loss": 17.1263, + "step": 28561 + }, + { + "epoch": 0.5220904089056245, + "grad_norm": 7.21428551073307, + "learning_rate": 4.8854350609723e-06, + "loss": 17.8924, + "step": 28562 + }, + { + "epoch": 0.522108688102071, + "grad_norm": 6.416671753889042, + "learning_rate": 4.885139127349221e-06, + "loss": 17.4339, + "step": 28563 + }, + { + "epoch": 0.5221269672985176, + "grad_norm": 8.085624888739334, + "learning_rate": 4.88484319412872e-06, + "loss": 17.1029, + "step": 28564 + }, + { + "epoch": 0.5221452464949641, + "grad_norm": 6.661496340731978, + "learning_rate": 4.884547261311833e-06, + "loss": 17.5654, + "step": 28565 + }, + { + "epoch": 0.5221635256914106, + "grad_norm": 6.730843745277235, + "learning_rate": 4.884251328899598e-06, + "loss": 17.7507, + "step": 28566 + }, + { + "epoch": 0.5221818048878571, + "grad_norm": 5.203887170605507, + "learning_rate": 4.883955396893053e-06, + "loss": 17.069, + "step": 28567 + }, + { + "epoch": 0.5222000840843036, + "grad_norm": 5.702194749382778, + "learning_rate": 4.883659465293231e-06, + "loss": 17.133, + "step": 28568 + }, + { + "epoch": 0.5222183632807502, + "grad_norm": 6.531802448548897, + "learning_rate": 4.883363534101176e-06, + "loss": 17.7237, + "step": 28569 + }, + { + "epoch": 0.5222366424771967, + "grad_norm": 5.5281364685397865, + "learning_rate": 4.8830676033179205e-06, + "loss": 17.1396, + "step": 28570 + }, + { + "epoch": 0.5222549216736432, + "grad_norm": 7.158311541678934, + "learning_rate": 4.882771672944502e-06, + "loss": 17.2343, + "step": 28571 + }, + { + "epoch": 0.5222732008700898, + "grad_norm": 6.385483693936126, + "learning_rate": 4.88247574298196e-06, + "loss": 17.3761, + "step": 28572 + }, + { + "epoch": 0.5222914800665363, + "grad_norm": 6.575685810924026, + "learning_rate": 4.882179813431331e-06, + "loss": 17.5746, + "step": 28573 + }, + { + "epoch": 0.5223097592629828, + "grad_norm": 5.620502338011076, + "learning_rate": 4.88188388429365e-06, + "loss": 17.1279, + "step": 28574 + }, + { + "epoch": 0.5223280384594293, + "grad_norm": 5.458200354751128, + "learning_rate": 4.881587955569955e-06, + "loss": 17.1399, + "step": 28575 + }, + { + "epoch": 0.5223463176558758, + "grad_norm": 6.698885551026733, + "learning_rate": 4.881292027261286e-06, + "loss": 17.4983, + "step": 28576 + }, + { + "epoch": 0.5223645968523224, + "grad_norm": 5.175300885480785, + "learning_rate": 4.880996099368677e-06, + "loss": 17.091, + "step": 28577 + }, + { + "epoch": 0.5223828760487689, + "grad_norm": 5.456395267418592, + "learning_rate": 4.880700171893167e-06, + "loss": 16.9812, + "step": 28578 + }, + { + "epoch": 0.5224011552452155, + "grad_norm": 6.832448428618826, + "learning_rate": 4.880404244835792e-06, + "loss": 17.8258, + "step": 28579 + }, + { + "epoch": 0.522419434441662, + "grad_norm": 6.287832994819546, + "learning_rate": 4.880108318197588e-06, + "loss": 17.7479, + "step": 28580 + }, + { + "epoch": 0.5224377136381084, + "grad_norm": 5.5577425560173435, + "learning_rate": 4.879812391979598e-06, + "loss": 17.223, + "step": 28581 + }, + { + "epoch": 0.522455992834555, + "grad_norm": 6.121673541690735, + "learning_rate": 4.8795164661828505e-06, + "loss": 17.3787, + "step": 28582 + }, + { + "epoch": 0.5224742720310015, + "grad_norm": 5.665615467954876, + "learning_rate": 4.8792205408083915e-06, + "loss": 17.0907, + "step": 28583 + }, + { + "epoch": 0.5224925512274481, + "grad_norm": 8.032322676181629, + "learning_rate": 4.878924615857252e-06, + "loss": 18.0102, + "step": 28584 + }, + { + "epoch": 
0.5225108304238946, + "grad_norm": 6.827180904425209, + "learning_rate": 4.87862869133047e-06, + "loss": 17.6581, + "step": 28585 + }, + { + "epoch": 0.522529109620341, + "grad_norm": 5.908271524091968, + "learning_rate": 4.878332767229086e-06, + "loss": 17.2425, + "step": 28586 + }, + { + "epoch": 0.5225473888167876, + "grad_norm": 5.935408688879286, + "learning_rate": 4.878036843554136e-06, + "loss": 17.4134, + "step": 28587 + }, + { + "epoch": 0.5225656680132341, + "grad_norm": 5.779912781779714, + "learning_rate": 4.877740920306654e-06, + "loss": 17.2493, + "step": 28588 + }, + { + "epoch": 0.5225839472096807, + "grad_norm": 6.2411674179792795, + "learning_rate": 4.87744499748768e-06, + "loss": 17.5038, + "step": 28589 + }, + { + "epoch": 0.5226022264061272, + "grad_norm": 6.749663808532885, + "learning_rate": 4.877149075098251e-06, + "loss": 17.7859, + "step": 28590 + }, + { + "epoch": 0.5226205056025737, + "grad_norm": 6.399509929554015, + "learning_rate": 4.8768531531394035e-06, + "loss": 17.3995, + "step": 28591 + }, + { + "epoch": 0.5226387847990203, + "grad_norm": 5.718918710110861, + "learning_rate": 4.876557231612176e-06, + "loss": 17.365, + "step": 28592 + }, + { + "epoch": 0.5226570639954667, + "grad_norm": 6.330719956222392, + "learning_rate": 4.876261310517604e-06, + "loss": 17.404, + "step": 28593 + }, + { + "epoch": 0.5226753431919133, + "grad_norm": 6.570372225377883, + "learning_rate": 4.875965389856724e-06, + "loss": 17.7979, + "step": 28594 + }, + { + "epoch": 0.5226936223883598, + "grad_norm": 6.830668800842742, + "learning_rate": 4.875669469630577e-06, + "loss": 17.7618, + "step": 28595 + }, + { + "epoch": 0.5227119015848063, + "grad_norm": 7.904864307882776, + "learning_rate": 4.875373549840197e-06, + "loss": 18.4144, + "step": 28596 + }, + { + "epoch": 0.5227301807812529, + "grad_norm": 5.606781549605206, + "learning_rate": 4.87507763048662e-06, + "loss": 17.2982, + "step": 28597 + }, + { + "epoch": 0.5227484599776994, + "grad_norm": 4.72959210553153, + "learning_rate": 4.874781711570886e-06, + "loss": 16.8386, + "step": 28598 + }, + { + "epoch": 0.522766739174146, + "grad_norm": 6.458947639581319, + "learning_rate": 4.874485793094031e-06, + "loss": 17.894, + "step": 28599 + }, + { + "epoch": 0.5227850183705924, + "grad_norm": 5.920819838995126, + "learning_rate": 4.874189875057094e-06, + "loss": 17.1354, + "step": 28600 + }, + { + "epoch": 0.5228032975670389, + "grad_norm": 6.349242723956153, + "learning_rate": 4.873893957461111e-06, + "loss": 17.4207, + "step": 28601 + }, + { + "epoch": 0.5228215767634855, + "grad_norm": 6.208539077079403, + "learning_rate": 4.873598040307116e-06, + "loss": 17.4685, + "step": 28602 + }, + { + "epoch": 0.522839855959932, + "grad_norm": 7.317078508463828, + "learning_rate": 4.87330212359615e-06, + "loss": 17.7244, + "step": 28603 + }, + { + "epoch": 0.5228581351563786, + "grad_norm": 6.723673178658345, + "learning_rate": 4.873006207329251e-06, + "loss": 17.4472, + "step": 28604 + }, + { + "epoch": 0.5228764143528251, + "grad_norm": 6.0828055766578695, + "learning_rate": 4.872710291507452e-06, + "loss": 17.403, + "step": 28605 + }, + { + "epoch": 0.5228946935492715, + "grad_norm": 5.220242569578839, + "learning_rate": 4.872414376131793e-06, + "loss": 17.1341, + "step": 28606 + }, + { + "epoch": 0.5229129727457181, + "grad_norm": 5.79877143035889, + "learning_rate": 4.872118461203311e-06, + "loss": 17.2194, + "step": 28607 + }, + { + "epoch": 0.5229312519421646, + "grad_norm": 6.908868440391969, + "learning_rate": 
4.871822546723041e-06, + "loss": 17.7, + "step": 28608 + }, + { + "epoch": 0.5229495311386112, + "grad_norm": 5.969050187671405, + "learning_rate": 4.871526632692024e-06, + "loss": 17.2985, + "step": 28609 + }, + { + "epoch": 0.5229678103350577, + "grad_norm": 6.39895815571215, + "learning_rate": 4.871230719111295e-06, + "loss": 17.659, + "step": 28610 + }, + { + "epoch": 0.5229860895315042, + "grad_norm": 6.329881816727737, + "learning_rate": 4.870934805981889e-06, + "loss": 17.436, + "step": 28611 + }, + { + "epoch": 0.5230043687279508, + "grad_norm": 5.993809150442772, + "learning_rate": 4.870638893304846e-06, + "loss": 17.5049, + "step": 28612 + }, + { + "epoch": 0.5230226479243972, + "grad_norm": 8.424532172633825, + "learning_rate": 4.870342981081202e-06, + "loss": 17.9168, + "step": 28613 + }, + { + "epoch": 0.5230409271208437, + "grad_norm": 7.646587726971442, + "learning_rate": 4.870047069311997e-06, + "loss": 17.668, + "step": 28614 + }, + { + "epoch": 0.5230592063172903, + "grad_norm": 5.686314147004758, + "learning_rate": 4.869751157998264e-06, + "loss": 17.1126, + "step": 28615 + }, + { + "epoch": 0.5230774855137368, + "grad_norm": 6.036282928313144, + "learning_rate": 4.8694552471410425e-06, + "loss": 17.433, + "step": 28616 + }, + { + "epoch": 0.5230957647101834, + "grad_norm": 6.994720058312239, + "learning_rate": 4.869159336741369e-06, + "loss": 17.9136, + "step": 28617 + }, + { + "epoch": 0.5231140439066299, + "grad_norm": 5.934655269276345, + "learning_rate": 4.868863426800281e-06, + "loss": 17.4217, + "step": 28618 + }, + { + "epoch": 0.5231323231030763, + "grad_norm": 5.382820492933522, + "learning_rate": 4.868567517318813e-06, + "loss": 17.0695, + "step": 28619 + }, + { + "epoch": 0.5231506022995229, + "grad_norm": 6.584344094198239, + "learning_rate": 4.8682716082980065e-06, + "loss": 17.3006, + "step": 28620 + }, + { + "epoch": 0.5231688814959694, + "grad_norm": 6.533655367891564, + "learning_rate": 4.867975699738897e-06, + "loss": 17.6225, + "step": 28621 + }, + { + "epoch": 0.523187160692416, + "grad_norm": 5.519509156261628, + "learning_rate": 4.8676797916425194e-06, + "loss": 17.1227, + "step": 28622 + }, + { + "epoch": 0.5232054398888625, + "grad_norm": 4.932508531538405, + "learning_rate": 4.867383884009915e-06, + "loss": 16.9027, + "step": 28623 + }, + { + "epoch": 0.523223719085309, + "grad_norm": 7.522018420607516, + "learning_rate": 4.8670879768421176e-06, + "loss": 17.8257, + "step": 28624 + }, + { + "epoch": 0.5232419982817555, + "grad_norm": 6.990985213410393, + "learning_rate": 4.866792070140164e-06, + "loss": 17.6341, + "step": 28625 + }, + { + "epoch": 0.523260277478202, + "grad_norm": 6.369858785666854, + "learning_rate": 4.866496163905094e-06, + "loss": 17.4816, + "step": 28626 + }, + { + "epoch": 0.5232785566746486, + "grad_norm": 7.279355469306909, + "learning_rate": 4.866200258137944e-06, + "loss": 17.7265, + "step": 28627 + }, + { + "epoch": 0.5232968358710951, + "grad_norm": 6.3077888297320435, + "learning_rate": 4.8659043528397484e-06, + "loss": 17.2586, + "step": 28628 + }, + { + "epoch": 0.5233151150675416, + "grad_norm": 6.007628964170743, + "learning_rate": 4.865608448011547e-06, + "loss": 17.5715, + "step": 28629 + }, + { + "epoch": 0.5233333942639882, + "grad_norm": 7.651909580589494, + "learning_rate": 4.865312543654376e-06, + "loss": 17.7631, + "step": 28630 + }, + { + "epoch": 0.5233516734604347, + "grad_norm": 5.473384197437844, + "learning_rate": 4.865016639769275e-06, + "loss": 17.071, + "step": 28631 + }, + { + "epoch": 
0.5233699526568812, + "grad_norm": 6.458577390802869, + "learning_rate": 4.8647207363572785e-06, + "loss": 17.3749, + "step": 28632 + }, + { + "epoch": 0.5233882318533277, + "grad_norm": 4.945032555018459, + "learning_rate": 4.864424833419422e-06, + "loss": 17.1227, + "step": 28633 + }, + { + "epoch": 0.5234065110497742, + "grad_norm": 7.19139568278267, + "learning_rate": 4.864128930956746e-06, + "loss": 17.7834, + "step": 28634 + }, + { + "epoch": 0.5234247902462208, + "grad_norm": 5.468476556153672, + "learning_rate": 4.863833028970287e-06, + "loss": 16.9363, + "step": 28635 + }, + { + "epoch": 0.5234430694426673, + "grad_norm": 5.719976840796176, + "learning_rate": 4.86353712746108e-06, + "loss": 17.2282, + "step": 28636 + }, + { + "epoch": 0.5234613486391139, + "grad_norm": 5.927814075489786, + "learning_rate": 4.8632412264301645e-06, + "loss": 17.2721, + "step": 28637 + }, + { + "epoch": 0.5234796278355603, + "grad_norm": 5.846840195343381, + "learning_rate": 4.862945325878576e-06, + "loss": 16.983, + "step": 28638 + }, + { + "epoch": 0.5234979070320068, + "grad_norm": 6.206621822029001, + "learning_rate": 4.862649425807352e-06, + "loss": 17.364, + "step": 28639 + }, + { + "epoch": 0.5235161862284534, + "grad_norm": 6.532029385335391, + "learning_rate": 4.862353526217531e-06, + "loss": 17.4888, + "step": 28640 + }, + { + "epoch": 0.5235344654248999, + "grad_norm": 5.1130084455203635, + "learning_rate": 4.86205762711015e-06, + "loss": 17.0225, + "step": 28641 + }, + { + "epoch": 0.5235527446213465, + "grad_norm": 6.754188500007389, + "learning_rate": 4.861761728486242e-06, + "loss": 17.8322, + "step": 28642 + }, + { + "epoch": 0.523571023817793, + "grad_norm": 6.50366454538655, + "learning_rate": 4.861465830346848e-06, + "loss": 17.4779, + "step": 28643 + }, + { + "epoch": 0.5235893030142394, + "grad_norm": 9.49622504661568, + "learning_rate": 4.861169932693004e-06, + "loss": 18.3106, + "step": 28644 + }, + { + "epoch": 0.523607582210686, + "grad_norm": 5.73110752522359, + "learning_rate": 4.8608740355257485e-06, + "loss": 17.0343, + "step": 28645 + }, + { + "epoch": 0.5236258614071325, + "grad_norm": 6.458243870159093, + "learning_rate": 4.860578138846119e-06, + "loss": 17.5289, + "step": 28646 + }, + { + "epoch": 0.5236441406035791, + "grad_norm": 6.777740444972973, + "learning_rate": 4.860282242655147e-06, + "loss": 17.5169, + "step": 28647 + }, + { + "epoch": 0.5236624198000256, + "grad_norm": 7.175666162289647, + "learning_rate": 4.859986346953876e-06, + "loss": 17.5742, + "step": 28648 + }, + { + "epoch": 0.5236806989964721, + "grad_norm": 6.058726642855046, + "learning_rate": 4.859690451743341e-06, + "loss": 17.0269, + "step": 28649 + }, + { + "epoch": 0.5236989781929187, + "grad_norm": 7.504244029549141, + "learning_rate": 4.8593945570245776e-06, + "loss": 17.9352, + "step": 28650 + }, + { + "epoch": 0.5237172573893651, + "grad_norm": 5.674543523973336, + "learning_rate": 4.859098662798625e-06, + "loss": 17.101, + "step": 28651 + }, + { + "epoch": 0.5237355365858117, + "grad_norm": 5.386776452218993, + "learning_rate": 4.858802769066519e-06, + "loss": 16.987, + "step": 28652 + }, + { + "epoch": 0.5237538157822582, + "grad_norm": 5.170912022530876, + "learning_rate": 4.858506875829297e-06, + "loss": 16.9947, + "step": 28653 + }, + { + "epoch": 0.5237720949787047, + "grad_norm": 6.497979812330839, + "learning_rate": 4.858210983087997e-06, + "loss": 17.4098, + "step": 28654 + }, + { + "epoch": 0.5237903741751513, + "grad_norm": 5.396850166875037, + "learning_rate": 
4.857915090843655e-06, + "loss": 16.8639, + "step": 28655 + }, + { + "epoch": 0.5238086533715978, + "grad_norm": 8.161069659580226, + "learning_rate": 4.857619199097307e-06, + "loss": 18.3507, + "step": 28656 + }, + { + "epoch": 0.5238269325680444, + "grad_norm": 7.427932251449674, + "learning_rate": 4.857323307849992e-06, + "loss": 18.1129, + "step": 28657 + }, + { + "epoch": 0.5238452117644908, + "grad_norm": 7.23740741971263, + "learning_rate": 4.857027417102744e-06, + "loss": 17.7084, + "step": 28658 + }, + { + "epoch": 0.5238634909609373, + "grad_norm": 6.949649628765428, + "learning_rate": 4.856731526856607e-06, + "loss": 17.7779, + "step": 28659 + }, + { + "epoch": 0.5238817701573839, + "grad_norm": 6.939305662249475, + "learning_rate": 4.856435637112612e-06, + "loss": 17.4437, + "step": 28660 + }, + { + "epoch": 0.5239000493538304, + "grad_norm": 8.040513767106534, + "learning_rate": 4.856139747871796e-06, + "loss": 17.8194, + "step": 28661 + }, + { + "epoch": 0.523918328550277, + "grad_norm": 5.234498276910926, + "learning_rate": 4.8558438591351984e-06, + "loss": 16.9883, + "step": 28662 + }, + { + "epoch": 0.5239366077467235, + "grad_norm": 5.6882015636197725, + "learning_rate": 4.8555479709038575e-06, + "loss": 17.1606, + "step": 28663 + }, + { + "epoch": 0.5239548869431699, + "grad_norm": 5.740407646631591, + "learning_rate": 4.855252083178806e-06, + "loss": 17.3319, + "step": 28664 + }, + { + "epoch": 0.5239731661396165, + "grad_norm": 5.718372727511673, + "learning_rate": 4.854956195961085e-06, + "loss": 16.933, + "step": 28665 + }, + { + "epoch": 0.523991445336063, + "grad_norm": 6.6420807768311025, + "learning_rate": 4.854660309251729e-06, + "loss": 17.8021, + "step": 28666 + }, + { + "epoch": 0.5240097245325096, + "grad_norm": 6.545572116505617, + "learning_rate": 4.854364423051775e-06, + "loss": 17.6041, + "step": 28667 + }, + { + "epoch": 0.5240280037289561, + "grad_norm": 7.023776003366043, + "learning_rate": 4.854068537362264e-06, + "loss": 17.6512, + "step": 28668 + }, + { + "epoch": 0.5240462829254026, + "grad_norm": 6.3335694738473, + "learning_rate": 4.853772652184229e-06, + "loss": 17.7214, + "step": 28669 + }, + { + "epoch": 0.5240645621218492, + "grad_norm": 8.615002949894881, + "learning_rate": 4.853476767518706e-06, + "loss": 18.0568, + "step": 28670 + }, + { + "epoch": 0.5240828413182956, + "grad_norm": 5.987409289706991, + "learning_rate": 4.853180883366736e-06, + "loss": 17.322, + "step": 28671 + }, + { + "epoch": 0.5241011205147422, + "grad_norm": 14.72844723554283, + "learning_rate": 4.8528849997293556e-06, + "loss": 17.903, + "step": 28672 + }, + { + "epoch": 0.5241193997111887, + "grad_norm": 7.502380063868678, + "learning_rate": 4.852589116607597e-06, + "loss": 17.6554, + "step": 28673 + }, + { + "epoch": 0.5241376789076352, + "grad_norm": 5.156945823290601, + "learning_rate": 4.852293234002505e-06, + "loss": 16.9923, + "step": 28674 + }, + { + "epoch": 0.5241559581040818, + "grad_norm": 6.0739783576531305, + "learning_rate": 4.851997351915108e-06, + "loss": 17.3553, + "step": 28675 + }, + { + "epoch": 0.5241742373005283, + "grad_norm": 6.034195832265106, + "learning_rate": 4.851701470346449e-06, + "loss": 17.1783, + "step": 28676 + }, + { + "epoch": 0.5241925164969748, + "grad_norm": 5.664087929063006, + "learning_rate": 4.851405589297566e-06, + "loss": 17.241, + "step": 28677 + }, + { + "epoch": 0.5242107956934213, + "grad_norm": 6.075340909033961, + "learning_rate": 4.85110970876949e-06, + "loss": 17.2628, + "step": 28678 + }, + { + "epoch": 
0.5242290748898678, + "grad_norm": 6.765987722082619, + "learning_rate": 4.850813828763264e-06, + "loss": 17.7013, + "step": 28679 + }, + { + "epoch": 0.5242473540863144, + "grad_norm": 5.71274771896953, + "learning_rate": 4.850517949279922e-06, + "loss": 17.2508, + "step": 28680 + }, + { + "epoch": 0.5242656332827609, + "grad_norm": 6.519198357361352, + "learning_rate": 4.8502220703205e-06, + "loss": 17.5417, + "step": 28681 + }, + { + "epoch": 0.5242839124792074, + "grad_norm": 6.3193823278258, + "learning_rate": 4.849926191886039e-06, + "loss": 17.4188, + "step": 28682 + }, + { + "epoch": 0.524302191675654, + "grad_norm": 5.507761081687611, + "learning_rate": 4.849630313977573e-06, + "loss": 17.1608, + "step": 28683 + }, + { + "epoch": 0.5243204708721004, + "grad_norm": 5.776666122232264, + "learning_rate": 4.849334436596139e-06, + "loss": 16.9994, + "step": 28684 + }, + { + "epoch": 0.524338750068547, + "grad_norm": 7.7390542660983765, + "learning_rate": 4.8490385597427745e-06, + "loss": 18.1266, + "step": 28685 + }, + { + "epoch": 0.5243570292649935, + "grad_norm": 6.413963591555267, + "learning_rate": 4.848742683418519e-06, + "loss": 17.3086, + "step": 28686 + }, + { + "epoch": 0.52437530846144, + "grad_norm": 6.415781633774691, + "learning_rate": 4.848446807624404e-06, + "loss": 17.5619, + "step": 28687 + }, + { + "epoch": 0.5243935876578866, + "grad_norm": 6.41271687883417, + "learning_rate": 4.848150932361471e-06, + "loss": 17.5939, + "step": 28688 + }, + { + "epoch": 0.524411866854333, + "grad_norm": 6.594850199120288, + "learning_rate": 4.847855057630756e-06, + "loss": 17.723, + "step": 28689 + }, + { + "epoch": 0.5244301460507796, + "grad_norm": 6.550198528890269, + "learning_rate": 4.847559183433296e-06, + "loss": 17.7492, + "step": 28690 + }, + { + "epoch": 0.5244484252472261, + "grad_norm": 7.220340798436696, + "learning_rate": 4.847263309770129e-06, + "loss": 17.8352, + "step": 28691 + }, + { + "epoch": 0.5244667044436726, + "grad_norm": 5.511075521973555, + "learning_rate": 4.8469674366422885e-06, + "loss": 17.3616, + "step": 28692 + }, + { + "epoch": 0.5244849836401192, + "grad_norm": 6.07969437251479, + "learning_rate": 4.846671564050815e-06, + "loss": 17.2891, + "step": 28693 + }, + { + "epoch": 0.5245032628365657, + "grad_norm": 6.277042181576031, + "learning_rate": 4.846375691996745e-06, + "loss": 17.4882, + "step": 28694 + }, + { + "epoch": 0.5245215420330123, + "grad_norm": 8.152968328070358, + "learning_rate": 4.846079820481113e-06, + "loss": 17.9782, + "step": 28695 + }, + { + "epoch": 0.5245398212294587, + "grad_norm": 6.791582980957194, + "learning_rate": 4.845783949504961e-06, + "loss": 17.5009, + "step": 28696 + }, + { + "epoch": 0.5245581004259052, + "grad_norm": 6.489858402076207, + "learning_rate": 4.84548807906932e-06, + "loss": 17.6425, + "step": 28697 + }, + { + "epoch": 0.5245763796223518, + "grad_norm": 6.854646747146825, + "learning_rate": 4.84519220917523e-06, + "loss": 17.766, + "step": 28698 + }, + { + "epoch": 0.5245946588187983, + "grad_norm": 7.4948507071382275, + "learning_rate": 4.844896339823731e-06, + "loss": 17.9889, + "step": 28699 + }, + { + "epoch": 0.5246129380152449, + "grad_norm": 7.274832398975454, + "learning_rate": 4.844600471015855e-06, + "loss": 17.7356, + "step": 28700 + }, + { + "epoch": 0.5246312172116914, + "grad_norm": 7.592434054060814, + "learning_rate": 4.8443046027526395e-06, + "loss": 17.7565, + "step": 28701 + }, + { + "epoch": 0.5246494964081378, + "grad_norm": 7.306628034976432, + "learning_rate": 
4.844008735035124e-06, + "loss": 17.8487, + "step": 28702 + }, + { + "epoch": 0.5246677756045844, + "grad_norm": 8.127512072239806, + "learning_rate": 4.843712867864345e-06, + "loss": 17.7852, + "step": 28703 + }, + { + "epoch": 0.5246860548010309, + "grad_norm": 6.608577482738582, + "learning_rate": 4.843417001241336e-06, + "loss": 17.7183, + "step": 28704 + }, + { + "epoch": 0.5247043339974775, + "grad_norm": 5.1392686140284765, + "learning_rate": 4.84312113516714e-06, + "loss": 16.972, + "step": 28705 + }, + { + "epoch": 0.524722613193924, + "grad_norm": 5.622460930992014, + "learning_rate": 4.8428252696427884e-06, + "loss": 17.2805, + "step": 28706 + }, + { + "epoch": 0.5247408923903705, + "grad_norm": 6.728131733473552, + "learning_rate": 4.842529404669322e-06, + "loss": 17.5892, + "step": 28707 + }, + { + "epoch": 0.5247591715868171, + "grad_norm": 6.667787341536187, + "learning_rate": 4.842233540247777e-06, + "loss": 17.4749, + "step": 28708 + }, + { + "epoch": 0.5247774507832635, + "grad_norm": 6.431972698734262, + "learning_rate": 4.841937676379188e-06, + "loss": 17.8282, + "step": 28709 + }, + { + "epoch": 0.5247957299797101, + "grad_norm": 6.122433842841607, + "learning_rate": 4.841641813064596e-06, + "loss": 17.6201, + "step": 28710 + }, + { + "epoch": 0.5248140091761566, + "grad_norm": 5.797297230345632, + "learning_rate": 4.841345950305034e-06, + "loss": 17.2922, + "step": 28711 + }, + { + "epoch": 0.5248322883726031, + "grad_norm": 5.655518893695661, + "learning_rate": 4.84105008810154e-06, + "loss": 17.2077, + "step": 28712 + }, + { + "epoch": 0.5248505675690497, + "grad_norm": 6.053768863837141, + "learning_rate": 4.840754226455154e-06, + "loss": 17.6577, + "step": 28713 + }, + { + "epoch": 0.5248688467654962, + "grad_norm": 5.519281016861334, + "learning_rate": 4.84045836536691e-06, + "loss": 17.2577, + "step": 28714 + }, + { + "epoch": 0.5248871259619428, + "grad_norm": 6.821674791896626, + "learning_rate": 4.840162504837844e-06, + "loss": 17.5838, + "step": 28715 + }, + { + "epoch": 0.5249054051583892, + "grad_norm": 7.131856104619915, + "learning_rate": 4.839866644868995e-06, + "loss": 17.3524, + "step": 28716 + }, + { + "epoch": 0.5249236843548357, + "grad_norm": 6.5408899826240345, + "learning_rate": 4.839570785461401e-06, + "loss": 17.4512, + "step": 28717 + }, + { + "epoch": 0.5249419635512823, + "grad_norm": 5.77154805686396, + "learning_rate": 4.839274926616096e-06, + "loss": 17.1084, + "step": 28718 + }, + { + "epoch": 0.5249602427477288, + "grad_norm": 5.1531795568693, + "learning_rate": 4.838979068334119e-06, + "loss": 17.1768, + "step": 28719 + }, + { + "epoch": 0.5249785219441754, + "grad_norm": 5.985471990358824, + "learning_rate": 4.838683210616505e-06, + "loss": 17.449, + "step": 28720 + }, + { + "epoch": 0.5249968011406219, + "grad_norm": 8.892860157466224, + "learning_rate": 4.838387353464295e-06, + "loss": 17.9445, + "step": 28721 + }, + { + "epoch": 0.5250150803370683, + "grad_norm": 6.0620276417726435, + "learning_rate": 4.838091496878522e-06, + "loss": 17.66, + "step": 28722 + }, + { + "epoch": 0.5250333595335149, + "grad_norm": 6.106682141346665, + "learning_rate": 4.837795640860224e-06, + "loss": 17.2064, + "step": 28723 + }, + { + "epoch": 0.5250516387299614, + "grad_norm": 6.211250277119349, + "learning_rate": 4.8374997854104385e-06, + "loss": 17.2898, + "step": 28724 + }, + { + "epoch": 0.525069917926408, + "grad_norm": 6.518622661517581, + "learning_rate": 4.8372039305302025e-06, + "loss": 17.5631, + "step": 28725 + }, + { + "epoch": 
0.5250881971228545, + "grad_norm": 6.1429629713118485, + "learning_rate": 4.836908076220551e-06, + "loss": 17.5944, + "step": 28726 + }, + { + "epoch": 0.525106476319301, + "grad_norm": 6.793614605633182, + "learning_rate": 4.836612222482524e-06, + "loss": 17.6654, + "step": 28727 + }, + { + "epoch": 0.5251247555157476, + "grad_norm": 5.079446359635824, + "learning_rate": 4.836316369317158e-06, + "loss": 16.9357, + "step": 28728 + }, + { + "epoch": 0.525143034712194, + "grad_norm": 7.305594957389146, + "learning_rate": 4.8360205167254865e-06, + "loss": 17.7642, + "step": 28729 + }, + { + "epoch": 0.5251613139086406, + "grad_norm": 7.259000192502282, + "learning_rate": 4.83572466470855e-06, + "loss": 18.3577, + "step": 28730 + }, + { + "epoch": 0.5251795931050871, + "grad_norm": 5.5184028071688385, + "learning_rate": 4.835428813267385e-06, + "loss": 17.3325, + "step": 28731 + }, + { + "epoch": 0.5251978723015336, + "grad_norm": 5.251727641357589, + "learning_rate": 4.8351329624030255e-06, + "loss": 16.9968, + "step": 28732 + }, + { + "epoch": 0.5252161514979802, + "grad_norm": 5.448063703813754, + "learning_rate": 4.834837112116514e-06, + "loss": 17.137, + "step": 28733 + }, + { + "epoch": 0.5252344306944267, + "grad_norm": 5.831568472431994, + "learning_rate": 4.8345412624088814e-06, + "loss": 17.2769, + "step": 28734 + }, + { + "epoch": 0.5252527098908732, + "grad_norm": 4.8287999413733305, + "learning_rate": 4.834245413281167e-06, + "loss": 16.7295, + "step": 28735 + }, + { + "epoch": 0.5252709890873197, + "grad_norm": 5.193900537815957, + "learning_rate": 4.83394956473441e-06, + "loss": 16.9692, + "step": 28736 + }, + { + "epoch": 0.5252892682837662, + "grad_norm": 7.100448094310603, + "learning_rate": 4.833653716769644e-06, + "loss": 17.5512, + "step": 28737 + }, + { + "epoch": 0.5253075474802128, + "grad_norm": 8.357750688280527, + "learning_rate": 4.8333578693879095e-06, + "loss": 17.7029, + "step": 28738 + }, + { + "epoch": 0.5253258266766593, + "grad_norm": 6.653940056829294, + "learning_rate": 4.833062022590239e-06, + "loss": 17.5639, + "step": 28739 + }, + { + "epoch": 0.5253441058731059, + "grad_norm": 5.77919097964156, + "learning_rate": 4.832766176377671e-06, + "loss": 17.1731, + "step": 28740 + }, + { + "epoch": 0.5253623850695524, + "grad_norm": 6.792309666282276, + "learning_rate": 4.832470330751245e-06, + "loss": 17.4062, + "step": 28741 + }, + { + "epoch": 0.5253806642659988, + "grad_norm": 5.637323340332414, + "learning_rate": 4.832174485711995e-06, + "loss": 17.2939, + "step": 28742 + }, + { + "epoch": 0.5253989434624454, + "grad_norm": 5.088149111599136, + "learning_rate": 4.831878641260959e-06, + "loss": 17.0118, + "step": 28743 + }, + { + "epoch": 0.5254172226588919, + "grad_norm": 6.665107883465812, + "learning_rate": 4.831582797399173e-06, + "loss": 17.8735, + "step": 28744 + }, + { + "epoch": 0.5254355018553385, + "grad_norm": 5.934339297536312, + "learning_rate": 4.831286954127677e-06, + "loss": 17.3122, + "step": 28745 + }, + { + "epoch": 0.525453781051785, + "grad_norm": 5.522877423594949, + "learning_rate": 4.830991111447503e-06, + "loss": 16.9918, + "step": 28746 + }, + { + "epoch": 0.5254720602482315, + "grad_norm": 6.352460929737988, + "learning_rate": 4.8306952693596916e-06, + "loss": 17.5666, + "step": 28747 + }, + { + "epoch": 0.525490339444678, + "grad_norm": 6.520708069835449, + "learning_rate": 4.83039942786528e-06, + "loss": 17.8456, + "step": 28748 + }, + { + "epoch": 0.5255086186411245, + "grad_norm": 6.266852236870579, + "learning_rate": 
4.8301035869653006e-06, + "loss": 17.4629, + "step": 28749 + }, + { + "epoch": 0.525526897837571, + "grad_norm": 5.3717780443408, + "learning_rate": 4.8298077466607965e-06, + "loss": 17.0446, + "step": 28750 + }, + { + "epoch": 0.5255451770340176, + "grad_norm": 8.064369400960787, + "learning_rate": 4.8295119069527984e-06, + "loss": 18.0764, + "step": 28751 + }, + { + "epoch": 0.5255634562304641, + "grad_norm": 5.994749865910325, + "learning_rate": 4.829216067842347e-06, + "loss": 17.3672, + "step": 28752 + }, + { + "epoch": 0.5255817354269107, + "grad_norm": 5.7969456856365715, + "learning_rate": 4.828920229330482e-06, + "loss": 17.4399, + "step": 28753 + }, + { + "epoch": 0.5256000146233571, + "grad_norm": 7.31493624844024, + "learning_rate": 4.8286243914182326e-06, + "loss": 18.06, + "step": 28754 + }, + { + "epoch": 0.5256182938198036, + "grad_norm": 5.753582751787242, + "learning_rate": 4.828328554106642e-06, + "loss": 17.3053, + "step": 28755 + }, + { + "epoch": 0.5256365730162502, + "grad_norm": 6.150802565740242, + "learning_rate": 4.828032717396744e-06, + "loss": 17.4962, + "step": 28756 + }, + { + "epoch": 0.5256548522126967, + "grad_norm": 6.206272552471612, + "learning_rate": 4.827736881289575e-06, + "loss": 17.394, + "step": 28757 + }, + { + "epoch": 0.5256731314091433, + "grad_norm": 6.901267374559965, + "learning_rate": 4.8274410457861764e-06, + "loss": 17.513, + "step": 28758 + }, + { + "epoch": 0.5256914106055898, + "grad_norm": 5.536056620931555, + "learning_rate": 4.8271452108875815e-06, + "loss": 17.0095, + "step": 28759 + }, + { + "epoch": 0.5257096898020363, + "grad_norm": 4.967540766595819, + "learning_rate": 4.826849376594825e-06, + "loss": 16.812, + "step": 28760 + }, + { + "epoch": 0.5257279689984828, + "grad_norm": 5.979739505266952, + "learning_rate": 4.826553542908948e-06, + "loss": 17.1347, + "step": 28761 + }, + { + "epoch": 0.5257462481949293, + "grad_norm": 5.699134976416894, + "learning_rate": 4.826257709830987e-06, + "loss": 17.4132, + "step": 28762 + }, + { + "epoch": 0.5257645273913759, + "grad_norm": 5.727878449317979, + "learning_rate": 4.825961877361975e-06, + "loss": 17.2631, + "step": 28763 + }, + { + "epoch": 0.5257828065878224, + "grad_norm": 7.249990447239445, + "learning_rate": 4.825666045502954e-06, + "loss": 17.3154, + "step": 28764 + }, + { + "epoch": 0.5258010857842689, + "grad_norm": 5.949054223126825, + "learning_rate": 4.825370214254958e-06, + "loss": 17.3387, + "step": 28765 + }, + { + "epoch": 0.5258193649807155, + "grad_norm": 6.098863907929176, + "learning_rate": 4.825074383619022e-06, + "loss": 17.4397, + "step": 28766 + }, + { + "epoch": 0.525837644177162, + "grad_norm": 6.686424863194278, + "learning_rate": 4.824778553596188e-06, + "loss": 17.3564, + "step": 28767 + }, + { + "epoch": 0.5258559233736085, + "grad_norm": 5.192021215463849, + "learning_rate": 4.824482724187488e-06, + "loss": 17.0585, + "step": 28768 + }, + { + "epoch": 0.525874202570055, + "grad_norm": 5.950977882136102, + "learning_rate": 4.8241868953939626e-06, + "loss": 17.1144, + "step": 28769 + }, + { + "epoch": 0.5258924817665015, + "grad_norm": 5.34094818584719, + "learning_rate": 4.823891067216645e-06, + "loss": 17.1881, + "step": 28770 + }, + { + "epoch": 0.5259107609629481, + "grad_norm": 6.856814190634476, + "learning_rate": 4.823595239656573e-06, + "loss": 17.6609, + "step": 28771 + }, + { + "epoch": 0.5259290401593946, + "grad_norm": 5.639556277583762, + "learning_rate": 4.823299412714788e-06, + "loss": 17.0494, + "step": 28772 + }, + { + "epoch": 
0.5259473193558412, + "grad_norm": 6.296680540643699, + "learning_rate": 4.823003586392322e-06, + "loss": 17.4254, + "step": 28773 + }, + { + "epoch": 0.5259655985522876, + "grad_norm": 6.73354810613954, + "learning_rate": 4.822707760690211e-06, + "loss": 17.8162, + "step": 28774 + }, + { + "epoch": 0.5259838777487341, + "grad_norm": 6.5525951676966345, + "learning_rate": 4.822411935609496e-06, + "loss": 17.1992, + "step": 28775 + }, + { + "epoch": 0.5260021569451807, + "grad_norm": 8.408037551361863, + "learning_rate": 4.822116111151212e-06, + "loss": 18.426, + "step": 28776 + }, + { + "epoch": 0.5260204361416272, + "grad_norm": 6.5854702943143675, + "learning_rate": 4.821820287316394e-06, + "loss": 17.6198, + "step": 28777 + }, + { + "epoch": 0.5260387153380738, + "grad_norm": 6.318287058444489, + "learning_rate": 4.821524464106082e-06, + "loss": 17.3501, + "step": 28778 + }, + { + "epoch": 0.5260569945345203, + "grad_norm": 5.133348364285311, + "learning_rate": 4.8212286415213095e-06, + "loss": 17.1092, + "step": 28779 + }, + { + "epoch": 0.5260752737309667, + "grad_norm": 7.3323036109316515, + "learning_rate": 4.820932819563115e-06, + "loss": 17.8787, + "step": 28780 + }, + { + "epoch": 0.5260935529274133, + "grad_norm": 7.388183691942203, + "learning_rate": 4.8206369982325375e-06, + "loss": 17.8646, + "step": 28781 + }, + { + "epoch": 0.5261118321238598, + "grad_norm": 5.480323312940068, + "learning_rate": 4.820341177530609e-06, + "loss": 17.3548, + "step": 28782 + }, + { + "epoch": 0.5261301113203064, + "grad_norm": 7.074633615873922, + "learning_rate": 4.820045357458372e-06, + "loss": 17.7369, + "step": 28783 + }, + { + "epoch": 0.5261483905167529, + "grad_norm": 7.247532050009481, + "learning_rate": 4.819749538016859e-06, + "loss": 18.0397, + "step": 28784 + }, + { + "epoch": 0.5261666697131994, + "grad_norm": 6.678261021602981, + "learning_rate": 4.819453719207107e-06, + "loss": 17.4542, + "step": 28785 + }, + { + "epoch": 0.526184948909646, + "grad_norm": 5.309826273261194, + "learning_rate": 4.819157901030156e-06, + "loss": 17.1165, + "step": 28786 + }, + { + "epoch": 0.5262032281060924, + "grad_norm": 7.008534551252991, + "learning_rate": 4.818862083487042e-06, + "loss": 18.041, + "step": 28787 + }, + { + "epoch": 0.526221507302539, + "grad_norm": 5.676848863987564, + "learning_rate": 4.818566266578797e-06, + "loss": 17.2307, + "step": 28788 + }, + { + "epoch": 0.5262397864989855, + "grad_norm": 6.501034094267944, + "learning_rate": 4.818270450306464e-06, + "loss": 17.7014, + "step": 28789 + }, + { + "epoch": 0.526258065695432, + "grad_norm": 5.961937982620032, + "learning_rate": 4.8179746346710775e-06, + "loss": 17.4626, + "step": 28790 + }, + { + "epoch": 0.5262763448918786, + "grad_norm": 5.322464779133777, + "learning_rate": 4.817678819673672e-06, + "loss": 17.1798, + "step": 28791 + }, + { + "epoch": 0.5262946240883251, + "grad_norm": 5.726952271737116, + "learning_rate": 4.817383005315289e-06, + "loss": 17.2633, + "step": 28792 + }, + { + "epoch": 0.5263129032847716, + "grad_norm": 7.5566700966965765, + "learning_rate": 4.8170871915969615e-06, + "loss": 18.2115, + "step": 28793 + }, + { + "epoch": 0.5263311824812181, + "grad_norm": 7.582494015258839, + "learning_rate": 4.816791378519726e-06, + "loss": 18.143, + "step": 28794 + }, + { + "epoch": 0.5263494616776646, + "grad_norm": 5.774045152665932, + "learning_rate": 4.8164955660846234e-06, + "loss": 17.3002, + "step": 28795 + }, + { + "epoch": 0.5263677408741112, + "grad_norm": 6.097955632273698, + "learning_rate": 
4.816199754292688e-06, + "loss": 17.2827, + "step": 28796 + }, + { + "epoch": 0.5263860200705577, + "grad_norm": 6.6630289248265795, + "learning_rate": 4.815903943144955e-06, + "loss": 17.2041, + "step": 28797 + }, + { + "epoch": 0.5264042992670043, + "grad_norm": 5.932300072459454, + "learning_rate": 4.815608132642462e-06, + "loss": 17.4194, + "step": 28798 + }, + { + "epoch": 0.5264225784634508, + "grad_norm": 4.999133806791074, + "learning_rate": 4.815312322786247e-06, + "loss": 17.0654, + "step": 28799 + }, + { + "epoch": 0.5264408576598972, + "grad_norm": 6.751328673344176, + "learning_rate": 4.8150165135773475e-06, + "loss": 17.9876, + "step": 28800 + }, + { + "epoch": 0.5264591368563438, + "grad_norm": 6.192437654670182, + "learning_rate": 4.814720705016799e-06, + "loss": 17.6106, + "step": 28801 + }, + { + "epoch": 0.5264774160527903, + "grad_norm": 7.794274628215912, + "learning_rate": 4.814424897105638e-06, + "loss": 17.8192, + "step": 28802 + }, + { + "epoch": 0.5264956952492369, + "grad_norm": 5.323126925697858, + "learning_rate": 4.8141290898449e-06, + "loss": 17.3072, + "step": 28803 + }, + { + "epoch": 0.5265139744456834, + "grad_norm": 7.398138475483236, + "learning_rate": 4.813833283235626e-06, + "loss": 18.1937, + "step": 28804 + }, + { + "epoch": 0.5265322536421299, + "grad_norm": 6.105252246422766, + "learning_rate": 4.8135374772788475e-06, + "loss": 17.2926, + "step": 28805 + }, + { + "epoch": 0.5265505328385764, + "grad_norm": 6.645956131776079, + "learning_rate": 4.813241671975607e-06, + "loss": 17.6453, + "step": 28806 + }, + { + "epoch": 0.5265688120350229, + "grad_norm": 5.998042468674564, + "learning_rate": 4.812945867326937e-06, + "loss": 17.3839, + "step": 28807 + }, + { + "epoch": 0.5265870912314695, + "grad_norm": 5.410285365863696, + "learning_rate": 4.812650063333874e-06, + "loss": 17.1879, + "step": 28808 + }, + { + "epoch": 0.526605370427916, + "grad_norm": 6.562702465446166, + "learning_rate": 4.8123542599974584e-06, + "loss": 17.4391, + "step": 28809 + }, + { + "epoch": 0.5266236496243625, + "grad_norm": 7.70544683800876, + "learning_rate": 4.812058457318724e-06, + "loss": 18.02, + "step": 28810 + }, + { + "epoch": 0.5266419288208091, + "grad_norm": 6.833309188303262, + "learning_rate": 4.811762655298707e-06, + "loss": 17.8452, + "step": 28811 + }, + { + "epoch": 0.5266602080172555, + "grad_norm": 5.996346710112719, + "learning_rate": 4.811466853938448e-06, + "loss": 17.3942, + "step": 28812 + }, + { + "epoch": 0.5266784872137021, + "grad_norm": 7.732941784560871, + "learning_rate": 4.811171053238978e-06, + "loss": 17.989, + "step": 28813 + }, + { + "epoch": 0.5266967664101486, + "grad_norm": 6.3369818548016354, + "learning_rate": 4.8108752532013405e-06, + "loss": 17.3566, + "step": 28814 + }, + { + "epoch": 0.5267150456065951, + "grad_norm": 7.1037375989936455, + "learning_rate": 4.810579453826568e-06, + "loss": 17.7607, + "step": 28815 + }, + { + "epoch": 0.5267333248030417, + "grad_norm": 5.608068240597165, + "learning_rate": 4.810283655115697e-06, + "loss": 17.1247, + "step": 28816 + }, + { + "epoch": 0.5267516039994882, + "grad_norm": 5.750561928620749, + "learning_rate": 4.809987857069766e-06, + "loss": 17.0579, + "step": 28817 + }, + { + "epoch": 0.5267698831959347, + "grad_norm": 7.964696187803064, + "learning_rate": 4.809692059689813e-06, + "loss": 18.233, + "step": 28818 + }, + { + "epoch": 0.5267881623923812, + "grad_norm": 5.694170250881407, + "learning_rate": 4.809396262976869e-06, + "loss": 17.548, + "step": 28819 + }, + { + "epoch": 
0.5268064415888277, + "grad_norm": 5.420672596971406, + "learning_rate": 4.809100466931976e-06, + "loss": 17.0734, + "step": 28820 + }, + { + "epoch": 0.5268247207852743, + "grad_norm": 6.091166944828002, + "learning_rate": 4.808804671556171e-06, + "loss": 17.3475, + "step": 28821 + }, + { + "epoch": 0.5268429999817208, + "grad_norm": 7.1580787462604, + "learning_rate": 4.8085088768504865e-06, + "loss": 17.9794, + "step": 28822 + }, + { + "epoch": 0.5268612791781673, + "grad_norm": 6.468301608303433, + "learning_rate": 4.808213082815964e-06, + "loss": 17.3555, + "step": 28823 + }, + { + "epoch": 0.5268795583746139, + "grad_norm": 6.112134604455029, + "learning_rate": 4.807917289453637e-06, + "loss": 17.3798, + "step": 28824 + }, + { + "epoch": 0.5268978375710603, + "grad_norm": 5.614336961777815, + "learning_rate": 4.807621496764542e-06, + "loss": 17.2696, + "step": 28825 + }, + { + "epoch": 0.5269161167675069, + "grad_norm": 5.986367120200508, + "learning_rate": 4.807325704749719e-06, + "loss": 17.4735, + "step": 28826 + }, + { + "epoch": 0.5269343959639534, + "grad_norm": 5.879731673740824, + "learning_rate": 4.8070299134102006e-06, + "loss": 17.4008, + "step": 28827 + }, + { + "epoch": 0.5269526751603999, + "grad_norm": 5.533548854367254, + "learning_rate": 4.806734122747028e-06, + "loss": 17.1912, + "step": 28828 + }, + { + "epoch": 0.5269709543568465, + "grad_norm": 7.511973988798581, + "learning_rate": 4.806438332761234e-06, + "loss": 17.5786, + "step": 28829 + }, + { + "epoch": 0.526989233553293, + "grad_norm": 6.706040907550385, + "learning_rate": 4.806142543453857e-06, + "loss": 17.6139, + "step": 28830 + }, + { + "epoch": 0.5270075127497396, + "grad_norm": 7.714620301900427, + "learning_rate": 4.805846754825934e-06, + "loss": 18.1718, + "step": 28831 + }, + { + "epoch": 0.527025791946186, + "grad_norm": 6.39458512092261, + "learning_rate": 4.805550966878502e-06, + "loss": 17.6294, + "step": 28832 + }, + { + "epoch": 0.5270440711426325, + "grad_norm": 6.84977405622963, + "learning_rate": 4.805255179612595e-06, + "loss": 17.7036, + "step": 28833 + }, + { + "epoch": 0.5270623503390791, + "grad_norm": 5.206935118393221, + "learning_rate": 4.804959393029253e-06, + "loss": 17.0615, + "step": 28834 + }, + { + "epoch": 0.5270806295355256, + "grad_norm": 6.176374157997757, + "learning_rate": 4.804663607129512e-06, + "loss": 17.5081, + "step": 28835 + }, + { + "epoch": 0.5270989087319722, + "grad_norm": 7.04104096253257, + "learning_rate": 4.804367821914406e-06, + "loss": 17.7588, + "step": 28836 + }, + { + "epoch": 0.5271171879284187, + "grad_norm": 6.64607566359931, + "learning_rate": 4.804072037384976e-06, + "loss": 17.6263, + "step": 28837 + }, + { + "epoch": 0.5271354671248651, + "grad_norm": 6.695391060144659, + "learning_rate": 4.803776253542256e-06, + "loss": 17.929, + "step": 28838 + }, + { + "epoch": 0.5271537463213117, + "grad_norm": 5.7556417819184755, + "learning_rate": 4.803480470387282e-06, + "loss": 17.2578, + "step": 28839 + }, + { + "epoch": 0.5271720255177582, + "grad_norm": 4.93780507451977, + "learning_rate": 4.803184687921093e-06, + "loss": 16.9088, + "step": 28840 + }, + { + "epoch": 0.5271903047142048, + "grad_norm": 6.562483807808549, + "learning_rate": 4.802888906144726e-06, + "loss": 17.7105, + "step": 28841 + }, + { + "epoch": 0.5272085839106513, + "grad_norm": 5.767657012647988, + "learning_rate": 4.8025931250592135e-06, + "loss": 17.2964, + "step": 28842 + }, + { + "epoch": 0.5272268631070978, + "grad_norm": 5.772017979605431, + "learning_rate": 
4.802297344665595e-06, + "loss": 17.3725, + "step": 28843 + }, + { + "epoch": 0.5272451423035444, + "grad_norm": 6.119249020066546, + "learning_rate": 4.802001564964908e-06, + "loss": 17.3731, + "step": 28844 + }, + { + "epoch": 0.5272634214999908, + "grad_norm": 6.198138569624086, + "learning_rate": 4.801705785958189e-06, + "loss": 17.6388, + "step": 28845 + }, + { + "epoch": 0.5272817006964374, + "grad_norm": 7.0646354782869265, + "learning_rate": 4.801410007646475e-06, + "loss": 17.3207, + "step": 28846 + }, + { + "epoch": 0.5272999798928839, + "grad_norm": 5.609250364045369, + "learning_rate": 4.801114230030799e-06, + "loss": 17.3197, + "step": 28847 + }, + { + "epoch": 0.5273182590893304, + "grad_norm": 5.920393323699768, + "learning_rate": 4.800818453112201e-06, + "loss": 17.3523, + "step": 28848 + }, + { + "epoch": 0.527336538285777, + "grad_norm": 6.555831430885845, + "learning_rate": 4.800522676891719e-06, + "loss": 17.5858, + "step": 28849 + }, + { + "epoch": 0.5273548174822235, + "grad_norm": 6.300141261394385, + "learning_rate": 4.800226901370385e-06, + "loss": 17.531, + "step": 28850 + }, + { + "epoch": 0.52737309667867, + "grad_norm": 6.621617771442162, + "learning_rate": 4.799931126549241e-06, + "loss": 17.3024, + "step": 28851 + }, + { + "epoch": 0.5273913758751165, + "grad_norm": 6.508352025331976, + "learning_rate": 4.79963535242932e-06, + "loss": 17.7978, + "step": 28852 + }, + { + "epoch": 0.527409655071563, + "grad_norm": 6.263305196475308, + "learning_rate": 4.799339579011658e-06, + "loss": 17.3442, + "step": 28853 + }, + { + "epoch": 0.5274279342680096, + "grad_norm": 5.870789556753149, + "learning_rate": 4.799043806297296e-06, + "loss": 17.2287, + "step": 28854 + }, + { + "epoch": 0.5274462134644561, + "grad_norm": 7.02265397337617, + "learning_rate": 4.798748034287268e-06, + "loss": 17.6731, + "step": 28855 + }, + { + "epoch": 0.5274644926609027, + "grad_norm": 5.772823389684692, + "learning_rate": 4.798452262982608e-06, + "loss": 16.9305, + "step": 28856 + }, + { + "epoch": 0.5274827718573492, + "grad_norm": 5.5524350645130145, + "learning_rate": 4.7981564923843575e-06, + "loss": 17.316, + "step": 28857 + }, + { + "epoch": 0.5275010510537956, + "grad_norm": 5.601942665543992, + "learning_rate": 4.797860722493549e-06, + "loss": 17.0819, + "step": 28858 + }, + { + "epoch": 0.5275193302502422, + "grad_norm": 6.174825587999008, + "learning_rate": 4.797564953311223e-06, + "loss": 17.2222, + "step": 28859 + }, + { + "epoch": 0.5275376094466887, + "grad_norm": 7.139356368902403, + "learning_rate": 4.797269184838415e-06, + "loss": 17.5805, + "step": 28860 + }, + { + "epoch": 0.5275558886431353, + "grad_norm": 7.096687345433783, + "learning_rate": 4.796973417076158e-06, + "loss": 17.9524, + "step": 28861 + }, + { + "epoch": 0.5275741678395818, + "grad_norm": 6.824117822901115, + "learning_rate": 4.796677650025493e-06, + "loss": 17.9426, + "step": 28862 + }, + { + "epoch": 0.5275924470360283, + "grad_norm": 4.971978942798934, + "learning_rate": 4.796381883687457e-06, + "loss": 17.0492, + "step": 28863 + }, + { + "epoch": 0.5276107262324748, + "grad_norm": 7.267242743867442, + "learning_rate": 4.7960861180630815e-06, + "loss": 17.8284, + "step": 28864 + }, + { + "epoch": 0.5276290054289213, + "grad_norm": 7.140995282182595, + "learning_rate": 4.7957903531534095e-06, + "loss": 17.4981, + "step": 28865 + }, + { + "epoch": 0.5276472846253679, + "grad_norm": 6.044278588423279, + "learning_rate": 4.7954945889594735e-06, + "loss": 17.3575, + "step": 28866 + }, + { + "epoch": 
0.5276655638218144, + "grad_norm": 6.165296503275143, + "learning_rate": 4.79519882548231e-06, + "loss": 17.7388, + "step": 28867 + }, + { + "epoch": 0.5276838430182609, + "grad_norm": 7.251876738000928, + "learning_rate": 4.794903062722959e-06, + "loss": 17.8625, + "step": 28868 + }, + { + "epoch": 0.5277021222147075, + "grad_norm": 5.316511572719626, + "learning_rate": 4.794607300682453e-06, + "loss": 17.2733, + "step": 28869 + }, + { + "epoch": 0.527720401411154, + "grad_norm": 4.96127738508365, + "learning_rate": 4.794311539361832e-06, + "loss": 16.9464, + "step": 28870 + }, + { + "epoch": 0.5277386806076005, + "grad_norm": 6.128694443346111, + "learning_rate": 4.79401577876213e-06, + "loss": 17.5274, + "step": 28871 + }, + { + "epoch": 0.527756959804047, + "grad_norm": 6.106806971745361, + "learning_rate": 4.793720018884387e-06, + "loss": 17.3225, + "step": 28872 + }, + { + "epoch": 0.5277752390004935, + "grad_norm": 7.468414650130117, + "learning_rate": 4.793424259729634e-06, + "loss": 17.8283, + "step": 28873 + }, + { + "epoch": 0.5277935181969401, + "grad_norm": 7.132019563148453, + "learning_rate": 4.7931285012989135e-06, + "loss": 17.6442, + "step": 28874 + }, + { + "epoch": 0.5278117973933866, + "grad_norm": 5.131383202438949, + "learning_rate": 4.7928327435932584e-06, + "loss": 17.0155, + "step": 28875 + }, + { + "epoch": 0.5278300765898332, + "grad_norm": 5.292756276479835, + "learning_rate": 4.792536986613707e-06, + "loss": 17.2849, + "step": 28876 + }, + { + "epoch": 0.5278483557862796, + "grad_norm": 6.986892692955527, + "learning_rate": 4.792241230361297e-06, + "loss": 17.7004, + "step": 28877 + }, + { + "epoch": 0.5278666349827261, + "grad_norm": 6.412828363572758, + "learning_rate": 4.791945474837061e-06, + "loss": 17.4445, + "step": 28878 + }, + { + "epoch": 0.5278849141791727, + "grad_norm": 7.324417043389137, + "learning_rate": 4.791649720042039e-06, + "loss": 17.9936, + "step": 28879 + }, + { + "epoch": 0.5279031933756192, + "grad_norm": 5.866238112140212, + "learning_rate": 4.791353965977268e-06, + "loss": 17.3683, + "step": 28880 + }, + { + "epoch": 0.5279214725720658, + "grad_norm": 6.977715905323436, + "learning_rate": 4.791058212643781e-06, + "loss": 17.33, + "step": 28881 + }, + { + "epoch": 0.5279397517685123, + "grad_norm": 7.285617705414452, + "learning_rate": 4.790762460042619e-06, + "loss": 18.2784, + "step": 28882 + }, + { + "epoch": 0.5279580309649587, + "grad_norm": 6.124274183913281, + "learning_rate": 4.790466708174815e-06, + "loss": 17.406, + "step": 28883 + }, + { + "epoch": 0.5279763101614053, + "grad_norm": 5.342439893740732, + "learning_rate": 4.790170957041406e-06, + "loss": 16.9759, + "step": 28884 + }, + { + "epoch": 0.5279945893578518, + "grad_norm": 6.44921547563559, + "learning_rate": 4.789875206643432e-06, + "loss": 17.5331, + "step": 28885 + }, + { + "epoch": 0.5280128685542983, + "grad_norm": 6.408641977064421, + "learning_rate": 4.789579456981927e-06, + "loss": 17.7002, + "step": 28886 + }, + { + "epoch": 0.5280311477507449, + "grad_norm": 7.119874164463317, + "learning_rate": 4.789283708057926e-06, + "loss": 18.0914, + "step": 28887 + }, + { + "epoch": 0.5280494269471914, + "grad_norm": 6.315073171129628, + "learning_rate": 4.788987959872468e-06, + "loss": 17.3811, + "step": 28888 + }, + { + "epoch": 0.528067706143638, + "grad_norm": 5.316694993406047, + "learning_rate": 4.7886922124265875e-06, + "loss": 17.1931, + "step": 28889 + }, + { + "epoch": 0.5280859853400844, + "grad_norm": 6.151973430956692, + "learning_rate": 
4.788396465721326e-06, + "loss": 17.3438, + "step": 28890 + }, + { + "epoch": 0.5281042645365309, + "grad_norm": 5.531067691821519, + "learning_rate": 4.788100719757715e-06, + "loss": 17.2357, + "step": 28891 + }, + { + "epoch": 0.5281225437329775, + "grad_norm": 8.132472525597233, + "learning_rate": 4.787804974536791e-06, + "loss": 18.3136, + "step": 28892 + }, + { + "epoch": 0.528140822929424, + "grad_norm": 6.2304121069382274, + "learning_rate": 4.787509230059593e-06, + "loss": 17.4463, + "step": 28893 + }, + { + "epoch": 0.5281591021258706, + "grad_norm": 5.938465286065155, + "learning_rate": 4.787213486327158e-06, + "loss": 17.3088, + "step": 28894 + }, + { + "epoch": 0.5281773813223171, + "grad_norm": 6.935893332228987, + "learning_rate": 4.786917743340519e-06, + "loss": 17.7446, + "step": 28895 + }, + { + "epoch": 0.5281956605187635, + "grad_norm": 7.08577763734086, + "learning_rate": 4.786622001100718e-06, + "loss": 17.3099, + "step": 28896 + }, + { + "epoch": 0.5282139397152101, + "grad_norm": 7.63743022798368, + "learning_rate": 4.786326259608785e-06, + "loss": 17.802, + "step": 28897 + }, + { + "epoch": 0.5282322189116566, + "grad_norm": 6.003127141033391, + "learning_rate": 4.78603051886576e-06, + "loss": 17.5072, + "step": 28898 + }, + { + "epoch": 0.5282504981081032, + "grad_norm": 6.017089453144707, + "learning_rate": 4.785734778872682e-06, + "loss": 17.4595, + "step": 28899 + }, + { + "epoch": 0.5282687773045497, + "grad_norm": 5.361848091579653, + "learning_rate": 4.785439039630585e-06, + "loss": 17.0804, + "step": 28900 + }, + { + "epoch": 0.5282870565009962, + "grad_norm": 6.2754717078019375, + "learning_rate": 4.785143301140504e-06, + "loss": 17.4889, + "step": 28901 + }, + { + "epoch": 0.5283053356974428, + "grad_norm": 6.365149984308117, + "learning_rate": 4.784847563403477e-06, + "loss": 17.3507, + "step": 28902 + }, + { + "epoch": 0.5283236148938892, + "grad_norm": 6.818204922468601, + "learning_rate": 4.784551826420542e-06, + "loss": 17.5514, + "step": 28903 + }, + { + "epoch": 0.5283418940903358, + "grad_norm": 5.779721557854343, + "learning_rate": 4.784256090192732e-06, + "loss": 17.3461, + "step": 28904 + }, + { + "epoch": 0.5283601732867823, + "grad_norm": 6.774582861024068, + "learning_rate": 4.783960354721089e-06, + "loss": 17.8016, + "step": 28905 + }, + { + "epoch": 0.5283784524832288, + "grad_norm": 6.594676569936102, + "learning_rate": 4.783664620006642e-06, + "loss": 17.6608, + "step": 28906 + }, + { + "epoch": 0.5283967316796754, + "grad_norm": 6.839665753232639, + "learning_rate": 4.783368886050434e-06, + "loss": 17.4051, + "step": 28907 + }, + { + "epoch": 0.5284150108761219, + "grad_norm": 5.488721248285436, + "learning_rate": 4.7830731528535e-06, + "loss": 17.0675, + "step": 28908 + }, + { + "epoch": 0.5284332900725685, + "grad_norm": 7.588622775282102, + "learning_rate": 4.782777420416874e-06, + "loss": 17.9841, + "step": 28909 + }, + { + "epoch": 0.5284515692690149, + "grad_norm": 5.7470798935263385, + "learning_rate": 4.782481688741596e-06, + "loss": 17.0947, + "step": 28910 + }, + { + "epoch": 0.5284698484654614, + "grad_norm": 5.238533635509501, + "learning_rate": 4.7821859578287e-06, + "loss": 17.1855, + "step": 28911 + }, + { + "epoch": 0.528488127661908, + "grad_norm": 5.2659604937854985, + "learning_rate": 4.781890227679222e-06, + "loss": 17.0248, + "step": 28912 + }, + { + "epoch": 0.5285064068583545, + "grad_norm": 7.881360674751614, + "learning_rate": 4.781594498294202e-06, + "loss": 17.9199, + "step": 28913 + }, + { + "epoch": 
0.5285246860548011, + "grad_norm": 7.4174814585036595, + "learning_rate": 4.781298769674675e-06, + "loss": 18.175, + "step": 28914 + }, + { + "epoch": 0.5285429652512476, + "grad_norm": 7.3494059635963, + "learning_rate": 4.7810030418216744e-06, + "loss": 17.924, + "step": 28915 + }, + { + "epoch": 0.528561244447694, + "grad_norm": 5.8011214444685555, + "learning_rate": 4.780707314736239e-06, + "loss": 17.2422, + "step": 28916 + }, + { + "epoch": 0.5285795236441406, + "grad_norm": 5.654016475660515, + "learning_rate": 4.780411588419408e-06, + "loss": 17.0475, + "step": 28917 + }, + { + "epoch": 0.5285978028405871, + "grad_norm": 5.827169075505775, + "learning_rate": 4.780115862872213e-06, + "loss": 17.4755, + "step": 28918 + }, + { + "epoch": 0.5286160820370337, + "grad_norm": 6.205762913996296, + "learning_rate": 4.779820138095694e-06, + "loss": 17.5518, + "step": 28919 + }, + { + "epoch": 0.5286343612334802, + "grad_norm": 6.311415581467795, + "learning_rate": 4.7795244140908845e-06, + "loss": 17.6736, + "step": 28920 + }, + { + "epoch": 0.5286526404299267, + "grad_norm": 6.67700343394037, + "learning_rate": 4.779228690858825e-06, + "loss": 17.8366, + "step": 28921 + }, + { + "epoch": 0.5286709196263732, + "grad_norm": 4.573150850930606, + "learning_rate": 4.7789329684005494e-06, + "loss": 16.7168, + "step": 28922 + }, + { + "epoch": 0.5286891988228197, + "grad_norm": 5.597287935428627, + "learning_rate": 4.778637246717093e-06, + "loss": 17.0395, + "step": 28923 + }, + { + "epoch": 0.5287074780192663, + "grad_norm": 5.664359525650026, + "learning_rate": 4.778341525809496e-06, + "loss": 17.1411, + "step": 28924 + }, + { + "epoch": 0.5287257572157128, + "grad_norm": 5.99321077912104, + "learning_rate": 4.778045805678792e-06, + "loss": 17.6294, + "step": 28925 + }, + { + "epoch": 0.5287440364121593, + "grad_norm": 6.917453724948669, + "learning_rate": 4.777750086326017e-06, + "loss": 17.4843, + "step": 28926 + }, + { + "epoch": 0.5287623156086059, + "grad_norm": 6.605667314495165, + "learning_rate": 4.77745436775221e-06, + "loss": 17.5499, + "step": 28927 + }, + { + "epoch": 0.5287805948050524, + "grad_norm": 5.684851313266677, + "learning_rate": 4.777158649958407e-06, + "loss": 17.1861, + "step": 28928 + }, + { + "epoch": 0.5287988740014989, + "grad_norm": 5.556901423251663, + "learning_rate": 4.776862932945641e-06, + "loss": 17.0909, + "step": 28929 + }, + { + "epoch": 0.5288171531979454, + "grad_norm": 7.655543176993401, + "learning_rate": 4.776567216714952e-06, + "loss": 17.8859, + "step": 28930 + }, + { + "epoch": 0.5288354323943919, + "grad_norm": 6.340572271328863, + "learning_rate": 4.776271501267377e-06, + "loss": 17.4977, + "step": 28931 + }, + { + "epoch": 0.5288537115908385, + "grad_norm": 6.857807800546154, + "learning_rate": 4.775975786603949e-06, + "loss": 17.8677, + "step": 28932 + }, + { + "epoch": 0.528871990787285, + "grad_norm": 6.873222102734115, + "learning_rate": 4.775680072725708e-06, + "loss": 17.9672, + "step": 28933 + }, + { + "epoch": 0.5288902699837316, + "grad_norm": 5.399505905359774, + "learning_rate": 4.775384359633688e-06, + "loss": 17.1648, + "step": 28934 + }, + { + "epoch": 0.528908549180178, + "grad_norm": 7.014687734891507, + "learning_rate": 4.775088647328925e-06, + "loss": 17.6098, + "step": 28935 + }, + { + "epoch": 0.5289268283766245, + "grad_norm": 6.4533415281168285, + "learning_rate": 4.7747929358124595e-06, + "loss": 17.6374, + "step": 28936 + }, + { + "epoch": 0.5289451075730711, + "grad_norm": 5.176733574346096, + "learning_rate": 
4.774497225085323e-06, + "loss": 17.2231, + "step": 28937 + }, + { + "epoch": 0.5289633867695176, + "grad_norm": 5.686831397646956, + "learning_rate": 4.774201515148556e-06, + "loss": 17.0373, + "step": 28938 + }, + { + "epoch": 0.5289816659659642, + "grad_norm": 6.436715675593961, + "learning_rate": 4.773905806003193e-06, + "loss": 17.3613, + "step": 28939 + }, + { + "epoch": 0.5289999451624107, + "grad_norm": 5.688844862169735, + "learning_rate": 4.773610097650268e-06, + "loss": 17.2999, + "step": 28940 + }, + { + "epoch": 0.5290182243588571, + "grad_norm": 6.76112525919375, + "learning_rate": 4.773314390090823e-06, + "loss": 17.5339, + "step": 28941 + }, + { + "epoch": 0.5290365035553037, + "grad_norm": 6.521896327749828, + "learning_rate": 4.77301868332589e-06, + "loss": 17.6273, + "step": 28942 + }, + { + "epoch": 0.5290547827517502, + "grad_norm": 6.340256283989966, + "learning_rate": 4.772722977356507e-06, + "loss": 17.3061, + "step": 28943 + }, + { + "epoch": 0.5290730619481968, + "grad_norm": 7.579506806029583, + "learning_rate": 4.77242727218371e-06, + "loss": 18.4786, + "step": 28944 + }, + { + "epoch": 0.5290913411446433, + "grad_norm": 5.748663241019761, + "learning_rate": 4.7721315678085364e-06, + "loss": 17.0845, + "step": 28945 + }, + { + "epoch": 0.5291096203410898, + "grad_norm": 7.145918408295226, + "learning_rate": 4.77183586423202e-06, + "loss": 17.7638, + "step": 28946 + }, + { + "epoch": 0.5291278995375364, + "grad_norm": 7.635723725813082, + "learning_rate": 4.7715401614552e-06, + "loss": 17.8344, + "step": 28947 + }, + { + "epoch": 0.5291461787339828, + "grad_norm": 7.6730095577825885, + "learning_rate": 4.771244459479114e-06, + "loss": 17.984, + "step": 28948 + }, + { + "epoch": 0.5291644579304294, + "grad_norm": 6.272690059928203, + "learning_rate": 4.770948758304793e-06, + "loss": 17.4534, + "step": 28949 + }, + { + "epoch": 0.5291827371268759, + "grad_norm": 6.233938132678369, + "learning_rate": 4.77065305793328e-06, + "loss": 17.4707, + "step": 28950 + }, + { + "epoch": 0.5292010163233224, + "grad_norm": 6.405967934080573, + "learning_rate": 4.770357358365605e-06, + "loss": 17.6681, + "step": 28951 + }, + { + "epoch": 0.529219295519769, + "grad_norm": 6.115772521647002, + "learning_rate": 4.770061659602809e-06, + "loss": 17.5208, + "step": 28952 + }, + { + "epoch": 0.5292375747162155, + "grad_norm": 9.511511786835976, + "learning_rate": 4.769765961645928e-06, + "loss": 18.0441, + "step": 28953 + }, + { + "epoch": 0.5292558539126619, + "grad_norm": 7.4884185891622845, + "learning_rate": 4.769470264495995e-06, + "loss": 17.8858, + "step": 28954 + }, + { + "epoch": 0.5292741331091085, + "grad_norm": 6.190588943265005, + "learning_rate": 4.769174568154052e-06, + "loss": 17.4059, + "step": 28955 + }, + { + "epoch": 0.529292412305555, + "grad_norm": 5.628795109903128, + "learning_rate": 4.768878872621129e-06, + "loss": 17.2416, + "step": 28956 + }, + { + "epoch": 0.5293106915020016, + "grad_norm": 5.738085228999053, + "learning_rate": 4.7685831778982656e-06, + "loss": 17.4048, + "step": 28957 + }, + { + "epoch": 0.5293289706984481, + "grad_norm": 6.150061708058466, + "learning_rate": 4.7682874839865005e-06, + "loss": 17.4161, + "step": 28958 + }, + { + "epoch": 0.5293472498948946, + "grad_norm": 6.365318945287678, + "learning_rate": 4.767991790886866e-06, + "loss": 17.4476, + "step": 28959 + }, + { + "epoch": 0.5293655290913412, + "grad_norm": 6.330916463834369, + "learning_rate": 4.7676960986004e-06, + "loss": 17.4031, + "step": 28960 + }, + { + "epoch": 
0.5293838082877876, + "grad_norm": 5.79164312744024, + "learning_rate": 4.76740040712814e-06, + "loss": 17.3763, + "step": 28961 + }, + { + "epoch": 0.5294020874842342, + "grad_norm": 6.62474071391404, + "learning_rate": 4.767104716471122e-06, + "loss": 17.5816, + "step": 28962 + }, + { + "epoch": 0.5294203666806807, + "grad_norm": 6.355841527629139, + "learning_rate": 4.766809026630378e-06, + "loss": 17.5715, + "step": 28963 + }, + { + "epoch": 0.5294386458771272, + "grad_norm": 5.728055133066767, + "learning_rate": 4.766513337606952e-06, + "loss": 17.4855, + "step": 28964 + }, + { + "epoch": 0.5294569250735738, + "grad_norm": 6.528204563701519, + "learning_rate": 4.766217649401875e-06, + "loss": 17.4504, + "step": 28965 + }, + { + "epoch": 0.5294752042700203, + "grad_norm": 5.622772939513933, + "learning_rate": 4.7659219620161845e-06, + "loss": 17.205, + "step": 28966 + }, + { + "epoch": 0.5294934834664669, + "grad_norm": 5.9634311597967535, + "learning_rate": 4.765626275450918e-06, + "loss": 17.397, + "step": 28967 + }, + { + "epoch": 0.5295117626629133, + "grad_norm": 6.9254273620727655, + "learning_rate": 4.76533058970711e-06, + "loss": 17.3841, + "step": 28968 + }, + { + "epoch": 0.5295300418593598, + "grad_norm": 5.182212194444598, + "learning_rate": 4.7650349047858e-06, + "loss": 16.9626, + "step": 28969 + }, + { + "epoch": 0.5295483210558064, + "grad_norm": 5.5897143284923665, + "learning_rate": 4.76473922068802e-06, + "loss": 17.1623, + "step": 28970 + }, + { + "epoch": 0.5295666002522529, + "grad_norm": 6.547672645916321, + "learning_rate": 4.764443537414809e-06, + "loss": 17.294, + "step": 28971 + }, + { + "epoch": 0.5295848794486995, + "grad_norm": 6.335219448620369, + "learning_rate": 4.764147854967205e-06, + "loss": 17.1706, + "step": 28972 + }, + { + "epoch": 0.529603158645146, + "grad_norm": 6.512345272851514, + "learning_rate": 4.763852173346242e-06, + "loss": 17.5477, + "step": 28973 + }, + { + "epoch": 0.5296214378415924, + "grad_norm": 7.126635933444644, + "learning_rate": 4.763556492552954e-06, + "loss": 17.4364, + "step": 28974 + }, + { + "epoch": 0.529639717038039, + "grad_norm": 6.699586223759229, + "learning_rate": 4.763260812588381e-06, + "loss": 17.5306, + "step": 28975 + }, + { + "epoch": 0.5296579962344855, + "grad_norm": 6.2770136583172516, + "learning_rate": 4.76296513345356e-06, + "loss": 17.2066, + "step": 28976 + }, + { + "epoch": 0.5296762754309321, + "grad_norm": 6.664630342097273, + "learning_rate": 4.762669455149523e-06, + "loss": 17.6957, + "step": 28977 + }, + { + "epoch": 0.5296945546273786, + "grad_norm": 6.334087876642642, + "learning_rate": 4.7623737776773125e-06, + "loss": 17.3825, + "step": 28978 + }, + { + "epoch": 0.5297128338238251, + "grad_norm": 7.664843603961732, + "learning_rate": 4.762078101037959e-06, + "loss": 17.9372, + "step": 28979 + }, + { + "epoch": 0.5297311130202716, + "grad_norm": 6.2353192056173405, + "learning_rate": 4.7617824252324995e-06, + "loss": 17.274, + "step": 28980 + }, + { + "epoch": 0.5297493922167181, + "grad_norm": 9.97945252779588, + "learning_rate": 4.761486750261975e-06, + "loss": 19.1664, + "step": 28981 + }, + { + "epoch": 0.5297676714131647, + "grad_norm": 7.069265974511238, + "learning_rate": 4.761191076127416e-06, + "loss": 17.753, + "step": 28982 + }, + { + "epoch": 0.5297859506096112, + "grad_norm": 6.040235588954766, + "learning_rate": 4.760895402829864e-06, + "loss": 17.5188, + "step": 28983 + }, + { + "epoch": 0.5298042298060577, + "grad_norm": 7.13098589882815, + "learning_rate": 
4.760599730370352e-06, + "loss": 17.8517, + "step": 28984 + }, + { + "epoch": 0.5298225090025043, + "grad_norm": 6.561225109909866, + "learning_rate": 4.7603040587499165e-06, + "loss": 17.468, + "step": 28985 + }, + { + "epoch": 0.5298407881989508, + "grad_norm": 6.197798029895168, + "learning_rate": 4.7600083879695954e-06, + "loss": 17.4974, + "step": 28986 + }, + { + "epoch": 0.5298590673953973, + "grad_norm": 6.746513134685152, + "learning_rate": 4.759712718030425e-06, + "loss": 17.6516, + "step": 28987 + }, + { + "epoch": 0.5298773465918438, + "grad_norm": 8.384064048282763, + "learning_rate": 4.759417048933438e-06, + "loss": 18.0163, + "step": 28988 + }, + { + "epoch": 0.5298956257882903, + "grad_norm": 7.545582877617902, + "learning_rate": 4.759121380679674e-06, + "loss": 17.6561, + "step": 28989 + }, + { + "epoch": 0.5299139049847369, + "grad_norm": 7.380961876726133, + "learning_rate": 4.758825713270171e-06, + "loss": 17.7723, + "step": 28990 + }, + { + "epoch": 0.5299321841811834, + "grad_norm": 7.403514377235956, + "learning_rate": 4.758530046705961e-06, + "loss": 17.621, + "step": 28991 + }, + { + "epoch": 0.52995046337763, + "grad_norm": 7.6176915257901765, + "learning_rate": 4.758234380988083e-06, + "loss": 17.9377, + "step": 28992 + }, + { + "epoch": 0.5299687425740764, + "grad_norm": 5.851218543396473, + "learning_rate": 4.757938716117572e-06, + "loss": 17.5295, + "step": 28993 + }, + { + "epoch": 0.5299870217705229, + "grad_norm": 5.357925665484355, + "learning_rate": 4.757643052095464e-06, + "loss": 17.1055, + "step": 28994 + }, + { + "epoch": 0.5300053009669695, + "grad_norm": 6.815637251884852, + "learning_rate": 4.757347388922797e-06, + "loss": 17.5178, + "step": 28995 + }, + { + "epoch": 0.530023580163416, + "grad_norm": 5.519619079182495, + "learning_rate": 4.757051726600606e-06, + "loss": 17.2082, + "step": 28996 + }, + { + "epoch": 0.5300418593598626, + "grad_norm": 6.284457709423418, + "learning_rate": 4.756756065129929e-06, + "loss": 17.5382, + "step": 28997 + }, + { + "epoch": 0.5300601385563091, + "grad_norm": 6.20993452893053, + "learning_rate": 4.756460404511799e-06, + "loss": 17.1413, + "step": 28998 + }, + { + "epoch": 0.5300784177527555, + "grad_norm": 6.393283012192629, + "learning_rate": 4.7561647447472545e-06, + "loss": 17.4477, + "step": 28999 + }, + { + "epoch": 0.5300966969492021, + "grad_norm": 5.622959309911061, + "learning_rate": 4.755869085837333e-06, + "loss": 17.1082, + "step": 29000 + }, + { + "epoch": 0.5301149761456486, + "grad_norm": 5.020867332350183, + "learning_rate": 4.755573427783068e-06, + "loss": 17.0118, + "step": 29001 + }, + { + "epoch": 0.5301332553420952, + "grad_norm": 5.03247731379233, + "learning_rate": 4.755277770585496e-06, + "loss": 16.8708, + "step": 29002 + }, + { + "epoch": 0.5301515345385417, + "grad_norm": 7.435931180725855, + "learning_rate": 4.754982114245655e-06, + "loss": 17.8667, + "step": 29003 + }, + { + "epoch": 0.5301698137349882, + "grad_norm": 6.99032306287987, + "learning_rate": 4.754686458764582e-06, + "loss": 17.8947, + "step": 29004 + }, + { + "epoch": 0.5301880929314348, + "grad_norm": 7.200105281796414, + "learning_rate": 4.754390804143309e-06, + "loss": 18.0311, + "step": 29005 + }, + { + "epoch": 0.5302063721278812, + "grad_norm": 4.986183366678657, + "learning_rate": 4.754095150382876e-06, + "loss": 16.965, + "step": 29006 + }, + { + "epoch": 0.5302246513243278, + "grad_norm": 6.233305622340141, + "learning_rate": 4.753799497484319e-06, + "loss": 17.4663, + "step": 29007 + }, + { + "epoch": 
0.5302429305207743, + "grad_norm": 5.673532705218475, + "learning_rate": 4.753503845448672e-06, + "loss": 17.0611, + "step": 29008 + }, + { + "epoch": 0.5302612097172208, + "grad_norm": 5.690178376425513, + "learning_rate": 4.753208194276974e-06, + "loss": 17.1658, + "step": 29009 + }, + { + "epoch": 0.5302794889136674, + "grad_norm": 6.673679623648136, + "learning_rate": 4.7529125439702594e-06, + "loss": 17.9059, + "step": 29010 + }, + { + "epoch": 0.5302977681101139, + "grad_norm": 6.457653997418939, + "learning_rate": 4.752616894529564e-06, + "loss": 17.2834, + "step": 29011 + }, + { + "epoch": 0.5303160473065605, + "grad_norm": 5.788563805288343, + "learning_rate": 4.752321245955927e-06, + "loss": 17.2688, + "step": 29012 + }, + { + "epoch": 0.5303343265030069, + "grad_norm": 7.02269511892852, + "learning_rate": 4.752025598250379e-06, + "loss": 17.782, + "step": 29013 + }, + { + "epoch": 0.5303526056994534, + "grad_norm": 6.536583697592861, + "learning_rate": 4.751729951413963e-06, + "loss": 17.5823, + "step": 29014 + }, + { + "epoch": 0.5303708848959, + "grad_norm": 6.175956006660992, + "learning_rate": 4.7514343054477105e-06, + "loss": 17.1392, + "step": 29015 + }, + { + "epoch": 0.5303891640923465, + "grad_norm": 6.52542404024492, + "learning_rate": 4.751138660352659e-06, + "loss": 17.2988, + "step": 29016 + }, + { + "epoch": 0.5304074432887931, + "grad_norm": 5.389273970877506, + "learning_rate": 4.750843016129846e-06, + "loss": 17.0757, + "step": 29017 + }, + { + "epoch": 0.5304257224852396, + "grad_norm": 6.227759632651717, + "learning_rate": 4.750547372780308e-06, + "loss": 17.5329, + "step": 29018 + }, + { + "epoch": 0.530444001681686, + "grad_norm": 7.032243624524673, + "learning_rate": 4.750251730305077e-06, + "loss": 18.0772, + "step": 29019 + }, + { + "epoch": 0.5304622808781326, + "grad_norm": 8.65552082768172, + "learning_rate": 4.749956088705192e-06, + "loss": 17.8541, + "step": 29020 + }, + { + "epoch": 0.5304805600745791, + "grad_norm": 7.130319468662974, + "learning_rate": 4.749660447981691e-06, + "loss": 17.8018, + "step": 29021 + }, + { + "epoch": 0.5304988392710256, + "grad_norm": 7.080552813274937, + "learning_rate": 4.749364808135607e-06, + "loss": 17.7129, + "step": 29022 + }, + { + "epoch": 0.5305171184674722, + "grad_norm": 5.794984351054141, + "learning_rate": 4.749069169167979e-06, + "loss": 17.3477, + "step": 29023 + }, + { + "epoch": 0.5305353976639187, + "grad_norm": 5.624996959233557, + "learning_rate": 4.7487735310798405e-06, + "loss": 17.1875, + "step": 29024 + }, + { + "epoch": 0.5305536768603653, + "grad_norm": 5.469560987892032, + "learning_rate": 4.7484778938722285e-06, + "loss": 17.2285, + "step": 29025 + }, + { + "epoch": 0.5305719560568117, + "grad_norm": 5.890981220322474, + "learning_rate": 4.748182257546181e-06, + "loss": 16.9253, + "step": 29026 + }, + { + "epoch": 0.5305902352532582, + "grad_norm": 8.778371523133774, + "learning_rate": 4.747886622102731e-06, + "loss": 18.3411, + "step": 29027 + }, + { + "epoch": 0.5306085144497048, + "grad_norm": 5.650137184782942, + "learning_rate": 4.747590987542919e-06, + "loss": 17.1306, + "step": 29028 + }, + { + "epoch": 0.5306267936461513, + "grad_norm": 6.961102745151707, + "learning_rate": 4.747295353867778e-06, + "loss": 18.2216, + "step": 29029 + }, + { + "epoch": 0.5306450728425979, + "grad_norm": 6.470423668539671, + "learning_rate": 4.7469997210783435e-06, + "loss": 17.6332, + "step": 29030 + }, + { + "epoch": 0.5306633520390444, + "grad_norm": 7.04058675641339, + "learning_rate": 
4.746704089175655e-06, + "loss": 17.6692, + "step": 29031 + }, + { + "epoch": 0.5306816312354908, + "grad_norm": 7.120824994109516, + "learning_rate": 4.7464084581607465e-06, + "loss": 17.4945, + "step": 29032 + }, + { + "epoch": 0.5306999104319374, + "grad_norm": 7.813392016753592, + "learning_rate": 4.746112828034653e-06, + "loss": 17.6192, + "step": 29033 + }, + { + "epoch": 0.5307181896283839, + "grad_norm": 6.513456589941889, + "learning_rate": 4.745817198798412e-06, + "loss": 17.3292, + "step": 29034 + }, + { + "epoch": 0.5307364688248305, + "grad_norm": 6.6167338676562135, + "learning_rate": 4.745521570453061e-06, + "loss": 17.394, + "step": 29035 + }, + { + "epoch": 0.530754748021277, + "grad_norm": 8.437965719621715, + "learning_rate": 4.745225942999633e-06, + "loss": 18.5355, + "step": 29036 + }, + { + "epoch": 0.5307730272177235, + "grad_norm": 8.917662320778536, + "learning_rate": 4.744930316439168e-06, + "loss": 18.2374, + "step": 29037 + }, + { + "epoch": 0.53079130641417, + "grad_norm": 6.990175887722325, + "learning_rate": 4.744634690772699e-06, + "loss": 17.5489, + "step": 29038 + }, + { + "epoch": 0.5308095856106165, + "grad_norm": 6.7485057341990675, + "learning_rate": 4.744339066001262e-06, + "loss": 17.3745, + "step": 29039 + }, + { + "epoch": 0.5308278648070631, + "grad_norm": 7.143291266208228, + "learning_rate": 4.744043442125897e-06, + "loss": 17.8604, + "step": 29040 + }, + { + "epoch": 0.5308461440035096, + "grad_norm": 6.971640177308801, + "learning_rate": 4.743747819147637e-06, + "loss": 17.62, + "step": 29041 + }, + { + "epoch": 0.5308644231999561, + "grad_norm": 7.976051122820207, + "learning_rate": 4.743452197067516e-06, + "loss": 17.6011, + "step": 29042 + }, + { + "epoch": 0.5308827023964027, + "grad_norm": 6.391408805176334, + "learning_rate": 4.743156575886575e-06, + "loss": 17.0996, + "step": 29043 + }, + { + "epoch": 0.5309009815928492, + "grad_norm": 4.956450882770411, + "learning_rate": 4.742860955605846e-06, + "loss": 16.9487, + "step": 29044 + }, + { + "epoch": 0.5309192607892957, + "grad_norm": 7.895859674510243, + "learning_rate": 4.74256533622637e-06, + "loss": 17.883, + "step": 29045 + }, + { + "epoch": 0.5309375399857422, + "grad_norm": 7.879627065172488, + "learning_rate": 4.74226971774918e-06, + "loss": 17.886, + "step": 29046 + }, + { + "epoch": 0.5309558191821887, + "grad_norm": 7.166142253316326, + "learning_rate": 4.74197410017531e-06, + "loss": 17.5091, + "step": 29047 + }, + { + "epoch": 0.5309740983786353, + "grad_norm": 6.595705922538593, + "learning_rate": 4.741678483505799e-06, + "loss": 17.5326, + "step": 29048 + }, + { + "epoch": 0.5309923775750818, + "grad_norm": 7.3835527577926845, + "learning_rate": 4.741382867741684e-06, + "loss": 18.1946, + "step": 29049 + }, + { + "epoch": 0.5310106567715284, + "grad_norm": 6.338145646823406, + "learning_rate": 4.741087252883998e-06, + "loss": 17.4536, + "step": 29050 + }, + { + "epoch": 0.5310289359679748, + "grad_norm": 11.185395669690315, + "learning_rate": 4.74079163893378e-06, + "loss": 17.9135, + "step": 29051 + }, + { + "epoch": 0.5310472151644213, + "grad_norm": 7.885925422033358, + "learning_rate": 4.740496025892064e-06, + "loss": 18.2055, + "step": 29052 + }, + { + "epoch": 0.5310654943608679, + "grad_norm": 6.817404549241904, + "learning_rate": 4.740200413759886e-06, + "loss": 17.429, + "step": 29053 + }, + { + "epoch": 0.5310837735573144, + "grad_norm": 6.044258696525604, + "learning_rate": 4.739904802538284e-06, + "loss": 17.6657, + "step": 29054 + }, + { + "epoch": 
0.531102052753761, + "grad_norm": 9.037642839197277, + "learning_rate": 4.739609192228295e-06, + "loss": 17.7689, + "step": 29055 + }, + { + "epoch": 0.5311203319502075, + "grad_norm": 4.846138987507346, + "learning_rate": 4.7393135828309495e-06, + "loss": 16.8368, + "step": 29056 + }, + { + "epoch": 0.531138611146654, + "grad_norm": 10.03554536509534, + "learning_rate": 4.7390179743472895e-06, + "loss": 17.9387, + "step": 29057 + }, + { + "epoch": 0.5311568903431005, + "grad_norm": 6.869602546938215, + "learning_rate": 4.738722366778346e-06, + "loss": 17.7339, + "step": 29058 + }, + { + "epoch": 0.531175169539547, + "grad_norm": 5.454949928664412, + "learning_rate": 4.738426760125162e-06, + "loss": 17.0717, + "step": 29059 + }, + { + "epoch": 0.5311934487359936, + "grad_norm": 5.857091816580592, + "learning_rate": 4.738131154388768e-06, + "loss": 17.1165, + "step": 29060 + }, + { + "epoch": 0.5312117279324401, + "grad_norm": 6.930273201413045, + "learning_rate": 4.737835549570201e-06, + "loss": 17.7204, + "step": 29061 + }, + { + "epoch": 0.5312300071288866, + "grad_norm": 5.431037714773593, + "learning_rate": 4.737539945670498e-06, + "loss": 17.1813, + "step": 29062 + }, + { + "epoch": 0.5312482863253332, + "grad_norm": 6.5420044744411845, + "learning_rate": 4.737244342690696e-06, + "loss": 17.4828, + "step": 29063 + }, + { + "epoch": 0.5312665655217796, + "grad_norm": 7.036392217452092, + "learning_rate": 4.736948740631827e-06, + "loss": 17.8612, + "step": 29064 + }, + { + "epoch": 0.5312848447182262, + "grad_norm": 7.429120135752747, + "learning_rate": 4.736653139494933e-06, + "loss": 18.002, + "step": 29065 + }, + { + "epoch": 0.5313031239146727, + "grad_norm": 7.647763917020927, + "learning_rate": 4.736357539281045e-06, + "loss": 18.3326, + "step": 29066 + }, + { + "epoch": 0.5313214031111192, + "grad_norm": 6.714500069030933, + "learning_rate": 4.7360619399912e-06, + "loss": 17.7183, + "step": 29067 + }, + { + "epoch": 0.5313396823075658, + "grad_norm": 6.437689205093623, + "learning_rate": 4.735766341626437e-06, + "loss": 17.5725, + "step": 29068 + }, + { + "epoch": 0.5313579615040123, + "grad_norm": 5.896080567498599, + "learning_rate": 4.735470744187789e-06, + "loss": 17.1384, + "step": 29069 + }, + { + "epoch": 0.5313762407004589, + "grad_norm": 5.706152266296976, + "learning_rate": 4.735175147676294e-06, + "loss": 17.4059, + "step": 29070 + }, + { + "epoch": 0.5313945198969053, + "grad_norm": 6.558307824820367, + "learning_rate": 4.734879552092986e-06, + "loss": 17.5545, + "step": 29071 + }, + { + "epoch": 0.5314127990933518, + "grad_norm": 6.6419850563836444, + "learning_rate": 4.734583957438903e-06, + "loss": 17.7131, + "step": 29072 + }, + { + "epoch": 0.5314310782897984, + "grad_norm": 5.426127406448526, + "learning_rate": 4.7342883637150796e-06, + "loss": 17.0622, + "step": 29073 + }, + { + "epoch": 0.5314493574862449, + "grad_norm": 6.008733891505808, + "learning_rate": 4.7339927709225524e-06, + "loss": 17.4839, + "step": 29074 + }, + { + "epoch": 0.5314676366826915, + "grad_norm": 5.623738962763688, + "learning_rate": 4.733697179062356e-06, + "loss": 17.3007, + "step": 29075 + }, + { + "epoch": 0.531485915879138, + "grad_norm": 5.616239444795882, + "learning_rate": 4.733401588135531e-06, + "loss": 17.0733, + "step": 29076 + }, + { + "epoch": 0.5315041950755844, + "grad_norm": 7.862884441361074, + "learning_rate": 4.73310599814311e-06, + "loss": 17.9129, + "step": 29077 + }, + { + "epoch": 0.531522474272031, + "grad_norm": 5.009030678254554, + "learning_rate": 
4.732810409086127e-06, + "loss": 16.9358, + "step": 29078 + }, + { + "epoch": 0.5315407534684775, + "grad_norm": 5.334322342518181, + "learning_rate": 4.732514820965621e-06, + "loss": 17.1485, + "step": 29079 + }, + { + "epoch": 0.5315590326649241, + "grad_norm": 6.000890507008084, + "learning_rate": 4.73221923378263e-06, + "loss": 17.4316, + "step": 29080 + }, + { + "epoch": 0.5315773118613706, + "grad_norm": 6.659016598136487, + "learning_rate": 4.731923647538184e-06, + "loss": 17.3543, + "step": 29081 + }, + { + "epoch": 0.5315955910578171, + "grad_norm": 6.61833062327974, + "learning_rate": 4.731628062233325e-06, + "loss": 17.6285, + "step": 29082 + }, + { + "epoch": 0.5316138702542637, + "grad_norm": 5.462550395014614, + "learning_rate": 4.731332477869084e-06, + "loss": 16.8888, + "step": 29083 + }, + { + "epoch": 0.5316321494507101, + "grad_norm": 8.559596390802898, + "learning_rate": 4.731036894446499e-06, + "loss": 18.3411, + "step": 29084 + }, + { + "epoch": 0.5316504286471567, + "grad_norm": 5.5304459159182136, + "learning_rate": 4.730741311966609e-06, + "loss": 17.2945, + "step": 29085 + }, + { + "epoch": 0.5316687078436032, + "grad_norm": 6.4386947966121255, + "learning_rate": 4.730445730430447e-06, + "loss": 17.6291, + "step": 29086 + }, + { + "epoch": 0.5316869870400497, + "grad_norm": 6.924546856510563, + "learning_rate": 4.730150149839047e-06, + "loss": 17.9036, + "step": 29087 + }, + { + "epoch": 0.5317052662364963, + "grad_norm": 8.26086054422182, + "learning_rate": 4.729854570193448e-06, + "loss": 18.4739, + "step": 29088 + }, + { + "epoch": 0.5317235454329428, + "grad_norm": 5.911957173509042, + "learning_rate": 4.729558991494685e-06, + "loss": 17.5701, + "step": 29089 + }, + { + "epoch": 0.5317418246293892, + "grad_norm": 7.3927788013912155, + "learning_rate": 4.7292634137437965e-06, + "loss": 17.8532, + "step": 29090 + }, + { + "epoch": 0.5317601038258358, + "grad_norm": 6.669605504379684, + "learning_rate": 4.728967836941816e-06, + "loss": 17.3569, + "step": 29091 + }, + { + "epoch": 0.5317783830222823, + "grad_norm": 7.796781218527904, + "learning_rate": 4.728672261089777e-06, + "loss": 17.879, + "step": 29092 + }, + { + "epoch": 0.5317966622187289, + "grad_norm": 6.581204812453982, + "learning_rate": 4.72837668618872e-06, + "loss": 17.4378, + "step": 29093 + }, + { + "epoch": 0.5318149414151754, + "grad_norm": 5.786141109773239, + "learning_rate": 4.72808111223968e-06, + "loss": 17.3518, + "step": 29094 + }, + { + "epoch": 0.5318332206116219, + "grad_norm": 5.580146707013943, + "learning_rate": 4.72778553924369e-06, + "loss": 17.1563, + "step": 29095 + }, + { + "epoch": 0.5318514998080685, + "grad_norm": 6.7681026442740535, + "learning_rate": 4.72748996720179e-06, + "loss": 17.6506, + "step": 29096 + }, + { + "epoch": 0.5318697790045149, + "grad_norm": 6.325593306072917, + "learning_rate": 4.727194396115013e-06, + "loss": 17.4579, + "step": 29097 + }, + { + "epoch": 0.5318880582009615, + "grad_norm": 7.750348884085031, + "learning_rate": 4.7268988259843945e-06, + "loss": 18.1183, + "step": 29098 + }, + { + "epoch": 0.531906337397408, + "grad_norm": 7.378992994794502, + "learning_rate": 4.7266032568109745e-06, + "loss": 18.222, + "step": 29099 + }, + { + "epoch": 0.5319246165938545, + "grad_norm": 7.214803875822746, + "learning_rate": 4.726307688595787e-06, + "loss": 17.9459, + "step": 29100 + }, + { + "epoch": 0.5319428957903011, + "grad_norm": 6.175586950398687, + "learning_rate": 4.726012121339864e-06, + "loss": 17.5421, + "step": 29101 + }, + { + "epoch": 
0.5319611749867476, + "grad_norm": 5.526292871774039, + "learning_rate": 4.725716555044246e-06, + "loss": 17.3515, + "step": 29102 + }, + { + "epoch": 0.5319794541831941, + "grad_norm": 6.173101937782035, + "learning_rate": 4.72542098970997e-06, + "loss": 17.2722, + "step": 29103 + }, + { + "epoch": 0.5319977333796406, + "grad_norm": 7.939720874584466, + "learning_rate": 4.725125425338066e-06, + "loss": 17.6554, + "step": 29104 + }, + { + "epoch": 0.5320160125760871, + "grad_norm": 7.102072818870398, + "learning_rate": 4.724829861929576e-06, + "loss": 18.0799, + "step": 29105 + }, + { + "epoch": 0.5320342917725337, + "grad_norm": 6.193719341303695, + "learning_rate": 4.724534299485532e-06, + "loss": 17.3817, + "step": 29106 + }, + { + "epoch": 0.5320525709689802, + "grad_norm": 6.710891402253766, + "learning_rate": 4.724238738006972e-06, + "loss": 17.5157, + "step": 29107 + }, + { + "epoch": 0.5320708501654268, + "grad_norm": 6.289553488924047, + "learning_rate": 4.723943177494932e-06, + "loss": 17.2921, + "step": 29108 + }, + { + "epoch": 0.5320891293618732, + "grad_norm": 7.320087808991515, + "learning_rate": 4.723647617950446e-06, + "loss": 18.1539, + "step": 29109 + }, + { + "epoch": 0.5321074085583197, + "grad_norm": 5.961258353746868, + "learning_rate": 4.723352059374552e-06, + "loss": 17.329, + "step": 29110 + }, + { + "epoch": 0.5321256877547663, + "grad_norm": 6.051518226625935, + "learning_rate": 4.723056501768285e-06, + "loss": 17.4943, + "step": 29111 + }, + { + "epoch": 0.5321439669512128, + "grad_norm": 6.3104499872196715, + "learning_rate": 4.722760945132679e-06, + "loss": 17.4186, + "step": 29112 + }, + { + "epoch": 0.5321622461476594, + "grad_norm": 6.005834493972101, + "learning_rate": 4.722465389468775e-06, + "loss": 17.5438, + "step": 29113 + }, + { + "epoch": 0.5321805253441059, + "grad_norm": 6.753467752413835, + "learning_rate": 4.722169834777605e-06, + "loss": 18.0328, + "step": 29114 + }, + { + "epoch": 0.5321988045405524, + "grad_norm": 6.127705081696299, + "learning_rate": 4.7218742810602035e-06, + "loss": 17.5204, + "step": 29115 + }, + { + "epoch": 0.5322170837369989, + "grad_norm": 5.364277239290237, + "learning_rate": 4.72157872831761e-06, + "loss": 17.2414, + "step": 29116 + }, + { + "epoch": 0.5322353629334454, + "grad_norm": 4.817707614412788, + "learning_rate": 4.72128317655086e-06, + "loss": 16.9568, + "step": 29117 + }, + { + "epoch": 0.532253642129892, + "grad_norm": 5.5802778958409025, + "learning_rate": 4.720987625760985e-06, + "loss": 17.2708, + "step": 29118 + }, + { + "epoch": 0.5322719213263385, + "grad_norm": 5.350329158205401, + "learning_rate": 4.720692075949027e-06, + "loss": 17.2927, + "step": 29119 + }, + { + "epoch": 0.532290200522785, + "grad_norm": 6.25668171940735, + "learning_rate": 4.720396527116018e-06, + "loss": 17.4023, + "step": 29120 + }, + { + "epoch": 0.5323084797192316, + "grad_norm": 6.244412090560745, + "learning_rate": 4.720100979262995e-06, + "loss": 17.5831, + "step": 29121 + }, + { + "epoch": 0.532326758915678, + "grad_norm": 7.59218018480341, + "learning_rate": 4.719805432390995e-06, + "loss": 17.7089, + "step": 29122 + }, + { + "epoch": 0.5323450381121246, + "grad_norm": 9.935650537819685, + "learning_rate": 4.7195098865010504e-06, + "loss": 18.3908, + "step": 29123 + }, + { + "epoch": 0.5323633173085711, + "grad_norm": 6.495075807465171, + "learning_rate": 4.719214341594201e-06, + "loss": 17.6079, + "step": 29124 + }, + { + "epoch": 0.5323815965050176, + "grad_norm": 7.914936385801918, + "learning_rate": 
4.7189187976714804e-06, + "loss": 18.2558, + "step": 29125 + }, + { + "epoch": 0.5323998757014642, + "grad_norm": 6.230754304976618, + "learning_rate": 4.718623254733924e-06, + "loss": 17.5205, + "step": 29126 + }, + { + "epoch": 0.5324181548979107, + "grad_norm": 5.421609703568032, + "learning_rate": 4.71832771278257e-06, + "loss": 17.2301, + "step": 29127 + }, + { + "epoch": 0.5324364340943573, + "grad_norm": 6.478292233647558, + "learning_rate": 4.718032171818453e-06, + "loss": 17.5168, + "step": 29128 + }, + { + "epoch": 0.5324547132908037, + "grad_norm": 6.688539822736038, + "learning_rate": 4.717736631842608e-06, + "loss": 17.6626, + "step": 29129 + }, + { + "epoch": 0.5324729924872502, + "grad_norm": 6.928218580494495, + "learning_rate": 4.717441092856072e-06, + "loss": 18.171, + "step": 29130 + }, + { + "epoch": 0.5324912716836968, + "grad_norm": 6.56043645920163, + "learning_rate": 4.7171455548598816e-06, + "loss": 17.7995, + "step": 29131 + }, + { + "epoch": 0.5325095508801433, + "grad_norm": 6.150982416930782, + "learning_rate": 4.7168500178550695e-06, + "loss": 17.4851, + "step": 29132 + }, + { + "epoch": 0.5325278300765899, + "grad_norm": 6.320931657999938, + "learning_rate": 4.716554481842674e-06, + "loss": 17.5192, + "step": 29133 + }, + { + "epoch": 0.5325461092730364, + "grad_norm": 4.759395082450143, + "learning_rate": 4.716258946823732e-06, + "loss": 16.7456, + "step": 29134 + }, + { + "epoch": 0.5325643884694828, + "grad_norm": 7.486771062381638, + "learning_rate": 4.715963412799276e-06, + "loss": 17.7593, + "step": 29135 + }, + { + "epoch": 0.5325826676659294, + "grad_norm": 5.549348222321404, + "learning_rate": 4.715667879770345e-06, + "loss": 17.1854, + "step": 29136 + }, + { + "epoch": 0.5326009468623759, + "grad_norm": 5.915789294304772, + "learning_rate": 4.715372347737971e-06, + "loss": 17.4329, + "step": 29137 + }, + { + "epoch": 0.5326192260588225, + "grad_norm": 9.867560162165196, + "learning_rate": 4.715076816703194e-06, + "loss": 17.8107, + "step": 29138 + }, + { + "epoch": 0.532637505255269, + "grad_norm": 6.532373079707325, + "learning_rate": 4.714781286667048e-06, + "loss": 17.5884, + "step": 29139 + }, + { + "epoch": 0.5326557844517155, + "grad_norm": 5.020319197667657, + "learning_rate": 4.714485757630568e-06, + "loss": 16.8819, + "step": 29140 + }, + { + "epoch": 0.532674063648162, + "grad_norm": 7.2737297847563696, + "learning_rate": 4.714190229594792e-06, + "loss": 17.9487, + "step": 29141 + }, + { + "epoch": 0.5326923428446085, + "grad_norm": 6.0278678970210535, + "learning_rate": 4.713894702560754e-06, + "loss": 17.3564, + "step": 29142 + }, + { + "epoch": 0.5327106220410551, + "grad_norm": 8.863413400127481, + "learning_rate": 4.713599176529488e-06, + "loss": 18.677, + "step": 29143 + }, + { + "epoch": 0.5327289012375016, + "grad_norm": 6.13073041808526, + "learning_rate": 4.713303651502036e-06, + "loss": 17.609, + "step": 29144 + }, + { + "epoch": 0.5327471804339481, + "grad_norm": 7.0548541991269555, + "learning_rate": 4.713008127479429e-06, + "loss": 17.8444, + "step": 29145 + }, + { + "epoch": 0.5327654596303947, + "grad_norm": 6.3594455768943385, + "learning_rate": 4.712712604462701e-06, + "loss": 17.6913, + "step": 29146 + }, + { + "epoch": 0.5327837388268412, + "grad_norm": 7.358024674587688, + "learning_rate": 4.712417082452892e-06, + "loss": 18.1915, + "step": 29147 + }, + { + "epoch": 0.5328020180232877, + "grad_norm": 7.27916880003968, + "learning_rate": 4.7121215614510365e-06, + "loss": 17.7422, + "step": 29148 + }, + { + "epoch": 
0.5328202972197342, + "grad_norm": 6.10328131209803, + "learning_rate": 4.711826041458169e-06, + "loss": 17.4295, + "step": 29149 + }, + { + "epoch": 0.5328385764161807, + "grad_norm": 6.993145169384505, + "learning_rate": 4.711530522475327e-06, + "loss": 17.9042, + "step": 29150 + }, + { + "epoch": 0.5328568556126273, + "grad_norm": 6.224101834741438, + "learning_rate": 4.711235004503544e-06, + "loss": 17.4397, + "step": 29151 + }, + { + "epoch": 0.5328751348090738, + "grad_norm": 6.784740796828709, + "learning_rate": 4.7109394875438585e-06, + "loss": 17.503, + "step": 29152 + }, + { + "epoch": 0.5328934140055204, + "grad_norm": 5.454179536820799, + "learning_rate": 4.710643971597306e-06, + "loss": 17.3178, + "step": 29153 + }, + { + "epoch": 0.5329116932019669, + "grad_norm": 5.419832965479272, + "learning_rate": 4.710348456664919e-06, + "loss": 17.1325, + "step": 29154 + }, + { + "epoch": 0.5329299723984133, + "grad_norm": 6.468622716265971, + "learning_rate": 4.710052942747738e-06, + "loss": 17.3501, + "step": 29155 + }, + { + "epoch": 0.5329482515948599, + "grad_norm": 5.677443523185989, + "learning_rate": 4.709757429846795e-06, + "loss": 17.5229, + "step": 29156 + }, + { + "epoch": 0.5329665307913064, + "grad_norm": 6.436444698835134, + "learning_rate": 4.709461917963126e-06, + "loss": 17.6191, + "step": 29157 + }, + { + "epoch": 0.5329848099877529, + "grad_norm": 6.253066463006159, + "learning_rate": 4.709166407097769e-06, + "loss": 17.4264, + "step": 29158 + }, + { + "epoch": 0.5330030891841995, + "grad_norm": 5.261050823052316, + "learning_rate": 4.70887089725176e-06, + "loss": 17.2113, + "step": 29159 + }, + { + "epoch": 0.533021368380646, + "grad_norm": 6.257819167140041, + "learning_rate": 4.708575388426131e-06, + "loss": 17.4203, + "step": 29160 + }, + { + "epoch": 0.5330396475770925, + "grad_norm": 6.513401227549751, + "learning_rate": 4.70827988062192e-06, + "loss": 17.6654, + "step": 29161 + }, + { + "epoch": 0.533057926773539, + "grad_norm": 5.635081993814778, + "learning_rate": 4.707984373840164e-06, + "loss": 17.2523, + "step": 29162 + }, + { + "epoch": 0.5330762059699855, + "grad_norm": 8.04649228287663, + "learning_rate": 4.707688868081896e-06, + "loss": 17.7352, + "step": 29163 + }, + { + "epoch": 0.5330944851664321, + "grad_norm": 5.31590699965306, + "learning_rate": 4.707393363348154e-06, + "loss": 17.1076, + "step": 29164 + }, + { + "epoch": 0.5331127643628786, + "grad_norm": 5.554912643104351, + "learning_rate": 4.707097859639972e-06, + "loss": 17.2083, + "step": 29165 + }, + { + "epoch": 0.5331310435593252, + "grad_norm": 7.071426242434656, + "learning_rate": 4.7068023569583865e-06, + "loss": 17.7656, + "step": 29166 + }, + { + "epoch": 0.5331493227557716, + "grad_norm": 5.76104893770655, + "learning_rate": 4.706506855304435e-06, + "loss": 17.2488, + "step": 29167 + }, + { + "epoch": 0.5331676019522181, + "grad_norm": 6.712165285165031, + "learning_rate": 4.7062113546791496e-06, + "loss": 17.3628, + "step": 29168 + }, + { + "epoch": 0.5331858811486647, + "grad_norm": 6.7751911834935195, + "learning_rate": 4.70591585508357e-06, + "loss": 17.6679, + "step": 29169 + }, + { + "epoch": 0.5332041603451112, + "grad_norm": 6.4036925971669225, + "learning_rate": 4.705620356518729e-06, + "loss": 17.7652, + "step": 29170 + }, + { + "epoch": 0.5332224395415578, + "grad_norm": 5.620878637163123, + "learning_rate": 4.705324858985662e-06, + "loss": 17.3258, + "step": 29171 + }, + { + "epoch": 0.5332407187380043, + "grad_norm": 6.484494676186936, + "learning_rate": 
4.705029362485407e-06, + "loss": 17.6879, + "step": 29172 + }, + { + "epoch": 0.5332589979344508, + "grad_norm": 6.126985357715734, + "learning_rate": 4.704733867018999e-06, + "loss": 17.608, + "step": 29173 + }, + { + "epoch": 0.5332772771308973, + "grad_norm": 6.061217895217802, + "learning_rate": 4.704438372587471e-06, + "loss": 17.3942, + "step": 29174 + }, + { + "epoch": 0.5332955563273438, + "grad_norm": 6.91946908138377, + "learning_rate": 4.704142879191862e-06, + "loss": 17.885, + "step": 29175 + }, + { + "epoch": 0.5333138355237904, + "grad_norm": 5.638756120268821, + "learning_rate": 4.703847386833207e-06, + "loss": 17.4492, + "step": 29176 + }, + { + "epoch": 0.5333321147202369, + "grad_norm": 7.360688988167568, + "learning_rate": 4.70355189551254e-06, + "loss": 17.8167, + "step": 29177 + }, + { + "epoch": 0.5333503939166834, + "grad_norm": 5.554017683149972, + "learning_rate": 4.7032564052309e-06, + "loss": 17.3239, + "step": 29178 + }, + { + "epoch": 0.53336867311313, + "grad_norm": 7.220440437032762, + "learning_rate": 4.7029609159893196e-06, + "loss": 17.9182, + "step": 29179 + }, + { + "epoch": 0.5333869523095764, + "grad_norm": 5.617028454349031, + "learning_rate": 4.702665427788833e-06, + "loss": 17.1134, + "step": 29180 + }, + { + "epoch": 0.533405231506023, + "grad_norm": 7.210935894788144, + "learning_rate": 4.702369940630482e-06, + "loss": 17.7359, + "step": 29181 + }, + { + "epoch": 0.5334235107024695, + "grad_norm": 5.442315779787362, + "learning_rate": 4.702074454515296e-06, + "loss": 17.3984, + "step": 29182 + }, + { + "epoch": 0.533441789898916, + "grad_norm": 5.732815744973244, + "learning_rate": 4.701778969444315e-06, + "loss": 17.2546, + "step": 29183 + }, + { + "epoch": 0.5334600690953626, + "grad_norm": 6.875816693462245, + "learning_rate": 4.701483485418571e-06, + "loss": 17.7802, + "step": 29184 + }, + { + "epoch": 0.5334783482918091, + "grad_norm": 5.564279309972907, + "learning_rate": 4.701188002439101e-06, + "loss": 17.452, + "step": 29185 + }, + { + "epoch": 0.5334966274882557, + "grad_norm": 5.306153078862511, + "learning_rate": 4.700892520506944e-06, + "loss": 17.2651, + "step": 29186 + }, + { + "epoch": 0.5335149066847021, + "grad_norm": 14.53928973098706, + "learning_rate": 4.700597039623133e-06, + "loss": 17.6077, + "step": 29187 + }, + { + "epoch": 0.5335331858811486, + "grad_norm": 6.369984681166308, + "learning_rate": 4.7003015597887e-06, + "loss": 17.5793, + "step": 29188 + }, + { + "epoch": 0.5335514650775952, + "grad_norm": 7.481302517731318, + "learning_rate": 4.700006081004685e-06, + "loss": 18.1637, + "step": 29189 + }, + { + "epoch": 0.5335697442740417, + "grad_norm": 6.582023609889075, + "learning_rate": 4.699710603272125e-06, + "loss": 17.6102, + "step": 29190 + }, + { + "epoch": 0.5335880234704883, + "grad_norm": 6.980630320109185, + "learning_rate": 4.699415126592051e-06, + "loss": 17.8523, + "step": 29191 + }, + { + "epoch": 0.5336063026669348, + "grad_norm": 5.382278433913923, + "learning_rate": 4.699119650965502e-06, + "loss": 16.9737, + "step": 29192 + }, + { + "epoch": 0.5336245818633812, + "grad_norm": 5.500432465719919, + "learning_rate": 4.698824176393512e-06, + "loss": 17.2951, + "step": 29193 + }, + { + "epoch": 0.5336428610598278, + "grad_norm": 5.730394696563894, + "learning_rate": 4.698528702877116e-06, + "loss": 17.4959, + "step": 29194 + }, + { + "epoch": 0.5336611402562743, + "grad_norm": 5.30363362092714, + "learning_rate": 4.6982332304173524e-06, + "loss": 17.0591, + "step": 29195 + }, + { + "epoch": 
0.5336794194527209, + "grad_norm": 5.358670228367869, + "learning_rate": 4.697937759015254e-06, + "loss": 17.1578, + "step": 29196 + }, + { + "epoch": 0.5336976986491674, + "grad_norm": 8.413072866315387, + "learning_rate": 4.697642288671858e-06, + "loss": 18.0521, + "step": 29197 + }, + { + "epoch": 0.5337159778456139, + "grad_norm": 4.688684094204592, + "learning_rate": 4.697346819388201e-06, + "loss": 16.8042, + "step": 29198 + }, + { + "epoch": 0.5337342570420605, + "grad_norm": 6.053872229728858, + "learning_rate": 4.697051351165314e-06, + "loss": 17.4931, + "step": 29199 + }, + { + "epoch": 0.5337525362385069, + "grad_norm": 5.765015298125997, + "learning_rate": 4.6967558840042395e-06, + "loss": 17.0297, + "step": 29200 + }, + { + "epoch": 0.5337708154349535, + "grad_norm": 5.818301652851261, + "learning_rate": 4.696460417906007e-06, + "loss": 17.0998, + "step": 29201 + }, + { + "epoch": 0.5337890946314, + "grad_norm": 5.126136644576303, + "learning_rate": 4.696164952871654e-06, + "loss": 17.2133, + "step": 29202 + }, + { + "epoch": 0.5338073738278465, + "grad_norm": 5.614543121003322, + "learning_rate": 4.695869488902218e-06, + "loss": 17.2043, + "step": 29203 + }, + { + "epoch": 0.5338256530242931, + "grad_norm": 5.950383837368383, + "learning_rate": 4.695574025998733e-06, + "loss": 17.3344, + "step": 29204 + }, + { + "epoch": 0.5338439322207396, + "grad_norm": 6.149957103878116, + "learning_rate": 4.6952785641622326e-06, + "loss": 17.3925, + "step": 29205 + }, + { + "epoch": 0.5338622114171861, + "grad_norm": 5.709616959971137, + "learning_rate": 4.694983103393756e-06, + "loss": 17.1373, + "step": 29206 + }, + { + "epoch": 0.5338804906136326, + "grad_norm": 7.2074863095903705, + "learning_rate": 4.694687643694338e-06, + "loss": 17.7656, + "step": 29207 + }, + { + "epoch": 0.5338987698100791, + "grad_norm": 6.052629056621195, + "learning_rate": 4.694392185065011e-06, + "loss": 17.3853, + "step": 29208 + }, + { + "epoch": 0.5339170490065257, + "grad_norm": 6.6373186264798445, + "learning_rate": 4.694096727506815e-06, + "loss": 17.3346, + "step": 29209 + }, + { + "epoch": 0.5339353282029722, + "grad_norm": 6.387124795224236, + "learning_rate": 4.693801271020783e-06, + "loss": 17.9033, + "step": 29210 + }, + { + "epoch": 0.5339536073994188, + "grad_norm": 6.223251258431593, + "learning_rate": 4.693505815607949e-06, + "loss": 17.4301, + "step": 29211 + }, + { + "epoch": 0.5339718865958653, + "grad_norm": 6.576451745258645, + "learning_rate": 4.693210361269352e-06, + "loss": 17.6064, + "step": 29212 + }, + { + "epoch": 0.5339901657923117, + "grad_norm": 6.325741136202451, + "learning_rate": 4.692914908006026e-06, + "loss": 17.3868, + "step": 29213 + }, + { + "epoch": 0.5340084449887583, + "grad_norm": 5.453538743658066, + "learning_rate": 4.692619455819008e-06, + "loss": 17.3604, + "step": 29214 + }, + { + "epoch": 0.5340267241852048, + "grad_norm": 6.395024201833934, + "learning_rate": 4.69232400470933e-06, + "loss": 17.4291, + "step": 29215 + }, + { + "epoch": 0.5340450033816514, + "grad_norm": 5.261791959142509, + "learning_rate": 4.692028554678029e-06, + "loss": 17.1114, + "step": 29216 + }, + { + "epoch": 0.5340632825780979, + "grad_norm": 6.792270631815648, + "learning_rate": 4.691733105726144e-06, + "loss": 17.5678, + "step": 29217 + }, + { + "epoch": 0.5340815617745444, + "grad_norm": 6.80209010429963, + "learning_rate": 4.691437657854707e-06, + "loss": 17.6784, + "step": 29218 + }, + { + "epoch": 0.534099840970991, + "grad_norm": 6.498573399814959, + "learning_rate": 
4.691142211064753e-06, + "loss": 17.5499, + "step": 29219 + }, + { + "epoch": 0.5341181201674374, + "grad_norm": 6.3261239509177045, + "learning_rate": 4.690846765357319e-06, + "loss": 17.424, + "step": 29220 + }, + { + "epoch": 0.534136399363884, + "grad_norm": 6.458664907486099, + "learning_rate": 4.690551320733442e-06, + "loss": 17.4369, + "step": 29221 + }, + { + "epoch": 0.5341546785603305, + "grad_norm": 6.926856980821773, + "learning_rate": 4.690255877194152e-06, + "loss": 17.9459, + "step": 29222 + }, + { + "epoch": 0.534172957756777, + "grad_norm": 5.482993153007825, + "learning_rate": 4.6899604347404925e-06, + "loss": 17.0677, + "step": 29223 + }, + { + "epoch": 0.5341912369532236, + "grad_norm": 5.95549853159849, + "learning_rate": 4.689664993373493e-06, + "loss": 17.4105, + "step": 29224 + }, + { + "epoch": 0.53420951614967, + "grad_norm": 5.7026105701250716, + "learning_rate": 4.689369553094189e-06, + "loss": 17.3671, + "step": 29225 + }, + { + "epoch": 0.5342277953461165, + "grad_norm": 7.3686976538510685, + "learning_rate": 4.689074113903621e-06, + "loss": 17.7183, + "step": 29226 + }, + { + "epoch": 0.5342460745425631, + "grad_norm": 6.252171421143555, + "learning_rate": 4.688778675802818e-06, + "loss": 17.5999, + "step": 29227 + }, + { + "epoch": 0.5342643537390096, + "grad_norm": 5.786479766304065, + "learning_rate": 4.688483238792822e-06, + "loss": 17.4693, + "step": 29228 + }, + { + "epoch": 0.5342826329354562, + "grad_norm": 6.364261824826348, + "learning_rate": 4.688187802874663e-06, + "loss": 17.3688, + "step": 29229 + }, + { + "epoch": 0.5343009121319027, + "grad_norm": 6.8465039000472165, + "learning_rate": 4.6878923680493785e-06, + "loss": 17.9025, + "step": 29230 + }, + { + "epoch": 0.5343191913283492, + "grad_norm": 7.833967474762711, + "learning_rate": 4.687596934318006e-06, + "loss": 17.7981, + "step": 29231 + }, + { + "epoch": 0.5343374705247957, + "grad_norm": 6.664933874321917, + "learning_rate": 4.687301501681579e-06, + "loss": 17.7476, + "step": 29232 + }, + { + "epoch": 0.5343557497212422, + "grad_norm": 6.091551360699175, + "learning_rate": 4.687006070141131e-06, + "loss": 17.5468, + "step": 29233 + }, + { + "epoch": 0.5343740289176888, + "grad_norm": 6.8923573712614585, + "learning_rate": 4.686710639697701e-06, + "loss": 17.692, + "step": 29234 + }, + { + "epoch": 0.5343923081141353, + "grad_norm": 6.937381768672017, + "learning_rate": 4.686415210352324e-06, + "loss": 17.6177, + "step": 29235 + }, + { + "epoch": 0.5344105873105818, + "grad_norm": 6.084408330732266, + "learning_rate": 4.6861197821060315e-06, + "loss": 17.4911, + "step": 29236 + }, + { + "epoch": 0.5344288665070284, + "grad_norm": 7.44522130052418, + "learning_rate": 4.685824354959865e-06, + "loss": 17.7589, + "step": 29237 + }, + { + "epoch": 0.5344471457034748, + "grad_norm": 4.454374701917882, + "learning_rate": 4.685528928914855e-06, + "loss": 16.7307, + "step": 29238 + }, + { + "epoch": 0.5344654248999214, + "grad_norm": 6.715146535940895, + "learning_rate": 4.685233503972039e-06, + "loss": 17.6364, + "step": 29239 + }, + { + "epoch": 0.5344837040963679, + "grad_norm": 7.0344452900829735, + "learning_rate": 4.684938080132454e-06, + "loss": 17.7868, + "step": 29240 + }, + { + "epoch": 0.5345019832928144, + "grad_norm": 6.1756329523202345, + "learning_rate": 4.684642657397132e-06, + "loss": 17.5986, + "step": 29241 + }, + { + "epoch": 0.534520262489261, + "grad_norm": 5.955811314738884, + "learning_rate": 4.684347235767111e-06, + "loss": 17.3416, + "step": 29242 + }, + { + "epoch": 
0.5345385416857075, + "grad_norm": 5.508464569377075, + "learning_rate": 4.6840518152434245e-06, + "loss": 17.2384, + "step": 29243 + }, + { + "epoch": 0.5345568208821541, + "grad_norm": 6.255405359310729, + "learning_rate": 4.6837563958271085e-06, + "loss": 17.5851, + "step": 29244 + }, + { + "epoch": 0.5345751000786005, + "grad_norm": 5.702873854884792, + "learning_rate": 4.6834609775192e-06, + "loss": 17.2959, + "step": 29245 + }, + { + "epoch": 0.534593379275047, + "grad_norm": 5.607666762210169, + "learning_rate": 4.683165560320735e-06, + "loss": 17.1606, + "step": 29246 + }, + { + "epoch": 0.5346116584714936, + "grad_norm": 5.07753553773357, + "learning_rate": 4.682870144232744e-06, + "loss": 17.0299, + "step": 29247 + }, + { + "epoch": 0.5346299376679401, + "grad_norm": 5.812528319347235, + "learning_rate": 4.682574729256266e-06, + "loss": 17.3234, + "step": 29248 + }, + { + "epoch": 0.5346482168643867, + "grad_norm": 5.261893376053922, + "learning_rate": 4.682279315392339e-06, + "loss": 17.1088, + "step": 29249 + }, + { + "epoch": 0.5346664960608332, + "grad_norm": 8.080050959436333, + "learning_rate": 4.681983902641992e-06, + "loss": 17.9734, + "step": 29250 + }, + { + "epoch": 0.5346847752572796, + "grad_norm": 6.2807987098320295, + "learning_rate": 4.681688491006267e-06, + "loss": 17.4539, + "step": 29251 + }, + { + "epoch": 0.5347030544537262, + "grad_norm": 7.599171447651194, + "learning_rate": 4.681393080486194e-06, + "loss": 18.1529, + "step": 29252 + }, + { + "epoch": 0.5347213336501727, + "grad_norm": 6.1447741984974575, + "learning_rate": 4.68109767108281e-06, + "loss": 17.4327, + "step": 29253 + }, + { + "epoch": 0.5347396128466193, + "grad_norm": 6.389357331246572, + "learning_rate": 4.680802262797153e-06, + "loss": 17.5275, + "step": 29254 + }, + { + "epoch": 0.5347578920430658, + "grad_norm": 5.745397360469146, + "learning_rate": 4.6805068556302555e-06, + "loss": 17.2144, + "step": 29255 + }, + { + "epoch": 0.5347761712395123, + "grad_norm": 5.640009675799337, + "learning_rate": 4.680211449583153e-06, + "loss": 17.225, + "step": 29256 + }, + { + "epoch": 0.5347944504359589, + "grad_norm": 6.701663513066303, + "learning_rate": 4.679916044656883e-06, + "loss": 17.5104, + "step": 29257 + }, + { + "epoch": 0.5348127296324053, + "grad_norm": 6.758906431610483, + "learning_rate": 4.679620640852477e-06, + "loss": 17.8155, + "step": 29258 + }, + { + "epoch": 0.5348310088288519, + "grad_norm": 5.804176758180699, + "learning_rate": 4.679325238170977e-06, + "loss": 17.1583, + "step": 29259 + }, + { + "epoch": 0.5348492880252984, + "grad_norm": 6.456709927031515, + "learning_rate": 4.679029836613411e-06, + "loss": 17.827, + "step": 29260 + }, + { + "epoch": 0.5348675672217449, + "grad_norm": 5.544919597973098, + "learning_rate": 4.678734436180818e-06, + "loss": 17.2689, + "step": 29261 + }, + { + "epoch": 0.5348858464181915, + "grad_norm": 6.694230767897984, + "learning_rate": 4.678439036874234e-06, + "loss": 17.6911, + "step": 29262 + }, + { + "epoch": 0.534904125614638, + "grad_norm": 7.383500252683421, + "learning_rate": 4.6781436386946935e-06, + "loss": 17.6856, + "step": 29263 + }, + { + "epoch": 0.5349224048110846, + "grad_norm": 6.969245483848144, + "learning_rate": 4.6778482416432295e-06, + "loss": 17.3148, + "step": 29264 + }, + { + "epoch": 0.534940684007531, + "grad_norm": 6.455067519609029, + "learning_rate": 4.677552845720881e-06, + "loss": 17.6421, + "step": 29265 + }, + { + "epoch": 0.5349589632039775, + "grad_norm": 5.647349330468523, + "learning_rate": 
4.677257450928682e-06, + "loss": 17.0891, + "step": 29266 + }, + { + "epoch": 0.5349772424004241, + "grad_norm": 5.590856780988894, + "learning_rate": 4.676962057267667e-06, + "loss": 17.1274, + "step": 29267 + }, + { + "epoch": 0.5349955215968706, + "grad_norm": 8.717455084846353, + "learning_rate": 4.676666664738873e-06, + "loss": 17.8971, + "step": 29268 + }, + { + "epoch": 0.5350138007933172, + "grad_norm": 7.189686572730251, + "learning_rate": 4.676371273343333e-06, + "loss": 17.9889, + "step": 29269 + }, + { + "epoch": 0.5350320799897637, + "grad_norm": 7.806460932605605, + "learning_rate": 4.676075883082083e-06, + "loss": 18.0796, + "step": 29270 + }, + { + "epoch": 0.5350503591862101, + "grad_norm": 7.254536934137691, + "learning_rate": 4.675780493956161e-06, + "loss": 17.829, + "step": 29271 + }, + { + "epoch": 0.5350686383826567, + "grad_norm": 8.41694626434146, + "learning_rate": 4.6754851059666e-06, + "loss": 18.4648, + "step": 29272 + }, + { + "epoch": 0.5350869175791032, + "grad_norm": 5.5703289626038055, + "learning_rate": 4.6751897191144335e-06, + "loss": 17.2874, + "step": 29273 + }, + { + "epoch": 0.5351051967755498, + "grad_norm": 5.408001157120569, + "learning_rate": 4.6748943334007e-06, + "loss": 17.1824, + "step": 29274 + }, + { + "epoch": 0.5351234759719963, + "grad_norm": 5.930717778294631, + "learning_rate": 4.674598948826432e-06, + "loss": 16.822, + "step": 29275 + }, + { + "epoch": 0.5351417551684428, + "grad_norm": 6.642122104741229, + "learning_rate": 4.67430356539267e-06, + "loss": 17.3118, + "step": 29276 + }, + { + "epoch": 0.5351600343648893, + "grad_norm": 6.719707459775767, + "learning_rate": 4.674008183100445e-06, + "loss": 17.6993, + "step": 29277 + }, + { + "epoch": 0.5351783135613358, + "grad_norm": 6.797813782800309, + "learning_rate": 4.67371280195079e-06, + "loss": 17.8031, + "step": 29278 + }, + { + "epoch": 0.5351965927577824, + "grad_norm": 5.928483262475636, + "learning_rate": 4.673417421944746e-06, + "loss": 17.1513, + "step": 29279 + }, + { + "epoch": 0.5352148719542289, + "grad_norm": 5.708362227727168, + "learning_rate": 4.673122043083345e-06, + "loss": 17.2476, + "step": 29280 + }, + { + "epoch": 0.5352331511506754, + "grad_norm": 6.456114185780829, + "learning_rate": 4.6728266653676225e-06, + "loss": 17.3815, + "step": 29281 + }, + { + "epoch": 0.535251430347122, + "grad_norm": 6.649650343554105, + "learning_rate": 4.6725312887986154e-06, + "loss": 17.2747, + "step": 29282 + }, + { + "epoch": 0.5352697095435685, + "grad_norm": 5.574568616832653, + "learning_rate": 4.672235913377357e-06, + "loss": 17.35, + "step": 29283 + }, + { + "epoch": 0.535287988740015, + "grad_norm": 5.443705554389813, + "learning_rate": 4.671940539104881e-06, + "loss": 17.3226, + "step": 29284 + }, + { + "epoch": 0.5353062679364615, + "grad_norm": 5.73942444897739, + "learning_rate": 4.6716451659822284e-06, + "loss": 17.2158, + "step": 29285 + }, + { + "epoch": 0.535324547132908, + "grad_norm": 6.801894375705081, + "learning_rate": 4.67134979401043e-06, + "loss": 17.5671, + "step": 29286 + }, + { + "epoch": 0.5353428263293546, + "grad_norm": 6.290705725065143, + "learning_rate": 4.671054423190521e-06, + "loss": 17.4168, + "step": 29287 + }, + { + "epoch": 0.5353611055258011, + "grad_norm": 5.743885114201012, + "learning_rate": 4.670759053523538e-06, + "loss": 17.2939, + "step": 29288 + }, + { + "epoch": 0.5353793847222477, + "grad_norm": 5.7248347951083245, + "learning_rate": 4.670463685010514e-06, + "loss": 17.2367, + "step": 29289 + }, + { + "epoch": 
0.5353976639186941, + "grad_norm": 7.111293871021303, + "learning_rate": 4.670168317652491e-06, + "loss": 17.7536, + "step": 29290 + }, + { + "epoch": 0.5354159431151406, + "grad_norm": 8.535309486135604, + "learning_rate": 4.669872951450497e-06, + "loss": 18.1195, + "step": 29291 + }, + { + "epoch": 0.5354342223115872, + "grad_norm": 5.840101600532595, + "learning_rate": 4.669577586405568e-06, + "loss": 17.271, + "step": 29292 + }, + { + "epoch": 0.5354525015080337, + "grad_norm": 6.973643897993268, + "learning_rate": 4.669282222518743e-06, + "loss": 17.6406, + "step": 29293 + }, + { + "epoch": 0.5354707807044802, + "grad_norm": 6.14925160338331, + "learning_rate": 4.6689868597910555e-06, + "loss": 17.2322, + "step": 29294 + }, + { + "epoch": 0.5354890599009268, + "grad_norm": 6.997981700307357, + "learning_rate": 4.668691498223537e-06, + "loss": 17.5883, + "step": 29295 + }, + { + "epoch": 0.5355073390973732, + "grad_norm": 7.53567212091816, + "learning_rate": 4.668396137817231e-06, + "loss": 17.8659, + "step": 29296 + }, + { + "epoch": 0.5355256182938198, + "grad_norm": 5.8336664721152065, + "learning_rate": 4.668100778573164e-06, + "loss": 17.2724, + "step": 29297 + }, + { + "epoch": 0.5355438974902663, + "grad_norm": 5.951920364049022, + "learning_rate": 4.667805420492375e-06, + "loss": 17.2534, + "step": 29298 + }, + { + "epoch": 0.5355621766867128, + "grad_norm": 5.349078049617566, + "learning_rate": 4.6675100635759015e-06, + "loss": 17.33, + "step": 29299 + }, + { + "epoch": 0.5355804558831594, + "grad_norm": 5.181484811256063, + "learning_rate": 4.667214707824776e-06, + "loss": 16.9174, + "step": 29300 + }, + { + "epoch": 0.5355987350796059, + "grad_norm": 8.621212699436843, + "learning_rate": 4.6669193532400326e-06, + "loss": 17.8028, + "step": 29301 + }, + { + "epoch": 0.5356170142760525, + "grad_norm": 6.008305903796612, + "learning_rate": 4.666623999822708e-06, + "loss": 17.0339, + "step": 29302 + }, + { + "epoch": 0.5356352934724989, + "grad_norm": 7.774715545069141, + "learning_rate": 4.666328647573837e-06, + "loss": 17.7492, + "step": 29303 + }, + { + "epoch": 0.5356535726689454, + "grad_norm": 6.096256932202169, + "learning_rate": 4.666033296494457e-06, + "loss": 17.556, + "step": 29304 + }, + { + "epoch": 0.535671851865392, + "grad_norm": 6.884931342989107, + "learning_rate": 4.665737946585602e-06, + "loss": 17.2523, + "step": 29305 + }, + { + "epoch": 0.5356901310618385, + "grad_norm": 6.185112709333423, + "learning_rate": 4.665442597848303e-06, + "loss": 17.6004, + "step": 29306 + }, + { + "epoch": 0.5357084102582851, + "grad_norm": 7.236608712566107, + "learning_rate": 4.665147250283601e-06, + "loss": 17.9588, + "step": 29307 + }, + { + "epoch": 0.5357266894547316, + "grad_norm": 5.671268378623509, + "learning_rate": 4.6648519038925285e-06, + "loss": 17.0656, + "step": 29308 + }, + { + "epoch": 0.535744968651178, + "grad_norm": 8.802022106221187, + "learning_rate": 4.66455655867612e-06, + "loss": 17.9557, + "step": 29309 + }, + { + "epoch": 0.5357632478476246, + "grad_norm": 5.864360534832958, + "learning_rate": 4.664261214635414e-06, + "loss": 17.3295, + "step": 29310 + }, + { + "epoch": 0.5357815270440711, + "grad_norm": 7.331905969731185, + "learning_rate": 4.663965871771441e-06, + "loss": 18.0535, + "step": 29311 + }, + { + "epoch": 0.5357998062405177, + "grad_norm": 7.08138413712087, + "learning_rate": 4.663670530085239e-06, + "loss": 17.7963, + "step": 29312 + }, + { + "epoch": 0.5358180854369642, + "grad_norm": 5.424243891684073, + "learning_rate": 
4.663375189577843e-06, + "loss": 17.1795, + "step": 29313 + }, + { + "epoch": 0.5358363646334107, + "grad_norm": 6.239865055155216, + "learning_rate": 4.66307985025029e-06, + "loss": 17.2509, + "step": 29314 + }, + { + "epoch": 0.5358546438298573, + "grad_norm": 5.735952050430516, + "learning_rate": 4.6627845121036084e-06, + "loss": 17.1316, + "step": 29315 + }, + { + "epoch": 0.5358729230263037, + "grad_norm": 5.565925724658268, + "learning_rate": 4.66248917513884e-06, + "loss": 17.1876, + "step": 29316 + }, + { + "epoch": 0.5358912022227503, + "grad_norm": 6.106526816844541, + "learning_rate": 4.662193839357019e-06, + "loss": 17.2983, + "step": 29317 + }, + { + "epoch": 0.5359094814191968, + "grad_norm": 6.100422188196055, + "learning_rate": 4.661898504759176e-06, + "loss": 17.3339, + "step": 29318 + }, + { + "epoch": 0.5359277606156433, + "grad_norm": 9.067345685594086, + "learning_rate": 4.661603171346352e-06, + "loss": 18.2016, + "step": 29319 + }, + { + "epoch": 0.5359460398120899, + "grad_norm": 5.759043408835573, + "learning_rate": 4.661307839119579e-06, + "loss": 17.36, + "step": 29320 + }, + { + "epoch": 0.5359643190085364, + "grad_norm": 7.074325023844372, + "learning_rate": 4.661012508079892e-06, + "loss": 17.9006, + "step": 29321 + }, + { + "epoch": 0.535982598204983, + "grad_norm": 4.682188078011207, + "learning_rate": 4.660717178228328e-06, + "loss": 16.8074, + "step": 29322 + }, + { + "epoch": 0.5360008774014294, + "grad_norm": 5.932596754632673, + "learning_rate": 4.660421849565919e-06, + "loss": 17.3143, + "step": 29323 + }, + { + "epoch": 0.5360191565978759, + "grad_norm": 5.032828189967548, + "learning_rate": 4.660126522093704e-06, + "loss": 17.1526, + "step": 29324 + }, + { + "epoch": 0.5360374357943225, + "grad_norm": 5.933551125599635, + "learning_rate": 4.659831195812716e-06, + "loss": 17.634, + "step": 29325 + }, + { + "epoch": 0.536055714990769, + "grad_norm": 5.250106738628413, + "learning_rate": 4.659535870723988e-06, + "loss": 16.9836, + "step": 29326 + }, + { + "epoch": 0.5360739941872156, + "grad_norm": 7.230189637800085, + "learning_rate": 4.65924054682856e-06, + "loss": 17.9238, + "step": 29327 + }, + { + "epoch": 0.536092273383662, + "grad_norm": 7.002282883504601, + "learning_rate": 4.658945224127462e-06, + "loss": 17.5911, + "step": 29328 + }, + { + "epoch": 0.5361105525801085, + "grad_norm": 5.822483441629879, + "learning_rate": 4.65864990262173e-06, + "loss": 17.3025, + "step": 29329 + }, + { + "epoch": 0.5361288317765551, + "grad_norm": 6.458043066053483, + "learning_rate": 4.658354582312405e-06, + "loss": 17.4127, + "step": 29330 + }, + { + "epoch": 0.5361471109730016, + "grad_norm": 7.808095657410022, + "learning_rate": 4.658059263200516e-06, + "loss": 18.3442, + "step": 29331 + }, + { + "epoch": 0.5361653901694482, + "grad_norm": 5.579122407499592, + "learning_rate": 4.657763945287097e-06, + "loss": 17.3155, + "step": 29332 + }, + { + "epoch": 0.5361836693658947, + "grad_norm": 5.269806281522528, + "learning_rate": 4.657468628573188e-06, + "loss": 17.0414, + "step": 29333 + }, + { + "epoch": 0.5362019485623412, + "grad_norm": 7.400005842810225, + "learning_rate": 4.65717331305982e-06, + "loss": 18.1298, + "step": 29334 + }, + { + "epoch": 0.5362202277587877, + "grad_norm": 8.19494904366371, + "learning_rate": 4.656877998748032e-06, + "loss": 17.7911, + "step": 29335 + }, + { + "epoch": 0.5362385069552342, + "grad_norm": 5.395202795013228, + "learning_rate": 4.656582685638857e-06, + "loss": 17.2497, + "step": 29336 + }, + { + "epoch": 
0.5362567861516808, + "grad_norm": 5.708600503897889, + "learning_rate": 4.656287373733328e-06, + "loss": 17.2469, + "step": 29337 + }, + { + "epoch": 0.5362750653481273, + "grad_norm": 5.47260203117835, + "learning_rate": 4.6559920630324825e-06, + "loss": 17.0903, + "step": 29338 + }, + { + "epoch": 0.5362933445445738, + "grad_norm": 6.778494362597934, + "learning_rate": 4.655696753537356e-06, + "loss": 17.8149, + "step": 29339 + }, + { + "epoch": 0.5363116237410204, + "grad_norm": 7.044672217424278, + "learning_rate": 4.655401445248981e-06, + "loss": 17.8323, + "step": 29340 + }, + { + "epoch": 0.5363299029374669, + "grad_norm": 5.916865508538785, + "learning_rate": 4.655106138168395e-06, + "loss": 17.3129, + "step": 29341 + }, + { + "epoch": 0.5363481821339134, + "grad_norm": 6.2823091343334525, + "learning_rate": 4.654810832296632e-06, + "loss": 17.1717, + "step": 29342 + }, + { + "epoch": 0.5363664613303599, + "grad_norm": 10.37997974822832, + "learning_rate": 4.654515527634725e-06, + "loss": 17.3777, + "step": 29343 + }, + { + "epoch": 0.5363847405268064, + "grad_norm": 6.325962697313978, + "learning_rate": 4.654220224183715e-06, + "loss": 17.6281, + "step": 29344 + }, + { + "epoch": 0.536403019723253, + "grad_norm": 6.839796889503244, + "learning_rate": 4.653924921944631e-06, + "loss": 17.6442, + "step": 29345 + }, + { + "epoch": 0.5364212989196995, + "grad_norm": 5.720004386686231, + "learning_rate": 4.653629620918509e-06, + "loss": 17.0727, + "step": 29346 + }, + { + "epoch": 0.5364395781161461, + "grad_norm": 7.202606278330624, + "learning_rate": 4.653334321106387e-06, + "loss": 17.9296, + "step": 29347 + }, + { + "epoch": 0.5364578573125925, + "grad_norm": 5.618062835825139, + "learning_rate": 4.653039022509298e-06, + "loss": 17.0657, + "step": 29348 + }, + { + "epoch": 0.536476136509039, + "grad_norm": 5.433691777755176, + "learning_rate": 4.652743725128275e-06, + "loss": 17.0539, + "step": 29349 + }, + { + "epoch": 0.5364944157054856, + "grad_norm": 6.402264889054256, + "learning_rate": 4.652448428964358e-06, + "loss": 17.3862, + "step": 29350 + }, + { + "epoch": 0.5365126949019321, + "grad_norm": 7.8667460832180724, + "learning_rate": 4.652153134018577e-06, + "loss": 18.1236, + "step": 29351 + }, + { + "epoch": 0.5365309740983787, + "grad_norm": 6.508666560921784, + "learning_rate": 4.651857840291969e-06, + "loss": 17.5245, + "step": 29352 + }, + { + "epoch": 0.5365492532948252, + "grad_norm": 7.228950229054558, + "learning_rate": 4.651562547785571e-06, + "loss": 18.0504, + "step": 29353 + }, + { + "epoch": 0.5365675324912716, + "grad_norm": 6.948779196738871, + "learning_rate": 4.651267256500413e-06, + "loss": 17.7902, + "step": 29354 + }, + { + "epoch": 0.5365858116877182, + "grad_norm": 5.746321231918124, + "learning_rate": 4.650971966437537e-06, + "loss": 17.2835, + "step": 29355 + }, + { + "epoch": 0.5366040908841647, + "grad_norm": 6.279429001653878, + "learning_rate": 4.650676677597972e-06, + "loss": 17.4677, + "step": 29356 + }, + { + "epoch": 0.5366223700806113, + "grad_norm": 7.049565430130494, + "learning_rate": 4.650381389982754e-06, + "loss": 17.4014, + "step": 29357 + }, + { + "epoch": 0.5366406492770578, + "grad_norm": 6.241784604979106, + "learning_rate": 4.6500861035929195e-06, + "loss": 17.1587, + "step": 29358 + }, + { + "epoch": 0.5366589284735043, + "grad_norm": 5.489016065724621, + "learning_rate": 4.6497908184295044e-06, + "loss": 17.1332, + "step": 29359 + }, + { + "epoch": 0.5366772076699509, + "grad_norm": 6.850144707783792, + "learning_rate": 
4.649495534493539e-06, + "loss": 18.0111, + "step": 29360 + }, + { + "epoch": 0.5366954868663973, + "grad_norm": 6.81380763397933, + "learning_rate": 4.649200251786063e-06, + "loss": 17.415, + "step": 29361 + }, + { + "epoch": 0.5367137660628438, + "grad_norm": 9.109713823789997, + "learning_rate": 4.648904970308111e-06, + "loss": 18.4835, + "step": 29362 + }, + { + "epoch": 0.5367320452592904, + "grad_norm": 6.870680971815996, + "learning_rate": 4.6486096900607145e-06, + "loss": 17.4854, + "step": 29363 + }, + { + "epoch": 0.5367503244557369, + "grad_norm": 7.654470596092475, + "learning_rate": 4.648314411044912e-06, + "loss": 17.8558, + "step": 29364 + }, + { + "epoch": 0.5367686036521835, + "grad_norm": 6.540666871477112, + "learning_rate": 4.648019133261735e-06, + "loss": 17.4551, + "step": 29365 + }, + { + "epoch": 0.53678688284863, + "grad_norm": 7.060905809863112, + "learning_rate": 4.647723856712222e-06, + "loss": 17.41, + "step": 29366 + }, + { + "epoch": 0.5368051620450764, + "grad_norm": 6.528052422282426, + "learning_rate": 4.6474285813974075e-06, + "loss": 17.6751, + "step": 29367 + }, + { + "epoch": 0.536823441241523, + "grad_norm": 5.822993964637325, + "learning_rate": 4.647133307318322e-06, + "loss": 17.2885, + "step": 29368 + }, + { + "epoch": 0.5368417204379695, + "grad_norm": 7.361284339128492, + "learning_rate": 4.646838034476007e-06, + "loss": 17.6867, + "step": 29369 + }, + { + "epoch": 0.5368599996344161, + "grad_norm": 8.702266070282723, + "learning_rate": 4.646542762871492e-06, + "loss": 18.0663, + "step": 29370 + }, + { + "epoch": 0.5368782788308626, + "grad_norm": 7.680265628120438, + "learning_rate": 4.646247492505813e-06, + "loss": 17.5257, + "step": 29371 + }, + { + "epoch": 0.5368965580273091, + "grad_norm": 6.756423681184469, + "learning_rate": 4.645952223380009e-06, + "loss": 17.6017, + "step": 29372 + }, + { + "epoch": 0.5369148372237557, + "grad_norm": 5.657934085083988, + "learning_rate": 4.645656955495111e-06, + "loss": 17.3026, + "step": 29373 + }, + { + "epoch": 0.5369331164202021, + "grad_norm": 5.797227153979704, + "learning_rate": 4.645361688852153e-06, + "loss": 17.2116, + "step": 29374 + }, + { + "epoch": 0.5369513956166487, + "grad_norm": 7.409386454521562, + "learning_rate": 4.645066423452171e-06, + "loss": 17.8287, + "step": 29375 + }, + { + "epoch": 0.5369696748130952, + "grad_norm": 6.785129146466672, + "learning_rate": 4.644771159296203e-06, + "loss": 17.5434, + "step": 29376 + }, + { + "epoch": 0.5369879540095417, + "grad_norm": 7.335645828702823, + "learning_rate": 4.644475896385278e-06, + "loss": 17.5533, + "step": 29377 + }, + { + "epoch": 0.5370062332059883, + "grad_norm": 5.843956419485704, + "learning_rate": 4.644180634720437e-06, + "loss": 17.1225, + "step": 29378 + }, + { + "epoch": 0.5370245124024348, + "grad_norm": 5.8218643680485815, + "learning_rate": 4.643885374302711e-06, + "loss": 17.4456, + "step": 29379 + }, + { + "epoch": 0.5370427915988814, + "grad_norm": 6.76200635501882, + "learning_rate": 4.643590115133135e-06, + "loss": 17.3823, + "step": 29380 + }, + { + "epoch": 0.5370610707953278, + "grad_norm": 5.566491416400289, + "learning_rate": 4.643294857212746e-06, + "loss": 17.1918, + "step": 29381 + }, + { + "epoch": 0.5370793499917743, + "grad_norm": 6.260268604229605, + "learning_rate": 4.642999600542576e-06, + "loss": 17.474, + "step": 29382 + }, + { + "epoch": 0.5370976291882209, + "grad_norm": 6.0218129939675835, + "learning_rate": 4.642704345123664e-06, + "loss": 17.4573, + "step": 29383 + }, + { + "epoch": 
0.5371159083846674, + "grad_norm": 6.530548656154416, + "learning_rate": 4.64240909095704e-06, + "loss": 17.5706, + "step": 29384 + }, + { + "epoch": 0.537134187581114, + "grad_norm": 6.071058171613956, + "learning_rate": 4.642113838043741e-06, + "loss": 17.3123, + "step": 29385 + }, + { + "epoch": 0.5371524667775605, + "grad_norm": 5.5482326896115595, + "learning_rate": 4.641818586384803e-06, + "loss": 17.1153, + "step": 29386 + }, + { + "epoch": 0.5371707459740069, + "grad_norm": 6.211957718250558, + "learning_rate": 4.64152333598126e-06, + "loss": 17.2345, + "step": 29387 + }, + { + "epoch": 0.5371890251704535, + "grad_norm": 5.699175348587522, + "learning_rate": 4.641228086834146e-06, + "loss": 17.0164, + "step": 29388 + }, + { + "epoch": 0.5372073043669, + "grad_norm": 6.010587503966536, + "learning_rate": 4.640932838944497e-06, + "loss": 17.2308, + "step": 29389 + }, + { + "epoch": 0.5372255835633466, + "grad_norm": 6.244570599895282, + "learning_rate": 4.640637592313347e-06, + "loss": 17.351, + "step": 29390 + }, + { + "epoch": 0.5372438627597931, + "grad_norm": 6.78333587828515, + "learning_rate": 4.64034234694173e-06, + "loss": 17.6876, + "step": 29391 + }, + { + "epoch": 0.5372621419562396, + "grad_norm": 6.678429894941688, + "learning_rate": 4.640047102830683e-06, + "loss": 17.5395, + "step": 29392 + }, + { + "epoch": 0.5372804211526861, + "grad_norm": 7.160387268459865, + "learning_rate": 4.63975185998124e-06, + "loss": 17.7461, + "step": 29393 + }, + { + "epoch": 0.5372987003491326, + "grad_norm": 6.526628635386126, + "learning_rate": 4.639456618394434e-06, + "loss": 17.4234, + "step": 29394 + }, + { + "epoch": 0.5373169795455792, + "grad_norm": 5.693856248389524, + "learning_rate": 4.639161378071303e-06, + "loss": 17.178, + "step": 29395 + }, + { + "epoch": 0.5373352587420257, + "grad_norm": 6.264029564666308, + "learning_rate": 4.638866139012879e-06, + "loss": 17.2177, + "step": 29396 + }, + { + "epoch": 0.5373535379384722, + "grad_norm": 5.949745882098424, + "learning_rate": 4.638570901220197e-06, + "loss": 17.4326, + "step": 29397 + }, + { + "epoch": 0.5373718171349188, + "grad_norm": 8.495532980691907, + "learning_rate": 4.638275664694295e-06, + "loss": 17.5507, + "step": 29398 + }, + { + "epoch": 0.5373900963313653, + "grad_norm": 8.76437569547379, + "learning_rate": 4.637980429436203e-06, + "loss": 18.227, + "step": 29399 + }, + { + "epoch": 0.5374083755278118, + "grad_norm": 5.973602951977648, + "learning_rate": 4.63768519544696e-06, + "loss": 17.5578, + "step": 29400 + }, + { + "epoch": 0.5374266547242583, + "grad_norm": 4.959636867896732, + "learning_rate": 4.637389962727598e-06, + "loss": 16.8575, + "step": 29401 + }, + { + "epoch": 0.5374449339207048, + "grad_norm": 6.858806662125633, + "learning_rate": 4.637094731279152e-06, + "loss": 18.0463, + "step": 29402 + }, + { + "epoch": 0.5374632131171514, + "grad_norm": 5.708286998701389, + "learning_rate": 4.6367995011026596e-06, + "loss": 17.1434, + "step": 29403 + }, + { + "epoch": 0.5374814923135979, + "grad_norm": 6.168848984205421, + "learning_rate": 4.636504272199153e-06, + "loss": 17.3139, + "step": 29404 + }, + { + "epoch": 0.5374997715100445, + "grad_norm": 5.401303803194577, + "learning_rate": 4.636209044569665e-06, + "loss": 17.1362, + "step": 29405 + }, + { + "epoch": 0.537518050706491, + "grad_norm": 7.160912187803219, + "learning_rate": 4.635913818215234e-06, + "loss": 17.6565, + "step": 29406 + }, + { + "epoch": 0.5375363299029374, + "grad_norm": 6.024653480766796, + "learning_rate": 
4.635618593136896e-06, + "loss": 17.083, + "step": 29407 + }, + { + "epoch": 0.537554609099384, + "grad_norm": 6.465684858822787, + "learning_rate": 4.63532336933568e-06, + "loss": 17.5774, + "step": 29408 + }, + { + "epoch": 0.5375728882958305, + "grad_norm": 6.705385820450828, + "learning_rate": 4.6350281468126255e-06, + "loss": 17.3118, + "step": 29409 + }, + { + "epoch": 0.5375911674922771, + "grad_norm": 6.960883044363678, + "learning_rate": 4.6347329255687654e-06, + "loss": 17.7671, + "step": 29410 + }, + { + "epoch": 0.5376094466887236, + "grad_norm": 5.949739040229357, + "learning_rate": 4.634437705605133e-06, + "loss": 17.1729, + "step": 29411 + }, + { + "epoch": 0.53762772588517, + "grad_norm": 8.008650712910079, + "learning_rate": 4.634142486922767e-06, + "loss": 18.3337, + "step": 29412 + }, + { + "epoch": 0.5376460050816166, + "grad_norm": 6.381688136674593, + "learning_rate": 4.633847269522698e-06, + "loss": 17.7893, + "step": 29413 + }, + { + "epoch": 0.5376642842780631, + "grad_norm": 6.129539403024211, + "learning_rate": 4.633552053405965e-06, + "loss": 17.2959, + "step": 29414 + }, + { + "epoch": 0.5376825634745097, + "grad_norm": 6.8010730349317345, + "learning_rate": 4.6332568385735985e-06, + "loss": 17.4192, + "step": 29415 + }, + { + "epoch": 0.5377008426709562, + "grad_norm": 7.749552261047914, + "learning_rate": 4.632961625026634e-06, + "loss": 17.8023, + "step": 29416 + }, + { + "epoch": 0.5377191218674027, + "grad_norm": 7.227173298445376, + "learning_rate": 4.632666412766109e-06, + "loss": 17.5013, + "step": 29417 + }, + { + "epoch": 0.5377374010638493, + "grad_norm": 6.589930903699898, + "learning_rate": 4.632371201793057e-06, + "loss": 18.0213, + "step": 29418 + }, + { + "epoch": 0.5377556802602957, + "grad_norm": 7.1108711693286955, + "learning_rate": 4.63207599210851e-06, + "loss": 17.7495, + "step": 29419 + }, + { + "epoch": 0.5377739594567423, + "grad_norm": 5.756677243444879, + "learning_rate": 4.6317807837135055e-06, + "loss": 17.1539, + "step": 29420 + }, + { + "epoch": 0.5377922386531888, + "grad_norm": 5.851982032084623, + "learning_rate": 4.631485576609078e-06, + "loss": 17.285, + "step": 29421 + }, + { + "epoch": 0.5378105178496353, + "grad_norm": 5.69020733797831, + "learning_rate": 4.63119037079626e-06, + "loss": 17.2473, + "step": 29422 + }, + { + "epoch": 0.5378287970460819, + "grad_norm": 6.562588678630628, + "learning_rate": 4.63089516627609e-06, + "loss": 17.6663, + "step": 29423 + }, + { + "epoch": 0.5378470762425284, + "grad_norm": 5.940777519911705, + "learning_rate": 4.630599963049599e-06, + "loss": 17.0966, + "step": 29424 + }, + { + "epoch": 0.537865355438975, + "grad_norm": 5.076806333792547, + "learning_rate": 4.630304761117822e-06, + "loss": 17.0302, + "step": 29425 + }, + { + "epoch": 0.5378836346354214, + "grad_norm": 5.255430406037884, + "learning_rate": 4.630009560481797e-06, + "loss": 17.2251, + "step": 29426 + }, + { + "epoch": 0.5379019138318679, + "grad_norm": 7.216822730025074, + "learning_rate": 4.629714361142555e-06, + "loss": 17.9002, + "step": 29427 + }, + { + "epoch": 0.5379201930283145, + "grad_norm": 6.092894704779971, + "learning_rate": 4.629419163101133e-06, + "loss": 17.4818, + "step": 29428 + }, + { + "epoch": 0.537938472224761, + "grad_norm": 5.758073355618983, + "learning_rate": 4.629123966358564e-06, + "loss": 17.1897, + "step": 29429 + }, + { + "epoch": 0.5379567514212075, + "grad_norm": 7.515799397134103, + "learning_rate": 4.628828770915882e-06, + "loss": 17.6309, + "step": 29430 + }, + { + "epoch": 
0.5379750306176541, + "grad_norm": 6.877101783486477, + "learning_rate": 4.628533576774125e-06, + "loss": 17.9767, + "step": 29431 + }, + { + "epoch": 0.5379933098141005, + "grad_norm": 5.612593666034816, + "learning_rate": 4.628238383934326e-06, + "loss": 17.1924, + "step": 29432 + }, + { + "epoch": 0.5380115890105471, + "grad_norm": 6.022177368941253, + "learning_rate": 4.627943192397517e-06, + "loss": 17.4696, + "step": 29433 + }, + { + "epoch": 0.5380298682069936, + "grad_norm": 6.01082937675309, + "learning_rate": 4.627648002164736e-06, + "loss": 17.101, + "step": 29434 + }, + { + "epoch": 0.5380481474034401, + "grad_norm": 6.908720313695068, + "learning_rate": 4.627352813237017e-06, + "loss": 17.7684, + "step": 29435 + }, + { + "epoch": 0.5380664265998867, + "grad_norm": 6.934726264491708, + "learning_rate": 4.627057625615392e-06, + "loss": 17.4942, + "step": 29436 + }, + { + "epoch": 0.5380847057963332, + "grad_norm": 6.767730609440472, + "learning_rate": 4.6267624393009e-06, + "loss": 17.6882, + "step": 29437 + }, + { + "epoch": 0.5381029849927798, + "grad_norm": 5.363970931806698, + "learning_rate": 4.626467254294572e-06, + "loss": 17.1753, + "step": 29438 + }, + { + "epoch": 0.5381212641892262, + "grad_norm": 5.659846540324078, + "learning_rate": 4.626172070597444e-06, + "loss": 17.3222, + "step": 29439 + }, + { + "epoch": 0.5381395433856727, + "grad_norm": 6.044542077164771, + "learning_rate": 4.625876888210551e-06, + "loss": 17.1571, + "step": 29440 + }, + { + "epoch": 0.5381578225821193, + "grad_norm": 6.570344416048931, + "learning_rate": 4.625581707134928e-06, + "loss": 17.7369, + "step": 29441 + }, + { + "epoch": 0.5381761017785658, + "grad_norm": 6.120390875854275, + "learning_rate": 4.625286527371606e-06, + "loss": 17.0608, + "step": 29442 + }, + { + "epoch": 0.5381943809750124, + "grad_norm": 6.09306400427329, + "learning_rate": 4.6249913489216245e-06, + "loss": 17.3157, + "step": 29443 + }, + { + "epoch": 0.5382126601714589, + "grad_norm": 6.790203034486313, + "learning_rate": 4.624696171786012e-06, + "loss": 17.7011, + "step": 29444 + }, + { + "epoch": 0.5382309393679053, + "grad_norm": 7.639083428217476, + "learning_rate": 4.624400995965812e-06, + "loss": 17.3712, + "step": 29445 + }, + { + "epoch": 0.5382492185643519, + "grad_norm": 7.516845162250205, + "learning_rate": 4.6241058214620516e-06, + "loss": 17.8654, + "step": 29446 + }, + { + "epoch": 0.5382674977607984, + "grad_norm": 7.301650308646706, + "learning_rate": 4.623810648275767e-06, + "loss": 17.4821, + "step": 29447 + }, + { + "epoch": 0.538285776957245, + "grad_norm": 7.0783371920980525, + "learning_rate": 4.623515476407994e-06, + "loss": 17.8513, + "step": 29448 + }, + { + "epoch": 0.5383040561536915, + "grad_norm": 6.989071061146871, + "learning_rate": 4.6232203058597676e-06, + "loss": 17.5442, + "step": 29449 + }, + { + "epoch": 0.538322335350138, + "grad_norm": 6.740616738139947, + "learning_rate": 4.6229251366321195e-06, + "loss": 17.6039, + "step": 29450 + }, + { + "epoch": 0.5383406145465846, + "grad_norm": 6.055411461890058, + "learning_rate": 4.622629968726087e-06, + "loss": 17.2912, + "step": 29451 + }, + { + "epoch": 0.538358893743031, + "grad_norm": 5.006914991409046, + "learning_rate": 4.622334802142705e-06, + "loss": 16.853, + "step": 29452 + }, + { + "epoch": 0.5383771729394776, + "grad_norm": 5.945411939113464, + "learning_rate": 4.622039636883004e-06, + "loss": 17.2922, + "step": 29453 + }, + { + "epoch": 0.5383954521359241, + "grad_norm": 6.997866624696268, + "learning_rate": 
4.6217444729480235e-06, + "loss": 17.3213, + "step": 29454 + }, + { + "epoch": 0.5384137313323706, + "grad_norm": 4.842755316183787, + "learning_rate": 4.621449310338795e-06, + "loss": 16.904, + "step": 29455 + }, + { + "epoch": 0.5384320105288172, + "grad_norm": 6.44555934151634, + "learning_rate": 4.621154149056353e-06, + "loss": 17.318, + "step": 29456 + }, + { + "epoch": 0.5384502897252637, + "grad_norm": 5.63938653182184, + "learning_rate": 4.620858989101735e-06, + "loss": 17.2163, + "step": 29457 + }, + { + "epoch": 0.5384685689217102, + "grad_norm": 6.847603108257481, + "learning_rate": 4.62056383047597e-06, + "loss": 17.8226, + "step": 29458 + }, + { + "epoch": 0.5384868481181567, + "grad_norm": 6.495474202642668, + "learning_rate": 4.620268673180099e-06, + "loss": 17.6462, + "step": 29459 + }, + { + "epoch": 0.5385051273146032, + "grad_norm": 6.36912065411912, + "learning_rate": 4.6199735172151525e-06, + "loss": 17.4788, + "step": 29460 + }, + { + "epoch": 0.5385234065110498, + "grad_norm": 7.569506157647184, + "learning_rate": 4.619678362582164e-06, + "loss": 17.3841, + "step": 29461 + }, + { + "epoch": 0.5385416857074963, + "grad_norm": 6.13074048656206, + "learning_rate": 4.619383209282173e-06, + "loss": 17.304, + "step": 29462 + }, + { + "epoch": 0.5385599649039429, + "grad_norm": 5.6353209803185536, + "learning_rate": 4.61908805731621e-06, + "loss": 17.044, + "step": 29463 + }, + { + "epoch": 0.5385782441003893, + "grad_norm": 6.529993248223105, + "learning_rate": 4.618792906685308e-06, + "loss": 17.5979, + "step": 29464 + }, + { + "epoch": 0.5385965232968358, + "grad_norm": 5.907159939389777, + "learning_rate": 4.618497757390505e-06, + "loss": 17.3092, + "step": 29465 + }, + { + "epoch": 0.5386148024932824, + "grad_norm": 9.227590186808916, + "learning_rate": 4.618202609432836e-06, + "loss": 18.4866, + "step": 29466 + }, + { + "epoch": 0.5386330816897289, + "grad_norm": 5.495784479179967, + "learning_rate": 4.617907462813332e-06, + "loss": 17.33, + "step": 29467 + }, + { + "epoch": 0.5386513608861755, + "grad_norm": 6.004887540725572, + "learning_rate": 4.6176123175330295e-06, + "loss": 17.0819, + "step": 29468 + }, + { + "epoch": 0.538669640082622, + "grad_norm": 6.377693620330323, + "learning_rate": 4.617317173592963e-06, + "loss": 17.5609, + "step": 29469 + }, + { + "epoch": 0.5386879192790685, + "grad_norm": 7.296511044930412, + "learning_rate": 4.617022030994165e-06, + "loss": 17.7308, + "step": 29470 + }, + { + "epoch": 0.538706198475515, + "grad_norm": 6.2670869074828355, + "learning_rate": 4.616726889737673e-06, + "loss": 17.5962, + "step": 29471 + }, + { + "epoch": 0.5387244776719615, + "grad_norm": 6.273506710374839, + "learning_rate": 4.616431749824519e-06, + "loss": 17.3744, + "step": 29472 + }, + { + "epoch": 0.5387427568684081, + "grad_norm": 8.194156052425928, + "learning_rate": 4.61613661125574e-06, + "loss": 18.4392, + "step": 29473 + }, + { + "epoch": 0.5387610360648546, + "grad_norm": 5.372760051340595, + "learning_rate": 4.615841474032368e-06, + "loss": 17.0274, + "step": 29474 + }, + { + "epoch": 0.5387793152613011, + "grad_norm": 7.082151687429085, + "learning_rate": 4.615546338155438e-06, + "loss": 17.681, + "step": 29475 + }, + { + "epoch": 0.5387975944577477, + "grad_norm": 6.496208767191889, + "learning_rate": 4.615251203625986e-06, + "loss": 17.3553, + "step": 29476 + }, + { + "epoch": 0.5388158736541941, + "grad_norm": 6.762368717166169, + "learning_rate": 4.614956070445045e-06, + "loss": 17.6863, + "step": 29477 + }, + { + "epoch": 
0.5388341528506407, + "grad_norm": 5.503882816305648, + "learning_rate": 4.614660938613648e-06, + "loss": 17.2541, + "step": 29478 + }, + { + "epoch": 0.5388524320470872, + "grad_norm": 6.676343953056528, + "learning_rate": 4.614365808132832e-06, + "loss": 17.4198, + "step": 29479 + }, + { + "epoch": 0.5388707112435337, + "grad_norm": 7.722256220464426, + "learning_rate": 4.61407067900363e-06, + "loss": 18.0793, + "step": 29480 + }, + { + "epoch": 0.5388889904399803, + "grad_norm": 5.1380679544098715, + "learning_rate": 4.613775551227076e-06, + "loss": 16.9727, + "step": 29481 + }, + { + "epoch": 0.5389072696364268, + "grad_norm": 6.9937525316139135, + "learning_rate": 4.613480424804207e-06, + "loss": 17.459, + "step": 29482 + }, + { + "epoch": 0.5389255488328734, + "grad_norm": 6.575303936009218, + "learning_rate": 4.613185299736055e-06, + "loss": 17.5852, + "step": 29483 + }, + { + "epoch": 0.5389438280293198, + "grad_norm": 6.0547981093976695, + "learning_rate": 4.612890176023654e-06, + "loss": 17.2239, + "step": 29484 + }, + { + "epoch": 0.5389621072257663, + "grad_norm": 6.979537515178285, + "learning_rate": 4.61259505366804e-06, + "loss": 17.7887, + "step": 29485 + }, + { + "epoch": 0.5389803864222129, + "grad_norm": 5.574405040408401, + "learning_rate": 4.612299932670248e-06, + "loss": 17.0227, + "step": 29486 + }, + { + "epoch": 0.5389986656186594, + "grad_norm": 5.957891024767679, + "learning_rate": 4.6120048130313085e-06, + "loss": 17.4767, + "step": 29487 + }, + { + "epoch": 0.539016944815106, + "grad_norm": 6.508454385681888, + "learning_rate": 4.611709694752259e-06, + "loss": 17.4061, + "step": 29488 + }, + { + "epoch": 0.5390352240115525, + "grad_norm": 7.952240112241982, + "learning_rate": 4.611414577834134e-06, + "loss": 18.545, + "step": 29489 + }, + { + "epoch": 0.5390535032079989, + "grad_norm": 6.074578112519142, + "learning_rate": 4.611119462277968e-06, + "loss": 17.3847, + "step": 29490 + }, + { + "epoch": 0.5390717824044455, + "grad_norm": 7.042071628695794, + "learning_rate": 4.6108243480847945e-06, + "loss": 17.6878, + "step": 29491 + }, + { + "epoch": 0.539090061600892, + "grad_norm": 7.742850166745321, + "learning_rate": 4.610529235255646e-06, + "loss": 17.825, + "step": 29492 + }, + { + "epoch": 0.5391083407973386, + "grad_norm": 7.16135930848796, + "learning_rate": 4.61023412379156e-06, + "loss": 17.7906, + "step": 29493 + }, + { + "epoch": 0.5391266199937851, + "grad_norm": 6.872807704777655, + "learning_rate": 4.60993901369357e-06, + "loss": 17.2553, + "step": 29494 + }, + { + "epoch": 0.5391448991902316, + "grad_norm": 7.036621368448833, + "learning_rate": 4.609643904962709e-06, + "loss": 17.7991, + "step": 29495 + }, + { + "epoch": 0.5391631783866782, + "grad_norm": 6.140199390986187, + "learning_rate": 4.609348797600013e-06, + "loss": 17.288, + "step": 29496 + }, + { + "epoch": 0.5391814575831246, + "grad_norm": 5.393724906218478, + "learning_rate": 4.609053691606516e-06, + "loss": 17.3026, + "step": 29497 + }, + { + "epoch": 0.5391997367795711, + "grad_norm": 6.618458585201584, + "learning_rate": 4.608758586983249e-06, + "loss": 17.4113, + "step": 29498 + }, + { + "epoch": 0.5392180159760177, + "grad_norm": 6.38221382609543, + "learning_rate": 4.608463483731253e-06, + "loss": 17.3821, + "step": 29499 + }, + { + "epoch": 0.5392362951724642, + "grad_norm": 6.160402299489039, + "learning_rate": 4.6081683818515575e-06, + "loss": 17.4905, + "step": 29500 + } + ], + "logging_steps": 1.0, + "max_steps": 54707, + "num_input_tokens_seen": 0, + "num_train_epochs": 
1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}