| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 1221, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002457002457002457, |
| "grad_norm": 51.00658947196253, |
| "learning_rate": 4.0650406504065046e-07, |
| "loss": 11.5201, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.004914004914004914, |
| "grad_norm": 41.32182940742031, |
| "learning_rate": 8.130081300813009e-07, |
| "loss": 11.7707, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.007371007371007371, |
| "grad_norm": 44.33185040770822, |
| "learning_rate": 1.2195121951219514e-06, |
| "loss": 11.6448, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.009828009828009828, |
| "grad_norm": 42.42836066927598, |
| "learning_rate": 1.6260162601626018e-06, |
| "loss": 11.8086, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.012285012285012284, |
| "grad_norm": 53.775532456381285, |
| "learning_rate": 2.0325203252032523e-06, |
| "loss": 11.3357, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.014742014742014743, |
| "grad_norm": 47.62249981599493, |
| "learning_rate": 2.4390243902439027e-06, |
| "loss": 11.3818, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0171990171990172, |
| "grad_norm": 55.13732030606171, |
| "learning_rate": 2.8455284552845528e-06, |
| "loss": 11.142, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.019656019656019656, |
| "grad_norm": 62.52065331239275, |
| "learning_rate": 3.2520325203252037e-06, |
| "loss": 10.5585, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.022113022113022112, |
| "grad_norm": 69.61760079081881, |
| "learning_rate": 3.6585365853658537e-06, |
| "loss": 10.4944, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02457002457002457, |
| "grad_norm": 101.43566538305599, |
| "learning_rate": 4.0650406504065046e-06, |
| "loss": 9.3135, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02702702702702703, |
| "grad_norm": 78.43100915045316, |
| "learning_rate": 4.471544715447155e-06, |
| "loss": 5.2457, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.029484029484029485, |
| "grad_norm": 73.16500005409208, |
| "learning_rate": 4.8780487804878055e-06, |
| "loss": 5.3303, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.03194103194103194, |
| "grad_norm": 47.86656696315455, |
| "learning_rate": 5.2845528455284555e-06, |
| "loss": 3.6113, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0343980343980344, |
| "grad_norm": 8.67227796195133, |
| "learning_rate": 5.6910569105691056e-06, |
| "loss": 2.1041, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.036855036855036855, |
| "grad_norm": 7.283413327700143, |
| "learning_rate": 6.0975609756097564e-06, |
| "loss": 2.0566, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.03931203931203931, |
| "grad_norm": 5.08768956210286, |
| "learning_rate": 6.504065040650407e-06, |
| "loss": 1.7846, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.04176904176904177, |
| "grad_norm": 3.7989995223107624, |
| "learning_rate": 6.910569105691057e-06, |
| "loss": 1.6511, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.044226044226044224, |
| "grad_norm": 3.5721481727371764, |
| "learning_rate": 7.317073170731707e-06, |
| "loss": 1.9222, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.04668304668304668, |
| "grad_norm": 2.2512893668476988, |
| "learning_rate": 7.723577235772358e-06, |
| "loss": 1.6941, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.04914004914004914, |
| "grad_norm": 2.274570626749542, |
| "learning_rate": 8.130081300813009e-06, |
| "loss": 1.3336, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.051597051597051594, |
| "grad_norm": 1.759146954439502, |
| "learning_rate": 8.53658536585366e-06, |
| "loss": 1.6479, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.05405405405405406, |
| "grad_norm": 1.5309831654707053, |
| "learning_rate": 8.94308943089431e-06, |
| "loss": 1.6839, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.056511056511056514, |
| "grad_norm": 1.7232978302647235, |
| "learning_rate": 9.34959349593496e-06, |
| "loss": 1.4097, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.05896805896805897, |
| "grad_norm": 0.9050344472252703, |
| "learning_rate": 9.756097560975611e-06, |
| "loss": 1.3058, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.06142506142506143, |
| "grad_norm": 1.011046912711339, |
| "learning_rate": 1.016260162601626e-05, |
| "loss": 1.3016, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.06388206388206388, |
| "grad_norm": 0.7633443815628498, |
| "learning_rate": 1.0569105691056911e-05, |
| "loss": 1.0767, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.06633906633906633, |
| "grad_norm": 3.711382173921332, |
| "learning_rate": 1.0975609756097562e-05, |
| "loss": 1.2445, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0687960687960688, |
| "grad_norm": 0.9084685938028465, |
| "learning_rate": 1.1382113821138211e-05, |
| "loss": 1.3219, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.07125307125307126, |
| "grad_norm": 0.7278631490873225, |
| "learning_rate": 1.1788617886178862e-05, |
| "loss": 1.344, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.07371007371007371, |
| "grad_norm": 0.8118157029372023, |
| "learning_rate": 1.2195121951219513e-05, |
| "loss": 1.2438, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.07616707616707617, |
| "grad_norm": 0.7833625240021413, |
| "learning_rate": 1.2601626016260162e-05, |
| "loss": 1.3219, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.07862407862407862, |
| "grad_norm": 0.5647517543959654, |
| "learning_rate": 1.3008130081300815e-05, |
| "loss": 0.9937, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.08108108108108109, |
| "grad_norm": 0.6618891423388924, |
| "learning_rate": 1.3414634146341466e-05, |
| "loss": 0.9841, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.08353808353808354, |
| "grad_norm": 0.8111611221708586, |
| "learning_rate": 1.3821138211382115e-05, |
| "loss": 1.1497, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.085995085995086, |
| "grad_norm": 0.5989650541769594, |
| "learning_rate": 1.4227642276422764e-05, |
| "loss": 1.0802, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.08845208845208845, |
| "grad_norm": 0.5601453621258067, |
| "learning_rate": 1.4634146341463415e-05, |
| "loss": 0.9953, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.09090909090909091, |
| "grad_norm": 0.546088498086313, |
| "learning_rate": 1.5040650406504067e-05, |
| "loss": 0.9963, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.09336609336609336, |
| "grad_norm": 0.5184238539089115, |
| "learning_rate": 1.5447154471544717e-05, |
| "loss": 0.9913, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.09582309582309582, |
| "grad_norm": 0.4859364925634494, |
| "learning_rate": 1.5853658536585366e-05, |
| "loss": 0.9131, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.09828009828009827, |
| "grad_norm": 0.5263041709899442, |
| "learning_rate": 1.6260162601626018e-05, |
| "loss": 1.2145, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.10073710073710074, |
| "grad_norm": 0.48323570525096055, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 1.093, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.10319410319410319, |
| "grad_norm": 0.39786588976887655, |
| "learning_rate": 1.707317073170732e-05, |
| "loss": 1.0228, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.10565110565110565, |
| "grad_norm": 0.4353003519119437, |
| "learning_rate": 1.747967479674797e-05, |
| "loss": 0.8313, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.10810810810810811, |
| "grad_norm": 0.5099701425209221, |
| "learning_rate": 1.788617886178862e-05, |
| "loss": 1.0385, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.11056511056511056, |
| "grad_norm": 0.4247114077933769, |
| "learning_rate": 1.8292682926829268e-05, |
| "loss": 1.0362, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.11302211302211303, |
| "grad_norm": 0.4278915097042231, |
| "learning_rate": 1.869918699186992e-05, |
| "loss": 1.0663, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.11547911547911548, |
| "grad_norm": 0.42504196406677935, |
| "learning_rate": 1.9105691056910573e-05, |
| "loss": 1.065, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.11793611793611794, |
| "grad_norm": 0.39322409819280146, |
| "learning_rate": 1.9512195121951222e-05, |
| "loss": 0.831, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.12039312039312039, |
| "grad_norm": 0.38178816332973403, |
| "learning_rate": 1.991869918699187e-05, |
| "loss": 1.024, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.12285012285012285, |
| "grad_norm": 0.32488634343203454, |
| "learning_rate": 2.032520325203252e-05, |
| "loss": 0.8349, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.12530712530712532, |
| "grad_norm": 0.3782484182668685, |
| "learning_rate": 2.073170731707317e-05, |
| "loss": 0.9342, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.12776412776412777, |
| "grad_norm": 0.36030842714472017, |
| "learning_rate": 2.1138211382113822e-05, |
| "loss": 1.0332, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.13022113022113022, |
| "grad_norm": 0.3504763804177174, |
| "learning_rate": 2.1544715447154475e-05, |
| "loss": 1.0438, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.13267813267813267, |
| "grad_norm": 0.3121087782309304, |
| "learning_rate": 2.1951219512195124e-05, |
| "loss": 0.8683, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.13513513513513514, |
| "grad_norm": 0.4132956337094442, |
| "learning_rate": 2.2357723577235773e-05, |
| "loss": 1.0546, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.1375921375921376, |
| "grad_norm": 0.3356523934810729, |
| "learning_rate": 2.2764227642276422e-05, |
| "loss": 0.8938, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.14004914004914004, |
| "grad_norm": 0.3394592355583364, |
| "learning_rate": 2.3170731707317075e-05, |
| "loss": 0.8029, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.14250614250614252, |
| "grad_norm": 0.9025202002420913, |
| "learning_rate": 2.3577235772357724e-05, |
| "loss": 0.836, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.14496314496314497, |
| "grad_norm": 0.33122567373181955, |
| "learning_rate": 2.3983739837398377e-05, |
| "loss": 0.9265, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.14742014742014742, |
| "grad_norm": 0.386487486127247, |
| "learning_rate": 2.4390243902439026e-05, |
| "loss": 1.0522, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.14987714987714987, |
| "grad_norm": 0.3180551010846452, |
| "learning_rate": 2.4796747967479675e-05, |
| "loss": 0.8465, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.15233415233415235, |
| "grad_norm": 0.3886943015332388, |
| "learning_rate": 2.5203252032520324e-05, |
| "loss": 0.8106, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.1547911547911548, |
| "grad_norm": 0.36483367577896464, |
| "learning_rate": 2.5609756097560977e-05, |
| "loss": 0.9219, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.15724815724815724, |
| "grad_norm": 0.3476022996526318, |
| "learning_rate": 2.601626016260163e-05, |
| "loss": 0.7888, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.1597051597051597, |
| "grad_norm": 0.33332604952333145, |
| "learning_rate": 2.642276422764228e-05, |
| "loss": 0.8336, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.16216216216216217, |
| "grad_norm": 0.3210922545885254, |
| "learning_rate": 2.682926829268293e-05, |
| "loss": 0.8325, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.16461916461916462, |
| "grad_norm": 0.34166973327455336, |
| "learning_rate": 2.7235772357723577e-05, |
| "loss": 0.8286, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.16707616707616707, |
| "grad_norm": 0.2772568838407044, |
| "learning_rate": 2.764227642276423e-05, |
| "loss": 0.6857, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.16953316953316952, |
| "grad_norm": 1.0755515057999228, |
| "learning_rate": 2.8048780487804882e-05, |
| "loss": 0.8337, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.171990171990172, |
| "grad_norm": 0.774515155788574, |
| "learning_rate": 2.8455284552845528e-05, |
| "loss": 0.9026, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.17444717444717445, |
| "grad_norm": 0.343996443532602, |
| "learning_rate": 2.886178861788618e-05, |
| "loss": 0.858, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.1769041769041769, |
| "grad_norm": 0.37464578169776397, |
| "learning_rate": 2.926829268292683e-05, |
| "loss": 0.8868, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.17936117936117937, |
| "grad_norm": 0.30780292194750675, |
| "learning_rate": 2.9674796747967482e-05, |
| "loss": 0.8738, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.18181818181818182, |
| "grad_norm": 0.7080920118302183, |
| "learning_rate": 3.0081300813008135e-05, |
| "loss": 1.0803, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.18427518427518427, |
| "grad_norm": 0.3195310398410445, |
| "learning_rate": 3.048780487804878e-05, |
| "loss": 0.7552, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.18673218673218672, |
| "grad_norm": 0.3398724677379115, |
| "learning_rate": 3.089430894308943e-05, |
| "loss": 0.915, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.1891891891891892, |
| "grad_norm": 1.409643650018611, |
| "learning_rate": 3.130081300813008e-05, |
| "loss": 0.8522, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.19164619164619165, |
| "grad_norm": 0.9400426414745835, |
| "learning_rate": 3.170731707317073e-05, |
| "loss": 0.9234, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.1941031941031941, |
| "grad_norm": 0.44549756510252503, |
| "learning_rate": 3.2113821138211384e-05, |
| "loss": 0.8354, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.19656019656019655, |
| "grad_norm": 0.31409628217862606, |
| "learning_rate": 3.2520325203252037e-05, |
| "loss": 0.8491, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.19901719901719903, |
| "grad_norm": 0.4537000801486613, |
| "learning_rate": 3.292682926829269e-05, |
| "loss": 0.85, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.20147420147420148, |
| "grad_norm": 0.42406673549654195, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 1.053, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.20393120393120392, |
| "grad_norm": 0.3789788855142771, |
| "learning_rate": 3.373983739837399e-05, |
| "loss": 0.8627, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.20638820638820637, |
| "grad_norm": 0.408375242705326, |
| "learning_rate": 3.414634146341464e-05, |
| "loss": 0.9088, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.20884520884520885, |
| "grad_norm": 0.4269743612762991, |
| "learning_rate": 3.4552845528455286e-05, |
| "loss": 0.9017, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.2113022113022113, |
| "grad_norm": 0.3983104483895218, |
| "learning_rate": 3.495934959349594e-05, |
| "loss": 0.8781, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.21375921375921375, |
| "grad_norm": 0.4289837220182789, |
| "learning_rate": 3.5365853658536584e-05, |
| "loss": 0.7913, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.21621621621621623, |
| "grad_norm": 0.4383253801829447, |
| "learning_rate": 3.577235772357724e-05, |
| "loss": 0.8579, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.21867321867321868, |
| "grad_norm": 0.3815114297981113, |
| "learning_rate": 3.617886178861789e-05, |
| "loss": 0.7926, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.22113022113022113, |
| "grad_norm": 0.4460874540522612, |
| "learning_rate": 3.6585365853658535e-05, |
| "loss": 0.8682, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.22358722358722358, |
| "grad_norm": 0.4242618487534378, |
| "learning_rate": 3.699186991869919e-05, |
| "loss": 0.8574, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.22604422604422605, |
| "grad_norm": 0.3784544099868278, |
| "learning_rate": 3.739837398373984e-05, |
| "loss": 0.7585, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.2285012285012285, |
| "grad_norm": 0.4216052185506308, |
| "learning_rate": 3.780487804878049e-05, |
| "loss": 0.7668, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.23095823095823095, |
| "grad_norm": 0.43197147956134363, |
| "learning_rate": 3.8211382113821145e-05, |
| "loss": 0.9439, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.2334152334152334, |
| "grad_norm": 0.35661007106689985, |
| "learning_rate": 3.861788617886179e-05, |
| "loss": 0.745, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.23587223587223588, |
| "grad_norm": 0.35038751371475896, |
| "learning_rate": 3.9024390243902444e-05, |
| "loss": 0.8148, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.23832923832923833, |
| "grad_norm": 0.3269434336747683, |
| "learning_rate": 3.943089430894309e-05, |
| "loss": 0.7587, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.24078624078624078, |
| "grad_norm": 0.3927980260401744, |
| "learning_rate": 3.983739837398374e-05, |
| "loss": 0.8683, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.24324324324324326, |
| "grad_norm": 0.3655921622998464, |
| "learning_rate": 4.0243902439024395e-05, |
| "loss": 0.7903, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.2457002457002457, |
| "grad_norm": 7.434933759364144, |
| "learning_rate": 4.065040650406504e-05, |
| "loss": 0.9234, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.24815724815724816, |
| "grad_norm": 0.5057132697370877, |
| "learning_rate": 4.105691056910569e-05, |
| "loss": 0.8459, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.25061425061425063, |
| "grad_norm": 0.35608438610939613, |
| "learning_rate": 4.146341463414634e-05, |
| "loss": 0.9301, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.25307125307125306, |
| "grad_norm": 0.4378277298361604, |
| "learning_rate": 4.186991869918699e-05, |
| "loss": 0.751, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.25552825552825553, |
| "grad_norm": 0.39957854645534735, |
| "learning_rate": 4.2276422764227644e-05, |
| "loss": 0.6775, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.257985257985258, |
| "grad_norm": 0.4425372497170904, |
| "learning_rate": 4.26829268292683e-05, |
| "loss": 0.7637, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.26044226044226043, |
| "grad_norm": 0.5055020698531032, |
| "learning_rate": 4.308943089430895e-05, |
| "loss": 0.9547, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.2628992628992629, |
| "grad_norm": 0.48084566592201927, |
| "learning_rate": 4.3495934959349595e-05, |
| "loss": 0.7968, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.26535626535626533, |
| "grad_norm": 0.44969395374862164, |
| "learning_rate": 4.390243902439025e-05, |
| "loss": 0.9182, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.2678132678132678, |
| "grad_norm": 0.5044687667724931, |
| "learning_rate": 4.43089430894309e-05, |
| "loss": 0.8516, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.2702702702702703, |
| "grad_norm": 0.46505802585107076, |
| "learning_rate": 4.4715447154471546e-05, |
| "loss": 0.8654, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2727272727272727, |
| "grad_norm": 0.5806796709997633, |
| "learning_rate": 4.51219512195122e-05, |
| "loss": 0.906, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.2751842751842752, |
| "grad_norm": 0.4210793238854805, |
| "learning_rate": 4.5528455284552844e-05, |
| "loss": 0.7159, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.27764127764127766, |
| "grad_norm": 0.45861184290594337, |
| "learning_rate": 4.59349593495935e-05, |
| "loss": 0.8742, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.2800982800982801, |
| "grad_norm": 0.474280060915593, |
| "learning_rate": 4.634146341463415e-05, |
| "loss": 0.9078, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.28255528255528256, |
| "grad_norm": 3.4497188666996608, |
| "learning_rate": 4.6747967479674795e-05, |
| "loss": 0.7941, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.28501228501228504, |
| "grad_norm": 0.6859838903958281, |
| "learning_rate": 4.715447154471545e-05, |
| "loss": 0.829, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.28746928746928746, |
| "grad_norm": 0.4257855688775576, |
| "learning_rate": 4.75609756097561e-05, |
| "loss": 0.684, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.28992628992628994, |
| "grad_norm": 0.7209470061472436, |
| "learning_rate": 4.796747967479675e-05, |
| "loss": 0.8426, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.29238329238329236, |
| "grad_norm": 0.4348904611702599, |
| "learning_rate": 4.8373983739837406e-05, |
| "loss": 0.8974, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.29484029484029484, |
| "grad_norm": 0.6022119693773859, |
| "learning_rate": 4.878048780487805e-05, |
| "loss": 0.7381, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2972972972972973, |
| "grad_norm": 0.49356434831001184, |
| "learning_rate": 4.9186991869918704e-05, |
| "loss": 0.8491, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.29975429975429974, |
| "grad_norm": 0.5199694383515181, |
| "learning_rate": 4.959349593495935e-05, |
| "loss": 0.9368, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.3022113022113022, |
| "grad_norm": 0.6287882015300568, |
| "learning_rate": 5e-05, |
| "loss": 0.7733, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.3046683046683047, |
| "grad_norm": 0.47882310869561157, |
| "learning_rate": 4.99544626593807e-05, |
| "loss": 0.828, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.3071253071253071, |
| "grad_norm": 0.47321265037200055, |
| "learning_rate": 4.990892531876138e-05, |
| "loss": 0.7783, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.3095823095823096, |
| "grad_norm": 5.521364106361822, |
| "learning_rate": 4.986338797814208e-05, |
| "loss": 0.8306, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.31203931203931207, |
| "grad_norm": 0.6130054000501296, |
| "learning_rate": 4.9817850637522776e-05, |
| "loss": 0.7226, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.3144963144963145, |
| "grad_norm": 0.5002677223225025, |
| "learning_rate": 4.977231329690346e-05, |
| "loss": 0.842, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.31695331695331697, |
| "grad_norm": 0.5097826151334071, |
| "learning_rate": 4.9726775956284156e-05, |
| "loss": 0.8517, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.3194103194103194, |
| "grad_norm": 0.4823779081669877, |
| "learning_rate": 4.9681238615664846e-05, |
| "loss": 0.6543, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.32186732186732187, |
| "grad_norm": 0.6212379815651925, |
| "learning_rate": 4.9635701275045536e-05, |
| "loss": 0.8558, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.32432432432432434, |
| "grad_norm": 0.46984602438360945, |
| "learning_rate": 4.959016393442623e-05, |
| "loss": 0.9246, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.32678132678132676, |
| "grad_norm": 0.4983418640829827, |
| "learning_rate": 4.954462659380692e-05, |
| "loss": 0.7805, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.32923832923832924, |
| "grad_norm": 0.6115973164236492, |
| "learning_rate": 4.949908925318761e-05, |
| "loss": 0.8777, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.3316953316953317, |
| "grad_norm": 0.425339912596782, |
| "learning_rate": 4.945355191256831e-05, |
| "loss": 0.8364, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.33415233415233414, |
| "grad_norm": 0.5081656348291814, |
| "learning_rate": 4.9408014571949e-05, |
| "loss": 0.8182, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.3366093366093366, |
| "grad_norm": 0.4507877289201634, |
| "learning_rate": 4.936247723132969e-05, |
| "loss": 0.7059, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.33906633906633904, |
| "grad_norm": 0.4259156527505649, |
| "learning_rate": 4.9316939890710386e-05, |
| "loss": 0.7337, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.3415233415233415, |
| "grad_norm": 0.4870288608531628, |
| "learning_rate": 4.9271402550091076e-05, |
| "loss": 0.6361, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.343980343980344, |
| "grad_norm": 0.3911986963597503, |
| "learning_rate": 4.9225865209471766e-05, |
| "loss": 0.7603, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3464373464373464, |
| "grad_norm": 0.7264714511948328, |
| "learning_rate": 4.918032786885246e-05, |
| "loss": 0.9062, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.3488943488943489, |
| "grad_norm": 0.47242198367565236, |
| "learning_rate": 4.913479052823315e-05, |
| "loss": 0.7663, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.35135135135135137, |
| "grad_norm": 0.47328040906145535, |
| "learning_rate": 4.908925318761385e-05, |
| "loss": 0.6299, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.3538083538083538, |
| "grad_norm": 0.5189395807696658, |
| "learning_rate": 4.904371584699454e-05, |
| "loss": 0.7318, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.35626535626535627, |
| "grad_norm": 0.45277914852216605, |
| "learning_rate": 4.899817850637523e-05, |
| "loss": 0.7552, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.35872235872235875, |
| "grad_norm": 0.4495050775600071, |
| "learning_rate": 4.8952641165755927e-05, |
| "loss": 0.7726, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.36117936117936117, |
| "grad_norm": 0.5562436967325994, |
| "learning_rate": 4.890710382513661e-05, |
| "loss": 0.7093, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.36363636363636365, |
| "grad_norm": 0.4021268067280976, |
| "learning_rate": 4.8861566484517307e-05, |
| "loss": 0.7734, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.36609336609336607, |
| "grad_norm": 0.5900140052185344, |
| "learning_rate": 4.8816029143898e-05, |
| "loss": 0.898, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.36855036855036855, |
| "grad_norm": 0.40531749565353, |
| "learning_rate": 4.8770491803278687e-05, |
| "loss": 0.8673, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.371007371007371, |
| "grad_norm": 0.5257544978960317, |
| "learning_rate": 4.872495446265938e-05, |
| "loss": 0.8524, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.37346437346437344, |
| "grad_norm": 0.3709941280583937, |
| "learning_rate": 4.867941712204008e-05, |
| "loss": 0.7762, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.3759213759213759, |
| "grad_norm": 0.6733613092717959, |
| "learning_rate": 4.863387978142076e-05, |
| "loss": 0.7244, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.3783783783783784, |
| "grad_norm": 0.38207770249074585, |
| "learning_rate": 4.858834244080146e-05, |
| "loss": 0.7583, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.3808353808353808, |
| "grad_norm": 0.6562455849116627, |
| "learning_rate": 4.854280510018216e-05, |
| "loss": 0.8073, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.3832923832923833, |
| "grad_norm": 0.39748554973014666, |
| "learning_rate": 4.849726775956284e-05, |
| "loss": 0.7228, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.3857493857493858, |
| "grad_norm": 0.3661785544020982, |
| "learning_rate": 4.845173041894354e-05, |
| "loss": 0.6666, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.3882063882063882, |
| "grad_norm": 0.5268622303781758, |
| "learning_rate": 4.840619307832423e-05, |
| "loss": 0.7831, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.3906633906633907, |
| "grad_norm": 0.49812730319416026, |
| "learning_rate": 4.836065573770492e-05, |
| "loss": 0.7754, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.3931203931203931, |
| "grad_norm": 0.4157898663987463, |
| "learning_rate": 4.8315118397085614e-05, |
| "loss": 0.7964, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.3955773955773956, |
| "grad_norm": 0.47307293289652125, |
| "learning_rate": 4.8269581056466304e-05, |
| "loss": 0.8665, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.39803439803439805, |
| "grad_norm": 0.5347821089983137, |
| "learning_rate": 4.8224043715846994e-05, |
| "loss": 0.9031, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.4004914004914005, |
| "grad_norm": 0.4416804543349193, |
| "learning_rate": 4.817850637522769e-05, |
| "loss": 0.8018, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.40294840294840295, |
| "grad_norm": 0.38242798734043876, |
| "learning_rate": 4.813296903460838e-05, |
| "loss": 0.8083, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.40540540540540543, |
| "grad_norm": 0.46628473043304297, |
| "learning_rate": 4.808743169398907e-05, |
| "loss": 0.7786, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.40786240786240785, |
| "grad_norm": 0.8957454928357931, |
| "learning_rate": 4.804189435336977e-05, |
| "loss": 0.7697, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.4103194103194103, |
| "grad_norm": 0.7840441840091149, |
| "learning_rate": 4.799635701275046e-05, |
| "loss": 0.9073, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.41277641277641275, |
| "grad_norm": 0.7288094214817507, |
| "learning_rate": 4.795081967213115e-05, |
| "loss": 0.7594, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.4152334152334152, |
| "grad_norm": 0.6041127252970878, |
| "learning_rate": 4.7905282331511844e-05, |
| "loss": 0.7313, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.4176904176904177, |
| "grad_norm": 0.8145997847617484, |
| "learning_rate": 4.7859744990892534e-05, |
| "loss": 0.6774, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.4201474201474201, |
| "grad_norm": 0.5528790249808274, |
| "learning_rate": 4.7814207650273224e-05, |
| "loss": 0.8161, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.4226044226044226, |
| "grad_norm": 0.5818892388787992, |
| "learning_rate": 4.776867030965392e-05, |
| "loss": 0.6353, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.4250614250614251, |
| "grad_norm": 0.5028845858663835, |
| "learning_rate": 4.772313296903461e-05, |
| "loss": 0.7811, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.4275184275184275, |
| "grad_norm": 0.5094764920597807, |
| "learning_rate": 4.76775956284153e-05, |
| "loss": 0.77, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.42997542997543, |
| "grad_norm": 0.40339340341267327, |
| "learning_rate": 4.7632058287796e-05, |
| "loss": 0.6796, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.43243243243243246, |
| "grad_norm": 0.47577207705852176, |
| "learning_rate": 4.758652094717669e-05, |
| "loss": 0.7034, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.4348894348894349, |
| "grad_norm": 0.39888134217182175, |
| "learning_rate": 4.754098360655738e-05, |
| "loss": 0.6607, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.43734643734643736, |
| "grad_norm": 0.3965895014017134, |
| "learning_rate": 4.749544626593807e-05, |
| "loss": 0.7624, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.4398034398034398, |
| "grad_norm": 0.4709202993164332, |
| "learning_rate": 4.7449908925318764e-05, |
| "loss": 0.8225, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.44226044226044225, |
| "grad_norm": 0.382474212228653, |
| "learning_rate": 4.740437158469946e-05, |
| "loss": 0.8546, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.44471744471744473, |
| "grad_norm": 0.4231565796785838, |
| "learning_rate": 4.7358834244080144e-05, |
| "loss": 0.771, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.44717444717444715, |
| "grad_norm": 0.38054832898962976, |
| "learning_rate": 4.731329690346084e-05, |
| "loss": 0.6595, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.44963144963144963, |
| "grad_norm": 0.3547946010093686, |
| "learning_rate": 4.726775956284154e-05, |
| "loss": 0.6817, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.4520884520884521, |
| "grad_norm": 0.3945726785571152, |
| "learning_rate": 4.722222222222222e-05, |
| "loss": 0.7525, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.45454545454545453, |
| "grad_norm": 0.3928424227592678, |
| "learning_rate": 4.717668488160292e-05, |
| "loss": 0.7477, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.457002457002457, |
| "grad_norm": 0.4426304351649171, |
| "learning_rate": 4.713114754098361e-05, |
| "loss": 0.7423, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.4594594594594595, |
| "grad_norm": 1.6332435201318054, |
| "learning_rate": 4.70856102003643e-05, |
| "loss": 0.7035, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.4619164619164619, |
| "grad_norm": 0.4178581553378913, |
| "learning_rate": 4.7040072859744995e-05, |
| "loss": 0.7806, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.4643734643734644, |
| "grad_norm": 0.37923597472442744, |
| "learning_rate": 4.6994535519125685e-05, |
| "loss": 0.7407, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.4668304668304668, |
| "grad_norm": 0.4449909952237191, |
| "learning_rate": 4.6948998178506375e-05, |
| "loss": 0.7735, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.4692874692874693, |
| "grad_norm": 1.3250256301620615, |
| "learning_rate": 4.690346083788707e-05, |
| "loss": 0.7303, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.47174447174447176, |
| "grad_norm": 0.9645765967219847, |
| "learning_rate": 4.685792349726776e-05, |
| "loss": 0.7097, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.4742014742014742, |
| "grad_norm": 0.6361558765678473, |
| "learning_rate": 4.681238615664845e-05, |
| "loss": 0.7363, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.47665847665847666, |
| "grad_norm": 0.45389595024787915, |
| "learning_rate": 4.676684881602915e-05, |
| "loss": 0.7055, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.47911547911547914, |
| "grad_norm": 0.6258698325325335, |
| "learning_rate": 4.672131147540984e-05, |
| "loss": 0.6585, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.48157248157248156, |
| "grad_norm": 1.0118318676213243, |
| "learning_rate": 4.667577413479053e-05, |
| "loss": 0.7367, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.48402948402948404, |
| "grad_norm": 0.4198144007946843, |
| "learning_rate": 4.6630236794171225e-05, |
| "loss": 0.6616, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.4864864864864865, |
| "grad_norm": 0.5913511667595013, |
| "learning_rate": 4.6584699453551915e-05, |
| "loss": 0.7845, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.48894348894348894, |
| "grad_norm": 0.33611220980462847, |
| "learning_rate": 4.6539162112932605e-05, |
| "loss": 0.7603, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.4914004914004914, |
| "grad_norm": 0.5711542431602112, |
| "learning_rate": 4.64936247723133e-05, |
| "loss": 0.7322, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.49385749385749383, |
| "grad_norm": 0.3189868792840459, |
| "learning_rate": 4.644808743169399e-05, |
| "loss": 0.6661, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.4963144963144963, |
| "grad_norm": 0.4826389836871673, |
| "learning_rate": 4.640255009107468e-05, |
| "loss": 0.6749, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.4987714987714988, |
| "grad_norm": 0.40951703225470715, |
| "learning_rate": 4.635701275045538e-05, |
| "loss": 0.7387, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.5012285012285013, |
| "grad_norm": 0.38375829145246065, |
| "learning_rate": 4.631147540983607e-05, |
| "loss": 0.7027, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.5036855036855037, |
| "grad_norm": 0.4733049578471896, |
| "learning_rate": 4.626593806921676e-05, |
| "loss": 0.7509, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.5061425061425061, |
| "grad_norm": 0.3474159718643396, |
| "learning_rate": 4.622040072859745e-05, |
| "loss": 0.7367, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.5085995085995086, |
| "grad_norm": 0.48857281066114416, |
| "learning_rate": 4.6174863387978145e-05, |
| "loss": 0.8525, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.5110565110565111, |
| "grad_norm": 0.38214808096990427, |
| "learning_rate": 4.6129326047358835e-05, |
| "loss": 0.7906, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.5135135135135135, |
| "grad_norm": 0.33815932263073856, |
| "learning_rate": 4.6083788706739525e-05, |
| "loss": 0.7306, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.515970515970516, |
| "grad_norm": 0.4339469943504887, |
| "learning_rate": 4.603825136612022e-05, |
| "loss": 0.8031, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5184275184275184, |
| "grad_norm": 0.3911806777997916, |
| "learning_rate": 4.599271402550091e-05, |
| "loss": 0.6236, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.5208845208845209, |
| "grad_norm": 0.4169040746627703, |
| "learning_rate": 4.59471766848816e-05, |
| "loss": 0.6954, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.5233415233415234, |
| "grad_norm": 0.409930981249451, |
| "learning_rate": 4.59016393442623e-05, |
| "loss": 0.6972, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.5257985257985258, |
| "grad_norm": 0.3662077296397301, |
| "learning_rate": 4.585610200364299e-05, |
| "loss": 0.7205, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.5282555282555282, |
| "grad_norm": 0.3999793098185867, |
| "learning_rate": 4.581056466302368e-05, |
| "loss": 0.7142, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.5307125307125307, |
| "grad_norm": 0.33426678861834175, |
| "learning_rate": 4.5765027322404376e-05, |
| "loss": 0.7806, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.5331695331695332, |
| "grad_norm": 0.2920950465438566, |
| "learning_rate": 4.5719489981785066e-05, |
| "loss": 0.5735, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.5356265356265356, |
| "grad_norm": 0.4387714217174655, |
| "learning_rate": 4.5673952641165756e-05, |
| "loss": 0.7661, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.538083538083538, |
| "grad_norm": 0.40724721414199005, |
| "learning_rate": 4.562841530054645e-05, |
| "loss": 0.7578, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.5405405405405406, |
| "grad_norm": 0.4361008788632283, |
| "learning_rate": 4.558287795992714e-05, |
| "loss": 0.6755, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.542997542997543, |
| "grad_norm": 0.4246249810597821, |
| "learning_rate": 4.553734061930783e-05, |
| "loss": 0.7546, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.5454545454545454, |
| "grad_norm": 0.36118319320850206, |
| "learning_rate": 4.549180327868853e-05, |
| "loss": 0.7669, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.547911547911548, |
| "grad_norm": 0.908289119148723, |
| "learning_rate": 4.544626593806922e-05, |
| "loss": 0.7135, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.5503685503685504, |
| "grad_norm": 0.39602734595220085, |
| "learning_rate": 4.540072859744991e-05, |
| "loss": 0.749, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.5528255528255528, |
| "grad_norm": 0.5078448020996696, |
| "learning_rate": 4.5355191256830606e-05, |
| "loss": 0.6208, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.5552825552825553, |
| "grad_norm": 0.3443372372601607, |
| "learning_rate": 4.5309653916211296e-05, |
| "loss": 0.7046, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.5577395577395577, |
| "grad_norm": 0.4525893747493054, |
| "learning_rate": 4.5264116575591986e-05, |
| "loss": 0.7592, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.5601965601965602, |
| "grad_norm": 0.40243874841518706, |
| "learning_rate": 4.521857923497268e-05, |
| "loss": 0.8445, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.5626535626535627, |
| "grad_norm": 0.35161294551869515, |
| "learning_rate": 4.517304189435337e-05, |
| "loss": 0.677, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.5651105651105651, |
| "grad_norm": 0.41535550493065193, |
| "learning_rate": 4.512750455373406e-05, |
| "loss": 0.7478, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5675675675675675, |
| "grad_norm": 0.4226366849862933, |
| "learning_rate": 4.508196721311476e-05, |
| "loss": 0.745, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.5700245700245701, |
| "grad_norm": 0.3673983419967179, |
| "learning_rate": 4.503642987249545e-05, |
| "loss": 0.7015, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.5724815724815725, |
| "grad_norm": 0.38024111457034476, |
| "learning_rate": 4.499089253187614e-05, |
| "loss": 0.7877, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.5749385749385749, |
| "grad_norm": 0.38382167053979005, |
| "learning_rate": 4.494535519125683e-05, |
| "loss": 0.6943, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.5773955773955773, |
| "grad_norm": 0.3773460766513446, |
| "learning_rate": 4.4899817850637526e-05, |
| "loss": 0.7944, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.5798525798525799, |
| "grad_norm": 0.4206436428227826, |
| "learning_rate": 4.4854280510018216e-05, |
| "loss": 0.6814, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.5823095823095823, |
| "grad_norm": 6.225234570790709, |
| "learning_rate": 4.4808743169398906e-05, |
| "loss": 0.7907, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.5847665847665847, |
| "grad_norm": 0.4921907401337786, |
| "learning_rate": 4.47632058287796e-05, |
| "loss": 0.6665, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.5872235872235873, |
| "grad_norm": 0.48327648449237093, |
| "learning_rate": 4.471766848816029e-05, |
| "loss": 0.7715, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.5896805896805897, |
| "grad_norm": 0.4791973859907425, |
| "learning_rate": 4.467213114754098e-05, |
| "loss": 0.6644, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5921375921375921, |
| "grad_norm": 0.5219036090133962, |
| "learning_rate": 4.462659380692168e-05, |
| "loss": 0.8049, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.5945945945945946, |
| "grad_norm": 0.5456422166867602, |
| "learning_rate": 4.458105646630237e-05, |
| "loss": 0.7501, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.597051597051597, |
| "grad_norm": 0.42200513727398753, |
| "learning_rate": 4.453551912568306e-05, |
| "loss": 0.887, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.5995085995085995, |
| "grad_norm": 0.4322560276672431, |
| "learning_rate": 4.4489981785063757e-05, |
| "loss": 0.7695, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.601965601965602, |
| "grad_norm": 0.6813701089189296, |
| "learning_rate": 4.4444444444444447e-05, |
| "loss": 0.8039, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.6044226044226044, |
| "grad_norm": 0.34727875514808987, |
| "learning_rate": 4.4398907103825137e-05, |
| "loss": 0.6736, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.6068796068796068, |
| "grad_norm": 0.5097357043993563, |
| "learning_rate": 4.435336976320583e-05, |
| "loss": 0.7733, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.6093366093366094, |
| "grad_norm": 0.3917452125453462, |
| "learning_rate": 4.430783242258652e-05, |
| "loss": 0.6345, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.6117936117936118, |
| "grad_norm": 0.3886971533793202, |
| "learning_rate": 4.426229508196721e-05, |
| "loss": 0.6465, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.6142506142506142, |
| "grad_norm": 0.42563955199502573, |
| "learning_rate": 4.421675774134791e-05, |
| "loss": 0.6499, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6167076167076168, |
| "grad_norm": 0.34063379000466826, |
| "learning_rate": 4.41712204007286e-05, |
| "loss": 0.6963, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.6191646191646192, |
| "grad_norm": 0.4724839536346018, |
| "learning_rate": 4.412568306010929e-05, |
| "loss": 0.7917, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.6216216216216216, |
| "grad_norm": 0.43466297048497554, |
| "learning_rate": 4.408014571948999e-05, |
| "loss": 0.7627, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.6240786240786241, |
| "grad_norm": 0.35263290647277007, |
| "learning_rate": 4.403460837887068e-05, |
| "loss": 0.624, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.6265356265356266, |
| "grad_norm": 0.41771099490666685, |
| "learning_rate": 4.398907103825137e-05, |
| "loss": 0.6774, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.628992628992629, |
| "grad_norm": 0.45045654101278304, |
| "learning_rate": 4.3943533697632064e-05, |
| "loss": 0.6706, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.6314496314496314, |
| "grad_norm": 0.4054524028616639, |
| "learning_rate": 4.3897996357012754e-05, |
| "loss": 0.6856, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.6339066339066339, |
| "grad_norm": 0.4199071567113292, |
| "learning_rate": 4.3852459016393444e-05, |
| "loss": 0.7385, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.6363636363636364, |
| "grad_norm": 0.4359170619851533, |
| "learning_rate": 4.380692167577414e-05, |
| "loss": 0.7095, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.6388206388206388, |
| "grad_norm": 0.3850739753964197, |
| "learning_rate": 4.376138433515483e-05, |
| "loss": 0.6958, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.6412776412776413, |
| "grad_norm": 0.4890138604791565, |
| "learning_rate": 4.371584699453552e-05, |
| "loss": 0.7211, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.6437346437346437, |
| "grad_norm": 0.38398720286811694, |
| "learning_rate": 4.367030965391621e-05, |
| "loss": 0.8539, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.6461916461916462, |
| "grad_norm": 0.5242499237496944, |
| "learning_rate": 4.362477231329691e-05, |
| "loss": 0.7239, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.6486486486486487, |
| "grad_norm": 0.6576624559407754, |
| "learning_rate": 4.35792349726776e-05, |
| "loss": 0.6224, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.6511056511056511, |
| "grad_norm": 0.48964094334247854, |
| "learning_rate": 4.353369763205829e-05, |
| "loss": 0.7645, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.6535626535626535, |
| "grad_norm": 0.4674980129473235, |
| "learning_rate": 4.3488160291438984e-05, |
| "loss": 0.768, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.6560196560196561, |
| "grad_norm": 0.4434022776784131, |
| "learning_rate": 4.3442622950819674e-05, |
| "loss": 0.7459, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.6584766584766585, |
| "grad_norm": 0.538941168132682, |
| "learning_rate": 4.3397085610200364e-05, |
| "loss": 0.6968, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.6609336609336609, |
| "grad_norm": 0.3624467815465402, |
| "learning_rate": 4.335154826958106e-05, |
| "loss": 0.6096, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.6633906633906634, |
| "grad_norm": 0.5599889013533942, |
| "learning_rate": 4.330601092896175e-05, |
| "loss": 0.7658, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6658476658476659, |
| "grad_norm": 0.690440401509493, |
| "learning_rate": 4.326047358834244e-05, |
| "loss": 0.7877, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.6683046683046683, |
| "grad_norm": 0.3686357695682294, |
| "learning_rate": 4.321493624772314e-05, |
| "loss": 0.6895, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.6707616707616708, |
| "grad_norm": 0.545620565235858, |
| "learning_rate": 4.316939890710383e-05, |
| "loss": 0.69, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.6732186732186732, |
| "grad_norm": 0.4204580863650939, |
| "learning_rate": 4.312386156648452e-05, |
| "loss": 0.6768, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.6756756756756757, |
| "grad_norm": 0.47301510227399846, |
| "learning_rate": 4.3078324225865214e-05, |
| "loss": 0.6414, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.6781326781326781, |
| "grad_norm": 0.39120871861762363, |
| "learning_rate": 4.3032786885245904e-05, |
| "loss": 0.6393, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.6805896805896806, |
| "grad_norm": 0.5663194594331895, |
| "learning_rate": 4.2987249544626594e-05, |
| "loss": 0.7721, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.683046683046683, |
| "grad_norm": 0.5578558026406056, |
| "learning_rate": 4.294171220400729e-05, |
| "loss": 0.679, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.6855036855036855, |
| "grad_norm": 0.4785935193977311, |
| "learning_rate": 4.289617486338798e-05, |
| "loss": 0.8548, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.687960687960688, |
| "grad_norm": 0.7344196795158664, |
| "learning_rate": 4.285063752276867e-05, |
| "loss": 0.7421, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6904176904176904, |
| "grad_norm": 0.8908899764975586, |
| "learning_rate": 4.280510018214937e-05, |
| "loss": 0.7894, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.6928746928746928, |
| "grad_norm": 0.6287419956030045, |
| "learning_rate": 4.275956284153005e-05, |
| "loss": 0.6785, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.6953316953316954, |
| "grad_norm": 0.5149422483348357, |
| "learning_rate": 4.271402550091075e-05, |
| "loss": 0.7382, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.6977886977886978, |
| "grad_norm": 0.5454860373961983, |
| "learning_rate": 4.2668488160291445e-05, |
| "loss": 0.7274, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.7002457002457002, |
| "grad_norm": 0.5477624009062736, |
| "learning_rate": 4.262295081967213e-05, |
| "loss": 0.7058, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.7027027027027027, |
| "grad_norm": 0.5596039899044134, |
| "learning_rate": 4.2577413479052825e-05, |
| "loss": 0.8178, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.7051597051597052, |
| "grad_norm": 0.5919332487502931, |
| "learning_rate": 4.253187613843352e-05, |
| "loss": 0.69, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.7076167076167076, |
| "grad_norm": 0.5283900337631473, |
| "learning_rate": 4.248633879781421e-05, |
| "loss": 0.8171, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.7100737100737101, |
| "grad_norm": 0.7692525624223621, |
| "learning_rate": 4.24408014571949e-05, |
| "loss": 0.7239, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.7125307125307125, |
| "grad_norm": 0.3863360498506725, |
| "learning_rate": 4.23952641165756e-05, |
| "loss": 0.5576, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.714987714987715, |
| "grad_norm": 0.7223883296775482, |
| "learning_rate": 4.234972677595629e-05, |
| "loss": 0.6975, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.7174447174447175, |
| "grad_norm": 1.7771798036626734, |
| "learning_rate": 4.230418943533698e-05, |
| "loss": 0.6565, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.7199017199017199, |
| "grad_norm": 0.6430310979475962, |
| "learning_rate": 4.225865209471767e-05, |
| "loss": 0.6675, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.7223587223587223, |
| "grad_norm": 0.3794537639280509, |
| "learning_rate": 4.2213114754098365e-05, |
| "loss": 0.7642, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.7248157248157249, |
| "grad_norm": 0.60943195656342, |
| "learning_rate": 4.2167577413479055e-05, |
| "loss": 0.7247, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.7272727272727273, |
| "grad_norm": 0.37289390653274224, |
| "learning_rate": 4.2122040072859745e-05, |
| "loss": 0.6255, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.7297297297297297, |
| "grad_norm": 0.4532436953171903, |
| "learning_rate": 4.207650273224044e-05, |
| "loss": 0.7069, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.7321867321867321, |
| "grad_norm": 0.40650887131809266, |
| "learning_rate": 4.203096539162113e-05, |
| "loss": 0.7403, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.7346437346437347, |
| "grad_norm": 0.5965618573882557, |
| "learning_rate": 4.198542805100182e-05, |
| "loss": 0.7413, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.7371007371007371, |
| "grad_norm": 0.35937646145739954, |
| "learning_rate": 4.193989071038252e-05, |
| "loss": 0.7104, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7395577395577395, |
| "grad_norm": 0.45967984584408983, |
| "learning_rate": 4.189435336976321e-05, |
| "loss": 0.8102, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.742014742014742, |
| "grad_norm": 0.4885635149330037, |
| "learning_rate": 4.18488160291439e-05, |
| "loss": 0.7302, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.7444717444717445, |
| "grad_norm": 0.3152058972635706, |
| "learning_rate": 4.1803278688524595e-05, |
| "loss": 0.6891, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.7469287469287469, |
| "grad_norm": 0.4161834589482244, |
| "learning_rate": 4.1757741347905285e-05, |
| "loss": 0.6623, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.7493857493857494, |
| "grad_norm": 0.36473148815614853, |
| "learning_rate": 4.1712204007285975e-05, |
| "loss": 0.7902, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.7518427518427518, |
| "grad_norm": 0.4147403697677368, |
| "learning_rate": 4.166666666666667e-05, |
| "loss": 0.7875, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.7542997542997543, |
| "grad_norm": 0.4077917564117238, |
| "learning_rate": 4.162112932604736e-05, |
| "loss": 0.7275, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.7567567567567568, |
| "grad_norm": 0.4060094467217255, |
| "learning_rate": 4.157559198542805e-05, |
| "loss": 0.7783, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.7592137592137592, |
| "grad_norm": 0.4130103975738772, |
| "learning_rate": 4.153005464480875e-05, |
| "loss": 0.6847, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.7616707616707616, |
| "grad_norm": 0.3681636230585068, |
| "learning_rate": 4.148451730418943e-05, |
| "loss": 0.7531, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.7641277641277642, |
| "grad_norm": 0.3827065341158274, |
| "learning_rate": 4.143897996357013e-05, |
| "loss": 0.7141, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.7665847665847666, |
| "grad_norm": 0.29238085362688543, |
| "learning_rate": 4.1393442622950826e-05, |
| "loss": 0.6273, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.769041769041769, |
| "grad_norm": 0.33937884647496835, |
| "learning_rate": 4.134790528233151e-05, |
| "loss": 0.6489, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.7714987714987716, |
| "grad_norm": 0.3015348898927694, |
| "learning_rate": 4.1302367941712206e-05, |
| "loss": 0.5207, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.773955773955774, |
| "grad_norm": 0.35134100703007254, |
| "learning_rate": 4.12568306010929e-05, |
| "loss": 0.7576, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.7764127764127764, |
| "grad_norm": 0.31798902115911587, |
| "learning_rate": 4.1211293260473586e-05, |
| "loss": 0.6203, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.7788697788697788, |
| "grad_norm": 0.35299888238401994, |
| "learning_rate": 4.116575591985428e-05, |
| "loss": 0.6875, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.7813267813267813, |
| "grad_norm": 0.3525914582079822, |
| "learning_rate": 4.112021857923498e-05, |
| "loss": 0.6804, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.7837837837837838, |
| "grad_norm": 0.3006720346358963, |
| "learning_rate": 4.107468123861566e-05, |
| "loss": 0.5346, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.7862407862407862, |
| "grad_norm": 1.5252533561825474, |
| "learning_rate": 4.102914389799636e-05, |
| "loss": 0.7018, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.7886977886977887, |
| "grad_norm": 0.32274770353739635, |
| "learning_rate": 4.098360655737705e-05, |
| "loss": 0.614, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.7911547911547911, |
| "grad_norm": 0.32985165709996966, |
| "learning_rate": 4.093806921675774e-05, |
| "loss": 0.6607, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.7936117936117936, |
| "grad_norm": 0.30025432983818734, |
| "learning_rate": 4.0892531876138436e-05, |
| "loss": 0.6357, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.7960687960687961, |
| "grad_norm": 0.3049594116455463, |
| "learning_rate": 4.0846994535519126e-05, |
| "loss": 0.5822, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.7985257985257985, |
| "grad_norm": 0.3629904661955952, |
| "learning_rate": 4.080145719489982e-05, |
| "loss": 0.6978, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.800982800982801, |
| "grad_norm": 0.9634279527349047, |
| "learning_rate": 4.075591985428051e-05, |
| "loss": 0.8865, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.8034398034398035, |
| "grad_norm": 0.45193045970841783, |
| "learning_rate": 4.07103825136612e-05, |
| "loss": 0.8065, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.8058968058968059, |
| "grad_norm": 0.3177464973567778, |
| "learning_rate": 4.06648451730419e-05, |
| "loss": 0.711, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.8083538083538083, |
| "grad_norm": 0.5049266007665172, |
| "learning_rate": 4.061930783242259e-05, |
| "loss": 0.788, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.8108108108108109, |
| "grad_norm": 0.44936451115710957, |
| "learning_rate": 4.057377049180328e-05, |
| "loss": 0.6603, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.8132678132678133, |
| "grad_norm": 0.40221025853337433, |
| "learning_rate": 4.0528233151183976e-05, |
| "loss": 0.6261, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.8157248157248157, |
| "grad_norm": 0.38900176002138404, |
| "learning_rate": 4.0482695810564666e-05, |
| "loss": 0.6544, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.8181818181818182, |
| "grad_norm": 0.453208732932394, |
| "learning_rate": 4.0437158469945356e-05, |
| "loss": 0.6353, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.8206388206388207, |
| "grad_norm": 0.3681796156494085, |
| "learning_rate": 4.039162112932605e-05, |
| "loss": 0.6836, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.8230958230958231, |
| "grad_norm": 0.468685040057859, |
| "learning_rate": 4.034608378870674e-05, |
| "loss": 0.7046, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.8255528255528255, |
| "grad_norm": 0.43444130480919046, |
| "learning_rate": 4.030054644808743e-05, |
| "loss": 0.6659, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.828009828009828, |
| "grad_norm": 0.3619248405794401, |
| "learning_rate": 4.025500910746813e-05, |
| "loss": 0.6417, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.8304668304668305, |
| "grad_norm": 0.410561658075711, |
| "learning_rate": 4.020947176684881e-05, |
| "loss": 0.659, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.8329238329238329, |
| "grad_norm": 0.349661211154494, |
| "learning_rate": 4.016393442622951e-05, |
| "loss": 0.7009, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.8353808353808354, |
| "grad_norm": 0.45025633913904883, |
| "learning_rate": 4.0118397085610207e-05, |
| "loss": 0.7118, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.8378378378378378, |
| "grad_norm": 0.3491439279038829, |
| "learning_rate": 4.007285974499089e-05, |
| "loss": 0.7326, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.8402948402948403, |
| "grad_norm": 0.37516636206626935, |
| "learning_rate": 4.0027322404371587e-05, |
| "loss": 0.7005, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.8427518427518428, |
| "grad_norm": 0.3135717435105698, |
| "learning_rate": 3.998178506375228e-05, |
| "loss": 0.6751, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.8452088452088452, |
| "grad_norm": 0.45748071875834095, |
| "learning_rate": 3.9936247723132967e-05, |
| "loss": 0.7238, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.8476658476658476, |
| "grad_norm": 0.43936046038898285, |
| "learning_rate": 3.989071038251366e-05, |
| "loss": 0.7568, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.8501228501228502, |
| "grad_norm": 0.38829296038456096, |
| "learning_rate": 3.984517304189436e-05, |
| "loss": 0.6835, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.8525798525798526, |
| "grad_norm": 0.45261007109171814, |
| "learning_rate": 3.979963570127504e-05, |
| "loss": 0.7626, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.855036855036855, |
| "grad_norm": 0.3469325577394658, |
| "learning_rate": 3.975409836065574e-05, |
| "loss": 0.7497, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.8574938574938575, |
| "grad_norm": 0.5400301615988978, |
| "learning_rate": 3.970856102003643e-05, |
| "loss": 0.8051, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.85995085995086, |
| "grad_norm": 0.4001992360407668, |
| "learning_rate": 3.966302367941712e-05, |
| "loss": 0.7536, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.8624078624078624, |
| "grad_norm": 0.3724180671895729, |
| "learning_rate": 3.961748633879782e-05, |
| "loss": 0.6238, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.8648648648648649, |
| "grad_norm": 0.386974931071893, |
| "learning_rate": 3.957194899817851e-05, |
| "loss": 0.6876, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.8673218673218673, |
| "grad_norm": 0.372863116265662, |
| "learning_rate": 3.95264116575592e-05, |
| "loss": 0.5849, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.8697788697788698, |
| "grad_norm": 0.33795820672046467, |
| "learning_rate": 3.9480874316939894e-05, |
| "loss": 0.5205, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.8722358722358723, |
| "grad_norm": 0.40729933902725135, |
| "learning_rate": 3.9435336976320584e-05, |
| "loss": 0.7655, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.8746928746928747, |
| "grad_norm": 0.30755968744467366, |
| "learning_rate": 3.9389799635701274e-05, |
| "loss": 0.6263, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.8771498771498771, |
| "grad_norm": 0.37093708872360476, |
| "learning_rate": 3.934426229508197e-05, |
| "loss": 0.7129, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.8796068796068796, |
| "grad_norm": 0.37633511734635255, |
| "learning_rate": 3.929872495446266e-05, |
| "loss": 0.5872, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.8820638820638821, |
| "grad_norm": 0.9614590556739387, |
| "learning_rate": 3.925318761384335e-05, |
| "loss": 0.7089, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.8845208845208845, |
| "grad_norm": 0.36669325077055215, |
| "learning_rate": 3.920765027322405e-05, |
| "loss": 0.5196, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.8869778869778869, |
| "grad_norm": 0.36063038368340206, |
| "learning_rate": 3.916211293260474e-05, |
| "loss": 0.7037, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.8894348894348895, |
| "grad_norm": 0.3844550677877335, |
| "learning_rate": 3.9116575591985434e-05, |
| "loss": 0.6472, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.8918918918918919, |
| "grad_norm": 0.36208926990085244, |
| "learning_rate": 3.9071038251366124e-05, |
| "loss": 0.6393, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.8943488943488943, |
| "grad_norm": 0.36998305778442386, |
| "learning_rate": 3.9025500910746814e-05, |
| "loss": 0.7667, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.8968058968058968, |
| "grad_norm": 0.3447294134836953, |
| "learning_rate": 3.897996357012751e-05, |
| "loss": 0.605, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.8992628992628993, |
| "grad_norm": 0.36709184015795876, |
| "learning_rate": 3.89344262295082e-05, |
| "loss": 0.6642, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.9017199017199017, |
| "grad_norm": 0.3486298961479053, |
| "learning_rate": 3.888888888888889e-05, |
| "loss": 0.6621, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.9041769041769042, |
| "grad_norm": 0.4328843991656747, |
| "learning_rate": 3.884335154826959e-05, |
| "loss": 0.6797, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.9066339066339066, |
| "grad_norm": 0.35617103914532294, |
| "learning_rate": 3.879781420765027e-05, |
| "loss": 0.6853, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 0.48563000772634657, |
| "learning_rate": 3.875227686703097e-05, |
| "loss": 0.6981, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.9115479115479116, |
| "grad_norm": 0.3887375137301516, |
| "learning_rate": 3.8706739526411664e-05, |
| "loss": 0.603, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.914004914004914, |
| "grad_norm": 0.439470097514328, |
| "learning_rate": 3.866120218579235e-05, |
| "loss": 0.7077, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.9164619164619164, |
| "grad_norm": 0.3403160171473462, |
| "learning_rate": 3.8615664845173044e-05, |
| "loss": 0.586, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.918918918918919, |
| "grad_norm": 0.42760829158750546, |
| "learning_rate": 3.857012750455374e-05, |
| "loss": 0.7303, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.9213759213759214, |
| "grad_norm": 0.36489243280535705, |
| "learning_rate": 3.8524590163934424e-05, |
| "loss": 0.6256, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.9238329238329238, |
| "grad_norm": 0.3808217161262314, |
| "learning_rate": 3.847905282331512e-05, |
| "loss": 0.7059, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.9262899262899262, |
| "grad_norm": 0.34013903969336157, |
| "learning_rate": 3.843351548269581e-05, |
| "loss": 0.7301, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.9287469287469288, |
| "grad_norm": 1.1463809470744701, |
| "learning_rate": 3.83879781420765e-05, |
| "loss": 0.6969, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.9312039312039312, |
| "grad_norm": 0.4235667833129601, |
| "learning_rate": 3.83424408014572e-05, |
| "loss": 0.6473, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.9336609336609336, |
| "grad_norm": 0.79876765490425, |
| "learning_rate": 3.829690346083789e-05, |
| "loss": 0.6183, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.9361179361179361, |
| "grad_norm": 0.49555963725341723, |
| "learning_rate": 3.825136612021858e-05, |
| "loss": 0.8044, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.9385749385749386, |
| "grad_norm": 0.3428503165110703, |
| "learning_rate": 3.8205828779599275e-05, |
| "loss": 0.5995, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.941031941031941, |
| "grad_norm": 0.44593307884321404, |
| "learning_rate": 3.8160291438979965e-05, |
| "loss": 0.7151, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.9434889434889435, |
| "grad_norm": 0.37468176709006323, |
| "learning_rate": 3.8114754098360655e-05, |
| "loss": 0.7905, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.9459459459459459, |
| "grad_norm": 0.5722646888774676, |
| "learning_rate": 3.806921675774135e-05, |
| "loss": 0.731, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.9484029484029484, |
| "grad_norm": 1.8799684973155986, |
| "learning_rate": 3.802367941712204e-05, |
| "loss": 0.7714, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.9508599508599509, |
| "grad_norm": 0.5172547101235551, |
| "learning_rate": 3.797814207650273e-05, |
| "loss": 0.6399, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.9533169533169533, |
| "grad_norm": 0.4418711377815284, |
| "learning_rate": 3.793260473588343e-05, |
| "loss": 0.6997, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.9557739557739557, |
| "grad_norm": 0.5285652919128196, |
| "learning_rate": 3.788706739526412e-05, |
| "loss": 0.704, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.9582309582309583, |
| "grad_norm": 0.45024081362204066, |
| "learning_rate": 3.784153005464481e-05, |
| "loss": 0.7121, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.9606879606879607, |
| "grad_norm": 0.4069199989712789, |
| "learning_rate": 3.7795992714025505e-05, |
| "loss": 0.6408, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.9631449631449631, |
| "grad_norm": 0.4856083258958585, |
| "learning_rate": 3.7750455373406195e-05, |
| "loss": 0.6723, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.9656019656019657, |
| "grad_norm": 0.3584054750131388, |
| "learning_rate": 3.7704918032786885e-05, |
| "loss": 0.619, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.9680589680589681, |
| "grad_norm": 0.46503131404325265, |
| "learning_rate": 3.765938069216758e-05, |
| "loss": 0.7499, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.9705159705159705, |
| "grad_norm": 0.3568325978396338, |
| "learning_rate": 3.761384335154827e-05, |
| "loss": 0.6153, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.972972972972973, |
| "grad_norm": 0.35548746505907636, |
| "learning_rate": 3.756830601092896e-05, |
| "loss": 0.6644, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.9754299754299754, |
| "grad_norm": 3.732099146967768, |
| "learning_rate": 3.752276867030965e-05, |
| "loss": 0.7677, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.9778869778869779, |
| "grad_norm": 0.6417926585769745, |
| "learning_rate": 3.747723132969035e-05, |
| "loss": 0.713, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.9803439803439803, |
| "grad_norm": 0.3428338885926231, |
| "learning_rate": 3.7431693989071045e-05, |
| "loss": 0.7057, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.9828009828009828, |
| "grad_norm": 0.44136452162974704, |
| "learning_rate": 3.738615664845173e-05, |
| "loss": 0.5293, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9852579852579852, |
| "grad_norm": 0.4267208521085863, |
| "learning_rate": 3.7340619307832425e-05, |
| "loss": 0.6846, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.9877149877149877, |
| "grad_norm": 0.36579693412461944, |
| "learning_rate": 3.729508196721312e-05, |
| "loss": 0.7243, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.9901719901719902, |
| "grad_norm": 0.46204688211658324, |
| "learning_rate": 3.7249544626593805e-05, |
| "loss": 0.68, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.9926289926289926, |
| "grad_norm": 0.37956013971155556, |
| "learning_rate": 3.72040072859745e-05, |
| "loss": 0.7447, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.995085995085995, |
| "grad_norm": 0.3910625026439214, |
| "learning_rate": 3.71584699453552e-05, |
| "loss": 0.6197, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.9975429975429976, |
| "grad_norm": 0.41783305217284267, |
| "learning_rate": 3.711293260473588e-05, |
| "loss": 0.6719, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.3611623758486256, |
| "learning_rate": 3.706739526411658e-05, |
| "loss": 0.5977, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.0024570024570025, |
| "grad_norm": 0.4712316660517998, |
| "learning_rate": 3.702185792349727e-05, |
| "loss": 0.6267, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.0049140049140048, |
| "grad_norm": 0.44510865147589923, |
| "learning_rate": 3.697632058287796e-05, |
| "loss": 0.5723, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.0073710073710074, |
| "grad_norm": 0.4897737184802636, |
| "learning_rate": 3.6930783242258656e-05, |
| "loss": 0.6133, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.00982800982801, |
| "grad_norm": 0.4710019531923247, |
| "learning_rate": 3.6885245901639346e-05, |
| "loss": 0.6601, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.0122850122850122, |
| "grad_norm": 0.4127476864637772, |
| "learning_rate": 3.6839708561020036e-05, |
| "loss": 0.5831, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.0147420147420148, |
| "grad_norm": 0.3852466347026918, |
| "learning_rate": 3.679417122040073e-05, |
| "loss": 0.6171, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.0171990171990173, |
| "grad_norm": 0.35722854453354774, |
| "learning_rate": 3.674863387978142e-05, |
| "loss": 0.4941, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.0196560196560196, |
| "grad_norm": 0.3477409452059263, |
| "learning_rate": 3.670309653916211e-05, |
| "loss": 0.6014, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.0221130221130221, |
| "grad_norm": 0.38573394146966594, |
| "learning_rate": 3.665755919854281e-05, |
| "loss": 0.5435, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.0245700245700247, |
| "grad_norm": 0.3152965022867117, |
| "learning_rate": 3.66120218579235e-05, |
| "loss": 0.5363, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.027027027027027, |
| "grad_norm": 0.37855487804654653, |
| "learning_rate": 3.656648451730419e-05, |
| "loss": 0.6216, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.0294840294840295, |
| "grad_norm": 0.3915386797411922, |
| "learning_rate": 3.6520947176684886e-05, |
| "loss": 0.6468, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.031941031941032, |
| "grad_norm": 0.30903418418917916, |
| "learning_rate": 3.6475409836065576e-05, |
| "loss": 0.5918, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.0343980343980343, |
| "grad_norm": 18.485814215831798, |
| "learning_rate": 3.6429872495446266e-05, |
| "loss": 0.7571, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.0368550368550369, |
| "grad_norm": 0.43418803474006623, |
| "learning_rate": 3.638433515482696e-05, |
| "loss": 0.4651, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.0393120393120394, |
| "grad_norm": 0.4296276366274725, |
| "learning_rate": 3.633879781420765e-05, |
| "loss": 0.569, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.0417690417690417, |
| "grad_norm": 0.3252040498050024, |
| "learning_rate": 3.629326047358834e-05, |
| "loss": 0.5682, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.0442260442260443, |
| "grad_norm": 0.5555580641102786, |
| "learning_rate": 3.624772313296903e-05, |
| "loss": 0.5685, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.0466830466830466, |
| "grad_norm": 0.30439876353558465, |
| "learning_rate": 3.620218579234973e-05, |
| "loss": 0.5509, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.049140049140049, |
| "grad_norm": 0.5257024496923978, |
| "learning_rate": 3.615664845173042e-05, |
| "loss": 0.6175, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.0515970515970516, |
| "grad_norm": 0.3924880233071523, |
| "learning_rate": 3.611111111111111e-05, |
| "loss": 0.5463, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.054054054054054, |
| "grad_norm": 0.3912483665248679, |
| "learning_rate": 3.6065573770491806e-05, |
| "loss": 0.5172, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.0565110565110565, |
| "grad_norm": 0.35522183054743234, |
| "learning_rate": 3.6020036429872496e-05, |
| "loss": 0.548, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.058968058968059, |
| "grad_norm": 0.41696382741795146, |
| "learning_rate": 3.5974499089253186e-05, |
| "loss": 0.6179, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.0614250614250613, |
| "grad_norm": 0.34899632677634346, |
| "learning_rate": 3.592896174863388e-05, |
| "loss": 0.5848, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.0638820638820639, |
| "grad_norm": 0.29764763902529734, |
| "learning_rate": 3.588342440801457e-05, |
| "loss": 0.5065, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.0663390663390664, |
| "grad_norm": 0.33789418991474374, |
| "learning_rate": 3.583788706739526e-05, |
| "loss": 0.557, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.0687960687960687, |
| "grad_norm": 0.3817072319681774, |
| "learning_rate": 3.579234972677596e-05, |
| "loss": 0.573, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.0712530712530712, |
| "grad_norm": 0.27883801849612727, |
| "learning_rate": 3.574681238615665e-05, |
| "loss": 0.4778, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.0737100737100738, |
| "grad_norm": 0.3923116193005877, |
| "learning_rate": 3.570127504553734e-05, |
| "loss": 0.5919, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.076167076167076, |
| "grad_norm": 0.29914831145059495, |
| "learning_rate": 3.5655737704918037e-05, |
| "loss": 0.494, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.0786240786240786, |
| "grad_norm": 0.31767336538989416, |
| "learning_rate": 3.5610200364298727e-05, |
| "loss": 0.6199, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.0810810810810811, |
| "grad_norm": 0.46913096826211653, |
| "learning_rate": 3.5564663023679417e-05, |
| "loss": 0.6955, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.0835380835380835, |
| "grad_norm": 0.3675875371319456, |
| "learning_rate": 3.551912568306011e-05, |
| "loss": 0.538, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.085995085995086, |
| "grad_norm": 0.3330032586684102, |
| "learning_rate": 3.54735883424408e-05, |
| "loss": 0.5659, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.0884520884520885, |
| "grad_norm": 0.39684518158418425, |
| "learning_rate": 3.542805100182149e-05, |
| "loss": 0.4902, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.0909090909090908, |
| "grad_norm": 0.3370350178101319, |
| "learning_rate": 3.538251366120219e-05, |
| "loss": 0.6277, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.0933660933660934, |
| "grad_norm": 0.3120031541968653, |
| "learning_rate": 3.533697632058288e-05, |
| "loss": 0.5705, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.095823095823096, |
| "grad_norm": 0.35804818545314876, |
| "learning_rate": 3.529143897996357e-05, |
| "loss": 0.5268, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.0982800982800982, |
| "grad_norm": 0.36340510531282566, |
| "learning_rate": 3.524590163934427e-05, |
| "loss": 0.5328, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.1007371007371007, |
| "grad_norm": 0.3098836614900157, |
| "learning_rate": 3.520036429872496e-05, |
| "loss": 0.5773, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.1031941031941033, |
| "grad_norm": 0.3135507590572425, |
| "learning_rate": 3.515482695810565e-05, |
| "loss": 0.6188, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.1056511056511056, |
| "grad_norm": 0.31164002022216103, |
| "learning_rate": 3.5109289617486344e-05, |
| "loss": 0.4606, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.1081081081081081, |
| "grad_norm": 0.3623278294612082, |
| "learning_rate": 3.5063752276867034e-05, |
| "loss": 0.6439, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.1105651105651106, |
| "grad_norm": 0.296521047913555, |
| "learning_rate": 3.5018214936247724e-05, |
| "loss": 0.4695, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.113022113022113, |
| "grad_norm": 0.3483084595473505, |
| "learning_rate": 3.4972677595628414e-05, |
| "loss": 0.5615, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.1154791154791155, |
| "grad_norm": 0.2860532290662123, |
| "learning_rate": 3.492714025500911e-05, |
| "loss": 0.5426, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.117936117936118, |
| "grad_norm": 0.35904055869223206, |
| "learning_rate": 3.48816029143898e-05, |
| "loss": 0.6372, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.1203931203931203, |
| "grad_norm": 0.3035047945160019, |
| "learning_rate": 3.483606557377049e-05, |
| "loss": 0.5084, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.1228501228501229, |
| "grad_norm": 0.34056825729709134, |
| "learning_rate": 3.479052823315119e-05, |
| "loss": 0.5445, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.1253071253071254, |
| "grad_norm": 0.34548063719869543, |
| "learning_rate": 3.474499089253188e-05, |
| "loss": 0.5538, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.1277641277641277, |
| "grad_norm": 0.34863453010817147, |
| "learning_rate": 3.469945355191257e-05, |
| "loss": 0.6136, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.1302211302211302, |
| "grad_norm": 0.36452640020436167, |
| "learning_rate": 3.4653916211293264e-05, |
| "loss": 0.6339, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.1326781326781328, |
| "grad_norm": 0.33505641304640355, |
| "learning_rate": 3.4608378870673954e-05, |
| "loss": 0.5226, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.135135135135135, |
| "grad_norm": 0.5832869535948028, |
| "learning_rate": 3.4562841530054644e-05, |
| "loss": 0.6528, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.1375921375921376, |
| "grad_norm": 0.29618924105134536, |
| "learning_rate": 3.451730418943534e-05, |
| "loss": 0.6025, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.1400491400491402, |
| "grad_norm": 0.34874600771453107, |
| "learning_rate": 3.447176684881603e-05, |
| "loss": 0.5565, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.1425061425061425, |
| "grad_norm": 0.335951908594719, |
| "learning_rate": 3.442622950819672e-05, |
| "loss": 0.547, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.144963144963145, |
| "grad_norm": 0.2998993608726187, |
| "learning_rate": 3.438069216757742e-05, |
| "loss": 0.628, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.1474201474201475, |
| "grad_norm": 0.29644218347091184, |
| "learning_rate": 3.433515482695811e-05, |
| "loss": 0.52, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.1498771498771498, |
| "grad_norm": 0.30863434769848686, |
| "learning_rate": 3.42896174863388e-05, |
| "loss": 0.5253, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.1523341523341524, |
| "grad_norm": 0.28232514356630184, |
| "learning_rate": 3.4244080145719494e-05, |
| "loss": 0.5264, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.154791154791155, |
| "grad_norm": 0.3486029632281899, |
| "learning_rate": 3.4198542805100184e-05, |
| "loss": 0.5337, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.1572481572481572, |
| "grad_norm": 0.2749244379146869, |
| "learning_rate": 3.4153005464480874e-05, |
| "loss": 0.4396, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.1597051597051597, |
| "grad_norm": 0.35073763579329614, |
| "learning_rate": 3.410746812386157e-05, |
| "loss": 0.5767, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.1621621621621623, |
| "grad_norm": 0.3148751339175056, |
| "learning_rate": 3.406193078324226e-05, |
| "loss": 0.554, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.1646191646191646, |
| "grad_norm": 0.31661478461777187, |
| "learning_rate": 3.401639344262295e-05, |
| "loss": 0.6312, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.1670761670761671, |
| "grad_norm": 0.32266558978084553, |
| "learning_rate": 3.397085610200365e-05, |
| "loss": 0.5549, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.1695331695331694, |
| "grad_norm": 0.31175094191334074, |
| "learning_rate": 3.392531876138434e-05, |
| "loss": 0.6031, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.171990171990172, |
| "grad_norm": 0.2860842816292032, |
| "learning_rate": 3.387978142076503e-05, |
| "loss": 0.5033, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.1744471744471745, |
| "grad_norm": 0.2863055488397975, |
| "learning_rate": 3.3834244080145725e-05, |
| "loss": 0.5826, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.1769041769041768, |
| "grad_norm": 0.2814884571892455, |
| "learning_rate": 3.3788706739526415e-05, |
| "loss": 0.6098, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.1793611793611793, |
| "grad_norm": 0.3343616425168066, |
| "learning_rate": 3.3743169398907105e-05, |
| "loss": 0.6576, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.1818181818181819, |
| "grad_norm": 4.471655389420487, |
| "learning_rate": 3.36976320582878e-05, |
| "loss": 0.6501, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.1842751842751842, |
| "grad_norm": 0.3531434211683213, |
| "learning_rate": 3.365209471766849e-05, |
| "loss": 0.5736, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.1867321867321867, |
| "grad_norm": 0.30933282032145204, |
| "learning_rate": 3.360655737704918e-05, |
| "loss": 0.5773, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.1891891891891893, |
| "grad_norm": 0.34749618430933105, |
| "learning_rate": 3.356102003642987e-05, |
| "loss": 0.5344, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.1916461916461916, |
| "grad_norm": 0.2890952500864336, |
| "learning_rate": 3.351548269581057e-05, |
| "loss": 0.5979, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.194103194103194, |
| "grad_norm": 0.34484921930011087, |
| "learning_rate": 3.346994535519126e-05, |
| "loss": 0.5318, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.1965601965601966, |
| "grad_norm": 0.30984886065289263, |
| "learning_rate": 3.342440801457195e-05, |
| "loss": 0.5531, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.199017199017199, |
| "grad_norm": 0.32020672210102435, |
| "learning_rate": 3.3378870673952645e-05, |
| "loss": 0.559, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.2014742014742015, |
| "grad_norm": 0.3715980189408075, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 0.5414, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.203931203931204, |
| "grad_norm": 0.2867099183140612, |
| "learning_rate": 3.3287795992714025e-05, |
| "loss": 0.5016, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.2063882063882063, |
| "grad_norm": 0.32647658657343387, |
| "learning_rate": 3.324225865209472e-05, |
| "loss": 0.5668, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.2088452088452089, |
| "grad_norm": 0.31285287963181513, |
| "learning_rate": 3.319672131147541e-05, |
| "loss": 0.5808, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.2113022113022114, |
| "grad_norm": 0.31154263564497325, |
| "learning_rate": 3.31511839708561e-05, |
| "loss": 0.577, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.2137592137592137, |
| "grad_norm": 0.3148888983694767, |
| "learning_rate": 3.31056466302368e-05, |
| "loss": 0.5713, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.2162162162162162, |
| "grad_norm": 0.33196948700396134, |
| "learning_rate": 3.306010928961749e-05, |
| "loss": 0.6411, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.2186732186732188, |
| "grad_norm": 0.3089241773992785, |
| "learning_rate": 3.301457194899818e-05, |
| "loss": 0.6084, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.221130221130221, |
| "grad_norm": 0.35264205238860336, |
| "learning_rate": 3.2969034608378875e-05, |
| "loss": 0.6082, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.2235872235872236, |
| "grad_norm": 0.3592504157610499, |
| "learning_rate": 3.2923497267759565e-05, |
| "loss": 0.5017, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.2260442260442261, |
| "grad_norm": 0.3294945441126368, |
| "learning_rate": 3.2877959927140255e-05, |
| "loss": 0.5671, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.2285012285012284, |
| "grad_norm": 0.31804938107229946, |
| "learning_rate": 3.283242258652095e-05, |
| "loss": 0.5706, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.230958230958231, |
| "grad_norm": 0.2933642876504185, |
| "learning_rate": 3.2786885245901635e-05, |
| "loss": 0.5426, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.2334152334152333, |
| "grad_norm": 0.3626340514862369, |
| "learning_rate": 3.274134790528233e-05, |
| "loss": 0.6105, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.2358722358722358, |
| "grad_norm": 0.26476010226570695, |
| "learning_rate": 3.269581056466303e-05, |
| "loss": 0.4702, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.2383292383292384, |
| "grad_norm": 0.3661036271637661, |
| "learning_rate": 3.265027322404371e-05, |
| "loss": 0.542, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.2407862407862407, |
| "grad_norm": 0.3421274093595941, |
| "learning_rate": 3.260473588342441e-05, |
| "loss": 0.5814, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.2432432432432432, |
| "grad_norm": 0.27100222834936427, |
| "learning_rate": 3.2559198542805106e-05, |
| "loss": 0.5478, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.2457002457002457, |
| "grad_norm": 0.314120753601731, |
| "learning_rate": 3.251366120218579e-05, |
| "loss": 0.5531, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.248157248157248, |
| "grad_norm": 0.9759156709730757, |
| "learning_rate": 3.2468123861566486e-05, |
| "loss": 0.6531, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.2506142506142506, |
| "grad_norm": 0.30944457432745653, |
| "learning_rate": 3.242258652094718e-05, |
| "loss": 0.5513, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.253071253071253, |
| "grad_norm": 0.3010475271711826, |
| "learning_rate": 3.237704918032787e-05, |
| "loss": 0.5095, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.2555282555282554, |
| "grad_norm": 2.091229835428742, |
| "learning_rate": 3.233151183970856e-05, |
| "loss": 0.6917, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.257985257985258, |
| "grad_norm": 0.4263480510636171, |
| "learning_rate": 3.228597449908925e-05, |
| "loss": 0.5107, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.2604422604422605, |
| "grad_norm": 0.2662240671218934, |
| "learning_rate": 3.224043715846995e-05, |
| "loss": 0.5171, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.2628992628992628, |
| "grad_norm": 0.38958730612737474, |
| "learning_rate": 3.219489981785064e-05, |
| "loss": 0.5062, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.2653562653562653, |
| "grad_norm": 0.30418756289720655, |
| "learning_rate": 3.214936247723133e-05, |
| "loss": 0.5835, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.2678132678132679, |
| "grad_norm": 0.344436665503126, |
| "learning_rate": 3.2103825136612026e-05, |
| "loss": 0.5983, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.2702702702702702, |
| "grad_norm": 1.918725499774248, |
| "learning_rate": 3.2058287795992716e-05, |
| "loss": 0.6293, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.2727272727272727, |
| "grad_norm": 0.3623753413503759, |
| "learning_rate": 3.2012750455373406e-05, |
| "loss": 0.631, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.2751842751842752, |
| "grad_norm": 0.4577653918156244, |
| "learning_rate": 3.19672131147541e-05, |
| "loss": 0.5013, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.2776412776412776, |
| "grad_norm": 0.31126359791794433, |
| "learning_rate": 3.192167577413479e-05, |
| "loss": 0.502, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.28009828009828, |
| "grad_norm": 0.30127450296424224, |
| "learning_rate": 3.187613843351548e-05, |
| "loss": 0.4445, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.2825552825552826, |
| "grad_norm": 0.32777802361056146, |
| "learning_rate": 3.183060109289618e-05, |
| "loss": 0.586, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.285012285012285, |
| "grad_norm": 0.3151574260038467, |
| "learning_rate": 3.178506375227687e-05, |
| "loss": 0.5101, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.2874692874692875, |
| "grad_norm": 0.2958405193987708, |
| "learning_rate": 3.173952641165756e-05, |
| "loss": 0.5115, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.28992628992629, |
| "grad_norm": 0.30692569753814974, |
| "learning_rate": 3.1693989071038256e-05, |
| "loss": 0.5255, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.2923832923832923, |
| "grad_norm": 0.31369349705521754, |
| "learning_rate": 3.1648451730418946e-05, |
| "loss": 0.5708, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.2948402948402948, |
| "grad_norm": 0.2818423915221156, |
| "learning_rate": 3.1602914389799636e-05, |
| "loss": 0.4837, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.2972972972972974, |
| "grad_norm": 0.3134826582265648, |
| "learning_rate": 3.155737704918033e-05, |
| "loss": 0.5751, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.2997542997542997, |
| "grad_norm": 0.2816827747685129, |
| "learning_rate": 3.1511839708561016e-05, |
| "loss": 0.4596, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.3022113022113022, |
| "grad_norm": 0.34084186090096374, |
| "learning_rate": 3.146630236794171e-05, |
| "loss": 0.5656, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.3046683046683047, |
| "grad_norm": 0.32476535285413916, |
| "learning_rate": 3.142076502732241e-05, |
| "loss": 0.5661, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.307125307125307, |
| "grad_norm": 0.3188888254272654, |
| "learning_rate": 3.137522768670309e-05, |
| "loss": 0.5269, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.3095823095823096, |
| "grad_norm": 0.3366341919026146, |
| "learning_rate": 3.132969034608379e-05, |
| "loss": 0.4923, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.3120393120393121, |
| "grad_norm": 0.3271992624122109, |
| "learning_rate": 3.1284153005464487e-05, |
| "loss": 0.5977, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.3144963144963144, |
| "grad_norm": 0.417085896710461, |
| "learning_rate": 3.123861566484517e-05, |
| "loss": 0.614, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.316953316953317, |
| "grad_norm": 0.31946680031176, |
| "learning_rate": 3.1193078324225867e-05, |
| "loss": 0.5364, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.3194103194103195, |
| "grad_norm": 0.34172653254662405, |
| "learning_rate": 3.114754098360656e-05, |
| "loss": 0.5552, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.3218673218673218, |
| "grad_norm": 0.334367874832506, |
| "learning_rate": 3.1102003642987247e-05, |
| "loss": 0.5969, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.3243243243243243, |
| "grad_norm": 0.3986000529335846, |
| "learning_rate": 3.105646630236794e-05, |
| "loss": 0.499, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.3267813267813269, |
| "grad_norm": 0.30475256236149134, |
| "learning_rate": 3.101092896174863e-05, |
| "loss": 0.5106, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.3292383292383292, |
| "grad_norm": 0.3316364648381355, |
| "learning_rate": 3.096539162112932e-05, |
| "loss": 0.5334, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.3316953316953317, |
| "grad_norm": 0.409941745047023, |
| "learning_rate": 3.091985428051002e-05, |
| "loss": 0.6345, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.3341523341523343, |
| "grad_norm": 0.3401524473507645, |
| "learning_rate": 3.087431693989071e-05, |
| "loss": 0.6766, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.3366093366093366, |
| "grad_norm": 0.3950305885673271, |
| "learning_rate": 3.082877959927141e-05, |
| "loss": 0.5296, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.339066339066339, |
| "grad_norm": 0.306723619335892, |
| "learning_rate": 3.07832422586521e-05, |
| "loss": 0.6201, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.3415233415233416, |
| "grad_norm": 0.3877898069868618, |
| "learning_rate": 3.073770491803279e-05, |
| "loss": 0.5411, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.343980343980344, |
| "grad_norm": 0.31598719997076186, |
| "learning_rate": 3.0692167577413484e-05, |
| "loss": 0.5121, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.3464373464373465, |
| "grad_norm": 0.3712193743058151, |
| "learning_rate": 3.0646630236794174e-05, |
| "loss": 0.5978, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.348894348894349, |
| "grad_norm": 0.33020226938329394, |
| "learning_rate": 3.0601092896174864e-05, |
| "loss": 0.4806, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.3513513513513513, |
| "grad_norm": 0.3248140873697447, |
| "learning_rate": 3.055555555555556e-05, |
| "loss": 0.5433, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.3538083538083538, |
| "grad_norm": 0.3230001924138346, |
| "learning_rate": 3.0510018214936247e-05, |
| "loss": 0.5853, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.3562653562653564, |
| "grad_norm": 0.35792498410700313, |
| "learning_rate": 3.046448087431694e-05, |
| "loss": 0.4961, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.3587223587223587, |
| "grad_norm": 0.37595474090197006, |
| "learning_rate": 3.0418943533697637e-05, |
| "loss": 0.5736, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.3611793611793612, |
| "grad_norm": 4.505487497550051, |
| "learning_rate": 3.0373406193078324e-05, |
| "loss": 0.7102, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.3636363636363638, |
| "grad_norm": 0.4374726309839091, |
| "learning_rate": 3.0327868852459017e-05, |
| "loss": 0.5518, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.366093366093366, |
| "grad_norm": 0.3410372388042177, |
| "learning_rate": 3.028233151183971e-05, |
| "loss": 0.5756, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.3685503685503686, |
| "grad_norm": 0.4059522167570831, |
| "learning_rate": 3.02367941712204e-05, |
| "loss": 0.5434, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.3710073710073711, |
| "grad_norm": 1.27646389360509, |
| "learning_rate": 3.0191256830601094e-05, |
| "loss": 0.4415, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.3734643734643734, |
| "grad_norm": 0.39439443949738967, |
| "learning_rate": 3.0145719489981787e-05, |
| "loss": 0.5545, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.375921375921376, |
| "grad_norm": 0.3677529411655735, |
| "learning_rate": 3.0100182149362477e-05, |
| "loss": 0.5867, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.3783783783783785, |
| "grad_norm": 0.3938192601050454, |
| "learning_rate": 3.005464480874317e-05, |
| "loss": 0.6695, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.3808353808353808, |
| "grad_norm": 0.4188986867298627, |
| "learning_rate": 3.0009107468123864e-05, |
| "loss": 0.6397, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.3832923832923834, |
| "grad_norm": 0.36496617388518987, |
| "learning_rate": 2.9963570127504554e-05, |
| "loss": 0.642, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.3857493857493859, |
| "grad_norm": 0.36732841184379483, |
| "learning_rate": 2.9918032786885248e-05, |
| "loss": 0.6248, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.3882063882063882, |
| "grad_norm": 0.3287638014013039, |
| "learning_rate": 2.987249544626594e-05, |
| "loss": 0.5528, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.3906633906633907, |
| "grad_norm": 0.3290826647548518, |
| "learning_rate": 2.982695810564663e-05, |
| "loss": 0.5995, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.393120393120393, |
| "grad_norm": 7.1958757083746585, |
| "learning_rate": 2.9781420765027324e-05, |
| "loss": 1.1998, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.3955773955773956, |
| "grad_norm": 2.242377112943564, |
| "learning_rate": 2.9735883424408018e-05, |
| "loss": 0.6146, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.398034398034398, |
| "grad_norm": 0.46246249293412817, |
| "learning_rate": 2.9690346083788704e-05, |
| "loss": 0.6113, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.4004914004914004, |
| "grad_norm": 0.32979804742212093, |
| "learning_rate": 2.96448087431694e-05, |
| "loss": 0.4534, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.402948402948403, |
| "grad_norm": 0.362610751520732, |
| "learning_rate": 2.9599271402550094e-05, |
| "loss": 0.6311, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.4054054054054055, |
| "grad_norm": 0.3834824335793335, |
| "learning_rate": 2.955373406193078e-05, |
| "loss": 0.5671, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.4078624078624078, |
| "grad_norm": 0.3162740542302538, |
| "learning_rate": 2.9508196721311478e-05, |
| "loss": 0.4862, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.4103194103194103, |
| "grad_norm": 0.6767191055749541, |
| "learning_rate": 2.946265938069217e-05, |
| "loss": 0.574, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.4127764127764126, |
| "grad_norm": 0.2842423993917733, |
| "learning_rate": 2.9417122040072858e-05, |
| "loss": 0.4788, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.4152334152334152, |
| "grad_norm": 0.4398113214411677, |
| "learning_rate": 2.937158469945355e-05, |
| "loss": 0.5164, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.4176904176904177, |
| "grad_norm": 0.33122596395932824, |
| "learning_rate": 2.9326047358834248e-05, |
| "loss": 0.5859, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.42014742014742, |
| "grad_norm": 0.6087999515003836, |
| "learning_rate": 2.9280510018214935e-05, |
| "loss": 0.5303, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.4226044226044225, |
| "grad_norm": 0.32726522540965197, |
| "learning_rate": 2.9234972677595628e-05, |
| "loss": 0.5724, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.425061425061425, |
| "grad_norm": 0.4106956970848515, |
| "learning_rate": 2.918943533697632e-05, |
| "loss": 0.5341, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.4275184275184274, |
| "grad_norm": 0.2852492527312244, |
| "learning_rate": 2.9143897996357018e-05, |
| "loss": 0.5196, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.42997542997543, |
| "grad_norm": 0.285739926248913, |
| "learning_rate": 2.9098360655737705e-05, |
| "loss": 0.5442, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.4324324324324325, |
| "grad_norm": 0.36932760352978566, |
| "learning_rate": 2.9052823315118398e-05, |
| "loss": 0.6142, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.4348894348894348, |
| "grad_norm": 0.33676368381537514, |
| "learning_rate": 2.9007285974499095e-05, |
| "loss": 0.6479, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.4373464373464373, |
| "grad_norm": 2.762615052994114, |
| "learning_rate": 2.896174863387978e-05, |
| "loss": 0.5999, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.4398034398034398, |
| "grad_norm": 0.33823584373786114, |
| "learning_rate": 2.8916211293260475e-05, |
| "loss": 0.5738, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.4422604422604421, |
| "grad_norm": 0.35461746489514906, |
| "learning_rate": 2.8870673952641168e-05, |
| "loss": 0.5155, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.4447174447174447, |
| "grad_norm": 0.30665641874707567, |
| "learning_rate": 2.8825136612021858e-05, |
| "loss": 0.5726, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.4471744471744472, |
| "grad_norm": 0.3218144044024646, |
| "learning_rate": 2.877959927140255e-05, |
| "loss": 0.5198, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.4496314496314495, |
| "grad_norm": 0.3282045050488162, |
| "learning_rate": 2.8734061930783245e-05, |
| "loss": 0.5923, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.452088452088452, |
| "grad_norm": 0.2882401227029393, |
| "learning_rate": 2.8688524590163935e-05, |
| "loss": 0.5092, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.4545454545454546, |
| "grad_norm": 0.3127088054502666, |
| "learning_rate": 2.864298724954463e-05, |
| "loss": 0.526, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.457002457002457, |
| "grad_norm": 2.4172038138032828, |
| "learning_rate": 2.8597449908925322e-05, |
| "loss": 0.7051, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.4594594594594594, |
| "grad_norm": 0.5233594884346805, |
| "learning_rate": 2.8551912568306012e-05, |
| "loss": 0.4573, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.461916461916462, |
| "grad_norm": 21.995326582795617, |
| "learning_rate": 2.8506375227686705e-05, |
| "loss": 0.6321, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.4643734643734643, |
| "grad_norm": 0.5078686288976298, |
| "learning_rate": 2.84608378870674e-05, |
| "loss": 0.5941, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.4668304668304668, |
| "grad_norm": 0.3609671816475955, |
| "learning_rate": 2.841530054644809e-05, |
| "loss": 0.5707, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.4692874692874693, |
| "grad_norm": 0.35532192813895724, |
| "learning_rate": 2.8369763205828782e-05, |
| "loss": 0.4821, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.4717444717444716, |
| "grad_norm": 0.3671957212508993, |
| "learning_rate": 2.8324225865209475e-05, |
| "loss": 0.4661, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.4742014742014742, |
| "grad_norm": 0.30117085000522925, |
| "learning_rate": 2.8278688524590162e-05, |
| "loss": 0.4893, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.4766584766584767, |
| "grad_norm": 0.33268501284939167, |
| "learning_rate": 2.823315118397086e-05, |
| "loss": 0.5176, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.479115479115479, |
| "grad_norm": 0.33604265458925436, |
| "learning_rate": 2.8187613843351552e-05, |
| "loss": 0.5182, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.4815724815724816, |
| "grad_norm": 0.3360889711504089, |
| "learning_rate": 2.814207650273224e-05, |
| "loss": 0.5907, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.484029484029484, |
| "grad_norm": 0.328673675164007, |
| "learning_rate": 2.8096539162112932e-05, |
| "loss": 0.55, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.4864864864864864, |
| "grad_norm": 0.31706495449515043, |
| "learning_rate": 2.805100182149363e-05, |
| "loss": 0.6074, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.488943488943489, |
| "grad_norm": 0.2920463104930728, |
| "learning_rate": 2.8005464480874316e-05, |
| "loss": 0.5588, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.4914004914004915, |
| "grad_norm": 0.2861747694993797, |
| "learning_rate": 2.795992714025501e-05, |
| "loss": 0.5292, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.4938574938574938, |
| "grad_norm": 0.3285475879079594, |
| "learning_rate": 2.7914389799635702e-05, |
| "loss": 0.6151, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.4963144963144963, |
| "grad_norm": 0.34034599761877166, |
| "learning_rate": 2.7868852459016392e-05, |
| "loss": 0.697, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.4987714987714988, |
| "grad_norm": 0.2804576195171845, |
| "learning_rate": 2.7823315118397086e-05, |
| "loss": 0.5973, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.5012285012285012, |
| "grad_norm": 0.33784205792768834, |
| "learning_rate": 2.777777777777778e-05, |
| "loss": 0.474, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.5036855036855037, |
| "grad_norm": 0.31097306577779116, |
| "learning_rate": 2.773224043715847e-05, |
| "loss": 0.4641, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.5061425061425062, |
| "grad_norm": 0.2844030812033379, |
| "learning_rate": 2.7686703096539162e-05, |
| "loss": 0.5817, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.5085995085995085, |
| "grad_norm": 0.38968984446732813, |
| "learning_rate": 2.7641165755919856e-05, |
| "loss": 0.5086, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.511056511056511, |
| "grad_norm": 0.31040975194056414, |
| "learning_rate": 2.7595628415300546e-05, |
| "loss": 0.4693, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.5135135135135136, |
| "grad_norm": 0.33741077330159325, |
| "learning_rate": 2.755009107468124e-05, |
| "loss": 0.589, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.515970515970516, |
| "grad_norm": 0.31661478005044347, |
| "learning_rate": 2.7504553734061933e-05, |
| "loss": 0.6049, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.5184275184275184, |
| "grad_norm": 0.3335488405098975, |
| "learning_rate": 2.7459016393442626e-05, |
| "loss": 0.6017, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.520884520884521, |
| "grad_norm": 0.31027476247695246, |
| "learning_rate": 2.7413479052823316e-05, |
| "loss": 0.5838, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.5233415233415233, |
| "grad_norm": 0.2777646630361566, |
| "learning_rate": 2.736794171220401e-05, |
| "loss": 0.5584, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.5257985257985258, |
| "grad_norm": 0.2913563020518906, |
| "learning_rate": 2.7322404371584703e-05, |
| "loss": 0.4762, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.5282555282555284, |
| "grad_norm": 0.350171904455104, |
| "learning_rate": 2.7276867030965393e-05, |
| "loss": 0.6177, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.5307125307125307, |
| "grad_norm": 0.30111848450509915, |
| "learning_rate": 2.7231329690346086e-05, |
| "loss": 0.5101, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.5331695331695332, |
| "grad_norm": 0.2867090459147248, |
| "learning_rate": 2.718579234972678e-05, |
| "loss": 0.5821, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.5356265356265357, |
| "grad_norm": 0.3638114975712457, |
| "learning_rate": 2.714025500910747e-05, |
| "loss": 0.6383, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.538083538083538, |
| "grad_norm": 0.2867067374702508, |
| "learning_rate": 2.7094717668488163e-05, |
| "loss": 0.5452, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.5405405405405406, |
| "grad_norm": 0.28737600107101, |
| "learning_rate": 2.7049180327868856e-05, |
| "loss": 0.5418, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.542997542997543, |
| "grad_norm": 0.26498806931754665, |
| "learning_rate": 2.7003642987249543e-05, |
| "loss": 0.5039, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.5454545454545454, |
| "grad_norm": 0.3011713624967737, |
| "learning_rate": 2.695810564663024e-05, |
| "loss": 0.53, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.547911547911548, |
| "grad_norm": 0.2691530635935858, |
| "learning_rate": 2.6912568306010933e-05, |
| "loss": 0.4859, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.5503685503685505, |
| "grad_norm": 0.2558261406391803, |
| "learning_rate": 2.686703096539162e-05, |
| "loss": 0.477, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.5528255528255528, |
| "grad_norm": 0.25620551411091325, |
| "learning_rate": 2.6821493624772313e-05, |
| "loss": 0.4604, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.5552825552825553, |
| "grad_norm": 0.2763114315788065, |
| "learning_rate": 2.677595628415301e-05, |
| "loss": 0.5792, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.5577395577395579, |
| "grad_norm": 0.28083914382759145, |
| "learning_rate": 2.6730418943533697e-05, |
| "loss": 0.5356, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.5601965601965602, |
| "grad_norm": 0.2920422213997252, |
| "learning_rate": 2.668488160291439e-05, |
| "loss": 0.5251, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.5626535626535627, |
| "grad_norm": 0.27262649363198743, |
| "learning_rate": 2.6639344262295087e-05, |
| "loss": 0.5402, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.5651105651105652, |
| "grad_norm": 0.2910361795236517, |
| "learning_rate": 2.6593806921675773e-05, |
| "loss": 0.5268, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.5675675675675675, |
| "grad_norm": 0.26654925026502435, |
| "learning_rate": 2.6548269581056467e-05, |
| "loss": 0.5328, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.57002457002457, |
| "grad_norm": 0.2973118734804211, |
| "learning_rate": 2.650273224043716e-05, |
| "loss": 0.5803, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.5724815724815726, |
| "grad_norm": 0.2971915311670167, |
| "learning_rate": 2.645719489981785e-05, |
| "loss": 0.5564, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.574938574938575, |
| "grad_norm": 0.2969196104157182, |
| "learning_rate": 2.6411657559198543e-05, |
| "loss": 0.5677, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.5773955773955772, |
| "grad_norm": 0.273834529796921, |
| "learning_rate": 2.6366120218579237e-05, |
| "loss": 0.4952, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.57985257985258, |
| "grad_norm": 0.2743084458426201, |
| "learning_rate": 2.6320582877959927e-05, |
| "loss": 0.5289, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.5823095823095823, |
| "grad_norm": 0.27010983622024526, |
| "learning_rate": 2.627504553734062e-05, |
| "loss": 0.5218, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.5847665847665846, |
| "grad_norm": 0.33171449854749435, |
| "learning_rate": 2.6229508196721314e-05, |
| "loss": 0.5671, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.5872235872235874, |
| "grad_norm": 0.30504425469503404, |
| "learning_rate": 2.6183970856102004e-05, |
| "loss": 0.5877, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.5896805896805897, |
| "grad_norm": 0.278481170782536, |
| "learning_rate": 2.6138433515482697e-05, |
| "loss": 0.6005, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.592137592137592, |
| "grad_norm": 0.29696142460818625, |
| "learning_rate": 2.609289617486339e-05, |
| "loss": 0.5305, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.5945945945945947, |
| "grad_norm": 0.33350125578968326, |
| "learning_rate": 2.604735883424408e-05, |
| "loss": 0.5594, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.597051597051597, |
| "grad_norm": 0.2895010135396355, |
| "learning_rate": 2.6001821493624774e-05, |
| "loss": 0.5711, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.5995085995085994, |
| "grad_norm": 0.3218565904038471, |
| "learning_rate": 2.5956284153005467e-05, |
| "loss": 0.5958, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.6019656019656021, |
| "grad_norm": 0.333555069936009, |
| "learning_rate": 2.5910746812386154e-05, |
| "loss": 0.5207, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.6044226044226044, |
| "grad_norm": 0.24599267706082115, |
| "learning_rate": 2.586520947176685e-05, |
| "loss": 0.5302, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.6068796068796067, |
| "grad_norm": 0.30331457642746157, |
| "learning_rate": 2.5819672131147544e-05, |
| "loss": 0.5827, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.6093366093366095, |
| "grad_norm": 0.30665436759125925, |
| "learning_rate": 2.5774134790528237e-05, |
| "loss": 0.5576, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.6117936117936118, |
| "grad_norm": 0.34092613831052127, |
| "learning_rate": 2.5728597449908924e-05, |
| "loss": 0.5054, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.6142506142506141, |
| "grad_norm": 0.28798725261513564, |
| "learning_rate": 2.568306010928962e-05, |
| "loss": 0.5379, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.6167076167076169, |
| "grad_norm": 0.3089417920348662, |
| "learning_rate": 2.5637522768670314e-05, |
| "loss": 0.5599, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.6191646191646192, |
| "grad_norm": 0.30820249889026247, |
| "learning_rate": 2.5591985428051e-05, |
| "loss": 0.4972, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.6216216216216215, |
| "grad_norm": 0.27710041943109465, |
| "learning_rate": 2.5546448087431697e-05, |
| "loss": 0.5631, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.6240786240786242, |
| "grad_norm": 0.30117632251651183, |
| "learning_rate": 2.550091074681239e-05, |
| "loss": 0.6309, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.6265356265356266, |
| "grad_norm": 0.3148919358125854, |
| "learning_rate": 2.5455373406193077e-05, |
| "loss": 0.6265, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.6289926289926289, |
| "grad_norm": 0.2912887580005306, |
| "learning_rate": 2.540983606557377e-05, |
| "loss": 0.5901, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.6314496314496314, |
| "grad_norm": 0.24051170702858976, |
| "learning_rate": 2.5364298724954468e-05, |
| "loss": 0.4714, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.633906633906634, |
| "grad_norm": 0.3126823231242451, |
| "learning_rate": 2.5318761384335154e-05, |
| "loss": 0.5539, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.6363636363636362, |
| "grad_norm": 0.32930595380736144, |
| "learning_rate": 2.5273224043715848e-05, |
| "loss": 0.5895, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.6388206388206388, |
| "grad_norm": 0.2932745657575283, |
| "learning_rate": 2.522768670309654e-05, |
| "loss": 0.5388, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.6412776412776413, |
| "grad_norm": 0.2787597154650259, |
| "learning_rate": 2.518214936247723e-05, |
| "loss": 0.5179, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.6437346437346436, |
| "grad_norm": 0.2937841728580581, |
| "learning_rate": 2.5136612021857924e-05, |
| "loss": 0.5083, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.6461916461916462, |
| "grad_norm": 0.32130959163428363, |
| "learning_rate": 2.5091074681238618e-05, |
| "loss": 0.5693, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.6486486486486487, |
| "grad_norm": 0.2991851088280298, |
| "learning_rate": 2.5045537340619308e-05, |
| "loss": 0.4979, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.651105651105651, |
| "grad_norm": 0.2776565257867249, |
| "learning_rate": 2.5e-05, |
| "loss": 0.5318, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.6535626535626535, |
| "grad_norm": 0.36657164420346156, |
| "learning_rate": 2.495446265938069e-05, |
| "loss": 0.5043, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.656019656019656, |
| "grad_norm": 0.3215204376240886, |
| "learning_rate": 2.4908925318761388e-05, |
| "loss": 0.5854, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.6584766584766584, |
| "grad_norm": 0.2950829336587517, |
| "learning_rate": 2.4863387978142078e-05, |
| "loss": 0.6343, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.660933660933661, |
| "grad_norm": 0.3113697420564982, |
| "learning_rate": 2.4817850637522768e-05, |
| "loss": 0.5147, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.6633906633906634, |
| "grad_norm": 0.31302535665995557, |
| "learning_rate": 2.477231329690346e-05, |
| "loss": 0.49, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.6658476658476657, |
| "grad_norm": 0.34035592954838445, |
| "learning_rate": 2.4726775956284155e-05, |
| "loss": 0.5816, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.6683046683046683, |
| "grad_norm": 0.3015369953798126, |
| "learning_rate": 2.4681238615664845e-05, |
| "loss": 0.5775, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.6707616707616708, |
| "grad_norm": 0.31787296914393265, |
| "learning_rate": 2.4635701275045538e-05, |
| "loss": 0.5725, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.6732186732186731, |
| "grad_norm": 0.3379584502023648, |
| "learning_rate": 2.459016393442623e-05, |
| "loss": 0.5642, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.6756756756756757, |
| "grad_norm": 0.27840602624421484, |
| "learning_rate": 2.4544626593806925e-05, |
| "loss": 0.615, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.6781326781326782, |
| "grad_norm": 0.28097823151212464, |
| "learning_rate": 2.4499089253187615e-05, |
| "loss": 0.547, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.6805896805896805, |
| "grad_norm": 0.2616920934490369, |
| "learning_rate": 2.4453551912568305e-05, |
| "loss": 0.4777, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.683046683046683, |
| "grad_norm": 0.2787698127004213, |
| "learning_rate": 2.4408014571949e-05, |
| "loss": 0.5141, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.6855036855036856, |
| "grad_norm": 0.27981914048115714, |
| "learning_rate": 2.436247723132969e-05, |
| "loss": 0.5716, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.6879606879606879, |
| "grad_norm": 4.088562590331179, |
| "learning_rate": 2.431693989071038e-05, |
| "loss": 0.6609, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.6904176904176904, |
| "grad_norm": 0.2956395589638685, |
| "learning_rate": 2.427140255009108e-05, |
| "loss": 0.5403, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.692874692874693, |
| "grad_norm": 0.29438181310455147, |
| "learning_rate": 2.422586520947177e-05, |
| "loss": 0.5685, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.6953316953316953, |
| "grad_norm": 0.2538193516711629, |
| "learning_rate": 2.418032786885246e-05, |
| "loss": 0.4785, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.6977886977886978, |
| "grad_norm": 0.28796731794186514, |
| "learning_rate": 2.4134790528233152e-05, |
| "loss": 0.552, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.7002457002457003, |
| "grad_norm": 0.27868424952411996, |
| "learning_rate": 2.4089253187613845e-05, |
| "loss": 0.5076, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.7027027027027026, |
| "grad_norm": 0.25990529519697514, |
| "learning_rate": 2.4043715846994535e-05, |
| "loss": 0.4533, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.7051597051597052, |
| "grad_norm": 0.2873379074909231, |
| "learning_rate": 2.399817850637523e-05, |
| "loss": 0.5913, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.7076167076167077, |
| "grad_norm": 0.29088350023709175, |
| "learning_rate": 2.3952641165755922e-05, |
| "loss": 0.5959, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.71007371007371, |
| "grad_norm": 0.2573364666726, |
| "learning_rate": 2.3907103825136612e-05, |
| "loss": 0.5062, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.7125307125307125, |
| "grad_norm": 0.29392565603255266, |
| "learning_rate": 2.3861566484517305e-05, |
| "loss": 0.5687, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.714987714987715, |
| "grad_norm": 0.2891193206942597, |
| "learning_rate": 2.3816029143898e-05, |
| "loss": 0.6049, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.7174447174447174, |
| "grad_norm": 0.2840198076767787, |
| "learning_rate": 2.377049180327869e-05, |
| "loss": 0.507, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.71990171990172, |
| "grad_norm": 0.29919110648065483, |
| "learning_rate": 2.3724954462659382e-05, |
| "loss": 0.5467, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.7223587223587224, |
| "grad_norm": 0.3058328148398321, |
| "learning_rate": 2.3679417122040072e-05, |
| "loss": 0.5812, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.7248157248157248, |
| "grad_norm": 0.30016823104589047, |
| "learning_rate": 2.363387978142077e-05, |
| "loss": 0.5755, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.7272727272727273, |
| "grad_norm": 0.3499121631564174, |
| "learning_rate": 2.358834244080146e-05, |
| "loss": 0.6278, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.7297297297297298, |
| "grad_norm": 0.7047977484338855, |
| "learning_rate": 2.354280510018215e-05, |
| "loss": 0.4715, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.7321867321867321, |
| "grad_norm": 0.3627891702679587, |
| "learning_rate": 2.3497267759562842e-05, |
| "loss": 0.5641, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.7346437346437347, |
| "grad_norm": 0.3269601035291933, |
| "learning_rate": 2.3451730418943536e-05, |
| "loss": 0.5644, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.7371007371007372, |
| "grad_norm": 0.3229301454671492, |
| "learning_rate": 2.3406193078324226e-05, |
| "loss": 0.5623, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.7395577395577395, |
| "grad_norm": 0.2621133990792928, |
| "learning_rate": 2.336065573770492e-05, |
| "loss": 0.4495, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.742014742014742, |
| "grad_norm": 0.3105707711781621, |
| "learning_rate": 2.3315118397085612e-05, |
| "loss": 0.6677, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.7444717444717446, |
| "grad_norm": 0.31405789889240876, |
| "learning_rate": 2.3269581056466302e-05, |
| "loss": 0.6109, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.746928746928747, |
| "grad_norm": 0.27862780228274875, |
| "learning_rate": 2.3224043715846996e-05, |
| "loss": 0.4707, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.7493857493857494, |
| "grad_norm": 0.2951135319765008, |
| "learning_rate": 2.317850637522769e-05, |
| "loss": 0.5152, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.751842751842752, |
| "grad_norm": 0.23868591982715384, |
| "learning_rate": 2.313296903460838e-05, |
| "loss": 0.4805, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.7542997542997543, |
| "grad_norm": 0.2809998521081784, |
| "learning_rate": 2.3087431693989073e-05, |
| "loss": 0.6071, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.7567567567567568, |
| "grad_norm": 0.3062020782313415, |
| "learning_rate": 2.3041894353369763e-05, |
| "loss": 0.5542, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.7592137592137593, |
| "grad_norm": 0.25160094772031133, |
| "learning_rate": 2.2996357012750456e-05, |
| "loss": 0.5262, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.7616707616707616, |
| "grad_norm": 0.2648834976152306, |
| "learning_rate": 2.295081967213115e-05, |
| "loss": 0.4888, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.7641277641277642, |
| "grad_norm": 0.2692198246802372, |
| "learning_rate": 2.290528233151184e-05, |
| "loss": 0.5413, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.7665847665847667, |
| "grad_norm": 0.302608144563453, |
| "learning_rate": 2.2859744990892533e-05, |
| "loss": 0.5485, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.769041769041769, |
| "grad_norm": 0.3024049619053678, |
| "learning_rate": 2.2814207650273226e-05, |
| "loss": 0.5612, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.7714987714987716, |
| "grad_norm": 0.2686755624508314, |
| "learning_rate": 2.2768670309653916e-05, |
| "loss": 0.5532, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.773955773955774, |
| "grad_norm": 0.31355914454819966, |
| "learning_rate": 2.272313296903461e-05, |
| "loss": 0.545, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.7764127764127764, |
| "grad_norm": 0.2679523528547601, |
| "learning_rate": 2.2677595628415303e-05, |
| "loss": 0.5404, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.7788697788697787, |
| "grad_norm": 0.34380417593496515, |
| "learning_rate": 2.2632058287795993e-05, |
| "loss": 0.6352, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.7813267813267815, |
| "grad_norm": 0.29712724540471824, |
| "learning_rate": 2.2586520947176686e-05, |
| "loss": 0.5769, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.7837837837837838, |
| "grad_norm": 0.29704261087468237, |
| "learning_rate": 2.254098360655738e-05, |
| "loss": 0.5088, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.786240786240786, |
| "grad_norm": 0.2823609172880149, |
| "learning_rate": 2.249544626593807e-05, |
| "loss": 0.5047, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.7886977886977888, |
| "grad_norm": 0.5140965758355988, |
| "learning_rate": 2.2449908925318763e-05, |
| "loss": 0.4842, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.7911547911547911, |
| "grad_norm": 1.7892960579058013, |
| "learning_rate": 2.2404371584699453e-05, |
| "loss": 0.5787, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.7936117936117935, |
| "grad_norm": 0.31920880587878125, |
| "learning_rate": 2.2358834244080147e-05, |
| "loss": 0.5224, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.7960687960687962, |
| "grad_norm": 0.2842312803943501, |
| "learning_rate": 2.231329690346084e-05, |
| "loss": 0.5373, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.7985257985257985, |
| "grad_norm": 0.3016887670720209, |
| "learning_rate": 2.226775956284153e-05, |
| "loss": 0.542, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.8009828009828008, |
| "grad_norm": 0.3107473379471071, |
| "learning_rate": 2.2222222222222223e-05, |
| "loss": 0.555, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.8034398034398036, |
| "grad_norm": 0.2955503666728652, |
| "learning_rate": 2.2176684881602917e-05, |
| "loss": 0.5613, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.805896805896806, |
| "grad_norm": 0.30524826919317594, |
| "learning_rate": 2.2131147540983607e-05, |
| "loss": 0.5297, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.8083538083538082, |
| "grad_norm": 0.27729189858459274, |
| "learning_rate": 2.20856102003643e-05, |
| "loss": 0.6061, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.810810810810811, |
| "grad_norm": 0.2893287066030788, |
| "learning_rate": 2.2040072859744993e-05, |
| "loss": 0.4896, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.8132678132678133, |
| "grad_norm": 0.2606901787727459, |
| "learning_rate": 2.1994535519125683e-05, |
| "loss": 0.5773, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.8157248157248156, |
| "grad_norm": 0.25527945735655144, |
| "learning_rate": 2.1948998178506377e-05, |
| "loss": 0.561, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.8181818181818183, |
| "grad_norm": 0.2669859467920838, |
| "learning_rate": 2.190346083788707e-05, |
| "loss": 0.5482, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.8206388206388207, |
| "grad_norm": 0.2693248782403161, |
| "learning_rate": 2.185792349726776e-05, |
| "loss": 0.5919, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.823095823095823, |
| "grad_norm": 0.2554414448707284, |
| "learning_rate": 2.1812386156648454e-05, |
| "loss": 0.5184, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.8255528255528255, |
| "grad_norm": 0.2945019560024116, |
| "learning_rate": 2.1766848816029144e-05, |
| "loss": 0.5633, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.828009828009828, |
| "grad_norm": 0.30074361548984935, |
| "learning_rate": 2.1721311475409837e-05, |
| "loss": 0.6057, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.8304668304668303, |
| "grad_norm": 0.31888019740726103, |
| "learning_rate": 2.167577413479053e-05, |
| "loss": 0.6217, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.8329238329238329, |
| "grad_norm": 0.6953990362734012, |
| "learning_rate": 2.163023679417122e-05, |
| "loss": 0.5032, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.8353808353808354, |
| "grad_norm": 0.2638969682962359, |
| "learning_rate": 2.1584699453551914e-05, |
| "loss": 0.5814, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.8378378378378377, |
| "grad_norm": 0.2673502531002082, |
| "learning_rate": 2.1539162112932607e-05, |
| "loss": 0.5353, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.8402948402948403, |
| "grad_norm": 0.3052402733466028, |
| "learning_rate": 2.1493624772313297e-05, |
| "loss": 0.6075, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.8427518427518428, |
| "grad_norm": 0.2590866717496352, |
| "learning_rate": 2.144808743169399e-05, |
| "loss": 0.548, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.845208845208845, |
| "grad_norm": 0.2800576089744623, |
| "learning_rate": 2.1402550091074684e-05, |
| "loss": 0.5195, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.8476658476658476, |
| "grad_norm": 0.3032887173826026, |
| "learning_rate": 2.1357012750455374e-05, |
| "loss": 0.482, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.8501228501228502, |
| "grad_norm": 0.2780282487888591, |
| "learning_rate": 2.1311475409836064e-05, |
| "loss": 0.5939, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.8525798525798525, |
| "grad_norm": 0.28853976340491677, |
| "learning_rate": 2.126593806921676e-05, |
| "loss": 0.5743, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.855036855036855, |
| "grad_norm": 0.3327786395512487, |
| "learning_rate": 2.122040072859745e-05, |
| "loss": 0.4186, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.8574938574938575, |
| "grad_norm": 0.2819311307178514, |
| "learning_rate": 2.1174863387978144e-05, |
| "loss": 0.5976, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.8599508599508598, |
| "grad_norm": 0.27219577380560167, |
| "learning_rate": 2.1129326047358834e-05, |
| "loss": 0.481, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.8624078624078624, |
| "grad_norm": 0.2888069852070096, |
| "learning_rate": 2.1083788706739527e-05, |
| "loss": 0.5633, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.864864864864865, |
| "grad_norm": 0.283281648136038, |
| "learning_rate": 2.103825136612022e-05, |
| "loss": 0.4768, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.8673218673218672, |
| "grad_norm": 0.2506456753016858, |
| "learning_rate": 2.099271402550091e-05, |
| "loss": 0.4806, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.8697788697788698, |
| "grad_norm": 0.259014545169757, |
| "learning_rate": 2.0947176684881604e-05, |
| "loss": 0.5095, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.8722358722358723, |
| "grad_norm": 0.28690420297792946, |
| "learning_rate": 2.0901639344262298e-05, |
| "loss": 0.5739, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.8746928746928746, |
| "grad_norm": 0.3067416454066446, |
| "learning_rate": 2.0856102003642988e-05, |
| "loss": 0.5557, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.8771498771498771, |
| "grad_norm": 0.2557249566617281, |
| "learning_rate": 2.081056466302368e-05, |
| "loss": 0.5136, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.8796068796068797, |
| "grad_norm": 0.27522324877940546, |
| "learning_rate": 2.0765027322404374e-05, |
| "loss": 0.582, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.882063882063882, |
| "grad_norm": 0.27502624886911736, |
| "learning_rate": 2.0719489981785064e-05, |
| "loss": 0.5115, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.8845208845208845, |
| "grad_norm": 0.28524062704028064, |
| "learning_rate": 2.0673952641165754e-05, |
| "loss": 0.604, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.886977886977887, |
| "grad_norm": 0.2896306842805975, |
| "learning_rate": 2.062841530054645e-05, |
| "loss": 0.5909, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.8894348894348894, |
| "grad_norm": 0.37017739402403504, |
| "learning_rate": 2.058287795992714e-05, |
| "loss": 0.6113, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.8918918918918919, |
| "grad_norm": 0.25659739921327007, |
| "learning_rate": 2.053734061930783e-05, |
| "loss": 0.533, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.8943488943488944, |
| "grad_norm": 0.2631965030301113, |
| "learning_rate": 2.0491803278688525e-05, |
| "loss": 0.4888, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.8968058968058967, |
| "grad_norm": 0.26335776548343703, |
| "learning_rate": 2.0446265938069218e-05, |
| "loss": 0.5527, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.8992628992628993, |
| "grad_norm": 0.2754894594590262, |
| "learning_rate": 2.040072859744991e-05, |
| "loss": 0.5974, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.9017199017199018, |
| "grad_norm": 0.25145939998899, |
| "learning_rate": 2.03551912568306e-05, |
| "loss": 0.4814, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.904176904176904, |
| "grad_norm": 0.24080950527586228, |
| "learning_rate": 2.0309653916211295e-05, |
| "loss": 0.485, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.9066339066339066, |
| "grad_norm": 0.26980458286482356, |
| "learning_rate": 2.0264116575591988e-05, |
| "loss": 0.5519, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.9090909090909092, |
| "grad_norm": 0.26842981889888856, |
| "learning_rate": 2.0218579234972678e-05, |
| "loss": 0.5098, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.9115479115479115, |
| "grad_norm": 0.2524704758947662, |
| "learning_rate": 2.017304189435337e-05, |
| "loss": 0.4943, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.914004914004914, |
| "grad_norm": 0.2856037686590571, |
| "learning_rate": 2.0127504553734065e-05, |
| "loss": 0.5953, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.9164619164619165, |
| "grad_norm": 0.30220124538301824, |
| "learning_rate": 2.0081967213114755e-05, |
| "loss": 0.6225, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.9189189189189189, |
| "grad_norm": 0.2680433180647516, |
| "learning_rate": 2.0036429872495445e-05, |
| "loss": 0.5303, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.9213759213759214, |
| "grad_norm": 0.2658779045053125, |
| "learning_rate": 1.999089253187614e-05, |
| "loss": 0.5525, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.923832923832924, |
| "grad_norm": 0.2962893495403355, |
| "learning_rate": 1.994535519125683e-05, |
| "loss": 0.5522, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.9262899262899262, |
| "grad_norm": 0.5328833977576326, |
| "learning_rate": 1.989981785063752e-05, |
| "loss": 0.5646, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.9287469287469288, |
| "grad_norm": 0.3653824185491529, |
| "learning_rate": 1.9854280510018215e-05, |
| "loss": 0.6461, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.9312039312039313, |
| "grad_norm": 0.342083225179309, |
| "learning_rate": 1.980874316939891e-05, |
| "loss": 0.5784, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.9336609336609336, |
| "grad_norm": 0.2879236786453721, |
| "learning_rate": 1.97632058287796e-05, |
| "loss": 0.4979, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.9361179361179361, |
| "grad_norm": 0.2837537460473016, |
| "learning_rate": 1.9717668488160292e-05, |
| "loss": 0.5029, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.9385749385749387, |
| "grad_norm": 0.32014780260432985, |
| "learning_rate": 1.9672131147540985e-05, |
| "loss": 0.5573, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.941031941031941, |
| "grad_norm": 0.32817327775242966, |
| "learning_rate": 1.9626593806921675e-05, |
| "loss": 0.4987, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.9434889434889435, |
| "grad_norm": 0.28163717402922606, |
| "learning_rate": 1.958105646630237e-05, |
| "loss": 0.4783, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.945945945945946, |
| "grad_norm": 0.2861392501523952, |
| "learning_rate": 1.9535519125683062e-05, |
| "loss": 0.4826, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.9484029484029484, |
| "grad_norm": 0.3363642864939868, |
| "learning_rate": 1.9489981785063755e-05, |
| "loss": 0.5295, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.950859950859951, |
| "grad_norm": 0.297653344647604, |
| "learning_rate": 1.9444444444444445e-05, |
| "loss": 0.5454, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.9533169533169534, |
| "grad_norm": 0.3270453521765983, |
| "learning_rate": 1.9398907103825135e-05, |
| "loss": 0.6322, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.9557739557739557, |
| "grad_norm": 0.26046016384211484, |
| "learning_rate": 1.9353369763205832e-05, |
| "loss": 0.5203, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.9582309582309583, |
| "grad_norm": 0.2770362852807057, |
| "learning_rate": 1.9307832422586522e-05, |
| "loss": 0.5273, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.9606879606879608, |
| "grad_norm": 0.29331896952461095, |
| "learning_rate": 1.9262295081967212e-05, |
| "loss": 0.6334, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.9631449631449631, |
| "grad_norm": 0.8983263494767803, |
| "learning_rate": 1.9216757741347906e-05, |
| "loss": 0.5353, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.9656019656019657, |
| "grad_norm": 0.354421880818103, |
| "learning_rate": 1.91712204007286e-05, |
| "loss": 0.5813, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.9680589680589682, |
| "grad_norm": 0.2927169517127379, |
| "learning_rate": 1.912568306010929e-05, |
| "loss": 0.579, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.9705159705159705, |
| "grad_norm": 0.32721857789898107, |
| "learning_rate": 1.9080145719489982e-05, |
| "loss": 0.5296, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.972972972972973, |
| "grad_norm": 0.3808551126968657, |
| "learning_rate": 1.9034608378870676e-05, |
| "loss": 0.568, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.9754299754299756, |
| "grad_norm": 0.29090526952275514, |
| "learning_rate": 1.8989071038251366e-05, |
| "loss": 0.5479, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.9778869778869779, |
| "grad_norm": 0.31614970674893583, |
| "learning_rate": 1.894353369763206e-05, |
| "loss": 0.5712, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.9803439803439802, |
| "grad_norm": 0.32886598817833446, |
| "learning_rate": 1.8897996357012752e-05, |
| "loss": 0.5644, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.982800982800983, |
| "grad_norm": 0.29937607015091, |
| "learning_rate": 1.8852459016393442e-05, |
| "loss": 0.5381, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.9852579852579852, |
| "grad_norm": 0.30106855434356256, |
| "learning_rate": 1.8806921675774136e-05, |
| "loss": 0.5981, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.9877149877149876, |
| "grad_norm": 0.2800181256297378, |
| "learning_rate": 1.8761384335154826e-05, |
| "loss": 0.551, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.9901719901719903, |
| "grad_norm": 0.28586036377645657, |
| "learning_rate": 1.8715846994535523e-05, |
| "loss": 0.5275, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.9926289926289926, |
| "grad_norm": 0.31385447188308085, |
| "learning_rate": 1.8670309653916213e-05, |
| "loss": 0.5601, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.995085995085995, |
| "grad_norm": 0.28851307782462143, |
| "learning_rate": 1.8624772313296903e-05, |
| "loss": 0.5781, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.9975429975429977, |
| "grad_norm": 0.34464670180519497, |
| "learning_rate": 1.85792349726776e-05, |
| "loss": 0.6451, |
| "step": 813 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.29338564297775654, |
| "learning_rate": 1.853369763205829e-05, |
| "loss": 0.5088, |
| "step": 814 |
| }, |
| { |
| "epoch": 2.0024570024570023, |
| "grad_norm": 0.41071893724186426, |
| "learning_rate": 1.848816029143898e-05, |
| "loss": 0.4005, |
| "step": 815 |
| }, |
| { |
| "epoch": 2.004914004914005, |
| "grad_norm": 0.35729203255082986, |
| "learning_rate": 1.8442622950819673e-05, |
| "loss": 0.5375, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.0073710073710074, |
| "grad_norm": 0.35116777825965795, |
| "learning_rate": 1.8397085610200366e-05, |
| "loss": 0.3852, |
| "step": 817 |
| }, |
| { |
| "epoch": 2.0098280098280097, |
| "grad_norm": 0.4283969986464076, |
| "learning_rate": 1.8351548269581056e-05, |
| "loss": 0.3566, |
| "step": 818 |
| }, |
| { |
| "epoch": 2.0122850122850124, |
| "grad_norm": 0.35665717517061507, |
| "learning_rate": 1.830601092896175e-05, |
| "loss": 0.4491, |
| "step": 819 |
| }, |
| { |
| "epoch": 2.0147420147420148, |
| "grad_norm": 0.29607963350631866, |
| "learning_rate": 1.8260473588342443e-05, |
| "loss": 0.4156, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.017199017199017, |
| "grad_norm": 0.40471159128724016, |
| "learning_rate": 1.8214936247723133e-05, |
| "loss": 0.4252, |
| "step": 821 |
| }, |
| { |
| "epoch": 2.01965601965602, |
| "grad_norm": 0.4023037095439346, |
| "learning_rate": 1.8169398907103826e-05, |
| "loss": 0.4303, |
| "step": 822 |
| }, |
| { |
| "epoch": 2.022113022113022, |
| "grad_norm": 0.31491067990723465, |
| "learning_rate": 1.8123861566484516e-05, |
| "loss": 0.4405, |
| "step": 823 |
| }, |
| { |
| "epoch": 2.0245700245700244, |
| "grad_norm": 0.32098076190832764, |
| "learning_rate": 1.807832422586521e-05, |
| "loss": 0.4037, |
| "step": 824 |
| }, |
| { |
| "epoch": 2.027027027027027, |
| "grad_norm": 0.4117176838641387, |
| "learning_rate": 1.8032786885245903e-05, |
| "loss": 0.4702, |
| "step": 825 |
| }, |
| { |
| "epoch": 2.0294840294840295, |
| "grad_norm": 0.36001699667740716, |
| "learning_rate": 1.7987249544626593e-05, |
| "loss": 0.4989, |
| "step": 826 |
| }, |
| { |
| "epoch": 2.031941031941032, |
| "grad_norm": 0.3396068846506985, |
| "learning_rate": 1.7941712204007287e-05, |
| "loss": 0.4954, |
| "step": 827 |
| }, |
| { |
| "epoch": 2.0343980343980346, |
| "grad_norm": 0.32243687243046554, |
| "learning_rate": 1.789617486338798e-05, |
| "loss": 0.4664, |
| "step": 828 |
| }, |
| { |
| "epoch": 2.036855036855037, |
| "grad_norm": 0.27969879077206583, |
| "learning_rate": 1.785063752276867e-05, |
| "loss": 0.4429, |
| "step": 829 |
| }, |
| { |
| "epoch": 2.039312039312039, |
| "grad_norm": 0.31574022998246704, |
| "learning_rate": 1.7805100182149363e-05, |
| "loss": 0.4584, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.041769041769042, |
| "grad_norm": 0.2688789472016761, |
| "learning_rate": 1.7759562841530057e-05, |
| "loss": 0.3876, |
| "step": 831 |
| }, |
| { |
| "epoch": 2.0442260442260443, |
| "grad_norm": 0.2784054655160525, |
| "learning_rate": 1.7714025500910747e-05, |
| "loss": 0.3927, |
| "step": 832 |
| }, |
| { |
| "epoch": 2.0466830466830466, |
| "grad_norm": 0.2673023986899889, |
| "learning_rate": 1.766848816029144e-05, |
| "loss": 0.386, |
| "step": 833 |
| }, |
| { |
| "epoch": 2.0491400491400493, |
| "grad_norm": 0.2800382150316435, |
| "learning_rate": 1.7622950819672133e-05, |
| "loss": 0.4102, |
| "step": 834 |
| }, |
| { |
| "epoch": 2.0515970515970516, |
| "grad_norm": 0.2893871141752102, |
| "learning_rate": 1.7577413479052823e-05, |
| "loss": 0.4367, |
| "step": 835 |
| }, |
| { |
| "epoch": 2.054054054054054, |
| "grad_norm": 0.32085451551691285, |
| "learning_rate": 1.7531876138433517e-05, |
| "loss": 0.438, |
| "step": 836 |
| }, |
| { |
| "epoch": 2.0565110565110567, |
| "grad_norm": 0.26054641681204144, |
| "learning_rate": 1.7486338797814207e-05, |
| "loss": 0.4182, |
| "step": 837 |
| }, |
| { |
| "epoch": 2.058968058968059, |
| "grad_norm": 0.2876218424667253, |
| "learning_rate": 1.74408014571949e-05, |
| "loss": 0.4671, |
| "step": 838 |
| }, |
| { |
| "epoch": 2.0614250614250613, |
| "grad_norm": 0.32477671982156153, |
| "learning_rate": 1.7395264116575594e-05, |
| "loss": 0.5294, |
| "step": 839 |
| }, |
| { |
| "epoch": 2.063882063882064, |
| "grad_norm": 0.25297866530499685, |
| "learning_rate": 1.7349726775956284e-05, |
| "loss": 0.3794, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.0663390663390664, |
| "grad_norm": 0.3220090607744593, |
| "learning_rate": 1.7304189435336977e-05, |
| "loss": 0.4592, |
| "step": 841 |
| }, |
| { |
| "epoch": 2.0687960687960687, |
| "grad_norm": 0.27031704077453683, |
| "learning_rate": 1.725865209471767e-05, |
| "loss": 0.3944, |
| "step": 842 |
| }, |
| { |
| "epoch": 2.0712530712530715, |
| "grad_norm": 0.27581141711381324, |
| "learning_rate": 1.721311475409836e-05, |
| "loss": 0.4197, |
| "step": 843 |
| }, |
| { |
| "epoch": 2.0737100737100738, |
| "grad_norm": 0.272567864438195, |
| "learning_rate": 1.7167577413479054e-05, |
| "loss": 0.4428, |
| "step": 844 |
| }, |
| { |
| "epoch": 2.076167076167076, |
| "grad_norm": 0.2968432656835, |
| "learning_rate": 1.7122040072859747e-05, |
| "loss": 0.4456, |
| "step": 845 |
| }, |
| { |
| "epoch": 2.078624078624079, |
| "grad_norm": 0.25441437304353254, |
| "learning_rate": 1.7076502732240437e-05, |
| "loss": 0.4155, |
| "step": 846 |
| }, |
| { |
| "epoch": 2.081081081081081, |
| "grad_norm": 0.5659771360772702, |
| "learning_rate": 1.703096539162113e-05, |
| "loss": 0.4397, |
| "step": 847 |
| }, |
| { |
| "epoch": 2.0835380835380835, |
| "grad_norm": 0.2472720529621334, |
| "learning_rate": 1.6985428051001824e-05, |
| "loss": 0.3765, |
| "step": 848 |
| }, |
| { |
| "epoch": 2.085995085995086, |
| "grad_norm": 0.2859826191779256, |
| "learning_rate": 1.6939890710382514e-05, |
| "loss": 0.4637, |
| "step": 849 |
| }, |
| { |
| "epoch": 2.0884520884520885, |
| "grad_norm": 0.2785861022601944, |
| "learning_rate": 1.6894353369763207e-05, |
| "loss": 0.4135, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.090909090909091, |
| "grad_norm": 0.25864220657304277, |
| "learning_rate": 1.68488160291439e-05, |
| "loss": 0.4325, |
| "step": 851 |
| }, |
| { |
| "epoch": 2.093366093366093, |
| "grad_norm": 0.31600684812535784, |
| "learning_rate": 1.680327868852459e-05, |
| "loss": 0.4794, |
| "step": 852 |
| }, |
| { |
| "epoch": 2.095823095823096, |
| "grad_norm": 0.22407053363373733, |
| "learning_rate": 1.6757741347905284e-05, |
| "loss": 0.3725, |
| "step": 853 |
| }, |
| { |
| "epoch": 2.098280098280098, |
| "grad_norm": 0.2784163632616066, |
| "learning_rate": 1.6712204007285974e-05, |
| "loss": 0.4596, |
| "step": 854 |
| }, |
| { |
| "epoch": 2.100737100737101, |
| "grad_norm": 0.2615510772868201, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.4238, |
| "step": 855 |
| }, |
| { |
| "epoch": 2.1031941031941033, |
| "grad_norm": 0.23026759434611474, |
| "learning_rate": 1.662112932604736e-05, |
| "loss": 0.3978, |
| "step": 856 |
| }, |
| { |
| "epoch": 2.1056511056511056, |
| "grad_norm": 0.2676230826969792, |
| "learning_rate": 1.657559198542805e-05, |
| "loss": 0.3808, |
| "step": 857 |
| }, |
| { |
| "epoch": 2.108108108108108, |
| "grad_norm": 0.28732333343272015, |
| "learning_rate": 1.6530054644808744e-05, |
| "loss": 0.422, |
| "step": 858 |
| }, |
| { |
| "epoch": 2.1105651105651106, |
| "grad_norm": 0.2275060472400037, |
| "learning_rate": 1.6484517304189438e-05, |
| "loss": 0.3937, |
| "step": 859 |
| }, |
| { |
| "epoch": 2.113022113022113, |
| "grad_norm": 0.26578354958753214, |
| "learning_rate": 1.6438979963570128e-05, |
| "loss": 0.4314, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.1154791154791153, |
| "grad_norm": 0.2738579827950538, |
| "learning_rate": 1.6393442622950818e-05, |
| "loss": 0.4774, |
| "step": 861 |
| }, |
| { |
| "epoch": 2.117936117936118, |
| "grad_norm": 0.26017321347892347, |
| "learning_rate": 1.6347905282331514e-05, |
| "loss": 0.4136, |
| "step": 862 |
| }, |
| { |
| "epoch": 2.1203931203931203, |
| "grad_norm": 0.25094754715043494, |
| "learning_rate": 1.6302367941712204e-05, |
| "loss": 0.3995, |
| "step": 863 |
| }, |
| { |
| "epoch": 2.1228501228501226, |
| "grad_norm": 0.24234839713703257, |
| "learning_rate": 1.6256830601092894e-05, |
| "loss": 0.4119, |
| "step": 864 |
| }, |
| { |
| "epoch": 2.1253071253071254, |
| "grad_norm": 0.25163386424602613, |
| "learning_rate": 1.621129326047359e-05, |
| "loss": 0.3784, |
| "step": 865 |
| }, |
| { |
| "epoch": 2.1277641277641277, |
| "grad_norm": 0.27936129219089983, |
| "learning_rate": 1.616575591985428e-05, |
| "loss": 0.4346, |
| "step": 866 |
| }, |
| { |
| "epoch": 2.13022113022113, |
| "grad_norm": 0.25895393324808647, |
| "learning_rate": 1.6120218579234975e-05, |
| "loss": 0.4425, |
| "step": 867 |
| }, |
| { |
| "epoch": 2.1326781326781328, |
| "grad_norm": 0.23027249451614298, |
| "learning_rate": 1.6074681238615665e-05, |
| "loss": 0.3955, |
| "step": 868 |
| }, |
| { |
| "epoch": 2.135135135135135, |
| "grad_norm": 0.24009714066156393, |
| "learning_rate": 1.6029143897996358e-05, |
| "loss": 0.4008, |
| "step": 869 |
| }, |
| { |
| "epoch": 2.1375921375921374, |
| "grad_norm": 0.2608536899762057, |
| "learning_rate": 1.598360655737705e-05, |
| "loss": 0.4489, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.14004914004914, |
| "grad_norm": 0.27104838457856334, |
| "learning_rate": 1.593806921675774e-05, |
| "loss": 0.4567, |
| "step": 871 |
| }, |
| { |
| "epoch": 2.1425061425061425, |
| "grad_norm": 0.23710136887205738, |
| "learning_rate": 1.5892531876138435e-05, |
| "loss": 0.4177, |
| "step": 872 |
| }, |
| { |
| "epoch": 2.1449631449631448, |
| "grad_norm": 0.2614832850724873, |
| "learning_rate": 1.5846994535519128e-05, |
| "loss": 0.4159, |
| "step": 873 |
| }, |
| { |
| "epoch": 2.1474201474201475, |
| "grad_norm": 0.2493244212239117, |
| "learning_rate": 1.5801457194899818e-05, |
| "loss": 0.398, |
| "step": 874 |
| }, |
| { |
| "epoch": 2.14987714987715, |
| "grad_norm": 0.2336464998631902, |
| "learning_rate": 1.5755919854280508e-05, |
| "loss": 0.4269, |
| "step": 875 |
| }, |
| { |
| "epoch": 2.152334152334152, |
| "grad_norm": 0.2500529632050007, |
| "learning_rate": 1.5710382513661205e-05, |
| "loss": 0.4597, |
| "step": 876 |
| }, |
| { |
| "epoch": 2.154791154791155, |
| "grad_norm": 0.237091380401223, |
| "learning_rate": 1.5664845173041895e-05, |
| "loss": 0.3458, |
| "step": 877 |
| }, |
| { |
| "epoch": 2.157248157248157, |
| "grad_norm": 0.2654558367364787, |
| "learning_rate": 1.5619307832422585e-05, |
| "loss": 0.4016, |
| "step": 878 |
| }, |
| { |
| "epoch": 2.1597051597051595, |
| "grad_norm": 0.2378495764493538, |
| "learning_rate": 1.557377049180328e-05, |
| "loss": 0.3668, |
| "step": 879 |
| }, |
| { |
| "epoch": 2.1621621621621623, |
| "grad_norm": 0.24037789383707953, |
| "learning_rate": 1.552823315118397e-05, |
| "loss": 0.4122, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.1646191646191646, |
| "grad_norm": 0.2774310296361704, |
| "learning_rate": 1.548269581056466e-05, |
| "loss": 0.4947, |
| "step": 881 |
| }, |
| { |
| "epoch": 2.167076167076167, |
| "grad_norm": 0.24152670773336085, |
| "learning_rate": 1.5437158469945355e-05, |
| "loss": 0.3944, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.1695331695331697, |
| "grad_norm": 0.2598802823682941, |
| "learning_rate": 1.539162112932605e-05, |
| "loss": 0.452, |
| "step": 883 |
| }, |
| { |
| "epoch": 2.171990171990172, |
| "grad_norm": 4.841124303991015, |
| "learning_rate": 1.5346083788706742e-05, |
| "loss": 0.4445, |
| "step": 884 |
| }, |
| { |
| "epoch": 2.1744471744471743, |
| "grad_norm": 0.9349040267922862, |
| "learning_rate": 1.5300546448087432e-05, |
| "loss": 0.4991, |
| "step": 885 |
| }, |
| { |
| "epoch": 2.176904176904177, |
| "grad_norm": 0.27687057595557907, |
| "learning_rate": 1.5255009107468124e-05, |
| "loss": 0.4169, |
| "step": 886 |
| }, |
| { |
| "epoch": 2.1793611793611793, |
| "grad_norm": 0.29678484834818836, |
| "learning_rate": 1.5209471766848819e-05, |
| "loss": 0.4114, |
| "step": 887 |
| }, |
| { |
| "epoch": 2.1818181818181817, |
| "grad_norm": 0.2530513739903439, |
| "learning_rate": 1.5163934426229509e-05, |
| "loss": 0.4305, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.1842751842751844, |
| "grad_norm": 0.2882228696525778, |
| "learning_rate": 1.51183970856102e-05, |
| "loss": 0.498, |
| "step": 889 |
| }, |
| { |
| "epoch": 2.1867321867321867, |
| "grad_norm": 0.2888696434121559, |
| "learning_rate": 1.5072859744990894e-05, |
| "loss": 0.4209, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.189189189189189, |
| "grad_norm": 0.26095945742134186, |
| "learning_rate": 1.5027322404371585e-05, |
| "loss": 0.4539, |
| "step": 891 |
| }, |
| { |
| "epoch": 2.191646191646192, |
| "grad_norm": 0.25838164536911407, |
| "learning_rate": 1.4981785063752277e-05, |
| "loss": 0.4293, |
| "step": 892 |
| }, |
| { |
| "epoch": 2.194103194103194, |
| "grad_norm": 0.28809740633168196, |
| "learning_rate": 1.493624772313297e-05, |
| "loss": 0.4173, |
| "step": 893 |
| }, |
| { |
| "epoch": 2.1965601965601964, |
| "grad_norm": 0.2800744500661992, |
| "learning_rate": 1.4890710382513662e-05, |
| "loss": 0.4365, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.199017199017199, |
| "grad_norm": 0.2502242180844428, |
| "learning_rate": 1.4845173041894352e-05, |
| "loss": 0.4286, |
| "step": 895 |
| }, |
| { |
| "epoch": 2.2014742014742015, |
| "grad_norm": 0.27065496051514343, |
| "learning_rate": 1.4799635701275047e-05, |
| "loss": 0.4674, |
| "step": 896 |
| }, |
| { |
| "epoch": 2.203931203931204, |
| "grad_norm": 0.2683214829245222, |
| "learning_rate": 1.4754098360655739e-05, |
| "loss": 0.4248, |
| "step": 897 |
| }, |
| { |
| "epoch": 2.2063882063882065, |
| "grad_norm": 0.2504102146869169, |
| "learning_rate": 1.4708561020036429e-05, |
| "loss": 0.4289, |
| "step": 898 |
| }, |
| { |
| "epoch": 2.208845208845209, |
| "grad_norm": 0.2577283264465404, |
| "learning_rate": 1.4663023679417124e-05, |
| "loss": 0.4386, |
| "step": 899 |
| }, |
| { |
| "epoch": 2.211302211302211, |
| "grad_norm": 0.26573181605705387, |
| "learning_rate": 1.4617486338797814e-05, |
| "loss": 0.4475, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.213759213759214, |
| "grad_norm": 0.24365626007806804, |
| "learning_rate": 1.4571948998178509e-05, |
| "loss": 0.4331, |
| "step": 901 |
| }, |
| { |
| "epoch": 2.2162162162162162, |
| "grad_norm": 0.28062941136254116, |
| "learning_rate": 1.4526411657559199e-05, |
| "loss": 0.4328, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.2186732186732185, |
| "grad_norm": 2.972065788354103, |
| "learning_rate": 1.448087431693989e-05, |
| "loss": 0.4546, |
| "step": 903 |
| }, |
| { |
| "epoch": 2.2211302211302213, |
| "grad_norm": 0.2534606607226359, |
| "learning_rate": 1.4435336976320584e-05, |
| "loss": 0.3829, |
| "step": 904 |
| }, |
| { |
| "epoch": 2.2235872235872236, |
| "grad_norm": 0.274929708261248, |
| "learning_rate": 1.4389799635701276e-05, |
| "loss": 0.4797, |
| "step": 905 |
| }, |
| { |
| "epoch": 2.226044226044226, |
| "grad_norm": 0.23105916503325502, |
| "learning_rate": 1.4344262295081968e-05, |
| "loss": 0.3747, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.2285012285012287, |
| "grad_norm": 0.2868497407842456, |
| "learning_rate": 1.4298724954462661e-05, |
| "loss": 0.4016, |
| "step": 907 |
| }, |
| { |
| "epoch": 2.230958230958231, |
| "grad_norm": 0.27039212267154017, |
| "learning_rate": 1.4253187613843353e-05, |
| "loss": 0.4429, |
| "step": 908 |
| }, |
| { |
| "epoch": 2.2334152334152333, |
| "grad_norm": 0.2626645153376362, |
| "learning_rate": 1.4207650273224044e-05, |
| "loss": 0.3925, |
| "step": 909 |
| }, |
| { |
| "epoch": 2.235872235872236, |
| "grad_norm": 0.2857586938595, |
| "learning_rate": 1.4162112932604738e-05, |
| "loss": 0.4041, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.2383292383292384, |
| "grad_norm": 0.24210750827605218, |
| "learning_rate": 1.411657559198543e-05, |
| "loss": 0.3824, |
| "step": 911 |
| }, |
| { |
| "epoch": 2.2407862407862407, |
| "grad_norm": 0.23567329374252635, |
| "learning_rate": 1.407103825136612e-05, |
| "loss": 0.4054, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.2432432432432434, |
| "grad_norm": 0.2608042036778111, |
| "learning_rate": 1.4025500910746814e-05, |
| "loss": 0.4663, |
| "step": 913 |
| }, |
| { |
| "epoch": 2.2457002457002457, |
| "grad_norm": 0.2516718644133891, |
| "learning_rate": 1.3979963570127504e-05, |
| "loss": 0.3965, |
| "step": 914 |
| }, |
| { |
| "epoch": 2.248157248157248, |
| "grad_norm": 0.2547812213730915, |
| "learning_rate": 1.3934426229508196e-05, |
| "loss": 0.4178, |
| "step": 915 |
| }, |
| { |
| "epoch": 2.250614250614251, |
| "grad_norm": 0.24505968428973618, |
| "learning_rate": 1.388888888888889e-05, |
| "loss": 0.377, |
| "step": 916 |
| }, |
| { |
| "epoch": 2.253071253071253, |
| "grad_norm": 0.2726142469594438, |
| "learning_rate": 1.3843351548269581e-05, |
| "loss": 0.5143, |
| "step": 917 |
| }, |
| { |
| "epoch": 2.2555282555282554, |
| "grad_norm": 0.2349026070277474, |
| "learning_rate": 1.3797814207650273e-05, |
| "loss": 0.3882, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.257985257985258, |
| "grad_norm": 0.22293153420044365, |
| "learning_rate": 1.3752276867030966e-05, |
| "loss": 0.3853, |
| "step": 919 |
| }, |
| { |
| "epoch": 2.2604422604422605, |
| "grad_norm": 0.25743572034407713, |
| "learning_rate": 1.3706739526411658e-05, |
| "loss": 0.4365, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.262899262899263, |
| "grad_norm": 0.2579052038508534, |
| "learning_rate": 1.3661202185792351e-05, |
| "loss": 0.4398, |
| "step": 921 |
| }, |
| { |
| "epoch": 2.2653562653562656, |
| "grad_norm": 0.2440626223986384, |
| "learning_rate": 1.3615664845173043e-05, |
| "loss": 0.4438, |
| "step": 922 |
| }, |
| { |
| "epoch": 2.267813267813268, |
| "grad_norm": 0.24281445788603354, |
| "learning_rate": 1.3570127504553735e-05, |
| "loss": 0.4001, |
| "step": 923 |
| }, |
| { |
| "epoch": 2.27027027027027, |
| "grad_norm": 0.2552660754956044, |
| "learning_rate": 1.3524590163934428e-05, |
| "loss": 0.4111, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.2727272727272725, |
| "grad_norm": 0.2457448775461868, |
| "learning_rate": 1.347905282331512e-05, |
| "loss": 0.4873, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.2751842751842752, |
| "grad_norm": 0.24684307678902845, |
| "learning_rate": 1.343351548269581e-05, |
| "loss": 0.4106, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.2776412776412776, |
| "grad_norm": 0.26175150698727817, |
| "learning_rate": 1.3387978142076505e-05, |
| "loss": 0.3655, |
| "step": 927 |
| }, |
| { |
| "epoch": 2.2800982800982803, |
| "grad_norm": 0.22280106083223208, |
| "learning_rate": 1.3342440801457195e-05, |
| "loss": 0.386, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.2825552825552826, |
| "grad_norm": 0.2615527014422742, |
| "learning_rate": 1.3296903460837887e-05, |
| "loss": 0.4426, |
| "step": 929 |
| }, |
| { |
| "epoch": 2.285012285012285, |
| "grad_norm": 0.27081169659666804, |
| "learning_rate": 1.325136612021858e-05, |
| "loss": 0.4449, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.2874692874692872, |
| "grad_norm": 0.2664738790934516, |
| "learning_rate": 1.3205828779599272e-05, |
| "loss": 0.3878, |
| "step": 931 |
| }, |
| { |
| "epoch": 2.28992628992629, |
| "grad_norm": 0.25492227378588317, |
| "learning_rate": 1.3160291438979963e-05, |
| "loss": 0.4008, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.2923832923832923, |
| "grad_norm": 0.26063202213635495, |
| "learning_rate": 1.3114754098360657e-05, |
| "loss": 0.4289, |
| "step": 933 |
| }, |
| { |
| "epoch": 2.294840294840295, |
| "grad_norm": 0.2910730570058918, |
| "learning_rate": 1.3069216757741349e-05, |
| "loss": 0.5056, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.2972972972972974, |
| "grad_norm": 0.25949619296926774, |
| "learning_rate": 1.302367941712204e-05, |
| "loss": 0.4352, |
| "step": 935 |
| }, |
| { |
| "epoch": 2.2997542997542997, |
| "grad_norm": 0.27651669705889614, |
| "learning_rate": 1.2978142076502734e-05, |
| "loss": 0.4198, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.302211302211302, |
| "grad_norm": 0.24886536308925195, |
| "learning_rate": 1.2932604735883425e-05, |
| "loss": 0.4334, |
| "step": 937 |
| }, |
| { |
| "epoch": 2.3046683046683047, |
| "grad_norm": 0.2354118767882423, |
| "learning_rate": 1.2887067395264119e-05, |
| "loss": 0.4045, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.307125307125307, |
| "grad_norm": 0.25840133517937125, |
| "learning_rate": 1.284153005464481e-05, |
| "loss": 0.4153, |
| "step": 939 |
| }, |
| { |
| "epoch": 2.30958230958231, |
| "grad_norm": 0.24820967640391792, |
| "learning_rate": 1.27959927140255e-05, |
| "loss": 0.4295, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.312039312039312, |
| "grad_norm": 0.2682235670401214, |
| "learning_rate": 1.2750455373406195e-05, |
| "loss": 0.3975, |
| "step": 941 |
| }, |
| { |
| "epoch": 2.3144963144963144, |
| "grad_norm": 0.24936889462306489, |
| "learning_rate": 1.2704918032786885e-05, |
| "loss": 0.4019, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.3169533169533167, |
| "grad_norm": 0.25313590598342434, |
| "learning_rate": 1.2659380692167577e-05, |
| "loss": 0.4121, |
| "step": 943 |
| }, |
| { |
| "epoch": 2.3194103194103195, |
| "grad_norm": 0.2661342959579272, |
| "learning_rate": 1.261384335154827e-05, |
| "loss": 0.4755, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.321867321867322, |
| "grad_norm": 0.24872736039438376, |
| "learning_rate": 1.2568306010928962e-05, |
| "loss": 0.4223, |
| "step": 945 |
| }, |
| { |
| "epoch": 2.3243243243243246, |
| "grad_norm": 0.2687245129503849, |
| "learning_rate": 1.2522768670309654e-05, |
| "loss": 0.4267, |
| "step": 946 |
| }, |
| { |
| "epoch": 2.326781326781327, |
| "grad_norm": 0.22779727771546104, |
| "learning_rate": 1.2477231329690346e-05, |
| "loss": 0.3892, |
| "step": 947 |
| }, |
| { |
| "epoch": 2.329238329238329, |
| "grad_norm": 0.24571522522484693, |
| "learning_rate": 1.2431693989071039e-05, |
| "loss": 0.4346, |
| "step": 948 |
| }, |
| { |
| "epoch": 2.3316953316953315, |
| "grad_norm": 0.25561261326404666, |
| "learning_rate": 1.238615664845173e-05, |
| "loss": 0.4389, |
| "step": 949 |
| }, |
| { |
| "epoch": 2.3341523341523343, |
| "grad_norm": 0.22939877730223857, |
| "learning_rate": 1.2340619307832422e-05, |
| "loss": 0.4432, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.3366093366093366, |
| "grad_norm": 0.2357383557445305, |
| "learning_rate": 1.2295081967213116e-05, |
| "loss": 0.4173, |
| "step": 951 |
| }, |
| { |
| "epoch": 2.339066339066339, |
| "grad_norm": 0.7364831086181507, |
| "learning_rate": 1.2249544626593807e-05, |
| "loss": 0.4907, |
| "step": 952 |
| }, |
| { |
| "epoch": 2.3415233415233416, |
| "grad_norm": 0.44572692986041285, |
| "learning_rate": 1.22040072859745e-05, |
| "loss": 0.4205, |
| "step": 953 |
| }, |
| { |
| "epoch": 2.343980343980344, |
| "grad_norm": 3.567246948098372, |
| "learning_rate": 1.215846994535519e-05, |
| "loss": 0.4788, |
| "step": 954 |
| }, |
| { |
| "epoch": 2.3464373464373462, |
| "grad_norm": 0.30309787203713356, |
| "learning_rate": 1.2112932604735884e-05, |
| "loss": 0.4275, |
| "step": 955 |
| }, |
| { |
| "epoch": 2.348894348894349, |
| "grad_norm": 0.24996522048036657, |
| "learning_rate": 1.2067395264116576e-05, |
| "loss": 0.4782, |
| "step": 956 |
| }, |
| { |
| "epoch": 2.3513513513513513, |
| "grad_norm": 0.24778131630911673, |
| "learning_rate": 1.2021857923497268e-05, |
| "loss": 0.4645, |
| "step": 957 |
| }, |
| { |
| "epoch": 2.3538083538083536, |
| "grad_norm": 0.29025219100431315, |
| "learning_rate": 1.1976320582877961e-05, |
| "loss": 0.4398, |
| "step": 958 |
| }, |
| { |
| "epoch": 2.3562653562653564, |
| "grad_norm": 0.27470165797569124, |
| "learning_rate": 1.1930783242258653e-05, |
| "loss": 0.4196, |
| "step": 959 |
| }, |
| { |
| "epoch": 2.3587223587223587, |
| "grad_norm": 0.24980686026607077, |
| "learning_rate": 1.1885245901639344e-05, |
| "loss": 0.4048, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.361179361179361, |
| "grad_norm": 0.23877293904046448, |
| "learning_rate": 1.1839708561020036e-05, |
| "loss": 0.4014, |
| "step": 961 |
| }, |
| { |
| "epoch": 2.3636363636363638, |
| "grad_norm": 0.2727423118228381, |
| "learning_rate": 1.179417122040073e-05, |
| "loss": 0.4454, |
| "step": 962 |
| }, |
| { |
| "epoch": 2.366093366093366, |
| "grad_norm": 1.7198763345457913, |
| "learning_rate": 1.1748633879781421e-05, |
| "loss": 0.4657, |
| "step": 963 |
| }, |
| { |
| "epoch": 2.3685503685503684, |
| "grad_norm": 0.22671067308180004, |
| "learning_rate": 1.1703096539162113e-05, |
| "loss": 0.3986, |
| "step": 964 |
| }, |
| { |
| "epoch": 2.371007371007371, |
| "grad_norm": 0.26385523396246985, |
| "learning_rate": 1.1657559198542806e-05, |
| "loss": 0.4469, |
| "step": 965 |
| }, |
| { |
| "epoch": 2.3734643734643734, |
| "grad_norm": 0.5084136843839772, |
| "learning_rate": 1.1612021857923498e-05, |
| "loss": 0.3711, |
| "step": 966 |
| }, |
| { |
| "epoch": 2.3759213759213758, |
| "grad_norm": 0.27655806457414583, |
| "learning_rate": 1.156648451730419e-05, |
| "loss": 0.4551, |
| "step": 967 |
| }, |
| { |
| "epoch": 2.3783783783783785, |
| "grad_norm": 0.2410876717300484, |
| "learning_rate": 1.1520947176684881e-05, |
| "loss": 0.4496, |
| "step": 968 |
| }, |
| { |
| "epoch": 2.380835380835381, |
| "grad_norm": 0.24309059276058065, |
| "learning_rate": 1.1475409836065575e-05, |
| "loss": 0.3511, |
| "step": 969 |
| }, |
| { |
| "epoch": 2.383292383292383, |
| "grad_norm": 0.30117433755614303, |
| "learning_rate": 1.1429872495446266e-05, |
| "loss": 0.4416, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.385749385749386, |
| "grad_norm": 0.25582765879217223, |
| "learning_rate": 1.1384335154826958e-05, |
| "loss": 0.4337, |
| "step": 971 |
| }, |
| { |
| "epoch": 2.388206388206388, |
| "grad_norm": 0.22539920788225698, |
| "learning_rate": 1.1338797814207651e-05, |
| "loss": 0.3714, |
| "step": 972 |
| }, |
| { |
| "epoch": 2.3906633906633905, |
| "grad_norm": 0.29931134171407514, |
| "learning_rate": 1.1293260473588343e-05, |
| "loss": 0.5106, |
| "step": 973 |
| }, |
| { |
| "epoch": 2.3931203931203933, |
| "grad_norm": 0.24423619232997276, |
| "learning_rate": 1.1247723132969035e-05, |
| "loss": 0.4172, |
| "step": 974 |
| }, |
| { |
| "epoch": 2.3955773955773956, |
| "grad_norm": 0.24339190265600077, |
| "learning_rate": 1.1202185792349727e-05, |
| "loss": 0.4833, |
| "step": 975 |
| }, |
| { |
| "epoch": 2.398034398034398, |
| "grad_norm": 0.24812658878123112, |
| "learning_rate": 1.115664845173042e-05, |
| "loss": 0.4091, |
| "step": 976 |
| }, |
| { |
| "epoch": 2.4004914004914006, |
| "grad_norm": 0.28759159231823356, |
| "learning_rate": 1.1111111111111112e-05, |
| "loss": 0.4221, |
| "step": 977 |
| }, |
| { |
| "epoch": 2.402948402948403, |
| "grad_norm": 0.23293079513866843, |
| "learning_rate": 1.1065573770491803e-05, |
| "loss": 0.4131, |
| "step": 978 |
| }, |
| { |
| "epoch": 2.4054054054054053, |
| "grad_norm": 7.749543886893285, |
| "learning_rate": 1.1020036429872497e-05, |
| "loss": 0.7646, |
| "step": 979 |
| }, |
| { |
| "epoch": 2.407862407862408, |
| "grad_norm": 0.23764368732650262, |
| "learning_rate": 1.0974499089253188e-05, |
| "loss": 0.4017, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.4103194103194103, |
| "grad_norm": 0.2849297299623391, |
| "learning_rate": 1.092896174863388e-05, |
| "loss": 0.3994, |
| "step": 981 |
| }, |
| { |
| "epoch": 2.4127764127764126, |
| "grad_norm": 0.25594559931712746, |
| "learning_rate": 1.0883424408014572e-05, |
| "loss": 0.3598, |
| "step": 982 |
| }, |
| { |
| "epoch": 2.4152334152334154, |
| "grad_norm": 0.24347214875726386, |
| "learning_rate": 1.0837887067395265e-05, |
| "loss": 0.4782, |
| "step": 983 |
| }, |
| { |
| "epoch": 2.4176904176904177, |
| "grad_norm": 0.2667475881035064, |
| "learning_rate": 1.0792349726775957e-05, |
| "loss": 0.478, |
| "step": 984 |
| }, |
| { |
| "epoch": 2.42014742014742, |
| "grad_norm": 0.25874841731246545, |
| "learning_rate": 1.0746812386156649e-05, |
| "loss": 0.4026, |
| "step": 985 |
| }, |
| { |
| "epoch": 2.4226044226044228, |
| "grad_norm": 0.25268556183613805, |
| "learning_rate": 1.0701275045537342e-05, |
| "loss": 0.4032, |
| "step": 986 |
| }, |
| { |
| "epoch": 2.425061425061425, |
| "grad_norm": 0.2395640061506075, |
| "learning_rate": 1.0655737704918032e-05, |
| "loss": 0.4052, |
| "step": 987 |
| }, |
| { |
| "epoch": 2.4275184275184274, |
| "grad_norm": 0.2750057820112185, |
| "learning_rate": 1.0610200364298725e-05, |
| "loss": 0.5181, |
| "step": 988 |
| }, |
| { |
| "epoch": 2.42997542997543, |
| "grad_norm": 0.23956660835432816, |
| "learning_rate": 1.0564663023679417e-05, |
| "loss": 0.4087, |
| "step": 989 |
| }, |
| { |
| "epoch": 2.4324324324324325, |
| "grad_norm": 0.2682821537572547, |
| "learning_rate": 1.051912568306011e-05, |
| "loss": 0.3539, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.4348894348894348, |
| "grad_norm": 0.253635569161534, |
| "learning_rate": 1.0473588342440802e-05, |
| "loss": 0.4304, |
| "step": 991 |
| }, |
| { |
| "epoch": 2.4373464373464375, |
| "grad_norm": 0.25725469397538964, |
| "learning_rate": 1.0428051001821494e-05, |
| "loss": 0.4665, |
| "step": 992 |
| }, |
| { |
| "epoch": 2.43980343980344, |
| "grad_norm": 0.21869469055564097, |
| "learning_rate": 1.0382513661202187e-05, |
| "loss": 0.3391, |
| "step": 993 |
| }, |
| { |
| "epoch": 2.442260442260442, |
| "grad_norm": 0.23932164929486857, |
| "learning_rate": 1.0336976320582877e-05, |
| "loss": 0.4214, |
| "step": 994 |
| }, |
| { |
| "epoch": 2.444717444717445, |
| "grad_norm": 0.249887483386558, |
| "learning_rate": 1.029143897996357e-05, |
| "loss": 0.4299, |
| "step": 995 |
| }, |
| { |
| "epoch": 2.447174447174447, |
| "grad_norm": 0.2448604037219602, |
| "learning_rate": 1.0245901639344262e-05, |
| "loss": 0.3619, |
| "step": 996 |
| }, |
| { |
| "epoch": 2.4496314496314495, |
| "grad_norm": 0.23238074536535608, |
| "learning_rate": 1.0200364298724956e-05, |
| "loss": 0.4014, |
| "step": 997 |
| }, |
| { |
| "epoch": 2.4520884520884523, |
| "grad_norm": 0.6771008652415798, |
| "learning_rate": 1.0154826958105647e-05, |
| "loss": 0.4356, |
| "step": 998 |
| }, |
| { |
| "epoch": 2.4545454545454546, |
| "grad_norm": 0.2590635077299185, |
| "learning_rate": 1.0109289617486339e-05, |
| "loss": 0.4019, |
| "step": 999 |
| }, |
| { |
| "epoch": 2.457002457002457, |
| "grad_norm": 0.2546310175980953, |
| "learning_rate": 1.0063752276867032e-05, |
| "loss": 0.4573, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.4594594594594597, |
| "grad_norm": 0.22827326397492897, |
| "learning_rate": 1.0018214936247722e-05, |
| "loss": 0.3811, |
| "step": 1001 |
| }, |
| { |
| "epoch": 2.461916461916462, |
| "grad_norm": 0.2433313320364577, |
| "learning_rate": 9.972677595628416e-06, |
| "loss": 0.429, |
| "step": 1002 |
| }, |
| { |
| "epoch": 2.4643734643734643, |
| "grad_norm": 0.26498928945986583, |
| "learning_rate": 9.927140255009108e-06, |
| "loss": 0.4492, |
| "step": 1003 |
| }, |
| { |
| "epoch": 2.4668304668304666, |
| "grad_norm": 0.2375299896617999, |
| "learning_rate": 9.8816029143898e-06, |
| "loss": 0.4213, |
| "step": 1004 |
| }, |
| { |
| "epoch": 2.4692874692874693, |
| "grad_norm": 0.2395572196206859, |
| "learning_rate": 9.836065573770493e-06, |
| "loss": 0.4685, |
| "step": 1005 |
| }, |
| { |
| "epoch": 2.4717444717444716, |
| "grad_norm": 0.2537090566068248, |
| "learning_rate": 9.790528233151184e-06, |
| "loss": 0.4412, |
| "step": 1006 |
| }, |
| { |
| "epoch": 2.4742014742014744, |
| "grad_norm": 0.25208141661171674, |
| "learning_rate": 9.744990892531878e-06, |
| "loss": 0.3854, |
| "step": 1007 |
| }, |
| { |
| "epoch": 2.4766584766584767, |
| "grad_norm": 0.2642583302690173, |
| "learning_rate": 9.699453551912568e-06, |
| "loss": 0.4179, |
| "step": 1008 |
| }, |
| { |
| "epoch": 2.479115479115479, |
| "grad_norm": 0.23767421601073707, |
| "learning_rate": 9.653916211293261e-06, |
| "loss": 0.4385, |
| "step": 1009 |
| }, |
| { |
| "epoch": 2.4815724815724813, |
| "grad_norm": 0.24744117798052945, |
| "learning_rate": 9.608378870673953e-06, |
| "loss": 0.4231, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.484029484029484, |
| "grad_norm": 0.23898600337582362, |
| "learning_rate": 9.562841530054644e-06, |
| "loss": 0.4217, |
| "step": 1011 |
| }, |
| { |
| "epoch": 2.4864864864864864, |
| "grad_norm": 0.25968529590110145, |
| "learning_rate": 9.517304189435338e-06, |
| "loss": 0.4322, |
| "step": 1012 |
| }, |
| { |
| "epoch": 2.488943488943489, |
| "grad_norm": 0.2552860811626078, |
| "learning_rate": 9.47176684881603e-06, |
| "loss": 0.4059, |
| "step": 1013 |
| }, |
| { |
| "epoch": 2.4914004914004915, |
| "grad_norm": 0.2535129474406969, |
| "learning_rate": 9.426229508196721e-06, |
| "loss": 0.4199, |
| "step": 1014 |
| }, |
| { |
| "epoch": 2.493857493857494, |
| "grad_norm": 0.23391837558421832, |
| "learning_rate": 9.380692167577413e-06, |
| "loss": 0.4875, |
| "step": 1015 |
| }, |
| { |
| "epoch": 2.496314496314496, |
| "grad_norm": 0.24298822208561335, |
| "learning_rate": 9.335154826958106e-06, |
| "loss": 0.4601, |
| "step": 1016 |
| }, |
| { |
| "epoch": 2.498771498771499, |
| "grad_norm": 0.9278988295393633, |
| "learning_rate": 9.2896174863388e-06, |
| "loss": 0.4622, |
| "step": 1017 |
| }, |
| { |
| "epoch": 2.501228501228501, |
| "grad_norm": 0.2392256552257423, |
| "learning_rate": 9.24408014571949e-06, |
| "loss": 0.4355, |
| "step": 1018 |
| }, |
| { |
| "epoch": 2.503685503685504, |
| "grad_norm": 0.2330893968890112, |
| "learning_rate": 9.198542805100183e-06, |
| "loss": 0.3879, |
| "step": 1019 |
| }, |
| { |
| "epoch": 2.506142506142506, |
| "grad_norm": 0.25975387161037466, |
| "learning_rate": 9.153005464480875e-06, |
| "loss": 0.5102, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.5085995085995085, |
| "grad_norm": 0.23725040470847344, |
| "learning_rate": 9.107468123861566e-06, |
| "loss": 0.3736, |
| "step": 1021 |
| }, |
| { |
| "epoch": 2.511056511056511, |
| "grad_norm": 0.2313618576568214, |
| "learning_rate": 9.061930783242258e-06, |
| "loss": 0.4254, |
| "step": 1022 |
| }, |
| { |
| "epoch": 2.5135135135135136, |
| "grad_norm": 0.24699951638595727, |
| "learning_rate": 9.016393442622952e-06, |
| "loss": 0.4583, |
| "step": 1023 |
| }, |
| { |
| "epoch": 2.515970515970516, |
| "grad_norm": 0.25544750210043377, |
| "learning_rate": 8.970856102003643e-06, |
| "loss": 0.3913, |
| "step": 1024 |
| }, |
| { |
| "epoch": 2.5184275184275187, |
| "grad_norm": 0.255332626493528, |
| "learning_rate": 8.925318761384335e-06, |
| "loss": 0.443, |
| "step": 1025 |
| }, |
| { |
| "epoch": 2.520884520884521, |
| "grad_norm": 0.24789597999286547, |
| "learning_rate": 8.879781420765028e-06, |
| "loss": 0.3952, |
| "step": 1026 |
| }, |
| { |
| "epoch": 2.5233415233415233, |
| "grad_norm": 0.23241695995795617, |
| "learning_rate": 8.83424408014572e-06, |
| "loss": 0.3795, |
| "step": 1027 |
| }, |
| { |
| "epoch": 2.5257985257985256, |
| "grad_norm": 0.22579417060539125, |
| "learning_rate": 8.788706739526412e-06, |
| "loss": 0.3723, |
| "step": 1028 |
| }, |
| { |
| "epoch": 2.5282555282555284, |
| "grad_norm": 0.22283238385668375, |
| "learning_rate": 8.743169398907103e-06, |
| "loss": 0.391, |
| "step": 1029 |
| }, |
| { |
| "epoch": 2.5307125307125307, |
| "grad_norm": 0.2246271370216966, |
| "learning_rate": 8.697632058287797e-06, |
| "loss": 0.3482, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.5331695331695334, |
| "grad_norm": 0.24663076613217097, |
| "learning_rate": 8.652094717668488e-06, |
| "loss": 0.4507, |
| "step": 1031 |
| }, |
| { |
| "epoch": 2.5356265356265357, |
| "grad_norm": 0.26050722671516013, |
| "learning_rate": 8.60655737704918e-06, |
| "loss": 0.3843, |
| "step": 1032 |
| }, |
| { |
| "epoch": 2.538083538083538, |
| "grad_norm": 0.2331322639700471, |
| "learning_rate": 8.561020036429874e-06, |
| "loss": 0.3914, |
| "step": 1033 |
| }, |
| { |
| "epoch": 2.5405405405405403, |
| "grad_norm": 0.24551537000672188, |
| "learning_rate": 8.515482695810565e-06, |
| "loss": 0.4184, |
| "step": 1034 |
| }, |
| { |
| "epoch": 2.542997542997543, |
| "grad_norm": 0.24176204248205907, |
| "learning_rate": 8.469945355191257e-06, |
| "loss": 0.3952, |
| "step": 1035 |
| }, |
| { |
| "epoch": 2.5454545454545454, |
| "grad_norm": 0.23506445053772151, |
| "learning_rate": 8.42440801457195e-06, |
| "loss": 0.4188, |
| "step": 1036 |
| }, |
| { |
| "epoch": 2.547911547911548, |
| "grad_norm": 0.23936543990096357, |
| "learning_rate": 8.378870673952642e-06, |
| "loss": 0.4283, |
| "step": 1037 |
| }, |
| { |
| "epoch": 2.5503685503685505, |
| "grad_norm": 0.2291008508057063, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 0.3352, |
| "step": 1038 |
| }, |
| { |
| "epoch": 2.552825552825553, |
| "grad_norm": 0.24326786660676292, |
| "learning_rate": 8.287795992714025e-06, |
| "loss": 0.447, |
| "step": 1039 |
| }, |
| { |
| "epoch": 2.555282555282555, |
| "grad_norm": 0.2275126614890594, |
| "learning_rate": 8.242258652094719e-06, |
| "loss": 0.4059, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.557739557739558, |
| "grad_norm": 0.23651265321324066, |
| "learning_rate": 8.196721311475409e-06, |
| "loss": 0.4521, |
| "step": 1041 |
| }, |
| { |
| "epoch": 2.56019656019656, |
| "grad_norm": 0.26505811816438285, |
| "learning_rate": 8.151183970856102e-06, |
| "loss": 0.4815, |
| "step": 1042 |
| }, |
| { |
| "epoch": 2.562653562653563, |
| "grad_norm": 0.2568061760157405, |
| "learning_rate": 8.105646630236796e-06, |
| "loss": 0.4375, |
| "step": 1043 |
| }, |
| { |
| "epoch": 2.5651105651105652, |
| "grad_norm": 0.22708969537757387, |
| "learning_rate": 8.060109289617487e-06, |
| "loss": 0.4164, |
| "step": 1044 |
| }, |
| { |
| "epoch": 2.5675675675675675, |
| "grad_norm": 0.22323849393344242, |
| "learning_rate": 8.014571948998179e-06, |
| "loss": 0.4076, |
| "step": 1045 |
| }, |
| { |
| "epoch": 2.57002457002457, |
| "grad_norm": 0.25111824108906006, |
| "learning_rate": 7.96903460837887e-06, |
| "loss": 0.4767, |
| "step": 1046 |
| }, |
| { |
| "epoch": 2.5724815724815726, |
| "grad_norm": 0.260989307316713, |
| "learning_rate": 7.923497267759564e-06, |
| "loss": 0.433, |
| "step": 1047 |
| }, |
| { |
| "epoch": 2.574938574938575, |
| "grad_norm": 0.26052793667495083, |
| "learning_rate": 7.877959927140254e-06, |
| "loss": 0.5054, |
| "step": 1048 |
| }, |
| { |
| "epoch": 2.5773955773955772, |
| "grad_norm": 0.24915581597371805, |
| "learning_rate": 7.832422586520947e-06, |
| "loss": 0.4814, |
| "step": 1049 |
| }, |
| { |
| "epoch": 2.57985257985258, |
| "grad_norm": 0.2543219048137912, |
| "learning_rate": 7.78688524590164e-06, |
| "loss": 0.4697, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.5823095823095823, |
| "grad_norm": 0.24030777009135726, |
| "learning_rate": 7.74134790528233e-06, |
| "loss": 0.4742, |
| "step": 1051 |
| }, |
| { |
| "epoch": 2.5847665847665846, |
| "grad_norm": 0.2231426133500323, |
| "learning_rate": 7.695810564663024e-06, |
| "loss": 0.3994, |
| "step": 1052 |
| }, |
| { |
| "epoch": 2.5872235872235874, |
| "grad_norm": 0.22308991337870562, |
| "learning_rate": 7.650273224043716e-06, |
| "loss": 0.3573, |
| "step": 1053 |
| }, |
| { |
| "epoch": 2.5896805896805897, |
| "grad_norm": 0.23216474618491506, |
| "learning_rate": 7.604735883424409e-06, |
| "loss": 0.4204, |
| "step": 1054 |
| }, |
| { |
| "epoch": 2.592137592137592, |
| "grad_norm": 0.2673680272419057, |
| "learning_rate": 7.5591985428051e-06, |
| "loss": 0.4019, |
| "step": 1055 |
| }, |
| { |
| "epoch": 2.5945945945945947, |
| "grad_norm": 0.2427622295285736, |
| "learning_rate": 7.513661202185793e-06, |
| "loss": 0.4387, |
| "step": 1056 |
| }, |
| { |
| "epoch": 2.597051597051597, |
| "grad_norm": 0.22860426639394132, |
| "learning_rate": 7.468123861566485e-06, |
| "loss": 0.3722, |
| "step": 1057 |
| }, |
| { |
| "epoch": 2.5995085995085994, |
| "grad_norm": 0.22665737637936162, |
| "learning_rate": 7.422586520947176e-06, |
| "loss": 0.3897, |
| "step": 1058 |
| }, |
| { |
| "epoch": 2.601965601965602, |
| "grad_norm": 0.23303711982308306, |
| "learning_rate": 7.3770491803278695e-06, |
| "loss": 0.4285, |
| "step": 1059 |
| }, |
| { |
| "epoch": 2.6044226044226044, |
| "grad_norm": 0.26128393033135633, |
| "learning_rate": 7.331511839708562e-06, |
| "loss": 0.4375, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.6068796068796067, |
| "grad_norm": 0.25239192353421025, |
| "learning_rate": 7.2859744990892545e-06, |
| "loss": 0.4289, |
| "step": 1061 |
| }, |
| { |
| "epoch": 2.6093366093366095, |
| "grad_norm": 0.2528160456529098, |
| "learning_rate": 7.240437158469945e-06, |
| "loss": 0.4413, |
| "step": 1062 |
| }, |
| { |
| "epoch": 2.611793611793612, |
| "grad_norm": 0.23646099966620737, |
| "learning_rate": 7.194899817850638e-06, |
| "loss": 0.47, |
| "step": 1063 |
| }, |
| { |
| "epoch": 2.614250614250614, |
| "grad_norm": 0.24423364418249202, |
| "learning_rate": 7.1493624772313305e-06, |
| "loss": 0.3873, |
| "step": 1064 |
| }, |
| { |
| "epoch": 2.616707616707617, |
| "grad_norm": 0.39790290853859617, |
| "learning_rate": 7.103825136612022e-06, |
| "loss": 0.4088, |
| "step": 1065 |
| }, |
| { |
| "epoch": 2.619164619164619, |
| "grad_norm": 0.24950104367468617, |
| "learning_rate": 7.058287795992715e-06, |
| "loss": 0.4047, |
| "step": 1066 |
| }, |
| { |
| "epoch": 2.6216216216216215, |
| "grad_norm": 0.2130145601569131, |
| "learning_rate": 7.012750455373407e-06, |
| "loss": 0.3621, |
| "step": 1067 |
| }, |
| { |
| "epoch": 2.6240786240786242, |
| "grad_norm": 0.24418610790883985, |
| "learning_rate": 6.967213114754098e-06, |
| "loss": 0.3692, |
| "step": 1068 |
| }, |
| { |
| "epoch": 2.6265356265356266, |
| "grad_norm": 0.24162683820897035, |
| "learning_rate": 6.921675774134791e-06, |
| "loss": 0.4028, |
| "step": 1069 |
| }, |
| { |
| "epoch": 2.628992628992629, |
| "grad_norm": 0.23928139013451663, |
| "learning_rate": 6.876138433515483e-06, |
| "loss": 0.3879, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.631449631449631, |
| "grad_norm": 0.23045710828373894, |
| "learning_rate": 6.830601092896176e-06, |
| "loss": 0.4, |
| "step": 1071 |
| }, |
| { |
| "epoch": 2.633906633906634, |
| "grad_norm": 0.2697298886110676, |
| "learning_rate": 6.785063752276867e-06, |
| "loss": 0.4548, |
| "step": 1072 |
| }, |
| { |
| "epoch": 2.6363636363636362, |
| "grad_norm": 0.24532148513902288, |
| "learning_rate": 6.73952641165756e-06, |
| "loss": 0.3913, |
| "step": 1073 |
| }, |
| { |
| "epoch": 2.638820638820639, |
| "grad_norm": 0.2337130540549015, |
| "learning_rate": 6.6939890710382525e-06, |
| "loss": 0.4059, |
| "step": 1074 |
| }, |
| { |
| "epoch": 2.6412776412776413, |
| "grad_norm": 0.2528405150657174, |
| "learning_rate": 6.648451730418943e-06, |
| "loss": 0.3687, |
| "step": 1075 |
| }, |
| { |
| "epoch": 2.6437346437346436, |
| "grad_norm": 0.23392129641022022, |
| "learning_rate": 6.602914389799636e-06, |
| "loss": 0.4117, |
| "step": 1076 |
| }, |
| { |
| "epoch": 2.646191646191646, |
| "grad_norm": 0.2462017004070107, |
| "learning_rate": 6.557377049180328e-06, |
| "loss": 0.4551, |
| "step": 1077 |
| }, |
| { |
| "epoch": 2.6486486486486487, |
| "grad_norm": 0.23207850038278316, |
| "learning_rate": 6.51183970856102e-06, |
| "loss": 0.4533, |
| "step": 1078 |
| }, |
| { |
| "epoch": 2.651105651105651, |
| "grad_norm": 0.22221002852774863, |
| "learning_rate": 6.466302367941713e-06, |
| "loss": 0.3899, |
| "step": 1079 |
| }, |
| { |
| "epoch": 2.6535626535626538, |
| "grad_norm": 0.22345355290268304, |
| "learning_rate": 6.420765027322405e-06, |
| "loss": 0.4201, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.656019656019656, |
| "grad_norm": 0.24769684733774203, |
| "learning_rate": 6.375227686703098e-06, |
| "loss": 0.4161, |
| "step": 1081 |
| }, |
| { |
| "epoch": 2.6584766584766584, |
| "grad_norm": 0.2407677406182194, |
| "learning_rate": 6.3296903460837886e-06, |
| "loss": 0.4225, |
| "step": 1082 |
| }, |
| { |
| "epoch": 2.6609336609336607, |
| "grad_norm": 0.2688663002431461, |
| "learning_rate": 6.284153005464481e-06, |
| "loss": 0.4576, |
| "step": 1083 |
| }, |
| { |
| "epoch": 2.6633906633906634, |
| "grad_norm": 0.21352788135666395, |
| "learning_rate": 6.238615664845173e-06, |
| "loss": 0.3745, |
| "step": 1084 |
| }, |
| { |
| "epoch": 2.6658476658476657, |
| "grad_norm": 0.2343987025317479, |
| "learning_rate": 6.193078324225865e-06, |
| "loss": 0.437, |
| "step": 1085 |
| }, |
| { |
| "epoch": 2.6683046683046685, |
| "grad_norm": 0.23634741722118774, |
| "learning_rate": 6.147540983606558e-06, |
| "loss": 0.4755, |
| "step": 1086 |
| }, |
| { |
| "epoch": 2.670761670761671, |
| "grad_norm": 0.2333977046249411, |
| "learning_rate": 6.10200364298725e-06, |
| "loss": 0.4226, |
| "step": 1087 |
| }, |
| { |
| "epoch": 2.673218673218673, |
| "grad_norm": 0.24140034792380946, |
| "learning_rate": 6.056466302367942e-06, |
| "loss": 0.4381, |
| "step": 1088 |
| }, |
| { |
| "epoch": 2.6756756756756754, |
| "grad_norm": 0.24610102385252078, |
| "learning_rate": 6.010928961748634e-06, |
| "loss": 0.4788, |
| "step": 1089 |
| }, |
| { |
| "epoch": 2.678132678132678, |
| "grad_norm": 0.21651298490313028, |
| "learning_rate": 5.965391621129326e-06, |
| "loss": 0.379, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.6805896805896805, |
| "grad_norm": 0.24002364672689916, |
| "learning_rate": 5.919854280510018e-06, |
| "loss": 0.4639, |
| "step": 1091 |
| }, |
| { |
| "epoch": 2.6830466830466833, |
| "grad_norm": 0.2401559730905027, |
| "learning_rate": 5.874316939890711e-06, |
| "loss": 0.4222, |
| "step": 1092 |
| }, |
| { |
| "epoch": 2.6855036855036856, |
| "grad_norm": 0.2296738324030033, |
| "learning_rate": 5.828779599271403e-06, |
| "loss": 0.4562, |
| "step": 1093 |
| }, |
| { |
| "epoch": 2.687960687960688, |
| "grad_norm": 0.24627104393871396, |
| "learning_rate": 5.783242258652095e-06, |
| "loss": 0.443, |
| "step": 1094 |
| }, |
| { |
| "epoch": 2.69041769041769, |
| "grad_norm": 0.23495018354076735, |
| "learning_rate": 5.737704918032787e-06, |
| "loss": 0.4446, |
| "step": 1095 |
| }, |
| { |
| "epoch": 2.692874692874693, |
| "grad_norm": 0.2515349565358722, |
| "learning_rate": 5.692167577413479e-06, |
| "loss": 0.4446, |
| "step": 1096 |
| }, |
| { |
| "epoch": 2.6953316953316953, |
| "grad_norm": 0.23661569080996545, |
| "learning_rate": 5.646630236794172e-06, |
| "loss": 0.4218, |
| "step": 1097 |
| }, |
| { |
| "epoch": 2.697788697788698, |
| "grad_norm": 0.25053626642778104, |
| "learning_rate": 5.601092896174863e-06, |
| "loss": 0.4072, |
| "step": 1098 |
| }, |
| { |
| "epoch": 2.7002457002457003, |
| "grad_norm": 0.23587139645082844, |
| "learning_rate": 5.555555555555556e-06, |
| "loss": 0.4173, |
| "step": 1099 |
| }, |
| { |
| "epoch": 2.7027027027027026, |
| "grad_norm": 0.2273539299051293, |
| "learning_rate": 5.510018214936248e-06, |
| "loss": 0.378, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.705159705159705, |
| "grad_norm": 0.24970174549990606, |
| "learning_rate": 5.46448087431694e-06, |
| "loss": 0.4712, |
| "step": 1101 |
| }, |
| { |
| "epoch": 2.7076167076167077, |
| "grad_norm": 0.22200832971393533, |
| "learning_rate": 5.418943533697633e-06, |
| "loss": 0.4103, |
| "step": 1102 |
| }, |
| { |
| "epoch": 2.71007371007371, |
| "grad_norm": 0.2269123817589866, |
| "learning_rate": 5.373406193078324e-06, |
| "loss": 0.4032, |
| "step": 1103 |
| }, |
| { |
| "epoch": 2.7125307125307128, |
| "grad_norm": 0.25690335857720653, |
| "learning_rate": 5.327868852459016e-06, |
| "loss": 0.4299, |
| "step": 1104 |
| }, |
| { |
| "epoch": 2.714987714987715, |
| "grad_norm": 0.24544690097434016, |
| "learning_rate": 5.2823315118397085e-06, |
| "loss": 0.4116, |
| "step": 1105 |
| }, |
| { |
| "epoch": 2.7174447174447174, |
| "grad_norm": 0.2319686270048615, |
| "learning_rate": 5.236794171220401e-06, |
| "loss": 0.3995, |
| "step": 1106 |
| }, |
| { |
| "epoch": 2.7199017199017197, |
| "grad_norm": 0.23710945966143346, |
| "learning_rate": 5.191256830601094e-06, |
| "loss": 0.4631, |
| "step": 1107 |
| }, |
| { |
| "epoch": 2.7223587223587224, |
| "grad_norm": 0.2259046385681155, |
| "learning_rate": 5.145719489981785e-06, |
| "loss": 0.4243, |
| "step": 1108 |
| }, |
| { |
| "epoch": 2.7248157248157248, |
| "grad_norm": 0.2257040623568255, |
| "learning_rate": 5.100182149362478e-06, |
| "loss": 0.4126, |
| "step": 1109 |
| }, |
| { |
| "epoch": 2.7272727272727275, |
| "grad_norm": 0.31168912185145126, |
| "learning_rate": 5.0546448087431695e-06, |
| "loss": 0.4209, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.72972972972973, |
| "grad_norm": 0.22549576040927052, |
| "learning_rate": 5.009107468123861e-06, |
| "loss": 0.3731, |
| "step": 1111 |
| }, |
| { |
| "epoch": 2.732186732186732, |
| "grad_norm": 0.24143398730766474, |
| "learning_rate": 4.963570127504554e-06, |
| "loss": 0.3649, |
| "step": 1112 |
| }, |
| { |
| "epoch": 2.7346437346437344, |
| "grad_norm": 0.22716863260793815, |
| "learning_rate": 4.918032786885246e-06, |
| "loss": 0.4307, |
| "step": 1113 |
| }, |
| { |
| "epoch": 2.737100737100737, |
| "grad_norm": 0.22591051220728886, |
| "learning_rate": 4.872495446265939e-06, |
| "loss": 0.3708, |
| "step": 1114 |
| }, |
| { |
| "epoch": 2.7395577395577395, |
| "grad_norm": 0.24622339640633006, |
| "learning_rate": 4.8269581056466305e-06, |
| "loss": 0.4399, |
| "step": 1115 |
| }, |
| { |
| "epoch": 2.7420147420147423, |
| "grad_norm": 0.2822872905637631, |
| "learning_rate": 4.781420765027322e-06, |
| "loss": 0.4346, |
| "step": 1116 |
| }, |
| { |
| "epoch": 2.7444717444717446, |
| "grad_norm": 0.22801344793802245, |
| "learning_rate": 4.735883424408015e-06, |
| "loss": 0.3956, |
| "step": 1117 |
| }, |
| { |
| "epoch": 2.746928746928747, |
| "grad_norm": 0.22155930655884537, |
| "learning_rate": 4.6903460837887065e-06, |
| "loss": 0.4215, |
| "step": 1118 |
| }, |
| { |
| "epoch": 2.749385749385749, |
| "grad_norm": 0.25029715906542904, |
| "learning_rate": 4.6448087431694e-06, |
| "loss": 0.488, |
| "step": 1119 |
| }, |
| { |
| "epoch": 2.751842751842752, |
| "grad_norm": 0.24274141839591315, |
| "learning_rate": 4.5992714025500915e-06, |
| "loss": 0.4195, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.7542997542997543, |
| "grad_norm": 0.23028544702814976, |
| "learning_rate": 4.553734061930783e-06, |
| "loss": 0.3788, |
| "step": 1121 |
| }, |
| { |
| "epoch": 2.756756756756757, |
| "grad_norm": 0.24916297952456937, |
| "learning_rate": 4.508196721311476e-06, |
| "loss": 0.4244, |
| "step": 1122 |
| }, |
| { |
| "epoch": 2.7592137592137593, |
| "grad_norm": 0.5209218145104284, |
| "learning_rate": 4.4626593806921675e-06, |
| "loss": 0.4771, |
| "step": 1123 |
| }, |
| { |
| "epoch": 2.7616707616707616, |
| "grad_norm": 0.2149644103008232, |
| "learning_rate": 4.41712204007286e-06, |
| "loss": 0.38, |
| "step": 1124 |
| }, |
| { |
| "epoch": 2.764127764127764, |
| "grad_norm": 3.4124629297354665, |
| "learning_rate": 4.371584699453552e-06, |
| "loss": 0.5629, |
| "step": 1125 |
| }, |
| { |
| "epoch": 2.7665847665847667, |
| "grad_norm": 0.2530450553088245, |
| "learning_rate": 4.326047358834244e-06, |
| "loss": 0.458, |
| "step": 1126 |
| }, |
| { |
| "epoch": 2.769041769041769, |
| "grad_norm": 0.24743507038028553, |
| "learning_rate": 4.280510018214937e-06, |
| "loss": 0.4901, |
| "step": 1127 |
| }, |
| { |
| "epoch": 2.7714987714987718, |
| "grad_norm": 0.24238514475994, |
| "learning_rate": 4.2349726775956285e-06, |
| "loss": 0.4958, |
| "step": 1128 |
| }, |
| { |
| "epoch": 2.773955773955774, |
| "grad_norm": 0.22170693726990862, |
| "learning_rate": 4.189435336976321e-06, |
| "loss": 0.3619, |
| "step": 1129 |
| }, |
| { |
| "epoch": 2.7764127764127764, |
| "grad_norm": 0.22200899039595443, |
| "learning_rate": 4.143897996357013e-06, |
| "loss": 0.3722, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.7788697788697787, |
| "grad_norm": 0.21557815228436708, |
| "learning_rate": 4.098360655737704e-06, |
| "loss": 0.4045, |
| "step": 1131 |
| }, |
| { |
| "epoch": 2.7813267813267815, |
| "grad_norm": 0.23101269211409964, |
| "learning_rate": 4.052823315118398e-06, |
| "loss": 0.3993, |
| "step": 1132 |
| }, |
| { |
| "epoch": 2.7837837837837838, |
| "grad_norm": 0.2506113219628401, |
| "learning_rate": 4.0072859744990895e-06, |
| "loss": 0.4394, |
| "step": 1133 |
| }, |
| { |
| "epoch": 2.786240786240786, |
| "grad_norm": 0.25829653944630426, |
| "learning_rate": 3.961748633879782e-06, |
| "loss": 0.4781, |
| "step": 1134 |
| }, |
| { |
| "epoch": 2.788697788697789, |
| "grad_norm": 0.2283437552109554, |
| "learning_rate": 3.916211293260474e-06, |
| "loss": 0.4199, |
| "step": 1135 |
| }, |
| { |
| "epoch": 2.791154791154791, |
| "grad_norm": 0.23261934599464665, |
| "learning_rate": 3.870673952641165e-06, |
| "loss": 0.4376, |
| "step": 1136 |
| }, |
| { |
| "epoch": 2.7936117936117935, |
| "grad_norm": 0.22397178290472075, |
| "learning_rate": 3.825136612021858e-06, |
| "loss": 0.3982, |
| "step": 1137 |
| }, |
| { |
| "epoch": 2.796068796068796, |
| "grad_norm": 0.2398679252310125, |
| "learning_rate": 3.77959927140255e-06, |
| "loss": 0.4303, |
| "step": 1138 |
| }, |
| { |
| "epoch": 2.7985257985257985, |
| "grad_norm": 0.2521861139284355, |
| "learning_rate": 3.7340619307832426e-06, |
| "loss": 0.3795, |
| "step": 1139 |
| }, |
| { |
| "epoch": 2.800982800982801, |
| "grad_norm": 0.22383712405363193, |
| "learning_rate": 3.6885245901639347e-06, |
| "loss": 0.4348, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.8034398034398036, |
| "grad_norm": 0.22946747083819977, |
| "learning_rate": 3.6429872495446273e-06, |
| "loss": 0.4206, |
| "step": 1141 |
| }, |
| { |
| "epoch": 2.805896805896806, |
| "grad_norm": 0.21677708187128097, |
| "learning_rate": 3.597449908925319e-06, |
| "loss": 0.4147, |
| "step": 1142 |
| }, |
| { |
| "epoch": 2.808353808353808, |
| "grad_norm": 0.21734633915684778, |
| "learning_rate": 3.551912568306011e-06, |
| "loss": 0.4093, |
| "step": 1143 |
| }, |
| { |
| "epoch": 2.810810810810811, |
| "grad_norm": 0.24321384485885375, |
| "learning_rate": 3.5063752276867036e-06, |
| "loss": 0.3783, |
| "step": 1144 |
| }, |
| { |
| "epoch": 2.8132678132678133, |
| "grad_norm": 0.24297335222493194, |
| "learning_rate": 3.4608378870673953e-06, |
| "loss": 0.5014, |
| "step": 1145 |
| }, |
| { |
| "epoch": 2.8157248157248156, |
| "grad_norm": 0.23892842770463382, |
| "learning_rate": 3.415300546448088e-06, |
| "loss": 0.4653, |
| "step": 1146 |
| }, |
| { |
| "epoch": 2.8181818181818183, |
| "grad_norm": 0.23160806679913898, |
| "learning_rate": 3.36976320582878e-06, |
| "loss": 0.422, |
| "step": 1147 |
| }, |
| { |
| "epoch": 2.8206388206388207, |
| "grad_norm": 0.22304524872709652, |
| "learning_rate": 3.3242258652094717e-06, |
| "loss": 0.3961, |
| "step": 1148 |
| }, |
| { |
| "epoch": 2.823095823095823, |
| "grad_norm": 0.2139600997939772, |
| "learning_rate": 3.278688524590164e-06, |
| "loss": 0.398, |
| "step": 1149 |
| }, |
| { |
| "epoch": 2.8255528255528253, |
| "grad_norm": 0.22956564153660272, |
| "learning_rate": 3.2331511839708563e-06, |
| "loss": 0.4151, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.828009828009828, |
| "grad_norm": 0.21826847683156242, |
| "learning_rate": 3.187613843351549e-06, |
| "loss": 0.3886, |
| "step": 1151 |
| }, |
| { |
| "epoch": 2.8304668304668303, |
| "grad_norm": 0.21141003742321426, |
| "learning_rate": 3.1420765027322406e-06, |
| "loss": 0.3906, |
| "step": 1152 |
| }, |
| { |
| "epoch": 2.832923832923833, |
| "grad_norm": 0.2357603696949525, |
| "learning_rate": 3.0965391621129327e-06, |
| "loss": 0.451, |
| "step": 1153 |
| }, |
| { |
| "epoch": 2.8353808353808354, |
| "grad_norm": 0.225812066393447, |
| "learning_rate": 3.051001821493625e-06, |
| "loss": 0.4064, |
| "step": 1154 |
| }, |
| { |
| "epoch": 2.8378378378378377, |
| "grad_norm": 0.21406448287602542, |
| "learning_rate": 3.005464480874317e-06, |
| "loss": 0.3674, |
| "step": 1155 |
| }, |
| { |
| "epoch": 2.84029484029484, |
| "grad_norm": 0.23290227939520636, |
| "learning_rate": 2.959927140255009e-06, |
| "loss": 0.4354, |
| "step": 1156 |
| }, |
| { |
| "epoch": 2.842751842751843, |
| "grad_norm": 0.2242096154461336, |
| "learning_rate": 2.9143897996357016e-06, |
| "loss": 0.4045, |
| "step": 1157 |
| }, |
| { |
| "epoch": 2.845208845208845, |
| "grad_norm": 0.23016845169620148, |
| "learning_rate": 2.8688524590163937e-06, |
| "loss": 0.4196, |
| "step": 1158 |
| }, |
| { |
| "epoch": 2.847665847665848, |
| "grad_norm": 0.21852096936661997, |
| "learning_rate": 2.823315118397086e-06, |
| "loss": 0.3982, |
| "step": 1159 |
| }, |
| { |
| "epoch": 2.85012285012285, |
| "grad_norm": 0.2108274165792627, |
| "learning_rate": 2.777777777777778e-06, |
| "loss": 0.3749, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.8525798525798525, |
| "grad_norm": 0.24160370753779067, |
| "learning_rate": 2.73224043715847e-06, |
| "loss": 0.4542, |
| "step": 1161 |
| }, |
| { |
| "epoch": 2.855036855036855, |
| "grad_norm": 1.6244239610624458, |
| "learning_rate": 2.686703096539162e-06, |
| "loss": 0.4585, |
| "step": 1162 |
| }, |
| { |
| "epoch": 2.8574938574938575, |
| "grad_norm": 0.22148267445895137, |
| "learning_rate": 2.6411657559198543e-06, |
| "loss": 0.3932, |
| "step": 1163 |
| }, |
| { |
| "epoch": 2.85995085995086, |
| "grad_norm": 0.23686699702884864, |
| "learning_rate": 2.595628415300547e-06, |
| "loss": 0.4043, |
| "step": 1164 |
| }, |
| { |
| "epoch": 2.8624078624078626, |
| "grad_norm": 0.2253190526682749, |
| "learning_rate": 2.550091074681239e-06, |
| "loss": 0.3865, |
| "step": 1165 |
| }, |
| { |
| "epoch": 2.864864864864865, |
| "grad_norm": 0.23283682010046694, |
| "learning_rate": 2.5045537340619306e-06, |
| "loss": 0.4241, |
| "step": 1166 |
| }, |
| { |
| "epoch": 2.8673218673218672, |
| "grad_norm": 0.22637501007112718, |
| "learning_rate": 2.459016393442623e-06, |
| "loss": 0.4535, |
| "step": 1167 |
| }, |
| { |
| "epoch": 2.8697788697788695, |
| "grad_norm": 0.23528102347160695, |
| "learning_rate": 2.4134790528233153e-06, |
| "loss": 0.485, |
| "step": 1168 |
| }, |
| { |
| "epoch": 2.8722358722358723, |
| "grad_norm": 0.23425484374934466, |
| "learning_rate": 2.3679417122040074e-06, |
| "loss": 0.4475, |
| "step": 1169 |
| }, |
| { |
| "epoch": 2.8746928746928746, |
| "grad_norm": 0.22149092071411625, |
| "learning_rate": 2.3224043715847e-06, |
| "loss": 0.4022, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.8771498771498774, |
| "grad_norm": 0.23427790871326182, |
| "learning_rate": 2.2768670309653916e-06, |
| "loss": 0.4638, |
| "step": 1171 |
| }, |
| { |
| "epoch": 2.8796068796068797, |
| "grad_norm": 0.23231854684157077, |
| "learning_rate": 2.2313296903460837e-06, |
| "loss": 0.4394, |
| "step": 1172 |
| }, |
| { |
| "epoch": 2.882063882063882, |
| "grad_norm": 0.2342789974677895, |
| "learning_rate": 2.185792349726776e-06, |
| "loss": 0.4825, |
| "step": 1173 |
| }, |
| { |
| "epoch": 2.8845208845208843, |
| "grad_norm": 0.22158002172153052, |
| "learning_rate": 2.1402550091074684e-06, |
| "loss": 0.386, |
| "step": 1174 |
| }, |
| { |
| "epoch": 2.886977886977887, |
| "grad_norm": 0.20696142757418035, |
| "learning_rate": 2.0947176684881605e-06, |
| "loss": 0.374, |
| "step": 1175 |
| }, |
| { |
| "epoch": 2.8894348894348894, |
| "grad_norm": 0.23895640881238192, |
| "learning_rate": 2.049180327868852e-06, |
| "loss": 0.4433, |
| "step": 1176 |
| }, |
| { |
| "epoch": 2.891891891891892, |
| "grad_norm": 0.21999586387865822, |
| "learning_rate": 2.0036429872495447e-06, |
| "loss": 0.3954, |
| "step": 1177 |
| }, |
| { |
| "epoch": 2.8943488943488944, |
| "grad_norm": 7.714103219123681, |
| "learning_rate": 1.958105646630237e-06, |
| "loss": 0.5093, |
| "step": 1178 |
| }, |
| { |
| "epoch": 2.8968058968058967, |
| "grad_norm": 0.20872227311945366, |
| "learning_rate": 1.912568306010929e-06, |
| "loss": 0.3708, |
| "step": 1179 |
| }, |
| { |
| "epoch": 2.899262899262899, |
| "grad_norm": 0.23835713585529297, |
| "learning_rate": 1.8670309653916213e-06, |
| "loss": 0.4284, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.901719901719902, |
| "grad_norm": 0.22864449909911705, |
| "learning_rate": 1.8214936247723136e-06, |
| "loss": 0.428, |
| "step": 1181 |
| }, |
| { |
| "epoch": 2.904176904176904, |
| "grad_norm": 0.2406324576550951, |
| "learning_rate": 1.7759562841530055e-06, |
| "loss": 0.4624, |
| "step": 1182 |
| }, |
| { |
| "epoch": 2.906633906633907, |
| "grad_norm": 0.23431139400422057, |
| "learning_rate": 1.7304189435336977e-06, |
| "loss": 0.4387, |
| "step": 1183 |
| }, |
| { |
| "epoch": 2.909090909090909, |
| "grad_norm": 0.21843455420917768, |
| "learning_rate": 1.68488160291439e-06, |
| "loss": 0.4249, |
| "step": 1184 |
| }, |
| { |
| "epoch": 2.9115479115479115, |
| "grad_norm": 0.2095922735664185, |
| "learning_rate": 1.639344262295082e-06, |
| "loss": 0.395, |
| "step": 1185 |
| }, |
| { |
| "epoch": 2.914004914004914, |
| "grad_norm": 0.2269069829992154, |
| "learning_rate": 1.5938069216757744e-06, |
| "loss": 0.4245, |
| "step": 1186 |
| }, |
| { |
| "epoch": 2.9164619164619165, |
| "grad_norm": 0.2119797460187829, |
| "learning_rate": 1.5482695810564663e-06, |
| "loss": 0.3898, |
| "step": 1187 |
| }, |
| { |
| "epoch": 2.918918918918919, |
| "grad_norm": 0.23711678102007225, |
| "learning_rate": 1.5027322404371585e-06, |
| "loss": 0.4812, |
| "step": 1188 |
| }, |
| { |
| "epoch": 2.9213759213759216, |
| "grad_norm": 0.2191981863306329, |
| "learning_rate": 1.4571948998178508e-06, |
| "loss": 0.4322, |
| "step": 1189 |
| }, |
| { |
| "epoch": 2.923832923832924, |
| "grad_norm": 0.22589359686916735, |
| "learning_rate": 1.411657559198543e-06, |
| "loss": 0.3886, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.9262899262899262, |
| "grad_norm": 0.24232246496079973, |
| "learning_rate": 1.366120218579235e-06, |
| "loss": 0.4787, |
| "step": 1191 |
| }, |
| { |
| "epoch": 2.9287469287469285, |
| "grad_norm": 0.23337563262261982, |
| "learning_rate": 1.3205828779599271e-06, |
| "loss": 0.4604, |
| "step": 1192 |
| }, |
| { |
| "epoch": 2.9312039312039313, |
| "grad_norm": 0.22211870990294277, |
| "learning_rate": 1.2750455373406195e-06, |
| "loss": 0.4262, |
| "step": 1193 |
| }, |
| { |
| "epoch": 2.9336609336609336, |
| "grad_norm": 0.2299266125727697, |
| "learning_rate": 1.2295081967213116e-06, |
| "loss": 0.4976, |
| "step": 1194 |
| }, |
| { |
| "epoch": 2.9361179361179364, |
| "grad_norm": 0.2213619140432279, |
| "learning_rate": 1.1839708561020037e-06, |
| "loss": 0.4219, |
| "step": 1195 |
| }, |
| { |
| "epoch": 2.9385749385749387, |
| "grad_norm": 0.21746767504739525, |
| "learning_rate": 1.1384335154826958e-06, |
| "loss": 0.4351, |
| "step": 1196 |
| }, |
| { |
| "epoch": 2.941031941031941, |
| "grad_norm": 0.23322041860850679, |
| "learning_rate": 1.092896174863388e-06, |
| "loss": 0.4407, |
| "step": 1197 |
| }, |
| { |
| "epoch": 2.9434889434889433, |
| "grad_norm": 0.21436428057177767, |
| "learning_rate": 1.0473588342440803e-06, |
| "loss": 0.3869, |
| "step": 1198 |
| }, |
| { |
| "epoch": 2.945945945945946, |
| "grad_norm": 0.23891934387649086, |
| "learning_rate": 1.0018214936247724e-06, |
| "loss": 0.4645, |
| "step": 1199 |
| }, |
| { |
| "epoch": 2.9484029484029484, |
| "grad_norm": 0.2278055983373202, |
| "learning_rate": 9.562841530054645e-07, |
| "loss": 0.4638, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.950859950859951, |
| "grad_norm": 0.3776834168448288, |
| "learning_rate": 9.107468123861568e-07, |
| "loss": 0.3994, |
| "step": 1201 |
| }, |
| { |
| "epoch": 2.9533169533169534, |
| "grad_norm": 0.22934397142430324, |
| "learning_rate": 8.652094717668488e-07, |
| "loss": 0.4639, |
| "step": 1202 |
| }, |
| { |
| "epoch": 2.9557739557739557, |
| "grad_norm": 0.2063303716692425, |
| "learning_rate": 8.19672131147541e-07, |
| "loss": 0.3707, |
| "step": 1203 |
| }, |
| { |
| "epoch": 2.958230958230958, |
| "grad_norm": 0.2351672804832617, |
| "learning_rate": 7.741347905282332e-07, |
| "loss": 0.4168, |
| "step": 1204 |
| }, |
| { |
| "epoch": 2.960687960687961, |
| "grad_norm": 0.22161703857940737, |
| "learning_rate": 7.285974499089254e-07, |
| "loss": 0.4014, |
| "step": 1205 |
| }, |
| { |
| "epoch": 2.963144963144963, |
| "grad_norm": 0.3343193912872951, |
| "learning_rate": 6.830601092896175e-07, |
| "loss": 0.3978, |
| "step": 1206 |
| }, |
| { |
| "epoch": 2.965601965601966, |
| "grad_norm": 0.22304950387987088, |
| "learning_rate": 6.375227686703097e-07, |
| "loss": 0.4135, |
| "step": 1207 |
| }, |
| { |
| "epoch": 2.968058968058968, |
| "grad_norm": 0.20953255552793454, |
| "learning_rate": 5.919854280510018e-07, |
| "loss": 0.3811, |
| "step": 1208 |
| }, |
| { |
| "epoch": 2.9705159705159705, |
| "grad_norm": 0.2329344600488229, |
| "learning_rate": 5.46448087431694e-07, |
| "loss": 0.4343, |
| "step": 1209 |
| }, |
| { |
| "epoch": 2.972972972972973, |
| "grad_norm": 0.21299980955076325, |
| "learning_rate": 5.009107468123862e-07, |
| "loss": 0.3967, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.9754299754299756, |
| "grad_norm": 0.22514110450209485, |
| "learning_rate": 4.553734061930784e-07, |
| "loss": 0.4594, |
| "step": 1211 |
| }, |
| { |
| "epoch": 2.977886977886978, |
| "grad_norm": 0.22706086722772673, |
| "learning_rate": 4.098360655737705e-07, |
| "loss": 0.4452, |
| "step": 1212 |
| }, |
| { |
| "epoch": 2.98034398034398, |
| "grad_norm": 0.22420225612233982, |
| "learning_rate": 3.642987249544627e-07, |
| "loss": 0.451, |
| "step": 1213 |
| }, |
| { |
| "epoch": 2.982800982800983, |
| "grad_norm": 0.2278750274266437, |
| "learning_rate": 3.1876138433515486e-07, |
| "loss": 0.4298, |
| "step": 1214 |
| }, |
| { |
| "epoch": 2.9852579852579852, |
| "grad_norm": 0.22450421754608937, |
| "learning_rate": 2.73224043715847e-07, |
| "loss": 0.4549, |
| "step": 1215 |
| }, |
| { |
| "epoch": 2.9877149877149876, |
| "grad_norm": 0.24947496879816358, |
| "learning_rate": 2.276867030965392e-07, |
| "loss": 0.4612, |
| "step": 1216 |
| }, |
| { |
| "epoch": 2.9901719901719903, |
| "grad_norm": 0.22458707836271088, |
| "learning_rate": 1.8214936247723135e-07, |
| "loss": 0.4051, |
| "step": 1217 |
| }, |
| { |
| "epoch": 2.9926289926289926, |
| "grad_norm": 0.21227609068441602, |
| "learning_rate": 1.366120218579235e-07, |
| "loss": 0.3807, |
| "step": 1218 |
| }, |
| { |
| "epoch": 2.995085995085995, |
| "grad_norm": 0.22465026975771382, |
| "learning_rate": 9.107468123861567e-08, |
| "loss": 0.4377, |
| "step": 1219 |
| }, |
| { |
| "epoch": 2.9975429975429977, |
| "grad_norm": 0.22388402187392278, |
| "learning_rate": 4.553734061930784e-08, |
| "loss": 0.4133, |
| "step": 1220 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.2335653945488078, |
| "learning_rate": 0.0, |
| "loss": 0.4107, |
| "step": 1221 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 1221, |
| "total_flos": 1.0279209431224812e+18, |
| "train_loss": 0.6874593345968573, |
| "train_runtime": 70837.5294, |
| "train_samples_per_second": 0.275, |
| "train_steps_per_second": 0.017 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1221, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0279209431224812e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|