| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.993342210386152, |
| "eval_steps": 500, |
| "global_step": 1125, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002663115845539281, |
| "grad_norm": 59.669442519158444, |
| "learning_rate": 4.424778761061947e-07, |
| "loss": 11.0815, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.005326231691078562, |
| "grad_norm": 59.77300379138749, |
| "learning_rate": 8.849557522123894e-07, |
| "loss": 11.0703, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.007989347536617843, |
| "grad_norm": 59.37811338851668, |
| "learning_rate": 1.3274336283185841e-06, |
| "loss": 11.1149, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.010652463382157125, |
| "grad_norm": 59.714257927262075, |
| "learning_rate": 1.7699115044247788e-06, |
| "loss": 11.1, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.013315579227696404, |
| "grad_norm": 62.19325541849273, |
| "learning_rate": 2.2123893805309734e-06, |
| "loss": 10.9008, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.015978695073235686, |
| "grad_norm": 64.3469313247898, |
| "learning_rate": 2.6548672566371683e-06, |
| "loss": 10.7897, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.018641810918774968, |
| "grad_norm": 64.70693307946331, |
| "learning_rate": 3.097345132743363e-06, |
| "loss": 10.6244, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.02130492676431425, |
| "grad_norm": 100.07904925734698, |
| "learning_rate": 3.5398230088495575e-06, |
| "loss": 9.3505, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.023968042609853527, |
| "grad_norm": 121.42213770896274, |
| "learning_rate": 3.982300884955752e-06, |
| "loss": 8.5961, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02663115845539281, |
| "grad_norm": 64.96997432704501, |
| "learning_rate": 4.424778761061947e-06, |
| "loss": 3.5386, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02929427430093209, |
| "grad_norm": 53.5067123571589, |
| "learning_rate": 4.867256637168142e-06, |
| "loss": 3.1169, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.03195739014647137, |
| "grad_norm": 34.28454533456946, |
| "learning_rate": 5.3097345132743365e-06, |
| "loss": 2.3171, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.03462050599201065, |
| "grad_norm": 28.02284592011359, |
| "learning_rate": 5.752212389380531e-06, |
| "loss": 2.1704, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.037283621837549935, |
| "grad_norm": 6.230233716943746, |
| "learning_rate": 6.194690265486726e-06, |
| "loss": 1.3702, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.03994673768308921, |
| "grad_norm": 4.8265444090252325, |
| "learning_rate": 6.6371681415929215e-06, |
| "loss": 1.2994, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0426098535286285, |
| "grad_norm": 3.4989649353882544, |
| "learning_rate": 7.079646017699115e-06, |
| "loss": 1.1939, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.045272969374167776, |
| "grad_norm": 2.548022240081304, |
| "learning_rate": 7.52212389380531e-06, |
| "loss": 1.1113, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.047936085219707054, |
| "grad_norm": 1.7785073197319812, |
| "learning_rate": 7.964601769911505e-06, |
| "loss": 1.0099, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.05059920106524634, |
| "grad_norm": 52.43472197468591, |
| "learning_rate": 8.407079646017701e-06, |
| "loss": 1.0002, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.05326231691078562, |
| "grad_norm": 18.71256882921437, |
| "learning_rate": 8.849557522123894e-06, |
| "loss": 0.9335, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0559254327563249, |
| "grad_norm": 1.6748381666125123, |
| "learning_rate": 9.29203539823009e-06, |
| "loss": 0.8897, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.05858854860186418, |
| "grad_norm": 1.2119772296620004, |
| "learning_rate": 9.734513274336284e-06, |
| "loss": 0.8728, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.06125166444740346, |
| "grad_norm": 0.9292233025769583, |
| "learning_rate": 1.0176991150442479e-05, |
| "loss": 0.8443, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.06391478029294274, |
| "grad_norm": 0.8058222924733704, |
| "learning_rate": 1.0619469026548673e-05, |
| "loss": 0.8065, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.06657789613848203, |
| "grad_norm": 0.7676888976773729, |
| "learning_rate": 1.1061946902654869e-05, |
| "loss": 0.744, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.0692410119840213, |
| "grad_norm": 1.1442962246712427, |
| "learning_rate": 1.1504424778761062e-05, |
| "loss": 0.7962, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.07190412782956059, |
| "grad_norm": 0.8086732801653846, |
| "learning_rate": 1.1946902654867258e-05, |
| "loss": 0.7546, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.07456724367509987, |
| "grad_norm": 0.6032687314644429, |
| "learning_rate": 1.2389380530973452e-05, |
| "loss": 0.6961, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.07723035952063914, |
| "grad_norm": 0.8050008569135423, |
| "learning_rate": 1.2831858407079647e-05, |
| "loss": 0.7181, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.07989347536617843, |
| "grad_norm": 0.7760170053857292, |
| "learning_rate": 1.3274336283185843e-05, |
| "loss": 0.7011, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08255659121171771, |
| "grad_norm": 0.6911853454916363, |
| "learning_rate": 1.3716814159292036e-05, |
| "loss": 0.6767, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.085219707057257, |
| "grad_norm": 0.5690990372888421, |
| "learning_rate": 1.415929203539823e-05, |
| "loss": 0.6657, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.08788282290279627, |
| "grad_norm": 0.46539236587043925, |
| "learning_rate": 1.4601769911504426e-05, |
| "loss": 0.6585, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.09054593874833555, |
| "grad_norm": 0.6011651474231043, |
| "learning_rate": 1.504424778761062e-05, |
| "loss": 0.6571, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.09320905459387484, |
| "grad_norm": 0.6055438783984222, |
| "learning_rate": 1.5486725663716813e-05, |
| "loss": 0.6307, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.09587217043941411, |
| "grad_norm": 0.4930140407791457, |
| "learning_rate": 1.592920353982301e-05, |
| "loss": 0.638, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.0985352862849534, |
| "grad_norm": 0.38727032176053555, |
| "learning_rate": 1.6371681415929206e-05, |
| "loss": 0.6189, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.10119840213049268, |
| "grad_norm": 0.46992360907642716, |
| "learning_rate": 1.6814159292035402e-05, |
| "loss": 0.6242, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.10386151797603196, |
| "grad_norm": 0.5002104790615647, |
| "learning_rate": 1.7256637168141594e-05, |
| "loss": 0.6087, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.10652463382157124, |
| "grad_norm": 0.4378982855259104, |
| "learning_rate": 1.7699115044247787e-05, |
| "loss": 0.6112, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.10918774966711052, |
| "grad_norm": 0.343549106950523, |
| "learning_rate": 1.8141592920353983e-05, |
| "loss": 0.6251, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.1118508655126498, |
| "grad_norm": 0.43140422077824325, |
| "learning_rate": 1.858407079646018e-05, |
| "loss": 0.625, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.11451398135818908, |
| "grad_norm": 0.44945895418028914, |
| "learning_rate": 1.9026548672566372e-05, |
| "loss": 0.576, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.11717709720372836, |
| "grad_norm": 0.33640715838659224, |
| "learning_rate": 1.946902654867257e-05, |
| "loss": 0.602, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.11984021304926765, |
| "grad_norm": 0.3602083165810118, |
| "learning_rate": 1.991150442477876e-05, |
| "loss": 0.5707, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.12250332889480692, |
| "grad_norm": 1.7341245223857158, |
| "learning_rate": 2.0353982300884957e-05, |
| "loss": 0.5662, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.12516644474034622, |
| "grad_norm": 0.42320706053839496, |
| "learning_rate": 2.079646017699115e-05, |
| "loss": 0.5718, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.1278295605858855, |
| "grad_norm": 0.34356067841011745, |
| "learning_rate": 2.1238938053097346e-05, |
| "loss": 0.5652, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.13049267643142476, |
| "grad_norm": 0.37607875054105366, |
| "learning_rate": 2.1681415929203542e-05, |
| "loss": 0.6079, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.13315579227696406, |
| "grad_norm": 0.355877489349339, |
| "learning_rate": 2.2123893805309738e-05, |
| "loss": 0.5414, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.13581890812250333, |
| "grad_norm": 0.3531413648567738, |
| "learning_rate": 2.2566371681415928e-05, |
| "loss": 0.5383, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.1384820239680426, |
| "grad_norm": 0.3900867327584249, |
| "learning_rate": 2.3008849557522124e-05, |
| "loss": 0.5607, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.1411451398135819, |
| "grad_norm": 0.29096561379999103, |
| "learning_rate": 2.345132743362832e-05, |
| "loss": 0.5428, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.14380825565912117, |
| "grad_norm": 0.34882597172967983, |
| "learning_rate": 2.3893805309734516e-05, |
| "loss": 0.5597, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.14647137150466044, |
| "grad_norm": 0.31745047102841745, |
| "learning_rate": 2.433628318584071e-05, |
| "loss": 0.5427, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.14913448735019974, |
| "grad_norm": 0.3429464925874952, |
| "learning_rate": 2.4778761061946905e-05, |
| "loss": 0.5418, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.151797603195739, |
| "grad_norm": 0.28154789184935636, |
| "learning_rate": 2.5221238938053098e-05, |
| "loss": 0.5701, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.15446071904127828, |
| "grad_norm": 0.3141148216942468, |
| "learning_rate": 2.5663716814159294e-05, |
| "loss": 0.5279, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.15712383488681758, |
| "grad_norm": 0.3077683025338142, |
| "learning_rate": 2.610619469026549e-05, |
| "loss": 0.5443, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.15978695073235685, |
| "grad_norm": 0.35329472069062134, |
| "learning_rate": 2.6548672566371686e-05, |
| "loss": 0.5657, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.16245006657789615, |
| "grad_norm": 0.30082869981695665, |
| "learning_rate": 2.6991150442477875e-05, |
| "loss": 0.5386, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.16511318242343542, |
| "grad_norm": 0.3705381333041911, |
| "learning_rate": 2.743362831858407e-05, |
| "loss": 0.5417, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.1677762982689747, |
| "grad_norm": 0.3424625742113855, |
| "learning_rate": 2.7876106194690264e-05, |
| "loss": 0.5334, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.170439414114514, |
| "grad_norm": 0.2904098798351202, |
| "learning_rate": 2.831858407079646e-05, |
| "loss": 0.5424, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.17310252996005326, |
| "grad_norm": 0.32851572085926894, |
| "learning_rate": 2.8761061946902656e-05, |
| "loss": 0.5231, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.17576564580559254, |
| "grad_norm": 0.29034784648982725, |
| "learning_rate": 2.9203539823008852e-05, |
| "loss": 0.5394, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.17842876165113183, |
| "grad_norm": 0.33213549417249844, |
| "learning_rate": 2.964601769911505e-05, |
| "loss": 0.54, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.1810918774966711, |
| "grad_norm": 0.2751631826164567, |
| "learning_rate": 3.008849557522124e-05, |
| "loss": 0.5254, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.18375499334221038, |
| "grad_norm": 0.3037009657021324, |
| "learning_rate": 3.0530973451327434e-05, |
| "loss": 0.5216, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.18641810918774968, |
| "grad_norm": 0.30105360826964594, |
| "learning_rate": 3.097345132743363e-05, |
| "loss": 0.5111, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.18908122503328895, |
| "grad_norm": 0.3202863693523833, |
| "learning_rate": 3.1415929203539826e-05, |
| "loss": 0.537, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.19174434087882822, |
| "grad_norm": 0.3294366280935238, |
| "learning_rate": 3.185840707964602e-05, |
| "loss": 0.5215, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.19440745672436752, |
| "grad_norm": 0.32228297514585236, |
| "learning_rate": 3.230088495575221e-05, |
| "loss": 0.536, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.1970705725699068, |
| "grad_norm": 0.31224977631197853, |
| "learning_rate": 3.274336283185841e-05, |
| "loss": 0.5133, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.19973368841544606, |
| "grad_norm": 0.34249789697496347, |
| "learning_rate": 3.3185840707964604e-05, |
| "loss": 0.5187, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.20239680426098536, |
| "grad_norm": 0.3014674455677291, |
| "learning_rate": 3.3628318584070804e-05, |
| "loss": 0.5173, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.20505992010652463, |
| "grad_norm": 0.31181209074311145, |
| "learning_rate": 3.407079646017699e-05, |
| "loss": 0.4938, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.20772303595206393, |
| "grad_norm": 0.3421599429123891, |
| "learning_rate": 3.451327433628319e-05, |
| "loss": 0.5178, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.2103861517976032, |
| "grad_norm": 0.32144698779599035, |
| "learning_rate": 3.495575221238938e-05, |
| "loss": 0.529, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.21304926764314247, |
| "grad_norm": 0.30829102288383803, |
| "learning_rate": 3.5398230088495574e-05, |
| "loss": 0.5045, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.21571238348868177, |
| "grad_norm": 0.3320673147021741, |
| "learning_rate": 3.5840707964601774e-05, |
| "loss": 0.5193, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.21837549933422104, |
| "grad_norm": 0.3257493459194373, |
| "learning_rate": 3.628318584070797e-05, |
| "loss": 0.5161, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.2210386151797603, |
| "grad_norm": 0.3451069209364067, |
| "learning_rate": 3.672566371681416e-05, |
| "loss": 0.4902, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.2237017310252996, |
| "grad_norm": 0.38062902785170477, |
| "learning_rate": 3.716814159292036e-05, |
| "loss": 0.5106, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.22636484687083888, |
| "grad_norm": 0.3437845837066077, |
| "learning_rate": 3.7610619469026545e-05, |
| "loss": 0.5072, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.22902796271637815, |
| "grad_norm": 0.4369801740657791, |
| "learning_rate": 3.8053097345132744e-05, |
| "loss": 0.5016, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.23169107856191745, |
| "grad_norm": 0.39323367167161793, |
| "learning_rate": 3.849557522123894e-05, |
| "loss": 0.5126, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.23435419440745672, |
| "grad_norm": 0.3804923058106557, |
| "learning_rate": 3.893805309734514e-05, |
| "loss": 0.5169, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.237017310252996, |
| "grad_norm": 0.3991475997522414, |
| "learning_rate": 3.938053097345133e-05, |
| "loss": 0.5206, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.2396804260985353, |
| "grad_norm": 0.3345983998430803, |
| "learning_rate": 3.982300884955752e-05, |
| "loss": 0.5126, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.24234354194407456, |
| "grad_norm": 0.37605023011424904, |
| "learning_rate": 4.026548672566372e-05, |
| "loss": 0.517, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.24500665778961384, |
| "grad_norm": 0.30015095297467786, |
| "learning_rate": 4.0707964601769914e-05, |
| "loss": 0.5146, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.24766977363515313, |
| "grad_norm": 0.37615535541775885, |
| "learning_rate": 4.115044247787611e-05, |
| "loss": 0.4897, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.25033288948069243, |
| "grad_norm": 0.32506469165922075, |
| "learning_rate": 4.15929203539823e-05, |
| "loss": 0.5033, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.2529960053262317, |
| "grad_norm": 0.3955130401533768, |
| "learning_rate": 4.20353982300885e-05, |
| "loss": 0.517, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.255659121171771, |
| "grad_norm": 0.38256193351931217, |
| "learning_rate": 4.247787610619469e-05, |
| "loss": 0.4903, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.2583222370173103, |
| "grad_norm": 0.3757931359073768, |
| "learning_rate": 4.2920353982300885e-05, |
| "loss": 0.4881, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.2609853528628495, |
| "grad_norm": 0.4073525724085135, |
| "learning_rate": 4.3362831858407084e-05, |
| "loss": 0.4981, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.2636484687083888, |
| "grad_norm": 0.42226304140119747, |
| "learning_rate": 4.380530973451328e-05, |
| "loss": 0.4777, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.2663115845539281, |
| "grad_norm": 0.47546631243940135, |
| "learning_rate": 4.4247787610619477e-05, |
| "loss": 0.5012, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.26897470039946736, |
| "grad_norm": 0.38067024978966585, |
| "learning_rate": 4.469026548672566e-05, |
| "loss": 0.5038, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.27163781624500666, |
| "grad_norm": 0.3549335612107799, |
| "learning_rate": 4.5132743362831855e-05, |
| "loss": 0.5046, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.27430093209054596, |
| "grad_norm": 0.4081532806299182, |
| "learning_rate": 4.5575221238938055e-05, |
| "loss": 0.4816, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.2769640479360852, |
| "grad_norm": 0.35702973975911423, |
| "learning_rate": 4.601769911504425e-05, |
| "loss": 0.4969, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.2796271637816245, |
| "grad_norm": 0.3750952303695297, |
| "learning_rate": 4.646017699115045e-05, |
| "loss": 0.5129, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.2822902796271638, |
| "grad_norm": 0.3713537523929101, |
| "learning_rate": 4.690265486725664e-05, |
| "loss": 0.4871, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.28495339547270304, |
| "grad_norm": 0.47534354342607993, |
| "learning_rate": 4.734513274336283e-05, |
| "loss": 0.4971, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.28761651131824234, |
| "grad_norm": 0.41826478296211245, |
| "learning_rate": 4.778761061946903e-05, |
| "loss": 0.4943, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.29027962716378164, |
| "grad_norm": 0.39759514237849775, |
| "learning_rate": 4.823008849557522e-05, |
| "loss": 0.5014, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.2929427430093209, |
| "grad_norm": 0.4548008624547614, |
| "learning_rate": 4.867256637168142e-05, |
| "loss": 0.5067, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2956058588548602, |
| "grad_norm": 0.4618812739465874, |
| "learning_rate": 4.911504424778761e-05, |
| "loss": 0.487, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.2982689747003995, |
| "grad_norm": 0.31165613667101594, |
| "learning_rate": 4.955752212389381e-05, |
| "loss": 0.4908, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.3009320905459387, |
| "grad_norm": 0.45735168765249185, |
| "learning_rate": 5e-05, |
| "loss": 0.4924, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.303595206391478, |
| "grad_norm": 0.4659242945372524, |
| "learning_rate": 4.9950592885375493e-05, |
| "loss": 0.49, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.3062583222370173, |
| "grad_norm": 0.3422222311667708, |
| "learning_rate": 4.990118577075099e-05, |
| "loss": 0.4902, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.30892143808255657, |
| "grad_norm": 0.5702864889691999, |
| "learning_rate": 4.985177865612648e-05, |
| "loss": 0.4712, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.31158455392809586, |
| "grad_norm": 0.31000398399919754, |
| "learning_rate": 4.980237154150198e-05, |
| "loss": 0.4729, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.31424766977363516, |
| "grad_norm": 0.5329093367544124, |
| "learning_rate": 4.975296442687747e-05, |
| "loss": 0.4979, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.3169107856191744, |
| "grad_norm": 0.41581595613618844, |
| "learning_rate": 4.970355731225297e-05, |
| "loss": 0.4979, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.3195739014647137, |
| "grad_norm": 0.5898871183617019, |
| "learning_rate": 4.965415019762846e-05, |
| "loss": 0.4841, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.322237017310253, |
| "grad_norm": 0.5277745967026336, |
| "learning_rate": 4.960474308300396e-05, |
| "loss": 0.494, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.3249001331557923, |
| "grad_norm": 0.6707049603761084, |
| "learning_rate": 4.955533596837945e-05, |
| "loss": 0.4816, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.32756324900133155, |
| "grad_norm": 0.39379278723705347, |
| "learning_rate": 4.950592885375494e-05, |
| "loss": 0.4708, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.33022636484687085, |
| "grad_norm": 0.5682660745624962, |
| "learning_rate": 4.945652173913044e-05, |
| "loss": 0.4844, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.33288948069241014, |
| "grad_norm": 0.4164160620027728, |
| "learning_rate": 4.940711462450593e-05, |
| "loss": 0.4577, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.3355525965379494, |
| "grad_norm": 0.5359420179155978, |
| "learning_rate": 4.9357707509881426e-05, |
| "loss": 0.4723, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.3382157123834887, |
| "grad_norm": 0.5026386563312899, |
| "learning_rate": 4.930830039525692e-05, |
| "loss": 0.4706, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.340878828229028, |
| "grad_norm": 0.5189502106027113, |
| "learning_rate": 4.9258893280632415e-05, |
| "loss": 0.4814, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.34354194407456723, |
| "grad_norm": 0.46462849504368775, |
| "learning_rate": 4.9209486166007906e-05, |
| "loss": 0.4735, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.34620505992010653, |
| "grad_norm": 0.5495458064144569, |
| "learning_rate": 4.9160079051383404e-05, |
| "loss": 0.4964, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.3488681757656458, |
| "grad_norm": 0.4136354389486864, |
| "learning_rate": 4.9110671936758895e-05, |
| "loss": 0.4937, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.35153129161118507, |
| "grad_norm": 0.49819742888588847, |
| "learning_rate": 4.906126482213439e-05, |
| "loss": 0.4929, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.35419440745672437, |
| "grad_norm": 0.5211986557669676, |
| "learning_rate": 4.901185770750988e-05, |
| "loss": 0.4722, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.35685752330226367, |
| "grad_norm": 0.3743611868649684, |
| "learning_rate": 4.896245059288538e-05, |
| "loss": 0.4852, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.3595206391478029, |
| "grad_norm": 0.47244102498767254, |
| "learning_rate": 4.891304347826087e-05, |
| "loss": 0.4846, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.3621837549933422, |
| "grad_norm": 0.39536123377896054, |
| "learning_rate": 4.886363636363637e-05, |
| "loss": 0.4812, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.3648468708388815, |
| "grad_norm": 0.39389579963168014, |
| "learning_rate": 4.881422924901186e-05, |
| "loss": 0.4814, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.36750998668442075, |
| "grad_norm": 0.5517767967854046, |
| "learning_rate": 4.876482213438736e-05, |
| "loss": 0.4605, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.37017310252996005, |
| "grad_norm": 0.3371092349408584, |
| "learning_rate": 4.871541501976285e-05, |
| "loss": 0.4919, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.37283621837549935, |
| "grad_norm": 0.5454997328166629, |
| "learning_rate": 4.866600790513835e-05, |
| "loss": 0.478, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3754993342210386, |
| "grad_norm": 0.38191662974594565, |
| "learning_rate": 4.861660079051384e-05, |
| "loss": 0.4675, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.3781624500665779, |
| "grad_norm": 0.44622867680541506, |
| "learning_rate": 4.8567193675889336e-05, |
| "loss": 0.4767, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.3808255659121172, |
| "grad_norm": 0.40615171610446554, |
| "learning_rate": 4.851778656126482e-05, |
| "loss": 0.4796, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.38348868175765644, |
| "grad_norm": 0.4067512139515564, |
| "learning_rate": 4.846837944664032e-05, |
| "loss": 0.4921, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.38615179760319573, |
| "grad_norm": 0.3764557796844728, |
| "learning_rate": 4.841897233201581e-05, |
| "loss": 0.4859, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.38881491344873503, |
| "grad_norm": 0.4154794205261891, |
| "learning_rate": 4.836956521739131e-05, |
| "loss": 0.4673, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.3914780292942743, |
| "grad_norm": 0.4269745611686079, |
| "learning_rate": 4.83201581027668e-05, |
| "loss": 0.4551, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.3941411451398136, |
| "grad_norm": 0.38377387438781274, |
| "learning_rate": 4.8270750988142296e-05, |
| "loss": 0.487, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.3968042609853529, |
| "grad_norm": 0.5603533831020405, |
| "learning_rate": 4.822134387351779e-05, |
| "loss": 0.4849, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.3994673768308921, |
| "grad_norm": 0.3973953941114295, |
| "learning_rate": 4.8171936758893284e-05, |
| "loss": 0.4776, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4021304926764314, |
| "grad_norm": 0.4956339650363368, |
| "learning_rate": 4.8122529644268775e-05, |
| "loss": 0.4588, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.4047936085219707, |
| "grad_norm": 0.38460346615021695, |
| "learning_rate": 4.807312252964427e-05, |
| "loss": 0.4737, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.40745672436750996, |
| "grad_norm": 0.5226991882164052, |
| "learning_rate": 4.8023715415019764e-05, |
| "loss": 0.4827, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.41011984021304926, |
| "grad_norm": 0.3418933085513387, |
| "learning_rate": 4.797430830039526e-05, |
| "loss": 0.4594, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.41278295605858856, |
| "grad_norm": 0.41779277140490917, |
| "learning_rate": 4.792490118577075e-05, |
| "loss": 0.4738, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.41544607190412786, |
| "grad_norm": 0.40524225841023903, |
| "learning_rate": 4.787549407114625e-05, |
| "loss": 0.4725, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.4181091877496671, |
| "grad_norm": 0.37804713363928255, |
| "learning_rate": 4.782608695652174e-05, |
| "loss": 0.476, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.4207723035952064, |
| "grad_norm": 0.32987544007452513, |
| "learning_rate": 4.777667984189724e-05, |
| "loss": 0.4606, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.4234354194407457, |
| "grad_norm": 0.32638522089295396, |
| "learning_rate": 4.772727272727273e-05, |
| "loss": 0.4796, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.42609853528628494, |
| "grad_norm": 0.3653611962183669, |
| "learning_rate": 4.767786561264823e-05, |
| "loss": 0.4703, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.42876165113182424, |
| "grad_norm": 0.39387144328442575, |
| "learning_rate": 4.762845849802372e-05, |
| "loss": 0.4821, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.43142476697736354, |
| "grad_norm": 0.473795283228247, |
| "learning_rate": 4.757905138339921e-05, |
| "loss": 0.4638, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.4340878828229028, |
| "grad_norm": 0.33040966306125785, |
| "learning_rate": 4.75296442687747e-05, |
| "loss": 0.4734, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.4367509986684421, |
| "grad_norm": 0.42723446550700767, |
| "learning_rate": 4.74802371541502e-05, |
| "loss": 0.4809, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.4394141145139814, |
| "grad_norm": 0.3675475725903659, |
| "learning_rate": 4.743083003952569e-05, |
| "loss": 0.4586, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.4420772303595206, |
| "grad_norm": 0.4219979464151687, |
| "learning_rate": 4.738142292490119e-05, |
| "loss": 0.4678, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.4447403462050599, |
| "grad_norm": 0.3857740050906692, |
| "learning_rate": 4.733201581027668e-05, |
| "loss": 0.4633, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.4474034620505992, |
| "grad_norm": 0.365686963876862, |
| "learning_rate": 4.7282608695652177e-05, |
| "loss": 0.4712, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.45006657789613846, |
| "grad_norm": 0.43242439287350204, |
| "learning_rate": 4.723320158102767e-05, |
| "loss": 0.4751, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.45272969374167776, |
| "grad_norm": 0.3908982963736634, |
| "learning_rate": 4.7183794466403165e-05, |
| "loss": 0.4723, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.45539280958721706, |
| "grad_norm": 0.4693769425526856, |
| "learning_rate": 4.7134387351778656e-05, |
| "loss": 0.4511, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.4580559254327563, |
| "grad_norm": 0.3437754359793867, |
| "learning_rate": 4.7084980237154154e-05, |
| "loss": 0.4634, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.4607190412782956, |
| "grad_norm": 0.5270401669346302, |
| "learning_rate": 4.7035573122529645e-05, |
| "loss": 0.4621, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.4633821571238349, |
| "grad_norm": 0.4696714456346351, |
| "learning_rate": 4.698616600790514e-05, |
| "loss": 0.4544, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.46604527296937415, |
| "grad_norm": 0.5068508932227126, |
| "learning_rate": 4.6936758893280634e-05, |
| "loss": 0.4506, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.46870838881491345, |
| "grad_norm": 0.503240500645686, |
| "learning_rate": 4.688735177865613e-05, |
| "loss": 0.4653, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.47137150466045274, |
| "grad_norm": 0.4373004531246149, |
| "learning_rate": 4.683794466403162e-05, |
| "loss": 0.4711, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.474034620505992, |
| "grad_norm": 0.3777218592654747, |
| "learning_rate": 4.678853754940712e-05, |
| "loss": 0.466, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.4766977363515313, |
| "grad_norm": 0.5064461910000716, |
| "learning_rate": 4.673913043478261e-05, |
| "loss": 0.4516, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.4793608521970706, |
| "grad_norm": 0.37515242222191797, |
| "learning_rate": 4.668972332015811e-05, |
| "loss": 0.4708, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.48202396804260983, |
| "grad_norm": 0.44905049367290634, |
| "learning_rate": 4.66403162055336e-05, |
| "loss": 0.4462, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.48468708388814913, |
| "grad_norm": 0.37911463481430624, |
| "learning_rate": 4.659090909090909e-05, |
| "loss": 0.4451, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.4873501997336884, |
| "grad_norm": 0.3830462171805543, |
| "learning_rate": 4.654150197628458e-05, |
| "loss": 0.4682, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.49001331557922767, |
| "grad_norm": 0.41200778908045926, |
| "learning_rate": 4.649209486166008e-05, |
| "loss": 0.4497, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.49267643142476697, |
| "grad_norm": 0.4315187398326425, |
| "learning_rate": 4.644268774703557e-05, |
| "loss": 0.4752, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.49533954727030627, |
| "grad_norm": 0.4519541174810682, |
| "learning_rate": 4.639328063241107e-05, |
| "loss": 0.4764, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.4980026631158455, |
| "grad_norm": 0.4089102997614078, |
| "learning_rate": 4.634387351778656e-05, |
| "loss": 0.4663, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.5006657789613849, |
| "grad_norm": 0.352791614063271, |
| "learning_rate": 4.629446640316206e-05, |
| "loss": 0.4671, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.5033288948069241, |
| "grad_norm": 0.3866144187741864, |
| "learning_rate": 4.624505928853755e-05, |
| "loss": 0.4746, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.5059920106524634, |
| "grad_norm": 0.4028526989391047, |
| "learning_rate": 4.6195652173913046e-05, |
| "loss": 0.4811, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5086551264980027, |
| "grad_norm": 0.4580432915919317, |
| "learning_rate": 4.614624505928854e-05, |
| "loss": 0.4678, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.511318242343542, |
| "grad_norm": 0.47798645545842755, |
| "learning_rate": 4.6096837944664035e-05, |
| "loss": 0.4514, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.5139813581890812, |
| "grad_norm": 0.40636636658954495, |
| "learning_rate": 4.6047430830039526e-05, |
| "loss": 0.4356, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.5166444740346205, |
| "grad_norm": 0.4206946394322433, |
| "learning_rate": 4.5998023715415024e-05, |
| "loss": 0.4637, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.5193075898801598, |
| "grad_norm": 0.4977083130622833, |
| "learning_rate": 4.5948616600790515e-05, |
| "loss": 0.4525, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.521970705725699, |
| "grad_norm": 0.3826090231131446, |
| "learning_rate": 4.589920948616601e-05, |
| "loss": 0.4647, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.5246338215712384, |
| "grad_norm": 0.443905698975846, |
| "learning_rate": 4.5849802371541504e-05, |
| "loss": 0.466, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.5272969374167776, |
| "grad_norm": 0.34058976392880835, |
| "learning_rate": 4.5800395256917e-05, |
| "loss": 0.4462, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.5299600532623169, |
| "grad_norm": 0.3708303032984336, |
| "learning_rate": 4.575098814229249e-05, |
| "loss": 0.4638, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.5326231691078562, |
| "grad_norm": 0.4046635861089521, |
| "learning_rate": 4.570158102766799e-05, |
| "loss": 0.4702, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5352862849533955, |
| "grad_norm": 0.390485621135718, |
| "learning_rate": 4.565217391304348e-05, |
| "loss": 0.467, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.5379494007989347, |
| "grad_norm": 0.36389394329456204, |
| "learning_rate": 4.560276679841897e-05, |
| "loss": 0.4676, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.5406125166444741, |
| "grad_norm": 0.36415110756708385, |
| "learning_rate": 4.555335968379447e-05, |
| "loss": 0.4508, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.5432756324900133, |
| "grad_norm": 0.5185630368770853, |
| "learning_rate": 4.550395256916996e-05, |
| "loss": 0.4835, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.5459387483355526, |
| "grad_norm": 0.3004205195451817, |
| "learning_rate": 4.545454545454546e-05, |
| "loss": 0.4655, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.5486018641810919, |
| "grad_norm": 0.40992528241944887, |
| "learning_rate": 4.540513833992095e-05, |
| "loss": 0.4516, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.5512649800266312, |
| "grad_norm": 0.3462175317121373, |
| "learning_rate": 4.535573122529644e-05, |
| "loss": 0.4471, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.5539280958721704, |
| "grad_norm": 0.4220985656684442, |
| "learning_rate": 4.530632411067194e-05, |
| "loss": 0.4483, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.5565912117177098, |
| "grad_norm": 0.2992081906139443, |
| "learning_rate": 4.525691699604743e-05, |
| "loss": 0.4659, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.559254327563249, |
| "grad_norm": 0.34958390386904065, |
| "learning_rate": 4.520750988142293e-05, |
| "loss": 0.4594, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5619174434087882, |
| "grad_norm": 0.36711080919022626, |
| "learning_rate": 4.515810276679842e-05, |
| "loss": 0.4329, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.5645805592543276, |
| "grad_norm": 0.32211416124144243, |
| "learning_rate": 4.5108695652173916e-05, |
| "loss": 0.4487, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.5672436750998668, |
| "grad_norm": 0.38626649006957514, |
| "learning_rate": 4.505928853754941e-05, |
| "loss": 0.4544, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.5699067909454061, |
| "grad_norm": 0.4022394284778984, |
| "learning_rate": 4.5009881422924905e-05, |
| "loss": 0.4505, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.5725699067909454, |
| "grad_norm": 0.3174185878452103, |
| "learning_rate": 4.4960474308300396e-05, |
| "loss": 0.4652, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.5752330226364847, |
| "grad_norm": 0.3872997977647099, |
| "learning_rate": 4.4911067193675893e-05, |
| "loss": 0.4771, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.5778961384820239, |
| "grad_norm": 0.2832157450180407, |
| "learning_rate": 4.4861660079051384e-05, |
| "loss": 0.4535, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.5805592543275633, |
| "grad_norm": 0.3394496956003534, |
| "learning_rate": 4.481225296442688e-05, |
| "loss": 0.4401, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.5832223701731025, |
| "grad_norm": 0.29084562762850125, |
| "learning_rate": 4.476284584980237e-05, |
| "loss": 0.445, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.5858854860186418, |
| "grad_norm": 0.30783953367051076, |
| "learning_rate": 4.471343873517787e-05, |
| "loss": 0.437, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.5885486018641811, |
| "grad_norm": 0.3183591003829617, |
| "learning_rate": 4.466403162055336e-05, |
| "loss": 0.4549, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.5912117177097204, |
| "grad_norm": 0.30102542208170724, |
| "learning_rate": 4.461462450592885e-05, |
| "loss": 0.4455, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.5938748335552596, |
| "grad_norm": 0.36209246659651434, |
| "learning_rate": 4.456521739130435e-05, |
| "loss": 0.4401, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.596537949400799, |
| "grad_norm": 0.3264752372953629, |
| "learning_rate": 4.451581027667984e-05, |
| "loss": 0.4379, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.5992010652463382, |
| "grad_norm": 0.38508783562543825, |
| "learning_rate": 4.446640316205534e-05, |
| "loss": 0.4617, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.6018641810918774, |
| "grad_norm": 0.3397449828204806, |
| "learning_rate": 4.441699604743083e-05, |
| "loss": 0.4516, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.6045272969374168, |
| "grad_norm": 0.3587152523608094, |
| "learning_rate": 4.436758893280633e-05, |
| "loss": 0.4627, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.607190412782956, |
| "grad_norm": 0.3533298903513862, |
| "learning_rate": 4.431818181818182e-05, |
| "loss": 0.4539, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.6098535286284953, |
| "grad_norm": 0.4031621223527615, |
| "learning_rate": 4.426877470355732e-05, |
| "loss": 0.4475, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.6125166444740346, |
| "grad_norm": 0.31598897434214096, |
| "learning_rate": 4.421936758893281e-05, |
| "loss": 0.4594, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6151797603195739, |
| "grad_norm": 0.39490506767356415, |
| "learning_rate": 4.4169960474308306e-05, |
| "loss": 0.4481, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.6178428761651131, |
| "grad_norm": 0.34551286464789904, |
| "learning_rate": 4.41205533596838e-05, |
| "loss": 0.4417, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.6205059920106525, |
| "grad_norm": 0.3471665108105545, |
| "learning_rate": 4.4071146245059295e-05, |
| "loss": 0.444, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.6231691078561917, |
| "grad_norm": 0.3236727871934815, |
| "learning_rate": 4.4021739130434786e-05, |
| "loss": 0.4465, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.625832223701731, |
| "grad_norm": 0.3951638876292987, |
| "learning_rate": 4.397233201581028e-05, |
| "loss": 0.4476, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.6284953395472703, |
| "grad_norm": 0.3186324774552031, |
| "learning_rate": 4.3922924901185774e-05, |
| "loss": 0.4359, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.6311584553928096, |
| "grad_norm": 0.3446758582788272, |
| "learning_rate": 4.387351778656127e-05, |
| "loss": 0.4425, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.6338215712383488, |
| "grad_norm": 0.3712178318421026, |
| "learning_rate": 4.382411067193676e-05, |
| "loss": 0.4479, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.6364846870838882, |
| "grad_norm": 0.2869593917948936, |
| "learning_rate": 4.377470355731226e-05, |
| "loss": 0.4487, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.6391478029294274, |
| "grad_norm": 0.35621809137402505, |
| "learning_rate": 4.3725296442687745e-05, |
| "loss": 0.459, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.6418109187749668, |
| "grad_norm": 0.3219598029099912, |
| "learning_rate": 4.367588932806324e-05, |
| "loss": 0.4486, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.644474034620506, |
| "grad_norm": 0.345671883817814, |
| "learning_rate": 4.3626482213438734e-05, |
| "loss": 0.4494, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.6471371504660453, |
| "grad_norm": 0.3326228424406132, |
| "learning_rate": 4.357707509881423e-05, |
| "loss": 0.467, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.6498002663115846, |
| "grad_norm": 0.42093399894851624, |
| "learning_rate": 4.352766798418972e-05, |
| "loss": 0.4361, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.6524633821571239, |
| "grad_norm": 0.4162222276319394, |
| "learning_rate": 4.347826086956522e-05, |
| "loss": 0.4606, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.6551264980026631, |
| "grad_norm": 0.36750359997980137, |
| "learning_rate": 4.342885375494071e-05, |
| "loss": 0.4429, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.6577896138482024, |
| "grad_norm": 0.5483612794064252, |
| "learning_rate": 4.337944664031621e-05, |
| "loss": 0.4533, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.6604527296937417, |
| "grad_norm": 0.3506444877775761, |
| "learning_rate": 4.33300395256917e-05, |
| "loss": 0.4469, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.6631158455392809, |
| "grad_norm": 0.49614493451666597, |
| "learning_rate": 4.32806324110672e-05, |
| "loss": 0.4511, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.6657789613848203, |
| "grad_norm": 0.38209500350480796, |
| "learning_rate": 4.323122529644269e-05, |
| "loss": 0.4556, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6684420772303595, |
| "grad_norm": 0.3909575859613948, |
| "learning_rate": 4.318181818181819e-05, |
| "loss": 0.4573, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.6711051930758988, |
| "grad_norm": 0.41081105341671875, |
| "learning_rate": 4.313241106719368e-05, |
| "loss": 0.4319, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.6737683089214381, |
| "grad_norm": 0.3263282193938601, |
| "learning_rate": 4.3083003952569175e-05, |
| "loss": 0.4477, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.6764314247669774, |
| "grad_norm": 0.30906206450856727, |
| "learning_rate": 4.3033596837944666e-05, |
| "loss": 0.449, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.6790945406125166, |
| "grad_norm": 0.4519613203178409, |
| "learning_rate": 4.2984189723320164e-05, |
| "loss": 0.4411, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.681757656458056, |
| "grad_norm": 0.4018486844337667, |
| "learning_rate": 4.2934782608695655e-05, |
| "loss": 0.4402, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.6844207723035952, |
| "grad_norm": 0.41908409625079107, |
| "learning_rate": 4.288537549407115e-05, |
| "loss": 0.4531, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.6870838881491345, |
| "grad_norm": 0.34694110159483726, |
| "learning_rate": 4.2835968379446644e-05, |
| "loss": 0.4533, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.6897470039946738, |
| "grad_norm": 0.4051995527756752, |
| "learning_rate": 4.2786561264822135e-05, |
| "loss": 0.4533, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.6924101198402131, |
| "grad_norm": 0.3557731708549695, |
| "learning_rate": 4.2737154150197626e-05, |
| "loss": 0.4665, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.6950732356857523, |
| "grad_norm": 0.387832077012766, |
| "learning_rate": 4.2687747035573124e-05, |
| "loss": 0.4407, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.6977363515312917, |
| "grad_norm": 0.38082367574409703, |
| "learning_rate": 4.2638339920948615e-05, |
| "loss": 0.453, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.7003994673768309, |
| "grad_norm": 0.33683683724829466, |
| "learning_rate": 4.258893280632411e-05, |
| "loss": 0.4635, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.7030625832223701, |
| "grad_norm": 0.4169335496839881, |
| "learning_rate": 4.2539525691699603e-05, |
| "loss": 0.4563, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.7057256990679095, |
| "grad_norm": 0.3214835965167982, |
| "learning_rate": 4.24901185770751e-05, |
| "loss": 0.4542, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.7083888149134487, |
| "grad_norm": 0.3530582715253166, |
| "learning_rate": 4.244071146245059e-05, |
| "loss": 0.4331, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.711051930758988, |
| "grad_norm": 0.36340494740289614, |
| "learning_rate": 4.239130434782609e-05, |
| "loss": 0.4394, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.7137150466045273, |
| "grad_norm": 0.3874861034018051, |
| "learning_rate": 4.234189723320158e-05, |
| "loss": 0.4297, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.7163781624500666, |
| "grad_norm": 0.387734289004501, |
| "learning_rate": 4.229249011857708e-05, |
| "loss": 0.4518, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.7190412782956058, |
| "grad_norm": 0.3011771126496286, |
| "learning_rate": 4.224308300395257e-05, |
| "loss": 0.4369, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.7217043941411452, |
| "grad_norm": 0.41746724783245387, |
| "learning_rate": 4.219367588932807e-05, |
| "loss": 0.4509, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.7243675099866844, |
| "grad_norm": 0.3395798145391856, |
| "learning_rate": 4.214426877470356e-05, |
| "loss": 0.4643, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.7270306258322237, |
| "grad_norm": 0.4118033460496559, |
| "learning_rate": 4.2094861660079056e-05, |
| "loss": 0.4238, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.729693741677763, |
| "grad_norm": 0.2988995865914867, |
| "learning_rate": 4.204545454545455e-05, |
| "loss": 0.4414, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.7323568575233023, |
| "grad_norm": 0.4755302873686915, |
| "learning_rate": 4.1996047430830045e-05, |
| "loss": 0.4408, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.7350199733688415, |
| "grad_norm": 0.3321861192448237, |
| "learning_rate": 4.1946640316205536e-05, |
| "loss": 0.4471, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.7376830892143809, |
| "grad_norm": 0.45541818319145366, |
| "learning_rate": 4.1897233201581034e-05, |
| "loss": 0.4473, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.7403462050599201, |
| "grad_norm": 0.37099566890533026, |
| "learning_rate": 4.1847826086956525e-05, |
| "loss": 0.4495, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.7430093209054593, |
| "grad_norm": 0.4035270770785246, |
| "learning_rate": 4.1798418972332016e-05, |
| "loss": 0.4513, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.7456724367509987, |
| "grad_norm": 0.3441312582159767, |
| "learning_rate": 4.174901185770751e-05, |
| "loss": 0.4358, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.748335552596538, |
| "grad_norm": 0.44606462407083225, |
| "learning_rate": 4.1699604743083005e-05, |
| "loss": 0.4441, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.7509986684420772, |
| "grad_norm": 0.41551217890891706, |
| "learning_rate": 4.1650197628458496e-05, |
| "loss": 0.4389, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.7536617842876165, |
| "grad_norm": 0.3972988958201408, |
| "learning_rate": 4.160079051383399e-05, |
| "loss": 0.4375, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.7563249001331558, |
| "grad_norm": 0.47085225893645843, |
| "learning_rate": 4.1551383399209484e-05, |
| "loss": 0.4567, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.758988015978695, |
| "grad_norm": 0.34543261673414827, |
| "learning_rate": 4.150197628458498e-05, |
| "loss": 0.4459, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.7616511318242344, |
| "grad_norm": 0.43195994812681116, |
| "learning_rate": 4.145256916996047e-05, |
| "loss": 0.4589, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.7643142476697736, |
| "grad_norm": 0.3459436864735825, |
| "learning_rate": 4.140316205533597e-05, |
| "loss": 0.4599, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.7669773635153129, |
| "grad_norm": 0.36207300529867464, |
| "learning_rate": 4.135375494071146e-05, |
| "loss": 0.4303, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.7696404793608522, |
| "grad_norm": 0.41345784501066335, |
| "learning_rate": 4.130434782608696e-05, |
| "loss": 0.4271, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.7723035952063915, |
| "grad_norm": 0.3159838632384483, |
| "learning_rate": 4.125494071146245e-05, |
| "loss": 0.4559, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.7749667110519307, |
| "grad_norm": 0.3812699162571922, |
| "learning_rate": 4.120553359683795e-05, |
| "loss": 0.4342, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.7776298268974701, |
| "grad_norm": 0.37911131885498967, |
| "learning_rate": 4.115612648221344e-05, |
| "loss": 0.4362, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.7802929427430093, |
| "grad_norm": 0.29763254355588903, |
| "learning_rate": 4.110671936758894e-05, |
| "loss": 0.438, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.7829560585885486, |
| "grad_norm": 0.42619217859831243, |
| "learning_rate": 4.105731225296443e-05, |
| "loss": 0.4359, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.7856191744340879, |
| "grad_norm": 0.3300550679665931, |
| "learning_rate": 4.1007905138339926e-05, |
| "loss": 0.43, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.7882822902796272, |
| "grad_norm": 0.36668560763021596, |
| "learning_rate": 4.095849802371542e-05, |
| "loss": 0.4307, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.7909454061251664, |
| "grad_norm": 0.4285864023060217, |
| "learning_rate": 4.0909090909090915e-05, |
| "loss": 0.44, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.7936085219707057, |
| "grad_norm": 0.40308733058892654, |
| "learning_rate": 4.0859683794466406e-05, |
| "loss": 0.4438, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.796271637816245, |
| "grad_norm": 0.48251508562888784, |
| "learning_rate": 4.08102766798419e-05, |
| "loss": 0.465, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.7989347536617842, |
| "grad_norm": 0.3630289677972406, |
| "learning_rate": 4.076086956521739e-05, |
| "loss": 0.4472, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.8015978695073236, |
| "grad_norm": 0.39496674097555107, |
| "learning_rate": 4.0711462450592886e-05, |
| "loss": 0.4391, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.8042609853528628, |
| "grad_norm": 0.3844393845604204, |
| "learning_rate": 4.0662055335968377e-05, |
| "loss": 0.4594, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.8069241011984021, |
| "grad_norm": 0.41185922961873794, |
| "learning_rate": 4.0612648221343874e-05, |
| "loss": 0.4302, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.8095872170439414, |
| "grad_norm": 0.3856385433600225, |
| "learning_rate": 4.0563241106719365e-05, |
| "loss": 0.4436, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.8122503328894807, |
| "grad_norm": 0.38840299488987834, |
| "learning_rate": 4.051383399209486e-05, |
| "loss": 0.4536, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.8149134487350199, |
| "grad_norm": 0.3814150713404761, |
| "learning_rate": 4.0464426877470354e-05, |
| "loss": 0.4478, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.8175765645805593, |
| "grad_norm": 0.3688695146114231, |
| "learning_rate": 4.041501976284585e-05, |
| "loss": 0.4371, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.8202396804260985, |
| "grad_norm": 0.4525942844580142, |
| "learning_rate": 4.036561264822134e-05, |
| "loss": 0.4291, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.8229027962716379, |
| "grad_norm": 0.4052871924274271, |
| "learning_rate": 4.031620553359684e-05, |
| "loss": 0.4441, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.8255659121171771, |
| "grad_norm": 0.39806513754399514, |
| "learning_rate": 4.026679841897233e-05, |
| "loss": 0.4411, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.8282290279627164, |
| "grad_norm": 0.3805049053303521, |
| "learning_rate": 4.021739130434783e-05, |
| "loss": 0.4366, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.8308921438082557, |
| "grad_norm": 0.4001908389883243, |
| "learning_rate": 4.016798418972332e-05, |
| "loss": 0.4481, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.833555259653795, |
| "grad_norm": 0.3685478975261263, |
| "learning_rate": 4.011857707509882e-05, |
| "loss": 0.4444, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.8362183754993342, |
| "grad_norm": 0.3338436350006864, |
| "learning_rate": 4.006916996047431e-05, |
| "loss": 0.4479, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.8388814913448736, |
| "grad_norm": 0.41429245260714803, |
| "learning_rate": 4.001976284584981e-05, |
| "loss": 0.449, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.8415446071904128, |
| "grad_norm": 0.4423411865525233, |
| "learning_rate": 3.99703557312253e-05, |
| "loss": 0.4659, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.844207723035952, |
| "grad_norm": 0.2957853011048819, |
| "learning_rate": 3.9920948616600796e-05, |
| "loss": 0.4251, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.8468708388814914, |
| "grad_norm": 0.4030160825498704, |
| "learning_rate": 3.987154150197629e-05, |
| "loss": 0.4371, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.8495339547270306, |
| "grad_norm": 0.3580572215645172, |
| "learning_rate": 3.982213438735178e-05, |
| "loss": 0.4227, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.8521970705725699, |
| "grad_norm": 0.39710125591854223, |
| "learning_rate": 3.9772727272727275e-05, |
| "loss": 0.4293, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.8548601864181092, |
| "grad_norm": 0.4051765562646604, |
| "learning_rate": 3.9723320158102766e-05, |
| "loss": 0.4334, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.8575233022636485, |
| "grad_norm": 0.41675278060825943, |
| "learning_rate": 3.9673913043478264e-05, |
| "loss": 0.4386, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.8601864181091877, |
| "grad_norm": 0.4375405045592726, |
| "learning_rate": 3.9624505928853755e-05, |
| "loss": 0.4533, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.8628495339547271, |
| "grad_norm": 0.4043621563504148, |
| "learning_rate": 3.957509881422925e-05, |
| "loss": 0.4497, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.8655126498002663, |
| "grad_norm": 0.37983530045601516, |
| "learning_rate": 3.9525691699604744e-05, |
| "loss": 0.4392, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.8681757656458056, |
| "grad_norm": 0.4289732652538706, |
| "learning_rate": 3.947628458498024e-05, |
| "loss": 0.4401, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.8708388814913449, |
| "grad_norm": 0.34033600614743714, |
| "learning_rate": 3.942687747035573e-05, |
| "loss": 0.453, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.8735019973368842, |
| "grad_norm": 0.399300367168935, |
| "learning_rate": 3.937747035573123e-05, |
| "loss": 0.433, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.8761651131824234, |
| "grad_norm": 0.36717092389818584, |
| "learning_rate": 3.932806324110672e-05, |
| "loss": 0.4523, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.8788282290279628, |
| "grad_norm": 0.43669770511305556, |
| "learning_rate": 3.927865612648222e-05, |
| "loss": 0.437, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.881491344873502, |
| "grad_norm": 0.3631294987791108, |
| "learning_rate": 3.922924901185771e-05, |
| "loss": 0.4335, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.8841544607190412, |
| "grad_norm": 0.45116504976872973, |
| "learning_rate": 3.917984189723321e-05, |
| "loss": 0.4562, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.8868175765645806, |
| "grad_norm": 0.3163566159546663, |
| "learning_rate": 3.91304347826087e-05, |
| "loss": 0.4286, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.8894806924101198, |
| "grad_norm": 0.49699702016497876, |
| "learning_rate": 3.90810276679842e-05, |
| "loss": 0.4214, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.8921438082556591, |
| "grad_norm": 0.4164898463983148, |
| "learning_rate": 3.903162055335969e-05, |
| "loss": 0.4354, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.8948069241011984, |
| "grad_norm": 0.39631778611383006, |
| "learning_rate": 3.8982213438735186e-05, |
| "loss": 0.4389, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.8974700399467377, |
| "grad_norm": 0.4545892509897146, |
| "learning_rate": 3.893280632411067e-05, |
| "loss": 0.4312, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.9001331557922769, |
| "grad_norm": 0.41988367228289636, |
| "learning_rate": 3.888339920948617e-05, |
| "loss": 0.4433, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.9027962716378163, |
| "grad_norm": 0.3123307577517813, |
| "learning_rate": 3.883399209486166e-05, |
| "loss": 0.4272, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.9054593874833555, |
| "grad_norm": 0.31692127951353677, |
| "learning_rate": 3.8784584980237156e-05, |
| "loss": 0.4292, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.9081225033288948, |
| "grad_norm": 0.33613245505768613, |
| "learning_rate": 3.873517786561265e-05, |
| "loss": 0.4249, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.9107856191744341, |
| "grad_norm": 0.30559768683570065, |
| "learning_rate": 3.8685770750988145e-05, |
| "loss": 0.4398, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.9134487350199734, |
| "grad_norm": 0.3939981911193064, |
| "learning_rate": 3.8636363636363636e-05, |
| "loss": 0.4335, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.9161118508655126, |
| "grad_norm": 0.33858345690029085, |
| "learning_rate": 3.8586956521739134e-05, |
| "loss": 0.4451, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.918774966711052, |
| "grad_norm": 0.3422872934004404, |
| "learning_rate": 3.8537549407114625e-05, |
| "loss": 0.4353, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.9214380825565912, |
| "grad_norm": 0.3280283881293896, |
| "learning_rate": 3.848814229249012e-05, |
| "loss": 0.4336, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.9241011984021305, |
| "grad_norm": 0.3212166344001671, |
| "learning_rate": 3.8438735177865614e-05, |
| "loss": 0.4436, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.9267643142476698, |
| "grad_norm": 0.29779879718680563, |
| "learning_rate": 3.838932806324111e-05, |
| "loss": 0.4224, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.929427430093209, |
| "grad_norm": 0.32257209602500175, |
| "learning_rate": 3.83399209486166e-05, |
| "loss": 0.4324, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.9320905459387483, |
| "grad_norm": 0.3283760169277036, |
| "learning_rate": 3.82905138339921e-05, |
| "loss": 0.4312, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.9347536617842876, |
| "grad_norm": 0.29560048048387905, |
| "learning_rate": 3.824110671936759e-05, |
| "loss": 0.438, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.9374167776298269, |
| "grad_norm": 0.31047996971013586, |
| "learning_rate": 3.819169960474309e-05, |
| "loss": 0.436, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.9400798934753661, |
| "grad_norm": 0.3203340478559344, |
| "learning_rate": 3.814229249011858e-05, |
| "loss": 0.4178, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.9427430093209055, |
| "grad_norm": 0.3000799797652741, |
| "learning_rate": 3.809288537549408e-05, |
| "loss": 0.4283, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.9454061251664447, |
| "grad_norm": 0.31625082964426837, |
| "learning_rate": 3.804347826086957e-05, |
| "loss": 0.4355, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.948069241011984, |
| "grad_norm": 0.38688019968777704, |
| "learning_rate": 3.7994071146245066e-05, |
| "loss": 0.4561, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.9507323568575233, |
| "grad_norm": 0.309916135809927, |
| "learning_rate": 3.794466403162055e-05, |
| "loss": 0.4323, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.9533954727030626, |
| "grad_norm": 0.4119303884073823, |
| "learning_rate": 3.789525691699605e-05, |
| "loss": 0.4346, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.9560585885486018, |
| "grad_norm": 0.36057463061333933, |
| "learning_rate": 3.784584980237154e-05, |
| "loss": 0.4521, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.9587217043941412, |
| "grad_norm": 0.3385683676369823, |
| "learning_rate": 3.779644268774704e-05, |
| "loss": 0.4186, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.9613848202396804, |
| "grad_norm": 0.40056553056875543, |
| "learning_rate": 3.774703557312253e-05, |
| "loss": 0.4577, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.9640479360852197, |
| "grad_norm": 0.3362167210172609, |
| "learning_rate": 3.7697628458498026e-05, |
| "loss": 0.4232, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.966711051930759, |
| "grad_norm": 0.39765353196088127, |
| "learning_rate": 3.764822134387352e-05, |
| "loss": 0.4441, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.9693741677762983, |
| "grad_norm": 0.34508268417865146, |
| "learning_rate": 3.7598814229249015e-05, |
| "loss": 0.4339, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.9720372836218375, |
| "grad_norm": 0.346158165413465, |
| "learning_rate": 3.7549407114624506e-05, |
| "loss": 0.4314, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.9747003994673769, |
| "grad_norm": 0.38758138562436, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 0.4479, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.9773635153129161, |
| "grad_norm": 0.3616955496837348, |
| "learning_rate": 3.7450592885375494e-05, |
| "loss": 0.4295, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.9800266311584553, |
| "grad_norm": 0.36330419598482033, |
| "learning_rate": 3.740118577075099e-05, |
| "loss": 0.431, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.9826897470039947, |
| "grad_norm": 0.38220931731215757, |
| "learning_rate": 3.735177865612648e-05, |
| "loss": 0.4411, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.9853528628495339, |
| "grad_norm": 0.32482883893874537, |
| "learning_rate": 3.730237154150198e-05, |
| "loss": 0.4352, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.9880159786950732, |
| "grad_norm": 0.3797976983855516, |
| "learning_rate": 3.725296442687747e-05, |
| "loss": 0.4273, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.9906790945406125, |
| "grad_norm": 0.3333203576267911, |
| "learning_rate": 3.720355731225297e-05, |
| "loss": 0.4353, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.9933422103861518, |
| "grad_norm": 0.3565932063789887, |
| "learning_rate": 3.715415019762846e-05, |
| "loss": 0.4312, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.996005326231691, |
| "grad_norm": 0.35499721260713074, |
| "learning_rate": 3.710474308300396e-05, |
| "loss": 0.4328, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.9986684420772304, |
| "grad_norm": 0.34312841144350587, |
| "learning_rate": 3.705533596837945e-05, |
| "loss": 0.4238, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.34312841144350587, |
| "learning_rate": 3.700592885375494e-05, |
| "loss": 0.4292, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.0026631158455392, |
| "grad_norm": 0.523484923884555, |
| "learning_rate": 3.695652173913043e-05, |
| "loss": 0.3827, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.0053262316910785, |
| "grad_norm": 0.44981178204276556, |
| "learning_rate": 3.690711462450593e-05, |
| "loss": 0.3497, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.007989347536618, |
| "grad_norm": 0.30585009680415987, |
| "learning_rate": 3.685770750988142e-05, |
| "loss": 0.3667, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.0106524633821572, |
| "grad_norm": 0.3734972975740805, |
| "learning_rate": 3.680830039525692e-05, |
| "loss": 0.365, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.0133155792276964, |
| "grad_norm": 0.32549667969227175, |
| "learning_rate": 3.675889328063241e-05, |
| "loss": 0.3756, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.0159786950732357, |
| "grad_norm": 0.4493130971817616, |
| "learning_rate": 3.670948616600791e-05, |
| "loss": 0.358, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.018641810918775, |
| "grad_norm": 0.40705895511048784, |
| "learning_rate": 3.66600790513834e-05, |
| "loss": 0.3711, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.0213049267643142, |
| "grad_norm": 0.3979472669944709, |
| "learning_rate": 3.6610671936758896e-05, |
| "loss": 0.3613, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.0239680426098536, |
| "grad_norm": 0.44247177084982264, |
| "learning_rate": 3.656126482213439e-05, |
| "loss": 0.3461, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.0266311584553929, |
| "grad_norm": 0.3643767210189153, |
| "learning_rate": 3.6511857707509884e-05, |
| "loss": 0.3682, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.0292942743009321, |
| "grad_norm": 0.3710522218627508, |
| "learning_rate": 3.6462450592885375e-05, |
| "loss": 0.3616, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.0319573901464714, |
| "grad_norm": 0.39199235847196745, |
| "learning_rate": 3.641304347826087e-05, |
| "loss": 0.3373, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.0346205059920106, |
| "grad_norm": 0.3716307271666748, |
| "learning_rate": 3.6363636363636364e-05, |
| "loss": 0.3783, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.0372836218375499, |
| "grad_norm": 0.39593613574016095, |
| "learning_rate": 3.631422924901186e-05, |
| "loss": 0.3605, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.0399467376830893, |
| "grad_norm": 0.3741049180680241, |
| "learning_rate": 3.626482213438735e-05, |
| "loss": 0.3643, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.0426098535286286, |
| "grad_norm": 0.39560887666458844, |
| "learning_rate": 3.621541501976285e-05, |
| "loss": 0.3873, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.0452729693741678, |
| "grad_norm": 0.4542194912059658, |
| "learning_rate": 3.616600790513834e-05, |
| "loss": 0.3517, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.047936085219707, |
| "grad_norm": 0.3376853296582342, |
| "learning_rate": 3.611660079051384e-05, |
| "loss": 0.3746, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.0505992010652463, |
| "grad_norm": 0.38846148578122447, |
| "learning_rate": 3.606719367588933e-05, |
| "loss": 0.3389, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.0532623169107855, |
| "grad_norm": 0.32360005393691865, |
| "learning_rate": 3.601778656126482e-05, |
| "loss": 0.3663, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.055925432756325, |
| "grad_norm": 0.326112805381814, |
| "learning_rate": 3.596837944664031e-05, |
| "loss": 0.3581, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.0585885486018642, |
| "grad_norm": 0.28926622056464246, |
| "learning_rate": 3.591897233201581e-05, |
| "loss": 0.358, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.0612516644474035, |
| "grad_norm": 0.3055465293423247, |
| "learning_rate": 3.58695652173913e-05, |
| "loss": 0.3617, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.0639147802929427, |
| "grad_norm": 0.33022021713183336, |
| "learning_rate": 3.58201581027668e-05, |
| "loss": 0.353, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.066577896138482, |
| "grad_norm": 0.29024468585164404, |
| "learning_rate": 3.577075098814229e-05, |
| "loss": 0.355, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.0692410119840212, |
| "grad_norm": 0.2733040275941461, |
| "learning_rate": 3.572134387351779e-05, |
| "loss": 0.3574, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.0719041278295607, |
| "grad_norm": 0.3226214256196561, |
| "learning_rate": 3.567193675889328e-05, |
| "loss": 0.3528, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.0745672436751, |
| "grad_norm": 0.31534151465175414, |
| "learning_rate": 3.5622529644268777e-05, |
| "loss": 0.3539, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.0772303595206392, |
| "grad_norm": 0.2751061424659443, |
| "learning_rate": 3.557312252964427e-05, |
| "loss": 0.3667, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.0798934753661784, |
| "grad_norm": 0.3612676719250419, |
| "learning_rate": 3.5523715415019765e-05, |
| "loss": 0.3541, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.0825565912117177, |
| "grad_norm": 0.3011759295136269, |
| "learning_rate": 3.5474308300395256e-05, |
| "loss": 0.3606, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.085219707057257, |
| "grad_norm": 0.3978993850172965, |
| "learning_rate": 3.5424901185770754e-05, |
| "loss": 0.3626, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.0878828229027964, |
| "grad_norm": 0.2872210237523896, |
| "learning_rate": 3.5375494071146245e-05, |
| "loss": 0.3889, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.0905459387483356, |
| "grad_norm": 0.443073058318771, |
| "learning_rate": 3.532608695652174e-05, |
| "loss": 0.3535, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.0932090545938749, |
| "grad_norm": 0.33127012106810017, |
| "learning_rate": 3.5276679841897234e-05, |
| "loss": 0.3459, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.095872170439414, |
| "grad_norm": 0.2919448905657829, |
| "learning_rate": 3.522727272727273e-05, |
| "loss": 0.365, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.0985352862849533, |
| "grad_norm": 0.33466018716475304, |
| "learning_rate": 3.517786561264822e-05, |
| "loss": 0.3625, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.1011984021304926, |
| "grad_norm": 0.3413607594653121, |
| "learning_rate": 3.512845849802372e-05, |
| "loss": 0.3724, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.103861517976032, |
| "grad_norm": 0.35737975021729407, |
| "learning_rate": 3.507905138339921e-05, |
| "loss": 0.3774, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.1065246338215713, |
| "grad_norm": 0.34162270993471044, |
| "learning_rate": 3.50296442687747e-05, |
| "loss": 0.3686, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.1091877496671105, |
| "grad_norm": 0.35133143811370443, |
| "learning_rate": 3.49802371541502e-05, |
| "loss": 0.3699, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.1118508655126498, |
| "grad_norm": 0.3579722853716089, |
| "learning_rate": 3.493083003952569e-05, |
| "loss": 0.3505, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.114513981358189, |
| "grad_norm": 0.2618428057689255, |
| "learning_rate": 3.488142292490119e-05, |
| "loss": 0.3463, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.1171770972037283, |
| "grad_norm": 0.35732356240927676, |
| "learning_rate": 3.483201581027668e-05, |
| "loss": 0.3473, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.1198402130492677, |
| "grad_norm": 0.34101793627943705, |
| "learning_rate": 3.478260869565218e-05, |
| "loss": 0.3738, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.122503328894807, |
| "grad_norm": 0.3005835100136546, |
| "learning_rate": 3.473320158102767e-05, |
| "loss": 0.3748, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.1251664447403462, |
| "grad_norm": 0.3512554307406862, |
| "learning_rate": 3.4683794466403166e-05, |
| "loss": 0.3578, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.1278295605858855, |
| "grad_norm": 0.3037958675770476, |
| "learning_rate": 3.463438735177866e-05, |
| "loss": 0.3812, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.1304926764314247, |
| "grad_norm": 0.33131881019625853, |
| "learning_rate": 3.4584980237154155e-05, |
| "loss": 0.3475, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.133155792276964, |
| "grad_norm": 0.2887902456682679, |
| "learning_rate": 3.4535573122529646e-05, |
| "loss": 0.3658, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.1358189081225034, |
| "grad_norm": 0.3429001374635811, |
| "learning_rate": 3.4486166007905144e-05, |
| "loss": 0.37, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.1384820239680427, |
| "grad_norm": 0.32345869994940707, |
| "learning_rate": 3.4436758893280635e-05, |
| "loss": 0.3325, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.141145139813582, |
| "grad_norm": 0.3183193536956743, |
| "learning_rate": 3.438735177865613e-05, |
| "loss": 0.3597, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.1438082556591211, |
| "grad_norm": 0.3300209265208329, |
| "learning_rate": 3.4337944664031624e-05, |
| "loss": 0.3718, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.1464713715046604, |
| "grad_norm": 0.31339838507600637, |
| "learning_rate": 3.428853754940712e-05, |
| "loss": 0.3505, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.1491344873501999, |
| "grad_norm": 0.30103241701187505, |
| "learning_rate": 3.423913043478261e-05, |
| "loss": 0.3515, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.151797603195739, |
| "grad_norm": 0.33142077936580827, |
| "learning_rate": 3.418972332015811e-05, |
| "loss": 0.3454, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.1544607190412783, |
| "grad_norm": 0.26672583595142774, |
| "learning_rate": 3.41403162055336e-05, |
| "loss": 0.3557, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.1571238348868176, |
| "grad_norm": 0.29810972252935447, |
| "learning_rate": 3.409090909090909e-05, |
| "loss": 0.3627, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.1597869507323568, |
| "grad_norm": 0.4004613882147666, |
| "learning_rate": 3.404150197628458e-05, |
| "loss": 0.3596, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.162450066577896, |
| "grad_norm": 0.3230914038022782, |
| "learning_rate": 3.399209486166008e-05, |
| "loss": 0.3494, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.1651131824234353, |
| "grad_norm": 0.26213767359417905, |
| "learning_rate": 3.394268774703557e-05, |
| "loss": 0.3686, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.1677762982689748, |
| "grad_norm": 0.4095014774133373, |
| "learning_rate": 3.389328063241107e-05, |
| "loss": 0.3688, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.170439414114514, |
| "grad_norm": 0.266377270998587, |
| "learning_rate": 3.384387351778656e-05, |
| "loss": 0.3648, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.1731025299600533, |
| "grad_norm": 0.32985529288585497, |
| "learning_rate": 3.379446640316206e-05, |
| "loss": 0.3703, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.1757656458055925, |
| "grad_norm": 0.3629424885940422, |
| "learning_rate": 3.374505928853755e-05, |
| "loss": 0.3502, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.1784287616511318, |
| "grad_norm": 0.29079091604622403, |
| "learning_rate": 3.369565217391305e-05, |
| "loss": 0.3696, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.1810918774966712, |
| "grad_norm": 0.36019836895937174, |
| "learning_rate": 3.364624505928854e-05, |
| "loss": 0.3507, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.1837549933422105, |
| "grad_norm": 0.3710021105040673, |
| "learning_rate": 3.3596837944664036e-05, |
| "loss": 0.3458, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.1864181091877497, |
| "grad_norm": 0.2814671230360335, |
| "learning_rate": 3.354743083003953e-05, |
| "loss": 0.3625, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.189081225033289, |
| "grad_norm": 0.39752143956114194, |
| "learning_rate": 3.3498023715415025e-05, |
| "loss": 0.3372, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.1917443408788282, |
| "grad_norm": 0.3447518628047081, |
| "learning_rate": 3.3448616600790516e-05, |
| "loss": 0.352, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.1944074567243674, |
| "grad_norm": 0.23476338435026442, |
| "learning_rate": 3.3399209486166014e-05, |
| "loss": 0.3433, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.1970705725699067, |
| "grad_norm": 0.41285793244761565, |
| "learning_rate": 3.3349802371541505e-05, |
| "loss": 0.3507, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.1997336884154461, |
| "grad_norm": 0.2756526642604148, |
| "learning_rate": 3.3300395256917e-05, |
| "loss": 0.3679, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.2023968042609854, |
| "grad_norm": 0.35361646973541144, |
| "learning_rate": 3.325098814229249e-05, |
| "loss": 0.3771, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.2050599201065246, |
| "grad_norm": 0.3011012199917682, |
| "learning_rate": 3.320158102766799e-05, |
| "loss": 0.3501, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.2077230359520639, |
| "grad_norm": 0.2753809532139054, |
| "learning_rate": 3.3152173913043475e-05, |
| "loss": 0.3751, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.2103861517976031, |
| "grad_norm": 0.345446601586865, |
| "learning_rate": 3.310276679841897e-05, |
| "loss": 0.3675, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.2130492676431426, |
| "grad_norm": 0.3105483046559569, |
| "learning_rate": 3.3053359683794464e-05, |
| "loss": 0.3473, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.2157123834886818, |
| "grad_norm": 0.31097501000340777, |
| "learning_rate": 3.300395256916996e-05, |
| "loss": 0.3685, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.218375499334221, |
| "grad_norm": 0.35861972517870744, |
| "learning_rate": 3.295454545454545e-05, |
| "loss": 0.3493, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.2210386151797603, |
| "grad_norm": 0.2497414905559577, |
| "learning_rate": 3.290513833992095e-05, |
| "loss": 0.3596, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.2237017310252996, |
| "grad_norm": 0.3260671903675003, |
| "learning_rate": 3.285573122529644e-05, |
| "loss": 0.3584, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.2263648468708388, |
| "grad_norm": 0.303125715747872, |
| "learning_rate": 3.280632411067194e-05, |
| "loss": 0.3468, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.229027962716378, |
| "grad_norm": 0.2894307336548194, |
| "learning_rate": 3.275691699604743e-05, |
| "loss": 0.3589, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.2316910785619175, |
| "grad_norm": 0.3081296705994847, |
| "learning_rate": 3.270750988142293e-05, |
| "loss": 0.3586, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.2343541944074568, |
| "grad_norm": 0.2926327290593828, |
| "learning_rate": 3.265810276679842e-05, |
| "loss": 0.3594, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.237017310252996, |
| "grad_norm": 0.3050352656827861, |
| "learning_rate": 3.260869565217392e-05, |
| "loss": 0.3794, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.2396804260985352, |
| "grad_norm": 0.34421850278839233, |
| "learning_rate": 3.255928853754941e-05, |
| "loss": 0.3448, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.2423435419440745, |
| "grad_norm": 0.3178141996560178, |
| "learning_rate": 3.2509881422924906e-05, |
| "loss": 0.3596, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.245006657789614, |
| "grad_norm": 0.36055320312739547, |
| "learning_rate": 3.24604743083004e-05, |
| "loss": 0.3374, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.2476697736351532, |
| "grad_norm": 0.2584894490878346, |
| "learning_rate": 3.2411067193675894e-05, |
| "loss": 0.3381, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.2503328894806924, |
| "grad_norm": 0.3556442871963007, |
| "learning_rate": 3.2361660079051385e-05, |
| "loss": 0.3757, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.2529960053262317, |
| "grad_norm": 0.2936471278443274, |
| "learning_rate": 3.231225296442688e-05, |
| "loss": 0.3612, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.255659121171771, |
| "grad_norm": 0.34920820452723006, |
| "learning_rate": 3.2262845849802374e-05, |
| "loss": 0.3571, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.2583222370173104, |
| "grad_norm": 0.27353129045046504, |
| "learning_rate": 3.221343873517787e-05, |
| "loss": 0.366, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.2609853528628494, |
| "grad_norm": 0.3336825600119343, |
| "learning_rate": 3.2164031620553356e-05, |
| "loss": 0.3682, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.2636484687083889, |
| "grad_norm": 0.28422664920281926, |
| "learning_rate": 3.2114624505928854e-05, |
| "loss": 0.3574, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.2663115845539281, |
| "grad_norm": 0.27995772097533356, |
| "learning_rate": 3.2065217391304345e-05, |
| "loss": 0.3577, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.2689747003994674, |
| "grad_norm": 0.3073145651684054, |
| "learning_rate": 3.201581027667984e-05, |
| "loss": 0.356, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.2716378162450066, |
| "grad_norm": 0.2926799912079748, |
| "learning_rate": 3.1966403162055334e-05, |
| "loss": 0.3398, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.2743009320905458, |
| "grad_norm": 0.2638946062975387, |
| "learning_rate": 3.191699604743083e-05, |
| "loss": 0.3742, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.2769640479360853, |
| "grad_norm": 0.3188095670364053, |
| "learning_rate": 3.186758893280632e-05, |
| "loss": 0.3564, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.2796271637816246, |
| "grad_norm": 0.2620162833825017, |
| "learning_rate": 3.181818181818182e-05, |
| "loss": 0.36, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.2822902796271638, |
| "grad_norm": 0.34823059030048475, |
| "learning_rate": 3.176877470355731e-05, |
| "loss": 0.3595, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.284953395472703, |
| "grad_norm": 0.31553137736166625, |
| "learning_rate": 3.171936758893281e-05, |
| "loss": 0.3599, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.2876165113182423, |
| "grad_norm": 0.2955708469323441, |
| "learning_rate": 3.16699604743083e-05, |
| "loss": 0.3402, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.2902796271637818, |
| "grad_norm": 0.3913482669169413, |
| "learning_rate": 3.16205533596838e-05, |
| "loss": 0.3758, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.2929427430093208, |
| "grad_norm": 0.35700628657251265, |
| "learning_rate": 3.157114624505929e-05, |
| "loss": 0.3581, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.2956058588548602, |
| "grad_norm": 0.3014863988052369, |
| "learning_rate": 3.152173913043479e-05, |
| "loss": 0.3554, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.2982689747003995, |
| "grad_norm": 0.3644987716917946, |
| "learning_rate": 3.147233201581028e-05, |
| "loss": 0.3562, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.3009320905459387, |
| "grad_norm": 0.30956500239595414, |
| "learning_rate": 3.1422924901185775e-05, |
| "loss": 0.3454, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.303595206391478, |
| "grad_norm": 0.4175232794253573, |
| "learning_rate": 3.1373517786561266e-05, |
| "loss": 0.3641, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.3062583222370172, |
| "grad_norm": 0.28246226404029123, |
| "learning_rate": 3.1324110671936764e-05, |
| "loss": 0.3601, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.3089214380825567, |
| "grad_norm": 0.3755376891190061, |
| "learning_rate": 3.1274703557312255e-05, |
| "loss": 0.3774, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.311584553928096, |
| "grad_norm": 0.27298674883257873, |
| "learning_rate": 3.1225296442687746e-05, |
| "loss": 0.3627, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.3142476697736352, |
| "grad_norm": 0.3706229801540267, |
| "learning_rate": 3.117588932806324e-05, |
| "loss": 0.3735, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.3169107856191744, |
| "grad_norm": 0.28143910738942546, |
| "learning_rate": 3.1126482213438735e-05, |
| "loss": 0.3725, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.3195739014647137, |
| "grad_norm": 0.3349025665393724, |
| "learning_rate": 3.1077075098814226e-05, |
| "loss": 0.3659, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.3222370173102531, |
| "grad_norm": 0.29588987329109573, |
| "learning_rate": 3.1027667984189724e-05, |
| "loss": 0.3749, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.3249001331557924, |
| "grad_norm": 0.27901948593654424, |
| "learning_rate": 3.0978260869565215e-05, |
| "loss": 0.3555, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.3275632490013316, |
| "grad_norm": 0.3180943674654497, |
| "learning_rate": 3.092885375494071e-05, |
| "loss": 0.3399, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.3302263648468708, |
| "grad_norm": 0.3257820898386027, |
| "learning_rate": 3.0879446640316203e-05, |
| "loss": 0.3592, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.33288948069241, |
| "grad_norm": 0.29341640703427146, |
| "learning_rate": 3.08300395256917e-05, |
| "loss": 0.3602, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.3355525965379493, |
| "grad_norm": 0.2975810782284494, |
| "learning_rate": 3.078063241106719e-05, |
| "loss": 0.3392, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.3382157123834886, |
| "grad_norm": 0.26682712897635374, |
| "learning_rate": 3.073122529644269e-05, |
| "loss": 0.3539, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.340878828229028, |
| "grad_norm": 0.29028707302441564, |
| "learning_rate": 3.068181818181818e-05, |
| "loss": 0.3511, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.3435419440745673, |
| "grad_norm": 0.32760242848226895, |
| "learning_rate": 3.063241106719368e-05, |
| "loss": 0.3804, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.3462050599201065, |
| "grad_norm": 0.3092786220233137, |
| "learning_rate": 3.058300395256917e-05, |
| "loss": 0.3699, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.3488681757656458, |
| "grad_norm": 0.3020724813833627, |
| "learning_rate": 3.053359683794467e-05, |
| "loss": 0.3676, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.351531291611185, |
| "grad_norm": 0.2824033966398368, |
| "learning_rate": 3.0484189723320162e-05, |
| "loss": 0.3729, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.3541944074567245, |
| "grad_norm": 0.3618887388165828, |
| "learning_rate": 3.0434782608695656e-05, |
| "loss": 0.3554, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.3568575233022637, |
| "grad_norm": 0.28130180514019887, |
| "learning_rate": 3.038537549407115e-05, |
| "loss": 0.3553, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.359520639147803, |
| "grad_norm": 0.2893653104001468, |
| "learning_rate": 3.0335968379446645e-05, |
| "loss": 0.3782, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.3621837549933422, |
| "grad_norm": 0.3469803538239057, |
| "learning_rate": 3.0286561264822133e-05, |
| "loss": 0.3464, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.3648468708388815, |
| "grad_norm": 0.2732418490440155, |
| "learning_rate": 3.0237154150197627e-05, |
| "loss": 0.3616, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.3675099866844207, |
| "grad_norm": 0.28562062527552706, |
| "learning_rate": 3.018774703557312e-05, |
| "loss": 0.3535, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.37017310252996, |
| "grad_norm": 0.2658369004792245, |
| "learning_rate": 3.0138339920948616e-05, |
| "loss": 0.3725, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.3728362183754994, |
| "grad_norm": 0.29358847654377684, |
| "learning_rate": 3.008893280632411e-05, |
| "loss": 0.3496, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.3754993342210386, |
| "grad_norm": 0.27539943140564604, |
| "learning_rate": 3.0039525691699605e-05, |
| "loss": 0.369, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.378162450066578, |
| "grad_norm": 0.300263236071914, |
| "learning_rate": 2.99901185770751e-05, |
| "loss": 0.3585, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.3808255659121171, |
| "grad_norm": 0.31613231965587374, |
| "learning_rate": 2.9940711462450593e-05, |
| "loss": 0.3777, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.3834886817576564, |
| "grad_norm": 0.2770700909868314, |
| "learning_rate": 2.9891304347826088e-05, |
| "loss": 0.3561, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.3861517976031958, |
| "grad_norm": 0.3050401099786546, |
| "learning_rate": 2.9841897233201582e-05, |
| "loss": 0.3563, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.388814913448735, |
| "grad_norm": 0.2533844111874208, |
| "learning_rate": 2.9792490118577076e-05, |
| "loss": 0.3469, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.3914780292942743, |
| "grad_norm": 0.2695972396120006, |
| "learning_rate": 2.974308300395257e-05, |
| "loss": 0.3621, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.3941411451398136, |
| "grad_norm": 0.28186697645815617, |
| "learning_rate": 2.9693675889328065e-05, |
| "loss": 0.3559, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.3968042609853528, |
| "grad_norm": 0.26628352738719235, |
| "learning_rate": 2.964426877470356e-05, |
| "loss": 0.3646, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.399467376830892, |
| "grad_norm": 0.2833122304678988, |
| "learning_rate": 2.9594861660079054e-05, |
| "loss": 0.3552, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.4021304926764313, |
| "grad_norm": 0.26716813523678146, |
| "learning_rate": 2.954545454545455e-05, |
| "loss": 0.3345, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.4047936085219708, |
| "grad_norm": 0.2754005215378796, |
| "learning_rate": 2.9496047430830043e-05, |
| "loss": 0.3531, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.40745672436751, |
| "grad_norm": 0.3036387674463336, |
| "learning_rate": 2.9446640316205537e-05, |
| "loss": 0.3394, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.4101198402130493, |
| "grad_norm": 0.28788105676480225, |
| "learning_rate": 2.939723320158103e-05, |
| "loss": 0.342, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.4127829560585885, |
| "grad_norm": 0.28191999375557225, |
| "learning_rate": 2.9347826086956526e-05, |
| "loss": 0.3488, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.4154460719041277, |
| "grad_norm": 0.2973599610924886, |
| "learning_rate": 2.9298418972332014e-05, |
| "loss": 0.369, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.4181091877496672, |
| "grad_norm": 0.29639597168777376, |
| "learning_rate": 2.9249011857707508e-05, |
| "loss": 0.3696, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.4207723035952065, |
| "grad_norm": 0.2943864772067253, |
| "learning_rate": 2.9199604743083002e-05, |
| "loss": 0.3708, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.4234354194407457, |
| "grad_norm": 0.3275031870349291, |
| "learning_rate": 2.9150197628458497e-05, |
| "loss": 0.359, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.426098535286285, |
| "grad_norm": 0.288973368099439, |
| "learning_rate": 2.910079051383399e-05, |
| "loss": 0.3534, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.4287616511318242, |
| "grad_norm": 0.3066522465043432, |
| "learning_rate": 2.9051383399209485e-05, |
| "loss": 0.3568, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.4314247669773636, |
| "grad_norm": 0.3056985012074139, |
| "learning_rate": 2.900197628458498e-05, |
| "loss": 0.3457, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.4340878828229027, |
| "grad_norm": 0.2793941010759859, |
| "learning_rate": 2.8952569169960474e-05, |
| "loss": 0.3559, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.4367509986684421, |
| "grad_norm": 0.2535278252678889, |
| "learning_rate": 2.890316205533597e-05, |
| "loss": 0.3528, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.4394141145139814, |
| "grad_norm": 0.2842251418338047, |
| "learning_rate": 2.8853754940711463e-05, |
| "loss": 0.3522, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.4420772303595206, |
| "grad_norm": 0.2778073412674222, |
| "learning_rate": 2.8804347826086957e-05, |
| "loss": 0.3603, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.4447403462050599, |
| "grad_norm": 0.2554361454610928, |
| "learning_rate": 2.8754940711462452e-05, |
| "loss": 0.3635, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.447403462050599, |
| "grad_norm": 0.3049003958057493, |
| "learning_rate": 2.8705533596837946e-05, |
| "loss": 0.3602, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.4500665778961386, |
| "grad_norm": 0.2675057851041106, |
| "learning_rate": 2.865612648221344e-05, |
| "loss": 0.3612, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.4527296937416778, |
| "grad_norm": 0.24887490119807607, |
| "learning_rate": 2.8606719367588935e-05, |
| "loss": 0.3654, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.455392809587217, |
| "grad_norm": 0.3195728958038635, |
| "learning_rate": 2.855731225296443e-05, |
| "loss": 0.3513, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.4580559254327563, |
| "grad_norm": 0.2546987092178984, |
| "learning_rate": 2.8507905138339924e-05, |
| "loss": 0.3398, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.4607190412782955, |
| "grad_norm": 0.29773690473267483, |
| "learning_rate": 2.8458498023715418e-05, |
| "loss": 0.3694, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.463382157123835, |
| "grad_norm": 0.29315481833169116, |
| "learning_rate": 2.8409090909090912e-05, |
| "loss": 0.3426, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.466045272969374, |
| "grad_norm": 0.3296358712762741, |
| "learning_rate": 2.8359683794466403e-05, |
| "loss": 0.3761, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.4687083888149135, |
| "grad_norm": 0.2989240945630588, |
| "learning_rate": 2.8310276679841894e-05, |
| "loss": 0.3574, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.4713715046604527, |
| "grad_norm": 0.2933347023687216, |
| "learning_rate": 2.826086956521739e-05, |
| "loss": 0.3615, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.474034620505992, |
| "grad_norm": 0.31885875118020457, |
| "learning_rate": 2.8211462450592883e-05, |
| "loss": 0.3645, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.4766977363515312, |
| "grad_norm": 0.2777657172797497, |
| "learning_rate": 2.8162055335968378e-05, |
| "loss": 0.3531, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.4793608521970705, |
| "grad_norm": 0.3318676753935055, |
| "learning_rate": 2.8112648221343872e-05, |
| "loss": 0.3668, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.48202396804261, |
| "grad_norm": 0.3316376422278272, |
| "learning_rate": 2.8063241106719366e-05, |
| "loss": 0.348, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.4846870838881492, |
| "grad_norm": 0.34334200086282374, |
| "learning_rate": 2.801383399209486e-05, |
| "loss": 0.3684, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.4873501997336884, |
| "grad_norm": 0.2998752672686297, |
| "learning_rate": 2.7964426877470355e-05, |
| "loss": 0.343, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.4900133155792277, |
| "grad_norm": 0.323718625297975, |
| "learning_rate": 2.791501976284585e-05, |
| "loss": 0.3435, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.492676431424767, |
| "grad_norm": 0.3042077739086944, |
| "learning_rate": 2.7865612648221344e-05, |
| "loss": 0.357, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.4953395472703064, |
| "grad_norm": 0.3132911982849499, |
| "learning_rate": 2.7816205533596838e-05, |
| "loss": 0.3481, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.4980026631158454, |
| "grad_norm": 0.25389583970465485, |
| "learning_rate": 2.7766798418972333e-05, |
| "loss": 0.3567, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.5006657789613849, |
| "grad_norm": 0.263337393271962, |
| "learning_rate": 2.7717391304347827e-05, |
| "loss": 0.3431, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.503328894806924, |
| "grad_norm": 0.2712654205175259, |
| "learning_rate": 2.766798418972332e-05, |
| "loss": 0.3582, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.5059920106524634, |
| "grad_norm": 0.2612896047069462, |
| "learning_rate": 2.7618577075098816e-05, |
| "loss": 0.3445, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.5086551264980028, |
| "grad_norm": 0.27219615901029837, |
| "learning_rate": 2.756916996047431e-05, |
| "loss": 0.3652, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.5113182423435418, |
| "grad_norm": 0.24840155978956244, |
| "learning_rate": 2.7519762845849805e-05, |
| "loss": 0.3421, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.5139813581890813, |
| "grad_norm": 0.24176135920761713, |
| "learning_rate": 2.74703557312253e-05, |
| "loss": 0.3512, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.5166444740346205, |
| "grad_norm": 0.2647051981979065, |
| "learning_rate": 2.7420948616600793e-05, |
| "loss": 0.3499, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.5193075898801598, |
| "grad_norm": 0.27211007538489024, |
| "learning_rate": 2.7371541501976284e-05, |
| "loss": 0.3462, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.521970705725699, |
| "grad_norm": 0.2507493740105373, |
| "learning_rate": 2.732213438735178e-05, |
| "loss": 0.3434, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.5246338215712383, |
| "grad_norm": 0.2693556555763232, |
| "learning_rate": 2.7272727272727273e-05, |
| "loss": 0.3615, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.5272969374167777, |
| "grad_norm": 0.274645850715254, |
| "learning_rate": 2.7223320158102767e-05, |
| "loss": 0.3445, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.5299600532623168, |
| "grad_norm": 0.24351837189102682, |
| "learning_rate": 2.7173913043478262e-05, |
| "loss": 0.3686, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.5326231691078562, |
| "grad_norm": 0.27710340393878174, |
| "learning_rate": 2.7124505928853756e-05, |
| "loss": 0.3547, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.5352862849533955, |
| "grad_norm": 0.2806488747523977, |
| "learning_rate": 2.707509881422925e-05, |
| "loss": 0.3672, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.5379494007989347, |
| "grad_norm": 0.32294972985992815, |
| "learning_rate": 2.7025691699604745e-05, |
| "loss": 0.3527, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.5406125166444742, |
| "grad_norm": 0.24771959309258884, |
| "learning_rate": 2.697628458498024e-05, |
| "loss": 0.3626, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.5432756324900132, |
| "grad_norm": 0.31974111618484613, |
| "learning_rate": 2.6926877470355734e-05, |
| "loss": 0.3553, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.5459387483355527, |
| "grad_norm": 0.28071413168163195, |
| "learning_rate": 2.6877470355731228e-05, |
| "loss": 0.3676, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.548601864181092, |
| "grad_norm": 0.2584928716043461, |
| "learning_rate": 2.6828063241106723e-05, |
| "loss": 0.3427, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.5512649800266312, |
| "grad_norm": 0.2648608207536266, |
| "learning_rate": 2.6778656126482217e-05, |
| "loss": 0.3377, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.5539280958721704, |
| "grad_norm": 0.2671119266891378, |
| "learning_rate": 2.672924901185771e-05, |
| "loss": 0.3559, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.5565912117177096, |
| "grad_norm": 0.2840788018392293, |
| "learning_rate": 2.6679841897233206e-05, |
| "loss": 0.355, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.559254327563249, |
| "grad_norm": 0.29216560920303836, |
| "learning_rate": 2.66304347826087e-05, |
| "loss": 0.3625, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.5619174434087881, |
| "grad_norm": 0.2782406231477868, |
| "learning_rate": 2.6581027667984194e-05, |
| "loss": 0.3544, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.5645805592543276, |
| "grad_norm": 0.27482653297611137, |
| "learning_rate": 2.653162055335969e-05, |
| "loss": 0.3505, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.5672436750998668, |
| "grad_norm": 0.2737639812672786, |
| "learning_rate": 2.6482213438735183e-05, |
| "loss": 0.3339, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.569906790945406, |
| "grad_norm": 0.30172379604459587, |
| "learning_rate": 2.643280632411067e-05, |
| "loss": 0.3574, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.5725699067909455, |
| "grad_norm": 0.30937296239336515, |
| "learning_rate": 2.6383399209486165e-05, |
| "loss": 0.3552, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.5752330226364846, |
| "grad_norm": 0.30263893603202113, |
| "learning_rate": 2.633399209486166e-05, |
| "loss": 0.3806, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.577896138482024, |
| "grad_norm": 0.36351951882340405, |
| "learning_rate": 2.6284584980237154e-05, |
| "loss": 0.3483, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.5805592543275633, |
| "grad_norm": 0.27596120256597706, |
| "learning_rate": 2.623517786561265e-05, |
| "loss": 0.3785, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.5832223701731025, |
| "grad_norm": 0.30086295136857, |
| "learning_rate": 2.6185770750988143e-05, |
| "loss": 0.3536, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.5858854860186418, |
| "grad_norm": 0.3786534775512319, |
| "learning_rate": 2.6136363636363637e-05, |
| "loss": 0.3577, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.588548601864181, |
| "grad_norm": 0.294153803281236, |
| "learning_rate": 2.608695652173913e-05, |
| "loss": 0.3603, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.5912117177097205, |
| "grad_norm": 0.316506621080003, |
| "learning_rate": 2.6037549407114626e-05, |
| "loss": 0.3763, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.5938748335552595, |
| "grad_norm": 0.31539133712695033, |
| "learning_rate": 2.598814229249012e-05, |
| "loss": 0.3373, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.596537949400799, |
| "grad_norm": 0.29787884422276756, |
| "learning_rate": 2.5938735177865615e-05, |
| "loss": 0.3461, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.5992010652463382, |
| "grad_norm": 0.2794574362382508, |
| "learning_rate": 2.588932806324111e-05, |
| "loss": 0.3607, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.6018641810918774, |
| "grad_norm": 0.28198668683252337, |
| "learning_rate": 2.5839920948616603e-05, |
| "loss": 0.3698, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.604527296937417, |
| "grad_norm": 0.2767707782956735, |
| "learning_rate": 2.5790513833992098e-05, |
| "loss": 0.3358, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.607190412782956, |
| "grad_norm": 0.26770289783678053, |
| "learning_rate": 2.5741106719367592e-05, |
| "loss": 0.3376, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.6098535286284954, |
| "grad_norm": 0.3244106061056206, |
| "learning_rate": 2.5691699604743087e-05, |
| "loss": 0.3515, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.6125166444740346, |
| "grad_norm": 0.29260066196150414, |
| "learning_rate": 2.564229249011858e-05, |
| "loss": 0.3712, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.6151797603195739, |
| "grad_norm": 0.39595763824507085, |
| "learning_rate": 2.5592885375494075e-05, |
| "loss": 0.3402, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.6178428761651131, |
| "grad_norm": 0.2911698047056363, |
| "learning_rate": 2.554347826086957e-05, |
| "loss": 0.3579, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.6205059920106524, |
| "grad_norm": 0.30667505894069086, |
| "learning_rate": 2.5494071146245064e-05, |
| "loss": 0.3488, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.6231691078561918, |
| "grad_norm": 0.3377626596928706, |
| "learning_rate": 2.5444664031620552e-05, |
| "loss": 0.3455, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.6258322237017309, |
| "grad_norm": 0.3019507720671119, |
| "learning_rate": 2.5395256916996046e-05, |
| "loss": 0.352, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.6284953395472703, |
| "grad_norm": 0.2835949922829532, |
| "learning_rate": 2.534584980237154e-05, |
| "loss": 0.3602, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.6311584553928096, |
| "grad_norm": 0.32444980944074003, |
| "learning_rate": 2.5296442687747035e-05, |
| "loss": 0.3626, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.6338215712383488, |
| "grad_norm": 0.30852262333031255, |
| "learning_rate": 2.524703557312253e-05, |
| "loss": 0.3415, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.6364846870838883, |
| "grad_norm": 0.2769395153617194, |
| "learning_rate": 2.5197628458498024e-05, |
| "loss": 0.36, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.6391478029294273, |
| "grad_norm": 0.3225695333542542, |
| "learning_rate": 2.5148221343873518e-05, |
| "loss": 0.3504, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.6418109187749668, |
| "grad_norm": 0.26000908179747434, |
| "learning_rate": 2.5098814229249012e-05, |
| "loss": 0.3511, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.644474034620506, |
| "grad_norm": 0.2558998742720099, |
| "learning_rate": 2.5049407114624507e-05, |
| "loss": 0.3551, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.6471371504660453, |
| "grad_norm": 0.2810631366750719, |
| "learning_rate": 2.5e-05, |
| "loss": 0.359, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.6498002663115847, |
| "grad_norm": 0.2764036943026752, |
| "learning_rate": 2.4950592885375496e-05, |
| "loss": 0.3552, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.6524633821571237, |
| "grad_norm": 0.29157627798525887, |
| "learning_rate": 2.490118577075099e-05, |
| "loss": 0.3477, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.6551264980026632, |
| "grad_norm": 0.30005399168360375, |
| "learning_rate": 2.4851778656126484e-05, |
| "loss": 0.3635, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.6577896138482024, |
| "grad_norm": 0.28682265413573244, |
| "learning_rate": 2.480237154150198e-05, |
| "loss": 0.3472, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.6604527296937417, |
| "grad_norm": 0.30810891527099654, |
| "learning_rate": 2.475296442687747e-05, |
| "loss": 0.3453, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.663115845539281, |
| "grad_norm": 0.2894658697891752, |
| "learning_rate": 2.4703557312252964e-05, |
| "loss": 0.348, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.6657789613848202, |
| "grad_norm": 0.26056026406293753, |
| "learning_rate": 2.465415019762846e-05, |
| "loss": 0.3422, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.6684420772303596, |
| "grad_norm": 0.27955802745377495, |
| "learning_rate": 2.4604743083003953e-05, |
| "loss": 0.351, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.6711051930758987, |
| "grad_norm": 0.2589447838000819, |
| "learning_rate": 2.4555335968379447e-05, |
| "loss": 0.3606, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.6737683089214381, |
| "grad_norm": 0.2726720946381243, |
| "learning_rate": 2.450592885375494e-05, |
| "loss": 0.3553, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.6764314247669774, |
| "grad_norm": 0.29585982981776077, |
| "learning_rate": 2.4456521739130436e-05, |
| "loss": 0.3429, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.6790945406125166, |
| "grad_norm": 0.25866785993085295, |
| "learning_rate": 2.440711462450593e-05, |
| "loss": 0.3464, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.681757656458056, |
| "grad_norm": 0.26186173743371105, |
| "learning_rate": 2.4357707509881425e-05, |
| "loss": 0.3624, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.684420772303595, |
| "grad_norm": 0.27529386090536323, |
| "learning_rate": 2.430830039525692e-05, |
| "loss": 0.3464, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.6870838881491346, |
| "grad_norm": 0.24305368943964414, |
| "learning_rate": 2.425889328063241e-05, |
| "loss": 0.3542, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.6897470039946738, |
| "grad_norm": 0.263035963649886, |
| "learning_rate": 2.4209486166007905e-05, |
| "loss": 0.3638, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.692410119840213, |
| "grad_norm": 0.2737080512587832, |
| "learning_rate": 2.41600790513834e-05, |
| "loss": 0.3368, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.6950732356857523, |
| "grad_norm": 0.33404220986339256, |
| "learning_rate": 2.4110671936758893e-05, |
| "loss": 0.3724, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.6977363515312915, |
| "grad_norm": 0.2897416261690682, |
| "learning_rate": 2.4061264822134388e-05, |
| "loss": 0.3593, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.700399467376831, |
| "grad_norm": 0.3041816217006561, |
| "learning_rate": 2.4011857707509882e-05, |
| "loss": 0.3513, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.70306258322237, |
| "grad_norm": 0.2677006117678147, |
| "learning_rate": 2.3962450592885376e-05, |
| "loss": 0.3594, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.7057256990679095, |
| "grad_norm": 0.2783081801536929, |
| "learning_rate": 2.391304347826087e-05, |
| "loss": 0.3497, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.7083888149134487, |
| "grad_norm": 0.2949970037820572, |
| "learning_rate": 2.3863636363636365e-05, |
| "loss": 0.3527, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.711051930758988, |
| "grad_norm": 0.29435826287206446, |
| "learning_rate": 2.381422924901186e-05, |
| "loss": 0.3476, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.7137150466045274, |
| "grad_norm": 0.22820704347237256, |
| "learning_rate": 2.376482213438735e-05, |
| "loss": 0.3563, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.7163781624500665, |
| "grad_norm": 0.2662369562790593, |
| "learning_rate": 2.3715415019762845e-05, |
| "loss": 0.3564, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.719041278295606, |
| "grad_norm": 0.2660848595820705, |
| "learning_rate": 2.366600790513834e-05, |
| "loss": 0.3507, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.7217043941411452, |
| "grad_norm": 0.2736362440179924, |
| "learning_rate": 2.3616600790513834e-05, |
| "loss": 0.3583, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.7243675099866844, |
| "grad_norm": 0.2877841104207108, |
| "learning_rate": 2.3567193675889328e-05, |
| "loss": 0.3543, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.7270306258322237, |
| "grad_norm": 0.26935615929008033, |
| "learning_rate": 2.3517786561264823e-05, |
| "loss": 0.3437, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.729693741677763, |
| "grad_norm": 0.2578776022705283, |
| "learning_rate": 2.3468379446640317e-05, |
| "loss": 0.3665, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.7323568575233024, |
| "grad_norm": 0.28540169794092723, |
| "learning_rate": 2.341897233201581e-05, |
| "loss": 0.3427, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.7350199733688414, |
| "grad_norm": 0.302406678764912, |
| "learning_rate": 2.3369565217391306e-05, |
| "loss": 0.3493, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.7376830892143809, |
| "grad_norm": 0.2613558705976954, |
| "learning_rate": 2.33201581027668e-05, |
| "loss": 0.3384, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.74034620505992, |
| "grad_norm": 0.31445958338443253, |
| "learning_rate": 2.327075098814229e-05, |
| "loss": 0.3563, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.7430093209054593, |
| "grad_norm": 0.26295035895535324, |
| "learning_rate": 2.3221343873517785e-05, |
| "loss": 0.3523, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.7456724367509988, |
| "grad_norm": 0.26455791446031185, |
| "learning_rate": 2.317193675889328e-05, |
| "loss": 0.347, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.7483355525965378, |
| "grad_norm": 0.267920904226216, |
| "learning_rate": 2.3122529644268774e-05, |
| "loss": 0.3757, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.7509986684420773, |
| "grad_norm": 0.29766057642277893, |
| "learning_rate": 2.307312252964427e-05, |
| "loss": 0.3388, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.7536617842876165, |
| "grad_norm": 0.2614333124037635, |
| "learning_rate": 2.3023715415019763e-05, |
| "loss": 0.3448, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.7563249001331558, |
| "grad_norm": 0.2460873862604595, |
| "learning_rate": 2.2974308300395257e-05, |
| "loss": 0.3701, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.758988015978695, |
| "grad_norm": 0.32415471595000084, |
| "learning_rate": 2.2924901185770752e-05, |
| "loss": 0.3502, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.7616511318242343, |
| "grad_norm": 0.28861202445680917, |
| "learning_rate": 2.2875494071146246e-05, |
| "loss": 0.3419, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.7643142476697737, |
| "grad_norm": 0.33178480237112284, |
| "learning_rate": 2.282608695652174e-05, |
| "loss": 0.364, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.7669773635153128, |
| "grad_norm": 0.28362428197182826, |
| "learning_rate": 2.2776679841897235e-05, |
| "loss": 0.3447, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.7696404793608522, |
| "grad_norm": 0.2593493932357841, |
| "learning_rate": 2.272727272727273e-05, |
| "loss": 0.3566, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.7723035952063915, |
| "grad_norm": 0.32399886004151673, |
| "learning_rate": 2.267786561264822e-05, |
| "loss": 0.352, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.7749667110519307, |
| "grad_norm": 0.2898594306022826, |
| "learning_rate": 2.2628458498023715e-05, |
| "loss": 0.3552, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.7776298268974702, |
| "grad_norm": 0.30141440115798507, |
| "learning_rate": 2.257905138339921e-05, |
| "loss": 0.3394, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.7802929427430092, |
| "grad_norm": 0.2748566768296462, |
| "learning_rate": 2.2529644268774703e-05, |
| "loss": 0.3639, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.7829560585885487, |
| "grad_norm": 0.2597063738725183, |
| "learning_rate": 2.2480237154150198e-05, |
| "loss": 0.3523, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.785619174434088, |
| "grad_norm": 0.27428899527158185, |
| "learning_rate": 2.2430830039525692e-05, |
| "loss": 0.3576, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.7882822902796272, |
| "grad_norm": 0.27821642567843663, |
| "learning_rate": 2.2381422924901187e-05, |
| "loss": 0.3431, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.7909454061251664, |
| "grad_norm": 0.3009289717068197, |
| "learning_rate": 2.233201581027668e-05, |
| "loss": 0.3506, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.7936085219707056, |
| "grad_norm": 0.27901500754869907, |
| "learning_rate": 2.2282608695652175e-05, |
| "loss": 0.3413, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.796271637816245, |
| "grad_norm": 0.26359419972730574, |
| "learning_rate": 2.223320158102767e-05, |
| "loss": 0.3574, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.7989347536617841, |
| "grad_norm": 0.301875250326235, |
| "learning_rate": 2.2183794466403164e-05, |
| "loss": 0.3586, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.8015978695073236, |
| "grad_norm": 0.293396805853932, |
| "learning_rate": 2.213438735177866e-05, |
| "loss": 0.3631, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.8042609853528628, |
| "grad_norm": 0.2627077951859255, |
| "learning_rate": 2.2084980237154153e-05, |
| "loss": 0.3421, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.806924101198402, |
| "grad_norm": 0.2910041424241653, |
| "learning_rate": 2.2035573122529647e-05, |
| "loss": 0.3508, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.8095872170439415, |
| "grad_norm": 0.2700422024120216, |
| "learning_rate": 2.198616600790514e-05, |
| "loss": 0.3656, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.8122503328894806, |
| "grad_norm": 0.261122870241434, |
| "learning_rate": 2.1936758893280636e-05, |
| "loss": 0.3727, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.81491344873502, |
| "grad_norm": 0.2759182990026985, |
| "learning_rate": 2.188735177865613e-05, |
| "loss": 0.3429, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.8175765645805593, |
| "grad_norm": 0.25688731642570295, |
| "learning_rate": 2.183794466403162e-05, |
| "loss": 0.3638, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.8202396804260985, |
| "grad_norm": 0.2583299882188377, |
| "learning_rate": 2.1788537549407116e-05, |
| "loss": 0.3627, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.822902796271638, |
| "grad_norm": 0.24824630818405677, |
| "learning_rate": 2.173913043478261e-05, |
| "loss": 0.3509, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.825565912117177, |
| "grad_norm": 0.2775222142294749, |
| "learning_rate": 2.1689723320158105e-05, |
| "loss": 0.3421, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.8282290279627165, |
| "grad_norm": 0.23869310034905467, |
| "learning_rate": 2.16403162055336e-05, |
| "loss": 0.3376, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.8308921438082557, |
| "grad_norm": 0.2933357911415976, |
| "learning_rate": 2.1590909090909093e-05, |
| "loss": 0.3521, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.833555259653795, |
| "grad_norm": 0.27832210393035933, |
| "learning_rate": 2.1541501976284588e-05, |
| "loss": 0.3553, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.8362183754993342, |
| "grad_norm": 0.3087436970907245, |
| "learning_rate": 2.1492094861660082e-05, |
| "loss": 0.347, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.8388814913448734, |
| "grad_norm": 0.2943513499295711, |
| "learning_rate": 2.1442687747035576e-05, |
| "loss": 0.3536, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.841544607190413, |
| "grad_norm": 0.26722654225950093, |
| "learning_rate": 2.1393280632411067e-05, |
| "loss": 0.3624, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.844207723035952, |
| "grad_norm": 0.2686739391641238, |
| "learning_rate": 2.1343873517786562e-05, |
| "loss": 0.3551, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.8468708388814914, |
| "grad_norm": 0.3317404535951985, |
| "learning_rate": 2.1294466403162056e-05, |
| "loss": 0.3519, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.8495339547270306, |
| "grad_norm": 0.25888461414583197, |
| "learning_rate": 2.124505928853755e-05, |
| "loss": 0.3621, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.8521970705725699, |
| "grad_norm": 0.2388947383775022, |
| "learning_rate": 2.1195652173913045e-05, |
| "loss": 0.3464, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.8548601864181093, |
| "grad_norm": 0.32253652339123096, |
| "learning_rate": 2.114624505928854e-05, |
| "loss": 0.3486, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.8575233022636484, |
| "grad_norm": 0.23971764237483872, |
| "learning_rate": 2.1096837944664034e-05, |
| "loss": 0.3469, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.8601864181091878, |
| "grad_norm": 0.2822968430519757, |
| "learning_rate": 2.1047430830039528e-05, |
| "loss": 0.3464, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.862849533954727, |
| "grad_norm": 0.28707092445711563, |
| "learning_rate": 2.0998023715415023e-05, |
| "loss": 0.3454, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.8655126498002663, |
| "grad_norm": 0.26633357589223594, |
| "learning_rate": 2.0948616600790517e-05, |
| "loss": 0.3528, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.8681757656458056, |
| "grad_norm": 0.30480677025070735, |
| "learning_rate": 2.0899209486166008e-05, |
| "loss": 0.3705, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.8708388814913448, |
| "grad_norm": 0.2589295473498244, |
| "learning_rate": 2.0849802371541502e-05, |
| "loss": 0.366, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.8735019973368843, |
| "grad_norm": 0.3615686651832072, |
| "learning_rate": 2.0800395256916997e-05, |
| "loss": 0.3545, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.8761651131824233, |
| "grad_norm": 0.2643316410023579, |
| "learning_rate": 2.075098814229249e-05, |
| "loss": 0.3478, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.8788282290279628, |
| "grad_norm": 0.3002604064308654, |
| "learning_rate": 2.0701581027667985e-05, |
| "loss": 0.3691, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.881491344873502, |
| "grad_norm": 0.2842611156357375, |
| "learning_rate": 2.065217391304348e-05, |
| "loss": 0.361, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.8841544607190412, |
| "grad_norm": 0.3130168183378823, |
| "learning_rate": 2.0602766798418974e-05, |
| "loss": 0.3536, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.8868175765645807, |
| "grad_norm": 0.3519161067004107, |
| "learning_rate": 2.055335968379447e-05, |
| "loss": 0.3557, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.8894806924101197, |
| "grad_norm": 0.27233651062760655, |
| "learning_rate": 2.0503952569169963e-05, |
| "loss": 0.3594, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.8921438082556592, |
| "grad_norm": 0.31833253788492577, |
| "learning_rate": 2.0454545454545457e-05, |
| "loss": 0.3489, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.8948069241011984, |
| "grad_norm": 0.24567699858003664, |
| "learning_rate": 2.040513833992095e-05, |
| "loss": 0.3367, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.8974700399467377, |
| "grad_norm": 0.2969050880879015, |
| "learning_rate": 2.0355731225296443e-05, |
| "loss": 0.3537, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.900133155792277, |
| "grad_norm": 0.3189993081371087, |
| "learning_rate": 2.0306324110671937e-05, |
| "loss": 0.3669, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.9027962716378162, |
| "grad_norm": 0.24524923802003742, |
| "learning_rate": 2.025691699604743e-05, |
| "loss": 0.3448, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.9054593874833556, |
| "grad_norm": 0.3002012848114626, |
| "learning_rate": 2.0207509881422926e-05, |
| "loss": 0.3592, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.9081225033288947, |
| "grad_norm": 0.2577221774068482, |
| "learning_rate": 2.015810276679842e-05, |
| "loss": 0.3615, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.9107856191744341, |
| "grad_norm": 0.2662922499052391, |
| "learning_rate": 2.0108695652173915e-05, |
| "loss": 0.3564, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.9134487350199734, |
| "grad_norm": 0.2748543453818437, |
| "learning_rate": 2.005928853754941e-05, |
| "loss": 0.3367, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.9161118508655126, |
| "grad_norm": 0.29453902437825724, |
| "learning_rate": 2.0009881422924903e-05, |
| "loss": 0.3346, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.918774966711052, |
| "grad_norm": 0.2958384946201868, |
| "learning_rate": 1.9960474308300398e-05, |
| "loss": 0.3653, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.921438082556591, |
| "grad_norm": 0.3110870857995837, |
| "learning_rate": 1.991106719367589e-05, |
| "loss": 0.3626, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.9241011984021306, |
| "grad_norm": 0.29754006004298117, |
| "learning_rate": 1.9861660079051383e-05, |
| "loss": 0.3595, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.9267643142476698, |
| "grad_norm": 0.2637206512469971, |
| "learning_rate": 1.9812252964426878e-05, |
| "loss": 0.3637, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.929427430093209, |
| "grad_norm": 0.28572071909963137, |
| "learning_rate": 1.9762845849802372e-05, |
| "loss": 0.351, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.9320905459387483, |
| "grad_norm": 0.26449910347561634, |
| "learning_rate": 1.9713438735177866e-05, |
| "loss": 0.3607, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.9347536617842875, |
| "grad_norm": 0.312752897256756, |
| "learning_rate": 1.966403162055336e-05, |
| "loss": 0.3591, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.937416777629827, |
| "grad_norm": 0.2592410502272739, |
| "learning_rate": 1.9614624505928855e-05, |
| "loss": 0.3439, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.940079893475366, |
| "grad_norm": 0.24250837194662156, |
| "learning_rate": 1.956521739130435e-05, |
| "loss": 0.3322, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.9427430093209055, |
| "grad_norm": 0.27100632690728255, |
| "learning_rate": 1.9515810276679844e-05, |
| "loss": 0.3478, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.9454061251664447, |
| "grad_norm": 0.2792664428193274, |
| "learning_rate": 1.9466403162055335e-05, |
| "loss": 0.3667, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.948069241011984, |
| "grad_norm": 0.2619688688672022, |
| "learning_rate": 1.941699604743083e-05, |
| "loss": 0.3533, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.9507323568575234, |
| "grad_norm": 0.250474396728028, |
| "learning_rate": 1.9367588932806324e-05, |
| "loss": 0.3615, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.9533954727030625, |
| "grad_norm": 0.2592917559527508, |
| "learning_rate": 1.9318181818181818e-05, |
| "loss": 0.35, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.956058588548602, |
| "grad_norm": 0.28358412495828245, |
| "learning_rate": 1.9268774703557312e-05, |
| "loss": 0.3438, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.9587217043941412, |
| "grad_norm": 0.2905168266596484, |
| "learning_rate": 1.9219367588932807e-05, |
| "loss": 0.3363, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.9613848202396804, |
| "grad_norm": 0.2558334592646534, |
| "learning_rate": 1.91699604743083e-05, |
| "loss": 0.3636, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.9640479360852197, |
| "grad_norm": 0.2856486905717076, |
| "learning_rate": 1.9120553359683796e-05, |
| "loss": 0.3423, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.966711051930759, |
| "grad_norm": 0.25338680291782845, |
| "learning_rate": 1.907114624505929e-05, |
| "loss": 0.3647, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.9693741677762984, |
| "grad_norm": 0.25927241893410596, |
| "learning_rate": 1.9021739130434784e-05, |
| "loss": 0.361, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.9720372836218374, |
| "grad_norm": 0.26559107296256046, |
| "learning_rate": 1.8972332015810275e-05, |
| "loss": 0.3532, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.9747003994673769, |
| "grad_norm": 0.23909262831928838, |
| "learning_rate": 1.892292490118577e-05, |
| "loss": 0.3458, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.977363515312916, |
| "grad_norm": 0.29570607043062813, |
| "learning_rate": 1.8873517786561264e-05, |
| "loss": 0.3651, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.9800266311584553, |
| "grad_norm": 0.26837566907079335, |
| "learning_rate": 1.882411067193676e-05, |
| "loss": 0.3624, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.9826897470039948, |
| "grad_norm": 0.24855234703810405, |
| "learning_rate": 1.8774703557312253e-05, |
| "loss": 0.3458, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.9853528628495338, |
| "grad_norm": 0.2581276414313357, |
| "learning_rate": 1.8725296442687747e-05, |
| "loss": 0.3532, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.9880159786950733, |
| "grad_norm": 0.2769192507293847, |
| "learning_rate": 1.867588932806324e-05, |
| "loss": 0.3662, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.9906790945406125, |
| "grad_norm": 0.24782306003081656, |
| "learning_rate": 1.8626482213438736e-05, |
| "loss": 0.3444, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.9933422103861518, |
| "grad_norm": 0.23338769959338118, |
| "learning_rate": 1.857707509881423e-05, |
| "loss": 0.3375, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.996005326231691, |
| "grad_norm": 0.2399452380668713, |
| "learning_rate": 1.8527667984189725e-05, |
| "loss": 0.3577, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.9986684420772303, |
| "grad_norm": 0.24061002934920092, |
| "learning_rate": 1.8478260869565216e-05, |
| "loss": 0.3558, |
| "step": 751 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.3876397436943037, |
| "learning_rate": 1.842885375494071e-05, |
| "loss": 0.3232, |
| "step": 752 |
| }, |
| { |
| "epoch": 2.0026631158455395, |
| "grad_norm": 0.3255318592205839, |
| "learning_rate": 1.8379446640316205e-05, |
| "loss": 0.2829, |
| "step": 753 |
| }, |
| { |
| "epoch": 2.0053262316910785, |
| "grad_norm": 0.2688339427044817, |
| "learning_rate": 1.83300395256917e-05, |
| "loss": 0.2808, |
| "step": 754 |
| }, |
| { |
| "epoch": 2.007989347536618, |
| "grad_norm": 0.31006819974729777, |
| "learning_rate": 1.8280632411067193e-05, |
| "loss": 0.2619, |
| "step": 755 |
| }, |
| { |
| "epoch": 2.010652463382157, |
| "grad_norm": 0.3391232912122683, |
| "learning_rate": 1.8231225296442688e-05, |
| "loss": 0.2797, |
| "step": 756 |
| }, |
| { |
| "epoch": 2.0133155792276964, |
| "grad_norm": 0.22961985808221483, |
| "learning_rate": 1.8181818181818182e-05, |
| "loss": 0.2716, |
| "step": 757 |
| }, |
| { |
| "epoch": 2.015978695073236, |
| "grad_norm": 0.3029488541333639, |
| "learning_rate": 1.8132411067193676e-05, |
| "loss": 0.2748, |
| "step": 758 |
| }, |
| { |
| "epoch": 2.018641810918775, |
| "grad_norm": 0.3272089229771229, |
| "learning_rate": 1.808300395256917e-05, |
| "loss": 0.259, |
| "step": 759 |
| }, |
| { |
| "epoch": 2.0213049267643144, |
| "grad_norm": 0.2632568547847837, |
| "learning_rate": 1.8033596837944665e-05, |
| "loss": 0.2765, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.0239680426098534, |
| "grad_norm": 0.278440470950714, |
| "learning_rate": 1.7984189723320156e-05, |
| "loss": 0.2638, |
| "step": 761 |
| }, |
| { |
| "epoch": 2.026631158455393, |
| "grad_norm": 0.3139907981507755, |
| "learning_rate": 1.793478260869565e-05, |
| "loss": 0.2805, |
| "step": 762 |
| }, |
| { |
| "epoch": 2.029294274300932, |
| "grad_norm": 0.26955412514066035, |
| "learning_rate": 1.7885375494071145e-05, |
| "loss": 0.2617, |
| "step": 763 |
| }, |
| { |
| "epoch": 2.0319573901464714, |
| "grad_norm": 0.2583856619944918, |
| "learning_rate": 1.783596837944664e-05, |
| "loss": 0.2678, |
| "step": 764 |
| }, |
| { |
| "epoch": 2.034620505992011, |
| "grad_norm": 0.27298004272506543, |
| "learning_rate": 1.7786561264822134e-05, |
| "loss": 0.2674, |
| "step": 765 |
| }, |
| { |
| "epoch": 2.03728362183755, |
| "grad_norm": 0.272776301937256, |
| "learning_rate": 1.7737154150197628e-05, |
| "loss": 0.2783, |
| "step": 766 |
| }, |
| { |
| "epoch": 2.0399467376830893, |
| "grad_norm": 0.23604664211204196, |
| "learning_rate": 1.7687747035573123e-05, |
| "loss": 0.2694, |
| "step": 767 |
| }, |
| { |
| "epoch": 2.0426098535286283, |
| "grad_norm": 0.2705685089413051, |
| "learning_rate": 1.7638339920948617e-05, |
| "loss": 0.2835, |
| "step": 768 |
| }, |
| { |
| "epoch": 2.045272969374168, |
| "grad_norm": 0.2348856411632335, |
| "learning_rate": 1.758893280632411e-05, |
| "loss": 0.2591, |
| "step": 769 |
| }, |
| { |
| "epoch": 2.0479360852197073, |
| "grad_norm": 0.24862768901035942, |
| "learning_rate": 1.7539525691699606e-05, |
| "loss": 0.2641, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.0505992010652463, |
| "grad_norm": 0.25511185080416404, |
| "learning_rate": 1.74901185770751e-05, |
| "loss": 0.2709, |
| "step": 771 |
| }, |
| { |
| "epoch": 2.0532623169107858, |
| "grad_norm": 0.24302033763825434, |
| "learning_rate": 1.7440711462450594e-05, |
| "loss": 0.2759, |
| "step": 772 |
| }, |
| { |
| "epoch": 2.0559254327563248, |
| "grad_norm": 0.20872328589643, |
| "learning_rate": 1.739130434782609e-05, |
| "loss": 0.2632, |
| "step": 773 |
| }, |
| { |
| "epoch": 2.0585885486018642, |
| "grad_norm": 0.26636593407387676, |
| "learning_rate": 1.7341897233201583e-05, |
| "loss": 0.2636, |
| "step": 774 |
| }, |
| { |
| "epoch": 2.0612516644474033, |
| "grad_norm": 0.28091568129361494, |
| "learning_rate": 1.7292490118577078e-05, |
| "loss": 0.2628, |
| "step": 775 |
| }, |
| { |
| "epoch": 2.0639147802929427, |
| "grad_norm": 0.2560746499348802, |
| "learning_rate": 1.7243083003952572e-05, |
| "loss": 0.2655, |
| "step": 776 |
| }, |
| { |
| "epoch": 2.066577896138482, |
| "grad_norm": 0.26276899174108526, |
| "learning_rate": 1.7193675889328066e-05, |
| "loss": 0.2728, |
| "step": 777 |
| }, |
| { |
| "epoch": 2.069241011984021, |
| "grad_norm": 0.26384946938199305, |
| "learning_rate": 1.714426877470356e-05, |
| "loss": 0.2747, |
| "step": 778 |
| }, |
| { |
| "epoch": 2.0719041278295607, |
| "grad_norm": 0.23715984391863434, |
| "learning_rate": 1.7094861660079055e-05, |
| "loss": 0.2694, |
| "step": 779 |
| }, |
| { |
| "epoch": 2.0745672436750997, |
| "grad_norm": 0.2404103191932088, |
| "learning_rate": 1.7045454545454546e-05, |
| "loss": 0.2844, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.077230359520639, |
| "grad_norm": 0.2295546055568796, |
| "learning_rate": 1.699604743083004e-05, |
| "loss": 0.2563, |
| "step": 781 |
| }, |
| { |
| "epoch": 2.0798934753661786, |
| "grad_norm": 0.25081138258701596, |
| "learning_rate": 1.6946640316205535e-05, |
| "loss": 0.2657, |
| "step": 782 |
| }, |
| { |
| "epoch": 2.0825565912117177, |
| "grad_norm": 0.23299102413940379, |
| "learning_rate": 1.689723320158103e-05, |
| "loss": 0.2841, |
| "step": 783 |
| }, |
| { |
| "epoch": 2.085219707057257, |
| "grad_norm": 0.2352302932330538, |
| "learning_rate": 1.6847826086956524e-05, |
| "loss": 0.2696, |
| "step": 784 |
| }, |
| { |
| "epoch": 2.087882822902796, |
| "grad_norm": 0.2396805580902733, |
| "learning_rate": 1.6798418972332018e-05, |
| "loss": 0.2687, |
| "step": 785 |
| }, |
| { |
| "epoch": 2.0905459387483356, |
| "grad_norm": 0.22897484277870242, |
| "learning_rate": 1.6749011857707512e-05, |
| "loss": 0.2678, |
| "step": 786 |
| }, |
| { |
| "epoch": 2.0932090545938746, |
| "grad_norm": 0.224891214268194, |
| "learning_rate": 1.6699604743083007e-05, |
| "loss": 0.2729, |
| "step": 787 |
| }, |
| { |
| "epoch": 2.095872170439414, |
| "grad_norm": 0.26860270920114504, |
| "learning_rate": 1.66501976284585e-05, |
| "loss": 0.2581, |
| "step": 788 |
| }, |
| { |
| "epoch": 2.0985352862849536, |
| "grad_norm": 0.24961552358211944, |
| "learning_rate": 1.6600790513833996e-05, |
| "loss": 0.2624, |
| "step": 789 |
| }, |
| { |
| "epoch": 2.1011984021304926, |
| "grad_norm": 0.22308364748740767, |
| "learning_rate": 1.6551383399209487e-05, |
| "loss": 0.2647, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.103861517976032, |
| "grad_norm": 0.2380839364570976, |
| "learning_rate": 1.650197628458498e-05, |
| "loss": 0.271, |
| "step": 791 |
| }, |
| { |
| "epoch": 2.106524633821571, |
| "grad_norm": 0.24381955578610937, |
| "learning_rate": 1.6452569169960475e-05, |
| "loss": 0.2694, |
| "step": 792 |
| }, |
| { |
| "epoch": 2.1091877496671105, |
| "grad_norm": 0.23758646142710013, |
| "learning_rate": 1.640316205533597e-05, |
| "loss": 0.2775, |
| "step": 793 |
| }, |
| { |
| "epoch": 2.11185086551265, |
| "grad_norm": 0.23538198400085814, |
| "learning_rate": 1.6353754940711464e-05, |
| "loss": 0.2814, |
| "step": 794 |
| }, |
| { |
| "epoch": 2.114513981358189, |
| "grad_norm": 0.21674748879871775, |
| "learning_rate": 1.630434782608696e-05, |
| "loss": 0.2548, |
| "step": 795 |
| }, |
| { |
| "epoch": 2.1171770972037285, |
| "grad_norm": 0.24105445224605443, |
| "learning_rate": 1.6254940711462453e-05, |
| "loss": 0.2641, |
| "step": 796 |
| }, |
| { |
| "epoch": 2.1198402130492675, |
| "grad_norm": 0.23753067329213304, |
| "learning_rate": 1.6205533596837947e-05, |
| "loss": 0.2709, |
| "step": 797 |
| }, |
| { |
| "epoch": 2.122503328894807, |
| "grad_norm": 0.23404194217010732, |
| "learning_rate": 1.615612648221344e-05, |
| "loss": 0.271, |
| "step": 798 |
| }, |
| { |
| "epoch": 2.125166444740346, |
| "grad_norm": 0.2121069651623829, |
| "learning_rate": 1.6106719367588936e-05, |
| "loss": 0.2627, |
| "step": 799 |
| }, |
| { |
| "epoch": 2.1278295605858855, |
| "grad_norm": 0.22624703639894228, |
| "learning_rate": 1.6057312252964427e-05, |
| "loss": 0.2538, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.130492676431425, |
| "grad_norm": 0.2386292992012449, |
| "learning_rate": 1.600790513833992e-05, |
| "loss": 0.2576, |
| "step": 801 |
| }, |
| { |
| "epoch": 2.133155792276964, |
| "grad_norm": 0.22877737188756703, |
| "learning_rate": 1.5958498023715416e-05, |
| "loss": 0.2727, |
| "step": 802 |
| }, |
| { |
| "epoch": 2.1358189081225034, |
| "grad_norm": 0.27117813021650006, |
| "learning_rate": 1.590909090909091e-05, |
| "loss": 0.2895, |
| "step": 803 |
| }, |
| { |
| "epoch": 2.1384820239680424, |
| "grad_norm": 0.22867337217751538, |
| "learning_rate": 1.5859683794466405e-05, |
| "loss": 0.2734, |
| "step": 804 |
| }, |
| { |
| "epoch": 2.141145139813582, |
| "grad_norm": 0.24512337588151054, |
| "learning_rate": 1.58102766798419e-05, |
| "loss": 0.273, |
| "step": 805 |
| }, |
| { |
| "epoch": 2.1438082556591214, |
| "grad_norm": 0.2727608695581687, |
| "learning_rate": 1.5760869565217393e-05, |
| "loss": 0.2901, |
| "step": 806 |
| }, |
| { |
| "epoch": 2.1464713715046604, |
| "grad_norm": 0.2387866974014394, |
| "learning_rate": 1.5711462450592888e-05, |
| "loss": 0.2643, |
| "step": 807 |
| }, |
| { |
| "epoch": 2.1491344873502, |
| "grad_norm": 0.22440460077720992, |
| "learning_rate": 1.5662055335968382e-05, |
| "loss": 0.2653, |
| "step": 808 |
| }, |
| { |
| "epoch": 2.151797603195739, |
| "grad_norm": 0.248288295680679, |
| "learning_rate": 1.5612648221343873e-05, |
| "loss": 0.2549, |
| "step": 809 |
| }, |
| { |
| "epoch": 2.1544607190412783, |
| "grad_norm": 0.24110717758110342, |
| "learning_rate": 1.5563241106719367e-05, |
| "loss": 0.2748, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.157123834886818, |
| "grad_norm": 0.23171730936199766, |
| "learning_rate": 1.5513833992094862e-05, |
| "loss": 0.2709, |
| "step": 811 |
| }, |
| { |
| "epoch": 2.159786950732357, |
| "grad_norm": 0.22345452374040276, |
| "learning_rate": 1.5464426877470356e-05, |
| "loss": 0.2688, |
| "step": 812 |
| }, |
| { |
| "epoch": 2.1624500665778963, |
| "grad_norm": 0.26551342546130663, |
| "learning_rate": 1.541501976284585e-05, |
| "loss": 0.2709, |
| "step": 813 |
| }, |
| { |
| "epoch": 2.1651131824234353, |
| "grad_norm": 0.2375754285218798, |
| "learning_rate": 1.5365612648221345e-05, |
| "loss": 0.259, |
| "step": 814 |
| }, |
| { |
| "epoch": 2.1677762982689748, |
| "grad_norm": 0.2115542246448785, |
| "learning_rate": 1.531620553359684e-05, |
| "loss": 0.2684, |
| "step": 815 |
| }, |
| { |
| "epoch": 2.170439414114514, |
| "grad_norm": 0.2447171773393202, |
| "learning_rate": 1.5266798418972334e-05, |
| "loss": 0.2762, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.1731025299600533, |
| "grad_norm": 0.22704904523049146, |
| "learning_rate": 1.5217391304347828e-05, |
| "loss": 0.2587, |
| "step": 817 |
| }, |
| { |
| "epoch": 2.1757656458055927, |
| "grad_norm": 0.2103985476952429, |
| "learning_rate": 1.5167984189723323e-05, |
| "loss": 0.2706, |
| "step": 818 |
| }, |
| { |
| "epoch": 2.1784287616511318, |
| "grad_norm": 0.25159263014889965, |
| "learning_rate": 1.5118577075098814e-05, |
| "loss": 0.2584, |
| "step": 819 |
| }, |
| { |
| "epoch": 2.181091877496671, |
| "grad_norm": 0.24458443995501622, |
| "learning_rate": 1.5069169960474308e-05, |
| "loss": 0.2704, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.1837549933422102, |
| "grad_norm": 0.22057301940141671, |
| "learning_rate": 1.5019762845849802e-05, |
| "loss": 0.2719, |
| "step": 821 |
| }, |
| { |
| "epoch": 2.1864181091877497, |
| "grad_norm": 0.267519780973077, |
| "learning_rate": 1.4970355731225297e-05, |
| "loss": 0.2716, |
| "step": 822 |
| }, |
| { |
| "epoch": 2.1890812250332887, |
| "grad_norm": 0.22154250046870252, |
| "learning_rate": 1.4920948616600791e-05, |
| "loss": 0.2591, |
| "step": 823 |
| }, |
| { |
| "epoch": 2.191744340878828, |
| "grad_norm": 0.21165234414085649, |
| "learning_rate": 1.4871541501976285e-05, |
| "loss": 0.2655, |
| "step": 824 |
| }, |
| { |
| "epoch": 2.1944074567243677, |
| "grad_norm": 0.24374815251314244, |
| "learning_rate": 1.482213438735178e-05, |
| "loss": 0.2655, |
| "step": 825 |
| }, |
| { |
| "epoch": 2.1970705725699067, |
| "grad_norm": 0.2455699195489871, |
| "learning_rate": 1.4772727272727274e-05, |
| "loss": 0.2665, |
| "step": 826 |
| }, |
| { |
| "epoch": 2.199733688415446, |
| "grad_norm": 0.22958103222280501, |
| "learning_rate": 1.4723320158102769e-05, |
| "loss": 0.266, |
| "step": 827 |
| }, |
| { |
| "epoch": 2.202396804260985, |
| "grad_norm": 0.22203196516766327, |
| "learning_rate": 1.4673913043478263e-05, |
| "loss": 0.2646, |
| "step": 828 |
| }, |
| { |
| "epoch": 2.2050599201065246, |
| "grad_norm": 0.24608492700980994, |
| "learning_rate": 1.4624505928853754e-05, |
| "loss": 0.2794, |
| "step": 829 |
| }, |
| { |
| "epoch": 2.207723035952064, |
| "grad_norm": 0.21991565592070453, |
| "learning_rate": 1.4575098814229248e-05, |
| "loss": 0.2721, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.210386151797603, |
| "grad_norm": 0.21684224263000038, |
| "learning_rate": 1.4525691699604743e-05, |
| "loss": 0.2584, |
| "step": 831 |
| }, |
| { |
| "epoch": 2.2130492676431426, |
| "grad_norm": 0.25977569519470245, |
| "learning_rate": 1.4476284584980237e-05, |
| "loss": 0.2726, |
| "step": 832 |
| }, |
| { |
| "epoch": 2.2157123834886816, |
| "grad_norm": 0.2386084151402447, |
| "learning_rate": 1.4426877470355732e-05, |
| "loss": 0.2852, |
| "step": 833 |
| }, |
| { |
| "epoch": 2.218375499334221, |
| "grad_norm": 0.21986693449971093, |
| "learning_rate": 1.4377470355731226e-05, |
| "loss": 0.2626, |
| "step": 834 |
| }, |
| { |
| "epoch": 2.2210386151797605, |
| "grad_norm": 0.21749065277576188, |
| "learning_rate": 1.432806324110672e-05, |
| "loss": 0.2602, |
| "step": 835 |
| }, |
| { |
| "epoch": 2.2237017310252996, |
| "grad_norm": 0.23989512729814974, |
| "learning_rate": 1.4278656126482215e-05, |
| "loss": 0.2692, |
| "step": 836 |
| }, |
| { |
| "epoch": 2.226364846870839, |
| "grad_norm": 0.23832582321216103, |
| "learning_rate": 1.4229249011857709e-05, |
| "loss": 0.2635, |
| "step": 837 |
| }, |
| { |
| "epoch": 2.229027962716378, |
| "grad_norm": 0.2426811597238821, |
| "learning_rate": 1.4179841897233202e-05, |
| "loss": 0.2668, |
| "step": 838 |
| }, |
| { |
| "epoch": 2.2316910785619175, |
| "grad_norm": 0.22741820303496693, |
| "learning_rate": 1.4130434782608694e-05, |
| "loss": 0.2687, |
| "step": 839 |
| }, |
| { |
| "epoch": 2.2343541944074565, |
| "grad_norm": 0.2193731262262756, |
| "learning_rate": 1.4081027667984189e-05, |
| "loss": 0.2707, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.237017310252996, |
| "grad_norm": 0.22566921822696567, |
| "learning_rate": 1.4031620553359683e-05, |
| "loss": 0.2676, |
| "step": 841 |
| }, |
| { |
| "epoch": 2.2396804260985355, |
| "grad_norm": 0.22383415671065598, |
| "learning_rate": 1.3982213438735178e-05, |
| "loss": 0.2652, |
| "step": 842 |
| }, |
| { |
| "epoch": 2.2423435419440745, |
| "grad_norm": 0.20320657711674117, |
| "learning_rate": 1.3932806324110672e-05, |
| "loss": 0.2595, |
| "step": 843 |
| }, |
| { |
| "epoch": 2.245006657789614, |
| "grad_norm": 0.2333067790520279, |
| "learning_rate": 1.3883399209486166e-05, |
| "loss": 0.2584, |
| "step": 844 |
| }, |
| { |
| "epoch": 2.247669773635153, |
| "grad_norm": 0.2198492093260434, |
| "learning_rate": 1.383399209486166e-05, |
| "loss": 0.2787, |
| "step": 845 |
| }, |
| { |
| "epoch": 2.2503328894806924, |
| "grad_norm": 0.20578959481390344, |
| "learning_rate": 1.3784584980237155e-05, |
| "loss": 0.2717, |
| "step": 846 |
| }, |
| { |
| "epoch": 2.2529960053262315, |
| "grad_norm": 0.23821537591362393, |
| "learning_rate": 1.373517786561265e-05, |
| "loss": 0.2699, |
| "step": 847 |
| }, |
| { |
| "epoch": 2.255659121171771, |
| "grad_norm": 0.22087113735109618, |
| "learning_rate": 1.3685770750988142e-05, |
| "loss": 0.2643, |
| "step": 848 |
| }, |
| { |
| "epoch": 2.2583222370173104, |
| "grad_norm": 0.21122229854050678, |
| "learning_rate": 1.3636363636363637e-05, |
| "loss": 0.2724, |
| "step": 849 |
| }, |
| { |
| "epoch": 2.2609853528628494, |
| "grad_norm": 0.21706856754708864, |
| "learning_rate": 1.3586956521739131e-05, |
| "loss": 0.2726, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.263648468708389, |
| "grad_norm": 0.21623723691120003, |
| "learning_rate": 1.3537549407114625e-05, |
| "loss": 0.2551, |
| "step": 851 |
| }, |
| { |
| "epoch": 2.266311584553928, |
| "grad_norm": 0.2271100658389757, |
| "learning_rate": 1.348814229249012e-05, |
| "loss": 0.2586, |
| "step": 852 |
| }, |
| { |
| "epoch": 2.2689747003994674, |
| "grad_norm": 0.2209764109681619, |
| "learning_rate": 1.3438735177865614e-05, |
| "loss": 0.2716, |
| "step": 853 |
| }, |
| { |
| "epoch": 2.271637816245007, |
| "grad_norm": 0.2178701412614265, |
| "learning_rate": 1.3389328063241108e-05, |
| "loss": 0.2891, |
| "step": 854 |
| }, |
| { |
| "epoch": 2.274300932090546, |
| "grad_norm": 0.2661642988662999, |
| "learning_rate": 1.3339920948616603e-05, |
| "loss": 0.2564, |
| "step": 855 |
| }, |
| { |
| "epoch": 2.2769640479360853, |
| "grad_norm": 0.21388446109096484, |
| "learning_rate": 1.3290513833992097e-05, |
| "loss": 0.2529, |
| "step": 856 |
| }, |
| { |
| "epoch": 2.2796271637816243, |
| "grad_norm": 0.2216576992935052, |
| "learning_rate": 1.3241106719367592e-05, |
| "loss": 0.2636, |
| "step": 857 |
| }, |
| { |
| "epoch": 2.282290279627164, |
| "grad_norm": 0.23210662511306396, |
| "learning_rate": 1.3191699604743083e-05, |
| "loss": 0.2589, |
| "step": 858 |
| }, |
| { |
| "epoch": 2.2849533954727033, |
| "grad_norm": 0.2392108261983096, |
| "learning_rate": 1.3142292490118577e-05, |
| "loss": 0.265, |
| "step": 859 |
| }, |
| { |
| "epoch": 2.2876165113182423, |
| "grad_norm": 0.21786440972478727, |
| "learning_rate": 1.3092885375494071e-05, |
| "loss": 0.2793, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.2902796271637818, |
| "grad_norm": 0.260403587668551, |
| "learning_rate": 1.3043478260869566e-05, |
| "loss": 0.2777, |
| "step": 861 |
| }, |
| { |
| "epoch": 2.2929427430093208, |
| "grad_norm": 0.2430960989806936, |
| "learning_rate": 1.299407114624506e-05, |
| "loss": 0.2572, |
| "step": 862 |
| }, |
| { |
| "epoch": 2.2956058588548602, |
| "grad_norm": 0.21752051573777517, |
| "learning_rate": 1.2944664031620555e-05, |
| "loss": 0.2803, |
| "step": 863 |
| }, |
| { |
| "epoch": 2.2982689747003997, |
| "grad_norm": 0.2573344766515025, |
| "learning_rate": 1.2895256916996049e-05, |
| "loss": 0.2803, |
| "step": 864 |
| }, |
| { |
| "epoch": 2.3009320905459387, |
| "grad_norm": 0.24369267722963625, |
| "learning_rate": 1.2845849802371543e-05, |
| "loss": 0.2559, |
| "step": 865 |
| }, |
| { |
| "epoch": 2.303595206391478, |
| "grad_norm": 0.2676475243278646, |
| "learning_rate": 1.2796442687747038e-05, |
| "loss": 0.2634, |
| "step": 866 |
| }, |
| { |
| "epoch": 2.306258322237017, |
| "grad_norm": 0.21674298638149098, |
| "learning_rate": 1.2747035573122532e-05, |
| "loss": 0.2673, |
| "step": 867 |
| }, |
| { |
| "epoch": 2.3089214380825567, |
| "grad_norm": 0.23541545396380092, |
| "learning_rate": 1.2697628458498023e-05, |
| "loss": 0.2673, |
| "step": 868 |
| }, |
| { |
| "epoch": 2.3115845539280957, |
| "grad_norm": 0.22699711620607352, |
| "learning_rate": 1.2648221343873517e-05, |
| "loss": 0.2674, |
| "step": 869 |
| }, |
| { |
| "epoch": 2.314247669773635, |
| "grad_norm": 0.22613468537499234, |
| "learning_rate": 1.2598814229249012e-05, |
| "loss": 0.2655, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.316910785619174, |
| "grad_norm": 0.2253665054481723, |
| "learning_rate": 1.2549407114624506e-05, |
| "loss": 0.2542, |
| "step": 871 |
| }, |
| { |
| "epoch": 2.3195739014647137, |
| "grad_norm": 0.2389905563347208, |
| "learning_rate": 1.25e-05, |
| "loss": 0.2642, |
| "step": 872 |
| }, |
| { |
| "epoch": 2.322237017310253, |
| "grad_norm": 0.1972800090188119, |
| "learning_rate": 1.2450592885375495e-05, |
| "loss": 0.2738, |
| "step": 873 |
| }, |
| { |
| "epoch": 2.324900133155792, |
| "grad_norm": 0.22018172948520282, |
| "learning_rate": 1.240118577075099e-05, |
| "loss": 0.2736, |
| "step": 874 |
| }, |
| { |
| "epoch": 2.3275632490013316, |
| "grad_norm": 0.22660897800754057, |
| "learning_rate": 1.2351778656126482e-05, |
| "loss": 0.2797, |
| "step": 875 |
| }, |
| { |
| "epoch": 2.3302263648468706, |
| "grad_norm": 0.22691181432819396, |
| "learning_rate": 1.2302371541501976e-05, |
| "loss": 0.2562, |
| "step": 876 |
| }, |
| { |
| "epoch": 2.33288948069241, |
| "grad_norm": 0.21367535241766863, |
| "learning_rate": 1.225296442687747e-05, |
| "loss": 0.2687, |
| "step": 877 |
| }, |
| { |
| "epoch": 2.3355525965379496, |
| "grad_norm": 0.23289737129114052, |
| "learning_rate": 1.2203557312252965e-05, |
| "loss": 0.2595, |
| "step": 878 |
| }, |
| { |
| "epoch": 2.3382157123834886, |
| "grad_norm": 0.21941025876118542, |
| "learning_rate": 1.215415019762846e-05, |
| "loss": 0.2785, |
| "step": 879 |
| }, |
| { |
| "epoch": 2.340878828229028, |
| "grad_norm": 0.23113074495001715, |
| "learning_rate": 1.2104743083003952e-05, |
| "loss": 0.283, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.343541944074567, |
| "grad_norm": 0.21978182787011594, |
| "learning_rate": 1.2055335968379447e-05, |
| "loss": 0.2602, |
| "step": 881 |
| }, |
| { |
| "epoch": 2.3462050599201065, |
| "grad_norm": 0.22558732477437654, |
| "learning_rate": 1.2005928853754941e-05, |
| "loss": 0.2744, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.348868175765646, |
| "grad_norm": 0.21761347406156886, |
| "learning_rate": 1.1956521739130435e-05, |
| "loss": 0.2702, |
| "step": 883 |
| }, |
| { |
| "epoch": 2.351531291611185, |
| "grad_norm": 0.5461188257601155, |
| "learning_rate": 1.190711462450593e-05, |
| "loss": 0.2894, |
| "step": 884 |
| }, |
| { |
| "epoch": 2.3541944074567245, |
| "grad_norm": 0.21406318400975563, |
| "learning_rate": 1.1857707509881423e-05, |
| "loss": 0.2661, |
| "step": 885 |
| }, |
| { |
| "epoch": 2.3568575233022635, |
| "grad_norm": 0.1984149911802996, |
| "learning_rate": 1.1808300395256917e-05, |
| "loss": 0.266, |
| "step": 886 |
| }, |
| { |
| "epoch": 2.359520639147803, |
| "grad_norm": 0.21968916065746072, |
| "learning_rate": 1.1758893280632411e-05, |
| "loss": 0.2635, |
| "step": 887 |
| }, |
| { |
| "epoch": 2.3621837549933424, |
| "grad_norm": 0.22188429396465353, |
| "learning_rate": 1.1709486166007906e-05, |
| "loss": 0.2729, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.3648468708388815, |
| "grad_norm": 0.21019336767245783, |
| "learning_rate": 1.16600790513834e-05, |
| "loss": 0.2773, |
| "step": 889 |
| }, |
| { |
| "epoch": 2.367509986684421, |
| "grad_norm": 0.22711608967366953, |
| "learning_rate": 1.1610671936758893e-05, |
| "loss": 0.2714, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.37017310252996, |
| "grad_norm": 0.2226773168313416, |
| "learning_rate": 1.1561264822134387e-05, |
| "loss": 0.264, |
| "step": 891 |
| }, |
| { |
| "epoch": 2.3728362183754994, |
| "grad_norm": 0.21211073663718902, |
| "learning_rate": 1.1511857707509881e-05, |
| "loss": 0.2623, |
| "step": 892 |
| }, |
| { |
| "epoch": 2.3754993342210384, |
| "grad_norm": 0.22155796804883984, |
| "learning_rate": 1.1462450592885376e-05, |
| "loss": 0.2786, |
| "step": 893 |
| }, |
| { |
| "epoch": 2.378162450066578, |
| "grad_norm": 0.21152104541352987, |
| "learning_rate": 1.141304347826087e-05, |
| "loss": 0.2754, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.3808255659121174, |
| "grad_norm": 0.2436663825711812, |
| "learning_rate": 1.1363636363636365e-05, |
| "loss": 0.2646, |
| "step": 895 |
| }, |
| { |
| "epoch": 2.3834886817576564, |
| "grad_norm": 0.253729858596224, |
| "learning_rate": 1.1314229249011857e-05, |
| "loss": 0.2815, |
| "step": 896 |
| }, |
| { |
| "epoch": 2.386151797603196, |
| "grad_norm": 0.20642310572208497, |
| "learning_rate": 1.1264822134387352e-05, |
| "loss": 0.272, |
| "step": 897 |
| }, |
| { |
| "epoch": 2.388814913448735, |
| "grad_norm": 0.22828401957220001, |
| "learning_rate": 1.1215415019762846e-05, |
| "loss": 0.2679, |
| "step": 898 |
| }, |
| { |
| "epoch": 2.3914780292942743, |
| "grad_norm": 0.2226863403827293, |
| "learning_rate": 1.116600790513834e-05, |
| "loss": 0.2783, |
| "step": 899 |
| }, |
| { |
| "epoch": 2.3941411451398134, |
| "grad_norm": 0.2380848377629423, |
| "learning_rate": 1.1116600790513835e-05, |
| "loss": 0.2688, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.396804260985353, |
| "grad_norm": 0.23278674245520006, |
| "learning_rate": 1.106719367588933e-05, |
| "loss": 0.271, |
| "step": 901 |
| }, |
| { |
| "epoch": 2.3994673768308923, |
| "grad_norm": 0.20419629462602493, |
| "learning_rate": 1.1017786561264824e-05, |
| "loss": 0.265, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.4021304926764313, |
| "grad_norm": 0.2390569872958442, |
| "learning_rate": 1.0968379446640318e-05, |
| "loss": 0.2638, |
| "step": 903 |
| }, |
| { |
| "epoch": 2.4047936085219708, |
| "grad_norm": 0.2279702813171203, |
| "learning_rate": 1.091897233201581e-05, |
| "loss": 0.2428, |
| "step": 904 |
| }, |
| { |
| "epoch": 2.40745672436751, |
| "grad_norm": 0.21319204168497982, |
| "learning_rate": 1.0869565217391305e-05, |
| "loss": 0.2738, |
| "step": 905 |
| }, |
| { |
| "epoch": 2.4101198402130493, |
| "grad_norm": 0.21016965126306628, |
| "learning_rate": 1.08201581027668e-05, |
| "loss": 0.2591, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.4127829560585887, |
| "grad_norm": 0.23241828917431315, |
| "learning_rate": 1.0770750988142294e-05, |
| "loss": 0.2691, |
| "step": 907 |
| }, |
| { |
| "epoch": 2.4154460719041277, |
| "grad_norm": 0.2508034557509808, |
| "learning_rate": 1.0721343873517788e-05, |
| "loss": 0.2716, |
| "step": 908 |
| }, |
| { |
| "epoch": 2.418109187749667, |
| "grad_norm": 0.24987214036836988, |
| "learning_rate": 1.0671936758893281e-05, |
| "loss": 0.2578, |
| "step": 909 |
| }, |
| { |
| "epoch": 2.4207723035952062, |
| "grad_norm": 0.2380445170755529, |
| "learning_rate": 1.0622529644268775e-05, |
| "loss": 0.264, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.4234354194407457, |
| "grad_norm": 0.2201379804572699, |
| "learning_rate": 1.057312252964427e-05, |
| "loss": 0.274, |
| "step": 911 |
| }, |
| { |
| "epoch": 2.426098535286285, |
| "grad_norm": 0.250942745509917, |
| "learning_rate": 1.0523715415019764e-05, |
| "loss": 0.2648, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.428761651131824, |
| "grad_norm": 0.22995097984900165, |
| "learning_rate": 1.0474308300395258e-05, |
| "loss": 0.2647, |
| "step": 913 |
| }, |
| { |
| "epoch": 2.4314247669773636, |
| "grad_norm": 0.23698141688133578, |
| "learning_rate": 1.0424901185770751e-05, |
| "loss": 0.2737, |
| "step": 914 |
| }, |
| { |
| "epoch": 2.4340878828229027, |
| "grad_norm": 0.21803776160842997, |
| "learning_rate": 1.0375494071146246e-05, |
| "loss": 0.272, |
| "step": 915 |
| }, |
| { |
| "epoch": 2.436750998668442, |
| "grad_norm": 0.24131490172282968, |
| "learning_rate": 1.032608695652174e-05, |
| "loss": 0.2695, |
| "step": 916 |
| }, |
| { |
| "epoch": 2.4394141145139816, |
| "grad_norm": 0.21919070590537304, |
| "learning_rate": 1.0276679841897234e-05, |
| "loss": 0.2642, |
| "step": 917 |
| }, |
| { |
| "epoch": 2.4420772303595206, |
| "grad_norm": 0.22130430229063322, |
| "learning_rate": 1.0227272727272729e-05, |
| "loss": 0.2648, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.44474034620506, |
| "grad_norm": 0.2207950484316367, |
| "learning_rate": 1.0177865612648221e-05, |
| "loss": 0.2661, |
| "step": 919 |
| }, |
| { |
| "epoch": 2.447403462050599, |
| "grad_norm": 0.21836484864507769, |
| "learning_rate": 1.0128458498023716e-05, |
| "loss": 0.2674, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.4500665778961386, |
| "grad_norm": 0.20744131254325618, |
| "learning_rate": 1.007905138339921e-05, |
| "loss": 0.2541, |
| "step": 921 |
| }, |
| { |
| "epoch": 2.4527296937416776, |
| "grad_norm": 0.21453675745306103, |
| "learning_rate": 1.0029644268774705e-05, |
| "loss": 0.2739, |
| "step": 922 |
| }, |
| { |
| "epoch": 2.455392809587217, |
| "grad_norm": 0.20834291358903456, |
| "learning_rate": 9.980237154150199e-06, |
| "loss": 0.2683, |
| "step": 923 |
| }, |
| { |
| "epoch": 2.458055925432756, |
| "grad_norm": 0.2218801415090961, |
| "learning_rate": 9.930830039525692e-06, |
| "loss": 0.2725, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.4607190412782955, |
| "grad_norm": 0.22892525986093554, |
| "learning_rate": 9.881422924901186e-06, |
| "loss": 0.2736, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.463382157123835, |
| "grad_norm": 0.21019735025511882, |
| "learning_rate": 9.83201581027668e-06, |
| "loss": 0.2667, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.466045272969374, |
| "grad_norm": 0.22029826331712365, |
| "learning_rate": 9.782608695652175e-06, |
| "loss": 0.2685, |
| "step": 927 |
| }, |
| { |
| "epoch": 2.4687083888149135, |
| "grad_norm": 0.2048436758988922, |
| "learning_rate": 9.733201581027667e-06, |
| "loss": 0.2675, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.4713715046604525, |
| "grad_norm": 0.22910504440789492, |
| "learning_rate": 9.683794466403162e-06, |
| "loss": 0.2769, |
| "step": 929 |
| }, |
| { |
| "epoch": 2.474034620505992, |
| "grad_norm": 0.22852762946943356, |
| "learning_rate": 9.634387351778656e-06, |
| "loss": 0.2834, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.4766977363515315, |
| "grad_norm": 0.21897574663546826, |
| "learning_rate": 9.58498023715415e-06, |
| "loss": 0.2778, |
| "step": 931 |
| }, |
| { |
| "epoch": 2.4793608521970705, |
| "grad_norm": 0.2050794319936511, |
| "learning_rate": 9.535573122529645e-06, |
| "loss": 0.2715, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.48202396804261, |
| "grad_norm": 0.21728652059101256, |
| "learning_rate": 9.486166007905138e-06, |
| "loss": 0.259, |
| "step": 933 |
| }, |
| { |
| "epoch": 2.484687083888149, |
| "grad_norm": 0.22845416533089977, |
| "learning_rate": 9.436758893280632e-06, |
| "loss": 0.2761, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.4873501997336884, |
| "grad_norm": 0.21231590297088435, |
| "learning_rate": 9.387351778656126e-06, |
| "loss": 0.2677, |
| "step": 935 |
| }, |
| { |
| "epoch": 2.490013315579228, |
| "grad_norm": 0.1926400508160791, |
| "learning_rate": 9.33794466403162e-06, |
| "loss": 0.2575, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.492676431424767, |
| "grad_norm": 0.22996010092008873, |
| "learning_rate": 9.288537549407115e-06, |
| "loss": 0.2548, |
| "step": 937 |
| }, |
| { |
| "epoch": 2.4953395472703064, |
| "grad_norm": 0.22619760087939098, |
| "learning_rate": 9.239130434782608e-06, |
| "loss": 0.2676, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.4980026631158454, |
| "grad_norm": 0.20946128187824178, |
| "learning_rate": 9.189723320158102e-06, |
| "loss": 0.2649, |
| "step": 939 |
| }, |
| { |
| "epoch": 2.500665778961385, |
| "grad_norm": 0.21291901939824368, |
| "learning_rate": 9.140316205533597e-06, |
| "loss": 0.2794, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.5033288948069243, |
| "grad_norm": 0.23983156472432737, |
| "learning_rate": 9.090909090909091e-06, |
| "loss": 0.2612, |
| "step": 941 |
| }, |
| { |
| "epoch": 2.5059920106524634, |
| "grad_norm": 0.21371558486466197, |
| "learning_rate": 9.041501976284585e-06, |
| "loss": 0.2715, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.508655126498003, |
| "grad_norm": 0.20948609220977954, |
| "learning_rate": 8.992094861660078e-06, |
| "loss": 0.2685, |
| "step": 943 |
| }, |
| { |
| "epoch": 2.511318242343542, |
| "grad_norm": 0.20326902436416877, |
| "learning_rate": 8.942687747035572e-06, |
| "loss": 0.2646, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.5139813581890813, |
| "grad_norm": 0.20716732265525145, |
| "learning_rate": 8.893280632411067e-06, |
| "loss": 0.2624, |
| "step": 945 |
| }, |
| { |
| "epoch": 2.5166444740346208, |
| "grad_norm": 0.21310454845084212, |
| "learning_rate": 8.843873517786561e-06, |
| "loss": 0.2666, |
| "step": 946 |
| }, |
| { |
| "epoch": 2.51930758988016, |
| "grad_norm": 0.2356341947109539, |
| "learning_rate": 8.794466403162056e-06, |
| "loss": 0.2607, |
| "step": 947 |
| }, |
| { |
| "epoch": 2.521970705725699, |
| "grad_norm": 0.206705458805249, |
| "learning_rate": 8.74505928853755e-06, |
| "loss": 0.2765, |
| "step": 948 |
| }, |
| { |
| "epoch": 2.5246338215712383, |
| "grad_norm": 0.19941570008688478, |
| "learning_rate": 8.695652173913044e-06, |
| "loss": 0.2774, |
| "step": 949 |
| }, |
| { |
| "epoch": 2.5272969374167777, |
| "grad_norm": 0.22426207188439748, |
| "learning_rate": 8.646245059288539e-06, |
| "loss": 0.2829, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.5299600532623168, |
| "grad_norm": 0.25258528314600287, |
| "learning_rate": 8.596837944664033e-06, |
| "loss": 0.2646, |
| "step": 951 |
| }, |
| { |
| "epoch": 2.5326231691078562, |
| "grad_norm": 0.2145489784213885, |
| "learning_rate": 8.547430830039528e-06, |
| "loss": 0.2607, |
| "step": 952 |
| }, |
| { |
| "epoch": 2.5352862849533953, |
| "grad_norm": 0.19599385905462602, |
| "learning_rate": 8.49802371541502e-06, |
| "loss": 0.2543, |
| "step": 953 |
| }, |
| { |
| "epoch": 2.5379494007989347, |
| "grad_norm": 0.2480014218006241, |
| "learning_rate": 8.448616600790515e-06, |
| "loss": 0.2689, |
| "step": 954 |
| }, |
| { |
| "epoch": 2.540612516644474, |
| "grad_norm": 0.24788509439736134, |
| "learning_rate": 8.399209486166009e-06, |
| "loss": 0.2725, |
| "step": 955 |
| }, |
| { |
| "epoch": 2.543275632490013, |
| "grad_norm": 0.2267111546180155, |
| "learning_rate": 8.349802371541503e-06, |
| "loss": 0.2635, |
| "step": 956 |
| }, |
| { |
| "epoch": 2.5459387483355527, |
| "grad_norm": 0.21182851928367047, |
| "learning_rate": 8.300395256916998e-06, |
| "loss": 0.2638, |
| "step": 957 |
| }, |
| { |
| "epoch": 2.5486018641810917, |
| "grad_norm": 0.21455676194315262, |
| "learning_rate": 8.25098814229249e-06, |
| "loss": 0.2585, |
| "step": 958 |
| }, |
| { |
| "epoch": 2.551264980026631, |
| "grad_norm": 0.2169073571862216, |
| "learning_rate": 8.201581027667985e-06, |
| "loss": 0.2617, |
| "step": 959 |
| }, |
| { |
| "epoch": 2.5539280958721706, |
| "grad_norm": 0.22625888751011447, |
| "learning_rate": 8.15217391304348e-06, |
| "loss": 0.271, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.5565912117177096, |
| "grad_norm": 0.20470193896466704, |
| "learning_rate": 8.102766798418974e-06, |
| "loss": 0.2662, |
| "step": 961 |
| }, |
| { |
| "epoch": 2.559254327563249, |
| "grad_norm": 0.21322007235950363, |
| "learning_rate": 8.053359683794468e-06, |
| "loss": 0.2556, |
| "step": 962 |
| }, |
| { |
| "epoch": 2.561917443408788, |
| "grad_norm": 0.20150617925679104, |
| "learning_rate": 8.00395256916996e-06, |
| "loss": 0.2582, |
| "step": 963 |
| }, |
| { |
| "epoch": 2.5645805592543276, |
| "grad_norm": 0.2286944491087834, |
| "learning_rate": 7.954545454545455e-06, |
| "loss": 0.2661, |
| "step": 964 |
| }, |
| { |
| "epoch": 2.567243675099867, |
| "grad_norm": 0.20708520844073464, |
| "learning_rate": 7.90513833992095e-06, |
| "loss": 0.2625, |
| "step": 965 |
| }, |
| { |
| "epoch": 2.569906790945406, |
| "grad_norm": 0.1993453778786671, |
| "learning_rate": 7.855731225296444e-06, |
| "loss": 0.2684, |
| "step": 966 |
| }, |
| { |
| "epoch": 2.5725699067909455, |
| "grad_norm": 0.19939625758599083, |
| "learning_rate": 7.806324110671937e-06, |
| "loss": 0.2658, |
| "step": 967 |
| }, |
| { |
| "epoch": 2.5752330226364846, |
| "grad_norm": 0.20007029899978518, |
| "learning_rate": 7.756916996047431e-06, |
| "loss": 0.2612, |
| "step": 968 |
| }, |
| { |
| "epoch": 2.577896138482024, |
| "grad_norm": 0.20768490453881108, |
| "learning_rate": 7.707509881422925e-06, |
| "loss": 0.2671, |
| "step": 969 |
| }, |
| { |
| "epoch": 2.5805592543275635, |
| "grad_norm": 0.21354810130953325, |
| "learning_rate": 7.65810276679842e-06, |
| "loss": 0.2578, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.5832223701731025, |
| "grad_norm": 0.23174711166338519, |
| "learning_rate": 7.608695652173914e-06, |
| "loss": 0.2715, |
| "step": 971 |
| }, |
| { |
| "epoch": 2.5858854860186415, |
| "grad_norm": 0.21079000224350897, |
| "learning_rate": 7.559288537549407e-06, |
| "loss": 0.2658, |
| "step": 972 |
| }, |
| { |
| "epoch": 2.588548601864181, |
| "grad_norm": 0.2001035421079937, |
| "learning_rate": 7.509881422924901e-06, |
| "loss": 0.2569, |
| "step": 973 |
| }, |
| { |
| "epoch": 2.5912117177097205, |
| "grad_norm": 0.2021065412071498, |
| "learning_rate": 7.4604743083003955e-06, |
| "loss": 0.2608, |
| "step": 974 |
| }, |
| { |
| "epoch": 2.5938748335552595, |
| "grad_norm": 0.214158147452307, |
| "learning_rate": 7.41106719367589e-06, |
| "loss": 0.2779, |
| "step": 975 |
| }, |
| { |
| "epoch": 2.596537949400799, |
| "grad_norm": 0.20790431049928293, |
| "learning_rate": 7.361660079051384e-06, |
| "loss": 0.2733, |
| "step": 976 |
| }, |
| { |
| "epoch": 2.599201065246338, |
| "grad_norm": 0.20549750329181854, |
| "learning_rate": 7.312252964426877e-06, |
| "loss": 0.276, |
| "step": 977 |
| }, |
| { |
| "epoch": 2.6018641810918774, |
| "grad_norm": 0.20237657523764993, |
| "learning_rate": 7.262845849802371e-06, |
| "loss": 0.2735, |
| "step": 978 |
| }, |
| { |
| "epoch": 2.604527296937417, |
| "grad_norm": 0.20973877300015645, |
| "learning_rate": 7.213438735177866e-06, |
| "loss": 0.281, |
| "step": 979 |
| }, |
| { |
| "epoch": 2.607190412782956, |
| "grad_norm": 0.22017905718680691, |
| "learning_rate": 7.16403162055336e-06, |
| "loss": 0.2677, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.6098535286284954, |
| "grad_norm": 0.2144342458050631, |
| "learning_rate": 7.1146245059288545e-06, |
| "loss": 0.2604, |
| "step": 981 |
| }, |
| { |
| "epoch": 2.6125166444740344, |
| "grad_norm": 0.2050156532271564, |
| "learning_rate": 7.065217391304347e-06, |
| "loss": 0.2701, |
| "step": 982 |
| }, |
| { |
| "epoch": 2.615179760319574, |
| "grad_norm": 0.1970203183942734, |
| "learning_rate": 7.015810276679842e-06, |
| "loss": 0.2505, |
| "step": 983 |
| }, |
| { |
| "epoch": 2.6178428761651134, |
| "grad_norm": 0.20402269570746995, |
| "learning_rate": 6.966403162055336e-06, |
| "loss": 0.2599, |
| "step": 984 |
| }, |
| { |
| "epoch": 2.6205059920106524, |
| "grad_norm": 0.20759868626386915, |
| "learning_rate": 6.91699604743083e-06, |
| "loss": 0.2733, |
| "step": 985 |
| }, |
| { |
| "epoch": 2.623169107856192, |
| "grad_norm": 0.22693920517209076, |
| "learning_rate": 6.867588932806325e-06, |
| "loss": 0.2627, |
| "step": 986 |
| }, |
| { |
| "epoch": 2.625832223701731, |
| "grad_norm": 0.20970122945185465, |
| "learning_rate": 6.818181818181818e-06, |
| "loss": 0.2704, |
| "step": 987 |
| }, |
| { |
| "epoch": 2.6284953395472703, |
| "grad_norm": 0.20332704992870704, |
| "learning_rate": 6.768774703557313e-06, |
| "loss": 0.2762, |
| "step": 988 |
| }, |
| { |
| "epoch": 2.63115845539281, |
| "grad_norm": 0.20966961639828544, |
| "learning_rate": 6.719367588932807e-06, |
| "loss": 0.2737, |
| "step": 989 |
| }, |
| { |
| "epoch": 2.633821571238349, |
| "grad_norm": 0.2392085498215163, |
| "learning_rate": 6.6699604743083014e-06, |
| "loss": 0.2639, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.6364846870838883, |
| "grad_norm": 0.22069815282030755, |
| "learning_rate": 6.620553359683796e-06, |
| "loss": 0.2623, |
| "step": 991 |
| }, |
| { |
| "epoch": 2.6391478029294273, |
| "grad_norm": 0.2062130093620195, |
| "learning_rate": 6.5711462450592885e-06, |
| "loss": 0.2634, |
| "step": 992 |
| }, |
| { |
| "epoch": 2.6418109187749668, |
| "grad_norm": 0.21202212454473487, |
| "learning_rate": 6.521739130434783e-06, |
| "loss": 0.2732, |
| "step": 993 |
| }, |
| { |
| "epoch": 2.6444740346205062, |
| "grad_norm": 0.20742438691074003, |
| "learning_rate": 6.472332015810277e-06, |
| "loss": 0.2775, |
| "step": 994 |
| }, |
| { |
| "epoch": 2.6471371504660453, |
| "grad_norm": 0.20539419758832048, |
| "learning_rate": 6.422924901185772e-06, |
| "loss": 0.2786, |
| "step": 995 |
| }, |
| { |
| "epoch": 2.6498002663115847, |
| "grad_norm": 0.19871961616535505, |
| "learning_rate": 6.373517786561266e-06, |
| "loss": 0.2642, |
| "step": 996 |
| }, |
| { |
| "epoch": 2.6524633821571237, |
| "grad_norm": 0.2445459224085182, |
| "learning_rate": 6.324110671936759e-06, |
| "loss": 0.271, |
| "step": 997 |
| }, |
| { |
| "epoch": 2.655126498002663, |
| "grad_norm": 0.20294635449003665, |
| "learning_rate": 6.274703557312253e-06, |
| "loss": 0.272, |
| "step": 998 |
| }, |
| { |
| "epoch": 2.6577896138482027, |
| "grad_norm": 0.20711520929552674, |
| "learning_rate": 6.2252964426877475e-06, |
| "loss": 0.277, |
| "step": 999 |
| }, |
| { |
| "epoch": 2.6604527296937417, |
| "grad_norm": 0.19858451035812705, |
| "learning_rate": 6.175889328063241e-06, |
| "loss": 0.2781, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.6631158455392807, |
| "grad_norm": 0.2029933078164672, |
| "learning_rate": 6.126482213438735e-06, |
| "loss": 0.259, |
| "step": 1001 |
| }, |
| { |
| "epoch": 2.66577896138482, |
| "grad_norm": 0.21745287030160018, |
| "learning_rate": 6.07707509881423e-06, |
| "loss": 0.27, |
| "step": 1002 |
| }, |
| { |
| "epoch": 2.6684420772303596, |
| "grad_norm": 0.19345167090566057, |
| "learning_rate": 6.027667984189723e-06, |
| "loss": 0.268, |
| "step": 1003 |
| }, |
| { |
| "epoch": 2.6711051930758987, |
| "grad_norm": 0.21568939666641776, |
| "learning_rate": 5.978260869565218e-06, |
| "loss": 0.2643, |
| "step": 1004 |
| }, |
| { |
| "epoch": 2.673768308921438, |
| "grad_norm": 0.19296044607870885, |
| "learning_rate": 5.928853754940711e-06, |
| "loss": 0.2761, |
| "step": 1005 |
| }, |
| { |
| "epoch": 2.676431424766977, |
| "grad_norm": 0.20181257150105722, |
| "learning_rate": 5.879446640316206e-06, |
| "loss": 0.271, |
| "step": 1006 |
| }, |
| { |
| "epoch": 2.6790945406125166, |
| "grad_norm": 0.2073838164023787, |
| "learning_rate": 5.8300395256917e-06, |
| "loss": 0.2713, |
| "step": 1007 |
| }, |
| { |
| "epoch": 2.681757656458056, |
| "grad_norm": 0.20965825745167907, |
| "learning_rate": 5.7806324110671936e-06, |
| "loss": 0.2689, |
| "step": 1008 |
| }, |
| { |
| "epoch": 2.684420772303595, |
| "grad_norm": 0.20444583357709556, |
| "learning_rate": 5.731225296442688e-06, |
| "loss": 0.2831, |
| "step": 1009 |
| }, |
| { |
| "epoch": 2.6870838881491346, |
| "grad_norm": 0.20971896583727812, |
| "learning_rate": 5.681818181818182e-06, |
| "loss": 0.2626, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.6897470039946736, |
| "grad_norm": 0.2080555215910288, |
| "learning_rate": 5.632411067193676e-06, |
| "loss": 0.2602, |
| "step": 1011 |
| }, |
| { |
| "epoch": 2.692410119840213, |
| "grad_norm": 0.2013420667078693, |
| "learning_rate": 5.58300395256917e-06, |
| "loss": 0.2653, |
| "step": 1012 |
| }, |
| { |
| "epoch": 2.6950732356857525, |
| "grad_norm": 0.19614771328643982, |
| "learning_rate": 5.533596837944665e-06, |
| "loss": 0.2556, |
| "step": 1013 |
| }, |
| { |
| "epoch": 2.6977363515312915, |
| "grad_norm": 0.20085761642467498, |
| "learning_rate": 5.484189723320159e-06, |
| "loss": 0.2744, |
| "step": 1014 |
| }, |
| { |
| "epoch": 2.700399467376831, |
| "grad_norm": 0.21544774180757933, |
| "learning_rate": 5.4347826086956525e-06, |
| "loss": 0.2602, |
| "step": 1015 |
| }, |
| { |
| "epoch": 2.70306258322237, |
| "grad_norm": 0.19696825099825307, |
| "learning_rate": 5.385375494071147e-06, |
| "loss": 0.2595, |
| "step": 1016 |
| }, |
| { |
| "epoch": 2.7057256990679095, |
| "grad_norm": 0.1924176776922604, |
| "learning_rate": 5.3359683794466405e-06, |
| "loss": 0.2619, |
| "step": 1017 |
| }, |
| { |
| "epoch": 2.708388814913449, |
| "grad_norm": 0.22132480166121332, |
| "learning_rate": 5.286561264822135e-06, |
| "loss": 0.2697, |
| "step": 1018 |
| }, |
| { |
| "epoch": 2.711051930758988, |
| "grad_norm": 0.18691262036412767, |
| "learning_rate": 5.237154150197629e-06, |
| "loss": 0.2554, |
| "step": 1019 |
| }, |
| { |
| "epoch": 2.7137150466045274, |
| "grad_norm": 0.1938229034237995, |
| "learning_rate": 5.187747035573123e-06, |
| "loss": 0.2586, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.7163781624500665, |
| "grad_norm": 0.2129748283287826, |
| "learning_rate": 5.138339920948617e-06, |
| "loss": 0.2795, |
| "step": 1021 |
| }, |
| { |
| "epoch": 2.719041278295606, |
| "grad_norm": 0.20445583537089335, |
| "learning_rate": 5.088932806324111e-06, |
| "loss": 0.2658, |
| "step": 1022 |
| }, |
| { |
| "epoch": 2.7217043941411454, |
| "grad_norm": 0.1933528504807178, |
| "learning_rate": 5.039525691699605e-06, |
| "loss": 0.2621, |
| "step": 1023 |
| }, |
| { |
| "epoch": 2.7243675099866844, |
| "grad_norm": 0.21949852883334098, |
| "learning_rate": 4.9901185770750995e-06, |
| "loss": 0.2649, |
| "step": 1024 |
| }, |
| { |
| "epoch": 2.7270306258322234, |
| "grad_norm": 0.20152020359649447, |
| "learning_rate": 4.940711462450593e-06, |
| "loss": 0.265, |
| "step": 1025 |
| }, |
| { |
| "epoch": 2.729693741677763, |
| "grad_norm": 0.20583564086259545, |
| "learning_rate": 4.891304347826087e-06, |
| "loss": 0.2619, |
| "step": 1026 |
| }, |
| { |
| "epoch": 2.7323568575233024, |
| "grad_norm": 0.2007179587300372, |
| "learning_rate": 4.841897233201581e-06, |
| "loss": 0.2693, |
| "step": 1027 |
| }, |
| { |
| "epoch": 2.7350199733688414, |
| "grad_norm": 0.1998685679119499, |
| "learning_rate": 4.792490118577075e-06, |
| "loss": 0.2629, |
| "step": 1028 |
| }, |
| { |
| "epoch": 2.737683089214381, |
| "grad_norm": 0.21626697273734094, |
| "learning_rate": 4.743083003952569e-06, |
| "loss": 0.269, |
| "step": 1029 |
| }, |
| { |
| "epoch": 2.74034620505992, |
| "grad_norm": 0.19448387232242922, |
| "learning_rate": 4.693675889328063e-06, |
| "loss": 0.2761, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.7430093209054593, |
| "grad_norm": 0.19395208512967949, |
| "learning_rate": 4.644268774703558e-06, |
| "loss": 0.2653, |
| "step": 1031 |
| }, |
| { |
| "epoch": 2.745672436750999, |
| "grad_norm": 0.18925291663752578, |
| "learning_rate": 4.594861660079051e-06, |
| "loss": 0.2568, |
| "step": 1032 |
| }, |
| { |
| "epoch": 2.748335552596538, |
| "grad_norm": 0.20842012726728598, |
| "learning_rate": 4.5454545454545455e-06, |
| "loss": 0.2689, |
| "step": 1033 |
| }, |
| { |
| "epoch": 2.7509986684420773, |
| "grad_norm": 0.20399895934870427, |
| "learning_rate": 4.496047430830039e-06, |
| "loss": 0.262, |
| "step": 1034 |
| }, |
| { |
| "epoch": 2.7536617842876163, |
| "grad_norm": 0.21638718896911208, |
| "learning_rate": 4.4466403162055334e-06, |
| "loss": 0.2589, |
| "step": 1035 |
| }, |
| { |
| "epoch": 2.756324900133156, |
| "grad_norm": 0.19757801710020018, |
| "learning_rate": 4.397233201581028e-06, |
| "loss": 0.2575, |
| "step": 1036 |
| }, |
| { |
| "epoch": 2.7589880159786953, |
| "grad_norm": 0.1930523815662032, |
| "learning_rate": 4.347826086956522e-06, |
| "loss": 0.2589, |
| "step": 1037 |
| }, |
| { |
| "epoch": 2.7616511318242343, |
| "grad_norm": 0.20093506678059855, |
| "learning_rate": 4.298418972332017e-06, |
| "loss": 0.2686, |
| "step": 1038 |
| }, |
| { |
| "epoch": 2.7643142476697737, |
| "grad_norm": 0.20051627815913756, |
| "learning_rate": 4.24901185770751e-06, |
| "loss": 0.2709, |
| "step": 1039 |
| }, |
| { |
| "epoch": 2.7669773635153128, |
| "grad_norm": 0.196594765327016, |
| "learning_rate": 4.1996047430830045e-06, |
| "loss": 0.2617, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.7696404793608522, |
| "grad_norm": 0.19314366189878793, |
| "learning_rate": 4.150197628458499e-06, |
| "loss": 0.2851, |
| "step": 1041 |
| }, |
| { |
| "epoch": 2.7723035952063917, |
| "grad_norm": 0.2161802526854043, |
| "learning_rate": 4.1007905138339924e-06, |
| "loss": 0.2674, |
| "step": 1042 |
| }, |
| { |
| "epoch": 2.7749667110519307, |
| "grad_norm": 0.18272700852758644, |
| "learning_rate": 4.051383399209487e-06, |
| "loss": 0.2523, |
| "step": 1043 |
| }, |
| { |
| "epoch": 2.77762982689747, |
| "grad_norm": 0.1914267001454524, |
| "learning_rate": 4.00197628458498e-06, |
| "loss": 0.271, |
| "step": 1044 |
| }, |
| { |
| "epoch": 2.780292942743009, |
| "grad_norm": 0.20563053341844564, |
| "learning_rate": 3.952569169960475e-06, |
| "loss": 0.2588, |
| "step": 1045 |
| }, |
| { |
| "epoch": 2.7829560585885487, |
| "grad_norm": 0.19474283827667518, |
| "learning_rate": 3.903162055335968e-06, |
| "loss": 0.259, |
| "step": 1046 |
| }, |
| { |
| "epoch": 2.785619174434088, |
| "grad_norm": 0.199541546086498, |
| "learning_rate": 3.853754940711463e-06, |
| "loss": 0.2766, |
| "step": 1047 |
| }, |
| { |
| "epoch": 2.788282290279627, |
| "grad_norm": 0.1962650749461456, |
| "learning_rate": 3.804347826086957e-06, |
| "loss": 0.275, |
| "step": 1048 |
| }, |
| { |
| "epoch": 2.790945406125166, |
| "grad_norm": 0.19771877806493995, |
| "learning_rate": 3.7549407114624506e-06, |
| "loss": 0.2651, |
| "step": 1049 |
| }, |
| { |
| "epoch": 2.7936085219707056, |
| "grad_norm": 0.25769379294942607, |
| "learning_rate": 3.705533596837945e-06, |
| "loss": 0.2792, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.796271637816245, |
| "grad_norm": 0.2095398170946154, |
| "learning_rate": 3.6561264822134385e-06, |
| "loss": 0.2671, |
| "step": 1051 |
| }, |
| { |
| "epoch": 2.798934753661784, |
| "grad_norm": 0.1929871299001819, |
| "learning_rate": 3.606719367588933e-06, |
| "loss": 0.2571, |
| "step": 1052 |
| }, |
| { |
| "epoch": 2.8015978695073236, |
| "grad_norm": 0.19854196709504868, |
| "learning_rate": 3.5573122529644273e-06, |
| "loss": 0.2734, |
| "step": 1053 |
| }, |
| { |
| "epoch": 2.8042609853528626, |
| "grad_norm": 0.20342959087962045, |
| "learning_rate": 3.507905138339921e-06, |
| "loss": 0.2675, |
| "step": 1054 |
| }, |
| { |
| "epoch": 2.806924101198402, |
| "grad_norm": 0.19566813473730155, |
| "learning_rate": 3.458498023715415e-06, |
| "loss": 0.2636, |
| "step": 1055 |
| }, |
| { |
| "epoch": 2.8095872170439415, |
| "grad_norm": 0.19394868609732532, |
| "learning_rate": 3.409090909090909e-06, |
| "loss": 0.2582, |
| "step": 1056 |
| }, |
| { |
| "epoch": 2.8122503328894806, |
| "grad_norm": 0.19315741666740258, |
| "learning_rate": 3.3596837944664035e-06, |
| "loss": 0.2744, |
| "step": 1057 |
| }, |
| { |
| "epoch": 2.81491344873502, |
| "grad_norm": 0.19500591092508857, |
| "learning_rate": 3.310276679841898e-06, |
| "loss": 0.2664, |
| "step": 1058 |
| }, |
| { |
| "epoch": 2.817576564580559, |
| "grad_norm": 0.20369824754516933, |
| "learning_rate": 3.2608695652173914e-06, |
| "loss": 0.2753, |
| "step": 1059 |
| }, |
| { |
| "epoch": 2.8202396804260985, |
| "grad_norm": 0.21679482311751339, |
| "learning_rate": 3.211462450592886e-06, |
| "loss": 0.2668, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.822902796271638, |
| "grad_norm": 0.19207166020188257, |
| "learning_rate": 3.1620553359683794e-06, |
| "loss": 0.2714, |
| "step": 1061 |
| }, |
| { |
| "epoch": 2.825565912117177, |
| "grad_norm": 0.18576307265975345, |
| "learning_rate": 3.1126482213438737e-06, |
| "loss": 0.2641, |
| "step": 1062 |
| }, |
| { |
| "epoch": 2.8282290279627165, |
| "grad_norm": 0.19216814433561258, |
| "learning_rate": 3.0632411067193677e-06, |
| "loss": 0.2686, |
| "step": 1063 |
| }, |
| { |
| "epoch": 2.8308921438082555, |
| "grad_norm": 0.20223820044568933, |
| "learning_rate": 3.0138339920948617e-06, |
| "loss": 0.2681, |
| "step": 1064 |
| }, |
| { |
| "epoch": 2.833555259653795, |
| "grad_norm": 0.2025068882484355, |
| "learning_rate": 2.9644268774703556e-06, |
| "loss": 0.2671, |
| "step": 1065 |
| }, |
| { |
| "epoch": 2.8362183754993344, |
| "grad_norm": 0.19192928047405172, |
| "learning_rate": 2.91501976284585e-06, |
| "loss": 0.2738, |
| "step": 1066 |
| }, |
| { |
| "epoch": 2.8388814913448734, |
| "grad_norm": 0.18813387022576608, |
| "learning_rate": 2.865612648221344e-06, |
| "loss": 0.2555, |
| "step": 1067 |
| }, |
| { |
| "epoch": 2.841544607190413, |
| "grad_norm": 0.17981642336035955, |
| "learning_rate": 2.816205533596838e-06, |
| "loss": 0.2649, |
| "step": 1068 |
| }, |
| { |
| "epoch": 2.844207723035952, |
| "grad_norm": 0.19082585501925517, |
| "learning_rate": 2.7667984189723323e-06, |
| "loss": 0.2717, |
| "step": 1069 |
| }, |
| { |
| "epoch": 2.8468708388814914, |
| "grad_norm": 0.1934715160744257, |
| "learning_rate": 2.7173913043478263e-06, |
| "loss": 0.2588, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.849533954727031, |
| "grad_norm": 0.1943027368827162, |
| "learning_rate": 2.6679841897233202e-06, |
| "loss": 0.2612, |
| "step": 1071 |
| }, |
| { |
| "epoch": 2.85219707057257, |
| "grad_norm": 0.20463059754180915, |
| "learning_rate": 2.6185770750988146e-06, |
| "loss": 0.2654, |
| "step": 1072 |
| }, |
| { |
| "epoch": 2.8548601864181093, |
| "grad_norm": 0.21078399413940485, |
| "learning_rate": 2.5691699604743086e-06, |
| "loss": 0.2671, |
| "step": 1073 |
| }, |
| { |
| "epoch": 2.8575233022636484, |
| "grad_norm": 0.20725181291345451, |
| "learning_rate": 2.5197628458498025e-06, |
| "loss": 0.2658, |
| "step": 1074 |
| }, |
| { |
| "epoch": 2.860186418109188, |
| "grad_norm": 0.19210859826009163, |
| "learning_rate": 2.4703557312252965e-06, |
| "loss": 0.2749, |
| "step": 1075 |
| }, |
| { |
| "epoch": 2.8628495339547273, |
| "grad_norm": 0.21087703729971102, |
| "learning_rate": 2.4209486166007905e-06, |
| "loss": 0.2565, |
| "step": 1076 |
| }, |
| { |
| "epoch": 2.8655126498002663, |
| "grad_norm": 0.1932869202958659, |
| "learning_rate": 2.3715415019762844e-06, |
| "loss": 0.2761, |
| "step": 1077 |
| }, |
| { |
| "epoch": 2.8681757656458053, |
| "grad_norm": 0.212098446975856, |
| "learning_rate": 2.322134387351779e-06, |
| "loss": 0.2739, |
| "step": 1078 |
| }, |
| { |
| "epoch": 2.870838881491345, |
| "grad_norm": 0.1907847773078055, |
| "learning_rate": 2.2727272727272728e-06, |
| "loss": 0.2645, |
| "step": 1079 |
| }, |
| { |
| "epoch": 2.8735019973368843, |
| "grad_norm": 0.2138904901003034, |
| "learning_rate": 2.2233201581027667e-06, |
| "loss": 0.266, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.8761651131824233, |
| "grad_norm": 0.19201411133409543, |
| "learning_rate": 2.173913043478261e-06, |
| "loss": 0.2589, |
| "step": 1081 |
| }, |
| { |
| "epoch": 2.8788282290279628, |
| "grad_norm": 0.1845739978063396, |
| "learning_rate": 2.124505928853755e-06, |
| "loss": 0.2597, |
| "step": 1082 |
| }, |
| { |
| "epoch": 2.881491344873502, |
| "grad_norm": 0.19569151053283082, |
| "learning_rate": 2.0750988142292494e-06, |
| "loss": 0.2617, |
| "step": 1083 |
| }, |
| { |
| "epoch": 2.8841544607190412, |
| "grad_norm": 0.19194512760322638, |
| "learning_rate": 2.0256916996047434e-06, |
| "loss": 0.2741, |
| "step": 1084 |
| }, |
| { |
| "epoch": 2.8868175765645807, |
| "grad_norm": 0.19164700223613637, |
| "learning_rate": 1.9762845849802374e-06, |
| "loss": 0.2557, |
| "step": 1085 |
| }, |
| { |
| "epoch": 2.8894806924101197, |
| "grad_norm": 0.20722349213232807, |
| "learning_rate": 1.9268774703557313e-06, |
| "loss": 0.2811, |
| "step": 1086 |
| }, |
| { |
| "epoch": 2.892143808255659, |
| "grad_norm": 0.21395903599582983, |
| "learning_rate": 1.8774703557312253e-06, |
| "loss": 0.2697, |
| "step": 1087 |
| }, |
| { |
| "epoch": 2.894806924101198, |
| "grad_norm": 0.19932722434475636, |
| "learning_rate": 1.8280632411067192e-06, |
| "loss": 0.2848, |
| "step": 1088 |
| }, |
| { |
| "epoch": 2.8974700399467377, |
| "grad_norm": 0.19719366657115883, |
| "learning_rate": 1.7786561264822136e-06, |
| "loss": 0.2715, |
| "step": 1089 |
| }, |
| { |
| "epoch": 2.900133155792277, |
| "grad_norm": 0.1975588211380889, |
| "learning_rate": 1.7292490118577076e-06, |
| "loss": 0.263, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.902796271637816, |
| "grad_norm": 0.1939515446139924, |
| "learning_rate": 1.6798418972332018e-06, |
| "loss": 0.2576, |
| "step": 1091 |
| }, |
| { |
| "epoch": 2.9054593874833556, |
| "grad_norm": 0.21461670844381095, |
| "learning_rate": 1.6304347826086957e-06, |
| "loss": 0.2622, |
| "step": 1092 |
| }, |
| { |
| "epoch": 2.9081225033288947, |
| "grad_norm": 0.18141714157708164, |
| "learning_rate": 1.5810276679841897e-06, |
| "loss": 0.2602, |
| "step": 1093 |
| }, |
| { |
| "epoch": 2.910785619174434, |
| "grad_norm": 0.18648909903146674, |
| "learning_rate": 1.5316205533596839e-06, |
| "loss": 0.2544, |
| "step": 1094 |
| }, |
| { |
| "epoch": 2.9134487350199736, |
| "grad_norm": 0.19749530453878072, |
| "learning_rate": 1.4822134387351778e-06, |
| "loss": 0.2511, |
| "step": 1095 |
| }, |
| { |
| "epoch": 2.9161118508655126, |
| "grad_norm": 0.2008025174676635, |
| "learning_rate": 1.432806324110672e-06, |
| "loss": 0.2621, |
| "step": 1096 |
| }, |
| { |
| "epoch": 2.918774966711052, |
| "grad_norm": 0.1926237458483956, |
| "learning_rate": 1.3833992094861662e-06, |
| "loss": 0.2584, |
| "step": 1097 |
| }, |
| { |
| "epoch": 2.921438082556591, |
| "grad_norm": 0.1917953810867646, |
| "learning_rate": 1.3339920948616601e-06, |
| "loss": 0.2696, |
| "step": 1098 |
| }, |
| { |
| "epoch": 2.9241011984021306, |
| "grad_norm": 0.18863387793323863, |
| "learning_rate": 1.2845849802371543e-06, |
| "loss": 0.269, |
| "step": 1099 |
| }, |
| { |
| "epoch": 2.92676431424767, |
| "grad_norm": 0.18859923936820897, |
| "learning_rate": 1.2351778656126482e-06, |
| "loss": 0.2629, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.929427430093209, |
| "grad_norm": 0.18918722042687142, |
| "learning_rate": 1.1857707509881422e-06, |
| "loss": 0.2659, |
| "step": 1101 |
| }, |
| { |
| "epoch": 2.932090545938748, |
| "grad_norm": 0.1909436486504395, |
| "learning_rate": 1.1363636363636364e-06, |
| "loss": 0.279, |
| "step": 1102 |
| }, |
| { |
| "epoch": 2.9347536617842875, |
| "grad_norm": 0.215394252478964, |
| "learning_rate": 1.0869565217391306e-06, |
| "loss": 0.2771, |
| "step": 1103 |
| }, |
| { |
| "epoch": 2.937416777629827, |
| "grad_norm": 0.1868050430391036, |
| "learning_rate": 1.0375494071146247e-06, |
| "loss": 0.255, |
| "step": 1104 |
| }, |
| { |
| "epoch": 2.940079893475366, |
| "grad_norm": 0.18705337019297927, |
| "learning_rate": 9.881422924901187e-07, |
| "loss": 0.2472, |
| "step": 1105 |
| }, |
| { |
| "epoch": 2.9427430093209055, |
| "grad_norm": 0.1935007995659731, |
| "learning_rate": 9.387351778656126e-07, |
| "loss": 0.2713, |
| "step": 1106 |
| }, |
| { |
| "epoch": 2.9454061251664445, |
| "grad_norm": 0.18412759277611498, |
| "learning_rate": 8.893280632411068e-07, |
| "loss": 0.2653, |
| "step": 1107 |
| }, |
| { |
| "epoch": 2.948069241011984, |
| "grad_norm": 0.18330377570006776, |
| "learning_rate": 8.399209486166009e-07, |
| "loss": 0.256, |
| "step": 1108 |
| }, |
| { |
| "epoch": 2.9507323568575234, |
| "grad_norm": 0.19950543771973236, |
| "learning_rate": 7.905138339920948e-07, |
| "loss": 0.2732, |
| "step": 1109 |
| }, |
| { |
| "epoch": 2.9533954727030625, |
| "grad_norm": 0.18701751210436693, |
| "learning_rate": 7.411067193675889e-07, |
| "loss": 0.2634, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.956058588548602, |
| "grad_norm": 0.18889807484399168, |
| "learning_rate": 6.916996047430831e-07, |
| "loss": 0.2519, |
| "step": 1111 |
| }, |
| { |
| "epoch": 2.958721704394141, |
| "grad_norm": 0.1898035633014786, |
| "learning_rate": 6.422924901185771e-07, |
| "loss": 0.2658, |
| "step": 1112 |
| }, |
| { |
| "epoch": 2.9613848202396804, |
| "grad_norm": 0.1864905294817814, |
| "learning_rate": 5.928853754940711e-07, |
| "loss": 0.2562, |
| "step": 1113 |
| }, |
| { |
| "epoch": 2.96404793608522, |
| "grad_norm": 0.18976880996630371, |
| "learning_rate": 5.434782608695653e-07, |
| "loss": 0.264, |
| "step": 1114 |
| }, |
| { |
| "epoch": 2.966711051930759, |
| "grad_norm": 0.19331420232956223, |
| "learning_rate": 4.940711462450593e-07, |
| "loss": 0.273, |
| "step": 1115 |
| }, |
| { |
| "epoch": 2.9693741677762984, |
| "grad_norm": 0.1930205378531215, |
| "learning_rate": 4.446640316205534e-07, |
| "loss": 0.2592, |
| "step": 1116 |
| }, |
| { |
| "epoch": 2.9720372836218374, |
| "grad_norm": 0.19028897264532088, |
| "learning_rate": 3.952569169960474e-07, |
| "loss": 0.2654, |
| "step": 1117 |
| }, |
| { |
| "epoch": 2.974700399467377, |
| "grad_norm": 0.19156481816748225, |
| "learning_rate": 3.4584980237154154e-07, |
| "loss": 0.261, |
| "step": 1118 |
| }, |
| { |
| "epoch": 2.9773635153129163, |
| "grad_norm": 0.1889476580235995, |
| "learning_rate": 2.9644268774703555e-07, |
| "loss": 0.2566, |
| "step": 1119 |
| }, |
| { |
| "epoch": 2.9800266311584553, |
| "grad_norm": 0.19663277621172817, |
| "learning_rate": 2.4703557312252967e-07, |
| "loss": 0.2751, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.982689747003995, |
| "grad_norm": 0.1848208372611624, |
| "learning_rate": 1.976284584980237e-07, |
| "loss": 0.2633, |
| "step": 1121 |
| }, |
| { |
| "epoch": 2.985352862849534, |
| "grad_norm": 0.18259691758877614, |
| "learning_rate": 1.4822134387351778e-07, |
| "loss": 0.2696, |
| "step": 1122 |
| }, |
| { |
| "epoch": 2.9880159786950733, |
| "grad_norm": 0.1849664900149779, |
| "learning_rate": 9.881422924901186e-08, |
| "loss": 0.2704, |
| "step": 1123 |
| }, |
| { |
| "epoch": 2.9906790945406128, |
| "grad_norm": 0.1854714711613864, |
| "learning_rate": 4.940711462450593e-08, |
| "loss": 0.2613, |
| "step": 1124 |
| }, |
| { |
| "epoch": 2.993342210386152, |
| "grad_norm": 0.18380044771707796, |
| "learning_rate": 0.0, |
| "loss": 0.2614, |
| "step": 1125 |
| }, |
| { |
| "epoch": 2.993342210386152, |
| "step": 1125, |
| "total_flos": 9.575573608085586e+17, |
| "train_loss": 0.4614936934842004, |
| "train_runtime": 99022.1208, |
| "train_samples_per_second": 0.182, |
| "train_steps_per_second": 0.011 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1125, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.575573608085586e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |