{
  "best_metric": 0.46247440576553345,
  "best_model_checkpoint": "Phi-3.5-mini-instruct_text_to_sql_lora\\checkpoint-1000",
  "epoch": 1.4903129657228018,
  "eval_steps": 50,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07451564828614009,
      "grad_norm": 0.09373754262924194,
      "learning_rate": 0.0002,
      "loss": 0.9134,
      "step": 50
    },
    {
      "epoch": 0.07451564828614009,
      "eval_loss": 0.5543281435966492,
      "eval_runtime": 19.9013,
      "eval_samples_per_second": 6.231,
      "eval_steps_per_second": 0.804,
      "step": 50
    },
    {
      "epoch": 0.14903129657228018,
      "grad_norm": 0.04845063388347626,
      "learning_rate": 0.0001996800092633612,
      "loss": 0.5243,
      "step": 100
    },
    {
      "epoch": 0.14903129657228018,
      "eval_loss": 0.5150236487388611,
      "eval_runtime": 19.8752,
      "eval_samples_per_second": 6.239,
      "eval_steps_per_second": 0.805,
      "step": 100
    },
    {
      "epoch": 0.22354694485842028,
      "grad_norm": 0.06224009767174721,
      "learning_rate": 0.00019872208493487546,
      "loss": 0.5046,
      "step": 150
    },
    {
      "epoch": 0.22354694485842028,
      "eval_loss": 0.504132091999054,
      "eval_runtime": 19.8753,
      "eval_samples_per_second": 6.239,
      "eval_steps_per_second": 0.805,
      "step": 150
    },
    {
      "epoch": 0.29806259314456035,
      "grad_norm": 0.053420498967170715,
      "learning_rate": 0.0001971323575527731,
      "loss": 0.4989,
      "step": 200
    },
    {
      "epoch": 0.29806259314456035,
      "eval_loss": 0.49748167395591736,
      "eval_runtime": 19.873,
      "eval_samples_per_second": 6.24,
      "eval_steps_per_second": 0.805,
      "step": 200
    },
    {
      "epoch": 0.37257824143070045,
      "grad_norm": 0.04381432756781578,
      "learning_rate": 0.0001949210010777752,
      "loss": 0.4873,
      "step": 250
    },
    {
      "epoch": 0.37257824143070045,
      "eval_loss": 0.4926624298095703,
      "eval_runtime": 19.9753,
      "eval_samples_per_second": 6.208,
      "eval_steps_per_second": 0.801,
      "step": 250
    },
    {
      "epoch": 0.44709388971684055,
      "grad_norm": 0.05697334185242653,
      "learning_rate": 0.00019210216778162994,
      "loss": 0.4854,
      "step": 300
    },
    {
      "epoch": 0.44709388971684055,
      "eval_loss": 0.48829007148742676,
      "eval_runtime": 19.8877,
      "eval_samples_per_second": 6.235,
      "eval_steps_per_second": 0.805,
      "step": 300
    },
    {
      "epoch": 0.5216095380029806,
      "grad_norm": 0.07128433138132095,
      "learning_rate": 0.0001886938976751951,
      "loss": 0.4788,
      "step": 350
    },
    {
      "epoch": 0.5216095380029806,
      "eval_loss": 0.48432213068008423,
      "eval_runtime": 19.8588,
      "eval_samples_per_second": 6.244,
      "eval_steps_per_second": 0.806,
      "step": 350
    },
    {
      "epoch": 0.5961251862891207,
      "grad_norm": 0.049252185970544815,
      "learning_rate": 0.00018471800305571129,
      "loss": 0.4811,
      "step": 400
    },
    {
      "epoch": 0.5961251862891207,
      "eval_loss": 0.4807914197444916,
      "eval_runtime": 19.8677,
      "eval_samples_per_second": 6.241,
      "eval_steps_per_second": 0.805,
      "step": 400
    },
    {
      "epoch": 0.6706408345752608,
      "grad_norm": 0.05810828506946564,
      "learning_rate": 0.00018019992891214008,
      "loss": 0.4796,
      "step": 450
    },
    {
      "epoch": 0.6706408345752608,
      "eval_loss": 0.478938490152359,
      "eval_runtime": 19.8654,
      "eval_samples_per_second": 6.242,
      "eval_steps_per_second": 0.805,
      "step": 450
    },
    {
      "epoch": 0.7451564828614009,
      "grad_norm": 0.06429937481880188,
      "learning_rate": 0.00017516859008194938,
      "loss": 0.4738,
      "step": 500
    },
    {
      "epoch": 0.7451564828614009,
      "eval_loss": 0.47758275270462036,
      "eval_runtime": 19.8891,
      "eval_samples_per_second": 6.235,
      "eval_steps_per_second": 0.804,
      "step": 500
    },
    {
      "epoch": 0.819672131147541,
      "grad_norm": 0.057866841554641724,
      "learning_rate": 0.00016965618620151017,
      "loss": 0.4693,
      "step": 550
    },
    {
      "epoch": 0.819672131147541,
      "eval_loss": 0.47498929500579834,
      "eval_runtime": 19.8807,
      "eval_samples_per_second": 6.237,
      "eval_steps_per_second": 0.805,
      "step": 550
    },
    {
      "epoch": 0.8941877794336811,
      "grad_norm": 0.07081956416368484,
      "learning_rate": 0.00016369799563438958,
      "loss": 0.4696,
      "step": 600
    },
    {
      "epoch": 0.8941877794336811,
      "eval_loss": 0.4721943736076355,
      "eval_runtime": 19.8698,
      "eval_samples_per_second": 6.241,
      "eval_steps_per_second": 0.805,
      "step": 600
    },
    {
      "epoch": 0.9687034277198212,
      "grad_norm": 0.04943346604704857,
      "learning_rate": 0.00015733214969635968,
      "loss": 0.4682,
      "step": 650
    },
    {
      "epoch": 0.9687034277198212,
      "eval_loss": 0.4704548120498657,
      "eval_runtime": 21.0797,
      "eval_samples_per_second": 5.882,
      "eval_steps_per_second": 0.759,
      "step": 650
    },
    {
      "epoch": 1.0432190760059612,
      "grad_norm": 0.04892728850245476,
      "learning_rate": 0.00015059938862204127,
      "loss": 0.4607,
      "step": 700
    },
    {
      "epoch": 1.0432190760059612,
      "eval_loss": 0.4692542850971222,
      "eval_runtime": 21.0437,
      "eval_samples_per_second": 5.893,
      "eval_steps_per_second": 0.76,
      "step": 700
    },
    {
      "epoch": 1.1177347242921014,
      "grad_norm": 0.05664736032485962,
      "learning_rate": 0.00014354280083495006,
      "loss": 0.4535,
      "step": 750
    },
    {
      "epoch": 1.1177347242921014,
      "eval_loss": 0.4676324725151062,
      "eval_runtime": 22.3216,
      "eval_samples_per_second": 5.555,
      "eval_steps_per_second": 0.717,
      "step": 750
    },
    {
      "epoch": 1.1922503725782414,
      "grad_norm": 0.04623207449913025,
      "learning_rate": 0.000136207547189569,
      "loss": 0.4538,
      "step": 800
    },
    {
      "epoch": 1.1922503725782414,
      "eval_loss": 0.4663061201572418,
      "eval_runtime": 19.8619,
      "eval_samples_per_second": 6.243,
      "eval_steps_per_second": 0.806,
      "step": 800
    },
    {
      "epoch": 1.2667660208643814,
      "grad_norm": 0.05065590888261795,
      "learning_rate": 0.00012864057195024643,
      "loss": 0.4508,
      "step": 850
    },
    {
      "epoch": 1.2667660208643814,
      "eval_loss": 0.4653000235557556,
      "eval_runtime": 20.4016,
      "eval_samples_per_second": 6.078,
      "eval_steps_per_second": 0.784,
      "step": 850
    },
    {
      "epoch": 1.3412816691505216,
      "grad_norm": 0.050148364156484604,
      "learning_rate": 0.00012089030235660155,
      "loss": 0.449,
      "step": 900
    },
    {
      "epoch": 1.3412816691505216,
      "eval_loss": 0.4636177122592926,
      "eval_runtime": 20.3024,
      "eval_samples_per_second": 6.108,
      "eval_steps_per_second": 0.788,
      "step": 900
    },
    {
      "epoch": 1.4157973174366618,
      "grad_norm": 0.05486341193318367,
      "learning_rate": 0.00011300633869816275,
      "loss": 0.4493,
      "step": 950
    },
    {
      "epoch": 1.4157973174366618,
      "eval_loss": 0.4632996618747711,
      "eval_runtime": 20.384,
      "eval_samples_per_second": 6.083,
      "eval_steps_per_second": 0.785,
      "step": 950
    },
    {
      "epoch": 1.4903129657228018,
      "grad_norm": 0.05443250760436058,
      "learning_rate": 0.00010503913688170396,
      "loss": 0.4512,
      "step": 1000
    },
    {
      "epoch": 1.4903129657228018,
      "eval_loss": 0.46247440576553345,
      "eval_runtime": 20.3518,
      "eval_samples_per_second": 6.093,
      "eval_steps_per_second": 0.786,
      "step": 1000
    }
  ],
  "logging_steps": 50,
  "max_steps": 2013,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.733040336763617e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}