sravanthib committed on
Commit
887ba99
·
verified ·
1 Parent(s): 737475b

Training completed

Browse files
Files changed (4) hide show
  1. README.md +2 -2
  2. all_results.json +7 -7
  3. train_results.json +7 -7
  4. trainer_state.json +23 -23
README.md CHANGED
@@ -8,14 +8,14 @@ tags:
8
  - transformers
9
  pipeline_tag: text-generation
10
  model-index:
11
- - name: new-300-steps-DeepSeek-R1-Distill-Qwen-7B
12
  results: []
13
  ---
14
 
15
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
16
  should probably proofread and complete it, then remove this comment. -->
17
 
18
- # new-300-steps-DeepSeek-R1-Distill-Qwen-7B
19
 
20
  This model is a fine-tuned version of [meta-llama/Llama-3.2-1B](https://huggingface.co/meta-llama/Llama-3.2-1B) on an unknown dataset.
21
 
 
8
  - transformers
9
  pipeline_tag: text-generation
10
  model-index:
11
+ - name: DeepSeek-R1-Distill-Qwen-7B-squad-nemo-replicaa
12
  results: []
13
  ---
14
 
15
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
16
  should probably proofread and complete it, then remove this comment. -->
17
 
18
+ # DeepSeek-R1-Distill-Qwen-7B-squad-nemo-replicaa
19
 
20
  This model is a fine-tuned version of [meta-llama/Llama-3.2-1B](https://huggingface.co/meta-llama/Llama-3.2-1B) on an unknown dataset.
21
 
all_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "avg_step_time": 6.437501907348633,
3
  "epoch": 0.00228310502283105,
4
  "total_flos": 2405455154380800.0,
5
- "total_training_time": 75.84518218040466,
6
- "total_training_time_mins": 1.264086369673411,
7
- "train_loss": 1.237103076837957,
8
- "train_runtime": 68.9179,
9
- "train_samples_per_second": 2.902,
10
- "train_steps_per_second": 0.145
11
  }
 
1
  {
2
+ "avg_step_time": 6.510569095611572,
3
  "epoch": 0.00228310502283105,
4
  "total_flos": 2405455154380800.0,
5
+ "total_training_time": 76.59795832633972,
6
+ "total_training_time_mins": 1.2766326387723288,
7
+ "train_loss": 1.1941861145198345,
8
+ "train_runtime": 71.1991,
9
+ "train_samples_per_second": 2.809,
10
+ "train_steps_per_second": 0.14
11
  }
train_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "avg_step_time": 6.437501907348633,
3
  "epoch": 0.00228310502283105,
4
  "total_flos": 2405455154380800.0,
5
- "total_training_time": 75.84518218040466,
6
- "total_training_time_mins": 1.264086369673411,
7
- "train_loss": 1.237103076837957,
8
- "train_runtime": 68.9179,
9
- "train_samples_per_second": 2.902,
10
- "train_steps_per_second": 0.145
11
  }
 
1
  {
2
+ "avg_step_time": 6.510569095611572,
3
  "epoch": 0.00228310502283105,
4
  "total_flos": 2405455154380800.0,
5
+ "total_training_time": 76.59795832633972,
6
+ "total_training_time_mins": 1.2766326387723288,
7
+ "train_loss": 1.1941861145198345,
8
+ "train_runtime": 71.1991,
9
+ "train_samples_per_second": 2.809,
10
+ "train_steps_per_second": 0.14
11
  }
trainer_state.json CHANGED
@@ -11,82 +11,82 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.00022831050228310502,
14
- "grad_norm": 58.639156341552734,
15
  "learning_rate": 0.0,
16
  "loss": 10.6752,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.00045662100456621003,
21
- "grad_norm": 19.770322799682617,
22
  "learning_rate": 0.001,
23
- "loss": 0.7099,
24
  "step": 2
25
  },
26
  {
27
  "epoch": 0.0006849315068493151,
28
- "grad_norm": 20.064632415771484,
29
  "learning_rate": 0.001,
30
- "loss": 0.7537,
31
  "step": 3
32
  },
33
  {
34
  "epoch": 0.0009132420091324201,
35
- "grad_norm": 0.2150338739156723,
36
  "learning_rate": 0.001,
37
- "loss": 0.034,
38
  "step": 4
39
  },
40
  {
41
  "epoch": 0.001141552511415525,
42
- "grad_norm": 0.10391011089086533,
43
  "learning_rate": 0.001,
44
- "loss": 0.0311,
45
  "step": 5
46
  },
47
  {
48
  "epoch": 0.0013698630136986301,
49
- "grad_norm": 0.13938532769680023,
50
  "learning_rate": 0.001,
51
- "loss": 0.0313,
52
  "step": 6
53
  },
54
  {
55
  "epoch": 0.0015981735159817352,
56
- "grad_norm": 0.14156730473041534,
57
  "learning_rate": 0.001,
58
- "loss": 0.0308,
59
  "step": 7
60
  },
61
  {
62
  "epoch": 0.0018264840182648401,
63
- "grad_norm": 0.16459645330905914,
64
  "learning_rate": 0.001,
65
- "loss": 0.0328,
66
  "step": 8
67
  },
68
  {
69
  "epoch": 0.002054794520547945,
70
- "grad_norm": 0.23767752945423126,
71
  "learning_rate": 0.001,
72
- "loss": 0.0369,
73
  "step": 9
74
  },
75
  {
76
  "epoch": 0.00228310502283105,
77
- "grad_norm": 0.22308476269245148,
78
  "learning_rate": 0.001,
79
- "loss": 0.0352,
80
  "step": 10
81
  },
82
  {
83
  "epoch": 0.00228310502283105,
84
  "step": 10,
85
  "total_flos": 2405455154380800.0,
86
- "train_loss": 1.237103076837957,
87
- "train_runtime": 68.9179,
88
- "train_samples_per_second": 2.902,
89
- "train_steps_per_second": 0.145
90
  }
91
  ],
92
  "logging_steps": 1,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.00022831050228310502,
14
+ "grad_norm": 61.761863708496094,
15
  "learning_rate": 0.0,
16
  "loss": 10.6752,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.00045662100456621003,
21
+ "grad_norm": 14.540397644042969,
22
  "learning_rate": 0.001,
23
+ "loss": 0.4893,
24
  "step": 2
25
  },
26
  {
27
  "epoch": 0.0006849315068493151,
28
+ "grad_norm": 15.115448951721191,
29
  "learning_rate": 0.001,
30
+ "loss": 0.533,
31
  "step": 3
32
  },
33
  {
34
  "epoch": 0.0009132420091324201,
35
+ "grad_norm": 0.1035713478922844,
36
  "learning_rate": 0.001,
37
+ "loss": 0.0327,
38
  "step": 4
39
  },
40
  {
41
  "epoch": 0.001141552511415525,
42
+ "grad_norm": 0.1258726716041565,
43
  "learning_rate": 0.001,
44
+ "loss": 0.0315,
45
  "step": 5
46
  },
47
  {
48
  "epoch": 0.0013698630136986301,
49
+ "grad_norm": 0.13812150061130524,
50
  "learning_rate": 0.001,
51
+ "loss": 0.0314,
52
  "step": 6
53
  },
54
  {
55
  "epoch": 0.0015981735159817352,
56
+ "grad_norm": 0.14780189096927643,
57
  "learning_rate": 0.001,
58
+ "loss": 0.0321,
59
  "step": 7
60
  },
61
  {
62
  "epoch": 0.0018264840182648401,
63
+ "grad_norm": 0.18661947548389435,
64
  "learning_rate": 0.001,
65
+ "loss": 0.0352,
66
  "step": 8
67
  },
68
  {
69
  "epoch": 0.002054794520547945,
70
+ "grad_norm": 0.2705967426300049,
71
  "learning_rate": 0.001,
72
+ "loss": 0.0417,
73
  "step": 9
74
  },
75
  {
76
  "epoch": 0.00228310502283105,
77
+ "grad_norm": 0.24941541254520416,
78
  "learning_rate": 0.001,
79
+ "loss": 0.0397,
80
  "step": 10
81
  },
82
  {
83
  "epoch": 0.00228310502283105,
84
  "step": 10,
85
  "total_flos": 2405455154380800.0,
86
+ "train_loss": 1.1941861145198345,
87
+ "train_runtime": 71.1991,
88
+ "train_samples_per_second": 2.809,
89
+ "train_steps_per_second": 0.14
90
  }
91
  ],
92
  "logging_steps": 1,