Training completed

Files changed (5) hide show

adapter_config.json CHANGED Viewed

@@ -25,13 +25,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "gate_proj",
-    "up_proj",
-    "v_proj",
     "q_proj",
     "o_proj",
     "k_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "q_proj",
     "o_proj",
+    "down_proj",
+    "gate_proj",
     "k_proj",
+    "v_proj",
+    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4011120d61484ba366228771bb4f4d23b97bd371b5f3a47b01777f1f28d40796
 size 40422208

 version https://git-lfs.github.com/spec/v1
+oid sha256:2be5a9f8760b42794c4abcf2dfe7a3ea53d86803aff66855ff8dac68f7cd17b2
 size 40422208

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 0.0091324200913242,
     "total_flos": 6.970544231337165e+16,
-    "train_loss": 4.493294906616211,
-    "train_runtime": 133.1193,
-    "train_samples_per_second": 6.01,
     "train_steps_per_second": 0.075
 }

 {
     "epoch": 0.0091324200913242,
     "total_flos": 6.970544231337165e+16,
+    "train_loss": 4.553681945800781,
+    "train_runtime": 133.9104,
+    "train_samples_per_second": 5.974,
     "train_steps_per_second": 0.075
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 0.0091324200913242,
     "total_flos": 6.970544231337165e+16,
-    "train_loss": 4.493294906616211,
-    "train_runtime": 133.1193,
-    "train_samples_per_second": 6.01,
     "train_steps_per_second": 0.075
 }

 {
     "epoch": 0.0091324200913242,
     "total_flos": 6.970544231337165e+16,
+    "train_loss": 4.553681945800781,
+    "train_runtime": 133.9104,
+    "train_samples_per_second": 5.974,
     "train_steps_per_second": 0.075
 }

trainer_state.json CHANGED Viewed

@@ -11,18 +11,18 @@
   "log_history": [
     {
       "epoch": 0.0091324200913242,
-      "grad_norm": 0.7308942675590515,
       "learning_rate": 0.0001,
-      "loss": 4.4933,
       "step": 10
     },
     {
       "epoch": 0.0091324200913242,
       "step": 10,
       "total_flos": 6.970544231337165e+16,
-      "train_loss": 4.493294906616211,
-      "train_runtime": 133.1193,
-      "train_samples_per_second": 6.01,
       "train_steps_per_second": 0.075
     }
   ],

   "log_history": [
     {
       "epoch": 0.0091324200913242,
+      "grad_norm": 0.3158293664455414,
       "learning_rate": 0.0001,
+      "loss": 4.5537,
       "step": 10
     },
     {
       "epoch": 0.0091324200913242,
       "step": 10,
       "total_flos": 6.970544231337165e+16,
+      "train_loss": 4.553681945800781,
+      "train_runtime": 133.9104,
+      "train_samples_per_second": 5.974,
       "train_steps_per_second": 0.075
     }
   ],