sravanthib committed on
Commit
8f4da6c
·
verified ·
1 Parent(s): e3496ae

Training completed

Browse files
Files changed (3) hide show
  1. all_results.json +4 -4
  2. train_results.json +4 -4
  3. trainer_state.json +14 -14
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.88,
3
  "total_flos": 3.380713971525878e+17,
4
- "train_loss": 0.9234967929124832,
5
- "train_runtime": 643.7881,
6
- "train_samples_per_second": 6.213,
7
- "train_steps_per_second": 0.078
8
  }
 
1
  {
2
  "epoch": 3.88,
3
  "total_flos": 3.380713971525878e+17,
4
+ "train_loss": 0.9221555387973785,
5
+ "train_runtime": 646.4979,
6
+ "train_samples_per_second": 6.187,
7
+ "train_steps_per_second": 0.077
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.88,
3
  "total_flos": 3.380713971525878e+17,
4
- "train_loss": 0.9234967929124832,
5
- "train_runtime": 643.7881,
6
- "train_samples_per_second": 6.213,
7
- "train_steps_per_second": 0.078
8
  }
 
1
  {
2
  "epoch": 3.88,
3
  "total_flos": 3.380713971525878e+17,
4
+ "train_loss": 0.9221555387973785,
5
+ "train_runtime": 646.4979,
6
+ "train_samples_per_second": 6.187,
7
+ "train_steps_per_second": 0.077
8
  }
trainer_state.json CHANGED
@@ -11,47 +11,47 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.8,
14
- "grad_norm": 0.5147947669029236,
15
  "learning_rate": 0.0001,
16
- "loss": 4.4204,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 1.56,
21
- "grad_norm": 0.16838183999061584,
22
  "learning_rate": 0.0001,
23
- "loss": 0.0551,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 2.32,
28
- "grad_norm": 0.16586190462112427,
29
  "learning_rate": 0.0001,
30
- "loss": 0.054,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 3.08,
35
- "grad_norm": 0.2226952314376831,
36
  "learning_rate": 0.0001,
37
- "loss": 0.0473,
38
  "step": 40
39
  },
40
  {
41
  "epoch": 3.88,
42
- "grad_norm": 0.10044433176517487,
43
  "learning_rate": 0.0001,
44
- "loss": 0.0407,
45
  "step": 50
46
  },
47
  {
48
  "epoch": 3.88,
49
  "step": 50,
50
  "total_flos": 3.380713971525878e+17,
51
- "train_loss": 0.9234967929124832,
52
- "train_runtime": 643.7881,
53
- "train_samples_per_second": 6.213,
54
- "train_steps_per_second": 0.078
55
  }
56
  ],
57
  "logging_steps": 10,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.8,
14
+ "grad_norm": 0.14122037589550018,
15
  "learning_rate": 0.0001,
16
+ "loss": 4.4121,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 1.56,
21
+ "grad_norm": 0.16405606269836426,
22
  "learning_rate": 0.0001,
23
+ "loss": 0.0555,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 2.32,
28
+ "grad_norm": 0.1663062423467636,
29
  "learning_rate": 0.0001,
30
+ "loss": 0.0541,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 3.08,
35
+ "grad_norm": 0.1997026652097702,
36
  "learning_rate": 0.0001,
37
+ "loss": 0.0479,
38
  "step": 40
39
  },
40
  {
41
  "epoch": 3.88,
42
+ "grad_norm": 0.09167370945215225,
43
  "learning_rate": 0.0001,
44
+ "loss": 0.0412,
45
  "step": 50
46
  },
47
  {
48
  "epoch": 3.88,
49
  "step": 50,
50
  "total_flos": 3.380713971525878e+17,
51
+ "train_loss": 0.9221555387973785,
52
+ "train_runtime": 646.4979,
53
+ "train_samples_per_second": 6.187,
54
+ "train_steps_per_second": 0.077
55
  }
56
  ],
57
  "logging_steps": 10,