dealignai committed on
Commit
8bfd4a8
·
verified ·
1 Parent(s): 84dbcdf

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +604 -603
config.json CHANGED
@@ -1,605 +1,606 @@
1
  {
2
- "architectures": [
3
- "MiniMaxM2ForCausalLM"
4
- ],
5
- "attn_type_list": [
6
- 1,
7
- 1,
8
- 1,
9
- 1,
10
- 1,
11
- 1,
12
- 1,
13
- 1,
14
- 1,
15
- 1,
16
- 1,
17
- 1,
18
- 1,
19
- 1,
20
- 1,
21
- 1,
22
- 1,
23
- 1,
24
- 1,
25
- 1,
26
- 1,
27
- 1,
28
- 1,
29
- 1,
30
- 1,
31
- 1,
32
- 1,
33
- 1,
34
- 1,
35
- 1,
36
- 1,
37
- 1,
38
- 1,
39
- 1,
40
- 1,
41
- 1,
42
- 1,
43
- 1,
44
- 1,
45
- 1,
46
- 1,
47
- 1,
48
- 1,
49
- 1,
50
- 1,
51
- 1,
52
- 1,
53
- 1,
54
- 1,
55
- 1,
56
- 1,
57
- 1,
58
- 1,
59
- 1,
60
- 1,
61
- 1,
62
- 1,
63
- 1,
64
- 1,
65
- 1,
66
- 1,
67
- 1
68
- ],
69
- "auto_map": {
70
- "AutoConfig": "configuration_minimax_m2.MiniMaxM2Config",
71
- "AutoModelForCausalLM": "modeling_minimax_m2.MiniMaxM2ForCausalLM"
72
- },
73
- "head_dim": 128,
74
- "hidden_act": "silu",
75
- "hidden_size": 3072,
76
- "intermediate_size": 1536,
77
- "max_position_embeddings": 196608,
78
- "model_type": "minimax_m2",
79
- "mtp_transformer_layers": 1,
80
- "num_attention_heads": 48,
81
- "num_experts_per_tok": 8,
82
- "num_hidden_layers": 62,
83
- "num_key_value_heads": 8,
84
- "num_local_experts": 192,
85
- "num_mtp_modules": 3,
86
- "qk_norm_type": "per_layer",
87
- "quantization": {
88
- "group_size": 64,
89
- "bits": 6,
90
- "mode": "affine",
91
- "model.layers.0.block_sparse_moe.gate": {
92
- "group_size": 64,
93
- "bits": 8
94
- },
95
- "model.layers.1.block_sparse_moe.gate": {
96
- "group_size": 64,
97
- "bits": 8
98
- },
99
- "model.layers.2.block_sparse_moe.gate": {
100
- "group_size": 64,
101
- "bits": 8
102
- },
103
- "model.layers.3.block_sparse_moe.gate": {
104
- "group_size": 64,
105
- "bits": 8
106
- },
107
- "model.layers.4.block_sparse_moe.gate": {
108
- "group_size": 64,
109
- "bits": 8
110
- },
111
- "model.layers.5.block_sparse_moe.gate": {
112
- "group_size": 64,
113
- "bits": 8
114
- },
115
- "model.layers.6.block_sparse_moe.gate": {
116
- "group_size": 64,
117
- "bits": 8
118
- },
119
- "model.layers.7.block_sparse_moe.gate": {
120
- "group_size": 64,
121
- "bits": 8
122
- },
123
- "model.layers.8.block_sparse_moe.gate": {
124
- "group_size": 64,
125
- "bits": 8
126
- },
127
- "model.layers.9.block_sparse_moe.gate": {
128
- "group_size": 64,
129
- "bits": 8
130
- },
131
- "model.layers.10.block_sparse_moe.gate": {
132
- "group_size": 64,
133
- "bits": 8
134
- },
135
- "model.layers.11.block_sparse_moe.gate": {
136
- "group_size": 64,
137
- "bits": 8
138
- },
139
- "model.layers.12.block_sparse_moe.gate": {
140
- "group_size": 64,
141
- "bits": 8
142
- },
143
- "model.layers.13.block_sparse_moe.gate": {
144
- "group_size": 64,
145
- "bits": 8
146
- },
147
- "model.layers.14.block_sparse_moe.gate": {
148
- "group_size": 64,
149
- "bits": 8
150
- },
151
- "model.layers.15.block_sparse_moe.gate": {
152
- "group_size": 64,
153
- "bits": 8
154
- },
155
- "model.layers.16.block_sparse_moe.gate": {
156
- "group_size": 64,
157
- "bits": 8
158
- },
159
- "model.layers.17.block_sparse_moe.gate": {
160
- "group_size": 64,
161
- "bits": 8
162
- },
163
- "model.layers.18.block_sparse_moe.gate": {
164
- "group_size": 64,
165
- "bits": 8
166
- },
167
- "model.layers.19.block_sparse_moe.gate": {
168
- "group_size": 64,
169
- "bits": 8
170
- },
171
- "model.layers.20.block_sparse_moe.gate": {
172
- "group_size": 64,
173
- "bits": 8
174
- },
175
- "model.layers.21.block_sparse_moe.gate": {
176
- "group_size": 64,
177
- "bits": 8
178
- },
179
- "model.layers.22.block_sparse_moe.gate": {
180
- "group_size": 64,
181
- "bits": 8
182
- },
183
- "model.layers.23.block_sparse_moe.gate": {
184
- "group_size": 64,
185
- "bits": 8
186
- },
187
- "model.layers.24.block_sparse_moe.gate": {
188
- "group_size": 64,
189
- "bits": 8
190
- },
191
- "model.layers.25.block_sparse_moe.gate": {
192
- "group_size": 64,
193
- "bits": 8
194
- },
195
- "model.layers.26.block_sparse_moe.gate": {
196
- "group_size": 64,
197
- "bits": 8
198
- },
199
- "model.layers.27.block_sparse_moe.gate": {
200
- "group_size": 64,
201
- "bits": 8
202
- },
203
- "model.layers.28.block_sparse_moe.gate": {
204
- "group_size": 64,
205
- "bits": 8
206
- },
207
- "model.layers.29.block_sparse_moe.gate": {
208
- "group_size": 64,
209
- "bits": 8
210
- },
211
- "model.layers.30.block_sparse_moe.gate": {
212
- "group_size": 64,
213
- "bits": 8
214
- },
215
- "model.layers.31.block_sparse_moe.gate": {
216
- "group_size": 64,
217
- "bits": 8
218
- },
219
- "model.layers.32.block_sparse_moe.gate": {
220
- "group_size": 64,
221
- "bits": 8
222
- },
223
- "model.layers.33.block_sparse_moe.gate": {
224
- "group_size": 64,
225
- "bits": 8
226
- },
227
- "model.layers.34.block_sparse_moe.gate": {
228
- "group_size": 64,
229
- "bits": 8
230
- },
231
- "model.layers.35.block_sparse_moe.gate": {
232
- "group_size": 64,
233
- "bits": 8
234
- },
235
- "model.layers.36.block_sparse_moe.gate": {
236
- "group_size": 64,
237
- "bits": 8
238
- },
239
- "model.layers.37.block_sparse_moe.gate": {
240
- "group_size": 64,
241
- "bits": 8
242
- },
243
- "model.layers.38.block_sparse_moe.gate": {
244
- "group_size": 64,
245
- "bits": 8
246
- },
247
- "model.layers.39.block_sparse_moe.gate": {
248
- "group_size": 64,
249
- "bits": 8
250
- },
251
- "model.layers.40.block_sparse_moe.gate": {
252
- "group_size": 64,
253
- "bits": 8
254
- },
255
- "model.layers.41.block_sparse_moe.gate": {
256
- "group_size": 64,
257
- "bits": 8
258
- },
259
- "model.layers.42.block_sparse_moe.gate": {
260
- "group_size": 64,
261
- "bits": 8
262
- },
263
- "model.layers.43.block_sparse_moe.gate": {
264
- "group_size": 64,
265
- "bits": 8
266
- },
267
- "model.layers.44.block_sparse_moe.gate": {
268
- "group_size": 64,
269
- "bits": 8
270
- },
271
- "model.layers.45.block_sparse_moe.gate": {
272
- "group_size": 64,
273
- "bits": 8
274
- },
275
- "model.layers.46.block_sparse_moe.gate": {
276
- "group_size": 64,
277
- "bits": 8
278
- },
279
- "model.layers.47.block_sparse_moe.gate": {
280
- "group_size": 64,
281
- "bits": 8
282
- },
283
- "model.layers.48.block_sparse_moe.gate": {
284
- "group_size": 64,
285
- "bits": 8
286
- },
287
- "model.layers.49.block_sparse_moe.gate": {
288
- "group_size": 64,
289
- "bits": 8
290
- },
291
- "model.layers.50.block_sparse_moe.gate": {
292
- "group_size": 64,
293
- "bits": 8
294
- },
295
- "model.layers.51.block_sparse_moe.gate": {
296
- "group_size": 64,
297
- "bits": 8
298
- },
299
- "model.layers.52.block_sparse_moe.gate": {
300
- "group_size": 64,
301
- "bits": 8
302
- },
303
- "model.layers.53.block_sparse_moe.gate": {
304
- "group_size": 64,
305
- "bits": 8
306
- },
307
- "model.layers.54.block_sparse_moe.gate": {
308
- "group_size": 64,
309
- "bits": 8
310
- },
311
- "model.layers.55.block_sparse_moe.gate": {
312
- "group_size": 64,
313
- "bits": 8
314
- },
315
- "model.layers.56.block_sparse_moe.gate": {
316
- "group_size": 64,
317
- "bits": 8
318
- },
319
- "model.layers.57.block_sparse_moe.gate": {
320
- "group_size": 64,
321
- "bits": 8
322
- },
323
- "model.layers.58.block_sparse_moe.gate": {
324
- "group_size": 64,
325
- "bits": 8
326
- },
327
- "model.layers.59.block_sparse_moe.gate": {
328
- "group_size": 64,
329
- "bits": 8
330
- },
331
- "model.layers.60.block_sparse_moe.gate": {
332
- "group_size": 64,
333
- "bits": 8
334
- },
335
- "model.layers.61.block_sparse_moe.gate": {
336
- "group_size": 64,
337
- "bits": 8
338
- }
339
- },
340
- "quantization_config": {
341
- "group_size": 64,
342
- "bits": 6,
343
- "mode": "affine",
344
- "model.layers.0.block_sparse_moe.gate": {
345
- "group_size": 64,
346
- "bits": 8
347
- },
348
- "model.layers.1.block_sparse_moe.gate": {
349
- "group_size": 64,
350
- "bits": 8
351
- },
352
- "model.layers.2.block_sparse_moe.gate": {
353
- "group_size": 64,
354
- "bits": 8
355
- },
356
- "model.layers.3.block_sparse_moe.gate": {
357
- "group_size": 64,
358
- "bits": 8
359
- },
360
- "model.layers.4.block_sparse_moe.gate": {
361
- "group_size": 64,
362
- "bits": 8
363
- },
364
- "model.layers.5.block_sparse_moe.gate": {
365
- "group_size": 64,
366
- "bits": 8
367
- },
368
- "model.layers.6.block_sparse_moe.gate": {
369
- "group_size": 64,
370
- "bits": 8
371
- },
372
- "model.layers.7.block_sparse_moe.gate": {
373
- "group_size": 64,
374
- "bits": 8
375
- },
376
- "model.layers.8.block_sparse_moe.gate": {
377
- "group_size": 64,
378
- "bits": 8
379
- },
380
- "model.layers.9.block_sparse_moe.gate": {
381
- "group_size": 64,
382
- "bits": 8
383
- },
384
- "model.layers.10.block_sparse_moe.gate": {
385
- "group_size": 64,
386
- "bits": 8
387
- },
388
- "model.layers.11.block_sparse_moe.gate": {
389
- "group_size": 64,
390
- "bits": 8
391
- },
392
- "model.layers.12.block_sparse_moe.gate": {
393
- "group_size": 64,
394
- "bits": 8
395
- },
396
- "model.layers.13.block_sparse_moe.gate": {
397
- "group_size": 64,
398
- "bits": 8
399
- },
400
- "model.layers.14.block_sparse_moe.gate": {
401
- "group_size": 64,
402
- "bits": 8
403
- },
404
- "model.layers.15.block_sparse_moe.gate": {
405
- "group_size": 64,
406
- "bits": 8
407
- },
408
- "model.layers.16.block_sparse_moe.gate": {
409
- "group_size": 64,
410
- "bits": 8
411
- },
412
- "model.layers.17.block_sparse_moe.gate": {
413
- "group_size": 64,
414
- "bits": 8
415
- },
416
- "model.layers.18.block_sparse_moe.gate": {
417
- "group_size": 64,
418
- "bits": 8
419
- },
420
- "model.layers.19.block_sparse_moe.gate": {
421
- "group_size": 64,
422
- "bits": 8
423
- },
424
- "model.layers.20.block_sparse_moe.gate": {
425
- "group_size": 64,
426
- "bits": 8
427
- },
428
- "model.layers.21.block_sparse_moe.gate": {
429
- "group_size": 64,
430
- "bits": 8
431
- },
432
- "model.layers.22.block_sparse_moe.gate": {
433
- "group_size": 64,
434
- "bits": 8
435
- },
436
- "model.layers.23.block_sparse_moe.gate": {
437
- "group_size": 64,
438
- "bits": 8
439
- },
440
- "model.layers.24.block_sparse_moe.gate": {
441
- "group_size": 64,
442
- "bits": 8
443
- },
444
- "model.layers.25.block_sparse_moe.gate": {
445
- "group_size": 64,
446
- "bits": 8
447
- },
448
- "model.layers.26.block_sparse_moe.gate": {
449
- "group_size": 64,
450
- "bits": 8
451
- },
452
- "model.layers.27.block_sparse_moe.gate": {
453
- "group_size": 64,
454
- "bits": 8
455
- },
456
- "model.layers.28.block_sparse_moe.gate": {
457
- "group_size": 64,
458
- "bits": 8
459
- },
460
- "model.layers.29.block_sparse_moe.gate": {
461
- "group_size": 64,
462
- "bits": 8
463
- },
464
- "model.layers.30.block_sparse_moe.gate": {
465
- "group_size": 64,
466
- "bits": 8
467
- },
468
- "model.layers.31.block_sparse_moe.gate": {
469
- "group_size": 64,
470
- "bits": 8
471
- },
472
- "model.layers.32.block_sparse_moe.gate": {
473
- "group_size": 64,
474
- "bits": 8
475
- },
476
- "model.layers.33.block_sparse_moe.gate": {
477
- "group_size": 64,
478
- "bits": 8
479
- },
480
- "model.layers.34.block_sparse_moe.gate": {
481
- "group_size": 64,
482
- "bits": 8
483
- },
484
- "model.layers.35.block_sparse_moe.gate": {
485
- "group_size": 64,
486
- "bits": 8
487
- },
488
- "model.layers.36.block_sparse_moe.gate": {
489
- "group_size": 64,
490
- "bits": 8
491
- },
492
- "model.layers.37.block_sparse_moe.gate": {
493
- "group_size": 64,
494
- "bits": 8
495
- },
496
- "model.layers.38.block_sparse_moe.gate": {
497
- "group_size": 64,
498
- "bits": 8
499
- },
500
- "model.layers.39.block_sparse_moe.gate": {
501
- "group_size": 64,
502
- "bits": 8
503
- },
504
- "model.layers.40.block_sparse_moe.gate": {
505
- "group_size": 64,
506
- "bits": 8
507
- },
508
- "model.layers.41.block_sparse_moe.gate": {
509
- "group_size": 64,
510
- "bits": 8
511
- },
512
- "model.layers.42.block_sparse_moe.gate": {
513
- "group_size": 64,
514
- "bits": 8
515
- },
516
- "model.layers.43.block_sparse_moe.gate": {
517
- "group_size": 64,
518
- "bits": 8
519
- },
520
- "model.layers.44.block_sparse_moe.gate": {
521
- "group_size": 64,
522
- "bits": 8
523
- },
524
- "model.layers.45.block_sparse_moe.gate": {
525
- "group_size": 64,
526
- "bits": 8
527
- },
528
- "model.layers.46.block_sparse_moe.gate": {
529
- "group_size": 64,
530
- "bits": 8
531
- },
532
- "model.layers.47.block_sparse_moe.gate": {
533
- "group_size": 64,
534
- "bits": 8
535
- },
536
- "model.layers.48.block_sparse_moe.gate": {
537
- "group_size": 64,
538
- "bits": 8
539
- },
540
- "model.layers.49.block_sparse_moe.gate": {
541
- "group_size": 64,
542
- "bits": 8
543
- },
544
- "model.layers.50.block_sparse_moe.gate": {
545
- "group_size": 64,
546
- "bits": 8
547
- },
548
- "model.layers.51.block_sparse_moe.gate": {
549
- "group_size": 64,
550
- "bits": 8
551
- },
552
- "model.layers.52.block_sparse_moe.gate": {
553
- "group_size": 64,
554
- "bits": 8
555
- },
556
- "model.layers.53.block_sparse_moe.gate": {
557
- "group_size": 64,
558
- "bits": 8
559
- },
560
- "model.layers.54.block_sparse_moe.gate": {
561
- "group_size": 64,
562
- "bits": 8
563
- },
564
- "model.layers.55.block_sparse_moe.gate": {
565
- "group_size": 64,
566
- "bits": 8
567
- },
568
- "model.layers.56.block_sparse_moe.gate": {
569
- "group_size": 64,
570
- "bits": 8
571
- },
572
- "model.layers.57.block_sparse_moe.gate": {
573
- "group_size": 64,
574
- "bits": 8
575
- },
576
- "model.layers.58.block_sparse_moe.gate": {
577
- "group_size": 64,
578
- "bits": 8
579
- },
580
- "model.layers.59.block_sparse_moe.gate": {
581
- "group_size": 64,
582
- "bits": 8
583
- },
584
- "model.layers.60.block_sparse_moe.gate": {
585
- "group_size": 64,
586
- "bits": 8
587
- },
588
- "model.layers.61.block_sparse_moe.gate": {
589
- "group_size": 64,
590
- "bits": 8
591
- }
592
- },
593
- "rms_norm_eps": 1e-06,
594
- "rope_theta": 5000000,
595
- "rotary_dim": 64,
596
- "scoring_func": "sigmoid",
597
- "shared_intermediate_size": 0,
598
- "tie_word_embeddings": false,
599
- "transformers_version": "4.46.1",
600
- "use_cache": true,
601
- "use_mtp": true,
602
- "use_qk_norm": true,
603
- "use_routing_bias": true,
604
- "vocab_size": 200064
 
605
  }
 
1
  {
2
+ "architectures": [
3
+ "MiniMaxM2ForCausalLM"
4
+ ],
5
+ "attn_type_list": [
6
+ 1,
7
+ 1,
8
+ 1,
9
+ 1,
10
+ 1,
11
+ 1,
12
+ 1,
13
+ 1,
14
+ 1,
15
+ 1,
16
+ 1,
17
+ 1,
18
+ 1,
19
+ 1,
20
+ 1,
21
+ 1,
22
+ 1,
23
+ 1,
24
+ 1,
25
+ 1,
26
+ 1,
27
+ 1,
28
+ 1,
29
+ 1,
30
+ 1,
31
+ 1,
32
+ 1,
33
+ 1,
34
+ 1,
35
+ 1,
36
+ 1,
37
+ 1,
38
+ 1,
39
+ 1,
40
+ 1,
41
+ 1,
42
+ 1,
43
+ 1,
44
+ 1,
45
+ 1,
46
+ 1,
47
+ 1,
48
+ 1,
49
+ 1,
50
+ 1,
51
+ 1,
52
+ 1,
53
+ 1,
54
+ 1,
55
+ 1,
56
+ 1,
57
+ 1,
58
+ 1,
59
+ 1,
60
+ 1,
61
+ 1,
62
+ 1,
63
+ 1,
64
+ 1,
65
+ 1,
66
+ 1,
67
+ 1
68
+ ],
69
+ "auto_map": {
70
+ "AutoConfig": "configuration_minimax_m2.MiniMaxM2Config",
71
+ "AutoModelForCausalLM": "modeling_minimax_m2.MiniMaxM2ForCausalLM"
72
+ },
73
+ "head_dim": 128,
74
+ "hidden_act": "silu",
75
+ "hidden_size": 3072,
76
+ "intermediate_size": 1536,
77
+ "max_position_embeddings": 196608,
78
+ "model_type": "minimax_m2",
79
+ "mtp_transformer_layers": 1,
80
+ "num_attention_heads": 48,
81
+ "num_experts_per_tok": 8,
82
+ "num_hidden_layers": 62,
83
+ "num_key_value_heads": 8,
84
+ "num_local_experts": 192,
85
+ "num_mtp_modules": 3,
86
+ "qk_norm_type": "per_layer",
87
+ "quantization": {
88
+ "group_size": 64,
89
+ "bits": 6,
90
+ "mode": "affine",
91
+ "model.layers.0.block_sparse_moe.gate": {
92
+ "group_size": 64,
93
+ "bits": 8
94
+ },
95
+ "model.layers.1.block_sparse_moe.gate": {
96
+ "group_size": 64,
97
+ "bits": 8
98
+ },
99
+ "model.layers.2.block_sparse_moe.gate": {
100
+ "group_size": 64,
101
+ "bits": 8
102
+ },
103
+ "model.layers.3.block_sparse_moe.gate": {
104
+ "group_size": 64,
105
+ "bits": 8
106
+ },
107
+ "model.layers.4.block_sparse_moe.gate": {
108
+ "group_size": 64,
109
+ "bits": 8
110
+ },
111
+ "model.layers.5.block_sparse_moe.gate": {
112
+ "group_size": 64,
113
+ "bits": 8
114
+ },
115
+ "model.layers.6.block_sparse_moe.gate": {
116
+ "group_size": 64,
117
+ "bits": 8
118
+ },
119
+ "model.layers.7.block_sparse_moe.gate": {
120
+ "group_size": 64,
121
+ "bits": 8
122
+ },
123
+ "model.layers.8.block_sparse_moe.gate": {
124
+ "group_size": 64,
125
+ "bits": 8
126
+ },
127
+ "model.layers.9.block_sparse_moe.gate": {
128
+ "group_size": 64,
129
+ "bits": 8
130
+ },
131
+ "model.layers.10.block_sparse_moe.gate": {
132
+ "group_size": 64,
133
+ "bits": 8
134
+ },
135
+ "model.layers.11.block_sparse_moe.gate": {
136
+ "group_size": 64,
137
+ "bits": 8
138
+ },
139
+ "model.layers.12.block_sparse_moe.gate": {
140
+ "group_size": 64,
141
+ "bits": 8
142
+ },
143
+ "model.layers.13.block_sparse_moe.gate": {
144
+ "group_size": 64,
145
+ "bits": 8
146
+ },
147
+ "model.layers.14.block_sparse_moe.gate": {
148
+ "group_size": 64,
149
+ "bits": 8
150
+ },
151
+ "model.layers.15.block_sparse_moe.gate": {
152
+ "group_size": 64,
153
+ "bits": 8
154
+ },
155
+ "model.layers.16.block_sparse_moe.gate": {
156
+ "group_size": 64,
157
+ "bits": 8
158
+ },
159
+ "model.layers.17.block_sparse_moe.gate": {
160
+ "group_size": 64,
161
+ "bits": 8
162
+ },
163
+ "model.layers.18.block_sparse_moe.gate": {
164
+ "group_size": 64,
165
+ "bits": 8
166
+ },
167
+ "model.layers.19.block_sparse_moe.gate": {
168
+ "group_size": 64,
169
+ "bits": 8
170
+ },
171
+ "model.layers.20.block_sparse_moe.gate": {
172
+ "group_size": 64,
173
+ "bits": 8
174
+ },
175
+ "model.layers.21.block_sparse_moe.gate": {
176
+ "group_size": 64,
177
+ "bits": 8
178
+ },
179
+ "model.layers.22.block_sparse_moe.gate": {
180
+ "group_size": 64,
181
+ "bits": 8
182
+ },
183
+ "model.layers.23.block_sparse_moe.gate": {
184
+ "group_size": 64,
185
+ "bits": 8
186
+ },
187
+ "model.layers.24.block_sparse_moe.gate": {
188
+ "group_size": 64,
189
+ "bits": 8
190
+ },
191
+ "model.layers.25.block_sparse_moe.gate": {
192
+ "group_size": 64,
193
+ "bits": 8
194
+ },
195
+ "model.layers.26.block_sparse_moe.gate": {
196
+ "group_size": 64,
197
+ "bits": 8
198
+ },
199
+ "model.layers.27.block_sparse_moe.gate": {
200
+ "group_size": 64,
201
+ "bits": 8
202
+ },
203
+ "model.layers.28.block_sparse_moe.gate": {
204
+ "group_size": 64,
205
+ "bits": 8
206
+ },
207
+ "model.layers.29.block_sparse_moe.gate": {
208
+ "group_size": 64,
209
+ "bits": 8
210
+ },
211
+ "model.layers.30.block_sparse_moe.gate": {
212
+ "group_size": 64,
213
+ "bits": 8
214
+ },
215
+ "model.layers.31.block_sparse_moe.gate": {
216
+ "group_size": 64,
217
+ "bits": 8
218
+ },
219
+ "model.layers.32.block_sparse_moe.gate": {
220
+ "group_size": 64,
221
+ "bits": 8
222
+ },
223
+ "model.layers.33.block_sparse_moe.gate": {
224
+ "group_size": 64,
225
+ "bits": 8
226
+ },
227
+ "model.layers.34.block_sparse_moe.gate": {
228
+ "group_size": 64,
229
+ "bits": 8
230
+ },
231
+ "model.layers.35.block_sparse_moe.gate": {
232
+ "group_size": 64,
233
+ "bits": 8
234
+ },
235
+ "model.layers.36.block_sparse_moe.gate": {
236
+ "group_size": 64,
237
+ "bits": 8
238
+ },
239
+ "model.layers.37.block_sparse_moe.gate": {
240
+ "group_size": 64,
241
+ "bits": 8
242
+ },
243
+ "model.layers.38.block_sparse_moe.gate": {
244
+ "group_size": 64,
245
+ "bits": 8
246
+ },
247
+ "model.layers.39.block_sparse_moe.gate": {
248
+ "group_size": 64,
249
+ "bits": 8
250
+ },
251
+ "model.layers.40.block_sparse_moe.gate": {
252
+ "group_size": 64,
253
+ "bits": 8
254
+ },
255
+ "model.layers.41.block_sparse_moe.gate": {
256
+ "group_size": 64,
257
+ "bits": 8
258
+ },
259
+ "model.layers.42.block_sparse_moe.gate": {
260
+ "group_size": 64,
261
+ "bits": 8
262
+ },
263
+ "model.layers.43.block_sparse_moe.gate": {
264
+ "group_size": 64,
265
+ "bits": 8
266
+ },
267
+ "model.layers.44.block_sparse_moe.gate": {
268
+ "group_size": 64,
269
+ "bits": 8
270
+ },
271
+ "model.layers.45.block_sparse_moe.gate": {
272
+ "group_size": 64,
273
+ "bits": 8
274
+ },
275
+ "model.layers.46.block_sparse_moe.gate": {
276
+ "group_size": 64,
277
+ "bits": 8
278
+ },
279
+ "model.layers.47.block_sparse_moe.gate": {
280
+ "group_size": 64,
281
+ "bits": 8
282
+ },
283
+ "model.layers.48.block_sparse_moe.gate": {
284
+ "group_size": 64,
285
+ "bits": 8
286
+ },
287
+ "model.layers.49.block_sparse_moe.gate": {
288
+ "group_size": 64,
289
+ "bits": 8
290
+ },
291
+ "model.layers.50.block_sparse_moe.gate": {
292
+ "group_size": 64,
293
+ "bits": 8
294
+ },
295
+ "model.layers.51.block_sparse_moe.gate": {
296
+ "group_size": 64,
297
+ "bits": 8
298
+ },
299
+ "model.layers.52.block_sparse_moe.gate": {
300
+ "group_size": 64,
301
+ "bits": 8
302
+ },
303
+ "model.layers.53.block_sparse_moe.gate": {
304
+ "group_size": 64,
305
+ "bits": 8
306
+ },
307
+ "model.layers.54.block_sparse_moe.gate": {
308
+ "group_size": 64,
309
+ "bits": 8
310
+ },
311
+ "model.layers.55.block_sparse_moe.gate": {
312
+ "group_size": 64,
313
+ "bits": 8
314
+ },
315
+ "model.layers.56.block_sparse_moe.gate": {
316
+ "group_size": 64,
317
+ "bits": 8
318
+ },
319
+ "model.layers.57.block_sparse_moe.gate": {
320
+ "group_size": 64,
321
+ "bits": 8
322
+ },
323
+ "model.layers.58.block_sparse_moe.gate": {
324
+ "group_size": 64,
325
+ "bits": 8
326
+ },
327
+ "model.layers.59.block_sparse_moe.gate": {
328
+ "group_size": 64,
329
+ "bits": 8
330
+ },
331
+ "model.layers.60.block_sparse_moe.gate": {
332
+ "group_size": 64,
333
+ "bits": 8
334
+ },
335
+ "model.layers.61.block_sparse_moe.gate": {
336
+ "group_size": 64,
337
+ "bits": 8
338
+ }
339
+ },
340
+ "quantization_config": {
341
+ "group_size": 64,
342
+ "bits": 6,
343
+ "mode": "affine",
344
+ "model.layers.0.block_sparse_moe.gate": {
345
+ "group_size": 64,
346
+ "bits": 8
347
+ },
348
+ "model.layers.1.block_sparse_moe.gate": {
349
+ "group_size": 64,
350
+ "bits": 8
351
+ },
352
+ "model.layers.2.block_sparse_moe.gate": {
353
+ "group_size": 64,
354
+ "bits": 8
355
+ },
356
+ "model.layers.3.block_sparse_moe.gate": {
357
+ "group_size": 64,
358
+ "bits": 8
359
+ },
360
+ "model.layers.4.block_sparse_moe.gate": {
361
+ "group_size": 64,
362
+ "bits": 8
363
+ },
364
+ "model.layers.5.block_sparse_moe.gate": {
365
+ "group_size": 64,
366
+ "bits": 8
367
+ },
368
+ "model.layers.6.block_sparse_moe.gate": {
369
+ "group_size": 64,
370
+ "bits": 8
371
+ },
372
+ "model.layers.7.block_sparse_moe.gate": {
373
+ "group_size": 64,
374
+ "bits": 8
375
+ },
376
+ "model.layers.8.block_sparse_moe.gate": {
377
+ "group_size": 64,
378
+ "bits": 8
379
+ },
380
+ "model.layers.9.block_sparse_moe.gate": {
381
+ "group_size": 64,
382
+ "bits": 8
383
+ },
384
+ "model.layers.10.block_sparse_moe.gate": {
385
+ "group_size": 64,
386
+ "bits": 8
387
+ },
388
+ "model.layers.11.block_sparse_moe.gate": {
389
+ "group_size": 64,
390
+ "bits": 8
391
+ },
392
+ "model.layers.12.block_sparse_moe.gate": {
393
+ "group_size": 64,
394
+ "bits": 8
395
+ },
396
+ "model.layers.13.block_sparse_moe.gate": {
397
+ "group_size": 64,
398
+ "bits": 8
399
+ },
400
+ "model.layers.14.block_sparse_moe.gate": {
401
+ "group_size": 64,
402
+ "bits": 8
403
+ },
404
+ "model.layers.15.block_sparse_moe.gate": {
405
+ "group_size": 64,
406
+ "bits": 8
407
+ },
408
+ "model.layers.16.block_sparse_moe.gate": {
409
+ "group_size": 64,
410
+ "bits": 8
411
+ },
412
+ "model.layers.17.block_sparse_moe.gate": {
413
+ "group_size": 64,
414
+ "bits": 8
415
+ },
416
+ "model.layers.18.block_sparse_moe.gate": {
417
+ "group_size": 64,
418
+ "bits": 8
419
+ },
420
+ "model.layers.19.block_sparse_moe.gate": {
421
+ "group_size": 64,
422
+ "bits": 8
423
+ },
424
+ "model.layers.20.block_sparse_moe.gate": {
425
+ "group_size": 64,
426
+ "bits": 8
427
+ },
428
+ "model.layers.21.block_sparse_moe.gate": {
429
+ "group_size": 64,
430
+ "bits": 8
431
+ },
432
+ "model.layers.22.block_sparse_moe.gate": {
433
+ "group_size": 64,
434
+ "bits": 8
435
+ },
436
+ "model.layers.23.block_sparse_moe.gate": {
437
+ "group_size": 64,
438
+ "bits": 8
439
+ },
440
+ "model.layers.24.block_sparse_moe.gate": {
441
+ "group_size": 64,
442
+ "bits": 8
443
+ },
444
+ "model.layers.25.block_sparse_moe.gate": {
445
+ "group_size": 64,
446
+ "bits": 8
447
+ },
448
+ "model.layers.26.block_sparse_moe.gate": {
449
+ "group_size": 64,
450
+ "bits": 8
451
+ },
452
+ "model.layers.27.block_sparse_moe.gate": {
453
+ "group_size": 64,
454
+ "bits": 8
455
+ },
456
+ "model.layers.28.block_sparse_moe.gate": {
457
+ "group_size": 64,
458
+ "bits": 8
459
+ },
460
+ "model.layers.29.block_sparse_moe.gate": {
461
+ "group_size": 64,
462
+ "bits": 8
463
+ },
464
+ "model.layers.30.block_sparse_moe.gate": {
465
+ "group_size": 64,
466
+ "bits": 8
467
+ },
468
+ "model.layers.31.block_sparse_moe.gate": {
469
+ "group_size": 64,
470
+ "bits": 8
471
+ },
472
+ "model.layers.32.block_sparse_moe.gate": {
473
+ "group_size": 64,
474
+ "bits": 8
475
+ },
476
+ "model.layers.33.block_sparse_moe.gate": {
477
+ "group_size": 64,
478
+ "bits": 8
479
+ },
480
+ "model.layers.34.block_sparse_moe.gate": {
481
+ "group_size": 64,
482
+ "bits": 8
483
+ },
484
+ "model.layers.35.block_sparse_moe.gate": {
485
+ "group_size": 64,
486
+ "bits": 8
487
+ },
488
+ "model.layers.36.block_sparse_moe.gate": {
489
+ "group_size": 64,
490
+ "bits": 8
491
+ },
492
+ "model.layers.37.block_sparse_moe.gate": {
493
+ "group_size": 64,
494
+ "bits": 8
495
+ },
496
+ "model.layers.38.block_sparse_moe.gate": {
497
+ "group_size": 64,
498
+ "bits": 8
499
+ },
500
+ "model.layers.39.block_sparse_moe.gate": {
501
+ "group_size": 64,
502
+ "bits": 8
503
+ },
504
+ "model.layers.40.block_sparse_moe.gate": {
505
+ "group_size": 64,
506
+ "bits": 8
507
+ },
508
+ "model.layers.41.block_sparse_moe.gate": {
509
+ "group_size": 64,
510
+ "bits": 8
511
+ },
512
+ "model.layers.42.block_sparse_moe.gate": {
513
+ "group_size": 64,
514
+ "bits": 8
515
+ },
516
+ "model.layers.43.block_sparse_moe.gate": {
517
+ "group_size": 64,
518
+ "bits": 8
519
+ },
520
+ "model.layers.44.block_sparse_moe.gate": {
521
+ "group_size": 64,
522
+ "bits": 8
523
+ },
524
+ "model.layers.45.block_sparse_moe.gate": {
525
+ "group_size": 64,
526
+ "bits": 8
527
+ },
528
+ "model.layers.46.block_sparse_moe.gate": {
529
+ "group_size": 64,
530
+ "bits": 8
531
+ },
532
+ "model.layers.47.block_sparse_moe.gate": {
533
+ "group_size": 64,
534
+ "bits": 8
535
+ },
536
+ "model.layers.48.block_sparse_moe.gate": {
537
+ "group_size": 64,
538
+ "bits": 8
539
+ },
540
+ "model.layers.49.block_sparse_moe.gate": {
541
+ "group_size": 64,
542
+ "bits": 8
543
+ },
544
+ "model.layers.50.block_sparse_moe.gate": {
545
+ "group_size": 64,
546
+ "bits": 8
547
+ },
548
+ "model.layers.51.block_sparse_moe.gate": {
549
+ "group_size": 64,
550
+ "bits": 8
551
+ },
552
+ "model.layers.52.block_sparse_moe.gate": {
553
+ "group_size": 64,
554
+ "bits": 8
555
+ },
556
+ "model.layers.53.block_sparse_moe.gate": {
557
+ "group_size": 64,
558
+ "bits": 8
559
+ },
560
+ "model.layers.54.block_sparse_moe.gate": {
561
+ "group_size": 64,
562
+ "bits": 8
563
+ },
564
+ "model.layers.55.block_sparse_moe.gate": {
565
+ "group_size": 64,
566
+ "bits": 8
567
+ },
568
+ "model.layers.56.block_sparse_moe.gate": {
569
+ "group_size": 64,
570
+ "bits": 8
571
+ },
572
+ "model.layers.57.block_sparse_moe.gate": {
573
+ "group_size": 64,
574
+ "bits": 8
575
+ },
576
+ "model.layers.58.block_sparse_moe.gate": {
577
+ "group_size": 64,
578
+ "bits": 8
579
+ },
580
+ "model.layers.59.block_sparse_moe.gate": {
581
+ "group_size": 64,
582
+ "bits": 8
583
+ },
584
+ "model.layers.60.block_sparse_moe.gate": {
585
+ "group_size": 64,
586
+ "bits": 8
587
+ },
588
+ "model.layers.61.block_sparse_moe.gate": {
589
+ "group_size": 64,
590
+ "bits": 8
591
+ }
592
+ },
593
+ "rms_norm_eps": 1e-06,
594
+ "rope_theta": 5000000,
595
+ "rotary_dim": 64,
596
+ "scoring_func": "sigmoid",
597
+ "shared_intermediate_size": 0,
598
+ "tie_word_embeddings": false,
599
+ "transformers_version": "4.46.1",
600
+ "use_cache": true,
601
+ "use_mtp": true,
602
+ "use_qk_norm": true,
603
+ "use_routing_bias": true,
604
+ "vocab_size": 200064,
605
+ "eos_token_id": 200020
606
  }