tokenizers
tokenizer
shaantastic24 committed on
Commit
4d04c01
·
verified ·
1 Parent(s): b0bc90e

Create config.json

Browse files
Files changed (1) hide show
  1. config.json +32 -0
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ library_name: tokenizers
4
+ tags:
5
+ - tokenizer
6
+ language:
7
+ - hi
8
+ - as
9
+ - mr
10
+ - gu
11
+ - pa
12
+ - en
13
+ - or
14
+ - te
15
+ - ta
16
+ - ml
17
+ - kn
18
+ - bn
19
+ - sd
20
+ - ur
21
+ - ne
22
+ - ks
23
+ - sa
24
+ - gom
25
+ - mai
26
+ - mni
27
+ - brx
28
+ - doi
29
+ - sat
30
+ Vocab_size:
31
+   256000
32
+ ---