JamesShenNV committed on
Commit
4caa7d9
·
verified ·
1 Parent(s): 5d24e54

Upload tokenizer_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +24 -45
tokenizer_config.json CHANGED
@@ -17,72 +17,51 @@
17
  "<|video|>"
18
  ],
19
  "image_token": "<|image|>",
20
- "is_local": true,
21
  "mask_token": "<mask>",
22
  "model_max_length": 1000000000000000019884624838656,
23
- "model_specific_special_tokens": {
24
- "audio_token": "<|audio|>",
25
- "boa_token": "<|audio>",
26
- "boi_token": "<|image>",
27
- "eoa_token": "<audio|>",
28
- "eoc_token": "<channel|>",
29
- "eoi_token": "<image|>",
30
- "eot_token": "<turn|>",
31
- "escape_token": "<|\"|>",
32
- "etc_token": "<tool_call|>",
33
- "etd_token": "<tool|>",
34
- "etr_token": "<tool_response|>",
35
- "image_token": "<|image|>",
36
- "soc_token": "<|channel>",
37
- "sot_token": "<|turn>",
38
- "stc_token": "<|tool_call>",
39
- "std_token": "<|tool>",
40
- "str_token": "<|tool_response>",
41
- "think_token": "<|think|>"
42
- },
43
- "pad_token": "<eos>",
44
  "padding_side": "left",
45
  "processor_class": "Gemma4Processor",
46
  "response_schema": {
 
47
  "properties": {
48
  "role": {
49
  "const": "assistant"
50
  },
51
  "thinking": {
52
- "type": "string",
53
- "x-regex": "<\\|channel\\>(?:thought\\n)?(.+?)<channel\\|>"
 
 
54
  },
55
  "tool_calls": {
 
 
56
  "items": {
 
57
  "properties": {
 
 
 
58
  "function": {
 
 
59
  "properties": {
60
- "arguments": {
61
- "type": "string",
62
- "x-mapping-regex": {
63
- "(\\{|,)\\s*([a-zA-Z_]\\w+):": "\\1\"\\2\":",
64
- "<\\|\\\"\\|>": "\""
65
- },
66
- "x-regex": "call:[^{]+(\\{.*\\})"
67
- },
68
  "name": {
69
- "type": "string",
70
- "x-regex": "call:([^{]+)"
 
 
 
 
71
  }
72
- },
73
- "type": "object"
74
- },
75
- "type": {
76
- "const": "function"
77
  }
78
- },
79
- "type": "object"
80
- },
81
- "type": "array",
82
- "x-regex-iterator": "<\\|tool_call\\>(.*?)<tool_call\\|>"
83
  }
84
  },
85
- "type": "object"
86
  },
87
  "soc_token": "<|channel>",
88
  "sot_token": "<|turn>",
 
17
  "<|video|>"
18
  ],
19
  "image_token": "<|image|>",
 
20
  "mask_token": "<mask>",
21
  "model_max_length": 1000000000000000019884624838656,
22
+ "pad_token": "<pad>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  "padding_side": "left",
24
  "processor_class": "Gemma4Processor",
25
  "response_schema": {
26
+ "type": "object",
27
  "properties": {
28
  "role": {
29
  "const": "assistant"
30
  },
31
  "thinking": {
32
+ "type": "string"
33
+ },
34
+ "content": {
35
+ "type": "string"
36
  },
37
  "tool_calls": {
38
+ "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>",
39
+ "type": "array",
40
  "items": {
41
+ "type": "object",
42
  "properties": {
43
+ "type": {
44
+ "const": "function"
45
+ },
46
  "function": {
47
+ "type": "object",
48
+ "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})",
49
  "properties": {
 
 
 
 
 
 
 
 
50
  "name": {
51
+ "type": "string"
52
+ },
53
+ "arguments": {
54
+ "type": "object",
55
+ "x-parser": "gemma4-tool-call",
56
+ "additionalProperties": {}
57
  }
58
+ }
 
 
 
 
59
  }
60
+ }
61
+ }
 
 
 
62
  }
63
  },
64
+ "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<content>(?:(?!\\<\\|tool_call\\>)(?!\\<turn\\|\\>).)+)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?:\\<turn\\|\\>)?"
65
  },
66
  "soc_token": "<|channel>",
67
  "sot_token": "<|turn>",