trohrbaugh committed on
Commit
397296b
·
verified ·
1 Parent(s): aa35666

Upload tokenizer

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
chat_template.jinja ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {% set has_system = messages[0]['role'] == 'system' %}{% if not has_system %}{{ bos_token + 'system
2
+ You are an AI programming assistant, utilizing the Seed-Coder model, developed by ByteDance Seed, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.
3
+
4
+ ' + eos_token }}{% endif %}{% for message in messages %}{{ bos_token + message['role'] + '
5
+ ' + message['content'] | trim + eos_token }}{% endfor %}{% if add_generation_prompt %}{{ bos_token + 'assistant
6
+ '}}{% endif %}
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2128ee20a31a0044f8c974670003040103aaef6c5381db2d815908f5ea2f5330
3
+ size 11891784
tokenizer_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<[begin▁of▁sentence]>",
4
+ "clean_up_tokenization_spaces": false,
5
+ "eos_token": "<[end▁of▁sentence]>",
6
+ "is_local": false,
7
+ "mask_token": "<[MASK_TOKEN]>",
8
+ "model_max_length": 32768,
9
+ "pad_token": "<[PAD▁TOKEN]>",
10
+ "sep_token": "<[SEP▁TOKEN]>",
11
+ "tokenizer_class": "TokenizersBackend"
12
+ }