{
  "version": "1.0",
  "truncation": {
    "max_length": 65536,
    "strategy": "longest_first",
    "direction": "right"
  },
  "padding": {
    "strategy": "right",
    "pad_id": 0,
    "pad_token": "<pad>"
  },
  "added_tokens": [
    {"id": 0, "content": "<pad>", "single_word": false, "special": true},
    {"id": 1, "content": "<bos>", "single_word": false, "special": true},
    {"id": 2, "content": "<eos>", "single_word": false, "special": true}
  ],
  "normalizer": {
    "type": "NFKC"
  },
  "pre_tokenizer": {
    "type": "Whitespace"
  },
  "post_processor": {
    "type": "TemplateProcessing",
    "single": "<bos> $A <eos>",
    "pair": "<bos> $A <eos> $B <eos>",
    "special_tokens": {
      "<bos>": 1,
      "<eos>": 2
    }
  },
  "decoder": {
    "type": "WordPiece",
    "prefix": "##"
  },
  "model": {
    "type": "BPE",
    "vocab": {
      "<pad>": 0,
      "<bos>": 1,
      "<eos>": 2,
      "the": 3,
      "of": 4,
      "to": 5,
      "and": 6,
      "I": 7,
      "you": 8
    },
    "merges": [
      "t h",
      "th e",
      "a n",
      "a nd",
      "i n",
      "i ng"
    ]
  }
}
|