| { | |
| "version": "1.0", | |
| "truncation": null, | |
| "padding": null, | |
| "added_tokens": [ | |
| { | |
| "id": 0, | |
| "content": "[UNK]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 1, | |
| "content": "[PAD]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 2, | |
| "content": "[CLS]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 3, | |
| "content": "[SEP]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 4, | |
| "content": "[MASK]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| } | |
| ], | |
| "normalizer": { | |
| "type": "Lowercase" | |
| }, | |
| "pre_tokenizer": { | |
| "type": "Whitespace" | |
| }, | |
| "post_processor": { | |
| "type": "TemplateProcessing", | |
| "single": [ | |
| { | |
| "Sequence": { | |
| "id": "A", | |
| "type_id": 0 | |
| } | |
| } | |
| ], | |
| "pair": [ | |
| { | |
| "Sequence": { | |
| "id": "A", | |
| "type_id": 0 | |
| } | |
| }, | |
| { | |
| "Sequence": { | |
| "id": "B", | |
| "type_id": 1 | |
| } | |
| } | |
| ], | |
| "special_tokens": {} | |
| }, | |
| "decoder": null, | |
| "model": { | |
| "type": "BPE", | |
| "dropout": null, | |
| "unk_token": "[UNK]", | |
| "continuing_subword_prefix": null, | |
| "end_of_word_suffix": null, | |
| "fuse_unk": false, | |
| "byte_fallback": false, | |
| "ignore_merges": false, | |
| "vocab": { | |
| "[UNK]": 0, | |
| "[PAD]": 1, | |
| "[CLS]": 2, | |
| "[SEP]": 3, | |
| "[MASK]": 4, | |
| "a": 5, | |
| "b": 6, | |
| "c": 7, | |
| "d": 8, | |
| "e": 9, | |
| "f": 10, | |
| "g": 11, | |
| "h": 12, | |
| "i": 13, | |
| "k": 14, | |
| "l": 15, | |
| "m": 16, | |
| "n": 17, | |
| "o": 18, | |
| "r": 19, | |
| "s": 20, | |
| "t": 21, | |
| "u": 22, | |
| "v": 23, | |
| "z": 24, | |
| "en": 25, | |
| "is": 26, | |
| "st": 27, | |
| "te": 28, | |
| "ar": 29, | |
| "ce": 30, | |
| "ch": 31, | |
| "ct": 32, | |
| "er": 33, | |
| "in": 34, | |
| "om": 35, | |
| "to": 36, | |
| "ing": 37, | |
| "ab": 38, | |
| "an": 39, | |
| "at": 40, | |
| "ate": 41, | |
| "ace": 42, | |
| "bu": 43, | |
| "cl": 44, | |
| "cr": 45, | |
| "cu": 46, | |
| "cab": 47, | |
| "ding": 48, | |
| "ear": 49, | |
| "fr": 50, | |
| "face": 51, | |
| "gg": 52, | |
| "gen": 53, | |
| "hu": 54, | |
| "his": 55, | |
| "il": 56, | |
| "iz": 57, | |
| "ite": 58, | |
| "ict": 59, | |
| "ken": 60, | |
| "ocab": 61, | |
| "re": 62, | |
| "rict": 63, | |
| "sen": 64, | |
| "scr": 65, | |
| "ten": 66, | |
| "this": 67, | |
| "ure": 68, | |
| "vocab": 69, | |
| "stom": 70, | |
| "strict": 71, | |
| "test": 72, | |
| "arch": 73, | |
| "cture": 74, | |
| "erate": 75, | |
| "token": 76, | |
| "and": 77, | |
| "atch": 78, | |
| "buil": 79, | |
| "clear": 80, | |
| "custom": 81, | |
| "from": 82, | |
| "gging": 83, | |
| "generate": 84, | |
| "hugging": 85, | |
| "izer": 86, | |
| "itecture": 87, | |
| "senten": 88, | |
| "scratch": 89, | |
| "architecture": 90, | |
| "tokenizer": 91, | |
| "building": 92, | |
| "sentence": 93 | |
| }, | |
| "merges": [ | |
| [ | |
| "e", | |
| "n" | |
| ], | |
| [ | |
| "i", | |
| "s" | |
| ], | |
| [ | |
| "s", | |
| "t" | |
| ], | |
| [ | |
| "t", | |
| "e" | |
| ], | |
| [ | |
| "a", | |
| "r" | |
| ], | |
| [ | |
| "c", | |
| "e" | |
| ], | |
| [ | |
| "c", | |
| "h" | |
| ], | |
| [ | |
| "c", | |
| "t" | |
| ], | |
| [ | |
| "e", | |
| "r" | |
| ], | |
| [ | |
| "i", | |
| "n" | |
| ], | |
| [ | |
| "o", | |
| "m" | |
| ], | |
| [ | |
| "t", | |
| "o" | |
| ], | |
| [ | |
| "in", | |
| "g" | |
| ], | |
| [ | |
| "a", | |
| "b" | |
| ], | |
| [ | |
| "a", | |
| "n" | |
| ], | |
| [ | |
| "a", | |
| "t" | |
| ], | |
| [ | |
| "a", | |
| "te" | |
| ], | |
| [ | |
| "a", | |
| "ce" | |
| ], | |
| [ | |
| "b", | |
| "u" | |
| ], | |
| [ | |
| "c", | |
| "l" | |
| ], | |
| [ | |
| "c", | |
| "r" | |
| ], | |
| [ | |
| "c", | |
| "u" | |
| ], | |
| [ | |
| "c", | |
| "ab" | |
| ], | |
| [ | |
| "d", | |
| "ing" | |
| ], | |
| [ | |
| "e", | |
| "ar" | |
| ], | |
| [ | |
| "f", | |
| "r" | |
| ], | |
| [ | |
| "f", | |
| "ace" | |
| ], | |
| [ | |
| "g", | |
| "g" | |
| ], | |
| [ | |
| "g", | |
| "en" | |
| ], | |
| [ | |
| "h", | |
| "u" | |
| ], | |
| [ | |
| "h", | |
| "is" | |
| ], | |
| [ | |
| "i", | |
| "l" | |
| ], | |
| [ | |
| "i", | |
| "z" | |
| ], | |
| [ | |
| "i", | |
| "te" | |
| ], | |
| [ | |
| "i", | |
| "ct" | |
| ], | |
| [ | |
| "k", | |
| "en" | |
| ], | |
| [ | |
| "o", | |
| "cab" | |
| ], | |
| [ | |
| "r", | |
| "e" | |
| ], | |
| [ | |
| "r", | |
| "ict" | |
| ], | |
| [ | |
| "s", | |
| "en" | |
| ], | |
| [ | |
| "s", | |
| "cr" | |
| ], | |
| [ | |
| "t", | |
| "en" | |
| ], | |
| [ | |
| "t", | |
| "his" | |
| ], | |
| [ | |
| "u", | |
| "re" | |
| ], | |
| [ | |
| "v", | |
| "ocab" | |
| ], | |
| [ | |
| "st", | |
| "om" | |
| ], | |
| [ | |
| "st", | |
| "rict" | |
| ], | |
| [ | |
| "te", | |
| "st" | |
| ], | |
| [ | |
| "ar", | |
| "ch" | |
| ], | |
| [ | |
| "ct", | |
| "ure" | |
| ], | |
| [ | |
| "er", | |
| "ate" | |
| ], | |
| [ | |
| "to", | |
| "ken" | |
| ], | |
| [ | |
| "an", | |
| "d" | |
| ], | |
| [ | |
| "at", | |
| "ch" | |
| ], | |
| [ | |
| "bu", | |
| "il" | |
| ], | |
| [ | |
| "cl", | |
| "ear" | |
| ], | |
| [ | |
| "cu", | |
| "stom" | |
| ], | |
| [ | |
| "fr", | |
| "om" | |
| ], | |
| [ | |
| "gg", | |
| "ing" | |
| ], | |
| [ | |
| "gen", | |
| "erate" | |
| ], | |
| [ | |
| "hu", | |
| "gging" | |
| ], | |
| [ | |
| "iz", | |
| "er" | |
| ], | |
| [ | |
| "ite", | |
| "cture" | |
| ], | |
| [ | |
| "sen", | |
| "ten" | |
| ], | |
| [ | |
| "scr", | |
| "atch" | |
| ], | |
| [ | |
| "arch", | |
| "itecture" | |
| ], | |
| [ | |
| "token", | |
| "izer" | |
| ], | |
| [ | |
| "buil", | |
| "ding" | |
| ], | |
| [ | |
| "senten", | |
| "ce" | |
| ] | |
| ] | |
| } | |
| } |