{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 0,
      "content": "[UNK]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 1,
      "content": "[PAD]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 2,
      "content": "[CLS]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 3,
      "content": "[SEP]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 4,
      "content": "[MASK]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": {
    "type": "Lowercase"
  },
  "pre_tokenizer": {
    "type": "Whitespace"
  },
  "post_processor": {
    "type": "TemplateProcessing",
    "single": [
      {
        "Sequence": {
          "id": "A",
          "type_id": 0
        }
      }
    ],
    "pair": [
      {
        "Sequence": {
          "id": "A",
          "type_id": 0
        }
      },
      {
        "Sequence": {
          "id": "B",
          "type_id": 1
        }
      }
    ],
    "special_tokens": {}
  },
  "decoder": null,
  "model": {
    "type": "BPE",
    "dropout": null,
    "unk_token": "[UNK]",
    "continuing_subword_prefix": null,
    "end_of_word_suffix": null,
    "fuse_unk": false,
    "byte_fallback": false,
    "ignore_merges": false,
    "vocab": {
      "[UNK]": 0,
      "[PAD]": 1,
      "[CLS]": 2,
      "[SEP]": 3,
      "[MASK]": 4,
      "a": 5,
      "b": 6,
      "c": 7,
      "d": 8,
      "e": 9,
      "f": 10,
      "g": 11,
      "h": 12,
      "i": 13,
      "k": 14,
      "l": 15,
      "m": 16,
      "n": 17,
      "o": 18,
      "r": 19,
      "s": 20,
      "t": 21,
      "u": 22,
      "v": 23,
      "z": 24,
      "en": 25,
      "is": 26,
      "st": 27,
      "te": 28,
      "ar": 29,
      "ce": 30,
      "ch": 31,
      "ct": 32,
      "er": 33,
      "in": 34,
      "om": 35,
      "to": 36,
      "ing": 37,
      "ab": 38,
      "an": 39,
      "at": 40,
      "ate": 41,
      "ace": 42,
      "bu": 43,
      "cl": 44,
      "cr": 45,
      "cu": 46,
      "cab": 47,
      "ding": 48,
      "ear": 49,
      "fr": 50,
      "face": 51,
      "gg": 52,
      "gen": 53,
      "hu": 54,
      "his": 55,
      "il": 56,
      "iz": 57,
      "ite": 58,
      "ict": 59,
      "ken": 60,
      "ocab": 61,
      "re": 62,
      "rict": 63,
      "sen": 64,
      "scr": 65,
      "ten": 66,
      "this": 67,
      "ure": 68,
      "vocab": 69,
      "stom": 70,
      "strict": 71,
      "test": 72,
      "arch": 73,
      "cture": 74,
      "erate": 75,
      "token": 76,
      "and": 77,
      "atch": 78,
      "buil": 79,
      "clear": 80,
      "custom": 81,
      "from": 82,
      "gging": 83,
      "generate": 84,
      "hugging": 85,
      "izer": 86,
      "itecture": 87,
      "senten": 88,
      "scratch": 89,
      "architecture": 90,
      "tokenizer": 91,
      "building": 92,
      "sentence": 93
    },
    "merges": [
      [
        "e",
        "n"
      ],
      [
        "i",
        "s"
      ],
      [
        "s",
        "t"
      ],
      [
        "t",
        "e"
      ],
      [
        "a",
        "r"
      ],
      [
        "c",
        "e"
      ],
      [
        "c",
        "h"
      ],
      [
        "c",
        "t"
      ],
      [
        "e",
        "r"
      ],
      [
        "i",
        "n"
      ],
      [
        "o",
        "m"
      ],
      [
        "t",
        "o"
      ],
      [
        "in",
        "g"
      ],
      [
        "a",
        "b"
      ],
      [
        "a",
        "n"
      ],
      [
        "a",
        "t"
      ],
      [
        "a",
        "te"
      ],
      [
        "a",
        "ce"
      ],
      [
        "b",
        "u"
      ],
      [
        "c",
        "l"
      ],
      [
        "c",
        "r"
      ],
      [
        "c",
        "u"
      ],
      [
        "c",
        "ab"
      ],
      [
        "d",
        "ing"
      ],
      [
        "e",
        "ar"
      ],
      [
        "f",
        "r"
      ],
      [
        "f",
        "ace"
      ],
      [
        "g",
        "g"
      ],
      [
        "g",
        "en"
      ],
      [
        "h",
        "u"
      ],
      [
        "h",
        "is"
      ],
      [
        "i",
        "l"
      ],
      [
        "i",
        "z"
      ],
      [
        "i",
        "te"
      ],
      [
        "i",
        "ct"
      ],
      [
        "k",
        "en"
      ],
      [
        "o",
        "cab"
      ],
      [
        "r",
        "e"
      ],
      [
        "r",
        "ict"
      ],
      [
        "s",
        "en"
      ],
      [
        "s",
        "cr"
      ],
      [
        "t",
        "en"
      ],
      [
        "t",
        "his"
      ],
      [
        "u",
        "re"
      ],
      [
        "v",
        "ocab"
      ],
      [
        "st",
        "om"
      ],
      [
        "st",
        "rict"
      ],
      [
        "te",
        "st"
      ],
      [
        "ar",
        "ch"
      ],
      [
        "ct",
        "ure"
      ],
      [
        "er",
        "ate"
      ],
      [
        "to",
        "ken"
      ],
      [
        "an",
        "d"
      ],
      [
        "at",
        "ch"
      ],
      [
        "bu",
        "il"
      ],
      [
        "cl",
        "ear"
      ],
      [
        "cu",
        "stom"
      ],
      [
        "fr",
        "om"
      ],
      [
        "gg",
        "ing"
      ],
      [
        "gen",
        "erate"
      ],
      [
        "hu",
        "gging"
      ],
      [
        "iz",
        "er"
      ],
      [
        "ite",
        "cture"
      ],
      [
        "sen",
        "ten"
      ],
      [
        "scr",
        "atch"
      ],
      [
        "arch",
        "itecture"
      ],
      [
        "token",
        "izer"
      ],
      [
        "buil",
        "ding"
      ],
      [
        "senten",
        "ce"
      ]
    ]
  }
}