Skip to content

Bedrock Jurassic Tokenizer

BedrockJurassicTokenizer

Bases: SimpleTokenizer

Source code in griptape/tokenizers/bedrock_jurassic_tokenizer.py
@define()
class BedrockJurassicTokenizer(SimpleTokenizer):
    """Tokenizer configuration for AI21 Jurassic models on Amazon Bedrock.

    Extends ``SimpleTokenizer`` and declares a default characters-per-token
    ratio together with per-model-prefix input and output token limits. The
    class defines no methods of its own; all behavior is inherited.

    Attributes:
        model: Model identifier (keyword-only, required). Presumably a
            Bedrock model id such as ``"ai21.j2-mid-v1"`` -- confirm against
            callers.
        characters_per_token: Number of characters counted as one token
            (keyword-only). Defaults to ``DEFAULT_CHARACTERS_PER_TOKEN``.
    """

    # Default characters-per-token ratio; the link below is the cited source.
    DEFAULT_CHARACTERS_PER_TOKEN = 6  # https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html#model-customization-prepare-finetuning
    # Maximum input tokens keyed by model-name prefix.
    MODEL_PREFIXES_TO_MAX_INPUT_TOKENS = {"ai21": 8192}
    # Maximum output tokens keyed by model-name prefix.
    # NOTE(review): the specific "ai21.j2-*" entries come before the generic
    # "ai21" entry; the lookup (not shown here) presumably takes the first
    # matching prefix, so confirm before reordering these keys.
    MODEL_PREFIXES_TO_MAX_OUTPUT_TOKENS = {
        "ai21.j2-mid-v1": 8191,
        "ai21.j2-ultra-v1": 8191,
        "ai21.j2-large-v1": 8191,
        "ai21": 2048,
    }

    model: str = field(kw_only=True)
    # takes_self=True hands the partially built instance to the factory, so
    # the default is read through ``self`` and follows any subclass override
    # of DEFAULT_CHARACTERS_PER_TOKEN.
    characters_per_token: int = field(
        default=Factory(lambda self: self.DEFAULT_CHARACTERS_PER_TOKEN, takes_self=True), kw_only=True
    )

`DEFAULT_CHARACTERS_PER_TOKEN = 6` (class attribute, instance attribute)

`MODEL_PREFIXES_TO_MAX_INPUT_TOKENS = {'ai21': 8192}` (class attribute, instance attribute)

`MODEL_PREFIXES_TO_MAX_OUTPUT_TOKENS = {'ai21.j2-mid-v1': 8191, 'ai21.j2-ultra-v1': 8191, 'ai21.j2-large-v1': 8191, 'ai21': 2048}` (class attribute, instance attribute)

`characters_per_token: int = field(default=Factory(lambda self: self.DEFAULT_CHARACTERS_PER_TOKEN, takes_self=True), kw_only=True)` (class attribute, instance attribute)

`model: str = field(kw_only=True)` (class attribute, instance attribute)