Skip to content

Bedrock Titan Tokenizer

BedrockTitanTokenizer

Bases: SimpleTokenizer

Source code in griptape/griptape/tokenizers/bedrock_titan_tokenizer.py
# NOTE(review): `define`, `field` and `Factory` look like the attrs API (the
# imports are not visible here). If so, frozen=True makes instances immutable
# after construction -- confirm against the file's import block.
@define(frozen=True)
class BedrockTitanTokenizer(SimpleTokenizer):
    """Tokenizer configuration for Amazon Bedrock Titan text models.

    Subclass of ``SimpleTokenizer`` that only declares configuration: three
    class-level default constants and four keyword-only fields. No methods are
    defined in this class body; any tokenizing behavior is presumably
    inherited from ``SimpleTokenizer`` (not shown here -- verify).

    Attributes:
        characters_per_token: Defaults to ``DEFAULT_CHARACTERS_PER_TOKEN`` (6).
        max_tokens: Defaults to ``DEFAULT_MAX_TOKENS`` (4096).
        stop_sequences: Defaults to a list containing ``"User:"``, produced by
            a factory callable rather than a shared literal.
        model: Model identifier string. Declared without a default, so it
            must be passed by keyword when constructing the tokenizer.
    """

    # Class-level defaults. DEFAULT_MODEL is defined here but is NOT wired in
    # as the default of the `model` field below.
    # NOTE(review): presumably consumed by callers elsewhere -- confirm, or
    # consider whether `model` was meant to default to it.
    DEFAULT_MODEL = "amazon.titan-text-express-v1"
    # Source for the ratio of 6 characters per token:
    # https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html#model-customization-prepare-finetuning
    DEFAULT_CHARACTERS_PER_TOKEN = 6
    DEFAULT_MAX_TOKENS = 4096

    # All fields are keyword-only.
    characters_per_token: int = field(default=DEFAULT_CHARACTERS_PER_TOKEN, kw_only=True)
    max_tokens: int = field(default=DEFAULT_MAX_TOKENS, kw_only=True)
    # The default is built by calling the lambda via Factory, not by reusing a
    # single list object written inline as the default.
    stop_sequences: list[str] = field(default=Factory(lambda: ["User:"]), kw_only=True)
    # No default: required keyword argument.
    model: str = field(kw_only=True)

`DEFAULT_CHARACTERS_PER_TOKEN = 6` (class-attribute, instance-attribute)

`DEFAULT_MAX_TOKENS = 4096` (class-attribute, instance-attribute)

`DEFAULT_MODEL = 'amazon.titan-text-express-v1'` (class-attribute, instance-attribute)

`characters_per_token: int = field(default=DEFAULT_CHARACTERS_PER_TOKEN, kw_only=True)` (class-attribute, instance-attribute)

`max_tokens: int = field(default=DEFAULT_MAX_TOKENS, kw_only=True)` (class-attribute, instance-attribute)

`model: str = field(kw_only=True)` (class-attribute, instance-attribute)

`stop_sequences: list[str] = field(default=Factory(lambda: ['User:']), kw_only=True)` (class-attribute, instance-attribute)