Skip to content

Hugging Face tokenizer

HuggingFaceTokenizer

Bases: BaseTokenizer

Source code in griptape/tokenizers/huggingface_tokenizer.py
@define()
class HuggingFaceTokenizer(BaseTokenizer):
    """Tokenizer that delegates token counting to a Hugging Face `transformers` tokenizer.

    Attributes:
        tokenizer: The wrapped tokenizer object. When not supplied, it is loaded with
            `transformers.AutoTokenizer.from_pretrained(self.model)`; `model` is not declared
            in this class and is presumably provided by `BaseTokenizer`.
        max_input_tokens: Defaults to the wrapped tokenizer's `model_max_length`.
        max_output_tokens: Defaults to 4096.
    """

    # `transformers` is resolved inside the factory, so it is only imported when the
    # default is actually needed (i.e. no tokenizer was passed in).
    tokenizer: PreTrainedTokenizerBase = field(
        kw_only=True,
        default=Factory(
            lambda instance: import_optional_dependency("transformers").AutoTokenizer.from_pretrained(
                instance.model
            ),
            takes_self=True,
        ),
    )
    # Must stay declared after `tokenizer`: this default reads `instance.tokenizer`.
    max_input_tokens: int = field(
        kw_only=True,
        default=Factory(lambda instance: instance.tokenizer.model_max_length, takes_self=True),
    )
    max_output_tokens: int = field(kw_only=True, default=4096)

    def count_tokens(self, text: str) -> int:
        """Return the length of the sequence produced by encoding `text` with the wrapped tokenizer."""
        token_ids = self.tokenizer.encode(text)
        return len(token_ids)

`max_input_tokens: int = field(default=Factory(lambda self: self.tokenizer.model_max_length, takes_self=True), kw_only=True)` (class-attribute, instance-attribute)

`max_output_tokens: int = field(default=4096, kw_only=True)` (class-attribute, instance-attribute)

`tokenizer: PreTrainedTokenizerBase = field(default=Factory(lambda self: import_optional_dependency('transformers').AutoTokenizer.from_pretrained(self.model), takes_self=True), kw_only=True)` (class-attribute, instance-attribute)

count_tokens(text)

Source code in griptape/tokenizers/huggingface_tokenizer.py
def count_tokens(self, text: str) -> int:
    """Return the length of the sequence produced by encoding `text`.

    Args:
        text: The string handed to `self.tokenizer.encode`.

    Returns:
        `len()` of whatever `self.tokenizer.encode(text)` returns.
    """
    token_ids = self.tokenizer.encode(text)
    return len(token_ids)