Skip to content

Hugging Face tokenizer

HuggingFaceTokenizer

Bases: BaseTokenizer

Source code in griptape/griptape/tokenizers/hugging_face_tokenizer.py
@define(frozen=True)
class HuggingFaceTokenizer(BaseTokenizer):
    """Tokenizer that delegates to a wrapped ``PreTrainedTokenizerBase``.

    Instances are frozen; both fields are keyword-only.
    """

    # The wrapped tokenizer object that performs the actual encoding.
    tokenizer: PreTrainedTokenizerBase = field(kw_only=True)
    # Token limit. When not supplied, it is read from the wrapped
    # tokenizer's ``model_max_length`` at construction time.
    max_tokens: int = field(
        kw_only=True,
        default=Factory(
            lambda instance: instance.tokenizer.model_max_length,
            takes_self=True,
        ),
    )

    def count_tokens(self, text: str) -> int:
        """Return the length of the wrapped tokenizer's encoding of ``text``."""
        encoded = self.tokenizer.encode(text)
        return len(encoded)

max_tokens: int = field(default=Factory(lambda self: self.tokenizer.model_max_length, takes_self=True), kw_only=True) class-attribute instance-attribute

tokenizer: PreTrainedTokenizerBase = field(kw_only=True) class-attribute instance-attribute

count_tokens(text)

Source code in griptape/griptape/tokenizers/hugging_face_tokenizer.py
def count_tokens(self, text: str) -> int:
    """Return the length of ``self.tokenizer.encode(text)``."""
    encoded = self.tokenizer.encode(text)
    return len(encoded)