Google tokenizer

GoogleTokenizer

Bases: BaseTokenizer

A tokenizer for Google Gemini models that delegates token counting to the google.generativeai client.

Source code in griptape/tokenizers/google_tokenizer.py
@define()
class GoogleTokenizer(BaseTokenizer):
    MODEL_PREFIXES_TO_MAX_INPUT_TOKENS = {"gemini": 30720}
    MODEL_PREFIXES_TO_MAX_OUTPUT_TOKENS = {"gemini": 2048}

    api_key: str = field(kw_only=True, metadata={"serializable": True})
    model_client: GenerativeModel = field(
        default=Factory(lambda self: self._default_model_client(), takes_self=True), kw_only=True
    )

    def count_tokens(self, text: str | list) -> int:
        if isinstance(text, (str, list)):
            return self.model_client.count_tokens(text).total_tokens
        else:
            raise ValueError("Text must be a string or a list.")

    def _default_model_client(self) -> GenerativeModel:
        genai = import_optional_dependency("google.generativeai")
        genai.configure(api_key=self.api_key)

        return genai.GenerativeModel(self.model)

MODEL_PREFIXES_TO_MAX_INPUT_TOKENS = {'gemini': 30720}

MODEL_PREFIXES_TO_MAX_OUTPUT_TOKENS = {'gemini': 2048}

api_key: str = field(kw_only=True, metadata={'serializable': True})

model_client: GenerativeModel = field(default=Factory(lambda self: self._default_model_client(), takes_self=True), kw_only=True)
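
Example usage (a minimal sketch; the model name "gemini-pro" and the placeholder API key are illustrative values, not defaults, and model is assumed to be the field inherited from BaseTokenizer):

from griptape.tokenizers import GoogleTokenizer

# api_key is required and keyword-only; the model client is built lazily
# from it via _default_model_client().
tokenizer = GoogleTokenizer(model="gemini-pro", api_key="YOUR_GOOGLE_API_KEY")

# Prints the total token count reported by the Gemini API.
print(tokenizer.count_tokens("Hello, Gemini!"))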

count_tokens(text)

Counts the tokens in text using the model client. text may be a string or a list; any other type raises a ValueError.

Source code in griptape/tokenizers/google_tokenizer.py
def count_tokens(self, text: str | list) -> int:
    if isinstance(text, (str, list)):
        return self.model_client.count_tokens(text).total_tokens
    else:
        raise ValueError("Text must be a string or a list.")
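
Because count_tokens forwards its argument directly to model_client.count_tokens, a list is passed through unchanged. A sketch, assuming the tokenizer constructed in the example above and that the google.generativeai client accepts a list of strings as content:

# Total token count across multiple content parts.
total = tokenizer.count_tokens(["What is a tokenizer?", "Answer briefly."])
print(total)

# Any other input type raises ValueError("Text must be a string or a list.").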