Skip to content

Tokenizers

__all__ = ['BaseTokenizer', 'OpenAiTokenizer', 'CohereTokenizer', 'HuggingFaceTokenizer', 'AnthropicTokenizer', 'BedrockTitanTokenizer', 'BedrockJurassicTokenizer', 'BedrockClaudeTokenizer'] module-attribute

AnthropicTokenizer

Bases: BaseTokenizer

Source code in griptape/griptape/tokenizers/anthropic_tokenizer.py
@define(frozen=True)
class AnthropicTokenizer(BaseTokenizer):
    """Tokenizer that counts tokens using the tokenizer exposed by the ``anthropic`` package.

    ``model`` is a required keyword-only attribute; the methods defined here do
    not consult it.
    """

    DEFAULT_MODEL = "claude-2"
    DEFAULT_MAX_TOKENS = 100000

    model: str = field(kw_only=True)

    @property
    def max_tokens(self) -> int:
        """Return the token limit, which is always ``DEFAULT_MAX_TOKENS``."""
        return self.DEFAULT_MAX_TOKENS

    def count_tokens(self, text: str) -> int:
        """Return how many token ids the Anthropic tokenizer produces for ``text``."""
        # The optional dependency is resolved on every call, not at import time.
        anthropic_module = import_optional_dependency("anthropic")
        tokenizer = anthropic_module._client.sync_get_tokenizer()
        encoded = tokenizer.encode(text)

        return len(encoded.ids)

DEFAULT_MAX_TOKENS = 100000 class-attribute instance-attribute

DEFAULT_MODEL = 'claude-2' class-attribute instance-attribute

max_tokens: int property

model: str = field(kw_only=True) class-attribute instance-attribute

count_tokens(text)

Source code in griptape/griptape/tokenizers/anthropic_tokenizer.py
def count_tokens(self, text: str) -> int:
    """Return how many token ids the Anthropic tokenizer produces for ``text``."""
    # The optional dependency is resolved on every call, not at import time.
    anthropic_module = import_optional_dependency("anthropic")
    tokenizer = anthropic_module._client.sync_get_tokenizer()
    encoded = tokenizer.encode(text)

    return len(encoded.ids)

BaseTokenizer

Bases: ABC

Source code in griptape/griptape/tokenizers/base_tokenizer.py
@define(frozen=True)
class BaseTokenizer(ABC):
    """Abstract base class for tokenizers.

    Subclasses supply ``max_tokens`` and ``count_tokens``; ``count_tokens_left``
    is derived from those two.
    """

    # Each instance gets its own list holding the shared response stop sequence.
    stop_sequences: list[str] = field(factory=lambda: [utils.constants.RESPONSE_STOP_SEQUENCE], kw_only=True)

    @property
    @abstractmethod
    def max_tokens(self) -> int:
        """Return the maximum number of tokens; provided by subclasses."""

    def count_tokens_left(self, text: str | list) -> int:
        """Return how many tokens remain below ``max_tokens`` after ``text``.

        The result is clamped at zero, so it is never negative.
        """
        remaining = self.max_tokens - self.count_tokens(text)

        return max(0, remaining)

    @abstractmethod
    def count_tokens(self, text: str | list) -> int:
        """Return the number of tokens in ``text``; provided by subclasses."""

max_tokens: int abstractmethod property

stop_sequences: list[str] = field(default=Factory(lambda : [utils.constants.RESPONSE_STOP_SEQUENCE]), kw_only=True) class-attribute instance-attribute

count_tokens(text) abstractmethod

Source code in griptape/griptape/tokenizers/base_tokenizer.py
@abstractmethod
def count_tokens(self, text: str | list) -> int:
    """Return the number of tokens in ``text``; concrete tokenizers must override this."""

count_tokens_left(text)

Source code in griptape/griptape/tokenizers/base_tokenizer.py
def count_tokens_left(self, text: str | list) -> int:
    """Return how many tokens remain below ``max_tokens`` after ``text``.

    The result is clamped at zero, so it is never negative.
    """
    remaining = self.max_tokens - self.count_tokens(text)

    return max(0, remaining)

BedrockClaudeTokenizer

Bases: AnthropicTokenizer

Source code in griptape/griptape/tokenizers/bedrock_claude_tokenizer.py
@define(frozen=True)
class BedrockClaudeTokenizer(AnthropicTokenizer):
    """AnthropicTokenizer subclass that only overrides the default model id and token limit."""

    DEFAULT_MAX_TOKENS = 8192
    DEFAULT_MODEL = "anthropic.claude-v2"

DEFAULT_MAX_TOKENS = 8192 class-attribute instance-attribute

DEFAULT_MODEL = 'anthropic.claude-v2' class-attribute instance-attribute

BedrockJurassicTokenizer

Bases: BaseTokenizer

Source code in griptape/griptape/tokenizers/bedrock_jurassic_tokenizer.py
@define(frozen=True)
class BedrockJurassicTokenizer(BaseTokenizer):
    """Tokenizer that counts tokens by invoking a model through a ``bedrock-runtime`` client."""

    DEFAULT_MODEL = "ai21.j2-ultra-v1"
    DEFAULT_MAX_TOKENS = 8192

    # Defaults to a fresh boto3 session; boto3 is resolved as an optional dependency.
    session: boto3.Session = field(
        default=Factory(lambda: import_optional_dependency("boto3").Session()),
        kw_only=True,
    )
    model: str = field(kw_only=True)
    # Built from ``session`` unless a client is passed in explicitly.
    bedrock_client: Any = field(
        default=Factory(lambda self: self.session.client("bedrock-runtime"), takes_self=True),
        kw_only=True,
    )

    @property
    def max_tokens(self) -> int:
        """Return the token limit, which is always ``DEFAULT_MAX_TOKENS``."""
        return self.DEFAULT_MAX_TOKENS

    def count_tokens(self, text: str) -> int:
        """Return the length of the ``prompt.tokens`` list the model reports for ``text``.

        The text is sent as the ``prompt`` of a JSON ``invoke_model`` request
        against ``self.model``.
        """
        request_body = json.dumps({"prompt": text})
        result = self.bedrock_client.invoke_model(
            body=request_body,
            modelId=self.model,
            accept="application/json",
            contentType="application/json",
        )
        parsed = json.loads(result.get("body").read())

        return len(parsed["prompt"]["tokens"])

DEFAULT_MAX_TOKENS = 8192 class-attribute instance-attribute

DEFAULT_MODEL = 'ai21.j2-ultra-v1' class-attribute instance-attribute

bedrock_client: Any = field(default=Factory(lambda self: self.session.client('bedrock-runtime'), takes_self=True), kw_only=True) class-attribute instance-attribute

max_tokens: int property

model: str = field(kw_only=True) class-attribute instance-attribute

session: boto3.Session = field(default=Factory(lambda : import_optional_dependency('boto3').Session()), kw_only=True) class-attribute instance-attribute

count_tokens(text)

Source code in griptape/griptape/tokenizers/bedrock_jurassic_tokenizer.py
def count_tokens(self, text: str) -> int:
    """Return the length of the ``prompt.tokens`` list the model reports for ``text``.

    The text is sent as the ``prompt`` of a JSON ``invoke_model`` request
    against ``self.model``.
    """
    request_body = json.dumps({"prompt": text})
    result = self.bedrock_client.invoke_model(
        body=request_body,
        modelId=self.model,
        accept="application/json",
        contentType="application/json",
    )
    parsed = json.loads(result.get("body").read())

    return len(parsed["prompt"]["tokens"])

BedrockTitanTokenizer

Bases: BaseTokenizer

Source code in griptape/griptape/tokenizers/bedrock_titan_tokenizer.py
@define(frozen=True)
class BedrockTitanTokenizer(BaseTokenizer):
    """Tokenizer that reads the token count reported by a model invoked via ``bedrock-runtime``."""

    DEFAULT_MODEL = "amazon.titan-text-express-v1"
    DEFAULT_MAX_TOKENS = 4096

    DEFAULT_EMBEDDING_MODELS = "amazon.titan-embed-text-v1"

    # Defaults to a fresh boto3 session; boto3 is resolved as an optional dependency.
    session: boto3.Session = field(
        default=Factory(lambda: import_optional_dependency("boto3").Session()),
        kw_only=True,
    )
    # Replaces the inherited default with an empty list.
    stop_sequences: list[str] = field(factory=list, kw_only=True)
    model: str = field(kw_only=True)
    # Built from ``session`` unless a client is passed in explicitly.
    bedrock_client: Any = field(
        default=Factory(lambda self: self.session.client("bedrock-runtime"), takes_self=True),
        kw_only=True,
    )

    @property
    def max_tokens(self) -> int:
        """Return the token limit, which is always ``DEFAULT_MAX_TOKENS``."""
        return self.DEFAULT_MAX_TOKENS

    def count_tokens(self, text: str) -> int:
        """Return the ``inputTextTokenCount`` value the model reports for ``text``.

        The text is sent as the ``inputText`` of a JSON ``invoke_model`` request
        against ``self.model``.
        """
        request_body = json.dumps({"inputText": text})
        result = self.bedrock_client.invoke_model(
            body=request_body,
            modelId=self.model,
            accept="application/json",
            contentType="application/json",
        )
        parsed = json.loads(result.get("body").read())

        return parsed["inputTextTokenCount"]

DEFAULT_EMBEDDING_MODELS = 'amazon.titan-embed-text-v1' class-attribute instance-attribute

DEFAULT_MAX_TOKENS = 4096 class-attribute instance-attribute

DEFAULT_MODEL = 'amazon.titan-text-express-v1' class-attribute instance-attribute

bedrock_client: Any = field(default=Factory(lambda self: self.session.client('bedrock-runtime'), takes_self=True), kw_only=True) class-attribute instance-attribute

max_tokens: int property

model: str = field(kw_only=True) class-attribute instance-attribute

session: boto3.Session = field(default=Factory(lambda : import_optional_dependency('boto3').Session()), kw_only=True) class-attribute instance-attribute

stop_sequences: list[str] = field(factory=list, kw_only=True) class-attribute instance-attribute

count_tokens(text)

Source code in griptape/griptape/tokenizers/bedrock_titan_tokenizer.py
def count_tokens(self, text: str) -> int:
    """Return the ``inputTextTokenCount`` value the model reports for ``text``.

    The text is sent as the ``inputText`` of a JSON ``invoke_model`` request
    against ``self.model``.
    """
    request_body = json.dumps({"inputText": text})
    result = self.bedrock_client.invoke_model(
        body=request_body,
        modelId=self.model,
        accept="application/json",
        contentType="application/json",
    )
    parsed = json.loads(result.get("body").read())

    return parsed["inputTextTokenCount"]

CohereTokenizer

Bases: BaseTokenizer

Source code in griptape/griptape/tokenizers/cohere_tokenizer.py
@define(frozen=True)
class CohereTokenizer(BaseTokenizer):
    """Tokenizer that delegates token counting to a Cohere ``Client``."""

    DEFAULT_MODEL = "command"
    MAX_TOKENS = 2048

    model: str = field(kw_only=True)
    client: Client = field(kw_only=True)

    @property
    def max_tokens(self) -> int:
        """Return the token limit, which is always ``MAX_TOKENS``."""
        return self.MAX_TOKENS

    def count_tokens(self, text: str) -> int:
        """Return the number of entries in ``tokens`` of the client's tokenize result for ``text``."""
        tokenized = self.client.tokenize(text=text)

        return len(tokenized.tokens)

DEFAULT_MODEL = 'command' class-attribute instance-attribute

MAX_TOKENS = 2048 class-attribute instance-attribute

client: Client = field(kw_only=True) class-attribute instance-attribute

max_tokens: int property

model: str = field(kw_only=True) class-attribute instance-attribute

count_tokens(text)

Source code in griptape/griptape/tokenizers/cohere_tokenizer.py
def count_tokens(self, text: str) -> int:
    """Return the number of entries in ``tokens`` of the client's tokenize result for ``text``."""
    tokenized = self.client.tokenize(text=text)

    return len(tokenized.tokens)

HuggingFaceTokenizer

Bases: BaseTokenizer

Source code in griptape/griptape/tokenizers/hugging_face_tokenizer.py
@define(frozen=True)
class HuggingFaceTokenizer(BaseTokenizer):
    """Tokenizer that wraps a ``PreTrainedTokenizerBase`` instance."""

    tokenizer: PreTrainedTokenizerBase = field(kw_only=True)
    # Unless given explicitly, the limit is taken from the wrapped tokenizer's ``model_max_length``.
    max_tokens: int = field(
        default=Factory(lambda self: self.tokenizer.model_max_length, takes_self=True),
        kw_only=True,
    )

    def count_tokens(self, text: str) -> int:
        """Return the length of the sequence produced by ``tokenizer.encode(text)``."""
        encoded = self.tokenizer.encode(text)

        return len(encoded)

max_tokens: int = field(default=Factory(lambda self: self.tokenizer.model_max_length, takes_self=True), kw_only=True) class-attribute instance-attribute

tokenizer: PreTrainedTokenizerBase = field(kw_only=True) class-attribute instance-attribute

count_tokens(text)

Source code in griptape/griptape/tokenizers/hugging_face_tokenizer.py
def count_tokens(self, text: str) -> int:
    """Return the length of the sequence produced by ``tokenizer.encode(text)``."""
    encoded = self.tokenizer.encode(text)

    return len(encoded)

OpenAiTokenizer

Bases: BaseTokenizer

Source code in griptape/griptape/tokenizers/openai_tokenizer.py
@define(frozen=True)
class OpenAiTokenizer(BaseTokenizer):
    """Tokenizer for OpenAI models, backed by ``tiktoken``.

    ``model`` is a required keyword-only attribute. It selects both the
    tiktoken encoding and the token limit looked up in
    ``MODEL_PREFIXES_TO_MAX_TOKENS``.
    """

    DEFAULT_OPENAI_GPT_3_COMPLETION_MODEL = "text-davinci-003"
    DEFAULT_OPENAI_GPT_3_CHAT_MODEL = "gpt-3.5-turbo"
    DEFAULT_OPENAI_GPT_4_MODEL = "gpt-4"
    DEFAULT_ENCODING = "cl100k_base"
    DEFAULT_MAX_TOKENS = 2049
    TOKEN_OFFSET = 8

    # https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
    # The first prefix that matches wins, so more specific prefixes must be
    # listed before the shorter prefixes they start with.
    MODEL_PREFIXES_TO_MAX_TOKENS = {
        "gpt-4-1106": 128000,
        "gpt-4-32k": 32768,
        "gpt-4": 8192,
        "gpt-3.5-turbo-16k": 16384,
        "gpt-3.5-turbo": 4096,
        "gpt-35-turbo-16k": 16384,
        "gpt-35-turbo": 4096,
        "text-davinci-003": 4097,
        "text-davinci-002": 4097,
        "code-davinci-002": 8001,
        "text-embedding-ada-002": 8191,
        "text-embedding-ada-001": 2046,
    }

    EMBEDDING_MODELS = ["text-embedding-ada-002", "text-embedding-ada-001"]

    model: str = field(kw_only=True)

    @property
    def encoding(self) -> tiktoken.Encoding:
        """Return the tiktoken encoding for ``model``.

        Falls back to ``DEFAULT_ENCODING`` when tiktoken raises ``KeyError``
        for the model name.
        """
        try:
            return tiktoken.encoding_for_model(self.model)
        except KeyError:
            return tiktoken.get_encoding(self.DEFAULT_ENCODING)

    @property
    def max_tokens(self) -> int:
        """Return the token limit for ``model``.

        The limit is the value of the first ``MODEL_PREFIXES_TO_MAX_TOKENS``
        entry whose key is a prefix of ``model``, or ``DEFAULT_MAX_TOKENS``
        when no prefix matches. ``TOKEN_OFFSET`` is subtracted unless the
        model is listed in ``EMBEDDING_MODELS``.
        """
        # A default of None is passed to next() so that a model matching no known prefix
        # falls back to DEFAULT_MAX_TOKENS instead of raising StopIteration.
        tokens = next((v for k, v in self.MODEL_PREFIXES_TO_MAX_TOKENS.items() if self.model.startswith(k)), None)
        offset = 0 if self.model in self.EMBEDDING_MODELS else self.TOKEN_OFFSET

        return (tokens if tokens else self.DEFAULT_MAX_TOKENS) - offset

    def count_tokens(self, text: str | list, model: Optional[str] = None) -> int:
        """
        Handles the special case of ChatML. Implementation adopted from the official OpenAI notebook:
        https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb

        Args:
            text: Either a plain string, or a list of message mappings whose values are
                each encoded (the values are assumed to be strings).
            model: Model name used for the list case; ``self.model`` is used when this
                is empty or ``None``. Ignored for a plain string.

        Returns:
            The number of tokens counted for ``text``.

        Raises:
            NotImplementedError: If ``text`` is a list and the model is not one of the
                recognised chat model names.
        """
        if isinstance(text, list):
            model = model if model else self.model

            try:
                encoding = tiktoken.encoding_for_model(model)
            except KeyError:
                logging.warning("model not found. Using cl100k_base encoding.")

                encoding = tiktoken.get_encoding("cl100k_base")

            if model in {
                "gpt-3.5-turbo-0613",
                "gpt-3.5-turbo-16k-0613",
                "gpt-4-0314",
                "gpt-4-32k-0314",
                "gpt-4-0613",
                "gpt-4-32k-0613",
            }:
                tokens_per_message = 3
                tokens_per_name = 1
            elif model == "gpt-3.5-turbo-0301":
                # every message follows <|start|>{role/name}\n{content}<|end|>\n
                tokens_per_message = 4
                # if there's a name, the role is omitted
                tokens_per_name = -1
            elif "gpt-3.5-turbo" in model or "gpt-35-turbo" in model:
                # Unversioned names are counted by recursing with a pinned snapshot name.
                logging.info("gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
                return self.count_tokens(text, model="gpt-3.5-turbo-0613")
            elif "gpt-4" in model:
                logging.info("gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
                return self.count_tokens(text, model="gpt-4-0613")
            else:
                raise NotImplementedError(
                    f"""token_count() is not implemented for model {model}. 
                    See https://github.com/openai/openai-python/blob/main/chatml.md for 
                    information on how messages are converted to tokens."""
                )

            num_tokens = 0

            for message in text:
                num_tokens += tokens_per_message
                for key, value in message.items():
                    num_tokens += len(encoding.encode(value))
                    if key == "name":
                        num_tokens += tokens_per_name

            # every reply is primed with <|start|>assistant<|message|>
            num_tokens += 3

            return num_tokens
        else:
            # Stop sequences are allowed to be encoded as special tokens.
            return len(self.encoding.encode(text, allowed_special=set(self.stop_sequences)))

DEFAULT_ENCODING = 'cl100k_base' class-attribute instance-attribute

DEFAULT_MAX_TOKENS = 2049 class-attribute instance-attribute

DEFAULT_OPENAI_GPT_3_CHAT_MODEL = 'gpt-3.5-turbo' class-attribute instance-attribute

DEFAULT_OPENAI_GPT_3_COMPLETION_MODEL = 'text-davinci-003' class-attribute instance-attribute

DEFAULT_OPENAI_GPT_4_MODEL = 'gpt-4' class-attribute instance-attribute

EMBEDDING_MODELS = ['text-embedding-ada-002', 'text-embedding-ada-001'] class-attribute instance-attribute

MODEL_PREFIXES_TO_MAX_TOKENS = {'gpt-4-1106': 128000, 'gpt-4-32k': 32768, 'gpt-4': 8192, 'gpt-3.5-turbo-16k': 16384, 'gpt-3.5-turbo': 4096, 'gpt-35-turbo-16k': 16384, 'gpt-35-turbo': 4096, 'text-davinci-003': 4097, 'text-davinci-002': 4097, 'code-davinci-002': 8001, 'text-embedding-ada-002': 8191, 'text-embedding-ada-001': 2046} class-attribute instance-attribute

TOKEN_OFFSET = 8 class-attribute instance-attribute

encoding: tiktoken.Encoding property

max_tokens: int property

model: str = field(kw_only=True) class-attribute instance-attribute

count_tokens(text, model=None)

Handles the special case of ChatML. Implementation adopted from the official OpenAI notebook: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb

Source code in griptape/griptape/tokenizers/openai_tokenizer.py
def count_tokens(self, text: str | list, model: Optional[str] = None) -> int:
    """
    Handles the special case of ChatML. Implementation adopted from the official OpenAI notebook:
    https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb

    Args:
        text: Either a plain string, or a list of message mappings whose values are
            each encoded (the values are assumed to be strings).
        model: Model name used for the list case; ``self.model`` is used when this
            is empty or ``None``. Ignored for a plain string.

    Returns:
        The number of tokens counted for ``text``.

    Raises:
        NotImplementedError: If ``text`` is a list and the model is not one of the
            recognised chat model names.
    """
    if isinstance(text, list):
        model = model if model else self.model

        try:
            encoding = tiktoken.encoding_for_model(model)
        except KeyError:
            logging.warning("model not found. Using cl100k_base encoding.")

            encoding = tiktoken.get_encoding("cl100k_base")

        if model in {
            "gpt-3.5-turbo-0613",
            "gpt-3.5-turbo-16k-0613",
            "gpt-4-0314",
            "gpt-4-32k-0314",
            "gpt-4-0613",
            "gpt-4-32k-0613",
        }:
            tokens_per_message = 3
            tokens_per_name = 1
        elif model == "gpt-3.5-turbo-0301":
            # every message follows <|start|>{role/name}\n{content}<|end|>\n
            tokens_per_message = 4
            # if there's a name, the role is omitted
            tokens_per_name = -1
        elif "gpt-3.5-turbo" in model or "gpt-35-turbo" in model:
            # Unversioned names are counted by recursing with a pinned snapshot name.
            logging.info("gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
            return self.count_tokens(text, model="gpt-3.5-turbo-0613")
        elif "gpt-4" in model:
            logging.info("gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
            return self.count_tokens(text, model="gpt-4-0613")
        else:
            raise NotImplementedError(
                f"""token_count() is not implemented for model {model}. 
                See https://github.com/openai/openai-python/blob/main/chatml.md for 
                information on how messages are converted to tokens."""
            )

        num_tokens = 0

        # Each message costs a fixed overhead plus the encoded length of every value;
        # a "name" key adds (or, for gpt-3.5-turbo-0301, subtracts) one more token.
        for message in text:
            num_tokens += tokens_per_message
            for key, value in message.items():
                num_tokens += len(encoding.encode(value))
                if key == "name":
                    num_tokens += tokens_per_name

        # every reply is primed with <|start|>assistant<|message|>
        num_tokens += 3

        return num_tokens
    else:
        # Stop sequences are allowed to be encoded as special tokens.
        return len(self.encoding.encode(text, allowed_special=set(self.stop_sequences)))