Skip to content

Bedrock Titan tokenizer

BedrockTitanTokenizer

Bases: BaseTokenizer

Source code in griptape/griptape/tokenizers/bedrock_titan_tokenizer.py
@define(frozen=True)
class BedrockTitanTokenizer(BaseTokenizer):
    """Tokenizer that asks a Bedrock-hosted Titan model for token counts.

    Token counting is delegated to ``bedrock_client.invoke_model``: the text
    is sent as ``inputText`` and the ``inputTextTokenCount`` field of the
    decoded JSON response is returned.
    """

    DEFAULT_MODEL = "amazon.titan-text-express-v1"
    DEFAULT_MAX_TOKENS = 4096

    DEFAULT_EMBEDDING_MODELS = "amazon.titan-embed-text-v1"

    # Default session is built lazily; boto3 is looked up through
    # import_optional_dependency at that point.
    session: boto3.Session = field(
        factory=lambda: import_optional_dependency("boto3").Session(),
        kw_only=True,
    )
    stop_sequences: list[str] = field(default=Factory(list), kw_only=True)
    # Model identifier passed as ``modelId`` on every invoke_model call.
    model: str = field(kw_only=True)
    # Default client is created from this instance's own session.
    bedrock_client: Any = field(
        kw_only=True,
        default=Factory(lambda self: self.session.client("bedrock-runtime"), takes_self=True),
    )

    @property
    def max_tokens(self) -> int:
        """Return the class-level ``DEFAULT_MAX_TOKENS`` limit."""
        return self.DEFAULT_MAX_TOKENS

    def count_tokens(self, text: str) -> int:
        """Return the token count the configured model reports for ``text``.

        Args:
            text: The text to send as the ``inputText`` of the request.

        Returns:
            The ``inputTextTokenCount`` value from the decoded response body.
        """
        invoke = self.bedrock_client.invoke_model
        raw_response = invoke(
            body=json.dumps({"inputText": text}),
            modelId=self.model,
            accept="application/json",
            contentType="application/json",
        )

        # The response "body" is read in full and parsed as JSON.
        decoded = json.loads(raw_response.get("body").read())
        return decoded["inputTextTokenCount"]

DEFAULT_EMBEDDING_MODELS = 'amazon.titan-embed-text-v1' class-attribute instance-attribute

DEFAULT_MAX_TOKENS = 4096 class-attribute instance-attribute

DEFAULT_MODEL = 'amazon.titan-text-express-v1' class-attribute instance-attribute

bedrock_client: Any = field(default=Factory(lambda self: self.session.client('bedrock-runtime'), takes_self=True), kw_only=True) class-attribute instance-attribute

max_tokens: int property

model: str = field(kw_only=True) class-attribute instance-attribute

session: boto3.Session = field(default=Factory(lambda: import_optional_dependency('boto3').Session()), kw_only=True) class-attribute instance-attribute

stop_sequences: list[str] = field(factory=list, kw_only=True) class-attribute instance-attribute

count_tokens(text)

Source code in griptape/griptape/tokenizers/bedrock_titan_tokenizer.py
def count_tokens(self, text: str) -> int:
    """Return the token count the configured model reports for ``text``.

    The text is sent as ``inputText`` through ``self.bedrock_client.invoke_model``
    using ``self.model`` as the model id; the ``inputTextTokenCount`` field of
    the decoded JSON response body is returned.
    """
    invoke = self.bedrock_client.invoke_model
    raw_response = invoke(
        body=json.dumps({"inputText": text}),
        modelId=self.model,
        accept="application/json",
        contentType="application/json",
    )

    # The response "body" is read in full and parsed as JSON.
    decoded = json.loads(raw_response.get("body").read())
    return decoded["inputTextTokenCount"]