Skip to content

Base extraction engine

BaseExtractionEngine

Bases: ABC

Source code in griptape/griptape/engines/extraction/base_extraction_engine.py
@define
class BaseExtractionEngine(ABC):
    """Abstract base class for extraction engines.

    Splits the prompt driver tokenizer's ``max_tokens`` budget into a share
    for the chunker (``max_chunker_tokens``) and a remainder
    (``min_response_tokens``), controlled by ``max_token_multiplier``.

    Attributes:
        max_token_multiplier: Fraction of the tokenizer's ``max_tokens`` given to the chunker.
            Must satisfy ``0 < max_token_multiplier <= 1``. Defaults to 0.5.
        chunk_joiner: Separator string, a blank line by default. It is not used in this base
            class; presumably subclasses use it to join chunk text (confirm in subclasses).
        prompt_driver: Prompt driver whose tokenizer supplies the token budget. Defaults to an
            ``OpenAiChatPromptDriver`` using ``OpenAiTokenizer.DEFAULT_OPENAI_GPT_3_CHAT_MODEL``.
        chunker: Chunker used on input text. Defaults to a ``TextChunker`` built from the prompt
            driver's tokenizer and limited to ``max_chunker_tokens``.
    """

    max_token_multiplier: float = field(default=0.5, kw_only=True)
    chunk_joiner: str = field(default="\n\n", kw_only=True)
    prompt_driver: BasePromptDriver = field(
        default=Factory(lambda: OpenAiChatPromptDriver(model=OpenAiTokenizer.DEFAULT_OPENAI_GPT_3_CHAT_MODEL)),
        kw_only=True,
    )
    # The default depends on prompt_driver and max_token_multiplier, so it must be declared
    # after them and built with takes_self=True.
    chunker: BaseChunker = field(
        default=Factory(
            lambda self: TextChunker(tokenizer=self.prompt_driver.tokenizer, max_tokens=self.max_chunker_tokens),
            takes_self=True,
        ),
        kw_only=True,
    )

    @max_token_multiplier.validator  # pyright: ignore
    def validate_max_token_multiplier(self, _, max_token_multiplier: float) -> None:
        """Ensure ``max_token_multiplier`` lies in the half-open interval (0, 1].

        Args:
            _: The attrs attribute being validated (unused).
            max_token_multiplier: The value assigned to the field.

        Raises:
            ValueError: If the value is greater than 1, or is less than or equal to 0.
        """
        if max_token_multiplier > 1:
            raise ValueError("has to be less than or equal to 1")
        if max_token_multiplier <= 0:
            raise ValueError("has to be greater than 0")

    @property
    def max_chunker_tokens(self) -> int:
        """Token budget for the chunker: tokenizer ``max_tokens`` scaled by the multiplier, rounded."""
        return round(self.prompt_driver.tokenizer.max_tokens * self.max_token_multiplier)

    @property
    def min_response_tokens(self) -> int:
        """Tokens left after the chunker's share is subtracted from tokenizer ``max_tokens``, rounded.

        The subtraction is rounded independently of ``max_chunker_tokens``, so the two values
        are not guaranteed to sum exactly to the tokenizer's ``max_tokens``.
        """
        return round(
            self.prompt_driver.tokenizer.max_tokens
            - self.prompt_driver.tokenizer.max_tokens * self.max_token_multiplier
        )

    @abstractmethod
    def extract(self, text: str | ListArtifact, rulesets: list[Ruleset] | None = None, **kwargs) -> ListArtifact:
        """Extract artifacts from ``text``; concrete engines must implement this.

        Args:
            text: Input to extract from, given as a plain string or a ``ListArtifact``.
            rulesets: Optional rulesets; how they are applied is defined by the implementation.
            **kwargs: Implementation-specific options.

        Returns:
            A ``ListArtifact`` holding the extraction results.
        """

chunk_joiner: str = field(default='\n\n', kw_only=True) class-attribute instance-attribute

chunker: BaseChunker = field(default=Factory(lambda self: TextChunker(tokenizer=self.prompt_driver.tokenizer, max_tokens=self.max_chunker_tokens), takes_self=True), kw_only=True) class-attribute instance-attribute

max_chunker_tokens: int property

max_token_multiplier: float = field(default=0.5, kw_only=True) class-attribute instance-attribute

min_response_tokens: int property

prompt_driver: BasePromptDriver = field(default=Factory(lambda : OpenAiChatPromptDriver(model=OpenAiTokenizer.DEFAULT_OPENAI_GPT_3_CHAT_MODEL)), kw_only=True) class-attribute instance-attribute

extract(text, rulesets=None, **kwargs) abstractmethod

Source code in griptape/griptape/engines/extraction/base_extraction_engine.py
@abstractmethod
def extract(
    self,
    text: str | ListArtifact,
    rulesets: list[Ruleset] | None = None,
    **kwargs,
) -> ListArtifact:
    """Extract artifacts from ``text``; concrete engines must implement this.

    Args:
        text: Input to extract from, given as a plain string or a ``ListArtifact``.
        rulesets: Optional rulesets; how they are applied is defined by the implementation.
        **kwargs: Implementation-specific options.

    Returns:
        A ``ListArtifact`` holding the extraction results.
    """

validate_max_token_multiplier(_, max_token_multiplier)

Source code in griptape/griptape/engines/extraction/base_extraction_engine.py
@max_token_multiplier.validator  # pyright: ignore
def validate_max_token_multiplier(self, _, max_token_multiplier: float) -> None:
    """Ensure ``max_token_multiplier`` lies in the half-open interval (0, 1].

    Args:
        _: The attrs attribute being validated (unused).
        max_token_multiplier: The value assigned to the field.

    Raises:
        ValueError: If the value is greater than 1, or is less than or equal to 0.
    """
    if max_token_multiplier > 1:
        raise ValueError("has to be less than or equal to 1")
    if max_token_multiplier <= 0:
        raise ValueError("has to be greater than 0")