Chunkers
__all__ = ['ChunkSeparator', 'BaseChunker', 'TextChunker', 'PdfChunker', 'MarkdownChunker']
module-attribute
BaseChunker
Bases: ABC
Source code in griptape/chunkers/base_chunker.py
DEFAULT_SEPARATORS = [ChunkSeparator(' ')]
class-attribute, instance-attribute
max_tokens: int = field(default=Factory(lambda self: self.tokenizer.max_input_tokens, takes_self=True), kw_only=True)
class-attribute, instance-attribute
separators: list[ChunkSeparator] = field(default=Factory(lambda self: self.DEFAULT_SEPARATORS, takes_self=True), kw_only=True)
class-attribute, instance-attribute
tokenizer: BaseTokenizer = field(default=Factory(lambda: OpenAiTokenizer(model=OpenAiTokenizer.DEFAULT_OPENAI_GPT_3_CHAT_MODEL)), kw_only=True)
class-attribute, instance-attribute
ChunkSeparator
dataclass
MarkdownChunker
Bases: BaseChunker
Source code in griptape/chunkers/markdown_chunker.py
DEFAULT_SEPARATORS = [ChunkSeparator('##', is_prefix=True), ChunkSeparator('###', is_prefix=True), ChunkSeparator('####', is_prefix=True), ChunkSeparator('#####', is_prefix=True), ChunkSeparator('######', is_prefix=True), ChunkSeparator('\n\n'), ChunkSeparator('. '), ChunkSeparator('! '), ChunkSeparator('? '), ChunkSeparator(' ')]
class-attribute, instance-attribute
PdfChunker
Bases: BaseChunker
Source code in griptape/chunkers/pdf_chunker.py
DEFAULT_SEPARATORS = [ChunkSeparator('\n\n'), ChunkSeparator('. '), ChunkSeparator('! '), ChunkSeparator('? '), ChunkSeparator(' ')]
class-attribute, instance-attribute
TextChunker
Bases: BaseChunker