chunkers
__all__ = ['ChunkSeparator', 'BaseChunker', 'TextChunker', 'PdfChunker', 'MarkdownChunker']
module-attribute
BaseChunker
Bases: ABC
Source code in griptape/chunkers/base_chunker.py
Lines 13–113 of griptape/chunkers/base_chunker.py (source listing not included in this extract).
DEFAULT_SEPARATORS = [ChunkSeparator(' ')]
class-attribute
instance-attribute
max_tokens: int = field(default=Factory(lambda self: self.tokenizer.max_input_tokens, takes_self=True), kw_only=True)
class-attribute
instance-attribute
separators: list[ChunkSeparator] = field(default=Factory(lambda self: self.DEFAULT_SEPARATORS, takes_self=True), kw_only=True)
class-attribute
instance-attribute
tokenizer: BaseTokenizer = field(default=Factory(lambda: OpenAiTokenizer(model=OpenAiTokenizer.DEFAULT_OPENAI_GPT_3_CHAT_MODEL)), kw_only=True)
class-attribute
instance-attribute
__find_midpoint_index(subchunks, half_token_count)
Source code in griptape/chunkers/base_chunker.py
__get_subchunks(separator, subchunks, balance_index)
Source code in griptape/chunkers/base_chunker.py
chunk(text)
Source code in griptape/chunkers/base_chunker.py
validate_max_tokens(_, max_tokens)
ChunkSeparator
dataclass
MarkdownChunker
Bases: BaseChunker
Source code in griptape/chunkers/markdown_chunker.py
DEFAULT_SEPARATORS = [ChunkSeparator('##', is_prefix=True), ChunkSeparator('###', is_prefix=True), ChunkSeparator('####', is_prefix=True), ChunkSeparator('#####', is_prefix=True), ChunkSeparator('######', is_prefix=True), ChunkSeparator('\n\n'), ChunkSeparator('. '), ChunkSeparator('! '), ChunkSeparator('? '), ChunkSeparator(' ')]
class-attribute
instance-attribute
PdfChunker
Bases: BaseChunker
Source code in griptape/chunkers/pdf_chunker.py
DEFAULT_SEPARATORS = [ChunkSeparator('\n\n'), ChunkSeparator('. '), ChunkSeparator('! '), ChunkSeparator('? '), ChunkSeparator(' ')]
class-attribute
instance-attribute
TextChunker
Bases: BaseChunker