18 lines
784 B
Python
18 lines
784 B
Python
from langchain.text_splitter import TextSplitter, _split_text_with_regex
|
|
from typing import Any, List
|
|
|
|
|
|
class MyTextSplitter(TextSplitter):
    """Text splitter that cuts its input on a configurable separator."""

    def __init__(self, separator: str = "\n\n", **kwargs: Any) -> None:
        """Set up the splitter.

        Args:
            separator: Delimiter on which the text is cut. Defaults to a
                blank line (two newlines).
            **kwargs: Passed through unchanged to the parent initialiser.
        """
        super().__init__(**kwargs)
        self._separator = separator

    def split_text(self, text: str) -> List[str]:
        """Cut ``text`` on the separator and merge the pieces into chunks.

        Args:
            text: The text to split.

        Returns:
            The list of chunks produced by ``_merge_splits``.
        """
        keep = self._keep_separator

        # Naive first pass: break the input on the separator.
        # NOTE(review): the helper's name suggests the separator is treated
        # as a regular expression -- confirm whether literal separators
        # containing regex metacharacters need escaping by the caller.
        pieces = _split_text_with_regex(text, self._separator, keep)

        # Presumably the pieces already carry the separator when it is kept,
        # so nothing is re-inserted on merge in that case -- TODO confirm
        # against _split_text_with_regex.
        if keep:
            joiner = ""
        else:
            joiner = self._separator

        return self._merge_splits(pieces, joiner)