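Update text splitter unit test name: fix the call to `test_different_splitter` (previously misspelled `text_different_splitter`) and reformat its signature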
This commit is contained in:
parent
f75812afcb
commit
dc413120e2
|
|
@ -13,8 +13,11 @@ import langchain.document_loaders
|
||||||
import importlib
|
import importlib
|
||||||
|
|
||||||
|
|
||||||
def test_different_splitter(splitter_name, chunk_size: int = CHUNK_SIZE,
|
def test_different_splitter(
|
||||||
chunk_overlap: int = OVERLAP_SIZE, ):
|
splitter_name,
|
||||||
|
chunk_size: int = CHUNK_SIZE,
|
||||||
|
chunk_overlap: int = OVERLAP_SIZE,
|
||||||
|
):
|
||||||
if splitter_name == "MarkdownHeaderTextSplitter": # MarkdownHeaderTextSplitter特殊判定
|
if splitter_name == "MarkdownHeaderTextSplitter": # MarkdownHeaderTextSplitter特殊判定
|
||||||
headers_to_split_on = text_splitter_dict[splitter_name]['headers_to_split_on']
|
headers_to_split_on = text_splitter_dict[splitter_name]['headers_to_split_on']
|
||||||
text_splitter = langchain.text_splitter.MarkdownHeaderTextSplitter(
|
text_splitter = langchain.text_splitter.MarkdownHeaderTextSplitter(
|
||||||
|
|
@ -72,7 +75,7 @@ if __name__ == "__main__":
|
||||||
filepath = "../../knowledge_base/samples/content/test.txt"
|
filepath = "../../knowledge_base/samples/content/test.txt"
|
||||||
loader = document_loaders.UnstructuredFileLoader(filepath, autodetect_encoding=True)
|
loader = document_loaders.UnstructuredFileLoader(filepath, autodetect_encoding=True)
|
||||||
docs = loader.load()
|
docs = loader.load()
|
||||||
text_splitter = text_different_splitter(TEXT_SPLITTER_NAME, CHUNK_SIZE, OVERLAP_SIZE)
|
text_splitter = test_different_splitter(TEXT_SPLITTER_NAME, CHUNK_SIZE, OVERLAP_SIZE)
|
||||||
# 使用text_splitter进行分词
|
# 使用text_splitter进行分词
|
||||||
|
|
||||||
if TEXT_SPLITTER_NAME == "MarkdownHeaderTextSplitter":
|
if TEXT_SPLITTER_NAME == "MarkdownHeaderTextSplitter":
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue