From 464436dd20ae242fcac78a07b3f70b6d971e38d2 Mon Sep 17 00:00:00 2001 From: weiweiw <14335254+weiweiw22@user.noreply.gitee.com> Date: Tue, 14 Jan 2025 11:02:23 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=97=A5=E5=BF=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../chatchat/server/knowledge_base/utils.py | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/libs/chatchat-server/chatchat/server/knowledge_base/utils.py b/libs/chatchat-server/chatchat/server/knowledge_base/utils.py index 9ebc7f9..ab4cdd4 100644 --- a/libs/chatchat-server/chatchat/server/knowledge_base/utils.py +++ b/libs/chatchat-server/chatchat/server/knowledge_base/utils.py @@ -228,6 +228,7 @@ def make_text_splitter(splitter_name, chunk_size, chunk_overlap): """ 根据参数获取特定的分词器 """ + logger.info(f"make_text_splitter start....splitter_name:{splitter_name}") splitter_name = splitter_name or "SpacyTextSplitter" try: if ( @@ -523,16 +524,16 @@ def format_reference(kb_name: str, docs: List[Dict], api_base_url: str="") -> Li return source_documents -if __name__ == "__main__": - from pprint import pprint - - kb_file = KnowledgeFile( - filename="E:\\LLM\\Data\\Test.md", knowledge_base_name="samples" - ) - # kb_file.text_splitter_name = "RecursiveCharacterTextSplitter" - kb_file.text_splitter_name = "MarkdownHeaderTextSplitter" - docs = kb_file.file2docs() - # pprint(docs[-1]) - texts = kb_file.docs2texts(docs) - for text in texts: - print(text) +# if __name__ == "__main__": +# from pprint import pprint +# +# kb_file = KnowledgeFile( +# filename="E:\\LLM\\Data\\Test.md", knowledge_base_name="samples" +# ) +# # kb_file.text_splitter_name = "RecursiveCharacterTextSplitter" +# kb_file.text_splitter_name = "MarkdownHeaderTextSplitter" +# docs = kb_file.file2docs() +# # pprint(docs[-1]) +# texts = kb_file.docs2texts(docs) +# for text in texts: +# print(text)