diff --git a/libs/chatchat-server/chatchat/server/knowledge_base/utils.py b/libs/chatchat-server/chatchat/server/knowledge_base/utils.py index 9ebc7f9..ab4cdd4 100644 --- a/libs/chatchat-server/chatchat/server/knowledge_base/utils.py +++ b/libs/chatchat-server/chatchat/server/knowledge_base/utils.py @@ -228,6 +228,7 @@ def make_text_splitter(splitter_name, chunk_size, chunk_overlap): """ 根据参数获取特定的分词器 """ + logger.info(f"make_text_splitter start....splitter_name:{splitter_name}") splitter_name = splitter_name or "SpacyTextSplitter" try: if ( @@ -523,16 +524,16 @@ def format_reference(kb_name: str, docs: List[Dict], api_base_url: str="") -> Li return source_documents -if __name__ == "__main__": - from pprint import pprint - - kb_file = KnowledgeFile( - filename="E:\\LLM\\Data\\Test.md", knowledge_base_name="samples" - ) - # kb_file.text_splitter_name = "RecursiveCharacterTextSplitter" - kb_file.text_splitter_name = "MarkdownHeaderTextSplitter" - docs = kb_file.file2docs() - # pprint(docs[-1]) - texts = kb_file.docs2texts(docs) - for text in texts: - print(text) +# if __name__ == "__main__": +# from pprint import pprint +# +# kb_file = KnowledgeFile( +# filename="E:\\LLM\\Data\\Test.md", knowledge_base_name="samples" +# ) +# # kb_file.text_splitter_name = "RecursiveCharacterTextSplitter" +# kb_file.text_splitter_name = "MarkdownHeaderTextSplitter" +# docs = kb_file.file2docs() +# # pprint(docs[-1]) +# texts = kb_file.docs2texts(docs) +# for text in texts: +# print(text)