From 700a7c7298b3623c1fc3fc3ca8cd44ecd67358d4 Mon Sep 17 00:00:00 2001
From: weiweiw <14335254+weiweiw22@user.noreply.gitee.com>
Date: Mon, 13 Jan 2025 16:11:37 +0800
Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=A0=87=E9=A2=98=E5=A2=9E?=
 =?UTF-8?q?=E5=BC=BA=E6=96=87=E6=A1=A3=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 libs/chatchat-server/chatchat/server/knowledge_base/utils.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libs/chatchat-server/chatchat/server/knowledge_base/utils.py b/libs/chatchat-server/chatchat/server/knowledge_base/utils.py
index b76fc8d..80c90e2 100644
--- a/libs/chatchat-server/chatchat/server/knowledge_base/utils.py
+++ b/libs/chatchat-server/chatchat/server/knowledge_base/utils.py
@@ -377,6 +377,7 @@ class KnowledgeFile:
         else:
             print("文件不存在")
 
+        logger.info(f"********docs2texts")
         docs = docs or self.file2docs(refresh=refresh)
         #remove the redundant line break after loading, by weiweiwang 2025/1/13
         for doc in docs:
@@ -392,6 +393,8 @@ class KnowledgeFile:
                     chunk_size=chunk_size,
                     chunk_overlap=chunk_overlap,
                 )
+            else:
+                logger.error(f"text_splitter is Not None, text_splitter_name: {self.text_splitter_name}")
             if self.text_splitter_name == "MarkdownHeaderTextSplitter":
                 docs = text_splitter.split_text(docs[0].page_content)
             else:
@@ -400,7 +403,7 @@ class KnowledgeFile:
         if not docs:
             return []
 
-        print(f"文档切分示例:{docs[0]}")
+        print(f"文档切分:{len(docs)} 块")
         if zh_title_enhance:
             # docs = func_zh_title_enhance(docs)
             docs = zh_third_title_enhance(docs)