From c897a1722fcd98804b585dcd045c02a086225730 Mon Sep 17 00:00:00 2001 From: weiweiw <14335254+weiweiw22@user.noreply.gitee.com> Date: Mon, 7 Apr 2025 07:55:31 +0800 Subject: [PATCH] =?UTF-8?q?=E5=B0=86=E7=9F=A5=E8=AF=86=E5=BA=93=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E5=88=86=E5=9D=97=E6=96=87=E4=BB=B6=E4=BF=9D=E5=AD=98?= =?UTF-8?q?=EF=BC=8C=E4=BD=86=E6=98=AF=E4=B8=8D=E6=98=BE=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../chatchat/server/knowledge_base/utils.py | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/libs/chatchat-server/chatchat/server/knowledge_base/utils.py b/libs/chatchat-server/chatchat/server/knowledge_base/utils.py index 0d42a2b..84f1c24 100644 --- a/libs/chatchat-server/chatchat/server/knowledge_base/utils.py +++ b/libs/chatchat-server/chatchat/server/knowledge_base/utils.py @@ -70,6 +70,9 @@ def list_files_from_folder(kb_name: str): for x in ["temp", "tmp", ".", "~$"]: if tail.startswith(x): return True + if "_source.txt" in tail.lower() or "_split.txt" in tail.lower(): + return True + return False def process_entry(entry): @@ -422,15 +425,15 @@ class KnowledgeFile: docs = zh_first_title_enhance(docs) docs = customize_zh_title_enhance(docs) - # i = 1 - # outputfile = file_name_without_extension + "_split.txt" - # # 打开文件以写入模式 - # with open(outputfile, 'w') as file: - # for doc in docs: - # #print(f"**********切分段{i}:{doc}") - # file.write(f"\n**********切分段{i}") - # file.write(doc.page_content) - # i = i+1 + i = 1 + outputfile = file_name_without_extension + "_split.txt" + # 打开文件以写入模式 + with open(outputfile, 'w') as file: + for doc in docs: + #print(f"**********切分段{i}:{doc}") + file.write(f"\n**********切分段{i}") + file.write(doc.page_content) + i = i+1 self.splited_docs = docs return self.splited_docs