diff --git a/libs/chatchat-server/chatchat/server/knowledge_base/utils.py b/libs/chatchat-server/chatchat/server/knowledge_base/utils.py index 0d42a2b..84f1c24 100644 --- a/libs/chatchat-server/chatchat/server/knowledge_base/utils.py +++ b/libs/chatchat-server/chatchat/server/knowledge_base/utils.py @@ -70,6 +70,9 @@ def list_files_from_folder(kb_name: str): for x in ["temp", "tmp", ".", "~$"]: if tail.startswith(x): return True + if "_source.txt" in tail.lower() or "_split.txt" in tail.lower(): + return True + return False def process_entry(entry): @@ -422,15 +425,15 @@ class KnowledgeFile: docs = zh_first_title_enhance(docs) docs = customize_zh_title_enhance(docs) - # i = 1 - # outputfile = file_name_without_extension + "_split.txt" - # # 打开文件以写入模式 - # with open(outputfile, 'w') as file: - # for doc in docs: - # #print(f"**********切分段{i}:{doc}") - # file.write(f"\n**********切分段{i}") - # file.write(doc.page_content) - # i = i+1 + i = 1 + outputfile = file_name_without_extension + "_split.txt" + # 打开文件以写入模式 + with open(outputfile, 'w') as file: + for doc in docs: + #print(f"**********切分段{i}:{doc}") + file.write(f"\n**********切分段{i}") + file.write(doc.page_content) + i = i+1 self.splited_docs = docs return self.splited_docs