增加标题增强文档功能

This commit is contained in:
weiweiw 2025-01-13 16:11:37 +08:00
parent 74f4f8174d
commit 700a7c7298
1 changed file with 4 additions and 1 deletion

View File

@@ -377,6 +377,7 @@ class KnowledgeFile:
else:
print("文件不存在")
logger.info(f"********docs2texts")
docs = docs or self.file2docs(refresh=refresh)
#remove the redundant line break after loading, by weiweiwang 2025/1/13
for doc in docs:
@@ -392,6 +393,8 @@ class KnowledgeFile:
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
)
else:
logger.error(f"text_splitter is Not None, text_splitter_name: {self.text_splitter_name}")
if self.text_splitter_name == "MarkdownHeaderTextSplitter":
docs = text_splitter.split_text(docs[0].page_content)
else:
@@ -400,7 +403,7 @@ class KnowledgeFile:
if not docs:
return []
print(f"文档切分示例:{docs[0]}")
print(f"文档切分{len(docs)}")
if zh_title_enhance:
# docs = func_zh_title_enhance(docs)
docs = zh_third_title_enhance(docs)