增加标题增强文档功能
This commit is contained in:
parent
74f4f8174d
commit
700a7c7298
|
|
@ -377,6 +377,7 @@ class KnowledgeFile:
|
|||
else:
|
||||
print("文件不存在")
|
||||
|
||||
logger.info(f"********docs2texts")
|
||||
docs = docs or self.file2docs(refresh=refresh)
|
||||
#remove the redundant line break after loading, by weiweiwang 2025/1/13
|
||||
for doc in docs:
|
||||
|
|
@ -392,6 +393,8 @@ class KnowledgeFile:
|
|||
chunk_size=chunk_size,
|
||||
chunk_overlap=chunk_overlap,
|
||||
)
|
||||
else:
|
||||
logger.error(f"text_splitter is Not None, text_splitter_name: {self.text_splitter_name}")
|
||||
if self.text_splitter_name == "MarkdownHeaderTextSplitter":
|
||||
docs = text_splitter.split_text(docs[0].page_content)
|
||||
else:
|
||||
|
|
@ -400,7 +403,7 @@ class KnowledgeFile:
|
|||
if not docs:
|
||||
return []
|
||||
|
||||
print(f"文档切分示例:{docs[0]}")
|
||||
print(f"文档切分:{len(docs)} 块")
|
||||
if zh_title_enhance:
|
||||
# docs = func_zh_title_enhance(docs)
|
||||
docs = zh_third_title_enhance(docs)
|
||||
|
|
|
|||
Loading…
Reference in New Issue