增加标题增强文档功能
This commit is contained in:
parent
74f4f8174d
commit
700a7c7298
|
|
@ -377,6 +377,7 @@ class KnowledgeFile:
|
||||||
else:
|
else:
|
||||||
print("文件不存在")
|
print("文件不存在")
|
||||||
|
|
||||||
|
logger.info(f"********docs2texts")
|
||||||
docs = docs or self.file2docs(refresh=refresh)
|
docs = docs or self.file2docs(refresh=refresh)
|
||||||
#remove the redundant line break after loading, by weiweiwang 2025/1/13
|
#remove the redundant line break after loading, by weiweiwang 2025/1/13
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
|
|
@ -392,6 +393,8 @@ class KnowledgeFile:
|
||||||
chunk_size=chunk_size,
|
chunk_size=chunk_size,
|
||||||
chunk_overlap=chunk_overlap,
|
chunk_overlap=chunk_overlap,
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
logger.error(f"text_splitter is Not None, text_splitter_name: {self.text_splitter_name}")
|
||||||
if self.text_splitter_name == "MarkdownHeaderTextSplitter":
|
if self.text_splitter_name == "MarkdownHeaderTextSplitter":
|
||||||
docs = text_splitter.split_text(docs[0].page_content)
|
docs = text_splitter.split_text(docs[0].page_content)
|
||||||
else:
|
else:
|
||||||
|
|
@ -400,7 +403,7 @@ class KnowledgeFile:
|
||||||
if not docs:
|
if not docs:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
print(f"文档切分示例:{docs[0]}")
|
print(f"文档切分:{len(docs)} 块")
|
||||||
if zh_title_enhance:
|
if zh_title_enhance:
|
||||||
# docs = func_zh_title_enhance(docs)
|
# docs = func_zh_title_enhance(docs)
|
||||||
docs = zh_third_title_enhance(docs)
|
docs = zh_third_title_enhance(docs)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue