增加日志

This commit is contained in:
weiweiw 2025-01-16 10:29:09 +08:00
parent 8c16afa66a
commit a38364a980
1 changed file with 17 additions and 16 deletions

View File

@@ -147,43 +147,44 @@ def zh_third_title_enhance(docs: Document) -> Document:
#print(f"zh_third_title_enhance ....") #print(f"zh_third_title_enhance ....")
if len(docs) > 0: if len(docs) > 0:
for doc in docs: for doc in docs:
#print(f"zh_third_title_enhance: {doc}") print(f"zh_third_title_enhance: {doc}")
third_title = get_third_level_title(doc.page_content) third_title = get_third_level_title(doc.page_content)
if third_title: if third_title:
title = third_title title = third_title
#print(f"title: {title}") print(f"title: {title}")
elif title: elif title:
#print(f"title is not none") print(f"title is not none")
temp_fourth_content = is_fourth_level_content(doc.page_content) temp_fourth_content = is_fourth_level_content(doc.page_content)
if temp_fourth_content: if temp_fourth_content:
#print(f"is_fourth_level_content : {temp_fourth_content}") print(f"is_fourth_level_content : {temp_fourth_content}")
doc.page_content = f"{title} {doc.page_content}" doc.page_content = f"{title} {doc.page_content}"
else: else:
title = None title = None
#print(f"final title: {title}") print(f"final third title: {title}")
return docs return docs
else: else:
print("zh_third_title_enhance 文件不存在") print("zh_third_title_enhance 文件不存在")
#给三级被分开的内容 增加二级标题 #给三级被分开的内容 增加二级标题
def zh_second_title_enhance(docs: Document) -> Document: def zh_second_title_enhance(docs: Document) -> Document:
title = None current_title = None
if len(docs) > 0: if len(docs) > 0:
for doc in docs: for doc in docs:
logger.debug(f"zh_second_title_enhance: {doc}") print(f"zh_second_title_enhance: {doc}")
second_title = get_second_level_title(doc.page_content) second_title = get_second_level_title(doc.page_content)
if second_title: if second_title:
title = second_title current_title = second_title
logger.debug(f"title: {title}") logger.debug(f"title: {current_title}")
elif title: continue
#print(f"title is not none") if current_title:
print(f"title is not none")
temp_third_content = is_third_level_content(doc.page_content) temp_third_content = is_third_level_content(doc.page_content)
if temp_third_content: if temp_third_content:
#print(f"is_third_level_content : {temp_third_content}") print(f"is_third_level_content : {temp_third_content}")
doc.page_content = f"{title} {doc.page_content}" doc.page_content = f"{current_title} {doc.page_content}"
else: else:
title = None current_title = None
logger.debug(f"final title: {title}") print(f"final second title: {current_title}")
return docs return docs
else: else:
print("zh_second_title_enhance 文件不存在") print("zh_second_title_enhance 文件不存在")
@@ -205,7 +206,7 @@ def zh_first_title_enhance(docs: Document) -> Document:
doc.page_content = f"{title} {doc.page_content}" doc.page_content = f"{title} {doc.page_content}"
else: else:
title = None title = None
logger.debug(f"final title: {title}") logger.debug(f"final first title: {title}")
return docs return docs
else: else:
print("zh_first_title_enhance 文件不存在") print("zh_first_title_enhance 文件不存在")