增加日志

This commit is contained in:
weiweiw 2025-01-16 10:29:09 +08:00
parent 8c16afa66a
commit a38364a980
1 changed file with 17 additions and 16 deletions

View File

@@ -147,43 +147,44 @@ def zh_third_title_enhance(docs: Document) -> Document:
#print(f"zh_third_title_enhance ....")
if len(docs) > 0:
for doc in docs:
#print(f"zh_third_title_enhance: {doc}")
print(f"zh_third_title_enhance: {doc}")
third_title = get_third_level_title(doc.page_content)
if third_title:
title = third_title
#print(f"title: {title}")
print(f"title: {title}")
elif title:
#print(f"title is not none")
print(f"title is not none")
temp_fourth_content = is_fourth_level_content(doc.page_content)
if temp_fourth_content:
#print(f"is_fourth_level_content : {temp_fourth_content}")
print(f"is_fourth_level_content : {temp_fourth_content}")
doc.page_content = f"{title} {doc.page_content}"
else:
title = None
#print(f"final title: {title}")
print(f"final third title: {title}")
return docs
else:
print("zh_third_title_enhance 文件不存在")
#给三级被分开的内容 增加二级标题
def zh_second_title_enhance(docs: Document) -> Document:
title = None
current_title = None
if len(docs) > 0:
for doc in docs:
logger.debug(f"zh_second_title_enhance: {doc}")
print(f"zh_second_title_enhance: {doc}")
second_title = get_second_level_title(doc.page_content)
if second_title:
title = second_title
logger.debug(f"title: {title}")
elif title:
#print(f"title is not none")
current_title = second_title
logger.debug(f"title: {current_title}")
continue
if current_title:
print(f"title is not none")
temp_third_content = is_third_level_content(doc.page_content)
if temp_third_content:
#print(f"is_third_level_content : {temp_third_content}")
doc.page_content = f"{title} {doc.page_content}"
print(f"is_third_level_content : {temp_third_content}")
doc.page_content = f"{current_title} {doc.page_content}"
else:
title = None
logger.debug(f"final title: {title}")
current_title = None
print(f"final second title: {current_title}")
return docs
else:
print("zh_second_title_enhance 文件不存在")
@@ -205,7 +206,7 @@ def zh_first_title_enhance(docs: Document) -> Document:
doc.page_content = f"{title} {doc.page_content}"
else:
title = None
logger.debug(f"final title: {title}")
logger.debug(f"final first title: {title}")
return docs
else:
print("zh_first_title_enhance 文件不存在")