增加日志
This commit is contained in:
parent
8c16afa66a
commit
a38364a980
|
|
@ -147,43 +147,44 @@ def zh_third_title_enhance(docs: Document) -> Document:
|
||||||
#print(f"zh_third_title_enhance ....")
|
#print(f"zh_third_title_enhance ....")
|
||||||
if len(docs) > 0:
|
if len(docs) > 0:
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
#print(f"zh_third_title_enhance: {doc}")
|
print(f"zh_third_title_enhance: {doc}")
|
||||||
third_title = get_third_level_title(doc.page_content)
|
third_title = get_third_level_title(doc.page_content)
|
||||||
if third_title:
|
if third_title:
|
||||||
title = third_title
|
title = third_title
|
||||||
#print(f"title: {title}")
|
print(f"title: {title}")
|
||||||
elif title:
|
elif title:
|
||||||
#print(f"title is not none")
|
print(f"title is not none")
|
||||||
temp_fourth_content = is_fourth_level_content(doc.page_content)
|
temp_fourth_content = is_fourth_level_content(doc.page_content)
|
||||||
if temp_fourth_content:
|
if temp_fourth_content:
|
||||||
#print(f"is_fourth_level_content : {temp_fourth_content}")
|
print(f"is_fourth_level_content : {temp_fourth_content}")
|
||||||
doc.page_content = f"{title} {doc.page_content}"
|
doc.page_content = f"{title} {doc.page_content}"
|
||||||
else:
|
else:
|
||||||
title = None
|
title = None
|
||||||
#print(f"final title: {title}")
|
print(f"final third title: {title}")
|
||||||
return docs
|
return docs
|
||||||
else:
|
else:
|
||||||
print("zh_third_title_enhance 文件不存在")
|
print("zh_third_title_enhance 文件不存在")
|
||||||
|
|
||||||
#给三级被分开的内容 增加二级标题
|
#给三级被分开的内容 增加二级标题
|
||||||
def zh_second_title_enhance(docs: Document) -> Document:
|
def zh_second_title_enhance(docs: Document) -> Document:
|
||||||
title = None
|
current_title = None
|
||||||
if len(docs) > 0:
|
if len(docs) > 0:
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
logger.debug(f"zh_second_title_enhance: {doc}")
|
print(f"zh_second_title_enhance: {doc}")
|
||||||
second_title = get_second_level_title(doc.page_content)
|
second_title = get_second_level_title(doc.page_content)
|
||||||
if second_title:
|
if second_title:
|
||||||
title = second_title
|
current_title = second_title
|
||||||
logger.debug(f"title: {title}")
|
logger.debug(f"title: {current_title}")
|
||||||
elif title:
|
continue
|
||||||
#print(f"title is not none")
|
if current_title:
|
||||||
|
print(f"title is not none")
|
||||||
temp_third_content = is_third_level_content(doc.page_content)
|
temp_third_content = is_third_level_content(doc.page_content)
|
||||||
if temp_third_content:
|
if temp_third_content:
|
||||||
#print(f"is_third_level_content : {temp_third_content}")
|
print(f"is_third_level_content : {temp_third_content}")
|
||||||
doc.page_content = f"{title} {doc.page_content}"
|
doc.page_content = f"{current_title} {doc.page_content}"
|
||||||
else:
|
else:
|
||||||
title = None
|
current_title = None
|
||||||
logger.debug(f"final title: {title}")
|
print(f"final second title: {current_title}")
|
||||||
return docs
|
return docs
|
||||||
else:
|
else:
|
||||||
print("zh_second_title_enhance 文件不存在")
|
print("zh_second_title_enhance 文件不存在")
|
||||||
|
|
@ -205,7 +206,7 @@ def zh_first_title_enhance(docs: Document) -> Document:
|
||||||
doc.page_content = f"{title} {doc.page_content}"
|
doc.page_content = f"{title} {doc.page_content}"
|
||||||
else:
|
else:
|
||||||
title = None
|
title = None
|
||||||
logger.debug(f"final title: {title}")
|
logger.debug(f"final first title: {title}")
|
||||||
return docs
|
return docs
|
||||||
else:
|
else:
|
||||||
print("zh_first_title_enhance 文件不存在")
|
print("zh_first_title_enhance 文件不存在")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue