diff --git a/libs/chatchat-server/chatchat/server/file_rag/text_splitter/customer_zh_title_enhance.py b/libs/chatchat-server/chatchat/server/file_rag/text_splitter/customer_zh_title_enhance.py index 9cb447c..c3462a1 100644 --- a/libs/chatchat-server/chatchat/server/file_rag/text_splitter/customer_zh_title_enhance.py +++ b/libs/chatchat-server/chatchat/server/file_rag/text_splitter/customer_zh_title_enhance.py @@ -147,43 +147,44 @@ def zh_third_title_enhance(docs: Document) -> Document: #print(f"zh_third_title_enhance ....") if len(docs) > 0: for doc in docs: - #print(f"zh_third_title_enhance: {doc}") + print(f"zh_third_title_enhance: {doc}") third_title = get_third_level_title(doc.page_content) if third_title: title = third_title - #print(f"title: {title}") + print(f"title: {title}") elif title: - #print(f"title is not none") + print(f"title is not none") temp_fourth_content = is_fourth_level_content(doc.page_content) if temp_fourth_content: - #print(f"is_fourth_level_content : {temp_fourth_content}") + print(f"is_fourth_level_content : {temp_fourth_content}") doc.page_content = f"{title} {doc.page_content}" else: title = None - #print(f"final title: {title}") + print(f"final third title: {title}") return docs else: print("zh_third_title_enhance 文件不存在") #给三级被分开的内容 增加二级标题 def zh_second_title_enhance(docs: Document) -> Document: - title = None + current_title = None if len(docs) > 0: for doc in docs: - logger.debug(f"zh_second_title_enhance: {doc}") + print(f"zh_second_title_enhance: {doc}") second_title = get_second_level_title(doc.page_content) if second_title: - title = second_title - logger.debug(f"title: {title}") - elif title: - #print(f"title is not none") + current_title = second_title + logger.debug(f"title: {current_title}") + continue + if current_title: + print(f"title is not none") temp_third_content = is_third_level_content(doc.page_content) if temp_third_content: - #print(f"is_third_level_content : {temp_third_content}") - doc.page_content = f"{title} {doc.page_content}" + print(f"is_third_level_content : {temp_third_content}") + doc.page_content = f"{current_title} {doc.page_content}" else: - title = None - logger.debug(f"final title: {title}") + current_title = None + print(f"final second title: {current_title}") return docs else: print("zh_second_title_enhance 文件不存在") @@ -205,7 +206,7 @@ def zh_first_title_enhance(docs: Document) -> Document: doc.page_content = f"{title} {doc.page_content}" else: title = None - logger.debug(f"final title: {title}") + logger.debug(f"final first title: {title}") return docs else: print("zh_first_title_enhance 文件不存在")