enhance log: rotate app.log and route print/appLogger output through the shared logger

wvivi2023 2024-04-02 10:32:34 +08:00
parent 6ed7002758
commit cc706ce7ef
14 changed files with 101 additions and 89 deletions

BIN
.DS_Store vendored

Binary file not shown.

View File

@@ -3,7 +3,7 @@ import os
import langchain
import tempfile
import shutil
from logging.handlers import RotatingFileHandler
# Whether to print verbose logs
log_verbose = False
@@ -14,9 +14,12 @@ langchain.verbose = False
# Log format
LOG_FORMAT = "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logging.basicConfig(format=LOG_FORMAT)
LOG_BACKUP_COUNT = 10 # number of archived log files to keep
LOG_MAX_FILE_SIZE = 1024 * 1024 # maximum size of each log file (in bytes)
# Create the logger and set the log level
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
# Log storage path
LOG_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs")
@@ -32,20 +35,14 @@ except Exception:
os.makedirs(BASE_TEMP_DIR, exist_ok=True)
# added by weiweiwang for logging
# Create the logger and set the log level
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
# Create the file handler and set its log level and file name
#appLogPath = os.path.join(LOG_PATH, "app.log")
file_handler = logging.FileHandler(LOG_PATH +'/app.log')
file_handler = RotatingFileHandler(LOG_PATH +'/app.log', maxBytes=LOG_MAX_FILE_SIZE, backupCount=LOG_BACKUP_COUNT)
file_handler.setLevel(logging.INFO)
# Set the log record format
# # Set the log record format
formatter = logging.Formatter(LOG_FORMAT)
file_handler.setFormatter(formatter)
# Get the logger and attach the file handler
appLogger = logging.getLogger(__name__)
appLogger.addHandler(file_handler)
logger.addHandler(file_handler)
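The net effect of this hunk: one RotatingFileHandler is attached to the root logger, so every module-level logger obtained via configs also writes to logs/app.log with size-based rotation. A minimal standalone sketch of that setup, assuming an illustrative ./logs directory in place of the project's LOG_PATH:

import logging
import os
from logging.handlers import RotatingFileHandler

LOG_FORMAT = "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
LOG_PATH = "./logs"  # illustrative; the project derives LOG_PATH relative to __file__
os.makedirs(LOG_PATH, exist_ok=True)

# Console output via basicConfig, as in the hunk above.
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
root_logger = logging.getLogger()

# Rotate at 1 MiB and keep 10 backups, mirroring LOG_MAX_FILE_SIZE / LOG_BACKUP_COUNT.
file_handler = RotatingFileHandler(os.path.join(LOG_PATH, "app.log"),
                                   maxBytes=1024 * 1024, backupCount=10)
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(logging.Formatter(LOG_FORMAT))
root_logger.addHandler(file_handler)

root_logger.info("logging initialised")  # goes to the console and to logs/app.log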

View File

@@ -1,6 +1,6 @@
from typing import List
from langchain.document_loaders.unstructured import UnstructuredFileLoader
from configs import PDF_OCR_THRESHOLD
from configs import PDF_OCR_THRESHOLD,logger
from document_loaders.ocr import get_ocr
#PDF_OCR_THRESHOLD= (0.6,0.6)
#from ocr import get_ocr
@@ -23,7 +23,7 @@ class RapidOCRPDFLoader(UnstructuredFileLoader):
print(f"****page:{i+1}****")
text = page.get_text("")
text_lines = text.strip().split("\n")
#print(f"文字内容:{text_lines}")
logger.debug(f"文字内容:{text_lines}")
img_list = page.get_image_info(xrefs=True)
ocr_result = []
@@ -39,7 +39,7 @@ class RapidOCRPDFLoader(UnstructuredFileLoader):
result, _ = ocr(img_array)
if result:
ocr_result = [line[1] for line in result]
#print(f"图片内容:{ocr_result}")
logger.debug(f"图片内容:{ocr_result}")
#resp += "\n".join(ocr_result)
if (len(ocr_result)>0):
@@ -49,7 +49,7 @@ class RapidOCRPDFLoader(UnstructuredFileLoader):
# Assume the page number is on the last line
if text_lines[-1].isdigit():
text = "\n".join(text_lines[:-1])
print(f"******去除了页码")
logger.debug(f"******去除了页码")
resp += text + "\n"
# Update progress

View File

@@ -1,5 +1,5 @@
from fastapi import Body
from configs import logger, log_verbose
from configs import logger, log_verbose
from server.utils import BaseResponse
from server.db.repository import feedback_message_to_db

View File

@@ -11,7 +11,8 @@ from configs import (LLM_MODELS,
RERANKER_MODEL,
RERANKER_MAX_LENGTH,
MODEL_PATH,
DOWNLOAD_BASE_URL)
DOWNLOAD_BASE_URL,
logger)
from server.utils import wrap_done, get_ChatOpenAI
from server.utils import BaseResponse, get_prompt_template
from langchain.chains import LLMChain
@@ -26,6 +27,8 @@ from urllib.parse import urlencode
from server.knowledge_base.kb_doc_api import search_docs
from server.reranker.reranker import LangchainReranker
from server.utils import embedding_device
import time
async def knowledge_base_chat(query: str = Body(..., description="user input", examples=["你好"]),
knowledge_base_name: str = Body(..., description="knowledge base name", examples=["samples"]),
top_k: int = Body(VECTOR_SEARCH_TOP_K, description="number of matched vectors"),
@@ -81,6 +84,7 @@ async def knowledge_base_chat(query: str = Body(..., description="user input",
max_tokens=max_tokens,
callbacks=[callback],
)
start_time = time.time() # record start time
docs = search_docs(query, knowledge_base_name, top_k, score_threshold)
# docs = await run_in_threadpool(search_docs,
# query=query,
@@ -88,7 +92,14 @@ async def knowledge_base_chat(query: str = Body(..., description="user input",
# top_k=top_k,
# score_threshold=score_threshold)
end_time = time.time() # record end time
execution_time = end_time - start_time # compute elapsed time
logger.info(f"search_docs took {execution_time}")
# apply the reranker
logger.info(f"use_reranker:{USE_RERANKER}")
start_time = time.time() # record start time
if USE_RERANKER:
reranker_model_path = MODEL_PATH["reranker"].get(RERANKER_MODEL,"BAAI/bge-reranker-large")
print("-----------------model path------------------")
@@ -102,6 +113,9 @@ async def knowledge_base_chat(query: str = Body(..., description="user input",
query=query)
print("---------after rerank------------------")
print(docs)
end_time = time.time() # record end time
execution_time = end_time - start_time # compute elapsed time
logger.info(f"reranker took {execution_time}")
context = "\n".join([doc.page_content for doc in docs])
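Both the retrieval call and the rerank step above are bracketed with time.time() pairs and reported through logger.info. A small sketch of the same wall-clock pattern factored into a helper; timed() and the commented calls are illustrative, not functions from the project:

import logging
import time

logger = logging.getLogger(__name__)

def timed(label, fn, *args, **kwargs):
    # Time a single call and log the elapsed wall-clock seconds, as the hunk does inline.
    start_time = time.time()
    result = fn(*args, **kwargs)
    elapsed = time.time() - start_time
    logger.info(f"{label} took {elapsed:.3f}s")
    return result

# Hypothetical usage mirroring the two measured steps:
# docs = timed("search_docs", search_docs, query, knowledge_base_name, top_k, score_threshold)
# docs = timed("reranker", reranker_model.compress_documents, documents=docs, query=query)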

View File

@@ -19,7 +19,7 @@ from server.knowledge_base.model.kb_document_model import DocumentWithVSId
from typing import List, Dict
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from configs import USE_RANKING, appLogger
from configs import USE_RANKING, logger
import jieba
from typing import List, Dict,Tuple
@@ -39,7 +39,7 @@ def search_docs(
data = []
if kb is not None:
if query:
print(f"search_docs, query:{query}")
logger.info(f"search_docs, query:{query},top_k:{top_k},score_threshold:{score_threshold}, use_ranking:{USE_RANKING}")
docs = kb.search_docs(query, FIRST_VECTOR_SEARCH_TOP_K, score_threshold)
#print(f"search_docs,len of docs {len(docs)}, docs:{docs}")
@@ -355,7 +355,7 @@ def update_docs(
failed_files = {}
kb_files = []
appLogger.info(f"111111 kb_doc_api update_docs file_names:{file_names},更新的doc 长度:{len(docs)}")
logger.info(f"111111 kb_doc_api update_docs file_names:{file_names},更新的doc 长度:{len(docs)}")
# 生成需要加载docs的文件列表
for file_name in file_names:
file_detail = get_file_detail(kb_name=knowledge_base_name, filename=file_name)
@@ -364,7 +364,7 @@ def update_docs(
continue
if file_name not in docs:
try:
appLogger.info(f"****kb_doc_api update_docs file_name not in docs,filename:{file_name}")
logger.info(f"****kb_doc_api update_docs file_name not in docs,filename:{file_name}")
kb_files.append(KnowledgeFile(filename=file_name, knowledge_base_name=knowledge_base_name))
except Exception as e:
msg = f"加载文档 {file_name} 时出错:{e}"
@ -391,7 +391,7 @@ def update_docs(
# 将自定义的docs进行向量化
for file_name, v in docs.items():
try:
appLogger.info(f"222222 kb_doc_api update_docs file_name:{file_name},更新的doc 长度:{len(docs)}")
logger.info(f"222222 kb_doc_api update_docs file_name:{file_name},更新的doc 长度:{len(docs)}")
v = [x if isinstance(x, Document) else Document(**x) for x in v]
kb_file = KnowledgeFile(filename=file_name, knowledge_base_name=knowledge_base_name)
kb.update_doc(kb_file, docs=v, not_refresh_vs_cache=True)

View File

@@ -28,7 +28,7 @@ from typing import List, Union, Dict, Optional, Tuple
from server.embeddings_api import embed_texts, aembed_texts, embed_documents
from server.knowledge_base.model.kb_document_model import DocumentWithVSId
from configs import logger,appLogger
from configs import logger
import time
@@ -115,15 +115,15 @@ class KBService(ABC):
custom_docs = True
for doc in docs:
doc.metadata.setdefault("source", kb_file.filename)
appLogger.info(f"kb_doc_api add_doc docs 不为空len(docs){len(docs)},文件名称:{kb_file.filename}")
logger.info(f"kb_doc_api add_doc docs 不为空len(docs){len(docs)},文件名称:{kb_file.filename}")
else:
docs = kb_file.file2text()
custom_docs = False
appLogger.info(f"kb_doc_api add_doc docs 为空len(docs){len(docs)},文件名称:{kb_file.filename}")
logger.info(f"kb_doc_api add_doc docs 为空len(docs){len(docs)},文件名称:{kb_file.filename}")
end_time = time.time() # 记录结束时间
execution_time = end_time - start_time # 计算执行时间
appLogger.info(f"add_doc: 加载文件或分块耗时{execution_time}")
logger.info(f"add_doc: 加载文件或分块耗时{execution_time}")
start_time = time.time() # 记录开始时间
if docs:
@@ -138,11 +138,11 @@ class KBService(ABC):
rel_path = Path(source).relative_to(self.doc_path)
doc.metadata["source"] = str(rel_path.as_posix().strip("/"))
except Exception as e:
appLogger.info(f"cannot convert absolute path ({source}) to relative path. error is : {e}")
logger.info(f"cannot convert absolute path ({source}) to relative path. error is : {e}")
self.delete_doc(kb_file)
#appLogger.info(f"add_doc filepath:{kb_file.filepath}将要执行do_add_doc")
#logger.info(f"add_doc filepath:{kb_file.filepath}将要执行do_add_doc")
doc_infos = self.do_add_doc(docs, **kwargs)
#appLogger.info(f"add_doc filepath:{kb_file.filepath} 将要执行dd_file_to_db")
#logger.info(f"add_doc filepath:{kb_file.filepath} 将要执行dd_file_to_db")
status = add_file_to_db(kb_file,
custom_docs=custom_docs,
docs_count=len(docs),
@@ -150,7 +150,7 @@ class KBService(ABC):
end_time = time.time() # record end time
execution_time = end_time - start_time # compute elapsed time
appLogger.info(f"add_doc: writing to store took {execution_time}")
logger.info(f"add_doc: writing to store took {execution_time}")
else:
status = False
return status
@@ -251,7 +251,7 @@ class KBService(ABC):
Retrieve Documents by file_name or metadata
'''
doc_infos = list_docs_from_db(kb_name=self.kb_name, file_name=file_name, metadata=metadata)
#appLogger.info(f"kb_doc_api list_docs_from_db: {doc_infos}")
#logger.info(f"kb_doc_api list_docs_from_db: {doc_infos}")
docs = []
for x in doc_infos:
doc_info = self.get_doc_by_ids([x["id"]])

View File

@@ -9,7 +9,7 @@ from server.knowledge_base.kb_service.base import KBService, SupportedVSType
from server.knowledge_base.utils import KnowledgeFile
from server.utils import load_local_embeddings
from elasticsearch import Elasticsearch,BadRequestError
from configs import logger,appLogger
from configs import logger
from configs import kbs_config
from server.knowledge_base.model.kb_document_model import DocumentWithVSId
@@ -30,13 +30,13 @@ class ESKBService(KBService):
self.es_client_python = Elasticsearch(f"http://{self.IP}:{self.PORT}",
basic_auth=(self.user,self.password))
else:
appLogger.warning("ES未配置用户名和密码")
logger.warning("ES未配置用户名和密码")
self.es_client_python = Elasticsearch(f"http://{self.IP}:{self.PORT}")
except ConnectionError:
appLogger.error("连接到 Elasticsearch 失败!")
logger.error("连接到 Elasticsearch 失败!")
raise ConnectionError
except Exception as e:
appLogger.error(f"Error 发生 : {e}")
logger.error(f"Error 发生 : {e}")
raise e
try:
# First, try to create the index via es_client_python
@@ -51,8 +51,8 @@ class ESKBService(KBService):
}
self.es_client_python.indices.create(index=self.index_name, mappings=mappings)
except BadRequestError as e:
appLogger.error("创建索引失败,重新")
appLogger.error(e)
logger.error("创建索引失败,重新")
logger.error(e)
try:
# langchain ES connection and index creation
@@ -67,7 +67,7 @@ class ESKBService(KBService):
es_password=self.password
)
else:
appLogger.warning("ES未配置用户名和密码")
logger.warning("ES未配置用户名和密码")
self.db_init = ElasticsearchStore(
es_url=f"http://{self.IP}:{self.PORT}",
index_name=self.index_name,
@@ -77,10 +77,10 @@ class ESKBService(KBService):
)
except ConnectionError:
print("### 初始化 Elasticsearch 失败!")
appLogger.error("### 初始化 Elasticsearch 失败!")
logger.error("### 初始化 Elasticsearch 失败!")
raise ConnectionError
except Exception as e:
appLogger.error(f"Error 发生 : {e}")
logger.error(f"Error 发生 : {e}")
raise e
try:
# Try to create the index via db_init
@@ -89,8 +89,8 @@ class ESKBService(KBService):
dims_length=self.dims_length
)
except Exception as e:
appLogger.error("创建索引失败...")
appLogger.error(e)
logger.error("创建索引失败...")
logger.error(e)
# raise e
@@ -156,23 +156,22 @@ class ESKBService(KBService):
except ConnectionError as ce:
print(ce)
print("连接到 Elasticsearch 失败!")
appLogger.error("连接到 Elasticsearch 失败!")
logger.error("连接到 Elasticsearch 失败!")
except Exception as e:
appLogger.error(f"Error 发生 : {e}")
logger.error(f"Error 发生 : {e}")
print(e)
def do_search(self, query:str, top_k: int, score_threshold: float):
# Text similarity search
print(f"do_search,top_k:{top_k},score_threshold:{score_threshold}")
docs = self.db_init.similarity_search_with_score(query=query,
k=top_k)
return docs
def searchbyContent(self, query:str, top_k: int = 2):
if self.es_client_python.indices.exists(index=self.index_name):
appLogger.info(f"******ESKBService searchByContent {self.index_name},query:{query}")
logger.info(f"******ESKBService searchByContent {self.index_name},query:{query}")
tem_query = {
"query": {"match": {
"context": "*" + query + "*"
@@ -199,7 +198,7 @@ class ESKBService(KBService):
def searchbyContentInternal(self, query:str, top_k: int = 2):
if self.es_client_python.indices.exists(index=self.index_name):
appLogger.info(f"******ESKBService searchbyContentInternal {self.index_name},query:{query}")
logger.info(f"******ESKBService searchbyContentInternal {self.index_name},query:{query}")
tem_query = {
"query": {"match": {
"context": "*" + query + "*"
@@ -231,19 +230,19 @@ class ESKBService(KBService):
metadata=result["_source"]["metadata"],
))
except Exception as e:
appLogger.error(f"ES Docs Get Error! {e}")
logger.error(f"ES Docs Get Error! {e}")
return result_list
def del_doc_by_ids(self,ids: List[str]) -> bool:
appLogger.info(f"es_kb_service del_doc_by_ids")
logger.info(f"es_kb_service del_doc_by_ids")
for doc_id in ids:
try:
self.es_client_python.delete(index=self.index_name,
id=doc_id,
refresh=True)
except Exception as e:
appLogger.error(f"ES Docs Delete Error! {e}")
logger.error(f"ES Docs Delete Error! {e}")
def do_delete_doc(self, kb_file, **kwargs):
@@ -272,7 +271,7 @@ class ESKBService(KBService):
id=doc_id,
refresh=True)
except Exception as e:
appLogger.error(f"ES Docs Delete Error! {e}")
logger.error(f"ES Docs Delete Error! {e}")
# self.db_init.delete(ids=delete_list)
#self.es_client_python.indices.refresh(index=self.index_name)

View File

@@ -1,7 +1,7 @@
from fastapi import Body
from configs import (DEFAULT_VS_TYPE, EMBEDDING_MODEL,
OVERLAP_SIZE,
logger, log_verbose, )
logger, log_verbose )
from server.knowledge_base.utils import (list_files_from_folder)
from sse_starlette import EventSourceResponse
import json
@@ -10,7 +10,7 @@ from typing import List, Optional
from server.knowledge_base.kb_summary.base import KBSummaryService
from server.knowledge_base.kb_summary.summary_chunk import SummaryAdapter
from server.utils import wrap_done, get_ChatOpenAI, BaseResponse
from configs import LLM_MODELS, TEMPERATURE
from configs import LLM_MODELS, TEMPERATURE, logger
from server.knowledge_base.model.kb_document_model import DocumentWithVSId
def recreate_summary_vector_store(

View File

@@ -65,7 +65,7 @@ def import_from_db(
con.close()
return True
except Exception as e:
print(f"无法读取备份数据库:{sqlite_path}。错误信息:{e}")
logger.error(f"无法读取备份数据库:{sqlite_path}。错误信息:{e}")
return False

View File

@@ -5,7 +5,6 @@ from configs import (
OVERLAP_SIZE,
ZH_TITLE_ENHANCE,
logger,
appLogger,
log_verbose,
text_splitter_dict,
LLM_MODELS,
@@ -95,7 +94,7 @@ def list_files_from_folder(kb_name: str):
process_entry(entry)
except Exception as e:
appLogger.error(f"Error 发生 : {e}")
logger.error(f"Error 发生 : {e}")
return result
@@ -176,7 +175,7 @@ def get_loader(loader_name: str, file_path: str, loader_kwargs: Dict = None):
DocumentLoader = getattr(document_loaders_module, loader_name)
except Exception as e:
msg = f"为文件{file_path}查找加载器{loader_name}时出错:{e}"
appLogger.error(f'{e.__class__.__name__}: {msg}',
logger.error(f'{e.__class__.__name__}: {msg}',
exc_info=e if log_verbose else None)
document_loaders_module = importlib.import_module('langchain.document_loaders')
DocumentLoader = getattr(document_loaders_module, "UnstructuredFileLoader")
@@ -315,14 +314,15 @@ class KnowledgeFile:
def file2docs(self, refresh: bool = False):
if self.docs is None or refresh:
appLogger.info(f"{self.document_loader_name} used for {self.filepath}")
logger.info(f"{self.document_loader_name} used for {self.filepath}")
loader = get_loader(loader_name=self.document_loader_name,
file_path=self.filepath,
loader_kwargs=self.loader_kwargs)
self.docs = loader.load()
logger.info(f"{self.filepath}加载完成")
return self.docs
print(f"KnowledgeFile: filepath:{self.filepath}, doc_title_name:{self.doc_title_name}, ext:{self.ext}")
#print(f"KnowledgeFile: filepath:{self.filepath}, doc_title_name:{self.doc_title_name}, ext:{self.ext}")
def docs2texts(
self,
@@ -347,7 +347,7 @@ class KnowledgeFile:
if doc.page_content.strip()!="":
doc.page_content = re.sub(r"\n{2,}", "\n", doc.page_content.strip())
file_name_without_extension, file_extension = os.path.splitext(self.filepath)
print(f"filepath:{self.filepath},文件名拆分后:{file_name_without_extension},{file_extension}")
logger.info(f"filepath:{self.filepath},文件名拆分后:{file_name_without_extension},{file_extension}")
if not docs:
return []
if self.ext not in [".csv"]:
@@ -437,10 +437,11 @@ def files2docs_in_thread(
def file2docs(*, file: KnowledgeFile, **kwargs) -> Tuple[bool, Tuple[str, str, List[Document]]]:
try:
logger.info(f"file2docs 从文件 {file.kb_name}/{file.filename}")
return True, (file.kb_name, file.filename, file.file2text(**kwargs))
except Exception as e:
msg = f"从文件 {file.kb_name}/{file.filename} 加载文档时出错:{e}"
appLogger.error(f'{e.__class__.__name__}: {msg}',
msg = f"file2docs 从文件 {file.kb_name}/{file.filename} 加载文档时出错:{e}"
logger.error(f'{e.__class__.__name__}: {msg}',
exc_info=e if log_verbose else None)
return False, (file.kb_name, file.filename, msg)

View File

@@ -1,5 +1,5 @@
from fastapi import Body
from configs import logger, log_verbose, LLM_MODELS, HTTPX_DEFAULT_TIMEOUT
from configs import logger, log_verbose, LLM_MODELS, HTTPX_DEFAULT_TIMEOUT
from server.utils import (BaseResponse, fschat_controller_address, list_config_llm_models,
get_httpx_client, get_model_worker_config)
from typing import List

View File

@@ -566,36 +566,36 @@ def dump_server_info(after_start=False, args=None):
import fastchat
from server.utils import api_address, webui_address
print("\n")
print("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30)
print(f"操作系统:{platform.platform()}.")
print(f"python版本{sys.version}")
print(f"项目版本:{VERSION}")
print(f"langchain版本{langchain.__version__}. fastchat版本{fastchat.__version__}")
print("\n")
logger.info("\n")
logger.info("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30)
logger.info(f"操作系统:{platform.platform()}")
logger.info(f"python版本{sys.version}")
logger.info(f"项目版本:{VERSION}")
logger.info(f"langchain版本{langchain.__version__}. fastchat版本{fastchat.__version__}")
logger.info("\n")
models = LLM_MODELS
if args and args.model_name:
models = args.model_name
print(f"当前使用的分词器:{TEXT_SPLITTER_NAME}")
print(f"当前启动的LLM模型{models} @ {llm_device()}")
logger.info(f"当前使用的分词器:{TEXT_SPLITTER_NAME}")
logger.info(f"当前启动的LLM模型{models} @ {llm_device()}")
for model in models:
pprint(get_model_worker_config(model))
print(f"当前Embbedings模型 {EMBEDDING_MODEL} @ {embedding_device()}")
logger.info(f"当前Embbedings模型 {EMBEDDING_MODEL} @ {embedding_device()}")
if after_start:
print("\n")
print(f"服务端运行信息:")
logger.info("\n")
logger.info(f"服务端运行信息:")
if args.openai_api:
print(f" OpenAI API Server: {fschat_openai_api_address()}")
logger.info(f" OpenAI API Server: {fschat_openai_api_address()}")
if args.api:
print(f" Chatchat API Server: {api_address()}")
logger.info(f" Chatchat API Server: {api_address()}")
if args.webui:
print(f" Chatchat WEBUI Server: {webui_address()}")
print("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30)
print("\n")
logger.info(f" Chatchat WEBUI Server: {webui_address()}")
logger.info("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30)
logger.info("\n")
async def start_main_server():
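With the LOG_FORMAT set in basic_config, every logger.info call above becomes a full log record, so the banner lines and the bare "\n" separators are each prefixed with timestamp, file name, line number and level rather than printing as plain text. A tiny sketch of that behaviour under the same format string:

import logging

LOG_FORMAT = "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
banner = logging.getLogger(__name__)

# Each call is its own record; the "\n" message produces a prefixed record whose body is a newline.
banner.info("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30)
banner.info("\n")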

View File

@@ -1,5 +1,6 @@
from langchain.docstore.document import Document
import re
from configs import logger
def get_fist_level_title(
text: str,
@@ -181,11 +182,11 @@ def zh_second_title_enhance(docs: Document) -> Document:
title = None
if len(docs) > 0:
for doc in docs:
#print(f"zh_second_title_enhance: {doc}")
logger.debug(f"zh_second_title_enhance: {doc}")
second_title = get_second_level_title(doc.page_content)
if second_title:
title = second_title
#print(f"title: {title}")
logger.debug(f"title: {title}")
elif title:
#print(f"title is not none")
temp_third_content = is_third_level_content(doc.page_content)
@@ -194,7 +195,7 @@ def zh_second_title_enhance(docs: Document) -> Document:
doc.page_content = f"{title} {doc.page_content}"
else:
title = None
print(f"final title: {title}")
logger.debug(f"final title: {title}")
return docs
else:
print("zh_second_title_enhance 文件不存在")
@@ -204,19 +205,19 @@ def zh_first_title_enhance(docs: Document) -> Document:
title = None
if len(docs) > 0:
for doc in docs:
#print(f"zh_first_title_enhance: {doc}")
logger.debug(f"zh_first_title_enhance: {doc}")
first_title = get_fist_level_title(doc.page_content)
if first_title:
title = first_title
#print(f"title: {title}")
logger.debug(f"title: {title}")
elif title:
temp_second_content = is_second_level_content(doc.page_content)
if temp_second_content:
#print(f"is_second_level_content : {temp_second_content}")
logger.debug(f"is_second_level_content : {temp_second_content}")
doc.page_content = f"{title} {doc.page_content}"
else:
title = None
#print(f"final title: {title}")
logger.debug(f"final title: {title}")
return docs
else:
print("zh_first_title_enhance 文件不存在")