diff --git a/configs/basic_config.py b/configs/basic_config.py
index 433cd53..506c26d 100644
--- a/configs/basic_config.py
+++ b/configs/basic_config.py
@@ -3,7 +3,7 @@ import os
 import langchain
 import tempfile
 import shutil
-
+from logging.handlers import RotatingFileHandler
 
 # Whether to print verbose logs
 log_verbose = False
@@ -14,9 +14,12 @@ langchain.verbose = False
 # Log format
 LOG_FORMAT = "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
 logger = logging.getLogger()
-logger.setLevel(logging.INFO)
-logging.basicConfig(format=LOG_FORMAT)
+LOG_BACKUP_COUNT = 10  # number of archived log files to keep
+LOG_MAX_FILE_SIZE = 1024 * 1024  # maximum size of each log file, in bytes
+
+# Create the root logger and set the log level
+logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
 
 # Log storage path
 LOG_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs")
@@ -32,20 +35,14 @@ except Exception:
 os.makedirs(BASE_TEMP_DIR, exist_ok=True)
 
-# added by weiweiwang for log
-
-# Create the logger and set the log level
-logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
-
 # Create the file handler and set its log level and file name
-#appLogPath = os.path.join(LOG_PATH, "app.log")
-file_handler = logging.FileHandler(LOG_PATH + '/app.log')
+file_handler = RotatingFileHandler(os.path.join(LOG_PATH, "app.log"), maxBytes=LOG_MAX_FILE_SIZE, backupCount=LOG_BACKUP_COUNT)
 file_handler.setLevel(logging.INFO)
 # Set the log record format
 formatter = logging.Formatter(LOG_FORMAT)
 file_handler.setFormatter(formatter)
 # Get the logger and attach the file handler
-appLogger = logging.getLogger(__name__)
-appLogger.addHandler(file_handler)
\ No newline at end of file
+logger.addHandler(file_handler)
+
diff --git a/document_loaders/mypdfloader.py b/document_loaders/mypdfloader.py
index e7e2c76..2af4b0b 100644
--- a/document_loaders/mypdfloader.py
+++ b/document_loaders/mypdfloader.py
@@ -1,6 +1,6 @@
 from typing import List
 from langchain.document_loaders.unstructured import UnstructuredFileLoader
-from configs import PDF_OCR_THRESHOLD
+from configs import PDF_OCR_THRESHOLD, logger
 from document_loaders.ocr import get_ocr
 #PDF_OCR_THRESHOLD = (0.6, 0.6)
 #from ocr import get_ocr
@@ -23,7 +23,7 @@ class RapidOCRPDFLoader(UnstructuredFileLoader):
             print(f"****page:{i+1}****")
             text = page.get_text("")
             text_lines = text.strip().split("\n")
-            #print(f"Text content: {text_lines}")
+            logger.debug(f"Text content: {text_lines}")
 
             img_list = page.get_image_info(xrefs=True)
             ocr_result = []
@@ -39,7 +39,7 @@ class RapidOCRPDFLoader(UnstructuredFileLoader):
                 result, _ = ocr(img_array)
                 if result:
                     ocr_result = [line[1] for line in result]
-                    #print(f"Image content: {ocr_result}")
+                    logger.debug(f"Image content: {ocr_result}")
                     #resp += "\n".join(ocr_result)
 
             if (len(ocr_result) > 0):
@@ -49,7 +49,7 @@ class RapidOCRPDFLoader(UnstructuredFileLoader):
                 # Assume the page number is on the last line
                 if text_lines[-1].isdigit():
                     text = "\n".join(text_lines[:-1])
-                    print(f"****** removed page number")
+                    logger.debug("****** removed page number")
             resp += text + "\n"
 
             # Update progress
diff --git a/server/chat/knowledge_base_chat.py b/server/chat/knowledge_base_chat.py
index 40d4c71..a1983df 100644
--- a/server/chat/knowledge_base_chat.py
+++ b/server/chat/knowledge_base_chat.py
@@ -11,7 +11,8 @@ from configs import (LLM_MODELS,
                     RERANKER_MODEL,
                     RERANKER_MAX_LENGTH,
                     MODEL_PATH,
-                    DOWNLOAD_BASE_URL)
+                    DOWNLOAD_BASE_URL,
+                    logger)
 from server.utils import wrap_done, get_ChatOpenAI
 from server.utils import BaseResponse, get_prompt_template
 from langchain.chains import LLMChain
@@ -26,6 +27,8 @@
 from urllib.parse import urlencode
 from server.knowledge_base.kb_doc_api import search_docs
 from server.reranker.reranker import LangchainReranker
 from server.utils import embedding_device
+import time
+
 
 async def knowledge_base_chat(query: str = Body(..., description="User input", examples=["Hello"]),
                               knowledge_base_name: str = Body(..., description="Knowledge base name", examples=["samples"]),
                               top_k: int = Body(VECTOR_SEARCH_TOP_K, description="Number of vectors to match"),
@@ -81,6 +84,7 @@ async def knowledge_base_chat(query: str = Body(..., description="User input",
             max_tokens=max_tokens,
             callbacks=[callback],
         )
+        start_time = time.time()  # record the start time
         docs = search_docs(query, knowledge_base_name, top_k, score_threshold)
         # docs = await run_in_threadpool(search_docs,
         #                                query=query,
@@ -88,7 +92,14 @@ async def knowledge_base_chat(query: str = Body(..., description="User input",
         #                                knowledge_base_name=knowledge_base_name,
         #                                top_k=top_k,
         #                                score_threshold=score_threshold)
 
+        end_time = time.time()  # record the end time
+        execution_time = end_time - start_time  # compute the elapsed time
+        logger.info(f"search_docs took {execution_time} seconds")
+
         # Apply the reranker
+        logger.info(f"use_reranker: {USE_RERANKER}")
+
+        start_time = time.time()  # record the start time
         if USE_RERANKER:
             reranker_model_path = MODEL_PATH["reranker"].get(RERANKER_MODEL, "BAAI/bge-reranker-large")
             print("-----------------model path------------------")
@@ -102,6 +113,9 @@ async def knowledge_base_chat(query: str = Body(..., description="User input",
                 query=query)
             print("---------after rerank------------------")
             print(docs)
+        end_time = time.time()  # record the end time
+        execution_time = end_time - start_time  # compute the elapsed time
+        logger.info(f"reranker took {execution_time} seconds")
 
         context = "\n".join([doc.page_content for doc in docs])
diff --git a/server/knowledge_base/kb_doc_api.py b/server/knowledge_base/kb_doc_api.py
index 080593e..0cda5a6 100644
--- a/server/knowledge_base/kb_doc_api.py
+++ b/server/knowledge_base/kb_doc_api.py
@@ -19,7 +19,7 @@ from server.knowledge_base.model.kb_document_model import DocumentWithVSId
 from typing import List, Dict
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
-from configs import USE_RANKING, appLogger
+from configs import USE_RANKING, logger
 import jieba
 from typing import List, Dict, Tuple
@@ -39,7 +39,7 @@ def search_docs(
     data = []
     if kb is not None:
         if query:
-            print(f"search_docs, query:{query}")
+            logger.info(f"search_docs, query: {query}, top_k: {top_k}, score_threshold: {score_threshold}, use_ranking: {USE_RANKING}")
             docs = kb.search_docs(query, FIRST_VECTOR_SEARCH_TOP_K, score_threshold)
             #print(f"search_docs, len of docs {len(docs)}, docs: {docs}")
@@ -355,7 +355,7 @@ def update_docs(
     failed_files = {}
     kb_files = []
 
-    appLogger.info(f"111111 kb_doc_api update_docs file_names:{file_names}, number of updated docs: {len(docs)}")
+    logger.info(f"kb_doc_api update_docs file_names: {file_names}, number of updated docs: {len(docs)}")
     # Build the list of files whose docs need to be loaded
     for file_name in file_names:
         file_detail = get_file_detail(kb_name=knowledge_base_name, filename=file_name)
@@ -364,7 +364,7 @@
             continue
         if file_name not in docs:
             try:
-                appLogger.info(f"****kb_doc_api update_docs file_name not in docs, filename:{file_name}")
+                logger.info(f"kb_doc_api update_docs: file_name not in docs, filename: {file_name}")
                 kb_files.append(KnowledgeFile(filename=file_name,
                                               knowledge_base_name=knowledge_base_name))
             except Exception as e:
                 msg = f"Error loading document {file_name}: {e}"
@@ -391,7 +391,7 @@
     # Vectorize the custom docs
     for file_name, v in docs.items():
         try:
-            appLogger.info(f"222222 kb_doc_api update_docs file_name:{file_name}, number of updated docs: {len(docs)}")
+            logger.info(f"kb_doc_api update_docs file_name: {file_name}, number of updated docs: {len(docs)}")
             v = [x if isinstance(x, Document) else Document(**x) for x in v]
             kb_file = KnowledgeFile(filename=file_name, knowledge_base_name=knowledge_base_name)
             kb.update_doc(kb_file, docs=v, not_refresh_vs_cache=True)
diff --git a/server/knowledge_base/kb_service/base.py b/server/knowledge_base/kb_service/base.py
index 31ce3fe..5c5daa3 100644
--- a/server/knowledge_base/kb_service/base.py
+++ b/server/knowledge_base/kb_service/base.py
@@ -28,7 +28,7 @@
 from typing import List, Union, Dict, Optional, Tuple
 
 from server.embeddings_api import embed_texts, aembed_texts, embed_documents
 from server.knowledge_base.model.kb_document_model import DocumentWithVSId
-from configs import logger, appLogger
+from configs import logger
 import time
@@ -115,15 +115,15 @@ class KBService(ABC):
             custom_docs = True
             for doc in docs:
                 doc.metadata.setdefault("source", kb_file.filename)
-            appLogger.info(f"kb_doc_api add_doc: docs not empty, len(docs): {len(docs)}, file: {kb_file.filename}")
+            logger.info(f"kb_doc_api add_doc: docs not empty, len(docs): {len(docs)}, file: {kb_file.filename}")
         else:
             docs = kb_file.file2text()
             custom_docs = False
-            appLogger.info(f"kb_doc_api add_doc: docs empty, len(docs): {len(docs)}, file: {kb_file.filename}")
+            logger.info(f"kb_doc_api add_doc: docs empty, len(docs): {len(docs)}, file: {kb_file.filename}")
 
         end_time = time.time()  # record the end time
         execution_time = end_time - start_time  # compute the elapsed time
-        appLogger.info(f"add_doc: loading or splitting the file took {execution_time} seconds")
+        logger.info(f"add_doc: loading or splitting the file took {execution_time} seconds")
 
         start_time = time.time()  # record the start time
         if docs:
@@ -138,11 +138,11 @@ class KBService(ABC):
                     rel_path = Path(source).relative_to(self.doc_path)
                     doc.metadata["source"] = str(rel_path.as_posix().strip("/"))
                 except Exception as e:
-                    appLogger.info(f"cannot convert absolute path ({source}) to relative path. error is: {e}")
+                    logger.info(f"cannot convert absolute path ({source}) to relative path. error is: {e}")
error is : {e}") self.delete_doc(kb_file) - #appLogger.info(f"add_doc filepath:{kb_file.filepath},将要执行do_add_doc") + #logger.info(f"add_doc filepath:{kb_file.filepath},将要执行do_add_doc") doc_infos = self.do_add_doc(docs, **kwargs) - #appLogger.info(f"add_doc filepath:{kb_file.filepath} 将要执行dd_file_to_db") + #logger.info(f"add_doc filepath:{kb_file.filepath} 将要执行dd_file_to_db") status = add_file_to_db(kb_file, custom_docs=custom_docs, docs_count=len(docs), @@ -150,7 +150,7 @@ class KBService(ABC): end_time = time.time() # 记录结束时间 execution_time = end_time - start_time # 计算执行时间 - appLogger.info(f"add_doc: 入库耗时:{execution_time}秒") + logger.info(f"add_doc: 入库耗时:{execution_time}秒") else: status = False return status @@ -251,7 +251,7 @@ class KBService(ABC): 通过file_name或metadata检索Document ''' doc_infos = list_docs_from_db(kb_name=self.kb_name, file_name=file_name, metadata=metadata) - #appLogger.info(f"kb_doc_api list_docs_from_db: {doc_infos}") + #logger.info(f"kb_doc_api list_docs_from_db: {doc_infos}") docs = [] for x in doc_infos: doc_info = self.get_doc_by_ids([x["id"]]) diff --git a/server/knowledge_base/kb_service/es_kb_service.py b/server/knowledge_base/kb_service/es_kb_service.py index d4a5109..c124cb8 100644 --- a/server/knowledge_base/kb_service/es_kb_service.py +++ b/server/knowledge_base/kb_service/es_kb_service.py @@ -9,7 +9,7 @@ from server.knowledge_base.kb_service.base import KBService, SupportedVSType from server.knowledge_base.utils import KnowledgeFile from server.utils import load_local_embeddings from elasticsearch import Elasticsearch,BadRequestError -from configs import logger,appLogger +from configs import logger from configs import kbs_config from server.knowledge_base.model.kb_document_model import DocumentWithVSId @@ -30,13 +30,13 @@ class ESKBService(KBService): self.es_client_python = Elasticsearch(f"http://{self.IP}:{self.PORT}", basic_auth=(self.user,self.password)) else: - appLogger.warning("ES未配置用户名和密码") + logger.warning("ES未配置用户名和密码") self.es_client_python = Elasticsearch(f"http://{self.IP}:{self.PORT}") except ConnectionError: - appLogger.error("连接到 Elasticsearch 失败!") + logger.error("连接到 Elasticsearch 失败!") raise ConnectionError except Exception as e: - appLogger.error(f"Error 发生 : {e}") + logger.error(f"Error 发生 : {e}") raise e try: # 首先尝试通过es_client_python创建 @@ -51,8 +51,8 @@ class ESKBService(KBService): } self.es_client_python.indices.create(index=self.index_name, mappings=mappings) except BadRequestError as e: - appLogger.error("创建索引失败,重新") - appLogger.error(e) + logger.error("创建索引失败,重新") + logger.error(e) try: # langchain ES 连接、创建索引 @@ -67,7 +67,7 @@ class ESKBService(KBService): es_password=self.password ) else: - appLogger.warning("ES未配置用户名和密码") + logger.warning("ES未配置用户名和密码") self.db_init = ElasticsearchStore( es_url=f"http://{self.IP}:{self.PORT}", index_name=self.index_name, @@ -77,10 +77,10 @@ class ESKBService(KBService): ) except ConnectionError: print("### 初始化 Elasticsearch 失败!") - appLogger.error("### 初始化 Elasticsearch 失败!") + logger.error("### 初始化 Elasticsearch 失败!") raise ConnectionError except Exception as e: - appLogger.error(f"Error 发生 : {e}") + logger.error(f"Error 发生 : {e}") raise e try: # 尝试通过db_init创建索引 @@ -89,8 +89,8 @@ class ESKBService(KBService): dims_length=self.dims_length ) except Exception as e: - appLogger.error("创建索引失败...") - appLogger.error(e) + logger.error("创建索引失败...") + logger.error(e) # raise e @@ -156,23 +156,22 @@ class ESKBService(KBService): except ConnectionError as ce: print(ce) print("连接到 Elasticsearch 失败!") - 
appLogger.error("连接到 Elasticsearch 失败!") + logger.error("连接到 Elasticsearch 失败!") except Exception as e: - appLogger.error(f"Error 发生 : {e}") + logger.error(f"Error 发生 : {e}") print(e) def do_search(self, query:str, top_k: int, score_threshold: float): # 文本相似性检索 - print(f"do_search,top_k:{top_k},score_threshold:{score_threshold}") docs = self.db_init.similarity_search_with_score(query=query, k=top_k) return docs def searchbyContent(self, query:str, top_k: int = 2): if self.es_client_python.indices.exists(index=self.index_name): - appLogger.info(f"******ESKBService searchByContent {self.index_name},query:{query}") + logger.info(f"******ESKBService searchByContent {self.index_name},query:{query}") tem_query = { "query": {"match": { "context": "*" + query + "*" @@ -199,7 +198,7 @@ class ESKBService(KBService): def searchbyContentInternal(self, query:str, top_k: int = 2): if self.es_client_python.indices.exists(index=self.index_name): - appLogger.info(f"******ESKBService searchbyContentInternal {self.index_name},query:{query}") + logger.info(f"******ESKBService searchbyContentInternal {self.index_name},query:{query}") tem_query = { "query": {"match": { "context": "*" + query + "*" @@ -231,19 +230,19 @@ class ESKBService(KBService): metadata=result["_source"]["metadata"], )) except Exception as e: - appLogger.error(f"ES Docs Get Error! {e}") + logger.error(f"ES Docs Get Error! {e}") return result_list def del_doc_by_ids(self,ids: List[str]) -> bool: - appLogger.info(f"es_kb_service del_doc_by_ids") + logger.info(f"es_kb_service del_doc_by_ids") for doc_id in ids: try: self.es_client_python.delete(index=self.index_name, id=doc_id, refresh=True) except Exception as e: - appLogger.error(f"ES Docs Delete Error! {e}") + logger.error(f"ES Docs Delete Error! {e}") def do_delete_doc(self, kb_file, **kwargs): @@ -272,7 +271,7 @@ class ESKBService(KBService): id=doc_id, refresh=True) except Exception as e: - appLogger.error(f"ES Docs Delete Error! {e}") + logger.error(f"ES Docs Delete Error! 
{e}") # self.db_init.delete(ids=delete_list) #self.es_client_python.indices.refresh(index=self.index_name) diff --git a/server/knowledge_base/kb_summary_api.py b/server/knowledge_base/kb_summary_api.py index 6558f87..fa7b20b 100644 --- a/server/knowledge_base/kb_summary_api.py +++ b/server/knowledge_base/kb_summary_api.py @@ -1,7 +1,7 @@ from fastapi import Body from configs import (DEFAULT_VS_TYPE, EMBEDDING_MODEL, OVERLAP_SIZE, - logger, log_verbose, ) + logger, log_verbose ) from server.knowledge_base.utils import (list_files_from_folder) from sse_starlette import EventSourceResponse import json @@ -10,7 +10,7 @@ from typing import List, Optional from server.knowledge_base.kb_summary.base import KBSummaryService from server.knowledge_base.kb_summary.summary_chunk import SummaryAdapter from server.utils import wrap_done, get_ChatOpenAI, BaseResponse -from configs import LLM_MODELS, TEMPERATURE +from configs import LLM_MODELS, TEMPERATURE, logger from server.knowledge_base.model.kb_document_model import DocumentWithVSId def recreate_summary_vector_store( diff --git a/server/knowledge_base/migrate.py b/server/knowledge_base/migrate.py index 596e1f6..925c5f2 100644 --- a/server/knowledge_base/migrate.py +++ b/server/knowledge_base/migrate.py @@ -65,7 +65,7 @@ def import_from_db( con.close() return True except Exception as e: - print(f"无法读取备份数据库:{sqlite_path}。错误信息:{e}") + logger.error(f"无法读取备份数据库:{sqlite_path}。错误信息:{e}") return False diff --git a/server/knowledge_base/utils.py b/server/knowledge_base/utils.py index 96b25bc..36f07cd 100644 --- a/server/knowledge_base/utils.py +++ b/server/knowledge_base/utils.py @@ -5,7 +5,6 @@ from configs import ( OVERLAP_SIZE, ZH_TITLE_ENHANCE, logger, - appLogger, log_verbose, text_splitter_dict, LLM_MODELS, @@ -95,7 +94,7 @@ def list_files_from_folder(kb_name: str): process_entry(entry) except Exception as e: - appLogger.error(f"Error 发生 : {e}") + logger.error(f"Error 发生 : {e}") return result @@ -176,7 +175,7 @@ def get_loader(loader_name: str, file_path: str, loader_kwargs: Dict = None): DocumentLoader = getattr(document_loaders_module, loader_name) except Exception as e: msg = f"为文件{file_path}查找加载器{loader_name}时出错:{e}" - appLogger.error(f'{e.__class__.__name__}: {msg}', + logger.error(f'{e.__class__.__name__}: {msg}', exc_info=e if log_verbose else None) document_loaders_module = importlib.import_module('langchain.document_loaders') DocumentLoader = getattr(document_loaders_module, "UnstructuredFileLoader") @@ -315,14 +314,15 @@ class KnowledgeFile: def file2docs(self, refresh: bool = False): if self.docs is None or refresh: - appLogger.info(f"{self.document_loader_name} used for {self.filepath}") + logger.info(f"{self.document_loader_name} used for {self.filepath}") loader = get_loader(loader_name=self.document_loader_name, file_path=self.filepath, loader_kwargs=self.loader_kwargs) self.docs = loader.load() + logger.info(f"{self.filepath}加载完成") return self.docs - print(f"KnowledgeFile: filepath:{self.filepath}, doc_title_name:{self.doc_title_name}, ext:{self.ext}") + #print(f"KnowledgeFile: filepath:{self.filepath}, doc_title_name:{self.doc_title_name}, ext:{self.ext}") def docs2texts( self, @@ -347,7 +347,7 @@ class KnowledgeFile: if doc.page_content.strip()!="": doc.page_content = re.sub(r"\n{2,}", "\n", doc.page_content.strip()) file_name_without_extension, file_extension = os.path.splitext(self.filepath) - print(f"filepath:{self.filepath},文件名拆分后:{file_name_without_extension},{file_extension}") + 
logger.info(f"filepath:{self.filepath},文件名拆分后:{file_name_without_extension},{file_extension}") if not docs: return [] if self.ext not in [".csv"]: @@ -437,10 +437,11 @@ def files2docs_in_thread( def file2docs(*, file: KnowledgeFile, **kwargs) -> Tuple[bool, Tuple[str, str, List[Document]]]: try: + logger.info(f"file2docs 从文件 {file.kb_name}/{file.filename}") return True, (file.kb_name, file.filename, file.file2text(**kwargs)) except Exception as e: - msg = f"从文件 {file.kb_name}/{file.filename} 加载文档时出错:{e}" - appLogger.error(f'{e.__class__.__name__}: {msg}', + msg = f"file2docs 从文件 {file.kb_name}/{file.filename} 加载文档时出错:{e}" + logger.error(f'{e.__class__.__name__}: {msg}', exc_info=e if log_verbose else None) return False, (file.kb_name, file.filename, msg) diff --git a/server/llm_api.py b/server/llm_api.py index fbac493..a290dbd 100644 --- a/server/llm_api.py +++ b/server/llm_api.py @@ -1,5 +1,5 @@ from fastapi import Body -from configs import logger, log_verbose, LLM_MODELS, HTTPX_DEFAULT_TIMEOUT +from configs import logger, log_verbose, LLM_MODELS, HTTPX_DEFAULT_TIMEOUT, logger from server.utils import (BaseResponse, fschat_controller_address, list_config_llm_models, get_httpx_client, get_model_worker_config) from typing import List diff --git a/startup.py b/startup.py index 359fb70..002543a 100644 --- a/startup.py +++ b/startup.py @@ -566,36 +566,36 @@ def dump_server_info(after_start=False, args=None): import fastchat from server.utils import api_address, webui_address - print("\n") - print("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30) - print(f"操作系统:{platform.platform()}.") - print(f"python版本:{sys.version}") - print(f"项目版本:{VERSION}") - print(f"langchain版本:{langchain.__version__}. fastchat版本:{fastchat.__version__}") - print("\n") + logger.info("\n") + logger.info("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30) + logger.info(f"操作系统:{platform.platform()}") + logger.info(f"python版本:{sys.version}") + logger.info(f"项目版本:{VERSION}") + logger.info(f"langchain版本:{langchain.__version__}. 
+    logger.info("\n")
 
     models = LLM_MODELS
     if args and args.model_name:
         models = args.model_name
 
-    print(f"Current text splitter: {TEXT_SPLITTER_NAME}")
-    print(f"LLM models to launch: {models} @ {llm_device()}")
+    logger.info(f"Current text splitter: {TEXT_SPLITTER_NAME}")
+    logger.info(f"LLM models to launch: {models} @ {llm_device()}")
 
     for model in models:
         pprint(get_model_worker_config(model))
-    print(f"Current embeddings model: {EMBEDDING_MODEL} @ {embedding_device()}")
+    logger.info(f"Current embeddings model: {EMBEDDING_MODEL} @ {embedding_device()}")
 
     if after_start:
-        print("\n")
-        print(f"Server runtime information:")
+        logger.info("\n")
+        logger.info("Server runtime information:")
         if args.openai_api:
-            print(f"    OpenAI API Server: {fschat_openai_api_address()}")
+            logger.info(f"    OpenAI API Server: {fschat_openai_api_address()}")
         if args.api:
-            print(f"    Chatchat API Server: {api_address()}")
+            logger.info(f"    Chatchat API Server: {api_address()}")
         if args.webui:
-            print(f"    Chatchat WEBUI Server: {webui_address()}")
-        print("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30)
-        print("\n")
+            logger.info(f"    Chatchat WEBUI Server: {webui_address()}")
+        logger.info("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30)
+        logger.info("\n")
 
 
 async def start_main_server():
diff --git a/text_splitter/zh_second_title_enhance.py b/text_splitter/zh_second_title_enhance.py
index 90b4b31..aeffb40 100644
--- a/text_splitter/zh_second_title_enhance.py
+++ b/text_splitter/zh_second_title_enhance.py
@@ -1,5 +1,6 @@
 from langchain.docstore.document import Document
 import re
+from configs import logger
 
 def get_fist_level_title(
         text: str,
@@ -181,11 +182,11 @@ def zh_second_title_enhance(docs: Document) -> Document:
     title = None
     if len(docs) > 0:
         for doc in docs:
-            #print(f"zh_second_title_enhance: {doc}")
+            logger.debug(f"zh_second_title_enhance: {doc}")
             second_title = get_second_level_title(doc.page_content)
             if second_title:
                 title = second_title
-                #print(f"title: {title}")
+                logger.debug(f"title: {title}")
             elif title:
                 #print("title is not none")
                 temp_third_content = is_third_level_content(doc.page_content)
@@ -194,7 +195,7 @@
                     doc.page_content = f"{title} {doc.page_content}"
             else:
                 title = None
-        print(f"final title: {title}")
+        logger.debug(f"final title: {title}")
         return docs
     else:
         print("zh_second_title_enhance: docs do not exist")
@@ -204,19 +205,19 @@ def zh_first_title_enhance(docs: Document) -> Document:
     title = None
     if len(docs) > 0:
         for doc in docs:
-            #print(f"zh_first_title_enhance: {doc}")
+            logger.debug(f"zh_first_title_enhance: {doc}")
             first_title = get_fist_level_title(doc.page_content)
             if first_title:
                 title = first_title
-                #print(f"title: {title}")
+                logger.debug(f"title: {title}")
             elif title:
                 temp_second_content = is_second_level_content(doc.page_content)
                 if temp_second_content:
-                    #print(f"is_second_level_content: {temp_second_content}")
+                    logger.debug(f"is_second_level_content: {temp_second_content}")
                     doc.page_content = f"{title} {doc.page_content}"
                 else:
                     title = None
-            #print(f"final title: {title}")
+            logger.debug(f"final title: {title}")
         return docs
     else:
         print("zh_first_title_enhance: docs do not exist")