diff --git a/libs/chatchat-server/chatchat/server/knowledge_base/utils.py b/libs/chatchat-server/chatchat/server/knowledge_base/utils.py index ab4cdd4..2216295 100644 --- a/libs/chatchat-server/chatchat/server/knowledge_base/utils.py +++ b/libs/chatchat-server/chatchat/server/knowledge_base/utils.py @@ -22,6 +22,7 @@ from chatchat.server.file_rag.text_splitter import ( from chatchat.server.utils import run_in_process_pool, run_in_thread_pool from chatchat.utils import build_logger import re +import threading logger = build_logger() @@ -228,7 +229,7 @@ def make_text_splitter(splitter_name, chunk_size, chunk_overlap): """ 根据参数获取特定的分词器 """ - logger.info(f"make_text_splitter start....splitter_name:{splitter_name}") + logger.info(f"threadid:{threading.get_ident()}, make_text_splitter start....splitter_name:{splitter_name}") splitter_name = splitter_name or "SpacyTextSplitter" try: if ( @@ -378,7 +379,7 @@ class KnowledgeFile: else: print("文件不存在") - logger.info(f"********docs2texts") + logger.info(f"threadid:{threading.get_ident()},********docs2texts") docs = docs or self.file2docs(refresh=refresh) #remove the redundant line break after loading, by weiweiwang 2025/1/13 for doc in docs: @@ -388,9 +389,9 @@ class KnowledgeFile: if not docs: return [] if self.ext not in [".csv"]: - logger.info(f"self.ext not in csv") + logger.info(f"threadid:{threading.get_ident()}, self.ext not in csv") if text_splitter is None: - logger.info(f"text_splitter is None") + logger.info(f" threadid:{threading.get_ident()}, text_splitter is None") text_splitter = make_text_splitter( splitter_name=self.text_splitter_name, chunk_size=chunk_size, @@ -451,6 +452,7 @@ def files2docs_in_thread_file2docs( *, file: KnowledgeFile, **kwargs ) -> Tuple[bool, Tuple[str, str, List[Document]]]: try: + logger.info(f"file2docs 从文件 {file.kb_name}/{file.filename}, threadid:{threading.get_ident()}") return True, (file.kb_name, file.filename, file.file2text(**kwargs)) except Exception as e: msg = f"从文件 {file.kb_name}/{file.filename} 加载文档时出错:{e}"