From ae7e658c1a77ba75ef23a5287ff88c5f365720ff Mon Sep 17 00:00:00 2001 From: weiweiw <14335254+weiweiw22@user.noreply.gitee.com> Date: Tue, 14 Jan 2025 12:41:19 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=97=A5=E5=BF=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../chatchat/server/knowledge_base/utils.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/libs/chatchat-server/chatchat/server/knowledge_base/utils.py b/libs/chatchat-server/chatchat/server/knowledge_base/utils.py index ab4cdd4..2216295 100644 --- a/libs/chatchat-server/chatchat/server/knowledge_base/utils.py +++ b/libs/chatchat-server/chatchat/server/knowledge_base/utils.py @@ -22,6 +22,7 @@ from chatchat.server.file_rag.text_splitter import ( from chatchat.server.utils import run_in_process_pool, run_in_thread_pool from chatchat.utils import build_logger import re +import threading logger = build_logger() @@ -228,7 +229,7 @@ def make_text_splitter(splitter_name, chunk_size, chunk_overlap): """ 根据参数获取特定的分词器 """ - logger.info(f"make_text_splitter start....splitter_name:{splitter_name}") + logger.info(f"threadid:{threading.get_ident()}, make_text_splitter start....splitter_name:{splitter_name}") splitter_name = splitter_name or "SpacyTextSplitter" try: if ( @@ -378,7 +379,7 @@ class KnowledgeFile: else: print("文件不存在") - logger.info(f"********docs2texts") + logger.info(f"threadid:{threading.get_ident()},********docs2texts") docs = docs or self.file2docs(refresh=refresh) #remove the redundant line break after loading, by weiweiwang 2025/1/13 for doc in docs: @@ -388,9 +389,9 @@ class KnowledgeFile: if not docs: return [] if self.ext not in [".csv"]: - logger.info(f"self.ext not in csv") + logger.info(f"threadid:{threading.get_ident()}, self.ext not in csv") if text_splitter is None: - logger.info(f"text_splitter is None") + logger.info(f" threadid:{threading.get_ident()}, text_splitter is None") text_splitter = make_text_splitter( splitter_name=self.text_splitter_name, chunk_size=chunk_size, @@ -451,6 +452,7 @@ def files2docs_in_thread_file2docs( *, file: KnowledgeFile, **kwargs ) -> Tuple[bool, Tuple[str, str, List[Document]]]: try: + logger.info(f"file2docs 从文件 {file.kb_name}/{file.filename}, threadid:{threading.get_ident()}") return True, (file.kb_name, file.filename, file.file2text(**kwargs)) except Exception as e: msg = f"从文件 {file.kb_name}/{file.filename} 加载文档时出错:{e}"