From 194437a271177a7e76de9b64c14e3f7c4583bad6 Mon Sep 17 00:00:00 2001 From: weiweiw <14335254+weiweiw22@user.noreply.gitee.com> Date: Mon, 24 Feb 2025 10:18:26 +0800 Subject: [PATCH] =?UTF-8?q?=E8=A7=A3=E5=86=B3es=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../chatchat/server/chat/kb_chat.py | 4 ++-- .../server/knowledge_base/kb_doc_api.py | 21 ++++++++++++------- .../kb_service/es_kb_service.py | 14 ++++++++++--- .../kb_service/faiss_kb_service.py | 6 +++--- .../kb_service/milvus_kb_service.py | 4 ++-- .../kb_service/pg_kb_service.py | 4 ++-- .../kb_service/relyt_kb_service.py | 4 ++-- .../kb_service/zilliz_kb_service.py | 4 ++-- libs/chatchat-server/chatchat/webui.py | 2 +- 9 files changed, 39 insertions(+), 24 deletions(-) diff --git a/libs/chatchat-server/chatchat/server/chat/kb_chat.py b/libs/chatchat-server/chatchat/server/chat/kb_chat.py index 09fe58a..a27c48f 100644 --- a/libs/chatchat-server/chatchat/server/chat/kb_chat.py +++ b/libs/chatchat-server/chatchat/server/chat/kb_chat.py @@ -91,8 +91,8 @@ async def kb_chat(query: str = Body(..., description="用户输入", examples=[" metadata={}) source_documents = format_reference(kb_name, docs, api_address(is_public=True)) - logger.info( - f"***********************************knowledge_base_chat_iterator:,after format_reference:{docs}") + # logger.info( + # f"***********************************knowledge_base_chat_iterator:,after format_reference:{docs}") end_time1 = time.time() execution_time1 = end_time1 - start_time1 logger.info(f"kb_chat Execution time检索完成: {execution_time1:.6f} seconds") diff --git a/libs/chatchat-server/chatchat/server/knowledge_base/kb_doc_api.py b/libs/chatchat-server/chatchat/server/knowledge_base/kb_doc_api.py index 56b35e0..1564549 100644 --- a/libs/chatchat-server/chatchat/server/knowledge_base/kb_doc_api.py +++ b/libs/chatchat-server/chatchat/server/knowledge_base/kb_doc_api.py @@ -72,11 +72,15 @@ def search_docs( if kb is not None: if query: docs = kb.search_docs(query, top_k, score_threshold) - # logger.info(f"search_docs, query:{query},top_k:{top_k},len(docs):{len(docs)},docs:{docs}") + if docs is not None: + logger.info(f"search_docs, query:{query},top_k:{top_k},score_threshold:{score_threshold},len(docs):{len(docs)}") + docs_key = kb.search_content_internal(query,2) - # logger.info(f"before merge_and_deduplicate docs_key:{docs_key}") + if docs_key is not None: + logger.info(f"before merge_and_deduplicate ,len(docs_key):{len(docs_key)}") docs = merge_and_deduplicate(docs, docs_key) - logger.info(f"after merge_and_deduplicate docs:{docs}") + if docs is not None: + logger.info(f"after merge_and_deduplicate len(docs):{len(docs)}") data = [DocumentWithVSId(**{"id": x.metadata.get("id"), **x.dict()}) for x in docs] elif file_name or metadata: data = kb.list_docs(file_name=file_name, metadata=metadata) @@ -87,12 +91,15 @@ def search_docs( def merge_and_deduplicate(list1: List[Document], list2: List[Document]) -> List[Document]: # 使用字典存储唯一的 Document - merged_dict = {doc.page_content: doc for doc in list1} + merged_dict = {} + if list1 is not None: + merged_dict = {doc.page_content: doc for doc in list1} # 遍历 list2,将新的 Document 添加到字典 - for doc in list2: - if doc.page_content not in merged_dict: - merged_dict[doc.page_content] = doc + if list2 is not None: + for doc in list2: + if doc.page_content not in merged_dict: + merged_dict[doc.page_content] = doc # 返回去重后的列表 return list(merged_dict.values()) diff --git a/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/es_kb_service.py b/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/es_kb_service.py index 7aef928..f68ae0f 100644 --- a/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/es_kb_service.py +++ b/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/es_kb_service.py @@ -58,7 +58,7 @@ class ESKBService(KBService): # ES python客户端连接(仅连接) logger.info(f"connection_info:{connection_info}") self.es_client_python = Elasticsearch(**connection_info) - # logger.info(f"after Elasticsearch connection_info:{connection_info}") + logger.info(f"after Elasticsearch connection_info:{connection_info}") except ConnectionError: logger.error("连接到 Elasticsearch 失败!") raise ConnectionError @@ -89,9 +89,10 @@ class ESKBService(KBService): es_url=f"{self.scheme}://{self.IP}:{self.PORT}", index_name=self.index_name, query_field="context", + distance_strategy="COSINE", vector_query_field="dense_vector", embedding=self.embeddings_model, - strategy=ApproxRetrievalStrategy(), + # strategy=ApproxRetrievalStrategy(), es_params={ "timeout": 60, }, @@ -106,6 +107,7 @@ class ESKBService(KBService): params["es_params"].update(client_key=self.client_key) params["es_params"].update(client_cert=self.client_cert) self.db = ElasticsearchStore(**params) + logger.info(f"after ElasticsearchStore create params:{params}") except ConnectionError: logger.error("### 初始化 Elasticsearch 失败!") raise ConnectionError @@ -138,14 +140,20 @@ class ESKBService(KBService): def vs_type(self) -> str: return SupportedVSType.ES - def do_search(self, query: str, top_k: int, score_threshold: float): + def do_search(self, query: str, top_k: int, score_threshold: float)->List[Document]: + # 确保 ElasticsearchStore 正确初始化 + if not hasattr(self, "db") or self.db is None: + raise ValueError("ElasticsearchStore (db) not initialized.") + # 文本相似性检索 retriever = get_Retriever("vectorstore").from_vectorstore( self.db, top_k=top_k, score_threshold=score_threshold, ) + docs = retriever.get_relevant_documents(query) + return docs def searchbyContent(self, query:str, top_k: int = 2): diff --git a/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/faiss_kb_service.py b/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/faiss_kb_service.py index e31151a..8e951c4 100644 --- a/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/faiss_kb_service.py +++ b/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/faiss_kb_service.py @@ -78,11 +78,11 @@ class FaissKBService(KBService): docs = retriever.get_relevant_documents(query) return docs - def searchbyContent(self): + def searchbyContent(self, query:str, top_k: int = 2): pass - def searchbyContentInternal(self): - pass + def searchbyContentInternal(self, query:str, top_k: int = 2): + return None def do_add_doc( self, diff --git a/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/milvus_kb_service.py b/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/milvus_kb_service.py index 4041519..43a340d 100644 --- a/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/milvus_kb_service.py +++ b/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/milvus_kb_service.py @@ -88,10 +88,10 @@ class MilvusKBService(KBService): docs = retriever.get_relevant_documents(query) return docs - def searchbyContent(self): + def searchbyContent(self, query:str, top_k: int = 2): pass - def searchbyContentInternal(self): + def searchbyContentInternal(self, query:str, top_k: int = 2): pass def do_add_doc(self, docs: List[Document], **kwargs) -> List[Dict]: diff --git a/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/pg_kb_service.py b/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/pg_kb_service.py index 5e10b6e..4696d86 100644 --- a/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/pg_kb_service.py +++ b/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/pg_kb_service.py @@ -84,10 +84,10 @@ class PGKBService(KBService): docs = retriever.get_relevant_documents(query) return docs - def searchbyContent(self): + def searchbyContent(self, query:str, top_k: int = 2): pass - def searchbyContentInternal(self): + def searchbyContentInternal(self, query:str, top_k: int = 2): pass def do_add_doc(self, docs: List[Document], **kwargs) -> List[Dict]: diff --git a/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/relyt_kb_service.py b/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/relyt_kb_service.py index eccfe31..dfd63e5 100644 --- a/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/relyt_kb_service.py +++ b/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/relyt_kb_service.py @@ -95,10 +95,10 @@ class RelytKBService(KBService): return score_threshold_process(score_threshold, top_k, docs) - def searchbyContent(self): + def searchbyContent(self, query:str, top_k: int = 2): pass - def searchbyContentInternal(self): + def searchbyContentInternal(self, query:str, top_k: int = 2): pass def do_add_doc(self, docs: List[Document], **kwargs) -> List[Dict]: diff --git a/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/zilliz_kb_service.py b/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/zilliz_kb_service.py index 4af3dff..7519e38 100644 --- a/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/zilliz_kb_service.py +++ b/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/zilliz_kb_service.py @@ -79,10 +79,10 @@ class ZillizKBService(KBService): docs = retriever.get_relevant_documents(query) return docs - def searchbyContent(self): + def searchbyContent(self, query:str, top_k: int = 2): pass - def searchbyContentInternal(self): + def searchbyContentInternal(self, query:str, top_k: int = 2): pass def do_add_doc(self, docs: List[Document], **kwargs) -> List[Dict]: diff --git a/libs/chatchat-server/chatchat/webui.py b/libs/chatchat-server/chatchat/webui.py index df06834..669cf17 100644 --- a/libs/chatchat-server/chatchat/webui.py +++ b/libs/chatchat-server/chatchat/webui.py @@ -46,7 +46,7 @@ if __name__ == "__main__": with st.sidebar: st.image( - get_img_base64("logo-long-chatchat-trans-v2.png"), use_container_width=True + get_img_base64("logo-long-chatchat-trans-v2.png"), use_column_width=True ) st.caption( f"""
当前版本:{__version__}
""",