From d9acc07c59adfb9e97cee0f08502370278cd3301 Mon Sep 17 00:00:00 2001 From: weiweiw <14335254+weiweiw22@user.noreply.gitee.com> Date: Tue, 25 Feb 2025 12:50:40 +0800 Subject: [PATCH] =?UTF-8?q?es=E5=90=91=E9=87=8F=E5=85=A5=E5=BA=93=E4=BC=98?= =?UTF-8?q?=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../kb_service/es_kb_service.py | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/es_kb_service.py b/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/es_kb_service.py index f68ae0f..dc8fec6 100644 --- a/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/es_kb_service.py +++ b/libs/chatchat-server/chatchat/server/knowledge_base/kb_service/es_kb_service.py @@ -242,7 +242,8 @@ class ESKBService(KBService): }, "track_total_hits": True, } - print(f"***do_delete_doc: kb_file.filepath:{kb_file.filepath}") + print(f"***do_delete_doc: kb_file.filepath:{kb_file.filepath}, kb.filename:{kb_file.filename}") + print(f"***do_delete_doc: kb.filename:{kb_file.filename}") # 注意设置size,默认返回10个。 search_results = self.es_client_python.search(index=self.index_name, body=query,size=200) delete_list = [hit["_id"] for hit in search_results['hits']['hits']] @@ -277,22 +278,34 @@ class ESKBService(KBService): if self.es_client_python.indices.exists(index=self.index_name): file_path = docs[0].metadata.get("source") - query = { + print(f"****************do_add_doc, file_path:{file_path}") + # enhanced by weiweiwang 2025/2/24 to specific index name + # query = { + # "query": { + # "term": {"metadata.source.keyword": file_path}, + # # "term": {"_index": self.index_name}, + # } + # } + query = { "query": { - "term": {"metadata.source.keyword": file_path}, - "term": {"_index": self.index_name}, + "bool": { + "must": [ + { "term": { "metadata.source.keyword": file_path } }, + { "term": { "_index": self.index_name } } + ] + } } } # 注意设置size,默认返回10个。 - search_results = self.es_client_python.search(body=query, size=50) + search_results = self.es_client_python.search(body=query, size=200) if len(search_results["hits"]["hits"]) == 0: raise ValueError("召回元素个数为0") info_docs = [ {"id": hit["_id"], "metadata": hit["_source"]["metadata"]} for hit in search_results["hits"]["hits"] ] - #size = len(info_docs) - #print(f"do_add_doc 召回元素个数:{size}") + # size = len(info_docs) + # print(f"do_add_doc 召回元素个数:{size}") return info_docs def do_clear_vs(self):