es向量入库优化

This commit is contained in:
weiweiw 2025-02-25 12:50:40 +08:00
parent 92202e0f98
commit d9acc07c59
1 changed files with 20 additions and 7 deletions

View File

@ -242,7 +242,8 @@ class ESKBService(KBService):
},
"track_total_hits": True,
}
print(f"***do_delete_doc: kb_file.filepath:{kb_file.filepath}")
print(f"***do_delete_doc: kb_file.filepath:{kb_file.filepath}, kb.filename:{kb_file.filename}")
print(f"***do_delete_doc: kb.filename:{kb_file.filename}")
# 注意设置size默认返回10个。
search_results = self.es_client_python.search(index=self.index_name, body=query,size=200)
delete_list = [hit["_id"] for hit in search_results['hits']['hits']]
@ -277,22 +278,34 @@ class ESKBService(KBService):
if self.es_client_python.indices.exists(index=self.index_name):
file_path = docs[0].metadata.get("source")
print(f"****************do_add_doc, file_path:{file_path}")
# Enhanced by weiweiwang, 2025/2/24: restrict the query to this knowledge base's index name in addition to the source file path.
# query = {
# "query": {
# "term": {"metadata.source.keyword": file_path},
# # "term": {"_index": self.index_name},
# }
# }
query = {
"query": {
"term": {"metadata.source.keyword": file_path},
"term": {"_index": self.index_name},
"bool": {
"must": [
{ "term": { "metadata.source.keyword": file_path } },
{ "term": { "_index": self.index_name } }
]
}
}
}
# 注意设置size默认返回10个。
search_results = self.es_client_python.search(body=query, size=50)
search_results = self.es_client_python.search(body=query, size=200)
if len(search_results["hits"]["hits"]) == 0:
raise ValueError("召回元素个数为0")
info_docs = [
{"id": hit["_id"], "metadata": hit["_source"]["metadata"]}
for hit in search_results["hits"]["hits"]
]
#size = len(info_docs)
#print(f"do_add_doc 召回元素个数:{size}")
# size = len(info_docs)
# print(f"do_add_doc 召回元素个数:{size}")
return info_docs
def do_clear_vs(self):