es向量入库优化
This commit is contained in:
parent
92202e0f98
commit
d9acc07c59
|
|
@ -242,7 +242,8 @@ class ESKBService(KBService):
|
||||||
},
|
},
|
||||||
"track_total_hits": True,
|
"track_total_hits": True,
|
||||||
}
|
}
|
||||||
print(f"***do_delete_doc: kb_file.filepath:{kb_file.filepath}")
|
print(f"***do_delete_doc: kb_file.filepath:{kb_file.filepath}, kb.filename:{kb_file.filename}")
|
||||||
|
print(f"***do_delete_doc: kb.filename:{kb_file.filename}")
|
||||||
# 注意设置size,默认返回10个。
|
# 注意设置size,默认返回10个。
|
||||||
search_results = self.es_client_python.search(index=self.index_name, body=query,size=200)
|
search_results = self.es_client_python.search(index=self.index_name, body=query,size=200)
|
||||||
delete_list = [hit["_id"] for hit in search_results['hits']['hits']]
|
delete_list = [hit["_id"] for hit in search_results['hits']['hits']]
|
||||||
|
|
@ -277,22 +278,34 @@ class ESKBService(KBService):
|
||||||
|
|
||||||
if self.es_client_python.indices.exists(index=self.index_name):
|
if self.es_client_python.indices.exists(index=self.index_name):
|
||||||
file_path = docs[0].metadata.get("source")
|
file_path = docs[0].metadata.get("source")
|
||||||
|
print(f"****************do_add_doc, file_path:{file_path}")
|
||||||
|
# enhanced by weiweiwang 2025/2/24 to specific index name
|
||||||
|
# query = {
|
||||||
|
# "query": {
|
||||||
|
# "term": {"metadata.source.keyword": file_path},
|
||||||
|
# # "term": {"_index": self.index_name},
|
||||||
|
# }
|
||||||
|
# }
|
||||||
query = {
|
query = {
|
||||||
"query": {
|
"query": {
|
||||||
"term": {"metadata.source.keyword": file_path},
|
"bool": {
|
||||||
"term": {"_index": self.index_name},
|
"must": [
|
||||||
|
{ "term": { "metadata.source.keyword": file_path } },
|
||||||
|
{ "term": { "_index": self.index_name } }
|
||||||
|
]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
# 注意设置size,默认返回10个。
|
# 注意设置size,默认返回10个。
|
||||||
search_results = self.es_client_python.search(body=query, size=50)
|
search_results = self.es_client_python.search(body=query, size=200)
|
||||||
if len(search_results["hits"]["hits"]) == 0:
|
if len(search_results["hits"]["hits"]) == 0:
|
||||||
raise ValueError("召回元素个数为0")
|
raise ValueError("召回元素个数为0")
|
||||||
info_docs = [
|
info_docs = [
|
||||||
{"id": hit["_id"], "metadata": hit["_source"]["metadata"]}
|
{"id": hit["_id"], "metadata": hit["_source"]["metadata"]}
|
||||||
for hit in search_results["hits"]["hits"]
|
for hit in search_results["hits"]["hits"]
|
||||||
]
|
]
|
||||||
#size = len(info_docs)
|
# size = len(info_docs)
|
||||||
#print(f"do_add_doc 召回元素个数:{size}")
|
# print(f"do_add_doc 召回元素个数:{size}")
|
||||||
return info_docs
|
return info_docs
|
||||||
|
|
||||||
def do_clear_vs(self):
|
def do_clear_vs(self):
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue