完善文档查询接口

This commit is contained in:
wvivi2023 2024-03-07 14:29:08 +08:00
parent 53fb9f6319
commit 26f3f364d7
5 changed files with 26 additions and 18 deletions

View File

@ -144,7 +144,7 @@ def mount_knowledge_routes(app: FastAPI):
from server.knowledge_base.kb_doc_api import (list_files, upload_docs, delete_docs, from server.knowledge_base.kb_doc_api import (list_files, upload_docs, delete_docs,
update_docs, download_doc, recreate_vector_store, update_docs, download_doc, recreate_vector_store,
search_docs, DocumentWithVSId, update_info, search_docs, DocumentWithVSId, update_info,
update_docs_by_id,search_content,Document) update_docs_by_id,search_content)
app.post("/chat/knowledge_base_chat", app.post("/chat/knowledge_base_chat",
tags=["Chat"], tags=["Chat"],
@ -191,7 +191,7 @@ def mount_knowledge_routes(app: FastAPI):
app.post("/knowledge_base/search_content", app.post("/knowledge_base/search_content",
tags=["Knowledge Base Management"], tags=["Knowledge Base Management"],
response_model=List[Document], response_model=List[DocumentWithVSId],
summary="搜索文档库" summary="搜索文档库"
)(search_content) )(search_content)

View File

@ -97,7 +97,6 @@ async def knowledge_base_chat(query: str = Body(..., description="用户输入",
max_length=RERANKER_MAX_LENGTH, max_length=RERANKER_MAX_LENGTH,
model_name_or_path=reranker_model_path model_name_or_path=reranker_model_path
) )
print(docs)
docs = reranker_model.compress_documents(documents=docs, docs = reranker_model.compress_documents(documents=docs,
query=query) query=query)
print("---------after rerank------------------") print("---------after rerank------------------")

View File

@ -84,14 +84,14 @@ def search_content(
query: str = Body("", description="用户输入", examples=["国网安徽信通准入手续"]), query: str = Body("", description="用户输入", examples=["国网安徽信通准入手续"]),
knowledge_base_name: str = Body(..., description="知识库名称", examples=["samples"]), knowledge_base_name: str = Body(..., description="知识库名称", examples=["samples"]),
top_k: int = Body(2, description="匹配文档数"), top_k: int = Body(2, description="匹配文档数"),
)-> List[Document]: )-> List[DocumentWithVSId]:
print("kb_doc_api search_content") print("kb_doc_api search_content")
docs=[] docs=[]
kb = KBServiceFactory.get_service_by_name(knowledge_base_name) kb = KBServiceFactory.get_service_by_name(knowledge_base_name)
if kb is not None: if kb is not None:
if query: if query:
docs = kb.search_content(query, top_k) docs = kb.search_content(query, top_k)
print(f"search_content, docs:{docs}") #print(f"search_content, docs:{docs}")
return docs return docs
return docs return docs

View File

@ -119,6 +119,9 @@ class KBService(ABC):
if docs: if docs:
# 将 metadata["source"] 改为相对路径 # 将 metadata["source"] 改为相对路径
for doc in docs: for doc in docs:
#增加时间added by weiweiwang 2024.3.6
from datetime import datetime
doc.metadata["updatetime"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
try: try:
source = doc.metadata.get("source", "") source = doc.metadata.get("source", "")
if os.path.isabs(source): if os.path.isabs(source):
@ -188,7 +191,7 @@ class KBService(ABC):
def search_content(self, def search_content(self,
query: str, query: str,
top_k: int, top_k: int,
)->List[Document]: )->List[DocumentWithVSId]:
print("KBService search_content") print("KBService search_content")
docs = self.searchbyContent(query,top_k) docs = self.searchbyContent(query,top_k)
return docs return docs
@ -282,7 +285,7 @@ class KBService(ABC):
def searchbyContent(self, def searchbyContent(self,
query: str, query: str,
top_k: int, top_k: int,
)->List[Document]: )->List[DocumentWithVSId]:
""" """
搜索知识库,子类实现自己的逻辑 搜索知识库,子类实现自己的逻辑
""" """

View File

@ -11,6 +11,7 @@ from server.utils import load_local_embeddings
from elasticsearch import Elasticsearch,BadRequestError from elasticsearch import Elasticsearch,BadRequestError
from configs import logger from configs import logger
from configs import kbs_config from configs import kbs_config
from server.knowledge_base.model.kb_document_model import DocumentWithVSId
class ESKBService(KBService): class ESKBService(KBService):
@ -171,24 +172,29 @@ class ESKBService(KBService):
def searchbyContent(self, query:str, top_k: int = 2): def searchbyContent(self, query:str, top_k: int = 2):
if self.es_client_python.indices.exists(index=self.index_name): if self.es_client_python.indices.exists(index=self.index_name):
print(f"******ESKBService searchByContent {self.index_name}") print(f"******ESKBService searchByContent {self.index_name},query:{query}")
tem_query = { tem_query = {
"query": {"match": { "query": {"match": {
"context": "*" + query + "*" "context": "*" + query + "*"
}} }},
"highlight":{"fields":{
"context":{}
}}
} }
search_results = self.es_client_python.search(index=self.index_name, body=tem_query, size=top_k) search_results = self.es_client_python.search(index=self.index_name, body=tem_query, size=top_k)
hits = [hit for hit in search_results["hits"]["hits"]] hits = [hit for hit in search_results["hits"]["hits"]]
docs_and_scores = [
(
Document(
page_content=hit["_source"]["context"],
metadata=hit["_source"]["metadata"],
)
)
for hit in hits
]
docs_and_scores = []
for hit in hits:
highlighted_contexts = ""
if 'highlight' in hit:
highlighted_contexts = " ".join(hit['highlight']['context'])
#print(f"******searchByContent highlighted_contexts:{highlighted_contexts}")
docs_and_scores.append(DocumentWithVSId(
page_content=highlighted_contexts,
metadata=hit["_source"]["metadata"],
id = hit["_id"],
))
return docs_and_scores return docs_and_scores
def del_doc_by_ids(self, ids: List[str]) -> bool: def del_doc_by_ids(self, ids: List[str]) -> bool: