完善文档查询接口
This commit is contained in:
parent
53fb9f6319
commit
26f3f364d7
|
|
@ -144,7 +144,7 @@ def mount_knowledge_routes(app: FastAPI):
|
||||||
from server.knowledge_base.kb_doc_api import (list_files, upload_docs, delete_docs,
|
from server.knowledge_base.kb_doc_api import (list_files, upload_docs, delete_docs,
|
||||||
update_docs, download_doc, recreate_vector_store,
|
update_docs, download_doc, recreate_vector_store,
|
||||||
search_docs, DocumentWithVSId, update_info,
|
search_docs, DocumentWithVSId, update_info,
|
||||||
update_docs_by_id,search_content,Document)
|
update_docs_by_id,search_content)
|
||||||
|
|
||||||
app.post("/chat/knowledge_base_chat",
|
app.post("/chat/knowledge_base_chat",
|
||||||
tags=["Chat"],
|
tags=["Chat"],
|
||||||
|
|
@ -191,7 +191,7 @@ def mount_knowledge_routes(app: FastAPI):
|
||||||
|
|
||||||
app.post("/knowledge_base/search_content",
|
app.post("/knowledge_base/search_content",
|
||||||
tags=["Knowledge Base Management"],
|
tags=["Knowledge Base Management"],
|
||||||
response_model=List[Document],
|
response_model=List[DocumentWithVSId],
|
||||||
summary="搜索文档库"
|
summary="搜索文档库"
|
||||||
)(search_content)
|
)(search_content)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -97,7 +97,6 @@ async def knowledge_base_chat(query: str = Body(..., description="用户输入",
|
||||||
max_length=RERANKER_MAX_LENGTH,
|
max_length=RERANKER_MAX_LENGTH,
|
||||||
model_name_or_path=reranker_model_path
|
model_name_or_path=reranker_model_path
|
||||||
)
|
)
|
||||||
print(docs)
|
|
||||||
docs = reranker_model.compress_documents(documents=docs,
|
docs = reranker_model.compress_documents(documents=docs,
|
||||||
query=query)
|
query=query)
|
||||||
print("---------after rerank------------------")
|
print("---------after rerank------------------")
|
||||||
|
|
|
||||||
|
|
@ -84,14 +84,14 @@ def search_content(
|
||||||
query: str = Body("", description="用户输入", examples=["国网安徽信通准入手续"]),
|
query: str = Body("", description="用户输入", examples=["国网安徽信通准入手续"]),
|
||||||
knowledge_base_name: str = Body(..., description="知识库名称", examples=["samples"]),
|
knowledge_base_name: str = Body(..., description="知识库名称", examples=["samples"]),
|
||||||
top_k: int = Body(2, description="匹配文档数"),
|
top_k: int = Body(2, description="匹配文档数"),
|
||||||
)-> List[Document]:
|
)-> List[DocumentWithVSId]:
|
||||||
print("kb_doc_api search_content")
|
print("kb_doc_api search_content")
|
||||||
docs=[]
|
docs=[]
|
||||||
kb = KBServiceFactory.get_service_by_name(knowledge_base_name)
|
kb = KBServiceFactory.get_service_by_name(knowledge_base_name)
|
||||||
if kb is not None:
|
if kb is not None:
|
||||||
if query:
|
if query:
|
||||||
docs = kb.search_content(query, top_k)
|
docs = kb.search_content(query, top_k)
|
||||||
print(f"search_content, docs:{docs}")
|
#print(f"search_content, docs:{docs}")
|
||||||
return docs
|
return docs
|
||||||
return docs
|
return docs
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -119,6 +119,9 @@ class KBService(ABC):
|
||||||
if docs:
|
if docs:
|
||||||
# 将 metadata["source"] 改为相对路径
|
# 将 metadata["source"] 改为相对路径
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
|
#增加时间,added by weiweiwang 2024.3.6
|
||||||
|
from datetime import datetime
|
||||||
|
doc.metadata["updatetime"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
try:
|
try:
|
||||||
source = doc.metadata.get("source", "")
|
source = doc.metadata.get("source", "")
|
||||||
if os.path.isabs(source):
|
if os.path.isabs(source):
|
||||||
|
|
@ -188,7 +191,7 @@ class KBService(ABC):
|
||||||
def search_content(self,
|
def search_content(self,
|
||||||
query: str,
|
query: str,
|
||||||
top_k: int,
|
top_k: int,
|
||||||
)->List[Document]:
|
)->List[DocumentWithVSId]:
|
||||||
print("KBService search_content")
|
print("KBService search_content")
|
||||||
docs = self.searchbyContent(query,top_k)
|
docs = self.searchbyContent(query,top_k)
|
||||||
return docs
|
return docs
|
||||||
|
|
@ -282,7 +285,7 @@ class KBService(ABC):
|
||||||
def searchbyContent(self,
|
def searchbyContent(self,
|
||||||
query: str,
|
query: str,
|
||||||
top_k: int,
|
top_k: int,
|
||||||
)->List[Document]:
|
)->List[DocumentWithVSId]:
|
||||||
"""
|
"""
|
||||||
搜索知识库子类实自己逻辑
|
搜索知识库子类实自己逻辑
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ from server.utils import load_local_embeddings
|
||||||
from elasticsearch import Elasticsearch,BadRequestError
|
from elasticsearch import Elasticsearch,BadRequestError
|
||||||
from configs import logger
|
from configs import logger
|
||||||
from configs import kbs_config
|
from configs import kbs_config
|
||||||
|
from server.knowledge_base.model.kb_document_model import DocumentWithVSId
|
||||||
|
|
||||||
class ESKBService(KBService):
|
class ESKBService(KBService):
|
||||||
|
|
||||||
|
|
@ -171,24 +172,29 @@ class ESKBService(KBService):
|
||||||
|
|
||||||
def searchbyContent(self, query:str, top_k: int = 2):
|
def searchbyContent(self, query:str, top_k: int = 2):
|
||||||
if self.es_client_python.indices.exists(index=self.index_name):
|
if self.es_client_python.indices.exists(index=self.index_name):
|
||||||
print(f"******ESKBService searchByContent {self.index_name}")
|
print(f"******ESKBService searchByContent {self.index_name},query:{query}")
|
||||||
tem_query = {
|
tem_query = {
|
||||||
"query": {"match": {
|
"query": {"match": {
|
||||||
"context": "*" + query + "*"
|
"context": "*" + query + "*"
|
||||||
}}
|
}},
|
||||||
|
"highlight":{"fields":{
|
||||||
|
"context":{}
|
||||||
|
}}
|
||||||
}
|
}
|
||||||
search_results = self.es_client_python.search(index=self.index_name, body=tem_query, size=top_k)
|
search_results = self.es_client_python.search(index=self.index_name, body=tem_query, size=top_k)
|
||||||
hits = [hit for hit in search_results["hits"]["hits"]]
|
hits = [hit for hit in search_results["hits"]["hits"]]
|
||||||
docs_and_scores = [
|
|
||||||
(
|
|
||||||
Document(
|
|
||||||
page_content=hit["_source"]["context"],
|
|
||||||
metadata=hit["_source"]["metadata"],
|
|
||||||
)
|
|
||||||
)
|
|
||||||
for hit in hits
|
|
||||||
]
|
|
||||||
|
|
||||||
|
docs_and_scores = []
|
||||||
|
for hit in hits:
|
||||||
|
highlighted_contexts = ""
|
||||||
|
if 'highlight' in hit:
|
||||||
|
highlighted_contexts = " ".join(hit['highlight']['context'])
|
||||||
|
#print(f"******searchByContent highlighted_contexts:{highlighted_contexts}")
|
||||||
|
docs_and_scores.append(DocumentWithVSId(
|
||||||
|
page_content=highlighted_contexts,
|
||||||
|
metadata=hit["_source"]["metadata"],
|
||||||
|
id = hit["_id"],
|
||||||
|
))
|
||||||
return docs_and_scores
|
return docs_and_scores
|
||||||
|
|
||||||
def del_doc_by_ids(self, ids: List[str]) -> bool:
|
def del_doc_by_ids(self, ids: List[str]) -> bool:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue