diff --git a/server/api.py b/server/api.py index 69d9c76..669324f 100644 --- a/server/api.py +++ b/server/api.py @@ -144,7 +144,7 @@ def mount_knowledge_routes(app: FastAPI): from server.knowledge_base.kb_doc_api import (list_files, upload_docs, delete_docs, update_docs, download_doc, recreate_vector_store, search_docs, DocumentWithVSId, update_info, - update_docs_by_id,search_content,Document) + update_docs_by_id,search_content) app.post("/chat/knowledge_base_chat", tags=["Chat"], @@ -191,7 +191,7 @@ def mount_knowledge_routes(app: FastAPI): app.post("/knowledge_base/search_content", tags=["Knowledge Base Management"], - response_model=List[Document], + response_model=List[DocumentWithVSId], summary="搜索文档库" )(search_content) diff --git a/server/chat/knowledge_base_chat.py b/server/chat/knowledge_base_chat.py index 5e60a3e..5a14c26 100644 --- a/server/chat/knowledge_base_chat.py +++ b/server/chat/knowledge_base_chat.py @@ -97,7 +97,6 @@ async def knowledge_base_chat(query: str = Body(..., description="用户输入", max_length=RERANKER_MAX_LENGTH, model_name_or_path=reranker_model_path ) - print(docs) docs = reranker_model.compress_documents(documents=docs, query=query) print("---------after rerank------------------") diff --git a/server/knowledge_base/kb_doc_api.py b/server/knowledge_base/kb_doc_api.py index 927c8a6..ead1718 100644 --- a/server/knowledge_base/kb_doc_api.py +++ b/server/knowledge_base/kb_doc_api.py @@ -84,14 +84,14 @@ def search_content( query: str = Body("", description="用户输入", examples=["国网安徽信通准入手续"]), knowledge_base_name: str = Body(..., description="知识库名称", examples=["samples"]), top_k: int = Body(2, description="匹配文档数"), - )-> List[Document]: + )-> List[DocumentWithVSId]: print("kb_doc_api search_content") docs=[] kb = KBServiceFactory.get_service_by_name(knowledge_base_name) if kb is not None: if query: docs = kb.search_content(query, top_k) - print(f"search_content, docs:{docs}") + #print(f"search_content, docs:{docs}") return docs return docs diff --git a/server/knowledge_base/kb_service/base.py b/server/knowledge_base/kb_service/base.py index 248b9ba..f60a44f 100644 --- a/server/knowledge_base/kb_service/base.py +++ b/server/knowledge_base/kb_service/base.py @@ -119,6 +119,9 @@ class KBService(ABC): if docs: # 将 metadata["source"] 改为相对路径 for doc in docs: + #增加时间,added by weiweiwang 2024.3.6 + from datetime import datetime + doc.metadata["updatetime"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") try: source = doc.metadata.get("source", "") if os.path.isabs(source): @@ -188,7 +191,7 @@ class KBService(ABC): def search_content(self, query: str, top_k: int, - )->List[Document]: + )->List[DocumentWithVSId]: print("KBService search_content") docs = self.searchbyContent(query,top_k) return docs @@ -282,7 +285,7 @@ class KBService(ABC): def searchbyContent(self, query: str, top_k: int, - )->List[Document]: + )->List[DocumentWithVSId]: """ 搜索知识库子类实自己逻辑 """ diff --git a/server/knowledge_base/kb_service/es_kb_service.py b/server/knowledge_base/kb_service/es_kb_service.py index 22ced1d..9013019 100644 --- a/server/knowledge_base/kb_service/es_kb_service.py +++ b/server/knowledge_base/kb_service/es_kb_service.py @@ -11,6 +11,7 @@ from server.utils import load_local_embeddings from elasticsearch import Elasticsearch,BadRequestError from configs import logger from configs import kbs_config +from server.knowledge_base.model.kb_document_model import DocumentWithVSId class ESKBService(KBService): @@ -171,24 +172,29 @@ class ESKBService(KBService): def searchbyContent(self, query:str, top_k: int = 2): if self.es_client_python.indices.exists(index=self.index_name): - print(f"******ESKBService searchByContent {self.index_name}") + print(f"******ESKBService searchByContent {self.index_name},query:{query}") tem_query = { "query": {"match": { "context": "*" + query + "*" - }} + }}, + "highlight":{"fields":{ + "context":{} + }} } search_results = self.es_client_python.search(index=self.index_name, body=tem_query, size=top_k) hits = [hit for hit in search_results["hits"]["hits"]] - docs_and_scores = [ - ( - Document( - page_content=hit["_source"]["context"], - metadata=hit["_source"]["metadata"], - ) - ) - for hit in hits - ] + docs_and_scores = [] + for hit in hits: + highlighted_contexts = "" + if 'highlight' in hit: + highlighted_contexts = " ".join(hit['highlight']['context']) + #print(f"******searchByContent highlighted_contexts:{highlighted_contexts}") + docs_and_scores.append(DocumentWithVSId( + page_content=highlighted_contexts, + metadata=hit["_source"]["metadata"], + id = hit["_id"], + )) return docs_and_scores def del_doc_by_ids(self, ids: List[str]) -> bool: