完善文档查询接口

2024-03-07 14:29:08 +08:00 · 2024-03-07 14:29:08 +08:00 · 26f3f364d7
parent 53fb9f6319
commit 26f3f364d7
5 changed files with 26 additions and 18 deletions
--- a/server/api.py
+++ b/server/api.py
@@ -144,7 +144,7 @@ def mount_knowledge_routes(app: FastAPI):
    from server.knowledge_base.kb_doc_api import (list_files, upload_docs, delete_docs,
                                                update_docs, download_doc, recreate_vector_store,
                                                search_docs, DocumentWithVSId, update_info,
-                                                update_docs_by_id,search_content,Document)
+                                                update_docs_by_id,search_content)
    app.post("/chat/knowledge_base_chat",
             tags=["Chat"],
@@ -191,7 +191,7 @@ def mount_knowledge_routes(app: FastAPI):
    app.post("/knowledge_base/search_content",
             tags=["Knowledge Base Management"],
-             response_model=List[Document],
+             response_model=List[DocumentWithVSId],
             summary="搜索文档库"
             )(search_content)
--- a/server/chat/knowledge_base_chat.py
+++ b/server/chat/knowledge_base_chat.py
@@ -97,7 +97,6 @@ async def knowledge_base_chat(query: str = Body(..., description="用户输入",
                                            max_length=RERANKER_MAX_LENGTH,
                                            model_name_or_path=reranker_model_path
                                            )
            print(docs)
            docs = reranker_model.compress_documents(documents=docs,
                                                     query=query)
            print("---------after rerank------------------")
--- a/server/knowledge_base/kb_doc_api.py
+++ b/server/knowledge_base/kb_doc_api.py
@@ -84,14 +84,14 @@ def search_content(
        query: str = Body("", description="用户输入", examples=["国网安徽信通准入手续"]),
        knowledge_base_name: str = Body(..., description="知识库名称", examples=["samples"]),
        top_k: int = Body(2, description="匹配文档数"),
-        )-> List[Document]:
+        )-> List[DocumentWithVSId]:
    print("kb_doc_api search_content")
    docs=[]
    kb = KBServiceFactory.get_service_by_name(knowledge_base_name)
    if kb is not None:
        if query:
            docs = kb.search_content(query, top_k)
-            print(f"search_content, docs:{docs}")
+            #print(f"search_content, docs:{docs}")
            return docs
    return docs
--- a/server/knowledge_base/kb_service/base.py
+++ b/server/knowledge_base/kb_service/base.py
@@ -119,6 +119,9 @@ class KBService(ABC):
        if docs:
            # 将 metadata["source"] 改为相对路径
            for doc in docs:
+                #增加时间，added by weiweiwang 2024.3.6
+                from datetime import datetime 
+                doc.metadata["updatetime"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                try:
                    source = doc.metadata.get("source", "")
                    if os.path.isabs(source):
@@ -188,7 +191,7 @@ class KBService(ABC):
    def search_content(self,
                    query: str,
                    top_k: int,
-                    )->List[Document]:
+                    )->List[DocumentWithVSId]:
        print("KBService search_content")
        docs = self.searchbyContent(query,top_k)
        return docs
@@ -282,7 +285,7 @@ class KBService(ABC):
    def searchbyContent(self,
                    query: str,
                    top_k: int,
-                    )->List[Document]:
+                    )->List[DocumentWithVSId]:
        """
        搜索知识库子类实自己逻辑
        """
--- a/server/knowledge_base/kb_service/es_kb_service.py
+++ b/server/knowledge_base/kb_service/es_kb_service.py
@@ -11,6 +11,7 @@ from server.utils import load_local_embeddings
 from elasticsearch import Elasticsearch,BadRequestError
 from configs import logger
 from configs import kbs_config
+from server.knowledge_base.model.kb_document_model import DocumentWithVSId
 class ESKBService(KBService):
@@ -171,24 +172,29 @@ class ESKBService(KBService):
    def searchbyContent(self, query:str, top_k: int = 2):
        if self.es_client_python.indices.exists(index=self.index_name):
-            print(f"******ESKBService searchByContent {self.index_name}")
+            print(f"******ESKBService searchByContent {self.index_name},query:{query}")
            tem_query = {
                "query": {"match": {
                        "context": "*" + query + "*"
-                    }}
+                    }},
+                "highlight":{"fields":{
+                        "context":{}
+                        }}
                }
            search_results = self.es_client_python.search(index=self.index_name, body=tem_query, size=top_k)
            hits = [hit for hit in search_results["hits"]["hits"]]
-            docs_and_scores = [
-                (
-                    Document(
-                        page_content=hit["_source"]["context"],
-                        metadata=hit["_source"]["metadata"],
-                    )
-                )
-                for hit in hits
-            ]
+            docs_and_scores = []
+            for hit in hits:
+                highlighted_contexts = ""
+                if 'highlight' in hit:
+                    highlighted_contexts = " ".join(hit['highlight']['context'])
+                    #print(f"******searchByContent highlighted_contexts:{highlighted_contexts}")
+                docs_and_scores.append(DocumentWithVSId(
+                        page_content=highlighted_contexts,
+                        metadata=hit["_source"]["metadata"],
+                        id = hit["_id"],
+                    ))
            return docs_and_scores
    def del_doc_by_ids(self, ids: List[str]) -> bool: