Add debug logging to knowledge-base search and upload paths; add docx2txt dependency

2024-01-02 15:22:35 +08:00 · 2024-01-02 15:22:35 +08:00 · df348e96a5
parent 7b7a180323
commit df348e96a5
6 changed files with 11 additions and 0 deletions
--- a/requirements.txt
+++ b/requirements.txt
@@ -77,3 +77,4 @@ streamlit-chatbox==1.1.11
 streamlit-modal>=0.1.0
 streamlit-aggrid>=0.3.4.post3
 watchdog>=3.0.0
+docx2txt
--- a/requirements_api.txt
+++ b/requirements_api.txt
@@ -67,3 +67,4 @@ arxiv>=2.0.0
 youtube-search>=2.1.2
 duckduckgo-search>=3.9.9
 metaphor-python>=0.1.23
+docx2txt
--- a/requirements_webui.txt
+++ b/requirements_webui.txt
@@ -7,3 +7,4 @@ streamlit-modal>=0.1.0
 streamlit-aggrid>=0.3.4.post3
 httpx[brotli,http2,socks]>=0.25.2
 watchdog>=3.0.0
+docx2txt
--- a/server/knowledge_base/kb_doc_api.py
+++ b/server/knowledge_base/kb_doc_api.py
@@ -35,7 +35,9 @@ def search_docs(
    data = []
    if kb is not None:
        if query:
+            print(f"search_docs, query:{query}")  
            docs = kb.search_docs(query, top_k, score_threshold)
+            print(f"search_docs, docs:{docs}")
            data = [DocumentWithVSId(**x[0].dict(), score=x[1], id=x[0].metadata.get("id")) for x in docs]
        elif file_name or metadata:
            data = kb.list_docs(file_name=file_name, metadata=metadata)
@@ -155,6 +157,8 @@ def upload_docs(
    failed_files = {}
    file_names = list(docs.keys())

+    print(f"upload_docs, file_names:{file_names}")
+
    # 先将上传的文件保存到磁盘
    for result in _save_files_in_thread(files, knowledge_base_name=knowledge_base_name, override=override):
        filename = result["data"]["file_name"]
@@ -164,7 +168,9 @@ def upload_docs(
        if filename not in file_names:
            file_names.append(filename)

+   
    # 对保存的文件进行向量化
+    print(f"upload_docs, to_vector_store:{to_vector_store}")
    if to_vector_store:
        result = update_docs(
            knowledge_base_name=knowledge_base_name,
--- a/server/knowledge_base/kb_service/es_kb_service.py
+++ b/server/knowledge_base/kb_service/es_kb_service.py
@@ -141,6 +141,7 @@ class ESKBService(KBService):

    def do_search(self, query:str, top_k: int, score_threshold: float):
        # 文本相似性检索
+        print(f"do_search,top_k:{top_k},score_threshold:{score_threshold}")
        docs = self.db_init.similarity_search_with_score(query=query,
                                         k=top_k)
        return docs
--- a/server/knowledge_base/kb_service/faiss_kb_service.py
+++ b/server/knowledge_base/kb_service/faiss_kb_service.py
@@ -62,6 +62,7 @@ class FaissKBService(KBService):
                  top_k: int,
                  score_threshold: float = SCORE_THRESHOLD,
                  ) -> List[Document]:
+        print(f"do_search,top_k:{top_k},score_threshold:{score_threshold}")
        embed_func = EmbeddingsFunAdapter(self.embed_model)
        embeddings = embed_func.embed_query(query)
        with self.load_vector_store().acquire() as vs: