Merge branch 'master' into dev

2023-05-14 00:14:43 +08:00 · 2023-05-14 00:14:43 +08:00 · 5284fb2726
parent c86dd4dd47 3ff885d0d3
commit 5284fb2726
5 changed files with 34 additions and 24 deletions
--- a/api.py
+++ b/api.py
@@ -181,7 +181,7 @@ async def delete_docs(
        if os.path.exists(doc_path):
            os.remove(doc_path)
        else:
-            return {"code": 1, "msg": f"document {doc_name} not found"}
+            BaseResponse(code=1, msg=f"document {doc_name} not found")
        remain_docs = await list_docs(knowledge_base_id)
        if remain_docs["code"] != 0 or len(remain_docs["data"]) == 0:
@@ -211,24 +211,30 @@ async def local_doc_chat(
 ):
    vs_path = os.path.join(VS_ROOT_PATH, knowledge_base_id)
    if not os.path.exists(vs_path):
-        raise ValueError(f"Knowledge base {knowledge_base_id} not found")
+        # return BaseResponse(code=1, msg=f"Knowledge base {knowledge_base_id} not found")
        return ChatMessage(
            question=question,
            response=f"Knowledge base {knowledge_base_id} not found",
            history=history,
            source_documents=[],
        )
    else:
        for resp, history in local_doc_qa.get_knowledge_based_answer(
                query=question, vs_path=vs_path, chat_history=history, streaming=True
        ):
            pass
        source_documents = [
            f"""出处 [{inum + 1}] {os.path.split(doc.metadata['source'])[-1]}：\n\n{doc.page_content}\n\n"""
            f"""相关度：{doc.metadata['score']}\n\n"""
            for inum, doc in enumerate(resp["source_documents"])
        ]
-    for resp, history in local_doc_qa.get_knowledge_based_answer(
+        return ChatMessage(
-            query=question, vs_path=vs_path, chat_history=history, streaming=True
+            question=question,
-    ):
+            response=resp["result"],
-        pass
+            history=history,
-    source_documents = [
+            source_documents=source_documents,
-        f"""出处 [{inum + 1}] {os.path.split(doc.metadata['source'])[-1]}：\n\n{doc.page_content}\n\n"""
+        )
        f"""相关度：{doc.metadata['score']}\n\n"""
        for inum, doc in enumerate(resp["source_documents"])
    ]
    return ChatMessage(
        question=question,
        response=resp["result"],
        history=history,
        source_documents=source_documents,
    )
 async def chat(
--- a/chains/local_doc_qa.py
+++ b/chains/local_doc_qa.py
@@ -39,7 +39,11 @@ def load_file(filepath, sentence_size=SENTENCE_SIZE):
 def write_check_file(filepath, docs):
-    fout = open('load_file.txt', 'a')
+    folder_path = os.path.join(os.path.dirname(filepath), "tmp_files")
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
    fp = os.path.join(folder_path, 'load_file.txt')
    fout = open(fp, 'a')
    fout.write("filepath=%s,len=%s" % (filepath, len(docs)))
    fout.write('\n')
    for i in docs:
--- a/cli_demo.py
+++ b/cli_demo.py
@@ -31,13 +31,13 @@ if __name__ == "__main__":
                                                                     chat_history=history,
                                                                     streaming=STREAMING):
            if STREAMING:
-                logger.info(resp["result"][last_print_len:])
+                print(resp["result"][last_print_len:], end="", flush=True)
                last_print_len = len(resp["result"])
            else:
-                logger.info(resp["result"])
+                print(resp["result"])
        if REPLY_WITH_SOURCE:
            source_text = [f"""出处 [{inum + 1}] {os.path.split(doc.metadata['source'])[-1]}：\n\n{doc.page_content}\n\n"""
                           # f"""相关度：{doc.metadata['score']}\n\n"""
                           for inum, doc in
                           enumerate(resp["source_documents"])]
-            logger.info("\n\n" + "\n\n".join(source_text))
+            print("\n\n" + "\n\n".join(source_text))
--- a/loader/image_loader.py
+++ b/loader/image_loader.py
@@ -30,7 +30,7 @@ class UnstructuredPaddleImageLoader(UnstructuredFileLoader):
 if __name__ == "__main__":
-    filepath = "../content/samples/test.jpg"
+    filepath = os.path.join(os.path.dirname(os.path.dirname(__file__)), "content", "samples", "test.jpg")
    loader = UnstructuredPaddleImageLoader(filepath, mode="elements")
    docs = loader.load()
    for doc in docs:
--- a/loader/pdf_loader.py
+++ b/loader/pdf_loader.py
@@ -46,7 +46,7 @@ class UnstructuredPaddlePDFLoader(UnstructuredFileLoader):
 if __name__ == "__main__":
-    filepath = "../content/samples/test.pdf"
+    filepath = os.path.join(os.path.dirname(os.path.dirname(__file__)), "content", "samples", "test.pdf")
    loader = UnstructuredPaddlePDFLoader(filepath, mode="elements")
    docs = loader.load()
    for doc in docs: