From b3c7f8b07240fe3cb2774200aad93058d9b93435 Mon Sep 17 00:00:00 2001
From: liunux4odoo <41217877+liunux4odoo@users.noreply.github.com>
Date: Thu, 28 Sep 2023 15:12:03 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dwebui=E4=B8=AD=E9=87=8D?=
 =?UTF-8?q?=E5=BB=BA=E7=9F=A5=E8=AF=86=E5=BA=93=E4=BB=A5=E5=8F=8A=E5=AF=B9?=
 =?UTF-8?q?=E8=AF=9D=E7=95=8C=E9=9D=A2UI=E9=94=99=E8=AF=AF=20(#1615)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 修复bug：webui点重建知识库时，如果存在不支持的文件会导致整个接口错误；migrate中没有导入CHUNK_SIZE和OVERLAP_SIZE

* 修复：webui对话界面的expander一直为running状态；简化历史消息获取方法
---
 server/knowledge_base/migrate.py  |  1 +
 server/knowledge_base/utils.py    | 29 ++++++++++++++-----------
 tests/api/test_stream_chat_api.py |  2 +-
 webui_pages/dialogue/dialogue.py  | 36 +++++++++++++------------------
 4 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/server/knowledge_base/migrate.py b/server/knowledge_base/migrate.py
index 53abecf..2ca144a 100644
--- a/server/knowledge_base/migrate.py
+++ b/server/knowledge_base/migrate.py
@@ -1,4 +1,5 @@
 from configs import (EMBEDDING_MODEL, DEFAULT_VS_TYPE, ZH_TITLE_ENHANCE,
+                     CHUNK_SIZE, OVERLAP_SIZE,
                     logger, log_verbose)
 from server.knowledge_base.utils import (get_file_path, list_kbs_from_folder,
                                         list_files_from_folder,files2docs_in_thread,
diff --git a/server/knowledge_base/utils.py b/server/knowledge_base/utils.py
index aa286ca..02212c8 100644
--- a/server/knowledge_base/utils.py
+++ b/server/knowledge_base/utils.py
@@ -373,18 +373,23 @@ def files2docs_in_thread(
     kwargs_list = []
     for i, file in enumerate(files):
         kwargs = {}
-        if isinstance(file, tuple) and len(file) >= 2:
-            file = KnowledgeFile(filename=file[0], knowledge_base_name=file[1])
-        elif isinstance(file, dict):
-            filename = file.pop("filename")
-            kb_name = file.pop("kb_name")
-            kwargs = file
-            file = KnowledgeFile(filename=filename, knowledge_base_name=kb_name)
-        kwargs["file"] = file
-        kwargs["chunk_size"] = chunk_size
-        kwargs["chunk_overlap"] = chunk_overlap
-        kwargs["zh_title_enhance"] = zh_title_enhance
-        kwargs_list.append(kwargs)
+        try:
+            if isinstance(file, tuple) and len(file) >= 2:
+                filename=file[0]
+                kb_name=file[1]
+                file = KnowledgeFile(filename=filename, knowledge_base_name=kb_name)
+            elif isinstance(file, dict):
+                filename = file.pop("filename")
+                kb_name = file.pop("kb_name")
+                kwargs.update(file)
+                file = KnowledgeFile(filename=filename, knowledge_base_name=kb_name)
+            kwargs["file"] = file
+            kwargs["chunk_size"] = chunk_size
+            kwargs["chunk_overlap"] = chunk_overlap
+            kwargs["zh_title_enhance"] = zh_title_enhance
+            kwargs_list.append(kwargs)
+        except Exception as e:
+            yield False, (kb_name, filename, str(e))
 
     for result in run_in_thread_pool(func=file2docs, params=kwargs_list, pool=pool):
         yield result
diff --git a/tests/api/test_stream_chat_api.py b/tests/api/test_stream_chat_api.py
index 47547f7..8b98c20 100644
--- a/tests/api/test_stream_chat_api.py
+++ b/tests/api/test_stream_chat_api.py
@@ -91,7 +91,7 @@ def test_knowledge_chat(api="/chat/knowledge_base_chat"):
     print("=" * 30 + api + "  output" + "="*30)
     for line in response.iter_content(None, decode_unicode=True):
         data = json.loads(line)
-        if "anser" in data:
+        if "answer" in data:
             print(data["answer"], end="", flush=True)
     assert "docs" in data and len(data["docs"]) > 0
     pprint(data["docs"])
diff --git a/webui_pages/dialogue/dialogue.py b/webui_pages/dialogue/dialogue.py
index 5286e75..05c2a4f 100644
--- a/webui_pages/dialogue/dialogue.py
+++ b/webui_pages/dialogue/dialogue.py
@@ -16,30 +16,24 @@ chat_box = ChatBox(
 )
 
 
-def get_messages_history(history_len: int) -> List[Dict]:
+def get_messages_history(history_len: int, content_in_expander: bool = False) -> List[Dict]:
+    '''
+    返回消息历史。
+    content_in_expander控制是否返回expander元素中的内容，一般导出的时候可以选上，传入LLM的history不需要
+    '''
+
     def filter(msg):
-        '''
-        针对当前简单文本对话，只返回每条消息的第一个element的内容
-        '''
-        content = [x._content for x in msg["elements"] if x._output_method in ["markdown", "text"]]
+        content = [x for x in msg["elements"] if x._output_method in ["markdown", "text"]]
+        if not content_in_expander:
+            content = [x for x in content if not x._in_expander]
+        content = [x.content for x in content]
+
         return {
             "role": msg["role"],
-            "content": content[0] if content else "",
+            "content": "\n\n".join(content),
         }
 
-    # workaround before upgrading streamlit-chatbox.
-    def stop(h):
-        return False
-
-    history = chat_box.filter_history(history_len=100000, filter=filter, stop=stop)
-    user_count = 0
-    i = 1
-    for i in range(1, len(history) + 1):
-        if history[-i]["role"] == "user":
-            user_count += 1
-            if user_count >= history_len:
-                break
-    return history[-i:]
+    return chat_box.filter_history(history_len=history_len, filter=filter)
 
 
 def dialogue_page(api: ApiRequest):
@@ -185,7 +179,7 @@ def dialogue_page(api: ApiRequest):
         elif dialogue_mode == "知识库问答":
             chat_box.ai_say([
                 f"正在查询知识库 `{selected_kb}` ...",
-                Markdown("...", in_expander=True, title="知识库匹配结果"),
+                Markdown("...", in_expander=True, title="知识库匹配结果", state="complete"),
             ])
             text = ""
             for d in api.knowledge_base_chat(prompt,
@@ -205,7 +199,7 @@ def dialogue_page(api: ApiRequest):
         elif dialogue_mode == "搜索引擎问答":
             chat_box.ai_say([
                 f"正在执行 `{search_engine}` 搜索...",
-                Markdown("...", in_expander=True, title="网络搜索结果"),
+                Markdown("...", in_expander=True, title="网络搜索结果", state="complete"),
             ])
             text = ""
             for d in api.search_engine_chat(prompt,