From b3c7f8b07240fe3cb2774200aad93058d9b93435 Mon Sep 17 00:00:00 2001 From: liunux4odoo <41217877+liunux4odoo@users.noreply.github.com> Date: Thu, 28 Sep 2023 15:12:03 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dwebui=E4=B8=AD=E9=87=8D?= =?UTF-8?q?=E5=BB=BA=E7=9F=A5=E8=AF=86=E5=BA=93=E4=BB=A5=E5=8F=8A=E5=AF=B9?= =?UTF-8?q?=E8=AF=9D=E7=95=8C=E9=9D=A2UI=E9=94=99=E8=AF=AF=20(#1615)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 修复bug:webui点重建知识库时,如果存在不支持的文件会导致整个接口错误;migrate中没有导入CHUNK_SIZE * 修复:webui对话界面的expander一直为running状态;简化历史消息获取方法 --- server/knowledge_base/migrate.py | 1 + server/knowledge_base/utils.py | 29 ++++++++++++++----------- tests/api/test_stream_chat_api.py | 2 +- webui_pages/dialogue/dialogue.py | 36 +++++++++++++------------------ 4 files changed, 34 insertions(+), 34 deletions(-) diff --git a/server/knowledge_base/migrate.py b/server/knowledge_base/migrate.py index 53abecf..2ca144a 100644 --- a/server/knowledge_base/migrate.py +++ b/server/knowledge_base/migrate.py @@ -1,4 +1,5 @@ from configs import (EMBEDDING_MODEL, DEFAULT_VS_TYPE, ZH_TITLE_ENHANCE, + CHUNK_SIZE, OVERLAP_SIZE, logger, log_verbose) from server.knowledge_base.utils import (get_file_path, list_kbs_from_folder, list_files_from_folder,files2docs_in_thread, diff --git a/server/knowledge_base/utils.py b/server/knowledge_base/utils.py index aa286ca..02212c8 100644 --- a/server/knowledge_base/utils.py +++ b/server/knowledge_base/utils.py @@ -373,18 +373,23 @@ def files2docs_in_thread( kwargs_list = [] for i, file in enumerate(files): kwargs = {} - if isinstance(file, tuple) and len(file) >= 2: - file = KnowledgeFile(filename=file[0], knowledge_base_name=file[1]) - elif isinstance(file, dict): - filename = file.pop("filename") - kb_name = file.pop("kb_name") - kwargs = file - file = KnowledgeFile(filename=filename, knowledge_base_name=kb_name) - kwargs["file"] = file - kwargs["chunk_size"] = chunk_size - kwargs["chunk_overlap"] = chunk_overlap - kwargs["zh_title_enhance"] = zh_title_enhance - kwargs_list.append(kwargs) + try: + if isinstance(file, tuple) and len(file) >= 2: + filename=file[0] + kb_name=file[1] + file = KnowledgeFile(filename=filename, knowledge_base_name=kb_name) + elif isinstance(file, dict): + filename = file.pop("filename") + kb_name = file.pop("kb_name") + kwargs.update(file) + file = KnowledgeFile(filename=filename, knowledge_base_name=kb_name) + kwargs["file"] = file + kwargs["chunk_size"] = chunk_size + kwargs["chunk_overlap"] = chunk_overlap + kwargs["zh_title_enhance"] = zh_title_enhance + kwargs_list.append(kwargs) + except Exception as e: + yield False, (kb_name, filename, str(e)) for result in run_in_thread_pool(func=file2docs, params=kwargs_list, pool=pool): yield result diff --git a/tests/api/test_stream_chat_api.py b/tests/api/test_stream_chat_api.py index 47547f7..8b98c20 100644 --- a/tests/api/test_stream_chat_api.py +++ b/tests/api/test_stream_chat_api.py @@ -91,7 +91,7 @@ def test_knowledge_chat(api="/chat/knowledge_base_chat"): print("=" * 30 + api + " output" + "="*30) for line in response.iter_content(None, decode_unicode=True): data = json.loads(line) - if "anser" in data: + if "answer" in data: print(data["answer"], end="", flush=True) assert "docs" in data and len(data["docs"]) > 0 pprint(data["docs"]) diff --git a/webui_pages/dialogue/dialogue.py b/webui_pages/dialogue/dialogue.py index 5286e75..05c2a4f 100644 --- a/webui_pages/dialogue/dialogue.py +++ b/webui_pages/dialogue/dialogue.py @@ -16,30 +16,24 @@ chat_box = ChatBox( ) -def get_messages_history(history_len: int) -> List[Dict]: +def get_messages_history(history_len: int, content_in_expander: bool = False) -> List[Dict]: + ''' + 返回消息历史。 + content_in_expander控制是否返回expander元素中的内容,一般导出的时候可以选上,传入LLM的history不需要 + ''' + def filter(msg): - ''' - 针对当前简单文本对话,只返回每条消息的第一个element的内容 - ''' - content = [x._content for x in msg["elements"] if x._output_method in ["markdown", "text"]] + content = [x for x in msg["elements"] if x._output_method in ["markdown", "text"]] + if not content_in_expander: + content = [x for x in content if not x._in_expander] + content = [x.content for x in content] + return { "role": msg["role"], - "content": content[0] if content else "", + "content": "\n\n".join(content), } - # workaround before upgrading streamlit-chatbox. - def stop(h): - return False - - history = chat_box.filter_history(history_len=100000, filter=filter, stop=stop) - user_count = 0 - i = 1 - for i in range(1, len(history) + 1): - if history[-i]["role"] == "user": - user_count += 1 - if user_count >= history_len: - break - return history[-i:] + return chat_box.filter_history(history_len=history_len, filter=filter) def dialogue_page(api: ApiRequest): @@ -185,7 +179,7 @@ def dialogue_page(api: ApiRequest): elif dialogue_mode == "知识库问答": chat_box.ai_say([ f"正在查询知识库 `{selected_kb}` ...", - Markdown("...", in_expander=True, title="知识库匹配结果"), + Markdown("...", in_expander=True, title="知识库匹配结果", state="complete"), ]) text = "" for d in api.knowledge_base_chat(prompt, @@ -205,7 +199,7 @@ def dialogue_page(api: ApiRequest): elif dialogue_mode == "搜索引擎问答": chat_box.ai_say([ f"正在执行 `{search_engine}` 搜索...", - Markdown("...", in_expander=True, title="网络搜索结果"), + Markdown("...", in_expander=True, title="网络搜索结果", state="complete"), ]) text = "" for d in api.search_engine_chat(prompt,