diff --git a/knowledge_base/samples/content/README.md b/knowledge_base/samples/content/README.md
index 73bce4c..6bc715f 100644
--- a/knowledge_base/samples/content/README.md
+++ b/knowledge_base/samples/content/README.md
@@ -45,6 +45,8 @@
 
 参见 [版本更新日志](https://github.com/imClumsyPanda/langchain-ChatGLM/releases)。
 
+从`0.1.x`升级过来的用户请注意,在完成[“开发部署 3 设置配置项”](docs/INSTALL.md)之后,需要将现有知识库迁移到新格式,具体见[知识库初始化与迁移](docs/INSTALL.md#知识库初始化与迁移)。
+
 ## 模型支持
 
 本项目中默认使用的 LLM 模型为 [THUDM/chatglm2-6b](https://huggingface.co/THUDM/chatglm2-6b),默认使用的 Embedding 模型为 [moka-ai/m3e-base](https://huggingface.co/moka-ai/m3e-base) 为例。
diff --git a/knowledge_base/samples/vector_store/index.faiss b/knowledge_base/samples/vector_store/index.faiss
index df2af8e..480036d 100644
Binary files a/knowledge_base/samples/vector_store/index.faiss and b/knowledge_base/samples/vector_store/index.faiss differ
diff --git a/knowledge_base/samples/vector_store/index.pkl b/knowledge_base/samples/vector_store/index.pkl
index 04b0446..ef82196 100644
Binary files a/knowledge_base/samples/vector_store/index.pkl and b/knowledge_base/samples/vector_store/index.pkl differ
diff --git a/webui.py b/webui.py
index c742437..7aad225 100644
--- a/webui.py
+++ b/webui.py
@@ -19,7 +19,6 @@ if __name__ == "__main__":
         f"欢迎使用 [`Langchain-Chatglm`](https://github.com/chatchat-space/langchain-chatglm) ! \n\n"
         f"当前使用模型`{LLM_MODEL}`, 您可以开始提问了."
     )
-    st.toast(" ")
 
     pages = {"对话": {"icon": "chat",
                     "func": dialogue_page,
diff --git a/webui_pages/knowledge_base/knowledge_base.py b/webui_pages/knowledge_base/knowledge_base.py
index 1faa312..8c4eacd 100644
--- a/webui_pages/knowledge_base/knowledge_base.py
+++ b/webui_pages/knowledge_base/knowledge_base.py
@@ -1,6 +1,6 @@
 import streamlit as st
 from webui_pages.utils import *
-from st_aggrid import AgGrid
+from st_aggrid import AgGrid, JsCode
 from st_aggrid.grid_options_builder import GridOptionsBuilder
 import pandas as pd
 from server.knowledge_base.utils import get_file_path, LOADER_DICT
@@ -10,6 +10,8 @@ from configs.model_config import embedding_model_dict, kbs_config, EMBEDDING_MOD
 
 # SENTENCE_SIZE = 100
 
+cell_renderer = JsCode("""function(params) {if(params.value==true){return '✓'}else{return '×'}}""")
+
 
 def config_aggrid(
         df: pd.DataFrame,
@@ -33,10 +35,16 @@ def knowledge_base_page(api: ApiRequest):
     kb_list = get_kb_details()
     kb_names = [x["kb_name"] for x in kb_list]
 
+    if "selected_kb_name" in st.session_state and st.session_state["selected_kb_name"] in kb_names:
+        selected_kb_index = kb_names.index(st.session_state["selected_kb_name"])
+    else:
+        selected_kb_index = 0
+
     selected_kb = st.selectbox(
-        "请选择知识库:",
+        "请选择或新建知识库:",
         kb_list + ["新建知识库"],
         format_func=lambda s: f"{s['kb_name']} ({s['vs_type']} @ {s['embed_model']})" if type(s) != str else s,
+        index=selected_kb_index
     )
 
     if selected_kb == "新建知识库":
@@ -85,8 +93,8 @@ def knowledge_base_page(api: ApiRequest):
                     embed_model=embed_model,
                 )
                 st.toast(ret["msg"])
-                # st.experimental_rerun()
-
+                st.session_state["selected_kb_name"] = kb_name
+                st.experimental_rerun()
     elif selected_kb:
         kb = selected_kb["kb_name"]
 
@@ -111,7 +119,7 @@ def knowledge_base_page(api: ApiRequest):
             if ret["code"] == 200:
                 st.toast(ret["msg"], icon="✔")
             else:
-                st.toast(ret["msg"], icon="❌")
+                st.toast(ret["msg"], icon="✖")
             st.session_state.files = []
 
         st.divider()
@@ -123,22 +131,25 @@ def knowledge_base_page(api: ApiRequest):
             st.info(f"知识库 `{kb}` 中暂无文件")
         else:
             st.write(f"知识库 `{kb}` 中已有文件:")
+            st.info("知识库中包含源文件与向量库,请从下表中选择文件后操作")
             doc_details.drop(columns=["kb_name"], inplace=True)
             doc_details = doc_details[[
                 "No", "file_name", "document_loader", "text_splitter", "in_folder", "in_db",
             ]]
-
+            # doc_details["in_folder"] = doc_details["in_folder"].replace(True, "✓").replace(False, "×")
+            # doc_details["in_db"] = doc_details["in_db"].replace(True, "✓").replace(False, "×")
             gb = config_aggrid(
                 doc_details,
                 {
+                    ("No", "序号"): {},
                     ("file_name", "文档名称"): {},
                     # ("file_ext", "文档类型"): {},
                     # ("file_version", "文档版本"): {},
                     ("document_loader", "文档加载器"): {},
                     ("text_splitter", "分词器"): {},
                     # ("create_time", "创建时间"): {},
-                    ("in_folder", "源文件"): {},
-                    ("in_db", "向量库"): {},
+                    ("in_folder", "源文件"): {"cellRenderer": cell_renderer},
+                    ("in_db", "向量库"): {"cellRenderer": cell_renderer},
                 },
                 "multiple",
             )
@@ -151,6 +162,7 @@ def knowledge_base_page(api: ApiRequest):
                 custom_css={
                     "#gridToolBar": {"display": "none"},
                 },
+                allow_unsafe_jscode=True
             )
 
             selected_rows = doc_grid.get("selected_rows", [])
@@ -172,9 +184,10 @@ def knowledge_base_page(api: ApiRequest):
                 disabled=True,
                 use_container_width=True,
             )
+            st.write()
             # 将文件分词并加载到向量库中
             if cols[1].button(
-                "添加至向量库", # "重新添加至向量库"
+                "重新添加至向量库" if selected_rows and (pd.DataFrame(selected_rows)["in_db"]).any() else "添加至向量库",
                 disabled=len(selected_rows) == 0,
                 use_container_width=True,
             ):
@@ -193,7 +206,7 @@ def knowledge_base_page(api: ApiRequest):
                 st.experimental_rerun()
 
             if cols[3].button(
-                "删除选中文档!",
+                "从知识库中删除",
                 type="primary",
                 use_container_width=True,
             ):
@@ -213,17 +226,18 @@ def knowledge_base_page(api: ApiRequest):
                 use_container_width=True,
                 type="primary",
             ):
-                empty = st.empty()
-                empty.progress(0.0, "")
-                for d in api.recreate_vector_store(kb):
-                    print(d)
-                    empty.progress(d["finished"] / d["total"], f"正在处理: {d['doc']}")
-                empty.write("重建完毕")
+                with st.spinner("向量库重构中"):
+                    empty = st.empty()
+                    empty.progress(0.0, "")
+                    for d in api.recreate_vector_store(kb):
+                        print(d)
+                        empty.progress(d["finished"] / d["total"], f"正在处理: {d['doc']}")
+                st.experimental_rerun()
 
             if cols[2].button(
                 "删除知识库",
                 use_container_width=True,
             ):
                 ret = api.delete_knowledge_base(kb)
-                st.toast(ret["detail"][0]["msg"])
                 st.experimental_rerun()
+                st.toast(ret["msg"])
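Note (not part of the diff): the patch leans on two Streamlit/st-aggrid patterns — a `JsCode` cell renderer that needs `allow_unsafe_jscode=True` to execute, and a `st.session_state`-backed `selectbox` index so the newly created knowledge base stays selected after `st.experimental_rerun()`. The snippet below is a minimal, self-contained sketch of both patterns; `kb_names` and the `doc_details` DataFrame are made-up placeholders standing in for the project's `get_kb_details()` / `get_kb_doc_details()` data.

```python
import pandas as pd
import streamlit as st
from st_aggrid import AgGrid, JsCode
from st_aggrid.grid_options_builder import GridOptionsBuilder

# Same idea as the patch: render boolean cells as ✓ / × on the client side.
# JsCode only runs when AgGrid is called with allow_unsafe_jscode=True.
cell_renderer = JsCode("""function(params) {if(params.value==true){return '✓'}else{return '×'}}""")

kb_names = ["samples", "faq"]  # placeholder knowledge-base names

# Recompute the selectbox index from session_state instead of defaulting to 0,
# so the selection survives st.experimental_rerun().
if st.session_state.get("selected_kb_name") in kb_names:
    selected_kb_index = kb_names.index(st.session_state["selected_kb_name"])
else:
    selected_kb_index = 0

selected_kb = st.selectbox("请选择或新建知识库:", kb_names, index=selected_kb_index)
st.session_state["selected_kb_name"] = selected_kb

# Dummy file listing standing in for get_kb_doc_details(kb).
doc_details = pd.DataFrame({
    "No": [1, 2],
    "file_name": ["README.md", "INSTALL.md"],
    "in_folder": [True, True],
    "in_db": [True, False],
})

gb = GridOptionsBuilder.from_dataframe(doc_details)
gb.configure_column("in_folder", "源文件", cellRenderer=cell_renderer)
gb.configure_column("in_db", "向量库", cellRenderer=cell_renderer)
gb.configure_selection("multiple", use_checkbox=True)

AgGrid(
    doc_details,
    gb.build(),
    allow_unsafe_jscode=True,  # required for the JsCode renderer above
)
```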