From 1551adb867592de3ce7183f11bbb0f45003ffabb Mon Sep 17 00:00:00 2001 From: liunux4odoo Date: Wed, 16 Aug 2023 10:04:17 +0800 Subject: [PATCH 1/2] fix webui: use kb_names as selected_kb's options to avoid index error after delete_doc --- webui_pages/knowledge_base/knowledge_base.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/webui_pages/knowledge_base/knowledge_base.py b/webui_pages/knowledge_base/knowledge_base.py index 47eb24c..f059475 100644 --- a/webui_pages/knowledge_base/knowledge_base.py +++ b/webui_pages/knowledge_base/knowledge_base.py @@ -49,21 +49,27 @@ def file_exists(kb: str, selected_rows: List) -> Tuple[str, str]: def knowledge_base_page(api: ApiRequest): try: - kb_list = get_kb_details() + kb_list = {x["kb_name"]: x for x in get_kb_details()} except Exception as e: st.error("获取知识库信息错误,请检查是否已按照 `README.md` 中 `4 知识库初始化与迁移` 步骤完成初始化或迁移,或是否为数据库连接错误。") st.stop() - kb_names = [x["kb_name"] for x in kb_list] + kb_names = list(kb_list.keys()) if "selected_kb_name" in st.session_state and st.session_state["selected_kb_name"] in kb_names: selected_kb_index = kb_names.index(st.session_state["selected_kb_name"]) else: selected_kb_index = 0 + def format_selected_kb(kb_name: str) -> str: + if kb := kb_list.get(kb_name): + return f"{kb_name} ({kb['vs_type']} @ {kb['embed_model']})" + else: + return kb_name + selected_kb = st.selectbox( "请选择或新建知识库:", - kb_list + ["新建知识库"], - format_func=lambda s: f"{s['kb_name']} ({s['vs_type']} @ {s['embed_model']})" if type(s) != str else s, + kb_names + ["新建知识库"], + format_func=format_selected_kb, index=selected_kb_index ) @@ -117,7 +123,7 @@ def knowledge_base_page(api: ApiRequest): st.experimental_rerun() elif selected_kb: - kb = selected_kb["kb_name"] + kb = selected_kb # 上传文件 # sentence_size = st.slider("文本入库分句长度限制", 1, 1000, SENTENCE_SIZE, disabled=True) From a47240e87149434a834caac514f2831a066e91e2 Mon Sep 17 00:00:00 2001 From: liunux4odoo Date: Wed, 16 Aug 2023 10:14:47 +0800 Subject: [PATCH 2/2] fix KnowledgeFile: set text_splitter_name in database to SpacyTextSplitter if it is not set --- server/db/repository/knowledge_file_repository.py | 2 +- server/knowledge_base/utils.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/server/db/repository/knowledge_file_repository.py b/server/db/repository/knowledge_file_repository.py index f5e912f..404910f 100644 --- a/server/db/repository/knowledge_file_repository.py +++ b/server/db/repository/knowledge_file_repository.py @@ -27,7 +27,7 @@ def add_doc_to_db(session, kb_file: KnowledgeFile): file_ext=kb_file.ext, kb_name=kb_file.kb_name, document_loader_name=kb_file.document_loader_name, - text_splitter_name=kb_file.text_splitter_name, + text_splitter_name=kb_file.text_splitter_name or "SpacyTextSplitter", ) kb.file_count += 1 session.add(new_file) diff --git a/server/knowledge_base/utils.py b/server/knowledge_base/utils.py index 3e8be26..3ab6560 100644 --- a/server/knowledge_base/utils.py +++ b/server/knowledge_base/utils.py @@ -102,6 +102,7 @@ class KnowledgeFile: chunk_size=CHUNK_SIZE, chunk_overlap=OVERLAP_SIZE, ) + self.text_splitter_name = "SpacyTextSplitter" else: text_splitter_module = importlib.import_module('langchain.text_splitter') TextSplitter = getattr(text_splitter_module, self.text_splitter_name)