diff --git a/README.md b/README.md index 972da40..39c230f 100644 --- a/README.md +++ b/README.md @@ -243,7 +243,7 @@ Web UI 可以实现如下功能: - [x] 选择知识库开始问答 - [x] 上传文件/文件夹至知识库 - [x] 知识库测试 - - [ ] 删除知识库中文件 + - [x] 删除知识库中文件 - [x] 支持搜索引擎问答 - [ ] 增加 API 支持 - [x] 利用 fastapi 实现 API 部署方式 @@ -251,7 +251,7 @@ Web UI 可以实现如下功能: - [x] VUE 前端 ## 项目交流群 -二维码 +二维码 🎉 langchain-ChatGLM 项目微信交流群,如果你也对本项目感兴趣,欢迎加入群聊参与讨论交流。 diff --git a/configs/model_config.py b/configs/model_config.py index 0ead812..33176be 100644 --- a/configs/model_config.py +++ b/configs/model_config.py @@ -64,7 +64,18 @@ llm_model_dict = { "local_model_path": None, "provides": "ChatGLM" }, - + "chatglm2-6b-int4": { + "name": "chatglm2-6b-int4", + "pretrained_model_name": "THUDM/chatglm2-6b-int4", + "local_model_path": None, + "provides": "ChatGLM" + }, + "chatglm2-6b-int8": { + "name": "chatglm2-6b-int8", + "pretrained_model_name": "THUDM/chatglm2-6b-int8", + "local_model_path": None, + "provides": "ChatGLM" + }, "chatyuan": { "name": "chatyuan", "pretrained_model_name": "ClueAI/ChatYuan-large-v2", @@ -192,7 +203,7 @@ PROMPT_TEMPLATE = """已知信息: 根据上述已知信息,简洁和专业的来回答用户的问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题” 或 “没有提供足够的相关信息”,不允许在答案中添加编造成分,答案请使用中文。 问题是:{question}""" -# 缓存知识库数量 +# 缓存知识库数量,如果是ChatGLM2,ChatGLM2-int4,ChatGLM2-int8模型若检索效果不好可以调成’10’ CACHED_VS_NUM = 1 # 文本分句长度 diff --git a/img/qr_code_39.jpg b/img/qr_code_39.jpg deleted file mode 100644 index e866af0..0000000 Binary files a/img/qr_code_39.jpg and /dev/null differ diff --git a/img/qr_code_42.jpg b/img/qr_code_42.jpg new file mode 100644 index 0000000..146b873 Binary files /dev/null and b/img/qr_code_42.jpg differ diff --git a/loader/image_loader.py b/loader/image_loader.py index ec32459..4ac4c51 100644 --- a/loader/image_loader.py +++ b/loader/image_loader.py @@ -5,9 +5,6 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader from paddleocr import PaddleOCR import os import nltk -from configs.model_config import NLTK_DATA_PATH - -nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path class UnstructuredPaddleImageLoader(UnstructuredFileLoader): """Loader that uses unstructured to load image files, such as PNGs and JPGs.""" @@ -35,6 +32,10 @@ class UnstructuredPaddleImageLoader(UnstructuredFileLoader): if __name__ == "__main__": import sys sys.path.append(os.path.dirname(os.path.dirname(__file__))) + + from configs.model_config import NLTK_DATA_PATH + nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path + filepath = os.path.join(os.path.dirname(os.path.dirname(__file__)), "knowledge_base", "samples", "content", "test.jpg") loader = UnstructuredPaddleImageLoader(filepath, mode="elements") docs = loader.load() diff --git a/webui.py b/webui.py index 0a96e4c..69962d2 100644 --- a/webui.py +++ b/webui.py @@ -218,7 +218,12 @@ def change_chunk_conent(mode, label_conent, history): def add_vs_name(vs_name, chatbot): - if vs_name in get_vs_list(): + if vs_name is None or vs_name.strip() == "" : + vs_status = "知识库名称不能为空,请重新填写知识库名称" + chatbot = chatbot + [[None, vs_status]] + return gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update( + visible=False), chatbot, gr.update(visible=False) + elif vs_name in get_vs_list(): vs_status = "与已有知识库名称冲突,请重新选择其他名称后提交" chatbot = chatbot + [[None, vs_status]] return gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(