diff --git a/README.md b/README.md
index 8bcedb2..972da40 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,10 @@
 
 🚩 This project does not involve fine-tuning or training, but fine-tuning or training can be used to improve its results.
 
+🐳 Docker image: registry.cn-beijing.aliyuncs.com/isafetech/chatmydata:1.0 (thanks to @InkSong 🌲)
+
+💻 Run with: docker run -d -p 80:7860 --gpus all registry.cn-beijing.aliyuncs.com/isafetech/chatmydata:1.0
+
 🌐 [AutoDL image](https://www.codewithgpu.com/i/imClumsyPanda/langchain-ChatGLM/langchain-ChatGLM)
 
 📓 [Run the project online on ModelWhale](https://www.heywhale.com/mw/project/643977aa446c45f4592a1e59)
@@ -60,6 +64,23 @@
 
 The default Embedding model used in this project, [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main), takes about 3 GB of GPU memory and can also be configured to run on the CPU.
 
+## Docker All-in-One Image
+🐳 Docker image: `registry.cn-beijing.aliyuncs.com/isafetech/chatmydata:1.0` 🌲
+
+💻 Run with a single command:
+```shell
+docker run -d -p 80:7860 --gpus all registry.cn-beijing.aliyuncs.com/isafetech/chatmydata:1.0
+```
+
+- The image is `25.2G`, built from [v0.1.16](https://github.com/imClumsyPanda/langchain-ChatGLM/releases/tag/v0.1.16) on top of the `nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04` base image
+- It ships with two `embedding` models, `m3e-base` and `text2vec-large-chinese`, plus `fastchat+chatglm-6b`
+- It is intended for one-command deployment; make sure the NVIDIA driver is already installed on your Linux distribution
+- Note that you do not need to install the CUDA toolkit on the host, but you do need the `NVIDIA Driver` and the `NVIDIA Container Toolkit`; see the [installation guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
+- The first pull and the first start both take some time; on first start, check the logs with `docker logs -f <container id>` as shown in the screenshot below
+- If startup hangs at the `Waiting..` step, use `docker exec -it <container id> bash` and inspect the logs for that stage under `/logs/`
+![](img/docker_logs.png)
+
+
 ## Docker Deployment
 To let the container use the host's GPU, the [NVIDIA Container Toolkit](https://github.com/NVIDIA/nvidia-container-toolkit) must be installed on the host. The installation steps are as follows:
 ```shell
@@ -230,7 +251,7 @@ Web UI provides the following features:
 - [x] VUE front end
 
 ## Project Discussion Group
-QR code
+QR code
 
 🎉 WeChat group for the langchain-ChatGLM project. If you are also interested in this project, you are welcome to join the group chat and take part in the discussion.
 
diff --git a/api.py b/api.py
index 4965cbf..cf947b0 100644
--- a/api.py
+++ b/api.py
@@ -4,7 +4,7 @@ import os
 import shutil
 from typing import List, Optional
 import urllib
-
+import asyncio
 import nltk
 import pydantic
 import uvicorn
@@ -382,6 +382,7 @@ async def stream_chat(websocket: WebSocket):
         for resp, history in local_doc_qa.get_knowledge_based_answer(
                 query=question, vs_path=vs_path, chat_history=history, streaming=True
         ):
+            await asyncio.sleep(0)
             await websocket.send_text(resp["result"][last_print_len:])
             last_print_len = len(resp["result"])
 
diff --git a/img/docker_logs.png b/img/docker_logs.png
new file mode 100644
index 0000000..0382958
Binary files /dev/null and b/img/docker_logs.png differ
diff --git a/img/qr_code_39.jpg b/img/qr_code_39.jpg
new file mode 100644
index 0000000..e866af0
Binary files /dev/null and b/img/qr_code_39.jpg differ
diff --git a/models/loader/loader.py b/models/loader/loader.py
index f315e6c..0c32835 100644
--- a/models/loader/loader.py
+++ b/models/loader/loader.py
@@ -257,10 +257,21 @@ class LoaderCheckPoint:
         # When chat or stream_chat is called, input_ids is moved to model.device.
         # If transformer.word_embeddings.device differs from model.device, a RuntimeError is raised,
         # so transformer.word_embeddings, transformer.final_layernorm and lm_head are all kept on the first GPU.
-        device_map = {f'{layer_prefix}.word_embeddings': 0,
+
+        encode = ""
+        if 'chatglm2' in self.model_name:
+            device_map = {
+                f"{layer_prefix}.embedding.word_embeddings": 0,
+                f"{layer_prefix}.rotary_pos_emb": 0,
+                f"{layer_prefix}.output_layer": 0,
+                f"{layer_prefix}.encoder.final_layernorm": 0,
+                f"base_model.model.output_layer": 0
+            }
+            encode = ".encoder"
+        else:
+            device_map = {f'{layer_prefix}.word_embeddings': 0,
                       f'{layer_prefix}.final_layernorm': 0, 'lm_head': 0,
                      f'base_model.model.lm_head': 0, }
-
         used = 2
         gpu_target = 0
         for i in range(num_trans_layers):
@@ -268,7 +279,7 @@ class LoaderCheckPoint:
             gpu_target += 1
             used = 0
         assert gpu_target < num_gpus
-        device_map[f'{layer_prefix}.layers.{i}'] = gpu_target
+        device_map[f'{layer_prefix}{encode}.layers.{i}'] = gpu_target
         used += 1
 
         return device_map
diff --git a/webui_st.py b/webui_st.py
index 6d1265e..bd12a3a 100644
--- a/webui_st.py
+++ b/webui_st.py
@@ -143,7 +143,7 @@ def init_model(llm_model: str = 'chat-glm-6b', embedding_model: str = 'text2vec'
 #     return history + [[None, model_status]]
 
 
-def get_vector_store(vs_id, files, sentence_size, history, one_conent, one_content_segmentation):
+def get_vector_store(local_doc_qa, vs_id, files, sentence_size, history, one_conent, one_content_segmentation):
     vs_path = os.path.join(KB_ROOT_PATH, vs_id, "vector_store")
     filelist = []
     if not os.path.exists(os.path.join(KB_ROOT_PATH, vs_id, "content")):
@@ -455,6 +455,8 @@ with st.sidebar:
         cols = st.columns([12, 10])
         kb_name = cols[0].text_input(
             '新知识库名称', placeholder='新知识库名称', label_visibility='collapsed')
+        if 'kb_name' not in st.session_state:
+            st.session_state.kb_name = kb_name
         cols[1].button('新建知识库', on_click=on_new_kb)
         vs_path = st.selectbox(
             '选择知识库', vs_list, on_change=on_vs_change, key='vs_path')
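Note on the `models/loader/loader.py` hunk above: ChatGLM2 moves the word embeddings under `embedding`, renames the output head to `output_layer`, and nests the transformer blocks under an `encoder` submodule, which is why the patch builds a different `device_map` and threads an `.encoder` infix into the per-layer keys. The following is a minimal standalone sketch of that layer-assignment logic, not the project's actual method; the module names and the 28-block count follow the ChatGLM/ChatGLM2 checkpoints, and `num_gpus=2` is only an example.

```python
from typing import Dict


def chatglm_auto_device_map(model_name: str, num_gpus: int,
                            layer_prefix: str = "transformer",
                            num_trans_layers: int = 28) -> Dict[str, int]:
    """Spread ChatGLM/ChatGLM2 blocks over GPUs, pinning the embeddings,
    final layernorm and output head to GPU 0 so they share input_ids' device."""
    if "chatglm2" in model_name:
        # ChatGLM2: blocks live under transformer.encoder, lm_head became output_layer.
        device_map = {
            f"{layer_prefix}.embedding.word_embeddings": 0,
            f"{layer_prefix}.rotary_pos_emb": 0,
            f"{layer_prefix}.output_layer": 0,
            f"{layer_prefix}.encoder.final_layernorm": 0,
            "base_model.model.output_layer": 0,
        }
        encode = ".encoder"
    else:
        device_map = {
            f"{layer_prefix}.word_embeddings": 0,
            f"{layer_prefix}.final_layernorm": 0,
            "lm_head": 0,
            "base_model.model.lm_head": 0,
        }
        encode = ""

    # The modules pinned above count as two of the (num_trans_layers + 2) slots.
    per_gpu_layers = (num_trans_layers + 2) / num_gpus
    used, gpu_target = 2, 0
    for i in range(num_trans_layers):
        if used >= per_gpu_layers:
            gpu_target += 1
            used = 0
        assert gpu_target < num_gpus
        device_map[f"{layer_prefix}{encode}.layers.{i}"] = gpu_target
        used += 1
    return device_map


if __name__ == "__main__":
    # Example: chatglm2-6b's 28 blocks split across 2 GPUs.
    print(chatglm_auto_device_map("chatglm2-6b", num_gpus=2))
```

In the repository this kind of mapping is handed to `accelerate`'s `dispatch_model`, which places each named module on the GPU index given in the dict.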
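Note on the `api.py` hunk: `get_knowledge_based_answer` yields from a synchronous generator, so each chunk is produced while the event loop is blocked; adding `await asyncio.sleep(0)` yields control back to the loop once per chunk so pending websocket writes and other tasks get a chance to run instead of piling up until generation finishes. A rough sketch of the pattern, assuming FastAPI; the `fake_stream` generator and the `/stream` route are placeholders, not part of the project:

```python
import asyncio

from fastapi import FastAPI, WebSocket

app = FastAPI()


def fake_stream(prompt: str):
    """Stand-in for a blocking generator such as get_knowledge_based_answer."""
    text = ""
    for token in ["Hello", ", ", prompt, "!"]:
        text += token  # each step would normally run model inference synchronously
        yield {"result": text}


@app.websocket("/stream")
async def stream(websocket: WebSocket):
    await websocket.accept()
    prompt = await websocket.receive_text()
    last_print_len = 0
    for resp in fake_stream(prompt):
        # Yield control to the event loop once per chunk so queued sends and
        # other coroutines can run between blocking generator steps.
        await asyncio.sleep(0)
        await websocket.send_text(resp["result"][last_print_len:])
        last_print_len = len(resp["result"])
```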