diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..79c029e
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,29 @@
+FROM python:3.8
+
+LABEL maintainer="chatGLM"
+
+COPY agent /chatGLM/agent
+
+COPY chains /chatGLM/chains
+
+COPY configs /chatGLM/configs
+
+COPY content /chatGLM/content
+
+COPY models /chatGLM/models
+
+COPY nltk_data /chatGLM/nltk_data
+
+COPY requirements.txt /chatGLM/
+
+COPY cli_demo.py /chatGLM/
+
+COPY webui.py /chatGLM/
+
+WORKDIR /chatGLM
+
+RUN pip install --user torch torchvision tensorboard cython -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+RUN pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple/ --trusted-host pypi.tuna.tsinghua.edu.cn
+
+CMD ["python", "-u", "webui.py"]
diff --git a/README.md b/README.md
index c592cec..f521e5c 100644
--- a/README.md
+++ b/README.md
@@ -4,11 +4,11 @@
 
 🌍 [_READ THIS IN ENGLISH_](README_en.md)
 
-🤖️ A local-knowledge-based ChatGLM application built with [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) + [langchain](https://github.com/hwchase17/langchain).
+🤖️ A local-knowledge-based ChatGLM application built with [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) + [langchain](https://github.com/hwchase17/langchain). Adds support for the model [ClueAI/ChatYuan-large-v2](https://huggingface.co/ClueAI/ChatYuan-large-v2) from the [clue-ai/ChatYuan](https://github.com/clue-ai/ChatYuan) project.
 
 💡 Inspired by [GanymedeNil](https://github.com/GanymedeNil)'s project [document.ai](https://github.com/GanymedeNil/document.ai) and the [ChatGLM-6B Pull Request](https://github.com/THUDM/ChatGLM-6B/pull/216) created by [AlexZhangji](https://github.com/AlexZhangji), this project builds a local-knowledge question answering application implemented entirely with open-source models.
 
-✅ This project uses [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main) as its Embedding model and [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) as its LLM. Relying on these models, the project can be deployed fully **offline** and **privately** with **open-source** models only.
+✅ This project uses [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main) as its default Embedding model and [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) as its default LLM. Relying on these models, the project can be deployed fully **offline** and **privately** with **open-source** models only.
 
 ⛓️ The implementation principle is shown in the figure below: load files -> read text -> split text -> vectorize text -> vectorize the question -> match the `top k` text vectors most similar to the question vector -> add the matched text, together with the question, to the `prompt` as context -> submit to the `LLM` to generate an answer.
@@ -22,9 +22,7 @@
 
 See the [changelog](docs/CHANGELOG.md).
 
-## Usage
-
-### Hardware requirements
+## Hardware requirements
 
 - Hardware requirements for the ChatGLM-6B model
 
@@ -38,9 +36,19 @@
 
 The Embedding model selected by default in this project, [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main), takes about 3 GB of GPU memory and can also be configured to run entirely on the CPU.
 
+## Docker deployment
+
+```commandline
+$ docker build -t chatglm:v1.0 .
+
+$ docker run -d --restart=always --name chatglm -p 7860:7860 -v /www/wwwroot/code/langchain-ChatGLM:/chatGLM chatglm
+```
+
+## Development deployment
+
 ### Software requirements
 
-This project has been tested with Python 3.8 and CUDA 11.7.
+This project has been tested with Python 3.8 - 3.10 and CUDA 11.7, on Windows, ARM-based macOS, and Linux.
 
 ### Loading the model locally
 
@@ -123,6 +131,7 @@ The Web UI provides the following features:
   - [x] THUDM/chatglm-6b
   - [x] THUDM/chatglm-6b-int4
   - [x] THUDM/chatglm-6b-int4-qe
+  - [x] ClueAI/ChatYuan-large-v2
 - [ ] Web UI
   - [x] Web UI demo implemented with gradio
   - [x] Show model output and error messages
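The README hunks above describe the pipeline: load file -> read text -> split text -> vectorize text -> vectorize the question -> retrieve the `top k` most similar chunks -> stuff them into the `prompt` -> generate with the `LLM`. The sketch below is a minimal rendering of those steps using stock langchain components from this era, not the project's actual implementation (which lives in `chains/local_doc_qa.py`); the file path, chunk size, `k`, and prompt template are illustrative assumptions.

```python
from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

# Load a file and split it into chunks (load file -> read text -> split text).
docs = UnstructuredFileLoader("content/sample.md").load()  # placeholder path
docs = CharacterTextSplitter(chunk_size=500, chunk_overlap=0).split_documents(docs)

# Embed the chunks and build a FAISS index (text vectorization).
embeddings = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
vector_store = FAISS.from_documents(docs, embeddings)

# Embed the question and fetch the top k most similar chunks.
query = "How is the knowledge base indexed?"
related_docs = vector_store.similarity_search(query, k=6)

# Stuff the matched chunks into the prompt as context; the project's real
# prompt template differs, and the resulting string goes to ChatGLM-6B.
context = "\n".join(doc.page_content for doc in related_docs)
prompt = f"Answer based on the following context:\n{context}\n\nQuestion: {query}"
```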
diff --git a/api.py b/api.py
index ae26c51..7f95be8 100644
--- a/api.py
+++ b/api.py
@@ -42,7 +42,7 @@ async def get_local_doc_qa():
 
 
 @app.post("/file")
-async def upload_file(UserFile: UploadFile=File(...)):
+async def upload_file(UserFile: UploadFile = File(...)):
     global vs_path
     response = {
         "msg": None,
@@ -67,7 +67,7 @@ async def upload_file(UserFile: UploadFile=File(...)):
     return response
 
 @app.post("/qa")
-async def get_answer(UserQuery: Query):
+async def get_answer(query: str = ""):
     response = {
         "status": 0,
         "message": "",
@@ -76,7 +76,7 @@ async def get_answer(UserQuery: Query):
     global vs_path
     history = []
     try:
-        resp, history = local_doc_qa.get_knowledge_based_answer(query=UserQuery.query,
+        resp, history = local_doc_qa.get_knowledge_based_answer(query=query,
                                                                 vs_path=vs_path,
                                                                 chat_history=history)
        if REPLY_WITH_SOURCE:
@@ -95,9 +95,10 @@
 
 if __name__ == "__main__":
     uvicorn.run(
-        app='api:app',
+        app=app,
         host='0.0.0.0',
         port=8100,
-        reload = True,
+        # uvicorn only supports reload when the app is passed as an import string
+        # (app='api:app'), so reload is disabled now that the app object is passed.
+        reload=False,
     )
 
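The hunks above change `/qa` from a Pydantic `Query` request body to a bare `query` parameter; in FastAPI, a scalar parameter with a default on a POST route is read from the URL query string, not from a JSON body. Below is a minimal client sketch against the defaults in this file (port 8100); the file name and question are placeholders.

```python
import requests

BASE_URL = "http://localhost:8100"  # api.py binds 0.0.0.0:8100 above

# Upload a document; the multipart field name must match the UserFile parameter.
with open("sample.md", "rb") as f:  # placeholder file
    upload = requests.post(f"{BASE_URL}/file", files={"UserFile": f})
print(upload.json())

# Ask a question; after this change, `query` travels as a query-string
# parameter rather than inside the former UserQuery JSON body.
answer = requests.post(f"{BASE_URL}/qa", params={"query": "What is this project?"})
print(answer.json())  # status/message/history fields, per the handler above
```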
diff --git a/chains/local_doc_qa.py b/chains/local_doc_qa.py
index a15be3d..2640760 100644
--- a/chains/local_doc_qa.py
+++ b/chains/local_doc_qa.py
@@ -33,6 +33,7 @@ def load_file(filepath):
 class LocalDocQA:
     llm: object = None
     embeddings: object = None
+    top_k: int = VECTOR_SEARCH_TOP_K
 
     def init_cfg(self,
                  embedding_model: str = EMBEDDING_MODEL,
@@ -49,9 +50,8 @@ class LocalDocQA:
                           use_ptuning_v2=use_ptuning_v2)
         self.llm.history_len = llm_history_len
 
-        self.embeddings = HuggingFaceEmbeddings(model_name=embedding_model_dict[embedding_model], )
-        self.embeddings.client = sentence_transformers.SentenceTransformer(self.embeddings.model_name,
-                                                                           device=embedding_device)
+        self.embeddings = HuggingFaceEmbeddings(model_name=embedding_model_dict[embedding_model],
+                                                model_kwargs={'device': embedding_device})
         self.top_k = top_k
 
     def init_knowledge_vector_store(self,
@@ -133,7 +133,7 @@
         )
         knowledge_chain.return_source_documents = True
-        
+
         result = knowledge_chain({"query": query})
         self.llm.history[-1][0] = query
         return result, self.llm.history
diff --git a/configs/model_config.py b/configs/model_config.py
index 79baa2e..9d66d37 100644
--- a/configs/model_config.py
+++ b/configs/model_config.py
@@ -19,6 +19,7 @@ llm_model_dict = {
     "chatglm-6b-int4-qe": "THUDM/chatglm-6b-int4-qe",
     "chatglm-6b-int4": "THUDM/chatglm-6b-int4",
     "chatglm-6b": "THUDM/chatglm-6b",
+    "chatyuan": "ClueAI/ChatYuan-large-v2",
 }
 
 # LLM model name
diff --git a/docs/FAQ.md b/docs/FAQ.md
index ec674e8..a0d5d54 100644
--- a/docs/FAQ.md
+++ b/docs/FAQ.md
@@ -95,7 +95,7 @@ Q9: After downloading a model, how do I modify the code to run the local model?
 
 A9: Once the model has been downloaded, modify the `embedding_model_dict` and `llm_model_dict` parameters in [configs/model_config.py](../configs/model_config.py); for example, change `embedding_model_dict` from
 
-```json
+```python
 embedding_model_dict = {
     "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
     "ernie-base": "nghuyong/ernie-3.0-base-zh",
@@ -105,7 +105,7 @@ embedding_model_dict = {
 
 to
 
-```json
+```python
 embedding_model_dict = {
     "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
     "ernie-base": "nghuyong/ernie-3.0-base-zh",
diff --git a/models/chatglm_llm.py b/models/chatglm_llm.py
index c3d1a21..c951b78 100644
--- a/models/chatglm_llm.py
+++ b/models/chatglm_llm.py
@@ -72,14 +72,27 @@
         response, _ = self.model.chat(
             self.tokenizer,
             prompt,
-            history=self.history[-self.history_len:] if self.history_len>0 else [],
+            history=self.history[-self.history_len:] if self.history_len > 0 else [],
             max_length=self.max_token,
             temperature=self.temperature,
         )
         torch_gc()
         if stop is not None:
             response = enforce_stop_tokens(response, stop)
-        self.history = self.history+[[None, response]]
+        self.history = self.history + [[None, response]]
+        return response
+
+    def chat(self,
+             prompt: str) -> str:
+        response, _ = self.model.chat(
+            self.tokenizer,
+            prompt,
+            history=self.history[-self.history_len:] if self.history_len > 0 else [],
+            max_length=self.max_token,
+            temperature=self.temperature,
+        )
+        torch_gc()
+        self.history = self.history + [[None, response]]
         return response
 
     def load_model(self,
@@ -113,7 +126,7 @@
             AutoModel.from_pretrained(
                 model_name_or_path,
                 config=model_config,
-                trust_remote_code=True, 
+                trust_remote_code=True,
                 **kwargs)
             .half()
             .cuda()
@@ -146,7 +159,8 @@
                 new_prefix_state_dict[k[len("transformer.prefix_encoder."):]] = v
             self.model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)
             self.model.transformer.prefix_encoder.float()
-        except Exception:
+        except Exception as e:
+            print(e)
             print("加载PrefixEncoder模型参数失败")
 
         self.model = self.model.eval()
diff --git a/requirements.txt b/requirements.txt
index f15f07e..dd11f9b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-langchain>=0.0.124
+langchain>=0.0.146
 transformers==4.27.1
 unstructured[local-inference]
 layoutparser[layoutmodels,tesseract]
@@ -9,4 +9,4 @@ icetk
 cpm_kernels
 faiss-cpu
 gradio>=3.25.0
-detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.6#egg=detectron2
\ No newline at end of file
+#detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.6#egg=detectron2
\ No newline at end of file
diff --git a/webui.py b/webui.py
index bf613c9..04ed490 100644
--- a/webui.py
+++ b/webui.py
@@ -17,10 +17,10 @@ LLM_HISTORY_LEN = 3
 
 def get_vs_list():
     if not os.path.exists(VS_ROOT_PATH):
         return []
-    return ["新建知识库"] + os.listdir(VS_ROOT_PATH)
+    return os.listdir(VS_ROOT_PATH)
 
 
-vs_list = get_vs_list()
+vs_list = ["新建知识库"] + get_vs_list()
 
 embedding_model_dict_list = list(embedding_model_dict.keys())
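The new `ChatGLM.chat()` method added above is a single-call variant of `_call()` that skips stop-token enforcement while still appending the reply to `self.history`. Below is a minimal usage sketch; `load_model`'s full signature is truncated in this diff, so passing the Hugging Face model id as `model_name_or_path` (the argument forwarded to `AutoModel.from_pretrained` above) is an assumption, and any remaining parameters are left at their defaults.

```python
from models.chatglm_llm import ChatGLM

llm = ChatGLM()
# Assumption: model_name_or_path accepts a Hugging Face id or local path,
# mirroring the AutoModel.from_pretrained call inside load_model.
llm.load_model(model_name_or_path="THUDM/chatglm-6b")

print(llm.chat("Hello, please introduce yourself."))  # reply appended to llm.history
print(llm.chat("Summarize that in one sentence."))    # reuses up to history_len prior turns
```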