From d9056a8df6d51736b36f2d6dd6f068bbae805d7c Mon Sep 17 00:00:00 2001 From: zR <2448370773@qq.com> Date: Fri, 29 Sep 2023 16:04:44 +0800 Subject: [PATCH] =?UTF-8?q?python3.8=E7=94=A8=E6=88=B7=E9=9C=80=E8=A6=81?= =?UTF-8?q?=E5=8A=A0=E4=B8=8A=5F=5Ffuture=5F=5F=20(#1624)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 增加了仅限GPT4的agent功能,陆续补充,中文版readme已写 * issue提到的一个bug * 温度最小改成0,但是不应该支持负数 * 修改了最小的温度 * 增加了部分Agent支持和修改了启动文件的部分bug * 修改了GPU数量配置文件 * 1 1 * 修复配置文件错误 * 更新readme,稳定测试 * 更新readme * python3.8用户需要加这两行 --- README.md | 2 +- README_en.md | 3 ++- configs/model_config.py.example | 1 + server/agent/callbacks.py | 1 + server/agent/custom_template.py | 1 + 5 files changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c8e065e..258069a 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ docker run -d --gpus all -p 80:8501 registry.cn-beijing.aliyuncs.com/chatchat/ch + chatglm2-6b & LLaMA-7B 最低显存要求: 7GB 推荐显卡: RTX 3060, RTX 2060 + LLaMA-13B 最低显存要求: 11GB 推荐显卡: RTX 2060 12GB, RTX3060 12GB, RTX3080, RTXA2000 + Qwen-14B-Chat 最低显存要求: 13GB 推荐显卡: RTX 3090 -+ LLaMA-30B 最低显存要求: 22GB 推荐显卡:RTX A5000,RTX 3090,RTX 4090,RTX 6000,Tesla V100,RTX Tesla P40 ++ LLaMA-30B 最低显存要求: 22GB 推荐显卡:RTX A5000,RTX 3090,RTX 4090,RTX 6000,Tesla V100,RTX Tesla P40 + LLaMA-65B 最低显存要求: 40GB 推荐显卡:A100,A40,A6000 如果是int8 则显存x1.5 fp16 x2.5的要求 diff --git a/README_en.md b/README_en.md index c7771ff..8a20916 100644 --- a/README_en.md +++ b/README_en.md @@ -67,9 +67,10 @@ If you want to run the native model (int4 version) on the GPU without problems, + chatglm2-6b & LLaMA-7B Minimum RAM requirement: 7GB Recommended graphics cards: RTX 3060, RTX 2060 + LLaMA-13B Minimum graphics memory requirement: 11GB Recommended cards: RTX 2060 12GB, RTX3060 12GB, RTX3080, RTXA2000 + Qwen-14B-Chat Minimum memory requirement: 13GB Recommended graphics card: RTX 3090 -+ LLaMA-30B Minimum Memory Requirement: 22GB Recommended Cards: RTX A5000,RTX 3090,RTX 4090,RTX 6000,Tesla V100,RTX Tesla P40 ++ LLaMA-30B Minimum Memory Requirement: 22GB Recommended Cards: RTX A5000,RTX 3090,RTX 4090,RTX 6000,Tesla V100,RTX Tesla P40 + Minimum memory requirement for LLaMA-65B: 40GB Recommended cards: A100,A40,A6000 + If int8 then memory x1.5 fp16 x2.5 requirement. For example: using fp16 to reason about the Qwen-7B-Chat model requires 16GB of video memory. diff --git a/configs/model_config.py.example b/configs/model_config.py.example index 464e01b..59a1f7e 100644 --- a/configs/model_config.py.example +++ b/configs/model_config.py.example @@ -92,6 +92,7 @@ MODEL_PATH = { # 选用的 Embedding 名称 EMBEDDING_MODEL = "m3e-base" # 可以尝试最新的嵌入式sota模型:piccolo-large-zh + # Embedding 模型运行设备。设为"auto"会自动检测,也可手动设定为"cuda","mps","cpu"其中之一。 EMBEDDING_DEVICE = "auto" diff --git a/server/agent/callbacks.py b/server/agent/callbacks.py index 394d227..3901f7e 100644 --- a/server/agent/callbacks.py +++ b/server/agent/callbacks.py @@ -1,3 +1,4 @@ +from __future__ import annotations from uuid import UUID from langchain.callbacks import AsyncIteratorCallbackHandler import json diff --git a/server/agent/custom_template.py b/server/agent/custom_template.py index 25697a6..aa4aa11 100644 --- a/server/agent/custom_template.py +++ b/server/agent/custom_template.py @@ -1,3 +1,4 @@ +from __future__ import annotations from langchain.agents import Tool, AgentOutputParser from langchain.prompts import StringPromptTemplate from typing import List, Union