From 2716ff739deb0e63164c863648cc83a509660ad9 Mon Sep 17 00:00:00 2001
From: hzg0601
Date: Tue, 26 Sep 2023 21:55:57 +0800
Subject: [PATCH] 1. Update the notes about the vllm backend in config; 2. Update requirements and requirements_api
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 configs/model_config.py.example  | 3 ++-
 configs/server_config.py.example | 6 +++---
 requirements.txt                 | 2 +-
 requirements_api.txt             | 2 +-
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/configs/model_config.py.example b/configs/model_config.py.example
index a4c1dbb..7fdb99e 100644
--- a/configs/model_config.py.example
+++ b/configs/model_config.py.example
@@ -36,6 +36,7 @@ MODEL_PATH = {
     },
     # TODO: add all supported llm models
     "llm_model": {
+        # Some of the models below have not been fully tested; support is inferred only from the fastchat and vllm model lists
         "chatglm-6b": "THUDM/chatglm-6b",
         "chatglm2-6b": "THUDM/chatglm2-6b",
         "chatglm2-6b-int4": "THUDM/chatglm2-6b-int4",
@@ -93,7 +94,7 @@ EMBEDDING_MODEL = "m3e-base"
 EMBEDDING_DEVICE = "auto"
 
 # LLM name
-LLM_MODEL = "baichuan-7b"
+LLM_MODEL = "chatglm2-6b"
 
 # Device the LLM runs on. "auto" detects it automatically; it can also be set manually to one of "cuda", "mps", "cpu".
 LLM_DEVICE = "auto"
diff --git a/configs/server_config.py.example b/configs/server_config.py.example
index cc3a19b..6197052 100644
--- a/configs/server_config.py.example
+++ b/configs/server_config.py.example
@@ -38,10 +38,10 @@ FSCHAT_MODEL_WORKERS = {
         "host": DEFAULT_BIND_HOST,
         "port": 20002,
         "device": LLM_DEVICE,
-        "infer_turbo": "vllm" # False,'vllm': the inference acceleration framework to use; if vllm runs into HuggingFace connection issues, see doc/FAQ
+        "infer_turbo": False # one of [False, 'vllm']: the inference acceleration framework to use; if vllm runs into HuggingFace connection issues, see doc/FAQ
 
         # Parameters that need to be configured for multi-GPU loading in model_worker
-        # "gpus": None, # GPUs to use, specified as a str such as "0,1"
+        # "gpus": None, # GPUs to use, specified as a str such as "0,1"; if this has no effect, specify them via CUDA_VISIBLE_DEVICES="0,1" instead
         # "num_gpus": 1, # number of GPUs to use
         # "max_gpu_memory": "20GiB", # maximum GPU memory to use per GPU
 
@@ -62,7 +62,7 @@ FSCHAT_MODEL_WORKERS = {
         # "no_register": False,
         # "embed_in_truncate": False,
 
-        # The following are vllm_worker config parameters; note that vllm requires a GPU
+        # The following are vllm_worker config parameters; note that vllm requires a GPU and has only been tested on Linux
 
         # tokenizer = model_path # add this here if the tokenizer differs from model_path
         # 'tokenizer_mode':'auto',
diff --git a/requirements.txt b/requirements.txt
index 3242ea7..6dbcfb7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,7 +23,7 @@ pathlib
 pytest
 scikit-learn
 numexpr
-vllm==0.1.7
+vllm==0.1.7; sys_platform == "linux"
 # online api libs
 # zhipuai
 # dashscope>=1.10.0 # qwen
diff --git a/requirements_api.txt b/requirements_api.txt
index 674d759..e8e2b69 100644
--- a/requirements_api.txt
+++ b/requirements_api.txt
@@ -23,7 +23,7 @@ pathlib
 pytest
 scikit-learn
 numexpr
-vllm==0.1.7
+vllm==0.1.7; sys_platform == "linux"
 # online api libs
 # zhipuai
 # dashscope>=1.10.0 # qwen
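
The requirements change above gates vllm behind a PEP 508 environment marker, so pip installs vllm==0.1.7 only on Linux and silently skips it on macOS and Windows. A minimal sketch of evaluating that same marker from Python, assuming the third-party "packaging" library is available (this helper is illustrative and not part of the patch):

    # Evaluate the environment marker that now guards the vllm requirement.
    # Requires the third-party "packaging" library (pip install packaging).
    from packaging.markers import Marker

    marker = Marker('sys_platform == "linux"')
    # True on Linux, False elsewhere; pip applies the same rule when it
    # decides whether to install vllm==0.1.7 from requirements.txt.
    print(marker.evaluate())

This mirrors the config change: "infer_turbo" now defaults to False, and per the updated comment the vllm worker is only expected to work on a Linux host with a GPU.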