From 2716ff739deb0e63164c863648cc83a509660ad9 Mon Sep 17 00:00:00 2001
From: hzg0601
Date: Tue, 26 Sep 2023 21:55:57 +0800
Subject: [PATCH] 1. Update the notes about the vllm backend in config; 2. Update requirements and requirements_api
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 configs/model_config.py.example  | 3 ++-
 configs/server_config.py.example | 6 +++---
 requirements.txt                 | 2 +-
 requirements_api.txt             | 2 +-
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/configs/model_config.py.example b/configs/model_config.py.example
index a4c1dbb..7fdb99e 100644
--- a/configs/model_config.py.example
+++ b/configs/model_config.py.example
@@ -36,6 +36,7 @@ MODEL_PATH = {
     },
     # TODO: add all supported llm models
     "llm_model": {
+        # Some of the models below have not been fully tested; support is inferred only from the fastchat and vllm model lists
         "chatglm-6b": "THUDM/chatglm-6b",
         "chatglm2-6b": "THUDM/chatglm2-6b",
         "chatglm2-6b-int4": "THUDM/chatglm2-6b-int4",
@@ -93,7 +94,7 @@ EMBEDDING_MODEL = "m3e-base"
 EMBEDDING_DEVICE = "auto"
 
 # LLM name
-LLM_MODEL = "baichuan-7b"
+LLM_MODEL = "chatglm2-6b"
 
 # Device the LLM runs on. "auto" detects it automatically; it can also be set manually to one of "cuda", "mps", "cpu".
 LLM_DEVICE = "auto"
diff --git a/configs/server_config.py.example b/configs/server_config.py.example
index cc3a19b..6197052 100644
--- a/configs/server_config.py.example
+++ b/configs/server_config.py.example
@@ -38,10 +38,10 @@ FSCHAT_MODEL_WORKERS = {
         "host": DEFAULT_BIND_HOST,
         "port": 20002,
         "device": LLM_DEVICE,
-        "infer_turbo": "vllm" # False,'vllm': the inference acceleration framework to use; if vllm runs into HuggingFace connection issues, see doc/FAQ
+        "infer_turbo": False # one of [False, 'vllm']: the inference acceleration framework to use; if vllm runs into HuggingFace connection issues, see doc/FAQ
 
         # Parameters that need to be configured for multi-GPU loading in model_worker
-        # "gpus": None, # GPUs to use, specified as a str such as "0,1"
+        # "gpus": None, # GPUs to use, specified as a str such as "0,1"; if this has no effect, specify them via CUDA_VISIBLE_DEVICES="0,1" instead
         # "num_gpus": 1, # number of GPUs to use
         # "max_gpu_memory": "20GiB", # maximum GPU memory to use per GPU
 
@@ -62,7 +62,7 @@ FSCHAT_MODEL_WORKERS = {
         # "no_register": False,
         # "embed_in_truncate": False,
 
-        # The following are vllm_worker config parameters; note that vllm requires a GPU
+        # The following are vllm_worker config parameters; note that vllm requires a GPU and has only been tested on Linux
 
         # tokenizer = model_path # add this here if the tokenizer differs from model_path
         # 'tokenizer_mode':'auto',
diff --git a/requirements.txt b/requirements.txt
index 3242ea7..6dbcfb7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,7 +23,7 @@ pathlib
 pytest
 scikit-learn
 numexpr
-vllm==0.1.7
+vllm==0.1.7; sys_platform == "linux"
 # online api libs
 # zhipuai
 # dashscope>=1.10.0 # qwen
diff --git a/requirements_api.txt b/requirements_api.txt
index 674d759..e8e2b69 100644
--- a/requirements_api.txt
+++ b/requirements_api.txt
@@ -23,7 +23,7 @@ pathlib
 pytest
 scikit-learn
 numexpr
-vllm==0.1.7
+vllm==0.1.7; sys_platform == "linux"
 # online api libs
 # zhipuai
 # dashscope>=1.10.0 # qwen
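
The requirements change above gates vllm behind a PEP 508 environment marker, so pip installs vllm==0.1.7 only on Linux and silently skips it on macOS and Windows. A minimal sketch of evaluating that same marker from Python, assuming the third-party "packaging" library is available (this helper is illustrative and not part of the patch):

    # Evaluate the environment marker that now guards the vllm requirement.
    # Requires the third-party "packaging" library (pip install packaging).
    from packaging.markers import Marker

    marker = Marker('sys_platform == "linux"')
    # True on Linux, False elsewhere; pip applies the same rule when it
    # decides whether to install vllm==0.1.7 from requirements.txt.
    print(marker.evaluate())

This mirrors the config change: "infer_turbo" now defaults to False, and per the updated comment the vllm worker is only expected to work on a Linux host with a GPU.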