Merge pull request #1603 from hzg0601/dev
1. Update the notes about the vllm backend in config; 2. Update requirements and requirements_api;
commit c546b4271e
@@ -36,6 +36,7 @@ MODEL_PATH = {
     },
     # TODO: add all supported llm models
     "llm_model": {
+        # Some of the models below have not been fully tested; support is inferred from the fastchat and vllm model lists
         "chatglm-6b": "THUDM/chatglm-6b",
         "chatglm2-6b": "THUDM/chatglm2-6b",
         "chatglm2-6b-int4": "THUDM/chatglm2-6b-int4",
@@ -93,7 +94,7 @@ EMBEDDING_MODEL = "m3e-base"
 EMBEDDING_DEVICE = "auto"
 
 # LLM name
-LLM_MODEL = "baichuan-7b"
+LLM_MODEL = "chatglm2-6b"
 
 # Device to run the LLM on. "auto" detects automatically; it can also be set manually to one of "cuda", "mps", "cpu".
 LLM_DEVICE = "auto"
@@ -38,10 +38,10 @@ FSCHAT_MODEL_WORKERS = {
         "host": DEFAULT_BIND_HOST,
         "port": 20002,
         "device": LLM_DEVICE,
-        "infer_turbo": "vllm"  # False or 'vllm': the inference acceleration framework to use; if vllm runs into HuggingFace communication problems, see doc/FAQ
+        "infer_turbo": False  # one of [False, 'vllm']: the inference acceleration framework to use; if vllm runs into HuggingFace communication problems, see doc/FAQ
 
         # Parameters required when model_worker loads the model across multiple GPUs
-        # "gpus": None,  # GPUs to use, given as a string such as "0,1"
+        # "gpus": None,  # GPUs to use, given as a string such as "0,1"; if it has no effect, set CUDA_VISIBLE_DEVICES="0,1" or similar instead
         # "num_gpus": 1,  # number of GPUs to use
         # "max_gpu_memory": "20GiB",  # maximum GPU memory per GPU
 
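The rewritten "gpus" comment points to CUDA_VISIBLE_DEVICES as a fallback when the "gpus" key is not honored. A minimal sketch of that fallback, assuming torch is installed and GPUs 0 and 1 exist (the device IDs are only an example):

    import os

    # CUDA_VISIBLE_DEVICES must be set before any CUDA context is created,
    # i.e. before torch, fastchat or vllm first touch the GPU.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"  # same format as the "gpus" entry

    import torch  # imported after the variable is set, so only GPUs 0 and 1 are visible

    print(torch.cuda.device_count())  # expected to report 2 on a machine with both GPUs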
@@ -62,7 +62,7 @@ FSCHAT_MODEL_WORKERS = {
         # "no_register": False,
         # "embed_in_truncate": False,
 
-        # The following are vllm_worker configuration parameters; note that vllm requires a GPU
+        # The following are vllm_worker configuration parameters; note that vllm requires a GPU and has only been tested on Linux
 
         # tokenizer = model_path  # add here if the tokenizer differs from model_path
         # 'tokenizer_mode':'auto',
@@ -23,7 +23,7 @@ pathlib
 pytest
 scikit-learn
 numexpr
-vllm==0.1.7
+vllm==0.1.7; sys_platform == "linux"
 # online api libs
 # zhipuai
 # dashscope>=1.10.0 # qwen
@@ -23,7 +23,7 @@ pathlib
 pytest
 scikit-learn
 numexpr
-vllm==0.1.7
+vllm==0.1.7; sys_platform == "linux"
 # online api libs
 # zhipuai
 # dashscope>=1.10.0 # qwen
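The `; sys_platform == "linux"` suffix added in both requirements files is a standard PEP 508 environment marker: pip evaluates it at install time and simply skips vllm on macOS and Windows. Code that imports vllm therefore has to tolerate its absence off Linux; a minimal sketch of such a guard (illustrative only, not taken from this repo):

    import sys

    try:
        import vllm  # installed only where the sys_platform == "linux" marker matched
        HAS_VLLM = True
    except ImportError:
        HAS_VLLM = False

    print(f"platform={sys.platform}, vllm available: {HAS_VLLM}")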