fix: enable vllm only on Linux and import vllm_worker lazily to avoid errors on Windows
This commit is contained in:
parent
d39878ff35
commit
523764e284
|
|
@@ -38,7 +38,8 @@ FSCHAT_MODEL_WORKERS = {
|
||||||
"host": DEFAULT_BIND_HOST,
|
"host": DEFAULT_BIND_HOST,
|
||||||
"port": 20002,
|
"port": 20002,
|
||||||
"device": LLM_DEVICE,
|
"device": LLM_DEVICE,
|
||||||
"infer_turbo": False # 可选[False,'vllm'],使用的推理加速框架,使用vllm如果出现HuggingFace通信问题,参见doc/FAQ
|
# False,'vllm',使用的推理加速框架,使用vllm如果出现HuggingFace通信问题,参见doc/FAQ
|
||||||
|
"infer_turbo": "vllm" if sys.platform.startswith("linux") else False,
|
||||||
|
|
||||||
# model_worker多卡加载需要配置的参数
|
# model_worker多卡加载需要配置的参数
|
||||||
# "gpus": None, # 使用的GPU,以str的格式指定,如"0,1",如失效请使用CUDA_VISIBLE_DEVICES="0,1"等形式指定
|
# "gpus": None, # 使用的GPU,以str的格式指定,如"0,1",如失效请使用CUDA_VISIBLE_DEVICES="0,1"等形式指定
|
||||||
|
|
|
||||||
|
|
@@ -78,7 +78,6 @@ def create_model_worker_app(log_level: str = "INFO", **kwargs) -> FastAPI:
|
||||||
from fastchat.serve.model_worker import worker_id, logger
|
from fastchat.serve.model_worker import worker_id, logger
|
||||||
import argparse
|
import argparse
|
||||||
import fastchat.serve.model_worker
|
import fastchat.serve.model_worker
|
||||||
import fastchat.serve.vllm_worker
|
|
||||||
logger.setLevel(log_level)
|
logger.setLevel(log_level)
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
|
|
@@ -98,6 +97,7 @@ def create_model_worker_app(log_level: str = "INFO", **kwargs) -> FastAPI:
|
||||||
else:
|
else:
|
||||||
from configs.model_config import VLLM_MODEL_DICT
|
from configs.model_config import VLLM_MODEL_DICT
|
||||||
if kwargs["model_names"][0] in VLLM_MODEL_DICT and args.infer_turbo == "vllm":
|
if kwargs["model_names"][0] in VLLM_MODEL_DICT and args.infer_turbo == "vllm":
|
||||||
|
import fastchat.serve.vllm_worker
|
||||||
from fastchat.serve.vllm_worker import VLLMWorker,app
|
from fastchat.serve.vllm_worker import VLLMWorker,app
|
||||||
from vllm import AsyncLLMEngine
|
from vllm import AsyncLLMEngine
|
||||||
from vllm.engine.arg_utils import AsyncEngineArgs,EngineArgs
|
from vllm.engine.arg_utils import AsyncEngineArgs,EngineArgs
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue