1. Add all fastchat command-line arguments to the config; 2. Add shell scripts for starting and stopping fastchat; 3. Add llm_api_sh.py, a Python script that launches all fastchat services from the command line; 4. Change the default log format in the config
parent 9e2b411b01
commit 15e67a4d3e
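For orientation, a minimal usage sketch of the new argument set (not part of the commit; it assumes the parser defined below is importable from configs.model_config and that worker entries use the model-path@host@port format):

    from configs.model_config import parser

    # With nargs="+" a value passed on the command line is parsed as a list;
    # the default (when the flag is omitted) remains a plain string.
    args = parser.parse_args(
        ["--model-path-address", "THUDM/chatglm2-6b@localhost@20002"]
    )
    for item in args.model_path_address:
        model_path, host, port = item.split("@")
        print(model_path, host, port)  # THUDM/chatglm2-6b localhost 20002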
@@ -1,13 +1,158 @@
import os
import logging
import torch

import argparse
import json
# Log format
LOG_FORMAT = "%(levelname) -5s %(asctime)s" "-1d: %(message)s"
LOG_FORMAT = "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logging.basicConfig(format=LOG_FORMAT)

parser = argparse.ArgumentParser()
#------multi worker-----------------
parser.add_argument('--model-path-address',
                    default="THUDM/chatglm2-6b@localhost@20002",
                    nargs="+",
                    type=str,
                    help="model path, host, and port, formatted as model-path@host@port")
#---------------controller-------------------------

parser.add_argument("--controller-host", type=str, default="localhost")
parser.add_argument("--controller-port", type=int, default=21001)
parser.add_argument(
    "--dispatch-method",
    type=str,
    choices=["lottery", "shortest_queue"],
    default="shortest_queue",
)
controller_args = ["controller-host", "controller-port", "dispatch-method"]

#----------------------worker------------------------------------------

parser.add_argument("--worker-host", type=str, default="localhost")
parser.add_argument("--worker-port", type=int, default=21002)
# parser.add_argument("--worker-address", type=str, default="http://localhost:21002")
# parser.add_argument(
#     "--controller-address", type=str, default="http://localhost:21001"
# )
parser.add_argument(
    "--model-path",
    type=str,
    default="lmsys/vicuna-7b-v1.3",
    help="The path to the weights. This can be a local folder or a Hugging Face repo ID.",
)
parser.add_argument(
    "--revision",
    type=str,
    default="main",
    help="Hugging Face Hub model revision identifier",
)
parser.add_argument(
    "--device",
    type=str,
    choices=["cpu", "cuda", "mps", "xpu"],
    default="cuda",
    help="The device type",
)
parser.add_argument(
    "--gpus",
    type=str,
    default="0",
    help="A single GPU like 1 or multiple GPUs like 0,2",
)
parser.add_argument("--num-gpus", type=int, default=1)
parser.add_argument(
    "--max-gpu-memory",
    type=str,
    help="The maximum memory per gpu. Use a string like '13Gib'",
)
parser.add_argument(
    "--load-8bit", action="store_true", help="Use 8-bit quantization"
)
parser.add_argument(
    "--cpu-offloading",
    action="store_true",
    help="Only when using 8-bit quantization: Offload excess weights to the CPU that don't fit on the GPU",
)
parser.add_argument(
    "--gptq-ckpt",
    type=str,
    default=None,
    help="Load quantized model. The path to the local GPTQ checkpoint.",
)
parser.add_argument(
    "--gptq-wbits",
    type=int,
    default=16,
    choices=[2, 3, 4, 8, 16],
    help="#bits to use for quantization",
)
parser.add_argument(
    "--gptq-groupsize",
    type=int,
    default=-1,
    help="Groupsize to use for quantization; default uses full row.",
)
parser.add_argument(
    "--gptq-act-order",
    action="store_true",
    help="Whether to apply the activation order GPTQ heuristic",
)
parser.add_argument(
    "--model-names",
    type=lambda s: s.split(","),
    help="Optional display comma separated names",
)
parser.add_argument(
    "--limit-worker-concurrency",
    type=int,
    default=5,
    help="Limit the model concurrency to prevent OOM.",
)
parser.add_argument("--stream-interval", type=int, default=2)
parser.add_argument("--no-register", action="store_true")

worker_args = [
    "worker-host", "worker-port",
    "model-path", "revision", "device", "gpus", "num-gpus",
    "max-gpu-memory", "load-8bit", "cpu-offloading",
    "gptq-ckpt", "gptq-wbits", "gptq-groupsize",
    "gptq-act-order", "model-names", "limit-worker-concurrency",
    "stream-interval", "no-register",
    "controller-address",
]
#-----------------openai server---------------------------

parser.add_argument("--server-host", type=str, default="localhost", help="host name")
parser.add_argument("--server-port", type=int, default=8001, help="port number")
parser.add_argument(
    "--allow-credentials", action="store_true", help="allow credentials"
)
# parser.add_argument(
#     "--allowed-origins", type=json.loads, default=["*"], help="allowed origins"
# )
# parser.add_argument(
#     "--allowed-methods", type=json.loads, default=["*"], help="allowed methods"
# )
# parser.add_argument(
#     "--allowed-headers", type=json.loads, default=["*"], help="allowed headers"
# )
parser.add_argument(
    "--api-keys",
    type=lambda s: s.split(","),
    help="Optional list of comma separated API keys",
)
server_args = ["server-host", "server-port", "allow-credentials", "api-keys",
               "controller-address"
               ]
#------------------- It seems all configurable items could also be exposed as command-line options here -----------------------




# Modify the values in the dictionary below to point to a locally stored embedding model,
# e.g. change "text2vec": "GanymedeNil/text2vec-large-chinese" to "text2vec": "User/Downloads/text2vec-large-chinese"

@@ -12,7 +12,8 @@ model_worker_port = 20002
openai_api_port = 8888
base_url = "http://127.0.0.1:{}"
queue = Queue()
import torch
sys.modules['fastchat.constants.LOGDIR'] = LOG_PATH
import parser


def set_httpx_timeout(timeout=60.0):
@@ -122,6 +123,7 @@ def create_model_worker_app(
    sys.modules["fastchat.serve.model_worker"].gptq_config = gptq_config
    # TODO: replace fastchat's log file
    sys.modules["fastchat.serve.model_worker"].logger = logger

    return app

@@ -0,0 +1,100 @@
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
from configs.model_config import LOG_PATH, controller_args, worker_args, server_args, parser
import subprocess
import re
import argparse

args = parser.parse_args()
# The http:// prefix is required; otherwise requests fails with "InvalidSchema: No connection adapters were found"
args = argparse.Namespace(**vars(args),**{"controller-address":f"http://{args.controller_host}:{str(args.controller_port)}"})
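# Illustrative note (not in the original commit): with the defaults above this adds
#   controller-address = "http://localhost:21001"
# to the namespace, which string_args() later emits as "--controller-address http://localhost:21001".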

if args.gpus:
    if len(args.gpus.split(",")) < args.num_gpus:
        raise ValueError(
            f"Larger --num-gpus ({args.num_gpus}) than --gpus {args.gpus}!"
        )
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus

#-------------global-----------------
# parser.add_argument('--model-path-address',
#                     default="THUDM/chatglm2-6b@localhost@20002",
#                     nargs="+",
#                     type=str,
#                     help="model path, host, and port, formatted as model-path@host@path")
# multi_worker_args = parser.parse_args()


# Placeholders in base_launch_sh:
# 0: controller, model_worker, or openai_api_server
# 1: command-line options
# 2: LOG_PATH
# 3: log file name
base_launch_sh = "nohup python3 -m fastchat.serve.{0} {1} >{2}/{3}.log 2>&1 &"

# Placeholders in base_check_sh:
# 0: LOG_PATH
#! 1: log file name, must match the one used in base_launch_sh
# 2: controller, worker, or openai_api_server
base_check_sh = """while [ `grep -c "Uvicorn running on" {0}/{1}.log` -eq '0' ];do
                      sleep 1s;
                      echo "wait {2} running"
                  done
                  echo '{2} running' """
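# Illustrative example (not part of the commit): with LOG_PATH pointing at the log directory,
# the rendered controller command from the defaults above is roughly
#   nohup python3 -m fastchat.serve.controller --host localhost --port 21001 --dispatch-method shortest_queue >{LOG_PATH}/controller.log 2>&1 &
# and base_check_sh then polls {LOG_PATH}/controller.log until "Uvicorn running on" appears.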

def string_args(args, args_list):
    """Convert the selected keys in args into a command-line argument string."""
    args_str = ""
    for key, value in args._get_kwargs():
        # Keys from args._get_kwargs() use "_" as the separator; convert first,
        # then check whether the key is in the given args list.
        key = key.replace("_", "-")
        if key not in args_list:
            continue
        # fastchat's own port/host flags carry no prefix, so strip the prefix here.
        key = key.split("-")[-1] if re.search("port|host", key) else key
        if not value:
            pass
        # 1 == True -> True
        elif isinstance(value, bool) and value == True:
            args_str += f" --{key} "
        elif isinstance(value, list) or isinstance(value, tuple) or isinstance(value, set):
            value = " ".join(value)
            args_str += f" --{key} {value} "
        else:
            args_str += f" --{key} {value} "

    return args_str
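# Illustrative example (not part of the commit): with the controller defaults above,
#   string_args(args, controller_args)
# returns roughly " --host localhost  --port 21001  --dispatch-method shortest_queue ",
# i.e. the controller-/worker-/server- prefixes on host and port are stripped so the
# emitted flags match fastchat's own CLI.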

def launch_worker(item):
    log_name = item.split("/")[-1].split("\\")[-1].replace("-", "_").replace("@", "_")
    # Split model-path-address first, then pass args to string_args to build the argument string
    args.model_path, args.worker_host, args.worker_port = item.split("@")
    worker_str_args = string_args(args, worker_args)
    worker_sh = base_launch_sh.format("model_worker", worker_str_args, LOG_PATH, f"worker_{log_name}")
    worker_check_sh = base_check_sh.format(LOG_PATH, f"worker_{log_name}", "model_worker")
    subprocess.run(worker_sh, shell=True, check=True)
    subprocess.run(worker_check_sh, shell=True, check=True)


def launch_all():
    controller_str_args = string_args(args, controller_args)
    controller_sh = base_launch_sh.format("controller", controller_str_args, LOG_PATH, "controller")
    controller_check_sh = base_check_sh.format(LOG_PATH, "controller", "controller")
    subprocess.run(controller_sh, shell=True, check=True)
    subprocess.run(controller_check_sh, shell=True, check=True)

    if isinstance(args.model_path_address, str):
        launch_worker(args.model_path_address)
    else:
        for idx, item in enumerate(args.model_path_address):
            print(f"Loading model {idx}: {item}")
            launch_worker(item)

    server_str_args = string_args(args, server_args)
    server_sh = base_launch_sh.format("openai_api_server", server_str_args, LOG_PATH, "openai_api_server")
    server_check_sh = base_check_sh.format(LOG_PATH, "openai_api_server", "openai_api_server")
    subprocess.run(server_sh, shell=True, check=True)
    subprocess.run(server_check_sh, shell=True, check=True)

if __name__ == "__main__":
    launch_all()
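# Usage sketch (hypothetical invocation, not part of the commit; the script path is assumed):
#   python3 llm_api_sh.py --model-path-address THUDM/chatglm2-6b@localhost@20002 --server-port 8001
# Several workers can be listed after --model-path-address, one model-path@host@port entry each.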
@@ -0,0 +1,30 @@
[ -d "../logs/" ] && echo "log dir exists" || mkdir "../logs/"
# controller
nohup python3 -m fastchat.serve.controller >../logs/controller.log 2>&1 &
while [ `grep -c "Uvicorn running on" ../logs/controller.log` -eq '0' ];do
    sleep 1s;
    echo "wait controller running"
done
echo "controller running"

# worker
nohup python3 -m fastchat.serve.model_worker \
    --model-name 'chatglm2-6b' \
    --model-path THUDM/chatglm2-6b \
    --num-gpus 2 \
    >> ../logs/worker.log 2>&1 &

while [ `grep -c "Uvicorn running on" ../logs/worker.log` -eq '0' ];do
    sleep 3s;
    echo "wait worker running"
done
echo "worker running"

# openai api server
nohup python3 -m fastchat.serve.openai_api_server >> "../logs/openai_server.log" 2>&1 &

while [ `grep -c "Uvicorn running on" ../logs/openai_server.log` -eq '0' ];do
    sleep 3s;
    echo "wait openai_server running"
done
echo "openai_server running"
@@ -0,0 +1,7 @@
# controller
nohup python3 -m fastchat.serve.controller >../logs/controller.log 2>&1 &
while [ `grep -c "Uvicorn running on" ../logs/controller.log` -eq '0' ];do
    sleep 1s;
    echo "wait controller running"
done
echo "controller running"
@@ -0,0 +1,8 @@
# openai api server
nohup python3 -m fastchat.serve.openai_api_server >> "../logs/openai_server.log" 2>&1 &

while [ `grep -c "Uvicorn running on" ../logs/openai_server.log` -eq '0' ];do
    sleep 3s;
    echo "wait openai_server running"
done
echo "openai_server running"
@@ -0,0 +1,11 @@
nohup python3 -m fastchat.serve.model_worker \
    --model-name 'chatglm2-6b' \
    --model-path THUDM/chatglm2-6b \
    --num-gpus 2 \
    >> ../logs/worker.log 2>&1 &

while [ `grep -c "Uvicorn running on" ../logs/worker.log` -eq '0' ];do
    sleep 3s;
    echo "wait worker running"
done
echo "worker running"
@@ -0,0 +1 @@
ps -eo user,pid,cmd|grep fastchat.serve.controller|grep -v grep|awk '{print $2}'|xargs kill -9
@@ -0,0 +1 @@
ps -eo user,pid,cmd|grep fastchat.serve.openai_api_server|grep -v grep|awk '{print $2}'|xargs kill -9
@@ -0,0 +1 @@
ps -eo user,pid,cmd|grep fastchat.serve.model_worker|grep -v grep|awk '{print $2}'|xargs kill -9
@@ -1,30 +0,0 @@
[ -d "./logs/" ] && echo "log dir exists" || mkdir "./logs/"
# controller
nohup python3 -m fastchat.serve.controller >./logs/controller.log 2>&1 &
while [ `grep -c "Uvicorn running on" ./logs/controller.log` -eq '0' ];do
    sleep 1s;
    echo "wait controller running"
done
echo "controller running"

# worker
nohup python3 -m fastchat.serve.model_worker \
    --model-name 'chatglm2-6b' \
    --model-path THUDM/chatglm2-6b \
    --num-gpus 2 \
    >> ./logs/worker.log 2>&1 &

while [ `grep -c "Uvicorn running on" ./logs/worker.log` -eq '0' ];do
    sleep 3s;
    echo "wait worker running"
done
echo "worker running"

# webui
nohup python3 -m fastchat.serve.openai_api_server >> "./logs/openai_server.log" 2>&1 &

while [ `grep -c "Uvicorn running on" ./logs/openai_server.log` -eq '0' ];do
    sleep 3s;
    echo "wait openai_server running"
done
echo "openai_server running"