From 15e67a4d3e236c915a271531eb253cd7010d60b2 Mon Sep 17 00:00:00 2001
From: hzg0601
Date: Tue, 1 Aug 2023 17:59:20 +0800
Subject: [PATCH] 1. Add all of fastchat's command-line arguments to the
 config; 2. Add shell scripts to start and stop fastchat; 3. Add
 llm_api_sh.py, a Python script that launches all fastchat services from the
 command line; 4. Change the default log format in the config
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 configs/model_config.py                      | 149 ++++++++++++++++++-
 server/llm_api.py                            |   4 +-
 server/llm_api_sh.py                         | 100 +++++++++++++
 server/setup_all.sh                          |  30 ++++
 server/setup_controller.sh                   |   7 +
 server/setup_server.sh                       |   8 +
 server/setup_worker.sh                       |  11 ++
 shutdown_server.sh => server/shutdown_all.sh |   0
 server/shutdown_controller.sh                |   1 +
 server/shutdown_server.sh                    |   1 +
 server/shutdown_worker.sh                    |   1 +
 setup_server.sh                              |  30 ----
 12 files changed, 309 insertions(+), 33 deletions(-)
 create mode 100644 server/llm_api_sh.py
 create mode 100644 server/setup_all.sh
 create mode 100644 server/setup_controller.sh
 create mode 100644 server/setup_server.sh
 create mode 100644 server/setup_worker.sh
 rename shutdown_server.sh => server/shutdown_all.sh (100%)
 create mode 100644 server/shutdown_controller.sh
 create mode 100644 server/shutdown_server.sh
 create mode 100644 server/shutdown_worker.sh
 delete mode 100644 setup_server.sh

diff --git a/configs/model_config.py b/configs/model_config.py
index f00b646..caf9deb 100644
--- a/configs/model_config.py
+++ b/configs/model_config.py
@@ -1,13 +1,158 @@
 import os
 import logging
 import torch
-
+import argparse
+import json
 # 日志格式
-LOG_FORMAT = "%(levelname) -5s %(asctime)s" "-1d: %(message)s"
+LOG_FORMAT = "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
 logger = logging.getLogger()
 logger.setLevel(logging.INFO)
 logging.basicConfig(format=LOG_FORMAT)
+import argparse
+import json
+
+parser = argparse.ArgumentParser()
+#------multi worker-----------------
+parser.add_argument('--model-path-address',
+                    default="THUDM/chatglm2-6b@localhost@20002",
+                    nargs="+",
+                    type=str,
+                    help="model path, host, and port, formatted as model-path@host@port")
+#---------------controller-------------------------
+
+parser.add_argument("--controller-host", type=str, default="localhost")
+parser.add_argument("--controller-port", type=int, default=21001)
+parser.add_argument(
+    "--dispatch-method",
+    type=str,
+    choices=["lottery", "shortest_queue"],
+    default="shortest_queue",
+)
+controller_args = ["controller-host","controller-port","dispatch-method"]
+
+#----------------------worker------------------------------------------
+
+parser.add_argument("--worker-host", type=str, default="localhost")
+parser.add_argument("--worker-port", type=int, default=21002)
+# parser.add_argument("--worker-address", type=str, default="http://localhost:21002")
+# parser.add_argument(
+#     "--controller-address", type=str, default="http://localhost:21001"
+# )
+parser.add_argument(
+    "--model-path",
+    type=str,
+    default="lmsys/vicuna-7b-v1.3",
+    help="The path to the weights. This can be a local folder or a Hugging Face repo ID.",
+)
+parser.add_argument(
+    "--revision",
+    type=str,
+    default="main",
+    help="Hugging Face Hub model revision identifier",
+)
+parser.add_argument(
+    "--device",
+    type=str,
+    choices=["cpu", "cuda", "mps", "xpu"],
+    default="cuda",
+    help="The device type",
+)
+parser.add_argument(
+    "--gpus",
+    type=str,
+    default="0",
+    help="A single GPU like 1 or multiple GPUs like 0,2",
+)
+parser.add_argument("--num-gpus", type=int, default=1)
+parser.add_argument(
+    "--max-gpu-memory",
+    type=str,
+    help="The maximum memory per gpu. Use a string like '13Gib'",
+)
+parser.add_argument(
+    "--load-8bit", action="store_true", help="Use 8-bit quantization"
+)
+parser.add_argument(
+    "--cpu-offloading",
+    action="store_true",
+    help="Only when using 8-bit quantization: Offload excess weights to the CPU that don't fit on the GPU",
+)
+parser.add_argument(
+    "--gptq-ckpt",
+    type=str,
+    default=None,
+    help="Load quantized model. The path to the local GPTQ checkpoint.",
+)
+parser.add_argument(
+    "--gptq-wbits",
+    type=int,
+    default=16,
+    choices=[2, 3, 4, 8, 16],
+    help="#bits to use for quantization",
+)
+parser.add_argument(
+    "--gptq-groupsize",
+    type=int,
+    default=-1,
+    help="Groupsize to use for quantization; default uses full row.",
+)
+parser.add_argument(
+    "--gptq-act-order",
+    action="store_true",
+    help="Whether to apply the activation order GPTQ heuristic",
+)
+parser.add_argument(
+    "--model-names",
+    type=lambda s: s.split(","),
+    help="Optional display comma separated names",
+)
+parser.add_argument(
+    "--limit-worker-concurrency",
+    type=int,
+    default=5,
+    help="Limit the model concurrency to prevent OOM.",
+)
+parser.add_argument("--stream-interval", type=int, default=2)
+parser.add_argument("--no-register", action="store_true")
+
+worker_args = [
+    "worker-host","worker-port",
+    "model-path","revision","device","gpus","num-gpus",
+    "max-gpu-memory","load-8bit","cpu-offloading",
+    "gptq-ckpt","gptq-wbits","gptq-groupsize",
+    "gptq-act-order","model-names","limit-worker-concurrency",
+    "stream-interval","no-register",
+    "controller-address"
+    ]
+#-----------------openai server---------------------------
+
+parser.add_argument("--server-host", type=str, default="localhost", help="host name")
+parser.add_argument("--server-port", type=int, default=8001, help="port number")
+parser.add_argument(
+    "--allow-credentials", action="store_true", help="allow credentials"
+)
+# parser.add_argument(
+#     "--allowed-origins", type=json.loads, default=["*"], help="allowed origins"
+# )
+# parser.add_argument(
+#     "--allowed-methods", type=json.loads, default=["*"], help="allowed methods"
+# )
+# parser.add_argument(
+#     "--allowed-headers", type=json.loads, default=["*"], help="allowed headers"
+# )
+parser.add_argument(
+    "--api-keys",
+    type=lambda s: s.split(","),
+    help="Optional list of comma separated API keys",
+)
+server_args = ["server-host","server-port","allow-credentials","api-keys",
+    "controller-address"
+    ]
+#------------------- it seems all remaining configurable items could also be exposed as command-line options here -----------------------
+
+
+

 # 在以下字典中修改属性值,以指定本地embedding模型存储位置
 # 如将 "text2vec": "GanymedeNil/text2vec-large-chinese" 修改为 "text2vec": "User/Downloads/text2vec-large-chinese"
diff --git a/server/llm_api.py b/server/llm_api.py
index e8e5452..ac1ee37 100644
--- a/server/llm_api.py
+++ b/server/llm_api.py
@@ -12,7 +12,8 @@ model_worker_port = 20002
 openai_api_port = 8888
 base_url = "http://127.0.0.1:{}"
 queue = Queue()
-import torch
+sys.modules['fastchat.constants.LOGDIR'] = LOG_PATH
+import parser


 def set_httpx_timeout(timeout=60.0):
@@ -122,6 +123,7 @@ def create_model_worker_app(
     sys.modules["fastchat.serve.model_worker"].gptq_config = gptq_config
     # #todo 替换fastchat的日志文件
     sys.modules["fastchat.serve.model_worker"].logger = logger
+
     return app


diff --git a/server/llm_api_sh.py b/server/llm_api_sh.py
new file mode 100644
index 0000000..673fbc7
--- /dev/null
+++ b/server/llm_api_sh.py
@@ -0,0 +1,100 @@
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+from configs.model_config import LOG_PATH,controller_args,worker_args,server_args,parser
+import subprocess
+import re
+import argparse
+
+args = parser.parse_args()
+# the http:// prefix is required, otherwise requests raises "InvalidSchema: No connection adapters were found"
+args = argparse.Namespace(**vars(args),**{"controller-address":f"http://{args.controller_host}:{str(args.controller_port)}"})
+
+if args.gpus:
+    if len(args.gpus.split(",")) < args.num_gpus:
+        raise ValueError(
+            f"Larger --num-gpus ({args.num_gpus}) than --gpus {args.gpus}!"
+        )
+    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
+
+#-------------global-----------------
+# parser.add_argument('--model-path-address',
+#                     default="THUDM/chatglm2-6b@localhost@20002",
+#                     nargs="+",
+#                     type=str,
+#                     help="model path, host, and port, formatted as model-path@host@port")
+# multi_worker_args = parser.parse_args()
+
+
+# {0}: controller, model_worker, openai_api_server
+# {1}: command-line options
+# {2}: LOG_PATH
+# {3}: log file name
+base_launch_sh = "nohup python3 -m fastchat.serve.{0} {1} >{2}/{3}.log 2>&1 &"
+
+# {0}: log_path
+#! {1}: log file name, must match the one used in base_launch_sh
+# {2}: controller, worker, openai_api_server
+base_check_sh = """while [ `grep -c "Uvicorn running on" {0}/{1}.log` -eq '0' ];do
+                        sleep 1s;
+                        echo "wait {2} running"
+                   done
+                   echo '{2} running' """
+
+
+def string_args(args,args_list):
+    """Convert the keys of args that appear in args_list into a command-line argument string."""
+    args_str = ""
+    for key, value in args._get_kwargs():
+        # keys from args._get_kwargs() use underscores; convert them to dashes before checking against args_list
+        key = key.replace("_","-")
+        if key not in args_list:
+            continue
+        # fastchat expects bare --host/--port options, so keep only the last segment of host/port keys
+        key = key.split("-")[-1] if re.search("port|host",key) else key
+        if not value:
+            pass
+        # isinstance check is needed because 1 == True evaluates to True
+        elif isinstance(value,bool) and value == True:
+            args_str += f" --{key} "
+        elif isinstance(value, list) or isinstance(value, tuple) or isinstance(value, set):
+            value = " ".join(value)
+            args_str += f" --{key} {value} "
+        else:
+            args_str += f" --{key} {value} "
+
+    return args_str
+
+def launch_worker(item):
+    log_name = item.split("/")[-1].split("\\")[-1].replace("-","_").replace("@","_")
+    # split model-path-address first, then pass the parts to string_args to build the worker arguments
+    args.model_path,args.worker_host, args.worker_port = item.split("@")
+    worker_str_args = string_args(args,worker_args)
+    worker_sh = base_launch_sh.format("model_worker",worker_str_args,LOG_PATH,f"worker_{log_name}")
+    worker_check_sh = base_check_sh.format(LOG_PATH,f"worker_{log_name}","model_worker")
+    subprocess.run(worker_sh,shell=True,check=True)
+    subprocess.run(worker_check_sh,shell=True,check=True)
+
+
+def launch_all():
+    controller_str_args = string_args(args,controller_args)
+    controller_sh = base_launch_sh.format("controller",controller_str_args,LOG_PATH,"controller")
+    controller_check_sh = base_check_sh.format(LOG_PATH,"controller","controller")
+    subprocess.run(controller_sh,shell=True,check=True)
+    subprocess.run(controller_check_sh,shell=True,check=True)
+
+    if isinstance(args.model_path_address, str):
+        launch_worker(args.model_path_address)
+    else:
+        for idx,item in enumerate(args.model_path_address):
print(f"开始加载第{idx}个模型:{item}") + launch_worker(item) + + server_str_args = string_args(args,server_args) + server_sh = base_launch_sh.format("openai_api_server",server_str_args,LOG_PATH,"openai_api_server") + server_check_sh = base_check_sh.format(LOG_PATH,"openai_api_server","openai_api_server") + subprocess.run(server_sh,shell=True,check=True) + subprocess.run(server_check_sh,shell=True,check=True) + +if __name__ == "__main__": + launch_all() \ No newline at end of file diff --git a/server/setup_all.sh b/server/setup_all.sh new file mode 100644 index 0000000..2411d7c --- /dev/null +++ b/server/setup_all.sh @@ -0,0 +1,30 @@ +[ -d "../logs/" ] && echo "log dir exists" || mkdir "../logs/" +# controller +nohup python3 -m fastchat.serve.controller >../logs/controller.log 2>&1 & +while [ `grep -c "Uvicorn running on" ../logs/controller.log` -eq '0' ];do + sleep 1s; + echo "wait controller running" +done +echo "controller running" + +# worker +nohup python3 -m fastchat.serve.model_worker \ +--model-name 'chatglm2-6b' \ +--model-path THUDM/chatglm2-6b \ +--num-gpus 2 \ +>> ./logs/worker.log 2>&1 & + +while [ `grep -c "Uvicorn running on" ../logs/worker.log` -eq '0' ];do + sleep 3s; + echo "wait worker running" +done +echo "worker running" + +# webui +nohup python3 -m fastchat.serve.openai_api_server >> "../logs/openai_server.log" 2>&1 & + +while [ `grep -c "Uvicorn running on" ../logs/openai_server.log` -eq '0' ];do + sleep 3s; + echo "wait openai_server running" +done +echo "openai_server running" \ No newline at end of file diff --git a/server/setup_controller.sh b/server/setup_controller.sh new file mode 100644 index 0000000..8d84b27 --- /dev/null +++ b/server/setup_controller.sh @@ -0,0 +1,7 @@ +# controller +nohup python3 -m fastchat.serve.controller >../logs/controller.log 2>&1 & +while [ `grep -c "Uvicorn running on" ../logs/controller.log` -eq '0' ];do + sleep 1s; + echo "wait controller running" +done +echo "controller running" \ No newline at end of file diff --git a/server/setup_server.sh b/server/setup_server.sh new file mode 100644 index 0000000..4ae44df --- /dev/null +++ b/server/setup_server.sh @@ -0,0 +1,8 @@ +# webui +nohup python3 -m fastchat.serve.openai_api_server >> "../logs/openai_server.log" 2>&1 & + +while [ `grep -c "Uvicorn running on" ../logs/openai_server.log` -eq '0' ];do + sleep 3s; + echo "wait openai_server running" +done +echo "openai_server running" \ No newline at end of file diff --git a/server/setup_worker.sh b/server/setup_worker.sh new file mode 100644 index 0000000..d719d6c --- /dev/null +++ b/server/setup_worker.sh @@ -0,0 +1,11 @@ +nohup python3 -m fastchat.serve.model_worker \ +--model-name 'chatglm2-6b' \ +--model-path THUDM/chatglm2-6b \ +--num-gpus 2 \ +>> ../logs/worker.log 2>&1 & + +while [ `grep -c "Uvicorn running on" ../logs/worker.log` -eq '0' ];do + sleep 3s; + echo "wait worker running" +done +echo "worker running" \ No newline at end of file diff --git a/shutdown_server.sh b/server/shutdown_all.sh similarity index 100% rename from shutdown_server.sh rename to server/shutdown_all.sh diff --git a/server/shutdown_controller.sh b/server/shutdown_controller.sh new file mode 100644 index 0000000..015f6ad --- /dev/null +++ b/server/shutdown_controller.sh @@ -0,0 +1 @@ +ps -eo user,pid,cmd|grep fastchat.serve.controller|grep -v grep|awk '{print $2}'|xargs kill -9 \ No newline at end of file diff --git a/server/shutdown_server.sh b/server/shutdown_server.sh new file mode 100644 index 0000000..a114bc2 --- /dev/null +++ b/server/shutdown_server.sh @@ 
+ps -eo user,pid,cmd|grep fastchat.serve.openai_api_server|grep -v grep|awk '{print $2}'|xargs kill -9
\ No newline at end of file
diff --git a/server/shutdown_worker.sh b/server/shutdown_worker.sh
new file mode 100644
index 0000000..0405be4
--- /dev/null
+++ b/server/shutdown_worker.sh
@@ -0,0 +1 @@
+ps -eo user,pid,cmd|grep fastchat.serve.model_worker|grep -v grep|awk '{print $2}'|xargs kill -9
\ No newline at end of file
diff --git a/setup_server.sh b/setup_server.sh
deleted file mode 100644
index 3fd62c4..0000000
--- a/setup_server.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-[ -d "./logs/" ] && echo "log dir exists" || mkdir "./logs/"
-# controller
-nohup python3 -m fastchat.serve.controller >./logs/controller.log 2>&1 &
-while [ `grep -c "Uvicorn running on" ./logs/controller.log` -eq '0' ];do
-    sleep 1s;
-    echo "wait controller running"
-done
-echo "controller running"
-
-# worker
-nohup python3 -m fastchat.serve.model_worker \
---model-name 'chatglm2-6b' \
---model-path THUDM/chatglm2-6b \
---num-gpus 2 \
->> ./logs/worker.log 2>&1 &
-
-while [ `grep -c "Uvicorn running on" ./logs/worker.log` -eq '0' ];do
-    sleep 3s;
-    echo "wait worker running"
-done
-echo "worker running"
-
-# webui
-nohup python3 -m fastchat.serve.openai_api_server >> "./logs/openai_server.log" 2>&1 &
-
-while [ `grep -c "Uvicorn running on" ./logs/openai_server.log` -eq '0' ];do
-    sleep 3s;
-    echo "wait openai_server running"
-done
-echo "openai_server running"
\ No newline at end of file
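
Usage sketch (illustrative, not part of the diff): assuming the patch is applied and fastchat is installed, the new launcher could be driven roughly as below. The model path, ports, and GPU ids are placeholders; every flag shown is one defined in configs/model_config.py above, and the per-service shutdown scripts are the ones added in this patch.

    # start controller, one model_worker per --model-path-address entry, and the OpenAI API server
    python3 server/llm_api_sh.py \
        --model-path-address THUDM/chatglm2-6b@localhost@20002 \
        --gpus 0,1 --num-gpus 2 \
        --controller-port 21001 \
        --server-port 8001

    # stop the services again
    bash server/shutdown_server.sh
    bash server/shutdown_worker.sh
    bash server/shutdown_controller.sh

llm_api_sh.py formats one shell command per service from base_launch_sh, writes each service's output to LOG_PATH, and polls the log for "Uvicorn running on" (base_check_sh) before launching the next service.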