From 6590ca32db6cdb30cfce6defd59d2376220edf94 Mon Sep 17 00:00:00 2001 From: hzg0601 Date: Mon, 14 Aug 2023 11:24:52 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E5=A4=9A=E5=8D=A1=E9=83=A8?= =?UTF-8?q?=E7=BD=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server/llm_api.py | 2 +- server/llm_api_launch.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/server/llm_api.py b/server/llm_api.py index d26a935..0a7d3b0 100644 --- a/server/llm_api.py +++ b/server/llm_api.py @@ -44,7 +44,7 @@ def create_model_worker_app( gptq_act_order=None, gpus=None, num_gpus=1, - max_gpu_memory=None, + max_gpu_memory="20GiB", cpu_offloading=None, worker_address=base_url.format(model_worker_port), controller_address=base_url.format(controller_port), diff --git a/server/llm_api_launch.py b/server/llm_api_launch.py index a4ba256..201bd2b 100644 --- a/server/llm_api_launch.py +++ b/server/llm_api_launch.py @@ -76,6 +76,7 @@ parser.add_argument("--num-gpus", type=int, default=1) parser.add_argument( "--max-gpu-memory", type=str, + default="20GiB", help="The maximum memory per gpu. Use a string like '13Gib'", ) parser.add_argument(