update fastchat to 0.2.24

liunux4odoo 2023-08-16 16:15:45 +08:00
parent f64affc930
commit 8ed1f8efca
3 changed files with 29 additions and 15 deletions

View File

@@ -1,8 +1,8 @@
 langchain==0.0.257
 openai
 sentence_transformers
-fschat==0.2.20
-transformers
+fschat==0.2.24
+transformers>=4.31.0
 torch~=2.0.0
 fastapi~=0.99.1
 nltk~=3.8.1

View File

@@ -1,8 +1,8 @@
 langchain==0.0.257
 openai
 sentence_transformers
-fschat==0.2.20
-transformers
+fschat==0.2.24
+transformers>=4.31.0
 torch~=2.0.0
 fastapi~=0.99.1
 nltk~=3.8.1
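
Both requirements files receive the same two bumps: fschat is pinned up from 0.2.20 to 0.2.24, and transformers gains a >=4.31.0 floor (4.31 is the first release shipping Llama-2 support, which is likely the reason for the floor). A quick environment sanity check against the new pins (a sketch, not part of the commit):

    from importlib.metadata import version

    assert version("fschat") == "0.2.24"
    tf = tuple(int(x) for x in version("transformers").split(".")[:2])
    assert tf >= (4, 31), f"transformers too old: {version('transformers')}"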

View File

@@ -38,27 +38,31 @@ def create_controller_app(
 def create_model_worker_app(
+    worker_address=base_url.format(model_worker_port),
+    controller_address=base_url.format(controller_port),
     model_path=llm_model_dict[LLM_MODEL].get("local_model_path"),
-    model_names=[LLM_MODEL],
     device=LLM_DEVICE,
+    gpus=None,
+    max_gpu_memory="20GiB",
     load_8bit=False,
+    cpu_offloading=None,
     gptq_ckpt=None,
     gptq_wbits=16,
     gptq_groupsize=-1,
-    gptq_act_order=None,
-    gpus=None,
-    num_gpus=1,
-    max_gpu_memory="20GiB",
-    cpu_offloading=None,
-    worker_address=base_url.format(model_worker_port),
-    controller_address=base_url.format(controller_port),
+    gptq_act_order=False,
+    awq_ckpt=None,
+    awq_wbits=16,
+    awq_groupsize=-1,
+    model_names=[LLM_MODEL],
+    num_gpus=1, # not in fastchat
+    conv_template=None,
     limit_worker_concurrency=5,
     stream_interval=2,
     no_register=False,
 ):
     import fastchat.constants
     fastchat.constants.LOGDIR = LOG_PATH
-    from fastchat.serve.model_worker import app, GptqConfig, ModelWorker, worker_id
+    from fastchat.serve.model_worker import app, GptqConfig, AWQConfig, ModelWorker, worker_id
     import argparse
     parser = argparse.ArgumentParser()
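
With the reordered signature the worker factory is still driven entirely by keyword arguments; a minimal usage sketch (the model name and the uvicorn call are illustrative, not taken from this commit):

    import uvicorn

    # Quantization knobs keep their "disabled" defaults (wbits=16, no ckpt);
    # conv_template=None lets fastchat infer the template from the model path.
    app = create_model_worker_app(
        model_names=["chatglm2-6b"],  # illustrative model name
        conv_template=None,
    )
    uvicorn.run(app, host=host_ip, port=model_worker_port)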
@@ -71,12 +75,16 @@ def create_model_worker_app(
     args.gptq_wbits = gptq_wbits
     args.gptq_groupsize = gptq_groupsize
     args.gptq_act_order = gptq_act_order
+    args.awq_ckpt = awq_ckpt
+    args.awq_wbits = awq_wbits
+    args.awq_groupsize = awq_groupsize
     args.gpus = gpus
     args.num_gpus = num_gpus
     args.max_gpu_memory = max_gpu_memory
     args.cpu_offloading = cpu_offloading
     args.worker_address = worker_address
     args.controller_address = controller_address
+    args.conv_template = conv_template
     args.limit_worker_concurrency = limit_worker_concurrency
     args.stream_interval = stream_interval
     args.no_register = no_register
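
This hunk keeps fastchat's CLI-shaped contract without a real command line: parse an empty argv, then attach every setting to the namespace by hand. The idiom in isolation (a sketch, not the commit's code):

    import argparse

    parser = argparse.ArgumentParser()
    args = parser.parse_args([])  # empty argv: no real CLI inside the API server
    args.stream_interval = 2      # each kwarg is then set as a plain attribute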
@@ -98,6 +106,12 @@ def create_model_worker_app(
         groupsize=args.gptq_groupsize,
         act_order=args.gptq_act_order,
     )
+    awq_config = AWQConfig(
+        ckpt=args.awq_ckpt or args.model_path,
+        wbits=args.awq_wbits,
+        groupsize=args.awq_groupsize,
+    )
+
     # torch.multiprocessing.set_start_method('spawn')
     worker = ModelWorker(
         controller_addr=args.controller_address,
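
The new AWQConfig block mirrors the GptqConfig one above it, and ckpt=args.awq_ckpt or args.model_path means the AWQ checkpoint defaults to the model path itself when no separate checkpoint is supplied. For orientation, a rough stand-in for the container as used here (the real class lives in fastchat.serve.model_worker; the field names come from this diff, the meaning of the defaults is my assumption):

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class AWQConfigSketch:
        ckpt: Optional[str] = None  # AWQ weights; caller falls back to model_path
        wbits: int = 16             # 16 presumably means "no AWQ quantization"
        groupsize: int = -1         # -1 presumably means "checkpoint default"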
@@ -113,7 +127,9 @@ def create_model_worker_app(
         load_8bit=args.load_8bit,
         cpu_offloading=args.cpu_offloading,
         gptq_config=gptq_config,
+        awq_config=awq_config,
         stream_interval=args.stream_interval,
+        conv_template=args.conv_template,
     )

     sys.modules["fastchat.serve.model_worker"].worker = worker
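
The sys.modules assignment is what makes the locally built worker visible to fastchat: its route handlers read a module-level worker global that is normally set when model_worker runs as a script. A toy, runnable version of the pattern (my illustration, not code from either project):

    import sys
    import types

    # Handlers look the global up at call time, so swapping it through
    # sys.modules changes what they see.
    mod = types.ModuleType("toy_worker")
    mod.worker = None
    mod.handle = lambda: mod.worker  # stands in for a route handler
    sys.modules["toy_worker"] = mod

    sys.modules["toy_worker"].worker = "patched instance"
    assert mod.handle() == "patched instance"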
@@ -126,8 +142,6 @@ def create_model_worker_app(
 def create_openai_api_app(
-    host=host_ip,
-    port=openai_api_port,
     controller_address=base_url.format(controller_port),
     api_keys=[],
 ):
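
With host and port gone from this signature, binding an address is left to the server runner; the factory takes only what shapes the app itself. Launching becomes a two-step affair (a sketch; host_ip and openai_api_port are the module's existing constants):

    import uvicorn

    app = create_openai_api_app(api_keys=[])              # build the FastAPI app
    uvicorn.run(app, host=host_ip, port=openai_api_port)  # bind at run time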