From f92b0023427b8319a4d84e7eeac5fcd85c25ae90 Mon Sep 17 00:00:00 2001 From: liunux4odoo Date: Thu, 17 Aug 2023 13:24:53 +0800 Subject: [PATCH 01/16] add startup.py: start specified servers with one command. see python startup.py --help --- .gitignore | 2 +- configs/model_config.py.example | 4 +- configs/server_config.py.example | 88 ++++++++ server/api.py | 3 +- startup.py | 364 +++++++++++++++++++++++++++++++ tests/api/stream_api_test.py | 12 +- 6 files changed, 468 insertions(+), 5 deletions(-) create mode 100644 configs/server_config.py.example create mode 100644 startup.py diff --git a/.gitignore b/.gitignore index af50500..b5918ee 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,4 @@ logs .idea/ __pycache__/ knowledge_base/ -configs/model_config.py \ No newline at end of file +configs/*.py diff --git a/configs/model_config.py.example b/configs/model_config.py.example index 8771cfc..be0bebc 100644 --- a/configs/model_config.py.example +++ b/configs/model_config.py.example @@ -52,13 +52,13 @@ llm_model_dict = { "chatglm2-6b": { "local_model_path": "THUDM/chatglm2-6b", - "api_base_url": "http://localhost:8888/v1", # "name"修改为fastchat服务中的"api_base_url" + "api_base_url": "http://localhost:8888/v1", # URL需要与运行fastchat服务端的server_config.FSCHAT_OPENAI_API一致 "api_key": "EMPTY" }, "chatglm2-6b-32k": { "local_model_path": "THUDM/chatglm2-6b-32k", # "THUDM/chatglm2-6b-32k", - "api_base_url": "http://localhost:8888/v1", # "name"修改为fastchat服务中的"api_base_url" + "api_base_url": "http://localhost:8888/v1", # "URL需要与运行fastchat服务端的server_config.FSCHAT_OPENAI_API一致 "api_key": "EMPTY" }, diff --git a/configs/server_config.py.example b/configs/server_config.py.example new file mode 100644 index 0000000..24ce6b4 --- /dev/null +++ b/configs/server_config.py.example @@ -0,0 +1,88 @@ +from .model_config import LLM_MODEL, llm_model_dict, LLM_DEVICE + + +# API 是否开启跨域,默认为False,如果需要开启,请设置为True +# is open cross domain +OPEN_CROSS_DOMAIN = False + +# 各服务器默认绑定host +DEFAULT_BIND_HOST = "127.0.0.1" + +# webui.py server +WEBUI_SERVER = { + "host": DEFAULT_BIND_HOST, + "port": 8501, +} + +# api.py server +API_SERVER = { + "host": DEFAULT_BIND_HOST, + "port": 7861, +} + +# fastchat openai_api server +FSCHAT_OPENAI_API = { + "host": DEFAULT_BIND_HOST, + "port": 8888, # model_config.llm_model_dict中模型配置的api_base_url需要与这里一致。 +} + +# fastchat model_worker server +# 这些模型必须是在model_config.llm_model_dict中正确配置的。 +# 在启动startup.py时,可用通过`--model-worker --model-name xxxx`指定模型,不指定则为LLM_MODEL +FSCHAT_MODEL_WORKERS = { + LLM_MODEL: { + "host": DEFAULT_BIND_HOST, + "port": 20002, + "device": LLM_DEVICE, + # todo: 多卡加载需要配置的参数 + "gpus": None, + "numgpus": 1, + # 以下为非常用参数,可根据需要配置 + # "max_gpu_memory": "20GiB", + # "load_8bit": False, + # "cpu_offloading": None, + # "gptq_ckpt": None, + # "gptq_wbits": 16, + # "gptq_groupsize": -1, + # "gptq_act_order": False, + # "awq_ckpt": None, + # "awq_wbits": 16, + # "awq_groupsize": -1, + # "model_names": [LLM_MODEL], + # "conv_template": None, + # "limit_worker_concurrency": 5, + # "stream_interval": 2, + # "no_register": False, + }, +} + + +# fastchat multi model worker server +FSCHAT_MULTI_MODEL_WORKERS = { + # todo +} + +# fastchat controller server +FSCHAT_CONTROLLER = { + "host": DEFAULT_BIND_HOST, + "port": 20001, + "dispatch_method": "shortest_queue", +} + + +# 以下不要更改 +def fschat_controller_address() -> str: + host = FSCHAT_CONTROLLER["host"] + port = FSCHAT_CONTROLLER["port"] + return f"http://{host}:{port}" + +def fschat_model_worker_address(model_name: str = LLM_MODEL) -> str: + if model 
:= FSCHAT_MODEL_WORKERS.get(model_name): + host = model["host"] + port = model["port"] + return f"http://{host}:{port}" + +def fschat_openai_api_address() -> str: + host = FSCHAT_OPENAI_API["host"] + port = FSCHAT_OPENAI_API["port"] + return f"http://{host}:{port}" diff --git a/server/api.py b/server/api.py index 800680c..c398f15 100644 --- a/server/api.py +++ b/server/api.py @@ -4,7 +4,8 @@ import os sys.path.append(os.path.dirname(os.path.dirname(__file__))) -from configs.model_config import NLTK_DATA_PATH, OPEN_CROSS_DOMAIN +from configs.model_config import NLTK_DATA_PATH +from configs.server_config import OPEN_CROSS_DOMAIN import argparse import uvicorn from fastapi.middleware.cors import CORSMiddleware diff --git a/startup.py b/startup.py new file mode 100644 index 0000000..975bc92 --- /dev/null +++ b/startup.py @@ -0,0 +1,364 @@ +from multiprocessing import Process, Queue +import multiprocessing as mp +import subprocess +import sys +import os +from xml.etree.ElementPath import prepare_child + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) +from configs.model_config import llm_model_dict, LLM_MODEL, LLM_DEVICE, LOG_PATH, logger +from configs.server_config import (WEBUI_SERVER, API_SERVER, OPEN_CROSS_DOMAIN, FSCHAT_CONTROLLER, FSCHAT_MODEL_WORKERS, + FSCHAT_OPENAI_API, fschat_controller_address, fschat_model_worker_address,) +from server.utils import MakeFastAPIOffline, FastAPI +import argparse +from typing import Tuple, List + + +def set_httpx_timeout(timeout=60.0): + import httpx + httpx._config.DEFAULT_TIMEOUT_CONFIG.connect = timeout + httpx._config.DEFAULT_TIMEOUT_CONFIG.read = timeout + httpx._config.DEFAULT_TIMEOUT_CONFIG.write = timeout + + +def create_controller_app( + dispatch_method: str, +) -> FastAPI: + import fastchat.constants + fastchat.constants.LOGDIR = LOG_PATH + from fastchat.serve.controller import app, Controller + + controller = Controller(dispatch_method) + sys.modules["fastchat.serve.controller"].controller = controller + + MakeFastAPIOffline(app) + app.title = "FastChat Controller" + return app + + +def create_model_worker_app(**kwargs) -> Tuple[argparse.ArgumentParser, FastAPI]: + import fastchat.constants + fastchat.constants.LOGDIR = LOG_PATH + from fastchat.serve.model_worker import app, GptqConfig, AWQConfig, ModelWorker, worker_id + import argparse + import threading + import fastchat.serve.model_worker + + # workaround to make program exit with Ctrl+c + # it should be deleted after pr is merged by fastchat + def _new_init_heart_beat(self): + self.register_to_controller() + self.heart_beat_thread = threading.Thread( + target=fastchat.serve.model_worker.heart_beat_worker, args=(self,), daemon=True, + ) + self.heart_beat_thread.start() + ModelWorker.init_heart_beat = _new_init_heart_beat + + parser = argparse.ArgumentParser() + args = parser.parse_args([]) + # default args. 
should be deleted after pr is merged by fastchat + args.gpus = None + args.max_gpu_memory = "20GiB" + args.load_8bit = False + args.cpu_offloading = None + args.gptq_ckpt = None + args.gptq_wbits = 16 + args.gptq_groupsize = -1 + args.gptq_act_order = False + args.awq_ckpt = None + args.awq_wbits = 16 + args.awq_groupsize = -1 + args.num_gpus = 1 + args.model_names = [] + args.conv_template = None + args.limit_worker_concurrency = 5 + args.stream_interval = 2 + args.no_register = False + + for k, v in kwargs.items(): + setattr(args, k, v) + + if args.gpus: + if args.num_gpus is None: + args.num_gpus = len(args.gpus.split(',')) + if len(args.gpus.split(",")) < args.num_gpus: + raise ValueError( + f"Larger --num-gpus ({args.num_gpus}) than --gpus {args.gpus}!" + ) + os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus + + + gptq_config = GptqConfig( + ckpt=args.gptq_ckpt or args.model_path, + wbits=args.gptq_wbits, + groupsize=args.gptq_groupsize, + act_order=args.gptq_act_order, + ) + awq_config = AWQConfig( + ckpt=args.awq_ckpt or args.model_path, + wbits=args.awq_wbits, + groupsize=args.awq_groupsize, + ) + + worker = ModelWorker( + controller_addr=args.controller_address, + worker_addr=args.worker_address, + worker_id=worker_id, + model_path=args.model_path, + model_names=args.model_names, + limit_worker_concurrency=args.limit_worker_concurrency, + no_register=args.no_register, + device=args.device, + num_gpus=args.num_gpus, + max_gpu_memory=args.max_gpu_memory, + load_8bit=args.load_8bit, + cpu_offloading=args.cpu_offloading, + gptq_config=gptq_config, + awq_config=awq_config, + stream_interval=args.stream_interval, + conv_template=args.conv_template, + ) + + sys.modules["fastchat.serve.model_worker"].worker = worker + sys.modules["fastchat.serve.model_worker"].args = args + sys.modules["fastchat.serve.model_worker"].gptq_config = gptq_config + + MakeFastAPIOffline(app) + app.title = f"FastChat LLM Server ({LLM_MODEL})" + return app + + +def create_openai_api_app( + controller_address: str, + api_keys: List = [], +) -> FastAPI: + import fastchat.constants + fastchat.constants.LOGDIR = LOG_PATH + from fastchat.serve.openai_api_server import app, CORSMiddleware, app_settings + + app.add_middleware( + CORSMiddleware, + allow_credentials=True, + allow_origins=["*"], + allow_methods=["*"], + allow_headers=["*"], + ) + + app_settings.controller_address = controller_address + app_settings.api_keys = api_keys + + MakeFastAPIOffline(app) + app.title = "FastChat OpeanAI API Server" + return app + + +def _set_app_seq(app: FastAPI, q: Queue, run_seq: int): + if run_seq == 1: + @app.on_event("startup") + async def on_startup(): + set_httpx_timeout() + q.put(run_seq) + elif run_seq > 1: + @app.on_event("startup") + async def on_startup(): + set_httpx_timeout() + while True: + no = q.get() + if no != run_seq - 1: + q.put(no) + else: + break + q.put(run_seq) + + +def run_controller(q: Queue, run_seq: int = 1): + import uvicorn + + app = create_controller_app(FSCHAT_CONTROLLER.get("dispatch_method")) + _set_app_seq(app, q, run_seq) + + host = FSCHAT_CONTROLLER["host"] + port = FSCHAT_CONTROLLER["port"] + uvicorn.run(app, host=host, port=port) + + +def run_model_worker(model_name: str = LLM_MODEL, q: Queue = None, run_seq: int = 2): + import uvicorn + + kwargs = FSCHAT_MODEL_WORKERS[LLM_MODEL].copy() + host = kwargs.pop("host") + port = kwargs.pop("port") + model_path = llm_model_dict[model_name].get("local_model_path", "") + kwargs["model_path"] = model_path + kwargs["model_names"] = [model_name] + 
kwargs["controller_address"] = fschat_controller_address() + kwargs["worker_address"] = fschat_model_worker_address() + + app = create_model_worker_app(**kwargs) + _set_app_seq(app, q, run_seq) + + uvicorn.run(app, host=host, port=port) + + +def run_openai_api(q: Queue, run_seq: int = 3): + import uvicorn + + controller_addr = fschat_controller_address() + app = create_openai_api_app(controller_addr) # todo: not support keys yet. + _set_app_seq(app, q, run_seq) + + host = FSCHAT_OPENAI_API["host"] + port = FSCHAT_OPENAI_API["port"] + uvicorn.run(app, host=host, port=port) + + +def run_api_server(q: Queue, run_seq: int = 4): + from server.api import create_app + import uvicorn + + app = create_app() + _set_app_seq(app, q, run_seq) + + host = API_SERVER["host"] + port = API_SERVER["port"] + + uvicorn.run(app, host=host, port=port) + + +def run_webui(): + from configs.model_config import logger + host = WEBUI_SERVER["host"] + port = WEBUI_SERVER["port"] + p = subprocess.Popen(["streamlit", "run", "webui.py", + "--server.address", host, + "--server.port", str(port)]) + p.wait() + + +def parse_args() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser() + parser.add_argument( + "--all", + action="store_true", + help="run fastchat's controller/model_worker/openai_api servers, run api.py and webui.py", + ) + parser.add_argument( + "--openai-api", + action="store_true", + help="run fastchat controller/openai_api servers", + ) + parser.add_argument( + "--model-worker", + action="store_true", + help="run fastchat model_worker server with specified model name. specify --model-name if not using default LLM_MODEL", + ) + parser.add_argument( + "--model-name", + type=str, + default=LLM_MODEL, + help="specify model name for model worker.", + ) + parser.add_argument( + "--api", + action="store_true", + help="run api.py server", + ) + parser.add_argument( + "--webui", + action="store_true", + help="run webui.py server", + ) + args = parser.parse_args() + return args + + +if __name__ == "__main__": + mp.set_start_method("spawn") + queue = Queue() + args = parse_args() + if args.all: + args.openai_api = True + args.model_worker = True + args.api = True + args.webui = True + + logger.info(f"正在启动服务:") + logger.info(f"如需查看 llm_api 日志,请前往 {LOG_PATH}") + + processes = {} + + if args.openai_api: + process = Process( + target=run_controller, + name=f"controller({os.getpid()})", + args=(queue, len(processes) + 1), + daemon=True, + ) + process.start() + processes["controller"] = process + + process = Process( + target=run_openai_api, + name=f"openai_api({os.getpid()})", + args=(queue, len(processes) + 1), + daemon=True, + ) + process.start() + processes["openai_api"] = process + + if args.model_worker: + process = Process( + target=run_model_worker, + name=f"model_worker({os.getpid()})", + args=(args.model_name, queue, len(processes) + 1), + daemon=True, + ) + process.start() + processes["model_worker"] = process + + if args.api: + process = Process( + target=run_api_server, + name=f"API Server{os.getpid()})", + args=(queue, len(processes) + 1), + daemon=True, + ) + process.start() + processes["api"] = process + + if args.webui: + process = Process( + target=run_webui, + name=f"WEBUI Server{os.getpid()})", + daemon=True, + ) + process.start() + processes["webui"] = process + + try: + if model_worker_process := processes.get("model_worker"): + model_worker_process.join() + for name, process in processes.items(): + if name != "model_worker": + process.join() + except: + if model_worker_process := 
processes.get("model_worker"): + model_worker_process.terminate() + for name, process in processes.items(): + if name != "model_worker": + process.terminate() + +# 服务启动后接口调用示例: +# import openai +# openai.api_key = "EMPTY" # Not support yet +# openai.api_base = "http://localhost:8888/v1" + +# model = "chatglm2-6b" + +# # create a chat completion +# completion = openai.ChatCompletion.create( +# model=model, +# messages=[{"role": "user", "content": "Hello! What is your name?"}] +# ) +# # print the completion +# print(completion.choices[0].message.content) diff --git a/tests/api/stream_api_test.py b/tests/api/stream_api_test.py index 06a9654..2902c8a 100644 --- a/tests/api/stream_api_test.py +++ b/tests/api/stream_api_test.py @@ -28,4 +28,14 @@ if __name__ == "__main__": for line in response.iter_content(decode_unicode=True): print(line, flush=True) else: - print("Error:", response.status_code) \ No newline at end of file + print("Error:", response.status_code) + + + r = requests.post( + openai_url + "/chat/completions", + json={"model": LLM_MODEL, "messages": "你好", "max_tokens": 1000}) + data = r.json() + print(f"/chat/completions\n") + print(data) + assert "choices" in data + From 7dfc337bfad6915016be5030c11acc70888aa1e3 Mon Sep 17 00:00:00 2001 From: liunux4odoo Date: Thu, 17 Aug 2023 14:11:21 +0800 Subject: [PATCH 02/16] add controller parameter. used to run model-worker --- startup.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/startup.py b/startup.py index 975bc92..4d84bf8 100644 --- a/startup.py +++ b/startup.py @@ -182,7 +182,12 @@ def run_controller(q: Queue, run_seq: int = 1): uvicorn.run(app, host=host, port=port) -def run_model_worker(model_name: str = LLM_MODEL, q: Queue = None, run_seq: int = 2): +def run_model_worker( + model_name: str = LLM_MODEL, + controller_address: str = "", + q: Queue = None, + run_seq: int = 2, +): import uvicorn kwargs = FSCHAT_MODEL_WORKERS[LLM_MODEL].copy() @@ -191,7 +196,7 @@ def run_model_worker(model_name: str = LLM_MODEL, q: Queue = None, run_seq: int model_path = llm_model_dict[model_name].get("local_model_path", "") kwargs["model_path"] = model_path kwargs["model_names"] = [model_name] - kwargs["controller_address"] = fschat_controller_address() + kwargs["controller_address"] = controller_address or fschat_controller_address() kwargs["worker_address"] = fschat_model_worker_address() app = create_model_worker_app(**kwargs) @@ -238,32 +243,43 @@ def run_webui(): def parse_args() -> argparse.ArgumentParser: parser = argparse.ArgumentParser() parser.add_argument( + "-a", "--all", action="store_true", help="run fastchat's controller/model_worker/openai_api servers, run api.py and webui.py", ) parser.add_argument( + "-o", "--openai-api", action="store_true", help="run fastchat controller/openai_api servers", ) parser.add_argument( + "-m", "--model-worker", action="store_true", help="run fastchat model_worker server with specified model name. specify --model-name if not using default LLM_MODEL", ) parser.add_argument( + "-n" "--model-name", type=str, default=LLM_MODEL, help="specify model name for model worker.", ) + parser.add_argument( + "-c" + "--controller", + type=str, + help="specify controller address the worker is registered to. 
default is server_config.FSCHAT_CONTROLLER", + ) parser.add_argument( "--api", action="store_true", help="run api.py server", ) parser.add_argument( + "-w", "--webui", action="store_true", help="run webui.py server", From 6c9e3ddc21f709088484ec24d53bd9791e5a24ba Mon Sep 17 00:00:00 2001 From: liunux4odoo Date: Thu, 17 Aug 2023 14:22:19 +0800 Subject: [PATCH 03/16] make webui started at last --- startup.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/startup.py b/startup.py index 4d84bf8..d7c2ef9 100644 --- a/startup.py +++ b/startup.py @@ -230,10 +230,16 @@ def run_api_server(q: Queue, run_seq: int = 4): uvicorn.run(app, host=host, port=port) -def run_webui(): - from configs.model_config import logger +def run_webui(q: Queue, run_seq: int = 5): host = WEBUI_SERVER["host"] port = WEBUI_SERVER["port"] + while True: + no = q.get() + if no != run_seq - 1: + q.put(no) + else: + break + q.put(run_seq) p = subprocess.Popen(["streamlit", "run", "webui.py", "--server.address", host, "--server.port", str(port)]) @@ -247,18 +253,21 @@ def parse_args() -> argparse.ArgumentParser: "--all", action="store_true", help="run fastchat's controller/model_worker/openai_api servers, run api.py and webui.py", + dest="all", ) parser.add_argument( "-o", "--openai-api", action="store_true", help="run fastchat controller/openai_api servers", + dest="openai_api", ) parser.add_argument( "-m", "--model-worker", action="store_true", help="run fastchat model_worker server with specified model name. specify --model-name if not using default LLM_MODEL", + dest="model_worker", ) parser.add_argument( "-n" @@ -266,23 +275,27 @@ def parse_args() -> argparse.ArgumentParser: type=str, default=LLM_MODEL, help="specify model name for model worker.", + dest="model_name", ) parser.add_argument( "-c" "--controller", type=str, help="specify controller address the worker is registered to. 
default is server_config.FSCHAT_CONTROLLER", + dest="controller_address", ) parser.add_argument( "--api", action="store_true", help="run api.py server", + dest="api", ) parser.add_argument( "-w", "--webui", action="store_true", help="run webui.py server", + dest="webui", ) args = parser.parse_args() return args @@ -326,7 +339,7 @@ if __name__ == "__main__": process = Process( target=run_model_worker, name=f"model_worker({os.getpid()})", - args=(args.model_name, queue, len(processes) + 1), + args=(args.model_name, args.controller_address, queue, len(processes) + 1), daemon=True, ) process.start() @@ -346,6 +359,7 @@ if __name__ == "__main__": process = Process( target=run_webui, name=f"WEBUI Server{os.getpid()})", + args=(queue,), daemon=True, ) process.start() From 44d4bacf1c01132607304b83b1bd83382fabfbbe Mon Sep 17 00:00:00 2001 From: hzg0601 Date: Thu, 17 Aug 2023 16:59:16 +0800 Subject: [PATCH 04/16] =?UTF-8?q?update=20llm=5Fapi=5Flaunch,api=5Fallinon?= =?UTF-8?q?e,webui=5Fallinone,readme:1.llm=5Fapi=5Flaunch,api=5Fallinone?= =?UTF-8?q?=5Fwebui=5Fallinone=E6=9B=B4=E5=90=8D;2.=20=E6=9B=B4=E6=96=B0re?= =?UTF-8?q?adme=E5=85=B3=E4=BA=8Estartup=E7=9A=84=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 86 +++---------------- ...{api_allinone.py => api_allinone_stale.py} | 2 +- .../{llm_api_launch.py => llm_api_stale.py} | 0 .../webui_allinone_stale.py | 4 +- startup.py | 30 ++++++- 5 files changed, 41 insertions(+), 81 deletions(-) rename server/{api_allinone.py => api_allinone_stale.py} (95%) rename server/{llm_api_launch.py => llm_api_stale.py} (100%) rename webui_allinone.py => server/webui_allinone_stale.py (93%) diff --git a/README.md b/README.md index 9766035..a52d30d 100644 --- a/README.md +++ b/README.md @@ -214,7 +214,6 @@ embedding_model_dict = { ```shell $ python init_database.py ``` - - 如果您是第一次运行本项目,知识库尚未建立,或者配置文件中的知识库类型、嵌入模型发生变化,需要以下命令初始化或重建知识库: ```shell @@ -244,6 +243,7 @@ $ python server/llm_api.py ``` 项目支持多卡加载,需在 llm_api.py 中修改 create_model_worker_app 函数中,修改如下三个参数: + ```python gpus=None, num_gpus=1, @@ -258,7 +258,7 @@ max_gpu_memory="20GiB" ##### 5.1.2 基于命令行脚本 llm_api_launch.py 启动 LLM 服务 -⚠️ **注意:** +⚠️ **注意:** **1.llm_api_launch.py脚本原生仅适用于linux,mac设备需要安装对应的linux命令,win平台请使用wls;** @@ -275,11 +275,13 @@ $ python server/llm_api_launch.py ```shell $ python server/llm_api_launch.py --model-path-addresss model1@host1@port1 model2@host2@port2 ``` + 如果出现server端口占用情况,需手动指定server端口,并同步修改model_config.py下对应模型的base_api_url为指定端口: ```shell $ python server/llm_api_launch.py --server-port 8887 ``` + 如果要启动多卡加载,示例命令如下: ```shell @@ -354,7 +356,6 @@ $ streamlit run webui.py --server.port 666 - Web UI 对话界面: ![](img/webui_0813_0.png) - - Web UI 知识库管理页面: ![](img/webui_0813_1.png) @@ -363,86 +364,21 @@ $ streamlit run webui.py --server.port 666 ### 6. 一键启动 -⚠️ **注意:** - -**1. 一键启动脚本仅原生适用于Linux,Mac 设备需要安装对应的linux命令, Winodws 平台请使用 WLS;** - -**2. 加载非默认模型需要用命令行参数 `--model-path-address` 指定模型,不会读取 `model_config.py` 配置。** - -#### 6.1 API 服务一键启动脚本 - -新增 API 一键启动脚本,可一键开启 FastChat 后台服务及本项目提供的 API 服务,调用示例: - -调用默认模型: +更新一键启动脚本startup.py,一键启动所有fastchat服务、API服务、WebUI服务实例: ```shell -$ python server/api_allinone.py +$ python startup.py --all-webui ``` -加载多个非默认模型: +可选 `all-webui,all-api,llm-api,controller,openai-api,model-worker,api,webui`. 
+ +若想指定非默认模型,需要用--model-name选项,示例: ```shell -$ python server/api_allinone.py --model-path-address model1@host1@port1 model2@host2@port2 +$ python startup.py --all-webui --model-name Qwen-7B-Chat ``` -如果出现server端口占用情况,需手动指定server端口,并同步修改model_config.py下对应模型的base_api_url为指定端口: - -```shell -$ python server/api_allinone.py --server-port 8887 -``` - -多卡启动: - -```shell -python server/api_allinone.py --model-path-address model@host@port --num-gpus 2 --gpus 0,1 --max-gpu-memory 10GiB -``` - -其他参数详见各脚本及 FastChat 服务说明。 - -#### 6.2 webui一键启动脚本 - -加载本地模型: - -```shell -$ python webui_allinone.py -``` - -调用远程 API 服务: - -```shell -$ python webui_allinone.py --use-remote-api -``` -如果出现server端口占用情况,需手动指定server端口,并同步修改model_config.py下对应模型的base_api_url为指定端口: - -```shell -$ python webui_allinone.py --server-port 8887 -``` - -后台运行webui服务: - -```shell -$ python webui_allinone.py --nohup -``` - -加载多个非默认模型: - -```shell -$ python webui_allinone.py --model-path-address model1@host1@port1 model2@host2@port2 -``` - -多卡启动: - -```shell -$ python webui_alline.py --model-path-address model@host@port --num-gpus 2 --gpus 0,1 --max-gpu-memory 10GiB -``` - -其他参数详见各脚本及 Fastchat 服务说明。 - -上述两个一键启动脚本会后台运行多个服务,如要停止所有服务,可使用 `shutdown_all.sh` 脚本: - -```shell -bash shutdown_all.sh -``` +**注意:startup脚本用多进程方式启动各模块的服务,可能会导致打印顺序问题,请等待全部服务发起后再调用,并根据默认端口调用服务(默认api服务端口127.0.0.1:7861,默认webui服务端口:`本机IP:8501`)** ## 常见问题 diff --git a/server/api_allinone.py b/server/api_allinone_stale.py similarity index 95% rename from server/api_allinone.py rename to server/api_allinone_stale.py index 3be8581..78a7a6d 100644 --- a/server/api_allinone.py +++ b/server/api_allinone_stale.py @@ -15,7 +15,7 @@ import os sys.path.append(os.path.dirname(__file__)) sys.path.append(os.path.dirname(os.path.dirname(__file__))) -from llm_api_launch import launch_all, parser, controller_args, worker_args, server_args +from llm_api_stale import launch_all, parser, controller_args, worker_args, server_args from api import create_app import uvicorn diff --git a/server/llm_api_launch.py b/server/llm_api_stale.py similarity index 100% rename from server/llm_api_launch.py rename to server/llm_api_stale.py diff --git a/webui_allinone.py b/server/webui_allinone_stale.py similarity index 93% rename from webui_allinone.py rename to server/webui_allinone_stale.py index 2992ae5..627f956 100644 --- a/webui_allinone.py +++ b/server/webui_allinone_stale.py @@ -20,9 +20,9 @@ from webui_pages.utils import * from streamlit_option_menu import option_menu from webui_pages import * import os -from server.llm_api_launch import string_args,launch_all,controller_args,worker_args,server_args,LOG_PATH +from server.llm_api_stale import string_args,launch_all,controller_args,worker_args,server_args,LOG_PATH -from server.api_allinone import parser, api_args +from server.api_allinone_stale import parser, api_args import subprocess parser.add_argument("--use-remote-api",action="store_true") diff --git a/startup.py b/startup.py index d7c2ef9..299eec1 100644 --- a/startup.py +++ b/startup.py @@ -249,8 +249,19 @@ def run_webui(q: Queue, run_seq: int = 5): def parse_args() -> argparse.ArgumentParser: parser = argparse.ArgumentParser() parser.add_argument( - "-a", - "--all", + "--all-webui", + action="store_true", + help="run fastchat's controller/model_worker/openai_api servers, run api.py and webui.py", + dest="all", + ) + parser.add_argument( + "--all-api", + action="store_true", + help="run fastchat's controller/model_worker/openai_api servers, run api.py and webui.py", + dest="all", + ) + 
parser.add_argument( + "--llm-api", action="store_true", help="run fastchat's controller/model_worker/openai_api servers, run api.py and webui.py", dest="all", @@ -305,12 +316,25 @@ if __name__ == "__main__": mp.set_start_method("spawn") queue = Queue() args = parse_args() - if args.all: + if args.all_webui: args.openai_api = True args.model_worker = True args.api = True args.webui = True + elif args.all_api: + args.openai_api = True + args.model_worker = True + args.api = True + args.webui = False + + elif args.llm_api: + args.openai_api = True + args.model_worker = True + args.api = False + args.webui = False + + logger.info(f"正在启动服务:") logger.info(f"如需查看 llm_api 日志,请前往 {LOG_PATH}") From 0830f0d3b74e4d16223a99c45afc52aaf3c74121 Mon Sep 17 00:00:00 2001 From: hzg0601 Date: Thu, 17 Aug 2023 17:17:06 +0800 Subject: [PATCH 05/16] =?UTF-8?q?update=20readme:=20=E6=9B=B4=E6=96=B0star?= =?UTF-8?q?tup=E5=90=AF=E5=8A=A8=E6=96=B9=E5=BC=8F=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a52d30d..794a47f 100644 --- a/README.md +++ b/README.md @@ -364,13 +364,13 @@ $ streamlit run webui.py --server.port 666 ### 6. 一键启动 -更新一键启动脚本startup.py,一键启动所有fastchat服务、API服务、WebUI服务实例: +更新一键启动脚本startup.py,一键启动所有fastchat服务、API服务、WebUI服务,示例代码: ```shell $ python startup.py --all-webui ``` -可选 `all-webui,all-api,llm-api,controller,openai-api,model-worker,api,webui`. +可选 `all-webui,all-api,llm-api,controller,openai-api,model-worker,api,webui`,其中all-webui为一键启动webui所有依赖服务,all-api为一键启动api所有依赖服务,llm-api为一键启动fastchat所有依赖的llm服务,openai-api为仅启动fastchat的controller和openai-api-server服务,其他为单独服务启动选项。 若想指定非默认模型,需要用--model-name选项,示例: @@ -378,7 +378,11 @@ $ python startup.py --all-webui $ python startup.py --all-webui --model-name Qwen-7B-Chat ``` -**注意:startup脚本用多进程方式启动各模块的服务,可能会导致打印顺序问题,请等待全部服务发起后再调用,并根据默认端口调用服务(默认api服务端口127.0.0.1:7861,默认webui服务端口:`本机IP:8501`)** +**注意:** + +**1. 
startup脚本用多进程方式启动各模块的服务,可能会导致打印顺序问题,请等待全部服务发起后再调用,并根据默认或指定端口调用服务(默认llm-api服务端口:127.0.0.1:8888,默认api服务端口:127.0.0.1:7861,默认webui服务端口:`本机IP:8501`)** + +**2.服务启动时间示设备不同而不同,约3-10分钟,如长时间没有启动请前往 `./logs`目录下监控日志,定位问题。** ## 常见问题 From d4c6a23a8db845837a8298564064ec9eb3d22649 Mon Sep 17 00:00:00 2001 From: liunux4odoo Date: Thu, 17 Aug 2023 20:02:28 +0800 Subject: [PATCH 06/16] log server informations after startup --- startup.py | 45 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/startup.py b/startup.py index 299eec1..e1ec023 100644 --- a/startup.py +++ b/startup.py @@ -3,12 +3,13 @@ import multiprocessing as mp import subprocess import sys import os -from xml.etree.ElementPath import prepare_child +from pprint import pprint + sys.path.append(os.path.dirname(os.path.dirname(__file__))) -from configs.model_config import llm_model_dict, LLM_MODEL, LLM_DEVICE, LOG_PATH, logger +from configs.model_config import EMBEDDING_DEVICE, EMBEDDING_MODEL, llm_model_dict, LLM_MODEL, LLM_DEVICE, LOG_PATH, logger from configs.server_config import (WEBUI_SERVER, API_SERVER, OPEN_CROSS_DOMAIN, FSCHAT_CONTROLLER, FSCHAT_MODEL_WORKERS, - FSCHAT_OPENAI_API, fschat_controller_address, fschat_model_worker_address,) + FSCHAT_OPENAI_API, fschat_controller_address, fschat_model_worker_address, fschat_openai_api_address,) from server.utils import MakeFastAPIOffline, FastAPI import argparse from typing import Tuple, List @@ -313,6 +314,12 @@ def parse_args() -> argparse.ArgumentParser: if __name__ == "__main__": + import platform + import time + import langchain + import fastchat + from configs.server_config import api_address, webui_address + mp.set_start_method("spawn") queue = Queue() args = parse_args() @@ -383,13 +390,43 @@ if __name__ == "__main__": process = Process( target=run_webui, name=f"WEBUI Server{os.getpid()})", - args=(queue,), + args=(queue, len(processes) + 1), daemon=True, ) process.start() processes["webui"] = process try: + # log infors + while True: + no = queue.get() + if no == len(processes): + time.sleep(0.5) + print("\n\n") + print("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30) + print(f"操作系统:{platform.platform()}.") + print(f"python版本:{sys.version}") + print(f"项目版本:") # todo + print(f"langchain版本:{langchain.__version__}. fastchat版本:{fastchat.__version__}") + print("\n") + print(f"当前LLM模型:{LLM_MODEL} @ {LLM_DEVICE}") + pprint(llm_model_dict[LLM_MODEL]) + print(f"当前Embbedings模型: {EMBEDDING_MODEL} @ {EMBEDDING_DEVICE}") + print("\n") + print(f"服务端运行信息:") + if args.openai_api: + print(f" OpenAI API Server: {fschat_openai_api_address()}/v1") + print("请确认llm_model_dict中配置的api_base_url与上面地址一致。") + if args.api: + print(f" Chatchat API Server: {api_address()}") + if args.webui: + print(f" Chatchat WEBUI Server: {webui_address()}") + print("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30) + print("\n\n") + break + else: + queue.put(no) + if model_worker_process := processes.get("model_worker"): model_worker_process.join() for name, process in processes.items(): From 0ecf3379b4bdc19b073efadc19d6f31390fbfea2 Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Thu, 17 Aug 2023 21:29:16 +0800 Subject: [PATCH 07/16] add descriptions about server_config in README.md --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 794a47f..39f4fe6 100644 --- a/README.md +++ b/README.md @@ -181,9 +181,11 @@ $ git clone https://huggingface.co/moka-ai/m3e-base ### 3. 
设置配置项 -复制文件 [configs/model_config.py.example](configs/model_config.py.example) 存储至项目路径下 `./configs` 路径下,并重命名为 `model_config.py`。 +复制模型相关参数配置模板文件 [configs/model_config.py.example](configs/model_config.py.example) 存储至项目路径下 `./configs` 路径下,并重命名为 `model_config.py`。 -在开始执行 Web UI 或命令行交互前,请先检查 `configs/model_config.py` 中的各项模型参数设计是否符合需求: +复制服务相关参数配置模板文件 [configs/server_config.py.example](configs/server_config.py.example) 存储至项目路径下 `./configs` 路径下,并重命名为 `server_config.py`。 + +在开始执行 Web UI 或命令行交互前,请先检查 `configs/model_config.py` 和 `configs/server_config.py` 中的各项模型参数设计是否符合需求: - 请确认已下载至本地的 LLM 模型本地存储路径写在 `llm_model_dict` 对应模型的 `local_model_path` 属性中,如: From 4318197ac760dd912da2ef7f8d1364cef69a1ed5 Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Thu, 17 Aug 2023 21:30:40 +0800 Subject: [PATCH 08/16] reformat server_config.py.example --- configs/server_config.py.example | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/configs/server_config.py.example b/configs/server_config.py.example index 24ce6b4..fd83fc0 100644 --- a/configs/server_config.py.example +++ b/configs/server_config.py.example @@ -1,5 +1,4 @@ -from .model_config import LLM_MODEL, llm_model_dict, LLM_DEVICE - +from .model_config import LLM_MODEL, LLM_DEVICE # API 是否开启跨域,默认为False,如果需要开启,请设置为True # is open cross domain @@ -23,7 +22,7 @@ API_SERVER = { # fastchat openai_api server FSCHAT_OPENAI_API = { "host": DEFAULT_BIND_HOST, - "port": 8888, # model_config.llm_model_dict中模型配置的api_base_url需要与这里一致。 + "port": 8888, # model_config.llm_model_dict中模型配置的api_base_url需要与这里一致。 } # fastchat model_worker server @@ -56,7 +55,6 @@ FSCHAT_MODEL_WORKERS = { }, } - # fastchat multi model worker server FSCHAT_MULTI_MODEL_WORKERS = { # todo @@ -76,12 +74,14 @@ def fschat_controller_address() -> str: port = FSCHAT_CONTROLLER["port"] return f"http://{host}:{port}" + def fschat_model_worker_address(model_name: str = LLM_MODEL) -> str: if model := FSCHAT_MODEL_WORKERS.get(model_name): host = model["host"] port = model["port"] return f"http://{host}:{port}" + def fschat_openai_api_address() -> str: host = FSCHAT_OPENAI_API["host"] port = FSCHAT_OPENAI_API["port"] From 7f7238168c5c5057f833d64154cde6ca4ad68556 Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Thu, 17 Aug 2023 21:31:30 +0800 Subject: [PATCH 09/16] reformat model_config.py.example --- configs/model_config.py.example | 3 --- 1 file changed, 3 deletions(-) diff --git a/configs/model_config.py.example b/configs/model_config.py.example index be0bebc..ccfd1b6 100644 --- a/configs/model_config.py.example +++ b/configs/model_config.py.example @@ -1,14 +1,11 @@ import os import logging import torch -import argparse -import json # 日志格式 LOG_FORMAT = "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s" logger = logging.getLogger() logger.setLevel(logging.INFO) logging.basicConfig(format=LOG_FORMAT) -import json # 在以下字典中修改属性值,以指定本地embedding模型存储位置 From 8b3b869418b181e46380015df0338f7339de7d91 Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Thu, 17 Aug 2023 21:44:30 +0800 Subject: [PATCH 10/16] update README.md --- README.md | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 39f4fe6..a96fc12 100644 --- a/README.md +++ b/README.md @@ -366,15 +366,26 @@ $ streamlit run webui.py --server.port 666 ### 6. 
一键启动 -更新一键启动脚本startup.py,一键启动所有fastchat服务、API服务、WebUI服务,示例代码: +更新一键启动脚本 startup.py,一键启动所有 Fastchat 服务、API 服务、WebUI 服务,示例代码: ```shell $ python startup.py --all-webui ``` -可选 `all-webui,all-api,llm-api,controller,openai-api,model-worker,api,webui`,其中all-webui为一键启动webui所有依赖服务,all-api为一键启动api所有依赖服务,llm-api为一键启动fastchat所有依赖的llm服务,openai-api为仅启动fastchat的controller和openai-api-server服务,其他为单独服务启动选项。 +可选 `--all-webui`, `--all-api`, `--llm-api`, `--controller`, `--openai-api`, +`--model-worker`, `--api`, `--webui`,其中: -若想指定非默认模型,需要用--model-name选项,示例: +- `--all-webui` 为一键启动 WebUI 所有依赖服务; + +- `--all-api` 为一键启动 API 所有依赖服务; + +- `--llm-api` 为一键启动 Fastchat 所有依赖的 LLM 服务; + +- `--openai-api` 为仅启动 FastChat 的 controller 和 openai-api-server 服务; + +- 其他为单独服务启动选项。 + +若想指定非默认模型,需要用 `--model-name` 选项,示例: ```shell $ python startup.py --all-webui --model-name Qwen-7B-Chat @@ -382,9 +393,9 @@ $ python startup.py --all-webui --model-name Qwen-7B-Chat **注意:** -**1. startup脚本用多进程方式启动各模块的服务,可能会导致打印顺序问题,请等待全部服务发起后再调用,并根据默认或指定端口调用服务(默认llm-api服务端口:127.0.0.1:8888,默认api服务端口:127.0.0.1:7861,默认webui服务端口:`本机IP:8501`)** +**1. startup 脚本用多进程方式启动各模块的服务,可能会导致打印顺序问题,请等待全部服务发起后再调用,并根据默认或指定端口调用服务(默认 LLM API 服务端口:`127.0.0.1:8888`,默认 API 服务端口:`127.0.0.1:7861`,默认 WebUI 服务端口:`本机IP:8501`)** -**2.服务启动时间示设备不同而不同,约3-10分钟,如长时间没有启动请前往 `./logs`目录下监控日志,定位问题。** +**2.服务启动时间示设备不同而不同,约 3-10 分钟,如长时间没有启动请前往 `./logs`目录下监控日志,定位问题。** ## 常见问题 From f8229fc4a972d98a05ccd2460a636578e451c1bd Mon Sep 17 00:00:00 2001 From: liunux4odoo Date: Thu, 17 Aug 2023 21:50:36 +0800 Subject: [PATCH 11/16] update server_config example --- configs/server_config.py.example | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/configs/server_config.py.example b/configs/server_config.py.example index fd83fc0..5f37779 100644 --- a/configs/server_config.py.example +++ b/configs/server_config.py.example @@ -86,3 +86,15 @@ def fschat_openai_api_address() -> str: host = FSCHAT_OPENAI_API["host"] port = FSCHAT_OPENAI_API["port"] return f"http://{host}:{port}" + + +def api_address() -> str: + host = API_SERVER["host"] + port = API_SERVER["port"] + return f"http://{host}:{port}" + + +def webui_address() -> str: + host = WEBUI_SERVER["host"] + port = WEBUI_SERVER["port"] + return f"http://{host}:{port}" From 5ec512801fd3ff708c7f38b2d413e6a937b65043 Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Thu, 17 Aug 2023 21:51:07 +0800 Subject: [PATCH 12/16] update configs.__init__ --- configs/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configs/__init__.py b/configs/__init__.py index 0bed9b6..64289ae 100644 --- a/configs/__init__.py +++ b/configs/__init__.py @@ -1 +1,2 @@ -from .model_config import * \ No newline at end of file +from .model_config import * +from .server_config import * \ No newline at end of file From 4278d1e000b48e7692e8faf69b2c4cbf5dc1884e Mon Sep 17 00:00:00 2001 From: liunux4odoo Date: Thu, 17 Aug 2023 21:54:48 +0800 Subject: [PATCH 13/16] fix startup.py --- startup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/startup.py b/startup.py index e1ec023..4008e34 100644 --- a/startup.py +++ b/startup.py @@ -253,19 +253,19 @@ def parse_args() -> argparse.ArgumentParser: "--all-webui", action="store_true", help="run fastchat's controller/model_worker/openai_api servers, run api.py and webui.py", - dest="all", + dest="all_webui", ) parser.add_argument( "--all-api", action="store_true", help="run fastchat's controller/model_worker/openai_api servers, run api.py and webui.py", - dest="all", + dest="all_api", 
) parser.add_argument( "--llm-api", action="store_true", help="run fastchat's controller/model_worker/openai_api servers, run api.py and webui.py", - dest="all", + dest="llm_api", ) parser.add_argument( "-o", From 52837429d28314a66ac66fd6c4544ee65263a994 Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Thu, 17 Aug 2023 22:19:26 +0800 Subject: [PATCH 14/16] update configs.__init__, webui.py and api.py --- configs/__init__.py | 4 +++- server/api.py | 33 ++++++++++++++++++--------------- webui.py | 12 +++++++++++- 3 files changed, 32 insertions(+), 17 deletions(-) diff --git a/configs/__init__.py b/configs/__init__.py index 64289ae..adec2c0 100644 --- a/configs/__init__.py +++ b/configs/__init__.py @@ -1,2 +1,4 @@ from .model_config import * -from .server_config import * \ No newline at end of file +from .server_config import * + +VERSION = "v0.2.1-preview" diff --git a/server/api.py b/server/api.py index c398f15..ecadd7c 100644 --- a/server/api.py +++ b/server/api.py @@ -6,6 +6,7 @@ sys.path.append(os.path.dirname(os.path.dirname(__file__))) from configs.model_config import NLTK_DATA_PATH from configs.server_config import OPEN_CROSS_DOMAIN +from configs import VERSION import argparse import uvicorn from fastapi.middleware.cors import CORSMiddleware @@ -15,11 +16,10 @@ from server.chat import (chat, knowledge_base_chat, openai_chat, from server.knowledge_base.kb_api import list_kbs, create_kb, delete_kb from server.knowledge_base.kb_doc_api import (list_docs, upload_doc, delete_doc, update_doc, download_doc, recreate_vector_store, - search_docs, DocumentWithScore) + search_docs, DocumentWithScore) from server.utils import BaseResponse, ListResponse, FastAPI, MakeFastAPIOffline from typing import List - nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path @@ -28,7 +28,10 @@ async def document(): def create_app(): - app = FastAPI(title="Langchain-Chatchat API Server") + app = FastAPI( + title="Langchain-Chatchat API Server", + version=VERSION + ) MakeFastAPIOffline(app) # Add CORS middleware to allow all origins # 在config.py中设置OPEN_DOMAIN=True,允许跨域 @@ -76,10 +79,10 @@ def create_app(): )(create_kb) app.post("/knowledge_base/delete_knowledge_base", - tags=["Knowledge Base Management"], - response_model=BaseResponse, - summary="删除知识库" - )(delete_kb) + tags=["Knowledge Base Management"], + response_model=BaseResponse, + summary="删除知识库" + )(delete_kb) app.get("/knowledge_base/list_docs", tags=["Knowledge Base Management"], @@ -88,10 +91,10 @@ def create_app(): )(list_docs) app.post("/knowledge_base/search_docs", - tags=["Knowledge Base Management"], - response_model=List[DocumentWithScore], - summary="搜索知识库" - )(search_docs) + tags=["Knowledge Base Management"], + response_model=List[DocumentWithScore], + summary="搜索知识库" + )(search_docs) app.post("/knowledge_base/upload_doc", tags=["Knowledge Base Management"], @@ -100,10 +103,10 @@ def create_app(): )(upload_doc) app.post("/knowledge_base/delete_doc", - tags=["Knowledge Base Management"], - response_model=BaseResponse, - summary="删除知识库内指定文件" - )(delete_doc) + tags=["Knowledge Base Management"], + response_model=BaseResponse, + summary="删除知识库内指定文件" + )(delete_doc) app.post("/knowledge_base/update_doc", tags=["Knowledge Base Management"], diff --git a/webui.py b/webui.py index 99db3f6..58fc0e3 100644 --- a/webui.py +++ b/webui.py @@ -9,6 +9,7 @@ from webui_pages.utils import * from streamlit_option_menu import option_menu from webui_pages import * import os +from configs import VERSION api = ApiRequest(base_url="http://127.0.0.1:7861", 
no_remote_api=False) @@ -17,6 +18,11 @@ if __name__ == "__main__": "Langchain-Chatchat WebUI", os.path.join("img", "chatchat_icon_blue_square_v2.png"), initial_sidebar_state="expanded", + menu_items={ + 'Get Help': 'https://github.com/chatchat-space/Langchain-Chatchat', + 'Report a bug': "https://github.com/chatchat-space/Langchain-Chatchat/issues", + 'About': f"""欢迎使用 Langchain-Chatchat WebUI {VERSION}!""" + } ) if not chat_box.chat_inited: @@ -35,7 +41,7 @@ if __name__ == "__main__": "func": knowledge_base_page, }, } - + with st.sidebar: st.image( os.path.join( @@ -44,6 +50,10 @@ if __name__ == "__main__": ), use_column_width=True ) + st.caption( + f"""
<p align="right">当前版本:{VERSION}</p>
""", + unsafe_allow_html=True, + ) options = list(pages) icons = [x["icon"] for x in pages.values()] From 67839daad3e8a2c53f60cce01801f0e29740c9b6 Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Thu, 17 Aug 2023 22:22:28 +0800 Subject: [PATCH 15/16] reformat and add version to startup.py --- startup.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/startup.py b/startup.py index 4008e34..830c322 100644 --- a/startup.py +++ b/startup.py @@ -5,14 +5,16 @@ import sys import os from pprint import pprint - sys.path.append(os.path.dirname(os.path.dirname(__file__))) -from configs.model_config import EMBEDDING_DEVICE, EMBEDDING_MODEL, llm_model_dict, LLM_MODEL, LLM_DEVICE, LOG_PATH, logger +from configs.model_config import EMBEDDING_DEVICE, EMBEDDING_MODEL, llm_model_dict, LLM_MODEL, LLM_DEVICE, LOG_PATH, \ + logger from configs.server_config import (WEBUI_SERVER, API_SERVER, OPEN_CROSS_DOMAIN, FSCHAT_CONTROLLER, FSCHAT_MODEL_WORKERS, - FSCHAT_OPENAI_API, fschat_controller_address, fschat_model_worker_address, fschat_openai_api_address,) + FSCHAT_OPENAI_API, fschat_controller_address, fschat_model_worker_address, + fschat_openai_api_address, ) from server.utils import MakeFastAPIOffline, FastAPI import argparse from typing import Tuple, List +from configs import VERSION def set_httpx_timeout(timeout=60.0): @@ -53,6 +55,7 @@ def create_model_worker_app(**kwargs) -> Tuple[argparse.ArgumentParser, FastAPI] target=fastchat.serve.model_worker.heart_beat_worker, args=(self,), daemon=True, ) self.heart_beat_thread.start() + ModelWorker.init_heart_beat = _new_init_heart_beat parser = argparse.ArgumentParser() @@ -88,7 +91,6 @@ def create_model_worker_app(**kwargs) -> Tuple[argparse.ArgumentParser, FastAPI] ) os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus - gptq_config = GptqConfig( ckpt=args.gptq_ckpt or args.model_path, wbits=args.gptq_wbits, @@ -123,7 +125,7 @@ def create_model_worker_app(**kwargs) -> Tuple[argparse.ArgumentParser, FastAPI] sys.modules["fastchat.serve.model_worker"].worker = worker sys.modules["fastchat.serve.model_worker"].args = args sys.modules["fastchat.serve.model_worker"].gptq_config = gptq_config - + MakeFastAPIOffline(app) app.title = f"FastChat LLM Server ({LLM_MODEL})" return app @@ -184,10 +186,10 @@ def run_controller(q: Queue, run_seq: int = 1): def run_model_worker( - model_name: str = LLM_MODEL, - controller_address: str = "", - q: Queue = None, - run_seq: int = 2, + model_name: str = LLM_MODEL, + controller_address: str = "", + q: Queue = None, + run_seq: int = 2, ): import uvicorn @@ -210,7 +212,7 @@ def run_openai_api(q: Queue, run_seq: int = 3): import uvicorn controller_addr = fschat_controller_address() - app = create_openai_api_app(controller_addr) # todo: not support keys yet. + app = create_openai_api_app(controller_addr) # todo: not support keys yet. 
_set_app_seq(app, q, run_seq) host = FSCHAT_OPENAI_API["host"] @@ -242,8 +244,8 @@ def run_webui(q: Queue, run_seq: int = 5): break q.put(run_seq) p = subprocess.Popen(["streamlit", "run", "webui.py", - "--server.address", host, - "--server.port", str(port)]) + "--server.address", host, + "--server.port", str(port)]) p.wait() @@ -334,13 +336,12 @@ if __name__ == "__main__": args.model_worker = True args.api = True args.webui = False - + elif args.llm_api: args.openai_api = True args.model_worker = True args.api = False args.webui = False - logger.info(f"正在启动服务:") logger.info(f"如需查看 llm_api 日志,请前往 {LOG_PATH}") @@ -406,7 +407,7 @@ if __name__ == "__main__": print("=" * 30 + "Langchain-Chatchat Configuration" + "=" * 30) print(f"操作系统:{platform.platform()}.") print(f"python版本:{sys.version}") - print(f"项目版本:") # todo + print(f"项目版本:{VERSION}") print(f"langchain版本:{langchain.__version__}. fastchat版本:{fastchat.__version__}") print("\n") print(f"当前LLM模型:{LLM_MODEL} @ {LLM_DEVICE}") From d9f74ec0614fd1ff87430e75c3219f84c8168cb5 Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Thu, 17 Aug 2023 22:26:41 +0800 Subject: [PATCH 16/16] update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a96fc12..fb3361d 100644 --- a/README.md +++ b/README.md @@ -372,7 +372,9 @@ $ streamlit run webui.py --server.port 666 $ python startup.py --all-webui ``` -可选 `--all-webui`, `--all-api`, `--llm-api`, `--controller`, `--openai-api`, +并可使用 `Ctrl + C` 直接关闭所有运行服务。 + +可选参数包括 `--all-webui`, `--all-api`, `--llm-api`, `--controller`, `--openai-api`, `--model-worker`, `--api`, `--webui`,其中: - `--all-webui` 为一键启动 WebUI 所有依赖服务;
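
To sanity-check a one-click launch, the OpenAI-compatible endpoint exposed by `FSCHAT_OPENAI_API` can be called directly. The sketch below is adapted from the commented example at the end of startup.py and the request added in tests/api/stream_api_test.py; it assumes the defaults from server_config.py.example (port 8888), the default `chatglm2-6b` model, and the pre-1.0 `openai` client used elsewhere in this PR, so adjust any of these if your configuration differs.

```python
# Smoke test for the services launched by `python startup.py --all-webui`.
# Assumptions: default FSCHAT_OPENAI_API address (localhost:8888), the
# chatglm2-6b model from llm_model_dict, and a pre-1.0 openai package.
import openai
import requests

openai.api_key = "EMPTY"  # API keys are not supported yet (see the todo in run_openai_api)
openai.api_base = "http://localhost:8888/v1"

# 1. Through the openai SDK, as in the commented example in startup.py.
completion = openai.ChatCompletion.create(
    model="chatglm2-6b",
    messages=[{"role": "user", "content": "Hello! What is your name?"}],
)
print(completion.choices[0].message.content)

# 2. Over plain HTTP, as in tests/api/stream_api_test.py. Note that
# "messages" must be a list of role/content dicts, not a bare string.
resp = requests.post(
    "http://localhost:8888/v1/chat/completions",
    json={
        "model": "chatglm2-6b",
        "messages": [{"role": "user", "content": "你好"}],
        "max_tokens": 1000,
    },
)
assert "choices" in resp.json()
print(resp.json()["choices"][0]["message"]["content"])
```

Both calls go through FastChat's openai_api_server, so a single request exercises the controller, the model worker, and the API gateway together, which is why the startup banner reminds you to keep `llm_model_dict`'s `api_base_url` in sync with this address.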