llm_api can be terminated by Ctrl+C. Success on Windows.
This commit is contained in:
parent
e7d07a7923
commit
6c00c03faa
@@ -1,4 +1,5 @@
 from multiprocessing import Process, Queue
+import multiprocessing as mp
 import sys
 import os
 
@@ -12,7 +13,6 @@ controller_port = 20001
 model_worker_port = 20002
 openai_api_port = 8888
 base_url = "http://127.0.0.1:{}"
-queue = Queue()
 
 
 def set_httpx_timeout(timeout=60.0):
@@ -64,6 +64,18 @@ def create_model_worker_app(
     fastchat.constants.LOGDIR = LOG_PATH
     from fastchat.serve.model_worker import app, GptqConfig, AWQConfig, ModelWorker, worker_id
     import argparse
+    import threading
+    import fastchat.serve.model_worker
+
+    # workaround to make program exit with Ctrl+c
+    # it should be deleted after pr is merged by fastchat
+    def _new_init_heart_beat(self):
+        self.register_to_controller()
+        self.heart_beat_thread = threading.Thread(
+            target=fastchat.serve.model_worker.heart_beat_worker, args=(self,), daemon=True,
+        )
+        self.heart_beat_thread.start()
+    ModelWorker.init_heart_beat = _new_init_heart_beat
 
     parser = argparse.ArgumentParser()
     args = parser.parse_args()
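A note on the hunk above: the monkey-patch is identical to fastchat's own init_heart_beat except for daemon=True on the heart-beat thread, which is presumably the whole fix. Any live non-daemon thread keeps the Python interpreter from exiting, so after Ctrl+C the main thread would die but the worker process would hang on the heart-beat loop. A minimal standalone sketch of that difference (heartbeat here is a stand-in loop, not fastchat code):

import threading
import time

def heartbeat():
    # Stand-in for fastchat's heart_beat_worker: pings a controller forever.
    while True:
        time.sleep(1)

# daemon=False (the default): the interpreter waits for this thread at exit,
# so Ctrl+C kills the main thread but the process hangs on the loop.
# daemon=True: the thread is stopped abruptly once the last non-daemon
# thread (here, the main thread) finishes, so Ctrl+C exits cleanly.
t = threading.Thread(target=heartbeat, daemon=True)
t.start()

try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    print("exiting; the daemon heart-beat thread dies with the process")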
@@ -214,6 +226,8 @@ def run_openai_api(q):
 
 
 if __name__ == "__main__":
+    mp.set_start_method("spawn")
+    queue = Queue()
     logger.info(llm_model_dict[LLM_MODEL])
     model_path = llm_model_dict[LLM_MODEL]["local_model_path"]
 
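These two added lines carry the commit title. set_start_method("spawn") starts every child as a fresh interpreter instead of a fork of the parent: CUDA contexts cannot be used across a fork, and spawn is the only start method available on Windows. It also explains the earlier hunk that deletes the module-level queue = Queue(): under spawn each child re-imports the module, so a module-level queue would be rebuilt per child; creating it under the __main__ guard and passing it as an argument keeps one shared queue. A minimal sketch of the pattern (worker and q are illustrative names, not from the commit):

import multiprocessing as mp

def worker(q):
    # Runs in a fresh interpreter under "spawn": no CUDA context and no
    # post-import module state is inherited from the parent.
    q.put("ready")

if __name__ == "__main__":           # required with spawn: children re-import this module
    mp.set_start_method("spawn")     # must be set once, before any Process is created
    q = mp.Queue()                   # created in the parent, passed explicitly
    p = mp.Process(target=worker, args=(q,))
    p.start()
    print(q.get())                   # -> "ready"
    p.join()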
@@ -230,15 +244,14 @@ if __name__ == "__main__":
     )
     controller_process.start()
 
-    # CUDA cannot be used in fork-started subprocesses
-    # model_worker_process = Process(
-    #     target=run_model_worker,
-    #     name=f"model_worker({os.getpid()})",
-    #     args=(queue,),
-    #     # kwargs={"load_8bit": True},
-    #     daemon=True,
-    # )
-    # model_worker_process.start()
+    model_worker_process = Process(
+        target=run_model_worker,
+        name=f"model_worker({os.getpid()})",
+        args=(queue,),
+        # kwargs={"load_8bit": True},
+        daemon=True,
+    )
+    model_worker_process.start()
 
     openai_api_process = Process(
         target=run_openai_api,
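With spawn in place, the previously commented-out worker Process can host the CUDA-using model worker again, so run_model_worker no longer has to occupy the main process (see the next hunk). daemon=True on a Process also means multiprocessing terminates the child when the parent exits, so a parent crash does not orphan the worker. A rough sketch of that behavior (serve is a placeholder loop, not the repo's function):

import multiprocessing as mp
import time

def serve():
    while True:
        time.sleep(1)

if __name__ == "__main__":
    mp.set_start_method("spawn")
    # daemon=True: multiprocessing's exit handler terminates this child
    # when the parent exits, instead of leaving an orphaned server behind.
    p = mp.Process(target=serve, daemon=True)
    p.start()
    time.sleep(2)   # parent exits after this; the daemon child is killed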
@@ -248,11 +261,14 @@ if __name__ == "__main__":
     )
     openai_api_process.start()
 
-    run_model_worker(queue)
-
-    controller_process.join()
-    # model_worker_process.join()
-    openai_api_process.join()
+    try:
+        model_worker_process.join()
+        controller_process.join()
+        openai_api_process.join()
+    except KeyboardInterrupt:
+        model_worker_process.terminate()
+        controller_process.terminate()
+        openai_api_process.terminate()
 
     # Example API calls after the services start:
     # import openai
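The final hunk is the other half of the Ctrl+C story: the main process now only join()s its three children, and a Ctrl+C interrupts the blocking join() with KeyboardInterrupt, after which each child is terminate()d explicitly rather than left to daemon cleanup. A condensed sketch of the same shutdown pattern (serve and procs are illustrative names):

import multiprocessing as mp
import time

def serve(name):
    # Placeholder for run_controller / run_model_worker / run_openai_api.
    while True:
        time.sleep(1)

if __name__ == "__main__":
    mp.set_start_method("spawn")
    procs = [mp.Process(target=serve, args=(n,), daemon=True)
             for n in ("controller", "model_worker", "openai_api")]
    for p in procs:
        p.start()
    try:
        for p in procs:
            p.join()       # blocks; a Ctrl+C raises KeyboardInterrupt here
    except KeyboardInterrupt:
        for p in procs:
            p.terminate()  # explicit shutdown instead of relying on daemon cleanup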