From 17c96329420587b05196f83528d4fe07cb251b1a Mon Sep 17 00:00:00 2001 From: glide-the <2533736852@qq.com> Date: Tue, 21 Nov 2023 21:46:42 +0800 Subject: [PATCH] =?UTF-8?q?=E7=BB=9F=E4=B8=80=E5=9C=A8=E7=BA=BF=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B=E5=BC=82=E5=B8=B8=E6=8A=A5=E6=96=87=E3=80=81=E5=A2=9E?= =?UTF-8?q?=E5=8A=A0=E8=AF=A6=E7=BB=86=E6=97=A5=E5=BF=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server/model_workers/azure.py | 6 ++++++ server/model_workers/baichuan.py | 17 +++++++++++++--- server/model_workers/fangzhou.py | 14 ++++++++++++- server/model_workers/minimax.py | 34 +++++++++++++++++++++++++++++--- server/model_workers/qianfan.py | 28 ++++++++++++++++++++++++-- server/model_workers/qwen.py | 24 ++++++++++++++++++++-- server/model_workers/zhipu.py | 17 ++++++++++++++-- server/utils.py | 5 ++++- webui_pages/utils.py | 4 ++++ 9 files changed, 135 insertions(+), 14 deletions(-) diff --git a/server/model_workers/azure.py b/server/model_workers/azure.py index 2735c53..46d9529 100644 --- a/server/model_workers/azure.py +++ b/server/model_workers/azure.py @@ -5,6 +5,7 @@ from server.utils import get_httpx_client from fastchat import conversation as conv import json from typing import List, Dict +from configs import logger, log_verbose class AzureWorker(ApiModelWorker): @@ -39,6 +40,11 @@ class AzureWorker(ApiModelWorker): } text = "" + if log_verbose: + logger.info(f'{self.__class__.__name__}:url: {url}') + logger.info(f'{self.__class__.__name__}:headers: {headers}') + logger.info(f'{self.__class__.__name__}:data: {data}') + with get_httpx_client() as client: with client.stream("POST", url, headers=headers, json=data) as response: for line in response.iter_lines(): diff --git a/server/model_workers/baichuan.py b/server/model_workers/baichuan.py index b8dc426..edc81eb 100644 --- a/server/model_workers/baichuan.py +++ b/server/model_workers/baichuan.py @@ -9,7 +9,7 @@ from fastchat import conversation as conv import 
sys import json from typing import List, Literal, Dict - +from configs import logger, log_verbose def calculate_md5(input_string): md5 = hashlib.md5() @@ -56,6 +56,11 @@ class BaiChuanWorker(ApiModelWorker): } text = "" + if log_verbose: + logger.info(f'{self.__class__.__name__}:json_data: {json_data}') + logger.info(f'{self.__class__.__name__}:url: {url}') + logger.info(f'{self.__class__.__name__}:headers: {headers}') + with get_httpx_client() as client: with client.stream("POST", url, headers=headers, json=data) as response: for line in response.iter_lines(): @@ -71,8 +76,14 @@ class BaiChuanWorker(ApiModelWorker): else: yield { "error_code": resp["code"], - "text": resp["msg"] + "text": resp["msg"], + "error": { + "message": resp["msg"], + "type": "invalid_request_error", + "param": None, + "code": None, } + } def get_embeddings(self, params): # TODO: 支持embeddings @@ -103,4 +114,4 @@ if __name__ == "__main__": sys.modules["fastchat.serve.model_worker"].worker = worker MakeFastAPIOffline(app) uvicorn.run(app, port=21007) - # do_request() \ No newline at end of file + # do_request() diff --git a/server/model_workers/fangzhou.py b/server/model_workers/fangzhou.py index dc4d0f8..3834b5a 100644 --- a/server/model_workers/fangzhou.py +++ b/server/model_workers/fangzhou.py @@ -3,6 +3,7 @@ from server.model_workers.base import * from fastchat import conversation as conv import sys from typing import List, Literal, Dict +from configs import logger, log_verbose class FangZhouWorker(ApiModelWorker): @@ -46,10 +47,21 @@ class FangZhouWorker(ApiModelWorker): } text = "" + if log_verbose: + logger.info(f'{self.__class__.__name__}:maas: {maas}') for resp in maas.stream_chat(req): error = resp.error if error.code_n > 0: - yield {"error_code": error.code_n, "text": error.message} + yield { + "error_code": error.code_n, + "text": error.message, + "error": { + "message": error.message, + "type": "invalid_request_error", + "param": None, + "code": None, + } + } elif chunk := 
resp.choice.message.content: text += chunk yield {"error_code": 0, "text": text} diff --git a/server/model_workers/minimax.py b/server/model_workers/minimax.py index fa4bb85..47d6099 100644 --- a/server/model_workers/minimax.py +++ b/server/model_workers/minimax.py @@ -6,6 +6,7 @@ import json from server.model_workers.base import ApiEmbeddingsParams from server.utils import get_httpx_client from typing import List, Dict +from configs import logger, log_verbose class MiniMaxWorker(ApiModelWorker): @@ -59,6 +60,10 @@ class MiniMaxWorker(ApiModelWorker): # "bot_setting": [], # "role_meta": params.role_meta, } + if log_verbose: + logger.info(f'{self.__class__.__name__}:data: {data}') + logger.info(f'{self.__class__.__name__}:url: {url.format(pro=pro, group_id=params.group_id)}') + logger.info(f'{self.__class__.__name__}:headers: {headers}') with get_httpx_client() as client: response = client.stream("POST", @@ -69,7 +74,16 @@ class MiniMaxWorker(ApiModelWorker): text = "" for e in r.iter_text(): if not e.startswith("data: "): # 真是优秀的返回 - yield {"error_code": 500, "text": f"minimax返回错误的结果:{e}"} + yield { + "error_code": 500, + "text": f"minimax返回错误的结果:{e}", + "error": { + "message": f"minimax返回错误的结果:{e}", + "type": "invalid_request_error", + "param": None, + "code": None, + } + } continue data = json.loads(e[6:]) @@ -95,13 +109,27 @@ class MiniMaxWorker(ApiModelWorker): "texts": params.texts, "type": "query" if params.to_query else "db", } - + if log_verbose: + logger.info(f'{self.__class__.__name__}:data: {data}') + logger.info(f'{self.__class__.__name__}:url: {url}') + logger.info(f'{self.__class__.__name__}:headers: {headers}') + with get_httpx_client() as client: r = client.post(url, headers=headers, json=data).json() if embeddings := r.get("vectors"): return {"code": 200, "data": embeddings} elif error := r.get("base_resp"): - return {"code": error["status_code"], "msg": error["status_msg"]} + return { + "code": error["status_code"], + "msg": error["status_msg"], + 
+ "error": { + "message": error["status_msg"], + "type": "invalid_request_error", + "param": None, + "code": None, + } + } def get_embeddings(self, params): # TODO: 支持embeddings diff --git a/server/model_workers/qianfan.py b/server/model_workers/qianfan.py index ad71c0c..dacb8d1 100644 --- a/server/model_workers/qianfan.py +++ b/server/model_workers/qianfan.py @@ -8,6 +8,7 @@ from fastchat import conversation as conv import sys from server.model_workers.base import ApiEmbeddingsParams from typing import List, Literal, Dict +from configs import logger, log_verbose MODEL_VERSIONS = { "ernie-bot-4": "completions_pro", @@ -132,6 +133,11 @@ class QianFanWorker(ApiModelWorker): } text = "" + if log_verbose: + logger.info(f'{self.__class__.__name__}:data: {payload}') + logger.info(f'{self.__class__.__name__}:url: {url}') + logger.info(f'{self.__class__.__name__}:headers: {headers}') + with get_httpx_client() as client: with client.stream("POST", url, headers=headers, json=payload) as response: for line in response.iter_lines(): @@ -150,7 +156,13 @@ class QianFanWorker(ApiModelWorker): else: yield { "error_code": resp["error_code"], - "text": resp["error_msg"] + "text": resp["error_msg"], + "error": { + "message": resp["error_msg"], + "type": "invalid_request_error", + "param": None, + "code": None, + } } def do_embeddings(self, params: ApiEmbeddingsParams) -> Dict: @@ -168,13 +180,25 @@ class QianFanWorker(ApiModelWorker): embed_model = params.embed_model or self.DEFAULT_EMBED_MODEL access_token = get_baidu_access_token(params.api_key, params.secret_key) url = f"https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/embeddings/{embed_model}?access_token={access_token}" + if log_verbose: + logger.info(f'{self.__class__.__name__}:url: {url}') + with get_httpx_client() as client: resp = client.post(url, json={"input": params.texts}).json() if "error_cdoe" not in resp: embeddings = [x["embedding"] for x in resp.get("data", [])] return {"code": 200, "data": embeddings} 
else: - return {"code": resp["error_code"], "msg": resp["error_msg"]} + return { + "code": resp["error_code"], + "msg": resp["error_msg"], + "error": { + "message": resp["error_msg"], + "type": "invalid_request_error", + "param": None, + "code": None, + } + } # TODO: qianfan支持续写模型 def get_embeddings(self, params): diff --git a/server/model_workers/qwen.py b/server/model_workers/qwen.py index 5c68791..fde0a4c 100644 --- a/server/model_workers/qwen.py +++ b/server/model_workers/qwen.py @@ -9,6 +9,7 @@ from typing import List, Literal, Dict from fastchat import conversation as conv from server.model_workers.base import * from server.model_workers.base import ApiEmbeddingsParams +from configs import logger, log_verbose class QwenWorker(ApiModelWorker): @@ -31,6 +32,8 @@ class QwenWorker(ApiModelWorker): def do_chat(self, params: ApiChatParams) -> Dict: import dashscope params.load_config(self.model_names[0]) + if log_verbose: + logger.info(f'{self.__class__.__name__}:params: {params}') gen = dashscope.Generation() responses = gen.call( @@ -53,12 +56,20 @@ class QwenWorker(ApiModelWorker): yield { "error_code": resp["status_code"], "text": resp["message"], + "error": { + "message": resp["message"], + "type": "invalid_request_error", + "param": None, + "code": None, + } } + def do_embeddings(self, params: ApiEmbeddingsParams) -> Dict: import dashscope params.load_config(self.model_names[0]) - + if log_verbose: + logger.info(f'{self.__class__.__name__}:params: {params}') result = [] i = 0 while i < len(params.texts): @@ -69,7 +80,16 @@ class QwenWorker(ApiModelWorker): api_key=params.api_key, ) if resp["status_code"] != 200: - return {"code": resp["status_code"], "msg": resp.message} + return { + "code": resp["status_code"], + "msg": resp.message, + "error": { + "message": resp["message"], + "type": "invalid_request_error", + "param": None, + "code": None, + } + } else: embeddings = [x["embedding"] for x in resp["output"]["embeddings"]] result += embeddings diff --git 
a/server/model_workers/zhipu.py b/server/model_workers/zhipu.py index 980f446..cafb114 100644 --- a/server/model_workers/zhipu.py +++ b/server/model_workers/zhipu.py @@ -3,6 +3,7 @@ from server.model_workers.base import * from fastchat import conversation as conv import sys from typing import List, Dict, Iterator, Literal +from configs import logger, log_verbose class ChatGLMWorker(ApiModelWorker): @@ -29,6 +30,9 @@ params.load_config(self.model_names[0]) zhipuai.api_key = params.api_key + if log_verbose: + logger.info(f'{self.__class__.__name__}:params: {params}') + response = zhipuai.model_api.sse_invoke( model=params.version, prompt=params.messages, @@ -40,7 +44,16 @@ if e.event == "add": yield {"error_code": 0, "text": e.data} elif e.event in ["error", "interrupted"]: - yield {"error_code": 500, "text": str(e)} + yield { + "error_code": 500, + "text": str(e), + "error": { + "message": str(e), + "type": "invalid_request_error", + "param": None, + "code": None, + } + } def do_embeddings(self, params: ApiEmbeddingsParams) -> Dict: import zhipuai @@ -55,7 +68,7 @@ if response["code"] == 200: embeddings.append(response["data"]["embedding"]) else: - return response # dict with code & msg + return response  # dict with code & msg except Exception as e: return {"code": 500, "msg": f"对文本向量化时出错:{e}"} diff --git a/server/utils.py b/server/utils.py index 28c2a9b..9904157 100644 --- a/server/utils.py +++ b/server/utils.py @@ -639,7 +639,10 @@ def get_httpx_client( # construct Client kwargs.update(timeout=timeout, proxies=default_proxies) - print(kwargs) + + if log_verbose: + logger.info(f'{get_httpx_client.__name__}:kwargs: {kwargs}') + if use_async: return httpx.AsyncClient(**kwargs) else: diff --git a/webui_pages/utils.py b/webui_pages/utils.py index fd84cd5..345e753 100644 --- a/webui_pages/utils.py +++ b/webui_pages/utils.py @@ -85,6 +85,7 @@ class 
ApiRequest: ) -> Union[httpx.Response, Iterator[httpx.Response], None]: while retry > 0: try: + if log_verbose: logger.info(f'{self.__class__.__name__}:kwargs: {kwargs}') if stream: return self.client.stream("POST", url, data=data, json=json, **kwargs) else: @@ -745,6 +746,9 @@ "controller_address": controller_address, } + if log_verbose: + logger.info(f'{self.__class__.__name__}:data: {data}') + response = self.post( "/llm_model/list_running_models", json=data,