Merge pull request #2130 from glide-the/dev

统一在线模型异常报文、增加详细日志
This commit is contained in:
glide-the 2023-11-21 21:48:39 +08:00 committed by GitHub
commit 569209289b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 135 additions and 14 deletions

View File

@ -5,6 +5,7 @@ from server.utils import get_httpx_client
from fastchat import conversation as conv from fastchat import conversation as conv
import json import json
from typing import List, Dict from typing import List, Dict
from configs import logger, log_verbose
class AzureWorker(ApiModelWorker): class AzureWorker(ApiModelWorker):
@ -39,6 +40,11 @@ class AzureWorker(ApiModelWorker):
} }
text = "" text = ""
if log_verbose:
logger.info(f'{self.__class__.__name__}:url: {url}')
logger.info(f'{self.__class__.__name__}:headers: {headers}')
logger.info(f'{self.__class__.__name__}:data: {data}')
with get_httpx_client() as client: with get_httpx_client() as client:
with client.stream("POST", url, headers=headers, json=data) as response: with client.stream("POST", url, headers=headers, json=data) as response:
for line in response.iter_lines(): for line in response.iter_lines():

View File

@ -9,7 +9,7 @@ from fastchat import conversation as conv
import sys import sys
import json import json
from typing import List, Literal, Dict from typing import List, Literal, Dict
from configs import logger, log_verbose
def calculate_md5(input_string): def calculate_md5(input_string):
md5 = hashlib.md5() md5 = hashlib.md5()
@ -56,6 +56,11 @@ class BaiChuanWorker(ApiModelWorker):
} }
text = "" text = ""
if log_verbose:
logger.info(f'{self.__class__.__name__}:json_data: {json_data}')
logger.info(f'{self.__class__.__name__}:url: {url}')
logger.info(f'{self.__class__.__name__}:headers: {headers}')
with get_httpx_client() as client: with get_httpx_client() as client:
with client.stream("POST", url, headers=headers, json=data) as response: with client.stream("POST", url, headers=headers, json=data) as response:
for line in response.iter_lines(): for line in response.iter_lines():
@ -71,8 +76,14 @@ class BaiChuanWorker(ApiModelWorker):
else: else:
yield { yield {
"error_code": resp["code"], "error_code": resp["code"],
"text": resp["msg"] "text": resp["msg"],
"error": {
"message": resp["msg"],
"type": "invalid_request_error",
"param": None,
"code": None,
} }
}
def get_embeddings(self, params): def get_embeddings(self, params):
# TODO: 支持embeddings # TODO: 支持embeddings

View File

@ -3,6 +3,7 @@ from server.model_workers.base import *
from fastchat import conversation as conv from fastchat import conversation as conv
import sys import sys
from typing import List, Literal, Dict from typing import List, Literal, Dict
from configs import logger, log_verbose
class FangZhouWorker(ApiModelWorker): class FangZhouWorker(ApiModelWorker):
@ -46,10 +47,21 @@ class FangZhouWorker(ApiModelWorker):
} }
text = "" text = ""
if log_verbose:
logger.info(f'{self.__class__.__name__}:maas: {maas}')
for resp in maas.stream_chat(req): for resp in maas.stream_chat(req):
error = resp.error error = resp.error
if error.code_n > 0: if error.code_n > 0:
yield {"error_code": error.code_n, "text": error.message} yield {
"error_code": error.code_n,
"text": error.message,
"error": {
"message": error.message,
"type": "invalid_request_error",
"param": None,
"code": None,
}
}
elif chunk := resp.choice.message.content: elif chunk := resp.choice.message.content:
text += chunk text += chunk
yield {"error_code": 0, "text": text} yield {"error_code": 0, "text": text}

View File

@ -6,6 +6,7 @@ import json
from server.model_workers.base import ApiEmbeddingsParams from server.model_workers.base import ApiEmbeddingsParams
from server.utils import get_httpx_client from server.utils import get_httpx_client
from typing import List, Dict from typing import List, Dict
from configs import logger, log_verbose
class MiniMaxWorker(ApiModelWorker): class MiniMaxWorker(ApiModelWorker):
@ -59,6 +60,10 @@ class MiniMaxWorker(ApiModelWorker):
# "bot_setting": [], # "bot_setting": [],
# "role_meta": params.role_meta, # "role_meta": params.role_meta,
} }
if log_verbose:
logger.info(f'{self.__class__.__name__}:data: {data}')
logger.info(f'{self.__class__.__name__}:url: {url.format(pro=pro, group_id=params.group_id)}')
logger.info(f'{self.__class__.__name__}:headers: {headers}')
with get_httpx_client() as client: with get_httpx_client() as client:
response = client.stream("POST", response = client.stream("POST",
@ -69,7 +74,16 @@ class MiniMaxWorker(ApiModelWorker):
text = "" text = ""
for e in r.iter_text(): for e in r.iter_text():
if not e.startswith("data: "): # 真是优秀的返回 if not e.startswith("data: "): # 真是优秀的返回
yield {"error_code": 500, "text": f"minimax返回错误的结果{e}"} yield {
"error_code": 500,
"text": f"minimax返回错误的结果{e}",
"error": {
"message": f"minimax返回错误的结果{e}",
"type": "invalid_request_error",
"param": None,
"code": None,
}
}
continue continue
data = json.loads(e[6:]) data = json.loads(e[6:])
@ -95,13 +109,27 @@ class MiniMaxWorker(ApiModelWorker):
"texts": params.texts, "texts": params.texts,
"type": "query" if params.to_query else "db", "type": "query" if params.to_query else "db",
} }
if log_verbose:
logger.info(f'{self.__class__.__name__}:data: {data}')
logger.info(f'{self.__class__.__name__}:url: {url}')
logger.info(f'{self.__class__.__name__}:headers: {headers}')
with get_httpx_client() as client: with get_httpx_client() as client:
r = client.post(url, headers=headers, json=data).json() r = client.post(url, headers=headers, json=data).json()
if embeddings := r.get("vectors"): if embeddings := r.get("vectors"):
return {"code": 200, "data": embeddings} return {"code": 200, "data": embeddings}
elif error := r.get("base_resp"): elif error := r.get("base_resp"):
return {"code": error["status_code"], "msg": error["status_msg"]} return {
"code": error["status_code"],
"msg": error["status_msg"],
"error": {
"message": error["status_msg"],
"type": "invalid_request_error",
"param": None,
"code": None,
}
}
def get_embeddings(self, params): def get_embeddings(self, params):
# TODO: 支持embeddings # TODO: 支持embeddings

View File

@ -8,6 +8,7 @@ from fastchat import conversation as conv
import sys import sys
from server.model_workers.base import ApiEmbeddingsParams from server.model_workers.base import ApiEmbeddingsParams
from typing import List, Literal, Dict from typing import List, Literal, Dict
from configs import logger, log_verbose
MODEL_VERSIONS = { MODEL_VERSIONS = {
"ernie-bot-4": "completions_pro", "ernie-bot-4": "completions_pro",
@ -132,6 +133,11 @@ class QianFanWorker(ApiModelWorker):
} }
text = "" text = ""
if log_verbose:
logger.info(f'{self.__class__.__name__}:data: {payload}')
logger.info(f'{self.__class__.__name__}:url: {url}')
logger.info(f'{self.__class__.__name__}:headers: {headers}')
with get_httpx_client() as client: with get_httpx_client() as client:
with client.stream("POST", url, headers=headers, json=payload) as response: with client.stream("POST", url, headers=headers, json=payload) as response:
for line in response.iter_lines(): for line in response.iter_lines():
@ -150,7 +156,13 @@ class QianFanWorker(ApiModelWorker):
else: else:
yield { yield {
"error_code": resp["error_code"], "error_code": resp["error_code"],
"text": resp["error_msg"] "text": resp["error_msg"],
"error": {
"message": resp["error_msg"],
"type": "invalid_request_error",
"param": None,
"code": None,
}
} }
def do_embeddings(self, params: ApiEmbeddingsParams) -> Dict: def do_embeddings(self, params: ApiEmbeddingsParams) -> Dict:
@ -168,13 +180,25 @@ class QianFanWorker(ApiModelWorker):
embed_model = params.embed_model or self.DEFAULT_EMBED_MODEL embed_model = params.embed_model or self.DEFAULT_EMBED_MODEL
access_token = get_baidu_access_token(params.api_key, params.secret_key) access_token = get_baidu_access_token(params.api_key, params.secret_key)
url = f"https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/embeddings/{embed_model}?access_token={access_token}" url = f"https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/embeddings/{embed_model}?access_token={access_token}"
if log_verbose:
logger.info(f'{self.__class__.__name__}:url: {url}')
with get_httpx_client() as client: with get_httpx_client() as client:
resp = client.post(url, json={"input": params.texts}).json() resp = client.post(url, json={"input": params.texts}).json()
if "error_cdoe" not in resp: if "error_cdoe" not in resp:
embeddings = [x["embedding"] for x in resp.get("data", [])] embeddings = [x["embedding"] for x in resp.get("data", [])]
return {"code": 200, "data": embeddings} return {"code": 200, "data": embeddings}
else: else:
return {"code": resp["error_code"], "msg": resp["error_msg"]} return {
"code": resp["error_code"],
"msg": resp["error_msg"],
"error": {
"message": resp["error_msg"],
"type": "invalid_request_error",
"param": None,
"code": None,
}
}
# TODO: qianfan支持续写模型 # TODO: qianfan支持续写模型
def get_embeddings(self, params): def get_embeddings(self, params):

View File

@ -9,6 +9,7 @@ from typing import List, Literal, Dict
from fastchat import conversation as conv from fastchat import conversation as conv
from server.model_workers.base import * from server.model_workers.base import *
from server.model_workers.base import ApiEmbeddingsParams from server.model_workers.base import ApiEmbeddingsParams
from configs import logger, log_verbose
class QwenWorker(ApiModelWorker): class QwenWorker(ApiModelWorker):
@ -31,6 +32,8 @@ class QwenWorker(ApiModelWorker):
def do_chat(self, params: ApiChatParams) -> Dict: def do_chat(self, params: ApiChatParams) -> Dict:
import dashscope import dashscope
params.load_config(self.model_names[0]) params.load_config(self.model_names[0])
if log_verbose:
logger.info(f'{self.__class__.__name__}:params: {params}')
gen = dashscope.Generation() gen = dashscope.Generation()
responses = gen.call( responses = gen.call(
@ -53,12 +56,20 @@ class QwenWorker(ApiModelWorker):
yield { yield {
"error_code": resp["status_code"], "error_code": resp["status_code"],
"text": resp["message"], "text": resp["message"],
"error": {
"message": resp["message"],
"type": "invalid_request_error",
"param": None,
"code": None,
}
} }
def do_embeddings(self, params: ApiEmbeddingsParams) -> Dict: def do_embeddings(self, params: ApiEmbeddingsParams) -> Dict:
import dashscope import dashscope
params.load_config(self.model_names[0]) params.load_config(self.model_names[0])
if log_verbose:
logger.info(f'{self.__class__.__name__}:params: {params}')
result = [] result = []
i = 0 i = 0
while i < len(params.texts): while i < len(params.texts):
@ -69,7 +80,16 @@ class QwenWorker(ApiModelWorker):
api_key=params.api_key, api_key=params.api_key,
) )
if resp["status_code"] != 200: if resp["status_code"] != 200:
return {"code": resp["status_code"], "msg": resp.message} return {
"code": resp["status_code"],
"msg": resp.message,
"error": {
"message": resp["message"],
"type": "invalid_request_error",
"param": None,
"code": None,
}
}
else: else:
embeddings = [x["embedding"] for x in resp["output"]["embeddings"]] embeddings = [x["embedding"] for x in resp["output"]["embeddings"]]
result += embeddings result += embeddings

View File

@ -3,6 +3,7 @@ from server.model_workers.base import *
from fastchat import conversation as conv from fastchat import conversation as conv
import sys import sys
from typing import List, Dict, Iterator, Literal from typing import List, Dict, Iterator, Literal
from configs import logger, log_verbose
class ChatGLMWorker(ApiModelWorker): class ChatGLMWorker(ApiModelWorker):
@ -29,6 +30,9 @@ class ChatGLMWorker(ApiModelWorker):
params.load_config(self.model_names[0]) params.load_config(self.model_names[0])
zhipuai.api_key = params.api_key zhipuai.api_key = params.api_key
if log_verbose:
logger.info(f'{self.__class__.__name__}:params: {params}')
response = zhipuai.model_api.sse_invoke( response = zhipuai.model_api.sse_invoke(
model=params.version, model=params.version,
prompt=params.messages, prompt=params.messages,
@ -40,7 +44,16 @@ class ChatGLMWorker(ApiModelWorker):
if e.event == "add": if e.event == "add":
yield {"error_code": 0, "text": e.data} yield {"error_code": 0, "text": e.data}
elif e.event in ["error", "interrupted"]: elif e.event in ["error", "interrupted"]:
yield {"error_code": 500, "text": str(e)} yield {
"error_code": 500,
"text": str(e),
"error": {
"message": str(e),
"type": "invalid_request_error",
"param": None,
"code": None,
}
}
def do_embeddings(self, params: ApiEmbeddingsParams) -> Dict: def do_embeddings(self, params: ApiEmbeddingsParams) -> Dict:
import zhipuai import zhipuai
@ -55,7 +68,7 @@ class ChatGLMWorker(ApiModelWorker):
if response["code"] == 200: if response["code"] == 200:
embeddings.append(response["data"]["embedding"]) embeddings.append(response["data"]["embedding"])
else: else:
return response # dict with code & msg return response # dict with code & msg
except Exception as e: except Exception as e:
return {"code": 500, "msg": f"对文本向量化时出错:{e}"} return {"code": 500, "msg": f"对文本向量化时出错:{e}"}

View File

@ -639,7 +639,10 @@ def get_httpx_client(
# construct Client # construct Client
kwargs.update(timeout=timeout, proxies=default_proxies) kwargs.update(timeout=timeout, proxies=default_proxies)
print(kwargs)
if log_verbose:
logger.info(f'{get_httpx_client.__class__.__name__}:kwargs: {kwargs}')
if use_async: if use_async:
return httpx.AsyncClient(**kwargs) return httpx.AsyncClient(**kwargs)
else: else:

View File

@ -85,6 +85,7 @@ class ApiRequest:
) -> Union[httpx.Response, Iterator[httpx.Response], None]: ) -> Union[httpx.Response, Iterator[httpx.Response], None]:
while retry > 0: while retry > 0:
try: try:
print(kwargs)
if stream: if stream:
return self.client.stream("POST", url, data=data, json=json, **kwargs) return self.client.stream("POST", url, data=data, json=json, **kwargs)
else: else:
@ -745,6 +746,9 @@ class ApiRequest:
"controller_address": controller_address, "controller_address": controller_address,
} }
if log_verbose:
logger.info(f'{self.__class__.__name__}:data: {data}')
response = self.post( response = self.post(
"/llm_model/list_running_models", "/llm_model/list_running_models",
json=data, json=data,