Langchain-Chatchat/server/model_workers/qwen.py

import json
import sys
from configs import TEMPERATURE
from http import HTTPStatus
from typing import List, Literal, Dict

from fastchat import conversation as conv

from server.model_workers.base import ApiModelWorker
from server.utils import get_model_worker_config


def request_qwen_api(
    messages: List[Dict[str, str]],
    api_key: str = None,
    version: str = "qwen-turbo",
    temperature: float = TEMPERATURE,
    model_name: str = "qwen-api",
):
    """Stream a chat completion from the DashScope generation API.

    Args:
        messages: Chat messages forwarded unchanged to ``Generation.call``.
        api_key: API key for the request. When falsy, the ``"api_key"``
            entry of the worker config for ``model_name`` is used instead.
        version: Model identifier sent as ``model``. When falsy, the
            ``"version"`` entry of the worker config is used instead.
        temperature: Sampling temperature. ``None`` falls back to the
            configured ``TEMPERATURE`` so that callers forwarding an
            optional request field do not send ``None`` to the API.
        model_name: Name used to look up the worker config.

    Yields:
        One dict per streamed response that carries data:
        ``{"code": 200, "text": <content of the first choice>}`` on
        success (responses with no choices are skipped), or
        ``{"code": <status code>, "text": <API error message>}`` on failure.
        Exactly one dict is yielded per failed response.
    """
    import dashscope

    config = get_model_worker_config(model_name)
    api_key = api_key or config.get("api_key")
    version = version or config.get("version")
    if temperature is None:
        temperature = TEMPERATURE

    gen = dashscope.Generation()
    responses = gen.call(
        model=version,
        temperature=temperature,
        api_key=api_key,
        messages=messages,
        result_format='message',  # ask for the chat "message" result layout
        stream=True,
    )

    for resp in responses:
        if resp["status_code"] == HTTPStatus.OK:
            if choices := resp["output"]["choices"]:
                yield {
                    "code": 200,
                    "text": choices[0]["message"]["content"],
                }
        else:
            # Report the failure once, using the message returned by the API
            # rather than a generic placeholder followed by a second error.
            yield {
                "code": resp["status_code"],
                "text": resp["message"],
            }


class QwenWorker(ApiModelWorker):
    """API model worker that answers chat requests via ``request_qwen_api``."""

    def __init__(
            self,
            *,
            version: Literal["qwen-turbo", "qwen-plus"] = "qwen-turbo",
            model_names: List[str] = ["qwen-api"],
            controller_addr: str,
            worker_addr: str,
            **kwargs,
    ):
        """Set up the worker, its conversation template and API settings.

        Args:
            version: Qwen model variant passed to ``request_qwen_api``.
            model_names: Names this worker is registered under; the first
                one is also used as the conversation template name.
            controller_addr: Address of the controller, forwarded to the
                base class.
            worker_addr: Address of this worker, forwarded to the base class.
            **kwargs: Extra options forwarded to ``ApiModelWorker``.
                ``context_len`` defaults to 16384 when not supplied.
        """
        # Copy the list so the shared default (or a caller-owned list) is
        # never aliased by the base class.
        kwargs.update(
            model_names=list(model_names),
            controller_addr=controller_addr,
            worker_addr=worker_addr,
        )
        kwargs.setdefault("context_len", 16384)
        super().__init__(**kwargs)

        # TODO: confirm whether this template needs to be changed
        self.conv = conv.Conversation(
            name=self.model_names[0],
            system_message="你是一个聪明、对人类有帮助的人工智能，你可以对人类提出的问题给出有用、详细、礼貌的回答。",
            messages=[],
            roles=["user", "assistant", "system"],
            sep="\n### ",
            stop_str="###",
        )
        config = self.get_config()
        self.api_key = config.get("api_key")
        self.version = version

    def generate_stream_gate(self, params):
        """Stream the model's reply as NUL-terminated JSON chunks.

        Args:
            params: Request parameters. ``params["prompt"]`` is required;
                ``params["temperature"]`` is optional.

        Yields:
            UTF-8 encoded JSON objects, each followed by ``b"\\0"``, of the
            form ``{"error_code": <int>, "text": <str>}``. ``error_code`` is
            0 for a successful chunk, otherwise the code reported by
            ``request_qwen_api``.
        """
        # prompt_to_messages is not defined in this class; presumably
        # inherited from ApiModelWorker.
        messages = self.prompt_to_messages(params["prompt"])

        request_kwargs = {
            "messages": messages,
            "api_key": self.api_key,
            "version": self.version,
        }
        # Only forward a temperature the caller actually supplied, so the
        # default of request_qwen_api applies otherwise instead of None.
        temperature = params.get("temperature")
        if temperature is not None:
            request_kwargs["temperature"] = temperature

        for resp in request_qwen_api(**request_kwargs):
            error_code = 0 if resp["code"] == 200 else resp["code"]
            payload = {
                "error_code": error_code,
                "text": resp["text"],
            }
            yield json.dumps(payload, ensure_ascii=False).encode() + b"\0"

    def get_embeddings(self, params):
        """Placeholder: only prints the request and returns ``None``."""
        # TODO: support embeddings
        print("embedding")
        print(params)


if __name__ == "__main__":
    # Standalone launcher: run this worker on port 20007 using fastchat's
    # model-worker FastAPI app.
    import uvicorn
    from fastchat.serve.model_worker import app
    from server.utils import MakeFastAPIOffline

    worker = QwenWorker(
        worker_addr="http://127.0.0.1:20007",
        controller_addr="http://127.0.0.1:20001",
    )
    # Install the worker as the module-level `worker` of fastchat's
    # model_worker module (presumably what the app's routes look up).
    setattr(sys.modules["fastchat.serve.model_worker"], "worker", worker)
    # NOTE(review): presumably makes the app's docs pages work without
    # internet access — confirm against server.utils.
    MakeFastAPIOffline(app)
    uvicorn.run(app, port=20007)
发版：v0.2.5 (#1620) * 优化configs (#1474) * remove llm_model_dict * optimize configs * fix get_model_path * 更改一些默认参数，添加千帆的默认配置 * Update server_config.py.example * fix merge conflict for #1474 (#1494) * 修复ChatGPT api_base_url错误；用户可以在model_config在线模型配置中覆盖默认的api_base_url (#1496) * 优化LLM模型列表获取、切换的逻辑： (#1497) 1、更准确的获取未运行的可用模型 2、优化WEBUI模型列表显示与切换的控制逻辑 * 更新migrate.py和init_database.py，加强知识库迁移工具： (#1498) 1. 添加--update-in-db参数，按照数据库信息，从本地文件更新向量库 2. 添加--increament参数，根据本地文件增量更新向量库 3. 添加--prune-db参数，删除本地文件后，自动清理相关的向量库 4. 添加--prune-folder参数，根据数据库信息，清理无用的本地文件 5. 取消--update-info-only参数。数据库中存储了向量库信息，该操作意义不大 6. 添加--kb-name参数，所有操作支持指定操作的知识库，不指定则为所有本地知识库 7. 添加知识库迁移的测试用例 8. 删除milvus_kb_service的save_vector_store方法 * feat: support volc fangzhou * 使火山方舟正常工作，添加错误处理和测试用例 * feat: support volc fangzhou (#1501) * feat: support volc fangzhou --------- Co-authored-by: liunux4odoo <41217877+liunux4odoo@users.noreply.github.com> Co-authored-by: liqiankun.1111 <liqiankun.1111@bytedance.com> * 第一版初步agent实现 (#1503) * 第一版初步agent实现 * 增加steaming参数 * 修改了weather.py --------- Co-authored-by: zR <zRzRzRzRzRzRzR> * 添加configs/prompt_config.py，允许用户自定义prompt模板： (#1504) 1、默认包含2个模板，分别用于LLM对话，知识库和搜索引擎对话 2、 server/utils.py提供函数get_prompt_template，获取指定的prompt模板内容（支持热加载） 3、 api.py中chat/knowledge_base_chat/search_engine_chat接口支持prompt_name参数 * 增加其它模型的参数适配 * 增加传入矢量名称加载 * 1. 搜索引擎问答支持历史记录； 2. 
修复知识库问答历史记录传参错误：用户输入被传入history，问题出在webui中重复获取历史消息，api知识库对话接口并无问题。 * langchain日志开关 * move wrap_done & get_ChatOpenAI from server.chat.utils to server.utils (#1506) * 修复faiss_pool知识库缓存key错误 (#1507) * fix ReadMe anchor link (#1500) * fix : Duplicate variable and function name (#1509) Co-authored-by: Jim <zhangpengyi@taijihuabao.com> * Update README.md * fix #1519: streamlit-chatbox旧版BUG，但新版有兼容问题，先在webui中作处理，并限定chatbox版本 (#1525) close #1519 * 【功能新增】在线 LLM 模型支持阿里云通义千问 (#1534) * feat: add qwen-api * 使Qwen API支持temperature参数；添加测试用例 * 将online-api的sdk列为可选依赖 --------- Co-authored-by: liunux4odoo <liunux@qq.com> * 处理序列化至磁盘的逻辑 * remove depends on volcengine * update kb_doc_api: use Form instead of Body when upload file * 将所有httpx请求改为使用Client，提高效率，方便以后设置代理等。 (#1554) 将所有httpx请求改为使用Client，提高效率，方便以后设置代理等。将本项目相关服务加入无代理列表，避免fastchat的服务器请求错误。(windows下无效) * update QR code * update readme_en,readme,requirements_api,requirements,model_config.py.example:测试baichuan2-7b;更新相关文档 * 新增特性：1.支持vllm推理加速框架；2. 更新支持模型列表 * 更新文件：1. startup,model_config.py.example,serve_config.py.example,FAQ * 1. debug vllm加速框架完毕；2. 修改requirements,requirements_api对vllm的依赖；3.注释掉serve_config中baichuan-7b的device为cpu的配置 * 1. 更新congif中关于vllm后端相关说明；2. 
更新requirements，requirements_api; * 增加了仅限GPT4的agent功能，陆续补充，中文版readme已写 (#1611) * Dev (#1613) * 增加了仅限GPT4的agent功能，陆续补充，中文版readme已写 * issue提到的一个bug * 温度最小改成0，但是不应该支持负数 * 修改了最小的温度 * fix: set vllm based on platform to avoid error on windows * fix: langchain warnings for import from root * 修复webui中重建知识库以及对话界面UI错误 (#1615) * 修复bug:webui点重建知识库时，如果存在不支持的文件会导致整个接口错误;migrate中没有导入CHUNK_SIZE * 修复：webui对话界面的expander一直为running状态；简化历史消息获取方法 * 根据官方文档，添加对英文版的bge embedding的指示模板 (#1585) Co-authored-by: zR <2448370773@qq.com> * Dev (#1618) * 增加了仅限GPT4的agent功能，陆续补充，中文版readme已写 * issue提到的一个bug * 温度最小改成0，但是不应该支持负数 * 修改了最小的温度 * 增加了部分Agent支持和修改了启动文件的部分bug * 修改了GPU数量配置文件 * 1 1 * 修复配置文件错误 * 更新readme，稳定测试 * 更改readme 0928 (#1619) * 增加了仅限GPT4的agent功能，陆续补充，中文版readme已写 * issue提到的一个bug * 温度最小改成0，但是不应该支持负数 * 修改了最小的温度 * 增加了部分Agent支持和修改了启动文件的部分bug * 修改了GPU数量配置文件 * 1 1 * 修复配置文件错误 * 更新readme，稳定测试 * 更新readme * fix readme * 处理序列化至磁盘的逻辑 * update version number to v0.2.5 --------- Co-authored-by: qiankunli <qiankun.li@qq.com> Co-authored-by: liqiankun.1111 <liqiankun.1111@bytedance.com> Co-authored-by: zR <2448370773@qq.com> Co-authored-by: glide-the <2533736852@qq.com> Co-authored-by: Water Zheng <1499383852@qq.com> Co-authored-by: Jim Zhang <dividi_z@163.com> Co-authored-by: Jim <zhangpengyi@taijihuabao.com> Co-authored-by: imClumsyPanda <littlepanda0716@gmail.com> Co-authored-by: Leego <leegodev@hotmail.com> Co-authored-by: hzg0601 <hzg0601@163.com> Co-authored-by: WilliamChen-luckbob <58684828+WilliamChen-luckbob@users.noreply.github.com> 2023-09-28 23:30:21 +08:00			`import json`
			`import sys`
			`from configs import TEMPERATURE`
			`from http import HTTPStatus`
			`from typing import List, Literal, Dict`

			`from fastchat import conversation as conv`

			`from server.model_workers.base import ApiModelWorker`
			`from server.utils import get_model_worker_config`


			`def request_qwen_api(`
			`messages: List[Dict[str, str]],`
			`api_key: str = None,`
			`version: str = "qwen-turbo",`
			`temperature: float = TEMPERATURE,`
			`model_name: str = "qwen-api",`
			`):`
			`import dashscope`

			`config = get_model_worker_config(model_name)`
			`api_key = api_key or config.get("api_key")`
			`version = version or config.get("version")`

			`gen = dashscope.Generation()`
			`responses = gen.call(`
			`model=version,`
			`temperature=temperature,`
			`api_key=api_key,`
			`messages=messages,`
			`result_format='message', # set the result is message format.`
			`stream=True,`
			`)`

			`text = ""`
			`for resp in responses:`
			`if resp.status_code != HTTPStatus.OK:`
			`yield {`
			`"code": resp.status_code,`
			`"text": "api not response correctly",`
			`}`

			`if resp["status_code"] == 200:`
			`if choices := resp["output"]["choices"]:`
			`yield {`
			`"code": 200,`
			`"text": choices[0]["message"]["content"],`
			`}`
			`else:`
			`yield {`
			`"code": resp["status_code"],`
			`"text": resp["message"],`
			`}`


			`class QwenWorker(ApiModelWorker):`
			`def __init__(`
			`self,`
			`*,`
			`version: Literal["qwen-turbo", "qwen-plus"] = "qwen-turbo",`
			`model_names: List[str] = ["qwen-api"],`
			`controller_addr: str,`
			`worker_addr: str,`
			`**kwargs,`
			`):`
			`kwargs.update(model_names=model_names, controller_addr=controller_addr, worker_addr=worker_addr)`
			`kwargs.setdefault("context_len", 16384)`
			`super().__init__(**kwargs)`

			`# TODO: 确认模板是否需要修改`
			`self.conv = conv.Conversation(`
			`name=self.model_names[0],`
			`system_message="你是一个聪明、对人类有帮助的人工智能，你可以对人类提出的问题给出有用、详细、礼貌的回答。",`
			`messages=[],`
			`roles=["user", "assistant", "system"],`
			`sep="\n### ",`
			`stop_str="###",`
			`)`
			`config = self.get_config()`
			`self.api_key = config.get("api_key")`
			`self.version = version`

			`def generate_stream_gate(self, params):`
			`messages = self.prompt_to_messages(params["prompt"])`

			`for resp in request_qwen_api(messages=messages,`
			`api_key=self.api_key,`
			`version=self.version,`
			`temperature=params.get("temperature")):`
			`if resp["code"] == 200:`
			`yield json.dumps({`
			`"error_code": 0,`
			`"text": resp["text"]`
			`},`
			`ensure_ascii=False`
			`).encode() + b"\0"`
			`else:`
			`yield json.dumps({`
			`"error_code": resp["code"],`
			`"text": resp["text"]`
			`},`
			`ensure_ascii=False`
			`).encode() + b"\0"`

			`def get_embeddings(self, params):`
			`# TODO: 支持embeddings`
			`print("embedding")`
			`print(params)`


			`if __name__ == "__main__":`
			`import uvicorn`
			`from server.utils import MakeFastAPIOffline`
			`from fastchat.serve.model_worker import app`

			`worker = QwenWorker(`
			`controller_addr="http://127.0.0.1:20001",`
			`worker_addr="http://127.0.0.1:20007",`
			`)`
			`sys.modules["fastchat.serve.model_worker"].worker = worker`
			`MakeFastAPIOffline(app)`
			`uvicorn.run(app, port=20007)`