Langchain-Chatchat/server/utils.py

import pydantic
from pydantic import BaseModel
from typing import List
from fastapi import FastAPI
from pathlib import Path
import asyncio
from configs.model_config import LLM_MODEL, llm_model_dict, LLM_DEVICE, EMBEDDING_DEVICE, logger, log_verbose
from configs.server_config import FSCHAT_MODEL_WORKERS
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Literal, Optional, Callable, Generator, Dict, Any


thread_pool = ThreadPoolExecutor(os.cpu_count())


class BaseResponse(BaseModel):
    code: int = pydantic.Field(200, description="API status code")
    msg: str = pydantic.Field("success", description="API status message")
    data: Any = pydantic.Field(None, description="API data")

    class Config:
        schema_extra = {
            "example": {
                "code": 200,
                "msg": "success",
            }
        }

class ListResponse(BaseResponse):
    data: List[str] = pydantic.Field(..., description="List of names")

    class Config:
        schema_extra = {
            "example": {
                "code": 200,
                "msg": "success",
                "data": ["doc1.docx", "doc2.pdf", "doc3.txt"],
            }
        }


class ChatMessage(BaseModel):
    question: str = pydantic.Field(..., description="Question text")
    response: str = pydantic.Field(..., description="Response text")
    history: List[List[str]] = pydantic.Field(..., description="History text")
    source_documents: List[str] = pydantic.Field(
        ..., description="List of source documents and their scores"
    )

    class Config:
        schema_extra = {
            "example": {
                "question": "工伤保险如何办理？",
                "response": "根据已知信息，可以总结如下：\n\n1. 参保单位为员工缴纳工伤保险费，以保障员工在发生工伤时能够获得相应的待遇。\n"
                            "2. 不同地区的工伤保险缴费规定可能有所不同，需要向当地社保部门咨询以了解具体的缴费标准和规定。\n"
                            "3. 工伤从业人员及其近亲属需要申请工伤认定，确认享受的待遇资格，并按时缴纳工伤保险费。\n"
                            "4. 工伤保险待遇包括工伤医疗、康复、辅助器具配置费用、伤残待遇、工亡待遇、一次性工亡补助金等。\n"
                            "5. 工伤保险待遇领取资格认证包括长期待遇领取人员认证和一次性待遇领取人员认证。\n"
                            "6. 工伤保险基金支付的待遇项目包括工伤医疗待遇、康复待遇、辅助器具配置费用、一次性工亡补助金、丧葬补助金等。",
                "history": [
                    [
                        "工伤保险是什么？",
                        "工伤保险是指用人单位按照国家规定，为本单位的职工和用人单位的其他人员，缴纳工伤保险费，"
                        "由保险机构按照国家规定的标准，给予工伤保险待遇的社会保险制度。",
                    ]
                ],
                "source_documents": [
                    "出处 [1] 广州市单位从业的特定人员参加工伤保险办事指引.docx：\n\n\t"
                    "( 一)  从业单位  (组织)  按“自愿参保”原则，  为未建 立劳动关系的特定从业人员单项参加工伤保险 、缴纳工伤保 险费。",
                    "出处 [2] ...",
                    "出处 [3] ...",
                ],
            }
        }

def torch_gc():
    import torch
    if torch.cuda.is_available():
        # with torch.cuda.device(DEVICE):
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
    elif torch.backends.mps.is_available():
        try:
            from torch.mps import empty_cache
            empty_cache()
        except Exception as e:
            msg=("如果您使用的是 macOS 建议将 pytorch 版本升级至 2.0.0 或更高版本，"
                 "以支持及时清理 torch 产生的内存占用。")
            logger.error(f'{e.__class__.__name__}: {msg}',
                         exc_info=e if log_verbose else None)

def run_async(cor):
    '''
    在同步环境中运行异步代码.
    '''
    try:
        loop = asyncio.get_event_loop()
    except:
        loop = asyncio.new_event_loop()
    return loop.run_until_complete(cor)


def iter_over_async(ait, loop):
    '''
    将异步生成器封装成同步生成器.
    '''
    ait = ait.__aiter__()
    async def get_next():
        try:
            obj = await ait.__anext__()
            return False, obj
        except StopAsyncIteration:
            return True, None
    while True:
        done, obj = loop.run_until_complete(get_next())
        if done:
            break
        yield obj


def MakeFastAPIOffline(
    app: FastAPI,
    static_dir = Path(__file__).parent / "static",
    static_url = "/static-offline-docs",
    docs_url: Optional[str] = "/docs",
    redoc_url: Optional[str] = "/redoc",
) -> None:
    """patch the FastAPI obj that doesn't rely on CDN for the documentation page"""
    from fastapi import Request
    from fastapi.openapi.docs import (
        get_redoc_html,
        get_swagger_ui_html,
        get_swagger_ui_oauth2_redirect_html,
    )
    from fastapi.staticfiles import StaticFiles
    from starlette.responses import HTMLResponse

    openapi_url = app.openapi_url
    swagger_ui_oauth2_redirect_url = app.swagger_ui_oauth2_redirect_url

    def remove_route(url: str) -> None:
        '''
        remove original route from app
        '''
        index = None
        for i, r in enumerate(app.routes):
            if r.path.lower() == url.lower():
                index = i
                break
        if isinstance(index, int):
            app.routes.pop(i)

    # Set up static file mount
    app.mount(
        static_url,
        StaticFiles(directory=Path(static_dir).as_posix()),
        name="static-offline-docs",
    )

    if docs_url is not None:
        remove_route(docs_url)
        remove_route(swagger_ui_oauth2_redirect_url)

        # Define the doc and redoc pages, pointing at the right files
        @app.get(docs_url, include_in_schema=False)
        async def custom_swagger_ui_html(request: Request) -> HTMLResponse:
            root = request.scope.get("root_path")
            favicon = f"{root}{static_url}/favicon.png"
            return get_swagger_ui_html(
                openapi_url=f"{root}{openapi_url}",
                title=app.title + " - Swagger UI",
                oauth2_redirect_url=swagger_ui_oauth2_redirect_url,
                swagger_js_url=f"{root}{static_url}/swagger-ui-bundle.js",
                swagger_css_url=f"{root}{static_url}/swagger-ui.css",
                swagger_favicon_url=favicon,
            )

        @app.get(swagger_ui_oauth2_redirect_url, include_in_schema=False)
        async def swagger_ui_redirect() -> HTMLResponse:
            return get_swagger_ui_oauth2_redirect_html()

    if redoc_url is not None:
        remove_route(redoc_url)

        @app.get(redoc_url, include_in_schema=False)
        async def redoc_html(request: Request) -> HTMLResponse:
            root = request.scope.get("root_path")
            favicon = f"{root}{static_url}/favicon.png"

            return get_redoc_html(
                openapi_url=f"{root}{openapi_url}",
                title=app.title + " - ReDoc",
                redoc_js_url=f"{root}{static_url}/redoc.standalone.js",
                with_google_fonts=False,
                redoc_favicon_url=favicon,
            )


# 从server_config中获取服务信息
def get_model_worker_config(model_name: str = LLM_MODEL) -> dict:
    '''
    加载model worker的配置项。
    优先级:FSCHAT_MODEL_WORKERS[model_name] > llm_model_dict[model_name] > FSCHAT_MODEL_WORKERS["default"]
    '''
    from configs.server_config import FSCHAT_MODEL_WORKERS
    from server import model_workers
    from configs.model_config import llm_model_dict

    config = FSCHAT_MODEL_WORKERS.get("default", {}).copy()
    config.update(llm_model_dict.get(model_name, {}))
    config.update(FSCHAT_MODEL_WORKERS.get(model_name, {}))

    # 如果没有设置有效的local_model_path，则认为是在线模型API
    if not os.path.isdir(config.get("local_model_path", "")):
        config["online_api"] = True
        if provider := config.get("provider"):
            try:
                config["worker_class"] = getattr(model_workers, provider)
            except Exception as e:
                msg = f"在线模型 ‘{model_name}’ 的provider没有正确配置"
                logger.error(f'{e.__class__.__name__}: {msg}',
                             exc_info=e if log_verbose else None)

    config["device"] = llm_device(config.get("device") or LLM_DEVICE)
    return config


def get_all_model_worker_configs() -> dict:
    result = {}
    model_names = set(llm_model_dict.keys()) | set(FSCHAT_MODEL_WORKERS.keys())
    for name in model_names:
        if name != "default":
            result[name] = get_model_worker_config(name)
    return result


def fschat_controller_address() -> str:
    from configs.server_config import FSCHAT_CONTROLLER

    host = FSCHAT_CONTROLLER["host"]
    port = FSCHAT_CONTROLLER["port"]
    return f"http://{host}:{port}"


def fschat_model_worker_address(model_name: str = LLM_MODEL) -> str:
    if model := get_model_worker_config(model_name):
        host = model["host"]
        port = model["port"]
        return f"http://{host}:{port}"
    return ""


def fschat_openai_api_address() -> str:
    from configs.server_config import FSCHAT_OPENAI_API

    host = FSCHAT_OPENAI_API["host"]
    port = FSCHAT_OPENAI_API["port"]
    return f"http://{host}:{port}"


def api_address() -> str:
    from configs.server_config import API_SERVER

    host = API_SERVER["host"]
    port = API_SERVER["port"]
    return f"http://{host}:{port}"


def webui_address() -> str:
    from configs.server_config import WEBUI_SERVER

    host = WEBUI_SERVER["host"]
    port = WEBUI_SERVER["port"]
    return f"http://{host}:{port}"


def set_httpx_timeout(timeout: float = None):
    '''
    设置httpx默认timeout。
    httpx默认timeout是5秒，在请求LLM回答时不够用。
    '''
    import httpx
    from configs.server_config import HTTPX_DEFAULT_TIMEOUT

    timeout = timeout or HTTPX_DEFAULT_TIMEOUT
    httpx._config.DEFAULT_TIMEOUT_CONFIG.connect = timeout
    httpx._config.DEFAULT_TIMEOUT_CONFIG.read = timeout
    httpx._config.DEFAULT_TIMEOUT_CONFIG.write = timeout


# 自动检查torch可用的设备。分布式部署时，不运行LLM的机器上可以不装torch
def detect_device() -> Literal["cuda", "mps", "cpu"]:
    try:
        import torch
        if torch.cuda.is_available():
            return "cuda"
        if torch.backends.mps.is_available():
            return "mps"
    except:
        pass
    return "cpu"


def llm_device(device: str = LLM_DEVICE) -> Literal["cuda", "mps", "cpu"]:
    if device not in ["cuda", "mps", "cpu"]:
        device = detect_device()
    return device


def embedding_device(device: str = EMBEDDING_DEVICE) -> Literal["cuda", "mps", "cpu"]:
    if device not in ["cuda", "mps", "cpu"]:
        device = detect_device()
    return device


def run_in_thread_pool(
    func: Callable,
    params: List[Dict] = [],
    pool: ThreadPoolExecutor = None,
) -> Generator:
    '''
    在线程池中批量运行任务，并将运行结果以生成器的形式返回。
    请确保任务中的所有操作是线程安全的，任务函数请全部使用关键字参数。
    '''
    tasks = []
    pool = pool or thread_pool

    for kwargs in params:
        thread = pool.submit(func, **kwargs)
        tasks.append(thread)

    for obj in as_completed(tasks):
        yield obj.result()
-												v0.2.0 first commit

											
										
										
											2023-07-27 23:22:07 +08:00
+								import pydantic
 								from pydantic import BaseModel
 								from typing import List
-												update llm_api and api server:
1. fastchat's controller/model_worker/api_server use swagger UI offline.
2. add custom title and icon.
3. remove fastapi-offline dependence

											
										
										
											2023-08-16 14:20:09 +08:00
+								from fastapi import FastAPI
-												patch fastapi-offline to use local swagger-ui assests

											
										
										
											2023-08-10 17:00:39 +08:00
+								from pathlib import Path
-												更新API与ApiReuest：
1. 重新整理webui_pages/utils与server/knowledge_base间的工具依赖
2.
将delete_knowledge_base与delete_doc接口从delete改为post.delete不支持body参数
3. 修复update_doc
4. 修复部分bug

											
										
										
											2023-08-11 08:37:07 +08:00
+								import asyncio
-												增加显示详细日志开关

											
										
										
											2023-09-08 20:48:31 +08:00
+								from configs.model_config import LLM_MODEL, llm_model_dict, LLM_DEVICE, EMBEDDING_DEVICE, logger, log_verbose
-												添加切换模型功能，支持智谱AI在线模型 (#1342)

* 添加LLM模型切换功能，需要在server_config中设置可切换的模型
* add tests for api.py/llm_model/*
* - 支持模型切换
- 支持智普AI线上模型
- startup.py增加参数`--api-worker`，自动运行所有的线上API模型。使用`-a
  (--all-webui), --all-api`时默认开启该选项
* 修复被fastchat覆盖的标准输出
* 对fastchat日志进行更细致的控制，startup.py中增加-q(--quiet)开关，可以减少无用的fastchat日志输出
* 修正chatglm api的对话模板


Co-authored-by: liunux4odoo <liunu@qq.com>
											
										
										
											2023-09-01 23:58:09 +08:00
+								from configs.server_config import FSCHAT_MODEL_WORKERS
 								import os
-												将KnowledgeFile的file2text拆分成file2docs、docs2texts和file2text三个部分，在保持接口不变的情况下，实现：
1、支持chunk_size和chunk_overlap参数
2、支持自定义text_splitter
3、支持自定义docs
修复：csv文件不使用text_splitter

											
										
										
											2023-09-04 16:37:44 +08:00
+								from concurrent.futures import ThreadPoolExecutor, as_completed
 								from typing import Literal, Optional, Callable, Generator, Dict, Any
-												修改Embeddings和FAISS缓存加载方式，知识库相关API接口支持多线程并发 (#1434)

* 修改Embeddings和FAISS缓存加载方式，支持多线程，支持内存FAISS

* 知识库相关API接口支持多线程并发

* 根据新的API接口调整ApiRequest和测试用例

* 删除webui.py失效的启动说明
											
										
										
											2023-09-11 20:41:41 +08:00
+								thread_pool = ThreadPoolExecutor(os.cpu_count())
-												patch fastapi-offline to use local swagger-ui assests

											
										
										
											2023-08-10 17:00:39 +08:00
-												v0.2.0 first commit

											
										
										
											2023-07-27 23:22:07 +08:00
 								class BaseResponse(BaseModel):
-												update chat and knowledge base api: unify exception processing and return types

											
										
										
											2023-08-19 15:14:45 +08:00
+								    code: int = pydantic.Field(200, description="API status code")
 								    msg: str = pydantic.Field("success", description="API status message")
-												添加切换模型功能，支持智谱AI在线模型 (#1342)

* 添加LLM模型切换功能，需要在server_config中设置可切换的模型
* add tests for api.py/llm_model/*
* - 支持模型切换
- 支持智普AI线上模型
- startup.py增加参数`--api-worker`，自动运行所有的线上API模型。使用`-a
  (--all-webui), --all-api`时默认开启该选项
* 修复被fastchat覆盖的标准输出
* 对fastchat日志进行更细致的控制，startup.py中增加-q(--quiet)开关，可以减少无用的fastchat日志输出
* 修正chatglm api的对话模板


Co-authored-by: liunux4odoo <liunu@qq.com>
											
										
										
											2023-09-01 23:58:09 +08:00
+								    data: Any = pydantic.Field(None, description="API data")
-												v0.2.0 first commit

											
										
										
											2023-07-27 23:22:07 +08:00
 								    class Config:
 								        schema_extra = {
 								            "example": {
 								                "code": 200,
 								                "msg": "success",
 								            }
 								        }
 								class ListResponse(BaseResponse):
 								    data: List[str] = pydantic.Field(..., description="List of names")
 								    class Config:
 								        schema_extra = {
 								            "example": {
 								                "code": 200,
 								                "msg": "success",
 								                "data": ["doc1.docx", "doc2.pdf", "doc3.txt"],
 								            }
 								        }
 								class ChatMessage(BaseModel):
 								    question: str = pydantic.Field(..., description="Question text")
 								    response: str = pydantic.Field(..., description="Response text")
 								    history: List[List[str]] = pydantic.Field(..., description="History text")
 								    source_documents: List[str] = pydantic.Field(
 								        ..., description="List of source documents and their scores"
 								    )
 								    class Config:
 								        schema_extra = {
 								            "example": {
 								                "question": "工伤保险如何办理？",
-												update import pkgs and format

											
										
										
											2023-08-10 21:26:05 +08:00
+								                "response": "根据已知信息，可以总结如下：\n\n1. 参保单位为员工缴纳工伤保险费，以保障员工在发生工伤时能够获得相应的待遇。\n"
 								                            "2. 不同地区的工伤保险缴费规定可能有所不同，需要向当地社保部门咨询以了解具体的缴费标准和规定。\n"
 								                            "3. 工伤从业人员及其近亲属需要申请工伤认定，确认享受的待遇资格，并按时缴纳工伤保险费。\n"
 								                            "4. 工伤保险待遇包括工伤医疗、康复、辅助器具配置费用、伤残待遇、工亡待遇、一次性工亡补助金等。\n"
 								                            "5. 工伤保险待遇领取资格认证包括长期待遇领取人员认证和一次性待遇领取人员认证。\n"
 								                            "6. 工伤保险基金支付的待遇项目包括工伤医疗待遇、康复待遇、辅助器具配置费用、一次性工亡补助金、丧葬补助金等。",
-												v0.2.0 first commit

											
										
										
											2023-07-27 23:22:07 +08:00
+								                "history": [
 								                    [
 								                        "工伤保险是什么？",
-												update import pkgs and format

											
										
										
											2023-08-10 21:26:05 +08:00
+								                        "工伤保险是指用人单位按照国家规定，为本单位的职工和用人单位的其他人员，缴纳工伤保险费，"
 								                        "由保险机构按照国家规定的标准，给予工伤保险待遇的社会保险制度。",
-												v0.2.0 first commit

											
										
										
											2023-07-27 23:22:07 +08:00
+								                    ]
 								                ],
 								                "source_documents": [
-												update import pkgs and format

											
										
										
											2023-08-10 21:26:05 +08:00
+								                    "出处 [1] 广州市单位从业的特定人员参加工伤保险办事指引.docx：\n\n\t"
 								                    "( 一)  从业单位  (组织)  按“自愿参保”原则，  为未建 立劳动关系的特定从业人员单项参加工伤保险 、缴纳工伤保 险费。",
-												v0.2.0 first commit

											
										
										
											2023-07-27 23:22:07 +08:00
+								                    "出处 [2] ...",
 								                    "出处 [3] ...",
 								                ],
 								            }
 								        }
 								def torch_gc():
-												 升级注意
 数据库表发生变化，需要重建知识库

 新功能
- 增加FileDocModel库表，存储知识文件与向量库Document ID对应关系以及元数据，便于检索向量库
- 增加FileDocModel对应的数据库操作函数（这些函数主要是给KBService调用，用户一般无需使用）：
  - list_docs_from_db: 根据知识库名称、文件名称、元数据检索对应的Document IDs
  - delete_docs_from_db: 根据知识库名称、文件名称删除对应的file-doc映射
  - add_docs_to_db: 添加对应的file-doc映射
- KBService增加list_docs方法，可以根据文件名、元数据检索Document。当前仅支持FAISS，待milvus/pg实现get_doc_by_id方法后即自动支持。
- 去除server.utils对torch的依赖

 待完善
- milvus/pg kb_service需要实现get_doc_by_id方法

											
										
										
											2023-09-01 22:54:57 +08:00
+								    import torch
-												v0.2.0 first commit

											
										
										
											2023-07-27 23:22:07 +08:00
+								    if torch.cuda.is_available():
 								        # with torch.cuda.device(DEVICE):
 								        torch.cuda.empty_cache()
 								        torch.cuda.ipc_collect()
 								    elif torch.backends.mps.is_available():
 								        try:
 								            from torch.mps import empty_cache
 								            empty_cache()
 								        except Exception as e:
-												增加显示详细日志开关

											
										
										
											2023-09-08 20:48:31 +08:00
+								            msg=("如果您使用的是 macOS 建议将 pytorch 版本升级至 2.0.0 或更高版本，"
 								                 "以支持及时清理 torch 产生的内存占用。")
 								            logger.error(f'{e.__class__.__name__}: {msg}',
 								                         exc_info=e if log_verbose else None)
-												更新API与ApiReuest：
1. 重新整理webui_pages/utils与server/knowledge_base间的工具依赖
2.
将delete_knowledge_base与delete_doc接口从delete改为post.delete不支持body参数
3. 修复update_doc
4. 修复部分bug

											
										
										
											2023-08-11 08:37:07 +08:00
 								def run_async(cor):
 								    '''
 								    在同步环境中运行异步代码.
 								    '''
 								    try:
 								        loop = asyncio.get_event_loop()
 								    except:
 								        loop = asyncio.new_event_loop()
 								    return loop.run_until_complete(cor)
 								def iter_over_async(ait, loop):
 								    '''
 								    将异步生成器封装成同步生成器.
 								    '''
 								    ait = ait.__aiter__()
 								    async def get_next():
 								        try:
 								            obj = await ait.__anext__()
 								            return False, obj
 								        except StopAsyncIteration:
 								            return True, None
 								    while True:
 								        done, obj = loop.run_until_complete(get_next())
 								        if done:
 								            break
 								        yield obj
-												update llm_api and api server:
1. fastchat's controller/model_worker/api_server use swagger UI offline.
2. add custom title and icon.
3. remove fastapi-offline dependence

											
										
										
											2023-08-16 14:20:09 +08:00
 								def MakeFastAPIOffline(
 								    app: FastAPI,
 								    static_dir = Path(__file__).parent / "static",
 								    static_url = "/static-offline-docs",
 								    docs_url: Optional[str] = "/docs",
 								    redoc_url: Optional[str] = "/redoc",
 								) -> None:
 								    """patch the FastAPI obj that doesn't rely on CDN for the documentation page"""
 								    from fastapi import Request
 								    from fastapi.openapi.docs import (
 								        get_redoc_html,
 								        get_swagger_ui_html,
 								        get_swagger_ui_oauth2_redirect_html,
 								    )
 								    from fastapi.staticfiles import StaticFiles
 								    from starlette.responses import HTMLResponse
 								    openapi_url = app.openapi_url
 								    swagger_ui_oauth2_redirect_url = app.swagger_ui_oauth2_redirect_url
 								    def remove_route(url: str) -> None:
 								        '''
 								        remove original route from app
 								        '''
 								        index = None
 								        for i, r in enumerate(app.routes):
 								            if r.path.lower() == url.lower():
 								                index = i
 								                break
 								        if isinstance(index, int):
 								            app.routes.pop(i)
 								    # Set up static file mount
 								    app.mount(
 								        static_url,
 								        StaticFiles(directory=Path(static_dir).as_posix()),
 								        name="static-offline-docs",
 								    )
 								    if docs_url is not None:
 								        remove_route(docs_url)
 								        remove_route(swagger_ui_oauth2_redirect_url)
 								        # Define the doc and redoc pages, pointing at the right files
 								        @app.get(docs_url, include_in_schema=False)
 								        async def custom_swagger_ui_html(request: Request) -> HTMLResponse:
 								            root = request.scope.get("root_path")
 								            favicon = f"{root}{static_url}/favicon.png"
 								            return get_swagger_ui_html(
 								                openapi_url=f"{root}{openapi_url}",
 								                title=app.title + " - Swagger UI",
 								                oauth2_redirect_url=swagger_ui_oauth2_redirect_url,
 								                swagger_js_url=f"{root}{static_url}/swagger-ui-bundle.js",
 								                swagger_css_url=f"{root}{static_url}/swagger-ui.css",
 								                swagger_favicon_url=favicon,
 								            )
 								        @app.get(swagger_ui_oauth2_redirect_url, include_in_schema=False)
 								        async def swagger_ui_redirect() -> HTMLResponse:
 								            return get_swagger_ui_oauth2_redirect_html()
 								    if redoc_url is not None:
 								        remove_route(redoc_url)
 								        @app.get(redoc_url, include_in_schema=False)
 								        async def redoc_html(request: Request) -> HTMLResponse:
 								            root = request.scope.get("root_path")
 								            favicon = f"{root}{static_url}/favicon.png"
 								            return get_redoc_html(
 								                openapi_url=f"{root}{openapi_url}",
 								                title=app.title + " - ReDoc",
 								                redoc_js_url=f"{root}{static_url}/redoc.standalone.js",
 								                with_google_fonts=False,
 								                redoc_favicon_url=favicon,
 								            )
-												优化server_config配置项 (#1293)

* update server_config.py:
- 在model_config中增加HISTORY_LEN配置参数
- 将server_config中helper function移动到server.utils中
- 统一set_httpx_timeout的定义和调用

* update webui.py:
应用model_config中的配置项：HISTORY_LEN,VECTOR_SEARCH_TOP_K,SEARCH_ENGINE_TOP_K

---------

Co-authored-by: liunux4odoo <liunu@qq.com>
											
										
										
											2023-08-29 10:06:09 +08:00
 								# 从server_config中获取服务信息
 								def get_model_worker_config(model_name: str = LLM_MODEL) -> dict:
 								    '''
 								    加载model worker的配置项。
 								    优先级:FSCHAT_MODEL_WORKERS[model_name] > llm_model_dict[model_name] > FSCHAT_MODEL_WORKERS["default"]
 								    '''
 								    from configs.server_config import FSCHAT_MODEL_WORKERS
-												fix: 由于server.utils导入model_workers时，导入fastchat model_worker，导致startup启动延迟明显 (#1436)


											
										
										
											2023-09-12 08:52:00 +08:00
+								    from server import model_workers
-												优化server_config配置项 (#1293)

* update server_config.py:
- 在model_config中增加HISTORY_LEN配置参数
- 将server_config中helper function移动到server.utils中
- 统一set_httpx_timeout的定义和调用

* update webui.py:
应用model_config中的配置项：HISTORY_LEN,VECTOR_SEARCH_TOP_K,SEARCH_ENGINE_TOP_K

---------

Co-authored-by: liunux4odoo <liunu@qq.com>
											
										
										
											2023-08-29 10:06:09 +08:00
+								    from configs.model_config import llm_model_dict
 								    config = FSCHAT_MODEL_WORKERS.get("default", {}).copy()
 								    config.update(llm_model_dict.get(model_name, {}))
 								    config.update(FSCHAT_MODEL_WORKERS.get(model_name, {}))
-												添加切换模型功能，支持智谱AI在线模型 (#1342)

* 添加LLM模型切换功能，需要在server_config中设置可切换的模型
* add tests for api.py/llm_model/*
* - 支持模型切换
- 支持智普AI线上模型
- startup.py增加参数`--api-worker`，自动运行所有的线上API模型。使用`-a
  (--all-webui), --all-api`时默认开启该选项
* 修复被fastchat覆盖的标准输出
* 对fastchat日志进行更细致的控制，startup.py中增加-q(--quiet)开关，可以减少无用的fastchat日志输出
* 修正chatglm api的对话模板


Co-authored-by: liunux4odoo <liunu@qq.com>
											
										
										
											2023-09-01 23:58:09 +08:00
 								    # 如果没有设置有效的local_model_path，则认为是在线模型API
 								    if not os.path.isdir(config.get("local_model_path", "")):
 								        config["online_api"] = True
 								        if provider := config.get("provider"):
 								            try:
 								                config["worker_class"] = getattr(model_workers, provider)
 								            except Exception as e:
-												增加显示详细日志开关

											
										
										
											2023-09-08 20:48:31 +08:00
+								                msg = f"在线模型 ‘{model_name}’ 的provider没有正确配置"
 								                logger.error(f'{e.__class__.__name__}: {msg}',
 								                             exc_info=e if log_verbose else None)
-												添加切换模型功能，支持智谱AI在线模型 (#1342)

* 添加LLM模型切换功能，需要在server_config中设置可切换的模型
* add tests for api.py/llm_model/*
* - 支持模型切换
- 支持智普AI线上模型
- startup.py增加参数`--api-worker`，自动运行所有的线上API模型。使用`-a
  (--all-webui), --all-api`时默认开启该选项
* 修复被fastchat覆盖的标准输出
* 对fastchat日志进行更细致的控制，startup.py中增加-q(--quiet)开关，可以减少无用的fastchat日志输出
* 修正chatglm api的对话模板


Co-authored-by: liunux4odoo <liunu@qq.com>
											
										
										
											2023-09-01 23:58:09 +08:00
-												update model_config.py.example

											
										
										
											2023-09-03 15:57:44 +08:00
+								    config["device"] = llm_device(config.get("device") or LLM_DEVICE)
-												优化server_config配置项 (#1293)

* update server_config.py:
- 在model_config中增加HISTORY_LEN配置参数
- 将server_config中helper function移动到server.utils中
- 统一set_httpx_timeout的定义和调用

* update webui.py:
应用model_config中的配置项：HISTORY_LEN,VECTOR_SEARCH_TOP_K,SEARCH_ENGINE_TOP_K

---------

Co-authored-by: liunux4odoo <liunu@qq.com>
											
										
										
											2023-08-29 10:06:09 +08:00
+								    return config
-												添加切换模型功能，支持智谱AI在线模型 (#1342)

* 添加LLM模型切换功能，需要在server_config中设置可切换的模型
* add tests for api.py/llm_model/*
* - 支持模型切换
- 支持智普AI线上模型
- startup.py增加参数`--api-worker`，自动运行所有的线上API模型。使用`-a
  (--all-webui), --all-api`时默认开启该选项
* 修复被fastchat覆盖的标准输出
* 对fastchat日志进行更细致的控制，startup.py中增加-q(--quiet)开关，可以减少无用的fastchat日志输出
* 修正chatglm api的对话模板


Co-authored-by: liunux4odoo <liunu@qq.com>
											
										
										
											2023-09-01 23:58:09 +08:00
+								def get_all_model_worker_configs() -> dict:
 								    result = {}
 								    model_names = set(llm_model_dict.keys()) | set(FSCHAT_MODEL_WORKERS.keys())
 								    for name in model_names:
 								        if name != "default":
 								            result[name] = get_model_worker_config(name)
 								    return result
-												优化server_config配置项 (#1293)

* update server_config.py:
- 在model_config中增加HISTORY_LEN配置参数
- 将server_config中helper function移动到server.utils中
- 统一set_httpx_timeout的定义和调用

* update webui.py:
应用model_config中的配置项：HISTORY_LEN,VECTOR_SEARCH_TOP_K,SEARCH_ENGINE_TOP_K

---------

Co-authored-by: liunux4odoo <liunu@qq.com>
											
										
										
											2023-08-29 10:06:09 +08:00
+								def fschat_controller_address() -> str:
 								    from configs.server_config import FSCHAT_CONTROLLER
 								    host = FSCHAT_CONTROLLER["host"]
 								    port = FSCHAT_CONTROLLER["port"]
 								    return f"http://{host}:{port}"
 								def fschat_model_worker_address(model_name: str = LLM_MODEL) -> str:
 								    if model := get_model_worker_config(model_name):
 								        host = model["host"]
 								        port = model["port"]
 								        return f"http://{host}:{port}"
 								    return ""
 								def fschat_openai_api_address() -> str:
 								    from configs.server_config import FSCHAT_OPENAI_API
 								    host = FSCHAT_OPENAI_API["host"]
 								    port = FSCHAT_OPENAI_API["port"]
 								    return f"http://{host}:{port}"
 								def api_address() -> str:
 								    from configs.server_config import API_SERVER
 								    host = API_SERVER["host"]
 								    port = API_SERVER["port"]
 								    return f"http://{host}:{port}"
 								def webui_address() -> str:
 								    from configs.server_config import WEBUI_SERVER
 								    host = WEBUI_SERVER["host"]
 								    port = WEBUI_SERVER["port"]
 								    return f"http://{host}:{port}"
 								def set_httpx_timeout(timeout: float = None):
 								    '''
 								    设置httpx默认timeout。
 								    httpx默认timeout是5秒，在请求LLM回答时不够用。
 								    '''
 								    import httpx
 								    from configs.server_config import HTTPX_DEFAULT_TIMEOUT
 								    timeout = timeout or HTTPX_DEFAULT_TIMEOUT
 								    httpx._config.DEFAULT_TIMEOUT_CONFIG.connect = timeout
 								    httpx._config.DEFAULT_TIMEOUT_CONFIG.read = timeout
 								    httpx._config.DEFAULT_TIMEOUT_CONFIG.write = timeout
-												优化LLM和Embedding模型运行设备配置，可设为auto自动检测

											
										
										
											2023-08-31 17:33:43 +08:00
 								# 自动检查torch可用的设备。分布式部署时，不运行LLM的机器上可以不装torch
 								def detect_device() -> Literal["cuda", "mps", "cpu"]:
 								    try:
 								        import torch
 								        if torch.cuda.is_available():
 								            return "cuda"
 								        if torch.backends.mps.is_available():
 								            return "mps"
 								    except:
 								        pass
 								    return "cpu"
 								def llm_device(device: str = LLM_DEVICE) -> Literal["cuda", "mps", "cpu"]:
 								    if device not in ["cuda", "mps", "cpu"]:
 								        device = detect_device()
 								    return device
 								def embedding_device(device: str = EMBEDDING_DEVICE) -> Literal["cuda", "mps", "cpu"]:
 								    if device not in ["cuda", "mps", "cpu"]:
 								        device = detect_device()
 								    return device
-												将KnowledgeFile的file2text拆分成file2docs、docs2texts和file2text三个部分，在保持接口不变的情况下，实现：
1、支持chunk_size和chunk_overlap参数
2、支持自定义text_splitter
3、支持自定义docs
修复：csv文件不使用text_splitter

											
										
										
											2023-09-04 16:37:44 +08:00
 								def run_in_thread_pool(
 								    func: Callable,
 								    params: List[Dict] = [],
 								    pool: ThreadPoolExecutor = None,
 								) -> Generator:
 								    '''
 								    在线程池中批量运行任务，并将运行结果以生成器的形式返回。
 								    请确保任务中的所有操作是线程安全的，任务函数请全部使用关键字参数。
 								    '''
 								    tasks = []
 								    pool = pool or thread_pool
-												增加显示详细日志开关

											
										
										
											2023-09-08 20:48:31 +08:00
-												将KnowledgeFile的file2text拆分成file2docs、docs2texts和file2text三个部分，在保持接口不变的情况下，实现：
1、支持chunk_size和chunk_overlap参数
2、支持自定义text_splitter
3、支持自定义docs
修复：csv文件不使用text_splitter

											
										
										
											2023-09-04 16:37:44 +08:00
+								    for kwargs in params:
 								        thread = pool.submit(func, **kwargs)
 								        tasks.append(thread)
-												增加显示详细日志开关

											
										
										
											2023-09-08 20:48:31 +08:00
-												将KnowledgeFile的file2text拆分成file2docs、docs2texts和file2text三个部分，在保持接口不变的情况下，实现：
1、支持chunk_size和chunk_overlap参数
2、支持自定义text_splitter
3、支持自定义docs
修复：csv文件不使用text_splitter

											
										
										
											2023-09-04 16:37:44 +08:00
+								    for obj in as_completed(tasks):
 								        yield obj.result()