diff --git a/.gitignore b/.gitignore
index ced8d9e..105fa0c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,7 +3,7 @@
 *.bak
 logs
 /knowledge_base/
-/configs/*.py
+#/configs/*.py
 .vscode/
 
 # below are standard python ignore files
diff --git a/configs/basic_config.py b/configs/basic_config.py
new file mode 100644
index 0000000..6bd8c8d
--- /dev/null
+++ b/configs/basic_config.py
@@ -0,0 +1,22 @@
+import logging
+import os
+import langchain
+
+# Whether to print verbose logs
+log_verbose = False
+langchain.verbose = False
+
+
+# The settings below normally do not need to be changed
+
+# Log format
+LOG_FORMAT = "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+logging.basicConfig(format=LOG_FORMAT)
+
+
+# Log storage path
+LOG_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs")
+if not os.path.exists(LOG_PATH):
+    os.mkdir(LOG_PATH)
diff --git a/configs/kb_config.py b/configs/kb_config.py
new file mode 100644
index 0000000..ae51722
--- /dev/null
+++ b/configs/kb_config.py
@@ -0,0 +1,122 @@
+import os
+
+# Default knowledge base
+DEFAULT_KNOWLEDGE_BASE = "20231130"
+
+# Default vector store type. Options: faiss, milvus (offline) & zilliz (online), pg.
+DEFAULT_VS_TYPE = "faiss"
+
+# Number of cached vector stores (FAISS only)
+CACHED_VS_NUM = 10
+
+# Length of a single text chunk in the knowledge base (does not apply to MarkdownHeaderTextSplitter)
+CHUNK_SIZE = 300
+
+# Overlap length between adjacent text chunks (does not apply to MarkdownHeaderTextSplitter)
+OVERLAP_SIZE = 0
+
+# Number of matched vectors returned from the knowledge base
+VECTOR_SEARCH_TOP_K = 10
+
+# Knowledge base relevance threshold, in the range 0-1. A smaller score means higher
+# relevance; setting it to 1 disables filtering. Around 0.5 is recommended.
+SCORE_THRESHOLD = 1.0
+
+# Default search engine. Options: bing, duckduckgo, metaphor
+DEFAULT_SEARCH_ENGINE = "duckduckgo"
+
+# Number of search engine results to use
+SEARCH_ENGINE_TOP_K = 3
+
+
+# Required variables for Bing search
+# Bing search requires a Bing Subscription Key; apply for a Bing Search trial in the Azure portal.
+# For how to apply, see
+# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/create-bing-search-service-resource
+# For creating a Bing search instance with the Python API, see:
+# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/quickstarts/rest/python
+BING_SEARCH_URL = "https://api.bing.microsoft.com/v7.0/search"
+# Note: this is NOT the Bing Webmaster Tools API key.
+
+# Also, if a server reports "Failed to establish a new connection: [Errno 110] Connection timed out",
+# the server is behind a firewall; ask the administrator to add the endpoint to the whitelist.
+# On a locked-down corporate server this may not be possible at all.
+BING_SUBSCRIPTION_KEY = ""
+
+# Metaphor search requires a key
+METAPHOR_API_KEY = ""
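+# Illustrative sketch (editorial addition, not part of this change): a minimal
+# Bing Web Search call built from the settings above, assuming the `requests`
+# package. `search_bing` is a hypothetical helper, not project code.
+# import requests
+#
+# def search_bing(query: str, top_k: int = SEARCH_ENGINE_TOP_K) -> list:
+#     headers = {"Ocp-Apim-Subscription-Key": BING_SUBSCRIPTION_KEY}
+#     params = {"q": query, "count": top_k}
+#     resp = requests.get(BING_SEARCH_URL, headers=headers, params=params, timeout=10)
+#     resp.raise_for_status()
+#     # each result carries "name", "url" and "snippet" fields
+#     return resp.json().get("webPages", {}).get("value", [])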
"postgresql://postgres:postgres@127.0.0.1:5432/langchain_chatchat", + } +} + +# TextSplitter配置项,如果你不明白其中的含义,就不要修改。 +text_splitter_dict = { + "ChineseRecursiveTextSplitter": { + "source": "huggingface", ## 选择tiktoken则使用openai的方法 + "tokenizer_name_or_path": "", + }, + "SpacyTextSplitter": { + "source": "huggingface", + "tokenizer_name_or_path": "gpt2", + }, + "RecursiveCharacterTextSplitter": { + "source": "tiktoken", + "tokenizer_name_or_path": "cl100k_base", + }, + "MarkdownHeaderTextSplitter": { + "headers_to_split_on": + [ + ("#", "head1"), + ("##", "head2"), + ("###", "head3"), + ("####", "head4"), + ] + }, + "ChineseTextSplitter": { + "source": "huggingface", + "tokenizer_name_or_path": "", + }, +} + +# TEXT_SPLITTER 名称 +TEXT_SPLITTER_NAME = "ChineseRecursiveTextSplitter" diff --git a/configs/model_config.py b/configs/model_config.py new file mode 100644 index 0000000..5b30952 --- /dev/null +++ b/configs/model_config.py @@ -0,0 +1,260 @@ +import os + + +# 可以指定一个绝对路径,统一存放所有的Embedding和LLM模型。 +# 每个模型可以是一个单独的目录,也可以是某个目录下的二级子目录 +MODEL_ROOT_PATH = "" + +# 在以下字典中修改属性值,以指定本地embedding模型存储位置。支持3种设置方法: +# 1、将对应的值修改为模型绝对路径 +# 2、不修改此处的值(以 text2vec 为例): +# 2.1 如果{MODEL_ROOT_PATH}下存在如下任一子目录: +# - text2vec +# - GanymedeNil/text2vec-large-chinese +# - text2vec-large-chinese +# 2.2 如果以上本地路径不存在,则使用huggingface模型 +MODEL_PATH = { + "embed_model": { + "ernie-tiny": "nghuyong/ernie-3.0-nano-zh", + "ernie-base": "nghuyong/ernie-3.0-base-zh", + "text2vec-base": "shibing624/text2vec-base-chinese", + "text2vec": "GanymedeNil/text2vec-large-chinese", + "text2vec-paraphrase": "shibing624/text2vec-base-chinese-paraphrase", + "text2vec-sentence": "shibing624/text2vec-base-chinese-sentence", + "text2vec-multilingual": "shibing624/text2vec-base-multilingual", + "text2vec-bge-large-chinese": "/home/bns001/shibing624/text2vec-bge-large-chinese", + "m3e-small": "moka-ai/m3e-small", + "m3e-base": "moka-ai/m3e-base", + "m3e-large": "moka-ai/m3e-large", + "bge-small-zh": "BAAI/bge-small-zh", + "bge-base-zh": "BAAI/bge-base-zh", + "bge-large-zh": "/home/bns001/BAAI/bge-large-zh-v1.5", + "bge-large-zh-noinstruct": "BAAI/bge-large-zh-noinstruct", + "bge-base-zh-v1.5": "BAAI/bge-base-zh-v1.5", + "bge-large-zh-v1.5": "BAAI/bge-large-zh-v1.5", + "piccolo-base-zh": "sensenova/piccolo-base-zh", + "piccolo-large-zh": "sensenova/piccolo-large-zh", + "text-embedding-ada-002": "your OPENAI_API_KEY", + }, + # TODO: add all supported llm models + "llm_model": { + # 以下部分模型并未完全测试,仅根据fastchat和vllm模型的模型列表推定支持 + "chatglm-6b": "THUDM/chatglm-6b", + "chatglm2-6b": "/home/bns001/chatglm2-6b", + "chatglm2-6b-int4": "THUDM/chatglm2-6b-int4", + "chatglm3-6b-32k": "/home/bns001/chatglm3-6b-32k", + "chatglm3-6b": "/home/bns001/chatglm3-6b", + + "baichuan2-13b": "baichuan-inc/Baichuan2-13B-Chat", + "baichuan2-7b":"baichuan-inc/Baichuan2-7B-Chat", + + "baichuan-7b": "baichuan-inc/Baichuan-7B", + "baichuan-13b": "baichuan-inc/Baichuan-13B", + 'baichuan-13b-chat':'baichuan-inc/Baichuan-13B-Chat', + + "aquila-7b":"BAAI/Aquila-7B", + "aquilachat-7b":"BAAI/AquilaChat-7B", + + "internlm-7b":"internlm/internlm-7b", + "internlm-chat-7b":"internlm/internlm-chat-7b", + + "falcon-7b":"tiiuae/falcon-7b", + "falcon-40b":"tiiuae/falcon-40b", + "falcon-rw-7b":"tiiuae/falcon-rw-7b", + + "gpt2":"gpt2", + "gpt2-xl":"gpt2-xl", + + "gpt-j-6b":"EleutherAI/gpt-j-6b", + "gpt4all-j":"nomic-ai/gpt4all-j", + "gpt-neox-20b":"EleutherAI/gpt-neox-20b", + "pythia-12b":"EleutherAI/pythia-12b", + "oasst-sft-4-pythia-12b-epoch-3.5":"OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5", + 
"dolly-v2-12b":"databricks/dolly-v2-12b", + "stablelm-tuned-alpha-7b":"stabilityai/stablelm-tuned-alpha-7b", + + "Llama-2-13b-hf":"meta-llama/Llama-2-13b-hf", + "Llama-2-70b-hf":"meta-llama/Llama-2-70b-hf", + "open_llama_13b":"openlm-research/open_llama_13b", + "vicuna-13b-v1.3":"lmsys/vicuna-13b-v1.3", + "koala":"young-geng/koala", + + "mpt-7b":"mosaicml/mpt-7b", + "mpt-7b-storywriter":"mosaicml/mpt-7b-storywriter", + "mpt-30b":"mosaicml/mpt-30b", + "opt-66b":"facebook/opt-66b", + "opt-iml-max-30b":"facebook/opt-iml-max-30b", + + "Qwen-7B":"Qwen/Qwen-7B", + "Qwen-14B":"Qwen/Qwen-14B", + "Qwen-7B-Chat":"Qwen/Qwen-7B-Chat", + "Qwen-14B-Chat":"Qwen/Qwen-14B-Chat", + }, +} +# 选用的 Embedding 名称 +EMBEDDING_MODEL = "bge-large-zh" # 可以尝试最新的嵌入式sota模型:bge-large-zh-v1.5 + + +# Embedding 模型运行设备。设为"auto"会自动检测,也可手动设定为"cuda","mps","cpu"其中之一。 +EMBEDDING_DEVICE = "auto" + +# LLM 名称 +LLM_MODEL = "chatglm3-6b" + +# LLM 运行设备。设为"auto"会自动检测,也可手动设定为"cuda","mps","cpu"其中之一。 +LLM_DEVICE = "auto" + +# 历史对话轮数 +HISTORY_LEN = 0 + +# LLM通用对话参数 +TEMPERATURE = 0.1 +# TOP_P = 0.95 # ChatOpenAI暂不支持该参数 + + +LANGCHAIN_LLM_MODEL = { + # 不需要走Fschat封装的,Langchain直接支持的模型。 + # 调用chatgpt时如果报出: urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='api.openai.com', port=443): + # Max retries exceeded with url: /v1/chat/completions + # 则需要将urllib3版本修改为1.25.11 + # 如果依然报urllib3.exceptions.MaxRetryError: HTTPSConnectionPool,则将https改为http + # 参考https://zhuanlan.zhihu.com/p/350015032 + + # 如果报出:raise NewConnectionError( + # urllib3.exceptions.NewConnectionError: : + # Failed to establish a new connection: [WinError 10060] + # 则是因为内地和香港的IP都被OPENAI封了,需要切换为日本、新加坡等地 + + # 如果出现WARNING: Retrying langchain.chat_models.openai.acompletion_with_retry.._completion_with_retry in + # 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI. 
+
+# Name of the Embedding model to use
+EMBEDDING_MODEL = "bge-large-zh"  # you can also try the latest SOTA embedding model: bge-large-zh-v1.5
+
+
+# Device to run the Embedding model on. "auto" detects automatically; it can also be set manually to "cuda", "mps" or "cpu".
+EMBEDDING_DEVICE = "auto"
+
+# LLM name
+LLM_MODEL = "chatglm3-6b"
+
+# Device to run the LLM on. "auto" detects automatically; it can also be set manually to "cuda", "mps" or "cpu".
+LLM_DEVICE = "auto"
+
+# Number of history turns to keep in a conversation
+HISTORY_LEN = 0
+
+# Common LLM chat parameters
+TEMPERATURE = 0.1
+# TOP_P = 0.95  # ChatOpenAI does not support this parameter yet
+
+
+LANGCHAIN_LLM_MODEL = {
+    # Models supported directly by Langchain, without going through the Fschat wrapper.
+    # If calling chatgpt raises: urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='api.openai.com', port=443):
+    # Max retries exceeded with url: /v1/chat/completions
+    # then downgrade urllib3 to 1.25.11.
+    # If urllib3.exceptions.MaxRetryError: HTTPSConnectionPool still occurs, change https to http.
+    # See https://zhuanlan.zhihu.com/p/350015032
+
+    # If you see: raise NewConnectionError(
+    # urllib3.exceptions.NewConnectionError: :
+    # Failed to establish a new connection: [WinError 10060]
+    # then mainland China and Hong Kong IPs are blocked by OPENAI; switch to Japan, Singapore, etc.
+
+    # If you see: WARNING: Retrying langchain.chat_models.openai.acompletion_with_retry.._completion_with_retry in
+    # 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI.
+    # then proxy access is required (an already running proxy client may not intercept these requests):
+    # set the "openai_proxy" config entry, or use the OPENAI_PROXY environment variable,
+    # e.g. "openai_proxy": 'http://127.0.0.1:4780'
+
+    # The names of these config entries must not be changed
+    "Azure-OpenAI": {
+        "deployment_name": "your Azure deployment name",
+        "model_version": "0701",
+        "openai_api_type": "azure",
+        "api_base_url": "https://your Azure point.azure.com",
+        "api_version": "2023-07-01-preview",
+        "api_key": "your Azure api key",
+        "openai_proxy": "",
+    },
+    "OpenAI": {
+        "model_name": "your openai model name(such as gpt-4)",
+        "api_base_url": "https://api.openai.com/v1",
+        "api_key": "your OPENAI_API_KEY",
+        "openai_proxy": "",
+    },
+    "Anthropic": {
+        "model_name": "your claude model name(such as claude2-100k)",
+        "api_key": "your ANTHROPIC_API_KEY",
+    }
+}
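+# Illustrative sketch (editorial addition, not part of this change): how the
+# "OpenAI" entry above maps onto LangChain's ChatOpenAI, including the
+# openai_proxy workaround mentioned in the comments.
+# from langchain.chat_models import ChatOpenAI
+#
+# cfg = LANGCHAIN_LLM_MODEL["OpenAI"]
+# llm = ChatOpenAI(
+#     model_name=cfg["model_name"],
+#     openai_api_base=cfg["api_base_url"],
+#     openai_api_key=cfg["api_key"],
+#     openai_proxy=cfg["openai_proxy"] or None,  # e.g. "http://127.0.0.1:4780"
+#     temperature=TEMPERATURE,
+# )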
"stablelm-tuned-alpha-7b":"stabilityai/stablelm-tuned-alpha-7b", + "Llama-2-13b-hf":"meta-llama/Llama-2-13b-hf", + "Llama-2-70b-hf":"meta-llama/Llama-2-70b-hf", + "open_llama_13b":"openlm-research/open_llama_13b", + "vicuna-13b-v1.3":"lmsys/vicuna-13b-v1.3", + "koala":"young-geng/koala", + "mpt-7b":"mosaicml/mpt-7b", + "mpt-7b-storywriter":"mosaicml/mpt-7b-storywriter", + "mpt-30b":"mosaicml/mpt-30b", + "opt-66b":"facebook/opt-66b", + "opt-iml-max-30b":"facebook/opt-iml-max-30b", + + "Qwen-7B":"Qwen/Qwen-7B", + "Qwen-14B":"Qwen/Qwen-14B", + "Qwen-7B-Chat":"Qwen/Qwen-7B-Chat", + "Qwen-14B-Chat":"Qwen/Qwen-14B-Chat", + +} diff --git a/configs/prompt_config.py b/configs/prompt_config.py new file mode 100644 index 0000000..059a491 --- /dev/null +++ b/configs/prompt_config.py @@ -0,0 +1,124 @@ +# prompt模板使用Jinja2语法,简单点就是用双大括号代替f-string的单大括号 +# 本配置文件支持热加载,修改prompt模板后无需重启服务。 + + +# LLM对话支持的变量: +# - input: 用户输入内容 + +# 知识库和搜索引擎对话支持的变量: +# - context: 从检索结果拼接的知识文本 +# - question: 用户提出的问题 + +# Agent对话支持的变量: + +# - tools: 可用的工具列表 +# - tool_names: 可用的工具名称列表 +# - history: 用户和Agent的对话历史 +# - input: 用户输入内容 +# - agent_scratchpad: Agent的思维记录 + +PROMPT_TEMPLATES = {} + +PROMPT_TEMPLATES["llm_chat"] = { + "default": "{{ input }}", + + "py": + """ + 你是一个聪明的代码助手,请你给我写出简单的py代码。 \n + {{ input }} + """ + , +} + +PROMPT_TEMPLATES["knowledge_base_chat"] = { + "default": + """ + <指令>你是一个电力公司相关的专家,请完全依据已知信息的内容,先找出与问题相关的信息,然后再根据这些相关信息回答简洁、专业地来回答问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题”,不允许在答案中添加编造成分,不回答与问题无关的内容,请使用中文 +简体回答. + <已知信息>{{ context }}、 + <问题>{{ question }} + """, + "text": + """ + <指令>根据已知信息,简洁和专业的来回答问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题”,答案请使用中文。 + <已知信息>{{ context }}、 + <问题>{{ question }} + """, +} + +PROMPT_TEMPLATES["data_augment"] = { + "default": + """ + <指令>参照原始句子,简洁生成一个与原始句子语义最相似而句子结构又不同于原始句子的一个新句子,答案请使用简体中文,直接给出答案而不用复述原始句子内容,并且新句子有助于提高检索的准确度 + <原始句子>{{question}} + """, +} + +PROMPT_TEMPLATES["search_engine_chat"] = { + "default": + """ + <指令>这是我搜索到的互联网信息,请你根据这些信息进行提取并有调理,简洁的回答问题。如果无法从中得到答案,请说 “无法搜索到能回答问题的内容”。 + <已知信息>{{ context }}、 + <问题>{{ question }} + """, + + "search": + """ + <指令>根据已知信息,简洁和专业的来回答问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题”,答案请使用中文。 + <已知信息>{{ context }}、 + <问题>{{ question }} + """, +} +PROMPT_TEMPLATES["agent_chat"] = { + "default": + """ + Answer the following questions as best you can. If it is in order, you can use some tools appropriately.You have access to the following tools: + + {tools} + + Please note that the "知识库查询工具" is information about the "西交利物浦大学" ,and if a question is asked about it, you must answer with the knowledge base, + Please note that the "天气查询工具" can only be used once since Question begin. + + Use the following format: + Question: the input question you must answer1 + Thought: you should always think about what to do and what tools to use. + Action: the action to take, should be one of [{tool_names}] + Action Input: the input to the action + Observation: the result of the action + ... (this Thought/Action/Action Input/Observation can be repeated zero or more times) + Thought: I now know the final answer + Final Answer: the final answer to the original input question + + + Begin! + history: + {history} + Question: {input} + Thought: {agent_scratchpad} + """, + "ChatGLM": + """ + 请请严格按照提供的思维方式来思考。你的知识不一定正确,所以你一定要用提供的工具来思考,并给出用户答案。 + 你有以下工具可以使用: + {tools} + ``` + Question: 用户的提问或者观察到的信息, + Thought: 你应该思考该做什么,是根据工具的结果来回答问题,还是决定使用什么工具。 + Action: 需要使用的工具,应该是在[{tool_names}]中的一个。 + Action Input: 传入工具的内容 + Observation: 工具给出的答案(不是你生成的) + ... 
+PROMPT_TEMPLATES["knowledge_base_chat"] = {
+    "default":
+        """
+        <指令>你是一个电力公司相关的专家,请完全依据已知信息的内容,先找出与问题相关的信息,然后再根据这些相关信息简洁、专业地回答问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题”,不允许在答案中添加编造成分,不回答与问题无关的内容,请使用简体中文回答。</指令>
+        <已知信息>{{ context }}</已知信息>
+        <问题>{{ question }}</问题>
+        """,
+    "text":
+        """
+        <指令>根据已知信息,简洁和专业地来回答问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题”,答案请使用中文。</指令>
+        <已知信息>{{ context }}</已知信息>
+        <问题>{{ question }}</问题>
+        """,
+}
+
+PROMPT_TEMPLATES["data_augment"] = {
+    "default":
+        """
+        <指令>参照原始句子,简洁生成一个与原始句子语义最相似而句子结构又不同于原始句子的新句子,答案请使用简体中文,直接给出答案而不用复述原始句子内容,并且新句子应有助于提高检索的准确度。</指令>
+        <原始句子>{{ question }}</原始句子>
+        """,
+}
+
+PROMPT_TEMPLATES["search_engine_chat"] = {
+    "default":
+        """
+        <指令>这是我搜索到的互联网信息,请你根据这些信息进行提取并有条理、简洁地回答问题。如果无法从中得到答案,请说 “无法搜索到能回答问题的内容”。</指令>
+        <已知信息>{{ context }}</已知信息>
+        <问题>{{ question }}</问题>
+        """,
+
+    "search":
+        """
+        <指令>根据已知信息,简洁和专业地来回答问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题”,答案请使用中文。</指令>
+        <已知信息>{{ context }}</已知信息>
+        <问题>{{ question }}</问题>
+        """,
+}
+PROMPT_TEMPLATES["agent_chat"] = {
+    "default":
+        """
+        Answer the following questions as best you can. If needed, you can use the appropriate tools. You have access to the following tools:
+
+        {tools}
+
+        Please note that the "知识库查询工具" contains information about "西交利物浦大学"; if a question concerns it, you must answer using the knowledge base.
+        Please note that the "天气查询工具" can only be used once per question.
+
+        Use the following format:
+        Question: the input question you must answer
+        Thought: you should always think about what to do and what tools to use.
+        Action: the action to take, should be one of [{tool_names}]
+        Action Input: the input to the action
+        Observation: the result of the action
+        ... (this Thought/Action/Action Input/Observation can be repeated zero or more times)
+        Thought: I now know the final answer
+        Final Answer: the final answer to the original input question
+
+
+        Begin!
+        history:
+        {history}
+        Question: {input}
+        Thought: {agent_scratchpad}
+        """,
+    "ChatGLM":
+        """
+        请严格按照提供的思维方式来思考。你的知识不一定正确,所以你一定要用提供的工具来思考,并给出用户答案。
+        你有以下工具可以使用:
+        {tools}
+        ```
+        Question: 用户的提问或者观察到的信息,
+        Thought: 你应该思考该做什么,是根据工具的结果来回答问题,还是决定使用什么工具。
+        Action: 需要使用的工具,应该是在[{tool_names}]中的一个。
+        Action Input: 传入工具的内容
+        Observation: 工具给出的答案(不是你生成的)
+        ... (this Thought/Action/Action Input/Observation can be repeated zero or more times)
+        Thought: 通过工具给出的答案,你是否能回答Question。
+        Final Answer: 你的答案
+
+        现在,我们开始!
+        你和用户的历史记录:
+        History:
+        {history}
+
+        用户开始提问:
+        Question: {input}
+        Thought: {agent_scratchpad}
+
+        """,
+}
diff --git a/configs/server_config.py b/configs/server_config.py
new file mode 100644
index 0000000..d381de6
--- /dev/null
+++ b/configs/server_config.py
@@ -0,0 +1,132 @@
+import sys
+from configs.model_config import LLM_DEVICE
+
+# Default httpx request timeout in seconds. If loading models or chatting is slow and timeouts occur, increase this value.
+HTTPX_DEFAULT_TIMEOUT = 300.0
+
+# Whether the API allows cross-origin requests. Defaults to False; set to True to enable.
+# is open cross domain
+OPEN_CROSS_DOMAIN = False
+
+# Default bind host for all servers. If changed to "0.0.0.0", update the host of every XX_SERVER below accordingly.
+DEFAULT_BIND_HOST = "0.0.0.0" if sys.platform != "win32" else "127.0.0.1"
+
+# webui.py server
+WEBUI_SERVER = {
+    "host": DEFAULT_BIND_HOST,
+    "port": 8502,
+}
+
+# api.py server
+API_SERVER = {
+    "host": DEFAULT_BIND_HOST,
+    "port": 7862,
+}
+
+# fastchat openai_api server
+FSCHAT_OPENAI_API = {
+    "host": DEFAULT_BIND_HOST,
+    "port": 30000,
+}
+
+# fastchat model_worker server
+# These models must be configured correctly in model_config.MODEL_PATH or ONLINE_MODEL.
+# When launching startup.py, a model can be specified with `--model-worker --model-name xxxx`; if unspecified, LLM_MODEL is used.
+# Only models added here appear in the WEBUI's selectable model list (LLM_MODEL is added automatically).
+FSCHAT_MODEL_WORKERS = {
+    # Default configuration shared by all models; it can be overridden in each model-specific entry.
+    "default": {
+        "host": DEFAULT_BIND_HOST,
+        "port": 30002,
+        "device": LLM_DEVICE,
+        # Inference acceleration framework to use: False or 'vllm'. If HuggingFace connectivity problems occur with vllm, see doc/FAQ.
+        # vllm support for some models is still immature, so it is disabled by default.
+        "infer_turbo": False,
+
+        # Parameters required for multi-GPU loading in model_worker
+        "gpus": "0,1,2",  # GPUs to use, specified as a str such as "0,1"; if this has no effect, use CUDA_VISIBLE_DEVICES="0,1" instead
+        "num_gpus": 3,  # number of GPUs to use
+        "max_gpu_memory": "15GiB",  # maximum VRAM to occupy per GPU
+
+        # Less commonly used model_worker parameters; configure as needed
+        # "load_8bit": False,  # enable 8-bit quantization
+        # "cpu_offloading": None,
+        # "gptq_ckpt": None,
+        # "gptq_wbits": 16,
+        # "gptq_groupsize": -1,
+        # "gptq_act_order": False,
+        # "awq_ckpt": None,
+        # "awq_wbits": 16,
+        # "awq_groupsize": -1,
+        # "model_names": [LLM_MODEL],
+        # "conv_template": None,
+        # "limit_worker_concurrency": 5,
+        # "stream_interval": 2,
+        # "no_register": False,
+        # "embed_in_truncate": False,
+
+        # vllm worker parameters below. Note that vllm requires a GPU and has only been tested on Linux.
+
+        # tokenizer = model_path  # add here if the tokenizer differs from model_path
+        # 'tokenizer_mode': 'auto',
+        # 'trust_remote_code': True,
+        # 'download_dir': None,
+        # 'load_format': 'auto',
+        # 'dtype': 'auto',
+        # 'seed': 0,
+        # 'worker_use_ray': False,
+        # 'pipeline_parallel_size': 1,
+        # 'tensor_parallel_size': 1,
+        # 'block_size': 16,
+        # 'swap_space': 4,  # GiB
+        # 'gpu_memory_utilization': 0.90,
+        # 'max_num_batched_tokens': 2560,
+        # 'max_num_seqs': 256,
+        # 'disable_log_stats': False,
+        # 'conv_template': None,
+        # 'limit_worker_concurrency': 5,
+        # 'no_register': False,
+        # 'num_gpus': 1,
+        # 'engine_use_ray': False,
+        # 'disable_log_requests': False
+
+    },
+    # The defaults can be overridden per model as in this example:
+    # "baichuan-7b": {  # uses the IP and port from "default"
+    #     "device": "cpu",
+    # },
+
+    "zhipu-api": {  # set a different port for each online API you want to run
+        "port": 21001,
+    },
+    # "minimax-api": {
+    #     "port": 21002,
+    # },
+    # "xinghuo-api": {
+    #     "port": 21003,
+    # },
+    # "qianfan-api": {
+    #     "port": 21004,
+    # },
+    # "fangzhou-api": {
+    #     "port": 21005,
+    # },
+    # "qwen-api": {
+    #     "port": 21006,
+    # },
+    # "baichuan-api": {
+    #     "port": 21007,
+    # },
+}
+
+# fastchat multi model worker server
+FSCHAT_MULTI_MODEL_WORKERS = {
+    # TODO:
+}
+
+# fastchat controller server
+FSCHAT_CONTROLLER = {
+    "host": DEFAULT_BIND_HOST,
+    "port": 30001,
+    "dispatch_method": "shortest_queue",
+}
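+# Illustrative sketch (editorial addition, not part of this change): how the
+# shared "default" worker settings above are typically merged with a
+# model-specific override. `get_worker_config` is a hypothetical helper shown
+# for illustration only.
+# def get_worker_config(model_name: str) -> dict:
+#     config = dict(FSCHAT_MODEL_WORKERS["default"])            # shared defaults
+#     config.update(FSCHAT_MODEL_WORKERS.get(model_name, {}))   # per-model overrides win
+#     return config
+#
+# # e.g. get_worker_config("zhipu-api")["port"] == 21001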