import torch.cuda
import torch.backends
import os
import logging
import uuid

LOG_FORMAT = "%(levelname)-5s %(asctime)s - %(message)s"
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logging.basicConfig(format=LOG_FORMAT)

embedding_model_dict = {
    "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
    "ernie-base": "nghuyong/ernie-3.0-base-zh",
    "text2vec-base": "shibing624/text2vec-base-chinese",
    "text2vec": "GanymedeNil/text2vec-large-chinese",
    "m3e-small": "moka-ai/m3e-small",
    "m3e-base": "moka-ai/m3e-base",
}

# Embedding model name
EMBEDDING_MODEL = "text2vec"

# Embedding running device
EMBEDDING_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
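
# Illustrative only (not part of the original config): a minimal sketch of how a
# downstream loader might consume the two settings above, assuming it wraps the
# model with langchain's HuggingFaceEmbeddings; the project's actual loader may differ.
#
#   from langchain.embeddings import HuggingFaceEmbeddings
#   embeddings = HuggingFaceEmbeddings(
#       model_name=embedding_model_dict[EMBEDDING_MODEL],
#       model_kwargs={"device": EMBEDDING_DEVICE},
#   )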

# Supported LLM models
# llm_model_dict defines preset loader behavior for each model, such as where to
# load it from, the model name, and which model-handler class provides it.
llm_model_dict = {
    "chatglm-6b-int4-qe": {
        "name": "chatglm-6b-int4-qe",
        "pretrained_model_name": "THUDM/chatglm-6b-int4-qe",
        "local_model_path": None,
        "provides": "ChatGLM"
    },
    "chatglm-6b-int4": {
        "name": "chatglm-6b-int4",
        "pretrained_model_name": "THUDM/chatglm-6b-int4",
        "local_model_path": None,
        "provides": "ChatGLM"
    },
    "chatglm-6b-int8": {
        "name": "chatglm-6b-int8",
        "pretrained_model_name": "THUDM/chatglm-6b-int8",
        "local_model_path": None,
        "provides": "ChatGLM"
    },
    "chatglm-6b": {
        "name": "chatglm-6b",
        "pretrained_model_name": "THUDM/chatglm-6b",
        "local_model_path": "/media/checkpoint/chatglm-6b",
        "provides": "ChatGLM"
    },

    "chatyuan": {
        "name": "chatyuan",
        "pretrained_model_name": "ClueAI/ChatYuan-large-v2",
        "local_model_path": None,
        "provides": None
    },
    "moss": {
        "name": "moss",
        "pretrained_model_name": "fnlp/moss-moon-003-sft",
        "local_model_path": None,
        "provides": "MOSSLLM"
    },
    "vicuna-13b-hf": {
        "name": "vicuna-13b-hf",
        "pretrained_model_name": "vicuna-13b-hf",
        "local_model_path": "/media/checkpoint/vicuna-13b-hf",
        "provides": "LLamaLLM"
    },

    # Models served through fastchat should follow the format below
    "fastchat-chatglm-6b": {
        "name": "chatglm-6b",  # set "name" to the "model_name" registered with the fastchat service
        "pretrained_model_name": "chatglm-6b",
        "local_model_path": None,
        "provides": "FastChatOpenAILLM",  # when using the fastchat API, "provides" must be "FastChatOpenAILLM"
        "api_base_url": "http://localhost:8000/v1"  # set to the "api_base_url" of the fastchat service
    },

    # Models served through fastchat should follow the format below
    "fastchat-vicuna-13b-hf": {
        "name": "vicuna-13b-hf",  # set "name" to the "model_name" registered with the fastchat service
        "pretrained_model_name": "vicuna-13b-hf",
        "local_model_path": None,
        "provides": "FastChatOpenAILLM",  # when using the fastchat API, "provides" must be "FastChatOpenAILLM"
        "api_base_url": "http://localhost:8000/v1"  # set to the "api_base_url" of the fastchat service
    },
}

# LLM model name
LLM_MODEL = "chatglm-6b"
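
# Illustrative only (not part of the original config): a hypothetical lookup of the
# active model's entry in llm_model_dict; the project's actual loader code may differ.
#
#   model_config = llm_model_dict[LLM_MODEL]
#   checkpoint = model_config["local_model_path"] or model_config["pretrained_model_name"]
#   provider_class_name = model_config["provides"]  # e.g. "ChatGLM"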

# Load the model quantized to 8-bit
LOAD_IN_8BIT = False
# Load the model with bfloat16 precision. Requires an NVIDIA Ampere (or newer) GPU.
BF16 = False
# Local directory where LoRA weights are stored
LORA_DIR = "loras/"

# LLM LoRA path; empty by default. If you have a LoRA, set this to its folder path.
LLM_LORA_PATH = ""
USE_LORA = bool(LLM_LORA_PATH)

# LLM streaming response
STREAMING = True

# Use p-tuning-v2 PrefixEncoder
USE_PTUNING_V2 = False

# LLM running device
LLM_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"

# Root directory where vector stores are persisted
VS_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "vector_store")

# Root directory where uploaded source documents are stored
UPLOAD_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "content")

# Context-based prompt template. Be sure to keep the "{question}" and "{context}" placeholders.
# The template is kept in Chinese because answers are required to be in Chinese; it says:
# "Known information: {context}. Based on the known information above, answer the user's question
# concisely and professionally. If no answer can be derived from it, say that the question cannot
# be answered from the known information, and do not fabricate anything. The question is: {question}"
PROMPT_TEMPLATE = """已知信息:
{context}

根据上述已知信息,简洁和专业的来回答用户的问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题” 或 “没有提供足够的相关信息”,不允许在答案中添加编造成分,答案请使用中文。 问题是:{question}"""
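
# Illustrative only (not part of the original config): PROMPT_TEMPLATE is a plain
# str.format() template, so downstream code would fill it roughly like this
# (related_docs_text and query are hypothetical names):
#
#   prompt = PROMPT_TEMPLATE.format(context=related_docs_text, question=query)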

# Number of knowledge bases kept cached
CACHED_VS_NUM = 1

# Sentence length used when splitting text
SENTENCE_SIZE = 100

# Length of a single context chunk after matching
CHUNK_SIZE = 250

# LLM input history length
LLM_HISTORY_LEN = 3

# Return the top-k text chunks from the vector store
VECTOR_SEARCH_TOP_K = 5

# Relevance score threshold for knowledge retrieval. Values range roughly from 0 to 1100;
# 0 disables the filter. In testing, settings below 500 gave more precise matches.
VECTOR_SEARCH_SCORE_THRESHOLD = 0
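
# Illustrative only (not part of the original config): a rough sketch of how the two
# settings above could be applied to a vector-store query, assuming a langchain FAISS
# store where scores are distances (lower is better); the actual retrieval code may differ.
#
#   docs_and_scores = vector_store.similarity_search_with_score(query, k=VECTOR_SEARCH_TOP_K)
#   if VECTOR_SEARCH_SCORE_THRESHOLD > 0:
#       docs_and_scores = [(doc, score) for doc, score in docs_and_scores
#                          if score < VECTOR_SEARCH_SCORE_THRESHOLD]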

# Path to the bundled nltk_data directory
NLTK_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nltk_data")
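
# Illustrative only (not part of the original config): modules that need NLTK resources
# would typically prepend this path to NLTK's search path, e.g.:
#
#   import nltk
#   nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path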

# Random username used for flagging; a fresh hex string is generated on every run
FLAG_USER_NAME = uuid.uuid4().hex

logger.info(f"""
loading model config
llm device: {LLM_DEVICE}
embedding device: {EMBEDDING_DEVICE}
dir: {os.path.dirname(os.path.dirname(__file__))}
flagging username: {FLAG_USER_NAME}
""")

# Whether to allow cross-origin (CORS) requests; False by default, set to True to enable
OPEN_CROSS_DOMAIN = False

# Required settings for Bing search
# Using Bing search requires a Bing Subscription Key; see
# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/quickstarts/rest/python
# for how to apply for one
BING_SEARCH_URL = "https://api.bing.microsoft.com/v7.0/search"
BING_SUBSCRIPTION_KEY = ""
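
# Illustrative only (not part of the original config): a minimal sketch of a Bing Web
# Search request built from the two settings above, using the "q" query parameter and
# "Ocp-Apim-Subscription-Key" header of the Bing Web Search REST API:
#
#   import requests
#   resp = requests.get(
#       BING_SEARCH_URL,
#       headers={"Ocp-Apim-Subscription-Key": BING_SUBSCRIPTION_KEY},
#       params={"q": "example query"},
#   )
#   results = resp.json().get("webPages", {}).get("value", [])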