1. Allow api.py to use the parameters defined in args.py; 2. Make the args.py and model_config.py control methods compatible with each other; 3. Update the notes on the model name used when calling FastChat
This commit is contained in:
parent 58a5de92a5
commit e8ff31be1f
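Item 2 of the commit message boils down to this: args.py now takes its argparse defaults from model_config.py, so a value set in the config file still applies unless a command-line flag overrides it. A minimal sketch of that pattern, using only option names visible in the diff below; the import path configs.model_config is an assumption for illustration:

import argparse
# Import path is assumed; the diff only shows the variable names.
from configs.model_config import LLM_MODEL, LORA_NAME, LORA_DIR

parser = argparse.ArgumentParser()
parser.add_argument('--model-name', type=str, default=LLM_MODEL)
parser.add_argument('--lora', type=str, default=LORA_NAME)
parser.add_argument('--lora-dir', type=str, default=LORA_DIR)

args = parser.parse_args()
# With no flags given, args.* falls back to the model_config.py values;
# e.g. `python api.py --model-name chatglm-6b` overrides only the model name.
print(args.model_name, args.lora, args.lora_dir)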
@@ -32,7 +32,7 @@ EMBEDDING_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backe
# llm_model_dict handles some preset loader behaviors, such as the load location, model name and model processor instance
# Modify the attribute values in the dictionary below to specify where local LLM models are stored
# e.g. change the "local_model_path" of "chatglm-6b" from None to "User/Downloads/chatglm-6b"
# Use an absolute path here; the path must contain the model name, as FastChat extracts the repo-id from the path name
# Use an absolute path here
llm_model_dict = {
    "chatglm-6b-int4-qe": {
        "name": "chatglm-6b-int4-qe",
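One of the changed comments notes that FastChat derives the repo-id from the path name, which is why the absolute path must end with the model name. A minimal sketch of that idea with a hypothetical helper (this is not FastChat's actual code):

import os

def repo_id_from_path(local_model_path: str) -> str:
    # e.g. "/home/user/Downloads/chatglm-6b" -> "chatglm-6b"
    return os.path.basename(os.path.normpath(local_model_path))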
@@ -104,15 +104,20 @@ LOAD_IN_8BIT = False
# Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.
BF16 = False
# Local directory where LoRA weights are stored
LORA_DIR = "./loras/"
LORA_DIR = "loras/"

# Name of the LoRA; if there are several, specify them as a list

LORA_NAME = ""
USE_LORA = True if LORA_NAME else False

# LLM streaming response
STREAMING = True

# Use p-tuning-v2 PrefixEncoder
USE_PTUNING_V2 = False

PTUNING_DIR="./ptuning-v2"

PTUNING_DIR = "./ptuning-v2"
# LLM running device
LLM_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
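The comment on LORA_NAME says it may also be given as a list; a small sketch of how such a value could be normalized against LORA_DIR (the helper is hypothetical, not part of the project):

import os

def resolve_lora_paths(lora_name, lora_dir="loras/"):  # default mirrors LORA_DIR
    # Accept "", "my-lora" or ["lora-a", "lora-b"] and return a list of full paths.
    if not lora_name:
        return []
    names = lora_name if isinstance(lora_name, (list, tuple)) else [lora_name]
    return [os.path.join(lora_dir, name) for name in names]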
@@ -42,9 +42,10 @@ parser.add_argument('--no-remote-model', action='store_true', help='remote in th
                    'model to add the ` '
                    '--no-remote-model`')
parser.add_argument('--model-name', type=str, default=LLM_MODEL, help='Name of the model to load by default.')
parser.add_argument('--lora', type=str, action="store_true",help='Name of the LoRA to apply to the model by default.')
parser.add_argument("--use-lora",type=bool,default=USE_LORA,help="use lora or not")
parser.add_argument('--lora', type=str, default=LORA_NAME,help='Name of the LoRA to apply to the model by default.')
parser.add_argument("--lora-dir", type=str, default=LORA_DIR, help="Path to directory with all the loras")
parser.add_argument('--use-ptuning-v2',action='store_true',help="whether use ptuning-v2 checkpoint")
parser.add_argument('--use-ptuning-v2',action=USE_PTUNING_V2,help="whether use ptuning-v2 checkpoint")
parser.add_argument("--ptuning-dir",type=str,default=PTUNING_DIR,help="the dir of ptuning-v2 checkpoint")
# Accelerate/transformers
parser.add_argument('--load-in-8bit', action='store_true', default=LOAD_IN_8BIT,
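Per item 1 of the commit message, api.py can now read these values from the parsed namespace instead of importing model_config.py directly. A hedged sketch of that usage; the parser's module path is assumed, not shown in this diff:

# Sketch only: how api.py might consume the shared arguments defined in args.py.
from models.loader.args import parser  # module path assumed

args = parser.parse_args()
model_name = args.model_name    # falls back to LLM_MODEL from model_config.py
use_lora = args.use_lora        # falls back to USE_LORA
lora_name = args.lora           # falls back to LORA_NAME
ptuning_dir = args.ptuning_dir  # falls back to PTUNING_DIR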