diff --git a/configs/model_config.py b/configs/model_config.py
index db52851..21b1980 100644
--- a/configs/model_config.py
+++ b/configs/model_config.py
@@ -87,6 +87,12 @@ llm_model_dict = {
         "local_model_path": None,
         "provides": "MOSSLLM"
     },
+    "moss-int4": {
+        "name": "moss",
+        "pretrained_model_name": "fnlp/moss-moon-003-sft-int4",
+        "local_model_path": None,
+        "provides": "MOSSLLM"
+    },
     "vicuna-13b-hf": {
         "name": "vicuna-13b-hf",
         "pretrained_model_name": "vicuna-13b-hf",
@@ -197,6 +203,7 @@ STREAMING = True
 
 # Use p-tuning-v2 PrefixEncoder
 USE_PTUNING_V2 = False
+PTUNING_DIR='./ptuing-v2'
 
 # LLM running device
 LLM_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
diff --git a/models/loader/args.py b/models/loader/args.py
index b15ad5e..92105e2 100644
--- a/models/loader/args.py
+++ b/models/loader/args.py
@@ -43,6 +43,8 @@ parser.add_argument('--no-remote-model', action='store_true', help='remote in th
 parser.add_argument('--model-name', type=str, default=LLM_MODEL, help='Name of the model to load by default.')
 parser.add_argument('--lora', type=str, help='Name of the LoRA to apply to the model by default.')
 parser.add_argument("--lora-dir", type=str, default=LORA_DIR, help="Path to directory with all the loras")
+parser.add_argument('--use-ptuning-v2',type=str,default=False,help="whether use ptuning-v2 checkpoint")
+parser.add_argument("--ptuning-dir",type=str,default=PTUNING_DIR,help="the dir of ptuning-v2 checkpoint")
 
 # Accelerate/transformers
 parser.add_argument('--load-in-8bit', action='store_true', default=LOAD_IN_8BIT,
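
For context: the new USE_PTUNING_V2 / PTUNING_DIR settings and the --use-ptuning-v2 / --ptuning-dir flags only carry the checkpoint location into the loader. The sketch below illustrates how a p-tuning-v2 prefix-encoder checkpoint is commonly applied to a ChatGLM-style model; it is not the project's actual loader code. It assumes the checkpoint directory (whatever --ptuning-dir points at) contains the config.json and pytorch_model.bin written by ChatGLM's p-tuning-v2 training scripts, and load_model_with_ptuning_v2 is a hypothetical helper name.

# Hypothetical sketch, not part of the diff: apply a p-tuning-v2 prefix-encoder
# checkpoint (taken from args.ptuning_dir) to a ChatGLM-style model.
import os
import json
import torch
from transformers import AutoConfig, AutoModel

def load_model_with_ptuning_v2(model_name_or_path: str, ptuning_dir: str):
    # Assumption: the p-tuning-v2 output dir holds config.json (with pre_seq_len)
    # and pytorch_model.bin containing the trained prefix-encoder weights.
    with open(os.path.join(ptuning_dir, "config.json")) as f:
        prefix_config = json.load(f)

    config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True)
    config.pre_seq_len = prefix_config["pre_seq_len"]
    model = AutoModel.from_pretrained(model_name_or_path, config=config,
                                      trust_remote_code=True)

    # Keep only the prefix-encoder weights and strip their key prefix.
    prefix_state_dict = torch.load(os.path.join(ptuning_dir, "pytorch_model.bin"),
                                   map_location="cpu")
    new_state_dict = {
        k[len("transformer.prefix_encoder."):]: v
        for k, v in prefix_state_dict.items()
        if k.startswith("transformer.prefix_encoder.")
    }
    model.transformer.prefix_encoder.load_state_dict(new_state_dict)
    model.transformer.prefix_encoder.float()
    return model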