fix int4 and int8 model init bug
This commit is contained in:
parent
eeea00e294
commit
52c6f2d29f
|
|
@ -65,8 +65,18 @@ llm_model_dict = {
|
|||
}
|
||||
}
|
||||
|
||||
# LLM model name
|
||||
# LLM 名称
|
||||
LLM_MODEL = "chatglm-6b"
|
||||
# 如果你需要加载本地的model,指定这个参数 ` --no-remote-model`,或者下方参数修改为 `True`
|
||||
NO_REMOTE_MODEL = False
|
||||
# 量化加载8bit 模型
|
||||
LOAD_IN_8BIT = False
|
||||
# Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.
|
||||
BF16 = False
|
||||
# 本地模型存放的位置
|
||||
MODEL_DIR = "model/"
|
||||
# 本地lora存放的位置
|
||||
LORA_DIR = "loras/"
|
||||
|
||||
# LLM lora path,默认为空,如果有请直接指定文件夹路径
|
||||
LLM_LORA_PATH = ""
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import argparse
|
||||
import os
|
||||
|
||||
from configs.model_config import *
|
||||
|
||||
|
||||
# Additional argparse types
|
||||
|
|
@ -32,28 +32,25 @@ def dir_path(string):
|
|||
|
||||
|
||||
parser = argparse.ArgumentParser(prog='langchina-ChatGLM',
|
||||
description='基于langchain和chatGML的LLM文档阅读器')
|
||||
description='About langchain-ChatGLM, local knowledge based ChatGLM with langchain | '
|
||||
'基于本地知识库的 ChatGLM 问答')
|
||||
|
||||
|
||||
|
||||
parser.add_argument('--no-remote-model', action='store_true', default=False, help='remote in the model on loader checkpoint, if your load local model to add the ` --no-remote-model`')
|
||||
parser.add_argument('--model', type=str, default='chatglm-6b', help='Name of the model to load by default.')
|
||||
parser.add_argument('--no-remote-model', action='store_true', default=NO_REMOTE_MODEL, help='remote in the model on '
|
||||
'loader checkpoint, '
|
||||
'if your load local '
|
||||
'model to add the ` '
|
||||
'--no-remote-model`')
|
||||
parser.add_argument('--model', type=str, default=LLM_MODEL, help='Name of the model to load by default.')
|
||||
parser.add_argument('--lora', type=str, help='Name of the LoRA to apply to the model by default.')
|
||||
parser.add_argument("--model-dir", type=str, default='model/', help="Path to directory with all the models")
|
||||
parser.add_argument("--lora-dir", type=str, default='loras/', help="Path to directory with all the loras")
|
||||
parser.add_argument("--model-dir", type=str, default=MODEL_DIR, help="Path to directory with all the models")
|
||||
parser.add_argument("--lora-dir", type=str, default=LORA_DIR, help="Path to directory with all the loras")
|
||||
|
||||
# Accelerate/transformers
|
||||
parser.add_argument('--cpu', action='store_true', help='Use the CPU to generate text. Warning: Training on CPU is extremely slow.')
|
||||
parser.add_argument('--auto-devices', action='store_true', help='Automatically split the model across the available GPU(s) and CPU.')
|
||||
parser.add_argument('--gpu-memory', type=str, nargs="+", help='Maxmimum GPU memory in GiB to be allocated per GPU. Example: --gpu-memory 10 for a single GPU, --gpu-memory 10 5 for two GPUs. You can also set values in MiB like --gpu-memory 3500MiB.')
|
||||
parser.add_argument('--cpu-memory', type=str, help='Maximum CPU memory in GiB to allocate for offloaded weights. Same as above.')
|
||||
parser.add_argument('--load-in-8bit', action='store_true', help='Load the model with 8-bit precision.')
|
||||
parser.add_argument('--bf16', action='store_true', help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.')
|
||||
|
||||
parser.add_argument('--load-in-8bit', action='store_true', default=LOAD_IN_8BIT,
|
||||
help='Load the model with 8-bit precision.')
|
||||
parser.add_argument('--bf16', action='store_true', default=BF16,
|
||||
help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.')
|
||||
|
||||
args = parser.parse_args([])
|
||||
# Generates a dict with the default value for each argument
|
||||
DEFAULT_ARGS = vars(args)
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue