Extended features: check dependencies and prompt for installation (拓展功能检查依赖提示安装)

parent 1c5f71beee
commit 994401670b
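This change moves the loader's optional heavy dependencies (peft, accelerate, bitsandbytes, llama-cpp-python) from module-level imports into try/except blocks inside the functions that actually use them. A missing package now raises an error with an explicit `pip install` hint only when the corresponding feature is requested, instead of breaking startup for everyone, and the matching pins in requirements.txt are commented out so these packages become opt-in.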
@@ -4,18 +4,11 @@ import os
 import re
 import time
 from pathlib import Path
-from peft import PeftModel
 from typing import Optional, List, Dict, Tuple, Union
 import torch
 import transformers

 from transformers import (AutoConfig, AutoModel, AutoModelForCausalLM,
-                          AutoTokenizer, BitsAndBytesConfig, LlamaTokenizer)
-from transformers.dynamic_module_utils import get_class_from_dynamic_module
-from transformers.modeling_utils import no_init_weights
-from transformers.utils import ContextManagers
-from accelerate import init_empty_weights
-from accelerate.utils import get_balanced_memory, infer_auto_device_map
+                          AutoTokenizer, LlamaTokenizer)

 from configs.model_config import LLM_DEVICE
@@ -150,16 +143,35 @@ class LoaderCheckPoint:
             )

         elif self.is_llamacpp:
-            from models.extensions.llamacpp_model_alternative import LlamaCppModel
+            try:
+                from models.extensions.llamacpp_model_alternative import LlamaCppModel
+            except ImportError as exc:
+                raise ValueError(
+                    "Could not import the required Python package. "
+                    "Please install it with `pip install llama-cpp-python`."
+                ) from exc

             model_file = list(checkpoint.glob('ggml*.bin'))[0]
             print(f"llama.cpp weights detected: {model_file}\n")

             model, tokenizer = LlamaCppModel.from_pretrained(model_file)
             return model, tokenizer

         # Custom
-        else:
+        elif self.load_in_8bit:
+            try:
+                from accelerate import init_empty_weights
+                from accelerate.utils import get_balanced_memory, infer_auto_device_map
+                from transformers import BitsAndBytesConfig
+            except ImportError as exc:
+                raise ValueError(
+                    "Could not import the required Python packages. "
+                    "Please install them with `pip install transformers` "
+                    "`pip install bitsandbytes` `pip install accelerate`."
+                ) from exc

             params = {"low_cpu_mem_usage": True}

             if not self.llm_device.lower().startswith("cuda"):
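The pattern used in both branches above is a guarded local import: defer the optional import to the call site and convert an ImportError into an actionable message. A minimal standalone sketch of the same idea (the function name is illustrative, not part of the commit; `llama_cpp` is the module installed by llama-cpp-python):

def require_llamacpp():
    """Return the Llama class, or fail with an install hint."""
    try:
        # Imported lazily, so the package is only needed when llama.cpp
        # models are actually requested.
        from llama_cpp import Llama
    except ImportError as exc:
        raise ValueError(
            "Could not import the llama-cpp-python package. "
            "Please install it with `pip install llama-cpp-python`."
        ) from exc
    return Llama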
@@ -167,18 +179,11 @@ class LoaderCheckPoint:
             else:
                 params["device_map"] = 'auto'
                 params["trust_remote_code"] = True
-                if self.load_in_8bit:
-                    params['quantization_config'] = BitsAndBytesConfig(load_in_8bit=True,
-                                                                       llm_int8_enable_fp32_cpu_offload=False)
-                elif self.bf16:
-                    params["torch_dtype"] = torch.bfloat16
-                else:
-                    params["torch_dtype"] = torch.float16
+                params['quantization_config'] = BitsAndBytesConfig(load_in_8bit=True,
+                                                                   llm_int8_enable_fp32_cpu_offload=False)

             if self.load_in_8bit and params.get('max_memory', None) is not None and params['device_map'] == 'auto':
-                config = AutoConfig.from_pretrained(checkpoint)
                 with init_empty_weights():
-                    model = LoaderClass.from_config(config)
+                    model = LoaderClass.from_config(self.model_config, trust_remote_code=True)
                 model.tie_weights()
                 if self.device_map is not None:
                     params['device_map'] = self.device_map
@@ -186,11 +191,18 @@ class LoaderCheckPoint:
                     params['device_map'] = infer_auto_device_map(
                         model,
                         dtype=torch.int8,
                         max_memory=params['max_memory'],
                         no_split_module_classes=model._no_split_modules
                     )
-            model = LoaderClass.from_pretrained(checkpoint, **params)
+            try:
+                model = LoaderClass.from_pretrained(checkpoint, **params)
+            except ImportError as exc:
+                raise ValueError(
+                    "With 8-bit quantized loading enabled the project cannot start; see "
+                    "https://github.com/TimDettmers/bitsandbytes/issues/156 to pick a bitsandbytes build matching your CUDA version."
+                ) from exc
+        # Custom
+        else:
+            pass

         # Loading the tokenizer
         if type(model) is transformers.LlamaForCausalLM:
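Condensed, the new 8-bit branch amounts to the sketch below. This is a simplified reading of the diff, not the project's actual code path: it uses a plain AutoModelForCausalLM and skips the custom max_memory / device-map handling shown above.

from transformers import AutoModelForCausalLM, BitsAndBytesConfig

def load_8bit(checkpoint: str):
    # Quantize weights to int8 at load time and shard across available GPUs.
    params = {
        "low_cpu_mem_usage": True,
        "device_map": "auto",
        "trust_remote_code": True,
        "quantization_config": BitsAndBytesConfig(
            load_in_8bit=True,
            llm_int8_enable_fp32_cpu_offload=False,
        ),
    }
    return AutoModelForCausalLM.from_pretrained(checkpoint, **params)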
@@ -247,6 +259,20 @@ class LoaderCheckPoint:
         return device_map

     def moss_auto_configure_device_map(self, num_gpus: int, model_name) -> Dict[str, int]:
+        try:
+            from accelerate import init_empty_weights
+            from accelerate.utils import get_balanced_memory, infer_auto_device_map
+            from transformers.dynamic_module_utils import get_class_from_dynamic_module
+            from transformers.modeling_utils import no_init_weights
+            from transformers.utils import ContextManagers
+        except ImportError as exc:
+            raise ValueError(
+                "Could not import the required Python packages. "
+                "Please install them with `pip install transformers` "
+                "`pip install bitsandbytes` `pip install accelerate`."
+            ) from exc
+
         checkpoint = Path(f'{self.model_dir}/{model_name}')

         if self.model_path:
@@ -271,6 +297,16 @@ class LoaderCheckPoint:
         return device_map

     def _add_lora_to_model(self, lora_names):
+        try:
+            from peft import PeftModel
+        except ImportError as exc:
+            raise ValueError(
+                "Could not import the required Python package. "
+                "Please install it with `pip install peft` `pip install accelerate`."
+            ) from exc
+
         # LoRA adapters currently loaded
         prior_set = set(self.lora_names)
         # LoRA adapters that need to be loaded
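_add_lora_to_model applies the same guard to peft. For reference, a minimal sketch of what attaching an adapter looks like once the import succeeds (the function name and lora_dir are illustrative, not from the commit):

def add_lora(model, lora_dir: str):
    try:
        from peft import PeftModel  # optional dependency, imported lazily
    except ImportError as exc:
        raise ValueError(
            "Could not import the peft package. "
            "Please install it with `pip install peft`."
        ) from exc
    # Wrap the base model with the adapter weights stored under lora_dir.
    return PeftModel.from_pretrained(model, lora_dir)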
requirements.txt

@@ -11,16 +11,16 @@ beautifulsoup4
 icetk
 cpm_kernels
 faiss-cpu
-accelerate~=0.18.0
 gradio==3.28.3
 fastapi~=0.95.0
 uvicorn~=0.21.1
-peft~=0.3.0
 pypinyin~=0.48.0
 click~=8.1.3
 tabulate
 azure-core
-bitsandbytes; platform_system != "Windows"
+#accelerate~=0.18.0
+#peft~=0.3.0
+#bitsandbytes; platform_system != "Windows"
 #llama-cpp-python==0.1.34; platform_system != "Windows"
 #https://github.com/abetlen/llama-cpp-python/releases/download/v0.1.34/llama_cpp_python-0.1.34-cp310-cp310-win_amd64.whl; platform_system == "Windows"
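With these pins commented out, LoRA loading, 8-bit quantization, and llama.cpp support are opt-in: installing peft, accelerate, bitsandbytes, or llama-cpp-python on demand re-enables the corresponding code paths. A small sketch (not part of the commit) of how an environment could be checked for the optional features up front:

from importlib.util import find_spec

# Map each optional feature to the modules it needs; "llama_cpp" is the
# import name of the llama-cpp-python package.
OPTIONAL_FEATURES = {
    "LoRA loading": ("peft",),
    "8-bit quantization": ("bitsandbytes", "accelerate"),
    "llama.cpp models": ("llama_cpp",),
}

for feature, modules in OPTIONAL_FEATURES.items():
    missing = [m for m in modules if find_spec(m) is None]
    print(f"{feature}: {'available' if not missing else 'missing ' + ', '.join(missing)}")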