Check the dependencies of optional features and prompt for installation when they are missing
parent 1c5f71beee
commit 994401670b
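Summary: heavy optional dependencies (peft, accelerate, bitsandbytes, llama-cpp-python, and the transformers dynamic-module helpers) are no longer imported at module load time. Each feature path now imports what it needs inside a try/except ImportError block and raises a ValueError naming the pip command that installs the missing package; the matching pins in requirements.txt are commented out so the base install stays lean.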
@@ -4,18 +4,11 @@ import os
 import re
 import time
 from pathlib import Path
-from peft import PeftModel
 from typing import Optional, List, Dict, Tuple, Union
 import torch
 import transformers

 from transformers import (AutoConfig, AutoModel, AutoModelForCausalLM,
-                          AutoTokenizer, BitsAndBytesConfig, LlamaTokenizer)
-from transformers.dynamic_module_utils import get_class_from_dynamic_module
-from transformers.modeling_utils import no_init_weights
-from transformers.utils import ContextManagers
-from accelerate import init_empty_weights
-from accelerate.utils import get_balanced_memory, infer_auto_device_map
+                          AutoTokenizer, LlamaTokenizer)
 from configs.model_config import LLM_DEVICE

@@ -104,7 +97,7 @@ class LoaderCheckPoint:
             LoaderClass = AutoModelForCausalLM

         # Load the model in simple 16-bit mode by default
-        if not any([self.llm_device.lower()=="cpu",
+        if not any([self.llm_device.lower() == "cpu",
                     self.load_in_8bit, self.is_llamacpp]):

             if torch.cuda.is_available() and self.llm_device.lower().startswith("cuda"):

@@ -150,16 +143,35 @@ class LoaderCheckPoint:
                 )

         elif self.is_llamacpp:
-            from models.extensions.llamacpp_model_alternative import LlamaCppModel
+            try:
+                from models.extensions.llamacpp_model_alternative import LlamaCppModel
+            except ImportError as exc:
+                raise ValueError(
+                    "Could not import the required Python package. "
+                    "Please install it with `pip install llama-cpp-python`."
+                ) from exc

             model_file = list(checkpoint.glob('ggml*.bin'))[0]
             print(f"llama.cpp weights detected: {model_file}\n")

             model, tokenizer = LlamaCppModel.from_pretrained(model_file)
             return model, tokenizer

-        # Custom
-        else:
+        elif self.load_in_8bit:
+            try:
+                from accelerate import init_empty_weights
+                from accelerate.utils import get_balanced_memory, infer_auto_device_map
+                from transformers import BitsAndBytesConfig
+            except ImportError as exc:
+                raise ValueError(
+                    "Could not import the required Python packages. "
+                    "Please install them with `pip install transformers` "
+                    "`pip install bitsandbytes` `pip install accelerate`."
+                ) from exc

             params = {"low_cpu_mem_usage": True}

             if not self.llm_device.lower().startswith("cuda"):

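(Aside: the hunk above is the core of the change. Below is a minimal self-contained sketch of the same deferred-import pattern, runnable outside the repository; the function name and parameters are illustrative, not taken from the commit.)

def load_in_8bit_sketch(checkpoint: str):
    # Import the optional quantization stack only when this path runs,
    # so a bare install can still start the rest of the project.
    try:
        from transformers import AutoModelForCausalLM, BitsAndBytesConfig
    except ImportError as exc:
        raise ValueError(
            "Could not import the required Python packages. "
            "Please install them with `pip install transformers bitsandbytes accelerate`."
        ) from exc

    params = {
        "low_cpu_mem_usage": True,
        "device_map": "auto",
        "quantization_config": BitsAndBytesConfig(load_in_8bit=True),
    }
    return AutoModelForCausalLM.from_pretrained(checkpoint, **params)
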
@@ -167,18 +179,11 @@ class LoaderCheckPoint:
             else:
                 params["device_map"] = 'auto'
             params["trust_remote_code"] = True
-            if self.load_in_8bit:
             params['quantization_config'] = BitsAndBytesConfig(load_in_8bit=True,
                                                                llm_int8_enable_fp32_cpu_offload=False)
-            elif self.bf16:
-                params["torch_dtype"] = torch.bfloat16
-            else:
-                params["torch_dtype"] = torch.float16

-            if self.load_in_8bit and params.get('max_memory', None) is not None and params['device_map'] == 'auto':
-                config = AutoConfig.from_pretrained(checkpoint)
             with init_empty_weights():
-                model = LoaderClass.from_config(config)
+                model = LoaderClass.from_config(self.model_config, trust_remote_code=True)
                 model.tie_weights()
             if self.device_map is not None:
                 params['device_map'] = self.device_map

@@ -186,11 +191,18 @@ class LoaderCheckPoint:
                 params['device_map'] = infer_auto_device_map(
                     model,
                     dtype=torch.int8,
-                    max_memory=params['max_memory'],
                     no_split_module_classes=model._no_split_modules
                 )
+            try:

                 model = LoaderClass.from_pretrained(checkpoint, **params)
+            except ImportError as exc:
+                raise ValueError(
+                    "If 8-bit quantized loading is enabled, the project cannot start; "
+                    "see https://github.com/TimDettmers/bitsandbytes/issues/156 and pick a suitable CUDA version."
+                ) from exc
+        # Custom
+        else:
+            pass

         # Loading the tokenizer
         if type(model) is transformers.LlamaForCausalLM:

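(For context on the device-map logic retained above, here is a standalone sketch of how the accelerate helpers fit together; the "gpt2" model id is illustrative and the snippet assumes at least one CUDA device is visible.)

import torch
from accelerate import init_empty_weights
from accelerate.utils import get_balanced_memory, infer_auto_device_map
from transformers import AutoConfig, AutoModelForCausalLM

# Build the model skeleton on the meta device, with no real weight allocation.
config = AutoConfig.from_pretrained("gpt2")
with init_empty_weights():
    model = AutoModelForCausalLM.from_config(config)
model.tie_weights()

# Balance layers across the visible devices, then derive the device map that
# from_pretrained(..., device_map=device_map) can consume.
max_memory = get_balanced_memory(model, dtype=torch.int8)
device_map = infer_auto_device_map(
    model,
    dtype=torch.int8,
    max_memory=max_memory,
    no_split_module_classes=model._no_split_modules,
)
print(device_map)
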
@@ -247,6 +259,20 @@ class LoaderCheckPoint:
         return device_map

     def moss_auto_configure_device_map(self, num_gpus: int, model_name) -> Dict[str, int]:
+        try:
+
+            from accelerate import init_empty_weights
+            from accelerate.utils import get_balanced_memory, infer_auto_device_map
+            from transformers.dynamic_module_utils import get_class_from_dynamic_module
+            from transformers.modeling_utils import no_init_weights
+            from transformers.utils import ContextManagers
+        except ImportError as exc:
+            raise ValueError(
+                "Could not import the required Python packages. "
+                "Please install them with `pip install transformers` "
+                "`pip install bitsandbytes` `pip install accelerate`."
+            ) from exc
+
         checkpoint = Path(f'{self.model_dir}/{model_name}')

         if self.model_path:

@@ -271,6 +297,16 @@ class LoaderCheckPoint:
         return device_map

     def _add_lora_to_model(self, lora_names):
+        try:
+
+            from peft import PeftModel
+
+        except ImportError as exc:
+            raise ValueError(
+                "Could not import the required Python package. "
+                "Please install it with `pip install peft` `pip install accelerate`."
+            ) from exc
         # LoRA adapters already loaded
         prior_set = set(self.lora_names)
         # LoRA adapters that still need to be loaded

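(The same lazy-import idea guards the LoRA path. A self-contained sketch follows; attach_lora and its directory argument are illustrative names, not from the commit.)

def attach_lora(model, lora_dir: str):
    # Defer the optional peft import to the moment a LoRA is actually requested.
    try:
        from peft import PeftModel
    except ImportError as exc:
        raise ValueError(
            "Could not import the required Python package. "
            "Please install it with `pip install peft`."
        ) from exc
    # Wrap the already-loaded base model with the adapter weights on disk.
    return PeftModel.from_pretrained(model, lora_dir)
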
requirements.txt:

@@ -11,16 +11,16 @@ beautifulsoup4
 icetk
 cpm_kernels
 faiss-cpu
-accelerate~=0.18.0
 gradio==3.28.3
 fastapi~=0.95.0
 uvicorn~=0.21.1
-peft~=0.3.0
 pypinyin~=0.48.0
 click~=8.1.3
 tabulate
 azure-core
-bitsandbytes; platform_system != "Windows"
+#accelerate~=0.18.0
+#peft~=0.3.0
+#bitsandbytes; platform_system != "Windows"
 #llama-cpp-python==0.1.34; platform_system != "Windows"
 #https://github.com/abetlen/llama-cpp-python/releases/download/v0.1.34/llama_cpp_python-0.1.34-cp310-cp310-win_amd64.whl; platform_system == "Windows"
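(With the optional pins commented out above, missing extras now surface at runtime through the new try/except prompts. As a quick environment check, a probe like the following sketch lists which extras are present; the module list mirrors the pins above, and note that llama-cpp-python imports as llama_cpp.)

import importlib.util

# Optional extras matching the commented-out pins in requirements.txt.
OPTIONAL_MODULES = ("accelerate", "peft", "bitsandbytes", "llama_cpp")

for name in OPTIONAL_MODULES:
    # find_spec returns None when the package is not installed.
    status = "installed" if importlib.util.find_spec(name) else "missing"
    print(f"{name}: {status}")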