增加cpu加载模型逻辑

This commit is contained in:
glide-the 2023-05-26 22:52:55 +08:00
parent 561c40afee
commit 3324c12d69
1 changed files with 6 additions and 5 deletions

View File

@@ -130,11 +130,8 @@ class LoaderCheckPoint:
             model = dispatch_model(model, device_map=self.device_map)
         else:
-            # print(
-            #     "Warning: torch.cuda.is_available() returned False.\nThis means that no GPU has been "
-            #     "detected.\nFalling back to CPU mode.\n")
             model = (
-                AutoModel.from_pretrained(
+                LoaderClass.from_pretrained(
                     checkpoint,
                     config=self.model_config,
                     trust_remote_code=True)
@@ -202,7 +199,11 @@ class LoaderCheckPoint:
                 ) from exc
             # Custom
             else:
-                pass
+                print(
+                    "Warning: self.llm_device is False.\nThis means that no use GPU bring to be load CPU mode\n")
+                params = {"low_cpu_mem_usage": True, "torch_dtype": torch.float32, "trust_remote_code": True}
+                model = LoaderClass.from_pretrained(checkpoint, **params).to(self.llm_device, dtype=float)
         # Loading the tokenizer
         if type(model) is transformers.LlamaForCausalLM: