增加cpu加载模型逻辑

2023-05-26 22:52:55 +08:00 · 2023-05-26 22:52:55 +08:00 · 3324c12d69
parent 561c40afee
commit 3324c12d69
1 changed file with 6 additions and 5 deletions
--- a/models/loader/loader.py
+++ b/models/loader/loader.py
@@ -130,11 +130,8 @@ class LoaderCheckPoint:

                    model = dispatch_model(model, device_map=self.device_map)
            else:
-                # print(
-                #     "Warning: torch.cuda.is_available() returned False.\nThis means that no GPU has been "
-                #     "detected.\nFalling back to CPU mode.\n")
                model = (
-                    AutoModel.from_pretrained(
+                    LoaderClass.from_pretrained(
                        checkpoint,
                        config=self.model_config,
                        trust_remote_code=True)
@@ -202,7 +199,11 @@ class LoaderCheckPoint:
                ) from exc
        # Custom
        else:
-            pass
+
+            print(
+                "Warning: self.llm_device is False.\nThis means that no use GPU  bring to be load CPU mode\n")
+            params = {"low_cpu_mem_usage": True, "torch_dtype": torch.float32, "trust_remote_code": True}
+            model = LoaderClass.from_pretrained(checkpoint, **params).to(self.llm_device, dtype=float)

        # Loading the tokenizer
        if type(model) is transformers.LlamaForCausalLM: