From f7e7d318d8dfac2bf46bd8db8471b9926385620f Mon Sep 17 00:00:00 2001 From: hzg0601 Date: Tue, 13 Jun 2023 23:30:10 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9loader.py=E4=B8=ADload=5Fin?= =?UTF-8?q?=5F8bit=E5=A4=B1=E8=B4=A5=E7=9A=84=E5=8E=9F=E5=9B=A0=E5=92=8C?= =?UTF-8?q?=E8=AF=A6=E7=BB=86=E8=A7=A3=E5=86=B3=E6=96=B9=E6=A1=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- models/loader/loader.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/models/loader/loader.py b/models/loader/loader.py index c68e71a..415048b 100644 --- a/models/loader/loader.py +++ b/models/loader/loader.py @@ -30,6 +30,15 @@ class LoaderCheckPoint: ptuning_dir: str = None use_ptuning_v2: bool = False # 如果开启了8bit量化加载,项目无法启动,参考此位置,选择合适的cuda版本,https://github.com/TimDettmers/bitsandbytes/issues/156 + # 原因主要是由于bitsandbytes安装时选择了系统环境变量里不匹配的cuda版本, + # 例如PATH下存在cuda10.2和cuda11.2,bitsandbytes安装时选择了10.2,而torch等安装依赖的版本是11.2 + # 因此主要的解决思路是清理环境变量里PATH下的不匹配的cuda版本,一劳永逸的方法是: + # 0. 在终端执行`pip uninstall bitsandbytes` + # 1. 删除.bashrc文件下关于PATH的条目 + # 2. 在终端执行 `echo "export PATH=$PATH" >> .bashrc` + # 3. 在终端执行`source .bashrc` + # 4. 再执行`pip install bitsandbytes` + load_in_8bit: bool = False is_llamacpp: bool = False bf16: bool = False @@ -99,6 +108,8 @@ class LoaderCheckPoint: LoaderClass = AutoModelForCausalLM # Load the model in simple 16-bit mode by default + # 如果加载没问题,但在推理时报错RuntimeError: CUDA error: CUBLAS_STATUS_ALLOC_FAILED when calling `cublasCreate(handle)` + # 那还是因为显存不够,此时只能考虑--load-in-8bit,或者配置默认模型为`chatglm-6b-int8` if not any([self.llm_device.lower() == "cpu", self.load_in_8bit, self.is_llamacpp]):