From 64f22a9e53354cc59d95e2767dfecb3f8ed68c5f Mon Sep 17 00:00:00 2001
From: Zhi-guo Huang
Date: Tue, 11 Jul 2023 20:24:49 +0800
Subject: [PATCH] Change the default multi-GPU deployment scheme so it largely
 does not fail on new models (#788)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Fix a typo in bing_search.py; update the application instructions and
  caveats for the Bing Subscription Key in model_config.py

* Update the FAQ with the cause of and solution for
  "[Errno 110] Connection timed out"

* Describe the cause of load_in_8bit failures in loader.py and a detailed
  solution

* update loader.py

* stream_chat_bing

* Change the stream_chat interface to select knowledge_base_id in the
  request body; add a stream_chat_bing interface

* Improve the logic of cli_demo.py: support input prompts, multiple
  inputs, and re-entering input

* update cli_demo.py

* Apply the changes suggested in review

* Change the default multi-GPU deployment scheme so it largely does not
  fail on new models

---------

Co-authored-by: imClumsyPanda
---
 chains/local_doc_qa.py  |  1 +
 models/loader/loader.py | 17 ++++++++++++++---
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/chains/local_doc_qa.py b/chains/local_doc_qa.py
index fe70066..9ec3db5 100644
--- a/chains/local_doc_qa.py
+++ b/chains/local_doc_qa.py
@@ -200,6 +200,7 @@ class LocalDocQA:
             return vs_path, loaded_files
         else:
             logger.info("文件均未成功加载,请检查依赖包或替换为其他文件再次上传。")
+            return None, loaded_files
 
     def one_knowledge_add(self, vs_path, one_title, one_conent, one_content_segmentation,
                           sentence_size):
diff --git a/models/loader/loader.py b/models/loader/loader.py
index cc74073..0d7b8f3 100644
--- a/models/loader/loader.py
+++ b/models/loader/loader.py
@@ -151,13 +151,24 @@ class LoaderCheckPoint:
             elif 'moss' in model_name.lower():
                 self.device_map = self.moss_auto_configure_device_map(num_gpus, model_name)
             else:
+                # The approach below, used as the default multi-GPU loading scheme, largely avoids failures on new models.
+                # Tested on chatglm2-6b, bloom-3b and bloomz-7b1; GPU load is also relatively balanced.
+                from accelerate.utils import get_balanced_memory
+                max_memory = get_balanced_memory(model,
+                                                 dtype=torch.int8 if self.load_in_8bit else None,
+                                                 low_zero=False,
+                                                 no_split_module_classes=model._no_split_modules)
+                self.device_map = infer_auto_device_map(model,
+                                                        dtype=torch.float16 if not self.load_in_8bit else torch.int8,
+                                                        max_memory=max_memory,
+                                                        no_split_module_classes=model._no_split_modules)
                 # Models other than chatglm and moss should use automatic assignment rather than chatglm's configuration approach.
                 # The layer classes other models define are almost never identical to chatglm's or moss's, so chatglm_auto_configure_device_map
                 # is guaranteed to fail. infer_auto_device_map may leave the load unbalanced, but at least it does not error out.
                 # This was confirmed in practice with the bloom model.
-                self.device_map = infer_auto_device_map(model,
-                                                        dtype=torch.int8,
-                                                        no_split_module_classes=model._no_split_modules)
+#                 self.device_map = infer_auto_device_map(model,
+#                                                         dtype=torch.int8,
+#                                                         no_split_module_classes=model._no_split_modules)
 
             model = dispatch_model(model, device_map=self.device_map)
         else:
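
For context, the balanced device-map strategy the loader.py hunk adopts can be reproduced standalone roughly as follows. This is a minimal sketch, not part of the patch: the checkpoint name "THUDM/chatglm2-6b" and the float16 dtype are illustrative assumptions, and only public accelerate/transformers APIs are used (get_balanced_memory, infer_auto_device_map, dispatch_model).

import torch
from accelerate import dispatch_model, infer_auto_device_map
from accelerate.utils import get_balanced_memory
from transformers import AutoModel

# Illustrative checkpoint; any transformers model that exposes
# _no_split_modules should behave the same way.
model = AutoModel.from_pretrained("THUDM/chatglm2-6b",
                                  torch_dtype=torch.float16,
                                  trust_remote_code=True)

# Spread the per-device memory budget evenly across all visible GPUs
# instead of filling GPU 0 first (low_zero=False keeps GPU 0 in the pool).
max_memory = get_balanced_memory(model,
                                 dtype=torch.float16,
                                 low_zero=False,
                                 no_split_module_classes=model._no_split_modules)

# Derive a device map that never splits the model's atomic blocks across
# devices, then move each submodule to its assigned GPU.
device_map = infer_auto_device_map(model,
                                   dtype=torch.float16,
                                   max_memory=max_memory,
                                   no_split_module_classes=model._no_split_modules)
model = dispatch_model(model, device_map=device_map)

Compared with calling infer_auto_device_map without max_memory (the old default), passing the balanced budget is what evens out GPU utilization, and taking no_split_module_classes from the model itself rather than from a hand-written per-model map is what lets the scheme survive new architectures.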