fix readme
parent 1b312d5715
commit 30b8daecb3
@@ -69,7 +69,7 @@ docker run -d --gpus all -p 80:8501 registry.cn-beijing.aliyuncs.com/chatchat/ch
 + LLaMA-13B Minimum VRAM requirement: 11GB  Recommended GPUs: RTX 2060 12GB, RTX3060 12GB, RTX3080, RTXA2000
 + Qwen-14B-Chat Minimum VRAM requirement: 13GB  Recommended GPU: RTX 3090
 + LLaMA-30B Minimum VRAM requirement: 22GB  Recommended GPUs: RTX A5000, RTX 3090, RTX 4090, RTX 6000, Tesla V100, RTX Tesla P40
-+ LLaMA-65B Minimum VRAM requirement: 22GB  Recommended GPUs: A100, A40, A6000
++ LLaMA-65B Minimum VRAM requirement: 40GB  Recommended GPUs: A100, A40, A6000

 For int8 the VRAM requirement is 1.5x; for fp16 it is 2.5x.
 For example, running inference on the Qwen-7B-Chat model in fp16 requires 16GB of VRAM.
@@ -68,7 +68,7 @@ If you want to run the native model (int4 version) on the GPU without problems,
 + LLaMA-13B Minimum VRAM requirement: 11GB  Recommended GPUs: RTX 2060 12GB, RTX3060 12GB, RTX3080, RTXA2000
 + Qwen-14B-Chat Minimum VRAM requirement: 13GB  Recommended GPU: RTX 3090
 + LLaMA-30B Minimum VRAM requirement: 22GB  Recommended GPUs: RTX A5000, RTX 3090, RTX 4090, RTX 6000, Tesla V100, RTX Tesla P40
-+ LLaMA-65B Minimum VRAM requirement: 22GB  Recommended GPUs: A100, A40, A6000
++ LLaMA-65B Minimum VRAM requirement: 40GB  Recommended GPUs: A100, A40, A6000

 If int8, the VRAM requirement is 1.5x; if fp16, 2.5x.
 For example, running inference on the Qwen-7B-Chat model in fp16 requires 16GB of VRAM.
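The rule of thumb above (the listed minimums assume int4-quantized weights; int8 needs roughly 1.5x and fp16 roughly 2.5x of that figure) can be turned into a quick calculator. The following is a minimal sketch, not part of the project code; the lookup table and the function name estimate_vram_gb are illustrative, only the multipliers and the listed minimums come from the README.

```python
# Rough VRAM calculator based on the README's rule of thumb.
# The listed minimums assume int4 weights; int8 needs ~1.5x and
# fp16 ~2.5x of that figure. Names and values below are illustrative.

INT4_MIN_VRAM_GB = {
    "LLaMA-13B": 11,
    "Qwen-14B-Chat": 13,
    "LLaMA-30B": 22,
    "LLaMA-65B": 40,
}

PRECISION_FACTOR = {"int4": 1.0, "int8": 1.5, "fp16": 2.5}


def estimate_vram_gb(model: str, precision: str = "int4") -> float:
    """Return an approximate VRAM requirement in GB for the given precision."""
    return INT4_MIN_VRAM_GB[model] * PRECISION_FACTOR[precision]


if __name__ == "__main__":
    # Qwen-14B-Chat in fp16: 13 GB * 2.5 = 32.5 GB
    print(estimate_vram_gb("Qwen-14B-Chat", "fp16"))
```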
@@ -90,7 +90,7 @@ MODEL_PATH = {
 }

 # Name of the selected Embedding model
-EMBEDDING_MODEL = "piccolo-large-zh" # the latest SOTA embedding model
+EMBEDDING_MODEL = "m3e-base" # you can try the latest SOTA embedding model: piccolo-large-zh

 # Device the Embedding model runs on. "auto" detects the device automatically; it can also be set manually to one of "cuda", "mps", or "cpu".
 EMBEDDING_DEVICE = "auto"
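As a rough illustration of what the "auto" value for EMBEDDING_DEVICE typically resolves to, here is a minimal sketch assuming PyTorch is installed; the helper name resolve_device is made up for this example and is not part of the project's configuration code.

```python
# Hypothetical helper showing how an "auto" device setting is commonly
# resolved to "cuda", "mps", or "cpu"; not the project's actual logic.
import torch


def resolve_device(device: str = "auto") -> str:
    """Map a setting such as EMBEDDING_DEVICE to a concrete device string."""
    if device != "auto":
        return device
    if torch.cuda.is_available():
        return "cuda"
    if torch.backends.mps.is_available():
        return "mps"
    return "cpu"


print(resolve_device("auto"))  # prints "cuda", "mps", or "cpu" depending on the machine
```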