parent 33bbb4779e
commit 4c008c25d4
README.md (49 changed lines)
@@ -18,6 +18,53 @@
+Project directory structure
+```text
+.
+├── agent
+│   └── agent implementation
+├── chains
+│   ├── modules
+│   └── chains implementation
+├── configs
+│   └── system initialization configs
+├── content
+│   └── temporary upload location for attachments
+├── docs
+│   └── project documentation
+├── fastchat
+│   ├── api
+│   └── a fastchat extension for remote LLM calls from langchain
+├── img
+│   └── project assets
+├── loras
+│   └── default local path for LoRA files
+├── model
+│   └── default local path for checkpoints
+├── models
+│   ├── extensions
+│   │   └── internal LLM extension packages
+│   ├── loader
+│   │   └── project checkpoint loader: supports chatglm (AutoModel), quantized models (llama.cpp), and other models (AutoModelForCausalLM); compatible with loading lora and ptuning_v2 fine-tuning files (see the sketch after this hunk)
+│   └── langchain LLM wrapper integrations for chatglm, LLaMA and other models
+├── nltk_data
+│   ├── corpora
+│   │   └── cmudict
+│   ├── taggers
+│   │   └── averaged_perceptron_tagger
+│   └── tokenizers
+│       └── punkt
+├── ptuning-v2
+├── textsplitter
+│   └── Chinese semantic text splitting
+├── utils
+│   └── system utilities
+└── vector_store
+    └── local FAISS vector store index files
+```
+
+🚩 This project does not itself cover fine-tuning or training, but fine-tuning or training can be used to improve its results.
+
+🌐 [AutoDL image](https://www.codewithgpu.com/i/imClumsyPanda/langchain-ChatGLM/langchain-ChatGLM)
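The `models/loader` entry above describes a checkpoint loader that dispatches on model family. A minimal sketch of that dispatch, under stated assumptions: `load_checkpoint` is a hypothetical name, not the project's actual loader API, and the llama.cpp branch for quantized checkpoints is omitted.

```python
# Illustrative only: load_checkpoint is a hypothetical name, not the real
# models/loader API. It shows the AutoModel vs. AutoModelForCausalLM dispatch
# the directory tree describes; the llama.cpp path for quantized models is omitted.
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer

def load_checkpoint(model_name: str, model_dir: str):
    path = f"{model_dir}/{model_name}"
    tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
    if "chatglm" in model_name.lower():
        # chatglm checkpoints are loaded via AutoModel
        model = AutoModel.from_pretrained(path, trust_remote_code=True)
    else:
        # other causal LMs fall back to AutoModelForCausalLM
        model = AutoModelForCausalLM.from_pretrained(path, trust_remote_code=True)
    return tokenizer, model.eval()
```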
@@ -33,7 +80,7 @@
 - ChatGLM-6B model hardware requirements

   Note: if the model has not been downloaded locally, check the free space under `$HOME/.cache/huggingface/` before running; downloading the model files locally requires 15 GB of storage.

+  Note: for other optional startup flags, see [Startup Options](docs/StartOption.md).
   For how to download models, see Q8 in the [FAQ](docs/FAQ.md).

 | **Quantization level** | **Minimum GPU memory** (inference) | **Minimum GPU memory** (parameter-efficient fine-tuning) |
api.py (33 changed lines)
@@ -19,6 +19,9 @@ from configs.model_config import (VS_ROOT_PATH, UPLOAD_ROOT_PATH, EMBEDDING_DEVICE,
                                   EMBEDDING_MODEL, LLM_MODEL, NLTK_DATA_PATH,
                                   VECTOR_SEARCH_TOP_K, LLM_HISTORY_LEN, OPEN_CROSS_DOMAIN)
 from agent import bing_search as agent_bing_search
+import models.shared as shared
+from models.loader.args import parser
+from models.loader import LoaderCheckPoint

 nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
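The three added imports set up a process-wide loader that api.py and cli_demo.py now share. The pattern, condensed from how this diff itself uses it, with no names beyond the diff's own:

```python
# Parse CLI flags once, attach the checkpoint loader to the shared module,
# then obtain a langchain-compatible LLM wrapper from it.
import models.shared as shared
from models.loader import LoaderCheckPoint
from models.loader.args import parser

args_dict = vars(parser.parse_args())
shared.loaderCheckPoint = LoaderCheckPoint(args_dict)
llm_model_ins = shared.loaderLLM()
```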
@@ -173,8 +176,8 @@ async def list_docs(

 async def delete_docs(
     knowledge_base_id: str = Query(...,
-                                   description="Knowledge Base Name (note: this endpoint only deletes the uploaded files; it does not delete data inside the knowledge base (FAISS))",
-                                   example="kb1"),
+        description="Knowledge Base Name (note: this endpoint only deletes the uploaded files; it does not delete data inside the knowledge base (FAISS))",
+        example="kb1"),
     doc_name: Optional[str] = Query(
         None, description="doc name", example="doc_name_1.pdf"
     ),
@@ -258,9 +261,12 @@ async def chat(
         ],
     ),
 ):
-    for resp, history in local_doc_qa.llm._call(
-        prompt=question, history=history, streaming=True
-    ):
+    for answer_result in local_doc_qa.llm.generatorAnswer(prompt=question, history=history,
+                                                          streaming=True):
+        resp = answer_result.llm_output["answer"]
+        history = answer_result.history
+        pass

     return ChatMessage(
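This hunk replaces the private `llm._call` generator, which yielded `(resp, history)` tuples, with `generatorAnswer`, which yields result objects. A sketch of a consumer, assuming only what the diff shows about those objects:

```python
def stream_answer(llm, question, history):
    # Per this diff, each answer_result carries the accumulated answer in
    # llm_output["answer"] and the updated chat history in .history.
    for answer_result in llm.generatorAnswer(prompt=question, history=history, streaming=True):
        yield answer_result.llm_output["answer"], answer_result.history
```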
@@ -312,6 +318,7 @@ async def stream_chat(websocket: WebSocket, knowledge_base_id: str):
         )
         turn += 1
+

 async def document():
     return RedirectResponse(url="/docs")
@@ -333,10 +340,14 @@ async def bing_search(
         source_documents=[],
     )


 def api_start(host, port):
     global app
     global local_doc_qa
+
+    llm_model_ins = shared.loaderLLM()
+    llm_model_ins.set_history_len(LLM_HISTORY_LEN)
+
     app = FastAPI()
     # Add CORS middleware to allow all origins
     # Set OPEN_DOMAIN=True in config.py to allow cross-origin requests
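The two comments refer to FastAPI's stock CORS middleware, gated by the `OPEN_CROSS_DOMAIN` flag imported at the top of this file. A minimal sketch of the setup they imply (the flag is stubbed here so the snippet stands alone):

```python
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

OPEN_CROSS_DOMAIN = True  # stand-in for the value from configs.model_config

app = FastAPI()
if OPEN_CROSS_DOMAIN:
    # Allow any origin, method and header on cross-origin requests.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
```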
@@ -365,18 +376,22 @@ def api_start(host, port):

     local_doc_qa = LocalDocQA()
     local_doc_qa.init_cfg(
-        llm_model=LLM_MODEL,
+        llm_model=llm_model_ins,
         embedding_model=EMBEDDING_MODEL,
         embedding_device=EMBEDDING_DEVICE,
         llm_history_len=LLM_HISTORY_LEN,
         top_k=VECTOR_SEARCH_TOP_K,
     )
     uvicorn.run(app, host=host, port=port)


 if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
     parser.add_argument("--host", type=str, default="0.0.0.0")
     parser.add_argument("--port", type=int, default=7861)
-    args = parser.parse_args()
+    # initialize arguments
+    args = None
+    args = parser.parse_args(args=['--model-dir', '/media/checkpoint/', '--model', 'chatglm-6b', '--no-remote-model'])
+    args_dict = vars(args)
+    shared.loaderCheckPoint = LoaderCheckPoint(args_dict)
     api_start(args.host, args.port)
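Note that `parse_args(args=[...])` parses the hard-coded list rather than `sys.argv`, so the `--host` and `--port` flags registered above always take their defaults when api.py is launched this way.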
cli_demo.py (14 changed lines)
@@ -2,7 +2,9 @@ from configs.model_config import *
 from chains.local_doc_qa import LocalDocQA
 import os
 import nltk
-
+from models.loader.args import parser
+import models.shared as shared
+from models.loader import LoaderCheckPoint
 nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path

 # Show reply with source text from input document
@@ -10,11 +12,17 @@ REPLY_WITH_SOURCE = True


 def main():
+    args = None
+    args = parser.parse_args()
+    args_dict = vars(args)
+    shared.loaderCheckPoint = LoaderCheckPoint(args_dict)
+    llm_model_ins = shared.loaderLLM()
+    llm_model_ins.history_len = LLM_HISTORY_LEN
+
     local_doc_qa = LocalDocQA()
-    local_doc_qa.init_cfg(llm_model=LLM_MODEL,
+    local_doc_qa.init_cfg(llm_model=llm_model_ins,
                           embedding_model=EMBEDDING_MODEL,
                           embedding_device=EMBEDDING_DEVICE,
                           llm_history_len=LLM_HISTORY_LEN,
                           top_k=VECTOR_SEARCH_TOP_K)
     vs_path = None
     while not vs_path:
docs/StartOption.md (new file, 76 lines)
@@ -0,0 +1,76 @@
+#### Startup options
+```text
+usage: langchain-ChatGLM [-h] [--no-remote-model] [--model MODEL] [--lora LORA] [--model-dir MODEL_DIR] [--lora-dir LORA_DIR] [--cpu] [--auto-devices] [--gpu-memory GPU_MEMORY [GPU_MEMORY ...]] [--cpu-memory CPU_MEMORY]
+                         [--load-in-8bit] [--bf16]
+
+An LLM document reader based on langchain and ChatGLM
+
+options:
+  -h, --help            show this help message and exit
+  --no-remote-model     do not fetch the model from a remote hub; add `--no-remote-model` when loading a local model
+  --model MODEL         Name of the model to load by default.
+  --lora LORA           Name of the LoRA to apply to the model by default.
+  --model-dir MODEL_DIR
+                        Path to directory with all the models
+  --lora-dir LORA_DIR   Path to directory with all the loras
+  --cpu                 Use the CPU to generate text. Warning: Training on CPU is extremely slow.
+  --auto-devices        Automatically split the model across the available GPU(s) and CPU.
+  --gpu-memory GPU_MEMORY [GPU_MEMORY ...]
+                        Maximum GPU memory in GiB to be allocated per GPU. Example: --gpu-memory 10 for a single GPU, --gpu-memory 10 5 for two GPUs. You can also set values in MiB like --gpu-memory 3500MiB.
+  --cpu-memory CPU_MEMORY
+                        Maximum CPU memory in GiB to allocate for offloaded weights. Same as above.
+  --load-in-8bit        Load the model with 8-bit precision.
+  --bf16                Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.
+```
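The memory flags accept one value per device. An illustrative invocation (not part of the original document) that splits a model across two GPUs and reserves CPU memory for offloaded weights:

```shell
$ python cli_demo.py --model-dir /media/mnt/ --model chatglm-6b --no-remote-model --auto-devices --gpu-memory 10 5 --cpu-memory 32
```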
+
+#### Examples
+
+- 1. Load a local model
+
+```text
+--model-dir          local folder holding checkpoints
+--model              model name
+--no-remote-model    do not load the model from a remote hub
+```
+```shell
+$ python cli_demo.py --model-dir /media/mnt/ --model chatglm-6b --no-remote-model
+```
+
+- 2. Load the model at reduced precision
+```text
+--model-dir          local folder holding checkpoints
+--model              model name
+--no-remote-model    do not load the model from a remote hub
+--load-in-8bit       load the model with 8-bit precision
+```
+```shell
+$ python cli_demo.py --model-dir /media/mnt/ --model chatglm-6b --no-remote-model --load-in-8bit
+```
+
+- 3. Run inference on the CPU
+```text
+--model-dir          local folder holding checkpoints
+--model              model name
+--no-remote-model    do not load the model from a remote hub
+--cpu                use the CPU to generate text. Warning: training on CPU is extremely slow.
+```
+```shell
+$ python cli_demo.py --model-dir /media/mnt/ --model chatglm-6b --no-remote-model --cpu
+```
+
+- 4. Load a LoRA fine-tuning file
+```text
+--model-dir          local folder holding checkpoints
+--model              model name
+--no-remote-model    do not load the model from a remote hub
+--lora-dir           local folder holding LoRA files
+--lora               LoRA name
+```
+```shell
+$ python cli_demo.py --model-dir /media/mnt/ --model chatglm-6b --no-remote-model --lora-dir /media/mnt/loras --lora chatglm-step100
+```