parent 33bbb4779e
commit 4c008c25d4

README.md (49)
@@ -18,6 +18,53 @@
 
+Project directory structure
+
+```text
+.
+├── agent
+│   └── agent implementation
+├── chains
+│   ├── modules
+│   └── chains implementation
+├── configs
+│   └── system initialization configuration
+├── content
+│   └── temporary location for uploaded attachments
+├── docs
+│   └── project documentation
+├── fastchat
+│   ├── api
+│   └── a fastchat langchain LLM remote-call extension
+├── img
+│   └── project image assets
+├── loras
+│   └── default local path for LoRA files
+├── model
+│   └── default local path for checkpoints
+├── models
+│   ├── extensions
+│   │   └── internal LLM extension packages
+│   ├── loader
+│   │   └── project checkpoint loader; supports chatglm (AutoModel), quantized models (llama.cpp), and other models (AutoModelForCausalLM), and can load lora and ptuning_v2 fine-tuning files
+│   └── langchain LLM wrapper integrations for chatglm, LLaMA, and other models
+├── nltk_data
+│   ├── corpora
+│   │   └── cmudict
+│   ├── taggers
+│   │   └── averaged_perceptron_tagger
+│   └── tokenizers
+│       └── punkt
+├── ptuning-v2
+├── textsplitter
+│   └── Chinese semantic text splitting
+├── utils
+│   └── system utilities
+└── vector_store
+    └── faiss local vector store index files
+```
+
 🚩 This project does not cover fine-tuning or training, but fine-tuning or training can be used to optimize the project's results.
 
 🌐 [AutoDL image](https://www.codewithgpu.com/i/imClumsyPanda/langchain-ChatGLM/langchain-ChatGLM)
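The `models/loader` entry above describes a three-way dispatch: ChatGLM checkpoints load through `AutoModel`, quantized checkpoints through llama.cpp, and everything else through `AutoModelForCausalLM`. The sketch below only illustrates that dispatch; the function name and the name-matching rules are hypothetical, not the project's actual loader, which additionally handles lora and ptuning_v2 files.

```python
# Illustrative sketch of the dispatch described for models/loader.
# load_checkpoint and its matching rules are hypothetical.
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer


def load_checkpoint(model_name: str, model_path: str):
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    if "chatglm" in model_name.lower():
        # ChatGLM ships its modeling code via AutoModel + trust_remote_code
        model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
    elif "ggml" in model_name.lower():
        # Quantized checkpoints go through llama.cpp bindings, not transformers
        from llama_cpp import Llama  # assumes llama-cpp-python is installed
        return Llama(model_path=model_path), None
    else:
        # Fallback for LLaMA-style and other causal LMs
        model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
    return model, tokenizer
```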

@@ -33,7 +80,7 @@
 - ChatGLM-6B model hardware requirements
 
 Note: if the model has not been downloaded locally, check the free space in the `$HOME/.cache/huggingface/` folder before running; downloading the model files locally requires 15 GB of storage.
+Note: for other optional startup options, see [Project startup options](docs/StartOption.md).
 For how to download models, see Q8 in the [FAQ](docs/FAQ.md).
 
 | **Quantization level** | **Minimum GPU memory** (inference) | **Minimum GPU memory** (parameter-efficient fine-tuning) |

api.py (29)

@@ -19,6 +19,9 @@ from configs.model_config import (VS_ROOT_PATH, UPLOAD_ROOT_PATH, EMBEDDING_DEVICE,
                                   EMBEDDING_MODEL, LLM_MODEL, NLTK_DATA_PATH,
                                   VECTOR_SEARCH_TOP_K, LLM_HISTORY_LEN, OPEN_CROSS_DOMAIN)
 from agent import bing_search as agent_bing_search
+import models.shared as shared
+from models.loader.args import parser
+from models.loader import LoaderCheckPoint
 
 nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
 
@@ -258,9 +261,12 @@ async def chat(
         ],
     ),
 ):
-    for resp, history in local_doc_qa.llm._call(
-            prompt=question, history=history, streaming=True
-    ):
+    for answer_result in local_doc_qa.llm.generatorAnswer(prompt=question, history=history,
+                                                          streaming=True):
+        resp = answer_result.llm_output["answer"]
+        history = answer_result.history
         pass
 
     return ChatMessage(
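This hunk replaces the `(resp, history)` tuples yielded by `llm._call` with result objects from `generatorAnswer`. A minimal sketch of a consumer, assuming (as the hunk suggests) that each `answer_result` carries the cumulative answer in `llm_output["answer"]` and the updated turns in `.history`:

```python
def stream_answer(llm, question: str, history: list):
    # Sketch only: `llm` is an instance returned by shared.loaderLLM().
    # Assumes llm_output["answer"] is cumulative, so only the new suffix is printed.
    resp, last_len = "", 0
    for answer_result in llm.generatorAnswer(prompt=question,
                                             history=history,
                                             streaming=True):
        resp = answer_result.llm_output["answer"]
        history = answer_result.history
        print(resp[last_len:], end="", flush=True)  # newly generated text only
        last_len = len(resp)
    return resp, history
```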

@@ -312,6 +318,7 @@ async def stream_chat(websocket: WebSocket, knowledge_base_id: str):
             )
             turn += 1
 
+
 async def document():
     return RedirectResponse(url="/docs")
 
@@ -333,10 +340,14 @@ async def bing_search(
         source_documents=[],
     )
 
 
 def api_start(host, port):
     global app
     global local_doc_qa
 
+    llm_model_ins = shared.loaderLLM()
+    llm_model_ins.set_history_len(LLM_HISTORY_LEN)
+
     app = FastAPI()
     # Add CORS middleware to allow all origins
     # Set OPEN_DOMAIN=True in config.py to allow cross-origin requests

@@ -365,18 +376,22 @@ def api_start(host, port):
 
     local_doc_qa = LocalDocQA()
     local_doc_qa.init_cfg(
-        llm_model=LLM_MODEL,
+        llm_model=llm_model_ins,
         embedding_model=EMBEDDING_MODEL,
         embedding_device=EMBEDDING_DEVICE,
-        llm_history_len=LLM_HISTORY_LEN,
         top_k=VECTOR_SEARCH_TOP_K,
     )
     uvicorn.run(app, host=host, port=port)
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
     parser.add_argument("--host", type=str, default="0.0.0.0")
     parser.add_argument("--port", type=int, default=7861)
-    args = parser.parse_args()
+    # initialization message
+    args = None
+    args = parser.parse_args(args=['--model-dir', '/media/checkpoint/', '--model', 'chatglm-6b', '--no-remote-model'])
+    args_dict = vars(args)
+    shared.loaderCheckPoint = LoaderCheckPoint(args_dict)
     api_start(args.host, args.port)
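api.py and cli_demo.py now share one initialization order: parse the loader arguments, construct a `LoaderCheckPoint`, publish it on `models.shared`, then obtain a langchain-compatible wrapper via `shared.loaderLLM()` and pass that wrapper to `init_cfg`. A minimal sketch of that sequence, using only the calls this commit introduces (the checkpoint path is illustrative, not a default):

```python
# Sketch of the shared loader flow; /path/to/checkpoints/ is illustrative.
from configs.model_config import LLM_HISTORY_LEN
import models.shared as shared
from models.loader import LoaderCheckPoint
from models.loader.args import parser

args = parser.parse_args(args=['--model-dir', '/path/to/checkpoints/',
                               '--model', 'chatglm-6b', '--no-remote-model'])
shared.loaderCheckPoint = LoaderCheckPoint(vars(args))  # must exist before loaderLLM()
llm_model_ins = shared.loaderLLM()                      # langchain-compatible wrapper
llm_model_ins.set_history_len(LLM_HISTORY_LEN)          # cli_demo.py sets .history_len directly
```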

cli_demo.py (14)

@@ -2,7 +2,9 @@ from configs.model_config import *
 from chains.local_doc_qa import LocalDocQA
 import os
 import nltk
+from models.loader.args import parser
+import models.shared as shared
+from models.loader import LoaderCheckPoint
 nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
 
 # Show reply with source text from input document

@@ -10,11 +12,17 @@ REPLY_WITH_SOURCE = True
 
 
 def main():
+    args = None
+    args = parser.parse_args()
+    args_dict = vars(args)
+    shared.loaderCheckPoint = LoaderCheckPoint(args_dict)
+    llm_model_ins = shared.loaderLLM()
+    llm_model_ins.history_len = LLM_HISTORY_LEN
+
     local_doc_qa = LocalDocQA()
-    local_doc_qa.init_cfg(llm_model=LLM_MODEL,
+    local_doc_qa.init_cfg(llm_model=llm_model_ins,
                           embedding_model=EMBEDDING_MODEL,
                           embedding_device=EMBEDDING_DEVICE,
-                          llm_history_len=LLM_HISTORY_LEN,
                           top_k=VECTOR_SEARCH_TOP_K)
     vs_path = None
     while not vs_path:

docs/StartOption.md (new, +76)

@@ -0,0 +1,76 @@
+#### Project startup options
+
+```text
+usage: langchina-ChatGLM [-h] [--no-remote-model] [--model MODEL] [--lora LORA] [--model-dir MODEL_DIR] [--lora-dir LORA_DIR] [--cpu] [--auto-devices] [--gpu-memory GPU_MEMORY [GPU_MEMORY ...]] [--cpu-memory CPU_MEMORY]
+                         [--load-in-8bit] [--bf16]
+
+An LLM document reader based on langchain and chatGLM
+
+options:
+  -h, --help            show this help message and exit
+  --no-remote-model     Do not fetch the model remotely when loading a checkpoint; add `--no-remote-model` when loading a local model.
+  --model MODEL         Name of the model to load by default.
+  --lora LORA           Name of the LoRA to apply to the model by default.
+  --model-dir MODEL_DIR
+                        Path to directory with all the models
+  --lora-dir LORA_DIR   Path to directory with all the loras
+  --cpu                 Use the CPU to generate text. Warning: Training on CPU is extremely slow.
+  --auto-devices        Automatically split the model across the available GPU(s) and CPU.
+  --gpu-memory GPU_MEMORY [GPU_MEMORY ...]
+                        Maximum GPU memory in GiB to be allocated per GPU. Example: --gpu-memory 10 for a single GPU, --gpu-memory 10 5 for two GPUs. You can also set values in MiB like --gpu-memory 3500MiB.
+  --cpu-memory CPU_MEMORY
+                        Maximum CPU memory in GiB to allocate for offloaded weights. Same as above.
+  --load-in-8bit        Load the model with 8-bit precision.
+  --bf16                Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.
+
+```
+
+#### Examples
+
+- 1. Load a local model
+
+```text
+--model-dir          local checkpoint directory
+--model              model name
+--no-remote-model    do not load the model remotely
+```
+
+```shell
+$ python cli_demo.py --model-dir /media/mnt/ --model chatglm-6b --no-remote-model
+```
+
+- 2. Load the model at reduced precision
+
+```text
+--model-dir          local checkpoint directory
+--model              model name
+--no-remote-model    do not load the model remotely
+--load-in-8bit       load the model with 8-bit precision
+```
+
+```shell
+$ python cli_demo.py --model-dir /media/mnt/ --model chatglm-6b --no-remote-model --load-in-8bit
+```
+
+- 3. Run the model on the CPU
+
+```text
+--model-dir          local checkpoint directory
+--model              model name
+--no-remote-model    do not load the model remotely
+--cpu                use the CPU to generate text; warning: training on CPU is extremely slow
+```
+
+```shell
+$ python cli_demo.py --model-dir /media/mnt/ --model chatglm-6b --no-remote-model --cpu
+```
+
+- 4. Load a LoRA fine-tuning file
+
+```text
+--model-dir          local checkpoint directory
+--model              model name
+--no-remote-model    do not load the model remotely
+--lora-dir           local LoRA directory
+--lora               LoRA name
+```
+
+```shell
+$ python cli_demo.py --model-dir /media/mnt/ --model chatglm-6b --no-remote-model --lora-dir /media/mnt/loras --lora chatglm-step100
+```