Merge branch 'dev' into chatglm2cpp

cylee 2023-07-28 14:41:14 +08:00 committed by GitHub
commit 4235270a32
8 changed files with 101 additions and 48 deletions


@ -87,6 +87,10 @@ $ conda create -p /your_path/env_name python=3.8
# Activate the environment
$ source activate /your_path/env_name
# or create the environment without specifying a path; in that case, replace /your_path/env_name with env_name in the commands below
$ conda create -n env_name python=3.8
$ conda activate env_name # Activate the environment
# Deactivate the environment
$ source deactivate /your_path/env_name

api.py

@ -9,8 +9,9 @@ import asyncio
import nltk
import pydantic
import uvicorn
from fastapi import Body, FastAPI, File, Form, Query, UploadFile, WebSocket
from fastapi import Body, Request, FastAPI, File, Form, Query, UploadFile, WebSocket
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from typing_extensions import Annotated
from starlette.responses import RedirectResponse
@ -55,7 +56,7 @@ class ListDocsResponse(BaseResponse):
class ChatMessage(BaseModel):
question: str = pydantic.Field(..., description="Question text")
response: str = pydantic.Field(..., description="Response text")
history: List[List[str]] = pydantic.Field(..., description="History text")
history: List[List[Optional[str]]] = pydantic.Field(..., description="History text")
source_documents: List[str] = pydantic.Field(
..., description="List of source documents and their scores"
)
@ -303,7 +304,8 @@ async def update_doc(
async def local_doc_chat(
knowledge_base_id: str = Body(..., description="Knowledge Base Name", example="kb1"),
question: str = Body(..., description="Question", example="工伤保险是什么?"),
history: List[List[str]] = Body(
streaming: bool = Body(False, description="Whether to enable streaming output; default False. Some models may not support it."),
history: List[List[Optional[str]]] = Body(
[],
description="History of previous questions and answers",
example=[
@ -323,11 +325,23 @@ async def local_doc_chat(
history=history,
source_documents=[],
)
else:
    if streaming:
        def generate_answer():
            last_print_len = 0
            for resp, next_history in local_doc_qa.get_knowledge_based_answer(
                query=question, vs_path=vs_path, chat_history=history, streaming=True
            ):
                yield resp["result"][last_print_len:]
                last_print_len = len(resp["result"])
        return StreamingResponse(generate_answer())
    else:
        for resp, history in local_doc_qa.get_knowledge_based_answer(
            query=question, vs_path=vs_path, chat_history=history, streaming=True
        ):
            pass
source_documents = [
f"""出处 [{inum + 1}] {os.path.split(doc.metadata['source'])[-1]}\n\n{doc.page_content}\n\n"""
f"""相关度:{doc.metadata['score']}\n\n"""
@ -344,7 +358,7 @@ async def local_doc_chat(
async def bing_search_chat(
question: str = Body(..., description="Question", example="工伤保险是什么?"),
history: Optional[List[List[str]]] = Body(
history: Optional[List[List[Optional[str]]]] = Body(
[],
description="History of previous questions and answers",
example=[
@ -374,7 +388,8 @@ async def bing_search_chat(
async def chat(
question: str = Body(..., description="Question", example="工伤保险是什么?"),
history: Optional[List[List[str]]] = Body(
streaming: bool = Body(False, description="Whether to enable streaming output; default False. Some models may not support it."),
history: List[List[Optional[str]]] = Body(
[],
description="History of previous questions and answers",
example=[
@ -385,6 +400,30 @@ async def chat(
],
),
):
if streaming:
    def generate_answer():
        last_print_len = 0
        answer_result_stream_result = local_doc_qa.llm_model_chain(
            {"prompt": question, "history": history, "streaming": True})
        for answer_result in answer_result_stream_result['answer_result_stream']:
            yield answer_result.llm_output["answer"][last_print_len:]
            last_print_len = len(answer_result.llm_output["answer"])
    return StreamingResponse(generate_answer())
else:
    answer_result_stream_result = local_doc_qa.llm_model_chain(
        {"prompt": question, "history": history, "streaming": True})
    for answer_result in answer_result_stream_result['answer_result_stream']:
        resp = answer_result.llm_output["answer"]
        history = answer_result.history
    return ChatMessage(
        question=question,
        response=resp,
        history=history,
        source_documents=[],
    )
answer_result_stream_result = local_doc_qa.llm_model_chain(
{"prompt": question, "history": history, "streaming": True})
@ -544,7 +583,7 @@ if __name__ == "__main__":
parser.add_argument("--ssl_keyfile", type=str)
parser.add_argument("--ssl_certfile", type=str)
# Initialize messages
args = None
args = parser.parse_args()
args_dict = vars(args)
shared.loaderCheckPoint = LoaderCheckPoint(args_dict)
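
As a usage illustration (not part of this commit), a client could consume the new streaming=true flag roughly as follows; the port 7861 and the /chat route path are assumptions, since the route decorators are not visible in this diff:

import requests  # hypothetical client; adjust the URL to your deployment

payload = {"question": "工伤保险是什么?", "history": [], "streaming": True}
# stream=True lets us read the StreamingResponse chunk by chunk as tokens arrive
with requests.post("http://localhost:7861/chat", json=payload, stream=True) as r:
    for chunk in r.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="", flush=True)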


@ -31,7 +31,7 @@ EMBEDDING_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backe
# llm_model_dict handles some preset loader behaviors, such as the load location, model name, and model processor instance
# Modify the attribute values in the dict below to specify where local LLM models are stored
# e.g. change "local_model_path" of "chatglm-6b" from None to "User/Downloads/chatglm-6b"
# Please use an absolute path here
# Please use an absolute path here; the path must also contain the repo-id model name, because FastChat matches models by name
llm_model_dict = {
"chatglm-6b-int4-qe": {
"name": "chatglm-6b-int4-qe",
@ -218,9 +218,10 @@ BF16 = False
# Local LoRA storage location
LORA_DIR = "loras/"
# LLM LoRA path; empty by default. If you have one, specify the folder path directly
LLM_LORA_PATH = ""
USE_LORA = True if LLM_LORA_PATH else False
# LoRA name(s); if any, specify them as a list
LORA_NAME = ""
USE_LORA = True if LORA_NAME else False
# LLM streaming response
STREAMING = True
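
To make the comment about absolute paths concrete, a hypothetical llm_model_dict entry could look like the sketch below; the model name and path are examples only, and the path's last component repeats the repo-id model name so FastChat can match it:

llm_model_dict = {
    "chatglm2-6b": {
        "name": "chatglm2-6b",
        # absolute path whose last directory repeats the repo-id model name
        "local_model_path": "/home/user/models/chatglm2-6b",
        # the remaining keys of the entry stay as defined elsewhere in this file
    },
}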


@ -12,6 +12,12 @@ $ conda create -p /your_path/env_name python=3.8
# Activate the environment
$ source activate /your_path/env_name
# Or install with conda without specifying a path; in the lines below, replace /your_path/env_name with env_name
$ conda create -n env_name python=3.8
$ conda activate env_name # Activate the environment
# Update pip
$ pip3 install --upgrade pip
# Deactivate the environment


@ -42,9 +42,10 @@ parser.add_argument('--no-remote-model', action='store_true', help='remote in th
'model to add the ` '
'--no-remote-model`')
parser.add_argument('--model-name', type=str, default=LLM_MODEL, help='Name of the model to load by default.')
parser.add_argument('--lora', type=str, help='Name of the LoRA to apply to the model by default.')
parser.add_argument("--use-lora",type=bool,default=USE_LORA,help="use lora or not")
parser.add_argument('--lora', type=str, default=LORA_NAME,help='Name of the LoRA to apply to the model by default.')
parser.add_argument("--lora-dir", type=str, default=LORA_DIR, help="Path to directory with all the loras")
parser.add_argument('--use-ptuning-v2',action='store_true',help="whether use ptuning-v2 checkpoint")
parser.add_argument('--use-ptuning-v2', default=USE_PTUNING_V2, help="whether to use a ptuning-v2 checkpoint")
parser.add_argument("--ptuning-dir",type=str,default=PTUNING_DIR,help="the dir of ptuning-v2 checkpoint")
# Accelerate/transformers
parser.add_argument('--load-in-8bit', action='store_true', default=LOAD_IN_8BIT,


@ -203,6 +203,12 @@ class LoaderCheckPoint:
return model, tokenizer
elif self.is_llamacpp:
# To use a llama-cpp model (such as the quantized vicuna-13b), the llama-cpp-python library must be installed
# But note: in practice pip install does not work well; it has to be downloaded manually from https://github.com/abetlen/llama-cpp-python/releases/
# Also note that ggml formats from different periods are NOT compatible, so the required llama-cpp-python version differs accordingly and has to be determined by manual testing
# In testing, ggml-vicuna-13b-1.1 works correctly with llama-cpp-python 0.1.63
# However, this project controls model loading fairly strictly and its compatibility with llama-cpp-python is poor; many parameter settings cannot be used
# Unless really necessary, it is recommended not to use llama-cpp
try:
from llama_cpp import Llama
except ImportError as exc:
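
For reference, a minimal sketch (assuming llama-cpp-python 0.1.63 is installed and using an example path to a downloaded ggml file) of loading such a model directly with the library named above:

from llama_cpp import Llama

# example path to a ggml model file; replace with your own download
llm = Llama(model_path="/models/ggml-vicuna-13b-1.1-q4_0.bin")
output = llm("Q: What is llama.cpp? A:", max_tokens=64)
print(output["choices"][0]["text"])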


@ -23,13 +23,6 @@ openai
#accelerate~=0.18.0
#peft~=0.3.0
#bitsandbytes; platform_system != "Windows"
# To use a llama-cpp model (such as the quantized vicuna-13b), the llama-cpp-python library must be installed
# But note: in practice pip install does not work well; it has to be downloaded manually from https://github.com/abetlen/llama-cpp-python/releases/
# Also note that ggml formats from different periods are NOT compatible, so the required llama-cpp-python version differs accordingly and has to be determined by manual testing
# In testing, ggml-vicuna-13b-1.1 works correctly with llama-cpp-python 0.1.63
# However, this project controls model loading fairly strictly and its compatibility with llama-cpp-python is poor; many parameter settings cannot be used
# Unless really necessary, it is recommended not to use llama-cpp
torch~=2.0.0
pydantic~=1.10.7
starlette~=0.26.1


@ -1,6 +1,8 @@
import streamlit as st
from streamlit_chatbox import st_chatbox
import tempfile
from pathlib import Path
###### Code borrowed from webui #####
###### with minor modifications #####
import os
@ -101,23 +103,23 @@ def get_answer(query, vs_path, history, mode, score_threshold=VECTOR_SEARCH_SCOR
def get_vector_store(vs_id, files, sentence_size, history, one_conent, one_content_segmentation):
vs_path = os.path.join(KB_ROOT_PATH, vs_id, "vector_store")
filelist = []
if not os.path.exists(os.path.join(KB_ROOT_PATH, vs_id, "content")):
os.makedirs(os.path.join(KB_ROOT_PATH, vs_id, "content"))
vs_path = Path(KB_ROOT_PATH) / vs_id / "vector_store"
con_path = Path(KB_ROOT_PATH) / vs_id / "content"
con_path.mkdir(parents=True, exist_ok=True)
qa = st.session_state.local_doc_qa
if qa.llm_model_chain and qa.embeddings:
filelist = []
if isinstance(files, list):
for file in files:
filename = os.path.split(file.name)[-1]
shutil.move(file.name, os.path.join(
KB_ROOT_PATH, vs_id, "content", filename))
filelist.append(os.path.join(
KB_ROOT_PATH, vs_id, "content", filename))
target = con_path / filename
shutil.move(file.name, target)
filelist.append(str(target))
vs_path, loaded_files = qa.init_knowledge_vector_store(
filelist, vs_path, sentence_size)
filelist, str(vs_path), sentence_size)
else:
vs_path, loaded_files = qa.one_knowledge_add(vs_path, files, one_conent, one_content_segmentation,
vs_path, loaded_files = qa.one_knowledge_add(str(vs_path), files, one_conent, one_content_segmentation,
sentence_size)
if len(loaded_files):
file_status = f"已添加 {''.join([os.path.split(i)[-1] for i in loaded_files if i])} 内容至知识库,并已加载知识库,请开始提问"
@ -322,7 +324,8 @@ with st.sidebar:
sentence_size = st.slider('文本入库分句长度限制', 1, 1000, SENTENCE_SIZE)
files = st.file_uploader('上传知识文件',
['docx', 'txt', 'md', 'csv', 'xlsx', 'pdf'],
accept_multiple_files=True)
accept_multiple_files=True,
)
if st.button('添加文件到知识库'):
temp_dir = tempfile.mkdtemp()
file_list = []