Merge branch 'dev_fastchat' of github.com:chatchat-space/langchain-ChatGLM into dev_fastchat

hzg0601 2023-08-01 18:02:52 +08:00
commit 18a94fcf45
18 changed files with 475 additions and 67 deletions

.gitignore

@@ -4,3 +4,4 @@ logs
 .idea/
 __pycache__/
 knowledge_base/
+configs/model_config.py

configs/model_config.py

@@ -171,7 +171,7 @@ embedding_model_dict = {
 }
 # Name of the selected Embedding model
-EMBEDDING_MODEL = "text2vec"
+EMBEDDING_MODEL = "m3e-base"
 # Device on which the Embedding model runs
 EMBEDDING_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
@@ -196,6 +196,12 @@ llm_model_dict = {
         "api_key": "EMPTY"
     },
+    "chatglm2-6b-32k": {
+        "local_model_path": "THUDM/chatglm2-6b-32k",  # "THUDM/chatglm2-6b-32k",
+        "api_base_url": "http://localhost:8888/v1",  # "name" has been renamed to the fastchat service's "api_base_url"
+        "api_key": "EMPTY"
+    },
     "vicuna-13b-hf": {
         "local_model_path": "",
         "api_base_url": "http://localhost:8000/v1",  # "name" has been renamed to the fastchat service's "api_base_url"
@@ -246,3 +252,16 @@ PROMPT_TEMPLATE = """【指令】根据已知信息,简洁和专业的来回
 # Whether the API allows cross-origin requests; defaults to False, set to True to enable it
 # is open cross domain
 OPEN_CROSS_DOMAIN = False
+# Required variables for Bing search
+# Using Bing search requires a Bing Subscription Key, which you apply for as a Bing Search trial in the Azure portal
+# For details on how to apply, see
+# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/create-bing-search-service-resource
+# For creating a Bing API search instance with Python, see:
+# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/quickstarts/rest/python
+BING_SEARCH_URL = "https://api.bing.microsoft.com/v7.0/search"
+# Note: this is not the Bing Webmaster Tools API key
+# Also, if a server reports "Failed to establish a new connection: [Errno 110] Connection timed out",
+# it is because the server sits behind a firewall; ask the administrator to whitelist the URL (on a corporate server, don't count on it)
+BING_SUBSCRIPTION_KEY = ""
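As a quick sanity check of the two settings above, the key and endpoint can be exercised directly through LangChain's BingSearchAPIWrapper before the chat service is started. A minimal sketch (not part of the commit), assuming BING_SUBSCRIPTION_KEY has been filled in; the query string is arbitrary:

    # Standalone check of the Bing settings, mirroring the bing_search() helper added later in this commit.
    from langchain.utilities import BingSearchAPIWrapper
    from configs.model_config import BING_SEARCH_URL, BING_SUBSCRIPTION_KEY

    search = BingSearchAPIWrapper(bing_subscription_key=BING_SUBSCRIPTION_KEY,
                                  bing_search_url=BING_SEARCH_URL)
    # Each result is a dict with "snippet", "title" and "link" keys.
    for hit in search.results("langchain", 3):
        print(hit["title"], "->", hit["link"])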


@@ -18,3 +18,4 @@ unstructured[local-inference]
 streamlit>=1.25.0
 streamlit-option-menu
+streamlit-chatbox>=1.1.0


@@ -7,8 +7,9 @@ import argparse
 import uvicorn
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
-from starlette.responses import RedirectResponse, StreamingResponse
-from server.chat import chat, knowledge_base_chat, openai_chat
+from starlette.responses import RedirectResponse
+from server.chat import (chat, knowledge_base_chat, openai_chat,
+                         bing_search_chat, duckduckgo_search_chat)
 from server.knowledge_base import (list_kbs, create_kb, delete_kb,
                                    list_docs, upload_doc, delete_doc, update_doc)
 from server.utils import BaseResponse, ListResponse
@@ -50,7 +51,13 @@ def create_app():
             tags=["Chat"],
             summary="与知识库对话")(knowledge_base_chat)
-    # app.post("/chat/bing_search_chat", tags=["Chat"], summary="与Bing搜索对话")(bing_search_chat)
+    app.post("/chat/bing_search_chat",
+             tags=["Chat"],
+             summary="与Bing搜索对话")(bing_search_chat)
+    app.post("/chat/duckduckgo_search_chat",
+             tags=["Chat"],
+             summary="与DuckDuckGo搜索对话")(duckduckgo_search_chat)
     app.get("/knowledge_base/list_knowledge_bases",
             tags=["Knowledge Base Management"],

server/chat/__init__.py

@@ -1,3 +1,5 @@
 from .chat import chat
 from .knowledge_base_chat import knowledge_base_chat
 from .openai_chat import openai_chat
+from .duckduckgo_search_chat import duckduckgo_search_chat
+from .bing_search_chat import bing_search_chat


@@ -1,3 +0,0 @@
-# TODO: finish implementing the bing_chat agent interface
-def bing_chat():
-    pass

server/chat/bing_search_chat.py

@@ -0,0 +1,67 @@
from langchain.utilities import BingSearchAPIWrapper
from configs.model_config import BING_SEARCH_URL, BING_SUBSCRIPTION_KEY
from fastapi import Body
from fastapi.responses import StreamingResponse
from configs.model_config import (llm_model_dict, LLM_MODEL, PROMPT_TEMPLATE)
from server.chat.utils import wrap_done
from langchain.chat_models import ChatOpenAI
from langchain import LLMChain
from langchain.callbacks import AsyncIteratorCallbackHandler
from typing import AsyncIterable
import asyncio
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document


def bing_search(text, result_len=3):
    if not (BING_SEARCH_URL and BING_SUBSCRIPTION_KEY):
        return [{"snippet": "please set BING_SUBSCRIPTION_KEY and BING_SEARCH_URL in os ENV",
                 "title": "env info is not found",
                 "link": "https://python.langchain.com/en/latest/modules/agents/tools/examples/bing_search.html"}]
    search = BingSearchAPIWrapper(bing_subscription_key=BING_SUBSCRIPTION_KEY,
                                  bing_search_url=BING_SEARCH_URL)
    return search.results(text, result_len)


def search_result2docs(search_results):
    docs = []
    for result in search_results:
        doc = Document(page_content=result["snippet"] if "snippet" in result.keys() else "",
                       metadata={"source": result["link"] if "link" in result.keys() else "",
                                 "filename": result["title"] if "title" in result.keys() else ""})
        docs.append(doc)
    return docs


def bing_search_chat(query: str = Body(..., description="用户输入", example="你好"),
                     ):
    async def bing_search_chat_iterator(query: str,
                                        ) -> AsyncIterable[str]:
        callback = AsyncIteratorCallbackHandler()
        model = ChatOpenAI(
            streaming=True,
            verbose=True,
            callbacks=[callback],
            openai_api_key=llm_model_dict[LLM_MODEL]["api_key"],
            openai_api_base=llm_model_dict[LLM_MODEL]["api_base_url"],
            model_name=LLM_MODEL
        )

        results = bing_search(query, result_len=3)
        docs = search_result2docs(results)
        context = "\n".join([doc.page_content for doc in docs])
        prompt = PromptTemplate(template=PROMPT_TEMPLATE, input_variables=["context", "question"])

        chain = LLMChain(prompt=prompt, llm=model)

        # Begin a task that runs in the background.
        task = asyncio.create_task(wrap_done(
            chain.acall({"context": context, "question": query}),
            callback.done),
        )

        async for token in callback.aiter():
            # Use server-sent-events to stream the response
            yield token

        await task

    return StreamingResponse(bing_search_chat_iterator(query), media_type="text/event-stream")
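Once api.py is running, the new endpoint streams its answer as server-sent events. A hypothetical client-side check (not part of the commit), assuming the API server listens on 127.0.0.1:7861, the default base_url used by webui_pages.utils.ApiRequest, and that the Bing key is configured:

    import httpx

    # The view function takes the query as a raw JSON string body, the same convention as /chat/chat.
    with httpx.stream("POST", "http://127.0.0.1:7861/chat/bing_search_chat",
                      json="你好", timeout=60.0) as r:
        for chunk in r.iter_text():
            print(chunk, end="", flush=True)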

server/chat/duckduckgo_search_chat.py

@@ -0,0 +1,62 @@
from langchain.utilities import DuckDuckGoSearchAPIWrapper
from fastapi import Body
from fastapi.responses import StreamingResponse
from configs.model_config import (llm_model_dict, LLM_MODEL, PROMPT_TEMPLATE)
from server.chat.utils import wrap_done
from langchain.chat_models import ChatOpenAI
from langchain import LLMChain
from langchain.callbacks import AsyncIteratorCallbackHandler
from typing import AsyncIterable
import asyncio
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document


def duckduckgo_search(text, result_len=3):
    search = DuckDuckGoSearchAPIWrapper()
    return search.results(text, result_len)


def search_result2docs(search_results):
    docs = []
    for result in search_results:
        doc = Document(page_content=result["snippet"] if "snippet" in result.keys() else "",
                       metadata={"source": result["link"] if "link" in result.keys() else "",
                                 "filename": result["title"] if "title" in result.keys() else ""})
        docs.append(doc)
    return docs


def duckduckgo_search_chat(query: str = Body(..., description="用户输入", example="你好"),
                           ):
    async def duckduckgo_search_chat_iterator(query: str,
                                              ) -> AsyncIterable[str]:
        callback = AsyncIteratorCallbackHandler()
        model = ChatOpenAI(
            streaming=True,
            verbose=True,
            callbacks=[callback],
            openai_api_key=llm_model_dict[LLM_MODEL]["api_key"],
            openai_api_base=llm_model_dict[LLM_MODEL]["api_base_url"],
            model_name=LLM_MODEL
        )

        results = duckduckgo_search(query, result_len=3)
        docs = search_result2docs(results)
        context = "\n".join([doc.page_content for doc in docs])
        prompt = PromptTemplate(template=PROMPT_TEMPLATE, input_variables=["context", "question"])

        chain = LLMChain(prompt=prompt, llm=model)

        # Begin a task that runs in the background.
        task = asyncio.create_task(wrap_done(
            chain.acall({"context": context, "question": query}),
            callback.done),
        )

        async for token in callback.aiter():
            # Use server-sent-events to stream the response
            yield token

        await task

    return StreamingResponse(duckduckgo_search_chat_iterator(query), media_type="text/event-stream")
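The two helpers above can also be exercised on their own; note that LangChain's DuckDuckGoSearchAPIWrapper relies on the duckduckgo-search package being installed. A small sketch (not part of the commit), run from within the module:

    # Run the search helper and inspect the Documents it is converted into.
    results = duckduckgo_search("langchain", result_len=3)
    for doc in search_result2docs(results):
        print(doc.metadata.get("filename"), "->", doc.metadata.get("source"))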


@@ -1,7 +1,7 @@
 from multiprocessing import Process, Queue
 import sys
 import os
 sys.path.append(os.path.dirname(os.path.dirname(__file__)))
 from configs.model_config import llm_model_dict, LLM_MODEL, LLM_DEVICE, LOG_PATH, logger
 import asyncio
@@ -32,7 +32,7 @@ def create_controller_app(
     controller = Controller(dispatch_method)
     sys.modules["fastchat.serve.controller"].controller = controller
-    #todo replace fastchat's log file
+    # todo replace fastchat's log file
     sys.modules["fastchat.serve.controller"].logger = logger
     logger.info(f"controller dispatch method: {dispatch_method}")
     return app
@@ -201,9 +201,6 @@ def run_openai_api(q):
     uvicorn.run(app, host=host_ip, port=openai_api_port)

 if __name__ == "__main__":
     logger.info(llm_model_dict[LLM_MODEL])
     model_path = llm_model_dict[LLM_MODEL]["local_model_path"]
@@ -245,7 +242,6 @@ if __name__ == "__main__":
     # model_worker_process.join()
     openai_api_process.join()

     # Example of calling the api after the services are started:
     # import openai
     # openai.api_key = "EMPTY" # Not support yet
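The commented-out lines above begin an example of calling the fastchat openai_api server once it is up. Filled out against the chatglm2-6b-32k entry added to llm_model_dict in this commit, it might look like the following sketch (not part of the commit); it uses the pre-1.0 openai client, as in the Streamlit code elsewhere in this diff, and assumes the openai_api server is reachable at the configured api_base_url:

    import openai

    openai.api_key = "EMPTY"                      # per the comment above, the key is not supported yet
    openai.api_base = "http://localhost:8888/v1"  # api_base_url from the chatglm2-6b-32k entry
    resp = openai.ChatCompletion.create(
        model="chatglm2-6b-32k",
        messages=[{"role": "user", "content": "你好"}],
    )
    print(resp["choices"][0]["message"]["content"])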


@@ -1,67 +1,21 @@
 import streamlit as st
+from webui_pages.utils import *
 from streamlit_option_menu import option_menu
-import openai
+from webui_pages import *

-def dialogue_page():
-    with st.sidebar:
-        dialogue_mode = st.radio("请选择对话模式",
-                                 ["LLM 对话",
-                                  "知识库问答",
-                                  "Bing 搜索问答"])
-        if dialogue_mode == "知识库问答":
-            selected_kb = st.selectbox("请选择知识库:", ["知识库1", "知识库2"])
-            with st.expander(f"{selected_kb} 中已存储文件"):
-                st.write("123")
-    # Display chat messages from history on app rerun
-    for message in st.session_state.messages:
-        with st.chat_message(message["role"]):
-            st.markdown(message["content"])
-    if prompt := st.chat_input("What is up?"):
-        st.session_state.messages.append({"role": "user", "content": prompt})
-        with st.chat_message("user"):
-            st.markdown(prompt)
-        with st.chat_message("assistant"):
-            message_placeholder = st.empty()
-            full_response = ""
-            for response in openai.ChatCompletion.create(
-                    model=OPENAI_MODEL,
-                    messages=[
-                        {"role": m["role"], "content": m["content"]}
-                        for m in st.session_state.messages
-                    ],
-                    stream=True,
-            ):
-                full_response += response.choices[0].delta.get("content", "")
-                message_placeholder.markdown(full_response + "")
-            message_placeholder.markdown(full_response)
-        st.session_state.messages.append({"role": "assistant", "content": full_response})

-def knowledge_base_edit_page():
-    pass

-def config_page():
-    pass

+api = ApiRequest()

 if __name__ == "__main__":
     st.set_page_config("langchain-chatglm WebUI")

-    if "messages" not in st.session_state:
-        st.session_state.messages = []

     pages = {"对话": {"icon": "chat",
                     "func": dialogue_page,
                     },
              "知识库管理": {"icon": "database-fill-gear",
-                       "func": knowledge_base_edit_page,
+                       "func": knowledge_base_page,
                       },
              "模型配置": {"icon": "gear",
-                      "func": config_page,
+                      "func": model_config_page,
                      }
             }

@@ -72,4 +26,4 @@ if __name__ == "__main__":
                            menu_icon="chat-quote",
                            default_index=0)

-    pages[selected_page]["func"]()
+    pages[selected_page]["func"](api)

webui_pages/__init__.py

@@ -0,0 +1,3 @@
from .dialogue import dialogue_page
from .knowledge_base import knowledge_base_page
from .model_config import model_config_page

webui_pages/dialogue/__init__.py

@@ -0,0 +1 @@
from .dialogue import dialogue_page

webui_pages/dialogue/dialogue.py

@@ -0,0 +1,38 @@
import streamlit as st
from webui_pages.utils import *
from streamlit_chatbox import *


chat_box = ChatBox()


def dialogue_page(api: ApiRequest):
    with st.sidebar:
        dialogue_mode = st.radio("请选择对话模式",
                                 ["LLM 对话",
                                  "知识库问答",
                                  "Bing 搜索问答"])
        history_len = st.slider("历史对话轮数:", 1, 10, 1)
        if dialogue_mode == "知识库问答":
            selected_kb = st.selectbox("请选择知识库:", get_kb_list())
            with st.expander(f"{selected_kb} 中已存储文件"):
                st.write(get_kb_files(selected_kb))

    # Display chat messages from history on app rerun
    chat_box.output_messages()

    if prompt := st.chat_input("请输入对话内容换行请使用Ctrl+Enter"):
        chat_box.user_say(prompt)
        chat_box.ai_say("正在思考...")
        # with api.chat_fastchat([{"role": "user", "content": "prompt"}], stream=streaming) as r:
        # todo: support history len
        text = ""
        r = api.chat_chat(prompt, no_remote_api=True)
        for t in r:
            text += t
            chat_box.update_msg(text)
        chat_box.update_msg(text, streaming=False)

        # with api.chat_chat(prompt) as r:
        #     for t in r.iter_text(None):
        #         text += t
        #         chat_box.update_msg(text)
        #     chat_box.update_msg(text, streaming=False)

webui_pages/knowledge_base/__init__.py

@@ -0,0 +1 @@
from .knowledge_base import knowledge_base_page

webui_pages/knowledge_base/knowledge_base.py

@@ -0,0 +1,6 @@
import streamlit as st
from webui_pages.utils import *


def knowledge_base_page(api: ApiRequest):
    st.write(123)
    pass

webui_pages/model_config/__init__.py

@@ -0,0 +1 @@
from .model_config import model_config_page

webui_pages/model_config/model_config.py

@@ -0,0 +1,5 @@
import streamlit as st
from webui_pages.utils import *


def model_config_page(api: ApiRequest):
    pass

webui_pages/utils.py

@@ -0,0 +1,247 @@
# This file contains shared utilities for the webui and can be used by different webuis
from typing import *
from pathlib import Path
import os
from configs.model_config import (
    KB_ROOT_PATH,
    LLM_MODEL,
    llm_model_dict,
)
import httpx
import asyncio
from server.chat.openai_chat import OpenAiChatMsgIn
from fastapi.responses import StreamingResponse


def set_httpx_timeout(timeout=60.0):
    '''
    Set the default httpx timeout to 60 seconds.
    httpx's default timeout is 5 seconds, which is not enough when waiting for an LLM answer.
    '''
    httpx._config.DEFAULT_TIMEOUT_CONFIG.connect = timeout
    httpx._config.DEFAULT_TIMEOUT_CONFIG.read = timeout
    httpx._config.DEFAULT_TIMEOUT_CONFIG.write = timeout


KB_ROOT_PATH = Path(KB_ROOT_PATH)
set_httpx_timeout()


def get_kb_list() -> List[str]:
    '''
    Get the list of knowledge bases.
    '''
    kb_list = os.listdir(KB_ROOT_PATH)
    return [x for x in kb_list if (KB_ROOT_PATH / x).is_dir()]


def get_kb_files(kb: str) -> List[str]:
    '''
    Get all files contained in a knowledge base (top-level directory only).
    '''
    kb = KB_ROOT_PATH / kb / "content"
    if kb.is_dir():
        kb_files = os.listdir(kb)
        return kb_files
    else:
        return []


def run_async(cor):
    '''
    Run asynchronous code in a synchronous environment.
    '''
    try:
        loop = asyncio.get_event_loop()
    except:
        loop = asyncio.new_event_loop()
    return loop.run_until_complete(cor)


def iter_over_async(ait, loop):
    '''
    Wrap an asynchronous generator into a synchronous generator.
    '''
    ait = ait.__aiter__()

    async def get_next():
        try:
            obj = await ait.__anext__()
            return False, obj
        except StopAsyncIteration:
            return True, None

    while True:
        done, obj = loop.run_until_complete(get_next())
        if done:
            break
        yield obj
class ApiRequest:
    '''
    A wrapper around calls to api.py that mainly:
    1. simplifies how the api is called
    2. supports api-less calls (running the view functions in server.chat.* directly to get results) without starting api.py
    '''
    def __init__(
        self,
        base_url: str = "http://127.0.0.1:7861",
        timeout: float = 60.0,
    ):
        self.base_url = base_url
        self.timeout = timeout

    def _parse_url(self, url: str) -> str:
        if (not url.startswith("http")
            and self.base_url
        ):
            part1 = self.base_url.strip(" /")
            part2 = url.strip(" /")
            return f"{part1}/{part2}"
        else:
            return url

    def get(
        self,
        url: str,
        params: Union[Dict, List[Tuple], bytes] = None,
        retry: int = 3,
        **kwargs: Any,
    ) -> Union[httpx.Response, None]:
        url = self._parse_url(url)
        kwargs.setdefault("timeout", self.timeout)
        while retry > 0:
            try:
                # params is a keyword-only argument in httpx
                return httpx.get(url, params=params, **kwargs)
            except:
                retry -= 1

    async def aget(
        self,
        url: str,
        params: Union[Dict, List[Tuple], bytes] = None,
        retry: int = 3,
        **kwargs: Any,
    ) -> Union[httpx.Response, None]:
        url = self._parse_url(url)
        kwargs.setdefault("timeout", self.timeout)
        async with httpx.AsyncClient() as client:
            while retry > 0:
                try:
                    return await client.get(url, params=params, **kwargs)
                except:
                    retry -= 1

    def post(
        self,
        url: str,
        data: Dict = None,
        json: Dict = None,
        retry: int = 3,
        stream: bool = False,
        **kwargs: Any
    ) -> Union[httpx.Response, None]:
        url = self._parse_url(url)
        kwargs.setdefault("timeout", self.timeout)
        while retry > 0:
            try:
                # return requests.post(url, data=data, json=json, stream=stream, **kwargs)
                if stream:
                    return httpx.stream("POST", url, data=data, json=json, **kwargs)
                else:
                    return httpx.post(url, data=data, json=json, **kwargs)
            except:
                retry -= 1

    async def apost(
        self,
        url: str,
        data: Dict = None,
        json: Dict = None,
        retry: int = 3,
        **kwargs: Any
    ) -> Union[httpx.Response, None]:
        url = self._parse_url(url)
        kwargs.setdefault("timeout", self.timeout)
        async with httpx.AsyncClient() as client:
            while retry > 0:
                try:
                    return await client.post(url, data=data, json=json, **kwargs)
                except:
                    retry -= 1

    def _stream2generator(self, response: StreamingResponse):
        '''
        Convert the StreamingResponse returned by a view function in api.py into a synchronous generator.
        '''
        try:
            loop = asyncio.get_event_loop()
        except:
            loop = asyncio.new_event_loop()
        return iter_over_async(response.body_iterator, loop)
    def chat_fastchat(
        self,
        messages: List[Dict],
        stream: bool = True,
        model: str = LLM_MODEL,
        temperature: float = 0.7,
        max_tokens: int = 1024,  # todo: compute max_tokens automatically from the message content
        no_remote_api=False,  # call the api view functions directly
        **kwargs: Any,
    ):
        '''
        Corresponds to the api.py /chat/fastchat endpoint
        '''
        msg = OpenAiChatMsgIn(**{
            "messages": messages,
            "stream": stream,
            "model": model,
            "temperature": temperature,
            "max_tokens": max_tokens,
            **kwargs,
        })

        if no_remote_api:
            from server.chat.openai_chat import openai_chat
            response = openai_chat(msg)
            return self._stream2generator(response)
        else:
            data = msg.dict(exclude_unset=True, exclude_none=True)
            response = self.post(
                "/chat/fastchat",
                json=data,
                stream=stream,
            )
            return response

    def chat_chat(
        self,
        query: str,
        no_remote_api: bool = False,
    ):
        '''
        Corresponds to the api.py /chat/chat endpoint
        '''
        if no_remote_api:
            from server.chat.chat import chat
            response = chat(query)
            return self._stream2generator(response)
        else:
            response = self.post("/chat/chat", json=f"{query}", stream=True)
            return response


if __name__ == "__main__":
    api = ApiRequest()

    # print(api.chat_fastchat(
    #     messages=[{"role": "user", "content": "hello"}]
    # ))

    with api.chat_chat("你好") as r:
        for t in r.iter_text(None):
            print(t)

    r = api.chat_chat("你好", no_remote_api=True)
    for t in r:
        print(t)