- Support the metaphor search engine (no proxy required, the API key is easy to obtain, Chinese is not supported yet)

- Add configuration options for the default knowledge base and the default search engine
- Fix the WebUI pop-up showing the wrong current model
liunux4odoo 2023-10-18 23:02:20 +08:00
parent 69e5da4e7a
commit b9b42991f6
7 changed files with 80 additions and 15 deletions

View File

@@ -1,6 +1,9 @@
 import os
+# Default knowledge base to use
+DEFAULT_KNOWLEDGE_BASE = "samples"
 # Default vector store type. Options: faiss, milvus (offline) & zilliz (online), pg.
 DEFAULT_VS_TYPE = "faiss"
@@ -19,6 +22,9 @@ VECTOR_SEARCH_TOP_K = 3
 # Relevance score threshold for knowledge base matching. Range 0-1; the lower the SCORE, the higher the relevance; 1 disables filtering. A value around 0.5 is recommended.
 SCORE_THRESHOLD = 1
+# Default search engine. Options: bing, duckduckgo, metaphor
+DEFAULT_SEARCH_ENGINE = "duckduckgo"
 # Number of search engine results to return
 SEARCH_ENGINE_TOP_K = 3
@@ -36,6 +42,10 @@ BING_SEARCH_URL = "https://api.bing.microsoft.com/v7.0/search"
 # it is because the server has a firewall; ask the administrator to add it to the whitelist. If it is a company server, don't even bother, GG
 BING_SUBSCRIPTION_KEY = ""
+# metaphor search requires a KEY
+METAPHOR_API_KEY = ""
 # Whether to enable Chinese title enhancement, plus the related settings.
 # A title check is added to decide which text segments are titles, and they are flagged in the metadata;
 # the text is then concatenated with its parent-level title to enrich the text information.
@@ -49,10 +59,12 @@ KB_INFO = {
 }
 # Normally there is no need to change anything below this line.
 # Default storage path for knowledge bases
 KB_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "knowledge_base")
 if not os.path.exists(KB_ROOT_PATH):
     os.mkdir(KB_ROOT_PATH)
 # Default storage path for the database.
 # If you use sqlite, you can modify DB_ROOT_PATH directly; if you use another database, modify SQLALCHEMY_DATABASE_URI instead.
 DB_ROOT_PATH = os.path.join(KB_ROOT_PATH, "info.db")
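The two new options added above are plain module-level constants, re-exported through the configs package (the dialogue.py hunk further down imports them that way). A minimal usage sketch, not part of this commit:

    # Hypothetical snippet: read the new defaults from the configs package.
    from configs import DEFAULT_KNOWLEDGE_BASE, DEFAULT_SEARCH_ENGINE

    print(DEFAULT_KNOWLEDGE_BASE)  # "samples"
    print(DEFAULT_SEARCH_ENGINE)   # "duckduckgo"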

View File

@@ -2,7 +2,7 @@
 from .search_knowledge_simple import knowledge_search_simple
 from .search_all_knowledge_once import knowledge_search_once
 from .search_all_knowledge_more import knowledge_search_more
-from .travel_assistant import travel_assistant
+# from .travel_assistant import travel_assistant
 from .calculate import calculate
 from .translator import translate
 from .weather import weathercheck

View File

@@ -1,6 +1,7 @@
 from langchain.utilities import BingSearchAPIWrapper, DuckDuckGoSearchAPIWrapper
-from configs import (BING_SEARCH_URL, BING_SUBSCRIPTION_KEY,
-                     LLM_MODEL, SEARCH_ENGINE_TOP_K, TEMPERATURE)
+from configs import (BING_SEARCH_URL, BING_SUBSCRIPTION_KEY, METAPHOR_API_KEY,
+                     LLM_MODEL, SEARCH_ENGINE_TOP_K, TEMPERATURE,
+                     TEXT_SPLITTER_NAME, OVERLAP_SIZE)
 from fastapi import Body
 from fastapi.responses import StreamingResponse
 from fastapi.concurrency import run_in_threadpool
@@ -11,7 +12,7 @@ from langchain.callbacks import AsyncIteratorCallbackHandler
 from typing import AsyncIterable
 import asyncio
 from langchain.prompts.chat import ChatPromptTemplate
-from typing import List, Optional
+from typing import List, Optional, Dict
 from server.chat.utils import History
 from langchain.docstore.document import Document
 import json
@@ -32,8 +33,49 @@ def duckduckgo_search(text, result_len=SEARCH_ENGINE_TOP_K):
     return search.results(text, result_len)

+def metaphor_search(
+        text: str,
+        result_len: int = SEARCH_ENGINE_TOP_K,
+        splitter_name: str = "SpacyTextSplitter",
+        chunk_size: int = 500,
+        chunk_overlap: int = OVERLAP_SIZE,
+) -> List[Dict]:
+    from metaphor_python import Metaphor
+    from server.knowledge_base.kb_cache.faiss_cache import memo_faiss_pool
+    from server.knowledge_base.utils import make_text_splitter
+
+    if not METAPHOR_API_KEY:
+        return []
+
+    client = Metaphor(METAPHOR_API_KEY)
+    search = client.search(text, num_results=result_len, use_autoprompt=True)
+    contents = search.get_contents().contents
+
+    # metaphor returns full-length documents, so split them before retrieval
+    docs = [Document(page_content=x.extract,
+                     metadata={"link": x.url, "title": x.title})
+            for x in contents]
+    text_splitter = make_text_splitter(splitter_name=splitter_name,
+                                       chunk_size=chunk_size,
+                                       chunk_overlap=chunk_overlap)
+    splitted_docs = text_splitter.split_documents(docs)
+
+    # put the split documents into a temporary vector store and keep only the TOP_K hits
+    if len(splitted_docs) > result_len:
+        vs = memo_faiss_pool.new_vector_store()
+        vs.add_documents(splitted_docs)
+        splitted_docs = vs.similarity_search(text, k=result_len, score_threshold=1.0)
+
+    docs = [{"snippet": x.page_content,
+             "link": x.metadata["link"],
+             "title": x.metadata["title"]}
+            for x in splitted_docs]
+    return docs
+
 SEARCH_ENGINES = {"bing": bing_search,
                   "duckduckgo": duckduckgo_search,
+                  "metaphor": metaphor_search,
                   }
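A minimal sketch of calling the new search function directly, assuming this hunk lives in server/chat/search_engine_chat.py (the file path is not shown in this view), METAPHOR_API_KEY has been filled in, and the metaphor_python package is installed:

    # Hypothetical snippet: metaphor_search returns a list of dicts with snippet/link/title keys,
    # or an empty list when METAPHOR_API_KEY is not configured.
    from server.chat.search_engine_chat import metaphor_search

    results = metaphor_search("retrieval augmented generation", result_len=3)
    for r in results:
        print(r["title"], r["link"])
        print(r["snippet"][:200])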

View File

@@ -140,7 +140,7 @@ if __name__ == "__main__":
         ids = vs.add_texts([f"text added by {name}"], embeddings=embeddings)
         pprint(ids)
     elif r == 2: # search docs
-        docs = vs.similarity_search_with_score(f"{name}", top_k=3, score_threshold=1.0)
+        docs = vs.similarity_search_with_score(f"{name}", k=3, score_threshold=1.0)
         pprint(docs)
     if r == 3: # delete docs
         logger.warning(f"清除 {vs_name} by {name}")

View File

@@ -568,6 +568,8 @@ def get_server_configs() -> Dict:
     Return the raw configuration values from configs for use by the frontend.
     '''
     from configs.kb_config import (
+        DEFAULT_KNOWLEDGE_BASE,
+        DEFAULT_SEARCH_ENGINE,
         DEFAULT_VS_TYPE,
         CHUNK_SIZE,
         OVERLAP_SIZE,

View File

@@ -21,13 +21,6 @@ if __name__ == "__main__":
         }
     )
-    if not chat_box.chat_inited:
-        running_models = api.list_running_models()
-        st.toast(
-            f"欢迎使用 [`Langchain-Chatchat`](https://github.com/chatchat-space/Langchain-Chatchat) ! \n\n"
-            f"当前运行中的模型`{running_models}`, 您可以开始提问了."
-        )
     pages = {
         "对话": {
             "icon": "chat",

View File

@@ -3,9 +3,11 @@ from webui_pages.utils import *
 from streamlit_chatbox import *
 from datetime import datetime
 import os
-from configs import LLM_MODEL, TEMPERATURE, HISTORY_LEN, PROMPT_TEMPLATES
+from configs import (LLM_MODEL, TEMPERATURE, HISTORY_LEN, PROMPT_TEMPLATES,
+                     DEFAULT_KNOWLEDGE_BASE, DEFAULT_SEARCH_ENGINE)
 from typing import List, Dict

 chat_box = ChatBox(
     assistant_avatar=os.path.join(
         "img",
@@ -55,7 +57,13 @@ def get_default_llm_model(api: ApiRequest) -> (str, bool):
 def dialogue_page(api: ApiRequest):
-    chat_box.init_session()
+    if not chat_box.chat_inited:
+        default_model = get_default_llm_model(api)[0]
+        st.toast(
+            f"欢迎使用 [`Langchain-Chatchat`](https://github.com/chatchat-space/Langchain-Chatchat) ! \n\n"
+            f"当前运行的模型`{default_model}`, 您可以开始提问了."
+        )
+    chat_box.init_session()

     with st.sidebar:
         # TODO: bind the dialogue model to the current session
@@ -156,9 +164,13 @@ def dialogue_page(api: ApiRequest):
     if dialogue_mode == "知识库问答":
         with st.expander("知识库配置", True):
             kb_list = api.list_knowledge_bases()
+            index = 0
+            if DEFAULT_KNOWLEDGE_BASE in kb_list:
+                index = kb_list.index(DEFAULT_KNOWLEDGE_BASE)
             selected_kb = st.selectbox(
                 "请选择知识库:",
                 kb_list,
+                index=index,
                 on_change=on_kb_change,
                 key="selected_kb",
             )
@@ -167,11 +179,15 @@ def dialogue_page(api: ApiRequest):
     elif dialogue_mode == "搜索引擎问答":
         search_engine_list = api.list_search_engines()
+        if DEFAULT_SEARCH_ENGINE in search_engine_list:
+            index = search_engine_list.index(DEFAULT_SEARCH_ENGINE)
+        else:
+            index = search_engine_list.index("duckduckgo") if "duckduckgo" in search_engine_list else 0
         with st.expander("搜索引擎配置", True):
             search_engine = st.selectbox(
                 label="请选择搜索引擎",
                 options=search_engine_list,
-                index=search_engine_list.index("duckduckgo") if "duckduckgo" in search_engine_list else 0,
+                index=index,
             )
             se_top_k = st.number_input("匹配搜索结果条数:", 1, 20, SEARCH_ENGINE_TOP_K)