- 支持metaphor搜索引擎(无需代理,key申请简单,目前不支持中文)
- 增加默认知识库和默认搜索引擎的配置项
- 修复WEBUI弹出当前模型错误
This commit is contained in:
parent
69e5da4e7a
commit
b9b42991f6
|
|
@ -1,6 +1,9 @@
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
# 默认使用的知识库
|
||||||
|
DEFAULT_KNOWLEDGE_BASE = "samples"
|
||||||
|
|
||||||
# 默认向量库类型。可选:faiss, milvus(离线) & zilliz(在线), pg.
|
# 默认向量库类型。可选:faiss, milvus(离线) & zilliz(在线), pg.
|
||||||
DEFAULT_VS_TYPE = "faiss"
|
DEFAULT_VS_TYPE = "faiss"
|
||||||
|
|
||||||
|
|
@ -19,6 +22,9 @@ VECTOR_SEARCH_TOP_K = 3
|
||||||
# 知识库匹配相关度阈值,取值范围在0-1之间,SCORE越小,相关度越高,取到1相当于不筛选,建议设置在0.5左右
|
# 知识库匹配相关度阈值,取值范围在0-1之间,SCORE越小,相关度越高,取到1相当于不筛选,建议设置在0.5左右
|
||||||
SCORE_THRESHOLD = 1
|
SCORE_THRESHOLD = 1
|
||||||
|
|
||||||
|
# 默认搜索引擎。可选:bing, duckduckgo, metaphor
|
||||||
|
DEFAULT_SEARCH_ENGINE = "duckduckgo"
|
||||||
|
|
||||||
# 搜索引擎匹配结果数量
|
# 搜索引擎匹配结果数量
|
||||||
SEARCH_ENGINE_TOP_K = 3
|
SEARCH_ENGINE_TOP_K = 3
|
||||||
|
|
||||||
|
|
@ -36,6 +42,10 @@ BING_SEARCH_URL = "https://api.bing.microsoft.com/v7.0/search"
|
||||||
# 是因为服务器加了防火墙,需要联系管理员加白名单,如果公司的服务器的话,就别想了GG
|
# 是因为服务器加了防火墙,需要联系管理员加白名单,如果公司的服务器的话,就别想了GG
|
||||||
BING_SUBSCRIPTION_KEY = ""
|
BING_SUBSCRIPTION_KEY = ""
|
||||||
|
|
||||||
|
# metaphor搜索需要KEY
|
||||||
|
METAPHOR_API_KEY = ""
|
||||||
|
|
||||||
|
|
||||||
# 是否开启中文标题加强,以及标题增强的相关配置
|
# 是否开启中文标题加强,以及标题增强的相关配置
|
||||||
# 通过增加标题判断,判断哪些文本为标题,并在metadata中进行标记;
|
# 通过增加标题判断,判断哪些文本为标题,并在metadata中进行标记;
|
||||||
# 然后将文本与往上一级的标题进行拼合,实现文本信息的增强。
|
# 然后将文本与往上一级的标题进行拼合,实现文本信息的增强。
|
||||||
|
|
@ -49,10 +59,12 @@ KB_INFO = {
|
||||||
}
|
}
|
||||||
|
|
||||||
# 通常情况下不需要更改以下内容
|
# 通常情况下不需要更改以下内容
|
||||||
|
|
||||||
# 知识库默认存储路径
|
# 知识库默认存储路径
|
||||||
KB_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "knowledge_base")
|
KB_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "knowledge_base")
|
||||||
if not os.path.exists(KB_ROOT_PATH):
|
if not os.path.exists(KB_ROOT_PATH):
|
||||||
os.mkdir(KB_ROOT_PATH)
|
os.mkdir(KB_ROOT_PATH)
|
||||||
|
|
||||||
# 数据库默认存储路径。
|
# 数据库默认存储路径。
|
||||||
# 如果使用sqlite,可以直接修改DB_ROOT_PATH;如果使用其它数据库,请直接修改SQLALCHEMY_DATABASE_URI。
|
# 如果使用sqlite,可以直接修改DB_ROOT_PATH;如果使用其它数据库,请直接修改SQLALCHEMY_DATABASE_URI。
|
||||||
DB_ROOT_PATH = os.path.join(KB_ROOT_PATH, "info.db")
|
DB_ROOT_PATH = os.path.join(KB_ROOT_PATH, "info.db")
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
from .search_knowledge_simple import knowledge_search_simple
|
from .search_knowledge_simple import knowledge_search_simple
|
||||||
from .search_all_knowledge_once import knowledge_search_once
|
from .search_all_knowledge_once import knowledge_search_once
|
||||||
from .search_all_knowledge_more import knowledge_search_more
|
from .search_all_knowledge_more import knowledge_search_more
|
||||||
from .travel_assistant import travel_assistant
|
# from .travel_assistant import travel_assistant
|
||||||
from .calculate import calculate
|
from .calculate import calculate
|
||||||
from .translator import translate
|
from .translator import translate
|
||||||
from .weather import weathercheck
|
from .weather import weathercheck
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
from langchain.utilities import BingSearchAPIWrapper, DuckDuckGoSearchAPIWrapper
|
from langchain.utilities import BingSearchAPIWrapper, DuckDuckGoSearchAPIWrapper
|
||||||
from configs import (BING_SEARCH_URL, BING_SUBSCRIPTION_KEY,
|
from configs import (BING_SEARCH_URL, BING_SUBSCRIPTION_KEY, METAPHOR_API_KEY,
|
||||||
LLM_MODEL, SEARCH_ENGINE_TOP_K, TEMPERATURE)
|
LLM_MODEL, SEARCH_ENGINE_TOP_K, TEMPERATURE,
|
||||||
|
TEXT_SPLITTER_NAME, OVERLAP_SIZE)
|
||||||
from fastapi import Body
|
from fastapi import Body
|
||||||
from fastapi.responses import StreamingResponse
|
from fastapi.responses import StreamingResponse
|
||||||
from fastapi.concurrency import run_in_threadpool
|
from fastapi.concurrency import run_in_threadpool
|
||||||
|
|
@ -11,7 +12,7 @@ from langchain.callbacks import AsyncIteratorCallbackHandler
|
||||||
from typing import AsyncIterable
|
from typing import AsyncIterable
|
||||||
import asyncio
|
import asyncio
|
||||||
from langchain.prompts.chat import ChatPromptTemplate
|
from langchain.prompts.chat import ChatPromptTemplate
|
||||||
from typing import List, Optional
|
from typing import List, Optional, Dict
|
||||||
from server.chat.utils import History
|
from server.chat.utils import History
|
||||||
from langchain.docstore.document import Document
|
from langchain.docstore.document import Document
|
||||||
import json
|
import json
|
||||||
|
|
@ -32,8 +33,49 @@ def duckduckgo_search(text, result_len=SEARCH_ENGINE_TOP_K):
|
||||||
return search.results(text, result_len)
|
return search.results(text, result_len)
|
||||||
|
|
||||||
|
|
||||||
|
def metaphor_search(
        text: str,
        result_len: int = SEARCH_ENGINE_TOP_K,
        splitter_name: str = "SpacyTextSplitter",
        chunk_size: int = 500,
        chunk_overlap: int = OVERLAP_SIZE,
) -> List[Dict]:
    """Query the Metaphor search API and return results as snippet dicts.

    The text extracted for each hit is wrapped in a ``Document``, split into
    chunks with the splitter built by ``make_text_splitter`` and, when there
    are more chunks than ``result_len``, narrowed down through a similarity
    search against a vector store obtained from ``memo_faiss_pool``.

    Args:
        text: The search query; also used as the similarity-search query.
        result_len: Number of results requested from Metaphor and the ``k``
            used when re-ranking the chunks.
        splitter_name: Splitter name forwarded to ``make_text_splitter``.
        chunk_size: Chunk size forwarded to ``make_text_splitter``.
        chunk_overlap: Chunk overlap forwarded to ``make_text_splitter``.

    Returns:
        A list of dicts with the keys ``"snippet"``, ``"link"`` and
        ``"title"``. The list is empty when ``METAPHOR_API_KEY`` is not set.
    """
    # Imported lazily, inside the function, exactly as in the original code.
    from metaphor_python import Metaphor
    from server.knowledge_base.kb_cache.faiss_cache import memo_faiss_pool
    from server.knowledge_base.utils import make_text_splitter

    # Without an API key no request is made at all.
    if not METAPHOR_API_KEY:
        return []

    response = Metaphor(METAPHOR_API_KEY).search(
        text,
        num_results=result_len,
        use_autoprompt=True,
    )
    pages = response.get_contents().contents

    # Metaphor returns long texts, so they are split before retrieval.
    source_docs = []
    for page in pages:
        page_meta = {"link": page.url, "title": page.title}
        source_docs.append(Document(page_content=page.extract, metadata=page_meta))

    splitter = make_text_splitter(
        splitter_name=splitter_name,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = splitter.split_documents(source_docs)

    # Put the chunks into a temporary vector store and keep only the TOP_K
    # chunks returned by the similarity search.
    if len(chunks) > result_len:
        temp_store = memo_faiss_pool.new_vector_store()
        temp_store.add_documents(chunks)
        chunks = temp_store.similarity_search(text, k=result_len, score_threshold=1.0)

    results = []
    for chunk in chunks:
        results.append({
            "snippet": chunk.page_content,
            "link": chunk.metadata["link"],
            "title": chunk.metadata["title"],
        })
    return results
|
||||||
|
|
||||||
|
|
||||||
SEARCH_ENGINES = {"bing": bing_search,
|
SEARCH_ENGINES = {"bing": bing_search,
|
||||||
"duckduckgo": duckduckgo_search,
|
"duckduckgo": duckduckgo_search,
|
||||||
|
"metaphor": metaphor_search,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -140,7 +140,7 @@ if __name__ == "__main__":
|
||||||
ids = vs.add_texts([f"text added by {name}"], embeddings=embeddings)
|
ids = vs.add_texts([f"text added by {name}"], embeddings=embeddings)
|
||||||
pprint(ids)
|
pprint(ids)
|
||||||
elif r == 2: # search docs
|
elif r == 2: # search docs
|
||||||
docs = vs.similarity_search_with_score(f"{name}", top_k=3, score_threshold=1.0)
|
docs = vs.similarity_search_with_score(f"{name}", k=3, score_threshold=1.0)
|
||||||
pprint(docs)
|
pprint(docs)
|
||||||
if r == 3: # delete docs
|
if r == 3: # delete docs
|
||||||
logger.warning(f"清除 {vs_name} by {name}")
|
logger.warning(f"清除 {vs_name} by {name}")
|
||||||
|
|
|
||||||
|
|
@ -568,6 +568,8 @@ def get_server_configs() -> Dict:
|
||||||
获取configs中的原始配置项,供前端使用
|
获取configs中的原始配置项,供前端使用
|
||||||
'''
|
'''
|
||||||
from configs.kb_config import (
|
from configs.kb_config import (
|
||||||
|
DEFAULT_KNOWLEDGE_BASE,
|
||||||
|
DEFAULT_SEARCH_ENGINE,
|
||||||
DEFAULT_VS_TYPE,
|
DEFAULT_VS_TYPE,
|
||||||
CHUNK_SIZE,
|
CHUNK_SIZE,
|
||||||
OVERLAP_SIZE,
|
OVERLAP_SIZE,
|
||||||
|
|
|
||||||
7
webui.py
7
webui.py
|
|
@ -21,13 +21,6 @@ if __name__ == "__main__":
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
if not chat_box.chat_inited:
|
|
||||||
running_models = api.list_running_models()
|
|
||||||
st.toast(
|
|
||||||
f"欢迎使用 [`Langchain-Chatchat`](https://github.com/chatchat-space/Langchain-Chatchat) ! \n\n"
|
|
||||||
f"当前运行中的模型`{running_models}`, 您可以开始提问了."
|
|
||||||
)
|
|
||||||
|
|
||||||
pages = {
|
pages = {
|
||||||
"对话": {
|
"对话": {
|
||||||
"icon": "chat",
|
"icon": "chat",
|
||||||
|
|
|
||||||
|
|
@ -3,9 +3,11 @@ from webui_pages.utils import *
|
||||||
from streamlit_chatbox import *
|
from streamlit_chatbox import *
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import os
|
import os
|
||||||
from configs import LLM_MODEL, TEMPERATURE, HISTORY_LEN, PROMPT_TEMPLATES
|
from configs import (LLM_MODEL, TEMPERATURE, HISTORY_LEN, PROMPT_TEMPLATES,
|
||||||
|
DEFAULT_KNOWLEDGE_BASE, DEFAULT_SEARCH_ENGINE)
|
||||||
from typing import List, Dict
|
from typing import List, Dict
|
||||||
|
|
||||||
|
|
||||||
chat_box = ChatBox(
|
chat_box = ChatBox(
|
||||||
assistant_avatar=os.path.join(
|
assistant_avatar=os.path.join(
|
||||||
"img",
|
"img",
|
||||||
|
|
@ -55,7 +57,13 @@ def get_default_llm_model(api: ApiRequest) -> (str, bool):
|
||||||
|
|
||||||
|
|
||||||
def dialogue_page(api: ApiRequest):
|
def dialogue_page(api: ApiRequest):
|
||||||
chat_box.init_session()
|
if not chat_box.chat_inited:
|
||||||
|
default_model = get_default_llm_model(api)[0]
|
||||||
|
st.toast(
|
||||||
|
f"欢迎使用 [`Langchain-Chatchat`](https://github.com/chatchat-space/Langchain-Chatchat) ! \n\n"
|
||||||
|
f"当前运行的模型`{default_model}`, 您可以开始提问了."
|
||||||
|
)
|
||||||
|
chat_box.init_session()
|
||||||
|
|
||||||
with st.sidebar:
|
with st.sidebar:
|
||||||
# TODO: 对话模型与会话绑定
|
# TODO: 对话模型与会话绑定
|
||||||
|
|
@ -156,9 +164,13 @@ def dialogue_page(api: ApiRequest):
|
||||||
if dialogue_mode == "知识库问答":
|
if dialogue_mode == "知识库问答":
|
||||||
with st.expander("知识库配置", True):
|
with st.expander("知识库配置", True):
|
||||||
kb_list = api.list_knowledge_bases()
|
kb_list = api.list_knowledge_bases()
|
||||||
|
index = 0
|
||||||
|
if DEFAULT_KNOWLEDGE_BASE in kb_list:
|
||||||
|
index = kb_list.index(DEFAULT_KNOWLEDGE_BASE)
|
||||||
selected_kb = st.selectbox(
|
selected_kb = st.selectbox(
|
||||||
"请选择知识库:",
|
"请选择知识库:",
|
||||||
kb_list,
|
kb_list,
|
||||||
|
index=index,
|
||||||
on_change=on_kb_change,
|
on_change=on_kb_change,
|
||||||
key="selected_kb",
|
key="selected_kb",
|
||||||
)
|
)
|
||||||
|
|
@ -167,11 +179,15 @@ def dialogue_page(api: ApiRequest):
|
||||||
|
|
||||||
elif dialogue_mode == "搜索引擎问答":
|
elif dialogue_mode == "搜索引擎问答":
|
||||||
search_engine_list = api.list_search_engines()
|
search_engine_list = api.list_search_engines()
|
||||||
|
if DEFAULT_SEARCH_ENGINE in search_engine_list:
|
||||||
|
index = search_engine_list.index(DEFAULT_SEARCH_ENGINE)
|
||||||
|
else:
|
||||||
|
index = search_engine_list.index("duckduckgo") if "duckduckgo" in search_engine_list else 0
|
||||||
with st.expander("搜索引擎配置", True):
|
with st.expander("搜索引擎配置", True):
|
||||||
search_engine = st.selectbox(
|
search_engine = st.selectbox(
|
||||||
label="请选择搜索引擎",
|
label="请选择搜索引擎",
|
||||||
options=search_engine_list,
|
options=search_engine_list,
|
||||||
index=search_engine_list.index("duckduckgo") if "duckduckgo" in search_engine_list else 0,
|
index=index,
|
||||||
)
|
)
|
||||||
se_top_k = st.number_input("匹配搜索结果条数:", 1, 20, SEARCH_ENGINE_TOP_K)
|
se_top_k = st.number_input("匹配搜索结果条数:", 1, 20, SEARCH_ENGINE_TOP_K)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue