Wiki and Agent improvement plan (#1680)

* Updated the agent prompt code

* Updated parts of the documentation; fixed the bug reported in the issues where bge match scores exceeded 1

* Adjustments made as requested

* Fixed dependency bugs affecting some users on the latest versions, added two tools, and removed the Google tool

* Major Agent optimization

1. Revised the UI
   (1) Highlight all models that have not been agent-aligned
   (2) Improved the output experience and logic, using Markdown

2. Lowered the barrier to using the weather tool
3. Dependency updates
   (1) vllm updated to 0.2.0, adding some new parameters
   (2) torch is recommended to be updated to 2.1
   (3) do not update pydantic to 1.10.12

* Updated some comments

* Modified some dependencies and updated some of the images used in the wiki

---------

Co-authored-by: zR <zRzRzRzRzRzRzR>
zR 2023-10-07 22:00:56 +08:00 committed by GitHub
parent 7475205eca
commit 111bc45d04
8 changed files with 105 additions and 6 deletions


@@ -0,0 +1,99 @@
import os

# Default vector store type. Options: faiss, milvus, pg.
DEFAULT_VS_TYPE = "faiss"

# Number of cached vector stores (applies to FAISS).
CACHED_VS_NUM = 1

# Length of a single text chunk in the knowledge base (does not apply to MarkdownHeaderTextSplitter).
CHUNK_SIZE = 250

# Overlap length between adjacent text chunks in the knowledge base (does not apply to MarkdownHeaderTextSplitter).
OVERLAP_SIZE = 50

# Number of matched vectors to retrieve from the knowledge base.
VECTOR_SEARCH_TOP_K = 3

# Relevance score threshold for knowledge base matching, in the range 0-1.
# The smaller the SCORE, the higher the relevance; a value of 1 is equivalent to no filtering.
# A setting of around 0.5 is recommended.
SCORE_THRESHOLD = 1

# Number of results to return from search engine matching.
SEARCH_ENGINE_TOP_K = 3

# Required variables for Bing search.
# Bing search requires a Bing Subscription Key, which you can obtain by applying for a Bing Search trial in the Azure portal.
# For how to apply, see:
# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/create-bing-search-service-resource
# For how to create a Bing API search instance with Python, see:
# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/quickstarts/rest/python
BING_SEARCH_URL = "https://api.bing.microsoft.com/v7.0/search"
# Note: this is not the API key for Bing Webmaster Tools.
# Also, if a server reports "Failed to establish a new connection: [Errno 110] Connection timed out",
# it is because the server is behind a firewall; ask the administrator to whitelist the address
# (if it is a company server, you are probably out of luck).
BING_SUBSCRIPTION_KEY = ""

# Whether to enable Chinese title enhancement, along with its related configuration.
# Title detection marks which text segments are titles in the metadata,
# then concatenates each text segment with its parent-level title to enrich the text information.
ZH_TITLE_ENHANCE = False

# The settings below usually do not need to be changed.

# Default storage path for knowledge bases.
KB_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "knowledge_base")
if not os.path.exists(KB_ROOT_PATH):
    os.mkdir(KB_ROOT_PATH)

# Default storage path for the database.
# If using SQLite, you can modify DB_ROOT_PATH directly; if using another database, modify SQLALCHEMY_DATABASE_URI instead.
DB_ROOT_PATH = os.path.join(KB_ROOT_PATH, "info.db")
SQLALCHEMY_DATABASE_URI = f"sqlite:///{DB_ROOT_PATH}"

# Available vector store types and their configuration.
kbs_config = {
    "faiss": {
    },
    "milvus": {
        "host": "127.0.0.1",
        "port": "19530",
        "user": "",
        "password": "",
        "secure": False,
    },
    "pg": {
        "connection_uri": "postgresql://postgres:postgres@127.0.0.1:5432/langchain_chatchat",
    },
}

# TextSplitter configuration. If you do not understand what these options mean, do not modify them.
text_splitter_dict = {
    "ChineseRecursiveTextSplitter": {
        "source": "huggingface",  # if "tiktoken" is selected, OpenAI's tokenization method is used
        "tokenizer_name_or_path": "gpt2",
    },
    "SpacyTextSplitter": {
        "source": "huggingface",
        "tokenizer_name_or_path": "",
    },
    "RecursiveCharacterTextSplitter": {
        "source": "tiktoken",
        "tokenizer_name_or_path": "cl100k_base",
    },
    "MarkdownHeaderTextSplitter": {
        "headers_to_split_on": [
            ("#", "head1"),
            ("##", "head2"),
            ("###", "head3"),
            ("####", "head4"),
        ]
    },
}

# Name of the TextSplitter to use.
TEXT_SPLITTER_NAME = "ChineseRecursiveTextSplitter"

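To illustrate SCORE_THRESHOLD, here is a hypothetical post-filter over (document, distance) pairs such as those returned by a FAISS similarity search; the helper name and sample data are invented and do not reflect how this repository implements the threshold.

# Hypothetical helper: with distance-style scores, smaller means more relevant, so a
# threshold of 1 performs little to no filtering when scores stay within [0, 1].
from typing import Any, List, Tuple

def filter_by_score(results: List[Tuple[Any, float]], threshold: float = 1.0) -> List[Tuple[Any, float]]:
    """Keep only results whose distance is at or below the threshold."""
    return [(doc, score) for doc, score in results if score <= threshold]

# Example: only the first two hits survive a threshold of 0.5.
hits = [("doc_a", 0.12), ("doc_b", 0.48), ("doc_c", 0.93)]
print(filter_by_score(hits, threshold=0.5))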
BIN  img/LLM_success.png  (new file; binary not shown; after: 148 KiB)

BIN  img/init_knowledge_base.jpg  (new file; binary not shown; after: 75 KiB)

BIN  (binary file not shown; after: 114 KiB)

BIN  (binary file not shown; before: 237 KiB)

BIN  (binary file not shown; before: 170 KiB)


@@ -1,5 +1,5 @@
-langchain>=0.0.302
-fschat[model_worker]==0.2.30
+langchain>=0.0.310
+fschat[model_worker]>=0.2.30
 openai
 sentence_transformers
 transformers>=4.34
@@ -17,8 +17,8 @@ SQLAlchemy==2.0.19
 faiss-cpu
 accelerate
 spacy
-PyMuPDF==1.22.5
-rapidocr_onnxruntime>=1.3.2
+PyMuPDF
+rapidocr_onnxruntime
 requests
 pathlib


@@ -1,7 +1,7 @@
-langchain>=0.0.302
+langchain>=0.0.310
 fschat[model_worker]>=0.2.30
 openai
-sentence_transformers
+sentence_transformers>=2.2.2
 transformers>=4.34
 torch>=2.0.1
 torchvision