Merge branch 'dev' into pre-release
commit aaa3e8133c

README.md | 24
@@ -208,18 +208,19 @@ embedding_model_dict = {

    "m3e-base": "/Users/xxx/Downloads/m3e-base",
}
```
- If you choose to use an OpenAI Embedding model, write the model's ```key``` into `embedding_model_dict`. To use this model you need to be able to reach the official OpenAI API, or set up a proxy.
+ If you choose to use an OpenAI Embedding model, write the model's ``key`` into `embedding_model_dict`. To use this model you need to be able to reach the official OpenAI API, or set up a proxy.
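
For concreteness, a minimal sketch of what such an entry could look like; the OpenAI model name and key placeholder below are illustrative assumptions, not values from this commit:

```python
# Hypothetical example entry (assumed model name and key placeholder):
# for OpenAI embeddings the dict value is an API key rather than a local path.
embedding_model_dict = {
    "m3e-base": "/Users/xxx/Downloads/m3e-base",    # local model path
    "text-embedding-ada-002": "sk-...your-key...",  # OpenAI API key (assumption)
}
```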

### 4. Knowledge Base Initialization and Migration

The project's knowledge-base metadata is stored in a database. Before running the project, initialize the database first (we strongly recommend backing up your knowledge files before proceeding).

- If you are upgrading from a `0.1.x` release, for knowledge bases that already exist, confirm that their vector-store type and Embedding model the default settings in `configs/model_config.py` are consistent; if nothing has changed, the following command simply adds the existing knowledge-base entries to the database:
+ If you are upgrading from a `0.1.x` release, for knowledge bases that already exist, confirm that their vector-store type and Embedding model are consistent with the default settings in `configs/model_config.py`; if nothing has changed, the following command simply adds the existing knowledge-base entries to the database:

```shell
$ python init_database.py
```

- If this is your first run of the project and the knowledge base has not been created yet, or the knowledge-base type or embedding model in the config file has changed, initialize or rebuild the knowledge base with the following command:
+ If this is your first run of the project and the knowledge base has not been created yet, or the knowledge-base type or embedding model in the config file has changed, or the previous vector store was built without `normalize_L2` enabled, initialize or rebuild the knowledge base with the following command:

```shell
$ python init_database.py --recreate-vs
```
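
As a side note, a plausible sketch of how a flag like `--recreate-vs` might be wired up; this is an illustrative assumption, not the project's actual init_database.py:

```python
# Hypothetical entry point (assumption); the real init_database.py may differ.
import argparse

parser = argparse.ArgumentParser(description="initialize knowledge-base database (sketch)")
parser.add_argument("--recreate-vs", action="store_true",
                    help="drop and rebuild the vector store from the source files")
args = parser.parse_args()

if args.recreate_vs:
    print("recreating vector stores and re-registering knowledge bases ...")
else:
    print("adding existing knowledge-base info to the database ...")
```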
@@ -308,7 +309,6 @@ $ python server/llm_api_shutdown.py --serve all

#### 5.2 Start the API Service

For local deployments, **after starting the LLM service** as described in [Section 5.1](README.md#5.1-启动-LLM-服务), run the [server/api.py](server/api.py) script to start the **API** service;
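
Once the API service is up, it can be smoke-tested over HTTP. A minimal sketch, assuming the default API address `127.0.0.1:7861` noted below and a `/chat/chat` endpoint like the one used by the test script removed later in this diff:

```python
# Minimal smoke test for the local API service (sketch; adjust host, port,
# and payload to your deployment).
import requests

resp = requests.post(
    "http://127.0.0.1:7861/chat/chat",
    json={"query": "你好", "history": [], "stream": False},
)
print(resp.status_code)
print(resp.text)
```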
@@ -361,22 +361,18 @@ $ streamlit run webui.py --server.port 666

The one-click startup script startup.py has been updated to launch all Fastchat services, the API service, and the WebUI service in a single step. Example:

```shell
$ python startup.py --all-webui
$ python startup.py -a
```

- All running services can then be closed directly with `Ctrl + C`.
+ All running services can then be closed directly with `Ctrl + C`. If one press does not stop everything, press it a few more times.

- The available options include `--all-webui`, `--all-api`, `--llm-api`, `--controller`, `--openai-api`,
- `--model-worker`, `--api`, `--webui`, where:
+ The available options include `-a` (or `--all-webui`), `--all-api`, `--llm-api`, `-c` (or `--controller`), `--openai-api`,
+ `-m` (or `--model-worker`), `--api`, `--webui`, where:

- `--all-webui` starts all services the WebUI depends on;

- `--all-api` starts all services the API depends on;

- `--llm-api` starts all the LLM services Fastchat depends on;

- `--openai-api` starts only FastChat's controller and openai-api-server services;

- the remaining options start individual services.
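
To make the short/long alias pairs concrete, a minimal argparse sketch; this is an illustrative assumption, not the project's actual startup.py parser:

```python
# Hypothetical declaration of the aliases listed above (sketch only).
import argparse

parser = argparse.ArgumentParser(description="one-click launcher (sketch)")
parser.add_argument("-a", "--all-webui", action="store_true",
                    help="start every service the WebUI depends on")
parser.add_argument("--all-api", action="store_true",
                    help="start every service the API depends on")
parser.add_argument("--llm-api", action="store_true",
                    help="start all Fastchat LLM services")
parser.add_argument("-c", "--controller", action="store_true")
parser.add_argument("--openai-api", action="store_true")
parser.add_argument("-m", "--model-worker", action="store_true")
parser.add_argument("--api", action="store_true")
parser.add_argument("--webui", action="store_true")
args = parser.parse_args()
print(args)  # e.g. Namespace(all_webui=True, ...)
```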

To run a non-default model, use the `--model-name` option, for example:
@@ -385,12 +381,16 @@ $ python startup.py --all-webui

$ python startup.py --all-webui --model-name Qwen-7B-Chat
```

More information is available via `python startup.py -h`.

**Note:**

**1. The startup script launches each module's service as a separate process, which may interleave the printed output; wait until all services have come up before making calls, and call each service at its default or configured port (default LLM API service port: `127.0.0.1:8888`, default API service port: `127.0.0.1:7861`, default WebUI service port: `<local IP>:8501`).**

**2. Startup time varies with the hardware, roughly 3-10 minutes; if startup takes unusually long, check the logs under `./logs` to locate the problem.**

**3. On Linux, exiting with Ctrl+C may leave orphaned processes behind because of Linux's multiprocessing behavior; use shutdown_all.sh to shut everything down.**

## FAQ

See [FAQ](docs/FAQ.md).
@@ -34,11 +34,11 @@ FSCHAT_MODEL_WORKERS = {

    "port": 20002,
    "device": LLM_DEVICE,
-   # todo: parameters needed for multi-GPU loading
-   "gpus": None,
-   "numgpus": 1,
+   "gpus": None,  # which GPUs to use, given as a string, e.g. "0,1"
+   "num_gpus": 1,  # how many GPUs to use
+   # The following parameters are rarely needed; configure them as required
-   # "max_gpu_memory": "20GiB",
-   # "load_8bit": False,
+   # "max_gpu_memory": "20GiB",  # maximum VRAM to use per GPU
+   # "load_8bit": False,  # enable 8-bit quantization
    # "cpu_offloading": None,
    # "gptq_ckpt": None,
    # "gptq_wbits": 16,
@@ -73,6 +73,9 @@ def search_engine_chat(query: str = Body(..., description="用户输入", exampl

    if search_engine_name not in SEARCH_ENGINES.keys():
        return BaseResponse(code=404, msg=f"未支持搜索引擎 {search_engine_name}")

+   if search_engine_name == "bing" and not BING_SUBSCRIPTION_KEY:
+       return BaseResponse(code=404, msg=f"要使用Bing搜索引擎,需要设置 `BING_SUBSCRIPTION_KEY`")

    history = [History.from_data(h) for h in history]

    async def search_engine_chat_iterator(query: str,
@@ -157,6 +157,7 @@ class KBService(ABC):

    def do_search(self,
                  query: str,
                  top_k: int,
+                 score_threshold: float,
                  embeddings: Embeddings,
                  ) -> List[Document]:
        """
@@ -45,7 +45,7 @@ class MilvusKBService(KBService):

    def do_drop_kb(self):
        self.milvus.col.drop()

-   def do_search(self, query: str, top_k: int, embeddings: Embeddings):
+   def do_search(self, query: str, top_k: int, score_threshold: float, embeddings: Embeddings):
+       # todo: support score threshold
        self._load_milvus(embeddings=embeddings)
        return self.milvus.similarity_search_with_score(query, top_k)
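
Until that todo is addressed, one way the new `score_threshold` parameter could be honored (here and in the PG service below) is a post-filter over the scored results; a hedged sketch, noting that whether smaller or larger scores are better depends on the store's distance metric:

```python
# Hypothetical post-filter (not from this commit) for the (Document, score)
# pairs returned by similarity_search_with_score. With L2 distance, smaller
# means closer, so results at or below the threshold are kept; invert the
# comparison for similarity metrics where larger is better.
from typing import List, Optional, Tuple

from langchain.docstore.document import Document

def filter_by_score_threshold(
    docs_with_scores: List[Tuple[Document, float]],
    score_threshold: Optional[float],
) -> List[Tuple[Document, float]]:
    if score_threshold is None:
        return docs_with_scores
    return [(d, s) for d, s in docs_with_scores if s <= score_threshold]
```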
@@ -70,7 +70,8 @@ class MilvusKBService(KBService):

        self.milvus.col.delete(expr=f'pk in {delete_list}')

    def do_clear_vs(self):
-       self.milvus.col.drop()
+       if not self.milvus.col:
+           self.milvus.col.drop()


if __name__ == '__main__':
@@ -43,7 +43,7 @@ class PGKBService(KBService):

        '''))
        connect.commit()

-   def do_search(self, query: str, top_k: int, embeddings: Embeddings):
+   def do_search(self, query: str, top_k: int, score_threshold: float, embeddings: Embeddings):
+       # todo: support score threshold
        self._load_pg_vector(embeddings=embeddings)
        return self.pg_vector.similarity_search_with_score(query, top_k)
@@ -1 +1,2 @@

- ps -eo pid,user,cmd|grep -P 'server/api.py|webui.py|fastchat.serve'|grep -v grep|awk '{print $1}'|xargs kill -9
+ # grep on macOS may not support the -P option; install GNU grep via Homebrew, or use the ggrep command instead
+ ps -eo pid,user,cmd|grep -P 'server/api.py|webui.py|fastchat.serve|multiprocessing'|grep -v grep|awk '{print $1}'|xargs kill -9
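
As an aside, a more portable teardown could use the third-party psutil package instead of the grep pipeline; a hedged sketch, not part of this commit:

```python
# Hypothetical cross-platform equivalent of shutdown_all.sh (assumes psutil
# is installed). Matches the same command-line patterns and force-kills them.
import psutil

PATTERNS = ("server/api.py", "webui.py", "fastchat.serve", "multiprocessing")

for proc in psutil.process_iter(["pid", "cmdline"]):
    cmdline = " ".join(proc.info["cmdline"] or [])
    if any(pat in cmdline for pat in PATTERNS):
        try:
            proc.kill()  # SIGKILL on POSIX, mirroring the script's kill -9
        except psutil.NoSuchProcess:
            pass
```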
@@ -201,7 +201,7 @@ def run_model_worker(

):
    import uvicorn

-   kwargs = FSCHAT_MODEL_WORKERS[LLM_MODEL].copy()
+   kwargs = FSCHAT_MODEL_WORKERS[model_name].copy()
    host = kwargs.pop("host")
    port = kwargs.pop("port")
    model_path = llm_model_dict[model_name].get("local_model_path", "")
@@ -1,41 +0,0 @@

import requests
import json

if __name__ == "__main__":
    url = 'http://localhost:7861/chat/chat'
    headers = {
        'accept': 'application/json',
        'Content-Type': 'application/json',
    }

    data = {
        "query": "请用100字左右的文字介绍自己",
        "history": [
            {
                "role": "user",
                "content": "你好"
            },
            {
                "role": "assistant",
                "content": "你好,我是 ChatGLM"
            }
        ],
        "stream": True
    }

    response = requests.post(url, headers=headers, data=json.dumps(data), stream=True)
    if response.status_code == 200:
        for line in response.iter_content(decode_unicode=True):
            print(line, flush=True)
    else:
        print("Error:", response.status_code)


    r = requests.post(
        openai_url + "/chat/completions",
        json={"model": LLM_MODEL, "messages": "你好", "max_tokens": 1000})
    data = r.json()
    print(f"/chat/completions\n")
    print(data)
    assert "choices" in data
@@ -4,6 +4,7 @@ import sys

from pathlib import Path

sys.path.append(str(Path(__file__).parent.parent.parent))
+ from configs.model_config import BING_SUBSCRIPTION_KEY
from configs.server_config import API_SERVER, api_address

from pprint import pprint
@@ -39,7 +40,7 @@ data = {

        },
        {
            "role": "assistant",
-           "content": "你好,我是 ChatGLM"
+           "content": "你好,我是人工智能大模型"
        }
    ],
    "stream": True
@@ -100,9 +101,30 @@ def test_knowledge_chat(api="/chat/knowledge_base_chat"):


def test_search_engine_chat(api="/chat/search_engine_chat"):
    global data

    data["query"] = "室温超导最新进展是什么样?"

    url = f"{api_base_url}{api}"
    for se in ["bing", "duckduckgo"]:
-       dump_input(data, api)
        data["search_engine_name"] = se
+       dump_input(data, api + f" by {se}")
        response = requests.post(url, json=data, stream=True)
        dump_output(response, api)
        if se == "bing" and not BING_SUBSCRIPTION_KEY:
            data = response.json()
            assert data["code"] == 404
            assert data["msg"] == f"要使用Bing搜索引擎,需要设置 `BING_SUBSCRIPTION_KEY`"

        print("\n")
        print("=" * 30 + api + f" by {se} output" + "=" * 30)
        first = True
        for line in response.iter_content(None, decode_unicode=True):
            data = json.loads(line)
            assert "docs" in data and len(data["docs"]) > 0
            if first:
                for doc in data.get("docs", []):
                    print(doc)
                first = False
            print(data["answer"], end="", flush=True)
        assert response.status_code == 200
@@ -80,8 +80,13 @@ def dialogue_page(api: ApiRequest):

            # chunk_content = st.checkbox("关联上下文", False, disabled=True)
            # chunk_size = st.slider("关联长度:", 0, 500, 250, disabled=True)
        elif dialogue_mode == "搜索引擎问答":
+           search_engine_list = list(SEARCH_ENGINES.keys())
            with st.expander("搜索引擎配置", True):
-               search_engine = st.selectbox("请选择搜索引擎", SEARCH_ENGINES.keys(), 0)
+               search_engine = st.selectbox(
+                   label="请选择搜索引擎",
+                   options=search_engine_list,
+                   index=search_engine_list.index("duckduckgo") if "duckduckgo" in search_engine_list else 0,
+               )
                se_top_k = st.number_input("匹配搜索结果条数:", 1, 20, 3)

    # Display chat messages from history on app rerun
@@ -125,11 +130,12 @@ def dialogue_page(api: ApiRequest):

        ])
        text = ""
        for d in api.search_engine_chat(prompt, search_engine, se_top_k):
            if error_msg := check_error_msg(d):  # check whether an error occurred
                st.error(error_msg)
-           text += d["answer"]
-           chat_box.update_msg(text, 0)
-           chat_box.update_msg("\n\n".join(d["docs"]), 1, streaming=False)
+           else:
+               text += d["answer"]
+               chat_box.update_msg(text, 0)
+               chat_box.update_msg("\n\n".join(d["docs"]), 1, streaming=False)
+       chat_box.update_msg(text, 0, streaming=False)

        now = datetime.now()