diff --git a/README.md b/README.md
index 991487b..208d5de 100644
--- a/README.md
+++ b/README.md
@@ -208,18 +208,19 @@ embedding_model_dict = {
     "m3e-base": "/Users/xxx/Downloads/m3e-base",
 }
 ```
-If you choose to use an OpenAI Embedding model, write the model's ```key``` into `embedding_model_dict`. To use this model, you need to be able to reach the official OpenAI API, or set up a proxy.
+
+If you choose to use an OpenAI Embedding model, write the model's `key` into `embedding_model_dict`. To use this model, you need to be able to reach the official OpenAI API, or set up a proxy.
 
 ### 4. Knowledge Base Initialization and Migration
 
 The project's knowledge base metadata is stored in a database. Please initialize the database before running the project for the first time (we strongly recommend backing up your knowledge files before doing so).
 
-- If you are upgrading from a `0.1.x` version, please confirm that the vector store type and Embedding model of your existing knowledge bases match the default settings in `configs/model_config.py`; if nothing has changed, simply add the existing knowledge base information to the database with:
+- If you are upgrading from a `0.1.x` version, please confirm that the vector store type and Embedding model of your existing knowledge bases are consistent with the default settings in `configs/model_config.py`; if nothing has changed, simply add the existing knowledge base information to the database with:
 
   ```shell
   $ python init_database.py
   ```
-- If this is your first run and the knowledge base has not been set up yet, or the knowledge base type or embedding model in the configuration file has changed, initialize or rebuild the knowledge base with:
+- If this is your first run and the knowledge base has not been set up yet, or the knowledge base type or embedding model in the configuration file has changed, or the previous vector store was built without `normalize_L2` enabled, initialize or rebuild the knowledge base with:
 
   ```shell
   $ python init_database.py --recreate-vs
@@ -308,7 +309,6 @@ $ python server/llm_api_shutdown.py --serve all
 
 ![image](https://github.com/chatchat-space/Langchain-Chatchat/assets/22924096/4e056c1c-5c4b-4865-a1af-859cd58a625d)
 
-
 #### 5.2 Start the API Service
 
 For local deployment, **after starting the LLM service** as described in [Section 5.1](README.md#5.1-启动-LLM-服务), run the [server/api.py](server/api.py) script to start the **API** service;
 
@@ -361,22 +361,18 @@ $ streamlit run webui.py --server.port 666
 The one-click startup script startup.py has been updated to launch all FastChat services, the API service, and the WebUI service at once. Example:
 
 ```shell
-$ python startup.py --all-webui
+$ python startup.py -a
 ```
 
-All running services can be shut down directly with `Ctrl + C`.
+All running services can be shut down directly with `Ctrl + C`. If a single press does not stop everything, press it a few more times.
 
-Available options include `--all-webui`, `--all-api`, `--llm-api`, `--controller`, `--openai-api`,
-`--model-worker`, `--api`, and `--webui`, where:
+Available options include `-a` (or `--all-webui`), `--all-api`, `--llm-api`, `-c` (or `--controller`), `--openai-api`,
+`-m` (or `--model-worker`), `--api`, and `--webui`, where:
 
 - `--all-webui` starts every service the WebUI depends on;
-
 - `--all-api` starts every service the API depends on;
-
 - `--llm-api` starts all LLM services that FastChat depends on;
-
 - `--openai-api` starts only FastChat's controller and openai-api-server services;
-
 - the remaining options start individual services.
 
 To use a model other than the default, pass the `--model-name` option, for example:
 
@@ -385,12 +381,16 @@ $ python startup.py --all-webui
 $ python startup.py --all-webui --model-name Qwen-7B-Chat
 ```
 
+Run `python startup.py -h` for more details.
+
 **Notes:**
 
 **1. The startup script launches each module's service as a separate process, which may scramble the order of printed output. Wait until all services have been brought up before making any calls, and call each service on its default or specified port (default LLM API service port: `127.0.0.1:8888`; default API service port: `127.0.0.1:7861`; default WebUI service port: `<local IP>:8501`).**
 
 **2. Startup time varies by device, typically 3-10 minutes. If the services fail to come up after a long wait, check the logs under the `./logs` directory to locate the problem.**
 
+**3. On Linux, exiting with `Ctrl + C` may leave orphaned `multiprocessing` child processes behind because of how Linux spawns processes; use `shutdown_all.sh` to shut everything down in that case.**
+
 ## FAQ
 
 See [FAQ](docs/FAQ.md).
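Once the one-click script has brought everything up, the API service can be exercised directly. The sketch below is illustrative and not part of this patch: it targets the default API port from note 1 above, and the `/chat/chat` payload shape follows the project's API test scripts; adjust the host, port, or payload if your deployment differs.

```python
import requests

# Default API service address from the notes above; change if overridden.
url = "http://127.0.0.1:7861/chat/chat"
payload = {
    "query": "你好",   # the user message
    "history": [],     # prior turns, if any
    "stream": True,    # ask the server to stream the answer
}

# The endpoint streams the answer incrementally; print chunks as they arrive.
with requests.post(url, json=payload, stream=True) as response:
    response.raise_for_status()
    for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="", flush=True)
```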
diff --git a/configs/server_config.py.example b/configs/server_config.py.example
index 5f37779..b0f37bf 100644
--- a/configs/server_config.py.example
+++ b/configs/server_config.py.example
@@ -34,11 +34,11 @@ FSCHAT_MODEL_WORKERS = {
     "port": 20002,
     "device": LLM_DEVICE,
     # todo: parameters that must be configured for multi-GPU loading
-    "gpus": None,
-    "numgpus": 1,
+    "gpus": None,  # GPUs to use, given as a string, e.g. "0,1"
+    "num_gpus": 1,  # number of GPUs to use
     # The following are uncommon parameters; configure them as needed
-    # "max_gpu_memory": "20GiB",
-    # "load_8bit": False,
+    # "max_gpu_memory": "20GiB",  # maximum VRAM to occupy per GPU
+    # "load_8bit": False,  # enable 8-bit quantization
     # "cpu_offloading": None,
     # "gptq_ckpt": None,
     # "gptq_wbits": 16,
diff --git a/server/chat/search_engine_chat.py b/server/chat/search_engine_chat.py
index 032d06a..8a2633b 100644
--- a/server/chat/search_engine_chat.py
+++ b/server/chat/search_engine_chat.py
@@ -73,6 +73,9 @@ def search_engine_chat(query: str = Body(..., description="用户输入", exampl
     if search_engine_name not in SEARCH_ENGINES.keys():
         return BaseResponse(code=404, msg=f"未支持搜索引擎 {search_engine_name}")
 
+    if search_engine_name == "bing" and not BING_SUBSCRIPTION_KEY:
+        return BaseResponse(code=404, msg="要使用Bing搜索引擎,需要设置 `BING_SUBSCRIPTION_KEY`")
+
     history = [History.from_data(h) for h in history]
 
     async def search_engine_chat_iterator(query: str,
diff --git a/server/knowledge_base/kb_service/base.py b/server/knowledge_base/kb_service/base.py
index 9af5b0e..8d1de48 100644
--- a/server/knowledge_base/kb_service/base.py
+++ b/server/knowledge_base/kb_service/base.py
@@ -157,6 +157,7 @@ class KBService(ABC):
     def do_search(self,
                   query: str,
                   top_k: int,
+                  score_threshold: float,
                   embeddings: Embeddings,
                   ) -> List[Document]:
         """
diff --git a/server/knowledge_base/kb_service/milvus_kb_service.py b/server/knowledge_base/kb_service/milvus_kb_service.py
index ba74c14..78c22f4 100644
--- a/server/knowledge_base/kb_service/milvus_kb_service.py
+++ b/server/knowledge_base/kb_service/milvus_kb_service.py
@@ -45,7 +45,7 @@ class MilvusKBService(KBService):
     def do_drop_kb(self):
         self.milvus.col.drop()
 
-    def do_search(self, query: str, top_k: int, embeddings: Embeddings):
+    def do_search(self, query: str, top_k: int, score_threshold: float, embeddings: Embeddings):
         # todo: support score threshold
         self._load_milvus(embeddings=embeddings)
         return self.milvus.similarity_search_with_score(query, top_k)
@@ -70,7 +70,8 @@ class MilvusKBService(KBService):
         self.milvus.col.delete(expr=f'pk in {delete_list}')
 
     def do_clear_vs(self):
-        self.milvus.col.drop()
+        if self.milvus.col:
+            self.milvus.col.drop()
 
 
 if __name__ == '__main__':
diff --git a/server/knowledge_base/kb_service/pg_kb_service.py b/server/knowledge_base/kb_service/pg_kb_service.py
index 38d8065..6876bd8 100644
--- a/server/knowledge_base/kb_service/pg_kb_service.py
+++ b/server/knowledge_base/kb_service/pg_kb_service.py
@@ -43,7 +43,7 @@ class PGKBService(KBService):
             '''))
             connect.commit()
 
-    def do_search(self, query: str, top_k: int, embeddings: Embeddings):
+    def do_search(self, query: str, top_k: int, score_threshold: float, embeddings: Embeddings):
         # todo: support score threshold
         self._load_pg_vector(embeddings=embeddings)
         return self.pg_vector.similarity_search_with_score(query, top_k)
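Both `do_search` overrides above accept the new `score_threshold` argument but still carry a `# todo: support score threshold` marker. One minimal way to honor it, sketched here as a hypothetical post-filtering helper rather than anything in this patch, assumes distance-style scores where smaller means closer (as with L2-normalized indexes); flip the comparison for similarity-style scores.

```python
from typing import List, Optional, Tuple

from langchain.docstore.document import Document


def filter_by_score_threshold(
    docs_with_scores: List[Tuple[Document, float]],
    score_threshold: Optional[float] = None,
) -> List[Tuple[Document, float]]:
    # With no threshold configured, pass every hit through unchanged.
    if score_threshold is None:
        return docs_with_scores
    # Keep only hits whose distance is within the threshold.
    return [(doc, score) for doc, score in docs_with_scores if score <= score_threshold]
```

A concrete override could then end with `return filter_by_score_threshold(self.milvus.similarity_search_with_score(query, top_k), score_threshold)`.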
diff --git a/shutdown_all.sh b/shutdown_all.sh
index 961260d..8c64806 100644
--- a/shutdown_all.sh
+++ b/shutdown_all.sh
@@ -1 +1,2 @@
-ps -eo pid,user,cmd|grep -P 'server/api.py|webui.py|fastchat.serve'|grep -v grep|awk '{print $1}'|xargs kill -9
\ No newline at end of file
+# grep on macOS may not support the -P option; install GNU grep via Homebrew, or use the ggrep command
+ps -eo pid,user,cmd|grep -P 'server/api.py|webui.py|fastchat.serve|multiprocessing'|grep -v grep|awk '{print $1}'|xargs kill -9
\ No newline at end of file
diff --git a/startup.py b/startup.py
index df00851..c8706e2 100644
--- a/startup.py
+++ b/startup.py
@@ -201,7 +201,7 @@ def run_model_worker(
 ):
     import uvicorn
 
-    kwargs = FSCHAT_MODEL_WORKERS[LLM_MODEL].copy()
+    kwargs = FSCHAT_MODEL_WORKERS[model_name].copy()
     host = kwargs.pop("host")
     port = kwargs.pop("port")
     model_path = llm_model_dict[model_name].get("local_model_path", "")
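For context on the `run_model_worker` fix: when `startup.py` is invoked with `--model-name`, the worker spawned for that model previously read `FSCHAT_MODEL_WORKERS[LLM_MODEL]` and so always inherited the default model's worker settings. A condensed, hypothetical sketch of the corrected lookup (the real function goes on to assemble the full FastChat worker arguments and launch it with uvicorn):

```python
# Import paths assumed from the config modules touched elsewhere in this patch.
from configs.model_config import llm_model_dict
from configs.server_config import FSCHAT_MODEL_WORKERS


def build_worker_kwargs(model_name: str) -> dict:
    # Read the settings for the model this worker actually serves,
    # not the global default LLM_MODEL.
    kwargs = FSCHAT_MODEL_WORKERS[model_name].copy()
    host = kwargs.pop("host")
    port = kwargs.pop("port")
    # Resolve the local checkpoint path for the requested model.
    kwargs["model_path"] = llm_model_dict[model_name].get("local_model_path", "")
    return {"host": host, "port": port, **kwargs}
```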
diff --git a/tests/api/stream_api_test.py b/tests/api/stream_api_test.py
deleted file mode 100644
index 2902c8a..0000000
--- a/tests/api/stream_api_test.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import requests
-import json
-
-if __name__ == "__main__":
-    url = 'http://localhost:7861/chat/chat'
-    headers = {
-        'accept': 'application/json',
-        'Content-Type': 'application/json',
-    }
-
-    data = {
-        "query": "请用100字左右的文字介绍自己",
-        "history": [
-            {
-                "role": "user",
-                "content": "你好"
-            },
-            {
-                "role": "assistant",
-                "content": "你好,我是 ChatGLM"
-            }
-        ],
-        "stream": True
-    }
-
-    response = requests.post(url, headers=headers, data=json.dumps(data), stream=True)
-    if response.status_code == 200:
-        for line in response.iter_content(decode_unicode=True):
-            print(line, flush=True)
-    else:
-        print("Error:", response.status_code)
-
-
-    r = requests.post(
-        openai_url + "/chat/completions",
-        json={"model": LLM_MODEL, "messages": "你好", "max_tokens": 1000})
-    data = r.json()
-    print(f"/chat/completions\n")
-    print(data)
-    assert "choices" in data
-
diff --git a/tests/api/test_stream_chat_api.py b/tests/api/test_stream_chat_api.py
index 56d3237..ad9d3d8 100644
--- a/tests/api/test_stream_chat_api.py
+++ b/tests/api/test_stream_chat_api.py
@@ -4,6 +4,7 @@ import sys
 from pathlib import Path
 sys.path.append(str(Path(__file__).parent.parent.parent))
+from configs.model_config import BING_SUBSCRIPTION_KEY
 from configs.server_config import API_SERVER, api_address
 
 from pprint import pprint
@@ -39,7 +40,7 @@ data = {
         },
         {
             "role": "assistant",
-            "content": "你好,我是 ChatGLM"
+            "content": "你好,我是人工智能大模型"
         }
     ],
     "stream": True
@@ -100,9 +101,30 @@ def test_knowledge_chat(api="/chat/knowledge_base_chat"):
 
 def test_search_engine_chat(api="/chat/search_engine_chat"):
+    global data
+
+    data["query"] = "室温超导最新进展是什么样?"
+
     url = f"{api_base_url}{api}"
     for se in ["bing", "duckduckgo"]:
-        dump_input(data, api)
+        data["search_engine_name"] = se
+        dump_input(data, api + f" by {se}")
         response = requests.post(url, json=data, stream=True)
-        dump_output(response, api)
+        if se == "bing" and not BING_SUBSCRIPTION_KEY:
+            res = response.json()
+            assert res["code"] == 404
+            assert res["msg"] == "要使用Bing搜索引擎,需要设置 `BING_SUBSCRIPTION_KEY`"
+            continue
+
+        print("\n")
+        print("=" * 30 + api + f" by {se} output" + "=" * 30)
+        first = True
+        for line in response.iter_content(None, decode_unicode=True):
+            chunk = json.loads(line)
+            assert "docs" in chunk and len(chunk["docs"]) > 0
+            if first:
+                for doc in chunk.get("docs", []):
+                    print(doc)
+                first = False
+            print(chunk["answer"], end="", flush=True)
         assert response.status_code == 200
+
diff --git a/webui_pages/dialogue/dialogue.py b/webui_pages/dialogue/dialogue.py
index a317aba..04ece7d 100644
--- a/webui_pages/dialogue/dialogue.py
+++ b/webui_pages/dialogue/dialogue.py
@@ -80,8 +80,13 @@ def dialogue_page(api: ApiRequest):
             # chunk_content = st.checkbox("关联上下文", False, disabled=True)
             # chunk_size = st.slider("关联长度:", 0, 500, 250, disabled=True)
     elif dialogue_mode == "搜索引擎问答":
+        search_engine_list = list(SEARCH_ENGINES.keys())
         with st.expander("搜索引擎配置", True):
-            search_engine = st.selectbox("请选择搜索引擎", SEARCH_ENGINES.keys(), 0)
+            search_engine = st.selectbox(
+                label="请选择搜索引擎",
+                options=search_engine_list,
+                index=search_engine_list.index("duckduckgo") if "duckduckgo" in search_engine_list else 0,
+            )
         se_top_k = st.number_input("匹配搜索结果条数:", 1, 20, 3)
 
     # Display chat messages from history on app rerun
@@ -125,11 +130,12 @@ def dialogue_page(api: ApiRequest):
             ])
             text = ""
             for d in api.search_engine_chat(prompt, search_engine, se_top_k):
-                if error_msg := check_error_msg(d):  # check whether an error occurred
+                if error_msg := check_error_msg(d):  # check whether an error occurred
                     st.error(error_msg)
-                text += d["answer"]
-                chat_box.update_msg(text, 0)
-                chat_box.update_msg("\n\n".join(d["docs"]), 1, streaming=False)
+                else:
+                    text += d["answer"]
+                    chat_box.update_msg(text, 0)
+                    chat_box.update_msg("\n\n".join(d["docs"]), 1, streaming=False)
             chat_box.update_msg(text, 0, streaming=False)
             now = datetime.now()
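Tying the new server-side guard to client code: when `bing` is selected without a `BING_SUBSCRIPTION_KEY`, `/chat/search_engine_chat` answers with a single JSON body carrying `code: 404` instead of a stream. A minimal client sketch of that handling, assuming the chunk framing used by the test above (each received chunk parses as one JSON object) and the default API port from the README:

```python
import json

import requests

# Endpoint and payload shape follow tests/api/test_stream_chat_api.py.
url = "http://127.0.0.1:7861/chat/search_engine_chat"
payload = {
    "query": "你好",
    "search_engine_name": "bing",
    "history": [],
    "stream": True,
}

response = requests.post(url, json=payload, stream=True)
for line in response.iter_content(chunk_size=None, decode_unicode=True):
    chunk = json.loads(line)
    if chunk.get("code") == 404:
        # The guard added in search_engine_chat.py fired: Bing without a key.
        print("error:", chunk["msg"])
        break
    # Normal streamed chunks carry the incremental answer and source docs.
    print(chunk.get("answer", ""), end="", flush=True)
```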