From ac368b3814d4d19954fa0ee2fdc13eb07b9d856c Mon Sep 17 00:00:00 2001 From: GuanYuankai Date: Tue, 4 Mar 2025 09:19:37 +0800 Subject: [PATCH 1/4] =?UTF-8?q?=E6=9B=B4=E6=96=B0model=5Fsettings=EF=BC=9A?= =?UTF-8?q?=E9=87=8D=E6=96=B0=E5=90=AF=E7=94=A8xinterface?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- chatchat_data/model_settings.yaml | 134 +++++++++++++++--------------- 1 file changed, 67 insertions(+), 67 deletions(-) diff --git a/chatchat_data/model_settings.yaml b/chatchat_data/model_settings.yaml index edadeb0..f2b30ee 100644 --- a/chatchat_data/model_settings.yaml +++ b/chatchat_data/model_settings.yaml @@ -2,7 +2,7 @@ # 默认选用的 LLM 名称 -DEFAULT_LLM_MODEL: qwen2-instruct +DEFAULT_LLM_MODEL: qwen2.5-instruct # 默认选用的 Embedding 名称 DEFAULT_EMBEDDING_MODEL: bge-large-zh-v1.5 @@ -112,78 +112,78 @@ LLM_MODEL_CONFIG: MODEL_PLATFORMS: - platform_name: xinference platform_type: xinference - api_base_url: http://127.0.0.1:9997/v1 + api_base_url: http://192.168.0.21:9997/v1 api_key: EMPTY api_proxy: '' api_concurrencies: 5 auto_detect_model: true - llm_models: [] - embed_models: [] - text2image_models: [] - image2text_models: [] - rerank_models: [bge-reranker-large] - speech2text_models: [] - text2speech_models: [] - - platform_name: ollama - platform_type: ollama - api_base_url: http://127.0.0.1:11434/v1 - api_key: EMPTY - api_proxy: '' - api_concurrencies: 5 - auto_detect_model: false - llm_models: - - qwen:7b - - qwen2:7b - embed_models: - - quentinz/bge-large-zh-v1.5 - text2image_models: [] - image2text_models: [] - rerank_models: [] - speech2text_models: [] - text2speech_models: [] - - platform_name: oneapi - platform_type: oneapi - api_base_url: http://127.0.0.1:3000/v1 - api_key: sk- - api_proxy: '' - api_concurrencies: 5 - auto_detect_model: false - llm_models: - - chatglm_pro - - chatglm_turbo - - chatglm_std - - chatglm_lite - - qwen-turbo - - qwen-plus - - qwen-max - - qwen-max-longcontext - - ERNIE-Bot - - ERNIE-Bot-turbo - - ERNIE-Bot-4 - - SparkDesk - embed_models: - - text-embedding-v1 - - Embedding-V1 - text2image_models: [] - image2text_models: [] - rerank_models: [] - speech2text_models: [] - text2speech_models: [] - - platform_name: openai - platform_type: openai - api_base_url: https://api.openai.com/v1 - api_key: sk-proj- - api_proxy: '' - api_concurrencies: 5 - auto_detect_model: false - llm_models: - - gpt-4o - - gpt-3.5-turbo - embed_models: - - text-embedding-3-small - - text-embedding-3-large + llm_models: [qwen2.5-instruct] + embed_models: [bge-large-zh-v1.5] text2image_models: [] image2text_models: [] rerank_models: [] speech2text_models: [] text2speech_models: [] +# - platform_name: ollama +# platform_type: ollama +# api_base_url: http://127.0.0.1:11434/v1 +# api_key: EMPTY +# api_proxy: '' +# api_concurrencies: 5 +# auto_detect_model: false +# llm_models: +# - qwen:7b +# - qwen2:7b +# embed_models: +# - quentinz/bge-large-zh-v1.5 +# text2image_models: [] +# image2text_models: [] +# rerank_models: [] +# speech2text_models: [] +# text2speech_models: [] +# - platform_name: oneapi +# platform_type: oneapi +# api_base_url: http://127.0.0.1:3000/v1 +# api_key: sk- +# api_proxy: '' +# api_concurrencies: 5 +# auto_detect_model: false +# llm_models: +# - chatglm_pro +# - chatglm_turbo +# - chatglm_std +# - chatglm_lite +# - qwen-turbo +# - qwen-plus +# - qwen-max +# - qwen-max-longcontext +# - ERNIE-Bot +# - ERNIE-Bot-turbo +# - ERNIE-Bot-4 +# - SparkDesk +# embed_models: +# - text-embedding-v1 +# - Embedding-V1 +# text2image_models: [] +# image2text_models: [] +# rerank_models: [] +# speech2text_models: [] +# text2speech_models: [] +# - platform_name: openai +# platform_type: openai +# api_base_url: https://api.openai.com/v1 +# api_key: sk-proj- +# api_proxy: '' +# api_concurrencies: 5 +# auto_detect_model: false +# llm_models: +# - gpt-4o +# - gpt-3.5-turbo +# embed_models: +# - text-embedding-3-small +# - text-embedding-3-large +# text2image_models: [] +# image2text_models: [] +# rerank_models: [] +# speech2text_models: [] +# text2speech_models: [] From 1ac1ab0ced85ccf0b1e09e6ce37393b56956fc71 Mon Sep 17 00:00:00 2001 From: GuanYuankai Date: Tue, 4 Mar 2025 15:49:31 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=E4=BD=BF=E7=94=A8tavily=E4=BD=9C=E4=B8=BA?= =?UTF-8?q?=E4=BA=92=E8=81=94=E7=BD=91=E6=90=9C=E7=B4=A2=E5=BC=95=E6=93=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 11 ++++++++ chatchat_data/tool_settings.yaml | 8 +++++- .../agent/tools_factory/search_internet.py | 28 +++++++++++++++++-- libs/chatchat-server/chatchat/settings.py | 9 +++++- 4 files changed, 52 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 0044ebc..47d002c 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,14 @@ /chatchat_data.bak /chatchat_data/data/knowledge_base/samples /chatchat_data +.idea/inspectionProfiles/profiles_settings.xml +.idea/Langchain-Chatchat.iml +.idea/misc.xml +.idea/modules.xml +.idea/prettier.xml +.idea/vcs.xml +.idea/inspectionProfiles/profiles_settings.xml +.idea/Langchain-Chatchat.iml +.idea/modules.xml +.idea/prettier.xml +.idea/vcs.xml diff --git a/chatchat_data/tool_settings.yaml b/chatchat_data/tool_settings.yaml index 3b2d0a4..ed8ed2a 100644 --- a/chatchat_data/tool_settings.yaml +++ b/chatchat_data/tool_settings.yaml @@ -14,7 +14,7 @@ search_local_knowledgebase: # 搜索引擎工具配置项。推荐自己部署 searx 搜索引擎,国内使用最方便。 search_internet: use: false - search_engine_name: searx + search_engine_name: tavily search_engine_config: bing: bing_search_url: https://api.bing.microsoft.com/v7.0/search @@ -30,6 +30,12 @@ search_internet: engines: [] categories: [] language: zh-CN + tavily: + tavily_api_key: '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' + include_answer: true + search_depth: advanced + include_raw_content: True + max_results: 1 top_k: 5 verbose: Origin conclude_prompt: "<指令>这是搜索到的互联网信息,请你根据这些信息进行提取并有调理,简洁的回答问题。如果无法从中得到答案,请说 “无法搜索到能回答问题的内容”。 diff --git a/libs/chatchat-server/chatchat/server/agent/tools_factory/search_internet.py b/libs/chatchat-server/chatchat/server/agent/tools_factory/search_internet.py index d7063f8..7df11a3 100644 --- a/libs/chatchat-server/chatchat/server/agent/tools_factory/search_internet.py +++ b/libs/chatchat-server/chatchat/server/agent/tools_factory/search_internet.py @@ -7,15 +7,19 @@ from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper from langchain.utilities.searx_search import SearxSearchWrapper from markdownify import markdownify from strsimpy.normalized_levenshtein import NormalizedLevenshtein +from langchain_community.tools.tavily_search import TavilySearchResults +import os from chatchat.settings import Settings from chatchat.server.pydantic_v1 import Field from chatchat.server.utils import get_tool_config from chatchat.utils import build_logger +# from tavily import TavilyClient from .tools_registry import BaseToolOutput, regist_tool, format_context logger = build_logger() + def searx_search(text ,config, top_k: int): print(f"searx_search: text: {text},config:{config},top_k:{top_k}") search = SearxSearchWrapper( @@ -85,12 +89,31 @@ def metaphor_search( return docs +def tavily_search(text, config, top_k): +# 配置tavily api key + os.environ["TAVILY_API_KEY"] = config["tavily_api_key"] + # 初始化工具(配置参数) + tavily_tool = TavilySearchResults( + include_answer=config["include_answer"], # 关键参数:启用答案生成 + search_depth=config["search_depth"], # 必须使用高级搜索模式 + include_raw_content=config["include_raw_content"], + max_results=config["max_results"] + ) + + # 直接执行搜索 + raw_results = tavily_tool.run(text) + search_results = [{k: v for k, v in item.items() if k != 'url'} for item in raw_results] + + print("=== 完整搜索返回值 ===") + print(search_results) + return search_results SEARCH_ENGINES = { "bing": bing_search, "duckduckgo": duckduckgo_search, "metaphor": metaphor_search, "searx": searx_search, + "tavily": tavily_search } @@ -98,9 +121,9 @@ def search_result2docs(search_results) -> List[Document]: docs = [] for result in search_results: doc = Document( - page_content=result["snippet"] if "snippet" in result.keys() else "", + page_content=result["content"] if "content" in result.keys() else "", metadata={ - "source": result["link"] if "link" in result.keys() else "", + "source": result["url"] if "url" in result.keys() else "", "filename": result["title"] if "title" in result.keys() else "", }, ) @@ -119,6 +142,7 @@ def search_engine(query: str, top_k:int=0, engine_name: str="", config: dict={}) text=query, config=config["search_engine_config"][engine_name], top_k=top_k ) docs = [x for x in search_result2docs(results) if x.page_content and x.page_content.strip()] + print(f"docs: {docs}") return {"docs": docs, "search_engine": engine_name} diff --git a/libs/chatchat-server/chatchat/settings.py b/libs/chatchat-server/chatchat/settings.py index b1c4d23..2ff1b5d 100644 --- a/libs/chatchat-server/chatchat/settings.py +++ b/libs/chatchat-server/chatchat/settings.py @@ -488,7 +488,7 @@ class ToolSettings(BaseFileSettings): search_internet: dict = { "use": False, - "search_engine_name": "duckduckgo", + "search_engine_name": "tavily", "search_engine_config": { "bing": { "bing_search_url": "https://api.bing.microsoft.com/v7.0/search", @@ -506,6 +506,13 @@ class ToolSettings(BaseFileSettings): "engines": [], "categories": [], "language": "zh-CN", + }, + "tavily":{ + "tavily_api_key": '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', + "include_answer": True, + "search_depth": "advanced", + "include_raw_content": True, + "max_results": 1 } }, "top_k": 5, From dfa583d344dd603e4bb7ab7f663d68a1c926b5ca Mon Sep 17 00:00:00 2001 From: GuanYuankai Date: Tue, 4 Mar 2025 15:49:52 +0800 Subject: [PATCH 3/4] Create .gitignore --- .idea/.gitignore | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 .idea/.gitignore diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..35410ca --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# 默认忽略的文件 +/shelf/ +/workspace.xml +# 基于编辑器的 HTTP 客户端请求 +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml From 42315ab3dabc65b7fc25dfc4c4c86c95e7958798 Mon Sep 17 00:00:00 2001 From: GuanYuankai Date: Tue, 4 Mar 2025 15:55:12 +0800 Subject: [PATCH 4/4] =?UTF-8?q?Revert=20"=E6=9B=B4=E6=96=B0model=5Fsetting?= =?UTF-8?q?s=EF=BC=9A=E9=87=8D=E6=96=B0=E5=90=AF=E7=94=A8xinterface"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit ac368b3814d4d19954fa0ee2fdc13eb07b9d856c. --- chatchat_data/model_settings.yaml | 134 +++++++++++++++--------------- 1 file changed, 67 insertions(+), 67 deletions(-) diff --git a/chatchat_data/model_settings.yaml b/chatchat_data/model_settings.yaml index f2b30ee..edadeb0 100644 --- a/chatchat_data/model_settings.yaml +++ b/chatchat_data/model_settings.yaml @@ -2,7 +2,7 @@ # 默认选用的 LLM 名称 -DEFAULT_LLM_MODEL: qwen2.5-instruct +DEFAULT_LLM_MODEL: qwen2-instruct # 默认选用的 Embedding 名称 DEFAULT_EMBEDDING_MODEL: bge-large-zh-v1.5 @@ -112,78 +112,78 @@ LLM_MODEL_CONFIG: MODEL_PLATFORMS: - platform_name: xinference platform_type: xinference - api_base_url: http://192.168.0.21:9997/v1 + api_base_url: http://127.0.0.1:9997/v1 api_key: EMPTY api_proxy: '' api_concurrencies: 5 auto_detect_model: true - llm_models: [qwen2.5-instruct] - embed_models: [bge-large-zh-v1.5] + llm_models: [] + embed_models: [] + text2image_models: [] + image2text_models: [] + rerank_models: [bge-reranker-large] + speech2text_models: [] + text2speech_models: [] + - platform_name: ollama + platform_type: ollama + api_base_url: http://127.0.0.1:11434/v1 + api_key: EMPTY + api_proxy: '' + api_concurrencies: 5 + auto_detect_model: false + llm_models: + - qwen:7b + - qwen2:7b + embed_models: + - quentinz/bge-large-zh-v1.5 + text2image_models: [] + image2text_models: [] + rerank_models: [] + speech2text_models: [] + text2speech_models: [] + - platform_name: oneapi + platform_type: oneapi + api_base_url: http://127.0.0.1:3000/v1 + api_key: sk- + api_proxy: '' + api_concurrencies: 5 + auto_detect_model: false + llm_models: + - chatglm_pro + - chatglm_turbo + - chatglm_std + - chatglm_lite + - qwen-turbo + - qwen-plus + - qwen-max + - qwen-max-longcontext + - ERNIE-Bot + - ERNIE-Bot-turbo + - ERNIE-Bot-4 + - SparkDesk + embed_models: + - text-embedding-v1 + - Embedding-V1 + text2image_models: [] + image2text_models: [] + rerank_models: [] + speech2text_models: [] + text2speech_models: [] + - platform_name: openai + platform_type: openai + api_base_url: https://api.openai.com/v1 + api_key: sk-proj- + api_proxy: '' + api_concurrencies: 5 + auto_detect_model: false + llm_models: + - gpt-4o + - gpt-3.5-turbo + embed_models: + - text-embedding-3-small + - text-embedding-3-large text2image_models: [] image2text_models: [] rerank_models: [] speech2text_models: [] text2speech_models: [] -# - platform_name: ollama -# platform_type: ollama -# api_base_url: http://127.0.0.1:11434/v1 -# api_key: EMPTY -# api_proxy: '' -# api_concurrencies: 5 -# auto_detect_model: false -# llm_models: -# - qwen:7b -# - qwen2:7b -# embed_models: -# - quentinz/bge-large-zh-v1.5 -# text2image_models: [] -# image2text_models: [] -# rerank_models: [] -# speech2text_models: [] -# text2speech_models: [] -# - platform_name: oneapi -# platform_type: oneapi -# api_base_url: http://127.0.0.1:3000/v1 -# api_key: sk- -# api_proxy: '' -# api_concurrencies: 5 -# auto_detect_model: false -# llm_models: -# - chatglm_pro -# - chatglm_turbo -# - chatglm_std -# - chatglm_lite -# - qwen-turbo -# - qwen-plus -# - qwen-max -# - qwen-max-longcontext -# - ERNIE-Bot -# - ERNIE-Bot-turbo -# - ERNIE-Bot-4 -# - SparkDesk -# embed_models: -# - text-embedding-v1 -# - Embedding-V1 -# text2image_models: [] -# image2text_models: [] -# rerank_models: [] -# speech2text_models: [] -# text2speech_models: [] -# - platform_name: openai -# platform_type: openai -# api_base_url: https://api.openai.com/v1 -# api_key: sk-proj- -# api_proxy: '' -# api_concurrencies: 5 -# auto_detect_model: false -# llm_models: -# - gpt-4o -# - gpt-3.5-turbo -# embed_models: -# - text-embedding-3-small -# - text-embedding-3-large -# text2image_models: [] -# image2text_models: [] -# rerank_models: [] -# speech2text_models: [] -# text2speech_models: []