diff --git a/.gitignore b/.gitignore index 0044ebc..47d002c 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,14 @@ /chatchat_data.bak /chatchat_data/data/knowledge_base/samples /chatchat_data +.idea/inspectionProfiles/profiles_settings.xml +.idea/Langchain-Chatchat.iml +.idea/misc.xml +.idea/modules.xml +.idea/prettier.xml +.idea/vcs.xml +.idea/inspectionProfiles/profiles_settings.xml +.idea/Langchain-Chatchat.iml +.idea/modules.xml +.idea/prettier.xml +.idea/vcs.xml diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..35410ca --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# 默认忽略的文件 +/shelf/ +/workspace.xml +# 基于编辑器的 HTTP 客户端请求 +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/chatchat_data/tool_settings.yaml b/chatchat_data/tool_settings.yaml index 3b2d0a4..ed8ed2a 100644 --- a/chatchat_data/tool_settings.yaml +++ b/chatchat_data/tool_settings.yaml @@ -14,7 +14,7 @@ search_local_knowledgebase: # 搜索引擎工具配置项。推荐自己部署 searx 搜索引擎,国内使用最方便。 search_internet: use: false - search_engine_name: searx + search_engine_name: tavily search_engine_config: bing: bing_search_url: https://api.bing.microsoft.com/v7.0/search @@ -30,6 +30,12 @@ search_internet: engines: [] categories: [] language: zh-CN + tavily: + tavily_api_key: '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' + include_answer: true + search_depth: advanced + include_raw_content: True + max_results: 1 top_k: 5 verbose: Origin conclude_prompt: "<指令>这是搜索到的互联网信息,请你根据这些信息进行提取并有调理,简洁的回答问题。如果无法从中得到答案,请说 “无法搜索到能回答问题的内容”。 diff --git a/libs/chatchat-server/chatchat/server/agent/tools_factory/search_internet.py b/libs/chatchat-server/chatchat/server/agent/tools_factory/search_internet.py index d7063f8..7df11a3 100644 --- a/libs/chatchat-server/chatchat/server/agent/tools_factory/search_internet.py +++ b/libs/chatchat-server/chatchat/server/agent/tools_factory/search_internet.py @@ -7,15 +7,19 @@ from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper from langchain.utilities.searx_search import SearxSearchWrapper from markdownify import markdownify from strsimpy.normalized_levenshtein import NormalizedLevenshtein +from langchain_community.tools.tavily_search import TavilySearchResults +import os from chatchat.settings import Settings from chatchat.server.pydantic_v1 import Field from chatchat.server.utils import get_tool_config from chatchat.utils import build_logger +# from tavily import TavilyClient from .tools_registry import BaseToolOutput, regist_tool, format_context logger = build_logger() + def searx_search(text ,config, top_k: int): print(f"searx_search: text: {text},config:{config},top_k:{top_k}") search = SearxSearchWrapper( @@ -85,12 +89,31 @@ def metaphor_search( return docs +def tavily_search(text, config, top_k): +# 配置tavily api key + os.environ["TAVILY_API_KEY"] = config["tavily_api_key"] + # 初始化工具(配置参数) + tavily_tool = TavilySearchResults( + include_answer=config["include_answer"], # 关键参数:启用答案生成 + search_depth=config["search_depth"], # 必须使用高级搜索模式 + include_raw_content=config["include_raw_content"], + max_results=config["max_results"] + ) + + # 直接执行搜索 + raw_results = tavily_tool.run(text) + search_results = [{k: v for k, v in item.items() if k != 'url'} for item in raw_results] + + print("=== 完整搜索返回值 ===") + print(search_results) + return search_results SEARCH_ENGINES = { "bing": bing_search, "duckduckgo": duckduckgo_search, "metaphor": metaphor_search, "searx": searx_search, + "tavily": tavily_search } @@ -98,9 +121,9 @@ def search_result2docs(search_results) -> List[Document]: docs = [] for result in search_results: doc = Document( - page_content=result["snippet"] if "snippet" in result.keys() else "", + page_content=result["content"] if "content" in result.keys() else "", metadata={ - "source": result["link"] if "link" in result.keys() else "", + "source": result["url"] if "url" in result.keys() else "", "filename": result["title"] if "title" in result.keys() else "", }, ) @@ -119,6 +142,7 @@ def search_engine(query: str, top_k:int=0, engine_name: str="", config: dict={}) text=query, config=config["search_engine_config"][engine_name], top_k=top_k ) docs = [x for x in search_result2docs(results) if x.page_content and x.page_content.strip()] + print(f"docs: {docs}") return {"docs": docs, "search_engine": engine_name} diff --git a/libs/chatchat-server/chatchat/settings.py b/libs/chatchat-server/chatchat/settings.py index b1c4d23..2ff1b5d 100644 --- a/libs/chatchat-server/chatchat/settings.py +++ b/libs/chatchat-server/chatchat/settings.py @@ -488,7 +488,7 @@ class ToolSettings(BaseFileSettings): search_internet: dict = { "use": False, - "search_engine_name": "duckduckgo", + "search_engine_name": "tavily", "search_engine_config": { "bing": { "bing_search_url": "https://api.bing.microsoft.com/v7.0/search", @@ -506,6 +506,13 @@ class ToolSettings(BaseFileSettings): "engines": [], "categories": [], "language": "zh-CN", + }, + "tavily":{ + "tavily_api_key": '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', + "include_answer": True, + "search_depth": "advanced", + "include_raw_content": True, + "max_results": 1 } }, "top_k": 5,