Merge pull request 'dev/search_internet_tavily' (#1) from dev/search_internet_tavily into main

Reviewed-on: #1
This commit is contained in:
guanyuankai 2025-03-04 15:55:49 +08:00
commit 04db85f02d
5 changed files with 60 additions and 4 deletions

11
.gitignore vendored
View File

@ -8,3 +8,14 @@
/chatchat_data.bak
/chatchat_data/data/knowledge_base/samples
/chatchat_data
.idea/inspectionProfiles/profiles_settings.xml
.idea/Langchain-Chatchat.iml
.idea/misc.xml
.idea/modules.xml
.idea/prettier.xml
.idea/vcs.xml
.idea/inspectionProfiles/profiles_settings.xml
.idea/Langchain-Chatchat.iml
.idea/modules.xml
.idea/prettier.xml
.idea/vcs.xml

8
.idea/.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
# 默认忽略的文件
/shelf/
/workspace.xml
# 基于编辑器的 HTTP 客户端请求
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

View File

@ -14,7 +14,7 @@ search_local_knowledgebase:
# 搜索引擎工具配置项。推荐自己部署 searx 搜索引擎,国内使用最方便。
search_internet:
use: false
search_engine_name: searx
search_engine_name: tavily
search_engine_config:
bing:
bing_search_url: https://api.bing.microsoft.com/v7.0/search
@ -30,6 +30,12 @@ search_internet:
engines: []
categories: []
language: zh-CN
tavily:
tavily_api_key: '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'
include_answer: true
search_depth: advanced
include_raw_content: True
max_results: 1
top_k: 5
verbose: Origin
conclude_prompt: "<指令>这是搜索到的互联网信息,请你根据这些信息进行提取并有调理,简洁的回答问题。如果无法从中得到答案,请说 “无法搜索到能回答问题的内容”。

View File

@ -7,15 +7,19 @@ from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
from langchain.utilities.searx_search import SearxSearchWrapper
from markdownify import markdownify
from strsimpy.normalized_levenshtein import NormalizedLevenshtein
from langchain_community.tools.tavily_search import TavilySearchResults
import os
from chatchat.settings import Settings
from chatchat.server.pydantic_v1 import Field
from chatchat.server.utils import get_tool_config
from chatchat.utils import build_logger
# from tavily import TavilyClient
from .tools_registry import BaseToolOutput, regist_tool, format_context
logger = build_logger()
def searx_search(text ,config, top_k: int):
print(f"searx_search: text: {text},config:{config},top_k:{top_k}")
search = SearxSearchWrapper(
@ -85,12 +89,31 @@ def metaphor_search(
return docs
def tavily_search(text, config, top_k):
    """Run a Tavily web search and return its result dicts.

    Args:
        text: Query string passed to the Tavily tool.
        config: The ``tavily`` section of the search-engine config. Reads
            ``tavily_api_key``, ``include_answer``, ``search_depth``,
            ``include_raw_content`` and, optionally, ``max_results``.
        top_k: Result limit used when ``config`` carries no ``max_results``.

    Returns:
        A list of result dicts exactly as the tool produced them (shallow
        copies). The ``url`` key is kept so that ``search_result2docs`` can
        fill in the document source. An empty list is returned when the tool
        does not hand back a list.

    Raises:
        ValueError: If no API key is available from ``config`` or from the
            ``TAVILY_API_KEY`` environment variable.
    """
    # NOTE(review): prefer supplying the key through the environment rather
    # than a committed config file; the config value still wins when present.
    api_key = config.get("tavily_api_key") or os.environ.get("TAVILY_API_KEY")
    if not api_key:
        raise ValueError(
            "Tavily API key is missing: set 'tavily_api_key' in the tavily "
            "config or the TAVILY_API_KEY environment variable."
        )
    # The tool is constructed without an explicit key, so it is expected to
    # pick the key up from this environment variable.
    os.environ["TAVILY_API_KEY"] = api_key

    tavily_tool = TavilySearchResults(
        include_answer=config["include_answer"],
        search_depth=config["search_depth"],
        include_raw_content=config["include_raw_content"],
        # Honour the caller's top_k when the config does not pin a limit.
        max_results=config.get("max_results", top_k),
    )

    raw_results = tavily_tool.run(text)
    # Guard against a non-list return (the tool may report a failure as a
    # plain value instead of raising); iterating it would crash on .items().
    if not isinstance(raw_results, list):
        logger.error(
            "tavily_search: unexpected response for %r: %r", text, raw_results
        )
        return []

    search_results = [dict(item) for item in raw_results if isinstance(item, dict)]
    logger.debug("tavily_search results: %s", search_results)
    return search_results
# Maps the configured search-engine name to the function that performs the
# search; each function is called with (text, config, top_k).
SEARCH_ENGINES = {
    "bing": bing_search,
    "duckduckgo": duckduckgo_search,
    "metaphor": metaphor_search,
    "searx": searx_search,
    "tavily": tavily_search,
}
@ -98,9 +121,9 @@ def search_result2docs(search_results) -> List[Document]:
docs = []
for result in search_results:
doc = Document(
page_content=result["snippet"] if "snippet" in result.keys() else "",
page_content=result["content"] if "content" in result.keys() else "",
metadata={
"source": result["link"] if "link" in result.keys() else "",
"source": result["url"] if "url" in result.keys() else "",
"filename": result["title"] if "title" in result.keys() else "",
},
)
@ -119,6 +142,7 @@ def search_engine(query: str, top_k:int=0, engine_name: str="", config: dict={})
text=query, config=config["search_engine_config"][engine_name], top_k=top_k
)
docs = [x for x in search_result2docs(results) if x.page_content and x.page_content.strip()]
print(f"docs: {docs}")
return {"docs": docs, "search_engine": engine_name}

View File

@ -488,7 +488,7 @@ class ToolSettings(BaseFileSettings):
search_internet: dict = {
"use": False,
"search_engine_name": "duckduckgo",
"search_engine_name": "tavily",
"search_engine_config": {
"bing": {
"bing_search_url": "https://api.bing.microsoft.com/v7.0/search",
@ -506,6 +506,13 @@ class ToolSettings(BaseFileSettings):
"engines": [],
"categories": [],
"language": "zh-CN",
},
"tavily":{
"tavily_api_key": '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',
"include_answer": True,
"search_depth": "advanced",
"include_raw_content": True,
"max_results": 1
}
},
"top_k": 5,