Merge pull request 'dev/search_internet_tavily' (#1) from dev/search_internet_tavily into main
Reviewed-on: #1
This commit is contained in:
commit
04db85f02d
|
|
@ -8,3 +8,14 @@
|
|||
/chatchat_data.bak
|
||||
/chatchat_data/data/knowledge_base/samples
|
||||
/chatchat_data
|
||||
.idea/inspectionProfiles/profiles_settings.xml
|
||||
.idea/Langchain-Chatchat.iml
|
||||
.idea/misc.xml
|
||||
.idea/modules.xml
|
||||
.idea/prettier.xml
|
||||
.idea/vcs.xml
|
||||
.idea/inspectionProfiles/profiles_settings.xml
|
||||
.idea/Langchain-Chatchat.iml
|
||||
.idea/modules.xml
|
||||
.idea/prettier.xml
|
||||
.idea/vcs.xml
|
||||
|
|
|
|||
|
|
@ -0,0 +1,8 @@
|
|||
# 默认忽略的文件
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# 基于编辑器的 HTTP 客户端请求
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
||||
|
|
@ -14,7 +14,7 @@ search_local_knowledgebase:
|
|||
# 搜索引擎工具配置项。推荐自己部署 searx 搜索引擎,国内使用最方便。
|
||||
search_internet:
|
||||
use: false
|
||||
search_engine_name: searx
|
||||
search_engine_name: tavily
|
||||
search_engine_config:
|
||||
bing:
|
||||
bing_search_url: https://api.bing.microsoft.com/v7.0/search
|
||||
|
|
@ -30,6 +30,12 @@ search_internet:
|
|||
engines: []
|
||||
categories: []
|
||||
language: zh-CN
|
||||
tavily:
|
||||
tavily_api_key: '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'
|
||||
include_answer: true
|
||||
search_depth: advanced
|
||||
include_raw_content: True
|
||||
max_results: 1
|
||||
top_k: 5
|
||||
verbose: Origin
|
||||
conclude_prompt: "<指令>这是搜索到的互联网信息,请你根据这些信息进行提取并有调理,简洁的回答问题。如果无法从中得到答案,请说 “无法搜索到能回答问题的内容”。
|
||||
|
|
|
|||
|
|
@ -7,15 +7,19 @@ from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
|
|||
from langchain.utilities.searx_search import SearxSearchWrapper
|
||||
from markdownify import markdownify
|
||||
from strsimpy.normalized_levenshtein import NormalizedLevenshtein
|
||||
from langchain_community.tools.tavily_search import TavilySearchResults
|
||||
import os
|
||||
|
||||
from chatchat.settings import Settings
|
||||
from chatchat.server.pydantic_v1 import Field
|
||||
from chatchat.server.utils import get_tool_config
|
||||
from chatchat.utils import build_logger
|
||||
# from tavily import TavilyClient
|
||||
|
||||
from .tools_registry import BaseToolOutput, regist_tool, format_context
|
||||
logger = build_logger()
|
||||
|
||||
|
||||
def searx_search(text ,config, top_k: int):
|
||||
print(f"searx_search: text: {text},config:{config},top_k:{top_k}")
|
||||
search = SearxSearchWrapper(
|
||||
|
|
@ -85,12 +89,31 @@ def metaphor_search(
|
|||
|
||||
return docs
|
||||
|
||||
def tavily_search(text, config, top_k):
    """Run a Tavily web search and return its hits with the ``url`` field removed.

    Args:
        text: The query string handed to the Tavily tool.
        config: Mapping of Tavily settings. Recognised keys are
            ``tavily_api_key``, ``include_answer``, ``search_depth``,
            ``include_raw_content`` and ``max_results``. Keys that are absent
            are simply not forwarded, so the tool's own defaults apply.
        top_k: Accepted for signature parity with the other search functions;
            it is not used here (the result count comes from ``max_results``).

    Returns:
        A list of dicts, one per hit, each without its ``url`` key. An empty
        list is returned when the tool does not hand back a list.
    """
    # Configure the Tavily API key. Only overwrite the process environment when
    # the config actually carries a key, so a TAVILY_API_KEY that was exported
    # beforehand keeps working and the key does not have to live in a config file.
    api_key = config.get("tavily_api_key")
    if api_key:
        os.environ["TAVILY_API_KEY"] = api_key

    # Initialise the tool, forwarding only the options present in the config.
    option_names = (
        "include_answer",
        "search_depth",
        "include_raw_content",
        "max_results",
    )
    tool_options = {name: config[name] for name in option_names if name in config}
    tavily_tool = TavilySearchResults(**tool_options)

    # Execute the search directly.
    raw_results = tavily_tool.run(text)

    # NOTE(review): the tool appears to be able to return something other than
    # a list (e.g. an error message string) when the request fails -- confirm
    # against the installed langchain_community version. Iterating such a value
    # would crash on ``.items()``, so report it and return no results instead.
    if not isinstance(raw_results, list):
        print(f"tavily_search: unexpected result: {raw_results!r}")
        return []

    # NOTE(review): ``url`` is dropped here, so downstream code that reads
    # ``result["url"]`` gets nothing for Tavily hits -- confirm this is intended.
    search_results = [
        {key: value for key, value in item.items() if key != "url"}
        for item in raw_results
        if isinstance(item, dict)
    ]

    print("=== 完整搜索返回值 ===")
    print(search_results)
    return search_results
|
||||
|
||||
# Dispatch table mapping a search engine name to the function that performs
# the search; each entry is called with ``text``, ``config`` and ``top_k``.
SEARCH_ENGINES = dict(
    bing=bing_search,
    duckduckgo=duckduckgo_search,
    metaphor=metaphor_search,
    searx=searx_search,
    tavily=tavily_search,
)
|
||||
|
||||
|
||||
|
|
@ -98,9 +121,9 @@ def search_result2docs(search_results) -> List[Document]:
|
|||
docs = []
|
||||
for result in search_results:
|
||||
doc = Document(
|
||||
page_content=result["snippet"] if "snippet" in result.keys() else "",
|
||||
page_content=result["content"] if "content" in result.keys() else "",
|
||||
metadata={
|
||||
"source": result["link"] if "link" in result.keys() else "",
|
||||
"source": result["url"] if "url" in result.keys() else "",
|
||||
"filename": result["title"] if "title" in result.keys() else "",
|
||||
},
|
||||
)
|
||||
|
|
@ -119,6 +142,7 @@ def search_engine(query: str, top_k:int=0, engine_name: str="", config: dict={})
|
|||
text=query, config=config["search_engine_config"][engine_name], top_k=top_k
|
||||
)
|
||||
docs = [x for x in search_result2docs(results) if x.page_content and x.page_content.strip()]
|
||||
print(f"docs: {docs}")
|
||||
return {"docs": docs, "search_engine": engine_name}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -488,7 +488,7 @@ class ToolSettings(BaseFileSettings):
|
|||
|
||||
search_internet: dict = {
|
||||
"use": False,
|
||||
"search_engine_name": "duckduckgo",
|
||||
"search_engine_name": "tavily",
|
||||
"search_engine_config": {
|
||||
"bing": {
|
||||
"bing_search_url": "https://api.bing.microsoft.com/v7.0/search",
|
||||
|
|
@ -506,6 +506,13 @@ class ToolSettings(BaseFileSettings):
|
|||
"engines": [],
|
||||
"categories": [],
|
||||
"language": "zh-CN",
|
||||
},
|
||||
"tavily":{
|
||||
"tavily_api_key": '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',
|
||||
"include_answer": True,
|
||||
"search_depth": "advanced",
|
||||
"include_raw_content": True,
|
||||
"max_results": 1
|
||||
}
|
||||
},
|
||||
"top_k": 5,
|
||||
|
|
|
|||
Loading…
Reference in New Issue