互联网搜索api修改
This commit is contained in:
parent
8cd80dcd9c
commit
cc8564ccca
|
|
@ -21,3 +21,20 @@
|
||||||
.idea/vcs.xml
|
.idea/vcs.xml
|
||||||
/.idea
|
/.idea
|
||||||
/test_tool
|
/test_tool
|
||||||
|
chatchat_data/tool_settings.yaml
|
||||||
|
chatchat_data/prompt_settings.yaml
|
||||||
|
chatchat_data/model_settings.yaml
|
||||||
|
chatchat_data/basic_settings.yaml
|
||||||
|
localconfig/data/knowledge_base/samples/content/分布式训练技术原理.md
|
||||||
|
localconfig/data/knowledge_base/samples/content/大模型应用技术原理.md
|
||||||
|
localconfig/data/knowledge_base/samples/content/大模型技术栈-实战与应用.md
|
||||||
|
localconfig/data/knowledge_base/samples/content/大模型技术栈-算法与原理.md
|
||||||
|
localconfig/data/knowledge_base/samples/content/大模型指令对齐训练原理.md
|
||||||
|
localconfig/data/knowledge_base/samples/content/大模型推理优化策略.md
|
||||||
|
localconfig/data/knowledge_base/samples/vector_store/bge-large-zh-v1.5/index.faiss
|
||||||
|
localconfig/data/knowledge_base/samples/vector_store/bge-large-zh-v1.5/index.pkl
|
||||||
|
localconfig/data/knowledge_base/info.db
|
||||||
|
chatchat_data/basic_settings.yaml
|
||||||
|
chatchat_data/model_settings.yaml
|
||||||
|
chatchat_data/prompt_settings.yaml
|
||||||
|
chatchat_data/tool_settings.yaml
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
import json
|
||||||
import uuid
|
import uuid
|
||||||
from typing import Dict, List
|
from typing import Dict, List
|
||||||
|
|
||||||
|
|
@ -19,10 +20,11 @@ from chatchat.utils import build_logger
|
||||||
# from tavily import TavilyClient
|
# from tavily import TavilyClient
|
||||||
|
|
||||||
from .tools_registry import BaseToolOutput, regist_tool, format_context
|
from .tools_registry import BaseToolOutput, regist_tool, format_context
|
||||||
|
|
||||||
logger = build_logger()
|
logger = build_logger()
|
||||||
|
|
||||||
|
|
||||||
def searx_search(text ,config, top_k: int):
|
def searx_search(text, config, top_k: int):
|
||||||
print(f"searx_search: text: {text},config:{config},top_k:{top_k}")
|
print(f"searx_search: text: {text},config:{config},top_k:{top_k}")
|
||||||
search = SearxSearchWrapper(
|
search = SearxSearchWrapper(
|
||||||
searx_host=config["host"],
|
searx_host=config["host"],
|
||||||
|
|
@ -33,7 +35,7 @@ def searx_search(text ,config, top_k: int):
|
||||||
return search.results(text, top_k)
|
return search.results(text, top_k)
|
||||||
|
|
||||||
|
|
||||||
def bing_search(text, config, top_k:int):
|
def bing_search(text, config, top_k: int):
|
||||||
search = BingSearchAPIWrapper(
|
search = BingSearchAPIWrapper(
|
||||||
bing_subscription_key=config["bing_key"],
|
bing_subscription_key=config["bing_key"],
|
||||||
bing_search_url=config["bing_search_url"],
|
bing_search_url=config["bing_search_url"],
|
||||||
|
|
@ -41,15 +43,15 @@ def bing_search(text, config, top_k:int):
|
||||||
return search.results(text, top_k)
|
return search.results(text, top_k)
|
||||||
|
|
||||||
|
|
||||||
def duckduckgo_search(text, config, top_k:int):
|
def duckduckgo_search(text, config, top_k: int):
|
||||||
search = DuckDuckGoSearchAPIWrapper()
|
search = DuckDuckGoSearchAPIWrapper()
|
||||||
return search.results(text, top_k)
|
return search.results(text, top_k)
|
||||||
|
|
||||||
|
|
||||||
def metaphor_search(
|
def metaphor_search(
|
||||||
text: str,
|
text: str,
|
||||||
config: dict,
|
config: dict,
|
||||||
top_k:int
|
top_k: int
|
||||||
) -> List[Dict]:
|
) -> List[Dict]:
|
||||||
from metaphor_python import Metaphor
|
from metaphor_python import Metaphor
|
||||||
|
|
||||||
|
|
@ -91,13 +93,14 @@ def metaphor_search(
|
||||||
|
|
||||||
return docs
|
return docs
|
||||||
|
|
||||||
|
|
||||||
def tavily_search(text, config, top_k):
|
def tavily_search(text, config, top_k):
|
||||||
# 配置tavily api key
|
# 配置tavily api key
|
||||||
os.environ["TAVILY_API_KEY"] = config["tavily_api_key"]
|
os.environ["TAVILY_API_KEY"] = config["tavily_api_key"]
|
||||||
# 初始化工具(配置参数)
|
# 初始化工具(配置参数)
|
||||||
tavily_tool = TavilySearchResults(
|
tavily_tool = TavilySearchResults(
|
||||||
include_answer=config["include_answer"], # 关键参数:启用答案生成
|
include_answer=config["include_answer"], # 关键参数:启用答案生成
|
||||||
search_depth=config["search_depth"], # 必须使用高级搜索模式
|
search_depth=config["search_depth"], # 必须使用高级搜索模式
|
||||||
include_raw_content=config["include_raw_content"],
|
include_raw_content=config["include_raw_content"],
|
||||||
max_results=config["max_results"]
|
max_results=config["max_results"]
|
||||||
)
|
)
|
||||||
|
|
@ -109,7 +112,9 @@ def tavily_search(text, config, top_k):
|
||||||
# print("=== 完整搜索返回值 ===")
|
# print("=== 完整搜索返回值 ===")
|
||||||
# print(search_results)
|
# print(search_results)
|
||||||
return search_results
|
return search_results
|
||||||
def zhipu_search(text, config):
|
|
||||||
|
|
||||||
|
def zhipu_search(text, config, top_k):
|
||||||
api_key = config["zhipu_api_key"]
|
api_key = config["zhipu_api_key"]
|
||||||
msg = [
|
msg = [
|
||||||
{
|
{
|
||||||
|
|
@ -132,9 +137,9 @@ def zhipu_search(text, config):
|
||||||
headers={'Authorization': api_key},
|
headers={'Authorization': api_key},
|
||||||
timeout=300
|
timeout=300
|
||||||
)
|
)
|
||||||
print(resp.content.decode())
|
|
||||||
return resp.content.decode()
|
return resp.content.decode()
|
||||||
|
|
||||||
|
|
||||||
SEARCH_ENGINES = {
|
SEARCH_ENGINES = {
|
||||||
"bing": bing_search,
|
"bing": bing_search,
|
||||||
"duckduckgo": duckduckgo_search,
|
"duckduckgo": duckduckgo_search,
|
||||||
|
|
@ -145,32 +150,24 @@ SEARCH_ENGINES = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# tavily的解析
|
|
||||||
# def search_result2docs_tavily(search_results) -> List[Document]:
|
|
||||||
# docs = []
|
|
||||||
# for result in search_results:
|
|
||||||
# doc = Document(
|
|
||||||
# page_content=result["content"] if "content" in result.keys() else "",
|
|
||||||
# metadata={
|
|
||||||
# "source": result["url"] if "url" in result.keys() else "",
|
|
||||||
# "filename": result["title"] if "title" in result.keys() else "",
|
|
||||||
# },
|
|
||||||
# )
|
|
||||||
# docs.append(doc)
|
|
||||||
# return docs
|
|
||||||
|
|
||||||
def search_result2docs(search_results, engine_name) -> List[Document]:
|
def search_result2docs(search_results, engine_name) -> List[Document]:
|
||||||
docs = []
|
docs = []
|
||||||
if engine_name == "zhipu_search":
|
if engine_name == "zhipu_search":
|
||||||
try:
|
try:
|
||||||
results = search_results["choices"][0]["message"]["tool_calls"][1]["search_result"]
|
raw_result = json.loads(search_results)
|
||||||
|
results = raw_result["choices"][0]["message"]["tool_calls"][1]["search_result"]
|
||||||
except (KeyError, IndexError) as e:
|
except (KeyError, IndexError) as e:
|
||||||
print(f"结构异常: {e}")
|
print(f"结构异常: {e}")
|
||||||
results = []
|
results = []
|
||||||
# 遍历并处理每个结果
|
# 遍历并处理每个结果
|
||||||
for idx, result in enumerate(search_results, 1):
|
|
||||||
docs.append(result.get('content', '无内容'))
|
for idx, result in enumerate(results, 1):
|
||||||
print(f"内容:\n{result.get('content', '无内容')}\n")
|
doc = Document(
|
||||||
|
page_content=result["content"],
|
||||||
|
metadata={"link": result["link"], "title": result["title"]}
|
||||||
|
)
|
||||||
|
docs.append(doc)
|
||||||
|
print(f"内容:\n{result}\n")
|
||||||
return docs
|
return docs
|
||||||
page_contents_key = "snippet" if engine_name != "tavily" else "content"
|
page_contents_key = "snippet" if engine_name != "tavily" else "content"
|
||||||
metadata_key = "link" if engine_name != "tavily" else "url"
|
metadata_key = "link" if engine_name != "tavily" else "url"
|
||||||
|
|
@ -186,7 +183,7 @@ def search_result2docs(search_results, engine_name) -> List[Document]:
|
||||||
return docs
|
return docs
|
||||||
|
|
||||||
|
|
||||||
def search_engine(query: str, top_k:int=0, engine_name: str="", config: dict={}):
|
def search_engine(query: str, top_k: int = 0, engine_name: str = "", config: dict = {}):
|
||||||
config = config or get_tool_config("search_internet")
|
config = config or get_tool_config("search_internet")
|
||||||
if top_k <= 0:
|
if top_k <= 0:
|
||||||
top_k = config.get("top_k", Settings.kb_settings.SEARCH_ENGINE_TOP_K)
|
top_k = config.get("top_k", Settings.kb_settings.SEARCH_ENGINE_TOP_K)
|
||||||
|
|
@ -199,6 +196,7 @@ def search_engine(query: str, top_k:int=0, engine_name: str="", config: dict={})
|
||||||
|
|
||||||
docs = [x for x in search_result2docs(results, engine_name) if x.page_content and x.page_content.strip()]
|
docs = [x for x in search_result2docs(results, engine_name) if x.page_content and x.page_content.strip()]
|
||||||
print(f"len(docs): {len(docs)}")
|
print(f"len(docs): {len(docs)}")
|
||||||
|
print(f"docs: {docs}")
|
||||||
return {"docs": docs, "search_engine": engine_name}
|
return {"docs": docs, "search_engine": engine_name}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -207,7 +205,7 @@ def search_internet(query: str = Field(description="query for Internet search"))
|
||||||
"""用这个工具实现获取世界、历史、实时新闻、或除电力系统之外的信息查询"""
|
"""用这个工具实现获取世界、历史、实时新闻、或除电力系统之外的信息查询"""
|
||||||
try:
|
try:
|
||||||
print(f"search_internet: query: {query}")
|
print(f"search_internet: query: {query}")
|
||||||
return BaseToolOutput(data= search_engine(query=query), format=format_context)
|
return BaseToolOutput(data=search_engine(query=query), format=format_context)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"未知错误: {str(e)}")
|
logger.error(f"未知错误: {str(e)}")
|
||||||
return BaseToolOutput(f"搜索过程中发生未知错误,{str(e)}", format=format_context)
|
return BaseToolOutput(f"搜索过程中发生未知错误,{str(e)}", format=format_context)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue