互联网搜索api修改

This commit is contained in:
GuanYuankai 2025-04-18 09:12:52 +08:00
parent 8cd80dcd9c
commit cc8564ccca
2 changed files with 46 additions and 31 deletions

17
.gitignore vendored
View File

@ -21,3 +21,20 @@
.idea/vcs.xml
/.idea
/test_tool
chatchat_data/tool_settings.yaml
chatchat_data/prompt_settings.yaml
chatchat_data/model_settings.yaml
chatchat_data/basic_settings.yaml
localconfig/data/knowledge_base/samples/content/分布式训练技术原理.md
localconfig/data/knowledge_base/samples/content/大模型应用技术原理.md
localconfig/data/knowledge_base/samples/content/大模型技术栈-实战与应用.md
localconfig/data/knowledge_base/samples/content/大模型技术栈-算法与原理.md
localconfig/data/knowledge_base/samples/content/大模型指令对齐训练原理.md
localconfig/data/knowledge_base/samples/content/大模型推理优化策略.md
localconfig/data/knowledge_base/samples/vector_store/bge-large-zh-v1.5/index.faiss
localconfig/data/knowledge_base/samples/vector_store/bge-large-zh-v1.5/index.pkl
localconfig/data/knowledge_base/info.db
chatchat_data/basic_settings.yaml
chatchat_data/model_settings.yaml
chatchat_data/prompt_settings.yaml
chatchat_data/tool_settings.yaml

View File

@ -1,3 +1,4 @@
import json
import uuid
from typing import Dict, List
@ -19,6 +20,7 @@ from chatchat.utils import build_logger
# from tavily import TavilyClient
from .tools_registry import BaseToolOutput, regist_tool, format_context
logger = build_logger()
@ -91,6 +93,7 @@ def metaphor_search(
return docs
def tavily_search(text, config, top_k):
# Export the Tavily API key from the tool config into the environment
os.environ["TAVILY_API_KEY"] = config["tavily_api_key"]
@ -109,7 +112,9 @@ def tavily_search(text, config, top_k):
# print("=== 完整搜索返回值 ===")
# print(search_results)
return search_results
def zhipu_search(text, config):
def zhipu_search(text, config, top_k):
api_key = config["zhipu_api_key"]
msg = [
{
@ -132,9 +137,9 @@ def zhipu_search(text, config):
headers={'Authorization': api_key},
timeout=300
)
print(resp.content.decode())
return resp.content.decode()
SEARCH_ENGINES = {
"bing": bing_search,
"duckduckgo": duckduckgo_search,
@ -145,32 +150,24 @@ SEARCH_ENGINES = {
}
# Tavily-specific result parsing (commented out below)
# def search_result2docs_tavily(search_results) -> List[Document]:
# docs = []
# for result in search_results:
# doc = Document(
# page_content=result["content"] if "content" in result.keys() else "",
# metadata={
# "source": result["url"] if "url" in result.keys() else "",
# "filename": result["title"] if "title" in result.keys() else "",
# },
# )
# docs.append(doc)
# return docs
def search_result2docs(search_results, engine_name) -> List[Document]:
docs = []
if engine_name == "zhipu_search":
try:
results = search_results["choices"][0]["message"]["tool_calls"][1]["search_result"]
raw_result = json.loads(search_results)
results = raw_result["choices"][0]["message"]["tool_calls"][1]["search_result"]
except (KeyError, IndexError) as e:
print(f"结构异常: {e}")
results = []
# Iterate over the results and process each one
for idx, result in enumerate(search_results, 1):
docs.append(result.get('content', '无内容'))
print(f"内容:\n{result.get('content', '无内容')}\n")
for idx, result in enumerate(results, 1):
doc = Document(
page_content=result["content"],
metadata={"link": result["link"], "title": result["title"]}
)
docs.append(doc)
print(f"内容:\n{result}\n")
return docs
page_contents_key = "snippet" if engine_name != "tavily" else "content"
metadata_key = "link" if engine_name != "tavily" else "url"
@ -199,6 +196,7 @@ def search_engine(query: str, top_k:int=0, engine_name: str="", config: dict={})
docs = [x for x in search_result2docs(results, engine_name) if x.page_content and x.page_content.strip()]
print(f"len(docs): {len(docs)}")
print(f"docs: {docs}")
return {"docs": docs, "search_engine": engine_name}