diff --git a/.gitignore b/.gitignore index aa496c1..90a592e 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,20 @@ .idea/vcs.xml /.idea /test_tool +chatchat_data/tool_settings.yaml +chatchat_data/prompt_settings.yaml +chatchat_data/model_settings.yaml +chatchat_data/basic_settings.yaml +localconfig/data/knowledge_base/samples/content/分布式训练技术原理.md +localconfig/data/knowledge_base/samples/content/大模型应用技术原理.md +localconfig/data/knowledge_base/samples/content/大模型技术栈-实战与应用.md +localconfig/data/knowledge_base/samples/content/大模型技术栈-算法与原理.md +localconfig/data/knowledge_base/samples/content/大模型指令对齐训练原理.md +localconfig/data/knowledge_base/samples/content/大模型推理优化策略.md +localconfig/data/knowledge_base/samples/vector_store/bge-large-zh-v1.5/index.faiss +localconfig/data/knowledge_base/samples/vector_store/bge-large-zh-v1.5/index.pkl +localconfig/data/knowledge_base/info.db +chatchat_data/basic_settings.yaml +chatchat_data/model_settings.yaml +chatchat_data/prompt_settings.yaml +chatchat_data/tool_settings.yaml diff --git a/libs/chatchat-server/chatchat/server/agent/tools_factory/search_internet.py b/libs/chatchat-server/chatchat/server/agent/tools_factory/search_internet.py index 49fb695..1ea8940 100644 --- a/libs/chatchat-server/chatchat/server/agent/tools_factory/search_internet.py +++ b/libs/chatchat-server/chatchat/server/agent/tools_factory/search_internet.py @@ -1,3 +1,4 @@ +import json import uuid from typing import Dict, List @@ -19,10 +20,11 @@ from chatchat.utils import build_logger # from tavily import TavilyClient from .tools_registry import BaseToolOutput, regist_tool, format_context + logger = build_logger() -def searx_search(text ,config, top_k: int): +def searx_search(text, config, top_k: int): print(f"searx_search: text: {text},config:{config},top_k:{top_k}") search = SearxSearchWrapper( searx_host=config["host"], @@ -33,7 +35,7 @@ def searx_search(text ,config, top_k: int): return search.results(text, top_k) -def bing_search(text, config, top_k:int): +def bing_search(text, config, top_k: int): search = BingSearchAPIWrapper( bing_subscription_key=config["bing_key"], bing_search_url=config["bing_search_url"], @@ -41,15 +43,15 @@ def bing_search(text, config, top_k:int): return search.results(text, top_k) -def duckduckgo_search(text, config, top_k:int): +def duckduckgo_search(text, config, top_k: int): search = DuckDuckGoSearchAPIWrapper() return search.results(text, top_k) def metaphor_search( - text: str, - config: dict, - top_k:int + text: str, + config: dict, + top_k: int ) -> List[Dict]: from metaphor_python import Metaphor @@ -91,13 +93,14 @@ def metaphor_search( return docs + def tavily_search(text, config, top_k): -# 配置tavily api key + # 配置tavily api key os.environ["TAVILY_API_KEY"] = config["tavily_api_key"] # 初始化工具(配置参数) tavily_tool = TavilySearchResults( - include_answer=config["include_answer"], # 关键参数:启用答案生成 - search_depth=config["search_depth"], # 必须使用高级搜索模式 + include_answer=config["include_answer"], # 关键参数:启用答案生成 + search_depth=config["search_depth"], # 必须使用高级搜索模式 include_raw_content=config["include_raw_content"], max_results=config["max_results"] ) @@ -109,7 +112,9 @@ def tavily_search(text, config, top_k): # print("=== 完整搜索返回值 ===") # print(search_results) return search_results -def zhipu_search(text, config): + + +def zhipu_search(text, config, top_k): api_key = config["zhipu_api_key"] msg = [ { @@ -132,9 +137,9 @@ def zhipu_search(text, config): headers={'Authorization': api_key}, timeout=300 ) - print(resp.content.decode()) return resp.content.decode() + SEARCH_ENGINES = { "bing": bing_search, "duckduckgo": duckduckgo_search, @@ -145,32 +150,24 @@ SEARCH_ENGINES = { } -# tavily的解析 -# def search_result2docs_tavily(search_results) -> List[Document]: -# docs = [] -# for result in search_results: -# doc = Document( -# page_content=result["content"] if "content" in result.keys() else "", -# metadata={ -# "source": result["url"] if "url" in result.keys() else "", -# "filename": result["title"] if "title" in result.keys() else "", -# }, -# ) -# docs.append(doc) -# return docs - def search_result2docs(search_results, engine_name) -> List[Document]: docs = [] if engine_name == "zhipu_search": try: - results = search_results["choices"][0]["message"]["tool_calls"][1]["search_result"] + raw_result = json.loads(search_results) + results = raw_result["choices"][0]["message"]["tool_calls"][1]["search_result"] except (KeyError, IndexError) as e: print(f"结构异常: {e}") results = [] # 遍历并处理每个结果 - for idx, result in enumerate(search_results, 1): - docs.append(result.get('content', '无内容')) - print(f"内容:\n{result.get('content', '无内容')}\n") + + for idx, result in enumerate(results, 1): + doc = Document( + page_content=result["content"], + metadata={"link": result["link"], "title": result["title"]} + ) + docs.append(doc) + print(f"内容:\n{result}\n") return docs page_contents_key = "snippet" if engine_name != "tavily" else "content" metadata_key = "link" if engine_name != "tavily" else "url" @@ -186,7 +183,7 @@ def search_result2docs(search_results, engine_name) -> List[Document]: return docs -def search_engine(query: str, top_k:int=0, engine_name: str="", config: dict={}): +def search_engine(query: str, top_k: int = 0, engine_name: str = "", config: dict = {}): config = config or get_tool_config("search_internet") if top_k <= 0: top_k = config.get("top_k", Settings.kb_settings.SEARCH_ENGINE_TOP_K) @@ -199,6 +196,7 @@ def search_engine(query: str, top_k:int=0, engine_name: str="", config: dict={}) docs = [x for x in search_result2docs(results, engine_name) if x.page_content and x.page_content.strip()] print(f"len(docs): {len(docs)}") + print(f"docs: {docs}") return {"docs": docs, "search_engine": engine_name} @@ -207,7 +205,7 @@ def search_internet(query: str = Field(description="query for Internet search")) """用这个工具实现获取世界、历史、实时新闻、或除电力系统之外的信息查询""" try: print(f"search_internet: query: {query}") - return BaseToolOutput(data= search_engine(query=query), format=format_context) + return BaseToolOutput(data=search_engine(query=query), format=format_context) except Exception as e: logger.error(f"未知错误: {str(e)}") return BaseToolOutput(f"搜索过程中发生未知错误,{str(e)}", format=format_context)