dev: search_result2docs interface is configured automatically based on the engine name

GuanYuankai 2025-03-04 16:15:09 +08:00
parent 04db85f02d
commit 34dc4f2c7f
2 changed files with 87 additions and 71 deletions

File 1 of 2

@@ -2,7 +2,7 @@
 # Name of the default LLM
-DEFAULT_LLM_MODEL: qwen2-instruct
+DEFAULT_LLM_MODEL: qwen2.5-instruct
 # Name of the default Embedding model
 DEFAULT_EMBEDDING_MODEL: bge-large-zh-v1.5
@@ -112,78 +112,78 @@ LLM_MODEL_CONFIG:
 MODEL_PLATFORMS:
   - platform_name: xinference
     platform_type: xinference
-    api_base_url: http://127.0.0.1:9997/v1
+    api_base_url: http://192.168.0.21:9997/v1
     api_key: EMPTY
     api_proxy: ''
     api_concurrencies: 5
     auto_detect_model: true
-    llm_models: []
-    embed_models: []
-    text2image_models: []
-    image2text_models: []
-    rerank_models: [bge-reranker-large]
-    speech2text_models: []
-    text2speech_models: []
-  - platform_name: ollama
-    platform_type: ollama
-    api_base_url: http://127.0.0.1:11434/v1
-    api_key: EMPTY
-    api_proxy: ''
-    api_concurrencies: 5
-    auto_detect_model: false
-    llm_models:
-      - qwen:7b
-      - qwen2:7b
-    embed_models:
-      - quentinz/bge-large-zh-v1.5
-    text2image_models: []
-    image2text_models: []
-    rerank_models: []
-    speech2text_models: []
-    text2speech_models: []
-  - platform_name: oneapi
-    platform_type: oneapi
-    api_base_url: http://127.0.0.1:3000/v1
-    api_key: sk-
-    api_proxy: ''
-    api_concurrencies: 5
-    auto_detect_model: false
-    llm_models:
-      - chatglm_pro
-      - chatglm_turbo
-      - chatglm_std
-      - chatglm_lite
-      - qwen-turbo
-      - qwen-plus
-      - qwen-max
-      - qwen-max-longcontext
-      - ERNIE-Bot
-      - ERNIE-Bot-turbo
-      - ERNIE-Bot-4
-      - SparkDesk
-    embed_models:
-      - text-embedding-v1
-      - Embedding-V1
-    text2image_models: []
-    image2text_models: []
-    rerank_models: []
-    speech2text_models: []
-    text2speech_models: []
-  - platform_name: openai
-    platform_type: openai
-    api_base_url: https://api.openai.com/v1
-    api_key: sk-proj-
-    api_proxy: ''
-    api_concurrencies: 5
-    auto_detect_model: false
-    llm_models:
-      - gpt-4o
-      - gpt-3.5-turbo
-    embed_models:
-      - text-embedding-3-small
-      - text-embedding-3-large
+    llm_models: [qwen2.5-instruct]
+    embed_models: [bge-large-zh-v1.5]
     text2image_models: []
     image2text_models: []
     rerank_models: []
     speech2text_models: []
     text2speech_models: []
+  # - platform_name: ollama
+  #   platform_type: ollama
+  #   api_base_url: http://127.0.0.1:11434/v1
+  #   api_key: EMPTY
+  #   api_proxy: ''
+  #   api_concurrencies: 5
+  #   auto_detect_model: false
+  #   llm_models:
+  #     - qwen:7b
+  #     - qwen2:7b
+  #   embed_models:
+  #     - quentinz/bge-large-zh-v1.5
+  #   text2image_models: []
+  #   image2text_models: []
+  #   rerank_models: []
+  #   speech2text_models: []
+  #   text2speech_models: []
+  # - platform_name: oneapi
+  #   platform_type: oneapi
+  #   api_base_url: http://127.0.0.1:3000/v1
+  #   api_key: sk-
+  #   api_proxy: ''
+  #   api_concurrencies: 5
+  #   auto_detect_model: false
+  #   llm_models:
+  #     - chatglm_pro
+  #     - chatglm_turbo
+  #     - chatglm_std
+  #     - chatglm_lite
+  #     - qwen-turbo
+  #     - qwen-plus
+  #     - qwen-max
+  #     - qwen-max-longcontext
+  #     - ERNIE-Bot
+  #     - ERNIE-Bot-turbo
+  #     - ERNIE-Bot-4
+  #     - SparkDesk
+  #   embed_models:
+  #     - text-embedding-v1
+  #     - Embedding-V1
+  #   text2image_models: []
+  #   image2text_models: []
+  #   rerank_models: []
+  #   speech2text_models: []
+  #   text2speech_models: []
+  # - platform_name: openai
+  #   platform_type: openai
+  #   api_base_url: https://api.openai.com/v1
+  #   api_key: sk-proj-
+  #   api_proxy: ''
+  #   api_concurrencies: 5
+  #   auto_detect_model: false
+  #   llm_models:
+  #     - gpt-4o
+  #     - gpt-3.5-turbo
+  #   embed_models:
+  #     - text-embedding-3-small
+  #     - text-embedding-3-large
+  #   text2image_models: []
+  #   image2text_models: []
+  #   rerank_models: []
+  #   speech2text_models: []
+  #   text2speech_models: []
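Note: the hunk above narrows the active MODEL_PLATFORMS list to a single xinference entry with explicit llm_models/embed_models and comments out the other platforms. As a rough illustration of how a settings block shaped like this can be consumed, here is a minimal sketch; it is not code from this repository, and the filename model_settings.yaml and the helper find_platform_for_model are assumptions made for the example.

# Minimal sketch (not from this repository): load a settings file shaped like
# the MODEL_PLATFORMS block above and find the platform serving a given model.
# "model_settings.yaml" and this helper name are illustrative assumptions.
import yaml  # requires PyYAML


def find_platform_for_model(settings_path: str, model_name: str):
    with open(settings_path, encoding="utf-8") as f:
        settings = yaml.safe_load(f)
    for platform in settings.get("MODEL_PLATFORMS", []):
        # A platform matches if the model is listed explicitly or if the
        # platform advertises auto-detection of its models.
        if model_name in platform.get("llm_models", []) or platform.get("auto_detect_model"):
            return platform
    return None


platform = find_platform_for_model("model_settings.yaml", "qwen2.5-instruct")
if platform is not None:
    print(platform["platform_name"], platform["api_base_url"])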

File 2 of 2

@@ -116,14 +116,29 @@ SEARCH_ENGINES = {
     "tavily": tavily_search
 }
+# Parsing for tavily results
+# def search_result2docs_tavily(search_results) -> List[Document]:
+#     docs = []
+#     for result in search_results:
+#         doc = Document(
+#             page_content=result["content"] if "content" in result.keys() else "",
+#             metadata={
+#                 "source": result["url"] if "url" in result.keys() else "",
+#                 "filename": result["title"] if "title" in result.keys() else "",
+#             },
+#         )
+#         docs.append(doc)
+#     return docs
-def search_result2docs(search_results) -> List[Document]:
+def search_result2docs(search_results, engine_name) -> List[Document]:
     docs = []
+    page_contents_key = "snippet" if engine_name != "tavily" else "content"
+    metadata_key = "link" if engine_name != "tavily" else "url"
     for result in search_results:
         doc = Document(
-            page_content=result["content"] if "content" in result.keys() else "",
+            page_content=result[page_contents_key] if page_contents_key in result.keys() else "",
             metadata={
-                "source": result["url"] if "url" in result.keys() else "",
+                "source": result[metadata_key] if metadata_key in result.keys() else "",
                 "filename": result["title"] if "title" in result.keys() else "",
             },
         )

@@ -141,7 +156,8 @@ def search_engine(query: str, top_k:int=0, engine_name: str="", config: dict={})
     results = search_engine_use(
         text=query, config=config["search_engine_config"][engine_name], top_k=top_k
     )
-    docs = [x for x in search_result2docs(results) if x.page_content and x.page_content.strip()]
+    docs = [x for x in search_result2docs(results, engine_name) if x.page_content and x.page_content.strip()]
     print(f"docs: {docs}")
     return {"docs": docs, "search_engine": engine_name}
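For reference, the sketch below reproduces the parsing logic introduced by this commit in a self-contained form and shows how the two result schemas differ. The sample result dictionaries and the langchain_core import path are assumptions made for illustration; they are not part of the commit.

# Self-contained sketch of the behaviour added by this commit: tavily results
# expose "content"/"url", while the other engines expose "snippet"/"link".
# The payloads below are made up for illustration; the Document import path
# (langchain_core) is an assumption about the project's dependencies.
from typing import List

from langchain_core.documents import Document


def search_result2docs(search_results, engine_name) -> List[Document]:
    docs = []
    # Pick the field names that match the engine's response schema.
    page_contents_key = "snippet" if engine_name != "tavily" else "content"
    metadata_key = "link" if engine_name != "tavily" else "url"
    for result in search_results:
        doc = Document(
            page_content=result[page_contents_key] if page_contents_key in result else "",
            metadata={
                "source": result[metadata_key] if metadata_key in result else "",
                "filename": result["title"] if "title" in result else "",
            },
        )
        docs.append(doc)
    return docs


# Hypothetical responses illustrating the two schemas:
tavily_results = [{"content": "some snippet", "url": "https://example.com/a", "title": "A"}]
bing_results = [{"snippet": "some snippet", "link": "https://example.com/b", "title": "B"}]

print(search_result2docs(tavily_results, "tavily")[0].metadata["source"])  # https://example.com/a
print(search_result2docs(bing_results, "bing")[0].metadata["source"])      # https://example.com/b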