dev:search_result2docs 接口根据搜索引擎名称(engine_name)自动选择结果解析字段

2025-03-04 16:15:09 +08:00 · 2025-03-04 16:15:09 +08:00 · 34dc4f2c7f
parent 04db85f02d
commit 34dc4f2c7f
2 changed files with 87 additions and 71 deletions
--- a/chatchat_data/model_settings.yaml
+++ b/chatchat_data/model_settings.yaml
@ -2,7 +2,7 @@


 # 默认选用的 LLM 名称
-DEFAULT_LLM_MODEL: qwen2-instruct
+DEFAULT_LLM_MODEL: qwen2.5-instruct

 # 默认选用的 Embedding 名称
 DEFAULT_EMBEDDING_MODEL: bge-large-zh-v1.5
@ -112,78 +112,78 @@ LLM_MODEL_CONFIG:
 MODEL_PLATFORMS:
  - platform_name: xinference
    platform_type: xinference
-    api_base_url: http://127.0.0.1:9997/v1
+    api_base_url: http://192.168.0.21:9997/v1
    api_key: EMPTY
    api_proxy: ''
    api_concurrencies: 5
    auto_detect_model: true
-    llm_models: []
-    embed_models: []
-    text2image_models: []
-    image2text_models: []
-    rerank_models: [bge-reranker-large]
-    speech2text_models: []
-    text2speech_models: []
-  - platform_name: ollama
-    platform_type: ollama
-    api_base_url: http://127.0.0.1:11434/v1
-    api_key: EMPTY
-    api_proxy: ''
-    api_concurrencies: 5
-    auto_detect_model: false
-    llm_models:
-      - qwen:7b
-      - qwen2:7b
-    embed_models:
-      - quentinz/bge-large-zh-v1.5
-    text2image_models: []
-    image2text_models: []
-    rerank_models: []
-    speech2text_models: []
-    text2speech_models: []
-  - platform_name: oneapi
-    platform_type: oneapi
-    api_base_url: http://127.0.0.1:3000/v1
-    api_key: sk-
-    api_proxy: ''
-    api_concurrencies: 5
-    auto_detect_model: false
-    llm_models:
-      - chatglm_pro
-      - chatglm_turbo
-      - chatglm_std
-      - chatglm_lite
-      - qwen-turbo
-      - qwen-plus
-      - qwen-max
-      - qwen-max-longcontext
-      - ERNIE-Bot
-      - ERNIE-Bot-turbo
-      - ERNIE-Bot-4
-      - SparkDesk
-    embed_models:
-      - text-embedding-v1
-      - Embedding-V1
-    text2image_models: []
-    image2text_models: []
-    rerank_models: []
-    speech2text_models: []
-    text2speech_models: []
-  - platform_name: openai
-    platform_type: openai
-    api_base_url: https://api.openai.com/v1
-    api_key: sk-proj-
-    api_proxy: ''
-    api_concurrencies: 5
-    auto_detect_model: false
-    llm_models:
-      - gpt-4o
-      - gpt-3.5-turbo
-    embed_models:
-      - text-embedding-3-small
-      - text-embedding-3-large
+    llm_models: [qwen2.5-instruct]
+    embed_models: [bge-large-zh-v1.5]
    text2image_models: []
    image2text_models: []
    rerank_models: []
    speech2text_models: []
    text2speech_models: []
+#  - platform_name: ollama
+#    platform_type: ollama
+#    api_base_url: http://127.0.0.1:11434/v1
+#    api_key: EMPTY
+#    api_proxy: ''
+#    api_concurrencies: 5
+#    auto_detect_model: false
+#    llm_models:
+#      - qwen:7b
+#      - qwen2:7b
+#    embed_models:
+#      - quentinz/bge-large-zh-v1.5
+#    text2image_models: []
+#    image2text_models: []
+#    rerank_models: []
+#    speech2text_models: []
+#    text2speech_models: []
+#  - platform_name: oneapi
+#    platform_type: oneapi
+#    api_base_url: http://127.0.0.1:3000/v1
+#    api_key: sk-
+#    api_proxy: ''
+#    api_concurrencies: 5
+#    auto_detect_model: false
+#    llm_models:
+#      - chatglm_pro
+#      - chatglm_turbo
+#      - chatglm_std
+#      - chatglm_lite
+#      - qwen-turbo
+#      - qwen-plus
+#      - qwen-max
+#      - qwen-max-longcontext
+#      - ERNIE-Bot
+#      - ERNIE-Bot-turbo
+#      - ERNIE-Bot-4
+#      - SparkDesk
+#    embed_models:
+#      - text-embedding-v1
+#      - Embedding-V1
+#    text2image_models: []
+#    image2text_models: []
+#    rerank_models: []
+#    speech2text_models: []
+#    text2speech_models: []
+#  - platform_name: openai
+#    platform_type: openai
+#    api_base_url: https://api.openai.com/v1
+#    api_key: sk-proj-
+#    api_proxy: ''
+#    api_concurrencies: 5
+#    auto_detect_model: false
+#    llm_models:
+#      - gpt-4o
+#      - gpt-3.5-turbo
+#    embed_models:
+#      - text-embedding-3-small
+#      - text-embedding-3-large
+#    text2image_models: []
+#    image2text_models: []
+#    rerank_models: []
+#    speech2text_models: []
+#    text2speech_models: []
--- a/libs/chatchat-server/chatchat/server/agent/tools_factory/search_internet.py
+++ b/libs/chatchat-server/chatchat/server/agent/tools_factory/search_internet.py
@ -116,14 +116,29 @@ SEARCH_ENGINES = {
    "tavily": tavily_search
 }

+# tavily 搜索结果的解析(旧实现,仅作参考保留;其逻辑已由 search_result2docs 按 engine_name 统一处理)
+# def search_result2docs_tavily(search_results) -> List[Document]:
+#     docs = []
+#     for result in search_results:
+#         doc = Document(
+#             page_content=result["content"] if "content" in result.keys() else "",
+#             metadata={
+#                 "source": result["url"] if "url" in result.keys() else "",
+#                 "filename": result["title"] if "title" in result.keys() else "",
+#             },
+#         )
+#         docs.append(doc)
+#     return docs

-def search_result2docs(search_results) -> List[Document]:
+def search_result2docs(search_results, engine_name) -> List[Document]:
    docs = []
+    page_contents_key = "snippet" if engine_name != "tavily" else "content"
+    metadata_key = "link" if engine_name != "tavily" else "url"
    for result in search_results:
        doc = Document(
-            page_content=result["content"] if "content" in result.keys() else "",
+            page_content=result[page_contents_key] if page_contents_key in result.keys() else "",
            metadata={
-                "source": result["url"] if "url" in result.keys() else "",
+                "source": result[metadata_key] if metadata_key in result.keys() else "",
                "filename": result["title"] if "title" in result.keys() else "",
            },
        )
@ -141,7 +156,8 @@ def search_engine(query: str, top_k:int=0, engine_name: str="", config: dict={})
    results = search_engine_use(
        text=query, config=config["search_engine_config"][engine_name], top_k=top_k
    )
-    docs = [x for x in search_result2docs(results) if x.page_content and x.page_content.strip()]
+
+    docs = [x for x in search_result2docs(results, engine_name) if x.page_content and x.page_content.strip()]
    print(f"docs: {docs}")
    return {"docs": docs, "search_engine": engine_name}