dev: search_result2docs interface is configured automatically based on the engine name

GuanYuankai 2025-03-04 16:15:09 +08:00
parent 04db85f02d
commit 34dc4f2c7f
2 changed files with 87 additions and 71 deletions

File 1 of 2

@@ -2,7 +2,7 @@
 # Name of the default LLM
-DEFAULT_LLM_MODEL: qwen2-instruct
+DEFAULT_LLM_MODEL: qwen2.5-instruct
 # Name of the default Embedding model
 DEFAULT_EMBEDDING_MODEL: bge-large-zh-v1.5
@@ -112,78 +112,78 @@ LLM_MODEL_CONFIG:
 MODEL_PLATFORMS:
   - platform_name: xinference
     platform_type: xinference
-    api_base_url: http://127.0.0.1:9997/v1
+    api_base_url: http://192.168.0.21:9997/v1
     api_key: EMPTY
     api_proxy: ''
     api_concurrencies: 5
     auto_detect_model: true
-    llm_models: []
-    embed_models: []
-    text2image_models: []
-    image2text_models: []
-    rerank_models: [bge-reranker-large]
-    speech2text_models: []
-    text2speech_models: []
-  - platform_name: ollama
-    platform_type: ollama
-    api_base_url: http://127.0.0.1:11434/v1
-    api_key: EMPTY
-    api_proxy: ''
-    api_concurrencies: 5
-    auto_detect_model: false
-    llm_models:
-      - qwen:7b
-      - qwen2:7b
-    embed_models:
-      - quentinz/bge-large-zh-v1.5
-    text2image_models: []
-    image2text_models: []
-    rerank_models: []
-    speech2text_models: []
-    text2speech_models: []
-  - platform_name: oneapi
-    platform_type: oneapi
-    api_base_url: http://127.0.0.1:3000/v1
-    api_key: sk-
-    api_proxy: ''
-    api_concurrencies: 5
-    auto_detect_model: false
-    llm_models:
-      - chatglm_pro
-      - chatglm_turbo
-      - chatglm_std
-      - chatglm_lite
-      - qwen-turbo
-      - qwen-plus
-      - qwen-max
-      - qwen-max-longcontext
-      - ERNIE-Bot
-      - ERNIE-Bot-turbo
-      - ERNIE-Bot-4
-      - SparkDesk
-    embed_models:
-      - text-embedding-v1
-      - Embedding-V1
-    text2image_models: []
-    image2text_models: []
-    rerank_models: []
-    speech2text_models: []
-    text2speech_models: []
-  - platform_name: openai
-    platform_type: openai
-    api_base_url: https://api.openai.com/v1
-    api_key: sk-proj-
-    api_proxy: ''
-    api_concurrencies: 5
-    auto_detect_model: false
-    llm_models:
-      - gpt-4o
-      - gpt-3.5-turbo
-    embed_models:
-      - text-embedding-3-small
-      - text-embedding-3-large
+    llm_models: [qwen2.5-instruct]
+    embed_models: [bge-large-zh-v1.5]
     text2image_models: []
     image2text_models: []
     rerank_models: []
     speech2text_models: []
     text2speech_models: []
+  # - platform_name: ollama
+  #   platform_type: ollama
+  #   api_base_url: http://127.0.0.1:11434/v1
+  #   api_key: EMPTY
+  #   api_proxy: ''
+  #   api_concurrencies: 5
+  #   auto_detect_model: false
+  #   llm_models:
+  #     - qwen:7b
+  #     - qwen2:7b
+  #   embed_models:
+  #     - quentinz/bge-large-zh-v1.5
+  #   text2image_models: []
+  #   image2text_models: []
+  #   rerank_models: []
+  #   speech2text_models: []
+  #   text2speech_models: []
+  # - platform_name: oneapi
+  #   platform_type: oneapi
+  #   api_base_url: http://127.0.0.1:3000/v1
+  #   api_key: sk-
+  #   api_proxy: ''
+  #   api_concurrencies: 5
+  #   auto_detect_model: false
+  #   llm_models:
+  #     - chatglm_pro
+  #     - chatglm_turbo
+  #     - chatglm_std
+  #     - chatglm_lite
+  #     - qwen-turbo
+  #     - qwen-plus
+  #     - qwen-max
+  #     - qwen-max-longcontext
+  #     - ERNIE-Bot
+  #     - ERNIE-Bot-turbo
+  #     - ERNIE-Bot-4
+  #     - SparkDesk
+  #   embed_models:
+  #     - text-embedding-v1
+  #     - Embedding-V1
+  #   text2image_models: []
+  #   image2text_models: []
+  #   rerank_models: []
+  #   speech2text_models: []
+  #   text2speech_models: []
+  # - platform_name: openai
+  #   platform_type: openai
+  #   api_base_url: https://api.openai.com/v1
+  #   api_key: sk-proj-
+  #   api_proxy: ''
+  #   api_concurrencies: 5
+  #   auto_detect_model: false
+  #   llm_models:
+  #     - gpt-4o
+  #     - gpt-3.5-turbo
+  #   embed_models:
+  #     - text-embedding-3-small
+  #     - text-embedding-3-large
+  #   text2image_models: []
+  #   image2text_models: []
+  #   rerank_models: []
+  #   speech2text_models: []
+  #   text2speech_models: []
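Note: the hunk above narrows the active MODEL_PLATFORMS list to a single xinference entry with explicit llm_models/embed_models and comments out the other platforms. As a rough illustration of how a settings block shaped like this can be consumed, here is a minimal sketch; it is not code from this repository, and the filename model_settings.yaml and the helper find_platform_for_model are assumptions made for the example.

# Minimal sketch (not from this repository): load a settings file shaped like
# the MODEL_PLATFORMS block above and find the platform serving a given model.
# "model_settings.yaml" and this helper name are illustrative assumptions.
import yaml  # requires PyYAML


def find_platform_for_model(settings_path: str, model_name: str):
    with open(settings_path, encoding="utf-8") as f:
        settings = yaml.safe_load(f)
    for platform in settings.get("MODEL_PLATFORMS", []):
        # A platform matches if the model is listed explicitly or if the
        # platform advertises auto-detection of its models.
        if model_name in platform.get("llm_models", []) or platform.get("auto_detect_model"):
            return platform
    return None


platform = find_platform_for_model("model_settings.yaml", "qwen2.5-instruct")
if platform is not None:
    print(platform["platform_name"], platform["api_base_url"])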

File 2 of 2

@@ -116,14 +116,29 @@ SEARCH_ENGINES = {
     "tavily": tavily_search
 }
+# Parsing for tavily results
+# def search_result2docs_tavily(search_results) -> List[Document]:
+#     docs = []
+#     for result in search_results:
+#         doc = Document(
+#             page_content=result["content"] if "content" in result.keys() else "",
+#             metadata={
+#                 "source": result["url"] if "url" in result.keys() else "",
+#                 "filename": result["title"] if "title" in result.keys() else "",
+#             },
+#         )
+#         docs.append(doc)
+#     return docs
-def search_result2docs(search_results) -> List[Document]:
+def search_result2docs(search_results, engine_name) -> List[Document]:
     docs = []
+    page_contents_key = "snippet" if engine_name != "tavily" else "content"
+    metadata_key = "link" if engine_name != "tavily" else "url"
     for result in search_results:
         doc = Document(
-            page_content=result["content"] if "content" in result.keys() else "",
+            page_content=result[page_contents_key] if page_contents_key in result.keys() else "",
             metadata={
-                "source": result["url"] if "url" in result.keys() else "",
+                "source": result[metadata_key] if metadata_key in result.keys() else "",
                 "filename": result["title"] if "title" in result.keys() else "",
             },
         )

@@ -141,7 +156,8 @@ def search_engine(query: str, top_k:int=0, engine_name: str="", config: dict={})
     results = search_engine_use(
         text=query, config=config["search_engine_config"][engine_name], top_k=top_k
     )
-    docs = [x for x in search_result2docs(results) if x.page_content and x.page_content.strip()]
+    docs = [x for x in search_result2docs(results, engine_name) if x.page_content and x.page_content.strip()]
     print(f"docs: {docs}")
     return {"docs": docs, "search_engine": engine_name}
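For reference, the sketch below reproduces the parsing logic introduced by this commit in a self-contained form and shows how the two result schemas differ. The sample result dictionaries and the langchain_core import path are assumptions made for illustration; they are not part of the commit.

# Self-contained sketch of the behaviour added by this commit: tavily results
# expose "content"/"url", while the other engines expose "snippet"/"link".
# The payloads below are made up for illustration; the Document import path
# (langchain_core) is an assumption about the project's dependencies.
from typing import List

from langchain_core.documents import Document


def search_result2docs(search_results, engine_name) -> List[Document]:
    docs = []
    # Pick the field names that match the engine's response schema.
    page_contents_key = "snippet" if engine_name != "tavily" else "content"
    metadata_key = "link" if engine_name != "tavily" else "url"
    for result in search_results:
        doc = Document(
            page_content=result[page_contents_key] if page_contents_key in result else "",
            metadata={
                "source": result[metadata_key] if metadata_key in result else "",
                "filename": result["title"] if "title" in result else "",
            },
        )
        docs.append(doc)
    return docs


# Hypothetical responses illustrating the two schemas:
tavily_results = [{"content": "some snippet", "url": "https://example.com/a", "title": "A"}]
bing_results = [{"snippet": "some snippet", "link": "https://example.com/b", "title": "B"}]

print(search_result2docs(tavily_results, "tavily")[0].metadata["source"])  # https://example.com/a
print(search_result2docs(bing_results, "bing")[0].metadata["source"])      # https://example.com/b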