dev: search_result2docs interface is configured automatically based on the engine name
parent 04db85f02d
commit 34dc4f2c7f
@@ -2,7 +2,7 @@
 # Default LLM model name
-DEFAULT_LLM_MODEL: qwen2-instruct
+DEFAULT_LLM_MODEL: qwen2.5-instruct
 
 # Default Embedding model name
 DEFAULT_EMBEDDING_MODEL: bge-large-zh-v1.5
@@ -112,78 +112,78 @@ LLM_MODEL_CONFIG:
 MODEL_PLATFORMS:
   - platform_name: xinference
     platform_type: xinference
-    api_base_url: http://127.0.0.1:9997/v1
+    api_base_url: http://192.168.0.21:9997/v1
     api_key: EMPTY
     api_proxy: ''
     api_concurrencies: 5
     auto_detect_model: true
-    llm_models: []
+    llm_models: [qwen2.5-instruct]
-    embed_models: []
+    embed_models: [bge-large-zh-v1.5]
-    text2image_models: []
-    image2text_models: []
-    rerank_models: [bge-reranker-large]
-    speech2text_models: []
-    text2speech_models: []
-  - platform_name: ollama
-    platform_type: ollama
-    api_base_url: http://127.0.0.1:11434/v1
-    api_key: EMPTY
-    api_proxy: ''
-    api_concurrencies: 5
-    auto_detect_model: false
-    llm_models:
-      - qwen:7b
-      - qwen2:7b
-    embed_models:
-      - quentinz/bge-large-zh-v1.5
-    text2image_models: []
-    image2text_models: []
-    rerank_models: []
-    speech2text_models: []
-    text2speech_models: []
-  - platform_name: oneapi
-    platform_type: oneapi
-    api_base_url: http://127.0.0.1:3000/v1
-    api_key: sk-
-    api_proxy: ''
-    api_concurrencies: 5
-    auto_detect_model: false
-    llm_models:
-      - chatglm_pro
-      - chatglm_turbo
-      - chatglm_std
-      - chatglm_lite
-      - qwen-turbo
-      - qwen-plus
-      - qwen-max
-      - qwen-max-longcontext
-      - ERNIE-Bot
-      - ERNIE-Bot-turbo
-      - ERNIE-Bot-4
-      - SparkDesk
-    embed_models:
-      - text-embedding-v1
-      - Embedding-V1
-    text2image_models: []
-    image2text_models: []
-    rerank_models: []
-    speech2text_models: []
-    text2speech_models: []
-  - platform_name: openai
-    platform_type: openai
-    api_base_url: https://api.openai.com/v1
-    api_key: sk-proj-
-    api_proxy: ''
-    api_concurrencies: 5
-    auto_detect_model: false
-    llm_models:
-      - gpt-4o
-      - gpt-3.5-turbo
-    embed_models:
-      - text-embedding-3-small
-      - text-embedding-3-large
     text2image_models: []
     image2text_models: []
     rerank_models: []
     speech2text_models: []
     text2speech_models: []
+  # - platform_name: ollama
+  #   platform_type: ollama
+  #   api_base_url: http://127.0.0.1:11434/v1
+  #   api_key: EMPTY
+  #   api_proxy: ''
+  #   api_concurrencies: 5
+  #   auto_detect_model: false
+  #   llm_models:
+  #     - qwen:7b
+  #     - qwen2:7b
+  #   embed_models:
+  #     - quentinz/bge-large-zh-v1.5
+  #   text2image_models: []
+  #   image2text_models: []
+  #   rerank_models: []
+  #   speech2text_models: []
+  #   text2speech_models: []
+  # - platform_name: oneapi
+  #   platform_type: oneapi
+  #   api_base_url: http://127.0.0.1:3000/v1
+  #   api_key: sk-
+  #   api_proxy: ''
+  #   api_concurrencies: 5
+  #   auto_detect_model: false
+  #   llm_models:
+  #     - chatglm_pro
+  #     - chatglm_turbo
+  #     - chatglm_std
+  #     - chatglm_lite
+  #     - qwen-turbo
+  #     - qwen-plus
+  #     - qwen-max
+  #     - qwen-max-longcontext
+  #     - ERNIE-Bot
+  #     - ERNIE-Bot-turbo
+  #     - ERNIE-Bot-4
+  #     - SparkDesk
+  #   embed_models:
+  #     - text-embedding-v1
+  #     - Embedding-V1
+  #   text2image_models: []
+  #   image2text_models: []
+  #   rerank_models: []
+  #   speech2text_models: []
+  #   text2speech_models: []
+  # - platform_name: openai
+  #   platform_type: openai
+  #   api_base_url: https://api.openai.com/v1
+  #   api_key: sk-proj-
+  #   api_proxy: ''
+  #   api_concurrencies: 5
+  #   auto_detect_model: false
+  #   llm_models:
+  #     - gpt-4o
+  #     - gpt-3.5-turbo
+  #   embed_models:
+  #     - text-embedding-3-small
+  #     - text-embedding-3-large
+  #   text2image_models: []
+  #   image2text_models: []
+  #   rerank_models: []
+  #   speech2text_models: []
+  #   text2speech_models: []
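As a quick sanity check of the settings above, a minimal Python sketch (the file name model_settings.yaml and the use of PyYAML are assumptions for illustration, not part of this commit):

import yaml

# Load the edited settings file and inspect the only active platform (xinference).
with open("model_settings.yaml", "r", encoding="utf-8") as f:
    settings = yaml.safe_load(f)

print(settings["DEFAULT_LLM_MODEL"])  # expected after this commit: qwen2.5-instruct
for platform in settings["MODEL_PLATFORMS"]:
    print(platform["platform_name"], platform["api_base_url"], platform["llm_models"])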
@@ -116,14 +116,29 @@ SEARCH_ENGINES = {
     "tavily": tavily_search
 }
 
+# Parsing for tavily
+# def search_result2docs_tavily(search_results) -> List[Document]:
+#     docs = []
+#     for result in search_results:
+#         doc = Document(
+#             page_content=result["content"] if "content" in result.keys() else "",
+#             metadata={
+#                 "source": result["url"] if "url" in result.keys() else "",
+#                 "filename": result["title"] if "title" in result.keys() else "",
+#             },
+#         )
+#         docs.append(doc)
+#     return docs
+
-def search_result2docs(search_results) -> List[Document]:
+def search_result2docs(search_results, engine_name) -> List[Document]:
     docs = []
+    page_contents_key = "snippet" if engine_name != "tavily" else "content"
+    metadata_key = "link" if engine_name != "tavily" else "url"
     for result in search_results:
         doc = Document(
-            page_content=result["content"] if "content" in result.keys() else "",
+            page_content=result[page_contents_key] if page_contents_key in result.keys() else "",
             metadata={
-                "source": result["url"] if "url" in result.keys() else "",
+                "source": result[metadata_key] if metadata_key in result.keys() else "",
                 "filename": result["title"] if "title" in result.keys() else "",
             },
         )
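A short, hypothetical usage sketch of the new per-engine key selection (the sample result dicts and the engine name "bing" are made-up illustrations, and the patched search_result2docs above is assumed to be importable):

# Tavily results carry "content"/"url"; other engines are assumed to carry "snippet"/"link".
tavily_results = [{"content": "Example passage", "url": "https://example.com/a", "title": "A"}]
bing_results = [{"snippet": "Example passage", "link": "https://example.com/b", "title": "B"}]

print(search_result2docs(tavily_results, "tavily")[0].metadata["source"])  # https://example.com/a
print(search_result2docs(bing_results, "bing")[0].metadata["source"])      # https://example.com/b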
@@ -141,7 +156,8 @@ def search_engine(query: str, top_k:int=0, engine_name: str="", config: dict={})
     results = search_engine_use(
         text=query, config=config["search_engine_config"][engine_name], top_k=top_k
     )
-    docs = [x for x in search_result2docs(results) if x.page_content and x.page_content.strip()]
+    docs = [x for x in search_result2docs(results, engine_name) if x.page_content and x.page_content.strip()]
     print(f"docs: {docs}")
     return {"docs": docs, "search_engine": engine_name}
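As a rough illustration of what a caller receives after this change (the query, engine name, and config object below are placeholders; this is a sketch only, not code from the repository):

# Hypothetical call; assumes "bing" is configured under config["search_engine_config"].
result = search_engine(query="what is RAG", top_k=3, engine_name="bing", config=config)
for doc in result["docs"]:          # empty-content results are already filtered out
    print(doc.metadata["source"], doc.page_content[:80])
print(result["search_engine"])      # bing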