Compare commits
23 Commits
dev/search
...
main
| Author | SHA1 | Date |
|---|---|---|
|
|
b577a3ad18 | |
|
|
122033131b | |
|
|
468b3116cd | |
|
|
43c82ee797 | |
|
|
5c7566bc69 | |
|
|
20228503a6 | |
|
|
cc8564ccca | |
|
|
8cd80dcd9c | |
|
|
718766abb8 | |
|
|
c6bb3b4ffd | |
|
|
c897a1722f | |
|
|
28c87ac7a7 | |
|
|
bad876fc98 | |
|
|
0e3a9d8dd0 | |
|
|
c49b048976 | |
|
|
34dc4f2c7f | |
|
|
04db85f02d | |
|
|
42315ab3da | |
|
|
dfa583d344 | |
|
|
1ac1ab0ced | |
|
|
ac368b3814 | |
|
|
0bff316575 | |
|
|
00f56aa23c |
|
|
@ -0,0 +1,40 @@
|
|||
*.csv
|
||||
*.yaml
|
||||
*.xlsx
|
||||
*.pdf
|
||||
*.txt
|
||||
*.log
|
||||
*.pyc
|
||||
/chatchat_data.bak
|
||||
/chatchat_data/data/knowledge_base/samples
|
||||
/chatchat_data
|
||||
.idea/inspectionProfiles/profiles_settings.xml
|
||||
.idea/Langchain-Chatchat.iml
|
||||
.idea/misc.xml
|
||||
.idea/modules.xml
|
||||
.idea/prettier.xml
|
||||
.idea/vcs.xml
|
||||
.idea/inspectionProfiles/profiles_settings.xml
|
||||
.idea/Langchain-Chatchat.iml
|
||||
.idea/modules.xml
|
||||
.idea/prettier.xml
|
||||
.idea/vcs.xml
|
||||
/.idea
|
||||
/test_tool
|
||||
chatchat_data/tool_settings.yaml
|
||||
chatchat_data/prompt_settings.yaml
|
||||
chatchat_data/model_settings.yaml
|
||||
chatchat_data/basic_settings.yaml
|
||||
localconfig/data/knowledge_base/samples/content/分布式训练技术原理.md
|
||||
localconfig/data/knowledge_base/samples/content/大模型应用技术原理.md
|
||||
localconfig/data/knowledge_base/samples/content/大模型技术栈-实战与应用.md
|
||||
localconfig/data/knowledge_base/samples/content/大模型技术栈-算法与原理.md
|
||||
localconfig/data/knowledge_base/samples/content/大模型指令对齐训练原理.md
|
||||
localconfig/data/knowledge_base/samples/content/大模型推理优化策略.md
|
||||
localconfig/data/knowledge_base/samples/vector_store/bge-large-zh-v1.5/index.faiss
|
||||
localconfig/data/knowledge_base/samples/vector_store/bge-large-zh-v1.5/index.pkl
|
||||
localconfig/data/knowledge_base/info.db
|
||||
chatchat_data/basic_settings.yaml
|
||||
chatchat_data/model_settings.yaml
|
||||
chatchat_data/prompt_settings.yaml
|
||||
chatchat_data/tool_settings.yaml
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
# 默认忽略的文件
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# 基于编辑器的 HTTP 客户端请求
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
<component name="ProjectDictionaryState">
|
||||
<dictionary name="Guan">
|
||||
<words>
|
||||
<w>aggrid</w>
|
||||
</words>
|
||||
</dictionary>
|
||||
</component>
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
|
||||
# 默认选用的 LLM 名称
|
||||
DEFAULT_LLM_MODEL: qwen2-instruct
|
||||
DEFAULT_LLM_MODEL: qwen2.5-instruct
|
||||
|
||||
# 默认选用的 Embedding 名称
|
||||
DEFAULT_EMBEDDING_MODEL: bge-large-zh-v1.5
|
||||
|
|
@ -112,78 +112,78 @@ LLM_MODEL_CONFIG:
|
|||
MODEL_PLATFORMS:
|
||||
- platform_name: xinference
|
||||
platform_type: xinference
|
||||
api_base_url: http://127.0.0.1:9997/v1
|
||||
api_base_url: http://192.168.0.21:9997/v1
|
||||
api_key: EMPTY
|
||||
api_proxy: ''
|
||||
api_concurrencies: 5
|
||||
auto_detect_model: true
|
||||
llm_models: []
|
||||
embed_models: []
|
||||
text2image_models: []
|
||||
image2text_models: []
|
||||
rerank_models: [bge-reranker-large]
|
||||
speech2text_models: []
|
||||
text2speech_models: []
|
||||
- platform_name: ollama
|
||||
platform_type: ollama
|
||||
api_base_url: http://127.0.0.1:11434/v1
|
||||
api_key: EMPTY
|
||||
api_proxy: ''
|
||||
api_concurrencies: 5
|
||||
auto_detect_model: false
|
||||
llm_models:
|
||||
- qwen:7b
|
||||
- qwen2:7b
|
||||
embed_models:
|
||||
- quentinz/bge-large-zh-v1.5
|
||||
text2image_models: []
|
||||
image2text_models: []
|
||||
rerank_models: []
|
||||
speech2text_models: []
|
||||
text2speech_models: []
|
||||
- platform_name: oneapi
|
||||
platform_type: oneapi
|
||||
api_base_url: http://127.0.0.1:3000/v1
|
||||
api_key: sk-
|
||||
api_proxy: ''
|
||||
api_concurrencies: 5
|
||||
auto_detect_model: false
|
||||
llm_models:
|
||||
- chatglm_pro
|
||||
- chatglm_turbo
|
||||
- chatglm_std
|
||||
- chatglm_lite
|
||||
- qwen-turbo
|
||||
- qwen-plus
|
||||
- qwen-max
|
||||
- qwen-max-longcontext
|
||||
- ERNIE-Bot
|
||||
- ERNIE-Bot-turbo
|
||||
- ERNIE-Bot-4
|
||||
- SparkDesk
|
||||
embed_models:
|
||||
- text-embedding-v1
|
||||
- Embedding-V1
|
||||
text2image_models: []
|
||||
image2text_models: []
|
||||
rerank_models: []
|
||||
speech2text_models: []
|
||||
text2speech_models: []
|
||||
- platform_name: openai
|
||||
platform_type: openai
|
||||
api_base_url: https://api.openai.com/v1
|
||||
api_key: sk-proj-
|
||||
api_proxy: ''
|
||||
api_concurrencies: 5
|
||||
auto_detect_model: false
|
||||
llm_models:
|
||||
- gpt-4o
|
||||
- gpt-3.5-turbo
|
||||
embed_models:
|
||||
- text-embedding-3-small
|
||||
- text-embedding-3-large
|
||||
llm_models: [qwen2.5-instruct]
|
||||
embed_models: [bge-large-zh-v1.5]
|
||||
text2image_models: []
|
||||
image2text_models: []
|
||||
rerank_models: []
|
||||
speech2text_models: []
|
||||
text2speech_models: []
|
||||
# - platform_name: ollama
|
||||
# platform_type: ollama
|
||||
# api_base_url: http://127.0.0.1:11434/v1
|
||||
# api_key: EMPTY
|
||||
# api_proxy: ''
|
||||
# api_concurrencies: 5
|
||||
# auto_detect_model: false
|
||||
# llm_models:
|
||||
# - qwen:7b
|
||||
# - qwen2:7b
|
||||
# embed_models:
|
||||
# - quentinz/bge-large-zh-v1.5
|
||||
# text2image_models: []
|
||||
# image2text_models: []
|
||||
# rerank_models: []
|
||||
# speech2text_models: []
|
||||
# text2speech_models: []
|
||||
# - platform_name: oneapi
|
||||
# platform_type: oneapi
|
||||
# api_base_url: http://127.0.0.1:3000/v1
|
||||
# api_key: sk-
|
||||
# api_proxy: ''
|
||||
# api_concurrencies: 5
|
||||
# auto_detect_model: false
|
||||
# llm_models:
|
||||
# - chatglm_pro
|
||||
# - chatglm_turbo
|
||||
# - chatglm_std
|
||||
# - chatglm_lite
|
||||
# - qwen-turbo
|
||||
# - qwen-plus
|
||||
# - qwen-max
|
||||
# - qwen-max-longcontext
|
||||
# - ERNIE-Bot
|
||||
# - ERNIE-Bot-turbo
|
||||
# - ERNIE-Bot-4
|
||||
# - SparkDesk
|
||||
# embed_models:
|
||||
# - text-embedding-v1
|
||||
# - Embedding-V1
|
||||
# text2image_models: []
|
||||
# image2text_models: []
|
||||
# rerank_models: []
|
||||
# speech2text_models: []
|
||||
# text2speech_models: []
|
||||
# - platform_name: openai
|
||||
# platform_type: openai
|
||||
# api_base_url: https://api.openai.com/v1
|
||||
# api_key: sk-proj-
|
||||
# api_proxy: ''
|
||||
# api_concurrencies: 5
|
||||
# auto_detect_model: false
|
||||
# llm_models:
|
||||
# - gpt-4o
|
||||
# - gpt-3.5-turbo
|
||||
# embed_models:
|
||||
# - text-embedding-3-small
|
||||
# - text-embedding-3-large
|
||||
# text2image_models: []
|
||||
# image2text_models: []
|
||||
# rerank_models: []
|
||||
# speech2text_models: []
|
||||
# text2speech_models: []
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ search_local_knowledgebase:
|
|||
# 搜索引擎工具配置项。推荐自己部署 searx 搜索引擎,国内使用最方便。
|
||||
search_internet:
|
||||
use: false
|
||||
search_engine_name: searx
|
||||
search_engine_name: zhipu_search
|
||||
search_engine_config:
|
||||
bing:
|
||||
bing_search_url: https://api.bing.microsoft.com/v7.0/search
|
||||
|
|
@ -30,6 +30,14 @@ search_internet:
|
|||
engines: []
|
||||
categories: []
|
||||
language: zh-CN
|
||||
tavily:
|
||||
tavily_api_key: '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'
|
||||
include_answer: true
|
||||
search_depth: advanced
|
||||
include_raw_content: True
|
||||
max_results: 1
|
||||
zhipu_search:
|
||||
zhipu_api_key: 'e2bdc39618624fd782ebcd721185645c.pcvcrTPFT69Jda8B'
|
||||
top_k: 5
|
||||
verbose: Origin
|
||||
conclude_prompt: "<指令>这是搜索到的互联网信息,请你根据这些信息进行提取并有调理,简洁的回答问题。如果无法从中得到答案,请说 “无法搜索到能回答问题的内容”。
|
||||
|
|
|
|||
|
|
@ -13,6 +13,6 @@ print(f"cuDNN 版本: {cudnn_version}")
|
|||
|
||||
# 检查是否可以访问 CUDA
|
||||
if torch.cuda.is_available():
    # Report the name of device 0. The success message must not contain the
    # stray "pip install ..." command that had been pasted into the literal.
    print("CUDA is available. GPU name:", torch.cuda.get_device_name(0))
else:
    print("CUDA is not available. Please check your installation.")
|
||||
|
|
@ -1,5 +1,8 @@
|
|||
import json
|
||||
import uuid
|
||||
from typing import Dict, List
|
||||
|
||||
import requests
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from langchain.utilities.bing_search import BingSearchAPIWrapper
|
||||
|
|
@ -7,15 +10,21 @@ from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
|
|||
from langchain.utilities.searx_search import SearxSearchWrapper
|
||||
from markdownify import markdownify
|
||||
from strsimpy.normalized_levenshtein import NormalizedLevenshtein
|
||||
from langchain_community.tools.tavily_search import TavilySearchResults
|
||||
import os
|
||||
|
||||
from chatchat.settings import Settings
|
||||
from chatchat.server.pydantic_v1 import Field
|
||||
from chatchat.server.utils import get_tool_config
|
||||
from chatchat.utils import build_logger
|
||||
# from tavily import TavilyClient
|
||||
|
||||
from .tools_registry import BaseToolOutput, regist_tool, format_context
|
||||
|
||||
logger = build_logger()
|
||||
|
||||
def searx_search(text ,config, top_k: int):
|
||||
|
||||
def searx_search(text, config, top_k: int):
|
||||
print(f"searx_search: text: {text},config:{config},top_k:{top_k}")
|
||||
search = SearxSearchWrapper(
|
||||
searx_host=config["host"],
|
||||
|
|
@ -26,7 +35,7 @@ def searx_search(text ,config, top_k: int):
|
|||
return search.results(text, top_k)
|
||||
|
||||
|
||||
def bing_search(text, config, top_k:int):
|
||||
def bing_search(text, config, top_k: int):
|
||||
search = BingSearchAPIWrapper(
|
||||
bing_subscription_key=config["bing_key"],
|
||||
bing_search_url=config["bing_search_url"],
|
||||
|
|
@ -34,7 +43,7 @@ def bing_search(text, config, top_k:int):
|
|||
return search.results(text, top_k)
|
||||
|
||||
|
||||
def duckduckgo_search(text, config, top_k:int):
|
||||
def duckduckgo_search(text, config, top_k: int):
|
||||
search = DuckDuckGoSearchAPIWrapper()
|
||||
return search.results(text, top_k)
|
||||
|
||||
|
|
@ -42,7 +51,7 @@ def duckduckgo_search(text, config, top_k:int):
|
|||
def metaphor_search(
|
||||
text: str,
|
||||
config: dict,
|
||||
top_k:int
|
||||
top_k: int
|
||||
) -> List[Dict]:
|
||||
from metaphor_python import Metaphor
|
||||
|
||||
|
|
@ -85,21 +94,77 @@ def metaphor_search(
|
|||
return docs
|
||||
|
||||
|
||||
def tavily_search(text, config, top_k):
    """Run a Tavily web search and return its result entries without URLs.

    Args:
        text: Query string handed to the Tavily tool.
        config: Mapping that must provide ``tavily_api_key``,
            ``include_answer``, ``search_depth``, ``include_raw_content``
            and ``max_results``.
        top_k: Accepted for signature parity with the other search
            functions; it is not used here, the number of hits is taken
            from ``config["max_results"]``.

    Returns:
        A list of dicts, one per hit, each with its ``url`` key removed.

    Raises:
        KeyError: If a required key is missing from ``config``.
        RuntimeError: If the tool reports a failure as a string instead of
            returning a list of hits.
    """
    # The Tavily tool reads its API key from the process environment.
    os.environ["TAVILY_API_KEY"] = config["tavily_api_key"]

    tavily_tool = TavilySearchResults(
        include_answer=config["include_answer"],
        search_depth=config["search_depth"],
        include_raw_content=config["include_raw_content"],
        max_results=config["max_results"],
    )

    raw_results = tavily_tool.run(text)

    # On failure the tool hands back the error text rather than a list.
    # Iterating that string would die with an opaque AttributeError, so
    # surface the real message instead.
    if isinstance(raw_results, str):
        raise RuntimeError(f"Tavily search failed: {raw_results}")

    # NOTE(review): search_result2docs looks up "url" for the tavily engine,
    # so dropping it here leaves the document "source" empty - confirm that
    # this is intended.
    return [
        {key: value for key, value in hit.items() if key != 'url'}
        for hit in raw_results
    ]
|
||||
|
||||
|
||||
def zhipu_search(text, config, top_k):
    """Query the Zhipu web-search endpoint and return the decoded JSON body.

    Args:
        text: Search query sent as ``search_query``.
        config: Mapping that must provide ``zhipu_api_key``. An optional
            ``timeout`` entry (seconds) overrides the default of 30.
        top_k: Accepted for signature parity with the other search
            functions; it is not used here (search_result2docs truncates
            the zhipu results to ``top_k``).

    Returns:
        The JSON-decoded response body, returned unchanged.

    Raises:
        KeyError: If ``zhipu_api_key`` is missing from ``config``.
        requests.exceptions.RequestException: On connection failure or when
            the timeout elapses.
    """
    api_key = config["zhipu_api_key"]
    endpoint = "https://open.bigmodel.cn/api/paas/v4/web_search"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "search_engine": "Search-Pro",  # dedicated web-search model
        "search_query": text,
    }

    # Without a timeout a stalled endpoint would block the tool call forever.
    response = requests.post(
        endpoint,
        headers=headers,
        json=payload,
        timeout=config.get("timeout", 30),
    )
    result = response.json()

    # Keep the payload dump available for troubleshooting without writing
    # every search result to stdout.
    logger.debug(f"zhipu_search result: {result}")
    return result
|
||||
|
||||
|
||||
# Registry of search backends: maps the configured engine name to the
# function that performs the search for that engine.
SEARCH_ENGINES = dict(
    bing=bing_search,
    duckduckgo=duckduckgo_search,
    metaphor=metaphor_search,
    searx=searx_search,
    tavily=tavily_search,
    zhipu_search=zhipu_search,
)
|
||||
|
||||
|
||||
def search_result2docs(search_results) -> List[Document]:
|
||||
def search_result2docs(search_results, engine_name, top_k) -> List[Document]:
|
||||
docs = []
|
||||
if engine_name == "zhipu_search":
|
||||
try:
|
||||
# search_results_json = json.loads(search_results)
|
||||
results = search_results["search_result"]
|
||||
except (KeyError, IndexError) as e:
|
||||
print(f"结构异常: {e}")
|
||||
results = []
|
||||
# 遍历并处理每个结果
|
||||
for item in results[:top_k]:
|
||||
doc = Document(
|
||||
page_content=item['content'],
|
||||
metadata={"link": item['link'], "title": item['title']}
|
||||
)
|
||||
docs.append(doc)
|
||||
return docs
|
||||
page_contents_key = "snippet" if engine_name != "tavily" else "content"
|
||||
metadata_key = "link" if engine_name != "tavily" else "url"
|
||||
for result in search_results:
|
||||
doc = Document(
|
||||
page_content=result["snippet"] if "snippet" in result.keys() else "",
|
||||
page_content=result[page_contents_key] if page_contents_key in result.keys() else "",
|
||||
metadata={
|
||||
"source": result["link"] if "link" in result.keys() else "",
|
||||
"source": result[metadata_key] if metadata_key in result.keys() else "",
|
||||
"filename": result["title"] if "title" in result.keys() else "",
|
||||
},
|
||||
)
|
||||
|
|
@ -107,7 +172,7 @@ def search_result2docs(search_results) -> List[Document]:
|
|||
return docs
|
||||
|
||||
|
||||
def search_engine(query: str, top_k:int=0, engine_name: str="", config: dict={}):
|
||||
def search_engine(query: str, top_k: int = 0, engine_name: str = "", config: dict = {}):
|
||||
config = config or get_tool_config("search_internet")
|
||||
if top_k <= 0:
|
||||
top_k = config.get("top_k", Settings.kb_settings.SEARCH_ENGINE_TOP_K)
|
||||
|
|
@ -117,12 +182,20 @@ def search_engine(query: str, top_k:int=0, engine_name: str="", config: dict={})
|
|||
results = search_engine_use(
|
||||
text=query, config=config["search_engine_config"][engine_name], top_k=top_k
|
||||
)
|
||||
docs = [x for x in search_result2docs(results) if x.page_content and x.page_content.strip()]
|
||||
|
||||
docs = [x for x in search_result2docs(results, engine_name, top_k) if x.page_content and x.page_content.strip()]
|
||||
print(f"len(docs): {len(docs)}")
|
||||
# print(f"docs:{docs}")
|
||||
# # print(f"docs: {docs[:150]}")
|
||||
return {"docs": docs, "search_engine": engine_name}
|
||||
|
||||
|
||||
@regist_tool(title="互联网搜索")
|
||||
def search_internet(query: str = Field(description="query for Internet search")):
|
||||
"""用这个工具实现获取世界、历史、实时新闻、或除电力系统之外的信息查询"""
|
||||
try:
|
||||
print(f"search_internet: query: {query}")
|
||||
return BaseToolOutput(search_engine(query=query), format=format_context)
|
||||
return BaseToolOutput(data=search_engine(query=query), format=format_context)
|
||||
except Exception as e:
|
||||
logger.error(f"未知错误: {str(e)}")
|
||||
return BaseToolOutput(f"搜索过程中发生未知错误,{str(e)}", format=format_context)
|
||||
|
|
|
|||
|
|
@ -176,7 +176,7 @@ def format_context(self: BaseToolOutput) -> str:
|
|||
doc = DocumentWithVSId.parse_obj(doc)
|
||||
source_documents.append(doc.page_content)
|
||||
|
||||
print(f"format_context: doc.page_content: {doc.page_content}")
|
||||
# print(f"format_context: doc.page_content: {doc.page_content}")
|
||||
if len(source_documents) == 0:
|
||||
context = "没有找到相关文档,请更换关键词重试"
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ def weather_check(
|
|||
"- '今天':获取当前实时天气\n"
|
||||
"- '明天'/'后天':获取未来24/48小时预报\n"
|
||||
"- '未来X天':获取最多X天预报(如'未来3天'),X的抽取要符合客户意图\n"
|
||||
"- 不支持其他参数,如果是其他参数,则时间参数为None"
|
||||
"- 不支持其他参数,如果是其他参数,则时间参数为None\n"
|
||||
)
|
||||
):
|
||||
"""用这个工具获取指定地点和指定时间的天气"""
|
||||
|
|
@ -32,8 +32,7 @@ def weather_check(
|
|||
missing_params.append("日期参数")
|
||||
|
||||
if missing_params:
|
||||
return BaseToolOutput(
|
||||
error_message=f"缺少必要参数:{', '.join(missing_params)},请补充完整查询信息",
|
||||
return BaseToolOutput(data={"error_message": f"缺少必要参数:{', '.join(missing_params)},请补充完整查询信息"},
|
||||
require_additional_input=True
|
||||
)
|
||||
|
||||
|
|
@ -42,13 +41,13 @@ def weather_check(
|
|||
weather_type, number = parse_date_parameter(date)
|
||||
except ValueError as e:
|
||||
logging.error(f"日期参数解析失败: {str(e)}")
|
||||
return BaseToolOutput(str(e))
|
||||
return BaseToolOutput(data={"error_message": str(e)})
|
||||
|
||||
# 获取API配置
|
||||
tool_config = get_tool_config("weather_check")
|
||||
api_key = tool_config.get("api_key")
|
||||
if not api_key:
|
||||
return BaseToolOutput("API密钥未配置,请联系管理员")
|
||||
return BaseToolOutput(data={"error_message": "API密钥未配置,请联系管理员"})
|
||||
|
||||
# 根据天气类型调用API
|
||||
if weather_type == "daily":
|
||||
|
|
@ -56,7 +55,8 @@ def weather_check(
|
|||
elif weather_type == "future":
|
||||
return _get_future_weather(city, api_key, number)
|
||||
else:
|
||||
return BaseToolOutput("不支持的天气类型")
|
||||
return BaseToolOutput(data={"error_message": "不支持的天气类型"})
|
||||
|
||||
|
||||
def _get_current_weather(city: str, api_key: str) -> BaseToolOutput:
|
||||
"""获取当前实时天气"""
|
||||
|
|
@ -66,14 +66,15 @@ def _get_current_weather(city: str, api_key: str) -> BaseToolOutput:
|
|||
|
||||
if response.status_code != 200:
|
||||
logging.error(f"天气查询失败: {response.status_code}")
|
||||
return BaseToolOutput("天气查询API请求失败")
|
||||
return BaseToolOutput(data={"error_message": "天气查询API请求失败"})
|
||||
|
||||
data = response.json()
|
||||
weather = {
|
||||
"temperature": data["results"][0]["now"]["temperature"],
|
||||
"description": data["results"][0]["now"]["text"],
|
||||
}
|
||||
return BaseToolOutput(weather)
|
||||
return BaseToolOutput(data=weather)
|
||||
|
||||
|
||||
def _get_future_weather(city: str, api_key: str, days: int) -> BaseToolOutput:
|
||||
"""获取未来天气预报"""
|
||||
|
|
@ -115,9 +116,10 @@ def _get_future_weather(city: str, api_key: str, days: int) -> BaseToolOutput:
|
|||
"后天最高温度": daily_data[2]["high"],
|
||||
}
|
||||
else:
|
||||
return BaseToolOutput("不支持的天数参数")
|
||||
return BaseToolOutput(data={"error_message": "不支持的天数参数"})
|
||||
|
||||
return BaseToolOutput(data=weather)
|
||||
|
||||
return BaseToolOutput(weather)
|
||||
|
||||
def parse_date_parameter(date: str) -> tuple:
|
||||
"""解析日期参数,返回天气类型和天数"""
|
||||
|
|
@ -136,5 +138,6 @@ def parse_date_parameter(date: str) -> tuple:
|
|||
else:
|
||||
raise ValueError("不支持的日期参数")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
weather_check("合肥","明天")
|
||||
weather_check("合肥", "明天")
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ async def chat_completions(
|
|||
# import rich
|
||||
# rich.print(body)
|
||||
# 当调用本接口且 body 中没有传入 "max_tokens" 参数时, 默认使用配置中定义的值
|
||||
logger.info(f"body.model_config:{body.model_config},body.tools: {body.tools},body.messages:{body.messages}")
|
||||
# logger.info(f"body.model_config:{body.model_config},body.tools: {body.tools},body.messages:{body.messages}")
|
||||
if body.max_tokens in [None, 0]:
|
||||
body.max_tokens = Settings.model_settings.MAX_TOKENS
|
||||
|
||||
|
|
|
|||
|
|
@ -70,6 +70,9 @@ def list_files_from_folder(kb_name: str):
|
|||
for x in ["temp", "tmp", ".", "~$"]:
|
||||
if tail.startswith(x):
|
||||
return True
|
||||
if "_source.txt" in tail.lower() or "_split.txt" in tail.lower():
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def process_entry(entry):
|
||||
|
|
@ -422,15 +425,15 @@ class KnowledgeFile:
|
|||
docs = zh_first_title_enhance(docs)
|
||||
docs = customize_zh_title_enhance(docs)
|
||||
|
||||
# i = 1
|
||||
# outputfile = file_name_without_extension + "_split.txt"
|
||||
# # 打开文件以写入模式
|
||||
# with open(outputfile, 'w') as file:
|
||||
# for doc in docs:
|
||||
# #print(f"**********切分段{i}:{doc}")
|
||||
# file.write(f"\n**********切分段{i}")
|
||||
# file.write(doc.page_content)
|
||||
# i = i+1
|
||||
i = 1
|
||||
outputfile = file_name_without_extension + "_split.txt"
|
||||
# 打开文件以写入模式
|
||||
with open(outputfile, 'w') as file:
|
||||
for doc in docs:
|
||||
#print(f"**********切分段{i}:{doc}")
|
||||
file.write(f"\n**********切分段{i}")
|
||||
file.write(doc.page_content)
|
||||
i = i+1
|
||||
|
||||
self.splited_docs = docs
|
||||
return self.splited_docs
|
||||
|
|
|
|||
|
|
@ -488,7 +488,7 @@ class ToolSettings(BaseFileSettings):
|
|||
|
||||
search_internet: dict = {
|
||||
"use": False,
|
||||
"search_engine_name": "duckduckgo",
|
||||
"search_engine_name": "zhipu_search",
|
||||
"search_engine_config": {
|
||||
"bing": {
|
||||
"bing_search_url": "https://api.bing.microsoft.com/v7.0/search",
|
||||
|
|
@ -506,11 +506,21 @@ class ToolSettings(BaseFileSettings):
|
|||
"engines": [],
|
||||
"categories": [],
|
||||
"language": "zh-CN",
|
||||
},
|
||||
"tavily":{
|
||||
"tavily_api_key": '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',
|
||||
"include_answer": True,
|
||||
"search_depth": "advanced",
|
||||
"include_raw_content": True,
|
||||
"max_results": 1
|
||||
},
|
||||
"zhipu_search":{
|
||||
"zhipu_api_key": ""
|
||||
}
|
||||
},
|
||||
"top_k": 5,
|
||||
"top_k": 1,
|
||||
"verbose": "Origin",
|
||||
"conclude_prompt": "<指令>这是搜索到的互联网信息,请你根据这些信息进行提取并有调理,简洁的回答问题。如果无法从中得到答案,请说 “无法搜索到能回答问题的内容”。 "
|
||||
"conclude_prompt": "<指令>这是搜索到的互联网信息,请你根据这些信息进行提取并有调理,简洁的回答问题,不得包含有重复的词汇或句子。如果无法从中得到答案,请说 “无法搜索到能回答问题的内容”。 "
|
||||
"</指令>\n<已知信息>{{ context }}</已知信息>\n"
|
||||
"<问题>\n"
|
||||
"{{ question }}\n"
|
||||
|
|
@ -650,7 +660,7 @@ class PromptSettings(BaseFileSettings):
|
|||
|
||||
rag: dict = {
|
||||
"default": (
|
||||
"【指令】根据已知信息,简洁和专业的来回答问题。"
|
||||
"【指令】根据已知信息,简洁和专业的来回答问题,不得包含有重复的词汇或句子。"
|
||||
"如果无法从中得到答案,请说 “根据已知信息无法回答该问题”,不允许在答案中添加编造成分,答案请使用中文。\n\n"
|
||||
"【已知信息】{{context}}\n\n"
|
||||
"【问题】{{question}}\n"
|
||||
|
|
@ -741,6 +751,8 @@ class PromptSettings(BaseFileSettings):
|
|||
"Begin!\n\n"
|
||||
"Question: {input}\n\n"
|
||||
"{agent_scratchpad}\n\n"
|
||||
"Important: After the last Observation, you must always add a Final Answer "
|
||||
"summarizing the result. Do not skip this step."
|
||||
),
|
||||
"structured-chat-agent": (
|
||||
"Respond to the human as helpfully and accurately as possible. You have access to the following tools:\n\n"
|
||||
|
|
|
|||
|
|
@ -238,7 +238,7 @@ def knowledge_base_page(api: ApiRequest, is_lite: bool = None):
|
|||
doc_details,
|
||||
{
|
||||
("No", "序号"): {},
|
||||
("file_name", "文档名称"): {},
|
||||
("file_name", "文档名称"): {"filter": "agTextColumnFilter"},
|
||||
# ("file_ext", "文档类型"): {},
|
||||
# ("file_version", "文档版本"): {},
|
||||
("document_loader", "文档加载器"): {},
|
||||
|
|
|
|||
Loading…
Reference in New Issue