API and WebUI knowledge base operations now support chunk_size / overlap_size / zh_title_enhance parameters (#1459)

parent 9defa4332e
commit 16d8809c9a
@@ -104,6 +104,10 @@ LLM_MODEL = "chatglm2-6b"
 # Number of history dialogue rounds
 HISTORY_LEN = 3
 
+# Common LLM conversation parameters
+TEMPERATURE = 0.7
+# TOP_P = 0.95  # ChatOpenAI does not support this parameter yet
+
 # Device to run the LLM on. Set to "auto" to autodetect, or set manually to one of "cuda", "mps", "cpu".
 LLM_DEVICE = "auto"
 
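For context: the rest of this commit imports CHUNK_SIZE, OVERLAP_SIZE and ZH_TITLE_ENHANCE from this same config module. Their definitions are outside the hunk above; a plausible sketch of what those entries look like (the values are assumptions, not shown in this diff):

    # Hypothetical config entries; values assumed for illustration only.
    CHUNK_SIZE = 250        # max length of a single text chunk in the knowledge base
    OVERLAP_SIZE = 50       # overlap length between adjacent chunks
    ZH_TITLE_ENHANCE = False  # whether to enable Chinese title enhancement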
@@ -3,6 +3,7 @@ import urllib
 from fastapi import File, Form, Body, Query, UploadFile
 from configs.model_config import (DEFAULT_VS_TYPE, EMBEDDING_MODEL,
                                   VECTOR_SEARCH_TOP_K, SCORE_THRESHOLD,
+                                  CHUNK_SIZE, OVERLAP_SIZE, ZH_TITLE_ENHANCE,
                                   logger, log_verbose,)
 from server.utils import BaseResponse, ListResponse, run_in_thread_pool
 from server.knowledge_base.utils import (validate_kb_name, list_files_from_folder,get_file_path,
@@ -121,6 +122,9 @@ def upload_docs(files: List[UploadFile] = File(..., description="上传文件,
                 knowledge_base_name: str = Form(..., description="知识库名称", examples=["samples"]),
                 override: bool = Form(False, description="覆盖已有文件"),
                 to_vector_store: bool = Form(True, description="上传文件后是否进行向量化"),
+                chunk_size: int = Body(CHUNK_SIZE, description="知识库中单段文本最大长度"),
+                chunk_overlap: int = Body(OVERLAP_SIZE, description="知识库中相邻文本重合长度"),
+                zh_title_enhance: bool = Body(ZH_TITLE_ENHANCE, description="是否开启中文标题加强"),
                 docs: Json = Form({}, description="自定义的docs", examples=[{"test.txt": [Document(page_content="custom doc")]}]),
                 not_refresh_vs_cache: bool = Form(False, description="暂不保存向量库(用于FAISS)"),
                 ) -> BaseResponse:
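The upload endpoint now accepts the three chunking parameters alongside the existing form fields. A hedged sketch of a client call; the server address and the /knowledge_base/upload_docs path are assumptions based on typical deployments of this project, and whether FastAPI reads the three Body() parameters from the multipart form is assumed here rather than shown in the diff:

    import requests

    base_url = "http://127.0.0.1:7861"  # assumed default API server address

    with open("test.txt", "rb") as f:
        resp = requests.post(
            f"{base_url}/knowledge_base/upload_docs",
            files=[("files", ("test.txt", f))],
            data={
                "knowledge_base_name": "samples",
                "override": "true",
                "to_vector_store": "true",
                "chunk_size": 250,        # max length of a single chunk
                "chunk_overlap": 50,      # overlap between adjacent chunks
                "zh_title_enhance": "false",
            },
        )
    print(resp.json())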
@@ -152,6 +156,9 @@ def upload_docs(files: List[UploadFile] = File(..., description="上传文件,
             knowledge_base_name=knowledge_base_name,
             file_names=file_names,
             override_custom_docs=True,
+            chunk_size=chunk_size,
+            chunk_overlap=chunk_overlap,
+            zh_title_enhance=zh_title_enhance,
             docs=docs,
             not_refresh_vs_cache=True,
         )
@@ -199,6 +206,9 @@ def delete_docs(knowledge_base_name: str = Body(..., examples=["samples"]),
 def update_docs(
         knowledge_base_name: str = Body(..., description="知识库名称", examples=["samples"]),
         file_names: List[str] = Body(..., description="文件名称,支持多文件", examples=["file_name"]),
+        chunk_size: int = Body(CHUNK_SIZE, description="知识库中单段文本最大长度"),
+        chunk_overlap: int = Body(OVERLAP_SIZE, description="知识库中相邻文本重合长度"),
+        zh_title_enhance: bool = Body(ZH_TITLE_ENHANCE, description="是否开启中文标题加强"),
         override_custom_docs: bool = Body(False, description="是否覆盖之前自定义的docs"),
         docs: Json = Body({}, description="自定义的docs", examples=[{"test.txt": [Document(page_content="custom doc")]}]),
         not_refresh_vs_cache: bool = Body(False, description="暂不保存向量库(用于FAISS)"),
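Unlike upload_docs, update_docs takes all parameters via Body, so a plain JSON request works. A hedged sketch; the /knowledge_base/update_docs path and port are assumptions, not shown in this diff:

    import requests

    resp = requests.post(
        "http://127.0.0.1:7861/knowledge_base/update_docs",  # assumed path and port
        json={
            "knowledge_base_name": "samples",
            "file_names": ["test.txt"],
            "chunk_size": 250,
            "chunk_overlap": 50,
            "zh_title_enhance": False,
        },
    )
    print(resp.json())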
@@ -233,7 +243,10 @@ def update_docs(
 
     # Generate docs from the files and vectorize them.
     # This relies on KnowledgeFile's caching: Documents are loaded in worker threads, then handed back to KnowledgeFile.
-    for status, result in files2docs_in_thread(kb_files):
+    for status, result in files2docs_in_thread(kb_files,
+                                               chunk_size=chunk_size,
+                                               chunk_overlap=chunk_overlap,
+                                               zh_title_enhance=zh_title_enhance):
         if status:
             kb_name, file_name, new_docs = result
             kb_file = KnowledgeFile(filename=file_name,
@@ -307,7 +320,10 @@ def recreate_vector_store(
         allow_empty_kb: bool = Body(True),
         vs_type: str = Body(DEFAULT_VS_TYPE),
         embed_model: str = Body(EMBEDDING_MODEL),
+        chunk_size: int = Body(CHUNK_SIZE, description="知识库中单段文本最大长度"),
+        chunk_overlap: int = Body(OVERLAP_SIZE, description="知识库中相邻文本重合长度"),
+        zh_title_enhance: bool = Body(ZH_TITLE_ENHANCE, description="是否开启中文标题加强"),
 ):
     '''
     recreate vector store from the content.
     this is useful when users can copy files into the content folder directly instead of uploading them through the network.
@@ -325,7 +341,10 @@ def recreate_vector_store(
     files = list_files_from_folder(knowledge_base_name)
     kb_files = [(file, knowledge_base_name) for file in files]
     i = 0
-    for status, result in files2docs_in_thread(kb_files):
+    for status, result in files2docs_in_thread(kb_files,
+                                               chunk_size=chunk_size,
+                                               chunk_overlap=chunk_overlap,
+                                               zh_title_enhance=zh_title_enhance):
         if status:
             kb_name, file_name, docs = result
             kb_file = KnowledgeFile(filename=file_name, knowledge_base_name=kb_name)

@@ -1,4 +1,6 @@
-from configs.model_config import EMBEDDING_MODEL, DEFAULT_VS_TYPE, logger, log_verbose
+from configs.model_config import (EMBEDDING_MODEL, DEFAULT_VS_TYPE,
+                                  CHUNK_SIZE, OVERLAP_SIZE, ZH_TITLE_ENHANCE,
+                                  logger, log_verbose)
 from server.knowledge_base.utils import (get_file_path, list_kbs_from_folder,
                                          list_files_from_folder,files2docs_in_thread,
                                          KnowledgeFile,)
@@ -6,13 +8,9 @@ from server.knowledge_base.kb_service.base import KBServiceFactory, SupportedVST
 from server.db.repository.knowledge_file_repository import add_file_to_db
 from server.db.base import Base, engine
 import os
-from concurrent.futures import ThreadPoolExecutor
 from typing import Literal, Any, List
 
 
-pool = ThreadPoolExecutor(os.cpu_count())
-
-
 def create_tables():
     Base.metadata.create_all(bind=engine)
 
@@ -40,6 +38,9 @@ def folder2db(
     mode: Literal["recreate_vs", "fill_info_only", "update_in_db", "increament"],
     vs_type: Literal["faiss", "milvus", "pg", "chromadb"] = DEFAULT_VS_TYPE,
     embed_model: str = EMBEDDING_MODEL,
+    chunk_size: int = CHUNK_SIZE,
+    chunk_overlap: int = OVERLAP_SIZE,
+    zh_title_enhance: bool = ZH_TITLE_ENHANCE,
 ):
     '''
     use existing files in a local folder to populate the database and/or vector store.
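With this change, offline migrations honor the same chunking settings as the API. A hedged usage sketch; folder2db's leading parameters sit outside this hunk, so the knowledge-base-name argument below is an assumption:

    from server.knowledge_base.migrate import folder2db

    # Rebuild the vector store from files already on disk, with custom
    # chunking settings instead of the config defaults.
    folder2db(
        kb_name="samples",     # assumed leading parameter, not visible in the hunk
        mode="recreate_vs",
        chunk_size=250,
        chunk_overlap=50,
        zh_title_enhance=False,
    )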
@@ -60,7 +61,10 @@ def folder2db(
     print(f"清理后,知识库 {kb_name} 中共有 {files_count} 个文档。")
 
     kb_files = file_to_kbfile(kb_name, list_files_from_folder(kb_name))
-    for success, result in files2docs_in_thread(kb_files, pool=pool):
+    for success, result in files2docs_in_thread(kb_files,
+                                                chunk_size=chunk_size,
+                                                chunk_overlap=chunk_overlap,
+                                                zh_title_enhance=zh_title_enhance):
         if success:
             _, filename, docs = result
             print(f"正在将 {kb_name}/{filename} 添加到向量库,共包含{len(docs)}条文档")
@@ -89,7 +93,10 @@ def folder2db(
     files = list(set(folder_files) - set(db_files))
     kb_files = file_to_kbfile(kb_name, files)
 
-    for success, result in files2docs_in_thread(kb_files, pool=pool):
+    for success, result in files2docs_in_thread(kb_files,
+                                                chunk_size=chunk_size,
+                                                chunk_overlap=chunk_overlap,
+                                                zh_title_enhance=zh_title_enhance):
         if success:
             _, filename, docs = result
             print(f"正在将 {kb_name}/{filename} 添加到向量库")

@@ -237,7 +237,7 @@ class KnowledgeFile:
     def docs2texts(
             self,
             docs: List[Document] = None,
-            using_zh_title_enhance=ZH_TITLE_ENHANCE,
+            zh_title_enhance: bool = ZH_TITLE_ENHANCE,
             refresh: bool = False,
             chunk_size: int = CHUNK_SIZE,
             chunk_overlap: int = OVERLAP_SIZE,
@@ -252,14 +252,14 @@ class KnowledgeFile:
             docs = text_splitter.split_documents(docs)
 
         print(f"文档切分示例:{docs[0]}")
-        if using_zh_title_enhance:
+        if zh_title_enhance:
             docs = zh_title_enhance(docs)
         self.splited_docs = docs
         return self.splited_docs
 
     def file2text(
             self,
-            using_zh_title_enhance=ZH_TITLE_ENHANCE,
+            zh_title_enhance: bool = ZH_TITLE_ENHANCE,
             refresh: bool = False,
             chunk_size: int = CHUNK_SIZE,
             chunk_overlap: int = OVERLAP_SIZE,
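The using_zh_title_enhance keyword is renamed to zh_title_enhance so the whole stack shares one spelling. One caveat worth flagging: inside docs2texts the renamed bool parameter now appears to shadow the zh_title_enhance() helper it guards, so "docs = zh_title_enhance(docs)" would resolve to the bool; presumably this is addressed outside this diff. A hedged usage sketch, with the file and knowledge base names assumed:

    from server.knowledge_base.utils import KnowledgeFile

    kb_file = KnowledgeFile(filename="test.txt",          # assumed file
                            knowledge_base_name="samples")  # assumed kb name
    docs = kb_file.file2text(chunk_size=250,
                             chunk_overlap=50,
                             zh_title_enhance=False)
    print(f"{len(docs)} chunks")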
|
|
@ -268,7 +268,7 @@ class KnowledgeFile:
|
||||||
if self.splited_docs is None or refresh:
|
if self.splited_docs is None or refresh:
|
||||||
docs = self.file2docs()
|
docs = self.file2docs()
|
||||||
self.splited_docs = self.docs2texts(docs=docs,
|
self.splited_docs = self.docs2texts(docs=docs,
|
||||||
using_zh_title_enhance=using_zh_title_enhance,
|
zh_title_enhance=zh_title_enhance,
|
||||||
refresh=refresh,
|
refresh=refresh,
|
||||||
chunk_size=chunk_size,
|
chunk_size=chunk_size,
|
||||||
chunk_overlap=chunk_overlap,
|
chunk_overlap=chunk_overlap,
|
||||||
|
|
@@ -287,6 +287,9 @@ class KnowledgeFile:
 
 def files2docs_in_thread(
         files: List[Union[KnowledgeFile, Tuple[str, str], Dict]],
+        chunk_size: int = CHUNK_SIZE,
+        chunk_overlap: int = OVERLAP_SIZE,
+        zh_title_enhance: bool = ZH_TITLE_ENHANCE,
         pool: ThreadPoolExecutor = None,
 ) -> Generator:
     '''
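files2docs_in_thread now threads the chunking settings through to every file it processes. A hedged sketch of a direct call; the file and knowledge base names are assumed, as is the shape of the failure payload:

    from server.knowledge_base.utils import files2docs_in_thread

    kb_files = [("test.txt", "samples")]  # (filename, knowledge_base_name), assumed values
    for status, result in files2docs_in_thread(kb_files,
                                               chunk_size=250,
                                               chunk_overlap=50,
                                               zh_title_enhance=False):
        if status:
            kb_name, file_name, docs = result
            print(f"{kb_name}/{file_name}: {len(docs)} chunks")
        else:
            kb_name, file_name, error = result  # failure payload shape assumed
            print(f"{kb_name}/{file_name} failed: {error}")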
@@ -314,6 +317,9 @@ def files2docs_in_thread(
             kwargs = file
             file = KnowledgeFile(filename=filename, knowledge_base_name=kb_name)
         kwargs["file"] = file
+        kwargs["chunk_size"] = chunk_size
+        kwargs["chunk_overlap"] = chunk_overlap
+        kwargs["zh_title_enhance"] = zh_title_enhance
         kwargs_list.append(kwargs)
 
     for result in run_in_thread_pool(func=file2docs, params=kwargs_list, pool=pool):

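Each per-file kwargs dict becomes the keyword arguments of one file2docs task. run_in_thread_pool itself is imported from server.utils and not shown in this diff; a plausible minimal equivalent, purely an assumption for illustration:

    from concurrent.futures import ThreadPoolExecutor
    from typing import Callable, Dict, Generator, List

    def run_in_thread_pool_sketch(func: Callable,
                                  params: List[Dict] = None,
                                  pool: ThreadPoolExecutor = None) -> Generator:
        # Submit one task per kwargs dict; yield each result in submission order.
        pool = pool or ThreadPoolExecutor()
        tasks = [pool.submit(func, **kwargs) for kwargs in (params or [])]
        for task in tasks:
            yield task.result()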
@@ -6,7 +6,9 @@ import pandas as pd
 from server.knowledge_base.utils import get_file_path, LOADER_DICT
 from server.knowledge_base.kb_service.base import get_kb_details, get_kb_file_details
 from typing import Literal, Dict, Tuple
-from configs.model_config import embedding_model_dict, kbs_config, EMBEDDING_MODEL, DEFAULT_VS_TYPE
+from configs.model_config import (embedding_model_dict, kbs_config,
+                                  EMBEDDING_MODEL, DEFAULT_VS_TYPE,
+                                  CHUNK_SIZE, OVERLAP_SIZE, ZH_TITLE_ENHANCE)
 import os
 import time
 
@@ -125,25 +127,32 @@ def knowledge_base_page(api: ApiRequest):
     elif selected_kb:
         kb = selected_kb
 
+        with st.sidebar:
+            chunk_size = st.number_input("单段文本最大长度:", 1, 1000, CHUNK_SIZE)
+            chunk_overlap = st.number_input("相邻文本重合长度:", 0, 500, OVERLAP_SIZE)
+            zh_title_enhance = st.checkbox("开启中文标题加强:", ZH_TITLE_ENHANCE)
+
         # Upload files
-        # sentence_size = st.slider("文本入库分句长度限制", 1, 1000, SENTENCE_SIZE, disabled=True)
-        files = st.file_uploader("上传知识文件",
+        files = st.file_uploader("上传知识文件:",
                                  [i for ls in LOADER_DICT.values() for i in ls],
                                  accept_multiple_files=True,
                                  )
 
         if st.button(
                 "添加文件到知识库",
-                # help="请先上传文件,再点击添加",
                 # use_container_width=True,
                 disabled=len(files) == 0,
         ):
-            ret = api.upload_kb_docs(files, knowledge_base_name=kb, override=True)
+            ret = api.upload_kb_docs(files,
                                      knowledge_base_name=kb,
                                      override=True,
                                      chunk_size=chunk_size,
                                      chunk_overlap=chunk_overlap,
                                      zh_title_enhance=zh_title_enhance)
             if msg := check_success_msg(ret):
                 st.toast(msg, icon="✔")
             elif msg := check_error_msg(ret):
                 st.toast(msg, icon="✖")
-            st.session_state.files = []
 
         st.divider()
 
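Note: the three sidebar widgets are created once per page render, and the same chunk_size / chunk_overlap / zh_title_enhance values are reused by the upload, update, and rebuild actions below, so the WebUI applies one consistent chunking configuration per page.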
@@ -216,7 +225,11 @@ def knowledge_base_page(api: ApiRequest):
                 use_container_width=True,
         ):
             file_names = [row["file_name"] for row in selected_rows]
-            api.update_kb_docs(kb, file_names=file_names)
+            api.update_kb_docs(kb,
                                file_names=file_names,
                                chunk_size=chunk_size,
                                chunk_overlap=chunk_overlap,
                                zh_title_enhance=zh_title_enhance)
             st.experimental_rerun()
 
         # Remove the files from the vector store, but keep the files themselves.
@@ -251,7 +264,10 @@ def knowledge_base_page(api: ApiRequest):
         with st.spinner("向量库重构中,请耐心等待,勿刷新或关闭页面。"):
             empty = st.empty()
             empty.progress(0.0, "")
-            for d in api.recreate_vector_store(kb):
+            for d in api.recreate_vector_store(kb,
                                                chunk_size=chunk_size,
                                                chunk_overlap=chunk_overlap,
                                                zh_title_enhance=zh_title_enhance):
                 if msg := check_error_msg(d):
                     st.toast(msg)
                 else:

@@ -10,6 +10,9 @@ from configs.model_config import (
     HISTORY_LEN,
     TEMPERATURE,
     SCORE_THRESHOLD,
+    CHUNK_SIZE,
+    OVERLAP_SIZE,
+    ZH_TITLE_ENHANCE,
     VECTOR_SEARCH_TOP_K,
     SEARCH_ENGINE_TOP_K,
     logger, log_verbose,
@@ -575,6 +578,9 @@ class ApiRequest:
         knowledge_base_name: str,
         override: bool = False,
         to_vector_store: bool = True,
+        chunk_size=CHUNK_SIZE,
+        chunk_overlap=OVERLAP_SIZE,
+        zh_title_enhance=ZH_TITLE_ENHANCE,
         docs: Dict = {},
         not_refresh_vs_cache: bool = False,
         no_remote_api: bool = None,
@@ -600,6 +606,9 @@ class ApiRequest:
             "knowledge_base_name": knowledge_base_name,
             "override": override,
             "to_vector_store": to_vector_store,
+            "chunk_size": chunk_size,
+            "chunk_overlap": chunk_overlap,
+            "zh_title_enhance": zh_title_enhance,
             "docs": docs,
             "not_refresh_vs_cache": not_refresh_vs_cache,
         }
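A hedged sketch of the extended client method in use; ApiRequest's constructor arguments and the file-path form of the files argument are assumptions based on common usage, not visible in this diff:

    from webui_pages.utils import ApiRequest

    api = ApiRequest(base_url="http://127.0.0.1:7861")  # assumed server address
    ret = api.upload_kb_docs(["./docs/intro.md"],        # assumed local file path
                             knowledge_base_name="samples",
                             override=True,
                             chunk_size=250,
                             chunk_overlap=50,
                             zh_title_enhance=False)
    print(ret)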
@@ -665,6 +674,9 @@ class ApiRequest:
         knowledge_base_name: str,
         file_names: List[str],
         override_custom_docs: bool = False,
+        chunk_size=CHUNK_SIZE,
+        chunk_overlap=OVERLAP_SIZE,
+        zh_title_enhance=ZH_TITLE_ENHANCE,
         docs: Dict = {},
         not_refresh_vs_cache: bool = False,
         no_remote_api: bool = None,
@@ -679,6 +691,9 @@ class ApiRequest:
             "knowledge_base_name": knowledge_base_name,
             "file_names": file_names,
             "override_custom_docs": override_custom_docs,
+            "chunk_size": chunk_size,
+            "chunk_overlap": chunk_overlap,
+            "zh_title_enhance": zh_title_enhance,
             "docs": docs,
             "not_refresh_vs_cache": not_refresh_vs_cache,
         }
@@ -701,6 +716,9 @@ class ApiRequest:
         allow_empty_kb: bool = True,
         vs_type: str = DEFAULT_VS_TYPE,
         embed_model: str = EMBEDDING_MODEL,
+        chunk_size=CHUNK_SIZE,
+        chunk_overlap=OVERLAP_SIZE,
+        zh_title_enhance=ZH_TITLE_ENHANCE,
         no_remote_api: bool = None,
     ):
         '''
|
|
@ -714,6 +732,9 @@ class ApiRequest:
|
||||||
"allow_empty_kb": allow_empty_kb,
|
"allow_empty_kb": allow_empty_kb,
|
||||||
"vs_type": vs_type,
|
"vs_type": vs_type,
|
||||||
"embed_model": embed_model,
|
"embed_model": embed_model,
|
||||||
|
"chunk_size": chunk_size,
|
||||||
|
"chunk_overlap": chunk_overlap,
|
||||||
|
"zh_title_enhance": zh_title_enhance,
|
||||||
}
|
}
|
||||||
|
|
||||||
if no_remote_api:
|
if no_remote_api:
|
||||||
|
|
|
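The client-side recreate_vector_store yields one progress payload per processed file, as the WebUI loop above suggests. A hedged sketch; the constructor arguments are assumptions, and the exact shape of each progress dict is inferred rather than guaranteed:

    from webui_pages.utils import ApiRequest

    api = ApiRequest(base_url="http://127.0.0.1:7861")  # assumed server address
    for d in api.recreate_vector_store("samples",
                                       chunk_size=250,
                                       chunk_overlap=50,
                                       zh_title_enhance=False):
        print(d)  # one progress payload per processed file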
||||||
Loading…
Reference in New Issue