commit 80375e1ff3 (parent f7c73b842a)
@@ -8,7 +8,7 @@ DEFAULT_VS_TYPE = "faiss"
 CACHED_VS_NUM = 1
 
 # 知识库中单段文本长度(不适用MarkdownHeaderTextSplitter)
-CHUNK_SIZE = 500
+CHUNK_SIZE = 250
 
 # 知识库中相邻文本重合长度(不适用MarkdownHeaderTextSplitter)
 OVERLAP_SIZE = 50
@@ -104,4 +104,4 @@ text_splitter_dict = {
 }
 
 # TEXT_SPLITTER 名称
-TEXT_SPLITTER_NAME = "SpacyTextSplitter"
+TEXT_SPLITTER_NAME = "ChineseRecursiveTextSplitter"
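Net effect of these two hunks: the per-chunk text length for the knowledge base (CHUNK_SIZE, which the comment notes does not apply to MarkdownHeaderTextSplitter) drops from 500 to 250 characters, the overlap between adjacent chunks stays at 50, and the default splitter name switches from SpacyTextSplitter to ChineseRecursiveTextSplitter. Below is a minimal sketch of what the new values mean in practice, using langchain's generic RecursiveCharacterTextSplitter as a stand-in, since the ChineseRecursiveTextSplitter class and its import path are not shown in this diff:

# Sketch only, not part of this commit. Illustrates the effect of the new
# defaults; RecursiveCharacterTextSplitter stands in for the splitter that
# TEXT_SPLITTER_NAME now selects.
from langchain.text_splitter import RecursiveCharacterTextSplitter

CHUNK_SIZE = 250    # new default (was 500)
OVERLAP_SIZE = 50   # unchanged

splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=OVERLAP_SIZE,
)
sample = "知识库问答依赖合理的切分粒度。" * 100  # any long text
chunks = splitter.split_text(sample)
# Each chunk is at most roughly 250 characters, and neighbouring chunks
# share up to 50 characters of overlap.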
@@ -11,9 +11,8 @@ from configs import (
     logger,
     log_verbose,
     text_splitter_dict,
-    llm_model_dict,
     LLM_MODEL,
-    TEXT_SPLITTER
+    TEXT_SPLITTER_NAME,
 )
 import importlib
 from text_splitter import zh_title_enhance
@@ -182,7 +181,7 @@ def get_loader(loader_name: str, file_path_or_content: Union[str, bytes, io.Stri
 
 
 def make_text_splitter(
-        splitter_name: str = TEXT_SPLITTER,
+        splitter_name: str = TEXT_SPLITTER_NAME,
         chunk_size: int = CHUNK_SIZE,
         chunk_overlap: int = OVERLAP_SIZE,
         llm_model: str = LLM_MODEL,
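With this hunk, calling make_text_splitter() with no arguments resolves the splitter from TEXT_SPLITTER_NAME rather than the removed TEXT_SPLITTER constant. A hedged usage sketch follows; only the parameter names and defaults come from the signature above, and the module path is an assumption:

# Sketch only: the import path for make_text_splitter is assumed, not shown here.
from configs import TEXT_SPLITTER_NAME, CHUNK_SIZE, OVERLAP_SIZE
from server.knowledge_base.utils import make_text_splitter  # hypothetical path

# Bare call: every argument falls back to the renamed config constants.
default_splitter = make_text_splitter()

# Equivalent explicit call, spelling out the defaults shown in the diff.
explicit_splitter = make_text_splitter(
    splitter_name=TEXT_SPLITTER_NAME,   # "ChineseRecursiveTextSplitter"
    chunk_size=CHUNK_SIZE,              # 250
    chunk_overlap=OVERLAP_SIZE,         # 50
)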
@@ -275,7 +274,7 @@ class KnowledgeFile:
         self.docs = None
         self.splited_docs = None
         self.document_loader_name = get_LoaderClass(self.ext)
-        self.text_splitter_name = TEXT_SPLITTER
+        self.text_splitter_name = TEXT_SPLITTER_NAME
 
     def file2docs(self, refresh: bool=False):
         if self.docs is None or refresh:
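The renamed attribute is the per-file default that downstream splitting consults. A minimal sketch of that flow; the KnowledgeFile constructor arguments and the import path are assumptions, while file2docs(), text_splitter_name, and make_text_splitter all appear in the hunks above:

# Sketch only: constructor arguments and import path are assumptions.
from configs import CHUNK_SIZE, OVERLAP_SIZE
from server.knowledge_base.utils import KnowledgeFile, make_text_splitter  # hypothetical path

kb_file = KnowledgeFile(filename="example.md", knowledge_base_name="samples")  # argument names assumed
docs = kb_file.file2docs()  # loads and caches raw documents; pass refresh=True to reload
splitter = make_text_splitter(
    splitter_name=kb_file.text_splitter_name,  # now TEXT_SPLITTER_NAME by default
    chunk_size=CHUNK_SIZE,
    chunk_overlap=OVERLAP_SIZE,
)
kb_file.splited_docs = splitter.split_documents(docs)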
@@ -479,7 +479,7 @@ def dump_server_info(after_start=False, args=None):
     if args and args.model_name:
         models = args.model_name
 
-    print(f"当前使用的分词器:{TEXT_SPLITTER}")
+    print(f"当前使用的分词器:{TEXT_SPLITTER_NAME}")
     print(f"当前启动的LLM模型:{models} @ {llm_device()}")
 
     for model in models: