fix merge conflict for #1474 (#1494)

Author: liunux4odoo
Date: 2023-09-15 18:11:15 +08:00 (committed by GitHub)
Parent: f7c73b842a
Commit: 80375e1ff3
3 changed files with 8 additions and 9 deletions

View File

@@ -8,7 +8,7 @@ DEFAULT_VS_TYPE = "faiss"
 CACHED_VS_NUM = 1
 # Length of a single text chunk in the knowledge base (does not apply to MarkdownHeaderTextSplitter)
-CHUNK_SIZE = 500
+CHUNK_SIZE = 250
 # Overlap length between adjacent chunks in the knowledge base (does not apply to MarkdownHeaderTextSplitter)
 OVERLAP_SIZE = 50
@@ -104,4 +104,4 @@ text_splitter_dict = {
 }
 # Name of the TEXT_SPLITTER to use
-TEXT_SPLITTER_NAME = "SpacyTextSplitter"
+TEXT_SPLITTER_NAME = "ChineseRecursiveTextSplitter"

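The config change halves the default CHUNK_SIZE to 250 and makes ChineseRecursiveTextSplitter (which appears to be the project's own splitter class) the default. As a rough illustration of what CHUNK_SIZE and OVERLAP_SIZE control, here is a minimal sketch using LangChain's generic RecursiveCharacterTextSplitter as a stand-in, not the project's class:

# Illustration only: LangChain's generic recursive splitter stands in for the
# project's ChineseRecursiveTextSplitter to show what the two knobs mean.
from langchain.text_splitter import RecursiveCharacterTextSplitter

CHUNK_SIZE = 250   # new default: maximum characters per chunk
OVERLAP_SIZE = 50  # characters shared between adjacent chunks

splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE,
                                          chunk_overlap=OVERLAP_SIZE)
chunks = splitter.split_text("A long document body. " * 100)
print(len(chunks), [len(c) for c in chunks[:3]])

Smaller chunks generally give more precise retrieval hits at the cost of storing more vectors, which is the trade-off behind lowering the default from 500 to 250.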
View File

@@ -11,9 +11,8 @@ from configs import (
     logger,
     log_verbose,
     text_splitter_dict,
-    llm_model_dict,
     LLM_MODEL,
-    TEXT_SPLITTER
+    TEXT_SPLITTER_NAME,
 )
 import importlib
 from text_splitter import zh_title_enhance
@@ -182,7 +181,7 @@ def get_loader(loader_name: str, file_path_or_content: Union[str, bytes, io.Stri
 def make_text_splitter(
-        splitter_name: str = TEXT_SPLITTER,
+        splitter_name: str = TEXT_SPLITTER_NAME,
         chunk_size: int = CHUNK_SIZE,
         chunk_overlap: int = OVERLAP_SIZE,
         llm_model: str = LLM_MODEL,
@@ -275,7 +274,7 @@ class KnowledgeFile:
         self.docs = None
         self.splited_docs = None
         self.document_loader_name = get_LoaderClass(self.ext)
-        self.text_splitter_name = TEXT_SPLITTER
+        self.text_splitter_name = TEXT_SPLITTER_NAME

     def file2docs(self, refresh: bool=False):
         if self.docs is None or refresh:

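The knowledge-base utilities drop the stale llm_model_dict import and switch every default from TEXT_SPLITTER to TEXT_SPLITTER_NAME. Below is a simplified sketch of the kind of name-to-class resolution that make_text_splitter can do with importlib and a splitter name; it is not the repo's exact code, and the fallback to LangChain's built-in splitters is an assumption:

# Simplified sketch, not the repo's implementation: resolve a splitter class by
# name at runtime, trying the project-local text_splitter package first and
# falling back to LangChain's built-ins.
import importlib

def resolve_splitter(splitter_name: str = "ChineseRecursiveTextSplitter",
                     chunk_size: int = 250,
                     chunk_overlap: int = 50):
    for module_name in ("text_splitter", "langchain.text_splitter"):
        try:
            module = importlib.import_module(module_name)
            cls = getattr(module, splitter_name)
            return cls(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        except (ImportError, AttributeError):
            continue
    raise ValueError(f"unknown text splitter: {splitter_name}")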
View File

@@ -479,7 +479,7 @@ def dump_server_info(after_start=False, args=None):
     if args and args.model_name:
         models = args.model_name
-    print(f"Text splitter currently in use: {TEXT_SPLITTER}")
+    print(f"Text splitter currently in use: {TEXT_SPLITTER_NAME}")
     print(f"LLM models being started: {models} @ {llm_device()}")
     for model in models:
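
Since the TEXT_SPLITTER to TEXT_SPLITTER_NAME rename spans the config, the knowledge-base utilities, and the startup banner, any leftover reference to the old name would fail at import time. A throwaway check (not part of this commit) for stale uses after resolving a merge like this:

# Throwaway helper, not part of the repo: report any remaining references to
# the old TEXT_SPLITTER constant (the trailing \b excludes TEXT_SPLITTER_NAME).
import pathlib
import re

pattern = re.compile(r"\bTEXT_SPLITTER\b")
for path in pathlib.Path(".").rglob("*.py"):
    text = path.read_text(encoding="utf-8", errors="ignore")
    for lineno, line in enumerate(text.splitlines(), start=1):
        if pattern.search(line):
            print(f"{path}:{lineno}: {line.strip()}")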