From 80375e1ff3988039ab9fc4d0e14f7f374d7e2ee1 Mon Sep 17 00:00:00 2001 From: liunux4odoo <41217877+liunux4odoo@users.noreply.github.com> Date: Fri, 15 Sep 2023 18:11:15 +0800 Subject: [PATCH] fix merge conflict for #1474 (#1494) --- configs/kb_config.py.exmaple | 4 ++-- server/knowledge_base/utils.py | 11 +++++------ startup.py | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/configs/kb_config.py.exmaple b/configs/kb_config.py.exmaple index 7df3f38..1c2deb9 100644 --- a/configs/kb_config.py.exmaple +++ b/configs/kb_config.py.exmaple @@ -8,7 +8,7 @@ DEFAULT_VS_TYPE = "faiss" CACHED_VS_NUM = 1 # 知识库中单段文本长度(不适用MarkdownHeaderTextSplitter) -CHUNK_SIZE = 500 +CHUNK_SIZE = 250 # 知识库中相邻文本重合长度(不适用MarkdownHeaderTextSplitter) OVERLAP_SIZE = 50 @@ -104,4 +104,4 @@ text_splitter_dict = { } # TEXT_SPLITTER 名称 -TEXT_SPLITTER_NAME = "SpacyTextSplitter" +TEXT_SPLITTER_NAME = "ChineseRecursiveTextSplitter" diff --git a/server/knowledge_base/utils.py b/server/knowledge_base/utils.py index c804a6a..033b48d 100644 --- a/server/knowledge_base/utils.py +++ b/server/knowledge_base/utils.py @@ -10,10 +10,9 @@ from configs import ( ZH_TITLE_ENHANCE, logger, log_verbose, - text_splitter_dict, - llm_model_dict, - LLM_MODEL, - TEXT_SPLITTER + text_splitter_dict, + LLM_MODEL, + TEXT_SPLITTER_NAME, ) import importlib from text_splitter import zh_title_enhance @@ -182,7 +181,7 @@ def get_loader(loader_name: str, file_path_or_content: Union[str, bytes, io.Stri def make_text_splitter( - splitter_name: str = TEXT_SPLITTER, + splitter_name: str = TEXT_SPLITTER_NAME, chunk_size: int = CHUNK_SIZE, chunk_overlap: int = OVERLAP_SIZE, llm_model: str = LLM_MODEL, @@ -275,7 +274,7 @@ class KnowledgeFile: self.docs = None self.splited_docs = None self.document_loader_name = get_LoaderClass(self.ext) - self.text_splitter_name = TEXT_SPLITTER + self.text_splitter_name = TEXT_SPLITTER_NAME def file2docs(self, refresh: bool=False): if self.docs is None or refresh: diff --git a/startup.py b/startup.py index d5be6e5..838e588 100644 --- a/startup.py +++ b/startup.py @@ -479,7 +479,7 @@ def dump_server_info(after_start=False, args=None): if args and args.model_name: models = args.model_name - print(f"当前使用的分词器:{TEXT_SPLITTER}") + print(f"当前使用的分词器:{TEXT_SPLITTER_NAME}") print(f"当前启动的LLM模型:{models} @ {llm_device()}") for model in models: