From 2ac52147d31c6a5fce5224381e1429e933f79588 Mon Sep 17 00:00:00 2001 From: wvivi2023 Date: Fri, 15 Dec 2023 09:48:22 +0800 Subject: [PATCH] fix merging issue --- text_splitter/chinese_recursive_text_splitter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/text_splitter/chinese_recursive_text_splitter.py b/text_splitter/chinese_recursive_text_splitter.py index 57ec53f..795f942 100644 --- a/text_splitter/chinese_recursive_text_splitter.py +++ b/text_splitter/chinese_recursive_text_splitter.py @@ -101,11 +101,12 @@ class ChineseRecursiveTextSplitter(RecursiveCharacterTextSplitter): final_chunks = [re.sub(r"\n{2,}", "\n", chunk.strip()) for chunk in final_chunks if chunk.strip()!=""] #将单行和两行的和下面的分块合并 + #将单行并且字数小于25,和下面的分块合并 return_chunks = [] temp_sencond = "" for chunk in final_chunks: if temp_sencond =="": - if len(chunk.splitlines()) <= 1: + if len(chunk.splitlines()) <= 1 and len(chunk) <= 25: temp_sencond = chunk else: return_chunks.append(chunk)