diff --git a/text_splitter/chinese_recursive_text_splitter.py b/text_splitter/chinese_recursive_text_splitter.py index 57ec53f..795f942 100644 --- a/text_splitter/chinese_recursive_text_splitter.py +++ b/text_splitter/chinese_recursive_text_splitter.py @@ -101,11 +101,12 @@ class ChineseRecursiveTextSplitter(RecursiveCharacterTextSplitter): final_chunks = [re.sub(r"\n{2,}", "\n", chunk.strip()) for chunk in final_chunks if chunk.strip()!=""] #将单行和两行的和下面的分块合并 + #将单行并且字数小于25,和下面的分块合并 return_chunks = [] temp_sencond = "" for chunk in final_chunks: if temp_sencond =="": - if len(chunk.splitlines()) <= 1: + if len(chunk.splitlines()) <= 1 and len(chunk) <= 25: temp_sencond = chunk else: return_chunks.append(chunk)