fix merging issue
This commit is contained in:
parent
77bc5891c8
commit
2ac52147d3
|
|
@@ -101,11 +101,12 @@ class ChineseRecursiveTextSplitter(RecursiveCharacterTextSplitter):
|
||||||
|
|
||||||
final_chunks = [re.sub(r"\n{2,}", "\n", chunk.strip()) for chunk in final_chunks if chunk.strip()!=""]
|
final_chunks = [re.sub(r"\n{2,}", "\n", chunk.strip()) for chunk in final_chunks if chunk.strip()!=""]
|
||||||
#将单行和两行的和下面的分块合并
|
#将单行和两行的和下面的分块合并
|
||||||
|
#将单行并且字数小于25,和下面的分块合并
|
||||||
return_chunks = []
|
return_chunks = []
|
||||||
temp_sencond = ""
|
temp_sencond = ""
|
||||||
for chunk in final_chunks:
|
for chunk in final_chunks:
|
||||||
if temp_sencond =="":
|
if temp_sencond =="":
|
||||||
if len(chunk.splitlines()) <= 1:
|
if len(chunk.splitlines()) <= 1 and len(chunk) <= 25:
|
||||||
temp_sencond = chunk
|
temp_sencond = chunk
|
||||||
else:
|
else:
|
||||||
return_chunks.append(chunk)
|
return_chunks.append(chunk)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue