diff --git a/document_loaders/__init__.py b/document_loaders/__init__.py
index b1de210..8ad1da6 100644
--- a/document_loaders/__init__.py
+++ b/document_loaders/__init__.py
@@ -1,4 +1,5 @@
 from .mypdfloader import RapidOCRPDFLoader
 from .myimgloader import RapidOCRLoader
 from .customiedpdfloader import CustomizedPDFLoader
-from.mywordload import RapidWordLoader
\ No newline at end of file
+from .mywordload import RapidWordLoader
+#from .customercore import custom_group_broken_paragraphs
diff --git a/document_loaders/customercore.py b/document_loaders/customercore.py
new file mode 100644
index 0000000..de5bdba
--- /dev/null
+++ b/document_loaders/customercore.py
@@ -0,0 +1,61 @@
+import re
+
+from unstructured.nlp.patterns import (
+    DOUBLE_PARAGRAPH_PATTERN_RE,
+    E_BULLET_PATTERN,
+    PARAGRAPH_PATTERN,
+    PARAGRAPH_PATTERN_RE,
+    UNICODE_BULLETS_RE,
+)
+from unstructured.cleaners.core import group_bullet_paragraph
+
+def custom_group_broken_paragraphs(
+    text: str,
+    line_split: re.Pattern = PARAGRAPH_PATTERN_RE,
+    paragraph_split: re.Pattern = DOUBLE_PARAGRAPH_PATTERN_RE,
+) -> str:
+    """Rejoin paragraphs whose lines were broken only for visual layout.
+
+    The text is cut into chunks with ``paragraph_split`` and chunks that are
+    blank after stripping are dropped. A chunk whose stripped text matches
+    ``UNICODE_BULLETS_RE`` or ``E_BULLET_PATTERN`` is expanded through
+    ``group_bullet_paragraph``; in any other chunk every ``PARAGRAPH_PATTERN``
+    match is replaced by one space. The pieces are joined with a blank line.
+
+    Illustrative example (assumes the default patterns split on blank lines):
+
+    '''The big red fox
+    is walking down the lane.
+
+    At the end of the lane
+    the fox met a bear.'''
+
+    is turned into
+
+    '''The big red fox is walking down the lane.
+
+    At the end of the lane the fox met a bear.'''
+
+    ``line_split`` is kept in the signature but is not read by this function.
+
+    Returns:
+        The regrouped text as one string.
+    """
+    grouped = []
+    for chunk in paragraph_split.split(text):
+        stripped = chunk.strip()
+        if not stripped:
+            continue
+        # pytesseract converts some bullet points to standalone "e" characters
+        if UNICODE_BULLETS_RE.match(stripped) or E_BULLET_PATTERN.match(stripped):
+            grouped.extend(group_bullet_paragraph(chunk))
+        else:
+            grouped.append(re.sub(PARAGRAPH_PATTERN, " ", chunk))
+    return "\n\n".join(grouped)
+
+
+# str1 = "手工分段**绝缘装置（10）  工作斗在额定载荷下起升至最大平台高度，制动后15 min, 工作斗下沉量应不超过该工况最大 平台高度的0.3%。"
+# str2 = "手工分段**操控系统（12） 电气系统的要求如下："
+# custom_group_broken_paragraphs(str1)
+# custom_group_broken_paragraphs(str2)
+
diff --git a/document_loaders/mywordload.py b/document_loaders/mywordload.py
index 0f79386..9c587d8 100644
--- a/document_loaders/mywordload.py
+++ b/document_loaders/mywordload.py
@@ -7,8 +7,10 @@ from docx.oxml.text.paragraph import CT_P
 from docx.oxml.table import CT_Tbl
 from docx.table import _Cell, Table
 from docx.text.paragraph import Paragraph
-#from langchain.document_loaders.unstructured import UnstructuredFileLoader
-#from langchain.document_loaders.word_document import Docx2txtLoader 
+from unstructured.partition.text import partition_text
+import unstructured.cleaners.core
+from .customercore import custom_group_broken_paragraphs
+unstructured.cleaners.core.group_broken_paragraphs = custom_group_broken_paragraphs
 
 class RapidWordLoader(UnstructuredFileLoader):
     def _get_elements(self) -> List:
@@ -59,7 +61,6 @@ class RapidWordLoader(UnstructuredFileLoader):
                 doc = docxDocument(filepath)
                 for block in iter_block_items(doc):
                     if isinstance(block,Paragraph):
-
                         #print(f"Paragraph:{block.text}")
                         resp += (block.text + "\n\n")
                     elif isinstance(block, Table):
@@ -71,8 +72,8 @@ class RapidWordLoader(UnstructuredFileLoader):
             return resp    
         
         text = word2text(self.file_path)
-        from unstructured.partition.text import partition_text
-        return partition_text(text=text, paragraph_grouper = False, **self.unstructured_kwargs)
+        listText =  partition_text(text=text, **self.unstructured_kwargs)
+        return listText
 
 if __name__ == "__main__":
     loader = RapidWordLoader(file_path="/Users/wangvivi/Desktop/Work/思极GPT/数字化部/设备类all/sb389/10kV带电作业用绝缘斗臂车.docx")
diff --git a/text_splitter/chinese_recursive_text_splitter.py b/text_splitter/chinese_recursive_text_splitter.py
index 66ab041..0a9f232 100644
--- a/text_splitter/chinese_recursive_text_splitter.py
+++ b/text_splitter/chinese_recursive_text_splitter.py
@@ -91,7 +91,7 @@ class ChineseRecursiveTextSplitter(RecursiveCharacterTextSplitter):
         _good_splits = []
         _separator = "" if self._keep_separator else separator
         for s in splits:
-            print(f"***s:{s},len:{self._length_function(s)}")
+            #print(f"***s:{s},len:{self._length_function(s)}")
             if self._length_function(s) < self._chunk_size:
                 _good_splits.append(s)
                 #print(f"***_good_splits.append(s):{s}")