From 3b49f2da540617537a7d01a92b961b00b5a74541 Mon Sep 17 00:00:00 2001 From: wvivi2023 Date: Tue, 12 Mar 2024 11:09:37 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8E=BB=E6=8E=89=E4=BA=8C=E7=BA=A7=E5=92=8C?= =?UTF-8?q?=E4=B8=89=E7=BA=A7=E7=9B=AE=E5=BD=95=E6=A0=87=E9=A2=98=E5=AF=B9?= =?UTF-8?q?=E6=A0=87=E7=82=B9=E7=AC=A6=E5=8F=B7=E7=9A=84=E5=88=A4=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- text_splitter/zh_second_title_enhance.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/text_splitter/zh_second_title_enhance.py b/text_splitter/zh_second_title_enhance.py index 208d12f..1a14998 100644 --- a/text_splitter/zh_second_title_enhance.py +++ b/text_splitter/zh_second_title_enhance.py @@ -35,10 +35,10 @@ def get_second_level_title( splitlines = text.splitlines() first_line = splitlines[0] # 文本中有标点符号,就不是title - ENDS_IN_PUNCT_PATTERN = r"[^\w\s]\Z" - ENDS_IN_PUNCT_RE = re.compile(ENDS_IN_PUNCT_PATTERN) - if ENDS_IN_PUNCT_RE.search(first_line) is not None: - return "" + # ENDS_IN_PUNCT_PATTERN = r"[^\w\s]\Z" + # ENDS_IN_PUNCT_RE = re.compile(ENDS_IN_PUNCT_PATTERN) + # if ENDS_IN_PUNCT_RE.search(first_line) is not None: + # return "" #3 **** #3.1 ***** @@ -105,10 +105,10 @@ def get_third_level_title( splitlines = text.splitlines() first_line = splitlines[0] # 文本中有标点符号,就不是title - ENDS_IN_PUNCT_PATTERN = r"[^\w\s]\Z" - ENDS_IN_PUNCT_RE = re.compile(ENDS_IN_PUNCT_PATTERN) - if ENDS_IN_PUNCT_RE.search(first_line) is not None: - return "" + # ENDS_IN_PUNCT_PATTERN = r"[^\w\s]\Z" + # ENDS_IN_PUNCT_RE = re.compile(ENDS_IN_PUNCT_PATTERN) + # if ENDS_IN_PUNCT_RE.search(first_line) is not None: + # return "" #3 **** #3.1 *****