From 8d463a31fdb771729a5cf8792a9fe946cf567ef4 Mon Sep 17 00:00:00 2001 From: imClumsyPanda Date: Thu, 10 Aug 2023 21:50:38 +0800 Subject: [PATCH] update import pkgs and format --- text_splitter/__init__.py | 2 +- text_splitter/ali_text_splitter.py | 9 ++++++++- text_splitter/chinese_text_splitter.py | 4 ++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/text_splitter/__init__.py b/text_splitter/__init__.py index f059ccb..8f13f16 100644 --- a/text_splitter/__init__.py +++ b/text_splitter/__init__.py @@ -1,3 +1,3 @@ from .chinese_text_splitter import ChineseTextSplitter from .ali_text_splitter import AliTextSplitter -from .zh_title_enhance import zh_title_enhance \ No newline at end of file +from .zh_title_enhance import zh_title_enhance diff --git a/text_splitter/ali_text_splitter.py b/text_splitter/ali_text_splitter.py index 1e62eb6..93846d1 100644 --- a/text_splitter/ali_text_splitter.py +++ b/text_splitter/ali_text_splitter.py @@ -16,7 +16,14 @@ class AliTextSplitter(CharacterTextSplitter): text = re.sub(r"\n{3,}", r"\n", text) text = re.sub('\s', " ", text) text = re.sub("\n\n", "", text) - from modelscope.pipelines import pipeline + try: + from modelscope.pipelines import pipeline + except ImportError: + raise ImportError( + "Could not import modelscope python package. " + "Please install modelscope with `pip install modelscope`. " + ) + p = pipeline( task="document-segmentation", diff --git a/text_splitter/chinese_text_splitter.py b/text_splitter/chinese_text_splitter.py index b6e7940..d6294ae 100644 --- a/text_splitter/chinese_text_splitter.py +++ b/text_splitter/chinese_text_splitter.py @@ -1,11 +1,11 @@ from langchain.text_splitter import CharacterTextSplitter import re from typing import List -from configs.model_config import SENTENCE_SIZE +from configs.model_config import CHUNK_SIZE class ChineseTextSplitter(CharacterTextSplitter): - def __init__(self, pdf: bool = False, sentence_size: int = SENTENCE_SIZE, **kwargs): + def __init__(self, pdf: bool = False, sentence_size: int = CHUNK_SIZE, **kwargs): super().__init__(**kwargs) self.pdf = pdf self.sentence_size = sentence_size