Update package imports and formatting

This commit is contained in:
imClumsyPanda 2023-08-10 21:50:38 +08:00
parent 8a4d9168fa
commit 8d463a31fd
3 changed files with 11 additions and 4 deletions

View File

@@ -16,7 +16,14 @@ class AliTextSplitter(CharacterTextSplitter):
text = re.sub(r"\n{3,}", r"\n", text)
text = re.sub('\s', " ", text)
text = re.sub("\n\n", "", text)
try:
from modelscope.pipelines import pipeline
except ImportError:
raise ImportError(
"Could not import modelscope python package. "
"Please install modelscope with `pip install modelscope`. "
)
p = pipeline(
task="document-segmentation",

View File

@@ -1,11 +1,11 @@
from langchain.text_splitter import CharacterTextSplitter
import re
from typing import List
from configs.model_config import SENTENCE_SIZE
from configs.model_config import CHUNK_SIZE
class ChineseTextSplitter(CharacterTextSplitter):
def __init__(self, pdf: bool = False, sentence_size: int = SENTENCE_SIZE, **kwargs):
def __init__(self, pdf: bool = False, sentence_size: int = CHUNK_SIZE, **kwargs):
super().__init__(**kwargs)
self.pdf = pdf
self.sentence_size = sentence_size