将 paddle 相关的 loader 改为动态引入(仅在加载 pdf/image 文件时才导入),因此在不上传 pdf/image 知识文件的前提下可以使用 protobuf 4.x。
这样就可以使用最新版的 streamlit 和 chainlit。
This commit is contained in:
parent
ee7285cd93
commit
dd3617fcdf
|
|
@ -8,7 +8,6 @@ from typing import List
|
|||
from utils import torch_gc
|
||||
from tqdm import tqdm
|
||||
from pypinyin import lazy_pinyin
|
||||
from loader import UnstructuredPaddleImageLoader, UnstructuredPaddlePDFLoader
|
||||
from models.base import (BaseAnswer,
|
||||
AnswerResult)
|
||||
from models.loader.args import parser
|
||||
|
|
@ -59,6 +58,7 @@ def tree(filepath, ignore_dir_names=None, ignore_file_names=None):
|
|||
|
||||
|
||||
def load_file(filepath, sentence_size=SENTENCE_SIZE, using_zh_title_enhance=ZH_TITLE_ENHANCE):
|
||||
|
||||
if filepath.lower().endswith(".md"):
|
||||
loader = UnstructuredFileLoader(filepath, mode="elements")
|
||||
docs = loader.load()
|
||||
|
|
@ -67,10 +67,14 @@ def load_file(filepath, sentence_size=SENTENCE_SIZE, using_zh_title_enhance=ZH_T
|
|||
textsplitter = ChineseTextSplitter(pdf=False, sentence_size=sentence_size)
|
||||
docs = loader.load_and_split(textsplitter)
|
||||
elif filepath.lower().endswith(".pdf"):
|
||||
# 暂且将paddle相关的loader改为动态加载,可以在不上传pdf/image知识文件的前提下使用protobuf=4.x
|
||||
from loader import UnstructuredPaddlePDFLoader
|
||||
loader = UnstructuredPaddlePDFLoader(filepath)
|
||||
textsplitter = ChineseTextSplitter(pdf=True, sentence_size=sentence_size)
|
||||
docs = loader.load_and_split(textsplitter)
|
||||
elif filepath.lower().endswith(".jpg") or filepath.lower().endswith(".png"):
|
||||
# 暂且将paddle相关的loader改为动态加载,可以在不上传pdf/image知识文件的前提下使用protobuf=4.x
|
||||
from loader import UnstructuredPaddleImageLoader
|
||||
loader = UnstructuredPaddleImageLoader(filepath, mode="elements")
|
||||
textsplitter = ChineseTextSplitter(pdf=False, sentence_size=sentence_size)
|
||||
docs = loader.load_and_split(text_splitter=textsplitter)
|
||||
|
|
|
|||
Loading…
Reference in New Issue