将paddle相关loader改为动态引入,可以在不上传pdf/image知识文件的前提下使用protobuf=4.x。

这样可以使用最新版streamlit和chainlit。
This commit is contained in:
liunux@home 2023-07-20 12:32:37 +08:00
parent ee7285cd93
commit dd3617fcdf
1 changed files with 5 additions and 1 deletions

View File

@ -8,7 +8,6 @@ from typing import List
from utils import torch_gc
from tqdm import tqdm
from pypinyin import lazy_pinyin
from loader import UnstructuredPaddleImageLoader, UnstructuredPaddlePDFLoader
from models.base import (BaseAnswer,
AnswerResult)
from models.loader.args import parser
@ -59,6 +58,7 @@ def tree(filepath, ignore_dir_names=None, ignore_file_names=None):
def load_file(filepath, sentence_size=SENTENCE_SIZE, using_zh_title_enhance=ZH_TITLE_ENHANCE):
if filepath.lower().endswith(".md"):
loader = UnstructuredFileLoader(filepath, mode="elements")
docs = loader.load()
@ -67,10 +67,14 @@ def load_file(filepath, sentence_size=SENTENCE_SIZE, using_zh_title_enhance=ZH_T
textsplitter = ChineseTextSplitter(pdf=False, sentence_size=sentence_size)
docs = loader.load_and_split(textsplitter)
elif filepath.lower().endswith(".pdf"):
# 暂且将paddle相关的loader改为动态加载可以在不上传pdf/image知识文件的前提下使用protobuf=4.x
from loader import UnstructuredPaddlePDFLoader
loader = UnstructuredPaddlePDFLoader(filepath)
textsplitter = ChineseTextSplitter(pdf=True, sentence_size=sentence_size)
docs = loader.load_and_split(textsplitter)
elif filepath.lower().endswith(".jpg") or filepath.lower().endswith(".png"):
# 暂且将paddle相关的loader改为动态加载可以在不上传pdf/image知识文件的前提下使用protobuf=4.x
from loader import UnstructuredPaddleImageLoader
loader = UnstructuredPaddleImageLoader(filepath, mode="elements")
textsplitter = ChineseTextSplitter(pdf=False, sentence_size=sentence_size)
docs = loader.load_and_split(text_splitter=textsplitter)