update loader
This commit is contained in:
parent
871a871651
commit
14295392d0
|
|
@ -15,7 +15,7 @@ class UnstructuredPaddleImageLoader(UnstructuredFileLoader):
|
||||||
if not os.path.exists(full_dir_path):
|
if not os.path.exists(full_dir_path):
|
||||||
os.makedirs(full_dir_path)
|
os.makedirs(full_dir_path)
|
||||||
filename = os.path.split(filepath)[-1]
|
filename = os.path.split(filepath)[-1]
|
||||||
ocr = PaddleOCR(lang="ch", use_gpu=False, show_log=False)
|
ocr = PaddleOCR(use_angle_cls=True, lang="ch", use_gpu=False, show_log=False)
|
||||||
result = ocr.ocr(img=filepath)
|
result = ocr.ocr(img=filepath)
|
||||||
|
|
||||||
ocr_result = [i[1][0] for line in result for i in line]
|
ocr_result = [i[1][0] for line in result for i in line]
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ class UnstructuredPaddlePDFLoader(UnstructuredFileLoader):
|
||||||
full_dir_path = os.path.join(os.path.dirname(filepath), dir_path)
|
full_dir_path = os.path.join(os.path.dirname(filepath), dir_path)
|
||||||
if not os.path.exists(full_dir_path):
|
if not os.path.exists(full_dir_path):
|
||||||
os.makedirs(full_dir_path)
|
os.makedirs(full_dir_path)
|
||||||
ocr = PaddleOCR(lang="ch", use_gpu=False, show_log=False)
|
ocr = PaddleOCR(use_angle_cls=True, lang="ch", use_gpu=False, show_log=False)
|
||||||
doc = fitz.open(filepath)
|
doc = fitz.open(filepath)
|
||||||
txt_file_path = os.path.join(full_dir_path, f"{os.path.split(filepath)[-1]}.txt")
|
txt_file_path = os.path.join(full_dir_path, f"{os.path.split(filepath)[-1]}.txt")
|
||||||
img_name = os.path.join(full_dir_path, 'tmp.png')
|
img_name = os.path.join(full_dir_path, 'tmp.png')
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue