update pdf_loader.py
This commit is contained in:
parent
94b4599cda
commit
3712eec6a9
|
|
@ -29,7 +29,8 @@ class UnstructuredPaddlePDFLoader(UnstructuredFileLoader):
|
|||
img_list = page.get_images()
|
||||
for img in img_list:
|
||||
pix = fitz.Pixmap(doc, img[0])
|
||||
|
||||
if pix.n - pix.alpha >= 4:
|
||||
pix = fitz.Pixmap(fitz.csRGB, pix)
|
||||
pix.save(img_name)
|
||||
|
||||
result = ocr.ocr(img_name)
|
||||
|
|
|
|||
Loading…
Reference in New Issue