update pdf_loader.py
This commit is contained in:
parent
99ee2e9fd8
commit
5c0c1eed93
|
|
@ -39,7 +39,8 @@ class UnstructuredPaddlePDFLoader(UnstructuredFileLoader):
|
|||
result = ocr.ocr(img_name)
|
||||
ocr_result = [i[1][0] for line in result for i in line]
|
||||
fout.write("\n".join(ocr_result))
|
||||
os.remove(img_name)
|
||||
if os.path.exists(img_name):
|
||||
os.remove(img_name)
|
||||
return txt_file_path
|
||||
|
||||
txt_file_path = pdf_ocr_txt(self.file_path)
|
||||
|
|
|
|||
Loading…
Reference in New Issue