update pdf_loader.py
This commit is contained in:
parent
99ee2e9fd8
commit
5c0c1eed93
|
|
@ -39,6 +39,7 @@ class UnstructuredPaddlePDFLoader(UnstructuredFileLoader):
|
||||||
result = ocr.ocr(img_name)
|
result = ocr.ocr(img_name)
|
||||||
ocr_result = [i[1][0] for line in result for i in line]
|
ocr_result = [i[1][0] for line in result for i in line]
|
||||||
fout.write("\n".join(ocr_result))
|
fout.write("\n".join(ocr_result))
|
||||||
|
if os.path.exists(img_name):
|
||||||
os.remove(img_name)
|
os.remove(img_name)
|
||||||
return txt_file_path
|
return txt_file_path
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue