ComponentDevelopment/OCRPython/maincopy.py

172 lines
7.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
import sys
import io
from paddleocr import PaddleOCR
import time
from configs.basic_config import *
from extractor.identitycard_extractor import IdentityCardExtractor
import base64
import json
# Re-wrap stdout as UTF-8: the JSON this script prints is serialized with
# ensure_ascii=False, so it contains raw non-ASCII characters that must be
# emitted as UTF-8 rather than in whatever encoding stdout was opened with.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
def extractIdCardInfo(type: int = 0, filePath1: str = "", filePath2: str = "") -> str:
    """Dispatch an ID-card extraction request according to the input mode.

    Args:
        type: Input mode. ``0`` treats the two string arguments as image file
            paths, ``1`` treats them as base64-encoded image data.
        filePath1: First image (a path or base64 data, depending on ``type``).
        filePath2: Second image (a path or base64 data, depending on ``type``).

    Returns:
        The string produced by the selected extraction function. For any
        other ``type`` value nothing is extracted and ``None`` is returned.
    """
    if type == 0:
        return extractIdCardInfoByPath(filePath1, filePath2)
    if type == 1:
        return extractIdCardInfoByBase64Data(filePath1, filePath2)
    # Unsupported mode: no extraction is attempted.
    return None
def _ocr_image_text(ocr, image_path: str) -> str:
    """Run OCR on one image file and return its text, one recognized line per row.

    Every recognized line is terminated with a newline so the output of
    several images can simply be concatenated.
    """
    pages = ocr.ocr(image_path, cls=False)
    recognized = []
    # PaddleOCR may report an image on which nothing was detected as None
    # instead of an empty list; guard both levels so "no text" is not turned
    # into a TypeError (and therefore into the wrong error code).
    for page in pages or []:
        for line in page or []:
            recognized.append(line[1][0] + '\n')
    return "".join(recognized)


def extractIdCardInfoByPath(filePath1: str = "", filePath2: str = "") -> str:
    """Extract ID-card fields from one or two image files.

    Each non-empty path is OCR'd in order (``filePath1`` first) and the
    recognized text of both images is concatenated before being handed to
    ``IdentityCardExtractor.extract_textbyPaddle``.

    Args:
        filePath1: Path of the first image; an empty value skips it.
        filePath2: Path of the second image; an empty value skips it.

    Returns:
        A JSON string. On success it is the serialized extractor result.
        Otherwise it is a ``{"code", "msg"}`` object using:
        ``LOCAL_PATH_NOT_EXIST`` when a given path does not exist,
        ``OCR_RECOGNIZE_OTHER_EXCEPTION`` when OCR raised, and
        ``NO_TEXT_RECOGNIZED`` when no text was recognized at all.
    """
    # Constructing the engine downloads the model on first use and loads it into memory.
    ocr = PaddleOCR(use_angle_cls=True, lang="ch")
    start_time = time.time()  # start of the timed section
    finalResult = {
        "code": LOCAL_PATH_NOT_EXIST,
        "msg": error_codes[LOCAL_PATH_NOT_EXIST],
    }
    text = ""
    try:
        for image_path in (filePath1, filePath2):
            if not image_path:
                continue
            if not os.path.exists(image_path):
                logger.error(f"{image_path} doesn't exist,error information:{finalResult}")
                return json.dumps(finalResult, ensure_ascii=False)
            text += _ocr_image_text(ocr, image_path)
    except Exception as e:
        finalResult["code"] = OCR_RECOGNIZE_OTHER_EXCEPTION
        finalResult["msg"] = error_codes[OCR_RECOGNIZE_OTHER_EXCEPTION]
        logger.error(f"{e} ,error information:{finalResult}")
        return json.dumps(finalResult, ensure_ascii=False)

    if not text:
        finalResult["code"] = NO_TEXT_RECOGNIZED
        finalResult["msg"] = error_codes[NO_TEXT_RECOGNIZED]
        logger.error(f"{filePath1},{filePath2} can't be recognized,error information:{finalResult}")
        return json.dumps(finalResult, ensure_ascii=False)

    extractor = IdentityCardExtractor()
    tempdict = extractor.extract_textbyPaddle(text)
    jsonString = json.dumps(tempdict, ensure_ascii=False)
    execution_time = time.time() - start_time  # elapsed wall-clock seconds
    # Logged under this function's own name (it previously carried the
    # base64 function's label, making the two timings indistinguishable).
    logger.info(f"extractIdCardInfoByPath 耗时{execution_time}")
    return jsonString
def extractIdCardInfoByBase64Data(base64data1: str, base64Data2: str) -> str:
    """Extract ID-card fields from one or two base64-encoded images.

    Each non-empty argument is decoded and written to its own uniquely named
    temporary file, the files are passed to ``extractIdCardInfoByPath`` and
    are removed again afterwards, whether or not extraction succeeded.

    Args:
        base64data1: Base64 data of the first image; empty to skip it.
        base64Data2: Base64 data of the second image; empty to skip it.

    Returns:
        The JSON string returned by ``extractIdCardInfoByPath``, or a
        ``{"code", "msg"}`` JSON object with ``BASE64_DATA_INCOMPLETE`` when
        no data was supplied or decoding/writing the images failed.
    """
    import tempfile  # local import: only this function needs it

    finalResult = {
        "code": BASE64_DATA_INCOMPLETE,
        "msg": error_codes[BASE64_DATA_INCOMPLETE],
    }
    logger.info("extractIdCardInfoByBase64Data")
    start_time = time.time()  # start of the timed section
    temp_paths = []
    try:
        for encoded in (base64data1, base64Data2):
            if not encoded:
                continue
            # Decode first so that invalid data never leaves a file behind.
            image_bytes = base64.b64decode(encoded)
            # A unique file per call: fixed names in the working directory let
            # a stale or concurrently written file be OCR'd as if it were the
            # current input.
            fd, temp_path = tempfile.mkstemp(suffix=".png")
            temp_paths.append(temp_path)
            with os.fdopen(fd, "wb") as file:
                file.write(image_bytes)

        if not temp_paths:
            # Nothing to recognize: report it instead of returning an empty string.
            logger.error(f"no base64 data supplied,error information:{finalResult}")
            return json.dumps(finalResult, ensure_ascii=False)

        # A single image is always passed as the first path.
        first_path, second_path = (temp_paths + [""])[:2]
        jsonString = extractIdCardInfoByPath(first_path, second_path)
    except Exception as e:
        logger.error(f"{e},error information:{finalResult}")
        return json.dumps(finalResult, ensure_ascii=False)
    finally:
        # Always clean up, including on the error paths above.
        for temp_path in temp_paths:
            try:
                os.remove(temp_path)
            except OSError as cleanup_error:
                logger.error(f"failed to remove temporary file {temp_path}: {cleanup_error}")

    execution_time = time.time() - start_time  # elapsed wall-clock seconds
    logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}")
    return jsonString
# with open('/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG', 'rb') as image_file:
# base64_image_string = base64.b64encode(image_file.read()).decode('utf-8')
#
# jsonString = extractIdCardInfoByBase64Data(base64_image_string,"")
#jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG")
#jsonString = extractIdCardInfoByPath("./images/bf4343cfb5806c77ae21c56a8c35f474.jpeg")
#print(jsonString)
# jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg")
# print(jsonString)
# #
# jsonString = extractIdCardInfoByPath("./images/han.jpg","")
# logger.info(f"test")
# jsonString = extractIdCardInfoByPath("./images/43302919641130423X_202311061953_front.jpg","./images/43302919641130423X_202311061953_back.jpg")#, "./images/江六斤反.jpg./images/han.jpg
# print(jsonString)
# Command-line entry point.
#
# Usage 1 (three or more arguments): <type> <input1> <input2>
#   All inputs are taken from the command line.
# Usage 2 (one or two arguments): <type>
#   The inputs are read from stdin, one per line (at most two are used).
#
# The resulting string is printed to stdout.
if __name__ == "__main__":
    try:
        logger.info(f"main.py len of parameter: {len(sys.argv)}")
        jsonString = ""
        if len(sys.argv) > 3:
            logger.info(f"{sys.argv[1]}")
            logger.info(f"{sys.argv[2]}")
            logger.info(f"{sys.argv[3]}")
            jsonString = extractIdCardInfo(int(sys.argv[1]), sys.argv[2], sys.argv[3])
        elif len(sys.argv) > 1:
            logger.info(f"python 脚本里的接收到的参数是:")
            logger.info(f"{sys.argv[1]}")
            logger.info(f"开始执行sys.stdin.read")
            input_data = sys.stdin.read()
            logger.info(f"len(input_data):{len(input_data)}")
            # Text-mode stdin already normalizes line endings to "\n", so
            # splitting on os.linesep never matches "\r\n" on Windows, and a
            # trailing newline adds an empty element. splitlines() plus
            # dropping blank lines handles every line-ending convention.
            payload_lines = [ln.strip() for ln in input_data.splitlines() if ln.strip()]
            data1 = payload_lines[0] if payload_lines else ""
            data2 = payload_lines[1] if len(payload_lines) > 1 else ""
            if data2:
                logger.info("2 == len(split_data)")
            jsonString = extractIdCardInfo(int(sys.argv[1]), data1, data2)
        print(jsonString)
    except KeyboardInterrupt:
        logger.error("KeyboardInterrupt")
    except IndexError:
        # An argument that was expected is missing (or the argument list is malformed).
        logger.error("参数过长,未指定足够的参数")
    except OverflowError:
        # The command-line arguments were too long and caused an overflow.
        logger.error("命令行参数过长,导致溢出错误")
    except EOFError:
        # The end of the input stream was reached unexpectedly.
        logger.error("已经到达输入流的末尾")
    except Exception as e:
        # Any other failure. The exception is embedded in the message itself:
        # passing it as an extra positional argument without a placeholder
        # means it is never rendered in the log.
        logger.error(f"发生了异常:{e}")