ComponentDevelopment/OCRPython/util/ocr_recognition.py

122 lines
5.2 KiB
Python

# -*- coding: utf-8 -*-
import base64
import io
import json
import os
import sys
import tempfile
import time

from paddleocr import PaddleOCR

from configs.basic_config import *
from extractor.identitycard_extractor import IdentityCardExtractor
# Import-time side effect: replace sys.stdout with a text wrapper over the same
# underlying byte buffer that always encodes output as UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
class OCRRecognition:
    """Identity-card OCR entry points built on PaddleOCR.

    Every public method returns a JSON string: either the dictionary produced
    by ``IdentityCardExtractor.extract_textbyPaddle`` or an error object with
    the keys ``code`` and ``msg`` taken from the ``error_codes`` table.
    """

    # Shared PaddleOCR engine, created on first use by _get_ocr().
    _ocr = None

    @classmethod
    def _get_ocr(cls):
        """Return the shared PaddleOCR engine, creating it on first use.

        Constructing the engine downloads/loads the model into memory, so it
        is done once and reused instead of once per recognition call.
        NOTE(review): creation is not synchronized; confirm callers are
        single-threaded or add locking if they are not.
        """
        if cls._ocr is None:
            cls._ocr = PaddleOCR(use_angle_cls=True, lang="ch")
        return cls._ocr

    @staticmethod
    def _error_result(code) -> dict:
        """Build the ``{"code", "msg"}`` error object for *code*."""
        return {"code": code, "msg": error_codes[code]}

    @staticmethod
    def _collect_text(result) -> str:
        """Join the recognized text of an ``ocr.ocr(...)`` result, one line each.

        *result* is a list of pages; each page is a list of lines whose text
        is read from ``line[1][0]``. Empty results and empty pages contribute
        nothing.
        """
        if not result:
            return ""
        return "".join(
            line[1][0] + "\n"
            for page in result
            # A page without any detected text can be reported as None rather
            # than an empty list; skip it instead of failing on iteration.
            if page
            for line in page
        )

    @staticmethod
    def extractIdCardInfo(type: int = 0, filePath1: str = "", filePath2: str = "") -> str:
        """Dispatch an identity-card recognition request by input kind.

        Args:
            type: 0 treats the two string arguments as local image paths,
                1 treats them as base64-encoded image data. (The name shadows
                the builtin but is kept for caller compatibility.)
            filePath1: First image path, or first base64 payload when type is 1.
            filePath2: Second image path, or second base64 payload when type is 1.

        Returns:
            The JSON string produced by the selected method, or a
            ``NO_DEFINED_FUNCTION_ERROR`` error object for any other *type*.
        """
        if type == 0:
            logger.info("0 == type")
            return OCRRecognition.extractIdCardInfoByPath(filePath1, filePath2)
        if type == 1:
            logger.info("1 == type")
            return OCRRecognition.extractIdCardInfoByBase64Data(filePath1, filePath2)
        logger.info("type is other")
        finalResult = OCRRecognition._error_result(NO_DEFINED_FUNCTION_ERROR)
        return json.dumps(finalResult, ensure_ascii=False)

    @staticmethod
    def extractIdCardInfoByPath(filePath1: str = "", filePath2: str = "") -> str:
        """Run OCR on up to two local images and extract identity-card fields.

        Empty paths are skipped. The text recognized in both images is
        concatenated (first image first) and handed to
        ``IdentityCardExtractor.extract_textbyPaddle``.

        Args:
            filePath1: Path of the first image, or "" to skip it.
            filePath2: Path of the second image, or "" to skip it.

        Returns:
            JSON string with the extracted fields, or an error object:
            ``LOCAL_PATH_NOT_EXIST`` when a given path does not exist,
            ``OCR_RECOGNIZE_OTHER_EXCEPTION`` when recognition raises, and
            ``NO_TEXT_RECOGNIZED`` when no text was found at all.
        """
        # Obtained outside the try block on purpose: a failure to load the
        # engine propagates to the caller rather than being reported as an
        # OCR error.
        ocr = OCRRecognition._get_ocr()
        finalResult = OCRRecognition._error_result(LOCAL_PATH_NOT_EXIST)
        pieces = []
        try:
            for path in (filePath1, filePath2):
                if not path:
                    continue
                if not os.path.exists(path):
                    logger.error(f"{path} doesn't exist,error information:{finalResult}")
                    return json.dumps(finalResult, ensure_ascii=False)
                pieces.append(OCRRecognition._collect_text(ocr.ocr(path, cls=False)))
        except Exception as e:
            finalResult = OCRRecognition._error_result(OCR_RECOGNIZE_OTHER_EXCEPTION)
            logger.error(f"{e} ,error information:{finalResult}")
            return json.dumps(finalResult, ensure_ascii=False)

        text = "".join(pieces)
        if not text:
            finalResult = OCRRecognition._error_result(NO_TEXT_RECOGNIZED)
            logger.error(f"{filePath1},{filePath2} can't be recognized,error information:{finalResult}")
            return json.dumps(finalResult, ensure_ascii=False)

        extractor = IdentityCardExtractor()
        return json.dumps(extractor.extract_textbyPaddle(text), ensure_ascii=False)

    @staticmethod
    def extractIdCardInfoByBase64Data(base64data1: str, base64Data2: str) -> str:
        """Decode up to two base64 images and extract identity-card fields.

        Each non-empty payload is decoded into a PNG-named file inside a
        private temporary directory, the files are processed with
        ``extractIdCardInfoByPath``, and the directory is removed afterwards
        whether or not processing succeeded. Using a per-call directory keeps
        leftovers of a failed call from being picked up by a later one and
        keeps concurrent calls from overwriting each other's files.

        Args:
            base64data1: Base64 data of the first image, or "" to skip it.
            base64Data2: Base64 data of the second image, or "" to skip it.

        Returns:
            The JSON string from ``extractIdCardInfoByPath``, or a
            ``BASE64_DATA_INCOMPLETE`` error object when both payloads are
            empty or when decoding/processing raises.
        """
        finalResult = OCRRecognition._error_result(BASE64_DATA_INCOMPLETE)
        if not base64data1 and not base64Data2:
            # Nothing to decode: report it instead of returning an empty,
            # non-JSON string.
            logger.error(f"base64 data is empty,error information:{finalResult}")
            return json.dumps(finalResult, ensure_ascii=False)

        try:
            with tempfile.TemporaryDirectory() as work_dir:
                paths = []
                for index, payload in enumerate((base64data1, base64Data2), start=1):
                    if not payload:
                        paths.append("")
                        continue
                    image_path = os.path.join(work_dir, f"file{index}.png")
                    with open(image_path, "wb") as image_file:
                        image_file.write(base64.b64decode(payload))
                    paths.append(image_path)
                return OCRRecognition.extractIdCardInfoByPath(paths[0], paths[1])
        except Exception as e:
            logger.error(f"{e},error information:{finalResult}")
            return json.dumps(finalResult, ensure_ascii=False)