ComponentDevelopment/OCRPython/maincopy.py

156 lines
6.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
import base64
import io
import json
import os
import sys
import tempfile
import time

from paddleocr import PaddleOCR

from configs.basic_config import logger
from extractor.identitycard_extractor import IdentityCardExtractor
# Rewrap stdout's underlying byte stream in a UTF-8 text writer so that
# everything this script prints is encoded as UTF-8. The JSON result is dumped
# with ensure_ascii=False and printed, so it can contain non-ASCII characters.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
def extractIdCardInfo(type: int, filePath1: str, filePath2: str) -> str:
    """Dispatch an ID-card extraction request according to the input mode.

    Args:
        type: Input mode. ``0`` treats both string arguments as image file
            paths; ``1`` treats them as base64-encoded image data. (The name
            shadows the builtin but is kept for caller compatibility.)
        filePath1: First image, as a path or a base64 payload depending on
            ``type``.
        filePath2: Second image, as a path or a base64 payload depending on
            ``type``.

    Returns:
        The value returned by the selected extractor, or an empty string when
        ``type`` is not a supported mode.
    """
    if type == 0:
        return extractIdCardInfoByPath(filePath1, filePath2)
    if type == 1:
        return extractIdCardInfoByBase64Data(filePath1, filePath2)
    # Unsupported mode: return an empty string instead of falling through to
    # an implicit None, so the declared ``str`` return type always holds and
    # the result matches what the extractors return when nothing is recognized.
    return ""
def _ocrTextFromImage(ocr, imagePath: str) -> str:
    """Run ``ocr`` on one image file and return its text, one entry per line.

    Each recognized entry contributes ``entry[1][0]`` followed by a newline
    (presumably the recognized string of a PaddleOCR result row -- confirm
    against the installed PaddleOCR version).
    """
    pages = ocr.ocr(imagePath, cls=False)
    pieces = []
    # NOTE(review): some PaddleOCR versions are reported to return None (or a
    # None page) when no text is detected -- verify. The ``or []`` guards turn
    # that case into "no text" instead of a TypeError while iterating.
    for page in pages or []:
        for entry in page or []:
            pieces.append(entry[1][0] + '\n')
    return "".join(pieces)


def extractIdCardInfoByPath(filePath1: str, filePath2: str) -> str:
    """Recognize the text of up to two image files and extract ID-card fields.

    Args:
        filePath1: Path of the first image; an empty string means "not given".
        filePath2: Path of the second image; an empty string means "not given".

    Returns:
        The extractor's result serialized as a JSON string (non-ASCII
        characters kept as-is), or an empty string when no text could be
        recognized from either image.

    Raises:
        Whatever ``PaddleOCR(...)``, ``IdentityCardExtractor`` or
        ``json.dumps`` raise; only per-image OCR failures are caught here.
    """
    # Constructing PaddleOCR downloads (first run) and loads the model.
    ocr = PaddleOCR(use_angle_cls=True, lang="ch")
    start_time = time.time()  # record the start time
    text = ""
    jsonString = ""

    for imagePath in (filePath1, filePath2):
        if not imagePath:
            # Callers pass "" when there is only one image; that is not an error.
            continue
        if not os.path.exists(imagePath):
            logger.error(f"{imagePath} doesn't exist")
            continue
        # Each image gets its own try so that a failure on the first one does
        # not prevent the second one from being processed.
        try:
            text += _ocrTextFromImage(ocr, imagePath)
        except Exception as e:
            logger.error(e)
            # NOTE(review): this goes to stdout, which also carries the JSON
            # result printed by the script entry point -- confirm that the
            # consumer tolerates it before removing or redirecting.
            print(e)

    if not text:
        logger.error(f"{filePath1},{filePath2} can't be recognized")
        return jsonString

    logger.info(f"text:{text}")
    extractor = IdentityCardExtractor()
    tempdict = extractor.extract_textbyPaddle(text)
    jsonString = json.dumps(tempdict, ensure_ascii=False)
    execution_time = time.time() - start_time  # elapsed wall-clock seconds
    # The original message named extractIdCardInfoByBase64Data here (copy-paste).
    logger.info(f"extractIdCardInfoByPath 耗时{execution_time}")
    return jsonString
def extractIdCardInfoByBase64Data(base64data1: str, base64Data2: str) -> str:
    """Decode up to two base64-encoded images and extract ID-card fields.

    Each non-empty payload is decoded and written to a file inside a private
    temporary directory, the files are handed to ``extractIdCardInfoByPath``,
    and the directory is removed afterwards even when decoding or extraction
    fails. (Writing fixed file names into the current directory, as before,
    could pick up stale files from an earlier failed run and leaked files on
    errors.)

    Args:
        base64data1: Base64 payload of the first image; empty means "not given".
        base64Data2: Base64 payload of the second image; empty means "not given".

    Returns:
        The JSON string produced by ``extractIdCardInfoByPath``, or an empty
        string when no payload was given or an error occurred (errors are
        logged, not raised).
    """
    logger.info("extractIdCardInfoByBase64Data")
    start_time = time.time()  # record the start time
    jsonString = ""
    try:
        with tempfile.TemporaryDirectory() as workDir:
            imagePaths = []
            payloads = (
                ("base64data1", base64data1, "file1.png"),
                ("base64Data2", base64Data2, "file2.png"),
            )
            for label, payload, fileName in payloads:
                if not payload:
                    continue
                logger.info(f"not {label}.empty()")
                # Decode before creating the file so invalid data leaves nothing behind.
                imageBytes = base64.b64decode(payload)
                imagePath = os.path.join(workDir, fileName)
                with open(imagePath, "wb") as file:
                    file.write(imageBytes)
                imagePaths.append(imagePath)

            if imagePaths:
                writtenNames = ' and '.join(os.path.basename(p) for p in imagePaths)
                logger.info(f"{writtenNames} exist")
                # A single available image is always passed as the first path.
                firstPath = imagePaths[0]
                secondPath = imagePaths[1] if len(imagePaths) > 1 else ""
                jsonString = extractIdCardInfoByPath(firstPath, secondPath)
    except Exception as e:
        logger.error(e)
    execution_time = time.time() - start_time  # elapsed wall-clock seconds
    logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}")
    return jsonString
# with open('/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG', 'rb') as image_file:
# base64_image_string = base64.b64encode(image_file.read()).decode('utf-8')
#
# jsonString = extractIdCardInfoByBase64Data(base64_image_string,"")
# jsonString = extractIdCardInfoByBase64Data("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg")
# print(jsonString)
# #
# jsonString = extractIdCardInfoByPath("./images/han.jpg","")
# logger.info(f"test")
# jsonString = extractIdCardInfoByPath("./images/43302919641130423X_202311061953_front.jpg","./images/43302919641130423X_202311061953_back.jpg")#, "./images/江六斤反.jpg./images/han.jpg
# print(jsonString)
# Script entry point.
#
# Usage 1: <script> <type> <input1> <input2>
#     Both inputs are taken from the command line.
# Usage 2: <script> <type>
#     The inputs are read from stdin, one per line (first line -> input1,
#     second line -> input2).
#
# The extraction result is printed to stdout.
if __name__ == "__main__":
    try:
        logger.info(f"main.py len of parameter: {len(sys.argv)}")
        jsonString = ""
        if len(sys.argv) > 3:
            logger.info(f"{sys.argv[1]}")
            logger.info(f"{sys.argv[2]}")
            logger.info(f"{sys.argv[3]}")
            jsonString = extractIdCardInfo(int(sys.argv[1]), sys.argv[2], sys.argv[3])
        elif len(sys.argv) > 1:
            logger.info(f"python 脚本里的接收到的参数是:")
            logger.info(f"{sys.argv[1]}")
            logger.info(f"开始执行sys.stdin.read")
            input_data = sys.stdin.read()
            logger.info(f"len(input_data):{len(input_data)}")
            # splitlines() accepts any line-ending convention and yields no
            # trailing empty element. Splitting on os.linesep produced three
            # parts for input ending in a newline (so both inputs were
            # dropped) and never split on Windows, where text-mode stdin has
            # already translated "\r\n" to "\n".
            split_data = input_data.splitlines()
            if 2 == len(split_data):
                logger.info("2 == len(split_data)")
            data1 = split_data[0] if len(split_data) > 0 else ""
            data2 = split_data[1] if len(split_data) > 1 else ""
            if any(split_data[2:]):
                # Only two inputs are supported; make the truncation visible.
                logger.error(f"stdin has {len(split_data)} lines; only the first two are used")
            jsonString = extractIdCardInfo(int(sys.argv[1]), data1, data2)
        print(jsonString)
    except KeyboardInterrupt:
        logger.error("KeyboardInterrupt")
    except IndexError:
        # An index was out of range (e.g. fewer arguments than expected).
        logger.error("参数过长,未指定足够的参数")
    except OverflowError:
        # A numeric value derived from the arguments overflowed.
        logger.error("命令行参数过长,导致溢出错误")
    except EOFError:
        # The input stream ended unexpectedly.
        logger.error("已经到达输入流的末尾")
    except Exception as e:
        # Any other failure: embed the exception in the message. Passing it as
        # a separate positional argument without a placeholder loses it.
        logger.error(f"发生了异常:{e}")