ComponentDevelopment/OCRPython/maincopy.py

156 lines
6.0 KiB
Python
Raw Normal View History

2024-04-22 10:02:06 +08:00
# -*- coding: utf-8 -*-
import sys
2024-04-24 08:41:51 +08:00
import io
2024-04-22 10:02:06 +08:00
import os
2024-04-24 08:41:51 +08:00
from paddleocr import PaddleOCR
2024-04-22 10:02:06 +08:00
import time
from configs.basic_config import logger
from extractor.identitycard_extractor import IdentityCardExtractor
import base64
2024-04-24 08:41:51 +08:00
import json
2024-04-22 10:02:06 +08:00
2024-04-24 08:41:51 +08:00
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
2024-04-22 10:02:06 +08:00
def extractIdCardInfo(type: int, filePath1: str, filePath2: str) -> str:
    """Dispatch an ID-card extraction request according to the input kind.

    Args:
        type: Input selector. ``0`` means the two inputs are image file
            paths; ``1`` means they are base64-encoded image data. The name
            shadows the builtin but is kept so existing keyword callers
            continue to work.
        filePath1: First image, as a path or base64 string depending on
            ``type``.
        filePath2: Second image, in the same form as ``filePath1``.

    Returns:
        The string produced by the selected extractor, or ``""`` when
        ``type`` is neither ``0`` nor ``1``.
    """
    if type == 0:
        return extractIdCardInfoByPath(filePath1, filePath2)
    if type == 1:
        return extractIdCardInfoByBase64Data(filePath1, filePath2)
    # Unsupported selector: return the same empty-string "no result" value the
    # extractors use instead of falling through to an implicit None, which
    # violated the declared return type and was printed as "None" by callers.
    return ""
def extractIdCardInfoByPath(filePath1: str, filePath2: str) -> str:
    """Run OCR on up to two image files and return the extracted fields as JSON.

    The text recognized in both images is concatenated (one recognized line
    per text line, first image first) and handed to
    ``IdentityCardExtractor.extract_textbyPaddle``; the resulting object is
    serialized with ``json.dumps(..., ensure_ascii=False)``.

    Args:
        filePath1: Path of the first image. An empty value is skipped.
        filePath2: Path of the second image. An empty value is skipped.

    Returns:
        The JSON string, or ``""`` when no text could be recognized in
        either image.
    """
    # Loading the model is the expensive part; one instance serves both images.
    ocr = PaddleOCR(use_angle_cls=True, lang="ch")
    start_time = time.time()  # start of the timed section
    jsonString = ""
    recognized_lines = []

    for image_path in (filePath1, filePath2):
        if not image_path:
            # An empty path means "no image supplied" (e.g. only one side of
            # the card was provided); that is not an error worth logging.
            continue
        if not os.path.exists(image_path):
            logger.error(f"{image_path} doesn't exist")
            continue
        # Each image gets its own try block so that a failure on one image
        # does not prevent the other from being processed.
        try:
            result = ocr.ocr(image_path, cls=False)
            for page in result or []:
                # PaddleOCR can report a page as None when it detects no
                # text; treat that as "no lines" rather than crashing.
                for line in page or []:
                    recognized_lines.append(line[1][0])
        except Exception as e:
            # Only log: stdout carries the JSON result of this script, so
            # error text must not be printed there.
            logger.error(e)

    text = "".join(f"{line}\n" for line in recognized_lines)
    if text:
        logger.info(f"text:{text}")
        extractor = IdentityCardExtractor()
        tempdict = extractor.extract_textbyPaddle(text)
        jsonString = json.dumps(tempdict, ensure_ascii=False)
        execution_time = time.time() - start_time  # elapsed seconds
        logger.info(f"extractIdCardInfoByPath 耗时{execution_time}")
    else:
        logger.error(f"{filePath1},{filePath2} can't be recognized")
    return jsonString
def extractIdCardInfoByBase64Data(base64data1: str, base64Data2: str) -> str:
    """Decode up to two base64 images to temporary files and extract ID-card info.

    Each non-empty argument is base64-decoded and written to its own
    uniquely named temporary ``.png`` file; the files are then passed to
    ``extractIdCardInfoByPath`` and always deleted afterwards.

    Args:
        base64data1: Base64 text of the first image; may be empty.
        base64Data2: Base64 text of the second image; may be empty.

    Returns:
        The string returned by ``extractIdCardInfoByPath``, or ``""`` when
        no image data was supplied or an error occurred.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    import tempfile

    logger.info(f"extractIdCardInfoByBase64Data")
    start_time = time.time()  # start of the timed section
    jsonString = ""
    temp_paths = []
    try:
        for label, encoded in (("base64data1", base64data1),
                               ("base64Data2", base64Data2)):
            if not encoded:
                continue
            logger.info(f"not {label}.empty()")
            image_bytes = base64.b64decode(encoded)
            # A unique temp file per image avoids two problems of fixed
            # names in the working directory: concurrent runs overwriting
            # each other, and a stale file from an earlier run being
            # mistaken for the current input.
            fd, temp_path = tempfile.mkstemp(suffix=".png")
            temp_paths.append(temp_path)  # registered first so it is always cleaned up
            with os.fdopen(fd, "wb") as file:
                file.write(image_bytes)

        if temp_paths:
            first_path = temp_paths[0]
            # When only one image was supplied it is passed as the first
            # path and the second path is left empty.
            second_path = temp_paths[1] if len(temp_paths) > 1 else ""
            logger.info(f"{len(temp_paths)} temporary image file(s) written")
            jsonString = extractIdCardInfoByPath(first_path, second_path)
    except Exception as e:
        logger.error(e)
    finally:
        # Remove the decoded images even if decoding or extraction failed.
        for temp_path in temp_paths:
            try:
                os.remove(temp_path)
            except OSError as e:
                logger.error(e)
    end_time = time.time()  # end of the timed section
    execution_time = end_time - start_time  # elapsed seconds
    logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}")
    return jsonString
# with open('/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG', 'rb') as image_file:
# base64_image_string = base64.b64encode(image_file.read()).decode('utf-8')
2024-04-22 13:34:14 +08:00
#
2024-04-22 10:02:06 +08:00
# jsonString = extractIdCardInfoByBase64Data(base64_image_string,"")
2024-04-22 17:12:21 +08:00
# jsonString = extractIdCardInfoByBase64Data("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg")
2024-04-22 10:02:06 +08:00
# print(jsonString)
2024-04-24 08:41:51 +08:00
# #
# jsonString = extractIdCardInfoByPath("./images/han.jpg","")
# logger.info(f"test")
2024-05-06 10:44:19 +08:00
# jsonString = extractIdCardInfoByPath("./images/43302919641130423X_202311061953_front.jpg","./images/43302919641130423X_202311061953_back.jpg")#, "./images/江六斤反.jpg./images/han.jpg
2024-04-22 13:34:14 +08:00
# print(jsonString)
2024-04-24 16:51:02 +08:00
2024-04-22 13:34:14 +08:00
def splitStdinPayload(input_data: str) -> list:
    """Split text read from stdin into its non-blank lines.

    ``str.splitlines`` recognizes ``\\n``, ``\\r\\n`` and ``\\r``, so the result
    does not depend on the platform's ``os.linesep`` (text-mode stdin has
    usually already translated line endings to ``\\n``). Blank lines are
    dropped so a trailing newline does not count as an extra payload.

    Args:
        input_data: Everything read from stdin.

    Returns:
        The non-blank lines, in order, without their line terminators.
    """
    return [line for line in input_data.splitlines() if line.strip()]


if __name__ == "__main__":
    # Command-line entry point. Two calling conventions are supported:
    #   script.py <type> <input1> <input2>   - both inputs given as arguments
    #   script.py <type>                     - inputs read from stdin, one per line
    # The result is printed to stdout.
    try:
        logger.info(f"main.py len of parameter: {len(sys.argv)}")
        jsonString = ""
        if len(sys.argv) > 3:
            logger.info(f"{sys.argv[1]}")
            logger.info(f"{sys.argv[2]}")
            logger.info(f"{sys.argv[3]}")
            jsonString = extractIdCardInfo(int(sys.argv[1]), sys.argv[2], sys.argv[3])
        elif len(sys.argv) > 1:
            logger.info(f"python 脚本里的接收到的参数是:")
            logger.info(f"{sys.argv[1]}")
            logger.info(f"开始执行sys.stdin.read")
            input_data = sys.stdin.read()
            logger.info(f"len(input_data):{len(input_data)}")
            payloads = splitStdinPayload(input_data)
            data1 = ""
            data2 = ""
            if 2 == len(payloads):
                logger.info("2 == len(split_data)")
                data1 = payloads[0]
                data2 = payloads[1]
            elif 1 == len(payloads):
                data1 = payloads[0]
            elif payloads:
                # More than two payload lines cannot be mapped onto the two
                # inputs; proceed with empty inputs and report the problem
                # instead of dropping the data silently.
                logger.error(f"expected at most 2 lines on stdin, got {len(payloads)}")
            jsonString = extractIdCardInfo(int(sys.argv[1]), data1, data2)
        print(jsonString)
    except KeyboardInterrupt:
        logger.error("KeyboardInterrupt")
    except IndexError:
        # An argument index was out of range.
        logger.error("参数过长,未指定足够的参数")
    except OverflowError:
        # A numeric conversion overflowed.
        logger.error("命令行参数过长,导致溢出错误")
    except EOFError:
        # The input stream ended unexpectedly.
        logger.error("已经到达输入流的末尾")
    except Exception as e:
        # Any other failure: put the exception in the message itself; passing
        # it as a separate positional argument never interpolated it.
        logger.error(f"发生了异常:{e}")
2024-04-22 10:02:06 +08:00