ComponentDevelopment/OCRPython/maincopy.py

# -*- coding: utf-8 -*-
import sys
import io
from paddleocr import PaddleOCR
import time
from configs.basic_config import *
from extractor.identitycard_extractor import IdentityCardExtractor
import base64
import json

# Re-wrap stdout so everything printed by this script is encoded as UTF-8;
# the JSON results are dumped with ensure_ascii=False and may therefore
# contain non-ASCII characters.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
def extractIdCardInfo(type:int=0, filePath1: str= "", filePath2: str= "")->str:
    """Dispatch an ID-card extraction request according to the input kind.

    Args:
        type: Input selector. 0 means the two string arguments are local
            image paths; 1 means they are base64-encoded image payloads.
        filePath1: First image (path or base64 text, depending on ``type``).
        filePath2: Second image (path or base64 text, depending on ``type``).

    Returns:
        The JSON string produced by the selected extractor. For any other
        selector value nothing is dispatched and ``None`` is returned.
    """
    if type == 0:
        return extractIdCardInfoByPath(filePath1, filePath2)
    if type == 1:
        return extractIdCardInfoByBase64Data(filePath1, filePath2)
    # Unknown selector: no extractor is invoked.
    return None

def _recognizeImageText(ocr, imagePath: str) -> str:
    """Run OCR on one image and return its recognized lines, each followed by a newline."""
    pages = ocr.ocr(imagePath, cls=False)
    # PaddleOCR can report a page without any detected text as None; skip such
    # pages so a blank image ends up as "no text recognized" instead of
    # raising a TypeError while iterating.
    return "".join(
        line[1][0] + '\n'
        for page in (pages or [])
        if page
        for line in page
    )


def extractIdCardInfoByPath(filePath1: str = "", filePath2: str = "")->str:
    """Extract ID-card fields from one or two local image files.

    Empty path arguments are ignored. Every non-empty path is checked for
    existence before the OCR model is loaded, so a bad path fails fast.

    Args:
        filePath1: Path of the first image, or "" to skip it.
        filePath2: Path of the second image, or "" to skip it.

    Returns:
        A JSON string. On success it is the dictionary returned by
        ``IdentityCardExtractor.extract_textbyPaddle``. On failure it is a
        ``{"code", "msg"}`` object using one of LOCAL_PATH_NOT_EXIST,
        OCR_RECOGNIZE_OTHER_EXCEPTION or NO_TEXT_RECOGNIZED.
    """
    start_time = time.time()  # start of the timed section
    finalResult = {
        "code": LOCAL_PATH_NOT_EXIST,
        "msg": error_codes[LOCAL_PATH_NOT_EXIST],
    }
    text = ""
    try:
        imagePaths = [path for path in (filePath1, filePath2) if len(path) > 0]
        for path in imagePaths:
            if not os.path.exists(path):
                logger.error(f"{path} doesn't exist,error information:{finalResult}")
                return json.dumps(finalResult, ensure_ascii=False)

        if imagePaths:
            # Loading the model is expensive (and may download weights), so it
            # happens only after validation and inside the try block so that a
            # load failure is reported as a JSON error rather than a crash.
            ocr = PaddleOCR(use_angle_cls=True, lang="ch")
            text = "".join(_recognizeImageText(ocr, path) for path in imagePaths)

    except Exception as e:
        finalResult["code"] = OCR_RECOGNIZE_OTHER_EXCEPTION
        finalResult["msg"] = error_codes[OCR_RECOGNIZE_OTHER_EXCEPTION]
        logger.error(f"{e} ,error information:{finalResult}")
        return json.dumps(finalResult, ensure_ascii=False)

    if not text:
        finalResult["code"] = NO_TEXT_RECOGNIZED
        finalResult["msg"] = error_codes[NO_TEXT_RECOGNIZED]
        logger.error(f"{filePath1},{filePath2} can't be recognized,error information:{finalResult}")
        return json.dumps(finalResult, ensure_ascii=False)

    extractor = IdentityCardExtractor()
    tempdict = extractor.extract_textbyPaddle(text)
    jsonString = json.dumps(tempdict, ensure_ascii=False)
    execution_time = time.time() - start_time  # elapsed seconds
    # The log message names this function (it previously named the base64 variant).
    logger.info(f"extractIdCardInfoByPath 耗时{execution_time}秒")
    return jsonString

def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str:
    """Extract ID-card fields from one or two base64-encoded images.

    Each non-empty payload is decoded into a file inside a private temporary
    directory and the resulting paths are handed to
    ``extractIdCardInfoByPath``. The directory is removed on every exit path,
    so no image is left on disk and files from earlier or concurrent runs can
    never be picked up by mistake.

    Args:
        base64data1: Base64 text of the first image, or "" if absent.
        base64Data2: Base64 text of the second image, or "" if absent.

    Returns:
        The JSON string returned by ``extractIdCardInfoByPath``, or a
        ``{"code", "msg"}`` JSON object with BASE64_DATA_INCOMPLETE when no
        payload was supplied or decoding/writing failed.
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    finalResult = {
        "code": BASE64_DATA_INCOMPLETE,
        "msg": error_codes[BASE64_DATA_INCOMPLETE],
    }
    logger.info(f"extractIdCardInfoByBase64Data")
    start_time = time.time()  # start of the timed section
    jsonString = ""
    try:
        payloads = [data for data in (base64data1, base64Data2) if 0 != len(data)]
        if not payloads:
            # Nothing to decode: report it instead of returning an empty string.
            logger.error(f"no base64 data supplied,error information:{finalResult}")
            return json.dumps(finalResult, ensure_ascii=False)

        with tempfile.TemporaryDirectory() as workDir:
            imagePaths = []
            for index, payload in enumerate(payloads, start=1):
                imagePath = os.path.join(workDir, f"file{index}.png")
                with open(imagePath, "wb") as file:
                    file.write(base64.b64decode(payload))
                imagePaths.append(imagePath)
            # A single payload is always passed as the first path, with the
            # second path left empty.
            imagePaths.append("")
            jsonString = extractIdCardInfoByPath(imagePaths[0], imagePaths[1])
    except Exception as e:
        logger.error(f"{e},error information:{finalResult}")
        return json.dumps(finalResult, ensure_ascii=False)

    execution_time = time.time() - start_time  # elapsed seconds
    logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}秒")
    return jsonString

# with open('/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG', 'rb') as image_file:
#     base64_image_string = base64.b64encode(image_file.read()).decode('utf-8')
#
# jsonString = extractIdCardInfoByBase64Data(base64_image_string,"")
#jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG")
#jsonString = extractIdCardInfoByPath("./images/bf4343cfb5806c77ae21c56a8c35f474.jpeg")
#print(jsonString)

# jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg")
# print(jsonString)
# #
# jsonString = extractIdCardInfoByPath("./images/han.jpg","")
# logger.info(f"test")
# jsonString = extractIdCardInfoByPath("./images/43302919641130423X_202311061953_front.jpg","./images/43302919641130423X_202311061953_back.jpg")#, "./images/江六斤反.jpg，./images/han.jpg
# print(jsonString)

def splitStdinPayload(rawInput: str) -> tuple:
    """Split the text read from stdin into ``(data1, data2)``.

    The input is expected to hold one payload per line. Any newline
    convention is accepted and trailing blank lines are ignored, so a final
    newline does not count as an extra payload. If more than two lines
    remain, both values are returned empty.

    Args:
        rawInput: Everything read from stdin.

    Returns:
        A 2-tuple of strings; a missing payload is "".
    """
    lines = rawInput.splitlines()
    while lines and not lines[-1].strip():
        lines.pop()
    if len(lines) == 1:
        return lines[0], ""
    if len(lines) == 2:
        return lines[0], lines[1]
    return "", ""


if __name__ == "__main__":
    try:
        logger.info(f"main.py len of parameter: {len(sys.argv)}")
        jsonString = ""
        if len(sys.argv) > 3:
            # All inputs were given on the command line: <type> <input1> <input2>.
            logger.info(f"{sys.argv[1]}")
            logger.info(f"{sys.argv[2]}")
            logger.info(f"{sys.argv[3]}")
            jsonString = extractIdCardInfo(int(sys.argv[1]), sys.argv[2], sys.argv[3])
        elif len(sys.argv) > 1:
            # Only the type was given; the payloads arrive on stdin, one per line.
            logger.info(f"python 脚本里的接收到的参数是：")
            logger.info(f"{sys.argv[1]}")
            logger.info(f"开始执行sys.stdin.read")
            input_data = sys.stdin.read()
            logger.info(f"len(input_data):{len(input_data)}")
            data1, data2 = splitStdinPayload(input_data)
            jsonString = extractIdCardInfo(int(sys.argv[1]), data1, data2)
        print(jsonString)
    except KeyboardInterrupt:
        logger.error("KeyboardInterrupt")

    except IndexError:
        # An argument index was out of range.
        logger.error("参数过长，未指定足够的参数")

    except OverflowError:
        # An overflow occurred while handling the command-line arguments.
        logger.error("命令行参数过长，导致溢出错误")

    except EOFError:
        # The end of the input stream was reached.
        logger.error("已经到达输入流的末尾")

    except Exception as e:
        # Any other failure: include the exception text in the log message
        # (it was previously passed as a stray argument with no placeholder).
        logger.error(f"发生了异常:{e}")