# -*- coding: utf-8 -*-
"""Identity-card OCR helpers built on PaddleOCR.

Images can be supplied either as local file paths or as base64-encoded data.
Every public entry point returns a JSON string: on success the serialized
result of ``IdentityCardExtractor.extract_textbyPaddle``, on failure an object
of the form ``{"code": ..., "msg": ...}``.
"""
import base64
import io
import json
import os
import sys
import tempfile
import time

from paddleocr import PaddleOCR

from configs.basic_config import *
from extractor.identitycard_extractor import IdentityCardExtractor

# Re-wrap stdout so that non-ASCII (Chinese) output is always emitted as UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')


class OCRRecognition:
    """Static entry points for extracting identity-card text from images."""

    @staticmethod
    def _error_result(code) -> dict:
        """Build the ``{"code", "msg"}`` error dictionary for *code*."""
        return {"code": code, "msg": error_codes[code]}

    @staticmethod
    def _read_text(ocr, image_path: str) -> str:
        """Run *ocr* on *image_path* and return the recognized lines.

        Each recognized line is terminated with a newline. PaddleOCR may
        report a page without any detected text as ``None``; such pages are
        skipped so that the caller sees an empty string instead of an
        exception.
        """
        pages = ocr.ocr(image_path, cls=False)
        lines = []
        for page in pages or []:
            if not page:
                continue
            # Each entry is (bounding_box, (text, confidence)).
            lines.extend(entry[1][0] + '\n' for entry in page)
        return "".join(lines)

    @staticmethod
    def extractIdCardInfo(type: int = 0, filePath1: str = "", filePath2: str = "") -> str:
        """Dispatch an extraction request according to *type*.

        Args:
            type: ``0`` to treat the two inputs as local file paths, ``1`` to
                treat them as base64-encoded image data.
            filePath1: First image (path or base64 data); may be empty.
            filePath2: Second image (path or base64 data); may be empty.

        Returns:
            The JSON string produced by the selected extractor, or a
            ``NO_DEFINED_FUNCTION_ERROR`` error object for any other *type*.
        """
        if 0 == type:
            logger.info("0 == type")
            return OCRRecognition.extractIdCardInfoByPath(filePath1, filePath2)
        if 1 == type:
            logger.info("1 == type")
            return OCRRecognition.extractIdCardInfoByBase64Data(filePath1, filePath2)

        logger.info("type is other")
        finalResult = OCRRecognition._error_result(NO_DEFINED_FUNCTION_ERROR)
        return json.dumps(finalResult, ensure_ascii=False)

    @staticmethod
    def extractIdCardInfoByPath(filePath1: str = "", filePath2: str = "") -> str:
        """Recognize the text of up to two image files and extract card fields.

        Args:
            filePath1: Path of the first image; an empty string skips it.
            filePath2: Path of the second image; an empty string skips it.

        Returns:
            A JSON string. On success it is the serialized extractor result.
            Otherwise it is an error object with one of the codes
            ``LOCAL_PATH_NOT_EXIST`` (a non-empty path does not exist),
            ``OCR_RECOGNIZE_OTHER_EXCEPTION`` (OCR raised) or
            ``NO_TEXT_RECOGNIZED`` (no text was found in any image).
        """
        logger.info("ocr加载开始计时")
        start_time = time.time()  # start of the timed section
        finalResult = OCRRecognition._error_result(LOCAL_PATH_NOT_EXIST)

        # Validate the inputs before paying for the model load.
        image_paths = [path for path in (filePath1, filePath2) if path]
        for path in image_paths:
            if not os.path.exists(path):
                logger.error(f"{path} doesn't exist,error information:{finalResult}")
                return json.dumps(finalResult, ensure_ascii=False)

        # Loads (and on first use downloads) the recognition model.
        ocr = PaddleOCR(use_angle_cls=True, lang="ch")

        try:
            text = "".join(OCRRecognition._read_text(ocr, path) for path in image_paths)
        except Exception as e:
            finalResult = OCRRecognition._error_result(OCR_RECOGNIZE_OTHER_EXCEPTION)
            logger.error(f"{e} ,error information:{finalResult}")
            return json.dumps(finalResult, ensure_ascii=False)

        if not text:
            finalResult = OCRRecognition._error_result(NO_TEXT_RECOGNIZED)
            logger.error(f"{filePath1},{filePath2} can't be recognized,error information:{finalResult}")
            return json.dumps(finalResult, ensure_ascii=False)

        extractor = IdentityCardExtractor()
        tempdict = extractor.extract_textbyPaddle(text)
        jsonString = json.dumps(tempdict, ensure_ascii=False)
        execution_time = time.time() - start_time  # elapsed wall-clock seconds
        logger.info(f"extractIdCardInfoByPath 耗时{execution_time}秒")
        return jsonString

    @staticmethod
    def extractIdCardInfoByBase64Data(base64data1: str, base64Data2: str) -> str:
        """Decode up to two base64 images and extract card fields from them.

        The decoded images are written to uniquely named temporary files, so
        concurrent calls cannot overwrite each other's data, and the files are
        always removed afterwards.

        Args:
            base64data1: Base64 data of the first image; may be empty.
            base64Data2: Base64 data of the second image; may be empty.

        Returns:
            The JSON string returned by ``extractIdCardInfoByPath`` for the
            decoded images, or a ``BASE64_DATA_INCOMPLETE`` error object when
            no data was supplied or decoding/writing failed.
        """
        finalResult = OCRRecognition._error_result(BASE64_DATA_INCOMPLETE)
        logger.info("extractIdCardInfoByBase64Data")
        start_time = time.time()  # start of the timed section
        temp_paths = []
        try:
            for payload in (base64data1, base64Data2):
                if not payload:
                    continue
                # Decode first so that invalid data never creates a file.
                image_bytes = base64.b64decode(payload)
                with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as file:
                    temp_paths.append(file.name)
                    file.write(image_bytes)

            if not temp_paths:
                # Nothing to recognize: report it instead of returning "".
                logger.error(f"no base64 data supplied,error information:{finalResult}")
                return json.dumps(finalResult, ensure_ascii=False)

            first_path = temp_paths[0]
            second_path = temp_paths[1] if len(temp_paths) > 1 else ""
            jsonString = OCRRecognition.extractIdCardInfoByPath(first_path, second_path)
        except Exception as e:
            logger.error(e)
            logger.error(f"{e},error information:{finalResult}")
            return json.dumps(finalResult, ensure_ascii=False)
        finally:
            # Best-effort clean-up; a failed removal must not mask the result.
            for path in temp_paths:
                try:
                    os.remove(path)
                except OSError as e:
                    logger.error(f"failed to remove temporary file {path}: {e}")

        execution_time = time.time() - start_time  # elapsed wall-clock seconds
        logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}秒")
        return jsonString