# -*- coding: utf-8 -*-
"""Extract identity-card fields from images using PaddleOCR.

The recognised text of up to two images is concatenated and handed to
``IdentityCardExtractor``; the resulting dict is returned as a JSON string.
Images may be supplied as file paths or as base64-encoded data.
"""
import base64
import functools
import io
import json
import os
import sys
import tempfile
import time

from paddleocr import PaddleOCR

from configs.basic_config import logger
from extractor.identitycard_extractor import IdentityCardExtractor

# Re-wrap stdout so everything printed by this module is encoded as UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')


@functools.lru_cache(maxsize=1)
def _get_ocr():
    """Return the PaddleOCR engine, creating it on first use only.

    Building the engine loads the models into memory, so it is created once
    and reused by later calls.
    NOTE(review): the engine is now shared between calls -- confirm callers
    do not run OCR concurrently from several threads.
    """
    return PaddleOCR(use_angle_cls=True, lang="ch")


def _iter_image_lines(ocr, image_path):
    """Yield each recognised text line of *image_path*, newline-terminated.

    Yields nothing when the path does not exist or no text was detected.
    """
    if not os.path.exists(image_path):
        return
    pages = ocr.ocr(image_path, cls=False)
    for page in pages or []:
        # A page without any detected text may be None/empty; skip it
        # instead of failing on iteration.
        if not page:
            continue
        for line in page:
            # line[1][0] is the recognised text of the detected box.
            yield line[1][0] + '\n'


def _write_base64_image(data, target_path):
    """Decode *data* into *target_path*; return the path, or "" if no data."""
    if not data:
        return ""
    with open(target_path, "wb") as file:
        file.write(base64.b64decode(data))
    return target_path


def extractIdCardInfo(type: int, filePath1: str, filePath2: str) -> str:
    """Dispatch identity-card extraction according to the input kind.

    Args:
        type: 0 when the two inputs are image file paths, 1 when they are
            base64-encoded image data.
        filePath1: First image (path or base64 data, depending on *type*).
        filePath2: Second image (path or base64 data, depending on *type*).

    Returns:
        The extracted fields as a JSON string, or "" when nothing could be
        extracted or *type* is not supported.
    """
    if type == 0:
        return extractIdCardInfoByPath(filePath1, filePath2)
    if type == 1:
        return extractIdCardInfoByBase64Data(filePath1, filePath2)
    # Previously this fell through and returned None despite the str contract.
    logger.error(f"unsupported input type: {type}")
    return ""


def extractIdCardInfoByPath(filePath1: str, filePath2: str) -> str:
    """Run OCR on up to two image files and extract identity-card fields.

    Paths that do not exist are skipped. A failure on one image is logged
    and does not prevent the other image from being processed.

    Args:
        filePath1: Path of the first image, or "" if absent.
        filePath2: Path of the second image, or "" if absent.

    Returns:
        The extracted fields as a JSON string, or "" when no text was
        recognised.
    """
    ocr = _get_ocr()
    start_time = time.time()  # record the start time
    lines = []
    for image_path in (filePath1, filePath2):
        try:
            for recognised in _iter_image_lines(ocr, image_path):
                lines.append(recognised)
        except Exception as e:
            logger.error(e)
            print(e)
    text = "".join(lines)

    jsonString = ""
    if text:
        logger.info(f"text:{text}")
        extractor = IdentityCardExtractor()
        tempdict = extractor.extract_textbyPaddle(text)
        jsonString = json.dumps(tempdict, ensure_ascii=False)

    execution_time = time.time() - start_time  # elapsed time in seconds
    logger.info(f"extractIdCardInfoByPath 耗时{execution_time}秒")
    return jsonString


def extractIdCardInfoByBase64Data(base64data1: str, base64Data2: str) -> str:
    """Decode up to two base64 images and extract identity-card fields.

    The decoded images are written into a private temporary directory that
    is removed afterwards, so parallel calls and leftovers from earlier runs
    cannot interfere with each other.

    Args:
        base64data1: Base64 data of the first image, or "" if absent.
        base64Data2: Base64 data of the second image, or "" if absent.

    Returns:
        The extracted fields as a JSON string, or "" when both inputs are
        empty, decoding fails, or no text was recognised.
    """
    logger.info("extractIdCardInfoByBase64Data")
    start_time = time.time()  # record the start time
    jsonString = ""
    try:
        if base64data1:
            logger.info("not base64data1.empty()")
        if base64Data2:
            logger.info("not base64Data2.empty()")
        if base64data1 or base64Data2:
            with tempfile.TemporaryDirectory() as workdir:
                # Keep the ".png" names the original implementation used.
                path1 = _write_base64_image(
                    base64data1, os.path.join(workdir, "file1.png"))
                path2 = _write_base64_image(
                    base64Data2, os.path.join(workdir, "file2.png"))
                jsonString = extractIdCardInfoByPath(path1, path2)
    except Exception as e:
        logger.error(e)

    execution_time = time.time() - start_time  # elapsed time in seconds
    logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}秒")
    return jsonString


# Manual smoke-test examples (kept commented out):
#
# with open('/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG', 'rb') as image_file:
#     base64_image_string = base64.b64encode(image_file.read()).decode('utf-8')
#
# jsonString = extractIdCardInfoByBase64Data(base64_image_string,"")
# jsonString = extractIdCardInfoByBase64Data("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg")
# print(jsonString)
#
# jsonString = extractIdCardInfoByPath("./images/han.jpg","")
# print(jsonString)
# jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg")
# print(jsonString)