# -*- coding: utf-8 -*-
"""ID-card OCR helpers, reconstructed from a patch whose line breaks were lost.

The patch touched two files.  Both parts are kept below, in patch order and
separated by banner comments:

1. ``OCRPython/extractor/extractor.py`` -- two methods added to ``Extractor``.
2. ``OCRPython/maincopydet.py`` -- a new command-line script that runs
   PaddleOCR on one or two ID-card images and prints the extracted fields as
   JSON on stdout.

Script usage (part 2):

* ``maincopydet.py <type> <arg1> <arg2>`` -- ``type`` 0 treats the arguments
  as image paths, ``type`` 1 treats them as base64-encoded image data.
* ``maincopydet.py <type>`` -- the one or two payloads are read from stdin,
  one per line.
"""

# ---------------------------------------------------------------------------
# Part 1: OCRPython/extractor/extractor.py (methods added to class Extractor)
# ---------------------------------------------------------------------------


class Extractor:
    """Text-extraction helpers.

    Only the two methods added by the patch are reproduced here; the rest of
    the class lies outside the visible part of the patch.
    """

    def extract_birthday_from_id(self, id_number):
        """Return the birth date encoded in an 18-character ID number.

        Characters 7-10 are taken as the year, 11-12 as the month and 13-14
        as the day.  Month and day are rendered without leading zeros.

        Args:
            id_number: The ID number as a string.

        Returns:
            A string such as ``"1990年3月7日"``, or ``""`` when the input is
            not an 18-character string or its month/day part is not numeric
            (for example because OCR misread a digit).
        """
        if not isinstance(id_number, str) or len(id_number) != 18:
            return ""
        year = id_number[6:10]
        try:
            month = int(id_number[10:12])
            day = int(id_number[12:14])
        except ValueError:
            # Garbled digits: report "no birthday" like the wrong-length case
            # instead of raising into the caller.
            return ""
        return f"{year}年{month}月{day}日"

    def get_gender_from_id(self, id_num):
        """Return the gender encoded in an ID number.

        The second-to-last character is read as a digit: odd means ``'男'``,
        even means ``'女'``.

        Args:
            id_num: The ID number as a string.

        Returns:
            ``'男'`` or ``'女'``, or ``""`` when the input is not a string, is
            shorter than two characters, or the inspected character is not a
            digit.
        """
        # NOTE(review): only the second-to-last position is inspected,
        # whatever the length of the ID -- confirm that only 18-character
        # IDs reach this method.
        if not isinstance(id_num, str) or len(id_num) < 2:
            return ""
        try:
            sequence_digit = int(id_num[-2])
        except ValueError:
            return ""
        return '男' if sequence_digit % 2 == 1 else '女'


# ---------------------------------------------------------------------------
# Part 2: OCRPython/maincopydet.py (new file)
# ---------------------------------------------------------------------------

import base64
import io
import json
import os
import sys
import tempfile
import time

from paddleocr import PaddleOCR

from configs.basic_config import logger
from extractor.identitycard_extractor import IdentityCardExtractor

# The JSON result is printed on stdout; force UTF-8 so non-ASCII field values
# survive regardless of the console's default encoding.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

# Lazily created OCR engine, shared by every call in this process.
_ocr_engine = None


def _get_ocr_engine():
    """Return the process-wide PaddleOCR engine, creating it on first use.

    Local model directories can be supplied through the ``det_model_dir``,
    ``rec_model_dir`` and ``cls_model_dir`` keyword arguments (for example
    ``./models/infer/ch_PP-OCRv3_det_infer``) instead of the defaults.
    """
    global _ocr_engine
    if _ocr_engine is None:
        _ocr_engine = PaddleOCR(use_angle_cls=True, lang="ch")
    return _ocr_engine


def _recognized_lines(ocr, image_path):
    """Return the recognised text strings for one image, in result order."""
    recognised = []
    for page in ocr.ocr(image_path, cls=False) or []:
        # A page may come back empty/None when nothing was detected
        # (seen with some PaddleOCR releases); skip it instead of failing.
        if not page:
            continue
        recognised.extend(entry[1][0] for entry in page)
    return recognised


def _write_decoded_image(base64_data, target_path):
    """Decode ``base64_data`` and write the bytes to ``target_path``."""
    with open(target_path, "wb") as image_file:
        image_file.write(base64.b64decode(base64_data))


def _split_stdin_payload(input_data):
    """Split stdin content into the (first, second) payload strings.

    Line endings are handled with ``splitlines`` and blank lines are ignored,
    so a trailing newline or Windows line endings do not lose the payloads.
    More than two non-empty lines is ambiguous and yields two empty strings.
    """
    payload_lines = [line.strip() for line in input_data.splitlines() if line.strip()]
    if len(payload_lines) > 2:
        logger.error(f"unexpected number of stdin payload lines: {len(payload_lines)}")
        return "", ""
    if 2 == len(payload_lines):
        logger.info("2 == len(split_data)")
        return payload_lines[0], payload_lines[1]
    if 1 == len(payload_lines):
        return payload_lines[0], ""
    return "", ""


def extractIdCardInfo(type: int, filePath1: str, filePath2: str) -> str:
    """Dispatch ID-card extraction according to the input kind.

    Args:
        type: 0 when the two arguments are image file paths, 1 when they are
            base64-encoded image data.
        filePath1: First image (path or base64 data).
        filePath2: Second image (path or base64 data); may be empty.

    Returns:
        The extracted fields as a JSON string, or ``""`` when nothing was
        extracted or ``type`` is not supported.
    """
    if 0 == type:
        return extractIdCardInfoByPath(filePath1, filePath2)
    if 1 == type:
        return extractIdCardInfoByBase64Data(filePath1, filePath2)
    # Previously this fell through and returned None, which the caller then
    # printed as the literal text "None".
    logger.error(f"unsupported input type: {type}")
    return ""


def extractIdCardInfoByPath(filePath1: str, filePath2: str) -> str:
    """Run OCR on up to two image files and extract the ID-card fields.

    Paths that are empty or do not exist are skipped.  A failure on one image
    is logged and does not prevent the other image from being processed.

    Args:
        filePath1: Path of the first image.
        filePath2: Path of the second image; may be empty.

    Returns:
        The dictionary produced by ``IdentityCardExtractor`` serialised as
        JSON, or ``""`` when no text was recognised.
    """
    ocr = _get_ocr_engine()
    start_time = time.time()  # start of the timed section
    recognised = []
    for image_path in (filePath1, filePath2):
        if not image_path or not os.path.exists(image_path):
            continue
        try:
            recognised.extend(_recognized_lines(ocr, image_path))
        except Exception as e:
            logger.error(e)
            # stdout carries the JSON result, so the console copy of the
            # error goes to stderr.
            print(e, file=sys.stderr)

    jsonString = ""
    if recognised:
        # One recognised string per line, each newline-terminated.
        text = "".join(f"{line}\n" for line in recognised)
        # NOTE(review): this logs the full recognised card text.
        logger.info(f"text:{text}")
        extractor = IdentityCardExtractor()
        tempdict = extractor.extract_textbyPaddle(text)
        jsonString = json.dumps(tempdict, ensure_ascii=False)

    execution_time = time.time() - start_time
    logger.info(f"extractIdCardInfoByPath 耗时{execution_time}秒")
    return jsonString


def extractIdCardInfoByBase64Data(base64data1: str, base64Data2: str) -> str:
    """Decode up to two base64 images and extract the ID-card fields.

    The decoded images are written to a private temporary directory that is
    removed afterwards, even when decoding or OCR fails, so a leftover file
    from an earlier run can never be mistaken for the current input.

    Args:
        base64data1: Base64 data of the first image; may be empty.
        base64Data2: Base64 data of the second image; may be empty.

    Returns:
        The extracted fields as a JSON string, or ``""`` when both inputs are
        empty, nothing was recognised, or an error occurred (it is logged).
    """
    logger.info("extractIdCardInfoByBase64Data")
    start_time = time.time()  # start of the timed section
    jsonString = ""
    try:
        with tempfile.TemporaryDirectory() as work_dir:
            image_path1 = ""
            image_path2 = ""
            if base64data1:
                logger.info("not base64data1.empty()")
                image_path1 = os.path.join(work_dir, "file1.png")
                _write_decoded_image(base64data1, image_path1)

            if base64Data2:
                logger.info("not base64Data2.empty()")
                image_path2 = os.path.join(work_dir, "file2.png")
                _write_decoded_image(base64Data2, image_path2)

            if image_path1 and image_path2:
                logger.info("file1.png and file2.png exist")
                jsonString = extractIdCardInfoByPath(image_path1, image_path2)
            elif image_path1:
                logger.info("file1.png exist")
                jsonString = extractIdCardInfoByPath(image_path1, "")
            elif image_path2:
                logger.info("file2.png exist")
                jsonString = extractIdCardInfoByPath(image_path2, "")
    except Exception as e:
        logger.error(e)

    execution_time = time.time() - start_time
    logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}秒")
    return jsonString


def main():
    """Command-line entry point: run the extraction and print the JSON result."""
    try:
        logger.info(f"main.py len of parameter: {len(sys.argv)}")
        jsonString = ""
        if len(sys.argv) > 3:
            # NOTE(review): with type 1 these arguments are the raw base64
            # images, so the whole payload ends up in the log.
            logger.info(f"{sys.argv[1]}")
            logger.info(f"{sys.argv[2]}")
            logger.info(f"{sys.argv[3]}")
            jsonString = extractIdCardInfo(int(sys.argv[1]), sys.argv[2], sys.argv[3])
        elif len(sys.argv) > 1:
            logger.info("python 脚本里的接收到的参数是:")
            logger.info(f"{sys.argv[1]}")
            logger.info("开始执行sys.stdin.read")
            input_data = sys.stdin.read()
            logger.info(f"len(input_data):{len(input_data)}")
            data1, data2 = _split_stdin_payload(input_data)
            jsonString = extractIdCardInfo(int(sys.argv[1]), data1, data2)
        print(jsonString)
    except KeyboardInterrupt:
        logger.error("KeyboardInterrupt")

    except IndexError:
        # An index was out of range while handling the arguments.
        logger.error("参数过长,未指定足够的参数")

    except OverflowError:
        # A numeric overflow occurred while handling the arguments.
        logger.error("命令行参数过长,导致溢出错误")

    except EOFError:
        # The input stream ended unexpectedly.
        logger.error("已经到达输入流的末尾")

    except Exception as e:
        # Any other failure: put the exception text into the message itself,
        # since the original passed it as an argument with no placeholder.
        logger.error(f"发生了异常:{e}")


if __name__ == "__main__":
    main()