fix log issue

2024-04-30 08:48:58 +08:00 · 2024-04-30 08:48:58 +08:00 · 1799c04a92
parent 80530f8b77
commit 1799c04a92
2 changed files with 177 additions and 1 deletions
--- a/OCRPython/extractor/extractor.py
+++ b/OCRPython/extractor/extractor.py
@ -14,7 +14,20 @@ class Extractor:
        # 使用join()方法将列表中的行重新连接成字符串
        cleaned_text = '\n'.join(lines)
        return cleaned_text
-    
+
+    def extract_birthday_from_id(self, id_number):
+        """Return the birth date encoded in an 18-character ID number.
+
+        Characters at indices 6..13 are read as the date in YYYYMMDD form.
+
+        Args:
+            id_number: The ID number as a string.
+
+        Returns:
+            The date formatted as "<year>年<month>月<day>日" (month and day
+            without leading zeros), or "" when ``id_number`` is empty, is
+            not exactly 18 characters long, or its date field contains a
+            non-digit character.
+        """
+        if not id_number or len(id_number) != 18:
+            return ""
+        birth = id_number[6:14]
+        # Guard before int(): a non-digit character here would raise
+        # ValueError instead of yielding the "" fallback used above.
+        if not birth.isdecimal():
+            return ""
+        year, month, day = birth[:4], birth[4:6], birth[6:8]
+        return f"{year}年{int(month)}月{int(day)}日"
+
+    def get_gender_from_id(self, id_num):
+        """Return the gender encoded in an ID number.
+
+        The second-to-last character is read as a digit: an odd digit maps
+        to '男' (male), an even digit to '女' (female).
+
+        Args:
+            id_num: The ID number as a string.
+
+        Returns:
+            '男' or '女', or "" when ``id_num`` has fewer than two characters
+            or the second-to-last character is not a digit.
+        """
+        if not id_num or len(id_num) < 2:
+            return ""
+        gender_digit = id_num[-2]
+        # Guard before int(): return "" rather than raising on bad input.
+        if not gender_digit.isdecimal():
+            return ""
+        return '男' if int(gender_digit) % 2 == 1 else '女'

 # def remove_blank_lines(text:str)->str:
 #     # 使用splitlines()方法将字符串按行分割成列表，并去除空行
--- a/OCRPython/maincopydet.py
+++ b/OCRPython/maincopydet.py
@ -0,0 +1,163 @@
+# -*- coding: utf-8 -*-
+import sys
+import io
+import os
+from paddleocr import PaddleOCR
+import time
+from configs.basic_config import logger
+from extractor.identitycard_extractor import IdentityCardExtractor
+import base64
+import json
+
+sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
+
+
+def extractIdCardInfo(type: int, filePath1: str, filePath2: str) -> str:
+    """Dispatch ID-card extraction according to the kind of input.
+
+    Args:
+        type: 0 when the two inputs are file paths, 1 when they are
+            base64-encoded image data.
+        filePath1: First input (a path or a base64 string, per ``type``).
+        filePath2: Second input (a path or a base64 string, per ``type``).
+
+    Returns:
+        The string produced by the selected extractor, or "" for any other
+        ``type`` value.
+    """
+    # NOTE: ``type`` shadows the builtin but is part of the public signature.
+    if type == 0:
+        return extractIdCardInfoByPath(filePath1, filePath2)
+    if type == 1:
+        return extractIdCardInfoByBase64Data(filePath1, filePath2)
+    # Unsupported kind: honour the ``-> str`` contract instead of falling
+    # through and returning None (which the caller would print as "None").
+    return ""
+
+
+def extractIdCardInfoByPath(filePath1: str, filePath2: str) -> str:
+    """Run OCR on up to two image files and return the extracted fields as JSON.
+
+    Args:
+        filePath1: Path of the first image; skipped when it does not exist.
+        filePath2: Path of the second image; skipped when it does not exist.
+
+    Returns:
+        The JSON serialisation (non-ASCII characters kept as-is) of whatever
+        ``IdentityCardExtractor.extract_textbyPaddle`` returns for the
+        recognised text, or "" when no text was recognised.
+    """
+    # Local model directories can be supplied through det_model_dir /
+    # rec_model_dir / cls_model_dir; the defaults are used here.
+    ocr = PaddleOCR(use_angle_cls=True, lang="ch")
+    start_time = time.time()  # start of the timed section
+    jsonString = ""
+    recognised = []
+    try:
+        for path in (filePath1, filePath2):
+            if not os.path.exists(path):
+                continue
+            for page in ocr.ocr(path, cls=False) or []:
+                # An image with no detected text yields None for its page;
+                # skip it so the remaining image is still processed.
+                if not page:
+                    continue
+                recognised.extend(line[1][0] + '\n' for line in page)
+    except Exception as e:
+        logger.error(e)
+        # NOTE(review): this goes to stdout, the same stream the script
+        # prints its JSON result to - confirm the consumer tolerates it.
+        print(e)
+
+    text = "".join(recognised)
+    if text:
+        logger.info(f"text:{text}")
+        extractor = IdentityCardExtractor()
+        tempdict = extractor.extract_textbyPaddle(text)
+        jsonString = json.dumps(tempdict, ensure_ascii=False)
+        execution_time = time.time() - start_time  # elapsed seconds
+        logger.info(f"extractIdCardInfoByPath 耗时{execution_time}秒")
+    return jsonString
+
+
+def extractIdCardInfoByBase64Data(base64data1: str, base64Data2: str) -> str:
+    """Decode up to two base64 images and extract ID-card info from them.
+
+    Each non-empty input is decoded and written to a temporary PNG file, and
+    the files are then handed to ``extractIdCardInfoByPath``.
+
+    Args:
+        base64data1: Base64 text of the first image, or "" to skip it.
+        base64Data2: Base64 text of the second image, or "" to skip it.
+
+    Returns:
+        The string returned by ``extractIdCardInfoByPath``, or "" when both
+        inputs are empty or an error occurred (errors are logged, not raised).
+    """
+    import tempfile  # local import: only this function needs it
+
+    logger.info(f"extractIdCardInfoByBase64Data")
+    start_time = time.time()  # start of the timed section
+    jsonString = ""
+    try:
+        # A private temporary directory avoids picking up stale files left by
+        # an earlier run, avoids name clashes between concurrent runs, and
+        # guarantees the decoded images are deleted even when OCR raises.
+        with tempfile.TemporaryDirectory() as workdir:
+            inputs = (
+                ("base64data1", base64data1, "file1.png"),
+                ("base64Data2", base64Data2, "file2.png"),
+            )
+            paths = []
+            for label, data, file_name in inputs:
+                if not data:
+                    paths.append("")
+                    continue
+                logger.info(f"not {label}.empty()")
+                image_path = os.path.join(workdir, file_name)
+                with open(image_path, "wb") as file:
+                    file.write(base64.b64decode(data))
+                paths.append(image_path)
+
+            if paths[0] and paths[1]:
+                logger.info(f"file1.png and file2.png exist")
+            elif paths[0]:
+                logger.info(f"file1.png exist")
+            elif paths[1]:
+                logger.info(f"file2.png exist")
+
+            if paths[0] or paths[1]:
+                # A missing image is passed as "", which the path-based
+                # extractor skips because that path does not exist.
+                jsonString = extractIdCardInfoByPath(paths[0], paths[1])
+    except Exception as e:
+        logger.error(e)
+
+    execution_time = time.time() - start_time  # elapsed seconds
+    logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}秒")
+    return jsonString
+
+
+# with open('/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG', 'rb') as image_file:
+#     base64_image_string = base64.b64encode(image_file.read()).decode('utf-8')
+#
+# jsonString = extractIdCardInfoByBase64Data(base64_image_string,"")
+# jsonString = extractIdCardInfoByBase64Data("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg")
+# print(jsonString)
+# #
+# jsonString = extractIdCardInfoByPath("./images/han.jpg","")
+# logger.info(f"test")
+# jsonString = extractIdCardInfoByPath("./images/龙辉正.jpg","./images/龙辉反.jpg")#, "./images/江六斤反.jpg，./images/han.jpg
+# print(jsonString)
+#
+# Script entry point.
+#   <type> <input1> <input2> : both inputs are taken from the command line.
+#   <type>                   : inputs are read from stdin, one per line.
+# The extraction result is printed to stdout.
+if __name__ == "__main__":
+    try:
+        logger.info(f"main.py len of parameter: {len(sys.argv)}")
+        jsonString = ""
+        if len(sys.argv) > 3:
+            logger.info(f"{sys.argv[1]}")
+            logger.info(f"{sys.argv[2]}")
+            logger.info(f"{sys.argv[3]}")
+            jsonString = extractIdCardInfo(int(sys.argv[1]), sys.argv[2], sys.argv[3])
+        elif len(sys.argv) > 1:
+            logger.info(f"python 脚本里的接收到的参数是：")
+            logger.info(f"{sys.argv[1]}")
+            logger.info(f"开始执行sys.stdin.read")
+            input_data = sys.stdin.read()
+            logger.info(f"len(input_data):{len(input_data)}")
+            # Text-mode stdin normalises line endings to "\n", so splitting
+            # on os.linesep never matches where os.linesep is "\r\n".
+            # splitlines() handles every line ending and does not produce an
+            # extra empty element for a trailing newline.
+            split_data = input_data.splitlines()
+            if 2 == len(split_data):
+                logger.info("2 == len(split_data)")
+            data1 = split_data[0].strip() if len(split_data) > 0 else ""
+            data2 = split_data[1].strip() if len(split_data) > 1 else ""
+            jsonString = extractIdCardInfo(int(sys.argv[1]), data1, data2)
+        print(jsonString)
+    except KeyboardInterrupt:
+        logger.error("KeyboardInterrupt")
+
+    except IndexError:
+        # An index was out of range while handling the arguments.
+        logger.error("参数过长，未指定足够的参数")
+
+    except OverflowError:
+        # An OverflowError was raised while handling the arguments.
+        logger.error("命令行参数过长，导致溢出错误")
+
+    except EOFError:
+        # The end of the input stream was reached unexpectedly.
+        logger.error("已经到达输入流的末尾")
+
+    except Exception as e:
+        # Any other failure: embed the exception in the message itself.
+        # Passing it as a separate argument without a placeholder would
+        # drop it from the log output.
+        logger.error(f"发生了异常: {e}")