From d5c882033a7e54fcb19b2e37c37e4fb50469c0b8 Mon Sep 17 00:00:00 2001 From: weiweiw <14335254+weiweiw22@user.noreply.gitee.com> Date: Mon, 6 May 2024 10:44:19 +0800 Subject: [PATCH] enhance --- OCRPython/extractor/identitycard_extractor.py | 19 ++++++++++++++----- OCRPython/maincopy.py | 14 ++++++++------ 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/OCRPython/extractor/identitycard_extractor.py b/OCRPython/extractor/identitycard_extractor.py index 2a9ea6c..3bba09b 100644 --- a/OCRPython/extractor/identitycard_extractor.py +++ b/OCRPython/extractor/identitycard_extractor.py @@ -74,9 +74,9 @@ class IdentityCardExtractor(Extractor): "address": "", "idNumber": ""} # 提取签发机关 - issuing_authority = re.search(r"签发机关\n*(.+?)\n", text, re.DOTALL) + issuing_authority = re.search(r"(签发机关|签发机美)\n*(.+?)\n", text, re.DOTALL) if issuing_authority: - result["issuingAuthority"] = issuing_authority.group(1).strip() + result["issuingAuthority"] = issuing_authority.group(2).strip() # 提取有效期限 valid_time = re.search(r"有效期限\n*(\d{4}\.\d{2}\.\d{2}-\S+)", text, re.DOTALL) @@ -87,7 +87,7 @@ class IdentityCardExtractor(Extractor): name = re.search(r"姓名\s*(.*?)\n", text,re.DOTALL) if name: tempName = name.group(1).strip() - if tempName in "性别男" or tempName in "性别女": + if tempName in "性别男" or tempName in "性别女" or "性别男" in tempName or "性别女" in tempName: name = re.search(r"(\S+)\s*姓名", text, re.DOTALL) result["name"] = name.group(1).strip() else: @@ -107,9 +107,9 @@ class IdentityCardExtractor(Extractor): result["name"] = name.group(1).strip() # 提取民族 - ethnicity = re.search(r"民\s*族\s*(\S+)", text, re.DOTALL) + ethnicity = re.search(r"民\s*(族|旅)\s*(\S+)", text, re.DOTALL) if ethnicity: - result["ethnicity"] = ethnicity.group(1).strip() + result["ethnicity"] = ethnicity.group(2).strip() # 提取地址 address = re.search(r"(住址|佳址)(.*?)公民身份号码", text, re.DOTALL) @@ -302,6 +302,15 @@ class IdentityCardExtractor(Extractor): # 居民身份证 # 签发机关巴林右旗公安局 # 有效期限2004.10.27-2024.10.26""" +# text ="""唐昌梅 +# 姓名 +# 性别女民族苗 +# 出生1975年8月15日 +# 住址 +# 湖南省常德市鼎城区双桥 +# 坪镇全家坪村3组 +# 公民身份号码 +# 433030197508150820""" # extractor = IdentityCardExtractor() # jsonstring = extractor.extract_textbyPaddle(text) # print(jsonstring) diff --git a/OCRPython/maincopy.py b/OCRPython/maincopy.py index cffe427..b7dd4cf 100644 --- a/OCRPython/maincopy.py +++ b/OCRPython/maincopy.py @@ -30,6 +30,8 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str: res = result[idx] for line in res: text += (line[1][0] + '\n') + else: + logger.error(f"{filePath1} doesn't exist") if os.path.exists(filePath2): result = ocr.ocr(filePath2, cls=False) @@ -37,6 +39,9 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str: res = result[idx] for line in res: text += (line[1][0] + '\n') + else: + logger.error(f"{filePath2} doesn't exist") + except Exception as e: logger.error(e) print(e) @@ -49,6 +54,8 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str: end_time = time.time() # 记录结束时间 execution_time = end_time - start_time # 计算执行时间 logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}秒") + else: + logger.error(f"{filePath1},{filePath2} can't be recognized") return jsonString def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str: @@ -98,7 +105,7 @@ def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str: # # # jsonString = extractIdCardInfoByPath("./images/han.jpg","") # logger.info(f"test") -# jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg") +# jsonString = extractIdCardInfoByPath("./images/43302919641130423X_202311061953_front.jpg","./images/43302919641130423X_202311061953_back.jpg")#, "./images/江六斤反.jpg,./images/han.jpg # print(jsonString) if __name__ == "__main__": @@ -116,9 +123,6 @@ if __name__ == "__main__": logger.info(f"开始执行sys.stdin.read") input_data = sys.stdin.read() logger.info(f"len(input_data):{len(input_data)}") - # imageData = base64.b64decode(input_data) - # logger.info(f"image_data1:{image_data1}") - #logger.info(f"input_data:{input_data}") split_data = input_data.split(os.linesep) data1 = "" data2 = "" @@ -128,8 +132,6 @@ if __name__ == "__main__": data2 = split_data[1] elif 1 == len(split_data): data1 = split_data[0] - # logger.info(data1) - # logger.info(data2) jsonString = extractIdCardInfo(int(sys.argv[1]), data1, data2) print(jsonString) except KeyboardInterrupt: