This commit is contained in:
weiweiw 2024-05-06 10:44:19 +08:00
parent be92b7cfde
commit d5c882033a
2 changed files with 22 additions and 11 deletions

View File

@@ -74,9 +74,9 @@ class IdentityCardExtractor(Extractor):
"address": "",
"idNumber": ""}
# 提取签发机关
issuing_authority = re.search(r"签发机关\n*(.+?)\n", text, re.DOTALL)
issuing_authority = re.search(r"(签发机关|签发机美)\n*(.+?)\n", text, re.DOTALL)
if issuing_authority:
result["issuingAuthority"] = issuing_authority.group(1).strip()
result["issuingAuthority"] = issuing_authority.group(2).strip()
# 提取有效期限
valid_time = re.search(r"有效期限\n*(\d{4}\.\d{2}\.\d{2}-\S+)", text, re.DOTALL)
@@ -87,7 +87,7 @@ class IdentityCardExtractor(Extractor):
name = re.search(r"姓名\s*(.*?)\n", text,re.DOTALL)
if name:
tempName = name.group(1).strip()
if tempName in "性别男" or tempName in "性别女":
if tempName in "性别男" or tempName in "性别女" or "性别男" in tempName or "性别女" in tempName:
name = re.search(r"(\S+)\s*姓名", text, re.DOTALL)
result["name"] = name.group(1).strip()
else:
@@ -107,9 +107,9 @@ class IdentityCardExtractor(Extractor):
result["name"] = name.group(1).strip()
# 提取民族
ethnicity = re.search(r"\s*\s*(\S+)", text, re.DOTALL)
ethnicity = re.search(r"\s*(|旅)\s*(\S+)", text, re.DOTALL)
if ethnicity:
result["ethnicity"] = ethnicity.group(1).strip()
result["ethnicity"] = ethnicity.group(2).strip()
# 提取地址
address = re.search(r"(住址|佳址)(.*?)公民身份号码", text, re.DOTALL)
@@ -302,6 +302,15 @@ class IdentityCardExtractor(Extractor):
# 居民身份证
# 签发机关巴林右旗公安局
# 有效期限2004.10.27-2024.10.26"""
# text ="""唐昌梅
# 姓名
# 性别女民族苗
# 出生1975年8月15日
# 住址
# 湖南省常德市鼎城区双桥
# 坪镇全家坪村3组
# 公民身份号码
# 433030197508150820"""
# extractor = IdentityCardExtractor()
# jsonstring = extractor.extract_textbyPaddle(text)
# print(jsonstring)

View File

@@ -30,6 +30,8 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str:
res = result[idx]
for line in res:
text += (line[1][0] + '\n')
else:
logger.error(f"{filePath1} doesn't exist")
if os.path.exists(filePath2):
result = ocr.ocr(filePath2, cls=False)
@@ -37,6 +39,9 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str:
res = result[idx]
for line in res:
text += (line[1][0] + '\n')
else:
logger.error(f"{filePath2} doesn't exist")
except Exception as e:
logger.error(e)
print(e)
@@ -49,6 +54,8 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str:
end_time = time.time() # 记录结束时间
execution_time = end_time - start_time # 计算执行时间
logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}")
else:
logger.error(f"{filePath1},{filePath2} can't be recognized")
return jsonString
def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str:
@@ -98,7 +105,7 @@ def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str:
# #
# jsonString = extractIdCardInfoByPath("./images/han.jpg","")
# logger.info(f"test")
# jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg")
# jsonString = extractIdCardInfoByPath("./images/43302919641130423X_202311061953_front.jpg","./images/43302919641130423X_202311061953_back.jpg")#, "./images/江六斤反.jpg./images/han.jpg
# print(jsonString)
if __name__ == "__main__":
@@ -116,9 +123,6 @@ if __name__ == "__main__":
logger.info(f"开始执行sys.stdin.read")
input_data = sys.stdin.read()
logger.info(f"len(input_data):{len(input_data)}")
# imageData = base64.b64decode(input_data)
# logger.info(f"image_data1:{image_data1}")
#logger.info(f"input_data:{input_data}")
split_data = input_data.split(os.linesep)
data1 = ""
data2 = ""
@@ -128,8 +132,6 @@ if __name__ == "__main__":
data2 = split_data[1]
elif 1 == len(split_data):
data1 = split_data[0]
# logger.info(data1)
# logger.info(data2)
jsonString = extractIdCardInfo(int(sys.argv[1]), data1, data2)
print(jsonString)
except KeyboardInterrupt: