enhance
This commit is contained in:
parent
be92b7cfde
commit
d5c882033a
|
|
@ -74,9 +74,9 @@ class IdentityCardExtractor(Extractor):
|
|||
"address": "",
|
||||
"idNumber": ""}
|
||||
# 提取签发机关
|
||||
issuing_authority = re.search(r"签发机关\n*(.+?)\n", text, re.DOTALL)
|
||||
issuing_authority = re.search(r"(签发机关|签发机美)\n*(.+?)\n", text, re.DOTALL)
|
||||
if issuing_authority:
|
||||
result["issuingAuthority"] = issuing_authority.group(1).strip()
|
||||
result["issuingAuthority"] = issuing_authority.group(2).strip()
|
||||
|
||||
# 提取有效期限
|
||||
valid_time = re.search(r"有效期限\n*(\d{4}\.\d{2}\.\d{2}-\S+)", text, re.DOTALL)
|
||||
|
|
@ -87,7 +87,7 @@ class IdentityCardExtractor(Extractor):
|
|||
name = re.search(r"姓名\s*(.*?)\n", text,re.DOTALL)
|
||||
if name:
|
||||
tempName = name.group(1).strip()
|
||||
if tempName in "性别男" or tempName in "性别女":
|
||||
if tempName in "性别男" or tempName in "性别女" or "性别男" in tempName or "性别女" in tempName:
|
||||
name = re.search(r"(\S+)\s*姓名", text, re.DOTALL)
|
||||
result["name"] = name.group(1).strip()
|
||||
else:
|
||||
|
|
@ -107,9 +107,9 @@ class IdentityCardExtractor(Extractor):
|
|||
result["name"] = name.group(1).strip()
|
||||
|
||||
# 提取民族
|
||||
ethnicity = re.search(r"民\s*族\s*(\S+)", text, re.DOTALL)
|
||||
ethnicity = re.search(r"民\s*(族|旅)\s*(\S+)", text, re.DOTALL)
|
||||
if ethnicity:
|
||||
result["ethnicity"] = ethnicity.group(1).strip()
|
||||
result["ethnicity"] = ethnicity.group(2).strip()
|
||||
|
||||
# 提取地址
|
||||
address = re.search(r"(住址|佳址)(.*?)公民身份号码", text, re.DOTALL)
|
||||
|
|
@ -302,6 +302,15 @@ class IdentityCardExtractor(Extractor):
|
|||
# 居民身份证
|
||||
# 签发机关巴林右旗公安局
|
||||
# 有效期限2004.10.27-2024.10.26"""
|
||||
# text ="""唐昌梅
|
||||
# 姓名
|
||||
# 性别女民族苗
|
||||
# 出生1975年8月15日
|
||||
# 住址
|
||||
# 湖南省常德市鼎城区双桥
|
||||
# 坪镇全家坪村3组
|
||||
# 公民身份号码
|
||||
# 433030197508150820"""
|
||||
# extractor = IdentityCardExtractor()
|
||||
# jsonstring = extractor.extract_textbyPaddle(text)
|
||||
# print(jsonstring)
|
||||
|
|
|
|||
|
|
@ -30,6 +30,8 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str:
|
|||
res = result[idx]
|
||||
for line in res:
|
||||
text += (line[1][0] + '\n')
|
||||
else:
|
||||
logger.error(f"{filePath1} doesn't exist")
|
||||
|
||||
if os.path.exists(filePath2):
|
||||
result = ocr.ocr(filePath2, cls=False)
|
||||
|
|
@ -37,6 +39,9 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str:
|
|||
res = result[idx]
|
||||
for line in res:
|
||||
text += (line[1][0] + '\n')
|
||||
else:
|
||||
logger.error(f"{filePath2} doesn't exist")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
print(e)
|
||||
|
|
@ -49,6 +54,8 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str:
|
|||
end_time = time.time() # 记录结束时间
|
||||
execution_time = end_time - start_time # 计算执行时间
|
||||
logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}秒")
|
||||
else:
|
||||
logger.error(f"{filePath1},{filePath2} can't be recognized")
|
||||
return jsonString
|
||||
|
||||
def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str:
|
||||
|
|
@ -98,7 +105,7 @@ def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str:
|
|||
# #
|
||||
# jsonString = extractIdCardInfoByPath("./images/han.jpg","")
|
||||
# logger.info(f"test")
|
||||
# jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg")
|
||||
# jsonString = extractIdCardInfoByPath("./images/43302919641130423X_202311061953_front.jpg","./images/43302919641130423X_202311061953_back.jpg")#, "./images/江六斤反.jpg,./images/han.jpg
|
||||
# print(jsonString)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
@ -116,9 +123,6 @@ if __name__ == "__main__":
|
|||
logger.info(f"开始执行sys.stdin.read")
|
||||
input_data = sys.stdin.read()
|
||||
logger.info(f"len(input_data):{len(input_data)}")
|
||||
# imageData = base64.b64decode(input_data)
|
||||
# logger.info(f"image_data1:{image_data1}")
|
||||
#logger.info(f"input_data:{input_data}")
|
||||
split_data = input_data.split(os.linesep)
|
||||
data1 = ""
|
||||
data2 = ""
|
||||
|
|
@ -128,8 +132,6 @@ if __name__ == "__main__":
|
|||
data2 = split_data[1]
|
||||
elif 1 == len(split_data):
|
||||
data1 = split_data[0]
|
||||
# logger.info(data1)
|
||||
# logger.info(data2)
|
||||
jsonString = extractIdCardInfo(int(sys.argv[1]), data1, data2)
|
||||
print(jsonString)
|
||||
except KeyboardInterrupt:
|
||||
|
|
|
|||
Loading…
Reference in New Issue