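Enhance ID card extraction: accept OCR-misread field labels (签发机美 for 签发机关, 民旅 for 民族), broaden the name fallback check for lines containing 性别男/性别女, and log an error when an input image path does not exist or cannot be recognized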
This commit is contained in:
parent
be92b7cfde
commit
d5c882033a
|
|
@ -74,9 +74,9 @@ class IdentityCardExtractor(Extractor):
|
||||||
"address": "",
|
"address": "",
|
||||||
"idNumber": ""}
|
"idNumber": ""}
|
||||||
# 提取签发机关
|
# 提取签发机关
|
||||||
issuing_authority = re.search(r"签发机关\n*(.+?)\n", text, re.DOTALL)
|
issuing_authority = re.search(r"(签发机关|签发机美)\n*(.+?)\n", text, re.DOTALL)
|
||||||
if issuing_authority:
|
if issuing_authority:
|
||||||
result["issuingAuthority"] = issuing_authority.group(1).strip()
|
result["issuingAuthority"] = issuing_authority.group(2).strip()
|
||||||
|
|
||||||
# 提取有效期限
|
# 提取有效期限
|
||||||
valid_time = re.search(r"有效期限\n*(\d{4}\.\d{2}\.\d{2}-\S+)", text, re.DOTALL)
|
valid_time = re.search(r"有效期限\n*(\d{4}\.\d{2}\.\d{2}-\S+)", text, re.DOTALL)
|
||||||
|
|
@ -87,7 +87,7 @@ class IdentityCardExtractor(Extractor):
|
||||||
name = re.search(r"姓名\s*(.*?)\n", text,re.DOTALL)
|
name = re.search(r"姓名\s*(.*?)\n", text,re.DOTALL)
|
||||||
if name:
|
if name:
|
||||||
tempName = name.group(1).strip()
|
tempName = name.group(1).strip()
|
||||||
if tempName in "性别男" or tempName in "性别女":
|
if tempName in "性别男" or tempName in "性别女" or "性别男" in tempName or "性别女" in tempName:
|
||||||
name = re.search(r"(\S+)\s*姓名", text, re.DOTALL)
|
name = re.search(r"(\S+)\s*姓名", text, re.DOTALL)
|
||||||
result["name"] = name.group(1).strip()
|
result["name"] = name.group(1).strip()
|
||||||
else:
|
else:
|
||||||
|
|
@ -107,9 +107,9 @@ class IdentityCardExtractor(Extractor):
|
||||||
result["name"] = name.group(1).strip()
|
result["name"] = name.group(1).strip()
|
||||||
|
|
||||||
# 提取民族
|
# 提取民族
|
||||||
ethnicity = re.search(r"民\s*族\s*(\S+)", text, re.DOTALL)
|
ethnicity = re.search(r"民\s*(族|旅)\s*(\S+)", text, re.DOTALL)
|
||||||
if ethnicity:
|
if ethnicity:
|
||||||
result["ethnicity"] = ethnicity.group(1).strip()
|
result["ethnicity"] = ethnicity.group(2).strip()
|
||||||
|
|
||||||
# 提取地址
|
# 提取地址
|
||||||
address = re.search(r"(住址|佳址)(.*?)公民身份号码", text, re.DOTALL)
|
address = re.search(r"(住址|佳址)(.*?)公民身份号码", text, re.DOTALL)
|
||||||
|
|
@ -302,6 +302,15 @@ class IdentityCardExtractor(Extractor):
|
||||||
# 居民身份证
|
# 居民身份证
|
||||||
# 签发机关巴林右旗公安局
|
# 签发机关巴林右旗公安局
|
||||||
# 有效期限2004.10.27-2024.10.26"""
|
# 有效期限2004.10.27-2024.10.26"""
|
||||||
|
# text ="""唐昌梅
|
||||||
|
# 姓名
|
||||||
|
# 性别女民族苗
|
||||||
|
# 出生1975年8月15日
|
||||||
|
# 住址
|
||||||
|
# 湖南省常德市鼎城区双桥
|
||||||
|
# 坪镇全家坪村3组
|
||||||
|
# 公民身份号码
|
||||||
|
# 433030197508150820"""
|
||||||
# extractor = IdentityCardExtractor()
|
# extractor = IdentityCardExtractor()
|
||||||
# jsonstring = extractor.extract_textbyPaddle(text)
|
# jsonstring = extractor.extract_textbyPaddle(text)
|
||||||
# print(jsonstring)
|
# print(jsonstring)
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,8 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str:
|
||||||
res = result[idx]
|
res = result[idx]
|
||||||
for line in res:
|
for line in res:
|
||||||
text += (line[1][0] + '\n')
|
text += (line[1][0] + '\n')
|
||||||
|
else:
|
||||||
|
logger.error(f"{filePath1} doesn't exist")
|
||||||
|
|
||||||
if os.path.exists(filePath2):
|
if os.path.exists(filePath2):
|
||||||
result = ocr.ocr(filePath2, cls=False)
|
result = ocr.ocr(filePath2, cls=False)
|
||||||
|
|
@ -37,6 +39,9 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str:
|
||||||
res = result[idx]
|
res = result[idx]
|
||||||
for line in res:
|
for line in res:
|
||||||
text += (line[1][0] + '\n')
|
text += (line[1][0] + '\n')
|
||||||
|
else:
|
||||||
|
logger.error(f"{filePath2} doesn't exist")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(e)
|
logger.error(e)
|
||||||
print(e)
|
print(e)
|
||||||
|
|
@ -49,6 +54,8 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str:
|
||||||
end_time = time.time() # 记录结束时间
|
end_time = time.time() # 记录结束时间
|
||||||
execution_time = end_time - start_time # 计算执行时间
|
execution_time = end_time - start_time # 计算执行时间
|
||||||
logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}秒")
|
logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}秒")
|
||||||
|
else:
|
||||||
|
logger.error(f"{filePath1},{filePath2} can't be recognized")
|
||||||
return jsonString
|
return jsonString
|
||||||
|
|
||||||
def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str:
|
def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str:
|
||||||
|
|
@ -98,7 +105,7 @@ def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str:
|
||||||
# #
|
# #
|
||||||
# jsonString = extractIdCardInfoByPath("./images/han.jpg","")
|
# jsonString = extractIdCardInfoByPath("./images/han.jpg","")
|
||||||
# logger.info(f"test")
|
# logger.info(f"test")
|
||||||
# jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg")
|
# jsonString = extractIdCardInfoByPath("./images/43302919641130423X_202311061953_front.jpg","./images/43302919641130423X_202311061953_back.jpg")#, "./images/江六斤反.jpg,./images/han.jpg
|
||||||
# print(jsonString)
|
# print(jsonString)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
@ -116,9 +123,6 @@ if __name__ == "__main__":
|
||||||
logger.info(f"开始执行sys.stdin.read")
|
logger.info(f"开始执行sys.stdin.read")
|
||||||
input_data = sys.stdin.read()
|
input_data = sys.stdin.read()
|
||||||
logger.info(f"len(input_data):{len(input_data)}")
|
logger.info(f"len(input_data):{len(input_data)}")
|
||||||
# imageData = base64.b64decode(input_data)
|
|
||||||
# logger.info(f"image_data1:{image_data1}")
|
|
||||||
#logger.info(f"input_data:{input_data}")
|
|
||||||
split_data = input_data.split(os.linesep)
|
split_data = input_data.split(os.linesep)
|
||||||
data1 = ""
|
data1 = ""
|
||||||
data2 = ""
|
data2 = ""
|
||||||
|
|
@ -128,8 +132,6 @@ if __name__ == "__main__":
|
||||||
data2 = split_data[1]
|
data2 = split_data[1]
|
||||||
elif 1 == len(split_data):
|
elif 1 == len(split_data):
|
||||||
data1 = split_data[0]
|
data1 = split_data[0]
|
||||||
# logger.info(data1)
|
|
||||||
# logger.info(data2)
|
|
||||||
jsonString = extractIdCardInfo(int(sys.argv[1]), data1, data2)
|
jsonString = extractIdCardInfo(int(sys.argv[1]), data1, data2)
|
||||||
print(jsonString)
|
print(jsonString)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue