Fix the identity card recognition issue for ethnic minority names
This commit is contained in:
parent
4e920f5542
commit
be92b7cfde
|
|
@ -64,7 +64,15 @@ class IdentityCardExtractor(Extractor):
|
|||
|
||||
def extract_textbyPaddle(self, text:str)->dict:
|
||||
try:
|
||||
result = {}
|
||||
result = {
|
||||
"issuingAuthority": "",
|
||||
"validTime": "",
|
||||
"name": "",
|
||||
"gender": "",
|
||||
"ethnicity": "",
|
||||
"dateOfBirth": "",
|
||||
"address": "",
|
||||
"idNumber": ""}
|
||||
# 提取签发机关
|
||||
issuing_authority = re.search(r"签发机关\n*(.+?)\n", text, re.DOTALL)
|
||||
if issuing_authority:
|
||||
|
|
@ -80,7 +88,7 @@ class IdentityCardExtractor(Extractor):
|
|||
if name:
|
||||
tempName = name.group(1).strip()
|
||||
if tempName in "性别男" or tempName in "性别女":
|
||||
name = re.search(r"(.*?)\s*姓名", text, re.DOTALL)
|
||||
name = re.search(r"(\S+)\s*姓名", text, re.DOTALL)
|
||||
result["name"] = name.group(1).strip()
|
||||
else:
|
||||
result["name"] = name.group(1).strip()
|
||||
|
|
@ -284,6 +292,16 @@ class IdentityCardExtractor(Extractor):
|
|||
# 公民身份号码
|
||||
# 513422200501044415
|
||||
# """
|
||||
#
|
||||
# text = """姓名苏龙格德·胡尔查巴特尔
|
||||
# 性别男民族蒙古
|
||||
# 出生1973年10支月27日
|
||||
# 内蒙古赤峰市巴林右旗沙布
|
||||
# 台苏木树中嘎查
|
||||
# 中华人民共和国
|
||||
# 居民身份证
|
||||
# 签发机关巴林右旗公安局
|
||||
# 有效期限2004.10.27-2024.10.26"""
|
||||
# extractor = IdentityCardExtractor()
|
||||
# jsonstring = extractor.extract_textbyPaddle(text)
|
||||
# print(jsonstring)
|
||||
|
|
|
|||
Loading…
Reference in New Issue