From be92b7cfde527ddcbdc7082cef08ef721ca821ef Mon Sep 17 00:00:00 2001
From: weiweiw <14335254+weiweiw22@user.noreply.gitee.com>
Date: Tue, 30 Apr 2024 10:43:16 +0800
Subject: [PATCH] Fix ID card name recognition for ethnic minority names

---
 OCRPython/extractor/identitycard_extractor.py | 22 +++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/OCRPython/extractor/identitycard_extractor.py b/OCRPython/extractor/identitycard_extractor.py
index 352e289..2a9ea6c 100644
--- a/OCRPython/extractor/identitycard_extractor.py
+++ b/OCRPython/extractor/identitycard_extractor.py
@@ -64,7 +64,15 @@ class IdentityCardExtractor(Extractor):
 
     def extract_textbyPaddle(self, text:str)->dict:
         try:
-            result = {}
+            result = {
+                "issuingAuthority": "",
+                "validTime": "",
+                "name": "",
+                "gender": "",
+                "ethnicity": "",
+                "dateOfBirth": "",
+                "address": "",
+                "idNumber": ""}
             # 提取签发机关
             issuing_authority = re.search(r"签发机关\n*(.+?)\n", text, re.DOTALL)
             if issuing_authority:
@@ -80,7 +88,7 @@ class IdentityCardExtractor(Extractor):
             if name:
                 tempName = name.group(1).strip()
                 if tempName in "性别男" or tempName in "性别女":
-                    name = re.search(r"(.*?)\s*姓名", text, re.DOTALL)
+                    name = re.search(r"(\S+)\s*姓名", text, re.DOTALL)
                     result["name"] = name.group(1).strip()
                 else:
                     result["name"] = name.group(1).strip()
@@ -284,6 +292,16 @@ class IdentityCardExtractor(Extractor):
 # 公民身份号码
 # 513422200501044415
 # """
+#
+# text = """姓名苏龙格德·胡尔查巴特尔
+# 性别男民族蒙古
+# 出生1973年10支月27日
+# 内蒙古赤峰市巴林右旗沙布
+# 台苏木树中嘎查
+# 中华人民共和国
+# 居民身份证
+# 签发机关巴林右旗公安局
+# 有效期限2004.10.27-2024.10.26"""
 # extractor = IdentityCardExtractor()
 # jsonstring = extractor.extract_textbyPaddle(text)
 # print(jsonstring)