diff --git a/.DS_Store b/.DS_Store
index 2ab7314..309e0a5 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/OCRPython/.DS_Store b/OCRPython/.DS_Store
index 451627c..ba93859 100644
Binary files a/OCRPython/.DS_Store and b/OCRPython/.DS_Store differ
diff --git a/OCRPython/extractor/__pycache__/identitycard_extractor.cpython-39.pyc b/OCRPython/extractor/__pycache__/identitycard_extractor.cpython-39.pyc
index f53fd1e..7a6c1e4 100644
Binary files a/OCRPython/extractor/__pycache__/identitycard_extractor.cpython-39.pyc and b/OCRPython/extractor/__pycache__/identitycard_extractor.cpython-39.pyc differ
diff --git a/OCRPython/extractor/identitycard_extractor.py b/OCRPython/extractor/identitycard_extractor.py
index 1ac2a0c..0c72452 100644
--- a/OCRPython/extractor/identitycard_extractor.py
+++ b/OCRPython/extractor/identitycard_extractor.py
@@ -35,7 +35,7 @@ class IdentityCardExtractor(Extractor):
def extract_textbyPaddle(self, text:str)->dict:
try:
patterns = {
- "issuingAuthority": r"签发机关\n(.+?)\n",
+ "issuingAuthority": r"签发机关\n*(.+?)\n",
"validTime": r"有效期限\n*(.+?)\n",
"name": r"姓名(.*?)\n", #####
"gender": r"(\S)民族",
@@ -82,21 +82,21 @@ class InvoiceExtractor(Extractor):
# 幢2001室
# 公民身份号码
# 440203197306192118"""
-text = """
-中华人民共和国
-居民身份证
-oo
-签发机关宿州市公安局桥分局
-有效期限2023.01.18-2043.01.18
-姓名郭乾坤
-性别男民族汉
-出生1994年10月17日
-住址 安徽省宿州市场桥区朱仙
-庄镇郭庙村郭家组6号
-公民身份号码
-34220119941017327X
-"""
-extractor = IdentityCardExtractor()
-
-jsonstring = extractor.extract_textbyPaddle(text)
-print(jsonstring)
\ No newline at end of file
+# text = """
+# 中华人民共和国
+# 居民身份证
+# oo
+# 签发机关宿州市公安局桥分局
+# 有效期限2023.01.18-2043.01.18
+# 姓名郭乾坤
+# 性别男民族汉
+# 出生1994年10月17日
+# 住址 安徽省宿州市场桥区朱仙
+# 庄镇郭庙村郭家组6号
+# 公民身份号码
+# 34220119941017327X
+# """
+# extractor = IdentityCardExtractor()
+#
+# jsonstring = extractor.extract_textbyPaddle(text)
+# print(jsonstring)
\ No newline at end of file
diff --git a/OCRPython/logs/ocr_reconginition.log b/OCRPython/logs/ocr_reconginition.log
index c421abd..e69de29 100644
--- a/OCRPython/logs/ocr_reconginition.log
+++ b/OCRPython/logs/ocr_reconginition.log
@@ -1,224 +0,0 @@
-2024-04-17 14:28:11,092 - main.py[line:80] - INFO: main.py len of parameter: 2
-2024-04-17 14:28:11,093 - main.py[line:87] - INFO: python 脚本里的接收到的参数是:
-2024-04-17 14:28:11,093 - main.py[line:88] - INFO: 1
-2024-04-17 14:28:11,093 - main.py[line:91] - INFO: 开始执行sys.stdin.read
-2024-04-17 14:28:31,648 - main.py[line:97] - INFO: 399921
-2024-04-17 14:29:55,152 - main.py[line:80] - INFO: main.py len of parameter: 2
-2024-04-17 14:29:55,152 - main.py[line:87] - INFO: python 脚本里的接收到的参数是:
-2024-04-17 14:29:55,152 - main.py[line:88] - INFO: 1
-2024-04-17 14:29:55,152 - main.py[line:91] - INFO: 开始执行sys.stdin.read
-2024-04-17 14:29:55,154 - main.py[line:96] - INFO:
-2024-04-17 14:29:55,154 - main.py[line:97] - INFO: 399921
-2024-04-17 14:29:55,154 - main.py[line:106] - INFO: 199960
-2024-04-17 14:29:55,155 - main.py[line:107] - INFO: 199960
-2024-04-17 14:32:23,556 - main.py[line:80] - INFO: main.py len of parameter: 2
-2024-04-17 14:32:23,556 - main.py[line:88] - INFO: python 脚本里的接收到的参数是:
-2024-04-17 14:32:23,556 - main.py[line:89] - INFO: 1
-2024-04-17 14:32:23,556 - main.py[line:92] - INFO: 开始执行sys.stdin.read
-2024-04-17 14:32:23,558 - main.py[line:97] - INFO:
-2024-04-17 14:32:23,558 - main.py[line:98] - INFO: 399921
-2024-04-17 14:32:23,559 - main.py[line:107] - INFO: 199960
-2024-04-17 14:32:23,559 - main.py[line:108] - INFO: 199960
-2024-04-17 14:32:23,559 - main.py[line:43] - INFO: extractIdCardInfoByBase64Data
-2024-04-17 14:32:23,559 - main.py[line:46] - INFO: not base64data1.empty()
-2024-04-17 14:32:23,562 - main.py[line:52] - INFO: not base64Data2.empty()
-2024-04-17 14:32:23,563 - main.py[line:58] - INFO: file1.png and file2.png exist
-2024-04-17 14:32:23,755 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-17 14:32:26,428 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-17 14:32:26,934 - main.py[line:37] - INFO: extractIdCardInfo 耗时3.37139892578125秒
-2024-04-17 14:32:26,934 - main.py[line:69] - INFO: extractIdCardInfo 耗时3.375098705291748秒
-2024-04-17 15:00:10,376 - main.py[line:91] - INFO: main.py len of parameter: 2
-2024-04-17 15:00:10,376 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
-2024-04-17 15:00:10,376 - main.py[line:100] - INFO: 1
-2024-04-17 15:00:10,376 - main.py[line:103] - INFO: 开始执行sys.stdin.read
-2024-04-17 15:00:10,378 - main.py[line:108] - INFO:
-2024-04-17 15:00:10,379 - main.py[line:109] - INFO: 399921
-2024-04-17 15:00:10,379 - main.py[line:118] - INFO: 199960
-2024-04-17 15:00:10,379 - main.py[line:119] - INFO: 199960
-2024-04-17 15:00:10,379 - main.py[line:47] - INFO: extractIdCardInfoByBase64Data
-2024-04-17 15:00:10,379 - main.py[line:51] - INFO: not base64data1.empty()
-2024-04-17 15:00:10,381 - main.py[line:57] - INFO: not base64Data2.empty()
-2024-04-17 15:00:10,382 - main.py[line:63] - INFO: file1.png and file2.png exist
-2024-04-17 15:00:10,578 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-17 15:00:13,327 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-17 15:00:13,839 - main.py[line:41] - INFO: extractIdCardInfo 耗时3.456987142562866秒
-2024-04-17 15:00:13,840 - main.py[line:80] - INFO: extractIdCardInfoByBase64Data 耗时3.460446834564209秒
-2024-04-17 15:01:33,280 - main.py[line:91] - INFO: main.py len of parameter: 2
-2024-04-17 15:01:33,280 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
-2024-04-17 15:01:33,280 - main.py[line:100] - INFO: 1
-2024-04-17 15:01:33,280 - main.py[line:103] - INFO: 开始执行sys.stdin.read
-2024-04-17 15:01:33,282 - main.py[line:108] - INFO:
-2024-04-17 15:01:33,283 - main.py[line:109] - INFO: 399921
-2024-04-17 15:01:33,283 - main.py[line:118] - INFO: 199960
-2024-04-17 15:01:33,283 - main.py[line:119] - INFO: 199960
-2024-04-17 15:01:33,283 - main.py[line:47] - INFO: extractIdCardInfoByBase64Data
-2024-04-17 15:01:33,283 - main.py[line:51] - INFO: not base64data1.empty()
-2024-04-17 15:01:33,285 - main.py[line:57] - INFO: not base64Data2.empty()
-2024-04-17 15:01:33,286 - main.py[line:63] - INFO: file1.png and file2.png exist
-2024-04-17 15:01:33,452 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-17 15:01:35,814 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-17 15:01:36,303 - main.py[line:41] - INFO: extractIdCardInfo 耗时3.0165488719940186秒
-2024-04-17 15:01:36,303 - main.py[line:80] - INFO: extractIdCardInfoByBase64Data 耗时3.0200371742248535秒
-2024-04-17 15:06:02,257 - main.py[line:91] - INFO: main.py len of parameter: 2
-2024-04-17 15:06:02,258 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
-2024-04-17 15:06:02,258 - main.py[line:100] - INFO: 1
-2024-04-17 15:06:02,258 - main.py[line:103] - INFO: 开始执行sys.stdin.read
-2024-04-17 15:09:35,113 - main.py[line:126] - ERROR: KeyboardInterrupt
-2024-04-17 15:10:50,601 - main.py[line:91] - INFO: main.py len of parameter: 2
-2024-04-17 15:10:50,601 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
-2024-04-17 15:10:50,602 - main.py[line:100] - INFO: 1
-2024-04-17 15:10:50,602 - main.py[line:103] - INFO: 开始执行sys.stdin.read
-2024-04-17 15:10:50,602 - main.py[line:108] - INFO:
-2024-04-17 15:10:50,603 - main.py[line:109] - INFO: 316256
-2024-04-17 15:10:50,603 - main.py[line:118] - INFO: 316256
-2024-04-17 15:10:50,603 - main.py[line:119] - INFO: 0
-2024-04-17 15:10:50,603 - main.py[line:47] - INFO: extractIdCardInfoByBase64Data
-2024-04-17 15:10:50,603 - main.py[line:51] - INFO: not base64data1.empty()
-2024-04-17 15:10:50,605 - main.py[line:68] - INFO: file1.png exist
-2024-04-17 15:10:50,775 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-17 15:10:54,797 - main.py[line:41] - INFO: extractIdCardInfo 耗时4.191910982131958秒
-2024-04-17 15:10:54,798 - main.py[line:80] - INFO: extractIdCardInfoByBase64Data 耗时4.194473028182983秒
-2024-04-17 15:15:35,913 - main.py[line:91] - INFO: main.py len of parameter: 2
-2024-04-17 15:15:35,913 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
-2024-04-17 15:15:35,913 - main.py[line:100] - INFO: 0
-2024-04-17 15:15:35,913 - main.py[line:103] - INFO: 开始执行sys.stdin.read
-2024-04-17 15:15:35,913 - main.py[line:108] - INFO:
-2024-04-17 15:15:35,914 - main.py[line:109] - INFO: 56
-2024-04-17 15:15:35,914 - main.py[line:118] - INFO: 55
-2024-04-17 15:15:35,914 - main.py[line:119] - INFO: 0
-2024-04-17 15:15:36,072 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-17 15:15:38,824 - main.py[line:41] - INFO: extractIdCardInfo 耗时2.9099318981170654秒
-2024-04-17 15:41:57,020 - main.py[line:91] - INFO: main.py len of parameter: 2
-2024-04-17 15:41:57,021 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
-2024-04-17 15:41:57,021 - main.py[line:100] - INFO: 0
-2024-04-17 15:41:57,021 - main.py[line:103] - INFO: 开始执行sys.stdin.read
-2024-04-17 15:41:57,021 - main.py[line:108] - INFO:
-2024-04-17 15:41:57,021 - main.py[line:109] - INFO: 56
-2024-04-17 15:41:57,021 - main.py[line:118] - INFO: 55
-2024-04-17 15:41:57,021 - main.py[line:119] - INFO: 0
-2024-04-17 15:41:57,179 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-17 15:42:00,351 - main.py[line:41] - INFO: extractIdCardInfo 耗时3.3298799991607666秒
-2024-04-17 17:35:41,366 - main.py[line:91] - INFO: main.py len of parameter: 2
-2024-04-17 17:35:41,366 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
-2024-04-17 17:35:41,366 - main.py[line:100] - INFO: 0
-2024-04-17 17:35:41,366 - main.py[line:103] - INFO: 开始执行sys.stdin.read
-2024-04-17 17:35:41,366 - main.py[line:108] - INFO:
-2024-04-17 17:35:41,366 - main.py[line:109] - INFO: 56
-2024-04-17 17:35:41,366 - main.py[line:118] - INFO: 55
-2024-04-17 17:35:41,366 - main.py[line:119] - INFO: 0
-2024-04-17 17:35:41,810 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-17 17:35:44,799 - main.py[line:41] - INFO: extractIdCardInfo 耗时3.4327380657196045秒
-2024-04-18 14:28:41,233 - main.py[line:47] - INFO: extractIdCardInfoByBase64Data
-2024-04-18 14:28:41,233 - main.py[line:51] - INFO: not base64data1.empty()
-2024-04-18 14:28:41,235 - main.py[line:68] - INFO: file1.png exist
-2024-04-18 14:28:41,607 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-18 14:28:44,850 - main.py[line:41] - INFO: extractIdCardInfo 耗时3.614640951156616秒
-2024-04-18 14:28:44,850 - main.py[line:80] - INFO: extractIdCardInfoByBase64Data 耗时3.6167337894439697秒
-2024-04-18 17:20:03,651 - main.py[line:48] - INFO: extractIdCardInfoByBase64Data
-2024-04-18 17:20:03,651 - main.py[line:53] - INFO: not base64data1.empty()
-2024-04-18 17:20:03,653 - main.py[line:70] - INFO: file1.png exist
-2024-04-18 17:20:04,099 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-18 17:20:09,473 - main.py[line:42] - INFO: extractIdCardInfo 耗时5.820295810699463秒
-2024-04-18 17:20:09,473 - main.py[line:82] - INFO: extractIdCardInfoByBase64Data 耗时5.822228193283081秒
-2024-04-19 08:58:12,184 - main.py[line:48] - INFO: extractIdCardInfoByBase64Data
-2024-04-19 08:58:12,185 - main.py[line:53] - INFO: not base64data1.empty()
-2024-04-19 08:58:12,186 - main.py[line:70] - INFO: file1.png exist
-2024-04-19 08:58:12,373 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-19 08:58:18,685 - main.py[line:42] - INFO: extractIdCardInfo 耗时6.498270034790039秒
-2024-04-19 08:58:18,685 - main.py[line:82] - INFO: extractIdCardInfoByBase64Data 耗时6.499899864196777秒
-2024-04-19 09:00:57,608 - main.py[line:48] - INFO: extractIdCardInfoByBase64Data
-2024-04-19 09:00:57,609 - main.py[line:53] - INFO: not base64data1.empty()
-2024-04-19 09:00:57,609 - main.py[line:70] - INFO: file1.png exist
-2024-04-19 09:00:57,787 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-19 09:01:00,685 - main.py[line:42] - INFO: extractIdCardInfo 耗时3.075079917907715秒
-2024-04-19 09:01:00,685 - main.py[line:82] - INFO: extractIdCardInfoByBase64Data 耗时3.076277017593384秒
-2024-04-19 11:35:24,397 - main.py[line:38] - ERROR: unstructured package not found, please install it with `pip install unstructured`
-2024-04-19 11:35:24,398 - main.py[line:42] - INFO: extractIdCardInfo 耗时0.0006740093231201172秒
-2024-04-19 11:35:56,826 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-19 11:36:04,713 - main.py[line:42] - INFO: extractIdCardInfo 耗时8.266455888748169秒
-2024-04-19 11:36:11,025 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-19 11:36:25,929 - main.py[line:42] - INFO: extractIdCardInfo 耗时14.984557151794434秒
-2024-04-19 11:36:38,161 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-19 11:36:44,925 - main.py[line:42] - INFO: extractIdCardInfo 耗时6.842967987060547秒
-2024-04-21 11:10:25,753 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-21 11:11:01,024 - main.py[line:42] - INFO: extractIdCardInfo 耗时35.6481351852417秒
-2024-04-21 11:12:12,110 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-21 11:12:14,951 - main.py[line:42] - INFO: extractIdCardInfo 耗时3.035506248474121秒
-2024-04-21 12:18:44,506 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-21 12:18:47,380 - main.py[line:42] - INFO: extractIdCardInfo 耗时3.074634075164795秒
-2024-04-21 15:12:48,957 - main.py[line:96] - INFO: main.py len of parameter: 2
-2024-04-21 15:12:48,957 - main.py[line:104] - INFO: python 脚本里的接收到的参数是:
-2024-04-21 15:12:48,957 - main.py[line:105] - INFO: 0
-2024-04-21 15:12:48,957 - main.py[line:106] - INFO: 开始执行sys.stdin.read
-2024-04-21 15:12:48,957 - main.py[line:108] - INFO:
-2024-04-21 15:12:48,957 - main.py[line:109] - INFO: 56
-2024-04-21 15:12:48,957 - main.py[line:118] - INFO: 55
-2024-04-21 15:12:48,958 - main.py[line:119] - INFO: 0
-2024-04-21 15:12:48,958 - main.py[line:38] - ERROR: unstructured package not found, please install it with `pip install unstructured`
-2024-04-21 15:12:48,958 - main.py[line:42] - INFO: extractIdCardInfo 耗时8.0108642578125e-05秒
-2024-04-21 15:14:23,381 - main.py[line:96] - INFO: main.py len of parameter: 2
-2024-04-21 15:14:23,381 - main.py[line:104] - INFO: python 脚本里的接收到的参数是:
-2024-04-21 15:14:23,381 - main.py[line:105] - INFO: 0
-2024-04-21 15:14:23,381 - main.py[line:106] - INFO: 开始执行sys.stdin.read
-2024-04-21 15:14:23,382 - main.py[line:108] - INFO:
-2024-04-21 15:14:23,382 - main.py[line:109] - INFO: 56
-2024-04-21 15:14:23,382 - main.py[line:118] - INFO: 55
-2024-04-21 15:14:23,382 - main.py[line:119] - INFO: 0
-2024-04-21 15:14:23,653 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-21 15:14:55,795 - main.py[line:38] - ERROR: 'NoneType' object has no attribute 'strip'
-2024-04-21 15:14:55,796 - main.py[line:42] - INFO: extractIdCardInfo 耗时32.41406607627869秒
-2024-04-21 15:18:43,877 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-21 15:18:48,536 - main.py[line:38] - ERROR: 'NoneType' object has no attribute 'strip'
-2024-04-21 15:18:48,536 - main.py[line:42] - INFO: extractIdCardInfo 耗时4.845187187194824秒
-2024-04-21 15:20:34,004 - main.py[line:96] - INFO: main.py len of parameter: 2
-2024-04-21 15:20:34,005 - main.py[line:104] - INFO: python 脚本里的接收到的参数是:
-2024-04-21 15:20:34,005 - main.py[line:105] - INFO: 0
-2024-04-21 15:20:34,005 - main.py[line:106] - INFO: 开始执行sys.stdin.read
-2024-04-21 15:20:34,005 - main.py[line:108] - INFO:
-2024-04-21 15:20:34,005 - main.py[line:109] - INFO: 56
-2024-04-21 15:20:34,005 - main.py[line:118] - INFO: 55
-2024-04-21 15:20:34,005 - main.py[line:119] - INFO: 0
-2024-04-21 15:20:34,176 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-21 15:20:36,793 - main.py[line:38] - ERROR: 'NoneType' object has no attribute 'strip'
-2024-04-21 15:20:36,793 - main.py[line:42] - INFO: extractIdCardInfo 耗时2.7879230976104736秒
-2024-04-21 15:23:26,673 - main.py[line:96] - INFO: main.py len of parameter: 2
-2024-04-21 15:23:26,673 - main.py[line:104] - INFO: python 脚本里的接收到的参数是:
-2024-04-21 15:23:26,673 - main.py[line:105] - INFO: 0
-2024-04-21 15:23:26,673 - main.py[line:106] - INFO: 开始执行sys.stdin.read
-2024-04-21 15:23:26,673 - main.py[line:108] - INFO:
-2024-04-21 15:23:26,674 - main.py[line:109] - INFO: 56
-2024-04-21 15:23:26,674 - main.py[line:118] - INFO: 55
-2024-04-21 15:23:26,674 - main.py[line:119] - INFO: 0
-2024-04-21 15:23:26,856 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-21 15:23:36,818 - main.py[line:42] - INFO: extractIdCardInfo 耗时10.144625186920166秒
-2024-04-21 15:25:57,305 - main.py[line:96] - INFO: main.py len of parameter: 2
-2024-04-21 15:25:57,306 - main.py[line:104] - INFO: python 脚本里的接收到的参数是:
-2024-04-21 15:25:57,306 - main.py[line:105] - INFO: 0
-2024-04-21 15:25:57,306 - main.py[line:106] - INFO: 开始执行sys.stdin.read
-2024-04-21 15:25:57,306 - main.py[line:108] - INFO:
-2024-04-21 15:25:57,306 - main.py[line:109] - INFO: 56
-2024-04-21 15:25:57,306 - main.py[line:118] - INFO: 55
-2024-04-21 15:25:57,306 - main.py[line:119] - INFO: 0
-2024-04-21 15:25:57,483 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-21 15:26:00,173 - main.py[line:42] - INFO: extractIdCardInfo 耗时2.866658926010132秒
-2024-04-22 11:01:10,204 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-22 11:01:14,447 - main.py[line:42] - INFO: extractIdCardInfo 耗时4.689878702163696秒
-2024-04-22 11:01:54,639 - main.py[line:96] - INFO: main.py len of parameter: 2
-2024-04-22 11:01:54,640 - main.py[line:104] - INFO: python 脚本里的接收到的参数是:
-2024-04-22 11:01:54,640 - main.py[line:105] - INFO: 0
-2024-04-22 11:01:54,640 - main.py[line:106] - INFO: 开始执行sys.stdin.read
-2024-04-22 11:01:54,640 - main.py[line:108] - INFO:
-2024-04-22 11:01:54,640 - main.py[line:109] - INFO: 55
-2024-04-22 11:01:54,640 - main.py[line:118] - INFO: 55
-2024-04-22 11:01:54,640 - main.py[line:119] - INFO: 0
-2024-04-22 11:01:54,819 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
-2024-04-22 11:01:57,426 - main.py[line:42] - INFO: extractIdCardInfo 耗时2.785860061645508秒
-2024-04-22 15:28:23,501 - maincopy.py[line:81] - ERROR: 'NoneType' object has no attribute 'strip'
-2024-04-22 16:18:56,272 - maincopy.py[line:38] - ERROR: 'NoneType' object has no attribute 'shape'
-2024-04-22 16:21:44,808 - maincopy.py[line:38] - ERROR: 'NoneType' object has no attribute 'shape'
-2024-04-22 16:28:20,400 - maincopy.py[line:38] - ERROR: 'NoneType' object has no attribute 'shape'
-2024-04-22 16:28:20,400 - maincopy.py[line:83] - ERROR: local variable 'jsonString' referenced before assignment
-2024-04-22 16:29:46,501 - maincopy.py[line:38] - ERROR: 'NoneType' object has no attribute 'shape'
-2024-04-22 16:29:46,502 - maincopy.py[line:83] - ERROR: local variable 'jsonString' referenced before assignment
-2024-04-22 16:32:09,871 - maincopy.py[line:38] - ERROR: 'NoneType' object has no attribute 'shape'
-2024-04-22 16:32:09,872 - maincopy.py[line:83] - ERROR: local variable 'jsonString' referenced before assignment
diff --git a/OCRPython/main.py b/OCRPython/main.py
index 0489c63..a818562 100644
--- a/OCRPython/main.py
+++ b/OCRPython/main.py
@@ -7,6 +7,7 @@ from configs.basic_config import logger
import base64
import time
import os
+import json
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
@@ -33,7 +34,8 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str:
context += "\n".join([doc.page_content for doc in docs])
extractor = IdentityCardExtractor()
- jsonString = extractor.extract_text(context)
+ tempdict = extractor.extract_text(context)
+ jsonString = json.dumps(tempdict, ensure_ascii=False)  # bind `jsonString` (the name the replaced line assigned) so later uses of it in this function still resolve
except Exception as e:
logger.error(e)
diff --git a/OCRPython/maincopy.py b/OCRPython/maincopy.py
index 679d422..bd20fdc 100644
--- a/OCRPython/maincopy.py
+++ b/OCRPython/maincopy.py
@@ -1,12 +1,15 @@
# -*- coding: utf-8 -*-
import sys
+import io
import os
-from paddleocr import PaddleOCR, draw_ocr
+from paddleocr import PaddleOCR
import time
from configs.basic_config import logger
from extractor.identitycard_extractor import IdentityCardExtractor
import base64
+import json
+sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
def extractIdCardInfo(type:int, filePath1: str, filePath2: str)->str:
if (0 == type):
return extractIdCardInfoByPath(filePath1, filePath2)
@@ -19,6 +22,7 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str:
ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
text = ""
start_time = time.time() # 记录结束时间
+ jsonString = ""
try:
if os.path.exists(filePath1):
result = ocr.ocr(filePath1, cls=False)
@@ -40,7 +44,8 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str:
if 0 != len(text):
logger.info(f"text:{text}")
extractor = IdentityCardExtractor()
- jsonString = extractor.extract_textbyPaddle(text)
+ tempdict = extractor.extract_textbyPaddle(text)
+ jsonString = json.dumps(tempdict, ensure_ascii=False)
end_time = time.time() # 记录结束时间
execution_time = end_time - start_time # 计算执行时间
logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}秒")
@@ -62,7 +67,7 @@ def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str:
image_data2 = base64.b64decode(base64Data2)
with open("file2.png", "wb") as file:
file.write(image_data2)
-
+
if os.path.exists("file1.png") and os.path.exists("file2.png"):
logger.info(f"file1.png and file2.png exist")
jsonString = extractIdCardInfoByPath("file1.png","file2.png")
@@ -90,11 +95,12 @@ def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str:
# jsonString = extractIdCardInfoByBase64Data(base64_image_string,"")
# jsonString = extractIdCardInfoByBase64Data("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg")
# print(jsonString)
-#
-#jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG","")
+# #
+# jsonString = extractIdCardInfoByPath("./images/han.jpg","")
+# logger.info(f"test")
# jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg")
# print(jsonString)
-
+#
if __name__ == "__main__":
try:
logger.info(f"main.py len of parameter: {len(sys.argv)}")
diff --git a/OCRTool/.DS_Store b/OCRTool/.DS_Store
index 62fee18..25efbf7 100644
Binary files a/OCRTool/.DS_Store and b/OCRTool/.DS_Store differ
diff --git a/OCRTool/.idea/workspace.xml b/OCRTool/.idea/workspace.xml
index 3d42762..c71d5c0 100644
--- a/OCRTool/.idea/workspace.xml
+++ b/OCRTool/.idea/workspace.xml
@@ -12,7 +12,29 @@
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -69,7 +91,7 @@
"node.js.selected.package.eslint": "(autodetect)",
"node.js.selected.package.tslint": "(autodetect)",
"nodejs_package_manager_path": "npm",
- "project.structure.last.edited": "Project",
+ "project.structure.last.edited": "Problems",
"project.structure.proportion": "0.15",
"project.structure.side.proportion": "0.1150748",
"settings.editor.selected.configurable": "preferences.pluginManager",
@@ -84,7 +106,7 @@
-
+
@@ -93,6 +115,13 @@
+
+
+
+
+
+
+
@@ -118,7 +147,7 @@
-
+
@@ -155,7 +184,19 @@
@@ -168,19 +209,14 @@
-
- file://$PROJECT_DIR$/src/main/java/com/bonus/core/RecognitionController.java
- 9
-
-
file://$PROJECT_DIR$/src/main/java/com/bonus/core/OCRRecognition.java
- 28
+ 29
file://$PROJECT_DIR$/src/main/java/com/bonus/core/OCRRecognition.java
- 79
+ 90
diff --git a/OCRTool/OCRTool-1.0-SNAPSHOT.jar b/OCRTool/OCRTool-1.0-SNAPSHOT.jar
deleted file mode 100644
index 4cb787d..0000000
Binary files a/OCRTool/OCRTool-1.0-SNAPSHOT.jar and /dev/null differ
diff --git a/OCRTool/src/main/resources/libtesseract.a b/OCRTool/lib/libtesseract.a
similarity index 100%
rename from OCRTool/src/main/resources/libtesseract.a
rename to OCRTool/lib/libtesseract.a
diff --git a/OCRTool/src/main/resources/libtesseract.dylib b/OCRTool/lib/libtesseract.dylib
similarity index 100%
rename from OCRTool/src/main/resources/libtesseract.dylib
rename to OCRTool/lib/libtesseract.dylib
diff --git a/OCRTool/out/artifacts/OCRTool_jar/OCRTool.jar b/OCRTool/out/artifacts/OCRTool_jar/OCRTool.jar
index e39eddf..c3ce499 100644
Binary files a/OCRTool/out/artifacts/OCRTool_jar/OCRTool.jar and b/OCRTool/out/artifacts/OCRTool_jar/OCRTool.jar differ
diff --git a/OCRTool/pom.xml b/OCRTool/pom.xml
index d01b993..163a355 100644
--- a/OCRTool/pom.xml
+++ b/OCRTool/pom.xml
@@ -23,184 +23,32 @@
3.1.1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
org.springframework.boot
spring-boot-starter
-
-
-
-
-
-
-
-
-
-
-
-
-
-
org.springframework.boot
spring-boot-starter-web
-
-
- net.sourceforge.tess4j
- tess4j
- 4.5.1
-
-
- net.java.dev.jna
- jna
- 5.9.0
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+ org.springframework.boot
+ spring-boot-maven-plugin
+
+
+
+ repackage
+
+
+
+
+
+
\ No newline at end of file
diff --git a/OCRTool/src/.DS_Store b/OCRTool/src/.DS_Store
index e6c26ab..91e1db4 100644
Binary files a/OCRTool/src/.DS_Store and b/OCRTool/src/.DS_Store differ
diff --git a/OCRTool/src/main/.DS_Store b/OCRTool/src/main/.DS_Store
index 344ecb0..1527838 100644
Binary files a/OCRTool/src/main/.DS_Store and b/OCRTool/src/main/.DS_Store differ
diff --git a/OCRTool/src/main/java/com/bonus/core/MySpringBootApplication.java b/OCRTool/src/main/java/com/bonus/core/MySpringBootApplication.java
index 0ed93d9..3c4146e 100644
--- a/OCRTool/src/main/java/com/bonus/core/MySpringBootApplication.java
+++ b/OCRTool/src/main/java/com/bonus/core/MySpringBootApplication.java
@@ -16,11 +16,11 @@ public class MySpringBootApplication {
context = SpringApplication.run(MySpringBootApplication.class, args);
MySpringBootApplication app = new MySpringBootApplication();
- //String filePath1 = "/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG";
- String filePath1 = "/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg";
- String filePath2 = "/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg";
- app.callExtractIdentityInfoByLocalPath(filePath1, filePath2);
- //app.callExtractIdentityInfoByBase64Data(filePath1, filePath2);
+ //String filePath1 = "E:\\OCRTool\\OCRPython\\images\\id_card.JPG";
+// String filePath1 = "/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg";
+// String filePath2 = "/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg";
+ //app.callExtractIdentityInfoByLocalPath(filePath1, "");
+ app.callExtractIdentityInfoByBase64Data(filePath1, filePath2);
}
private static ApplicationContext context;
diff --git a/OCRTool/src/main/java/com/bonus/core/OCRRecognition.java b/OCRTool/src/main/java/com/bonus/core/OCRRecognition.java
index 1053fd0..9a5e28d 100644
--- a/OCRTool/src/main/java/com/bonus/core/OCRRecognition.java
+++ b/OCRTool/src/main/java/com/bonus/core/OCRRecognition.java
@@ -4,9 +4,9 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.io.BufferedReader;
+import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
-import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
@@ -22,9 +22,10 @@ public class OCRRecognition {
public String extractInfo(IdentifyRecognitionParams recognitionParams){
Process proc;
List stringList = new ArrayList<>();
- String lastElement = null;
+ String lastElement = "";
int offset = 0;
- int batchSize = 8092; // 设置每次写入的批次大小
+ // 设置每次写入的批次大小
+ int batchSize = 8092;
try {
System.out.println("开始打印从配置里读取的值:");
System.out.println(this.configure.getPythonEnv());
@@ -35,7 +36,8 @@ public class OCRRecognition {
System.out.println(recognitionParams.getRecognitionData1());
System.out.println("参数2的base64 string 是:");
System.out.println(recognitionParams.getRecognitionData2());
- String[] str = new String[]{this.configure.getPythonEnv(),this.configure.getScriptEnv(), String.valueOf(recognitionParams.getType().ordinal())/*, recognitionParams.getRecognitionData1(), recognitionParams.getRecognitionData2()*/};//
+ //*, recognitionParams.getRecognitionData1(), recognitionParams.getRecognitionData2()*/
+ String[] str = new String[]{this.configure.getPythonEnv(),this.configure.getScriptEnv(), String.valueOf(recognitionParams.getType().ordinal())};
Runtime runtime = Runtime.getRuntime();
if (runtime == null) {
System.out.println("runtime == null");
@@ -46,31 +48,40 @@ public class OCRRecognition {
OutputStream outputStream = proc.getOutputStream();
if (!recognitionParams.getRecognitionData1().isEmpty()) {
byte[] input = recognitionParams.getRecognitionData1().getBytes(StandardCharsets.UTF_8);
- long len = input.length;
-
- while (offset < len) {
- int remaining = (int) (len - offset);
- int bytesToWrite = Math.min(remaining, batchSize);
- outputStream.write(input, offset, bytesToWrite);
- offset += bytesToWrite;
- }
+ passParameter(outputStream, input);
}
if (!recognitionParams.getRecognitionData2().isEmpty()) {
- outputStream.write(System.lineSeparator().getBytes());
- byte[] input = recognitionParams.getRecognitionData2().getBytes(StandardCharsets.UTF_8);
- long len = input.length;
- while (offset < len) {
- int remaining = (int) (len - offset);
- int bytesToWrite = Math.min(remaining, batchSize);
- outputStream.write(input, offset, bytesToWrite);
- offset += bytesToWrite;
- }
+ passParameter(outputStream, System.lineSeparator().getBytes(StandardCharsets.UTF_8)); // separator between the two payloads, as the removed code wrote before payload 2
+ passParameter(outputStream, recognitionParams.getRecognitionData2().getBytes(StandardCharsets.UTF_8)); // this branch handles payload 2, so send Data2 rather than Data1 a second time
}
+
+// if (!recognitionParams.getRecognitionData1().isEmpty()) {
+// byte[] input = recognitionParams.getRecognitionData1().getBytes(StandardCharsets.UTF_8);
+// long len = input.length;
+//
+// while (offset < len) {
+// int remaining = (int) (len - offset);
+// int bytesToWrite = Math.min(remaining, batchSize);
+// outputStream.write(input, offset, bytesToWrite);
+// offset += bytesToWrite;
+// }
+// }
+// if (!recognitionParams.getRecognitionData2().isEmpty()) {
+// outputStream.write(System.lineSeparator().getBytes());
+// byte[] input = recognitionParams.getRecognitionData2().getBytes(StandardCharsets.UTF_8);
+// long len = input.length;
+// while (offset < len) {
+// int remaining = (int) (len - offset);
+// int bytesToWrite = Math.min(remaining, batchSize);
+// outputStream.write(input, offset, bytesToWrite);
+// offset += bytesToWrite;
+// }
+//
outputStream.flush();
outputStream.close();
-
BufferedReader in = new BufferedReader(new InputStreamReader(proc.getInputStream(), StandardCharsets.UTF_8));
String line = null;
+
while ((line = in.readLine()) != null) {
stringList.add(line);
}
@@ -85,6 +96,20 @@ public class OCRRecognition {
return lastElement;
}
+ private void passParameter(OutputStream outputStream, byte[] input) throws IOException { // writes all of `input` to `outputStream` in chunks; does not flush or close the stream
+ int offset = 0; // index of the next byte of `input` to write
+ // Maximum number of bytes handed to a single write() call
+ int batchSize = 8092;
+ long len = input.length;
+
+ while (offset < len) {
+ int remaining = (int) (len - offset); // bytes not yet written
+ int bytesToWrite = Math.min(remaining, batchSize); // the final chunk may be shorter than batchSize
+ outputStream.write(input, offset, bytesToWrite);
+ offset += bytesToWrite;
+ }
+ }
+
}
diff --git a/OCRTool/src/main/java/com/bonus/core/RecognitionController.java b/OCRTool/src/main/java/com/bonus/core/RecognitionController.java
index 70a0375..b0c2a39 100644
--- a/OCRTool/src/main/java/com/bonus/core/RecognitionController.java
+++ b/OCRTool/src/main/java/com/bonus/core/RecognitionController.java
@@ -5,20 +5,12 @@ import org.springframework.web.bind.annotation.*;
@RestController
public class RecognitionController {
-// @GetMapping("/hello")
-// public String hello() {
-// return "Hello, World!";
-// }
@Autowired OCRRecognition ocrRecognition;
@PostMapping("/recognition")
public String recognition(@RequestBody IdentifyRecognitionParams identifyRecognitionParams){
- String javaString = "";
IdentifyRecognitionParams.RecognitionType type = identifyRecognitionParams.getType();
- String idData1 = identifyRecognitionParams.getRecognitionData1();
- String idData2 = identifyRecognitionParams.getRecognitionData2();
- javaString = ocrRecognition.extractInfo(identifyRecognitionParams);
- return javaString;
+ return ocrRecognition.extractInfo(identifyRecognitionParams);
}
}
diff --git a/OCRTool/src/main/resources/.DS_Store b/OCRTool/src/main/resources/.DS_Store
new file mode 100644
index 0000000..5008ddf
Binary files /dev/null and b/OCRTool/src/main/resources/.DS_Store differ
diff --git a/OCRTool/src/main/resources/application.yml b/OCRTool/src/main/resources/application.yml
index 2423790..bf88177 100644
--- a/OCRTool/src/main/resources/application.yml
+++ b/OCRTool/src/main/resources/application.yml
@@ -1,7 +1,3 @@
-spring:
- profiles:
- active=dev:
-
recognition:
# pythonEnv: /Users/wangvivi/Desktop/MySelf/myenv/bin/python
# scriptEnv: /Users/wangvivi/Desktop/Code/Component/OCRPython/main.py
diff --git a/OCRTool/target/OCRTool-1.0-SNAPSHOT.jar b/OCRTool/target/OCRTool-1.0-SNAPSHOT.jar
index 161570f..8d7a2ba 100644
Binary files a/OCRTool/target/OCRTool-1.0-SNAPSHOT.jar and b/OCRTool/target/OCRTool-1.0-SNAPSHOT.jar differ
diff --git a/OCRTool/target/classes/application.yml b/OCRTool/target/classes/application.yml
index 2423790..bf88177 100644
--- a/OCRTool/target/classes/application.yml
+++ b/OCRTool/target/classes/application.yml
@@ -1,7 +1,3 @@
-spring:
- profiles:
- active=dev:
-
recognition:
# pythonEnv: /Users/wangvivi/Desktop/MySelf/myenv/bin/python
# scriptEnv: /Users/wangvivi/Desktop/Code/Component/OCRPython/main.py
diff --git a/OCRTool/target/classes/com/bonus/core/MySpringBootApplication.class b/OCRTool/target/classes/com/bonus/core/MySpringBootApplication.class
index 57624bd..aa5196f 100644
Binary files a/OCRTool/target/classes/com/bonus/core/MySpringBootApplication.class and b/OCRTool/target/classes/com/bonus/core/MySpringBootApplication.class differ
diff --git a/OCRTool/target/classes/com/bonus/core/OCRRecognition.class b/OCRTool/target/classes/com/bonus/core/OCRRecognition.class
index 0a2a9cf..07d35cd 100644
Binary files a/OCRTool/target/classes/com/bonus/core/OCRRecognition.class and b/OCRTool/target/classes/com/bonus/core/OCRRecognition.class differ
diff --git a/OCRTool/target/classes/com/bonus/core/RecognitionController.class b/OCRTool/target/classes/com/bonus/core/RecognitionController.class
index 6cb9e01..3e49919 100644
Binary files a/OCRTool/target/classes/com/bonus/core/RecognitionController.class and b/OCRTool/target/classes/com/bonus/core/RecognitionController.class differ
diff --git a/OCRTool/target/classes/libtesseract.a b/OCRTool/target/classes/libtesseract.a
deleted file mode 100644
index e17296d..0000000
Binary files a/OCRTool/target/classes/libtesseract.a and /dev/null differ
diff --git a/OCRTool/target/classes/libtesseract.dylib b/OCRTool/target/classes/libtesseract.dylib
deleted file mode 100644
index 629f7df..0000000
Binary files a/OCRTool/target/classes/libtesseract.dylib and /dev/null differ