diff --git a/.DS_Store b/.DS_Store index 2ab7314..309e0a5 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/OCRPython/.DS_Store b/OCRPython/.DS_Store index 451627c..ba93859 100644 Binary files a/OCRPython/.DS_Store and b/OCRPython/.DS_Store differ diff --git a/OCRPython/extractor/__pycache__/identitycard_extractor.cpython-39.pyc b/OCRPython/extractor/__pycache__/identitycard_extractor.cpython-39.pyc index f53fd1e..7a6c1e4 100644 Binary files a/OCRPython/extractor/__pycache__/identitycard_extractor.cpython-39.pyc and b/OCRPython/extractor/__pycache__/identitycard_extractor.cpython-39.pyc differ diff --git a/OCRPython/extractor/identitycard_extractor.py b/OCRPython/extractor/identitycard_extractor.py index 1ac2a0c..0c72452 100644 --- a/OCRPython/extractor/identitycard_extractor.py +++ b/OCRPython/extractor/identitycard_extractor.py @@ -35,7 +35,7 @@ class IdentityCardExtractor(Extractor): def extract_textbyPaddle(self, text:str)->dict: try: patterns = { - "issuingAuthority": r"签发机关\n(.+?)\n", + "issuingAuthority": r"签发机关\n*(.+?)\n", "validTime": r"有效期限\n*(.+?)\n", "name": r"姓名(.*?)\n", ##### "gender": r"(\S)民族", @@ -82,21 +82,21 @@ class InvoiceExtractor(Extractor): # 幢2001室 # 公民身份号码 # 440203197306192118""" -text = """ -中华人民共和国 -居民身份证 -oo -签发机关宿州市公安局桥分局 -有效期限2023.01.18-2043.01.18 -姓名郭乾坤 -性别男民族汉 -出生1994年10月17日 -住址 安徽省宿州市场桥区朱仙 -庄镇郭庙村郭家组6号 -公民身份号码 -34220119941017327X -""" -extractor = IdentityCardExtractor() - -jsonstring = extractor.extract_textbyPaddle(text) -print(jsonstring) \ No newline at end of file +# text = """ +# 中华人民共和国 +# 居民身份证 +# oo +# 签发机关宿州市公安局桥分局 +# 有效期限2023.01.18-2043.01.18 +# 姓名郭乾坤 +# 性别男民族汉 +# 出生1994年10月17日 +# 住址 安徽省宿州市场桥区朱仙 +# 庄镇郭庙村郭家组6号 +# 公民身份号码 +# 34220119941017327X +# """ +# extractor = IdentityCardExtractor() +# +# jsonstring = extractor.extract_textbyPaddle(text) +# print(jsonstring) \ No newline at end of file diff --git a/OCRPython/logs/ocr_reconginition.log b/OCRPython/logs/ocr_reconginition.log index c421abd..e69de29 100644 --- a/OCRPython/logs/ocr_reconginition.log +++ b/OCRPython/logs/ocr_reconginition.log @@ -1,224 +0,0 @@ -2024-04-17 14:28:11,092 - main.py[line:80] - INFO: main.py len of parameter: 2 -2024-04-17 14:28:11,093 - main.py[line:87] - INFO: python 脚本里的接收到的参数是: -2024-04-17 14:28:11,093 - main.py[line:88] - INFO: 1 -2024-04-17 14:28:11,093 - main.py[line:91] - INFO: 开始执行sys.stdin.read -2024-04-17 14:28:31,648 - main.py[line:97] - INFO: 399921 -2024-04-17 14:29:55,152 - main.py[line:80] - INFO: main.py len of parameter: 2 -2024-04-17 14:29:55,152 - main.py[line:87] - INFO: python 脚本里的接收到的参数是: -2024-04-17 14:29:55,152 - main.py[line:88] - INFO: 1 -2024-04-17 14:29:55,152 - main.py[line:91] - INFO: 开始执行sys.stdin.read -2024-04-17 14:29:55,154 - main.py[line:96] - INFO: -2024-04-17 14:29:55,154 - main.py[line:97] - INFO: 399921 -2024-04-17 14:29:55,154 - main.py[line:106] - INFO: 199960 -2024-04-17 14:29:55,155 - main.py[line:107] - INFO: 199960 -2024-04-17 14:32:23,556 - main.py[line:80] - INFO: main.py len of parameter: 2 -2024-04-17 14:32:23,556 - main.py[line:88] - INFO: python 脚本里的接收到的参数是: -2024-04-17 14:32:23,556 - main.py[line:89] - INFO: 1 -2024-04-17 14:32:23,556 - main.py[line:92] - INFO: 开始执行sys.stdin.read -2024-04-17 14:32:23,558 - main.py[line:97] - INFO: -2024-04-17 14:32:23,558 - main.py[line:98] - INFO: 399921 -2024-04-17 14:32:23,559 - main.py[line:107] - INFO: 199960 -2024-04-17 14:32:23,559 - main.py[line:108] - INFO: 199960 -2024-04-17 14:32:23,559 - main.py[line:43] - INFO: extractIdCardInfoByBase64Data -2024-04-17 14:32:23,559 - main.py[line:46] - INFO: not base64data1.empty() -2024-04-17 14:32:23,562 - main.py[line:52] - INFO: not base64Data2.empty() -2024-04-17 14:32:23,563 - main.py[line:58] - INFO: file1.png and file2.png exist -2024-04-17 14:32:23,755 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-17 14:32:26,428 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-17 14:32:26,934 - main.py[line:37] - INFO: extractIdCardInfo 耗时3.37139892578125秒 -2024-04-17 14:32:26,934 - main.py[line:69] - INFO: extractIdCardInfo 耗时3.375098705291748秒 -2024-04-17 15:00:10,376 - main.py[line:91] - INFO: main.py len of parameter: 2 -2024-04-17 15:00:10,376 - main.py[line:99] - INFO: python 脚本里的接收到的参数是: -2024-04-17 15:00:10,376 - main.py[line:100] - INFO: 1 -2024-04-17 15:00:10,376 - main.py[line:103] - INFO: 开始执行sys.stdin.read -2024-04-17 15:00:10,378 - main.py[line:108] - INFO: -2024-04-17 15:00:10,379 - main.py[line:109] - INFO: 399921 -2024-04-17 15:00:10,379 - main.py[line:118] - INFO: 199960 -2024-04-17 15:00:10,379 - main.py[line:119] - INFO: 199960 -2024-04-17 15:00:10,379 - main.py[line:47] - INFO: extractIdCardInfoByBase64Data -2024-04-17 15:00:10,379 - main.py[line:51] - INFO: not base64data1.empty() -2024-04-17 15:00:10,381 - main.py[line:57] - INFO: not base64Data2.empty() -2024-04-17 15:00:10,382 - main.py[line:63] - INFO: file1.png and file2.png exist -2024-04-17 15:00:10,578 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-17 15:00:13,327 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-17 15:00:13,839 - main.py[line:41] - INFO: extractIdCardInfo 耗时3.456987142562866秒 -2024-04-17 15:00:13,840 - main.py[line:80] - INFO: extractIdCardInfoByBase64Data 耗时3.460446834564209秒 -2024-04-17 15:01:33,280 - main.py[line:91] - INFO: main.py len of parameter: 2 -2024-04-17 15:01:33,280 - main.py[line:99] - INFO: python 脚本里的接收到的参数是: -2024-04-17 15:01:33,280 - main.py[line:100] - INFO: 1 -2024-04-17 15:01:33,280 - main.py[line:103] - INFO: 开始执行sys.stdin.read -2024-04-17 15:01:33,282 - main.py[line:108] - INFO: -2024-04-17 15:01:33,283 - main.py[line:109] - INFO: 399921 -2024-04-17 15:01:33,283 - main.py[line:118] - INFO: 199960 -2024-04-17 15:01:33,283 - main.py[line:119] - INFO: 199960 -2024-04-17 15:01:33,283 - main.py[line:47] - INFO: extractIdCardInfoByBase64Data -2024-04-17 15:01:33,283 - main.py[line:51] - INFO: not base64data1.empty() -2024-04-17 15:01:33,285 - main.py[line:57] - INFO: not base64Data2.empty() -2024-04-17 15:01:33,286 - main.py[line:63] - INFO: file1.png and file2.png exist -2024-04-17 15:01:33,452 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-17 15:01:35,814 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-17 15:01:36,303 - main.py[line:41] - INFO: extractIdCardInfo 耗时3.0165488719940186秒 -2024-04-17 15:01:36,303 - main.py[line:80] - INFO: extractIdCardInfoByBase64Data 耗时3.0200371742248535秒 -2024-04-17 15:06:02,257 - main.py[line:91] - INFO: main.py len of parameter: 2 -2024-04-17 15:06:02,258 - main.py[line:99] - INFO: python 脚本里的接收到的参数是: -2024-04-17 15:06:02,258 - main.py[line:100] - INFO: 1 -2024-04-17 15:06:02,258 - main.py[line:103] - INFO: 开始执行sys.stdin.read -2024-04-17 15:09:35,113 - main.py[line:126] - ERROR: KeyboardInterrupt -2024-04-17 15:10:50,601 - main.py[line:91] - INFO: main.py len of parameter: 2 -2024-04-17 15:10:50,601 - main.py[line:99] - INFO: python 脚本里的接收到的参数是: -2024-04-17 15:10:50,602 - main.py[line:100] - INFO: 1 -2024-04-17 15:10:50,602 - main.py[line:103] - INFO: 开始执行sys.stdin.read -2024-04-17 15:10:50,602 - main.py[line:108] - INFO: -2024-04-17 15:10:50,603 - main.py[line:109] - INFO: 316256 -2024-04-17 15:10:50,603 - main.py[line:118] - INFO: 316256 -2024-04-17 15:10:50,603 - main.py[line:119] - INFO: 0 -2024-04-17 15:10:50,603 - main.py[line:47] - INFO: extractIdCardInfoByBase64Data -2024-04-17 15:10:50,603 - main.py[line:51] - INFO: not base64data1.empty() -2024-04-17 15:10:50,605 - main.py[line:68] - INFO: file1.png exist -2024-04-17 15:10:50,775 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-17 15:10:54,797 - main.py[line:41] - INFO: extractIdCardInfo 耗时4.191910982131958秒 -2024-04-17 15:10:54,798 - main.py[line:80] - INFO: extractIdCardInfoByBase64Data 耗时4.194473028182983秒 -2024-04-17 15:15:35,913 - main.py[line:91] - INFO: main.py len of parameter: 2 -2024-04-17 15:15:35,913 - main.py[line:99] - INFO: python 脚本里的接收到的参数是: -2024-04-17 15:15:35,913 - main.py[line:100] - INFO: 0 -2024-04-17 15:15:35,913 - main.py[line:103] - INFO: 开始执行sys.stdin.read -2024-04-17 15:15:35,913 - main.py[line:108] - INFO: -2024-04-17 15:15:35,914 - main.py[line:109] - INFO: 56 -2024-04-17 15:15:35,914 - main.py[line:118] - INFO: 55 -2024-04-17 15:15:35,914 - main.py[line:119] - INFO: 0 -2024-04-17 15:15:36,072 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-17 15:15:38,824 - main.py[line:41] - INFO: extractIdCardInfo 耗时2.9099318981170654秒 -2024-04-17 15:41:57,020 - main.py[line:91] - INFO: main.py len of parameter: 2 -2024-04-17 15:41:57,021 - main.py[line:99] - INFO: python 脚本里的接收到的参数是: -2024-04-17 15:41:57,021 - main.py[line:100] - INFO: 0 -2024-04-17 15:41:57,021 - main.py[line:103] - INFO: 开始执行sys.stdin.read -2024-04-17 15:41:57,021 - main.py[line:108] - INFO: -2024-04-17 15:41:57,021 - main.py[line:109] - INFO: 56 -2024-04-17 15:41:57,021 - main.py[line:118] - INFO: 55 -2024-04-17 15:41:57,021 - main.py[line:119] - INFO: 0 -2024-04-17 15:41:57,179 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-17 15:42:00,351 - main.py[line:41] - INFO: extractIdCardInfo 耗时3.3298799991607666秒 -2024-04-17 17:35:41,366 - main.py[line:91] - INFO: main.py len of parameter: 2 -2024-04-17 17:35:41,366 - main.py[line:99] - INFO: python 脚本里的接收到的参数是: -2024-04-17 17:35:41,366 - main.py[line:100] - INFO: 0 -2024-04-17 17:35:41,366 - main.py[line:103] - INFO: 开始执行sys.stdin.read -2024-04-17 17:35:41,366 - main.py[line:108] - INFO: -2024-04-17 17:35:41,366 - main.py[line:109] - INFO: 56 -2024-04-17 17:35:41,366 - main.py[line:118] - INFO: 55 -2024-04-17 17:35:41,366 - main.py[line:119] - INFO: 0 -2024-04-17 17:35:41,810 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-17 17:35:44,799 - main.py[line:41] - INFO: extractIdCardInfo 耗时3.4327380657196045秒 -2024-04-18 14:28:41,233 - main.py[line:47] - INFO: extractIdCardInfoByBase64Data -2024-04-18 14:28:41,233 - main.py[line:51] - INFO: not base64data1.empty() -2024-04-18 14:28:41,235 - main.py[line:68] - INFO: file1.png exist -2024-04-18 14:28:41,607 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-18 14:28:44,850 - main.py[line:41] - INFO: extractIdCardInfo 耗时3.614640951156616秒 -2024-04-18 14:28:44,850 - main.py[line:80] - INFO: extractIdCardInfoByBase64Data 耗时3.6167337894439697秒 -2024-04-18 17:20:03,651 - main.py[line:48] - INFO: extractIdCardInfoByBase64Data -2024-04-18 17:20:03,651 - main.py[line:53] - INFO: not base64data1.empty() -2024-04-18 17:20:03,653 - main.py[line:70] - INFO: file1.png exist -2024-04-18 17:20:04,099 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-18 17:20:09,473 - main.py[line:42] - INFO: extractIdCardInfo 耗时5.820295810699463秒 -2024-04-18 17:20:09,473 - main.py[line:82] - INFO: extractIdCardInfoByBase64Data 耗时5.822228193283081秒 -2024-04-19 08:58:12,184 - main.py[line:48] - INFO: extractIdCardInfoByBase64Data -2024-04-19 08:58:12,185 - main.py[line:53] - INFO: not base64data1.empty() -2024-04-19 08:58:12,186 - main.py[line:70] - INFO: file1.png exist -2024-04-19 08:58:12,373 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-19 08:58:18,685 - main.py[line:42] - INFO: extractIdCardInfo 耗时6.498270034790039秒 -2024-04-19 08:58:18,685 - main.py[line:82] - INFO: extractIdCardInfoByBase64Data 耗时6.499899864196777秒 -2024-04-19 09:00:57,608 - main.py[line:48] - INFO: extractIdCardInfoByBase64Data -2024-04-19 09:00:57,609 - main.py[line:53] - INFO: not base64data1.empty() -2024-04-19 09:00:57,609 - main.py[line:70] - INFO: file1.png exist -2024-04-19 09:00:57,787 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-19 09:01:00,685 - main.py[line:42] - INFO: extractIdCardInfo 耗时3.075079917907715秒 -2024-04-19 09:01:00,685 - main.py[line:82] - INFO: extractIdCardInfoByBase64Data 耗时3.076277017593384秒 -2024-04-19 11:35:24,397 - main.py[line:38] - ERROR: unstructured package not found, please install it with `pip install unstructured` -2024-04-19 11:35:24,398 - main.py[line:42] - INFO: extractIdCardInfo 耗时0.0006740093231201172秒 -2024-04-19 11:35:56,826 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-19 11:36:04,713 - main.py[line:42] - INFO: extractIdCardInfo 耗时8.266455888748169秒 -2024-04-19 11:36:11,025 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-19 11:36:25,929 - main.py[line:42] - INFO: extractIdCardInfo 耗时14.984557151794434秒 -2024-04-19 11:36:38,161 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-19 11:36:44,925 - main.py[line:42] - INFO: extractIdCardInfo 耗时6.842967987060547秒 -2024-04-21 11:10:25,753 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-21 11:11:01,024 - main.py[line:42] - INFO: extractIdCardInfo 耗时35.6481351852417秒 -2024-04-21 11:12:12,110 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-21 11:12:14,951 - main.py[line:42] - INFO: extractIdCardInfo 耗时3.035506248474121秒 -2024-04-21 12:18:44,506 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-21 12:18:47,380 - main.py[line:42] - INFO: extractIdCardInfo 耗时3.074634075164795秒 -2024-04-21 15:12:48,957 - main.py[line:96] - INFO: main.py len of parameter: 2 -2024-04-21 15:12:48,957 - main.py[line:104] - INFO: python 脚本里的接收到的参数是: -2024-04-21 15:12:48,957 - main.py[line:105] - INFO: 0 -2024-04-21 15:12:48,957 - main.py[line:106] - INFO: 开始执行sys.stdin.read -2024-04-21 15:12:48,957 - main.py[line:108] - INFO: -2024-04-21 15:12:48,957 - main.py[line:109] - INFO: 56 -2024-04-21 15:12:48,957 - main.py[line:118] - INFO: 55 -2024-04-21 15:12:48,958 - main.py[line:119] - INFO: 0 -2024-04-21 15:12:48,958 - main.py[line:38] - ERROR: unstructured package not found, please install it with `pip install unstructured` -2024-04-21 15:12:48,958 - main.py[line:42] - INFO: extractIdCardInfo 耗时8.0108642578125e-05秒 -2024-04-21 15:14:23,381 - main.py[line:96] - INFO: main.py len of parameter: 2 -2024-04-21 15:14:23,381 - main.py[line:104] - INFO: python 脚本里的接收到的参数是: -2024-04-21 15:14:23,381 - main.py[line:105] - INFO: 0 -2024-04-21 15:14:23,381 - main.py[line:106] - INFO: 开始执行sys.stdin.read -2024-04-21 15:14:23,382 - main.py[line:108] - INFO: -2024-04-21 15:14:23,382 - main.py[line:109] - INFO: 56 -2024-04-21 15:14:23,382 - main.py[line:118] - INFO: 55 -2024-04-21 15:14:23,382 - main.py[line:119] - INFO: 0 -2024-04-21 15:14:23,653 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-21 15:14:55,795 - main.py[line:38] - ERROR: 'NoneType' object has no attribute 'strip' -2024-04-21 15:14:55,796 - main.py[line:42] - INFO: extractIdCardInfo 耗时32.41406607627869秒 -2024-04-21 15:18:43,877 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-21 15:18:48,536 - main.py[line:38] - ERROR: 'NoneType' object has no attribute 'strip' -2024-04-21 15:18:48,536 - main.py[line:42] - INFO: extractIdCardInfo 耗时4.845187187194824秒 -2024-04-21 15:20:34,004 - main.py[line:96] - INFO: main.py len of parameter: 2 -2024-04-21 15:20:34,005 - main.py[line:104] - INFO: python 脚本里的接收到的参数是: -2024-04-21 15:20:34,005 - main.py[line:105] - INFO: 0 -2024-04-21 15:20:34,005 - main.py[line:106] - INFO: 开始执行sys.stdin.read -2024-04-21 15:20:34,005 - main.py[line:108] - INFO: -2024-04-21 15:20:34,005 - main.py[line:109] - INFO: 56 -2024-04-21 15:20:34,005 - main.py[line:118] - INFO: 55 -2024-04-21 15:20:34,005 - main.py[line:119] - INFO: 0 -2024-04-21 15:20:34,176 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-21 15:20:36,793 - main.py[line:38] - ERROR: 'NoneType' object has no attribute 'strip' -2024-04-21 15:20:36,793 - main.py[line:42] - INFO: extractIdCardInfo 耗时2.7879230976104736秒 -2024-04-21 15:23:26,673 - main.py[line:96] - INFO: main.py len of parameter: 2 -2024-04-21 15:23:26,673 - main.py[line:104] - INFO: python 脚本里的接收到的参数是: -2024-04-21 15:23:26,673 - main.py[line:105] - INFO: 0 -2024-04-21 15:23:26,673 - main.py[line:106] - INFO: 开始执行sys.stdin.read -2024-04-21 15:23:26,673 - main.py[line:108] - INFO: -2024-04-21 15:23:26,674 - main.py[line:109] - INFO: 56 -2024-04-21 15:23:26,674 - main.py[line:118] - INFO: 55 -2024-04-21 15:23:26,674 - main.py[line:119] - INFO: 0 -2024-04-21 15:23:26,856 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-21 15:23:36,818 - main.py[line:42] - INFO: extractIdCardInfo 耗时10.144625186920166秒 -2024-04-21 15:25:57,305 - main.py[line:96] - INFO: main.py len of parameter: 2 -2024-04-21 15:25:57,306 - main.py[line:104] - INFO: python 脚本里的接收到的参数是: -2024-04-21 15:25:57,306 - main.py[line:105] - INFO: 0 -2024-04-21 15:25:57,306 - main.py[line:106] - INFO: 开始执行sys.stdin.read -2024-04-21 15:25:57,306 - main.py[line:108] - INFO: -2024-04-21 15:25:57,306 - main.py[line:109] - INFO: 56 -2024-04-21 15:25:57,306 - main.py[line:118] - INFO: 55 -2024-04-21 15:25:57,306 - main.py[line:119] - INFO: 0 -2024-04-21 15:25:57,483 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-21 15:26:00,173 - main.py[line:42] - INFO: extractIdCardInfo 耗时2.866658926010132秒 -2024-04-22 11:01:10,204 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-22 11:01:14,447 - main.py[line:42] - INFO: extractIdCardInfo 耗时4.689878702163696秒 -2024-04-22 11:01:54,639 - main.py[line:96] - INFO: main.py len of parameter: 2 -2024-04-22 11:01:54,640 - main.py[line:104] - INFO: python 脚本里的接收到的参数是: -2024-04-22 11:01:54,640 - main.py[line:105] - INFO: 0 -2024-04-22 11:01:54,640 - main.py[line:106] - INFO: 开始执行sys.stdin.read -2024-04-22 11:01:54,640 - main.py[line:108] - INFO: -2024-04-22 11:01:54,640 - main.py[line:109] - INFO: 55 -2024-04-22 11:01:54,640 - main.py[line:118] - INFO: 55 -2024-04-22 11:01:54,640 - main.py[line:119] - INFO: 0 -2024-04-22 11:01:54,819 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR -2024-04-22 11:01:57,426 - main.py[line:42] - INFO: extractIdCardInfo 耗时2.785860061645508秒 -2024-04-22 15:28:23,501 - maincopy.py[line:81] - ERROR: 'NoneType' object has no attribute 'strip' -2024-04-22 16:18:56,272 - maincopy.py[line:38] - ERROR: 'NoneType' object has no attribute 'shape' -2024-04-22 16:21:44,808 - maincopy.py[line:38] - ERROR: 'NoneType' object has no attribute 'shape' -2024-04-22 16:28:20,400 - maincopy.py[line:38] - ERROR: 'NoneType' object has no attribute 'shape' -2024-04-22 16:28:20,400 - maincopy.py[line:83] - ERROR: local variable 'jsonString' referenced before assignment -2024-04-22 16:29:46,501 - maincopy.py[line:38] - ERROR: 'NoneType' object has no attribute 'shape' -2024-04-22 16:29:46,502 - maincopy.py[line:83] - ERROR: local variable 'jsonString' referenced before assignment -2024-04-22 16:32:09,871 - maincopy.py[line:38] - ERROR: 'NoneType' object has no attribute 'shape' -2024-04-22 16:32:09,872 - maincopy.py[line:83] - ERROR: local variable 'jsonString' referenced before assignment diff --git a/OCRPython/main.py b/OCRPython/main.py index 0489c63..a818562 100644 --- a/OCRPython/main.py +++ b/OCRPython/main.py @@ -7,6 +7,7 @@ from configs.basic_config import logger import base64 import time import os +import json sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') @@ -33,7 +34,8 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str: context += "\n".join([doc.page_content for doc in docs]) extractor = IdentityCardExtractor() - jsonString = extractor.extract_text(context) + tempdict = extractor.extract_text(context) + json_string = json.dumps(tempdict, ensure_ascii=False) except Exception as e: logger.error(e) diff --git a/OCRPython/maincopy.py b/OCRPython/maincopy.py index 679d422..bd20fdc 100644 --- a/OCRPython/maincopy.py +++ b/OCRPython/maincopy.py @@ -1,12 +1,15 @@ # -*- coding: utf-8 -*- import sys +import io import os -from paddleocr import PaddleOCR, draw_ocr +from paddleocr import PaddleOCR import time from configs.basic_config import logger from extractor.identitycard_extractor import IdentityCardExtractor import base64 +import json +sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') def extractIdCardInfo(type:int, filePath1: str, filePath2: str)->str: if (0 == type): return extractIdCardInfoByPath(filePath1, filePath2) @@ -19,6 +22,7 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str: ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory text = "" start_time = time.time() # 记录结束时间 + jsonString = "" try: if os.path.exists(filePath1): result = ocr.ocr(filePath1, cls=False) @@ -40,7 +44,8 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str: if 0 != len(text): logger.info(f"text:{text}") extractor = IdentityCardExtractor() - jsonString = extractor.extract_textbyPaddle(text) + tempdict = extractor.extract_textbyPaddle(text) + jsonString = json.dumps(tempdict, ensure_ascii=False) end_time = time.time() # 记录结束时间 execution_time = end_time - start_time # 计算执行时间 logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}秒") @@ -62,7 +67,7 @@ def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str: image_data2 = base64.b64decode(base64Data2) with open("file2.png", "wb") as file: file.write(image_data2) - + if os.path.exists("file1.png") and os.path.exists("file2.png"): logger.info(f"file1.png and file2.png exist") jsonString = extractIdCardInfoByPath("file1.png","file2.png") @@ -90,11 +95,12 @@ def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str: # jsonString = extractIdCardInfoByBase64Data(base64_image_string,"") # jsonString = extractIdCardInfoByBase64Data("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg") # print(jsonString) -# -#jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG","") +# # +# jsonString = extractIdCardInfoByPath("./images/han.jpg","") +# logger.info(f"test") # jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg") # print(jsonString) - +# if __name__ == "__main__": try: logger.info(f"main.py len of parameter: {len(sys.argv)}") diff --git a/OCRTool/.DS_Store b/OCRTool/.DS_Store index 62fee18..25efbf7 100644 Binary files a/OCRTool/.DS_Store and b/OCRTool/.DS_Store differ diff --git a/OCRTool/.idea/workspace.xml b/OCRTool/.idea/workspace.xml index 3d42762..c71d5c0 100644 --- a/OCRTool/.idea/workspace.xml +++ b/OCRTool/.idea/workspace.xml @@ -12,7 +12,29 @@