Compare commits
2 Commits
ffc9d68e9c
...
733f8288e6
| Author | SHA1 | Date |
|---|---|---|
|
|
733f8288e6 | |
|
|
e74b2ca934 |
Binary file not shown.
Binary file not shown.
|
|
@ -35,7 +35,7 @@ class IdentityCardExtractor(Extractor):
|
|||
def extract_textbyPaddle(self, text:str)->dict:
|
||||
try:
|
||||
patterns = {
|
||||
"issuingAuthority": r"签发机关\n(.+?)\n",
|
||||
"issuingAuthority": r"签发机关\n*(.+?)\n",
|
||||
"validTime": r"有效期限\n*(.+?)\n",
|
||||
"name": r"姓名(.*?)\n", #####
|
||||
"gender": r"(\S)民族",
|
||||
|
|
@ -82,21 +82,21 @@ class InvoiceExtractor(Extractor):
|
|||
# 幢2001室
|
||||
# 公民身份号码
|
||||
# 440203197306192118"""
|
||||
text = """
|
||||
中华人民共和国
|
||||
居民身份证
|
||||
oo
|
||||
签发机关宿州市公安局桥分局
|
||||
有效期限2023.01.18-2043.01.18
|
||||
姓名郭乾坤
|
||||
性别男民族汉
|
||||
出生1994年10月17日
|
||||
住址 安徽省宿州市场桥区朱仙
|
||||
庄镇郭庙村郭家组6号
|
||||
公民身份号码
|
||||
34220119941017327X
|
||||
"""
|
||||
extractor = IdentityCardExtractor()
|
||||
|
||||
jsonstring = extractor.extract_textbyPaddle(text)
|
||||
print(jsonstring)
|
||||
# text = """
|
||||
# 中华人民共和国
|
||||
# 居民身份证
|
||||
# oo
|
||||
# 签发机关宿州市公安局桥分局
|
||||
# 有效期限2023.01.18-2043.01.18
|
||||
# 姓名郭乾坤
|
||||
# 性别男民族汉
|
||||
# 出生1994年10月17日
|
||||
# 住址 安徽省宿州市场桥区朱仙
|
||||
# 庄镇郭庙村郭家组6号
|
||||
# 公民身份号码
|
||||
# 34220119941017327X
|
||||
# """
|
||||
# extractor = IdentityCardExtractor()
|
||||
#
|
||||
# jsonstring = extractor.extract_textbyPaddle(text)
|
||||
# print(jsonstring)
|
||||
|
|
@ -1,224 +0,0 @@
|
|||
2024-04-17 14:28:11,092 - main.py[line:80] - INFO: main.py len of parameter: 2
|
||||
2024-04-17 14:28:11,093 - main.py[line:87] - INFO: python 脚本里的接收到的参数是:
|
||||
2024-04-17 14:28:11,093 - main.py[line:88] - INFO: 1
|
||||
2024-04-17 14:28:11,093 - main.py[line:91] - INFO: 开始执行sys.stdin.read
|
||||
2024-04-17 14:28:31,648 - main.py[line:97] - INFO: 399921
|
||||
2024-04-17 14:29:55,152 - main.py[line:80] - INFO: main.py len of parameter: 2
|
||||
2024-04-17 14:29:55,152 - main.py[line:87] - INFO: python 脚本里的接收到的参数是:
|
||||
2024-04-17 14:29:55,152 - main.py[line:88] - INFO: 1
|
||||
2024-04-17 14:29:55,152 - main.py[line:91] - INFO: 开始执行sys.stdin.read
|
||||
2024-04-17 14:29:55,154 - main.py[line:96] - INFO:
|
||||
2024-04-17 14:29:55,154 - main.py[line:97] - INFO: 399921
|
||||
2024-04-17 14:29:55,154 - main.py[line:106] - INFO: 199960
|
||||
2024-04-17 14:29:55,155 - main.py[line:107] - INFO: 199960
|
||||
2024-04-17 14:32:23,556 - main.py[line:80] - INFO: main.py len of parameter: 2
|
||||
2024-04-17 14:32:23,556 - main.py[line:88] - INFO: python 脚本里的接收到的参数是:
|
||||
2024-04-17 14:32:23,556 - main.py[line:89] - INFO: 1
|
||||
2024-04-17 14:32:23,556 - main.py[line:92] - INFO: 开始执行sys.stdin.read
|
||||
2024-04-17 14:32:23,558 - main.py[line:97] - INFO:
|
||||
2024-04-17 14:32:23,558 - main.py[line:98] - INFO: 399921
|
||||
2024-04-17 14:32:23,559 - main.py[line:107] - INFO: 199960
|
||||
2024-04-17 14:32:23,559 - main.py[line:108] - INFO: 199960
|
||||
2024-04-17 14:32:23,559 - main.py[line:43] - INFO: extractIdCardInfoByBase64Data
|
||||
2024-04-17 14:32:23,559 - main.py[line:46] - INFO: not base64data1.empty()
|
||||
2024-04-17 14:32:23,562 - main.py[line:52] - INFO: not base64Data2.empty()
|
||||
2024-04-17 14:32:23,563 - main.py[line:58] - INFO: file1.png and file2.png exist
|
||||
2024-04-17 14:32:23,755 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-17 14:32:26,428 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-17 14:32:26,934 - main.py[line:37] - INFO: extractIdCardInfo 耗时3.37139892578125秒
|
||||
2024-04-17 14:32:26,934 - main.py[line:69] - INFO: extractIdCardInfo 耗时3.375098705291748秒
|
||||
2024-04-17 15:00:10,376 - main.py[line:91] - INFO: main.py len of parameter: 2
|
||||
2024-04-17 15:00:10,376 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
|
||||
2024-04-17 15:00:10,376 - main.py[line:100] - INFO: 1
|
||||
2024-04-17 15:00:10,376 - main.py[line:103] - INFO: 开始执行sys.stdin.read
|
||||
2024-04-17 15:00:10,378 - main.py[line:108] - INFO:
|
||||
2024-04-17 15:00:10,379 - main.py[line:109] - INFO: 399921
|
||||
2024-04-17 15:00:10,379 - main.py[line:118] - INFO: 199960
|
||||
2024-04-17 15:00:10,379 - main.py[line:119] - INFO: 199960
|
||||
2024-04-17 15:00:10,379 - main.py[line:47] - INFO: extractIdCardInfoByBase64Data
|
||||
2024-04-17 15:00:10,379 - main.py[line:51] - INFO: not base64data1.empty()
|
||||
2024-04-17 15:00:10,381 - main.py[line:57] - INFO: not base64Data2.empty()
|
||||
2024-04-17 15:00:10,382 - main.py[line:63] - INFO: file1.png and file2.png exist
|
||||
2024-04-17 15:00:10,578 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-17 15:00:13,327 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-17 15:00:13,839 - main.py[line:41] - INFO: extractIdCardInfo 耗时3.456987142562866秒
|
||||
2024-04-17 15:00:13,840 - main.py[line:80] - INFO: extractIdCardInfoByBase64Data 耗时3.460446834564209秒
|
||||
2024-04-17 15:01:33,280 - main.py[line:91] - INFO: main.py len of parameter: 2
|
||||
2024-04-17 15:01:33,280 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
|
||||
2024-04-17 15:01:33,280 - main.py[line:100] - INFO: 1
|
||||
2024-04-17 15:01:33,280 - main.py[line:103] - INFO: 开始执行sys.stdin.read
|
||||
2024-04-17 15:01:33,282 - main.py[line:108] - INFO:
|
||||
2024-04-17 15:01:33,283 - main.py[line:109] - INFO: 399921
|
||||
2024-04-17 15:01:33,283 - main.py[line:118] - INFO: 199960
|
||||
2024-04-17 15:01:33,283 - main.py[line:119] - INFO: 199960
|
||||
2024-04-17 15:01:33,283 - main.py[line:47] - INFO: extractIdCardInfoByBase64Data
|
||||
2024-04-17 15:01:33,283 - main.py[line:51] - INFO: not base64data1.empty()
|
||||
2024-04-17 15:01:33,285 - main.py[line:57] - INFO: not base64Data2.empty()
|
||||
2024-04-17 15:01:33,286 - main.py[line:63] - INFO: file1.png and file2.png exist
|
||||
2024-04-17 15:01:33,452 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-17 15:01:35,814 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-17 15:01:36,303 - main.py[line:41] - INFO: extractIdCardInfo 耗时3.0165488719940186秒
|
||||
2024-04-17 15:01:36,303 - main.py[line:80] - INFO: extractIdCardInfoByBase64Data 耗时3.0200371742248535秒
|
||||
2024-04-17 15:06:02,257 - main.py[line:91] - INFO: main.py len of parameter: 2
|
||||
2024-04-17 15:06:02,258 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
|
||||
2024-04-17 15:06:02,258 - main.py[line:100] - INFO: 1
|
||||
2024-04-17 15:06:02,258 - main.py[line:103] - INFO: 开始执行sys.stdin.read
|
||||
2024-04-17 15:09:35,113 - main.py[line:126] - ERROR: KeyboardInterrupt
|
||||
2024-04-17 15:10:50,601 - main.py[line:91] - INFO: main.py len of parameter: 2
|
||||
2024-04-17 15:10:50,601 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
|
||||
2024-04-17 15:10:50,602 - main.py[line:100] - INFO: 1
|
||||
2024-04-17 15:10:50,602 - main.py[line:103] - INFO: 开始执行sys.stdin.read
|
||||
2024-04-17 15:10:50,602 - main.py[line:108] - INFO:
|
||||
2024-04-17 15:10:50,603 - main.py[line:109] - INFO: 316256
|
||||
2024-04-17 15:10:50,603 - main.py[line:118] - INFO: 316256
|
||||
2024-04-17 15:10:50,603 - main.py[line:119] - INFO: 0
|
||||
2024-04-17 15:10:50,603 - main.py[line:47] - INFO: extractIdCardInfoByBase64Data
|
||||
2024-04-17 15:10:50,603 - main.py[line:51] - INFO: not base64data1.empty()
|
||||
2024-04-17 15:10:50,605 - main.py[line:68] - INFO: file1.png exist
|
||||
2024-04-17 15:10:50,775 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-17 15:10:54,797 - main.py[line:41] - INFO: extractIdCardInfo 耗时4.191910982131958秒
|
||||
2024-04-17 15:10:54,798 - main.py[line:80] - INFO: extractIdCardInfoByBase64Data 耗时4.194473028182983秒
|
||||
2024-04-17 15:15:35,913 - main.py[line:91] - INFO: main.py len of parameter: 2
|
||||
2024-04-17 15:15:35,913 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
|
||||
2024-04-17 15:15:35,913 - main.py[line:100] - INFO: 0
|
||||
2024-04-17 15:15:35,913 - main.py[line:103] - INFO: 开始执行sys.stdin.read
|
||||
2024-04-17 15:15:35,913 - main.py[line:108] - INFO:
|
||||
2024-04-17 15:15:35,914 - main.py[line:109] - INFO: 56
|
||||
2024-04-17 15:15:35,914 - main.py[line:118] - INFO: 55
|
||||
2024-04-17 15:15:35,914 - main.py[line:119] - INFO: 0
|
||||
2024-04-17 15:15:36,072 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-17 15:15:38,824 - main.py[line:41] - INFO: extractIdCardInfo 耗时2.9099318981170654秒
|
||||
2024-04-17 15:41:57,020 - main.py[line:91] - INFO: main.py len of parameter: 2
|
||||
2024-04-17 15:41:57,021 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
|
||||
2024-04-17 15:41:57,021 - main.py[line:100] - INFO: 0
|
||||
2024-04-17 15:41:57,021 - main.py[line:103] - INFO: 开始执行sys.stdin.read
|
||||
2024-04-17 15:41:57,021 - main.py[line:108] - INFO:
|
||||
2024-04-17 15:41:57,021 - main.py[line:109] - INFO: 56
|
||||
2024-04-17 15:41:57,021 - main.py[line:118] - INFO: 55
|
||||
2024-04-17 15:41:57,021 - main.py[line:119] - INFO: 0
|
||||
2024-04-17 15:41:57,179 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-17 15:42:00,351 - main.py[line:41] - INFO: extractIdCardInfo 耗时3.3298799991607666秒
|
||||
2024-04-17 17:35:41,366 - main.py[line:91] - INFO: main.py len of parameter: 2
|
||||
2024-04-17 17:35:41,366 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
|
||||
2024-04-17 17:35:41,366 - main.py[line:100] - INFO: 0
|
||||
2024-04-17 17:35:41,366 - main.py[line:103] - INFO: 开始执行sys.stdin.read
|
||||
2024-04-17 17:35:41,366 - main.py[line:108] - INFO:
|
||||
2024-04-17 17:35:41,366 - main.py[line:109] - INFO: 56
|
||||
2024-04-17 17:35:41,366 - main.py[line:118] - INFO: 55
|
||||
2024-04-17 17:35:41,366 - main.py[line:119] - INFO: 0
|
||||
2024-04-17 17:35:41,810 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-17 17:35:44,799 - main.py[line:41] - INFO: extractIdCardInfo 耗时3.4327380657196045秒
|
||||
2024-04-18 14:28:41,233 - main.py[line:47] - INFO: extractIdCardInfoByBase64Data
|
||||
2024-04-18 14:28:41,233 - main.py[line:51] - INFO: not base64data1.empty()
|
||||
2024-04-18 14:28:41,235 - main.py[line:68] - INFO: file1.png exist
|
||||
2024-04-18 14:28:41,607 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-18 14:28:44,850 - main.py[line:41] - INFO: extractIdCardInfo 耗时3.614640951156616秒
|
||||
2024-04-18 14:28:44,850 - main.py[line:80] - INFO: extractIdCardInfoByBase64Data 耗时3.6167337894439697秒
|
||||
2024-04-18 17:20:03,651 - main.py[line:48] - INFO: extractIdCardInfoByBase64Data
|
||||
2024-04-18 17:20:03,651 - main.py[line:53] - INFO: not base64data1.empty()
|
||||
2024-04-18 17:20:03,653 - main.py[line:70] - INFO: file1.png exist
|
||||
2024-04-18 17:20:04,099 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-18 17:20:09,473 - main.py[line:42] - INFO: extractIdCardInfo 耗时5.820295810699463秒
|
||||
2024-04-18 17:20:09,473 - main.py[line:82] - INFO: extractIdCardInfoByBase64Data 耗时5.822228193283081秒
|
||||
2024-04-19 08:58:12,184 - main.py[line:48] - INFO: extractIdCardInfoByBase64Data
|
||||
2024-04-19 08:58:12,185 - main.py[line:53] - INFO: not base64data1.empty()
|
||||
2024-04-19 08:58:12,186 - main.py[line:70] - INFO: file1.png exist
|
||||
2024-04-19 08:58:12,373 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-19 08:58:18,685 - main.py[line:42] - INFO: extractIdCardInfo 耗时6.498270034790039秒
|
||||
2024-04-19 08:58:18,685 - main.py[line:82] - INFO: extractIdCardInfoByBase64Data 耗时6.499899864196777秒
|
||||
2024-04-19 09:00:57,608 - main.py[line:48] - INFO: extractIdCardInfoByBase64Data
|
||||
2024-04-19 09:00:57,609 - main.py[line:53] - INFO: not base64data1.empty()
|
||||
2024-04-19 09:00:57,609 - main.py[line:70] - INFO: file1.png exist
|
||||
2024-04-19 09:00:57,787 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-19 09:01:00,685 - main.py[line:42] - INFO: extractIdCardInfo 耗时3.075079917907715秒
|
||||
2024-04-19 09:01:00,685 - main.py[line:82] - INFO: extractIdCardInfoByBase64Data 耗时3.076277017593384秒
|
||||
2024-04-19 11:35:24,397 - main.py[line:38] - ERROR: unstructured package not found, please install it with `pip install unstructured`
|
||||
2024-04-19 11:35:24,398 - main.py[line:42] - INFO: extractIdCardInfo 耗时0.0006740093231201172秒
|
||||
2024-04-19 11:35:56,826 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-19 11:36:04,713 - main.py[line:42] - INFO: extractIdCardInfo 耗时8.266455888748169秒
|
||||
2024-04-19 11:36:11,025 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-19 11:36:25,929 - main.py[line:42] - INFO: extractIdCardInfo 耗时14.984557151794434秒
|
||||
2024-04-19 11:36:38,161 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-19 11:36:44,925 - main.py[line:42] - INFO: extractIdCardInfo 耗时6.842967987060547秒
|
||||
2024-04-21 11:10:25,753 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-21 11:11:01,024 - main.py[line:42] - INFO: extractIdCardInfo 耗时35.6481351852417秒
|
||||
2024-04-21 11:12:12,110 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-21 11:12:14,951 - main.py[line:42] - INFO: extractIdCardInfo 耗时3.035506248474121秒
|
||||
2024-04-21 12:18:44,506 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-21 12:18:47,380 - main.py[line:42] - INFO: extractIdCardInfo 耗时3.074634075164795秒
|
||||
2024-04-21 15:12:48,957 - main.py[line:96] - INFO: main.py len of parameter: 2
|
||||
2024-04-21 15:12:48,957 - main.py[line:104] - INFO: python 脚本里的接收到的参数是:
|
||||
2024-04-21 15:12:48,957 - main.py[line:105] - INFO: 0
|
||||
2024-04-21 15:12:48,957 - main.py[line:106] - INFO: 开始执行sys.stdin.read
|
||||
2024-04-21 15:12:48,957 - main.py[line:108] - INFO:
|
||||
2024-04-21 15:12:48,957 - main.py[line:109] - INFO: 56
|
||||
2024-04-21 15:12:48,957 - main.py[line:118] - INFO: 55
|
||||
2024-04-21 15:12:48,958 - main.py[line:119] - INFO: 0
|
||||
2024-04-21 15:12:48,958 - main.py[line:38] - ERROR: unstructured package not found, please install it with `pip install unstructured`
|
||||
2024-04-21 15:12:48,958 - main.py[line:42] - INFO: extractIdCardInfo 耗时8.0108642578125e-05秒
|
||||
2024-04-21 15:14:23,381 - main.py[line:96] - INFO: main.py len of parameter: 2
|
||||
2024-04-21 15:14:23,381 - main.py[line:104] - INFO: python 脚本里的接收到的参数是:
|
||||
2024-04-21 15:14:23,381 - main.py[line:105] - INFO: 0
|
||||
2024-04-21 15:14:23,381 - main.py[line:106] - INFO: 开始执行sys.stdin.read
|
||||
2024-04-21 15:14:23,382 - main.py[line:108] - INFO:
|
||||
2024-04-21 15:14:23,382 - main.py[line:109] - INFO: 56
|
||||
2024-04-21 15:14:23,382 - main.py[line:118] - INFO: 55
|
||||
2024-04-21 15:14:23,382 - main.py[line:119] - INFO: 0
|
||||
2024-04-21 15:14:23,653 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-21 15:14:55,795 - main.py[line:38] - ERROR: 'NoneType' object has no attribute 'strip'
|
||||
2024-04-21 15:14:55,796 - main.py[line:42] - INFO: extractIdCardInfo 耗时32.41406607627869秒
|
||||
2024-04-21 15:18:43,877 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-21 15:18:48,536 - main.py[line:38] - ERROR: 'NoneType' object has no attribute 'strip'
|
||||
2024-04-21 15:18:48,536 - main.py[line:42] - INFO: extractIdCardInfo 耗时4.845187187194824秒
|
||||
2024-04-21 15:20:34,004 - main.py[line:96] - INFO: main.py len of parameter: 2
|
||||
2024-04-21 15:20:34,005 - main.py[line:104] - INFO: python 脚本里的接收到的参数是:
|
||||
2024-04-21 15:20:34,005 - main.py[line:105] - INFO: 0
|
||||
2024-04-21 15:20:34,005 - main.py[line:106] - INFO: 开始执行sys.stdin.read
|
||||
2024-04-21 15:20:34,005 - main.py[line:108] - INFO:
|
||||
2024-04-21 15:20:34,005 - main.py[line:109] - INFO: 56
|
||||
2024-04-21 15:20:34,005 - main.py[line:118] - INFO: 55
|
||||
2024-04-21 15:20:34,005 - main.py[line:119] - INFO: 0
|
||||
2024-04-21 15:20:34,176 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-21 15:20:36,793 - main.py[line:38] - ERROR: 'NoneType' object has no attribute 'strip'
|
||||
2024-04-21 15:20:36,793 - main.py[line:42] - INFO: extractIdCardInfo 耗时2.7879230976104736秒
|
||||
2024-04-21 15:23:26,673 - main.py[line:96] - INFO: main.py len of parameter: 2
|
||||
2024-04-21 15:23:26,673 - main.py[line:104] - INFO: python 脚本里的接收到的参数是:
|
||||
2024-04-21 15:23:26,673 - main.py[line:105] - INFO: 0
|
||||
2024-04-21 15:23:26,673 - main.py[line:106] - INFO: 开始执行sys.stdin.read
|
||||
2024-04-21 15:23:26,673 - main.py[line:108] - INFO:
|
||||
2024-04-21 15:23:26,674 - main.py[line:109] - INFO: 56
|
||||
2024-04-21 15:23:26,674 - main.py[line:118] - INFO: 55
|
||||
2024-04-21 15:23:26,674 - main.py[line:119] - INFO: 0
|
||||
2024-04-21 15:23:26,856 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-21 15:23:36,818 - main.py[line:42] - INFO: extractIdCardInfo 耗时10.144625186920166秒
|
||||
2024-04-21 15:25:57,305 - main.py[line:96] - INFO: main.py len of parameter: 2
|
||||
2024-04-21 15:25:57,306 - main.py[line:104] - INFO: python 脚本里的接收到的参数是:
|
||||
2024-04-21 15:25:57,306 - main.py[line:105] - INFO: 0
|
||||
2024-04-21 15:25:57,306 - main.py[line:106] - INFO: 开始执行sys.stdin.read
|
||||
2024-04-21 15:25:57,306 - main.py[line:108] - INFO:
|
||||
2024-04-21 15:25:57,306 - main.py[line:109] - INFO: 56
|
||||
2024-04-21 15:25:57,306 - main.py[line:118] - INFO: 55
|
||||
2024-04-21 15:25:57,306 - main.py[line:119] - INFO: 0
|
||||
2024-04-21 15:25:57,483 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-21 15:26:00,173 - main.py[line:42] - INFO: extractIdCardInfo 耗时2.866658926010132秒
|
||||
2024-04-22 11:01:10,204 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-22 11:01:14,447 - main.py[line:42] - INFO: extractIdCardInfo 耗时4.689878702163696秒
|
||||
2024-04-22 11:01:54,639 - main.py[line:96] - INFO: main.py len of parameter: 2
|
||||
2024-04-22 11:01:54,640 - main.py[line:104] - INFO: python 脚本里的接收到的参数是:
|
||||
2024-04-22 11:01:54,640 - main.py[line:105] - INFO: 0
|
||||
2024-04-22 11:01:54,640 - main.py[line:106] - INFO: 开始执行sys.stdin.read
|
||||
2024-04-22 11:01:54,640 - main.py[line:108] - INFO:
|
||||
2024-04-22 11:01:54,640 - main.py[line:109] - INFO: 55
|
||||
2024-04-22 11:01:54,640 - main.py[line:118] - INFO: 55
|
||||
2024-04-22 11:01:54,640 - main.py[line:119] - INFO: 0
|
||||
2024-04-22 11:01:54,819 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
|
||||
2024-04-22 11:01:57,426 - main.py[line:42] - INFO: extractIdCardInfo 耗时2.785860061645508秒
|
||||
2024-04-22 15:28:23,501 - maincopy.py[line:81] - ERROR: 'NoneType' object has no attribute 'strip'
|
||||
2024-04-22 16:18:56,272 - maincopy.py[line:38] - ERROR: 'NoneType' object has no attribute 'shape'
|
||||
2024-04-22 16:21:44,808 - maincopy.py[line:38] - ERROR: 'NoneType' object has no attribute 'shape'
|
||||
2024-04-22 16:28:20,400 - maincopy.py[line:38] - ERROR: 'NoneType' object has no attribute 'shape'
|
||||
2024-04-22 16:28:20,400 - maincopy.py[line:83] - ERROR: local variable 'jsonString' referenced before assignment
|
||||
2024-04-22 16:29:46,501 - maincopy.py[line:38] - ERROR: 'NoneType' object has no attribute 'shape'
|
||||
2024-04-22 16:29:46,502 - maincopy.py[line:83] - ERROR: local variable 'jsonString' referenced before assignment
|
||||
2024-04-22 16:32:09,871 - maincopy.py[line:38] - ERROR: 'NoneType' object has no attribute 'shape'
|
||||
2024-04-22 16:32:09,872 - maincopy.py[line:83] - ERROR: local variable 'jsonString' referenced before assignment
|
||||
|
|
@ -7,6 +7,7 @@ from configs.basic_config import logger
|
|||
import base64
|
||||
import time
|
||||
import os
|
||||
import json
|
||||
|
||||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
||||
|
||||
|
|
@ -33,7 +34,8 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str:
|
|||
context += "\n".join([doc.page_content for doc in docs])
|
||||
|
||||
extractor = IdentityCardExtractor()
|
||||
jsonString = extractor.extract_text(context)
|
||||
tempdict = extractor.extract_text(context)
|
||||
json_string = json.dumps(tempdict, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,15 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import sys
|
||||
import io
|
||||
import os
|
||||
from paddleocr import PaddleOCR, draw_ocr
|
||||
from paddleocr import PaddleOCR
|
||||
import time
|
||||
from configs.basic_config import logger
|
||||
from extractor.identitycard_extractor import IdentityCardExtractor
|
||||
import base64
|
||||
import json
|
||||
|
||||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
||||
def extractIdCardInfo(type:int, filePath1: str, filePath2: str)->str:
|
||||
if (0 == type):
|
||||
return extractIdCardInfoByPath(filePath1, filePath2)
|
||||
|
|
@ -19,6 +22,7 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str:
|
|||
ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
|
||||
text = ""
|
||||
start_time = time.time() # 记录结束时间
|
||||
jsonString = ""
|
||||
try:
|
||||
if os.path.exists(filePath1):
|
||||
result = ocr.ocr(filePath1, cls=False)
|
||||
|
|
@ -40,7 +44,8 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str:
|
|||
if 0 != len(text):
|
||||
logger.info(f"text:{text}")
|
||||
extractor = IdentityCardExtractor()
|
||||
jsonString = extractor.extract_textbyPaddle(text)
|
||||
tempdict = extractor.extract_textbyPaddle(text)
|
||||
jsonString = json.dumps(tempdict, ensure_ascii=False)
|
||||
end_time = time.time() # 记录结束时间
|
||||
execution_time = end_time - start_time # 计算执行时间
|
||||
logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}秒")
|
||||
|
|
@ -62,7 +67,7 @@ def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str:
|
|||
image_data2 = base64.b64decode(base64Data2)
|
||||
with open("file2.png", "wb") as file:
|
||||
file.write(image_data2)
|
||||
|
||||
|
||||
if os.path.exists("file1.png") and os.path.exists("file2.png"):
|
||||
logger.info(f"file1.png and file2.png exist")
|
||||
jsonString = extractIdCardInfoByPath("file1.png","file2.png")
|
||||
|
|
@ -90,11 +95,12 @@ def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str:
|
|||
# jsonString = extractIdCardInfoByBase64Data(base64_image_string,"")
|
||||
# jsonString = extractIdCardInfoByBase64Data("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg")
|
||||
# print(jsonString)
|
||||
#
|
||||
#jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG","")
|
||||
# #
|
||||
# jsonString = extractIdCardInfoByPath("./images/han.jpg","")
|
||||
# logger.info(f"test")
|
||||
# jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg")
|
||||
# print(jsonString)
|
||||
|
||||
#
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
logger.info(f"main.py len of parameter: {len(sys.argv)}")
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -12,7 +12,29 @@
|
|||
<option name="autoReloadType" value="SELECTIVE" />
|
||||
</component>
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="de15048c-2fb1-4f65-af80-34b29e3a0154" name="Changes" comment="first commit" />
|
||||
<list default="true" id="de15048c-2fb1-4f65-af80-34b29e3a0154" name="Changes" comment="first commit">
|
||||
<change beforePath="$PROJECT_DIR$/../OCRPython/extractor/identitycard_extractor.py" beforeDir="false" afterPath="$PROJECT_DIR$/../OCRPython/extractor/identitycard_extractor.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/../OCRPython/logs/ocr_reconginition.log" beforeDir="false" afterPath="$PROJECT_DIR$/../OCRPython/logs/ocr_reconginition.log" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/../OCRPython/main.py" beforeDir="false" afterPath="$PROJECT_DIR$/../OCRPython/main.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/../OCRPython/maincopy.py" beforeDir="false" afterPath="$PROJECT_DIR$/../OCRPython/maincopy.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/OCRTool-1.0-SNAPSHOT.jar" beforeDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/out/artifacts/OCRTool_jar/OCRTool.jar" beforeDir="false" afterPath="$PROJECT_DIR$/out/artifacts/OCRTool_jar/OCRTool.jar" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/pom.xml" beforeDir="false" afterPath="$PROJECT_DIR$/pom.xml" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/src/main/java/com/bonus/core/MySpringBootApplication.java" beforeDir="false" afterPath="$PROJECT_DIR$/src/main/java/com/bonus/core/MySpringBootApplication.java" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/src/main/java/com/bonus/core/OCRRecognition.java" beforeDir="false" afterPath="$PROJECT_DIR$/src/main/java/com/bonus/core/OCRRecognition.java" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/src/main/java/com/bonus/core/RecognitionController.java" beforeDir="false" afterPath="$PROJECT_DIR$/src/main/java/com/bonus/core/RecognitionController.java" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/src/main/resources/application.yml" beforeDir="false" afterPath="$PROJECT_DIR$/src/main/resources/application.yml" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/src/main/resources/libtesseract.a" beforeDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/src/main/resources/libtesseract.dylib" beforeDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/target/OCRTool-1.0-SNAPSHOT.jar" beforeDir="false" afterPath="$PROJECT_DIR$/target/OCRTool-1.0-SNAPSHOT.jar" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/target/classes/application.yml" beforeDir="false" afterPath="$PROJECT_DIR$/target/classes/application.yml" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/target/classes/com/bonus/core/MySpringBootApplication.class" beforeDir="false" afterPath="$PROJECT_DIR$/target/classes/com/bonus/core/MySpringBootApplication.class" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/target/classes/com/bonus/core/OCRRecognition.class" beforeDir="false" afterPath="$PROJECT_DIR$/target/classes/com/bonus/core/OCRRecognition.class" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/target/classes/com/bonus/core/RecognitionController.class" beforeDir="false" afterPath="$PROJECT_DIR$/target/classes/com/bonus/core/RecognitionController.class" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/target/classes/libtesseract.a" beforeDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/target/classes/libtesseract.dylib" beforeDir="false" />
|
||||
</list>
|
||||
<option name="SHOW_DIALOG" value="false" />
|
||||
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
||||
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
||||
|
|
@ -69,7 +91,7 @@
|
|||
"node.js.selected.package.eslint": "(autodetect)",
|
||||
"node.js.selected.package.tslint": "(autodetect)",
|
||||
"nodejs_package_manager_path": "npm",
|
||||
"project.structure.last.edited": "Project",
|
||||
"project.structure.last.edited": "Problems",
|
||||
"project.structure.proportion": "0.15",
|
||||
"project.structure.side.proportion": "0.1150748",
|
||||
"settings.editor.selected.configurable": "preferences.pluginManager",
|
||||
|
|
@ -84,7 +106,7 @@
|
|||
<recent name="com.bonus.core" />
|
||||
</key>
|
||||
</component>
|
||||
<component name="RunManager">
|
||||
<component name="RunManager" selected="Application.OCRTool">
|
||||
<configuration name="OCRTool" type="Application" factoryName="Application">
|
||||
<option name="MAIN_CLASS_NAME" value="com.bonus.core.MySpringBootApplication" />
|
||||
<module name="OCRTool" />
|
||||
|
|
@ -93,6 +115,13 @@
|
|||
<option name="Make" enabled="true" />
|
||||
</method>
|
||||
</configuration>
|
||||
<configuration name="MySpringBootApplication" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot" nameIsGenerated="true">
|
||||
<module name="OCRTool" />
|
||||
<option name="SPRING_BOOT_MAIN_CLASS" value="com.bonus.core.MySpringBootApplication" />
|
||||
<method v="2">
|
||||
<option name="Make" enabled="true" />
|
||||
</method>
|
||||
</configuration>
|
||||
</component>
|
||||
<component name="SharedIndexes">
|
||||
<attachedChunks>
|
||||
|
|
@ -118,7 +147,7 @@
|
|||
<workItem from="1713668202906" duration="5970000" />
|
||||
<workItem from="1713747776583" duration="310000" />
|
||||
<workItem from="1713748089331" duration="843000" />
|
||||
<workItem from="1713756484781" duration="10479000" />
|
||||
<workItem from="1713756484781" duration="35613000" />
|
||||
</task>
|
||||
<task id="LOCAL-00001" summary="first commit">
|
||||
<option name="closed" value="true" />
|
||||
|
|
@ -155,7 +184,19 @@
|
|||
<map>
|
||||
<entry key="MAIN">
|
||||
<value>
|
||||
<State />
|
||||
<State>
|
||||
<option name="FILTERS">
|
||||
<map>
|
||||
<entry key="branch">
|
||||
<value>
|
||||
<list>
|
||||
<option value="main" />
|
||||
</list>
|
||||
</value>
|
||||
</entry>
|
||||
</map>
|
||||
</option>
|
||||
</State>
|
||||
</value>
|
||||
</entry>
|
||||
</map>
|
||||
|
|
@ -168,19 +209,14 @@
|
|||
<component name="XDebuggerManager">
|
||||
<breakpoint-manager>
|
||||
<breakpoints>
|
||||
<line-breakpoint enabled="true" type="java-line">
|
||||
<url>file://$PROJECT_DIR$/src/main/java/com/bonus/core/RecognitionController.java</url>
|
||||
<line>9</line>
|
||||
<option name="timeStamp" value="2" />
|
||||
</line-breakpoint>
|
||||
<line-breakpoint enabled="true" type="java-line">
|
||||
<url>file://$PROJECT_DIR$/src/main/java/com/bonus/core/OCRRecognition.java</url>
|
||||
<line>28</line>
|
||||
<line>29</line>
|
||||
<option name="timeStamp" value="20" />
|
||||
</line-breakpoint>
|
||||
<line-breakpoint enabled="true" type="java-line">
|
||||
<url>file://$PROJECT_DIR$/src/main/java/com/bonus/core/OCRRecognition.java</url>
|
||||
<line>79</line>
|
||||
<line>90</line>
|
||||
<option name="timeStamp" value="21" />
|
||||
</line-breakpoint>
|
||||
</breakpoints>
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
182
OCRTool/pom.xml
182
OCRTool/pom.xml
|
|
@ -23,184 +23,32 @@
|
|||
<maven-jar-plugin.version>3.1.1</maven-jar-plugin.version>
|
||||
</properties>
|
||||
|
||||
<!-- <profiles>-->
|
||||
<!-- <profile>-->
|
||||
<!-- <id>dev</id>-->
|
||||
<!-- <activation>-->
|
||||
<!-- <activeByDefault>false</activeByDefault>-->
|
||||
<!-- </activation>-->
|
||||
<!-- <properties>-->
|
||||
<!-- <!– 在这里定义 dev 环境的配置 –>-->
|
||||
<!-- </properties>-->
|
||||
<!-- </profile>-->
|
||||
<!--<!– <profile>–>-->
|
||||
<!--<!– <id>prod</id>–>-->
|
||||
<!--<!– <properties>–>-->
|
||||
<!--<!– <!– 在这里定义 prod 环境的配置 –>–>-->
|
||||
<!--<!– </properties>–>-->
|
||||
<!--<!– </profile>–>-->
|
||||
<!-- </profiles>-->
|
||||
|
||||
<dependencies>
|
||||
<!-- SpringBoot 核心包 -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- <!– SpringBoot 测试 –>-->
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>org.springframework.boot</groupId>-->
|
||||
<!-- <artifactId>spring-boot-starter-test</artifactId>-->
|
||||
<!-- <scope>test</scope>-->
|
||||
<!-- </dependency>-->
|
||||
|
||||
<!-- <!– SpringBoot 拦截器 –>-->
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>org.springframework.boot</groupId>-->
|
||||
<!-- <artifactId>spring-boot-starter-aop</artifactId>-->
|
||||
<!-- </dependency>-->
|
||||
|
||||
<!-- SpringBoot Web容器 -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-web</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>net.sourceforge.tess4j</groupId>
|
||||
<artifactId>tess4j</artifactId>
|
||||
<version>4.5.1</version> <!-- 根据需要替换为最新版本 -->
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>net.java.dev.jna</groupId>
|
||||
<artifactId>jna</artifactId>
|
||||
<version>5.9.0</version> <!-- 根据需要替换为最新版本 -->
|
||||
</dependency>
|
||||
<!-- <!– spring-boot-devtools –>-->
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>org.springframework.boot</groupId>-->
|
||||
<!-- <artifactId>spring-boot-devtools</artifactId>-->
|
||||
<!-- <optional>true</optional> <!– 表示依赖不会传递 –>-->
|
||||
<!-- </dependency>-->
|
||||
|
||||
<!-- <!– spring security 安全认证 –>-->
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>org.springframework.boot</groupId>-->
|
||||
<!-- <artifactId>spring-boot-starter-security</artifactId>-->
|
||||
<!-- </dependency>-->
|
||||
|
||||
<!-- <!– redis 缓存操作 –>-->
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>org.springframework.boot</groupId>-->
|
||||
<!-- <artifactId>spring-boot-starter-data-redis</artifactId>-->
|
||||
<!-- </dependency>-->
|
||||
|
||||
<!-- <!– pool 对象池 –>-->
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>org.apache.commons</groupId>-->
|
||||
<!-- <artifactId>commons-pool2</artifactId>-->
|
||||
<!-- </dependency>-->
|
||||
|
||||
<!-- <!– Mysql驱动包 –>-->
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>mysql</groupId>-->
|
||||
<!-- <artifactId>mysql-connector-java</artifactId>-->
|
||||
<!-- <scope>runtime</scope>-->
|
||||
<!-- </dependency>-->
|
||||
|
||||
<!-- <!– pagehelper 分页插件 –>-->
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>com.github.pagehelper</groupId>-->
|
||||
<!-- <artifactId>pagehelper-spring-boot-starter</artifactId>-->
|
||||
<!-- <version>${pagehelper.spring.boot.starter.version}</version>-->
|
||||
<!-- </dependency>-->
|
||||
|
||||
<!-- <!– 阿里数据库连接池 –>-->
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>com.alibaba</groupId>-->
|
||||
<!-- <artifactId>druid-spring-boot-starter</artifactId>-->
|
||||
<!-- <version>${druid.version}</version>-->
|
||||
<!-- </dependency>-->
|
||||
|
||||
<!-- <!– 自定义验证注解 –>-->
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>org.springframework.boot</groupId>-->
|
||||
<!-- <artifactId>spring-boot-starter-validation</artifactId>-->
|
||||
<!-- </dependency>-->
|
||||
|
||||
<!-- <!– 常用工具类 –>-->
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>org.apache.commons</groupId>-->
|
||||
<!-- <artifactId>commons-lang3</artifactId>-->
|
||||
<!-- </dependency>-->
|
||||
|
||||
<!-- <!– io常用工具类 –>-->
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>commons-io</groupId>-->
|
||||
<!-- <artifactId>commons-io</artifactId>-->
|
||||
<!-- <version>${commons.io.version}</version>-->
|
||||
<!-- </dependency>-->
|
||||
|
||||
<!-- <!– 解析客户端操作系统、浏览器等 –>-->
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>eu.bitwalker</groupId>-->
|
||||
<!-- <artifactId>UserAgentUtils</artifactId>-->
|
||||
<!-- <version>${bitwalker.version}</version>-->
|
||||
<!-- </dependency>-->
|
||||
|
||||
<!-- <!– 阿里JSON解析器 –>-->
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>com.alibaba.fastjson2</groupId>-->
|
||||
<!-- <artifactId>fastjson2</artifactId>-->
|
||||
<!-- <version>${fastjson.version}</version>-->
|
||||
<!-- </dependency>-->
|
||||
|
||||
<!-- <!– Spring框架基本的核心工具–>-->
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>org.springframework</groupId>-->
|
||||
<!-- <artifactId>spring-context-support</artifactId>-->
|
||||
<!-- </dependency>-->
|
||||
|
||||
<!-- <!– Token生成与解析–>-->
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>io.jsonwebtoken</groupId>-->
|
||||
<!-- <artifactId>jjwt</artifactId>-->
|
||||
<!-- <version>${jwt.version}</version>-->
|
||||
<!-- </dependency>-->
|
||||
|
||||
<!-- <!– Jaxb –>-->
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>javax.xml.bind</groupId>-->
|
||||
<!-- <artifactId>jaxb-api</artifactId>-->
|
||||
<!-- </dependency>-->
|
||||
|
||||
<!-- <!– Swagger3依赖 –>-->
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>io.springfox</groupId>-->
|
||||
<!-- <artifactId>springfox-boot-starter</artifactId>-->
|
||||
<!-- <version>${swagger.version}</version>-->
|
||||
<!-- <exclusions>-->
|
||||
<!-- <exclusion>-->
|
||||
<!-- <groupId>io.swagger</groupId>-->
|
||||
<!-- <artifactId>swagger-models</artifactId>-->
|
||||
<!-- </exclusion>-->
|
||||
<!-- </exclusions>-->
|
||||
<!-- </dependency>-->
|
||||
</dependencies>
|
||||
|
||||
<!-- <build>-->
|
||||
<!-- <plugins>-->
|
||||
<!-- <plugin>-->
|
||||
<!-- <groupId>org.springframework.boot</groupId>-->
|
||||
<!-- <artifactId>spring-boot-maven-plugin</artifactId>-->
|
||||
<!-- <configuration>-->
|
||||
<!-- <profiles>-->
|
||||
<!-- <profile>default</profile>-->
|
||||
<!-- </profiles>-->
|
||||
<!-- </configuration>-->
|
||||
<!-- </plugin>-->
|
||||
<!-- </plugins>-->
|
||||
<!-- </build>-->
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>repackage</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
Binary file not shown.
Binary file not shown.
|
|
@ -16,11 +16,11 @@ public class MySpringBootApplication {
|
|||
context = SpringApplication.run(MySpringBootApplication.class, args);
|
||||
MySpringBootApplication app = new MySpringBootApplication();
|
||||
|
||||
//String filePath1 = "/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG";
|
||||
String filePath1 = "/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg";
|
||||
String filePath2 = "/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg";
|
||||
app.callExtractIdentityInfoByLocalPath(filePath1, filePath2);
|
||||
//app.callExtractIdentityInfoByBase64Data(filePath1, filePath2);
|
||||
//String filePath1 = "E:\\OCRTool\\OCRPython\\images\\id_card.JPG";
|
||||
// String filePath1 = "/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg";
|
||||
// String filePath2 = "/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg";
|
||||
//app.callExtractIdentityInfoByLocalPath(filePath1, "");
|
||||
app.callExtractIdentityInfoByBase64Data(filePath1, filePath2);
|
||||
}
|
||||
private static ApplicationContext context;
|
||||
|
||||
|
|
|
|||
|
|
@ -4,9 +4,9 @@ import org.springframework.beans.factory.annotation.Autowired;
|
|||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.OutputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
|
@ -22,9 +22,10 @@ public class OCRRecognition {
|
|||
public String extractInfo(IdentifyRecognitionParams recognitionParams){
|
||||
Process proc;
|
||||
List<String> stringList = new ArrayList<>();
|
||||
String lastElement = null;
|
||||
String lastElement = "";
|
||||
int offset = 0;
|
||||
int batchSize = 8092; // 设置每次写入的批次大小
|
||||
// 设置每次写入的批次大小
|
||||
int batchSize = 8092;
|
||||
try {
|
||||
System.out.println("开始打印从配置里读取的值:");
|
||||
System.out.println(this.configure.getPythonEnv());
|
||||
|
|
@ -35,7 +36,8 @@ public class OCRRecognition {
|
|||
System.out.println(recognitionParams.getRecognitionData1());
|
||||
System.out.println("参数2的base64 string 是:");
|
||||
System.out.println(recognitionParams.getRecognitionData2());
|
||||
String[] str = new String[]{this.configure.getPythonEnv(),this.configure.getScriptEnv(), String.valueOf(recognitionParams.getType().ordinal())/*, recognitionParams.getRecognitionData1(), recognitionParams.getRecognitionData2()*/};//
|
||||
//*, recognitionParams.getRecognitionData1(), recognitionParams.getRecognitionData2()*/
|
||||
String[] str = new String[]{this.configure.getPythonEnv(),this.configure.getScriptEnv(), String.valueOf(recognitionParams.getType().ordinal())};
|
||||
Runtime runtime = Runtime.getRuntime();
|
||||
if (runtime == null) {
|
||||
System.out.println("runtime == null");
|
||||
|
|
@ -46,31 +48,40 @@ public class OCRRecognition {
|
|||
OutputStream outputStream = proc.getOutputStream();
|
||||
if (!recognitionParams.getRecognitionData1().isEmpty()) {
|
||||
byte[] input = recognitionParams.getRecognitionData1().getBytes(StandardCharsets.UTF_8);
|
||||
long len = input.length;
|
||||
|
||||
while (offset < len) {
|
||||
int remaining = (int) (len - offset);
|
||||
int bytesToWrite = Math.min(remaining, batchSize);
|
||||
outputStream.write(input, offset, bytesToWrite);
|
||||
offset += bytesToWrite;
|
||||
}
|
||||
passParameter(outputStream, input);
|
||||
}
|
||||
if (!recognitionParams.getRecognitionData2().isEmpty()) {
|
||||
outputStream.write(System.lineSeparator().getBytes());
|
||||
byte[] input = recognitionParams.getRecognitionData2().getBytes(StandardCharsets.UTF_8);
|
||||
long len = input.length;
|
||||
while (offset < len) {
|
||||
int remaining = (int) (len - offset);
|
||||
int bytesToWrite = Math.min(remaining, batchSize);
|
||||
outputStream.write(input, offset, bytesToWrite);
|
||||
offset += bytesToWrite;
|
||||
}
|
||||
byte[] input = recognitionParams.getRecognitionData1().getBytes(StandardCharsets.UTF_8);
|
||||
passParameter(outputStream, input);
|
||||
}
|
||||
|
||||
// if (!recognitionParams.getRecognitionData1().isEmpty()) {
|
||||
// byte[] input = recognitionParams.getRecognitionData1().getBytes(StandardCharsets.UTF_8);
|
||||
// long len = input.length;
|
||||
//
|
||||
// while (offset < len) {
|
||||
// int remaining = (int) (len - offset);
|
||||
// int bytesToWrite = Math.min(remaining, batchSize);
|
||||
// outputStream.write(input, offset, bytesToWrite);
|
||||
// offset += bytesToWrite;
|
||||
// }
|
||||
// }
|
||||
// if (!recognitionParams.getRecognitionData2().isEmpty()) {
|
||||
// outputStream.write(System.lineSeparator().getBytes());
|
||||
// byte[] input = recognitionParams.getRecognitionData2().getBytes(StandardCharsets.UTF_8);
|
||||
// long len = input.length;
|
||||
// while (offset < len) {
|
||||
// int remaining = (int) (len - offset);
|
||||
// int bytesToWrite = Math.min(remaining, batchSize);
|
||||
// outputStream.write(input, offset, bytesToWrite);
|
||||
// offset += bytesToWrite;
|
||||
// }
|
||||
//
|
||||
outputStream.flush();
|
||||
outputStream.close();
|
||||
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(proc.getInputStream(), StandardCharsets.UTF_8));
|
||||
String line = null;
|
||||
|
||||
while ((line = in.readLine()) != null) {
|
||||
stringList.add(line);
|
||||
}
|
||||
|
|
@ -85,6 +96,20 @@ public class OCRRecognition {
|
|||
return lastElement;
|
||||
}
|
||||
|
||||
private void passParameter(OutputStream outputStream, byte[] input) throws IOException {
|
||||
int offset = 0;
|
||||
// 设置每次写入的批次大小
|
||||
int batchSize = 8092;
|
||||
long len = input.length;
|
||||
|
||||
while (offset < len) {
|
||||
int remaining = (int) (len - offset);
|
||||
int bytesToWrite = Math.min(remaining, batchSize);
|
||||
outputStream.write(input, offset, bytesToWrite);
|
||||
offset += bytesToWrite;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -5,20 +5,12 @@ import org.springframework.web.bind.annotation.*;
|
|||
|
||||
@RestController
|
||||
public class RecognitionController {
|
||||
// @GetMapping("/hello")
|
||||
// public String hello() {
|
||||
// return "Hello, World!";
|
||||
// }
|
||||
|
||||
@Autowired OCRRecognition ocrRecognition;
|
||||
@PostMapping("/recognition")
|
||||
public String recognition(@RequestBody IdentifyRecognitionParams identifyRecognitionParams){
|
||||
String javaString = "";
|
||||
IdentifyRecognitionParams.RecognitionType type = identifyRecognitionParams.getType();
|
||||
String idData1 = identifyRecognitionParams.getRecognitionData1();
|
||||
String idData2 = identifyRecognitionParams.getRecognitionData2();
|
||||
javaString = ocrRecognition.extractInfo(identifyRecognitionParams);
|
||||
return javaString;
|
||||
return ocrRecognition.extractInfo(identifyRecognitionParams);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -1,7 +1,3 @@
|
|||
spring:
|
||||
profiles:
|
||||
active=dev:
|
||||
|
||||
recognition:
|
||||
# pythonEnv: /Users/wangvivi/Desktop/MySelf/myenv/bin/python
|
||||
# scriptEnv: /Users/wangvivi/Desktop/Code/Component/OCRPython/main.py
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -1,7 +1,3 @@
|
|||
spring:
|
||||
profiles:
|
||||
active=dev:
|
||||
|
||||
recognition:
|
||||
# pythonEnv: /Users/wangvivi/Desktop/MySelf/myenv/bin/python
|
||||
# scriptEnv: /Users/wangvivi/Desktop/Code/Component/OCRPython/main.py
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue