Compare commits

..

No commits in common. "733f8288e68981ddbbc331ff90a322d0a530be4c" and "ffc9d68e9cc0cd17bb6a2fab63f0df4988db2fbb" have entirely different histories.

28 changed files with 473 additions and 150 deletions

BIN
.DS_Store vendored

Binary file not shown.

BIN
OCRPython/.DS_Store vendored

Binary file not shown.

View File

@ -35,7 +35,7 @@ class IdentityCardExtractor(Extractor):
def extract_textbyPaddle(self, text:str)->dict: def extract_textbyPaddle(self, text:str)->dict:
try: try:
patterns = { patterns = {
"issuingAuthority": r"签发机关\n*(.+?)\n", "issuingAuthority": r"签发机关\n(.+?)\n",
"validTime": r"有效期限\n*(.+?)\n", "validTime": r"有效期限\n*(.+?)\n",
"name": r"姓名(.*?)\n", ##### "name": r"姓名(.*?)\n", #####
"gender": r"(\S)民族", "gender": r"(\S)民族",
@ -82,21 +82,21 @@ class InvoiceExtractor(Extractor):
# 幢2001室 # 幢2001室
# 公民身份号码 # 公民身份号码
# 440203197306192118""" # 440203197306192118"""
# text = """ text = """
# 中华人民共和国 中华人民共和国
# 居民身份证 居民身份证
# oo oo
# 签发机关宿州市公安局桥分局 签发机关宿州市公安局桥分局
# 有效期限2023.01.18-2043.01.18 有效期限2023.01.18-2043.01.18
# 姓名郭乾坤 姓名郭乾坤
# 性别男民族汉 性别男民族汉
# 出生1994年10月17日 出生1994年10月17日
# 住址 安徽省宿州市场桥区朱仙 住址 安徽省宿州市场桥区朱仙
# 庄镇郭庙村郭家组6号 庄镇郭庙村郭家组6号
# 公民身份号码 公民身份号码
# 34220119941017327X 34220119941017327X
# """ """
# extractor = IdentityCardExtractor() extractor = IdentityCardExtractor()
#
# jsonstring = extractor.extract_textbyPaddle(text) jsonstring = extractor.extract_textbyPaddle(text)
# print(jsonstring) print(jsonstring)

View File

@ -0,0 +1,224 @@
2024-04-17 14:28:11,092 - main.py[line:80] - INFO: main.py len of parameter: 2
2024-04-17 14:28:11,093 - main.py[line:87] - INFO: python 脚本里的接收到的参数是:
2024-04-17 14:28:11,093 - main.py[line:88] - INFO: 1
2024-04-17 14:28:11,093 - main.py[line:91] - INFO: 开始执行sys.stdin.read
2024-04-17 14:28:31,648 - main.py[line:97] - INFO: 399921
2024-04-17 14:29:55,152 - main.py[line:80] - INFO: main.py len of parameter: 2
2024-04-17 14:29:55,152 - main.py[line:87] - INFO: python 脚本里的接收到的参数是:
2024-04-17 14:29:55,152 - main.py[line:88] - INFO: 1
2024-04-17 14:29:55,152 - main.py[line:91] - INFO: 开始执行sys.stdin.read
2024-04-17 14:29:55,154 - main.py[line:96] - INFO:
2024-04-17 14:29:55,154 - main.py[line:97] - INFO: 399921
2024-04-17 14:29:55,154 - main.py[line:106] - INFO: 199960
2024-04-17 14:29:55,155 - main.py[line:107] - INFO: 199960
2024-04-17 14:32:23,556 - main.py[line:80] - INFO: main.py len of parameter: 2
2024-04-17 14:32:23,556 - main.py[line:88] - INFO: python 脚本里的接收到的参数是:
2024-04-17 14:32:23,556 - main.py[line:89] - INFO: 1
2024-04-17 14:32:23,556 - main.py[line:92] - INFO: 开始执行sys.stdin.read
2024-04-17 14:32:23,558 - main.py[line:97] - INFO:
2024-04-17 14:32:23,558 - main.py[line:98] - INFO: 399921
2024-04-17 14:32:23,559 - main.py[line:107] - INFO: 199960
2024-04-17 14:32:23,559 - main.py[line:108] - INFO: 199960
2024-04-17 14:32:23,559 - main.py[line:43] - INFO: extractIdCardInfoByBase64Data
2024-04-17 14:32:23,559 - main.py[line:46] - INFO: not base64data1.empty()
2024-04-17 14:32:23,562 - main.py[line:52] - INFO: not base64Data2.empty()
2024-04-17 14:32:23,563 - main.py[line:58] - INFO: file1.png and file2.png exist
2024-04-17 14:32:23,755 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-17 14:32:26,428 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-17 14:32:26,934 - main.py[line:37] - INFO: extractIdCardInfo 耗时3.37139892578125秒
2024-04-17 14:32:26,934 - main.py[line:69] - INFO: extractIdCardInfo 耗时3.375098705291748秒
2024-04-17 15:00:10,376 - main.py[line:91] - INFO: main.py len of parameter: 2
2024-04-17 15:00:10,376 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
2024-04-17 15:00:10,376 - main.py[line:100] - INFO: 1
2024-04-17 15:00:10,376 - main.py[line:103] - INFO: 开始执行sys.stdin.read
2024-04-17 15:00:10,378 - main.py[line:108] - INFO:
2024-04-17 15:00:10,379 - main.py[line:109] - INFO: 399921
2024-04-17 15:00:10,379 - main.py[line:118] - INFO: 199960
2024-04-17 15:00:10,379 - main.py[line:119] - INFO: 199960
2024-04-17 15:00:10,379 - main.py[line:47] - INFO: extractIdCardInfoByBase64Data
2024-04-17 15:00:10,379 - main.py[line:51] - INFO: not base64data1.empty()
2024-04-17 15:00:10,381 - main.py[line:57] - INFO: not base64Data2.empty()
2024-04-17 15:00:10,382 - main.py[line:63] - INFO: file1.png and file2.png exist
2024-04-17 15:00:10,578 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-17 15:00:13,327 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-17 15:00:13,839 - main.py[line:41] - INFO: extractIdCardInfo 耗时3.456987142562866秒
2024-04-17 15:00:13,840 - main.py[line:80] - INFO: extractIdCardInfoByBase64Data 耗时3.460446834564209秒
2024-04-17 15:01:33,280 - main.py[line:91] - INFO: main.py len of parameter: 2
2024-04-17 15:01:33,280 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
2024-04-17 15:01:33,280 - main.py[line:100] - INFO: 1
2024-04-17 15:01:33,280 - main.py[line:103] - INFO: 开始执行sys.stdin.read
2024-04-17 15:01:33,282 - main.py[line:108] - INFO:
2024-04-17 15:01:33,283 - main.py[line:109] - INFO: 399921
2024-04-17 15:01:33,283 - main.py[line:118] - INFO: 199960
2024-04-17 15:01:33,283 - main.py[line:119] - INFO: 199960
2024-04-17 15:01:33,283 - main.py[line:47] - INFO: extractIdCardInfoByBase64Data
2024-04-17 15:01:33,283 - main.py[line:51] - INFO: not base64data1.empty()
2024-04-17 15:01:33,285 - main.py[line:57] - INFO: not base64Data2.empty()
2024-04-17 15:01:33,286 - main.py[line:63] - INFO: file1.png and file2.png exist
2024-04-17 15:01:33,452 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-17 15:01:35,814 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-17 15:01:36,303 - main.py[line:41] - INFO: extractIdCardInfo 耗时3.0165488719940186秒
2024-04-17 15:01:36,303 - main.py[line:80] - INFO: extractIdCardInfoByBase64Data 耗时3.0200371742248535秒
2024-04-17 15:06:02,257 - main.py[line:91] - INFO: main.py len of parameter: 2
2024-04-17 15:06:02,258 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
2024-04-17 15:06:02,258 - main.py[line:100] - INFO: 1
2024-04-17 15:06:02,258 - main.py[line:103] - INFO: 开始执行sys.stdin.read
2024-04-17 15:09:35,113 - main.py[line:126] - ERROR: KeyboardInterrupt
2024-04-17 15:10:50,601 - main.py[line:91] - INFO: main.py len of parameter: 2
2024-04-17 15:10:50,601 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
2024-04-17 15:10:50,602 - main.py[line:100] - INFO: 1
2024-04-17 15:10:50,602 - main.py[line:103] - INFO: 开始执行sys.stdin.read
2024-04-17 15:10:50,602 - main.py[line:108] - INFO:
2024-04-17 15:10:50,603 - main.py[line:109] - INFO: 316256
2024-04-17 15:10:50,603 - main.py[line:118] - INFO: 316256
2024-04-17 15:10:50,603 - main.py[line:119] - INFO: 0
2024-04-17 15:10:50,603 - main.py[line:47] - INFO: extractIdCardInfoByBase64Data
2024-04-17 15:10:50,603 - main.py[line:51] - INFO: not base64data1.empty()
2024-04-17 15:10:50,605 - main.py[line:68] - INFO: file1.png exist
2024-04-17 15:10:50,775 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-17 15:10:54,797 - main.py[line:41] - INFO: extractIdCardInfo 耗时4.191910982131958秒
2024-04-17 15:10:54,798 - main.py[line:80] - INFO: extractIdCardInfoByBase64Data 耗时4.194473028182983秒
2024-04-17 15:15:35,913 - main.py[line:91] - INFO: main.py len of parameter: 2
2024-04-17 15:15:35,913 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
2024-04-17 15:15:35,913 - main.py[line:100] - INFO: 0
2024-04-17 15:15:35,913 - main.py[line:103] - INFO: 开始执行sys.stdin.read
2024-04-17 15:15:35,913 - main.py[line:108] - INFO:
2024-04-17 15:15:35,914 - main.py[line:109] - INFO: 56
2024-04-17 15:15:35,914 - main.py[line:118] - INFO: 55
2024-04-17 15:15:35,914 - main.py[line:119] - INFO: 0
2024-04-17 15:15:36,072 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-17 15:15:38,824 - main.py[line:41] - INFO: extractIdCardInfo 耗时2.9099318981170654秒
2024-04-17 15:41:57,020 - main.py[line:91] - INFO: main.py len of parameter: 2
2024-04-17 15:41:57,021 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
2024-04-17 15:41:57,021 - main.py[line:100] - INFO: 0
2024-04-17 15:41:57,021 - main.py[line:103] - INFO: 开始执行sys.stdin.read
2024-04-17 15:41:57,021 - main.py[line:108] - INFO:
2024-04-17 15:41:57,021 - main.py[line:109] - INFO: 56
2024-04-17 15:41:57,021 - main.py[line:118] - INFO: 55
2024-04-17 15:41:57,021 - main.py[line:119] - INFO: 0
2024-04-17 15:41:57,179 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-17 15:42:00,351 - main.py[line:41] - INFO: extractIdCardInfo 耗时3.3298799991607666秒
2024-04-17 17:35:41,366 - main.py[line:91] - INFO: main.py len of parameter: 2
2024-04-17 17:35:41,366 - main.py[line:99] - INFO: python 脚本里的接收到的参数是:
2024-04-17 17:35:41,366 - main.py[line:100] - INFO: 0
2024-04-17 17:35:41,366 - main.py[line:103] - INFO: 开始执行sys.stdin.read
2024-04-17 17:35:41,366 - main.py[line:108] - INFO:
2024-04-17 17:35:41,366 - main.py[line:109] - INFO: 56
2024-04-17 17:35:41,366 - main.py[line:118] - INFO: 55
2024-04-17 17:35:41,366 - main.py[line:119] - INFO: 0
2024-04-17 17:35:41,810 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-17 17:35:44,799 - main.py[line:41] - INFO: extractIdCardInfo 耗时3.4327380657196045秒
2024-04-18 14:28:41,233 - main.py[line:47] - INFO: extractIdCardInfoByBase64Data
2024-04-18 14:28:41,233 - main.py[line:51] - INFO: not base64data1.empty()
2024-04-18 14:28:41,235 - main.py[line:68] - INFO: file1.png exist
2024-04-18 14:28:41,607 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-18 14:28:44,850 - main.py[line:41] - INFO: extractIdCardInfo 耗时3.614640951156616秒
2024-04-18 14:28:44,850 - main.py[line:80] - INFO: extractIdCardInfoByBase64Data 耗时3.6167337894439697秒
2024-04-18 17:20:03,651 - main.py[line:48] - INFO: extractIdCardInfoByBase64Data
2024-04-18 17:20:03,651 - main.py[line:53] - INFO: not base64data1.empty()
2024-04-18 17:20:03,653 - main.py[line:70] - INFO: file1.png exist
2024-04-18 17:20:04,099 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-18 17:20:09,473 - main.py[line:42] - INFO: extractIdCardInfo 耗时5.820295810699463秒
2024-04-18 17:20:09,473 - main.py[line:82] - INFO: extractIdCardInfoByBase64Data 耗时5.822228193283081秒
2024-04-19 08:58:12,184 - main.py[line:48] - INFO: extractIdCardInfoByBase64Data
2024-04-19 08:58:12,185 - main.py[line:53] - INFO: not base64data1.empty()
2024-04-19 08:58:12,186 - main.py[line:70] - INFO: file1.png exist
2024-04-19 08:58:12,373 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-19 08:58:18,685 - main.py[line:42] - INFO: extractIdCardInfo 耗时6.498270034790039秒
2024-04-19 08:58:18,685 - main.py[line:82] - INFO: extractIdCardInfoByBase64Data 耗时6.499899864196777秒
2024-04-19 09:00:57,608 - main.py[line:48] - INFO: extractIdCardInfoByBase64Data
2024-04-19 09:00:57,609 - main.py[line:53] - INFO: not base64data1.empty()
2024-04-19 09:00:57,609 - main.py[line:70] - INFO: file1.png exist
2024-04-19 09:00:57,787 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-19 09:01:00,685 - main.py[line:42] - INFO: extractIdCardInfo 耗时3.075079917907715秒
2024-04-19 09:01:00,685 - main.py[line:82] - INFO: extractIdCardInfoByBase64Data 耗时3.076277017593384秒
2024-04-19 11:35:24,397 - main.py[line:38] - ERROR: unstructured package not found, please install it with `pip install unstructured`
2024-04-19 11:35:24,398 - main.py[line:42] - INFO: extractIdCardInfo 耗时0.0006740093231201172秒
2024-04-19 11:35:56,826 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-19 11:36:04,713 - main.py[line:42] - INFO: extractIdCardInfo 耗时8.266455888748169秒
2024-04-19 11:36:11,025 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-19 11:36:25,929 - main.py[line:42] - INFO: extractIdCardInfo 耗时14.984557151794434秒
2024-04-19 11:36:38,161 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-19 11:36:44,925 - main.py[line:42] - INFO: extractIdCardInfo 耗时6.842967987060547秒
2024-04-21 11:10:25,753 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-21 11:11:01,024 - main.py[line:42] - INFO: extractIdCardInfo 耗时35.6481351852417秒
2024-04-21 11:12:12,110 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-21 11:12:14,951 - main.py[line:42] - INFO: extractIdCardInfo 耗时3.035506248474121秒
2024-04-21 12:18:44,506 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-21 12:18:47,380 - main.py[line:42] - INFO: extractIdCardInfo 耗时3.074634075164795秒
2024-04-21 15:12:48,957 - main.py[line:96] - INFO: main.py len of parameter: 2
2024-04-21 15:12:48,957 - main.py[line:104] - INFO: python 脚本里的接收到的参数是:
2024-04-21 15:12:48,957 - main.py[line:105] - INFO: 0
2024-04-21 15:12:48,957 - main.py[line:106] - INFO: 开始执行sys.stdin.read
2024-04-21 15:12:48,957 - main.py[line:108] - INFO:
2024-04-21 15:12:48,957 - main.py[line:109] - INFO: 56
2024-04-21 15:12:48,957 - main.py[line:118] - INFO: 55
2024-04-21 15:12:48,958 - main.py[line:119] - INFO: 0
2024-04-21 15:12:48,958 - main.py[line:38] - ERROR: unstructured package not found, please install it with `pip install unstructured`
2024-04-21 15:12:48,958 - main.py[line:42] - INFO: extractIdCardInfo 耗时8.0108642578125e-05秒
2024-04-21 15:14:23,381 - main.py[line:96] - INFO: main.py len of parameter: 2
2024-04-21 15:14:23,381 - main.py[line:104] - INFO: python 脚本里的接收到的参数是:
2024-04-21 15:14:23,381 - main.py[line:105] - INFO: 0
2024-04-21 15:14:23,381 - main.py[line:106] - INFO: 开始执行sys.stdin.read
2024-04-21 15:14:23,382 - main.py[line:108] - INFO:
2024-04-21 15:14:23,382 - main.py[line:109] - INFO: 56
2024-04-21 15:14:23,382 - main.py[line:118] - INFO: 55
2024-04-21 15:14:23,382 - main.py[line:119] - INFO: 0
2024-04-21 15:14:23,653 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-21 15:14:55,795 - main.py[line:38] - ERROR: 'NoneType' object has no attribute 'strip'
2024-04-21 15:14:55,796 - main.py[line:42] - INFO: extractIdCardInfo 耗时32.41406607627869秒
2024-04-21 15:18:43,877 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-21 15:18:48,536 - main.py[line:38] - ERROR: 'NoneType' object has no attribute 'strip'
2024-04-21 15:18:48,536 - main.py[line:42] - INFO: extractIdCardInfo 耗时4.845187187194824秒
2024-04-21 15:20:34,004 - main.py[line:96] - INFO: main.py len of parameter: 2
2024-04-21 15:20:34,005 - main.py[line:104] - INFO: python 脚本里的接收到的参数是:
2024-04-21 15:20:34,005 - main.py[line:105] - INFO: 0
2024-04-21 15:20:34,005 - main.py[line:106] - INFO: 开始执行sys.stdin.read
2024-04-21 15:20:34,005 - main.py[line:108] - INFO:
2024-04-21 15:20:34,005 - main.py[line:109] - INFO: 56
2024-04-21 15:20:34,005 - main.py[line:118] - INFO: 55
2024-04-21 15:20:34,005 - main.py[line:119] - INFO: 0
2024-04-21 15:20:34,176 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-21 15:20:36,793 - main.py[line:38] - ERROR: 'NoneType' object has no attribute 'strip'
2024-04-21 15:20:36,793 - main.py[line:42] - INFO: extractIdCardInfo 耗时2.7879230976104736秒
2024-04-21 15:23:26,673 - main.py[line:96] - INFO: main.py len of parameter: 2
2024-04-21 15:23:26,673 - main.py[line:104] - INFO: python 脚本里的接收到的参数是:
2024-04-21 15:23:26,673 - main.py[line:105] - INFO: 0
2024-04-21 15:23:26,673 - main.py[line:106] - INFO: 开始执行sys.stdin.read
2024-04-21 15:23:26,673 - main.py[line:108] - INFO:
2024-04-21 15:23:26,674 - main.py[line:109] - INFO: 56
2024-04-21 15:23:26,674 - main.py[line:118] - INFO: 55
2024-04-21 15:23:26,674 - main.py[line:119] - INFO: 0
2024-04-21 15:23:26,856 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-21 15:23:36,818 - main.py[line:42] - INFO: extractIdCardInfo 耗时10.144625186920166秒
2024-04-21 15:25:57,305 - main.py[line:96] - INFO: main.py len of parameter: 2
2024-04-21 15:25:57,306 - main.py[line:104] - INFO: python 脚本里的接收到的参数是:
2024-04-21 15:25:57,306 - main.py[line:105] - INFO: 0
2024-04-21 15:25:57,306 - main.py[line:106] - INFO: 开始执行sys.stdin.read
2024-04-21 15:25:57,306 - main.py[line:108] - INFO:
2024-04-21 15:25:57,306 - main.py[line:109] - INFO: 56
2024-04-21 15:25:57,306 - main.py[line:118] - INFO: 55
2024-04-21 15:25:57,306 - main.py[line:119] - INFO: 0
2024-04-21 15:25:57,483 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-21 15:26:00,173 - main.py[line:42] - INFO: extractIdCardInfo 耗时2.866658926010132秒
2024-04-22 11:01:10,204 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-22 11:01:14,447 - main.py[line:42] - INFO: extractIdCardInfo 耗时4.689878702163696秒
2024-04-22 11:01:54,639 - main.py[line:96] - INFO: main.py len of parameter: 2
2024-04-22 11:01:54,640 - main.py[line:104] - INFO: python 脚本里的接收到的参数是:
2024-04-22 11:01:54,640 - main.py[line:105] - INFO: 0
2024-04-22 11:01:54,640 - main.py[line:106] - INFO: 开始执行sys.stdin.read
2024-04-22 11:01:54,640 - main.py[line:108] - INFO:
2024-04-22 11:01:54,640 - main.py[line:109] - INFO: 55
2024-04-22 11:01:54,640 - main.py[line:118] - INFO: 55
2024-04-22 11:01:54,640 - main.py[line:119] - INFO: 0
2024-04-22 11:01:54,819 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-22 11:01:57,426 - main.py[line:42] - INFO: extractIdCardInfo 耗时2.785860061645508秒
2024-04-22 15:28:23,501 - maincopy.py[line:81] - ERROR: 'NoneType' object has no attribute 'strip'
2024-04-22 16:18:56,272 - maincopy.py[line:38] - ERROR: 'NoneType' object has no attribute 'shape'
2024-04-22 16:21:44,808 - maincopy.py[line:38] - ERROR: 'NoneType' object has no attribute 'shape'
2024-04-22 16:28:20,400 - maincopy.py[line:38] - ERROR: 'NoneType' object has no attribute 'shape'
2024-04-22 16:28:20,400 - maincopy.py[line:83] - ERROR: local variable 'jsonString' referenced before assignment
2024-04-22 16:29:46,501 - maincopy.py[line:38] - ERROR: 'NoneType' object has no attribute 'shape'
2024-04-22 16:29:46,502 - maincopy.py[line:83] - ERROR: local variable 'jsonString' referenced before assignment
2024-04-22 16:32:09,871 - maincopy.py[line:38] - ERROR: 'NoneType' object has no attribute 'shape'
2024-04-22 16:32:09,872 - maincopy.py[line:83] - ERROR: local variable 'jsonString' referenced before assignment

View File

@ -7,7 +7,6 @@ from configs.basic_config import logger
import base64 import base64
import time import time
import os import os
import json
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
@ -34,8 +33,7 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str:
context += "\n".join([doc.page_content for doc in docs]) context += "\n".join([doc.page_content for doc in docs])
extractor = IdentityCardExtractor() extractor = IdentityCardExtractor()
tempdict = extractor.extract_text(context) jsonString = extractor.extract_text(context)
json_string = json.dumps(tempdict, ensure_ascii=False)
except Exception as e: except Exception as e:
logger.error(e) logger.error(e)

View File

@ -1,15 +1,12 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import sys import sys
import io
import os import os
from paddleocr import PaddleOCR from paddleocr import PaddleOCR, draw_ocr
import time import time
from configs.basic_config import logger from configs.basic_config import logger
from extractor.identitycard_extractor import IdentityCardExtractor from extractor.identitycard_extractor import IdentityCardExtractor
import base64 import base64
import json
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
def extractIdCardInfo(type:int, filePath1: str, filePath2: str)->str: def extractIdCardInfo(type:int, filePath1: str, filePath2: str)->str:
if (0 == type): if (0 == type):
return extractIdCardInfoByPath(filePath1, filePath2) return extractIdCardInfoByPath(filePath1, filePath2)
@ -22,7 +19,6 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str:
ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
text = "" text = ""
start_time = time.time() # 记录结束时间 start_time = time.time() # 记录结束时间
jsonString = ""
try: try:
if os.path.exists(filePath1): if os.path.exists(filePath1):
result = ocr.ocr(filePath1, cls=False) result = ocr.ocr(filePath1, cls=False)
@ -44,8 +40,7 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str:
if 0 != len(text): if 0 != len(text):
logger.info(f"text:{text}") logger.info(f"text:{text}")
extractor = IdentityCardExtractor() extractor = IdentityCardExtractor()
tempdict = extractor.extract_textbyPaddle(text) jsonString = extractor.extract_textbyPaddle(text)
jsonString = json.dumps(tempdict, ensure_ascii=False)
end_time = time.time() # 记录结束时间 end_time = time.time() # 记录结束时间
execution_time = end_time - start_time # 计算执行时间 execution_time = end_time - start_time # 计算执行时间
logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}") logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}")
@ -67,7 +62,7 @@ def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str:
image_data2 = base64.b64decode(base64Data2) image_data2 = base64.b64decode(base64Data2)
with open("file2.png", "wb") as file: with open("file2.png", "wb") as file:
file.write(image_data2) file.write(image_data2)
if os.path.exists("file1.png") and os.path.exists("file2.png"): if os.path.exists("file1.png") and os.path.exists("file2.png"):
logger.info(f"file1.png and file2.png exist") logger.info(f"file1.png and file2.png exist")
jsonString = extractIdCardInfoByPath("file1.png","file2.png") jsonString = extractIdCardInfoByPath("file1.png","file2.png")
@ -95,12 +90,11 @@ def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str:
# jsonString = extractIdCardInfoByBase64Data(base64_image_string,"") # jsonString = extractIdCardInfoByBase64Data(base64_image_string,"")
# jsonString = extractIdCardInfoByBase64Data("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg") # jsonString = extractIdCardInfoByBase64Data("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg")
# print(jsonString) # print(jsonString)
# # #
# jsonString = extractIdCardInfoByPath("./images/han.jpg","") #jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG","")
# logger.info(f"test")
# jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg") # jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg")
# print(jsonString) # print(jsonString)
#
if __name__ == "__main__": if __name__ == "__main__":
try: try:
logger.info(f"main.py len of parameter: {len(sys.argv)}") logger.info(f"main.py len of parameter: {len(sys.argv)}")

BIN
OCRTool/.DS_Store vendored

Binary file not shown.

View File

@ -12,29 +12,7 @@
<option name="autoReloadType" value="SELECTIVE" /> <option name="autoReloadType" value="SELECTIVE" />
</component> </component>
<component name="ChangeListManager"> <component name="ChangeListManager">
<list default="true" id="de15048c-2fb1-4f65-af80-34b29e3a0154" name="Changes" comment="first commit"> <list default="true" id="de15048c-2fb1-4f65-af80-34b29e3a0154" name="Changes" comment="first commit" />
<change beforePath="$PROJECT_DIR$/../OCRPython/extractor/identitycard_extractor.py" beforeDir="false" afterPath="$PROJECT_DIR$/../OCRPython/extractor/identitycard_extractor.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../OCRPython/logs/ocr_reconginition.log" beforeDir="false" afterPath="$PROJECT_DIR$/../OCRPython/logs/ocr_reconginition.log" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../OCRPython/main.py" beforeDir="false" afterPath="$PROJECT_DIR$/../OCRPython/main.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../OCRPython/maincopy.py" beforeDir="false" afterPath="$PROJECT_DIR$/../OCRPython/maincopy.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/OCRTool-1.0-SNAPSHOT.jar" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/out/artifacts/OCRTool_jar/OCRTool.jar" beforeDir="false" afterPath="$PROJECT_DIR$/out/artifacts/OCRTool_jar/OCRTool.jar" afterDir="false" />
<change beforePath="$PROJECT_DIR$/pom.xml" beforeDir="false" afterPath="$PROJECT_DIR$/pom.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/src/main/java/com/bonus/core/MySpringBootApplication.java" beforeDir="false" afterPath="$PROJECT_DIR$/src/main/java/com/bonus/core/MySpringBootApplication.java" afterDir="false" />
<change beforePath="$PROJECT_DIR$/src/main/java/com/bonus/core/OCRRecognition.java" beforeDir="false" afterPath="$PROJECT_DIR$/src/main/java/com/bonus/core/OCRRecognition.java" afterDir="false" />
<change beforePath="$PROJECT_DIR$/src/main/java/com/bonus/core/RecognitionController.java" beforeDir="false" afterPath="$PROJECT_DIR$/src/main/java/com/bonus/core/RecognitionController.java" afterDir="false" />
<change beforePath="$PROJECT_DIR$/src/main/resources/application.yml" beforeDir="false" afterPath="$PROJECT_DIR$/src/main/resources/application.yml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/src/main/resources/libtesseract.a" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/src/main/resources/libtesseract.dylib" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/target/OCRTool-1.0-SNAPSHOT.jar" beforeDir="false" afterPath="$PROJECT_DIR$/target/OCRTool-1.0-SNAPSHOT.jar" afterDir="false" />
<change beforePath="$PROJECT_DIR$/target/classes/application.yml" beforeDir="false" afterPath="$PROJECT_DIR$/target/classes/application.yml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/target/classes/com/bonus/core/MySpringBootApplication.class" beforeDir="false" afterPath="$PROJECT_DIR$/target/classes/com/bonus/core/MySpringBootApplication.class" afterDir="false" />
<change beforePath="$PROJECT_DIR$/target/classes/com/bonus/core/OCRRecognition.class" beforeDir="false" afterPath="$PROJECT_DIR$/target/classes/com/bonus/core/OCRRecognition.class" afterDir="false" />
<change beforePath="$PROJECT_DIR$/target/classes/com/bonus/core/RecognitionController.class" beforeDir="false" afterPath="$PROJECT_DIR$/target/classes/com/bonus/core/RecognitionController.class" afterDir="false" />
<change beforePath="$PROJECT_DIR$/target/classes/libtesseract.a" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/target/classes/libtesseract.dylib" beforeDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" /> <option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" /> <option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" /> <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
@ -91,7 +69,7 @@
"node.js.selected.package.eslint": "(autodetect)", "node.js.selected.package.eslint": "(autodetect)",
"node.js.selected.package.tslint": "(autodetect)", "node.js.selected.package.tslint": "(autodetect)",
"nodejs_package_manager_path": "npm", "nodejs_package_manager_path": "npm",
"project.structure.last.edited": "Problems", "project.structure.last.edited": "Project",
"project.structure.proportion": "0.15", "project.structure.proportion": "0.15",
"project.structure.side.proportion": "0.1150748", "project.structure.side.proportion": "0.1150748",
"settings.editor.selected.configurable": "preferences.pluginManager", "settings.editor.selected.configurable": "preferences.pluginManager",
@ -106,7 +84,7 @@
<recent name="com.bonus.core" /> <recent name="com.bonus.core" />
</key> </key>
</component> </component>
<component name="RunManager" selected="Application.OCRTool"> <component name="RunManager">
<configuration name="OCRTool" type="Application" factoryName="Application"> <configuration name="OCRTool" type="Application" factoryName="Application">
<option name="MAIN_CLASS_NAME" value="com.bonus.core.MySpringBootApplication" /> <option name="MAIN_CLASS_NAME" value="com.bonus.core.MySpringBootApplication" />
<module name="OCRTool" /> <module name="OCRTool" />
@ -115,13 +93,6 @@
<option name="Make" enabled="true" /> <option name="Make" enabled="true" />
</method> </method>
</configuration> </configuration>
<configuration name="MySpringBootApplication" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot" nameIsGenerated="true">
<module name="OCRTool" />
<option name="SPRING_BOOT_MAIN_CLASS" value="com.bonus.core.MySpringBootApplication" />
<method v="2">
<option name="Make" enabled="true" />
</method>
</configuration>
</component> </component>
<component name="SharedIndexes"> <component name="SharedIndexes">
<attachedChunks> <attachedChunks>
@ -147,7 +118,7 @@
<workItem from="1713668202906" duration="5970000" /> <workItem from="1713668202906" duration="5970000" />
<workItem from="1713747776583" duration="310000" /> <workItem from="1713747776583" duration="310000" />
<workItem from="1713748089331" duration="843000" /> <workItem from="1713748089331" duration="843000" />
<workItem from="1713756484781" duration="35613000" /> <workItem from="1713756484781" duration="10479000" />
</task> </task>
<task id="LOCAL-00001" summary="first commit"> <task id="LOCAL-00001" summary="first commit">
<option name="closed" value="true" /> <option name="closed" value="true" />
@ -184,19 +155,7 @@
<map> <map>
<entry key="MAIN"> <entry key="MAIN">
<value> <value>
<State> <State />
<option name="FILTERS">
<map>
<entry key="branch">
<value>
<list>
<option value="main" />
</list>
</value>
</entry>
</map>
</option>
</State>
</value> </value>
</entry> </entry>
</map> </map>
@ -209,14 +168,19 @@
<component name="XDebuggerManager"> <component name="XDebuggerManager">
<breakpoint-manager> <breakpoint-manager>
<breakpoints> <breakpoints>
<line-breakpoint enabled="true" type="java-line">
<url>file://$PROJECT_DIR$/src/main/java/com/bonus/core/RecognitionController.java</url>
<line>9</line>
<option name="timeStamp" value="2" />
</line-breakpoint>
<line-breakpoint enabled="true" type="java-line"> <line-breakpoint enabled="true" type="java-line">
<url>file://$PROJECT_DIR$/src/main/java/com/bonus/core/OCRRecognition.java</url> <url>file://$PROJECT_DIR$/src/main/java/com/bonus/core/OCRRecognition.java</url>
<line>29</line> <line>28</line>
<option name="timeStamp" value="20" /> <option name="timeStamp" value="20" />
</line-breakpoint> </line-breakpoint>
<line-breakpoint enabled="true" type="java-line"> <line-breakpoint enabled="true" type="java-line">
<url>file://$PROJECT_DIR$/src/main/java/com/bonus/core/OCRRecognition.java</url> <url>file://$PROJECT_DIR$/src/main/java/com/bonus/core/OCRRecognition.java</url>
<line>90</line> <line>79</line>
<option name="timeStamp" value="21" /> <option name="timeStamp" value="21" />
</line-breakpoint> </line-breakpoint>
</breakpoints> </breakpoints>

Binary file not shown.

View File

@ -23,32 +23,184 @@
<maven-jar-plugin.version>3.1.1</maven-jar-plugin.version> <maven-jar-plugin.version>3.1.1</maven-jar-plugin.version>
</properties> </properties>
<!-- <profiles>-->
<!-- <profile>-->
<!-- <id>dev</id>-->
<!-- <activation>-->
<!-- <activeByDefault>false</activeByDefault>-->
<!-- </activation>-->
<!-- <properties>-->
<!-- &lt;!&ndash; 在这里定义 dev 环境的配置 &ndash;&gt;-->
<!-- </properties>-->
<!-- </profile>-->
<!--&lt;!&ndash; <profile>&ndash;&gt;-->
<!--&lt;!&ndash; <id>prod</id>&ndash;&gt;-->
<!--&lt;!&ndash; <properties>&ndash;&gt;-->
<!--&lt;!&ndash; &lt;!&ndash; 在这里定义 prod 环境的配置 &ndash;&gt;&ndash;&gt;-->
<!--&lt;!&ndash; </properties>&ndash;&gt;-->
<!--&lt;!&ndash; </profile>&ndash;&gt;-->
<!-- </profiles>-->
<dependencies> <dependencies>
<!-- SpringBoot 核心包 --> <!-- SpringBoot 核心包 -->
<dependency> <dependency>
<groupId>org.springframework.boot</groupId> <groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId> <artifactId>spring-boot-starter</artifactId>
</dependency> </dependency>
<!-- &lt;!&ndash; SpringBoot 测试 &ndash;&gt;-->
<!-- <dependency>-->
<!-- <groupId>org.springframework.boot</groupId>-->
<!-- <artifactId>spring-boot-starter-test</artifactId>-->
<!-- <scope>test</scope>-->
<!-- </dependency>-->
<!-- &lt;!&ndash; SpringBoot 拦截器 &ndash;&gt;-->
<!-- <dependency>-->
<!-- <groupId>org.springframework.boot</groupId>-->
<!-- <artifactId>spring-boot-starter-aop</artifactId>-->
<!-- </dependency>-->
<!-- SpringBoot Web容器 --> <!-- SpringBoot Web容器 -->
<dependency> <dependency>
<groupId>org.springframework.boot</groupId> <groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId> <artifactId>spring-boot-starter-web</artifactId>
</dependency> </dependency>
<dependency>
<groupId>net.sourceforge.tess4j</groupId>
<artifactId>tess4j</artifactId>
<version>4.5.1</version> <!-- 根据需要替换为最新版本 -->
</dependency>
<dependency>
<groupId>net.java.dev.jna</groupId>
<artifactId>jna</artifactId>
<version>5.9.0</version> <!-- 根据需要替换为最新版本 -->
</dependency>
<!-- &lt;!&ndash; spring-boot-devtools &ndash;&gt;-->
<!-- <dependency>-->
<!-- <groupId>org.springframework.boot</groupId>-->
<!-- <artifactId>spring-boot-devtools</artifactId>-->
<!-- <optional>true</optional> &lt;!&ndash; 表示依赖不会传递 &ndash;&gt;-->
<!-- </dependency>-->
<!-- &lt;!&ndash; spring security 安全认证 &ndash;&gt;-->
<!-- <dependency>-->
<!-- <groupId>org.springframework.boot</groupId>-->
<!-- <artifactId>spring-boot-starter-security</artifactId>-->
<!-- </dependency>-->
<!-- &lt;!&ndash; redis 缓存操作 &ndash;&gt;-->
<!-- <dependency>-->
<!-- <groupId>org.springframework.boot</groupId>-->
<!-- <artifactId>spring-boot-starter-data-redis</artifactId>-->
<!-- </dependency>-->
<!-- &lt;!&ndash; pool 对象池 &ndash;&gt;-->
<!-- <dependency>-->
<!-- <groupId>org.apache.commons</groupId>-->
<!-- <artifactId>commons-pool2</artifactId>-->
<!-- </dependency>-->
<!-- &lt;!&ndash; Mysql驱动包 &ndash;&gt;-->
<!-- <dependency>-->
<!-- <groupId>mysql</groupId>-->
<!-- <artifactId>mysql-connector-java</artifactId>-->
<!-- <scope>runtime</scope>-->
<!-- </dependency>-->
<!-- &lt;!&ndash; pagehelper 分页插件 &ndash;&gt;-->
<!-- <dependency>-->
<!-- <groupId>com.github.pagehelper</groupId>-->
<!-- <artifactId>pagehelper-spring-boot-starter</artifactId>-->
<!-- <version>${pagehelper.spring.boot.starter.version}</version>-->
<!-- </dependency>-->
<!-- &lt;!&ndash; 阿里数据库连接池 &ndash;&gt;-->
<!-- <dependency>-->
<!-- <groupId>com.alibaba</groupId>-->
<!-- <artifactId>druid-spring-boot-starter</artifactId>-->
<!-- <version>${druid.version}</version>-->
<!-- </dependency>-->
<!-- &lt;!&ndash; 自定义验证注解 &ndash;&gt;-->
<!-- <dependency>-->
<!-- <groupId>org.springframework.boot</groupId>-->
<!-- <artifactId>spring-boot-starter-validation</artifactId>-->
<!-- </dependency>-->
<!-- &lt;!&ndash; 常用工具类 &ndash;&gt;-->
<!-- <dependency>-->
<!-- <groupId>org.apache.commons</groupId>-->
<!-- <artifactId>commons-lang3</artifactId>-->
<!-- </dependency>-->
<!-- &lt;!&ndash; io常用工具类 &ndash;&gt;-->
<!-- <dependency>-->
<!-- <groupId>commons-io</groupId>-->
<!-- <artifactId>commons-io</artifactId>-->
<!-- <version>${commons.io.version}</version>-->
<!-- </dependency>-->
<!-- &lt;!&ndash; 解析客户端操作系统、浏览器等 &ndash;&gt;-->
<!-- <dependency>-->
<!-- <groupId>eu.bitwalker</groupId>-->
<!-- <artifactId>UserAgentUtils</artifactId>-->
<!-- <version>${bitwalker.version}</version>-->
<!-- </dependency>-->
<!-- &lt;!&ndash; 阿里JSON解析器 &ndash;&gt;-->
<!-- <dependency>-->
<!-- <groupId>com.alibaba.fastjson2</groupId>-->
<!-- <artifactId>fastjson2</artifactId>-->
<!-- <version>${fastjson.version}</version>-->
<!-- </dependency>-->
<!-- &lt;!&ndash; Spring框架基本的核心工具&ndash;&gt;-->
<!-- <dependency>-->
<!-- <groupId>org.springframework</groupId>-->
<!-- <artifactId>spring-context-support</artifactId>-->
<!-- </dependency>-->
<!-- &lt;!&ndash; Token生成与解析&ndash;&gt;-->
<!-- <dependency>-->
<!-- <groupId>io.jsonwebtoken</groupId>-->
<!-- <artifactId>jjwt</artifactId>-->
<!-- <version>${jwt.version}</version>-->
<!-- </dependency>-->
<!-- &lt;!&ndash; Jaxb &ndash;&gt;-->
<!-- <dependency>-->
<!-- <groupId>javax.xml.bind</groupId>-->
<!-- <artifactId>jaxb-api</artifactId>-->
<!-- </dependency>-->
<!-- &lt;!&ndash; Swagger3依赖 &ndash;&gt;-->
<!-- <dependency>-->
<!-- <groupId>io.springfox</groupId>-->
<!-- <artifactId>springfox-boot-starter</artifactId>-->
<!-- <version>${swagger.version}</version>-->
<!-- <exclusions>-->
<!-- <exclusion>-->
<!-- <groupId>io.swagger</groupId>-->
<!-- <artifactId>swagger-models</artifactId>-->
<!-- </exclusion>-->
<!-- </exclusions>-->
<!-- </dependency>-->
</dependencies> </dependencies>
<build> <!-- <build>-->
<plugins> <!-- <plugins>-->
<plugin> <!-- <plugin>-->
<groupId>org.springframework.boot</groupId> <!-- <groupId>org.springframework.boot</groupId>-->
<artifactId>spring-boot-maven-plugin</artifactId> <!-- <artifactId>spring-boot-maven-plugin</artifactId>-->
<executions> <!-- <configuration>-->
<execution> <!-- <profiles>-->
<goals> <!-- <profile>default</profile>-->
<goal>repackage</goal> <!-- </profiles>-->
</goals> <!-- </configuration>-->
</execution> <!-- </plugin>-->
</executions> <!-- </plugins>-->
</plugin> <!-- </build>-->
</plugins>
</build>
</project> </project>

BIN
OCRTool/src/.DS_Store vendored

Binary file not shown.

Binary file not shown.

View File

@ -16,11 +16,11 @@ public class MySpringBootApplication {
context = SpringApplication.run(MySpringBootApplication.class, args); context = SpringApplication.run(MySpringBootApplication.class, args);
MySpringBootApplication app = new MySpringBootApplication(); MySpringBootApplication app = new MySpringBootApplication();
//String filePath1 = "E:\\OCRTool\\OCRPython\\images\\id_card.JPG"; //String filePath1 = "/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG";
// String filePath1 = "/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg"; String filePath1 = "/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg";
// String filePath2 = "/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg"; String filePath2 = "/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg";
//app.callExtractIdentityInfoByLocalPath(filePath1, ""); app.callExtractIdentityInfoByLocalPath(filePath1, filePath2);
app.callExtractIdentityInfoByBase64Data(filePath1, filePath2); //app.callExtractIdentityInfoByBase64Data(filePath1, filePath2);
} }
private static ApplicationContext context; private static ApplicationContext context;

View File

@ -4,9 +4,9 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.io.OutputStream; import java.io.OutputStream;
import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
@ -22,10 +22,9 @@ public class OCRRecognition {
public String extractInfo(IdentifyRecognitionParams recognitionParams){ public String extractInfo(IdentifyRecognitionParams recognitionParams){
Process proc; Process proc;
List<String> stringList = new ArrayList<>(); List<String> stringList = new ArrayList<>();
String lastElement = ""; String lastElement = null;
int offset = 0; int offset = 0;
// 设置每次写入的批次大小 int batchSize = 8092; // 设置每次写入的批次大小
int batchSize = 8092;
try { try {
System.out.println("开始打印从配置里读取的值:"); System.out.println("开始打印从配置里读取的值:");
System.out.println(this.configure.getPythonEnv()); System.out.println(this.configure.getPythonEnv());
@ -36,8 +35,7 @@ public class OCRRecognition {
System.out.println(recognitionParams.getRecognitionData1()); System.out.println(recognitionParams.getRecognitionData1());
System.out.println("参数2的base64 string 是:"); System.out.println("参数2的base64 string 是:");
System.out.println(recognitionParams.getRecognitionData2()); System.out.println(recognitionParams.getRecognitionData2());
//*, recognitionParams.getRecognitionData1(), recognitionParams.getRecognitionData2()*/ String[] str = new String[]{this.configure.getPythonEnv(),this.configure.getScriptEnv(), String.valueOf(recognitionParams.getType().ordinal())/*, recognitionParams.getRecognitionData1(), recognitionParams.getRecognitionData2()*/};//
String[] str = new String[]{this.configure.getPythonEnv(),this.configure.getScriptEnv(), String.valueOf(recognitionParams.getType().ordinal())};
Runtime runtime = Runtime.getRuntime(); Runtime runtime = Runtime.getRuntime();
if (runtime == null) { if (runtime == null) {
System.out.println("runtime == null"); System.out.println("runtime == null");
@ -48,40 +46,31 @@ public class OCRRecognition {
OutputStream outputStream = proc.getOutputStream(); OutputStream outputStream = proc.getOutputStream();
if (!recognitionParams.getRecognitionData1().isEmpty()) { if (!recognitionParams.getRecognitionData1().isEmpty()) {
byte[] input = recognitionParams.getRecognitionData1().getBytes(StandardCharsets.UTF_8); byte[] input = recognitionParams.getRecognitionData1().getBytes(StandardCharsets.UTF_8);
passParameter(outputStream, input); long len = input.length;
while (offset < len) {
int remaining = (int) (len - offset);
int bytesToWrite = Math.min(remaining, batchSize);
outputStream.write(input, offset, bytesToWrite);
offset += bytesToWrite;
}
} }
if (!recognitionParams.getRecognitionData2().isEmpty()) { if (!recognitionParams.getRecognitionData2().isEmpty()) {
byte[] input = recognitionParams.getRecognitionData1().getBytes(StandardCharsets.UTF_8); outputStream.write(System.lineSeparator().getBytes());
passParameter(outputStream, input); byte[] input = recognitionParams.getRecognitionData2().getBytes(StandardCharsets.UTF_8);
long len = input.length;
while (offset < len) {
int remaining = (int) (len - offset);
int bytesToWrite = Math.min(remaining, batchSize);
outputStream.write(input, offset, bytesToWrite);
offset += bytesToWrite;
}
} }
// if (!recognitionParams.getRecognitionData1().isEmpty()) {
// byte[] input = recognitionParams.getRecognitionData1().getBytes(StandardCharsets.UTF_8);
// long len = input.length;
//
// while (offset < len) {
// int remaining = (int) (len - offset);
// int bytesToWrite = Math.min(remaining, batchSize);
// outputStream.write(input, offset, bytesToWrite);
// offset += bytesToWrite;
// }
// }
// if (!recognitionParams.getRecognitionData2().isEmpty()) {
// outputStream.write(System.lineSeparator().getBytes());
// byte[] input = recognitionParams.getRecognitionData2().getBytes(StandardCharsets.UTF_8);
// long len = input.length;
// while (offset < len) {
// int remaining = (int) (len - offset);
// int bytesToWrite = Math.min(remaining, batchSize);
// outputStream.write(input, offset, bytesToWrite);
// offset += bytesToWrite;
// }
//
outputStream.flush(); outputStream.flush();
outputStream.close(); outputStream.close();
BufferedReader in = new BufferedReader(new InputStreamReader(proc.getInputStream(), StandardCharsets.UTF_8)); BufferedReader in = new BufferedReader(new InputStreamReader(proc.getInputStream(), StandardCharsets.UTF_8));
String line = null; String line = null;
while ((line = in.readLine()) != null) { while ((line = in.readLine()) != null) {
stringList.add(line); stringList.add(line);
} }
@ -96,20 +85,6 @@ public class OCRRecognition {
return lastElement; return lastElement;
} }
private void passParameter(OutputStream outputStream, byte[] input) throws IOException {
int offset = 0;
// 设置每次写入的批次大小
int batchSize = 8092;
long len = input.length;
while (offset < len) {
int remaining = (int) (len - offset);
int bytesToWrite = Math.min(remaining, batchSize);
outputStream.write(input, offset, bytesToWrite);
offset += bytesToWrite;
}
}
} }

View File

@ -5,12 +5,20 @@ import org.springframework.web.bind.annotation.*;
@RestController @RestController
public class RecognitionController { public class RecognitionController {
// @GetMapping("/hello")
// public String hello() {
// return "Hello, World!";
// }
@Autowired OCRRecognition ocrRecognition; @Autowired OCRRecognition ocrRecognition;
@PostMapping("/recognition") @PostMapping("/recognition")
public String recognition(@RequestBody IdentifyRecognitionParams identifyRecognitionParams){ public String recognition(@RequestBody IdentifyRecognitionParams identifyRecognitionParams){
String javaString = "";
IdentifyRecognitionParams.RecognitionType type = identifyRecognitionParams.getType(); IdentifyRecognitionParams.RecognitionType type = identifyRecognitionParams.getType();
return ocrRecognition.extractInfo(identifyRecognitionParams); String idData1 = identifyRecognitionParams.getRecognitionData1();
String idData2 = identifyRecognitionParams.getRecognitionData2();
javaString = ocrRecognition.extractInfo(identifyRecognitionParams);
return javaString;
} }
} }

Binary file not shown.

View File

@ -1,3 +1,7 @@
spring:
profiles:
active=dev:
recognition: recognition:
# pythonEnv: /Users/wangvivi/Desktop/MySelf/myenv/bin/python # pythonEnv: /Users/wangvivi/Desktop/MySelf/myenv/bin/python
# scriptEnv: /Users/wangvivi/Desktop/Code/Component/OCRPython/main.py # scriptEnv: /Users/wangvivi/Desktop/Code/Component/OCRPython/main.py

View File

@ -1,3 +1,7 @@
spring:
profiles:
active=dev:
recognition: recognition:
# pythonEnv: /Users/wangvivi/Desktop/MySelf/myenv/bin/python # pythonEnv: /Users/wangvivi/Desktop/MySelf/myenv/bin/python
# scriptEnv: /Users/wangvivi/Desktop/Code/Component/OCRPython/main.py # scriptEnv: /Users/wangvivi/Desktop/Code/Component/OCRPython/main.py

Binary file not shown.

Binary file not shown.