Compare commits

..

No commits in common. "87e0743de91390c46f3b8ad26ec234ed97fe36a7" and "22db9aa88e2aac67b31a7ef2a453ee42ccb35465" have entirely different histories.

6 changed files with 58 additions and 67 deletions

View File

@ -201,15 +201,3 @@
2024-04-21 15:25:57,306 - main.py[line:119] - INFO: 0 2024-04-21 15:25:57,306 - main.py[line:119] - INFO: 0
2024-04-21 15:25:57,483 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR 2024-04-21 15:25:57,483 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-21 15:26:00,173 - main.py[line:42] - INFO: extractIdCardInfo 耗时2.866658926010132秒 2024-04-21 15:26:00,173 - main.py[line:42] - INFO: extractIdCardInfo 耗时2.866658926010132秒
2024-04-22 11:01:10,204 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-22 11:01:14,447 - main.py[line:42] - INFO: extractIdCardInfo 耗时4.689878702163696秒
2024-04-22 11:01:54,639 - main.py[line:96] - INFO: main.py len of parameter: 2
2024-04-22 11:01:54,640 - main.py[line:104] - INFO: python 脚本里的接收到的参数是:
2024-04-22 11:01:54,640 - main.py[line:105] - INFO: 0
2024-04-22 11:01:54,640 - main.py[line:106] - INFO: 开始执行sys.stdin.read
2024-04-22 11:01:54,640 - main.py[line:108] - INFO:
2024-04-22 11:01:54,640 - main.py[line:109] - INFO: 55
2024-04-22 11:01:54,640 - main.py[line:118] - INFO: 55
2024-04-22 11:01:54,640 - main.py[line:119] - INFO: 0
2024-04-22 11:01:54,819 - ocr.py[line:19] - INFO: from rapidocr_onnxruntime import RapidOCR
2024-04-22 11:01:57,426 - main.py[line:42] - INFO: extractIdCardInfo 耗时2.785860061645508秒

View File

@ -95,12 +95,12 @@ if __name__ == "__main__":
try: try:
logger.info(f"main.py len of parameter: {len(sys.argv)}") logger.info(f"main.py len of parameter: {len(sys.argv)}")
jsonString = "" jsonString = ""
if len(sys.argv) > 3: if len(sys.argv) > 3:
logger.info(f"{sys.argv[1]}") logger.info(f"{sys.argv[1]}")
logger.info(f"{sys.argv[2]}") logger.info(f"{sys.argv[2]}")
logger.info(f"{sys.argv[3]}") logger.info(f"{sys.argv[3]}")
jsonString = extractIdCardInfo(int(sys.argv[1]), sys.argv[2], sys.argv[3]) jsonString = extractIdCardInfo(int(sys.argv[1]), sys.argv[2], sys.argv[3])
elif len(sys.argv) > 1: elif len(sys.argv) > 1:
logger.info(f"python 脚本里的接收到的参数是:") logger.info(f"python 脚本里的接收到的参数是:")
logger.info(f"{sys.argv[1]}") logger.info(f"{sys.argv[1]}")
logger.info(f"开始执行sys.stdin.read") logger.info(f"开始执行sys.stdin.read")
@ -117,7 +117,7 @@ if __name__ == "__main__":
data1 = split_data[0] data1 = split_data[0]
logger.info(f"{len(data1)}") logger.info(f"{len(data1)}")
logger.info(f"{len(data2)}") logger.info(f"{len(data2)}")
jsonString = extractIdCardInfo(int(sys.argv[1]), data1, data2) jsonString = extractIdCardInfo(int(sys.argv[1]), data1, data2)
print(jsonString) print(jsonString)

View File

@ -37,7 +37,6 @@ def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str:
except Exception as e: except Exception as e:
logger.error(e) logger.error(e)
logger.info(f"text:{text}")
extractor = IdentityCardExtractor() extractor = IdentityCardExtractor()
jsonString = extractor.extract_textbyPaddle(text) jsonString = extractor.extract_textbyPaddle(text)
end_time = time.time() # 记录结束时间 end_time = time.time() # 记录结束时间
@ -87,59 +86,59 @@ def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str:
# with open('/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG', 'rb') as image_file: # with open('/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG', 'rb') as image_file:
# base64_image_string = base64.b64encode(image_file.read()).decode('utf-8') # base64_image_string = base64.b64encode(image_file.read()).decode('utf-8')
#
# jsonString = extractIdCardInfoByBase64Data(base64_image_string,"") # jsonString = extractIdCardInfoByBase64Data(base64_image_string,"")
# print(jsonString) # print(jsonString)
#
# jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG","") jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG","")
# print(jsonString) print(jsonString)
if __name__ == "__main__": # if __name__ == "__main__":
try: # try:
logger.info(f"main.py len of parameter: {len(sys.argv)}") # logger.info(f"main.py len of parameter: {len(sys.argv)}")
jsonString = "" # jsonString = ""
if len(sys.argv) > 3: # if len(sys.argv) > 3:
logger.info(f"{sys.argv[1]}") # logger.info(f"{sys.argv[1]}")
logger.info(f"{sys.argv[2]}") # logger.info(f"{sys.argv[2]}")
logger.info(f"{sys.argv[3]}") # logger.info(f"{sys.argv[3]}")
jsonString = extractIdCardInfo(int(sys.argv[1]), sys.argv[2], sys.argv[3]) # jsonString = extractIdCardInfo(int(sys.argv[1]), sys.argv[2], sys.argv[3])
elif len(sys.argv) > 1: # elif len(sys.argv) > 1:
logger.info(f"python 脚本里的接收到的参数是:") # logger.info(f"python 脚本里的接收到的参数是:")
logger.info(f"{sys.argv[1]}") # logger.info(f"{sys.argv[1]}")
logger.info(f"开始执行sys.stdin.read") # logger.info(f"开始执行sys.stdin.read")
input_data = sys.stdin.read() # input_data = sys.stdin.read()
logger.info("") # logger.info("")
logger.info(f"len(input_data):{len(input_data)}") # logger.info(f"{len(input_data)}")
split_data = input_data.split(os.linesep) # split_data = input_data.split(os.linesep)
data1 = "" # data1 = ""
data2 = "" # data2 = ""
if 2 == len(split_data): # if 2 == len(split_data):
data1 = split_data[0] # data1 = split_data[0]
data2 = split_data[1] # data2 = split_data[1]
elif 1 == len(split_data): # elif 1 == len(split_data):
data1 = split_data[0] # data1 = split_data[0]
logger.info(f"{data1}") # logger.info(f"{len(data1)}")
logger.info(f"{data2}") # logger.info(f"{len(data2)}")
# jsonString = extractIdCardInfo(int(sys.argv[1]), data1, data2)
# print(jsonString)
# except KeyboardInterrupt:
# logger.error("KeyboardInterrupt")
jsonString = extractIdCardInfo(int(sys.argv[1]), data1, data2) # except IndexError:
print(jsonString) # # 如果参数过长,捕获 IndexError 异常并进行处理
except KeyboardInterrupt: # logger.error("参数过长,未指定足够的参数")
logger.error("KeyboardInterrupt")
except IndexError: # except OverflowError:
# 如果参数过长,捕获 IndexError 异常并进行处理 # # 如果发生了 OverflowError 异常,捕获并进行相应的处理
logger.error("参数过长,未指定足够的参数") # logger.error("命令行参数过长,导致溢出错误")
except OverflowError: # except EOFError:
# 如果发生了 OverflowError 异常,捕获并进行相应的处理 # # 如果达到输入流的末尾,捕获 EOFError 异常并进行处理
logger.error("命令行参数过长,导致溢出错误") # logger.error("已经到达输入流的末尾")
except EOFError: # except Exception as e:
# 如果达到输入流的末尾,捕获 EOFError 异常并进行处理 # # 如果发生了其他异常,捕获并进行相应的处理
logger.error("已经到达输入流的末尾") # logger.error("发生了异常:", e)
except Exception as e:
# 如果发生了其他异常,捕获并进行相应的处理
logger.error("发生了异常:", e)

3
OCRTool/.idea/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
# Default ignored files
/shelf/
/workspace.xml

View File

@ -81,6 +81,7 @@ public class MySpringBootApplication {
tesseract.setLanguage("chi_sim"); tesseract.setLanguage("chi_sim");
// 从图像中提取文本 // 从图像中提取文本
String text = tesseract.doOCR(new File("/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG")); String text = tesseract.doOCR(new File("/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG"));
// 打印识别的文本
System.out.println("识别的文本:" + text); System.out.println("识别的文本:" + text);
return text; return text;
} catch (TesseractException e) { } catch (TesseractException e) {

View File

@ -3,8 +3,8 @@ spring:
active=dev: active=dev:
recognition: recognition:
# pythonEnv: /Users/wangvivi/Desktop/MySelf/myenv/bin/python pythonEnv: /Users/wangvivi/Desktop/MySelf/myenv/bin/python
# scriptEnv: /Users/wangvivi/Desktop/Code/Component/OCRPython/main.py scriptEnv: /Users/wangvivi/Desktop/Code/Component/OCR/main.py
pythonEnv: /Users/wangvivi/miniconda3/envs/ocr/bin/python #pythonEnv: /Users/wangvivi/miniconda3/envs/ocr/bin/python
scriptEnv: /Users/wangvivi/Desktop/Code/Component/OCRPython/maincopy.py #scriptEnv: /Users/wangvivi/Desktop/Code/Component/OCR/maincopy.py