From 80530f8b77da0b33b47a263096851183cc82743e Mon Sep 17 00:00:00 2001 From: weiweiw <14335254+weiweiw22@user.noreply.gitee.com> Date: Thu, 25 Apr 2024 16:50:03 +0800 Subject: [PATCH] first commit --- OCRPython/validate.py | 103 ++++++++++++++++++ OCRTool/pom.xml | 9 ++ .../bonus/core/IdentifyRecognitionParams.java | 4 +- .../bonus/core/MySpringBootApplication.java | 15 ++- .../java/com/bonus/core/OCRRecognition.java | 75 +++++-------- .../com/bonus/core/RecognitionController.java | 1 - OCRTool/src/main/resources/application.yml | 3 - OCRTool/src/main/resources/logback.xml | 92 ++++++++++++++++ 8 files changed, 244 insertions(+), 58 deletions(-) create mode 100644 OCRPython/validate.py create mode 100644 OCRTool/src/main/resources/logback.xml
diff --git a/OCRPython/validate.py b/OCRPython/validate.py
new file mode 100644
index 0000000..892c7e6
--- /dev/null
+++ b/OCRPython/validate.py
@@ -0,0 +1,148 @@
+# -*- coding: utf-8 -*-
+"""Extract ID-card fields from images with PaddleOCR.
+
+Images are given either as file paths or as base64-encoded data. The text
+recognised by PaddleOCR is handed to ``IdentityCardExtractor`` and the
+resulting fields are returned as a JSON string.
+"""
+import base64
+import io
+import json
+import os
+import sys
+import tempfile
+import time
+
+from paddleocr import PaddleOCR
+
+from configs.basic_config import logger
+from extractor.identitycard_extractor import IdentityCardExtractor
+
+# Re-wrap stdout so that everything printed by this script is UTF-8 encoded.
+sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
+
+
+def extractIdCardInfo(type: int, filePath1: str, filePath2: str) -> str:
+    """Dispatch ID-card extraction according to the kind of input.
+
+    Args:
+        type: 0 when the data arguments are image file paths, 1 when they
+            are base64-encoded image data. The name shadows the builtin but
+            is kept so that existing keyword callers keep working.
+        filePath1: Path (or base64 data) of the first image; may be empty.
+        filePath2: Path (or base64 data) of the second image; may be empty.
+
+    Returns:
+        The extracted fields as a JSON string, or "" when nothing was
+        recognised or ``type`` is not a supported value.
+    """
+    if type == 0:
+        return extractIdCardInfoByPath(filePath1, filePath2)
+    if type == 1:
+        return extractIdCardInfoByBase64Data(filePath1, filePath2)
+    # This branch used to fall through and return None, contradicting the
+    # ``-> str`` annotation; report the problem and return the empty result.
+    logger.error(f"unsupported recognition type: {type}")
+    return ""
+
+
+def extractIdCardInfoByPath(filePath1: str, filePath2: str) -> str:
+    """Run OCR on up to two image files and extract the ID-card fields.
+
+    Args:
+        filePath1: Path of the first image; skipped when it does not exist.
+        filePath2: Path of the second image; skipped when it does not exist.
+
+    Returns:
+        ``json.dumps`` of whatever ``IdentityCardExtractor`` extracts from the
+        recognised text, or "" when no text was recognised.
+    """
+    # Constructing PaddleOCR loads (and on first use downloads) the model.
+    ocr = PaddleOCR(use_angle_cls=True, lang="ch")
+    text = ""
+    start_time = time.time()  # start of the timed section
+    jsonString = ""
+    try:
+        for imagePath in (filePath1, filePath2):
+            if not os.path.exists(imagePath):
+                continue
+            for page in ocr.ocr(imagePath, cls=False):
+                # A page without detected text may be None (or empty); the
+                # old code raised TypeError on it and skipped the rest.
+                if not page:
+                    continue
+                for hit in page:
+                    text += hit[1][0] + '\n'
+    except Exception as e:
+        # Best effort: keep whatever text was collected before the failure.
+        logger.error(e)
+        print(e)
+
+    if text:
+        logger.info(f"text:{text}")
+        extractor = IdentityCardExtractor()
+        tempdict = extractor.extract_textbyPaddle(text)
+        jsonString = json.dumps(tempdict, ensure_ascii=False)
+    execution_time = time.time() - start_time
+    # The message used to name extractIdCardInfoByBase64Data (copy/paste).
+    logger.info(f"extractIdCardInfoByPath 耗时{execution_time}秒")
+    return jsonString
+
+
+def _write_base64_image(base64Data: str, targetPath: str) -> str:
+    """Decode ``base64Data``, write the bytes to ``targetPath``, return the path."""
+    with open(targetPath, "wb") as file:
+        file.write(base64.b64decode(base64Data))
+    return targetPath
+
+
+def extractIdCardInfoByBase64Data(base64data1: str, base64Data2: str) -> str:
+    """Decode up to two base64 images and extract the ID-card fields.
+
+    The images are written to a private temporary directory that is removed
+    when the call finishes. The previous fixed ``file1.png``/``file2.png``
+    names in the working directory let concurrent calls overwrite each other
+    and let a file left behind by a failed call be recognised as new input.
+
+    Args:
+        base64data1: Base64 data of the first image; may be empty.
+        base64Data2: Base64 data of the second image; may be empty.
+
+    Returns:
+        The JSON string produced by ``extractIdCardInfoByPath``, or "" when
+        no image was supplied or an error occurred.
+    """
+    logger.info(f"extractIdCardInfoByBase64Data")
+    start_time = time.time()  # start of the timed section
+    jsonString = ""
+    try:
+        with tempfile.TemporaryDirectory() as workDir:
+            imagePaths = []
+            if base64data1:
+                logger.info(f"not base64data1.empty()")
+                imagePaths.append(_write_base64_image(
+                    base64data1, os.path.join(workDir, "file1.png")))
+            if base64Data2:
+                logger.info(f"not base64Data2.empty()")
+                imagePaths.append(_write_base64_image(
+                    base64Data2, os.path.join(workDir, "file2.png")))
+
+            if imagePaths:
+                names = " and ".join(os.path.basename(p) for p in imagePaths)
+                logger.info(f"{names} exist")
+                # A single image is always passed as the first argument.
+                secondPath = imagePaths[1] if len(imagePaths) > 1 else ""
+                jsonString = extractIdCardInfoByPath(imagePaths[0], secondPath)
+    except Exception as e:
+        logger.error(e)
+
+    end_time = time.time()
+    execution_time = end_time - start_time
+    logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}秒")
+    return jsonString
+
+
+# NOTE(review): sample invocation executed every time this file is run or
+# imported; it is kept unchanged to preserve the script's current behaviour.
+jsonString = extractIdCardInfoByPath("./images/han.jpg","")
+print(jsonString)
diff --git a/OCRTool/pom.xml b/OCRTool/pom.xml index 163a355..9debe2c 100644 --- a/OCRTool/pom.xml +++ b/OCRTool/pom.xml @@ -51,4 +51,13 @@ + + + + maven-central + BonusMavenRepo + http://192.168.0.56:8081/repository/maven-public/ + + + \ No newline at end of file diff --git a/OCRTool/src/main/java/com/bonus/core/IdentifyRecognitionParams.java b/OCRTool/src/main/java/com/bonus/core/IdentifyRecognitionParams.java index 4ffad96..dc42b3a 100644 --- a/OCRTool/src/main/java/com/bonus/core/IdentifyRecognitionParams.java +++ b/OCRTool/src/main/java/com/bonus/core/IdentifyRecognitionParams.java @@ -21,8 +21,8 @@ public class IdentifyRecognitionParams { this.recognitionData2 = recognitionData2; } - public RecognitionType getType() { - return type; + public int getType() { + return type.ordinal(); } public void setType(RecognitionType type) { diff --git a/OCRTool/src/main/java/com/bonus/core/MySpringBootApplication.java b/OCRTool/src/main/java/com/bonus/core/MySpringBootApplication.java index a32e6b0..b231741 100644 --- a/OCRTool/src/main/java/com/bonus/core/MySpringBootApplication.java +++ b/OCRTool/src/main/java/com/bonus/core/MySpringBootApplication.java @@ -1,4 +1,6 @@ package com.bonus.core; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.context.ApplicationContext; @@ -12,14 +14,15 @@ import java.util.Base64; */ @SpringBootApplication public
class MySpringBootApplication { + private static final Logger logger = LoggerFactory.getLogger(MySpringBootApplication.class); public static void main(String[] args) { context = SpringApplication.run(MySpringBootApplication.class, args); MySpringBootApplication app = new MySpringBootApplication(); + //String filePath1 = "/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG"; //String filePath1 = "E:\\OCRTool\\OCRPython\\images\\id_card.JPG"; -// String filePath1 = "/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg"; // String filePath2 = "/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg"; - //app.callExtractIdentityInfoByLocalPath(filePath1, ""); + //app.callExtractIdentityInfoByLocalPath(filePath1,""); // app.callExtractIdentityInfoByBase64Data(filePath1, filePath2); } private static ApplicationContext context; @@ -31,8 +34,8 @@ public class MySpringBootApplication { params.setRecognitionData1(filePath1); params.setRecognitionData2(filePath2); String javaString = myComponent.extractInfo(params); - System.out.println("身份证信息是:"); - System.out.println(javaString); + logger.info("身份证信息是:"); + logger.info(javaString); } public String callExtractIdentityInfoByBase64Data(String file1Data, String file2Data ) { @@ -56,8 +59,8 @@ public class MySpringBootApplication { params.setRecognitionData2(base64Image2); javaString = myComponent.extractInfo(params); - System.out.println("身份证信息是:"); - System.out.println(javaString); + logger.info("身份证信息是:"); + logger.info(javaString); return javaString; } catch (IOException e) { e.printStackTrace(); diff --git a/OCRTool/src/main/java/com/bonus/core/OCRRecognition.java b/OCRTool/src/main/java/com/bonus/core/OCRRecognition.java index da9c7ec..fb6d719 100644 --- a/OCRTool/src/main/java/com/bonus/core/OCRRecognition.java +++ b/OCRTool/src/main/java/com/bonus/core/OCRRecognition.java @@ -1,6 +1,6 @@ package com.bonus.core; -import org.springframework.beans.factory.annotation.Autowired; +import 
org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; import java.io.BufferedReader; @@ -11,40 +11,44 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * @author wangvivi + */ @Component public class OCRRecognition { - private final RecognitionConfigure configure; - @Autowired - public OCRRecognition(RecognitionConfigure properties) { - this.configure = properties; - } + private static final Logger logger = LoggerFactory.getLogger(OCRRecognition.class); + @Value("${recognition.pythonEnv}") + private String pythonEnv; + @Value("${recognition.scriptEnv}") + private String scriptEnv; public String extractInfo(IdentifyRecognitionParams recognitionParams){ Process proc; List stringList = new ArrayList<>(); String lastElement = ""; - int offset = 0; - // 设置每次写入的批次大小 - int batchSize = 8092; try { - System.out.println("开始打印从配置里读取的值:"); - System.out.println(this.configure.getPythonEnv()); - System.out.println(this.configure.getScriptEnv()); - System.out.println("开始打印从请求里带过来的参数:"); - System.out.println(String.valueOf(recognitionParams.getType())); - System.out.println("参数1的base64 string 是:"); - System.out.println(recognitionParams.getRecognitionData1()); - System.out.println("参数2的base64 string 是:"); - System.out.println(recognitionParams.getRecognitionData2()); + logger.info("开始打印从配置里读取的值:"); + logger.info(pythonEnv); + logger.info(scriptEnv); + logger.info("开始打印从请求里带过来的参数:"); + logger.info(String.valueOf(recognitionParams.getType())); + logger.info("参数1的string 是:"); + logger.info(recognitionParams.getRecognitionData1()); + logger.info("参数2的string 是:"); + logger.info(recognitionParams.getRecognitionData2()); //*, recognitionParams.getRecognitionData1(), recognitionParams.getRecognitionData2()*/ - String[] str = new String[]{this.configure.getPythonEnv(),this.configure.getScriptEnv(), 
String.valueOf(recognitionParams.getType().ordinal())}; + String[] str = new String[]{pythonEnv,scriptEnv, String.valueOf(recognitionParams.getType())}; Runtime runtime = Runtime.getRuntime(); if (runtime == null) { System.out.println("runtime == null"); return ""; } - System.out.println(str[0]); proc = runtime.exec(str); + logger.info("proc = runtime.exec(str)"); OutputStream outputStream = proc.getOutputStream(); if (!recognitionParams.getRecognitionData1().isEmpty()) { byte[] input = recognitionParams.getRecognitionData1().getBytes(StandardCharsets.UTF_8); @@ -55,41 +59,20 @@ public class OCRRecognition { byte[] input = recognitionParams.getRecognitionData2().getBytes(StandardCharsets.UTF_8); passParameter(outputStream, input); } - -// if (!recognitionParams.getRecognitionData1().isEmpty()) { -// byte[] input = recognitionParams.getRecognitionData1().getBytes(StandardCharsets.UTF_8); -// long len = input.length; -// -// while (offset < len) { -// int remaining = (int) (len - offset); -// int bytesToWrite = Math.min(remaining, batchSize); -// outputStream.write(input, offset, bytesToWrite); -// offset += bytesToWrite; -// } -// } -// if (!recognitionParams.getRecognitionData2().isEmpty()) { -// outputStream.write(System.lineSeparator().getBytes()); -// byte[] input = recognitionParams.getRecognitionData2().getBytes(StandardCharsets.UTF_8); -// long len = input.length; -// while (offset < len) { -// int remaining = (int) (len - offset); -// int bytesToWrite = Math.min(remaining, batchSize); -// outputStream.write(input, offset, bytesToWrite); -// offset += bytesToWrite; -// } -// outputStream.flush(); outputStream.close(); + logger.info("outputStream.close()"); BufferedReader in = new BufferedReader(new InputStreamReader(proc.getInputStream(), StandardCharsets.UTF_8)); - String line = null; - + logger.info("BufferedReader in = new BufferedReade"); + String line; while ((line = in.readLine()) != null) { + logger.info(line); stringList.add(line); } proc.waitFor(); 
in.close(); } catch (Exception e) { - e.printStackTrace(); + logger.error(e.getMessage()); } if (!stringList.isEmpty()) { lastElement = stringList.get(stringList.size() - 1); diff --git a/OCRTool/src/main/java/com/bonus/core/RecognitionController.java b/OCRTool/src/main/java/com/bonus/core/RecognitionController.java index b0c2a39..58b7a38 100644 --- a/OCRTool/src/main/java/com/bonus/core/RecognitionController.java +++ b/OCRTool/src/main/java/com/bonus/core/RecognitionController.java @@ -9,7 +9,6 @@ public class RecognitionController { @Autowired OCRRecognition ocrRecognition; @PostMapping("/recognition") public String recognition(@RequestBody IdentifyRecognitionParams identifyRecognitionParams){ - IdentifyRecognitionParams.RecognitionType type = identifyRecognitionParams.getType(); return ocrRecognition.extractInfo(identifyRecognitionParams); } diff --git a/OCRTool/src/main/resources/application.yml b/OCRTool/src/main/resources/application.yml index bf88177..5af952a 100644 --- a/OCRTool/src/main/resources/application.yml +++ b/OCRTool/src/main/resources/application.yml @@ -1,6 +1,3 @@ recognition: -# pythonEnv: /Users/wangvivi/Desktop/MySelf/myenv/bin/python -# scriptEnv: /Users/wangvivi/Desktop/Code/Component/OCRPython/main.py pythonEnv: /Users/wangvivi/miniconda3/envs/ocr/bin/python scriptEnv: /Users/wangvivi/Desktop/Code/Component/OCRPython/maincopy.py - diff --git a/OCRTool/src/main/resources/logback.xml b/OCRTool/src/main/resources/logback.xml new file mode 100644 index 0000000..fbd9c28 --- /dev/null +++ b/OCRTool/src/main/resources/logback.xml @@ -0,0 +1,92 @@ + + + + + + + + + + ${log.pattern} + + + + + + ${log.path}/recognition-info.log + + + + ${log.path}/recognition-info.%d{yyyy-MM-dd}.log + + 60 + + + ${log.pattern} + + + + INFO + + ACCEPT + + DENY + + + + + ${log.path}/recognition-error.log + + + + ${log.path}/recognition-error.%d{yyyy-MM-dd}.log + + 60 + + + ${log.pattern} + + + + ERROR + + ACCEPT + + DENY + + + + + + ${log.path}/recognition-user.log + 
+ + ${log.path}/recognition-user.%d{yyyy-MM-dd}.log + + 60 + + + ${log.pattern} + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file