first commit

This commit is contained in:
weiweiw 2024-04-25 16:50:03 +08:00
parent 2251f00bfb
commit 80530f8b77
8 changed files with 244 additions and 58 deletions

103
OCRPython/validate.py Normal file
View File

@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
import sys
import io
import os
from paddleocr import PaddleOCR
import time
from configs.basic_config import logger
from extractor.identitycard_extractor import IdentityCardExtractor
import base64
import json
# Re-wrap the process's standard output so that everything printed by this
# script is encoded as UTF-8 (the recognized text and JSON contain Chinese).
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
def extractIdCardInfo(type: int, filePath1: str, filePath2: str) -> str:
    """Dispatch an ID-card extraction request by input kind.

    Args:
        type: Input selector. ``0`` means the two data arguments are image
            file paths; ``1`` means they are base64-encoded image data.
        filePath1: First image (path or base64 string, depending on ``type``).
        filePath2: Second image (path or base64 string, depending on ``type``).

    Returns:
        The string produced by the selected extractor, or an empty string
        when ``type`` is not a supported selector.
    """
    if type == 0:
        return extractIdCardInfoByPath(filePath1, filePath2)
    if type == 1:
        return extractIdCardInfoByBase64Data(filePath1, filePath2)
    # Unsupported selector: honour the declared ``str`` return type instead of
    # implicitly returning None, matching the extractors' "" failure value.
    return ""
# Shared OCR engine; created lazily by _get_ocr_engine().
_OCR_ENGINE = None


def _get_ocr_engine():
    """Return the process-wide PaddleOCR engine, creating it on first use."""
    global _OCR_ENGINE
    if _OCR_ENGINE is None:
        # Constructing PaddleOCR loads the models, so do it only once.
        _OCR_ENGINE = PaddleOCR(use_angle_cls=True, lang="ch")
    return _OCR_ENGINE


def _recognize_lines(ocr, file_path):
    """Run OCR on one image and return its recognized text lines in order."""
    lines = []
    for page in ocr.ocr(file_path, cls=False) or []:
        # A page on which nothing was detected may come back as None/empty.
        if not page:
            continue
        lines.extend(entry[1][0] for entry in page)
    return lines


def extractIdCardInfoByPath(filePath1: str, filePath2: str) -> str:
    """Extract ID-card fields from up to two image files.

    Each path that exists is run through OCR; the recognized lines from both
    images are concatenated (first image first, one line per row) and handed
    to ``IdentityCardExtractor.extract_textbyPaddle``.

    Args:
        filePath1: Path of the first image; skipped if empty or missing.
        filePath2: Path of the second image; skipped if empty or missing.

    Returns:
        The extractor's result serialized as JSON (non-ASCII characters kept
        as-is), or an empty string when no text was recognized.
    """
    ocr = _get_ocr_engine()
    start_time = time.time()  # record the start time
    jsonString = ""
    lines = []
    for path in (filePath1, filePath2):
        # Handle each image on its own so that a failure on one image does
        # not prevent the other one from being recognized.
        try:
            if path and os.path.exists(path):
                lines.extend(_recognize_lines(ocr, path))
        except Exception as e:
            logger.error(e)
            print(e)
    text = "".join(line + '\n' for line in lines)
    if text:
        logger.info(f"text:{text}")
        extractor = IdentityCardExtractor()
        tempdict = extractor.extract_textbyPaddle(text)
        jsonString = json.dumps(tempdict, ensure_ascii=False)
    end_time = time.time()  # record the end time
    execution_time = end_time - start_time  # elapsed seconds
    logger.info(f"extractIdCardInfoByPath 耗时{execution_time}")
    return jsonString
def extractIdCardInfoByBase64Data(base64data1: str, base64Data2: str) -> str:
    """Extract ID-card fields from up to two base64-encoded images.

    Each non-empty argument is decoded and written to its own temporary
    ``.png`` file, the files are passed to ``extractIdCardInfoByPath``, and
    the temporary files are removed afterwards.

    Args:
        base64data1: Base64 text of the first image; skipped if empty.
        base64Data2: Base64 text of the second image; skipped if empty.

    Returns:
        Whatever ``extractIdCardInfoByPath`` returns for the decoded images,
        or an empty string if nothing was supplied or an error occurred.
    """
    import tempfile

    logger.info("extractIdCardInfoByBase64Data")
    start_time = time.time()  # record the start time
    jsonString = ""
    temp_paths = []
    try:
        payloads = (("base64data1", base64data1), ("base64Data2", base64Data2))
        for label, payload in payloads:
            if not payload:
                continue
            logger.info(f"not {label}.empty()")
            image_data = base64.b64decode(payload)
            # Unique temp files instead of fixed names in the working
            # directory: concurrent runs cannot overwrite each other's images
            # and a stale file from an earlier run is never picked up.
            fd, temp_path = tempfile.mkstemp(suffix=".png")
            temp_paths.append(temp_path)
            with os.fdopen(fd, "wb") as file:
                file.write(image_data)
        if temp_paths:
            first_path = temp_paths[0]
            second_path = temp_paths[1] if len(temp_paths) > 1 else ""
            jsonString = extractIdCardInfoByPath(first_path, second_path)
    except Exception as e:
        logger.error(e)
    finally:
        # Always clean up, including when decoding or extraction failed.
        for temp_path in temp_paths:
            try:
                os.remove(temp_path)
            except OSError as e:
                logger.error(e)
    end_time = time.time()  # record the end time
    execution_time = end_time - start_time  # elapsed seconds
    logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}")
    return jsonString
# with open('/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG', 'rb') as image_file:
# base64_image_string = base64.b64encode(image_file.read()).decode('utf-8')
#
# jsonString = extractIdCardInfoByBase64Data(base64_image_string,"")
# jsonString = extractIdCardInfoByBase64Data("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg")
# print(jsonString)
# #
if __name__ == "__main__":
    # Manual check: run the extractor on a sample image and print the JSON.
    # Guarded so that importing this module does not trigger an OCR run.
    jsonString = extractIdCardInfoByPath("./images/han.jpg", "")
    print(jsonString)
# jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg")
# print(jsonString)

View File

@ -51,4 +51,13 @@
</plugin>
</plugins>
</build>
<distributionManagement>
<snapshotRepository>
<!-- 和mirror配置x的Nexus的id一致 -->
<id>maven-central</id>
<name>BonusMavenRepo</name>
<url>http://192.168.0.56:8081/repository/maven-public/</url>
</snapshotRepository>
</distributionManagement>
</project>

View File

@ -21,8 +21,8 @@ public class IdentifyRecognitionParams {
this.recognitionData2 = recognitionData2;
}
public RecognitionType getType() {
return type;
public int getType() {
return type.ordinal();
}
public void setType(RecognitionType type) {

View File

@ -1,4 +1,6 @@
package com.bonus.core;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.ApplicationContext;
@ -12,14 +14,15 @@ import java.util.Base64;
*/
@SpringBootApplication
public class MySpringBootApplication {
private static final Logger logger = LoggerFactory.getLogger(MySpringBootApplication.class);
public static void main(String[] args) {
context = SpringApplication.run(MySpringBootApplication.class, args);
MySpringBootApplication app = new MySpringBootApplication();
//String filePath1 = "/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG";
//String filePath1 = "E:\\OCRTool\\OCRPython\\images\\id_card.JPG";
// String filePath1 = "/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg";
// String filePath2 = "/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg";
//app.callExtractIdentityInfoByLocalPath(filePath1, "");
//app.callExtractIdentityInfoByLocalPath(filePath1,"");
// app.callExtractIdentityInfoByBase64Data(filePath1, filePath2);
}
private static ApplicationContext context;
@ -31,8 +34,8 @@ public class MySpringBootApplication {
params.setRecognitionData1(filePath1);
params.setRecognitionData2(filePath2);
String javaString = myComponent.extractInfo(params);
System.out.println("身份证信息是:");
System.out.println(javaString);
logger.info("身份证信息是:");
logger.info(javaString);
}
public String callExtractIdentityInfoByBase64Data(String file1Data, String file2Data ) {
@ -56,8 +59,8 @@ public class MySpringBootApplication {
params.setRecognitionData2(base64Image2);
javaString = myComponent.extractInfo(params);
System.out.println("身份证信息是:");
System.out.println(javaString);
logger.info("身份证信息是:");
logger.info(javaString);
return javaString;
} catch (IOException e) {
e.printStackTrace();

View File

@ -1,6 +1,6 @@
package com.bonus.core;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.io.BufferedReader;
@ -11,40 +11,44 @@ import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author wangvivi
*/
@Component
public class OCRRecognition {
private final RecognitionConfigure configure;
@Autowired
public OCRRecognition(RecognitionConfigure properties) {
this.configure = properties;
}
private static final Logger logger = LoggerFactory.getLogger(OCRRecognition.class);
@Value("${recognition.pythonEnv}")
private String pythonEnv;
@Value("${recognition.scriptEnv}")
private String scriptEnv;
public String extractInfo(IdentifyRecognitionParams recognitionParams){
Process proc;
List<String> stringList = new ArrayList<>();
String lastElement = "";
int offset = 0;
// 设置每次写入的批次大小
int batchSize = 8092;
try {
System.out.println("开始打印从配置里读取的值:");
System.out.println(this.configure.getPythonEnv());
System.out.println(this.configure.getScriptEnv());
System.out.println("开始打印从请求里带过来的参数:");
System.out.println(String.valueOf(recognitionParams.getType()));
System.out.println("参数1的base64 string 是:");
System.out.println(recognitionParams.getRecognitionData1());
System.out.println("参数2的base64 string 是:");
System.out.println(recognitionParams.getRecognitionData2());
logger.info("开始打印从配置里读取的值:");
logger.info(pythonEnv);
logger.info(scriptEnv);
logger.info("开始打印从请求里带过来的参数:");
logger.info(String.valueOf(recognitionParams.getType()));
logger.info("参数1的string 是:");
logger.info(recognitionParams.getRecognitionData1());
logger.info("参数2的string 是:");
logger.info(recognitionParams.getRecognitionData2());
//*, recognitionParams.getRecognitionData1(), recognitionParams.getRecognitionData2()*/
String[] str = new String[]{this.configure.getPythonEnv(),this.configure.getScriptEnv(), String.valueOf(recognitionParams.getType().ordinal())};
String[] str = new String[]{pythonEnv,scriptEnv, String.valueOf(recognitionParams.getType())};
Runtime runtime = Runtime.getRuntime();
if (runtime == null) {
System.out.println("runtime == null");
return "";
}
System.out.println(str[0]);
proc = runtime.exec(str);
logger.info("proc = runtime.exec(str)");
OutputStream outputStream = proc.getOutputStream();
if (!recognitionParams.getRecognitionData1().isEmpty()) {
byte[] input = recognitionParams.getRecognitionData1().getBytes(StandardCharsets.UTF_8);
@ -55,41 +59,20 @@ public class OCRRecognition {
byte[] input = recognitionParams.getRecognitionData2().getBytes(StandardCharsets.UTF_8);
passParameter(outputStream, input);
}
// if (!recognitionParams.getRecognitionData1().isEmpty()) {
// byte[] input = recognitionParams.getRecognitionData1().getBytes(StandardCharsets.UTF_8);
// long len = input.length;
//
// while (offset < len) {
// int remaining = (int) (len - offset);
// int bytesToWrite = Math.min(remaining, batchSize);
// outputStream.write(input, offset, bytesToWrite);
// offset += bytesToWrite;
// }
// }
// if (!recognitionParams.getRecognitionData2().isEmpty()) {
// outputStream.write(System.lineSeparator().getBytes());
// byte[] input = recognitionParams.getRecognitionData2().getBytes(StandardCharsets.UTF_8);
// long len = input.length;
// while (offset < len) {
// int remaining = (int) (len - offset);
// int bytesToWrite = Math.min(remaining, batchSize);
// outputStream.write(input, offset, bytesToWrite);
// offset += bytesToWrite;
// }
//
outputStream.flush();
outputStream.close();
logger.info("outputStream.close()");
BufferedReader in = new BufferedReader(new InputStreamReader(proc.getInputStream(), StandardCharsets.UTF_8));
String line = null;
logger.info("BufferedReader in = new BufferedReade");
String line;
while ((line = in.readLine()) != null) {
logger.info(line);
stringList.add(line);
}
proc.waitFor();
in.close();
} catch (Exception e) {
e.printStackTrace();
logger.error(e.getMessage());
}
if (!stringList.isEmpty()) {
lastElement = stringList.get(stringList.size() - 1);

View File

@ -9,7 +9,6 @@ public class RecognitionController {
@Autowired OCRRecognition ocrRecognition;
@PostMapping("/recognition")
public String recognition(@RequestBody IdentifyRecognitionParams identifyRecognitionParams){
IdentifyRecognitionParams.RecognitionType type = identifyRecognitionParams.getType();
return ocrRecognition.extractInfo(identifyRecognitionParams);
}

View File

@ -1,6 +1,3 @@
recognition:
# pythonEnv: /Users/wangvivi/Desktop/MySelf/myenv/bin/python
# scriptEnv: /Users/wangvivi/Desktop/Code/Component/OCRPython/main.py
pythonEnv: /Users/wangvivi/miniconda3/envs/ocr/bin/python
scriptEnv: /Users/wangvivi/Desktop/Code/Component/OCRPython/maincopy.py

View File

@ -0,0 +1,92 @@
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
    <!-- Directory where log files are stored -->
    <property name="log.path" value="./logs/" />
    <!-- Log output pattern -->
    <property name="log.pattern" value="%d{HH:mm:ss.SSS} [%thread] %-5level %logger{20} - [%method,%line] - %msg%n" />

    <!-- Console output -->
    <appender name="console" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>${log.pattern}</pattern>
        </encoder>
    </appender>

    <!-- System log output: INFO events only -->
    <appender name="file_info" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>${log.path}/recognition-info.log</file>
        <!-- Rolling policy: create log files based on time -->
        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
            <!-- Log file name pattern -->
            <fileNamePattern>${log.path}/recognition-info.%d{yyyy-MM-dd}.log</fileNamePattern>
            <!-- Keep at most 60 days of history -->
            <maxHistory>60</maxHistory>
        </rollingPolicy>
        <encoder>
            <pattern>${log.pattern}</pattern>
        </encoder>
        <filter class="ch.qos.logback.classic.filter.LevelFilter">
            <!-- Level to filter on -->
            <level>INFO</level>
            <!-- On match: accept (log the event) -->
            <onMatch>ACCEPT</onMatch>
            <!-- On mismatch: deny (drop the event) -->
            <onMismatch>DENY</onMismatch>
        </filter>
    </appender>

    <!-- System log output: ERROR events only -->
    <appender name="file_error" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>${log.path}/recognition-error.log</file>
        <!-- Rolling policy: create log files based on time -->
        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
            <!-- Log file name pattern -->
            <fileNamePattern>${log.path}/recognition-error.%d{yyyy-MM-dd}.log</fileNamePattern>
            <!-- Keep at most 60 days of history -->
            <maxHistory>60</maxHistory>
        </rollingPolicy>
        <encoder>
            <pattern>${log.pattern}</pattern>
        </encoder>
        <filter class="ch.qos.logback.classic.filter.LevelFilter">
            <!-- Level to filter on -->
            <level>ERROR</level>
            <!-- On match: accept (log the event) -->
            <onMatch>ACCEPT</onMatch>
            <!-- On mismatch: deny (drop the event) -->
            <onMismatch>DENY</onMismatch>
        </filter>
    </appender>

    <!-- User access log output -->
    <appender name="sys-user" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>${log.path}/recognition-user.log</file>
        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
            <!-- Roll over daily -->
            <fileNamePattern>${log.path}/recognition-user.%d{yyyy-MM-dd}.log</fileNamePattern>
            <!-- Keep at most 60 days of history -->
            <maxHistory>60</maxHistory>
        </rollingPolicy>
        <encoder>
            <pattern>${log.pattern}</pattern>
        </encoder>
    </appender>

    <!-- Log level for the application's own packages -->
    <logger name="com.bonus" level="info" />
    <!-- Log level for Spring -->
    <logger name="org.springframework" level="warn" />

    <!-- User operation log -->
    <logger name="sys-user" level="info">
        <appender-ref ref="sys-user"/>
    </logger>

    <!-- Single root logger: console plus the system INFO and ERROR files.
         A configuration may contain at most one <root> element, so the two
         former declarations are merged here. -->
    <root level="info">
        <appender-ref ref="console" />
        <appender-ref ref="file_info" />
        <appender-ref ref="file_error" />
    </root>
</configuration>