From f2acf170caf31b0f402d5cab564635a0fdfc7bdb Mon Sep 17 00:00:00 2001
From: weiweiw <14335254+weiweiw22@user.noreply.gitee.com>
Date: Mon, 27 May 2024 16:15:18 +0800
Subject: [PATCH] update

---
 OCRPython/paddleOcrTest.py                    |  32 ++++
 OCRPython/util/__init__.py                    |   0
 OCRPython/util/ocr_recognition.py             | 142 ++++++++++++++++++
 .../bonus/core/IdentifyRecognitionParams.java |  64 ++++++++
 4 files changed, 238 insertions(+)
 create mode 100644 OCRPython/paddleOcrTest.py
 create mode 100644 OCRPython/util/__init__.py
 create mode 100644 OCRPython/util/ocr_recognition.py
 create mode 100644 OCRTool/src/main/java/com/bonus/core/IdentifyRecognitionParams.java

diff --git a/OCRPython/paddleOcrTest.py b/OCRPython/paddleOcrTest.py
new file mode 100644
--- /dev/null
+++ b/OCRPython/paddleOcrTest.py
@@ -0,0 +1,32 @@
+import time
+
+from paddleocr import PaddleOCR, draw_ocr
+from PIL import Image
+
+from extractor.identitycard_extractor import IdentityCardExtractor
+
+# The language PaddleOCR recognises is selected through the `lang` argument,
+# for example `ch`, `en`, `fr`, `german`, `korean`, `japan`.
+
+start_time = time.time()
+ocr = PaddleOCR(use_angle_cls=True, lang="ch")  # need to run only once to download and load model into memory
+img_path = './images/内蒙古1正.png'
+result = ocr.ocr(img_path, cls=True)
+for res in result:
+    # Some PaddleOCR releases return None for a page without any detected text.
+    for line in res or []:
+        print(line)
+
+end_time = time.time()
+print(end_time - start_time)
+
+# Show the result.
+# If simfang.ttf is not available locally, download it from the doc/fonts directory.
+lines = result[0] or []
+image = Image.open(img_path).convert('RGB')
+boxes = [line[0] for line in lines]
+txts = [line[1][0] for line in lines]
+scores = [line[1][1] for line in lines]
+im_show = draw_ocr(image, boxes, txts, scores, font_path='doc/fonts/simfang.ttf')
+im_show = Image.fromarray(im_show)
+im_show.save('result.jpg')
diff --git a/OCRPython/util/__init__.py b/OCRPython/util/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/OCRPython/util/ocr_recognition.py b/OCRPython/util/ocr_recognition.py
new file mode 100644
--- /dev/null
+++ b/OCRPython/util/ocr_recognition.py
@@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+import base64
+import io
+import json
+import os
+import sys
+import tempfile
+import time
+
+from paddleocr import PaddleOCR
+
+from configs.basic_config import *
+from extractor.identitycard_extractor import IdentityCardExtractor
+
+# Re-wrap stdout so that everything printed is encoded as UTF-8.
+sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
+
+
+class OCRRecognition:
+    """Identity-card OCR entry points; every public method returns a JSON string."""
+
+    # Shared PaddleOCR engine: loading the models is slow, so it is built once and reused.
+    _ocr = None
+
+    @staticmethod
+    def _get_ocr():
+        """Return the shared PaddleOCR engine, creating it on first use."""
+        if OCRRecognition._ocr is None:
+            OCRRecognition._ocr = PaddleOCR(use_angle_cls=True, lang="ch")
+        return OCRRecognition._ocr
+
+    @staticmethod
+    def _error(code) -> dict:
+        """Build the error payload (code plus message) for *code*."""
+        return {"code": code, "msg": error_codes[code]}
+
+    @staticmethod
+    def _recognize_text(ocr, filePath: str) -> str:
+        """Return the text recognised in *filePath*, one line per text box."""
+        result = ocr.ocr(filePath, cls=False)
+        # Some PaddleOCR releases return None for a page without any detected text.
+        return "".join(line[1][0] + '\n' for res in (result or []) for line in (res or []))
+
+    @staticmethod
+    def extractIdCardInfo(type: int = 0, filePath1: str = "", filePath2: str = "") -> str:
+        """Dispatch an identity-card recognition request.
+
+        ``type`` 0 treats both arguments as image paths and ``type`` 1 treats them
+        as base64-encoded image data; any other value returns the
+        NO_DEFINED_FUNCTION_ERROR payload. The parameter keeps its
+        builtin-shadowing name so that existing keyword callers keep working.
+        """
+        if 0 == type:
+            logger.info("0 == type")
+            return OCRRecognition.extractIdCardInfoByPath(filePath1, filePath2)
+        if 1 == type:
+            logger.info("1 == type")
+            return OCRRecognition.extractIdCardInfoByBase64Data(filePath1, filePath2)
+        logger.info("type is other")
+        finalResult = OCRRecognition._error(NO_DEFINED_FUNCTION_ERROR)
+        return json.dumps(finalResult, ensure_ascii=False)
+
+    @staticmethod
+    def extractIdCardInfoByPath(filePath1: str = "", filePath2: str = "") -> str:
+        """Recognise up to two identity-card images given by local path.
+
+        Empty paths are skipped. Returns the extractor's result as JSON, or an
+        error payload: LOCAL_PATH_NOT_EXIST when a given path is missing,
+        OCR_RECOGNIZE_OTHER_EXCEPTION when recognition raises, and
+        NO_TEXT_RECOGNIZED when no text was found.
+        """
+        logger.info("ocr加载开始计时")
+        start_time = time.time()  # start of the timed section
+        text = ""
+        try:
+            ocr = OCRRecognition._get_ocr()
+            for path in (filePath1, filePath2):
+                if not path:
+                    continue
+                if not os.path.exists(path):
+                    finalResult = OCRRecognition._error(LOCAL_PATH_NOT_EXIST)
+                    logger.error(f"{path} doesn't exist,error information:{finalResult}")
+                    return json.dumps(finalResult, ensure_ascii=False)
+                text += OCRRecognition._recognize_text(ocr, path)
+        except Exception as e:
+            finalResult = OCRRecognition._error(OCR_RECOGNIZE_OTHER_EXCEPTION)
+            logger.error(f"{e} ,error information:{finalResult}")
+            return json.dumps(finalResult, ensure_ascii=False)
+
+        if not text:
+            finalResult = OCRRecognition._error(NO_TEXT_RECOGNIZED)
+            logger.error(f"{filePath1},{filePath2} can't be recognized,error information:{finalResult}")
+            return json.dumps(finalResult, ensure_ascii=False)
+
+        extractor = IdentityCardExtractor()
+        tempdict = extractor.extract_textbyPaddle(text)
+        jsonString = json.dumps(tempdict, ensure_ascii=False)
+        execution_time = time.time() - start_time
+        logger.info(f"extractIdCardInfoByPath 耗时{execution_time}秒")
+        return jsonString
+
+    @staticmethod
+    def extractIdCardInfoByBase64Data(base64data1: str, base64Data2: str) -> str:
+        """Recognise up to two identity-card images given as base64 strings.
+
+        Each non-empty argument is decoded into its own temporary PNG file; the
+        files are handed to extractIdCardInfoByPath and always removed afterwards.
+        Returns that method's JSON, or the BASE64_DATA_INCOMPLETE payload when no
+        data is supplied or decoding/writing fails.
+        """
+        finalResult = OCRRecognition._error(BASE64_DATA_INCOMPLETE)
+        logger.info("extractIdCardInfoByBase64Data")
+        start_time = time.time()  # start of the timed section
+        tempPaths = []
+        try:
+            for data in (base64data1, base64Data2):
+                if not data:
+                    continue
+                # A unique temporary file per image: fixed names in the working
+                # directory collide between concurrent calls and can leave a
+                # stale image behind that a later call would pick up.
+                fd, path = tempfile.mkstemp(suffix=".png")
+                tempPaths.append(path)
+                with os.fdopen(fd, "wb") as file:
+                    file.write(base64.b64decode(data))
+            if not tempPaths:
+                logger.error(f"no base64 data supplied,error information:{finalResult}")
+                return json.dumps(finalResult, ensure_ascii=False)
+            jsonString = OCRRecognition.extractIdCardInfoByPath(*tempPaths)
+        except Exception as e:
+            logger.error(f"{e},error information:{finalResult}")
+            return json.dumps(finalResult, ensure_ascii=False)
+        finally:
+            for path in tempPaths:
+                try:
+                    os.remove(path)
+                except OSError as err:
+                    logger.error(f"{err},failed to remove temporary file {path}")
+
+        execution_time = time.time() - start_time
+        logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}秒")
+        return jsonString
diff --git a/OCRTool/src/main/java/com/bonus/core/IdentifyRecognitionParams.java b/OCRTool/src/main/java/com/bonus/core/IdentifyRecognitionParams.java
new file mode 100644
--- /dev/null
+++ b/OCRTool/src/main/java/com/bonus/core/IdentifyRecognitionParams.java
@@ -0,0 +1,64 @@
+package com.bonus.core;
+
+import java.util.Objects;
+
+/**
+ * Parameters of a recognition request: the recognition type plus the front and back image data.
+ *
+ * @author wangvivi
+ */
+public class IdentifyRecognitionParams {
+    /** Defaults to path-based identity-card recognition so that {@link #getType()} never dereferences null. */
+    private RecognitionType type = RecognitionType.IDENTITY_CARD_PATH;
+    private String recognitionFrontData = "";
+    private String recognitionBackData = "";
+
+    /**
+     * Returns the ordinal of the recognition type.
+     *
+     * @return 0 for {@code IDENTITY_CARD_PATH}, 1 for {@code IDENTITY_CARD_BASE64_DATA}, 2 for {@code INVOICE_PATH}
+     */
+    public int getType() {
+        return type.ordinal();
+    }
+
+    /**
+     * Sets the recognition type.
+     *
+     * @param type the recognition type, must not be null
+     * @throws NullPointerException if {@code type} is null
+     */
+    public void setType(RecognitionType type) {
+        this.type = Objects.requireNonNull(type, "type");
+    }
+
+    public String getRecognitionFrontData() {
+        return this.recognitionFrontData;
+    }
+
+    /** Sets the front image data; null is stored as the empty string. */
+    public void setRecognitionFrontData(String recognitionFrontData) {
+        this.recognitionFrontData = recognitionFrontData == null ? "" : recognitionFrontData;
+    }
+
+    public String getRecognitionBackData() {
+        return this.recognitionBackData;
+    }
+
+    /** Sets the back image data; null is stored as the empty string. */
+    public void setRecognitionBackData(String recognitionBackData) {
+        this.recognitionBackData = recognitionBackData == null ? "" : recognitionBackData;
+    }
+
+    /**
+     * Recognition types. {@link #getType()} exposes the ordinal, so constants must not be reordered.
+     */
+    public enum RecognitionType {
+        /** The data fields hold image paths. */
+        IDENTITY_CARD_PATH,
+        /** The data fields hold base64-encoded images. */
+        IDENTITY_CARD_BASE64_DATA,
+        INVOICE_PATH,
+    }
+
+}