根据技术评审委员会意见增加错误码的返回
This commit is contained in:
parent
51efb77261
commit
81edf77e84
|
|
@ -2,6 +2,24 @@ import logging
|
||||||
import os
|
import os
|
||||||
from logging.handlers import RotatingFileHandler
|
from logging.handlers import RotatingFileHandler
|
||||||
|
|
||||||
|
# Result codes reported by the ID-card OCR pipeline. Each code is paired with
# a user-facing message in ``error_codes`` below.
RECOGNITION_INFO_COMPLETE = 20000  # every ID-card field was extracted
RECOGNITION_INFO_INCOMPLETE = 20001  # at least one ID-card field is empty
LOCAL_PATH_NOT_EXIST = 20002  # the local image path does not exist
BASE64_DATA_INCOMPLETE = 20003  # the Base64 image payload is malformed
NO_TEXT_RECOGNIZED = 20004  # the image exists but no text was recognised
OCR_RECOGNIZE_OTHER_EXCEPTION = 20005  # any other failure during OCR
RECOGNITION_INFO_PARSE_OTHER_EXCEPTION = 20006  # any other failure while parsing


# Maps each result code to the message returned alongside it; look up with
# ``error_codes[code]``. The message strings are part of the response payload
# and must not be altered.
error_codes = {
    RECOGNITION_INFO_COMPLETE: "身份证信息完整",
    RECOGNITION_INFO_INCOMPLETE: "身份证信息不完整",
    LOCAL_PATH_NOT_EXIST: "识别本地图片路径不存在",
    BASE64_DATA_INCOMPLETE: "图片Base64编码异常",
    NO_TEXT_RECOGNIZED: "识别本地图片路径存在,但没有识别出文字",
    OCR_RECOGNIZE_OTHER_EXCEPTION: "OCR识别身份证其他异常",
    RECOGNITION_INFO_PARSE_OTHER_EXCEPTION: "身份证信息解析其他异常",
}
|
||||||
# 是否显示详细日志
|
# 是否显示详细日志
|
||||||
log_verbose = True
|
log_verbose = True
|
||||||
# 通常情况下不需要更改以下内容
|
# 通常情况下不需要更改以下内容
|
||||||
|
|
@ -32,3 +50,6 @@ file_handler.setFormatter(formatter)
|
||||||
|
|
||||||
# 获取日志记录器并添加文件处理程序
|
# 获取日志记录器并添加文件处理程序
|
||||||
logger.addHandler(file_handler)
|
logger.addHandler(file_handler)
|
||||||
|
|
||||||
|
|
||||||
|
# print(f"error_codes[0].value:{error_codes['200']}")
|
||||||
|
|
@ -1,78 +1,25 @@
|
||||||
import re
|
import re
|
||||||
from extractor import Extractor
|
from extractor import Extractor
|
||||||
from configs.basic_config import logger
|
from configs.basic_config import *
|
||||||
|
|
||||||
class IdentityCardExtractor(Extractor):
|
class IdentityCardExtractor(Extractor):
|
||||||
def extract_text(self, text:str)->dict:
|
|
||||||
try:
|
|
||||||
patterns = {
|
|
||||||
"issuingAuthority": r"签发机关\n(.+?)\n",
|
|
||||||
"validTime": r"有效期限\n(.+?)\n",
|
|
||||||
"name": r"(\S+)\n(?:男|女)",
|
|
||||||
"gender": r"(\S+)民族",
|
|
||||||
"ethnicity": r"民族(\S+)",
|
|
||||||
"dateOfBirth": r"(\d+年\d+月\d+日)",
|
|
||||||
"address": r"(住址|佳址)\s*(.*?)公民身份号码",
|
|
||||||
"idNumber": r"(\d{18}|\d{17}[Xx])"
|
|
||||||
}
|
|
||||||
tempText = self.remove_blank_lines(text)
|
|
||||||
# 提取信息
|
|
||||||
info = {}
|
|
||||||
for key, pattern in patterns.items():
|
|
||||||
match = re.search(pattern, tempText,re.DOTALL)
|
|
||||||
if match:
|
|
||||||
if "address" == key:
|
|
||||||
tempStr = match.group(2).strip()
|
|
||||||
else:
|
|
||||||
tempStr = match.group(1).strip()
|
|
||||||
info[key] = tempStr.replace("\n", "")
|
|
||||||
return info
|
|
||||||
except Exception as e:
|
|
||||||
print(e)
|
|
||||||
logger.error(e)
|
|
||||||
return {}
|
|
||||||
|
|
||||||
# def extract_textbyPaddle(self, text:str)->dict:
|
|
||||||
# try:
|
|
||||||
# patterns = {
|
|
||||||
# "issuingAuthority": r"签发机关\n*(.+?)\n",
|
|
||||||
# "validTime": r"有效期限\n*(.+?)\n",
|
|
||||||
# "name": r"姓名(.*?)\n", #####
|
|
||||||
# "gender": r"(\S)民族",
|
|
||||||
# "ethnicity": r"民族(\S+)",
|
|
||||||
# "dateOfBirth": r"(\d+年\d+月\d+日)",
|
|
||||||
# "address": r"(住址|佳址)\s*(.*?)公民身份号码",
|
|
||||||
# "idNumber": r"(\d{18}|\d{17}[Xx])"
|
|
||||||
# }
|
|
||||||
#
|
|
||||||
# tempText = self.remove_blank_lines(text)
|
|
||||||
# # 提取信息
|
|
||||||
# info = {}
|
|
||||||
# for key, pattern in patterns.items():
|
|
||||||
# match = re.search(pattern, tempText,re.DOTALL)
|
|
||||||
# if match:
|
|
||||||
# if "address" == key:
|
|
||||||
# tempStr = match.group(2).strip()
|
|
||||||
# else:
|
|
||||||
# tempStr = match.group(1).strip()
|
|
||||||
# info[key] = tempStr.replace("\n", "")
|
|
||||||
# return info
|
|
||||||
# except Exception as e:
|
|
||||||
# print(e)
|
|
||||||
# logger.error(e)
|
|
||||||
# return {}
|
|
||||||
|
|
||||||
def extract_textbyPaddle(self, text:str)->dict:
|
def extract_textbyPaddle(self, text:str)->dict:
|
||||||
|
data = {
|
||||||
|
"issuingAuthority": "",
|
||||||
|
"validTime": "",
|
||||||
|
"name": "",
|
||||||
|
"gender": "",
|
||||||
|
"ethnicity": "汉",
|
||||||
|
"dateOfBirth": "",
|
||||||
|
"address": "",
|
||||||
|
"idNumber": "",
|
||||||
|
}
|
||||||
|
|
||||||
|
result = {
|
||||||
|
"code": RECOGNITION_INFO_COMPLETE,
|
||||||
|
"msg": error_codes[RECOGNITION_INFO_COMPLETE],
|
||||||
|
"data": data
|
||||||
|
}
|
||||||
try:
|
try:
|
||||||
result = {
|
|
||||||
"issuingAuthority": "",
|
|
||||||
"validTime": "",
|
|
||||||
"name": "",
|
|
||||||
"gender": "",
|
|
||||||
"ethnicity": "汉",
|
|
||||||
"dateOfBirth": "",
|
|
||||||
"address": "",
|
|
||||||
"idNumber": ""}
|
|
||||||
# 提取签发机关
|
# 提取签发机关
|
||||||
issuing_authority = re.search(r"(签发机关|签发机美)\n*(.+?)\n", text, re.DOTALL)
|
issuing_authority = re.search(r"(签发机关|签发机美)\n*(.+?)\n", text, re.DOTALL)
|
||||||
if issuing_authority:
|
if issuing_authority:
|
||||||
|
|
@ -80,14 +27,14 @@ class IdentityCardExtractor(Extractor):
|
||||||
if "公安局" not in tempAuthority:
|
if "公安局" not in tempAuthority:
|
||||||
issuing_authority = re.search(r"(\S+)\n*(签发机关|签发机美)\n", text, re.DOTALL)
|
issuing_authority = re.search(r"(\S+)\n*(签发机关|签发机美)\n", text, re.DOTALL)
|
||||||
if issuing_authority:
|
if issuing_authority:
|
||||||
result["issuingAuthority"] = issuing_authority.group(1).strip()
|
data["issuingAuthority"] = issuing_authority.group(1).strip()
|
||||||
else:
|
else:
|
||||||
result["issuingAuthority"] = issuing_authority.group(2).strip()
|
data["issuingAuthority"] = issuing_authority.group(2).strip()
|
||||||
|
|
||||||
# 提取有效期限
|
# 提取有效期限
|
||||||
valid_time = re.search(r"有效期限\n*(\d{4}\.\d{2}\.\d{2}-\S+)", text, re.DOTALL)
|
valid_time = re.search(r"有效期限\n*(\d{4}\.\d{2}\.\d{2}-\S+)", text, re.DOTALL)
|
||||||
if valid_time:
|
if valid_time:
|
||||||
result["validTime"] = valid_time.group(1).strip()
|
data["validTime"] = valid_time.group(1).strip()
|
||||||
|
|
||||||
# 提取姓名
|
# 提取姓名
|
||||||
name = re.search(r"姓名\s*(.*?)\n", text,re.DOTALL)
|
name = re.search(r"姓名\s*(.*?)\n", text,re.DOTALL)
|
||||||
|
|
@ -95,78 +42,85 @@ class IdentityCardExtractor(Extractor):
|
||||||
tempName = name.group(1).strip()
|
tempName = name.group(1).strip()
|
||||||
if tempName in "性别男" or tempName in "性别女" or "性别男" in tempName or "性别女" in tempName or tempName.isupper() or tempName.islower():
|
if tempName in "性别男" or tempName in "性别女" or "性别男" in tempName or "性别女" in tempName or tempName.isupper() or tempName.islower():
|
||||||
name = re.search(r"(\S+)\s*姓名", text, re.DOTALL)
|
name = re.search(r"(\S+)\s*姓名", text, re.DOTALL)
|
||||||
result["name"] = name.group(1).strip()
|
data["name"] = name.group(1).strip()
|
||||||
else:
|
else:
|
||||||
result["name"] = name.group(1).strip()
|
data["name"] = name.group(1).strip()
|
||||||
|
|
||||||
else:
|
else:
|
||||||
name = re.search(r"米名(\S*)址\s*(\S+)", text, re.DOTALL)
|
name = re.search(r"米名(\S*)址\s*(\S+)", text, re.DOTALL)
|
||||||
if name:
|
if name:
|
||||||
result["name"] = name.group(2).strip()
|
data["name"] = name.group(2).strip()
|
||||||
else:
|
else:
|
||||||
name = re.search(r"名\s*(\S+)\s*姓\s*", text, re.DOTALL)
|
name = re.search(r"名\s*(\S+)\s*姓\s*", text, re.DOTALL)
|
||||||
if name:
|
if name:
|
||||||
result["name"] = name.group(1).strip()
|
data["name"] = name.group(1).strip()
|
||||||
else:
|
else:
|
||||||
name = re.search(r"(\S+)\s*(男|女|性别)", text, re.DOTALL)
|
name = re.search(r"(\S+)\s*(男|女|性别)", text, re.DOTALL)
|
||||||
if name:
|
if name:
|
||||||
result["name"] = name.group(1).strip()
|
data["name"] = name.group(1).strip()
|
||||||
tempName = result["name"]
|
tempName = data["name"]
|
||||||
if tempName.startswith("名"):
|
if tempName.startswith("名"):
|
||||||
result["name"] = tempName[len("名"):]
|
data["name"] = tempName[len("名"):]
|
||||||
|
|
||||||
# 提取民族
|
# 提取民族
|
||||||
ethnicity = re.search(r"民\s*(族|旅)\s*(\S+)", text, re.DOTALL)
|
ethnicity = re.search(r"民\s*(族|旅)\s*(\S+)", text, re.DOTALL)
|
||||||
if ethnicity:
|
if ethnicity:
|
||||||
result["ethnicity"] = ethnicity.group(2).strip()
|
data["ethnicity"] = ethnicity.group(2).strip()
|
||||||
|
|
||||||
# 提取地址
|
# 提取地址
|
||||||
address = re.search(r"(住址|佳址)(.*?)公民身份号码", text, re.DOTALL)
|
address = re.search(r"(住址|佳址)(.*?)公民身份号码", text, re.DOTALL)
|
||||||
if address:
|
if address:
|
||||||
result["address"] = address.group(2).strip().replace("\n", "")
|
data["address"] = address.group(2).strip().replace("\n", "")
|
||||||
else:
|
else:
|
||||||
address = re.search(r"(\S+省)(.*?)公民身份号码", text, re.DOTALL)
|
address = re.search(r"(\S+省)(.*?)公民身份号码", text, re.DOTALL)
|
||||||
if address:
|
if address:
|
||||||
result["address"] = address.group(1).strip().replace("\n", "") + address.group(2).strip().replace("\n", "")
|
data["address"] = address.group(1).strip().replace("\n", "") + address.group(2).strip().replace("\n", "")
|
||||||
else:
|
else:
|
||||||
address = re.search(r"(\S+市)(.*?)公民身份号码", text, re.DOTALL)
|
address = re.search(r"(\S+市)(.*?)公民身份号码", text, re.DOTALL)
|
||||||
if address:
|
if address:
|
||||||
result["address"] = address.group(1).strip().replace("\n", "") + address.group(2).strip().replace("\n", "")
|
data["address"] = address.group(1).strip().replace("\n", "") + address.group(2).strip().replace("\n", "")
|
||||||
else:
|
else:
|
||||||
address = re.search(r"(\S+县)(.*?)公民身份号码", text, re.DOTALL)
|
address = re.search(r"(\S+县)(.*?)公民身份号码", text, re.DOTALL)
|
||||||
if address:
|
if address:
|
||||||
result["address"] = address.group(1).strip().replace("\n", "") + address.group(2).strip().replace("\n", "")
|
data["address"] = address.group(1).strip().replace("\n", "") + address.group(2).strip().replace("\n", "")
|
||||||
if result["address"]:
|
if data["address"]:
|
||||||
result["address"] = re.sub(r'[A-Z]', '', result["address"])
|
data["address"] = re.sub(r'[A-Z]', '', data["address"])
|
||||||
pattern = r"\d+[号组室房]"
|
pattern = r"\d+[号组室房]"
|
||||||
# 匹配数字+号、数字+组、数字+室、数字+房 的最后一个匹配项
|
# 匹配数字+号、数字+组、数字+室、数字+房 的最后一个匹配项
|
||||||
tempAddress = result["address"]
|
tempAddress = data["address"]
|
||||||
matchAddress = None
|
matchAddress = None
|
||||||
for m in re.finditer(pattern, tempAddress):
|
for m in re.finditer(pattern, tempAddress):
|
||||||
matchAddress = m
|
matchAddress = m
|
||||||
if matchAddress:
|
if matchAddress:
|
||||||
index = matchAddress.end() # 获取匹配项的结束位置
|
index = matchAddress.end() # 获取匹配项的结束位置
|
||||||
result["address"] = tempAddress[:index] # 截取匹配项及之前的字符串
|
data["address"] = tempAddress[:index] # 截取匹配项及之前的字符串
|
||||||
#去掉地址后面的
|
#去掉地址后面的
|
||||||
else:
|
else:
|
||||||
pattern = r"\d{18,}$" # 匹配1位以上的数字结尾
|
pattern = r"\d{18,}$" # 匹配1位以上的数字结尾
|
||||||
result["address"] = re.sub(pattern, "", result["address"])
|
data["address"] = re.sub(pattern, "", data["address"])
|
||||||
# 提取身份证号码
|
# 提取身份证号码
|
||||||
id_number = re.search(r"([123456]\d{17}|[123456]\d{16}[Xx])", text, re.DOTALL)
|
id_number = re.search(r"([123456]\d{17}|[123456]\d{16}[Xx])", text, re.DOTALL)
|
||||||
if id_number:
|
if id_number:
|
||||||
result["idNumber"] = id_number.group(1).strip()
|
data["idNumber"] = id_number.group(1).strip()
|
||||||
|
|
||||||
if result["idNumber"]:
|
if data["idNumber"]:
|
||||||
# 提取出生日期
|
# 提取出生日期
|
||||||
result["dateOfBirth"] = self.extract_birthday_from_id(result["idNumber"])
|
data["dateOfBirth"] = self.extract_birthday_from_id(data["idNumber"])
|
||||||
# 提取性别
|
# 提取性别
|
||||||
result["gender"] = self.get_gender_from_id(result["idNumber"])
|
data["gender"] = self.get_gender_from_id(data["idNumber"])
|
||||||
|
|
||||||
|
for key, value in data.items():
|
||||||
|
if not value or len(value) == 0:
|
||||||
|
result["code"] = RECOGNITION_INFO_INCOMPLETE
|
||||||
|
result["msg"] = error_codes[RECOGNITION_INFO_INCOMPLETE]
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
result["code"] = RECOGNITION_INFO_PARSE_OTHER_EXCEPTION
|
||||||
|
result["msg"] = error_codes[RECOGNITION_INFO_PARSE_OTHER_EXCEPTION]
|
||||||
logger.error(e)
|
logger.error(e)
|
||||||
return {}
|
return result
|
||||||
|
|
||||||
class InvoiceExtractor(Extractor):
|
class InvoiceExtractor(Extractor):
|
||||||
def extract_text(self,text:str)->dict:
|
def extract_text(self,text:str)->dict:
|
||||||
|
|
@ -294,25 +248,25 @@ class IdentityCardExtractor(Extractor):
|
||||||
# 公民身份号码
|
# 公民身份号码
|
||||||
# 513401197807087411
|
# 513401197807087411
|
||||||
# """
|
# """
|
||||||
# text = """中华人民共和国
|
text = """中华人民共和国
|
||||||
# 居民身份证
|
居民身份证
|
||||||
# 签发机关
|
签发机关
|
||||||
# 木里县公安局
|
木里县公安局
|
||||||
# 有效期限
|
有效期限
|
||||||
# 2020.03.16-2025.03.16
|
2020.03.16-2025.03.16
|
||||||
# 名
|
名
|
||||||
# 蒋子古
|
蒋子古
|
||||||
# 姓
|
姓
|
||||||
# 男
|
男
|
||||||
# 民族彝
|
民族彝
|
||||||
# 出生
|
出生
|
||||||
# 2005年1月4日
|
2005年1月4日
|
||||||
# 住址
|
住址
|
||||||
# 四川省木里藏族自治县耗
|
四川省木里藏族自治县耗
|
||||||
# 牛坪乡泥珠村5组29号
|
牛坪乡泥珠村5组29号
|
||||||
# 公民身份号码
|
公民身份号码
|
||||||
# 513422200501044415
|
513422200501044415
|
||||||
# """
|
"""
|
||||||
#
|
#
|
||||||
# text = """姓名苏龙格德·胡尔查巴特尔
|
# text = """姓名苏龙格德·胡尔查巴特尔
|
||||||
# 性别男民族蒙古
|
# 性别男民族蒙古
|
||||||
|
|
@ -364,7 +318,7 @@ class IdentityCardExtractor(Extractor):
|
||||||
# 林村10组
|
# 林村10组
|
||||||
# 8430124197005132917
|
# 8430124197005132917
|
||||||
# 公民身份号码"""
|
# 公民身份号码"""
|
||||||
#
|
|
||||||
# text = """中华人民共和国
|
# text = """中华人民共和国
|
||||||
# 居民身份证
|
# 居民身份证
|
||||||
# 签发机关醴陵市公安局
|
# 签发机关醴陵市公安局
|
||||||
|
|
|
||||||
|
|
@ -1,95 +1,107 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
import sys
|
import sys
|
||||||
import io
|
import io
|
||||||
import os
|
|
||||||
from paddleocr import PaddleOCR
|
from paddleocr import PaddleOCR
|
||||||
import time
|
import time
|
||||||
from configs.basic_config import logger
|
from configs.basic_config import *
|
||||||
from extractor.identitycard_extractor import IdentityCardExtractor
|
from extractor.identitycard_extractor import IdentityCardExtractor
|
||||||
import base64
|
import base64
|
||||||
import json
|
import json
|
||||||
|
|
||||||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
||||||
def extractIdCardInfo(type:int, filePath1: str, filePath2: str)->str:
|
def extractIdCardInfo(type:int=0, filePath1: str= "", filePath2: str= "")->str:
|
||||||
if (0 == type):
|
if 0 == type:
|
||||||
return extractIdCardInfoByPath(filePath1, filePath2)
|
return extractIdCardInfoByPath(filePath1, filePath2)
|
||||||
elif (1 == type):
|
elif 1 == type:
|
||||||
return extractIdCardInfoByBase64Data(filePath1,filePath2)
|
return extractIdCardInfoByBase64Data(filePath1,filePath2)
|
||||||
else:
|
else:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def extractIdCardInfoByPath(filePath1: str, filePath2: str)->str:
|
def extractIdCardInfoByPath(filePath1: str = "", filePath2: str = "")->str:
|
||||||
ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
|
ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
|
||||||
text = ""
|
text = ""
|
||||||
start_time = time.time() # 记录结束时间
|
start_time = time.time() # 记录结束时间
|
||||||
jsonString = ""
|
finalResult = {
|
||||||
try:
|
"code": LOCAL_PATH_NOT_EXIST,
|
||||||
if os.path.exists(filePath1):
|
"msg": error_codes[LOCAL_PATH_NOT_EXIST],
|
||||||
result = ocr.ocr(filePath1, cls=False)
|
|
||||||
for idx in range(len(result)):
|
|
||||||
res = result[idx]
|
|
||||||
for line in res:
|
|
||||||
text += (line[1][0] + '\n')
|
|
||||||
else:
|
|
||||||
logger.error(f"{filePath1} doesn't exist")
|
|
||||||
|
|
||||||
if os.path.exists(filePath2):
|
}
|
||||||
result = ocr.ocr(filePath2, cls=False)
|
try:
|
||||||
for idx in range(len(result)):
|
if len(filePath1) > 0:
|
||||||
res = result[idx]
|
if os.path.exists(filePath1):
|
||||||
for line in res:
|
result = ocr.ocr(filePath1, cls=False)
|
||||||
text += (line[1][0] + '\n')
|
for idx in range(len(result)):
|
||||||
else:
|
res = result[idx]
|
||||||
logger.error(f"{filePath2} doesn't exist")
|
for line in res:
|
||||||
|
text += (line[1][0] + '\n')
|
||||||
|
else:
|
||||||
|
logger.error(f"{filePath1} doesn't exist,error information:{finalResult}")
|
||||||
|
return json.dumps(finalResult, ensure_ascii=False)
|
||||||
|
|
||||||
|
if len(filePath2) > 0:
|
||||||
|
if os.path.exists(filePath2):
|
||||||
|
result = ocr.ocr(filePath2, cls=False)
|
||||||
|
for idx in range(len(result)):
|
||||||
|
res = result[idx]
|
||||||
|
for line in res:
|
||||||
|
text += (line[1][0] + '\n')
|
||||||
|
else:
|
||||||
|
logger.error(f"{filePath2} doesn't exist,error information:{finalResult}")
|
||||||
|
return json.dumps(finalResult, ensure_ascii=False)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(e)
|
finalResult["code"] = OCR_RECOGNIZE_OTHER_EXCEPTION
|
||||||
print(e)
|
finalResult["msg"] = error_codes[OCR_RECOGNIZE_OTHER_EXCEPTION]
|
||||||
|
logger.error(f"{e} ,error information:{finalResult}")
|
||||||
|
return json.dumps(finalResult, ensure_ascii=False)
|
||||||
|
|
||||||
if 0 != len(text):
|
if 0 != len(text):
|
||||||
logger.info(f"text:{text}")
|
|
||||||
extractor = IdentityCardExtractor()
|
extractor = IdentityCardExtractor()
|
||||||
tempdict = extractor.extract_textbyPaddle(text)
|
tempdict = extractor.extract_textbyPaddle(text)
|
||||||
jsonString = json.dumps(tempdict, ensure_ascii=False)
|
jsonString = json.dumps(tempdict, ensure_ascii=False)
|
||||||
end_time = time.time() # 记录结束时间
|
end_time = time.time() # 记录结束时间
|
||||||
execution_time = end_time - start_time # 计算执行时间
|
execution_time = end_time - start_time # 计算执行时间
|
||||||
logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}秒")
|
logger.info(f"extractIdCardInfoByBase64Data 耗时{execution_time}秒")
|
||||||
|
return jsonString
|
||||||
else:
|
else:
|
||||||
logger.error(f"{filePath1},{filePath2} can't be recognized")
|
finalResult["code"] = NO_TEXT_RECOGNIZED
|
||||||
return jsonString
|
finalResult["msg"] = error_codes[NO_TEXT_RECOGNIZED]
|
||||||
|
logger.error(f"{filePath1},{filePath2} can't be recognized,error information:{finalResult}")
|
||||||
|
return json.dumps(finalResult, ensure_ascii=False)
|
||||||
|
|
||||||
def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str:
|
def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str:
|
||||||
|
finalResult = {
|
||||||
|
"code": BASE64_DATA_INCOMPLETE,
|
||||||
|
"msg": error_codes[BASE64_DATA_INCOMPLETE],
|
||||||
|
}
|
||||||
logger.info(f"extractIdCardInfoByBase64Data")
|
logger.info(f"extractIdCardInfoByBase64Data")
|
||||||
start_time = time.time() # 记录结束时间
|
start_time = time.time() # 记录结束时间
|
||||||
jsonString = ""
|
jsonString = ""
|
||||||
try:
|
try:
|
||||||
if 0!=len(base64data1):
|
if 0 != len(base64data1):
|
||||||
logger.info(f"not base64data1.empty()")
|
|
||||||
image_data1 = base64.b64decode(base64data1)
|
image_data1 = base64.b64decode(base64data1)
|
||||||
with open("file1.png", "wb") as file:
|
with open("file1.png", "wb") as file:
|
||||||
file.write(image_data1)
|
file.write(image_data1)
|
||||||
|
|
||||||
if 0!=len(base64Data2):
|
if 0 != len(base64Data2):
|
||||||
logger.info(f"not base64Data2.empty()")
|
|
||||||
image_data2 = base64.b64decode(base64Data2)
|
image_data2 = base64.b64decode(base64Data2)
|
||||||
with open("file2.png", "wb") as file:
|
with open("file2.png", "wb") as file:
|
||||||
file.write(image_data2)
|
file.write(image_data2)
|
||||||
|
|
||||||
if os.path.exists("file1.png") and os.path.exists("file2.png"):
|
if os.path.exists("file1.png") and os.path.exists("file2.png"):
|
||||||
logger.info(f"file1.png and file2.png exist")
|
|
||||||
jsonString = extractIdCardInfoByPath("file1.png","file2.png")
|
jsonString = extractIdCardInfoByPath("file1.png","file2.png")
|
||||||
os.remove("file1.png")
|
os.remove("file1.png")
|
||||||
os.remove("file2.png")
|
os.remove("file2.png")
|
||||||
elif os.path.exists("file1.png"):
|
elif os.path.exists("file1.png"):
|
||||||
logger.info(f"file1.png exist")
|
|
||||||
jsonString = extractIdCardInfoByPath("file1.png","")
|
jsonString = extractIdCardInfoByPath("file1.png","")
|
||||||
os.remove("file1.png")
|
os.remove("file1.png")
|
||||||
elif os.path.exists("file2.png"):
|
elif os.path.exists("file2.png"):
|
||||||
logger.info(f"file2.png exist")
|
|
||||||
jsonString = extractIdCardInfoByPath("file2.png","")
|
jsonString = extractIdCardInfoByPath("file2.png","")
|
||||||
os.remove("file2.png")
|
os.remove("file2.png")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(e)
|
logger.error(e)
|
||||||
|
logger.error(f"{e},error information:{finalResult}")
|
||||||
|
return json.dumps(finalResult, ensure_ascii=False)
|
||||||
|
|
||||||
end_time = time.time() # 记录结束时间
|
end_time = time.time() # 记录结束时间
|
||||||
execution_time = end_time - start_time # 计算执行时间
|
execution_time = end_time - start_time # 计算执行时间
|
||||||
|
|
@ -100,7 +112,11 @@ def extractIdCardInfoByBase64Data(base64data1:str, base64Data2: str)->str:
|
||||||
# base64_image_string = base64.b64encode(image_file.read()).decode('utf-8')
|
# base64_image_string = base64.b64encode(image_file.read()).decode('utf-8')
|
||||||
#
|
#
|
||||||
# jsonString = extractIdCardInfoByBase64Data(base64_image_string,"")
|
# jsonString = extractIdCardInfoByBase64Data(base64_image_string,"")
|
||||||
# jsonString = extractIdCardInfoByBase64Data("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg")
|
#jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG")
|
||||||
|
#jsonString = extractIdCardInfoByPath("./images/bf4343cfb5806c77ae21c56a8c35f474.jpeg")
|
||||||
|
#print(jsonString)
|
||||||
|
|
||||||
|
# jsonString = extractIdCardInfoByPath("/Users/wangvivi/Desktop/Code/ocrtest/images/2.jpg","/Users/wangvivi/Desktop/Code/ocrtest/images/1.jpg")
|
||||||
# print(jsonString)
|
# print(jsonString)
|
||||||
# #
|
# #
|
||||||
# jsonString = extractIdCardInfoByPath("./images/han.jpg","")
|
# jsonString = extractIdCardInfoByPath("./images/han.jpg","")
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
<groupId>com.bonus.core</groupId>
|
<groupId>com.bonus.core</groupId>
|
||||||
<artifactId>OCRTool</artifactId>
|
<artifactId>OCRTool</artifactId>
|
||||||
<version>1.0-SNAPSHOT</version>
|
<version>1.0</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<parent>
|
<parent>
|
||||||
|
|
@ -34,6 +34,12 @@
|
||||||
<groupId>org.springframework.boot</groupId>
|
<groupId>org.springframework.boot</groupId>
|
||||||
<artifactId>spring-boot-starter-web</artifactId>
|
<artifactId>spring-boot-starter-web</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.google.code.gson</groupId>
|
||||||
|
<artifactId>gson</artifactId>
|
||||||
|
<version>2.8.6</version>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
|
|
@ -56,7 +62,7 @@
|
||||||
<!-- 和mirror配置x的Nexus的id一致 -->
|
<!-- 和mirror配置x的Nexus的id一致 -->
|
||||||
<id>maven-central</id>
|
<id>maven-central</id>
|
||||||
<name>BonusMavenRepo</name>
|
<name>BonusMavenRepo</name>
|
||||||
<url>http://192.168.0.56:8081/repository/maven-public/</url>
|
<url>http://192.168.0.56:8081/repository/maven-central/</url>
|
||||||
</snapshotRepository>
|
</snapshotRepository>
|
||||||
</distributionManagement>
|
</distributionManagement>
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,39 @@
|
||||||
|
package com.bonus.core;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author wangvivi
|
||||||
|
*/
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
|
public class ErrorHandler {
|
||||||
|
public static final int PYTHON_INTERPRETER_NO_EXIST = 30001;
|
||||||
|
public static final int PYTHON_SCRIPT_NO_EXIST = 30002;
|
||||||
|
public static final int PYTHON_PROCESS_START_ERROR = 30003;
|
||||||
|
public static final int PYTHON_SCRIPT_EXECUTE_ERROR = 30004;
|
||||||
|
public static final int PYTHON_EXECUTE_OTHER_ERROR = 30005;
|
||||||
|
public static final int JAVA_CALL_PYTHON_UNKWON_ERROR = 30006;
|
||||||
|
|
||||||
|
public static String getErrorInfo(int errorCode) {
|
||||||
|
switch (errorCode) {
|
||||||
|
case PYTHON_INTERPRETER_NO_EXIST:
|
||||||
|
return getInternalErrorInfo(errorCode, "python解释器文件不存在");
|
||||||
|
case PYTHON_SCRIPT_NO_EXIST:
|
||||||
|
return getInternalErrorInfo(errorCode, "python脚本不存在");
|
||||||
|
case PYTHON_PROCESS_START_ERROR:
|
||||||
|
return getInternalErrorInfo(errorCode, "启动python进程报错");
|
||||||
|
case PYTHON_SCRIPT_EXECUTE_ERROR:
|
||||||
|
return getInternalErrorInfo(errorCode, "python脚本执行异常");
|
||||||
|
case PYTHON_EXECUTE_OTHER_ERROR:
|
||||||
|
return getInternalErrorInfo(errorCode, "python脚本执行其他错误");
|
||||||
|
default:
|
||||||
|
return getInternalErrorInfo(JAVA_CALL_PYTHON_UNKWON_ERROR, "未知错误");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String getInternalErrorInfo(int errorCode, String errorMsg){
|
||||||
|
ErrorInfo errorInfo = new ErrorInfo(errorCode, errorMsg);
|
||||||
|
Gson gson = new Gson();
|
||||||
|
return gson.toJson(errorInfo);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,32 @@
|
||||||
|
package com.bonus.core;
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Error payload made of a numeric code and a message.
 *
 * <p>NOTE(review): {@code ErrorHandler} serialises this class with Gson, which
 * presumably derives the JSON keys from the field names {@code code} and
 * {@code msg} — confirm before renaming either field.
 *
 * @author wangvivi
 */
public class ErrorInfo {
    private int code;
    private String msg;

    /**
     * Creates an error payload.
     *
     * @param errorCode    numeric error code
     * @param errorMessage message describing the error
     */
    public ErrorInfo(int errorCode, String errorMessage) {
        this.code = errorCode;
        this.msg = errorMessage;
    }

    /** @return the error message */
    public String getMsg() {
        return msg;
    }

    /** @return the numeric error code */
    public int getCode() {
        return code;
    }

    /**
     * Replaces the error message.
     *
     * @param errormsg new message
     */
    public void setMsg(String errormsg) {
        this.msg = errormsg;
    }

    /**
     * Replaces the numeric error code.
     *
     * @param code new error code
     */
    public void setCode(int code) {
        this.code = code;
    }

    /**
     * Misnamed setter for the error code, kept so existing callers keep
     * compiling. Despite its name it never touched the message.
     *
     * @param code new error code
     * @deprecated use {@link #setCode(int)}
     */
    @Deprecated
    public void setMsg(int code) {
        setCode(code);
    }
}
|
||||||
|
|
||||||
|
|
@ -2,24 +2,9 @@ package com.bonus.core;
|
||||||
|
|
||||||
public class IdentifyRecognitionParams {
|
public class IdentifyRecognitionParams {
|
||||||
private RecognitionType type;
|
private RecognitionType type;
|
||||||
private String recognitionData1="";
|
private String recognitionFrontData = "";
|
||||||
private String recognitionData2="";
|
private String recognitionBackData = "";
|
||||||
|
|
||||||
public String getRecognitionData1() {
|
|
||||||
return recognitionData1;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setRecognitionData1(String recognitionData1) {
|
|
||||||
this.recognitionData1 = recognitionData1;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getRecognitionData2() {
|
|
||||||
return recognitionData2;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setRecognitionData2(String recognitionData2) {
|
|
||||||
this.recognitionData2 = recognitionData2;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getType() {
|
public int getType() {
|
||||||
return type.ordinal();
|
return type.ordinal();
|
||||||
|
|
@ -29,6 +14,23 @@ public class IdentifyRecognitionParams {
|
||||||
this.type = type;
|
this.type = type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getRecognitionFrontData() {
|
||||||
|
return this.recognitionFrontData;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRecognitionFrontData(String recognitionFrontData) {
|
||||||
|
this.recognitionFrontData = recognitionFrontData;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getRecognitionBackData() {
|
||||||
|
return this.recognitionBackData;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRecognitionBackData(String recognitionBackData) {
|
||||||
|
this.recognitionBackData = recognitionBackData;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Enum for different recognition types
|
// Enum for different recognition types
|
||||||
public enum RecognitionType {
|
public enum RecognitionType {
|
||||||
//参数带入的是图片路径
|
//参数带入的是图片路径
|
||||||
|
|
|
||||||
|
|
@ -3,10 +3,7 @@ package com.bonus.core;
|
||||||
import org.springframework.beans.factory.annotation.Value;
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.*;
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStreamReader;
|
|
||||||
import java.io.OutputStream;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -35,28 +32,35 @@ public class OCRRecognition {
|
||||||
logger.info(pythonEnv);
|
logger.info(pythonEnv);
|
||||||
logger.info(scriptEnv);
|
logger.info(scriptEnv);
|
||||||
logger.info("开始打印从请求里带过来的参数:");
|
logger.info("开始打印从请求里带过来的参数:");
|
||||||
logger.info(String.valueOf(recognitionParams.getType()));
|
String typeStr = Integer.toString(recognitionParams.getType());
|
||||||
logger.info("参数1的string 是:");
|
logger.info(typeStr);
|
||||||
logger.info(recognitionParams.getRecognitionData1());
|
File file = new File(pythonEnv);
|
||||||
logger.info("参数2的string 是:");
|
if (!file.exists()) {
|
||||||
logger.info(recognitionParams.getRecognitionData2());
|
logger.info("pythonEnv不存在");
|
||||||
//*, recognitionParams.getRecognitionData1(), recognitionParams.getRecognitionData2()*/
|
return ErrorHandler.getErrorInfo(ErrorHandler.PYTHON_INTERPRETER_NO_EXIST);
|
||||||
|
}
|
||||||
|
file = new File(scriptEnv);
|
||||||
|
if (!file.exists()) {
|
||||||
|
logger.info("scriptEnv 不存在");
|
||||||
|
return ErrorHandler.getErrorInfo(ErrorHandler.PYTHON_SCRIPT_NO_EXIST);
|
||||||
|
}
|
||||||
|
|
||||||
String[] str = new String[]{pythonEnv,scriptEnv, String.valueOf(recognitionParams.getType())};
|
String[] str = new String[]{pythonEnv,scriptEnv, String.valueOf(recognitionParams.getType())};
|
||||||
Runtime runtime = Runtime.getRuntime();
|
Runtime runtime = Runtime.getRuntime();
|
||||||
if (runtime == null) {
|
if (runtime == null) {
|
||||||
System.out.println("runtime == null");
|
logger.info("runtime == null");
|
||||||
return "";
|
return ErrorHandler.getErrorInfo(ErrorHandler.PYTHON_PROCESS_START_ERROR);
|
||||||
}
|
}
|
||||||
proc = runtime.exec(str);
|
proc = runtime.exec(str);
|
||||||
logger.info("proc = runtime.exec(str)");
|
logger.info("proc = runtime.exec(str)");
|
||||||
OutputStream outputStream = proc.getOutputStream();
|
OutputStream outputStream = proc.getOutputStream();
|
||||||
if (!recognitionParams.getRecognitionData1().isEmpty()) {
|
if (!recognitionParams.getRecognitionFrontData().isEmpty()) {
|
||||||
byte[] input = recognitionParams.getRecognitionData1().getBytes(StandardCharsets.UTF_8);
|
byte[] input = recognitionParams.getRecognitionFrontData().getBytes(StandardCharsets.UTF_8);
|
||||||
passParameter(outputStream, input);
|
passParameter(outputStream, input);
|
||||||
}
|
}
|
||||||
if (!recognitionParams.getRecognitionData2().isEmpty()) {
|
if (!recognitionParams.getRecognitionBackData().isEmpty()) {
|
||||||
outputStream.write(System.lineSeparator().getBytes());
|
outputStream.write(System.lineSeparator().getBytes());
|
||||||
byte[] input = recognitionParams.getRecognitionData2().getBytes(StandardCharsets.UTF_8);
|
byte[] input = recognitionParams.getRecognitionBackData().getBytes(StandardCharsets.UTF_8);
|
||||||
passParameter(outputStream, input);
|
passParameter(outputStream, input);
|
||||||
}
|
}
|
||||||
outputStream.flush();
|
outputStream.flush();
|
||||||
|
|
@ -72,7 +76,9 @@ public class OCRRecognition {
|
||||||
proc.waitFor();
|
proc.waitFor();
|
||||||
in.close();
|
in.close();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
logger.error(e.getMessage());
|
logger.error(e.getMessage());
|
||||||
|
return ErrorHandler.getErrorInfo(ErrorHandler.PYTHON_EXECUTE_OTHER_ERROR);
|
||||||
}
|
}
|
||||||
if (!stringList.isEmpty()) {
|
if (!stringList.isEmpty()) {
|
||||||
lastElement = stringList.get(stringList.size() - 1);
|
lastElement = stringList.get(stringList.size() - 1);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue