85 lines
2.7 KiB
Python
85 lines
2.7 KiB
Python
|
|
from paddleocr import PaddleOCR, draw_ocr
|
|||
|
|
import time
|
|||
|
|
|
|||
|
|
from extractor.identitycard_extractor import IdentityCardExtractor
|
|||
|
|
|
|||
|
|
# Paddleocr目前支持的多语言语种可以通过修改lang参数进行切换
|
|||
|
|
# 例如`ch`, `en`, `fr`, `german`, `korean`, `japan`
|
|||
|
|
|
|||
|
|
# stat_time = time.time()
|
|||
|
|
# print("start.....")
|
|||
|
|
# # 加载图片
|
|||
|
|
# # image = cv2.imread(image_path)
|
|||
|
|
# # print(image.shape)
|
|||
|
|
# # # 旋转图片(如果高度大于宽度)
|
|||
|
|
# # height, width = image.shape[:2]
|
|||
|
|
# # if height < width:
|
|||
|
|
# # # 顺时针旋转图片90度
|
|||
|
|
# # image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
|
|||
|
|
# ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
|
|||
|
|
# print("PaddleOCR end.....")
|
|||
|
|
# img_path = '/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG'
|
|||
|
|
# print(img_path)
|
|||
|
|
# result = ocr.ocr(img_path, cls=False)
|
|||
|
|
# print("ocr.ocr end.....")
|
|||
|
|
# text = ""
|
|||
|
|
# print(f"len(result): {len(result)}")
|
|||
|
|
# for idx in range(len(result)):
|
|||
|
|
# res = result[idx]
|
|||
|
|
# for line in res:
|
|||
|
|
# #tempText = (line[1][0])
|
|||
|
|
# # tempText = tempText.replace("\n", "")
|
|||
|
|
# # tempText = tempText.replace(" ", "")
|
|||
|
|
# text += (line[1][0] + '\n')
|
|||
|
|
# print(text)
|
|||
|
|
|
|||
|
|
#text = ""
|
|||
|
|
# for idx in range(len(result)):
|
|||
|
|
# res = result[idx]
|
|||
|
|
# for line in res:
|
|||
|
|
# print(line[0][1][0])
|
|||
|
|
#text =+ '/n'.join(line)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# if result:
|
|||
|
|
# ocr_texts = result[0][0][1][0]
|
|||
|
|
|
|||
|
|
# end_time = time.time()
|
|||
|
|
# print(end_time - stat_time)
|
|||
|
|
|
|||
|
|
# 显示结果
|
|||
|
|
# 如果本地没有simfang.ttf,可以在doc/fonts目录下下载
|
|||
|
|
# from PIL import Image
|
|||
|
|
|
|||
|
|
# result = result[0]
|
|||
|
|
# image = Image.open(img_path).convert('RGB')
|
|||
|
|
# boxes = [line[0] for line in result]
|
|||
|
|
# txts = [line[1][0] for line in result]
|
|||
|
|
# scores = [line[1][1] for line in result]
|
|||
|
|
# im_show = draw_ocr(image, boxes, txts, scores, font_path='doc/fonts/simfang.ttf')
|
|||
|
|
# im_show = Image.fromarray(im_show)
|
|||
|
|
# im_show.save('result.jpg')
|
|||
|
|
|
|||
|
|
|
|||
|
|
def ocrIdCardInfo(type: int, filePath1: str, filePath2: str) -> str:
    """Run OCR on the given ID-card image(s) and extract the card fields.

    Every distinct, non-empty path among ``filePath1`` and ``filePath2`` is
    passed through PaddleOCR. The recognised text lines are joined with
    newlines and handed to ``IdentityCardExtractor.extract_text``.

    Args:
        type: Currently unused by this function; kept so existing callers
            keep working.
            NOTE(review): presumably selects the kind of card - confirm.
        filePath1: Path of the first image to recognise.
        filePath2: Path of the second image to recognise.
            NOTE(review): presumably the other side of the card - confirm.

    Returns:
        Whatever ``IdentityCardExtractor.extract_text`` returns for the
        recognised text (presumably a JSON string, going by the original
        variable name - verify against the extractor).
    """
    # Need to run only once to download and load the model into memory.
    ocr = PaddleOCR(use_angle_cls=True, lang="ch")
    print("PaddleOCR end.....")

    # Drop empty paths and de-duplicate while keeping order, so passing the
    # same image twice does not feed duplicated text to the extractor.
    image_paths = list(dict.fromkeys(p for p in (filePath1, filePath2) if p))

    recognised = []
    for img_path in image_paths:
        print(img_path)
        # Fall back to an empty list if the engine returns nothing at all.
        result = ocr.ocr(img_path, cls=False) or []
        print("ocr.ocr end.....")
        print(f"len(result): {len(result)}")
        for page in result:
            # A page with no detected text can come back as None.
            if not page:
                continue
            # Each detection is [box, (text, score)]; keep only the text.
            recognised.extend(line[1][0] for line in page)

    # Same layout as before: every recognised line is newline-terminated.
    text = "".join(f"{entry}\n" for entry in recognised)

    # The original returned an undefined name because these two steps were
    # commented out; restore them so the function actually yields a result.
    extractor = IdentityCardExtractor()
    return extractor.extract_text(text)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ocrIdCardInfo takes (type, filePath1, filePath2); the previous call passed
# only the two paths, which raises TypeError for the missing third argument.
# NOTE(review): 0 is a placeholder - the function does not read `type` in the
# code visible here; confirm the intended value.
jsonString = ocrIdCardInfo(
    0,
    "/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG",
    "/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG",
)
print(jsonString)
|