26 lines
883 B
Python
26 lines
883 B
Python
import cv2
|
|
import pytesseract
|
|
import base64
|
|
import numpy as np
|
|
def ocr_from_path(image_path):
    """Run Tesseract OCR on an image file and return the recognized text.

    The image is loaded as a single-channel grayscale image and passed to
    Tesseract with the ``chi_sim`` (Simplified Chinese) language model.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The text produced by ``pytesseract.image_to_string``.

    Raises:
        FileNotFoundError: If ``image_path`` does not point to an existing
            file.
        ValueError: If OpenCV cannot decode the file as an image.
    """
    import os  # local import: only needed for the existence check below

    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path!r}")

    # cv2.imread takes an IMREAD_* flag. cv2.COLOR_BGR2GRAY is a cvtColor
    # conversion code (value 6), which imread would interpret as
    # IMREAD_ANYDEPTH | IMREAD_ANYCOLOR rather than as "load grayscale".
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # imread reports failure by returning None instead of raising.
        raise ValueError(f"Could not decode image file: {image_path!r}")

    # Recognize the text with the Simplified Chinese model.
    return pytesseract.image_to_string(image, lang='chi_sim')
|
|
|
|
def ocr_from_base64(base64_string):
    """Run Tesseract OCR on a Base64-encoded image and return the text.

    The Base64 payload is decoded to bytes, decoded by OpenCV into a color
    image, and passed to Tesseract with the ``chi_sim`` (Simplified
    Chinese) language model.

    Args:
        base64_string: Base64 encoding of the raw bytes of an image file.

    Returns:
        The text produced by ``pytesseract.image_to_string``.

    Raises:
        ValueError: If the input decodes to no data or the decoded bytes are
            not an image OpenCV can read. Malformed Base64 raises
            ``binascii.Error``, which is a ``ValueError`` subclass.
    """
    # Decode the Base64 text into the raw bytes of the encoded image file.
    image_data = base64.b64decode(base64_string)
    if not image_data:
        # Fail early with a clear message; an empty buffer cannot hold an
        # image and would otherwise surface as a low-level OpenCV error.
        raise ValueError("Base64 input decoded to an empty byte string")

    # Wrap the bytes in a uint8 array so OpenCV can decode them in memory.
    nparr = np.frombuffer(image_data, np.uint8)
    image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    if image is None:
        # imdecode reports failure by returning None instead of raising.
        raise ValueError("Decoded Base64 data is not a readable image")

    # Recognize the text with the Simplified Chinese model.
    return pytesseract.image_to_string(image, lang='chi_sim')
|
|
|
|
|
|
# Demo: run OCR on a sample ID-card photo and print the recognized text.
string = ocr_from_path(
    "/Users/wangvivi/Desktop/Code/ocrtest/images/id_card.JPG"
)
print(string)