# Source path: Jiale/FaceRegWeb5.2/FaceRegWeb/models/facedetector.py
# NOTE: the repository viewer flagged this file as containing ambiguous Unicode characters.

"""
Input: the original image.
Output: the face bounding boxes found in the image.
"""
import cv2
import numpy as np
import onnxruntime as ort
class Box:
    """Axis-aligned detection box carrying a score and a class label.

    ``(x1, y1)`` is the minimum corner and ``(x2, y2)`` the maximum corner.
    Areas are computed with inclusive coordinates, i.e. a side spans
    ``x2 - x1 + 1`` units.
    """

    def __init__(self, x1, y1, x2, y2, score, label=1, label_text = 'face' ,flag=True):
        self.x1, self.y1 = x1, y1
        self.x2, self.y2 = x2, y2
        self.score = score
        self.label = label
        self.label_text = label_text
        self.flag = flag

    def iou_of(self, other):
        """Return the intersection-over-union with ``other``.

        Returns the integer ``0`` when the two boxes do not overlap (boxes
        that merely touch along an edge count as non-overlapping).
        """
        left = max(self.x1, other.x1)
        top = max(self.y1, other.y1)
        right = min(self.x2, other.x2)
        bottom = min(self.y2, other.y2)
        if not (left < right and top < bottom):
            return 0
        overlap = (right - left + 1.0) * (bottom - top + 1.0)
        own_area = (self.x2 - self.x1 + 1.0) * (self.y2 - self.y1 + 1.0)
        their_area = (other.x2 - other.x1 + 1.0) * (other.y2 - other.y1 + 1.0)
        return overlap / (own_area + their_area - overlap)

    def area(self):
        """Return the box area using inclusive coordinates."""
        width = self.x2 - self.x1 + 1
        height = self.y2 - self.y1 + 1
        return width * height
def hard_nms(boxes, iou_threshold, topk):
    """Greedy (hard) non-maximum suppression.

    Boxes are visited from highest to lowest score; each surviving box
    suppresses every later box whose IoU with it exceeds ``iou_threshold``.

    Args:
        boxes: list of box objects exposing ``score`` and ``iou_of(other)``.
            The list is neither reordered nor otherwise modified.
        iou_threshold: IoU above which a lower-scored box is suppressed.
        topk: maximum number of boxes to return.

    Returns:
        A list with at most ``topk`` of the input boxes, ordered by
        descending score. Empty when ``boxes`` is empty or ``topk <= 0``.
    """
    if not boxes or topk <= 0:
        return []
    # Sort a copy: the caller's list keeps its original order.
    ranked = sorted(boxes, key=lambda b: b.score, reverse=True)
    suppressed = [False] * len(ranked)
    kept = []
    for i, candidate in enumerate(ranked):
        if suppressed[i]:
            continue
        kept.append(candidate)
        if len(kept) >= topk:
            break
        for j in range(i + 1, len(ranked)):
            if not suppressed[j] and candidate.iou_of(ranked[j]) > iou_threshold:
                suppressed[j] = True
    return kept
def blending_nms(boxes, iou_threshold, topk):
    """Non-maximum suppression that blends each cluster into one box.

    Boxes are visited from highest to lowest score. Each unsuppressed box
    becomes the leader of a cluster made of itself plus every later box whose
    IoU with it exceeds ``iou_threshold``. The cluster is replaced by a new
    ``Box`` whose coordinates and score are the averages of its members
    weighted by ``exp(score)`` (normalised over the cluster).

    Args:
        boxes: list of ``Box`` objects. The list is not reordered and the
            boxes are not modified.
        iou_threshold: IoU above which a box joins the leader's cluster.
        topk: maximum number of blended boxes to return.

    Returns:
        A list with at most ``topk`` newly created ``Box`` objects. Empty when
        ``boxes`` is empty or ``topk <= 0``.
    """
    if not boxes or topk <= 0:
        return []
    # Sort a copy: the caller's list keeps its original order.
    ranked = sorted(boxes, key=lambda b: b.score, reverse=True)
    suppressed = [False] * len(ranked)
    blended_boxes = []
    for i, leader in enumerate(ranked):
        if suppressed[i]:
            continue
        cluster = [leader]
        for j in range(i + 1, len(ranked)):
            if not suppressed[j] and leader.iou_of(ranked[j]) > iou_threshold:
                suppressed[j] = True
                cluster.append(ranked[j])

        # exp(score) is evaluated once per member and reused for the rates.
        weights = [np.exp(member.score) for member in cluster]
        total = sum(weights)
        # The blended box inherits the class of the cluster leader instead of
        # silently falling back to the Box constructor defaults.
        blended = Box(0, 0, 0, 0, 0, label=leader.label, label_text=leader.label_text)
        for member, weight in zip(cluster, weights):
            rate = weight / total
            blended.x1 += member.x1 * rate
            blended.y1 += member.y1 * rate
            blended.x2 += member.x2 * rate
            blended.y2 += member.y2 * rate
            blended.score += member.score * rate
        blended.flag = True
        blended_boxes.append(blended)
        if len(blended_boxes) >= topk:
            break
    return blended_boxes
def offset_nms(boxes, iou_threshold, topk):
    """Class-aware greedy NMS using the coordinate-offset trick.

    Every box is temporarily translated by ``label * 4096`` on both axes so
    that boxes with different labels cannot overlap, then plain greedy NMS is
    run. All boxes get their exact original coordinates back before the
    function returns, including the suppressed ones and also when an
    exception is raised.

    Args:
        boxes: list of box objects exposing ``x1``, ``y1``, ``x2``, ``y2``,
            ``score``, ``label`` and ``iou_of(other)``. The list itself is not
            reordered.
        iou_threshold: IoU above which a lower-scored box is suppressed.
        topk: maximum number of boxes to return.

    Returns:
        A list with at most ``topk`` of the input boxes, ordered by
        descending score. Empty when ``boxes`` is empty or ``topk <= 0``.
    """
    if not boxes or topk <= 0:
        return []
    # Sort a copy: the caller's list keeps its original order.
    ranked = sorted(boxes, key=lambda b: b.score, reverse=True)
    offset = 4096.0
    # Remember the untouched coordinates so they can be restored exactly
    # (adding and then subtracting the offset is not lossless in floating point).
    originals = [(b.x1, b.y1, b.x2, b.y2) for b in ranked]
    kept = []
    try:
        for box in ranked:
            shift = box.label * offset
            box.x1 += shift
            box.y1 += shift
            box.x2 += shift
            box.y2 += shift

        suppressed = [False] * len(ranked)
        for i, candidate in enumerate(ranked):
            if suppressed[i]:
                continue
            kept.append(candidate)
            if len(kept) >= topk:
                break
            for j in range(i + 1, len(ranked)):
                if not suppressed[j] and candidate.iou_of(ranked[j]) > iou_threshold:
                    suppressed[j] = True
    finally:
        for box, (x1, y1, x2, y2) in zip(ranked, originals):
            box.x1, box.y1, box.x2, box.y2 = x1, y1, x2, y2
    return kept
def draw_rectface(img, box):
    """Draw ``box`` onto ``img`` in place.

    The rectangle's top-left corner is clamped to the image origin and its
    width/height are limited to the space remaining in the image. It is drawn
    with colour ``(0, 0, 255)`` (red for OpenCV's BGR images) and thickness 3.
    Nothing is returned; ``img`` itself is modified.
    """
    left = max(0, int(box.x1))
    top = max(0, int(box.y1))
    width = min(img.shape[1] - left, int(box.x2 - left + 1))
    height = min(img.shape[0] - top, int(box.y2 - top + 1))
    top_left = (left, top)
    bottom_right = (left + width, top + height)
    cv2.rectangle(img, top_left, bottom_right, (0, 0, 255), 3)
def cut_rectface(img, box):
    """Return the part of ``img`` covered by ``box``, clipped to the image.

    Args:
        img: array indexed as ``img[row, col, ...]``.
        box: object exposing ``x1``, ``y1``, ``x2``, ``y2``.

    Returns:
        ``img[y:y+h, x:x+w]`` (a slice of ``img``). When the box lies entirely
        outside the image the result has zero size.
    """
    img_h, img_w = img.shape[0], img.shape[1]
    x = max(0, int(box.x1))
    y = max(0, int(box.y1))
    # Clamp to >= 0: a negative extent would otherwise be interpreted by the
    # slice as "count from the end" and return most of the image for a box
    # that is completely off the left/top edge.
    w = max(0, min(img_w - x, int(box.x2 - x + 1)))
    h = max(0, min(img_h - y, int(box.y2 - y + 1)))
    return img[y:y + h, x:x + w]
def normalize_inplace(mat, mean, scale):
    """Return ``(mat - mean) * scale`` as a float32 array.

    Despite the name, ``mat`` itself is left untouched: ``astype`` makes a
    float32 copy and the subtraction/multiplication are applied to that copy,
    which is then returned.
    """
    normalized = mat.astype(np.float32)
    np.subtract(normalized, mean, out=normalized)
    np.multiply(normalized, scale, out=normalized)
    return normalized
def create_tensor(mat, tensor_dims, memory_info_handler, data_format):
    """Wrap an image in a batch-of-one ONNX Runtime ``OrtValue`` on the CPU.

    Args:
        mat: image array of shape ``(rows, cols, channels)``.
        tensor_dims: the 4 dimensions of the target tensor; the first one
            (batch) must be 1. The remaining three are read as
            ``(C, H, W)`` for ``"CHW"`` and ``(H, W, C)`` for ``"HWC"``.
        memory_info_handler: unused; kept so existing callers keep working.
        data_format: ``"CHW"`` or ``"HWC"``.

    Returns:
        ``ort.OrtValue`` built from a contiguous array of shape
        ``(1, C, H, W)`` or ``(1, H, W, C)`` respectively. ``mat`` is resized
        with ``cv2.resize`` first when its height/width differ from the target.

    Raises:
        RuntimeError: ``tensor_dims`` is not 4-long, the batch is not 1, the
            channel count does not match, or ``data_format`` is not supported.
        ValueError: ``mat`` does not have exactly three dimensions.
    """
    rows, cols, channels = mat.shape
    if len(tensor_dims) != 4:
        raise RuntimeError("dims mismatch.")
    if tensor_dims[0] != 1:
        raise RuntimeError("batch != 1")

    if data_format == "CHW":
        target_channel, target_height, target_width = tensor_dims[1:4]
        if target_channel != channels:
            raise RuntimeError("channel mismatch.")
        if target_height != rows or target_width != cols:
            print("in create_tensor, resize mat...")
            mat = cv2.resize(mat, (target_width, target_height))
        mat = mat.transpose(2, 0, 1)  # HWC -> CHW (a strided view, not a copy)
    elif data_format == "HWC":
        target_height, target_width, target_channel = tensor_dims[1:4]
        if target_channel != channels:
            raise RuntimeError("channel mismatch.")
        if target_height != rows or target_width != cols:
            mat = cv2.resize(mat, (target_width, target_height))
    else:
        # Previously fell through and returned None, deferring the failure
        # to whoever consumed the "tensor".
        raise RuntimeError(f"unsupported data_format: {data_format!r}")

    # Both layouts get the leading batch axis that the validated 4-D dims
    # describe, and the data is materialised contiguously so the memory
    # layout matches the logical shape after the transpose.
    batched = np.ascontiguousarray(np.expand_dims(mat, axis=0))
    return ort.OrtValue.ortvalue_from_numpy(batched, 'cpu')
class BasicOrtHandler:
    """Owns an ONNX Runtime inference session and caches its I/O metadata.

    Attributes set by ``initialize_handler``:
        ort_session: the ``ort.InferenceSession`` for ``onnx_path``.
        memory_info_handler: an ``ort.OrtMemoryInfo`` describing CPU memory.
        input_node_names / input_node_dims: name (as a one-element list) and
            shape of the first model input.
        input_tensor_size: product of ``input_node_dims``.
        output_node_names / output_node_dims: names and shapes of all outputs.
        num_outputs: number of model outputs.
    """

    def __init__(self, onnx_path, num_threads=1):
        """Store the configuration and immediately create the session.

        Args:
            onnx_path: path of the ONNX model file.
            num_threads: value used for ``intra_op_num_threads``.
        """
        self.onnx_path = onnx_path
        self.num_threads = num_threads
        self.initialize_handler()

    def initialize_handler(self):
        """Create the inference session and read the input/output metadata."""
        session_options = ort.SessionOptions()
        session_options.intra_op_num_threads = self.num_threads
        session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

        # Initialize session
        self.ort_session = ort.InferenceSession(self.onnx_path, session_options)
        self.memory_info_handler = ort.OrtMemoryInfo("Cpu", ort.OrtAllocatorType.ORT_ARENA_ALLOCATOR, 0, ort.OrtMemType.DEFAULT)

        # Only the first input is used.
        first_input = self.ort_session.get_inputs()[0]
        self.input_node_names = [first_input.name]
        self.input_node_dims = first_input.shape  # shape of the input tensor
        # NOTE(review): assumes every input dimension is a concrete integer;
        # confirm the model has no symbolic dimensions.
        self.input_tensor_size = np.prod(self.input_node_dims)

        outputs = self.ort_session.get_outputs()
        self.output_node_names = [out.name for out in outputs]
        self.output_node_dims = [out.shape for out in outputs]
        self.num_outputs = len(self.output_node_names)

    def __del__(self):
        # The session attribute does not exist when initialize_handler()
        # raised before assigning it (e.g. bad model path); dropping it
        # unconditionally would raise AttributeError during finalisation.
        self.__dict__.pop("ort_session", None)
class FaceBoxesV2(BasicOrtHandler):
    """FaceBoxesV2 face detector executed through ONNX Runtime.

    The model is expected to produce two outputs: box regressions first and
    class probabilities second. Boxes are decoded against a fixed set of
    prior anchors and filtered by score and non-maximum suppression.
    """

    def __init__(self, onnx_path, num_threads=1):
        """Load the model and set the pre/post-processing constants.

        Args:
            onnx_path: path of the ONNX model file.
            num_threads: forwarded to ``BasicOrtHandler``.
        """
        super().__init__(onnx_path, num_threads)
        # Per-channel mean subtracted from the image, then per-channel scale.
        self.mean_vals = np.array([104.0, 117.0, 123.0], dtype=np.float32)
        self.scale_vals = np.array([1.0, 1.0, 1.0], dtype=np.float32)
        # variance[0] scales the centre offsets, variance[1] the size terms.
        self.variance = [0.1, 0.2]
        # One feature map per step; min_sizes[k] lists the anchor sizes
        # generated on the k-th feature map.
        self.steps = [32, 64, 128]
        self.min_sizes = [
            [32, 64, 128],
            [256],
            [512],
        ]
        # Upper bound on the number of boxes handed to NMS.
        self.max_nms = 30000

    def transform(self, mat):
        """Resize and normalise ``mat`` and wrap it as the model input tensor."""
        target_size = (self.input_node_dims[3], self.input_node_dims[2])  # (width, height)
        canvas = cv2.resize(mat, target_size)
        canvas = normalize_inplace(canvas, self.mean_vals, self.scale_vals)
        return create_tensor(canvas, self.input_node_dims, self.memory_info_handler, "CHW")

    def detect(self, mat, score_threshold=0.35, iou_threshold=0.45, topk=300, nms_type=0):
        """Detect faces in an image.

        Args:
            mat: image array of shape ``(height, width, channels)``.
            score_threshold: minimum face probability for a box to be kept.
            iou_threshold: IoU threshold used by NMS.
            topk: maximum number of boxes returned by NMS.
            nms_type: 0 = hard NMS, 1 = blending NMS, 2 = offset NMS.

        Returns:
            A list of ``Box`` in original-image pixel coordinates, or ``None``
            when ``mat`` is ``None`` or empty.
        """
        if mat is None or mat.size == 0:
            return None
        img_height = float(mat.shape[0])
        img_width = float(mat.shape[1])
        # 1. make input tensor
        input_tensor = self.transform(mat)
        # 2. inference scores & boxes.
        output_tensors = self.ort_session.run(self.output_node_names, {self.input_node_names[0]: input_tensor})
        # 3. rescale & exclude.
        bbox_collection = self.generate_bboxes(output_tensors, score_threshold, img_height, img_width)
        # 4. hard|blend|offset nms with topk. return detected_boxes
        return self.nms(bbox_collection, iou_threshold, topk, nms_type)

    def generate_bboxes(self, output_tensors, score_threshold, img_height, img_width):
        """Decode raw model outputs into scored boxes in image coordinates.

        Args:
            output_tensors: ``[bboxes, probs]`` arrays, indexed as
                ``bboxes[0, i, 0:4]`` and ``probs[0, i, 1]`` (e.g. shapes
                ``(1, n, 4)`` and ``(1, n, 2)``).
            score_threshold: boxes with ``probs[0, i, 1]`` below it are dropped.
            img_height: height of the original image, used for rescaling.
            img_width: width of the original image, used for rescaling.

        Returns:
            A list of at most ``self.max_nms`` ``Box`` objects.

        Raises:
            RuntimeError: the number of generated anchors differs from the
                number of boxes the model returned.
        """
        bboxes = output_tensors[0]  # e.g (1,n,4)
        probs = output_tensors[1]  # e.g (1,n,2) after softmax
        # Validate against the tensor actually returned: the shape stored in
        # the session metadata may be symbolic rather than an integer.
        bbox_num = bboxes.shape[1]
        input_height = self.input_node_dims[2]  # e.g 640
        input_width = self.input_node_dims[3]  # e.g 640
        anchors = self.generate_anchors(input_height, input_width)
        num_anchors = len(anchors)
        if num_anchors != bbox_num:
            print(f"num_anchors={num_anchors} but detected bbox_num={bbox_num}")
            raise RuntimeError("mismatch num_anchors != bbox_num")

        center_var, size_var = self.variance[0], self.variance[1]
        bbox_collection = []
        for i, (prior_cx, prior_cy, prior_s_kx, prior_s_ky) in enumerate(anchors):
            conf = probs[0, i, 1]
            if conf < score_threshold:
                continue  # filter first.
            dx = bboxes[0, i, 0]
            dy = bboxes[0, i, 1]
            dw = bboxes[0, i, 2]
            dh = bboxes[0, i, 3]
            # Decode relative to the prior; results are normalised to (0., 1.).
            cx = prior_cx + dx * center_var * prior_s_kx
            cy = prior_cy + dy * center_var * prior_s_ky
            w = prior_s_kx * np.exp(dw * size_var)
            h = prior_s_ky * np.exp(dh * size_var)
            bbox_collection.append(Box(
                x1=(cx - w / 2.0) * img_width,
                y1=(cy - h / 2.0) * img_height,
                x2=(cx + w / 2.0) * img_width,
                y2=(cy + h / 2.0) * img_height,
                score=conf,
                label=1,
                label_text="face",
                flag=True,
            ))
            # limit boxes for nms: stop once exactly max_nms were collected.
            if len(bbox_collection) >= self.max_nms:
                break
        return bbox_collection

    def nms(self, input_boxes, iou_threshold, topk, nms_type):
        """Run the NMS variant selected by ``nms_type``.

        Args:
            input_boxes: list of ``Box`` to filter.
            iou_threshold: IoU threshold forwarded to the NMS routine.
            topk: maximum number of boxes to return.
            nms_type: 0 = ``hard_nms``, 1 = ``blending_nms``, 2 = ``offset_nms``.

        Raises:
            NotImplementedError: ``nms_type`` is none of 0, 1, 2.
        """
        if nms_type == 1:
            output_boxes = blending_nms(input_boxes, iou_threshold, topk)
        elif nms_type == 2:
            output_boxes = offset_nms(input_boxes, iou_threshold, topk)
        elif nms_type == 0:
            output_boxes = hard_nms(input_boxes, iou_threshold, topk)
        else:
            raise NotImplementedError(f"unsupported nms_type: {nms_type!r}")
        return output_boxes

    def generate_anchors(self, target_height, target_width):
        """Build the prior anchors for a network input of the given size.

        For every cell of every feature map (one per entry of ``self.steps``)
        and every size in the matching ``self.min_sizes`` entry, anchors are
        placed on a sub-grid inside the cell: 4x4 for size 32, 2x2 for size
        64 and a single centred anchor otherwise.

        Returns:
            A list of ``[cx, cy, s_kx, s_ky]`` lists, all normalised by the
            input width/height, in the order the decoder indexes them.
        """
        # Sub-cell offsets per anchor size; sizes not listed get one anchor
        # at the cell centre.
        dense_offsets = {
            32: [0.0, 0.25, 0.5, 0.75],
            64: [0.0, 0.5],
        }
        centre_only = [0.5]

        anchors = []
        for k, step in enumerate(self.steps):
            f_h = int(np.ceil(target_height / step))
            f_w = int(np.ceil(target_width / step))
            tmp_min_sizes = self.min_sizes[k]
            for i in range(f_h):
                for j in range(f_w):
                    for min_size in tmp_min_sizes:
                        s_kx = min_size / target_width
                        s_ky = min_size / target_height
                        offsets = dense_offsets.get(min_size, centre_only)
                        for offset_y in offsets:
                            for offset_x in offsets:
                                cx = (j + offset_x) * step / target_width
                                cy = (i + offset_y) * step / target_height
                                anchors.append([cx, cy, s_kx, s_ky])
        return anchors
# Usage example: python facedetector.py <image_path>
# Prints the detected boxes and writes the annotated image to the current
# directory under the input's base name.
if __name__ == "__main__":
    import os
    import sys

    if len(sys.argv) < 2:
        sys.exit(f"usage: python {os.path.basename(sys.argv[0])} <image_path>")
    img_path = sys.argv[1]

    # Validate the input before paying for the model load.
    img = cv2.imread(img_path)
    if img is None:
        sys.exit(f"cannot read image: {img_path}")

    reta = FaceBoxesV2(r"./checkpoints/faceboxesv2-640x640.onnx", 4)
    # detect() returns None for an empty image; treat that as "no faces".
    detected_boxes = reta.detect(img) or []

    for box in detected_boxes:
        print(f"({box.x1:.3f},{box.y1:.3f},{box.x2:.3f},{box.y2:.3f})", end=" ")
    print("total face number:", len(detected_boxes))

    for box in detected_boxes:
        draw_rectface(img, box)

    # NOTE: when the input image already lives in the current directory this
    # overwrites it with the annotated version.
    filename = os.path.basename(img_path)
    cv2.imwrite("./" + filename, img)