Face_reg_app/FaceFeatureExtractorAPI/models/facedetector.py

410 lines
14 KiB
Python
Raw Normal View History

2025-12-17 13:13:26 +08:00
"""
Input: the original image.
Output: the face bounding boxes found in the image.
"""
import cv2
import numpy as np
import onnxruntime as ort
class Box:
    """Axis-aligned detection box carrying a score and a class label.

    ``(x1, y1)`` is treated as the minimum corner and ``(x2, y2)`` as the
    maximum corner by ``iou_of``. Extents are measured inclusively
    (``x2 - x1 + 1``) in both ``iou_of`` and ``area``.
    """

    def __init__(self, x1, y1, x2, y2, score, label=1, label_text='face', flag=True):
        """Store the corner coordinates, score, label, label text and flag."""
        self.x1, self.y1 = x1, y1
        self.x2, self.y2 = x2, y2
        self.score = score
        self.label = label
        self.label_text = label_text
        self.flag = flag

    def iou_of(self, other):
        """Return the intersection-over-union between this box and ``other``.

        Returns ``0`` unless the intersection has strictly positive extent on
        both axes.
        """
        left = max(self.x1, other.x1)
        top = max(self.y1, other.y1)
        right = min(self.x2, other.x2)
        bottom = min(self.y2, other.y2)

        # Guard clause: no overlap on at least one axis.
        if not (left < right and top < bottom):
            return 0

        overlap = (right - left + 1.0) * (bottom - top + 1.0)
        own = (self.x2 - self.x1 + 1.0) * (self.y2 - self.y1 + 1.0)
        theirs = (other.x2 - other.x1 + 1.0) * (other.y2 - other.y1 + 1.0)
        return overlap / (own + theirs - overlap)

    def area(self):
        """Return the inclusive area ``(x2 - x1 + 1) * (y2 - y1 + 1)``."""
        width = self.x2 - self.x1 + 1
        height = self.y2 - self.y1 + 1
        return width * height
def hard_nms(boxes, iou_threshold, topk):
    """Greedy non-maximum suppression that keeps the best box of each cluster.

    Boxes are visited in descending ``score`` order. Each box that has not
    been suppressed is kept, and every later box whose IoU with it exceeds
    ``iou_threshold`` is suppressed.

    Args:
        boxes: list of objects exposing ``score`` and ``iou_of(other)``.
            The list itself is not reordered or modified.
        iou_threshold: IoU above which a lower-scored box is suppressed.
        topk: maximum number of boxes to return; values ``<= 0`` yield ``[]``.

    Returns:
        A new list holding the kept box objects (the same objects that were
        passed in), highest score first.
    """
    if not boxes or topk <= 0:
        return []

    # Work on a sorted copy so the caller's list keeps its order.
    ranked = sorted(boxes, key=lambda b: b.score, reverse=True)
    suppressed = [False] * len(ranked)
    kept = []

    for i, candidate in enumerate(ranked):
        if suppressed[i]:
            continue
        kept.append(candidate)
        if len(kept) >= topk:
            # Nothing after this point can affect the result.
            break
        for j in range(i + 1, len(ranked)):
            if suppressed[j]:
                continue
            if candidate.iou_of(ranked[j]) > iou_threshold:
                suppressed[j] = True
    return kept
def blending_nms(boxes, iou_threshold, topk):
    """Non-maximum suppression that blends each cluster into one box.

    Boxes are visited in descending ``score`` order. Each unsuppressed box
    becomes the leader of a cluster made of itself plus every later box whose
    IoU with it exceeds ``iou_threshold``. The cluster is replaced by a new
    ``Box`` whose coordinates and score are the average of its members,
    weighted by ``exp(score)`` normalised over the cluster.

    Args:
        boxes: list of ``Box``-like objects. The list is not reordered and
            its elements are not modified.
        iou_threshold: IoU above which a box joins the leader's cluster.
        topk: maximum number of blended boxes to return; values ``<= 0``
            yield ``[]``.

    Returns:
        A list of newly created ``Box`` objects. Each one carries the
        ``label`` and ``label_text`` of its cluster leader.
    """
    if not boxes or topk <= 0:
        return []

    # Work on a sorted copy so the caller's list keeps its order.
    ranked = sorted(boxes, key=lambda b: b.score, reverse=True)
    suppressed = [False] * len(ranked)
    blended = []

    for i, lead in enumerate(ranked):
        if suppressed[i]:
            continue
        cluster = [lead]
        for j in range(i + 1, len(ranked)):
            if suppressed[j]:
                continue
            if lead.iou_of(ranked[j]) > iou_threshold:
                suppressed[j] = True
                cluster.append(ranked[j])

        # Compute each member's exp(score) weight once and reuse it.
        weights = [np.exp(member.score) for member in cluster]
        total = sum(weights)

        # Start from an all-zero box and accumulate the weighted members.
        merged = Box(
            0, 0, 0, 0, 0,
            label=lead.label,
            label_text=lead.label_text,
            flag=True,
        )
        for member, weight in zip(cluster, weights):
            rate = weight / total
            merged.x1 += member.x1 * rate
            merged.y1 += member.y1 * rate
            merged.x2 += member.x2 * rate
            merged.y2 += member.y2 * rate
            merged.score += member.score * rate

        blended.append(merged)
        if len(blended) >= topk:
            break
    return blended
def offset_nms(boxes, iou_threshold, topk):
    """Class-aware greedy non-maximum suppression.

    Works like hard NMS, except that a box can only suppress boxes with the
    same ``label``. Labels are compared directly, so box coordinates are
    never modified and the result does not depend on the coordinate range.

    Args:
        boxes: list of objects exposing ``score``, ``label`` and
            ``iou_of(other)``. The list is not reordered and its elements
            are not modified.
        iou_threshold: IoU above which a lower-scored box with the same label
            is suppressed.
        topk: maximum number of boxes to return; values ``<= 0`` yield ``[]``.

    Returns:
        A new list holding the kept box objects (the same objects that were
        passed in), highest score first.
    """
    if not boxes or topk <= 0:
        return []

    # Work on a sorted copy so the caller's list keeps its order.
    ranked = sorted(boxes, key=lambda b: b.score, reverse=True)
    suppressed = [False] * len(ranked)
    kept = []

    for i, candidate in enumerate(ranked):
        if suppressed[i]:
            continue
        kept.append(candidate)
        if len(kept) >= topk:
            break
        for j in range(i + 1, len(ranked)):
            rival = ranked[j]
            # Boxes with different labels never compete with each other.
            if suppressed[j] or rival.label != candidate.label:
                continue
            if candidate.iou_of(rival) > iou_threshold:
                suppressed[j] = True
    return kept
def draw_rectface(img, box):
    """Draw ``box`` on ``img`` as a rectangle outline.

    The top-left corner is clamped to ``(0, 0)`` and the width/height are
    limited to what remains of the image from that corner. The rectangle is
    drawn with colour ``(0, 0, 255)`` and thickness 3 via ``cv2.rectangle``.
    Nothing is returned.
    """
    left = max(0, int(box.x1))
    top = max(0, int(box.y1))

    # Largest extent that still fits inside the image from (left, top).
    room_x = img.shape[1] - left
    room_y = img.shape[0] - top
    width = min(room_x, int(box.x2 - left + 1))
    height = min(room_y, int(box.y2 - top + 1))

    top_left = (left, top)
    bottom_right = (left + width, top + height)
    cv2.rectangle(img, top_left, bottom_right, (0, 0, 255), 3)
def cut_rectface(img, box):
    """Return the region of ``img`` covered by ``box``.

    The top-left corner is clamped to ``(0, 0)`` and the width/height are
    limited to what remains of the image from that corner. Width and height
    are also clamped to be non-negative, so a box lying entirely outside the
    image yields an empty crop.

    Args:
        img: array indexed as ``img[row, col, ...]``.
        box: object exposing ``x1``, ``y1``, ``x2`` and ``y2``.

    Returns:
        The slice ``img[top:top + height, left:left + width]``.
    """
    left = max(0, int(box.x1))
    top = max(0, int(box.y1))
    # Without the lower bound, a negative extent would become a negative
    # slice end, which NumPy counts from the far edge of the image.
    width = max(0, min(img.shape[1] - left, int(box.x2 - left + 1)))
    height = max(0, min(img.shape[0] - top, int(box.y2 - top + 1)))
    return img[top:top + height, left:left + width]
def normalize_inplace(mat, mean, scale):
    """Return ``(mat - mean) * scale`` as a float32 array.

    ``mat`` is first converted with ``astype(np.float32)``, which produces a
    new array. The subtraction and multiplication are applied to that new
    array, so the array passed in is left unchanged despite the name.
    """
    normalized = mat.astype(np.float32)
    # Write the results back into the float32 buffer, keeping its dtype.
    np.subtract(normalized, mean, out=normalized)
    np.multiply(normalized, scale, out=normalized)
    return normalized
def create_tensor(mat, tensor_dims, memory_info_handler, data_format):
    """Wrap an image of shape ``(rows, cols, channels)`` in a CPU ``OrtValue``.

    The image is resized with ``cv2.resize`` if its height/width differ from
    the target dims, and a leading batch axis of size 1 is added. For
    ``"CHW"`` the axes are additionally transposed from HWC to CHW.

    Args:
        mat: array with exactly three dimensions ``(rows, cols, channels)``.
        tensor_dims: 4-element target shape whose first entry must be 1.
            It is read as ``(1, C, H, W)`` for ``"CHW"`` and ``(1, H, W, C)``
            for ``"HWC"``.
        memory_info_handler: accepted for interface compatibility; not used.
        data_format: ``"CHW"`` or ``"HWC"``.

    Returns:
        The ``OrtValue`` from ``ort.OrtValue.ortvalue_from_numpy(mat, 'cpu')``.

    Raises:
        RuntimeError: if ``tensor_dims`` does not have 4 entries, the batch
            size is not 1, the channel count differs from ``mat``, or
            ``data_format`` is not one of the supported values.
    """
    rows, cols, channels = mat.shape
    if len(tensor_dims) != 4:
        raise RuntimeError("dims mismatch.")
    if tensor_dims[0] != 1:
        raise RuntimeError("batch != 1")

    if data_format == "CHW":
        _, target_channel, target_height, target_width = tensor_dims
    elif data_format == "HWC":
        _, target_height, target_width, target_channel = tensor_dims
    else:
        # Fail loudly instead of silently returning None.
        raise RuntimeError(f"unsupported data_format: {data_format!r}")

    if target_channel != channels:
        raise RuntimeError("channel mismatch.")

    if target_height != rows or target_width != cols:
        if data_format == "CHW":
            print("in create_tensor, resize mat...")
        mat = cv2.resize(mat, (target_width, target_height))

    if data_format == "CHW":
        mat = mat.transpose(2, 0, 1)  # HWC -> CHW

    # tensor_dims is 4-D with batch size 1, so the tensor needs a batch axis
    # in both layouts.
    mat = np.expand_dims(mat, axis=0)
    return ort.OrtValue.ortvalue_from_numpy(mat, 'cpu')
class BasicOrtHandler:
    """Base class that owns an ONNX Runtime inference session.

    Construction loads the model at ``onnx_path`` and caches the names and
    shapes of its first input and of all its outputs.
    """

    def __init__(self, onnx_path, num_threads=1):
        """Remember the model path and thread count, then create the session.

        Args:
            onnx_path: path handed to ``ort.InferenceSession``.
            num_threads: value assigned to ``intra_op_num_threads``.
        """
        self.onnx_path = onnx_path
        self.num_threads = num_threads
        self.initialize_handler()

    def initialize_handler(self):
        """Create the inference session and cache input/output metadata."""
        session_options = ort.SessionOptions()
        session_options.intra_op_num_threads = self.num_threads
        session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

        self.ort_session = ort.InferenceSession(self.onnx_path, session_options)
        self.memory_info_handler = ort.OrtMemoryInfo("Cpu", ort.OrtAllocatorType.ORT_ARENA_ALLOCATOR, 0, ort.OrtMemType.DEFAULT)

        # Only the first model input is used.
        first_input = self.ort_session.get_inputs()[0]
        self.input_node_names = [first_input.name]
        self.input_node_dims = first_input.shape
        # NOTE(review): assumes every input dim is numeric - confirm for
        # models with symbolic (dynamic) dimensions.
        self.input_tensor_size = np.prod(self.input_node_dims)

        outputs = self.ort_session.get_outputs()
        self.output_node_names = [out.name for out in outputs]
        self.output_node_dims = [out.shape for out in outputs]
        self.num_outputs = len(self.output_node_names)

    def __del__(self):
        """Drop the reference to the session, if one was ever created."""
        # The attribute is missing when session creation failed or when the
        # instance was built without running __init__.
        try:
            del self.ort_session
        except AttributeError:
            pass
class FaceBoxesV2(BasicOrtHandler):
    """Face detector that decodes anchor-based model outputs into boxes.

    The pipeline in ``detect`` is: resize and normalise the image, run the
    ONNX session, decode the regression output against generated anchors,
    then apply non-maximum suppression.
    """

    def __init__(self, onnx_path, num_threads=1):
        """Create the session and set the pre/post-processing constants."""
        super().__init__(onnx_path, num_threads)
        # Per-channel mean subtracted, then per-channel scale applied, in transform().
        self.mean_vals = np.array([104.0, 117.0, 123.0], dtype=np.float32)
        self.scale_vals = np.array([1.0, 1.0, 1.0], dtype=np.float32)
        # variance[0] scales centre offsets, variance[1] scales size offsets.
        self.variance = [0.1, 0.2]
        # One feature-map stride per level, with the anchor sizes used there.
        self.steps = [32, 64, 128]
        self.min_sizes = [
            [32, 64, 128],
            [256],
            [512],
        ]
        # Upper bound on the number of boxes handed to NMS.
        self.max_nms = 30000

    def transform(self, mat):
        """Resize ``mat`` to the model input size, normalise it and wrap it.

        The input dims are read as ``(1, C, H, W)``.
        """
        target_size = (self.input_node_dims[3], self.input_node_dims[2])  # (width, height)
        canvas = cv2.resize(mat, target_size)
        canvas = normalize_inplace(canvas, self.mean_vals, self.scale_vals)
        return create_tensor(canvas, self.input_node_dims, self.memory_info_handler, "CHW")

    def detect(self, mat, score_threshold=0.35, iou_threshold=0.45, topk=300, nms_type=0):
        """Detect faces in ``mat``.

        Args:
            mat: image array; its first two dims are read as (height, width).
            score_threshold: minimum confidence for a box to be decoded.
            iou_threshold: IoU threshold passed to NMS.
            topk: maximum number of boxes returned by NMS.
            nms_type: ``0`` hard NMS, ``1`` blending NMS, ``2`` offset NMS.

        Returns:
            A list of ``Box`` objects in the coordinate frame of ``mat``, or
            ``None`` when ``mat`` is ``None`` or empty.
        """
        if mat is None or mat.size == 0:
            return None
        img_height = float(mat.shape[0])
        img_width = float(mat.shape[1])

        # 1. make input tensor
        input_tensor = self.transform(mat)
        # 2. inference scores & boxes.
        output_tensors = self.ort_session.run(self.output_node_names, {self.input_node_names[0]: input_tensor})
        # 3. rescale & exclude.
        bbox_collection = self.generate_bboxes(output_tensors, score_threshold, img_height, img_width)
        # 4. hard|blend|offset nms with topk.
        return self.nms(bbox_collection, iou_threshold, topk, nms_type)

    def generate_bboxes(self, output_tensors, score_threshold, img_height, img_width):
        """Decode raw model outputs into at most ``max_nms`` boxes.

        Args:
            output_tensors: sequence whose element 0 is indexed as
                ``[0, i, 0..3]`` (box regressions) and element 1 as
                ``[0, i, 1]`` (confidence of anchor ``i``).
            score_threshold: anchors with a lower confidence are skipped.
            img_height: height used to scale normalised y coordinates.
            img_width: width used to scale normalised x coordinates.

        Returns:
            A list of ``Box`` objects scaled by the image size.

        Raises:
            RuntimeError: if the number of generated anchors differs from the
                number of boxes in the model output.
        """
        bboxes = output_tensors[0]  # e.g (1,n,4)
        probs = output_tensors[1]   # e.g (1,n,2)
        # Use the actual output shape; static metadata may hold symbolic dims.
        bbox_num = bboxes.shape[1]

        input_height = self.input_node_dims[2]
        input_width = self.input_node_dims[3]
        anchors = self.generate_anchors(input_height, input_width)
        num_anchors = len(anchors)
        if num_anchors != bbox_num:
            print(f"num_anchors={num_anchors} but detected bbox_num={bbox_num}")
            raise RuntimeError("mismatch num_anchors != bbox_num")

        bbox_collection = []
        for i, (prior_cx, prior_cy, prior_s_kx, prior_s_ky) in enumerate(anchors):
            conf = probs[0, i, 1]
            if conf < score_threshold:
                continue  # filter first.

            dx = bboxes[0, i, 0]
            dy = bboxes[0, i, 1]
            dw = bboxes[0, i, 2]
            dh = bboxes[0, i, 3]

            # Decode relative to the anchor; results are normalised coordinates.
            cx = prior_cx + dx * self.variance[0] * prior_s_kx
            cy = prior_cy + dy * self.variance[0] * prior_s_ky
            w = prior_s_kx * np.exp(dw * self.variance[1])
            h = prior_s_ky * np.exp(dh * self.variance[1])

            bbox_collection.append(Box(
                x1=(cx - w / 2.0) * img_width,
                y1=(cy - h / 2.0) * img_height,
                x2=(cx + w / 2.0) * img_width,
                y2=(cy + h / 2.0) * img_height,
                score=conf,
                label=1,
                label_text="face",
                flag=True,
            ))
            # Stop once exactly max_nms boxes have been collected.
            if len(bbox_collection) >= self.max_nms:
                break
        return bbox_collection

    def nms(self, input_boxes, iou_threshold, topk, nms_type):
        """Dispatch to the NMS variant selected by ``nms_type``.

        ``0`` selects ``hard_nms``, ``1`` ``blending_nms`` and ``2``
        ``offset_nms``.

        Raises:
            NotImplementedError: for any other ``nms_type``.
        """
        if nms_type == 1:
            output_boxes = blending_nms(input_boxes, iou_threshold, topk)
        elif nms_type == 2:
            output_boxes = offset_nms(input_boxes, iou_threshold, topk)
        elif nms_type == 0:
            output_boxes = hard_nms(input_boxes, iou_threshold, topk)
        else:
            raise NotImplementedError
        return output_boxes

    def generate_anchors(self, target_height, target_width):
        """Build the anchor list for an input of the given size.

        For each stride in ``steps`` the feature map has
        ``ceil(size / stride)`` cells per axis. Every cell emits anchors for
        each size in the matching ``min_sizes`` entry: a 4x4 grid of centres
        for size 32, a 2x2 grid for size 64, and a single centred anchor for
        any other size.

        Returns:
            A list of ``[cx, cy, s_kx, s_ky]`` lists, where centres and sizes
            are divided by the target width/height.
        """
        # Fractional cell offsets at which anchor centres are placed.
        dense_offsets = {32: (0.0, 0.25, 0.5, 0.75), 64: (0.0, 0.5)}
        centre_only = (0.5,)

        anchors = []
        for k, step in enumerate(self.steps):
            level_sizes = self.min_sizes[k]
            f_h = int(np.ceil(target_height / step))
            f_w = int(np.ceil(target_width / step))
            for i in range(f_h):
                for j in range(f_w):
                    for min_size in level_sizes:
                        s_kx = min_size / target_width
                        s_ky = min_size / target_height
                        offsets = dense_offsets.get(min_size, centre_only)
                        for offset_y in offsets:
                            cy = (i + offset_y) * step / target_height
                            for offset_x in offsets:
                                cx = (j + offset_x) * step / target_width
                                anchors.append([cx, cy, s_kx, s_ky])
        return anchors
# Usage example: pass the path of an image as the first command-line argument.
# The detected boxes are printed and an annotated copy of the image is written
# to the current directory under the same file name.
if __name__ == "__main__":
    import sys
    import os

    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <image_path>")
    img_path = sys.argv[1]

    # cv2.imread returns None when the file cannot be read.
    img = cv2.imread(img_path)
    if img is None:
        sys.exit(f"cannot read image: {img_path}")

    reta = FaceBoxesV2(r"./checkpoints/faceboxesv2-640x640.onnx", 4)
    # detect() returns None for an empty image; treat that as "no faces".
    detected_boxes = reta.detect(img) or []

    for box in detected_boxes:
        print(f"({box.x1:.3f},{box.y1:.3f},{box.x2:.3f},{box.y2:.3f})", end=" ")
    print("total face number:", len(detected_boxes))

    for box in detected_boxes:
        draw_rectface(img, box)
    filename = os.path.basename(img_path)
    cv2.imwrite("./" + filename, img)