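// FaceRecog/src/face_pipeline.cpp
//
// NOTE: the lines below are residue from the web viewer this file was copied from
// (they are not source code) and are kept only as comments:
//   793 lines
//   30 KiB
//   C++
//   Raw Blame History
//   This file contains ambiguous Unicode characters
//   This file contains Unicode characters that might be confused with other characters.
//   If you think that this is intentional, you can safely ignore this warning.
//   Use the Escape button to reveal them.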

#include "face_pipeline.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <iterator>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
// 构造函数
/**
 * Builds the pipeline: configures the ONNX Runtime session options, loads the
 * models from `model_dir`, and caches the models' I/O metadata.
 *
 * Failure is reported through m_initialized (which Extract() checks) instead of
 * by throwing. InitMemoryAllocators() signals problems with exceptions, so they
 * are caught here and converted into a failed initialization; previously they
 * escaped the constructor while LoadModels() failures did not.
 *
 * @param model_dir Directory that contains the .onnx model files.
 */
FacePipeline::FacePipeline(const std::string &model_dir)
    : m_env(ORT_LOGGING_LEVEL_WARNING, "FaceSDK"),
      m_memory_info(Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault))
{
    m_session_options.SetIntraOpNumThreads(4); // use 4 intra-op threads
    m_session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

    m_initialized = LoadModels(model_dir);
    if (m_initialized)
    {
        try
        {
            InitMemoryAllocators();
        }
        catch (const std::exception &e)
        {
            // Covers std::runtime_error thrown by InitMemoryAllocators() as well as
            // Ort::Exception (which derives from std::exception).
            LOGE("FacePipeline I/O metadata setup failed: %s", e.what());
            m_initialized = false;
        }
    }

    if (m_initialized)
    {
        LOGI("FacePipeline initialized successfully.");
    }
    else
    {
        LOGE("FacePipeline initialization failed.");
    }
}
/**
 * Releases the cached model I/O name strings.
 *
 * InitMemoryAllocators() fills the *_input_names / *_output_names vectors with
 * strdup()'d copies of the model I/O names. Those are plain C allocations that a
 * std::vector<const char *> does not own, so they have to be free()'d here.
 */
FacePipeline::~FacePipeline()
{
    std::vector<const char *> *const name_lists[] = {
        &m_rot_input_names, &m_rot_output_names,
        &m_det_input_names, &m_det_output_names,
        &m_pose_var_input_names, &m_pose_var_output_names,
        &m_pose_conv_input_names, &m_pose_conv_output_names,
        &m_lm1_input_names, &m_lm1_output_names,
        &m_lm2_input_names, &m_lm2_output_names,
        &m_rec_input_names, &m_rec_output_names};

    for (std::vector<const char *> *names : name_lists)
    {
        for (const char *name : *names)
        {
            // strdup() returned a mutable char*; the const was only added on storage.
            std::free(const_cast<char *>(name));
        }
        names->clear();
    }
}
// (私有) 加载所有模型
/**
 * (private) Creates one ONNX Runtime session per model file.
 *
 * Models are loaded in a fixed order and loading stops at the first failure.
 *
 * @param model_dir Directory containing the model files; each path is built as
 *                  model_dir + "/" + file name.
 * @return true when all sessions were created, false as soon as one model
 *         raises an Ort::Exception (the error is logged).
 */
bool FacePipeline::LoadModels(const std::string &model_dir)
{
    // Pairs each session member with the file it is loaded from.
    struct ModelSlot
    {
        std::unique_ptr<Ort::Session> *session;
        const char *file_name;
    };

    const ModelSlot slots[] = {
        {&m_session_rotator, "model_gray_mobilenetv2_rotcls.onnx"},
        {&m_session_detector, "faceboxesv2-640x640.onnx"},
        {&m_session_pose_var, "fsanet-var.onnx"},
        {&m_session_pose_conv, "fsanet-conv.onnx"},
        {&m_session_landmarker1, "face_landmarker_pts5_net1.onnx"},
        {&m_session_landmarker2, "face_landmarker_pts5_net2.onnx"},
        {&m_session_recognizer, "face_recognizer.onnx"}};

    for (const ModelSlot &slot : slots)
    {
        const std::string model_path = model_dir + "/" + slot.file_name;
        try
        {
            *slot.session = std::make_unique<Ort::Session>(m_env, model_path.c_str(), m_session_options);
            LOGI("Loaded model: %s", model_path.c_str());
        }
        catch (const Ort::Exception &e)
        {
            LOGE("Error loading model %s: %s", model_path.c_str(), e.what());
            return false;
        }
    }

    LOGI("All 7 models loaded successfully.");
    return true;
}
// (私有) 获取模型输入/输出信息
/**
 * (private) Caches every model's input/output names and first-input shape,
 * generates the detector anchors, and sizes the shared input blob buffer.
 *
 * @throws std::runtime_error if a model I/O name is null, a model's first input
 *         shape is empty, the detector input has fewer than 4 dimensions, or no
 *         usable blob size could be derived.
 */
void FacePipeline::InitMemoryAllocators()
{
    // Collects the I/O names and the shape of input 0 for one session.
    auto query_model_io = [&](Ort::Session *session,
                              std::vector<const char *> &input_names,
                              std::vector<const char *> &output_names,
                              std::vector<int64_t> &input_shape,
                              const char *model_name)
    {
        input_names.clear();
        output_names.clear();
        input_shape.clear();

        const size_t num_inputs = session->GetInputCount();
        for (size_t idx = 0; idx < num_inputs; ++idx)
        {
            auto name_holder = session->GetInputNameAllocated(idx, m_allocator);
            if (name_holder == nullptr || name_holder.get() == nullptr)
            {
                LOGE("Model %s input name %zu is null!", model_name, idx);
                throw std::runtime_error("Failed to get model input name");
            }
            // Keep a private copy of the name; the holder is a local that goes away
            // at the end of this iteration.
            input_names.push_back(strdup(name_holder.get()));
        }

        const size_t num_outputs = session->GetOutputCount();
        for (size_t idx = 0; idx < num_outputs; ++idx)
        {
            auto name_holder = session->GetOutputNameAllocated(idx, m_allocator);
            if (name_holder == nullptr || name_holder.get() == nullptr)
            {
                LOGE("Model %s output name %zu is null!", model_name, idx);
                throw std::runtime_error("Failed to get model output name");
            }
            output_names.push_back(strdup(name_holder.get()));
        }

        if (num_inputs == 0)
        {
            LOGE("Model %s has no inputs!", model_name);
            return;
        }

        const auto first_input_info = session->GetInputTypeInfo(0);
        const auto first_input_tensor = first_input_info.GetTensorTypeAndShapeInfo();
        input_shape = first_input_tensor.GetShape();
        if (input_shape.empty())
        {
            LOGE("Model %s input shape is empty!", model_name);
            throw std::runtime_error("Model input shape is empty");
        }

        // Log the shape exactly as the model reports it (before the batch fix-up).
        std::string shape_text = "[";
        for (const auto dim : input_shape)
        {
            shape_text += std::to_string(dim);
            shape_text += ", ";
        }
        shape_text += "]";
        LOGI("Model %s input shape: %s", model_name, shape_text.c_str());

        // A batch dimension below 1 is replaced by 1.
        if (input_shape[0] < 1)
        {
            input_shape[0] = 1;
        }
    };

    // Query all 7 models.
    query_model_io(m_session_rotator.get(), m_rot_input_names, m_rot_output_names, m_rot_input_shape, "Rotator");
    query_model_io(m_session_detector.get(), m_det_input_names, m_det_output_names, m_det_input_shape, "Detector");
    query_model_io(m_session_pose_var.get(), m_pose_var_input_names, m_pose_var_output_names, m_pose_var_input_shape, "PoseVar");
    query_model_io(m_session_pose_conv.get(), m_pose_conv_input_names, m_pose_conv_output_names, m_pose_conv_input_shape, "PoseConv");
    query_model_io(m_session_landmarker1.get(), m_lm1_input_names, m_lm1_output_names, m_lm1_input_shape, "Landmarker1");
    query_model_io(m_session_landmarker2.get(), m_lm2_input_names, m_lm2_output_names, m_lm2_input_shape, "Landmarker2");
    query_model_io(m_session_recognizer.get(), m_rec_input_names, m_rec_output_names, m_rec_input_shape, "Recognizer");

    // The anchor generator needs the detector's H and W (dimensions 2 and 3).
    if (m_det_input_shape.size() < 4)
    {
        LOGE("Detector input shape has < 4 dimensions! Cannot generate anchors.");
        throw std::runtime_error("Detector input shape invalid");
    }
    const bool detector_hw_is_dynamic = m_det_input_shape[2] < 0 || m_det_input_shape[3] < 0;
    if (detector_hw_is_dynamic)
    {
        LOGE("Detector input shape is dynamic (H/W is -1). This is not supported by the Python logic.");
        // Per the original author's note, the Python reference uses 640x640.
        LOGI("Forcing detector H/W to 640x640.");
        m_det_input_shape[2] = 640;
        m_det_input_shape[3] = 640;
    }
    generate_anchors_faceboxes(m_det_input_shape[2], m_det_input_shape[3]);

    // Size the shared blob for the largest per-image input (batch dim excluded).
    size_t max_blob_size = 0;
    auto track_largest = [&](const std::vector<int64_t> &shape, const char *model_name)
    {
        if (shape.size() <= 1)
        {
            return; // empty or batch-only shapes carry no size information
        }
        size_t elements = 1;
        for (size_t axis = 1; axis < shape.size(); ++axis)
        {
            if (shape[axis] < 0)
            {
                // A dynamic dimension cannot contribute to the size, so skip this model.
                LOGE("Model %s has dynamic dimension at index %zu. Skipping for max_blob_size calculation.", model_name, axis);
                return;
            }
            elements *= static_cast<size_t>(shape[axis]);
        }
        if (elements > max_blob_size)
        {
            max_blob_size = elements;
        }
    };
    track_largest(m_rot_input_shape, "Rotator");
    track_largest(m_det_input_shape, "Detector");
    track_largest(m_pose_var_input_shape, "PoseVar");
    track_largest(m_lm1_input_shape, "Landmarker1");
    track_largest(m_rec_input_shape, "Recognizer");
    // Landmarker2 is left out on purpose: it does not use the shared blob.

    if (max_blob_size == 0)
    {
        LOGE("Max blob size is 0, something went wrong with model shape detection!");
        throw std::runtime_error("Max blob size is 0");
    }
    LOGI("Calculated max blob size: %zu", max_blob_size);
    m_blob_buffer.resize(max_blob_size);
    LOGI("m_blob_buffer resized successfully.");
}
// --- 图像预处理辅助函数 ---
/**
 * Converts an interleaved 8-bit image into a planar (channel-major) float blob:
 *   blob[c * H * W + h * W + w] = (pixel(h, w, c) - mean[c]) * std[c]
 *
 * Changes from the previous version:
 *  - `blob` is grown when it is too small for the image, instead of being
 *    written out of bounds. It is never shrunk, so a larger shared buffer
 *    keeps its size.
 *  - Pixels are read through row pointers, which handles any channel count and
 *    non-continuous Mats. The old code read the wrong byte for channel counts
 *    other than 1 or 3.
 *
 * @param img  Source image. Must have 8-bit unsigned depth (not checked here).
 * @param blob Destination buffer; the first H * W * channels elements are overwritten.
 * @param mean Per-channel value subtracted from each pixel; needs at least
 *             img.channels() entries.
 * @param std  Per-channel multiplier applied after the subtraction (callers pass
 *             the reciprocal of the divisor); needs at least img.channels() entries.
 */
void FacePipeline::image_to_blob(const cv::Mat &img, std::vector<float> &blob, const float *mean, const float *std)
{
    const int channels = img.channels();
    const int height = img.rows;
    const int width = img.cols;
    if (height <= 0 || width <= 0 || channels <= 0)
    {
        return; // nothing to convert
    }

    const size_t plane_size = static_cast<size_t>(height) * static_cast<size_t>(width);
    const size_t required = plane_size * static_cast<size_t>(channels);
    if (blob.size() < required)
    {
        blob.resize(required);
    }

    for (int c = 0; c < channels; ++c)
    {
        float *plane = blob.data() + static_cast<size_t>(c) * plane_size;
        const float channel_mean = mean[c];
        const float channel_scale = std[c];
        for (int h = 0; h < height; ++h)
        {
            const uchar *src_row = img.ptr<uchar>(h);
            float *dst_row = plane + static_cast<size_t>(h) * static_cast<size_t>(width);
            for (int w = 0; w < width; ++w)
            {
                const float val = static_cast<float>(src_row[w * channels + c]);
                dst_row[w] = (val - channel_mean) * channel_scale;
            }
        }
    }
}
/**
 * Builds a float tensor of shape `input_shape` on top of `blob_data`.
 *
 * The whole vector (blob_data.size() elements) is offered as backing storage.
 * Per the ONNX Runtime API this CreateTensor overload uses the caller's buffer
 * rather than copying it, so `blob_data` presumably has to stay alive and
 * unmodified while the returned tensor is in use.
 *
 * @param blob_data   Backing float buffer.
 * @param input_shape Tensor dimensions.
 * @return The tensor created by Ort::Value::CreateTensor<float>.
 */
Ort::Value FacePipeline::create_tensor(const std::vector<float> &blob_data, const std::vector<int64_t> &input_shape)
{
    // CreateTensor takes a non-const pointer, hence the const_cast.
    float *const buffer = const_cast<float *>(blob_data.data());
    const size_t buffer_elements = blob_data.size();
    const int64_t *const dims = input_shape.data();
    const size_t rank = input_shape.size();

    return Ort::Value::CreateTensor<float>(m_memory_info, buffer, buffer_elements, dims, rank);
}
// --- 核心管线实现 ---
/**
 * Runs the whole pipeline on one image and produces its face feature vector.
 *
 * Stages, in order: rotation correction, face detection, pose gating on the raw
 * (unaligned) face crop, 5-point landmark detection, alignment, and feature
 * extraction (normalization happens inside RunRecognition()).
 *
 * @param image   Input image. Must be non-empty, 8-bit, 3-channel; the
 *                preprocessing steps convert from BGR.
 * @param feature Receives the feature vector on success.
 * @return true on success. false (after logging the reason) when the pipeline is
 *         not initialized, the input is unusable, no face is found, the face box
 *         is degenerate, the pose exceeds m_pose_threshold, or a later stage
 *         reports failure.
 * @note Exceptions thrown by the individual stages are not caught here.
 */
bool FacePipeline::Extract(const cv::Mat &image, std::vector<float> &feature)
{
    if (!m_initialized)
    {
        LOGE("Extract failed: Pipeline is not initialized.");
        return false;
    }
    if (image.empty())
    {
        LOGE("Extract failed: Input image is empty.");
        return false;
    }
    // The detector preprocessing writes one plane per image channel into a blob
    // sized for 3-channel inputs, so any other layout would overrun it.
    if (image.type() != CV_8UC3)
    {
        LOGE("Extract failed: Input image must be 8-bit 3-channel (BGR).");
        return false;
    }

    // --- 1. Rotation correction ---
    const int rot_angle_code = RunRotation(image);
    cv::Mat upright_image;
    if (rot_angle_code >= 0)
    {
        cv::rotate(image, upright_image, rot_angle_code);
    }
    else
    {
        upright_image = image; // already upright
    }

    // --- 2. Face detection ---
    std::vector<FaceBox> boxes;
    if (!RunDetection(upright_image, boxes))
    {
        LOGI("Extract failed: No face detected.");
        return false;
    }
    // Only the first box returned by the detector is used.
    const FaceBox best_box = boxes[0];

    // Crop the face for pose estimation. The box may extend past the image border;
    // the missing area is filled with black.
    const cv::Rect face_rect_raw(static_cast<int>(best_box.x1),
                                 static_cast<int>(best_box.y1),
                                 static_cast<int>(best_box.x2 - best_box.x1),
                                 static_cast<int>(best_box.y2 - best_box.y1));
    if (face_rect_raw.width <= 0 || face_rect_raw.height <= 0)
    {
        // A zero-area crop would make the resize inside the pose stage fail.
        LOGI("Extract failed: Detected face box is degenerate.");
        return false;
    }
    const int pad_top = std::max(0, -face_rect_raw.y);
    const int pad_bottom = std::max(0, (face_rect_raw.y + face_rect_raw.height) - upright_image.rows);
    const int pad_left = std::max(0, -face_rect_raw.x);
    const int pad_right = std::max(0, (face_rect_raw.x + face_rect_raw.width) - upright_image.cols);
    cv::Mat face_crop_padded;
    cv::copyMakeBorder(upright_image, face_crop_padded, pad_top, pad_bottom, pad_left, pad_right,
                       cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
    const cv::Rect face_rect_padded(face_rect_raw.x + pad_left, face_rect_raw.y + pad_top,
                                    face_rect_raw.width, face_rect_raw.height);
    const cv::Mat face_crop = face_crop_padded(face_rect_padded);

    // --- 3. Pose estimation (quality gate) ---
    // Per the original author's notes, the Python reference feeds the pose checker
    // the raw crop rather than the aligned face, so this runs before alignment.
    FacePose pose;
    if (!RunPose(face_crop, pose))
    {
        LOGI("Extract failed: Pose estimation failed.");
        return false;
    }
    if (std::abs(pose.yaw) > m_pose_threshold || std::abs(pose.pitch) > m_pose_threshold)
    {
        LOGI("Extract failed: Face pose (Y:%.1f, P:%.1f) exceeds threshold (%.1f)", pose.yaw, pose.pitch, m_pose_threshold);
        return false;
    }

    // --- 4. Landmark detection ---
    FaceLandmark landmark;
    if (!RunLandmark(upright_image, best_box, landmark))
    {
        LOGI("Extract failed: Landmark detection failed.");
        return false;
    }

    // --- 5. Alignment ---
    const cv::Mat aligned_face = RunAlignment(upright_image, landmark);
    if (aligned_face.empty())
    {
        LOGI("Extract failed: Face alignment failed.");
        return false;
    }

    // --- 6. Feature extraction (normalization is done inside RunRecognition) ---
    if (!RunRecognition(aligned_face, feature))
    {
        LOGI("Extract failed: Feature recognition failed.");
        return false;
    }

    LOGI("Extract success.");
    return true;
}
// --- 步骤 1: 旋转检测 (来自 face_feature_extractor.py) ---
/**
 * Prepares the rotation-classifier input: BGR -> gray, resize to 256x256,
 * center-crop to 224x224, expand back to 3 channels, scale pixels by 1/255.
 *
 * @param image     Source image (converted with COLOR_BGR2GRAY).
 * @param blob_data Destination blob, filled through image_to_blob().
 */
void FacePipeline::preprocess_rotation(const cv::Mat &image, std::vector<float> &blob_data)
{
    constexpr int kResizeSide = 256;
    constexpr int kCropSide = 224;
    constexpr int kCropOffset = (kResizeSide - kCropSide) / 2;

    cv::Mat gray;
    cv::cvtColor(image, gray, cv::COLOR_BGR2GRAY);

    cv::Mat scaled;
    cv::resize(gray, scaled, cv::Size(kResizeSide, kResizeSide), 0, 0, cv::INTER_LINEAR);

    // Center crop, then replicate the gray plane into three channels.
    const cv::Mat center = scaled(cv::Rect(kCropOffset, kCropOffset, kCropSide, kCropSide));
    cv::Mat gray_as_bgr;
    cv::cvtColor(center, gray_as_bgr, cv::COLOR_GRAY2BGR);

    // Normalization is x / 255: zero mean, multiplier 1/255.
    const float zero_mean[3] = {0.0f, 0.0f, 0.0f};
    const float inv_255[3] = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
    image_to_blob(gray_as_bgr, blob_data, zero_mean, inv_255);
}
/**
 * Classifies the image orientation and returns the cv::rotate() code that
 * makes it upright.
 *
 * The class with the highest of the first 4 output scores is mapped as:
 * 1 -> ROTATE_90_CLOCKWISE, 2 -> ROTATE_180, 3 -> ROTATE_90_COUNTERCLOCKWISE,
 * anything else -> no rotation.
 *
 * The model's I/O names and the number of returned scores are now checked
 * before the output buffer is read; previously 4 floats were read unconditionally.
 *
 * @param image Source image (see preprocess_rotation()).
 * @return A cv::RotateFlags value, or -1 when no rotation is needed or the
 *         model output is unusable.
 */
int FacePipeline::RunRotation(const cv::Mat &image)
{
    constexpr size_t kNumClasses = 4;

    if (m_rot_input_names.empty() || m_rot_output_names.empty())
    {
        LOGE("Rotation model has no cached input/output names; skipping rotation.");
        return -1;
    }

    preprocess_rotation(image, m_blob_buffer);
    auto input_tensor = create_tensor(m_blob_buffer, m_rot_input_shape);
    auto output_tensors = m_session_rotator->Run(Ort::RunOptions{nullptr},
                                                 m_rot_input_names.data(), &input_tensor, 1,
                                                 m_rot_output_names.data(), 1);

    if (output_tensors.empty() ||
        output_tensors[0].GetTensorTypeAndShapeInfo().GetElementCount() < kNumClasses)
    {
        LOGE("Rotation model returned fewer than 4 scores; skipping rotation.");
        return -1;
    }

    const float *scores = output_tensors[0].GetTensorData<float>();
    const auto best_class = std::distance(scores, std::max_element(scores, scores + kNumClasses));

    switch (best_class)
    {
    case 1:
        return cv::ROTATE_90_CLOCKWISE;
    case 2:
        return cv::ROTATE_180;
    case 3:
        return cv::ROTATE_90_COUNTERCLOCKWISE;
    default:
        return -1; // class 0: already upright
    }
}
// --- 步骤 2: 人脸检测 (来自 facedetector.py) ---
/**
 * Prepares the detector input: resize to the detector's W x H (dimensions 3 and
 * 2 of m_det_input_shape) and subtract the per-channel mean (104, 117, 123)
 * with a multiplier of 1.
 *
 * @param img       Source image.
 * @param blob_data Destination blob, filled through image_to_blob().
 */
void FacePipeline::preprocess_detection(const cv::Mat &img, std::vector<float> &blob_data)
{
    const int net_width = static_cast<int>(m_det_input_shape[3]);
    const int net_height = static_cast<int>(m_det_input_shape[2]);

    cv::Mat scaled;
    cv::resize(img, scaled, cv::Size(net_width, net_height));

    // (pixel - mean) * 1.0, mean given in BGR order.
    const float bgr_mean[3] = {104.0f, 117.0f, 123.0f};
    const float unit_scale[3] = {1.0f, 1.0f, 1.0f};
    image_to_blob(scaled, blob_data, bgr_mean, unit_scale);
}
/**
 * Runs the face detector, decodes the anchor-relative regressions into boxes in
 * image pixel coordinates, and applies hard NMS.
 *
 * Output 0 is read as box regressions (4 values per anchor) and output 1 as
 * class scores (2 values per anchor; index 1 is used as the confidence).
 *
 * Compared with the previous version, the number of outputs, their rank and
 * their element counts are validated before indexing, and the anchor-count
 * comparison no longer mixes signed and unsigned values.
 *
 * @param image Image to search; boxes are scaled by its width/height.
 * @param boxes Receives the boxes kept by hard_nms(); untouched on early failure.
 * @return true when at least one box survives, false otherwise.
 */
bool FacePipeline::RunDetection(const cv::Mat &image, std::vector<FaceBox> &boxes)
{
    if (m_det_input_names.empty() || m_det_output_names.size() < 2)
    {
        LOGE("Detector model does not expose 1 input and 2 outputs.");
        return false;
    }

    const float img_height = static_cast<float>(image.rows);
    const float img_width = static_cast<float>(image.cols);

    preprocess_detection(image, m_blob_buffer);
    auto input_tensor = create_tensor(m_blob_buffer, m_det_input_shape);
    auto output_tensors = m_session_detector->Run(Ort::RunOptions{nullptr},
                                                  m_det_input_names.data(), &input_tensor, 1,
                                                  m_det_output_names.data(), 2); // boxes + scores
    if (output_tensors.size() < 2)
    {
        LOGE("Detector returned fewer than 2 outputs.");
        return false;
    }

    const auto bbox_info = output_tensors[0].GetTensorTypeAndShapeInfo();
    const auto bbox_shape = bbox_info.GetShape(); // expected [1, N, 4]
    if (bbox_shape.size() < 2)
    {
        LOGE("Detector box output has unexpected rank %zu.", bbox_shape.size());
        return false;
    }
    const int64_t reported_anchors = bbox_shape[1];
    if (reported_anchors < 0 || static_cast<size_t>(reported_anchors) != m_anchors.size())
    {
        LOGE("Anchor size mismatch! Expected %zu, Got %ld", m_anchors.size(), static_cast<long>(reported_anchors));
        return false;
    }
    const size_t num_anchors = m_anchors.size();

    // Make sure both buffers are large enough for the indexing below.
    const size_t probs_count = output_tensors[1].GetTensorTypeAndShapeInfo().GetElementCount();
    if (bbox_info.GetElementCount() < num_anchors * 4 || probs_count < num_anchors * 2)
    {
        LOGE("Detector outputs are smaller than expected for %zu anchors.", num_anchors);
        return false;
    }

    const float *bboxes_data = output_tensors[0].GetTensorData<float>(); // [1, N, 4]
    const float *probs_data = output_tensors[1].GetTensorData<float>();  // [1, N, 2]

    std::vector<FaceBox> bbox_collection;
    const float variance[2] = {0.1f, 0.2f};
    for (size_t i = 0; i < num_anchors; ++i)
    {
        const float conf = probs_data[i * 2 + 1];
        if (conf < m_det_threshold)
            continue;

        const Anchor &anchor = m_anchors[i];
        const float dx = bboxes_data[i * 4 + 0];
        const float dy = bboxes_data[i * 4 + 1];
        const float dw = bboxes_data[i * 4 + 2];
        const float dh = bboxes_data[i * 4 + 3];

        // Decode center/size relative to the anchor (all in normalized coordinates).
        const float cx = anchor.cx + dx * variance[0] * anchor.s_kx;
        const float cy = anchor.cy + dy * variance[0] * anchor.s_ky;
        const float w = anchor.s_kx * std::exp(dw * variance[1]);
        const float h = anchor.s_ky * std::exp(dh * variance[1]);

        // Convert to corner form and scale to pixels of the input image.
        bbox_collection.push_back({(cx - w / 2.0f) * img_width,
                                   (cy - h / 2.0f) * img_height,
                                   (cx + w / 2.0f) * img_width,
                                   (cy + h / 2.0f) * img_height,
                                   conf});
    }

    boxes = hard_nms(bbox_collection, m_det_iou_threshold, m_det_topk);
    return !boxes.empty();
}
/**
 * Rebuilds m_anchors for a detector input of target_height x target_width.
 *
 * Three levels with strides 32, 64 and 128 are used. For every feature-map cell
 * and every min_size of the level, anchors are emitted as {cx, cy, s_kx, s_ky}
 * in normalized coordinates:
 *  - min_size 32: a 4x4 grid of sub-cell offsets (0, 0.25, 0.5, 0.75)
 *  - min_size 64: a 2x2 grid of sub-cell offsets (0, 0.5)
 *  - otherwise:   a single anchor at the cell center (offset 0.5)
 * The emission order is level, row, column, min_size, y-offset, x-offset.
 *
 * @param target_height Detector input height in pixels.
 * @param target_width  Detector input width in pixels.
 */
void FacePipeline::generate_anchors_faceboxes(int target_height, int target_width)
{
    m_anchors.clear();

    const std::vector<int> strides = {32, 64, 128};
    const std::vector<std::vector<int>> sizes_per_level = {{32, 64, 128}, {256}, {512}};

    const std::vector<float> dense_offsets_32 = {0.0f, 0.25f, 0.5f, 0.75f};
    const std::vector<float> dense_offsets_64 = {0.0f, 0.5f};
    const std::vector<float> center_offset = {0.5f};

    for (size_t level = 0; level < strides.size(); ++level)
    {
        const int stride = strides[level];
        const int grid_rows = static_cast<int>(std::ceil(static_cast<float>(target_height) / stride));
        const int grid_cols = static_cast<int>(std::ceil(static_cast<float>(target_width) / stride));

        for (int row = 0; row < grid_rows; ++row)
        {
            for (int col = 0; col < grid_cols; ++col)
            {
                for (const int min_size : sizes_per_level[level])
                {
                    const float s_kx = static_cast<float>(min_size) / target_width;
                    const float s_ky = static_cast<float>(min_size) / target_height;

                    // Pick the sub-cell offset grid for this anchor size.
                    const std::vector<float> &offsets =
                        (min_size == 32) ? dense_offsets_32
                        : (min_size == 64) ? dense_offsets_64
                                           : center_offset;

                    for (const float offset_y : offsets)
                    {
                        for (const float offset_x : offsets)
                        {
                            m_anchors.push_back({(col + offset_x) * stride / target_width,
                                                 (row + offset_y) * stride / target_height,
                                                 s_kx, s_ky});
                        }
                    }
                }
            }
        }
    }
}
// --- 步骤 3: 姿态估计 (来自 imgchecker.py) ---
/**
 * Prepares the pose-network input: centers the crop on a black canvas that is
 * 30% larger in each dimension, resizes the canvas to the pose model's W x H
 * (dimensions 3 and 2 of m_pose_var_input_shape) and normalizes with
 * (pixel - 127.5) / 127.5.
 *
 * @param img       Face crop; copied onto a CV_8UC3 canvas.
 * @param blob_data Destination blob, filled through image_to_blob().
 */
void FacePipeline::preprocess_pose(const cv::Mat &img, std::vector<float> &blob_data)
{
    const float pad_ratio = 0.3f;
    const int src_h = img.rows;
    const int src_w = img.cols;

    // Enlarged canvas size and the top-left corner that centers the crop on it.
    const int canvas_h = static_cast<int>(src_h + pad_ratio * src_h);
    const int canvas_w = static_cast<int>(src_w + pad_ratio * src_w);
    const int left = std::max(0, (canvas_w - src_w) / 2);
    const int top = std::max(0, (canvas_h - src_h) / 2);

    cv::Mat canvas = cv::Mat::zeros(canvas_h, canvas_w, CV_8UC3);
    const cv::Rect placement(left, top, src_w, src_h);
    img.copyTo(canvas(placement));

    const cv::Size net_size(m_pose_var_input_shape[3], m_pose_var_input_shape[2]);
    cv::Mat scaled;
    cv::resize(canvas, scaled, net_size);

    // (pixel - 127.5) / 127.5, with the division expressed as a multiplier.
    const float half_range_mean[3] = {127.5f, 127.5f, 127.5f};
    const float inv_half_range[3] = {1.0f / 127.5f, 1.0f / 127.5f, 1.0f / 127.5f};
    image_to_blob(scaled, blob_data, half_range_mean, inv_half_range);
}
/**
 * Estimates head pose by running the "var" and "conv" pose models on the same
 * preprocessed blob and averaging their outputs element-wise.
 *
 * Output elements 0, 1 and 2 of each model are stored as yaw, pitch and roll.
 *
 * The function used to return true unconditionally and read 3 floats from each
 * output without checking. It now returns false for an empty crop, missing I/O
 * names, or outputs with fewer than 3 values.
 *
 * @param face_crop Face crop passed to preprocess_pose().
 * @param pose      Receives the averaged angles on success.
 * @return true when both models produced at least 3 values, false otherwise.
 */
bool FacePipeline::RunPose(const cv::Mat &face_crop, FacePose &pose)
{
    constexpr size_t kNumAngles = 3;

    if (face_crop.empty())
    {
        LOGE("Pose estimation skipped: face crop is empty.");
        return false;
    }
    if (m_pose_var_input_names.empty() || m_pose_var_output_names.empty() ||
        m_pose_conv_input_names.empty() || m_pose_conv_output_names.empty())
    {
        LOGE("Pose models have no cached input/output names.");
        return false;
    }

    preprocess_pose(face_crop, m_blob_buffer);

    // VAR model
    auto input_tensor_var = create_tensor(m_blob_buffer, m_pose_var_input_shape);
    auto output_var = m_session_pose_var->Run(Ort::RunOptions{nullptr},
                                              m_pose_var_input_names.data(), &input_tensor_var, 1,
                                              m_pose_var_output_names.data(), 1);

    // CONV model (reuses the same blob)
    auto input_tensor_conv = create_tensor(m_blob_buffer, m_pose_conv_input_shape);
    auto output_conv = m_session_pose_conv->Run(Ort::RunOptions{nullptr},
                                                m_pose_conv_input_names.data(), &input_tensor_conv, 1,
                                                m_pose_conv_output_names.data(), 1);

    if (output_var.empty() || output_conv.empty() ||
        output_var[0].GetTensorTypeAndShapeInfo().GetElementCount() < kNumAngles ||
        output_conv[0].GetTensorTypeAndShapeInfo().GetElementCount() < kNumAngles)
    {
        LOGE("Pose models returned fewer than 3 values.");
        return false;
    }

    const float *data_var = output_var[0].GetTensorData<float>();
    const float *data_conv = output_conv[0].GetTensorData<float>();

    // Combine the two estimates by averaging.
    pose.yaw = (data_var[0] + data_conv[0]) / 2.0f;
    pose.pitch = (data_var[1] + data_conv[1]) / 2.0f;
    pose.roll = (data_var[2] + data_conv[2]) / 2.0f;
    return true;
}
// --- 步骤 4: 关键点检测 (来自 facelandmarks5er.py) ---
/**
 * Prepares the first landmark network's input: resize to the model's W x H
 * (dimensions 3 and 2 of m_lm1_input_shape), convert BGR to gray, and copy the
 * raw 0-255 values (mean 0, multiplier 1).
 *
 * @param img       Face crop.
 * @param blob_data Destination blob, filled through image_to_blob().
 */
void FacePipeline::preprocess_landmark_net1(const cv::Mat &img, std::vector<float> &blob_data)
{
    const cv::Size net_size(m_lm1_input_shape[3], m_lm1_input_shape[2]);

    cv::Mat scaled;
    cv::resize(img, scaled, net_size);

    cv::Mat gray;
    cv::cvtColor(scaled, gray, cv::COLOR_BGR2GRAY);

    // No normalization: values stay in [0, 255].
    const float no_mean[1] = {0.0f};
    const float unit_scale[1] = {1.0f};
    image_to_blob(gray, blob_data, no_mean, unit_scale);
}
// C++ 转译 facelandmarks5er.py::shape_index_process
/**
 * Gathers a small feature patch around every predicted landmark position
 * (C++ port of facelandmarks5er.py::shape_index_process).
 *
 * `feat_val` is read as [N, C, H, W] and `pos_val` as [N, 2 * L] with
 * interleaved (x, y) pairs that are scaled by (W - 1) and (H - 1). The patch
 * size is 15/112 of the feature-map size, rounded. The result is laid out as
 * [N, C, patch_h, L, patch_w]; samples that fall outside the feature map are 0.
 *
 * Changes from the previous version: tensor ranks and dimensions are validated
 * before use, unused locals were removed, and out-of-range samples rely on the
 * zero-initialised buffer instead of being written again.
 *
 * @param feat_val Feature map produced by landmark net 1.
 * @param pos_val  Landmark positions produced by landmark net 1.
 * @return Flattened patch tensor of N * C * patch_h * L * patch_w floats.
 * @throws std::runtime_error if the tensors do not have the expected rank or
 *         have non-positive dimensions.
 */
std::vector<float> FacePipeline::shape_index_process(const Ort::Value &feat_val, const Ort::Value &pos_val)
{
    const auto feat_shape = feat_val.GetTensorTypeAndShapeInfo().GetShape();
    const auto pos_shape = pos_val.GetTensorTypeAndShapeInfo().GetShape();
    if (feat_shape.size() < 4 || pos_shape.size() < 2)
    {
        throw std::runtime_error("shape_index_process: unexpected tensor rank");
    }
    if (feat_shape[0] < 1 || feat_shape[1] < 1 || feat_shape[2] < 1 || feat_shape[3] < 1 ||
        pos_shape[0] < feat_shape[0] || pos_shape[1] < 2)
    {
        throw std::runtime_error("shape_index_process: unexpected tensor dimensions");
    }

    const float *feat_data = feat_val.GetTensorData<float>();
    const float *pos_data = pos_val.GetTensorData<float>();

    const long feat_n = static_cast<long>(feat_shape[0]);
    const long feat_c = static_cast<long>(feat_shape[1]);
    const long feat_h = static_cast<long>(feat_shape[2]);
    const long feat_w = static_cast<long>(feat_shape[3]);
    const long landmark_x2 = static_cast<long>(pos_shape[1]); // 2 values per landmark
    const int landmark_num = static_cast<int>(landmark_x2 / 2);

    // Reference geometry: a 15x15 patch on a 112x112 input, rescaled to the feature map.
    const float origin_size[2] = {112.0f, 112.0f};
    const float origin_patch[2] = {15.0f, 15.0f};
    const int patch_h = static_cast<int>(origin_patch[0] * feat_h / origin_size[0] + 0.5f);
    const int patch_w = static_cast<int>(origin_patch[1] * feat_w / origin_size[1] + 0.5f);
    const float r_h = (patch_h - 1) / 2.0f; // half patch extent, used to center the patch
    const float r_w = (patch_w - 1) / 2.0f;

    const size_t n_count = static_cast<size_t>(feat_n);
    const size_t c_count = static_cast<size_t>(feat_c);
    const size_t h_count = static_cast<size_t>(feat_h);
    const size_t w_count = static_cast<size_t>(feat_w);
    const size_t ph_count = static_cast<size_t>(patch_h);
    const size_t pw_count = static_cast<size_t>(patch_w);
    const size_t lm_count = static_cast<size_t>(landmark_num);

    // Zero-filled, so samples outside the feature map need no explicit write.
    std::vector<float> buff(n_count * c_count * ph_count * lm_count * pw_count, 0.0f);

    for (size_t n = 0; n < n_count; ++n)
    {
        for (size_t i = 0; i < lm_count; ++i)
        {
            const float x_pos = pos_data[n * static_cast<size_t>(landmark_x2) + 2 * i];
            const float y_pos = pos_data[n * static_cast<size_t>(landmark_x2) + 2 * i + 1];
            // Top-left corner of the patch in feature-map coordinates.
            const int y = static_cast<int>(y_pos * (feat_h - 1) - r_h + 0.5f);
            const int x = static_cast<int>(x_pos * (feat_w - 1) - r_w + 0.5f);

            for (size_t c = 0; c < c_count; ++c)
            {
                for (int ph = 0; ph < patch_h; ++ph)
                {
                    const int y_p = y + ph;
                    if (y_p < 0 || y_p >= feat_h)
                    {
                        continue; // whole patch row is outside the map
                    }
                    const size_t out_row =
                        (((n * c_count + c) * ph_count + static_cast<size_t>(ph)) * lm_count + i) * pw_count;
                    const size_t feat_row =
                        ((n * c_count + c) * h_count + static_cast<size_t>(y_p)) * w_count;

                    for (int pw = 0; pw < patch_w; ++pw)
                    {
                        const int x_p = x + pw;
                        if (x_p < 0 || x_p >= feat_w)
                        {
                            continue;
                        }
                        buff[out_row + static_cast<size_t>(pw)] = feat_data[feat_row + static_cast<size_t>(x_p)];
                    }
                }
            }
        }
    }
    return buff;
}
/**
 * Detects 5 facial landmarks inside `box` with the two-stage landmark models.
 *
 * Net 1 runs on the (zero-padded) face crop and yields a feature map (output 0)
 * and coarse positions (output 1). shape_index_process() samples the feature
 * map around those positions, and net 2 predicts an offset that is added to
 * them. Positions are mapped back to image coordinates using the box size and
 * clamped to stay inside the image.
 *
 * Compared with the previous version, a zero-area box and undersized model
 * outputs now make the function return false instead of crashing or reading
 * out of bounds, and an unused local was removed.
 *
 * @param image    Upright image the box refers to.
 * @param box      Face box in pixel coordinates of `image`.
 * @param landmark Receives the 5 landmark points on success.
 * @return true on success, false when the input or a model output is unusable.
 */
bool FacePipeline::RunLandmark(const cv::Mat &image, const FaceBox &box, FaceLandmark &landmark)
{
    constexpr int kNumLandmarks = 5;
    constexpr size_t kNumCoords = 2 * kNumLandmarks;
    constexpr float kNetInputSide = 112.0f;

    if (m_lm1_input_names.empty() || m_lm1_output_names.size() < 2 ||
        m_lm2_input_names.empty() || m_lm2_output_names.empty())
    {
        LOGE("Landmark models do not expose the expected inputs/outputs.");
        return false;
    }

    // 1. Crop the face; parts of the box outside the image are filled with black.
    const cv::Rect face_rect_raw(static_cast<int>(box.x1),
                                 static_cast<int>(box.y1),
                                 static_cast<int>(box.x2 - box.x1),
                                 static_cast<int>(box.y2 - box.y1));
    if (face_rect_raw.width <= 0 || face_rect_raw.height <= 0)
    {
        // A zero-area crop would make the resize in preprocess_landmark_net1() fail.
        LOGE("Landmark detection skipped: face box is degenerate.");
        return false;
    }
    const int pad_top = std::max(0, -face_rect_raw.y);
    const int pad_bottom = std::max(0, (face_rect_raw.y + face_rect_raw.height) - image.rows);
    const int pad_left = std::max(0, -face_rect_raw.x);
    const int pad_right = std::max(0, (face_rect_raw.x + face_rect_raw.width) - image.cols);
    cv::Mat face_crop_padded;
    cv::copyMakeBorder(image, face_crop_padded, pad_top, pad_bottom, pad_left, pad_right,
                       cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
    const cv::Rect face_rect_padded(face_rect_raw.x + pad_left, face_rect_raw.y + pad_top,
                                    face_rect_raw.width, face_rect_raw.height);
    const cv::Mat face_crop = face_crop_padded(face_rect_padded);

    // 2. Preprocess for net 1
    preprocess_landmark_net1(face_crop, m_blob_buffer);
    auto input_tensor_net1 = create_tensor(m_blob_buffer, m_lm1_input_shape);

    // 3. Run net 1 (output 0: feature map, output 1: coarse positions)
    auto output_net1 = m_session_landmarker1->Run(Ort::RunOptions{nullptr},
                                                  m_lm1_input_names.data(), &input_tensor_net1, 1,
                                                  m_lm1_output_names.data(), 2);
    if (output_net1.size() < 2 ||
        output_net1[1].GetTensorTypeAndShapeInfo().GetElementCount() < kNumCoords)
    {
        LOGE("Landmark net1 returned unexpected outputs.");
        return false;
    }

    // 4. Sample feature patches around the coarse positions
    std::vector<float> shape_index_blob = shape_index_process(output_net1[0], output_net1[1]);

    // 5. Build the net 2 input on top of the sampled patches.
    // NOTE(review): m_lm2_input_shape comes from model metadata with only the batch
    // dimension fixed up; this assumes its remaining dimensions are static - confirm.
    auto input_tensor_net2 = Ort::Value::CreateTensor<float>(m_memory_info,
                                                             shape_index_blob.data(),
                                                             shape_index_blob.size(),
                                                             m_lm2_input_shape.data(),
                                                             m_lm2_input_shape.size());

    // 6. Run net 2
    auto output_net2 = m_session_landmarker2->Run(Ort::RunOptions{nullptr},
                                                  m_lm2_input_names.data(), &input_tensor_net2, 1,
                                                  m_lm2_output_names.data(), 1);
    if (output_net2.empty() ||
        output_net2[0].GetTensorTypeAndShapeInfo().GetElementCount() < kNumCoords)
    {
        LOGE("Landmark net2 returned unexpected outputs.");
        return false;
    }

    // 7. Post-process: coarse position + refinement, mapped back into the image.
    const float *data_net1_pos = output_net1[1].GetTensorData<float>();
    const float *data_net2 = output_net2[0].GetTensorData<float>();
    const float scale_x = (box.x2 - box.x1) / kNetInputSide;
    const float scale_y = (box.y2 - box.y1) / kNetInputSide;
    for (int i = 0; i < kNumLandmarks; ++i)
    {
        const float x_norm = (data_net2[i * 2 + 0] + data_net1_pos[i * 2 + 0]) * kNetInputSide;
        const float y_norm = (data_net2[i * 2 + 1] + data_net1_pos[i * 2 + 1]) * kNetInputSide;
        float x = box.x1 + x_norm * scale_x;
        float y = box.y1 + y_norm * scale_y;
        // Keep the point strictly inside the image.
        x = std::max(0.01f, std::min(x, static_cast<float>(image.cols) - 0.01f));
        y = std::max(0.01f, std::min(y, static_cast<float>(image.rows) - 0.01f));
        landmark.points[i] = cv::Point2f(x, y);
    }
    return true;
}
// --- 步骤 5: 人脸对齐 (来自 facealign.py) ---
/**
 * Warps the face so that its 5 landmarks match m_landmark_template.
 *
 * A partial affine transform is estimated from the landmark points to the
 * template points (rows of m_landmark_template, read as float x/y) and applied
 * with bilinear interpolation; the output size is m_align_output_size.
 *
 * cv::estimateAffinePartial2D() returns an empty matrix when no transform can be
 * estimated. That case used to reach cv::warpAffine() and fail there; it is now
 * reported by returning an empty Mat.
 *
 * @param image    Upright source image.
 * @param landmark The 5 landmark points in `image` coordinates.
 * @return The aligned face, or an empty cv::Mat when the transform could not be
 *         estimated.
 */
cv::Mat FacePipeline::RunAlignment(const cv::Mat &image, const FaceLandmark &landmark)
{
    constexpr int kNumLandmarks = 5;

    std::vector<cv::Point2f> src_points;
    std::vector<cv::Point2f> dst_points;
    src_points.reserve(kNumLandmarks);
    dst_points.reserve(kNumLandmarks);
    for (int i = 0; i < kNumLandmarks; ++i)
    {
        src_points.push_back(landmark.points[i]);
        dst_points.push_back(cv::Point2f(m_landmark_template.at<float>(i, 0),
                                         m_landmark_template.at<float>(i, 1)));
    }

    const cv::Mat transform_matrix = cv::estimateAffinePartial2D(src_points, dst_points);
    if (transform_matrix.empty())
    {
        LOGE("Alignment failed: could not estimate a transform from the landmarks.");
        return cv::Mat();
    }

    cv::Mat aligned_face;
    cv::warpAffine(image, aligned_face, transform_matrix, m_align_output_size, cv::INTER_LINEAR);
    return aligned_face;
}
// --- 步骤 6: 特征提取 (来自 facerecoger.py) ---
/**
 * Prepares the recognizer input: resize to 248x248, convert BGR to RGB, and copy
 * the raw 0-255 values (mean 0, multiplier 1).
 *
 * @param img       Aligned face image.
 * @param blob_data Destination blob, filled through image_to_blob().
 */
void FacePipeline::preprocess_recognition(const cv::Mat &img, std::vector<float> &blob_data)
{
    constexpr int kSide = 248;

    cv::Mat scaled;
    cv::resize(img, scaled, cv::Size(kSide, kSide));

    cv::Mat rgb;
    cv::cvtColor(scaled, rgb, cv::COLOR_BGR2RGB);

    // No normalization: values stay in [0, 255].
    const float no_mean[3] = {0.0f, 0.0f, 0.0f};
    const float unit_scale[3] = {1.0f, 1.0f, 1.0f};
    image_to_blob(rgb, blob_data, no_mean, unit_scale);
}
/**
 * In-place "sqrt then L2" normalization of a feature vector.
 *
 * Each element is replaced by the square root of max(0, element); the vector is
 * then divided by its Euclidean norm. When the sum of squares is not greater
 * than 1e-6 the division is skipped and only the square-root step is kept.
 *
 * @param v Vector to normalize; modified in place.
 */
void FacePipeline::normalize_sqrt_l2(std::vector<float> &v)
{
    // Pass 1: element-wise sqrt (negatives clamped to 0) while accumulating the
    // sum of squares in double precision.
    double sum_of_squares = 0.0;
    for (auto it = v.begin(); it != v.end(); ++it)
    {
        const float root = std::sqrt(std::max(0.0f, *it));
        *it = root;
        sum_of_squares += root * root;
    }

    // Written as a negated '>' so a NaN sum also skips the division.
    if (!(sum_of_squares > 1e-6))
    {
        return;
    }

    // Pass 2: scale to unit length.
    const double length = std::sqrt(sum_of_squares);
    for (auto it = v.begin(); it != v.end(); ++it)
    {
        *it = static_cast<float>(*it / length);
    }
}
/**
 * Runs the recognizer on an aligned face and returns the normalized feature.
 *
 * The input tensor shape is hard-coded to {1, 3, 248, 248} to match
 * preprocess_recognition(); per the original author's note, the shape reported
 * by the model metadata is fully dynamic and cannot be used. The feature length
 * is taken from dimension 1 of the output and the result is passed through
 * normalize_sqrt_l2().
 *
 * Compared with the previous version, an empty input image and an output tensor
 * with an unexpected shape now return false instead of crashing or reading out
 * of bounds.
 *
 * @param aligned_face Aligned face image.
 * @param feature      Receives the normalized feature vector on success.
 * @return true on success, false when the input or the model output is unusable.
 */
bool FacePipeline::RunRecognition(const cv::Mat &aligned_face, std::vector<float> &feature)
{
    if (aligned_face.empty())
    {
        LOGE("Recognition failed: aligned face image is empty.");
        return false;
    }
    if (m_rec_input_names.empty() || m_rec_output_names.empty())
    {
        LOGE("Recognizer model has no cached input/output names.");
        return false;
    }

    // 1. Preprocess into the shared blob (3 x 248 x 248 floats).
    preprocess_recognition(aligned_face, m_blob_buffer);

    // 2. Use the fixed shape that matches the preprocessing above.
    const std::vector<int64_t> hardcoded_shape = {1, 3, 248, 248};
    auto input_tensor = create_tensor(m_blob_buffer, hardcoded_shape);

    // 3. Run the model.
    auto output_tensors = m_session_recognizer->Run(Ort::RunOptions{nullptr},
                                                    m_rec_input_names.data(), &input_tensor, 1,
                                                    m_rec_output_names.data(), 1);
    if (output_tensors.empty())
    {
        LOGE("Recognizer returned no outputs.");
        return false;
    }

    // 4. Validate the output before copying dimension-1-many floats out of it.
    const auto output_info = output_tensors[0].GetTensorTypeAndShapeInfo();
    const auto output_shape = output_info.GetShape();
    if (output_shape.size() < 2 || output_shape[1] <= 0 ||
        output_info.GetElementCount() < static_cast<size_t>(output_shape[1]))
    {
        LOGE("Recognizer output has an unexpected shape.");
        return false;
    }
    const size_t feature_dim = static_cast<size_t>(output_shape[1]);
    const float *output_data = output_tensors[0].GetTensorData<float>();
    feature.assign(output_data, output_data + feature_dim);

    // 5. Post-process (sqrt + L2 normalization).
    normalize_sqrt_l2(feature);
    return true;
}