From 8ba6a046ffdec2cb8c3a20568574c16cee0e9016 Mon Sep 17 00:00:00 2001
From: guanyuankai
Date: Fri, 31 Oct 2025 13:56:31 +0800
Subject: [PATCH] Complete the face recognition Android SDK
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/face_pipeline.cpp | 540 ++++++++++++++++++++++++++++--------------
 1 file changed, 357 insertions(+), 183 deletions(-)

diff --git a/src/face_pipeline.cpp b/src/face_pipeline.cpp
index 34b9995..83db7e8 100644
--- a/src/face_pipeline.cpp
+++ b/src/face_pipeline.cpp
@@ -3,7 +3,7 @@
 #include <numeric>
 
 // Constructor
-FacePipeline::FacePipeline(const std::string& model_dir)
+FacePipeline::FacePipeline(const std::string &model_dir)
     : m_env(ORT_LOGGING_LEVEL_WARNING, "FaceSDK"),
       m_memory_info(Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault))
 {
@@ -11,10 +11,13 @@ FacePipeline::FacePipeline(const std::string& model_dir)
     m_session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
 
     m_initialized = LoadModels(model_dir);
-    if (m_initialized) {
+    if (m_initialized)
+    {
         InitMemoryAllocators();
         LOGI("FacePipeline initialized successfully.");
-    } else {
+    }
+    else
+    {
         LOGE("FacePipeline initialization failed.");
     }
 }
@@ -22,96 +25,203 @@ FacePipeline::FacePipeline(const std::string& model_dir)
 FacePipeline::~FacePipeline() {}
 
 // (private) Load all models
-bool FacePipeline::LoadModels(const std::string& model_dir) {
-    auto load_session = [&](std::unique_ptr<Ort::Session>& session, const std::string& model_name) {
+bool FacePipeline::LoadModels(const std::string &model_dir)
+{
+    auto load_session = [&](std::unique_ptr<Ort::Session> &session, const std::string &model_name)
+    {
         std::string model_path = model_dir + "/" + model_name;
-        try {
+        try
+        {
             session = std::make_unique<Ort::Session>(m_env, model_path.c_str(), m_session_options);
             LOGI("Loaded model: %s", model_path.c_str());
-        } catch (const Ort::Exception& e) {
+        }
+        catch (const Ort::Exception &e)
+        {
             LOGE("Error loading model %s: %s", model_path.c_str(), e.what());
             return false;
         }
         return true;
     };
 
-    if (!load_session(m_session_rotator, "model_gray_mobilenetv2_rotcls.onnx")) return false;
-    if (!load_session(m_session_detector, "faceboxesv2-640x640.onnx")) return false;
-    if (!load_session(m_session_pose_var, "fsanet-var.onnx")) return false;
-    if (!load_session(m_session_pose_conv, "fsanet-conv.onnx")) return false;
-    if (!load_session(m_session_landmarker1, "face_landmarker_pts5_net1.onnx")) return false;
-    if (!load_session(m_session_landmarker2, "face_landmarker_pts5_net2.onnx")) return false;
-    if (!load_session(m_session_recognizer, "face_recognizer.onnx")) return false;
+    if (!load_session(m_session_rotator, "model_gray_mobilenetv2_rotcls.onnx"))
+        return false;
+    if (!load_session(m_session_detector, "faceboxesv2-640x640.onnx"))
+        return false;
+    if (!load_session(m_session_pose_var, "fsanet-var.onnx"))
+        return false;
+    if (!load_session(m_session_pose_conv, "fsanet-conv.onnx"))
+        return false;
+    if (!load_session(m_session_landmarker1, "face_landmarker_pts5_net1.onnx"))
+        return false;
+    if (!load_session(m_session_landmarker2, "face_landmarker_pts5_net2.onnx"))
+        return false;
+    if (!load_session(m_session_recognizer, "face_recognizer.onnx"))
+        return false;
 
     LOGI("All 7 models loaded successfully.");
     return true;
 }
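
For reference, a minimal caller sketch (not part of the patch): it assumes a hypothetical face_pipeline.h header declaring the class above and a "models" directory holding the seven .onnx files that LoadModels() expects. Because normalize_sqrt_l2() (later in this file) leaves features unit-length, a plain dot product already gives the cosine similarity.

    #include "face_pipeline.h" // hypothetical header for this translation unit
    #include <opencv2/imgcodecs.hpp>
    #include <cstdio>
    #include <numeric>
    #include <vector>

    int main()
    {
        FacePipeline pipeline("models"); // directory containing the seven .onnx files
        std::vector<float> feat_a, feat_b;
        cv::Mat img_a = cv::imread("a.jpg");
        cv::Mat img_b = cv::imread("b.jpg");
        if (pipeline.Extract(img_a, feat_a) && pipeline.Extract(img_b, feat_b))
        {
            // Unit-norm features: dot product == cosine similarity.
            float sim = std::inner_product(feat_a.begin(), feat_a.end(), feat_b.begin(), 0.0f);
            printf("cosine similarity: %.4f\n", sim);
        }
        return 0;
    }
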
 
 // (private) Read model input/output metadata
-void FacePipeline::InitMemoryAllocators() {
-    auto get_io_names = [&](Ort::Session* session,
-                            std::vector<const char*>& input_names,
-                            std::vector<const char*>& output_names,
-                            std::vector<int64_t>& input_shape)
+void FacePipeline::InitMemoryAllocators()
+{
+    // Final revision (v3)
+    auto get_io_names = [&](Ort::Session *session,
+                            std::vector<const char *> &input_names,
+                            std::vector<const char *> &output_names,
+                            std::vector<int64_t> &input_shape,
+                            const char *model_name)
     {
         input_names.clear();
         output_names.clear();
         input_shape.clear();
 
-        for (size_t i = 0; i < session->GetInputCount(); ++i) {
+        size_t input_count = session->GetInputCount();
+        for (size_t i = 0; i < input_count; ++i)
+        {
             auto input_name_ptr = session->GetInputNameAllocated(i, m_allocator);
+            if (input_name_ptr.get() == nullptr)
+            {
+                LOGE("Model %s input name %zu is null!", model_name, i);
+                throw std::runtime_error("Failed to get model input name");
+            }
             input_names.push_back(strdup(input_name_ptr.get()));
         }
-        for (size_t i = 0; i < session->GetOutputCount(); ++i) {
+        size_t output_count = session->GetOutputCount();
+        for (size_t i = 0; i < output_count; ++i)
+        {
             auto output_name_ptr = session->GetOutputNameAllocated(i, m_allocator);
+            if (output_name_ptr.get() == nullptr)
+            {
+                LOGE("Model %s output name %zu is null!", model_name, i);
+                throw std::runtime_error("Failed to get model output name");
+            }
             output_names.push_back(strdup(output_name_ptr.get()));
         }
 
-        auto input_type_info = session->GetInputTypeInfo(0);
-        auto tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
-        input_shape = tensor_info.GetShape();
-        if (input_shape[0] < 1) input_shape[0] = 1;
+        if (input_count > 0)
+        {
+            auto input_type_info = session->GetInputTypeInfo(0);
+            auto tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
+            input_shape = tensor_info.GetShape();
+
+            if (input_shape.empty())
+            {
+                LOGE("Model %s input shape is empty!", model_name);
+                throw std::runtime_error("Model input shape is empty");
+            }
+
+            // Fix: log the full shape for easier debugging
+            std::string shape_str = "[";
+            for (long long dim : input_shape)
+                shape_str += std::to_string(dim) + ", ";
+            shape_str += "]";
+            LOGI("Model %s input shape: %s", model_name, shape_str.c_str());
+
+            if (input_shape[0] < 1)
+                input_shape[0] = 1; // Set batch size to 1
+        }
+        else
+        {
+            LOGE("Model %s has no inputs!", model_name);
+        }
    };
 
-    get_io_names(m_session_rotator.get(), m_rot_input_names, m_rot_output_names, m_rot_input_shape);
-    get_io_names(m_session_detector.get(), m_det_input_names, m_det_output_names, m_det_input_shape);
-    get_io_names(m_session_pose_var.get(), m_pose_var_input_names, m_pose_var_output_names, m_pose_var_input_shape);
-    get_io_names(m_session_pose_conv.get(), m_pose_conv_input_names, m_pose_conv_output_names, m_pose_conv_input_shape);
-    get_io_names(m_session_landmarker1.get(), m_lm1_input_names, m_lm1_output_names, m_lm1_input_shape);
-    get_io_names(m_session_landmarker2.get(), m_lm2_input_names, m_lm2_output_names, m_lm2_input_shape);
-    get_io_names(m_session_recognizer.get(), m_rec_input_names, m_rec_output_names, m_rec_input_shape);
-
-    // Generate the FaceBoxesV2 anchors
-    generate_anchors_faceboxes(m_det_input_shape[2], m_det_input_shape[3]); // H, W (640, 640)
-
-    // Size the blob buffer (find the largest required size)
+    // Initialize I/O metadata for all 7 models
+    get_io_names(m_session_rotator.get(), m_rot_input_names, m_rot_output_names, m_rot_input_shape, "Rotator");
+    get_io_names(m_session_detector.get(), m_det_input_names, m_det_output_names, m_det_input_shape, "Detector");
+    get_io_names(m_session_pose_var.get(), m_pose_var_input_names, m_pose_var_output_names, m_pose_var_input_shape, "PoseVar");
+    get_io_names(m_session_pose_conv.get(), m_pose_conv_input_names, m_pose_conv_output_names, m_pose_conv_input_shape, "PoseConv");
+    get_io_names(m_session_landmarker1.get(), m_lm1_input_names, m_lm1_output_names, m_lm1_input_shape, "Landmarker1");
+    get_io_names(m_session_landmarker2.get(), m_lm2_input_names, m_lm2_output_names, m_lm2_input_shape, "Landmarker2");
+    get_io_names(m_session_recognizer.get(), m_rec_input_names, m_rec_output_names, m_rec_input_shape, "Recognizer");
+
+    // Validate the detector input shape
+    if (m_det_input_shape.size() < 4)
+    {
+        LOGE("Detector input shape has < 4 dimensions! Cannot generate anchors.");
+        throw std::runtime_error("Detector input shape invalid");
+    }
+    // Fix: handle dynamic (-1) dimensions
+    if (m_det_input_shape[2] < 0 || m_det_input_shape[3] < 0)
+    {
+        LOGE("Detector input shape is dynamic (H/W is -1). This is not supported by the Python logic.");
+        // The Python source pins this model to 640x640
+        LOGI("Forcing detector H/W to 640x640.");
+        m_det_input_shape[2] = 640;
+        m_det_input_shape[3] = 640;
+    }
+    generate_anchors_faceboxes(m_det_input_shape[2], m_det_input_shape[3]);
+
+    // Size the shared blob buffer
     size_t max_blob_size = 0;
-    auto update_max = [&](const std::vector<int64_t>& shape) {
-        size_t s = std::accumulate(shape.begin() + 1, shape.end(), 1, std::multiplies<size_t>());
-        if (s > max_blob_size) max_blob_size = s;
+
+    // Fix: update_max must not trip over dynamic or degenerate shapes
+    auto update_max = [&](const std::vector<int64_t> &shape, const char *model_name)
+    {
+        if (shape.size() <= 1)
+        {
+            return; // ignore empty or scalar-like (e.g., [1]) shapes
+        }
+
+        size_t s = 1;
+        // start the loop at C (dim 1)
+        for (size_t i = 1; i < shape.size(); ++i)
+        {
+            if (shape[i] < 0)
+            {
+                // A dynamic dimension (e.g., -1) cannot contribute to max_blob_size
+                LOGE("Model %s has dynamic dimension at index %zu. Skipping for max_blob_size calculation.", model_name, i);
+                return; // skip this model
+            }
+            s *= static_cast<size_t>(shape[i]);
+        }
+
+        if (s > max_blob_size)
+        {
+            max_blob_size = s;
+        }
    };
-    update_max(m_rot_input_shape);
-    update_max(m_det_input_shape);
-    update_max(m_pose_var_input_shape);
-    update_max(m_lm1_input_shape);
-    update_max(m_rec_input_shape);
+
+    update_max(m_rot_input_shape, "Rotator");
+    update_max(m_det_input_shape, "Detector");
+    update_max(m_pose_var_input_shape, "PoseVar");
+    update_max(m_lm1_input_shape, "Landmarker1");
+    update_max(m_rec_input_shape, "Recognizer");
+    // (lm2 is left out on purpose: it does not use the shared blob)
+
+    if (max_blob_size == 0)
+    {
+        LOGE("Max blob size is 0, something went wrong with model shape detection!");
+        throw std::runtime_error("Max blob size is 0");
+    }
+
+    LOGI("Calculated max blob size: %zu", max_blob_size);
     m_blob_buffer.resize(max_blob_size);
+    LOGI("m_blob_buffer resized successfully.");
 }
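
A quick cross-check of the sizing above (assuming the static input shapes this SDK ships with, per the hardcoded values elsewhere in the file): the 640x640 detector dominates, so max_blob_size should come out at 3*640*640 floats (about 4.7 MB as float32).

    #include <cstddef>
    #include <cstdio>

    int main()
    {
        const std::size_t det = 3 * 640 * 640;  // Detector blob: 1,228,800 floats
        const std::size_t rec = 3 * 248 * 248;  // Recognizer blob: 184,512 floats
        const std::size_t rot = 3 * 224 * 224;  // Rotator blob: 150,528 floats
        const std::size_t pose = 3 * 64 * 64;   // FSANet blob: 12,288 floats
        (void)rec; (void)rot; (void)pose;       // all smaller than det
        printf("expected max_blob_size = %zu floats\n", det);
        return 0;
    }
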
 
 // --- Image preprocessing helpers ---
-void FacePipeline::image_to_blob(const cv::Mat& img, std::vector<float>& blob, const float* mean, const float* std) {
+void FacePipeline::image_to_blob(const cv::Mat &img, std::vector<float> &blob, const float *mean, const float *std)
+{
     int channels = img.channels();
     int height = img.rows;
     int width = img.cols;
-
-    for (int c = 0; c < channels; c++) {
-        for (int h = 0; h < height; h++) {
-            for (int w = 0; w < width; w++) {
+
+    for (int c = 0; c < channels; c++)
+    {
+        for (int h = 0; h < height; h++)
+        {
+            for (int w = 0; w < width; w++)
+            {
                 float val;
-                if (channels == 3) {
+                if (channels == 3)
+                {
                     val = static_cast<float>(img.at<cv::Vec3b>(h, w)[c]);
-                } else {
+                }
+                else
+                {
                     val = static_cast<float>(img.at<uchar>(h, w));
                 }
                 blob[c * width * height + h * width + w] = (val - mean[c]) * std[c];
@@ -120,23 +230,26 @@ void FacePipeline::image_to_blob(const cv::Mat& img, std::vector<float>& blob, c
             }
         }
     }
 }
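
A worked example of the (val - mean[c]) * std[c] convention above, using the constants that appear later in this file (no new behavior): the detector uses mean={104,117,123} with std={1,1,1}, so a pixel equal to the mean maps to 0; the rotator uses mean=0 and std=1/255, mapping 0..255 into 0..1; FSANet uses mean=127.5 and std=1/127.5, mapping into -1..1.

    #include <cstdio>

    int main()
    {
        // One blue-channel sample value v = 104 under each scheme:
        const float v = 104.0f;
        printf("detector: %.3f\n", (v - 104.0f) * 1.0f);            // 0.000
        printf("rotator:  %.3f\n", (v - 0.0f) * (1.0f / 255.0f));   // 0.408
        printf("fsanet:   %.3f\n", (v - 127.5f) * (1.0f / 127.5f)); // -0.184
        return 0;
    }
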
 
-Ort::Value FacePipeline::create_tensor(const std::vector<float>& blob_data, const std::vector<int64_t>& input_shape) {
-    return Ort::Value::CreateTensor<float>(m_memory_info,
-                                           const_cast<float*>(blob_data.data()),
-                                           blob_data.size(),
-                                           input_shape.data(),
+Ort::Value FacePipeline::create_tensor(const std::vector<float> &blob_data, const std::vector<int64_t> &input_shape)
+{
+    return Ort::Value::CreateTensor<float>(m_memory_info,
+                                           const_cast<float *>(blob_data.data()),
+                                           blob_data.size(),
+                                           input_shape.data(),
                                            input_shape.size());
 }
 
-
 // --- Core pipeline implementation ---
-bool FacePipeline::Extract(const cv::Mat& image, std::vector<float>& feature) {
-    if (!m_initialized) {
+bool FacePipeline::Extract(const cv::Mat &image, std::vector<float> &feature)
+{
+    if (!m_initialized)
+    {
         LOGE("Extract failed: Pipeline is not initialized.");
         return false;
     }
-    if (image.empty()) {
+    if (image.empty())
+    {
         LOGE("Extract failed: Input image is empty.");
         return false;
     }
@@ -144,15 +257,19 @@ bool FacePipeline::Extract(const cv::Mat& image, std::vector<float>& feature) {
     // --- 1. Rotation detection ---
     int rot_angle_code = RunRotation(image);
     cv::Mat upright_image;
-    if (rot_angle_code >= 0) {
+    if (rot_angle_code >= 0)
+    {
         cv::rotate(image, upright_image, rot_angle_code);
-    } else {
+    }
+    else
+    {
         upright_image = image;
     }
 
     // --- 2. Face detection ---
     std::vector<FaceBox> boxes;
-    if (!RunDetection(upright_image, boxes)) {
+    if (!RunDetection(upright_image, boxes))
+    {
         LOGI("Extract failed: No face detected.");
         return false;
     }
@@ -169,41 +286,43 @@ bool FacePipeline::Extract(const cv::Mat& image, std::vector<float>& feature) {
     int pad_right = std::max(0, (face_rect_raw.x + face_rect_raw.width) - upright_image.cols);
     cv::Mat face_crop_padded;
-    cv::copyMakeBorder(upright_image, face_crop_padded, pad_top, pad_bottom, pad_left, pad_right, cv::BORDER_CONSTANT, cv::Scalar(0,0,0));
+    cv::copyMakeBorder(upright_image, face_crop_padded, pad_top, pad_bottom, pad_left, pad_right, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
     cv::Rect face_rect_padded(face_rect_raw.x + pad_left, face_rect_raw.y + pad_top, face_rect_raw.width, face_rect_raw.height);
     cv::Mat face_crop = face_crop_padded(face_rect_padded);
 
-    // --- 5. Face alignment (before pose estimation, because pose estimation needs the aligned face) ---
     // (assess_quality) calls self.pose_checker.check(aligned_face)
     // QualityOfPose.check()
     //   Landmark5er.inference() -> crop_face -> resize(112, 112)
     //   FaceAlign.align() -> 256x256
-    //
-    // **Logic conflict**:
+    //
+    // **Logic conflict**:
     // face_feature_extractor.py L345 (assess_quality) calls pose_checker.check(aligned_face),
     // but L336 (align_face) depends on landmarks,
     // and L330 (extract_landmarks) depends on boxes.
-    //
+    //
     // **Correction**: in the Python source, the L306 `QualityOfPose` constructor -> L416 `check` -> L389 `detect_angle` -> L370 `transform`:
     // QualityOfPose.transform() receives the *unaligned* face crop (L379 canvas[ny1:ny1 + h, nx1:nx1 + w] = mat).
     // **My earlier C++ logic was wrong.** Pose estimation does not need the aligned face; it needs the *raw crop*.
-
+
     // --- 3. Pose estimation (quality filter) ---
     FacePose pose;
-    if (!RunPose(face_crop, pose)) {
+    if (!RunPose(face_crop, pose))
+    {
         LOGI("Extract failed: Pose estimation failed.");
         return false;
     }
-    if (std::abs(pose.yaw) > m_pose_threshold || std::abs(pose.pitch) > m_pose_threshold) {
+    if (std::abs(pose.yaw) > m_pose_threshold || std::abs(pose.pitch) > m_pose_threshold)
+    {
         LOGI("Extract failed: Face pose (Y:%.1f, P:%.1f) exceeds threshold (%.1f)", pose.yaw, pose.pitch, m_pose_threshold);
         return false;
     }
 
     // --- 4. Landmark detection ---
     FaceLandmark landmark;
-    if (!RunLandmark(upright_image, best_box, landmark)) {
+    if (!RunLandmark(upright_image, best_box, landmark))
+    {
         LOGI("Extract failed: Landmark detection failed.");
         return false;
     }
@@ -212,7 +331,8 @@ bool FacePipeline::Extract(const cv::Mat& image, std::vector<float>& feature) {
     cv::Mat aligned_face = RunAlignment(upright_image, landmark);
 
     // --- 6. Feature extraction ---
-    if (!RunRecognition(aligned_face, feature)) {
+    if (!RunRecognition(aligned_face, feature))
+    {
         LOGI("Extract failed: Feature recognition failed.");
         return false;
     }
@@ -222,9 +342,9 @@ bool FacePipeline::Extract(const cv::Mat& image, std::vector<float>& feature) {
     return true;
 }
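
The pad-then-crop pattern used in Extract() above is repeated verbatim in RunLandmark() further down; a helper that factors it out would look like the following sketch (not part of the patch):

    #include <opencv2/core.hpp>
    #include <opencv2/imgproc.hpp>
    #include <algorithm>

    // Crop `rect` from `img`, zero-padding wherever the rect leaves the image.
    static cv::Mat crop_with_padding(const cv::Mat &img, const cv::Rect &rect)
    {
        int pad_top = std::max(0, -rect.y);
        int pad_bottom = std::max(0, (rect.y + rect.height) - img.rows);
        int pad_left = std::max(0, -rect.x);
        int pad_right = std::max(0, (rect.x + rect.width) - img.cols);
        cv::Mat padded;
        cv::copyMakeBorder(img, padded, pad_top, pad_bottom, pad_left, pad_right,
                           cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
        return padded(cv::Rect(rect.x + pad_left, rect.y + pad_top, rect.width, rect.height));
    }
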
 
-
 // --- Step 1: Rotation detection (from face_feature_extractor.py) ---
-void FacePipeline::preprocess_rotation(const cv::Mat& image, std::vector<float>& blob_data) {
+void FacePipeline::preprocess_rotation(const cv::Mat &image, std::vector<float> &blob_data)
+{
     cv::Mat gray_img, resized, cropped, gray_3d;
     cv::cvtColor(image, gray_img, cv::COLOR_BGR2GRAY);
     cv::resize(gray_img, resized, cv::Size(256, 256), 0, 0, cv::INTER_LINEAR);
@@ -232,58 +352,65 @@ void FacePipeline::preprocess_rotation(const cv::Mat& image, std::vector<float>&
     cv::Rect crop_rect(start, start, 224, 224);
     cropped = resized(crop_rect);
     cv::cvtColor(cropped, gray_3d, cv::COLOR_GRAY2BGR);
-
+
     // Normalization: / 255.0 (mean=[0,0,0], std=[1,1,1])
     const float mean[3] = {0.0f, 0.0f, 0.0f};
     const float std[3] = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; // multiplying by 1/255 equals dividing by 255
     image_to_blob(gray_3d, blob_data, mean, std);
 }
 
-int FacePipeline::RunRotation(const cv::Mat& image) {
+int FacePipeline::RunRotation(const cv::Mat &image)
+{
     preprocess_rotation(image, m_blob_buffer);
     auto input_tensor = create_tensor(m_blob_buffer, m_rot_input_shape);
-
-    auto output_tensors = m_session_rotator->Run(Ort::RunOptions{nullptr},
-                                                 m_rot_input_names.data(), &input_tensor, 1,
+
+    auto output_tensors = m_session_rotator->Run(Ort::RunOptions{nullptr},
+                                                 m_rot_input_names.data(), &input_tensor, 1,
                                                  m_rot_output_names.data(), 1);
-
-    float* output_data = output_tensors[0].GetTensorMutableData<float>();
+
+    float *output_data = output_tensors[0].GetTensorMutableData<float>();
     int max_index = std::distance(output_data, std::max_element(output_data, output_data + 4));
-
+
     // (correct_image_rotation)
-    if (max_index == 1) return cv::ROTATE_90_CLOCKWISE;
-    if (max_index == 2) return cv::ROTATE_180;
-    if (max_index == 3) return cv::ROTATE_90_COUNTERCLOCKWISE;
+    if (max_index == 1)
+        return cv::ROTATE_90_CLOCKWISE;
+    if (max_index == 2)
+        return cv::ROTATE_180;
+    if (max_index == 3)
+        return cv::ROTATE_90_COUNTERCLOCKWISE;
     return -1;
 }
 
 // --- Step 2: Face detection (from facedetector.py) ---
-void FacePipeline::preprocess_detection(const cv::Mat& img, std::vector<float>& blob_data) {
+void FacePipeline::preprocess_detection(const cv::Mat &img, std::vector<float> &blob_data)
+{
     cv::Mat resized;
     cv::resize(img, resized, cv::Size(m_det_input_shape[3], m_det_input_shape[2])); // 640x640
-
+
     // Normalization: (img - [104, 117, 123]) * 1.0
     const float mean[3] = {104.0f, 117.0f, 123.0f}; // BGR
     const float std[3] = {1.0f, 1.0f, 1.0f};
     image_to_blob(resized, blob_data, mean, std);
 }
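
For orientation, the box decode in RunDetection below follows the standard prior-box scheme with variances (0.1, 0.2); written as a standalone expression over the same fields:

    #include <cmath>

    struct Prior { float cx, cy, s_kx, s_ky; };       // mirrors Anchor below
    struct DecodedBox { float x1, y1, x2, y2; };

    static DecodedBox decode_prior(const Prior &p, float dx, float dy, float dw, float dh,
                                   float img_w, float img_h)
    {
        const float v0 = 0.1f, v1 = 0.2f; // variances, as in RunDetection
        float cx = p.cx + dx * v0 * p.s_kx;
        float cy = p.cy + dy * v0 * p.s_ky;
        float w = p.s_kx * std::exp(dw * v1);
        float h = p.s_ky * std::exp(dh * v1);
        // normalized center/size -> pixel corners
        return {(cx - w / 2) * img_w, (cy - h / 2) * img_h,
                (cx + w / 2) * img_w, (cy + h / 2) * img_h};
    }
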
 
-bool FacePipeline::RunDetection(const cv::Mat& image, std::vector<FaceBox>& boxes) {
+bool FacePipeline::RunDetection(const cv::Mat &image, std::vector<FaceBox> &boxes)
+{
     float img_height = (float)image.rows;
     float img_width = (float)image.cols;
-
+
     preprocess_detection(image, m_blob_buffer);
     auto input_tensor = create_tensor(m_blob_buffer, m_det_input_shape);
 
     auto output_tensors = m_session_detector->Run(Ort::RunOptions{nullptr},
                                                   m_det_input_names.data(), &input_tensor, 1,
                                                   m_det_output_names.data(), 2); // 2 outputs!
-
-    const float* bboxes_data = output_tensors[0].GetTensorData<float>(); // [1, N, 4]
-    const float* probs_data = output_tensors[1].GetTensorData<float>();  // [1, N, 2]
+
+    const float *bboxes_data = output_tensors[0].GetTensorData<float>(); // [1, N, 4]
+    const float *probs_data = output_tensors[1].GetTensorData<float>();  // [1, N, 2]
 
     long num_anchors = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape()[1];
-    if (num_anchors != m_anchors.size()) {
+    if (num_anchors != (long)m_anchors.size())
+    {
         LOGE("Anchor size mismatch! Expected %zu, Got %ld", m_anchors.size(), num_anchors);
         return false;
     }
@@ -291,11 +418,13 @@ bool FacePipeline::RunDetection(const cv::Mat& image, std::vector<FaceBox>& boxe
     std::vector<FaceBox> bbox_collection;
     const float variance[2] = {0.1f, 0.2f};
 
-    for (long i = 0; i < num_anchors; ++i) {
+    for (long i = 0; i < num_anchors; ++i)
+    {
         float conf = probs_data[i * 2 + 1]; // (probs[0, i, 1])
-        if (conf < m_det_threshold) continue;
+        if (conf < m_det_threshold)
+            continue;
 
-        const Anchor& anchor = m_anchors[i];
+        const Anchor &anchor = m_anchors[i];
         float dx = bboxes_data[i * 4 + 0];
         float dy = bboxes_data[i * 4 + 1];
         float dw = bboxes_data[i * 4 + 2];
@@ -303,53 +432,64 @@ bool FacePipeline::RunDetection(const cv::Mat& image, std::vector<FaceBox>& boxe
         float cx = anchor.cx + dx * variance[0] * anchor.s_kx;
         float cy = anchor.cy + dy * variance[0] * anchor.s_ky;
-        float w = anchor.s_kx * std::exp(dw * variance[1]);
-        float h = anchor.s_ky * std::exp(dh * variance[1]);
+        float w = anchor.s_kx * std::exp(dw * variance[1]);
+        float h = anchor.s_ky * std::exp(dh * variance[1]);
 
-        bbox_collection.push_back({
-            (cx - w / 2.0f) * img_width,
-            (cy - h / 2.0f) * img_height,
-            (cx + w / 2.0f) * img_width,
-            (cy + h / 2.0f) * img_height,
-            conf
-        });
+        bbox_collection.push_back({(cx - w / 2.0f) * img_width,
+                                   (cy - h / 2.0f) * img_height,
+                                   (cx + w / 2.0f) * img_width,
+                                   (cy + h / 2.0f) * img_height,
+                                   conf});
     }
-
+
     boxes = hard_nms(bbox_collection, m_det_iou_threshold, m_det_topk); // (nms_type=0)
     return !boxes.empty();
 }
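
hard_nms() is declared elsewhere in the SDK and never shown in this patch; a minimal sketch consistent with how it is called above (greedy IoU suppression on score-sorted boxes, keeping at most topk) would be the following. A local struct stands in for FaceBox, whose confidence member's real name is not visible here.

    #include <algorithm>
    #include <vector>

    struct BoxS { float x1, y1, x2, y2, score; }; // stand-in for FaceBox

    static std::vector<BoxS> hard_nms_sketch(std::vector<BoxS> boxes, float iou_thresh, int topk)
    {
        std::sort(boxes.begin(), boxes.end(),
                  [](const BoxS &a, const BoxS &b) { return a.score > b.score; });
        std::vector<BoxS> kept;
        for (const BoxS &b : boxes)
        {
            bool suppressed = false;
            for (const BoxS &k : kept)
            {
                float ix1 = std::max(b.x1, k.x1), iy1 = std::max(b.y1, k.y1);
                float ix2 = std::min(b.x2, k.x2), iy2 = std::min(b.y2, k.y2);
                float inter = std::max(0.0f, ix2 - ix1) * std::max(0.0f, iy2 - iy1);
                float uni = (b.x2 - b.x1) * (b.y2 - b.y1) +
                            (k.x2 - k.x1) * (k.y2 - k.y1) - inter;
                if (uni > 0.0f && inter / uni > iou_thresh)
                {
                    suppressed = true;
                    break;
                }
            }
            if (!suppressed)
                kept.push_back(b);
            if ((int)kept.size() >= topk)
                break;
        }
        return kept;
    }
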
 
-void FacePipeline::generate_anchors_faceboxes(int target_height, int target_width) {
+void FacePipeline::generate_anchors_faceboxes(int target_height, int target_width)
+{
     // (generate_anchors)
     m_anchors.clear();
     std::vector<int> steps = {32, 64, 128};
     std::vector<std::vector<int>> min_sizes = {{32, 64, 128}, {256}, {512}};
 
     std::vector<std::vector<int>> feature_maps;
-    for (int step : steps) {
+    for (int step : steps)
+    {
         feature_maps.push_back({(int)std::ceil((float)target_height / step),
                                 (int)std::ceil((float)target_width / step)});
     }
 
     std::vector<float> offset_32 = {0.0f, 0.25f, 0.5f, 0.75f};
     std::vector<float> offset_64 = {0.0f, 0.5f};
 
-    for (int k = 0; k < feature_maps.size(); ++k) {
+    for (size_t k = 0; k < feature_maps.size(); ++k)
+    {
         auto f_map = feature_maps[k];
         auto tmp_min_sizes = min_sizes[k];
         int f_h = f_map[0];
         int f_w = f_map[1];
 
-        for (int i = 0; i < f_h; ++i) {
-            for (int j = 0; j < f_w; ++j) {
-                for (int min_size : tmp_min_sizes) {
+        for (int i = 0; i < f_h; ++i)
+        {
+            for (int j = 0; j < f_w; ++j)
+            {
+                for (int min_size : tmp_min_sizes)
+                {
                     float s_kx = (float)min_size / target_width;
                     float s_ky = (float)min_size / target_height;
-
-                    if (min_size == 32) {
-                        for (float offset_y : offset_32) for (float offset_x : offset_32)
-                            m_anchors.push_back({(j + offset_x) * steps[k] / target_width, (i + offset_y) * steps[k] / target_height, s_kx, s_ky});
-                    } else if (min_size == 64) {
-                        for (float offset_y : offset_64) for (float offset_x : offset_64)
-                            m_anchors.push_back({(j + offset_x) * steps[k] / target_width, (i + offset_y) * steps[k] / target_height, s_kx, s_ky});
-                    } else {
+
+                    if (min_size == 32)
+                    {
+                        for (float offset_y : offset_32)
+                            for (float offset_x : offset_32)
+                                m_anchors.push_back({(j + offset_x) * steps[k] / target_width, (i + offset_y) * steps[k] / target_height, s_kx, s_ky});
+                    }
+                    else if (min_size == 64)
+                    {
+                        for (float offset_y : offset_64)
+                            for (float offset_x : offset_64)
+                                m_anchors.push_back({(j + offset_x) * steps[k] / target_width, (i + offset_y) * steps[k] / target_height, s_kx, s_ky});
+                    }
+                    else
+                    {
                         m_anchors.push_back({(j + 0.5f) * steps[k] / target_width, (i + 0.5f) * steps[k] / target_height, s_kx, s_ky});
                     }
                 }
@@ -358,9 +498,9 @@ void FacePipeline::generate_anchors_faceboxes(int target_height, int target_widt
             }
         }
    }
 }
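
Quick arithmetic check for the generator above at 640x640: steps {32, 64, 128} give 20x20, 10x10 and 5x5 feature maps; min_size 32 contributes 4x4 anchors per cell, 64 contributes 2x2, and every other min_size one each, so the total is 20*20*(16+4+1) + 10*10 + 5*5 = 8525 — the same count RunDetection compares against the model's N dimension.

    #include <cassert>
    #include <cstddef>

    void check_anchor_count(std::size_t generated)
    {
        const std::size_t expected = 20 * 20 * (16 + 4 + 1) // step 32: min_sizes {32, 64, 128}
                                   + 10 * 10                // step 64: min_size {256}
                                   + 5 * 5;                 // step 128: min_size {512}
        assert(generated == expected); // 8525
    }
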
 
-
 // --- Step 3: Pose estimation (from imgchecker.py) ---
-void FacePipeline::preprocess_pose(const cv::Mat& img, std::vector<float>& blob_data) {
+void FacePipeline::preprocess_pose(const cv::Mat &img, std::vector<float> &blob_data)
+{
     float pad = 0.3f;
     int h = img.rows;
     int w = img.cols;
@@ -371,33 +511,34 @@ void FacePipeline::preprocess_pose(const cv::Mat& img, std::vector<float>& blob_
 
     cv::Mat canvas = cv::Mat::zeros(nh, nw, CV_8UC3);
     img.copyTo(canvas(cv::Rect(nx1, ny1, w, h)));
-
+
     cv::Mat resized;
     cv::resize(canvas, resized, cv::Size(m_pose_var_input_shape[3], m_pose_var_input_shape[2])); // 64x64
-
+
     // Normalization: (img - 127.5) / 127.5
     const float mean[3] = {127.5f, 127.5f, 127.5f};
     const float std[3] = {1.0f / 127.5f, 1.0f / 127.5f, 1.0f / 127.5f};
     image_to_blob(resized, blob_data, mean, std);
 }
 
-bool FacePipeline::RunPose(const cv::Mat& face_crop, FacePose& pose) {
+bool FacePipeline::RunPose(const cv::Mat &face_crop, FacePose &pose)
+{
     preprocess_pose(face_crop, m_blob_buffer);
-
+
     // Run the VAR model
     auto input_tensor_var = create_tensor(m_blob_buffer, m_pose_var_input_shape);
     auto output_var = m_session_pose_var->Run(Ort::RunOptions{nullptr},
                                               m_pose_var_input_names.data(), &input_tensor_var, 1,
                                               m_pose_var_output_names.data(), 1);
-
+
     // Run the CONV model (reusing the same blob)
     auto input_tensor_conv = create_tensor(m_blob_buffer, m_pose_conv_input_shape);
     auto output_conv = m_session_pose_conv->Run(Ort::RunOptions{nullptr},
                                                 m_pose_conv_input_names.data(), &input_tensor_conv, 1,
                                                 m_pose_conv_output_names.data(), 1);
 
-    const float* data_var = output_var[0].GetTensorData<float>();
-    const float* data_conv = output_conv[0].GetTensorData<float>();
+    const float *data_var = output_var[0].GetTensorData<float>();
+    const float *data_conv = output_conv[0].GetTensorData<float>();
 
     // Combine (average)
     pose.yaw = (data_var[0] + data_conv[0]) / 2.0f;
@@ -407,11 +548,12 @@ bool FacePipeline::RunPose(const cv::Mat& face_crop, FacePose& pose) {
 }
 
 // --- Step 4: Landmark detection (from facelandmarks5er.py) ---
-void FacePipeline::preprocess_landmark_net1(const cv::Mat& img, std::vector<float>& blob_data) {
+void FacePipeline::preprocess_landmark_net1(const cv::Mat &img, std::vector<float> &blob_data)
+{
     cv::Mat resized, gray_img;
     cv::resize(img, resized, cv::Size(m_lm1_input_shape[3], m_lm1_input_shape[2])); // 112x112
-    cv::cvtColor(resized, gray_img, cv::COLOR_BGR2GRAY);
-
+    cv::cvtColor(resized, gray_img, cv::COLOR_BGR2GRAY);
+
     // Normalization: none (0-255)
     const float mean[1] = {0.0f};
     const float std[1] = {1.0f};
@@ -419,18 +561,19 @@ void FacePipeline::preprocess_landmark_net1(const cv::Mat& img, std::vector<float>
     image_to_blob(gray_img, blob_data, mean, std);
 }
 
-std::vector<float> FacePipeline::shape_index_process(const Ort::Value& feat_val, const Ort::Value& pos_val) {
+std::vector<float> FacePipeline::shape_index_process(const Ort::Value &feat_val, const Ort::Value &pos_val)
+{
     auto feat_shape = feat_val.GetTensorTypeAndShapeInfo().GetShape();
     auto pos_shape = pos_val.GetTensorTypeAndShapeInfo().GetShape();
 
-    const float* feat_data = feat_val.GetTensorData<float>();
-    const float* pos_data = pos_val.GetTensorData<float>();
+    const float *feat_data = feat_val.GetTensorData<float>();
+    const float *pos_data = pos_val.GetTensorData<float>();
 
     long feat_n = feat_shape[0]; // 1
     long feat_c = feat_shape[1];
     long feat_h = feat_shape[2];
     long feat_w = feat_shape[3];
-    long pos_n = pos_shape[0];       // 1
-    long landmark_x2 = pos_shape[1]; // 10
+    long pos_n = pos_shape[0];       // 1
+    long landmark_x2 = pos_shape[1]; // 10
     int landmark_num = landmark_x2 / 2; // 5
 
     float m_origin[] = {112.0f, 112.0f};
@@ -447,33 +590,41 @@ std::vector<float> FacePipeline::shape_index_process(const Ort::Value& feat_val,
     std::vector<int64_t> out_shape = {feat_n, feat_c, x_patch_h, (long)landmark_num, x_patch_w};
     std::vector<float> buff(feat_n * feat_c * x_patch_h * landmark_num * x_patch_w, 0.0f);
 
-    for (int i = 0; i < landmark_num; ++i) {
-        for (int n = 0; n < feat_n; ++n) {
+    for (int i = 0; i < landmark_num; ++i)
+    {
+        for (int n = 0; n < feat_n; ++n)
+        {
             float y_pos = pos_data[n * landmark_x2 + 2 * i + 1];
             float x_pos = pos_data[n * landmark_x2 + 2 * i];
-
+
             int y = (int)(y_pos * (feat_h - 1) - r_h + 0.5f);
             int x = (int)(x_pos * (feat_w - 1) - r_w + 0.5f);
 
-            for (int c = 0; c < feat_c; ++c) {
-                for (int ph = 0; ph < feat_patch_h; ++ph) {
-                    for (int pw = 0; pw < feat_patch_w; ++pw) {
+            for (int c = 0; c < feat_c; ++c)
+            {
+                for (int ph = 0; ph < feat_patch_h; ++ph)
+                {
+                    for (int pw = 0; pw < feat_patch_w; ++pw)
+                    {
                         int y_p = y + ph;
                         int x_p = x + pw;
-
+
                         long out_idx = n * (feat_c * x_patch_h * landmark_num * x_patch_w) +
                                        c * (x_patch_h * landmark_num * x_patch_w) +
                                        ph * (landmark_num * x_patch_w) +
                                        i * (x_patch_w) +
                                        pw;
 
-                        if (y_p < 0 || y_p >= feat_h || x_p < 0 || x_p >= feat_w) {
+                        if (y_p < 0 || y_p >= feat_h || x_p < 0 || x_p >= feat_w)
+                        {
                             buff[out_idx] = 0.0f;
-                        } else {
+                        }
+                        else
+                        {
                             long feat_idx = n * (feat_c * feat_h * feat_w) +
-                                            c * (feat_h * feat_w) +
-                                            y_p * (feat_w) +
-                                            x_p;
+                                            c * (feat_h * feat_w) +
+                                            y_p * (feat_w) +
+                                            x_p;
                             buff[out_idx] = feat_data[feat_idx];
                         }
                     }
@@ -484,8 +635,8 @@ std::vector<float> FacePipeline::shape_index_process(const Ort::Value& feat_val,
     return buff;
 }
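
In isolation, shape_index_process() gathers a fixed-size patch of the Net1 feature map around each predicted landmark, zero-filling anything outside the map. The same gather reduced to a single channel (patch size and half-offsets are illustrative; the real values come from the unchanged lines elided by the hunk above):

    #include <vector>

    // Gather a ph x pw patch of an h x w single-channel map, centered near
    // (x_pos, y_pos) given in [0, 1] coordinates, zero-padding outside the map.
    static std::vector<float> gather_patch(const std::vector<float> &feat, int h, int w,
                                           float x_pos, float y_pos, int ph, int pw)
    {
        int y0 = (int)(y_pos * (h - 1) - ph / 2.0f + 0.5f);
        int x0 = (int)(x_pos * (w - 1) - pw / 2.0f + 0.5f);
        std::vector<float> patch(ph * pw, 0.0f);
        for (int dy = 0; dy < ph; ++dy)
            for (int dx = 0; dx < pw; ++dx)
            {
                int y = y0 + dy, x = x0 + dx;
                if (y >= 0 && y < h && x >= 0 && x < w)
                    patch[dy * pw + dx] = feat[y * w + x];
            }
        return patch;
    }
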
 
-
-bool FacePipeline::RunLandmark(const cv::Mat& image, const FaceBox& box, FaceLandmark& landmark) {
+bool FacePipeline::RunLandmark(const cv::Mat &image, const FaceBox &box, FaceLandmark &landmark)
+{
     // 1. Crop the face
     cv::Rect face_rect_raw(box.x1, box.y1, box.x2 - box.x1, box.y2 - box.y1);
     int pad_top = std::max(0, -face_rect_raw.y);
@@ -493,10 +644,10 @@ bool FacePipeline::RunLandmark(const cv::Mat& image, const FaceBox& box, FaceLan
     int pad_left = std::max(0, -face_rect_raw.x);
     int pad_right = std::max(0, (face_rect_raw.x + face_rect_raw.width) - image.cols);
     cv::Mat face_crop_padded;
-    cv::copyMakeBorder(image, face_crop_padded, pad_top, pad_bottom, pad_left, pad_right, cv::BORDER_CONSTANT, cv::Scalar(0,0,0));
+    cv::copyMakeBorder(image, face_crop_padded, pad_top, pad_bottom, pad_left, pad_right, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
     cv::Rect face_rect_padded(face_rect_raw.x + pad_left, face_rect_raw.y + pad_top, face_rect_raw.width, face_rect_raw.height);
     cv::Mat face_crop = face_crop_padded(face_rect_padded);
-
+
     // 2. Preprocess for Net1
     preprocess_landmark_net1(face_crop, m_blob_buffer);
     auto input_tensor_net1 = create_tensor(m_blob_buffer, m_lm1_input_shape);
@@ -505,10 +656,10 @@ bool FacePipeline::RunLandmark(const cv::Mat& image, const FaceBox& box, FaceLan
     auto output_net1 = m_session_landmarker1->Run(Ort::RunOptions{nullptr},
                                                   m_lm1_input_names.data(), &input_tensor_net1, 1,
                                                   m_lm1_output_names.data(), 2); // 2 outputs
-
+
     // 4. Shape Index Process
     std::vector<float> shape_index_blob = shape_index_process(output_net1[0], output_net1[1]);
-
+
     // 5. Prepare the Net2 input
     auto input_tensor_net2 = Ort::Value::CreateTensor<float>(m_memory_info,
                                                              shape_index_blob.data(),
@@ -522,21 +673,22 @@ bool FacePipeline::RunLandmark(const cv::Mat& image, const FaceBox& box, FaceLan
                                                  m_lm2_output_names.data(), 1);
 
     // 7. Post-process
-    const float* data_net1_pos = output_net1[1].GetTensorData<float>();
-    const float* data_net2 = output_net2[0].GetTensorData<float>();
+    const float *data_net1_pos = output_net1[1].GetTensorData<float>();
+    const float *data_net2 = output_net2[0].GetTensorData<float>();
 
     auto shape_net1_pos = output_net1[1].GetTensorTypeAndShapeInfo().GetShape(); // [1, 10]
     int landmark_x2 = shape_net1_pos[1];
 
     float scale_x = (box.x2 - box.x1) / 112.0f;
     float scale_y = (box.y2 - box.y1) / 112.0f;
 
-    for (int i = 0; i < 5; ++i) {
+    for (int i = 0; i < 5; ++i)
+    {
         float x_norm = (data_net2[i * 2 + 0] + data_net1_pos[i * 2 + 0]) * 112.0f;
         float y_norm = (data_net2[i * 2 + 1] + data_net1_pos[i * 2 + 1]) * 112.0f;
-
+
         float x = box.x1 + x_norm * scale_x;
         float y = box.y1 + y_norm * scale_y;
-
+
         x = std::max(0.01f, std::min(x, (float)image.cols - 0.01f));
         y = std::max(0.01f, std::min(y, (float)image.rows - 0.01f));
         landmark.points[i] = cv::Point2f(x, y);
@@ -545,74 +697,96 @@ bool FacePipeline::RunLandmark(const cv::Mat& image, const FaceBox& box, FaceLan
 }
 
 // --- Step 5: Face alignment (from facealign.py) ---
-cv::Mat FacePipeline::RunAlignment(const cv::Mat& image, const FaceLandmark& landmark) {
+cv::Mat FacePipeline::RunAlignment(const cv::Mat &image, const FaceLandmark &landmark)
+{
     // (align)
     std::vector<cv::Point2f> src_points;
     std::vector<cv::Point2f> dst_points;
-
-    for (int i = 0; i < 5; ++i) {
+
+    for (int i = 0; i < 5; ++i)
+    {
         src_points.push_back(landmark.points[i]);
-        dst_points.push_back(cv::Point2f(m_landmark_template.at<float>(i, 0),
+        dst_points.push_back(cv::Point2f(m_landmark_template.at<float>(i, 0),
                                          m_landmark_template.at<float>(i, 1)));
     }
-
+
     // (transformation_maker) -> estimateAffinePartial2D
     cv::Mat transform_matrix = cv::estimateAffinePartial2D(src_points, dst_points);
-
+
     cv::Mat aligned_face;
     // (spatial_transform) -> warpAffine
     // (crop_width, crop_height = 256, 256)
     cv::warpAffine(image, aligned_face, transform_matrix, m_align_output_size, cv::INTER_LINEAR);
-
+
     return aligned_face;
 }
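
m_landmark_template and m_align_output_size are class members initialized elsewhere; the patch never shows their values. For orientation only, a 5-point destination template in the widely used ArcFace layout, scaled from 112x112 up to the 256x256 output used here (illustrative values, not necessarily the SDK's actual constants):

    #include <opencv2/core.hpp>

    // Left eye, right eye, nose tip, left and right mouth corner, in pixels
    // of a 256x256 aligned crop (ArcFace 112x112 template * 256/112).
    static cv::Mat make_template_256()
    {
        cv::Mat t(5, 2, CV_32F);
        const float pts[5][2] = {{87.5f, 118.2f}, {168.1f, 117.7f}, {128.1f, 164.0f},
                                 {95.0f, 211.1f}, {161.7f, 210.8f}};
        for (int i = 0; i < 5; ++i)
        {
            t.at<float>(i, 0) = pts[i][0];
            t.at<float>(i, 1) = pts[i][1];
        }
        return t;
    }

Note that cv::estimateAffinePartial2D can return an empty matrix when the point sets are degenerate; a production caller would check transform_matrix.empty() before warping.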
 
 // --- Step 6: Feature extraction (from facerecoger.py) ---
-void FacePipeline::preprocess_recognition(const cv::Mat& img, std::vector<float>& blob_data) {
+void FacePipeline::preprocess_recognition(const cv::Mat &img, std::vector<float> &blob_data)
+{
     cv::Mat resized, rgb_img;
+
+    const cv::Size target_size(248, 248);
+
     // (resize to 248, 248)
-    cv::resize(img, resized, cv::Size(m_rec_input_shape[3], m_rec_input_shape[2]));
+    cv::resize(img, resized, target_size);
+
     // (BGR -> RGB)
-    cv::cvtColor(resized, rgb_img, cv::COLOR_BGR2RGB);
-
+    cv::cvtColor(resized, rgb_img, cv::COLOR_BGR2RGB);
+
     // Normalization: none (0-255)
     const float mean[3] = {0.0f, 0.0f, 0.0f};
     const float std[3] = {1.0f, 1.0f, 1.0f};
     image_to_blob(rgb_img, blob_data, mean, std);
 }
 
-void FacePipeline::normalize_sqrt_l2(std::vector<float>& v) {
+void FacePipeline::normalize_sqrt_l2(std::vector<float> &v)
+{
     // (temp_result = np.sqrt(pred_result[0]))
     double norm = 0.0;
-    for (float& val : v) {
+    for (float &val : v)
+    {
         val = std::sqrt(std::max(0.0f, val)); // take the square root
         norm += val * val;
     }
-
+
     // (norm = temp_result / np.linalg.norm(...))
-    if (norm > 1e-6) {
+    if (norm > 1e-6)
+    {
         norm = std::sqrt(norm);
-        for (float& val : v) {
+        for (float &val : v)
+        {
            val = static_cast<float>(val / norm);
        }
    }
 }
 
-bool FacePipeline::RunRecognition(const cv::Mat& aligned_face, std::vector<float>& feature) {
+bool FacePipeline::RunRecognition(const cv::Mat &aligned_face, std::vector<float> &feature)
+{
+    // Final fix (v5)
+
+    // 1. Preprocess (this part was already correct; it builds the 248x248 blob)
     preprocess_recognition(aligned_face, m_blob_buffer);
-    auto input_tensor = create_tensor(m_blob_buffer, m_rec_input_shape);
-
+
+    // 2. (The bug was here.) m_rec_input_shape cannot be used: it is [-1, -1, -1, -1].
+    //    We must hardcode the shape used by the Python source (facerecoger.py).
+    const std::vector<int64_t> hardcoded_shape = {1, 3, 248, 248};
+
+    // 3. (Fix) Create the tensor from hardcoded_shape
+    auto input_tensor = create_tensor(m_blob_buffer, hardcoded_shape);
+
+    // 4. Run
    auto output_tensors = m_session_recognizer->Run(Ort::RunOptions{nullptr},
                                                    m_rec_input_names.data(), &input_tensor, 1,
                                                    m_rec_output_names.data(), 1);
 
     long feature_dim = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape()[1];
-    const float* output_data = output_tensors[0].GetTensorData<float>();
+    const float *output_data = output_tensors[0].GetTensorData<float>();
     feature.resize(feature_dim);
     memcpy(feature.data(), output_data, feature_dim * sizeof(float));
 
-    // (Post-process: SQRT-L2 norm)
+    // 5. Post-process (SQRT-L2 norm)
     normalize_sqrt_l2(feature);
 
     return true;
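
Worked example of the SQRT-L2 step above: for a raw model output (4, 9), the square roots give (2, 3), the L2 norm is sqrt(13), and the stored feature becomes (2/sqrt(13), 3/sqrt(13)) — unit length, so downstream comparison reduces to a dot product.

    #include <cassert>
    #include <cmath>

    int main()
    {
        // Inline version of normalize_sqrt_l2 (a private member above) on {4, 9}:
        float a = std::sqrt(4.0f), b = std::sqrt(9.0f); // (2, 3)
        float n = std::sqrt(a * a + b * b);             // sqrt(13)
        assert(std::fabs(a / n - 2.0f / std::sqrt(13.0f)) < 1e-6f);
        assert(std::fabs(b / n - 3.0f / std::sqrt(13.0f)) < 1e-6f);
        return 0;
    }
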