From ed875db3e54db2b2a32a6c32f449e25efb2bf54a Mon Sep 17 00:00:00 2001
From: guanyuankai
Date: Mon, 17 Nov 2025 15:54:25 +0800
Subject: [PATCH] Modify .gitignore and remove unused comments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore                       |    2 +-
 Distribution/FaceSDKWrapper.java |  101 +--
 src/face_pipeline.cpp            | 1422 ++++++++++++++----------------
 src/face_pipeline.h              |  275 +++---
 4 files changed, 828 insertions(+), 972 deletions(-)

diff --git a/.gitignore b/.gitignore
index 9491a2f..4f4cb70 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,7 +44,7 @@ Generated\ Files/
 # MSTest test Results
 [Tt]est[Rr]esult*/
 [Bb]uild[Ll]og.*
-
+[Bb]uild/
 # NUnit
 *.VisualState.xml
 TestResult.xml
diff --git a/Distribution/FaceSDKWrapper.java b/Distribution/FaceSDKWrapper.java
index 47db17c..d59727e 100644
--- a/Distribution/FaceSDKWrapper.java
+++ b/Distribution/FaceSDKWrapper.java
@@ -1,7 +1,7 @@
-// Package declaration; must match your project
+
 package com.facesdk.wrapper;
 
-// Import all required Android utilities
+
 import android.content.Context;
 import android.content.res.AssetManager;
 import android.graphics.Bitmap;
@@ -13,72 +13,63 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 
-/**
- * SDK wrapper that encapsulates all JNI calls.
- * This is the only class the Android app should interact with.
- */
+
 public class FaceSDKWrapper {
 
     private static final String TAG = "FaceSDKWrapper";
 
     static {
         try {
-            // [[[ Critical fix ]]]
-            // Before loading our own library, we must first load
-            // the third-party libraries that it depends on.
+
+
+
             System.loadLibrary("c++_shared");
 
-            // 1. Load ONNX Runtime (from build.gradle.kts)
+
             System.loadLibrary("onnxruntime");
 
-            // 2. Load OpenCV (from build.gradle.kts; the library name is opencv_java4)
+
             System.loadLibrary("opencv_java4");
 
-            // 3. Finally, load our own library
+
             System.loadLibrary("face_sdk_jni");
 
         } catch (UnsatisfiedLinkError e) {
-            // If this fails, the app cannot run
+
             Log.e(TAG, "!!! FATAL: failed to load one or more native libraries !!!", e);
-            // Throw a runtime exception so the app stops immediately instead of crashing later
+
             throw new RuntimeException("Failed to load native libraries", e);
         }
     }
 
-    // 2. Declare the C++ JNI functions (must match face_sdk_jni.cpp)
-    // These are the "private channel" between Java and C++
+
+
     private native long nativeInit(String modelDir);
     private native float[] nativeExtractFeature(Bitmap bitmap);
     private native float nativeCompare(float[] feat1, float[] feat2);
     private native void nativeRelease();
 
-    // -----------------------------------------------------------------
-    // Public Java API
-    // -----------------------------------------------------------------
+
+
+
 
-    // Pointer (handle) to the C++ FaceSDK instance
+
     private long nativeHandle = 0;
 
-    /**
-     * [API 1] Initialize the SDK.
-     * Copies the models and initializes the C++ engine.
-     *
-     * @param context Android application context
-     * @return true on success, false on failure
-     */
+
     public boolean init(Context context) {
         if (nativeHandle != 0) {
             Log.w(TAG, "SDK already initialized.");
             return true;
         }
 
-        // Step 1: copy the models
+
        String modelPath = copyModelsFromAssets(context);
        if (modelPath == null) {
            Log.e(TAG, "Failed to copy models from assets.");
            return false;
        }
 
-        // Step 2: call into C++ via JNI to initialize
+
        try {
            this.nativeHandle = nativeInit(modelPath);
            if (this.nativeHandle == 0) {
@@ -94,12 +85,7 @@ public class FaceSDKWrapper {
         return true;
     }
 
-    /**
-     * [API 2] Extract a feature vector.
-     *
-     * @param bitmap a Bitmap containing a face
-     * @return a 512-dimensional feature vector, or null on failure
-     */
+
     public float[] extractFeature(Bitmap bitmap) {
         if (nativeHandle == 0) {
             Log.e(TAG, "SDK not initialized. Call init() first.");
@@ -118,17 +104,11 @@ public class FaceSDKWrapper {
         }
     }
 
-    /**
-     * [API 3] Compare two feature vectors.
-     *
-     * @param feat1 feature 1
-     * @param feat2 feature 2
-     * @return cosine similarity
-     */
+
     public float compare(float[] feat1, float[] feat2) {
         if (nativeHandle == 0) {
             Log.e(TAG, "SDK not initialized.");
-            return -2.0f; // -2.0 is the error code
+            return -2.0f;
         }
         if (feat1 == null || feat2 == null || feat1.length != 512 || feat2.length != 512) {
             Log.e(TAG, "Invalid feature vectors for comparison.");
@@ -143,10 +123,7 @@ public class FaceSDKWrapper {
         }
     }
 
-    /**
-     * [API 4] Release the SDK.
-     * Call this when the app exits.
-     */
+
     public void release() {
         if (nativeHandle != 0) {
             nativeRelease();
@@ -155,19 +132,13 @@ public class FaceSDKWrapper {
         }
     }
 
-    // -----------------------------------------------------------------
-    // Task 2.6: model copy logic
-    // -----------------------------------------------------------------
+
+
+
 
-    /**
-     * Copies all .onnx models from assets into the app's internal storage.
-     * C++ can only read from internal storage; it cannot read assets directly.
-     *
-     * @param context context
-     * @return the directory path containing the models, or null on failure
-     */
+
     private String copyModelsFromAssets(Context context) {
-        // These are the 7 models the C++ side needs
+
         final String[] modelFiles = {
             "faceboxesv2-640x640.onnx",
             "face_landmarker_pts5_net1.onnx",
@@ -178,7 +149,7 @@
             "fsanet-conv.onnx"
         };
 
-        // Target directory: /data/data/com.facesdk.wrapper/files/models
+
         File modelDir = new File(context.getFilesDir(), "models");
         if (!modelDir.exists()) {
             if (!modelDir.mkdirs()) {
@@ -189,11 +160,11 @@
 
         AssetManager assetManager = context.getAssets();
 
-        // Copy each model in a loop
+
         for (String filename : modelFiles) {
             File outFile = new File(modelDir, filename);
 
-            // Skip the copy if the file already exists (speeds up startup)
+
             if (outFile.exists()) {
                 Log.i(TAG, "Model exists, skipping: " + filename);
                 continue;
@@ -210,15 +181,15 @@
                 }
             } catch (IOException e) {
                 Log.e(TAG, "Failed to copy model: " + filename, e);
-                // If any file fails to copy, clean up and return failure
-                // (the cleanup is optional, but more robust)
-                // cleanUpModels(modelDir);
+
+
+
                 return null;
             }
         }
 
         Log.i(TAG, "All models copied successfully to: " + modelDir.getAbsolutePath());
 
-        // Return the directory path containing the models
+
         return modelDir.getAbsolutePath();
     }
 }
\ No newline at end of file
diff --git a/src/face_pipeline.cpp b/src/face_pipeline.cpp
index 83db7e8..b1dac43 100644
--- a/src/face_pipeline.cpp
+++ b/src/face_pipeline.cpp
@@ -1,793 +1,693 @@
 #include "face_pipeline.h"
-#include
 #include
+#include
 
-// Constructor
 FacePipeline::FacePipeline(const std::string &model_dir)
     : m_env(ORT_LOGGING_LEVEL_WARNING, "FaceSDK"),
-      m_memory_info(Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault))
-{
-    m_session_options.SetIntraOpNumThreads(4); // Use 4 threads
-    m_session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
+      m_memory_info(
+          Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault)) {
+  m_session_options.SetIntraOpNumThreads(4);
+  m_session_options.SetGraphOptimizationLevel(
+      GraphOptimizationLevel::ORT_ENABLE_ALL);
 
-    m_initialized = LoadModels(model_dir);
-    if (m_initialized)
-    {
-        InitMemoryAllocators();
-        LOGI("FacePipeline initialized successfully.");
-    }
-    else
-    {
-        LOGE("FacePipeline initialization failed.");
-    }
+  m_initialized = LoadModels(model_dir);
+  if (m_initialized) {
+    InitMemoryAllocators();
+    LOGI("FacePipeline initialized successfully.");
+  } else {
+    LOGE("FacePipeline initialization failed.");
+  }
 }
 
 FacePipeline::~FacePipeline() {}
 
-// (private) Load all models
-bool FacePipeline::LoadModels(const std::string &model_dir)
-{
-    auto load_session = [&](std::unique_ptr<Ort::Session> &session, const std::string &model_name)
-    {
-        std::string model_path
= model_dir + "/" + model_name; - try - { - session = std::make_unique(m_env, model_path.c_str(), m_session_options); - LOGI("Loaded model: %s", model_path.c_str()); - } - catch (const Ort::Exception &e) - { - LOGE("Error loading model %s: %s", model_path.c_str(), e.what()); - return false; - } - return true; - }; - - if (!load_session(m_session_rotator, "model_gray_mobilenetv2_rotcls.onnx")) - return false; - if (!load_session(m_session_detector, "faceboxesv2-640x640.onnx")) - return false; - if (!load_session(m_session_pose_var, "fsanet-var.onnx")) - return false; - if (!load_session(m_session_pose_conv, "fsanet-conv.onnx")) - return false; - if (!load_session(m_session_landmarker1, "face_landmarker_pts5_net1.onnx")) - return false; - if (!load_session(m_session_landmarker2, "face_landmarker_pts5_net2.onnx")) - return false; - if (!load_session(m_session_recognizer, "face_recognizer.onnx")) - return false; - - LOGI("All 7 models loaded successfully."); - return true; -} - -// (私有) 获取模型输入/输出信息 -void FacePipeline::InitMemoryAllocators() -{ - // 【【【 最终修正版 v3 】】】 - auto get_io_names = [&](Ort::Session *session, - std::vector &input_names, - std::vector &output_names, - std::vector &input_shape, - const char *model_name) - { - input_names.clear(); - output_names.clear(); - input_shape.clear(); - - size_t input_count = session->GetInputCount(); - for (size_t i = 0; i < input_count; ++i) - { - auto input_name_ptr = session->GetInputNameAllocated(i, m_allocator); - if (input_name_ptr == nullptr || input_name_ptr.get() == nullptr) - { - LOGE("Model %s input name %zu is null!", model_name, i); - throw std::runtime_error("Failed to get model input name"); - } - input_names.push_back(strdup(input_name_ptr.get())); - } - - size_t output_count = session->GetOutputCount(); - for (size_t i = 0; i < output_count; ++i) - { - auto output_name_ptr = session->GetOutputNameAllocated(i, m_allocator); - if (output_name_ptr == nullptr || output_name_ptr.get() == nullptr) - { - LOGE("Model %s output name %zu is null!", model_name, i); - throw std::runtime_error("Failed to get model output name"); - } - output_names.push_back(strdup(output_name_ptr.get())); - } - - if (input_count > 0) - { - auto input_type_info = session->GetInputTypeInfo(0); - auto tensor_info = input_type_info.GetTensorTypeAndShapeInfo(); - input_shape = tensor_info.GetShape(); - - if (input_shape.empty()) - { - LOGE("Model %s input shape is empty!", model_name); - throw std::runtime_error("Model input shape is empty"); - } - - // 【【【 修正:更详细的 shape 日志 】】】 - std::string shape_str = "["; - for (long long dim : input_shape) - shape_str += std::to_string(dim) + ", "; - shape_str += "]"; - LOGI("Model %s input shape: %s", model_name, shape_str.c_str()); - - if (input_shape[0] < 1) - input_shape[0] = 1; // Set batch size to 1 - } - else - { - LOGE("Model %s has no inputs!", model_name); - } - }; - - // 为7个模型初始化 - get_io_names(m_session_rotator.get(), m_rot_input_names, m_rot_output_names, m_rot_input_shape, "Rotator"); - get_io_names(m_session_detector.get(), m_det_input_names, m_det_output_names, m_det_input_shape, "Detector"); - get_io_names(m_session_pose_var.get(), m_pose_var_input_names, m_pose_var_output_names, m_pose_var_input_shape, "PoseVar"); - get_io_names(m_session_pose_conv.get(), m_pose_conv_input_names, m_pose_conv_output_names, m_pose_conv_input_shape, "PoseConv"); - get_io_names(m_session_landmarker1.get(), m_lm1_input_names, m_lm1_output_names, m_lm1_input_shape, "Landmarker1"); - get_io_names(m_session_landmarker2.get(), 
m_lm2_input_names, m_lm2_output_names, m_lm2_input_shape, "Landmarker2"); - get_io_names(m_session_recognizer.get(), m_rec_input_names, m_rec_output_names, m_rec_input_shape, "Recognizer"); - - // 检查 Detector 形状 - if (m_det_input_shape.size() < 4) - { - LOGE("Detector input shape has < 4 dimensions! Cannot generate anchors."); - throw std::runtime_error("Detector input shape invalid"); - } - // 【【【 修正:检查 -1 维度 】】】 - if (m_det_input_shape[2] < 0 || m_det_input_shape[3] < 0) - { - LOGE("Detector input shape is dynamic (H/W is -1). This is not supported by the Python logic."); - // 我们从 Python 源码知道它是 640x640 - LOGI("Forcing detector H/W to 640x640."); - m_det_input_shape[2] = 640; - m_det_input_shape[3] = 640; - } - generate_anchors_faceboxes(m_det_input_shape[2], m_det_input_shape[3]); - - // 调整Blob缓冲区大小 - size_t max_blob_size = 0; - - // 【【【 修正:安全的 update_max 逻辑 】】】 - auto update_max = [&](const std::vector &shape, const char *model_name) - { - if (shape.size() <= 1) - { - return; // 忽略 (e.g., [1]) 或空 shape - } - - size_t s = 1; - // 从 C (dim 1) 开始循环 - for (size_t i = 1; i < shape.size(); ++i) - { - if (shape[i] < 0) - { - // 如果是动态维度 (e.g., -1),我们不能用它来计算 max_blob_size - LOGE("Model %s has dynamic dimension at index %zu. Skipping for max_blob_size calculation.", model_name, i); - return; // 跳过这个模型 - } - s *= static_cast(shape[i]); - } - - if (s > max_blob_size) - { - max_blob_size = s; - } - }; - - update_max(m_rot_input_shape, "Rotator"); - update_max(m_det_input_shape, "Detector"); - update_max(m_pose_var_input_shape, "PoseVar"); - update_max(m_lm1_input_shape, "Landmarker1"); - update_max(m_rec_input_shape, "Recognizer"); - // (我们不调用 lm2,因为它不使用公共 blob) - - if (max_blob_size == 0) - { - LOGE("Max blob size is 0, something went wrong with model shape detection!"); - throw std::runtime_error("Max blob size is 0"); - } - - LOGI("Calculated max blob size: %zu", max_blob_size); - m_blob_buffer.resize(max_blob_size); - LOGI("m_blob_buffer resized successfully."); -} - -// --- 图像预处理辅助函数 --- -void FacePipeline::image_to_blob(const cv::Mat &img, std::vector &blob, const float *mean, const float *std) -{ - int channels = img.channels(); - int height = img.rows; - int width = img.cols; - - for (int c = 0; c < channels; c++) - { - for (int h = 0; h < height; h++) - { - for (int w = 0; w < width; w++) - { - float val; - if (channels == 3) - { - val = static_cast(img.at(h, w)[c]); - } - else - { - val = static_cast(img.at(h, w)); - } - blob[c * width * height + h * width + w] = (val - mean[c]) * std[c]; - } - } - } -} - -Ort::Value FacePipeline::create_tensor(const std::vector &blob_data, const std::vector &input_shape) -{ - return Ort::Value::CreateTensor(m_memory_info, - const_cast(blob_data.data()), - blob_data.size(), - input_shape.data(), - input_shape.size()); -} - -// --- 核心管线实现 --- - -bool FacePipeline::Extract(const cv::Mat &image, std::vector &feature) -{ - if (!m_initialized) - { - LOGE("Extract failed: Pipeline is not initialized."); - return false; - } - if (image.empty()) - { - LOGE("Extract failed: Input image is empty."); - return false; - } - - // --- 1. 旋转检测 --- - int rot_angle_code = RunRotation(image); - cv::Mat upright_image; - if (rot_angle_code >= 0) - { - cv::rotate(image, upright_image, rot_angle_code); - } - else - { - upright_image = image; - } - - // --- 2. 
Face detection ---
-    std::vector<FaceBox> boxes;
-    if (!RunDetection(upright_image, boxes))
-    {
-        LOGI("Extract failed: No face detected.");
-        return false;
-    }
-    // (Python uses topk=2; after NMS, boxes[0] is the best)
-    FaceBox best_box = boxes[0];
-
-    // Crop the face (used for pose and landmarks)
-    // crop_face, (assess_quality)
-    // Python's crop_face implements cropping with padding
-    cv::Rect face_rect_raw(best_box.x1, best_box.y1, best_box.x2 - best_box.x1, best_box.y2 - best_box.y1);
-    int pad_top = std::max(0, -face_rect_raw.y);
-    int pad_bottom = std::max(0, (face_rect_raw.y + face_rect_raw.height) - upright_image.rows);
-    int pad_left = std::max(0, -face_rect_raw.x);
-    int pad_right = std::max(0, (face_rect_raw.x + face_rect_raw.width) - upright_image.cols);
-
-    cv::Mat face_crop_padded;
-    cv::copyMakeBorder(upright_image, face_crop_padded, pad_top, pad_bottom, pad_left, pad_right, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
-    cv::Rect face_rect_padded(face_rect_raw.x + pad_left, face_rect_raw.y + pad_top, face_rect_raw.width, face_rect_raw.height);
-    cv::Mat face_crop = face_crop_padded(face_rect_padded);
-
-    // --- 5. Face alignment (before pose detection, because pose detection needs the aligned face) ---
-    // (assess_quality) calls self.pose_checker.check(aligned_face)
-    // QualityOfPose.check()
-    // Landmark5er.inference() -> crop_face -> resize(112, 112)
-    // FaceAlign.align() -> 256x256
-    //
-    // **Logic conflict**:
-    // face_feature_extractor.py L345 (assess_quality) calls pose_checker.check(aligned_face),
-    // but L336 (align_face) depends on landmarks,
-    // and L330 (extract_landmarks) depends on boxes.
-    //
-    // **Fix**: Python source, L306 `QualityOfPose` constructor -> L416 `check` -> L389 `detect_angle` -> L370 `transform`
-    // QualityOfPose.transform() receives the *unaligned* face crop (L379 canvas[ny1:ny1 + h, nx1:nx1 + w] = mat)
-    // **My C++ logic was wrong.** Pose detection does not need the aligned face; it needs the *raw crop*.
-
-    // --- 3. Pose estimation (quality filter) ---
-    FacePose pose;
-    if (!RunPose(face_crop, pose))
-    {
-        LOGI("Extract failed: Pose estimation failed.");
-        return false;
-    }
-
-    if (std::abs(pose.yaw) > m_pose_threshold || std::abs(pose.pitch) > m_pose_threshold)
-    {
-        LOGI("Extract failed: Face pose (Y:%.1f, P:%.1f) exceeds threshold (%.1f)", pose.yaw, pose.pitch, m_pose_threshold);
-        return false;
-    }
-
-    // --- 4. Landmark detection ---
-    FaceLandmark landmark;
-    if (!RunLandmark(upright_image, best_box, landmark))
-    {
-        LOGI("Extract failed: Landmark detection failed.");
-        return false;
-    }
-
-    // --- 5. Face alignment ---
-    cv::Mat aligned_face = RunAlignment(upright_image, landmark);
-
-    // --- 6. Feature extraction ---
-    if (!RunRecognition(aligned_face, feature))
-    {
-        LOGI("Extract failed: Feature recognition failed.");
-        return false;
-    }
-
-    // --- 7. Normalization (done inside RunRecognition) ---
-    LOGI("Extract success.");
-    return true;
-}
-
-// --- Step 1: rotation detection (from face_feature_extractor.py) ---
-void FacePipeline::preprocess_rotation(const cv::Mat &image, std::vector<float> &blob_data)
-{
-    cv::Mat gray_img, resized, cropped, gray_3d;
-    cv::cvtColor(image, gray_img, cv::COLOR_BGR2GRAY);
-    cv::resize(gray_img, resized, cv::Size(256, 256), 0, 0, cv::INTER_LINEAR);
-    int start = (256 - 224) / 2;
-    cv::Rect crop_rect(start, start, 224, 224);
-    cropped = resized(crop_rect);
-    cv::cvtColor(cropped, gray_3d, cv::COLOR_GRAY2BGR);
-
-    // Normalization: / 255.0 (mean=[0,0,0], std=[1,1,1])
-    const float mean[3] = {0.0f, 0.0f, 0.0f};
-    const float std[3] = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; // multiplying by 1/255 equals dividing by 255
-    image_to_blob(gray_3d, blob_data, mean, std);
-}
-
-int FacePipeline::RunRotation(const cv::Mat &image)
-{
-    preprocess_rotation(image, m_blob_buffer);
-    auto input_tensor = create_tensor(m_blob_buffer, m_rot_input_shape);
-
-    auto output_tensors = m_session_rotator->Run(Ort::RunOptions{nullptr},
-                                                 m_rot_input_names.data(), &input_tensor, 1,
-                                                 m_rot_output_names.data(), 1);
-
-    float *output_data = output_tensors[0].GetTensorMutableData<float>();
-    int max_index = std::distance(output_data, std::max_element(output_data, output_data + 4));
-
-    // (correct_image_rotation)
-    if (max_index == 1)
-        return cv::ROTATE_90_CLOCKWISE;
-    if (max_index == 2)
-        return cv::ROTATE_180;
-    if (max_index == 3)
-        return cv::ROTATE_90_COUNTERCLOCKWISE;
-    return -1;
-}
-
-// --- Step 2: face detection (from facedetector.py) ---
-void FacePipeline::preprocess_detection(const cv::Mat &img, std::vector<float> &blob_data)
-{
-    cv::Mat resized;
-    cv::resize(img, resized, cv::Size(m_det_input_shape[3], m_det_input_shape[2])); // 640x640
-
-    // Normalization: (img - [104, 117, 123]) * 1.0
-    const float mean[3] = {104.0f, 117.0f, 123.0f}; // BGR
-    const float std[3] = {1.0f, 1.0f, 1.0f};
-    image_to_blob(resized, blob_data, mean, std);
-}
-
-bool FacePipeline::RunDetection(const cv::Mat &image, std::vector<FaceBox> &boxes)
-{
-    float img_height = (float)image.rows;
-    float img_width = (float)image.cols;
-
-    preprocess_detection(image, m_blob_buffer);
-    auto input_tensor = create_tensor(m_blob_buffer, m_det_input_shape);
-
-    auto output_tensors = m_session_detector->Run(Ort::RunOptions{nullptr},
-                                                  m_det_input_names.data(), &input_tensor, 1,
-                                                  m_det_output_names.data(), 2); // 2 outputs!
-
-    const float *bboxes_data = output_tensors[0].GetTensorData<float>(); // [1, N, 4]
-    const float *probs_data = output_tensors[1].GetTensorData<float>(); // [1, N, 2]
-    long num_anchors = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape()[1];
-
-    if (num_anchors != m_anchors.size())
-    {
-        LOGE("Anchor size mismatch! 
Expected %zu, Got %ld", m_anchors.size(), num_anchors); - return false; - } - - std::vector bbox_collection; - const float variance[2] = {0.1f, 0.2f}; // - - for (long i = 0; i < num_anchors; ++i) - { - float conf = probs_data[i * 2 + 1]; // (probs[0, i, 1]) - if (conf < m_det_threshold) - continue; - - const Anchor &anchor = m_anchors[i]; - float dx = bboxes_data[i * 4 + 0]; - float dy = bboxes_data[i * 4 + 1]; - float dw = bboxes_data[i * 4 + 2]; - float dh = bboxes_data[i * 4 + 3]; - - float cx = anchor.cx + dx * variance[0] * anchor.s_kx; // - float cy = anchor.cy + dy * variance[0] * anchor.s_ky; // - float w = anchor.s_kx * std::exp(dw * variance[1]); // - float h = anchor.s_ky * std::exp(dh * variance[1]); // - - bbox_collection.push_back({(cx - w / 2.0f) * img_width, - (cy - h / 2.0f) * img_height, - (cx + w / 2.0f) * img_width, - (cy + h / 2.0f) * img_height, - conf}); - } - - boxes = hard_nms(bbox_collection, m_det_iou_threshold, m_det_topk); // (nms_type=0) - return !boxes.empty(); -} - -void FacePipeline::generate_anchors_faceboxes(int target_height, int target_width) -{ - // (generate_anchors) - m_anchors.clear(); - std::vector steps = {32, 64, 128}; - std::vector> min_sizes = {{32, 64, 128}, {256}, {512}}; - std::vector> feature_maps; - for (int step : steps) - { - feature_maps.push_back({(int)std::ceil((float)target_height / step), (int)std::ceil((float)target_width / step)}); - } - - std::vector offset_32 = {0.0f, 0.25f, 0.5f, 0.75f}; - std::vector offset_64 = {0.0f, 0.5f}; - - for (int k = 0; k < feature_maps.size(); ++k) - { - auto f_map = feature_maps[k]; - auto tmp_min_sizes = min_sizes[k]; - int f_h = f_map[0]; - int f_w = f_map[1]; - for (int i = 0; i < f_h; ++i) - { - for (int j = 0; j < f_w; ++j) - { - for (int min_size : tmp_min_sizes) - { - float s_kx = (float)min_size / target_width; - float s_ky = (float)min_size / target_height; - - if (min_size == 32) - { - for (float offset_y : offset_32) - for (float offset_x : offset_32) - m_anchors.push_back({(j + offset_x) * steps[k] / target_width, (i + offset_y) * steps[k] / target_height, s_kx, s_ky}); - } - else if (min_size == 64) - { - for (float offset_y : offset_64) - for (float offset_x : offset_64) - m_anchors.push_back({(j + offset_x) * steps[k] / target_width, (i + offset_y) * steps[k] / target_height, s_kx, s_ky}); - } - else - { - m_anchors.push_back({(j + 0.5f) * steps[k] / target_width, (i + 0.5f) * steps[k] / target_height, s_kx, s_ky}); - } - } - } - } - } -} - -// --- 步骤 3: 姿态估计 (来自 imgchecker.py) --- -void FacePipeline::preprocess_pose(const cv::Mat &img, std::vector &blob_data) -{ - float pad = 0.3f; // - int h = img.rows; - int w = img.cols; - int nh = (int)(h + pad * h); - int nw = (int)(w + pad * w); - int nx1 = std::max(0, (nw - w) / 2); - int ny1 = std::max(0, (nh - h) / 2); - - cv::Mat canvas = cv::Mat::zeros(nh, nw, CV_8UC3); - img.copyTo(canvas(cv::Rect(nx1, ny1, w, h))); - - cv::Mat resized; - cv::resize(canvas, resized, cv::Size(m_pose_var_input_shape[3], m_pose_var_input_shape[2])); // 64x64 - - // 归一化: (img - 127.5) / 127.5 - const float mean[3] = {127.5f, 127.5f, 127.5f}; - const float std[3] = {1.0f / 127.5f, 1.0f / 127.5f, 1.0f / 127.5f}; - image_to_blob(resized, blob_data, mean, std); -} - -bool FacePipeline::RunPose(const cv::Mat &face_crop, FacePose &pose) -{ - preprocess_pose(face_crop, m_blob_buffer); - - // 运行 VAR - auto input_tensor_var = create_tensor(m_blob_buffer, m_pose_var_input_shape); - auto output_var = m_session_pose_var->Run(Ort::RunOptions{nullptr}, - 
m_pose_var_input_names.data(), &input_tensor_var, 1, - m_pose_var_output_names.data(), 1); - - // 运行 CONV (使用相同的 blob) - auto input_tensor_conv = create_tensor(m_blob_buffer, m_pose_conv_input_shape); - auto output_conv = m_session_pose_conv->Run(Ort::RunOptions{nullptr}, - m_pose_conv_input_names.data(), &input_tensor_conv, 1, - m_pose_conv_output_names.data(), 1); - - const float *data_var = output_var[0].GetTensorData(); - const float *data_conv = output_conv[0].GetTensorData(); - - // 结合 (平均) - pose.yaw = (data_var[0] + data_conv[0]) / 2.0f; - pose.pitch = (data_var[1] + data_conv[1]) / 2.0f; - pose.roll = (data_var[2] + data_conv[2]) / 2.0f; - return true; -} - -// --- 步骤 4: 关键点检测 (来自 facelandmarks5er.py) --- -void FacePipeline::preprocess_landmark_net1(const cv::Mat &img, std::vector &blob_data) -{ - cv::Mat resized, gray_img; - cv::resize(img, resized, cv::Size(m_lm1_input_shape[3], m_lm1_input_shape[2])); // 112x112 - cv::cvtColor(resized, gray_img, cv::COLOR_BGR2GRAY); // - - // 归一化: 无 (0-255) - const float mean[1] = {0.0f}; - const float std[1] = {1.0f}; - image_to_blob(gray_img, blob_data, mean, std); -} - -// C++ 转译 facelandmarks5er.py::shape_index_process -std::vector FacePipeline::shape_index_process(const Ort::Value &feat_val, const Ort::Value &pos_val) -{ - auto feat_shape = feat_val.GetTensorTypeAndShapeInfo().GetShape(); - auto pos_shape = pos_val.GetTensorTypeAndShapeInfo().GetShape(); - const float *feat_data = feat_val.GetTensorData(); - const float *pos_data = pos_val.GetTensorData(); - - long feat_n = feat_shape[0]; // 1 - long feat_c = feat_shape[1]; - long feat_h = feat_shape[2]; - long feat_w = feat_shape[3]; - long pos_n = pos_shape[0]; // 1 - long landmark_x2 = pos_shape[1]; // 10 - int landmark_num = landmark_x2 / 2; // 5 - - float m_origin[] = {112.0f, 112.0f}; - float m_origin_patch[] = {15.0f, 15.0f}; - - int x_patch_h = (int)(m_origin_patch[0] * feat_h / m_origin[0] + 0.5f); - int x_patch_w = (int)(m_origin_patch[1] * feat_w / m_origin[1] + 0.5f); - int feat_patch_h = x_patch_h; - int feat_patch_w = x_patch_w; - - float r_h = (feat_patch_h - 1) / 2.0f; - float r_w = (feat_patch_w - 1) / 2.0f; - - std::vector out_shape = {feat_n, feat_c, x_patch_h, (long)landmark_num, x_patch_w}; - std::vector buff(feat_n * feat_c * x_patch_h * landmark_num * x_patch_w, 0.0f); - - for (int i = 0; i < landmark_num; ++i) - { - for (int n = 0; n < feat_n; ++n) - { - float y_pos = pos_data[n * landmark_x2 + 2 * i + 1]; - float x_pos = pos_data[n * landmark_x2 + 2 * i]; - - int y = (int)(y_pos * (feat_h - 1) - r_h + 0.5f); - int x = (int)(x_pos * (feat_w - 1) - r_w + 0.5f); - - for (int c = 0; c < feat_c; ++c) - { - for (int ph = 0; ph < feat_patch_h; ++ph) - { - for (int pw = 0; pw < feat_patch_w; ++pw) - { - int y_p = y + ph; - int x_p = x + pw; - - long out_idx = n * (feat_c * x_patch_h * landmark_num * x_patch_w) + - c * (x_patch_h * landmark_num * x_patch_w) + - ph * (landmark_num * x_patch_w) + - i * (x_patch_w) + - pw; - - if (y_p < 0 || y_p >= feat_h || x_p < 0 || x_p >= feat_w) - { - buff[out_idx] = 0.0f; - } - else - { - long feat_idx = n * (feat_c * feat_h * feat_w) + - c * (feat_h * feat_w) + - y_p * (feat_w) + - x_p; - buff[out_idx] = feat_data[feat_idx]; - } - } - } - } - } - } - return buff; -} - -bool FacePipeline::RunLandmark(const cv::Mat &image, const FaceBox &box, FaceLandmark &landmark) -{ - // 1. 
裁剪人脸 - cv::Rect face_rect_raw(box.x1, box.y1, box.x2 - box.x1, box.y2 - box.y1); - int pad_top = std::max(0, -face_rect_raw.y); - int pad_bottom = std::max(0, (face_rect_raw.y + face_rect_raw.height) - image.rows); - int pad_left = std::max(0, -face_rect_raw.x); - int pad_right = std::max(0, (face_rect_raw.x + face_rect_raw.width) - image.cols); - cv::Mat face_crop_padded; - cv::copyMakeBorder(image, face_crop_padded, pad_top, pad_bottom, pad_left, pad_right, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0)); - cv::Rect face_rect_padded(face_rect_raw.x + pad_left, face_rect_raw.y + pad_top, face_rect_raw.width, face_rect_raw.height); - cv::Mat face_crop = face_crop_padded(face_rect_padded); - - // 2. 预处理 Net1 - preprocess_landmark_net1(face_crop, m_blob_buffer); - auto input_tensor_net1 = create_tensor(m_blob_buffer, m_lm1_input_shape); - - // 3. 运行 Net1 - auto output_net1 = m_session_landmarker1->Run(Ort::RunOptions{nullptr}, - m_lm1_input_names.data(), &input_tensor_net1, 1, - m_lm1_output_names.data(), 2); // 2 outputs - - // 4. Shape Index Process - std::vector shape_index_blob = shape_index_process(output_net1[0], output_net1[1]); - - // 5. 准备 Net2 输入 - auto input_tensor_net2 = Ort::Value::CreateTensor(m_memory_info, - shape_index_blob.data(), - shape_index_blob.size(), - m_lm2_input_shape.data(), - m_lm2_input_shape.size()); - - // 6. 运行 Net2 - auto output_net2 = m_session_landmarker2->Run(Ort::RunOptions{nullptr}, - m_lm2_input_names.data(), &input_tensor_net2, 1, - m_lm2_output_names.data(), 1); - - // 7. 后处理 - const float *data_net1_pos = output_net1[1].GetTensorData(); - const float *data_net2 = output_net2[0].GetTensorData(); - auto shape_net1_pos = output_net1[1].GetTensorTypeAndShapeInfo().GetShape(); // [1, 10] - int landmark_x2 = shape_net1_pos[1]; - - float scale_x = (box.x2 - box.x1) / 112.0f; - float scale_y = (box.y2 - box.y1) / 112.0f; - - for (int i = 0; i < 5; ++i) - { - float x_norm = (data_net2[i * 2 + 0] + data_net1_pos[i * 2 + 0]) * 112.0f; - float y_norm = (data_net2[i * 2 + 1] + data_net1_pos[i * 2 + 1]) * 112.0f; - - float x = box.x1 + x_norm * scale_x; - float y = box.y1 + y_norm * scale_y; - - x = std::max(0.01f, std::min(x, (float)image.cols - 0.01f)); - y = std::max(0.01f, std::min(y, (float)image.rows - 0.01f)); - landmark.points[i] = cv::Point2f(x, y); +bool FacePipeline::LoadModels(const std::string &model_dir) { + auto load_session = [&](std::unique_ptr &session, + const std::string &model_name) { + std::string model_path = model_dir + "/" + model_name; + try { + session = std::make_unique(m_env, model_path.c_str(), + m_session_options); + LOGI("Loaded model: %s", model_path.c_str()); + } catch (const Ort::Exception &e) { + LOGE("Error loading model %s: %s", model_path.c_str(), e.what()); + return false; } return true; + }; + + if (!load_session(m_session_rotator, "model_gray_mobilenetv2_rotcls.onnx")) + return false; + if (!load_session(m_session_detector, "faceboxesv2-640x640.onnx")) + return false; + if (!load_session(m_session_pose_var, "fsanet-var.onnx")) + return false; + if (!load_session(m_session_pose_conv, "fsanet-conv.onnx")) + return false; + if (!load_session(m_session_landmarker1, "face_landmarker_pts5_net1.onnx")) + return false; + if (!load_session(m_session_landmarker2, "face_landmarker_pts5_net2.onnx")) + return false; + if (!load_session(m_session_recognizer, "face_recognizer.onnx")) + return false; + + LOGI("All 7 models loaded successfully."); + return true; } -// --- 步骤 5: 人脸对齐 (来自 facealign.py) --- -cv::Mat 
FacePipeline::RunAlignment(const cv::Mat &image, const FaceLandmark &landmark) -{ - // (align) - std::vector src_points; - std::vector dst_points; +void FacePipeline::InitMemoryAllocators() { - for (int i = 0; i < 5; ++i) - { - src_points.push_back(landmark.points[i]); - dst_points.push_back(cv::Point2f(m_landmark_template.at(i, 0), - m_landmark_template.at(i, 1))); + auto get_io_names = [&](Ort::Session *session, + std::vector &input_names, + std::vector &output_names, + std::vector &input_shape, + const char *model_name) { + input_names.clear(); + output_names.clear(); + input_shape.clear(); + + size_t input_count = session->GetInputCount(); + for (size_t i = 0; i < input_count; ++i) { + auto input_name_ptr = session->GetInputNameAllocated(i, m_allocator); + if (input_name_ptr == nullptr || input_name_ptr.get() == nullptr) { + LOGE("Model %s input name %zu is null!", model_name, i); + throw std::runtime_error("Failed to get model input name"); + } + input_names.push_back(strdup(input_name_ptr.get())); } - // (transformation_maker) -> estimateAffinePartial2D - cv::Mat transform_matrix = cv::estimateAffinePartial2D(src_points, dst_points); - - cv::Mat aligned_face; - // (spatial_transform) -> warpAffine - // (crop_width, crop_height = 256, 256) - cv::warpAffine(image, aligned_face, transform_matrix, m_align_output_size, cv::INTER_LINEAR); - - return aligned_face; -} - -// --- 步骤 6: 特征提取 (来自 facerecoger.py) --- -void FacePipeline::preprocess_recognition(const cv::Mat &img, std::vector &blob_data) -{ - cv::Mat resized, rgb_img; - - const cv::Size target_size(248, 248); - - // (resize to 248, 248) - cv::resize(img, resized, target_size); - - // (BGR -> RGB) - cv::cvtColor(resized, rgb_img, cv::COLOR_BGR2RGB); - - // 归一化: 无 (0-255) - const float mean[3] = {0.0f, 0.0f, 0.0f}; - const float std[3] = {1.0f, 1.0f, 1.0f}; - image_to_blob(rgb_img, blob_data, mean, std); -} - -void FacePipeline::normalize_sqrt_l2(std::vector &v) -{ - // (temp_result = np.sqrt(pred_result[0])) - double norm = 0.0; - for (float &val : v) - { - val = std::sqrt(std::max(0.0f, val)); // 取 sqrt - norm += val * val; + size_t output_count = session->GetOutputCount(); + for (size_t i = 0; i < output_count; ++i) { + auto output_name_ptr = session->GetOutputNameAllocated(i, m_allocator); + if (output_name_ptr == nullptr || output_name_ptr.get() == nullptr) { + LOGE("Model %s output name %zu is null!", model_name, i); + throw std::runtime_error("Failed to get model output name"); + } + output_names.push_back(strdup(output_name_ptr.get())); } - // (norm = temp_result / np.linalg.norm(...)) - if (norm > 1e-6) - { - norm = std::sqrt(norm); - for (float &val : v) - { - val = static_cast(val / norm); + if (input_count > 0) { + auto input_type_info = session->GetInputTypeInfo(0); + auto tensor_info = input_type_info.GetTensorTypeAndShapeInfo(); + input_shape = tensor_info.GetShape(); + + if (input_shape.empty()) { + LOGE("Model %s input shape is empty!", model_name); + throw std::runtime_error("Model input shape is empty"); + } + + std::string shape_str = "["; + for (long long dim : input_shape) + shape_str += std::to_string(dim) + ", "; + shape_str += "]"; + LOGI("Model %s input shape: %s", model_name, shape_str.c_str()); + + if (input_shape[0] < 1) + input_shape[0] = 1; + } else { + LOGE("Model %s has no inputs!", model_name); + } + }; + + get_io_names(m_session_rotator.get(), m_rot_input_names, m_rot_output_names, + m_rot_input_shape, "Rotator"); + get_io_names(m_session_detector.get(), m_det_input_names, m_det_output_names, + 
m_det_input_shape, "Detector"); + get_io_names(m_session_pose_var.get(), m_pose_var_input_names, + m_pose_var_output_names, m_pose_var_input_shape, "PoseVar"); + get_io_names(m_session_pose_conv.get(), m_pose_conv_input_names, + m_pose_conv_output_names, m_pose_conv_input_shape, "PoseConv"); + get_io_names(m_session_landmarker1.get(), m_lm1_input_names, + m_lm1_output_names, m_lm1_input_shape, "Landmarker1"); + get_io_names(m_session_landmarker2.get(), m_lm2_input_names, + m_lm2_output_names, m_lm2_input_shape, "Landmarker2"); + get_io_names(m_session_recognizer.get(), m_rec_input_names, + m_rec_output_names, m_rec_input_shape, "Recognizer"); + + if (m_det_input_shape.size() < 4) { + LOGE("Detector input shape has < 4 dimensions! Cannot generate anchors."); + throw std::runtime_error("Detector input shape invalid"); + } + + if (m_det_input_shape[2] < 0 || m_det_input_shape[3] < 0) { + LOGE("Detector input shape is dynamic (H/W is -1). This is not supported " + "by the Python logic."); + + LOGI("Forcing detector H/W to 640x640."); + m_det_input_shape[2] = 640; + m_det_input_shape[3] = 640; + } + generate_anchors_faceboxes(m_det_input_shape[2], m_det_input_shape[3]); + + size_t max_blob_size = 0; + + auto update_max = [&](const std::vector &shape, + const char *model_name) { + if (shape.size() <= 1) { + return; + } + + size_t s = 1; + + for (size_t i = 1; i < shape.size(); ++i) { + if (shape[i] < 0) { + + LOGE("Model %s has dynamic dimension at index %zu. Skipping for " + "max_blob_size calculation.", + model_name, i); + return; + } + s *= static_cast(shape[i]); + } + + if (s > max_blob_size) { + max_blob_size = s; + } + }; + + update_max(m_rot_input_shape, "Rotator"); + update_max(m_det_input_shape, "Detector"); + update_max(m_pose_var_input_shape, "PoseVar"); + update_max(m_lm1_input_shape, "Landmarker1"); + update_max(m_rec_input_shape, "Recognizer"); + + if (max_blob_size == 0) { + LOGE( + "Max blob size is 0, something went wrong with model shape detection!"); + throw std::runtime_error("Max blob size is 0"); + } + + LOGI("Calculated max blob size: %zu", max_blob_size); + m_blob_buffer.resize(max_blob_size); + LOGI("m_blob_buffer resized successfully."); +} + +void FacePipeline::image_to_blob(const cv::Mat &img, std::vector &blob, + const float *mean, const float *std) { + int channels = img.channels(); + int height = img.rows; + int width = img.cols; + + for (int c = 0; c < channels; c++) { + for (int h = 0; h < height; h++) { + for (int w = 0; w < width; w++) { + float val; + if (channels == 3) { + val = static_cast(img.at(h, w)[c]); + } else { + val = static_cast(img.at(h, w)); } + blob[c * width * height + h * width + w] = (val - mean[c]) * std[c]; + } } + } } -bool FacePipeline::RunRecognition(const cv::Mat &aligned_face, std::vector &feature) -{ - // 【【【 最终修正 v5 】】】 +Ort::Value +FacePipeline::create_tensor(const std::vector &blob_data, + const std::vector &input_shape) { + return Ort::Value::CreateTensor( + m_memory_info, const_cast(blob_data.data()), blob_data.size(), + input_shape.data(), input_shape.size()); +} - // 1. 预处理 (这部分是正确的,它生成了 248x248 的 blob) - preprocess_recognition(aligned_face, m_blob_buffer); +bool FacePipeline::Extract(const cv::Mat &image, std::vector &feature) { + if (!m_initialized) { + LOGE("Extract failed: Pipeline is not initialized."); + return false; + } + if (image.empty()) { + LOGE("Extract failed: Input image is empty."); + return false; + } - // 2. 
(BUG here) We cannot use m_rec_input_shape (it is [-1, -1, -1, -1])
-    // We must hard-code the shape used by the Python source (facerecoger.py).
-    const std::vector<int64_t> hardcoded_shape = {1, 3, 248, 248};
+  int rot_angle_code = RunRotation(image);
+  cv::Mat upright_image;
+  if (rot_angle_code >= 0) {
+    cv::rotate(image, upright_image, rot_angle_code);
+  } else {
+    upright_image = image;
+  }
 
-    // 3. (Fix) Create the tensor from hardcoded_shape
-    auto input_tensor = create_tensor(m_blob_buffer, hardcoded_shape);
+  std::vector<FaceBox> boxes;
+  if (!RunDetection(upright_image, boxes)) {
+    LOGI("Extract failed: No face detected.");
+    return false;
+  }
 
-    // 4. Run
-    auto output_tensors = m_session_recognizer->Run(Ort::RunOptions{nullptr},
-                                                    m_rec_input_names.data(), &input_tensor, 1,
-                                                    m_rec_output_names.data(), 1);
+  FaceBox best_box = boxes[0];
 
-    long feature_dim = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape()[1];
-    const float *output_data = output_tensors[0].GetTensorData<float>();
+  cv::Rect face_rect_raw(best_box.x1, best_box.y1, best_box.x2 - best_box.x1,
+                         best_box.y2 - best_box.y1);
+  int pad_top = std::max(0, -face_rect_raw.y);
+  int pad_bottom = std::max(0, (face_rect_raw.y + face_rect_raw.height) -
+                                   upright_image.rows);
+  int pad_left = std::max(0, -face_rect_raw.x);
+  int pad_right =
+      std::max(0, (face_rect_raw.x + face_rect_raw.width) - upright_image.cols);
 
-    feature.resize(feature_dim);
-    memcpy(feature.data(), output_data, feature_dim * sizeof(float));
+  cv::Mat face_crop_padded;
+  cv::copyMakeBorder(upright_image, face_crop_padded, pad_top, pad_bottom,
+                     pad_left, pad_right, cv::BORDER_CONSTANT,
+                     cv::Scalar(0, 0, 0));
+  cv::Rect face_rect_padded(face_rect_raw.x + pad_left,
+                            face_rect_raw.y + pad_top, face_rect_raw.width,
+                            face_rect_raw.height);
+  cv::Mat face_crop = face_crop_padded(face_rect_padded);
 
-    // 5. Post-processing (sqrt-L2 norm)
-    normalize_sqrt_l2(feature);
+  FacePose pose;
+  if (!RunPose(face_crop, pose)) {
+    LOGI("Extract failed: Pose estimation failed.");
+    return false;
+  }
 
-    return true;
+  if (std::abs(pose.yaw) > m_pose_threshold ||
+      std::abs(pose.pitch) > m_pose_threshold) {
+    LOGI("Extract failed: Face pose (Y:%.1f, P:%.1f) exceeds threshold (%.1f)",
+         pose.yaw, pose.pitch, m_pose_threshold);
+    return false;
+  }
+
+  FaceLandmark landmark;
+  if (!RunLandmark(upright_image, best_box, landmark)) {
+    LOGI("Extract failed: Landmark detection failed.");
+    return false;
+  }
+
+  cv::Mat aligned_face = RunAlignment(upright_image, landmark);
+
+  if (!RunRecognition(aligned_face, feature)) {
+    LOGI("Extract failed: Feature recognition failed.");
+    return false;
+  }
+
+  LOGI("Extract success.");
+  return true;
 }
 
+void FacePipeline::preprocess_rotation(const cv::Mat &image,
+                                       std::vector<float> &blob_data) {
+  cv::Mat gray_img, resized, cropped, gray_3d;
+  cv::cvtColor(image, gray_img, cv::COLOR_BGR2GRAY);
+  cv::resize(gray_img, resized, cv::Size(256, 256), 0, 0, cv::INTER_LINEAR);
+  int start = (256 - 224) / 2;
+  cv::Rect crop_rect(start, start, 224, 224);
+  cropped = resized(crop_rect);
+  cv::cvtColor(cropped, gray_3d, cv::COLOR_GRAY2BGR);
+
+  const float mean[3] = {0.0f, 0.0f, 0.0f};
+  const float std[3] = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
+  image_to_blob(gray_3d, blob_data, mean, std);
+}
+
+int FacePipeline::RunRotation(const cv::Mat &image) {
+  preprocess_rotation(image, m_blob_buffer);
+  auto input_tensor = create_tensor(m_blob_buffer, m_rot_input_shape);
+
+  auto output_tensors =
+      m_session_rotator->Run(Ort::RunOptions{nullptr}, m_rot_input_names.data(),
+                             &input_tensor, 1, m_rot_output_names.data(), 1);
+
+  float *output_data = output_tensors[0].GetTensorMutableData<float>();
+  int max_index = std::distance(output_data,
+                                std::max_element(output_data, output_data + 4));
+
+  if (max_index == 1)
+    return cv::ROTATE_90_CLOCKWISE;
+  if (max_index == 2)
+    return cv::ROTATE_180;
+  if (max_index == 3)
+    return cv::ROTATE_90_COUNTERCLOCKWISE;
+  return -1;
+}
+
+void FacePipeline::preprocess_detection(const cv::Mat &img,
+                                        std::vector<float> &blob_data) {
+  cv::Mat resized;
+  cv::resize(img, resized,
+             cv::Size(m_det_input_shape[3], m_det_input_shape[2]));
+
+  const float mean[3] = {104.0f, 117.0f, 123.0f};
+  const float std[3] = {1.0f, 1.0f, 1.0f};
+  image_to_blob(resized, blob_data, mean, std);
+}
+
+bool FacePipeline::RunDetection(const cv::Mat &image,
+                                std::vector<FaceBox> &boxes) {
+  float img_height = (float)image.rows;
+  float img_width = (float)image.cols;
+
+  preprocess_detection(image, m_blob_buffer);
+  auto input_tensor = create_tensor(m_blob_buffer, m_det_input_shape);
+
+  auto output_tensors = m_session_detector->Run(
+      Ort::RunOptions{nullptr}, m_det_input_names.data(), &input_tensor, 1,
+      m_det_output_names.data(), 2);
+
+  const float *bboxes_data = output_tensors[0].GetTensorData<float>();
+  const float *probs_data = output_tensors[1].GetTensorData<float>();
+  long num_anchors =
+      output_tensors[0].GetTensorTypeAndShapeInfo().GetShape()[1];
+
+  if (num_anchors != m_anchors.size()) {
+    LOGE("Anchor size mismatch! 
Expected %zu, Got %ld", m_anchors.size(), + num_anchors); + return false; + } + + std::vector bbox_collection; + const float variance[2] = {0.1f, 0.2f}; + + for (long i = 0; i < num_anchors; ++i) { + float conf = probs_data[i * 2 + 1]; + if (conf < m_det_threshold) + continue; + + const Anchor &anchor = m_anchors[i]; + float dx = bboxes_data[i * 4 + 0]; + float dy = bboxes_data[i * 4 + 1]; + float dw = bboxes_data[i * 4 + 2]; + float dh = bboxes_data[i * 4 + 3]; + + float cx = anchor.cx + dx * variance[0] * anchor.s_kx; + float cy = anchor.cy + dy * variance[0] * anchor.s_ky; + float w = anchor.s_kx * std::exp(dw * variance[1]); + float h = anchor.s_ky * std::exp(dh * variance[1]); + + bbox_collection.push_back( + {(cx - w / 2.0f) * img_width, (cy - h / 2.0f) * img_height, + (cx + w / 2.0f) * img_width, (cy + h / 2.0f) * img_height, conf}); + } + + boxes = hard_nms(bbox_collection, m_det_iou_threshold, m_det_topk); + return !boxes.empty(); +} + +void FacePipeline::generate_anchors_faceboxes(int target_height, + int target_width) { + + m_anchors.clear(); + std::vector steps = {32, 64, 128}; + std::vector> min_sizes = {{32, 64, 128}, {256}, {512}}; + std::vector> feature_maps; + for (int step : steps) { + feature_maps.push_back({(int)std::ceil((float)target_height / step), + (int)std::ceil((float)target_width / step)}); + } + + std::vector offset_32 = {0.0f, 0.25f, 0.5f, 0.75f}; + std::vector offset_64 = {0.0f, 0.5f}; + + for (int k = 0; k < feature_maps.size(); ++k) { + auto f_map = feature_maps[k]; + auto tmp_min_sizes = min_sizes[k]; + int f_h = f_map[0]; + int f_w = f_map[1]; + for (int i = 0; i < f_h; ++i) { + for (int j = 0; j < f_w; ++j) { + for (int min_size : tmp_min_sizes) { + float s_kx = (float)min_size / target_width; + float s_ky = (float)min_size / target_height; + + if (min_size == 32) { + for (float offset_y : offset_32) + for (float offset_x : offset_32) + m_anchors.push_back({(j + offset_x) * steps[k] / target_width, + (i + offset_y) * steps[k] / target_height, + s_kx, s_ky}); + } else if (min_size == 64) { + for (float offset_y : offset_64) + for (float offset_x : offset_64) + m_anchors.push_back({(j + offset_x) * steps[k] / target_width, + (i + offset_y) * steps[k] / target_height, + s_kx, s_ky}); + } else { + m_anchors.push_back({(j + 0.5f) * steps[k] / target_width, + (i + 0.5f) * steps[k] / target_height, s_kx, + s_ky}); + } + } + } + } + } +} + +void FacePipeline::preprocess_pose(const cv::Mat &img, + std::vector &blob_data) { + float pad = 0.3f; + int h = img.rows; + int w = img.cols; + int nh = (int)(h + pad * h); + int nw = (int)(w + pad * w); + int nx1 = std::max(0, (nw - w) / 2); + int ny1 = std::max(0, (nh - h) / 2); + + cv::Mat canvas = cv::Mat::zeros(nh, nw, CV_8UC3); + img.copyTo(canvas(cv::Rect(nx1, ny1, w, h))); + + cv::Mat resized; + cv::resize(canvas, resized, + cv::Size(m_pose_var_input_shape[3], m_pose_var_input_shape[2])); + + const float mean[3] = {127.5f, 127.5f, 127.5f}; + const float std[3] = {1.0f / 127.5f, 1.0f / 127.5f, 1.0f / 127.5f}; + image_to_blob(resized, blob_data, mean, std); +} + +bool FacePipeline::RunPose(const cv::Mat &face_crop, FacePose &pose) { + preprocess_pose(face_crop, m_blob_buffer); + + auto input_tensor_var = create_tensor(m_blob_buffer, m_pose_var_input_shape); + auto output_var = m_session_pose_var->Run( + Ort::RunOptions{nullptr}, m_pose_var_input_names.data(), + &input_tensor_var, 1, m_pose_var_output_names.data(), 1); + + auto input_tensor_conv = + create_tensor(m_blob_buffer, m_pose_conv_input_shape); + auto 
output_conv = m_session_pose_conv->Run( + Ort::RunOptions{nullptr}, m_pose_conv_input_names.data(), + &input_tensor_conv, 1, m_pose_conv_output_names.data(), 1); + + const float *data_var = output_var[0].GetTensorData(); + const float *data_conv = output_conv[0].GetTensorData(); + + pose.yaw = (data_var[0] + data_conv[0]) / 2.0f; + pose.pitch = (data_var[1] + data_conv[1]) / 2.0f; + pose.roll = (data_var[2] + data_conv[2]) / 2.0f; + return true; +} + +void FacePipeline::preprocess_landmark_net1(const cv::Mat &img, + std::vector &blob_data) { + cv::Mat resized, gray_img; + cv::resize(img, resized, + cv::Size(m_lm1_input_shape[3], m_lm1_input_shape[2])); + cv::cvtColor(resized, gray_img, cv::COLOR_BGR2GRAY); + + const float mean[1] = {0.0f}; + const float std[1] = {1.0f}; + image_to_blob(gray_img, blob_data, mean, std); +} + +std::vector +FacePipeline::shape_index_process(const Ort::Value &feat_val, + const Ort::Value &pos_val) { + auto feat_shape = feat_val.GetTensorTypeAndShapeInfo().GetShape(); + auto pos_shape = pos_val.GetTensorTypeAndShapeInfo().GetShape(); + const float *feat_data = feat_val.GetTensorData(); + const float *pos_data = pos_val.GetTensorData(); + + long feat_n = feat_shape[0]; + long feat_c = feat_shape[1]; + long feat_h = feat_shape[2]; + long feat_w = feat_shape[3]; + long pos_n = pos_shape[0]; + long landmark_x2 = pos_shape[1]; + int landmark_num = landmark_x2 / 2; + + float m_origin[] = {112.0f, 112.0f}; + float m_origin_patch[] = {15.0f, 15.0f}; + + int x_patch_h = (int)(m_origin_patch[0] * feat_h / m_origin[0] + 0.5f); + int x_patch_w = (int)(m_origin_patch[1] * feat_w / m_origin[1] + 0.5f); + int feat_patch_h = x_patch_h; + int feat_patch_w = x_patch_w; + + float r_h = (feat_patch_h - 1) / 2.0f; + float r_w = (feat_patch_w - 1) / 2.0f; + + std::vector out_shape = {feat_n, feat_c, x_patch_h, (long)landmark_num, + x_patch_w}; + std::vector buff( + feat_n * feat_c * x_patch_h * landmark_num * x_patch_w, 0.0f); + + for (int i = 0; i < landmark_num; ++i) { + for (int n = 0; n < feat_n; ++n) { + float y_pos = pos_data[n * landmark_x2 + 2 * i + 1]; + float x_pos = pos_data[n * landmark_x2 + 2 * i]; + + int y = (int)(y_pos * (feat_h - 1) - r_h + 0.5f); + int x = (int)(x_pos * (feat_w - 1) - r_w + 0.5f); + + for (int c = 0; c < feat_c; ++c) { + for (int ph = 0; ph < feat_patch_h; ++ph) { + for (int pw = 0; pw < feat_patch_w; ++pw) { + int y_p = y + ph; + int x_p = x + pw; + + long out_idx = n * (feat_c * x_patch_h * landmark_num * x_patch_w) + + c * (x_patch_h * landmark_num * x_patch_w) + + ph * (landmark_num * x_patch_w) + i * (x_patch_w) + + pw; + + if (y_p < 0 || y_p >= feat_h || x_p < 0 || x_p >= feat_w) { + buff[out_idx] = 0.0f; + } else { + long feat_idx = n * (feat_c * feat_h * feat_w) + + c * (feat_h * feat_w) + y_p * (feat_w) + x_p; + buff[out_idx] = feat_data[feat_idx]; + } + } + } + } + } + } + return buff; +} + +bool FacePipeline::RunLandmark(const cv::Mat &image, const FaceBox &box, + FaceLandmark &landmark) { + + cv::Rect face_rect_raw(box.x1, box.y1, box.x2 - box.x1, box.y2 - box.y1); + int pad_top = std::max(0, -face_rect_raw.y); + int pad_bottom = + std::max(0, (face_rect_raw.y + face_rect_raw.height) - image.rows); + int pad_left = std::max(0, -face_rect_raw.x); + int pad_right = + std::max(0, (face_rect_raw.x + face_rect_raw.width) - image.cols); + cv::Mat face_crop_padded; + cv::copyMakeBorder(image, face_crop_padded, pad_top, pad_bottom, pad_left, + pad_right, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0)); + cv::Rect face_rect_padded(face_rect_raw.x + 
pad_left, + face_rect_raw.y + pad_top, face_rect_raw.width, + face_rect_raw.height); + cv::Mat face_crop = face_crop_padded(face_rect_padded); + + preprocess_landmark_net1(face_crop, m_blob_buffer); + auto input_tensor_net1 = create_tensor(m_blob_buffer, m_lm1_input_shape); + + auto output_net1 = m_session_landmarker1->Run( + Ort::RunOptions{nullptr}, m_lm1_input_names.data(), &input_tensor_net1, 1, + m_lm1_output_names.data(), 2); + + std::vector shape_index_blob = + shape_index_process(output_net1[0], output_net1[1]); + + auto input_tensor_net2 = Ort::Value::CreateTensor( + m_memory_info, shape_index_blob.data(), shape_index_blob.size(), + m_lm2_input_shape.data(), m_lm2_input_shape.size()); + + auto output_net2 = m_session_landmarker2->Run( + Ort::RunOptions{nullptr}, m_lm2_input_names.data(), &input_tensor_net2, 1, + m_lm2_output_names.data(), 1); + + const float *data_net1_pos = output_net1[1].GetTensorData(); + const float *data_net2 = output_net2[0].GetTensorData(); + auto shape_net1_pos = output_net1[1].GetTensorTypeAndShapeInfo().GetShape(); + int landmark_x2 = shape_net1_pos[1]; + + float scale_x = (box.x2 - box.x1) / 112.0f; + float scale_y = (box.y2 - box.y1) / 112.0f; + + for (int i = 0; i < 5; ++i) { + float x_norm = (data_net2[i * 2 + 0] + data_net1_pos[i * 2 + 0]) * 112.0f; + float y_norm = (data_net2[i * 2 + 1] + data_net1_pos[i * 2 + 1]) * 112.0f; + + float x = box.x1 + x_norm * scale_x; + float y = box.y1 + y_norm * scale_y; + + x = std::max(0.01f, std::min(x, (float)image.cols - 0.01f)); + y = std::max(0.01f, std::min(y, (float)image.rows - 0.01f)); + landmark.points[i] = cv::Point2f(x, y); + } + return true; +} + +cv::Mat FacePipeline::RunAlignment(const cv::Mat &image, + const FaceLandmark &landmark) { + + std::vector src_points; + std::vector dst_points; + + for (int i = 0; i < 5; ++i) { + src_points.push_back(landmark.points[i]); + dst_points.push_back(cv::Point2f(m_landmark_template.at(i, 0), + m_landmark_template.at(i, 1))); + } + + cv::Mat transform_matrix = + cv::estimateAffinePartial2D(src_points, dst_points); + + cv::Mat aligned_face; + + cv::warpAffine(image, aligned_face, transform_matrix, m_align_output_size, + cv::INTER_LINEAR); + + return aligned_face; +} + +void FacePipeline::preprocess_recognition(const cv::Mat &img, + std::vector &blob_data) { + cv::Mat resized, rgb_img; + + const cv::Size target_size(248, 248); + + cv::resize(img, resized, target_size); + + cv::cvtColor(resized, rgb_img, cv::COLOR_BGR2RGB); + + const float mean[3] = {0.0f, 0.0f, 0.0f}; + const float std[3] = {1.0f, 1.0f, 1.0f}; + image_to_blob(rgb_img, blob_data, mean, std); +} + +void FacePipeline::normalize_sqrt_l2(std::vector &v) { + + double norm = 0.0; + for (float &val : v) { + val = std::sqrt(std::max(0.0f, val)); + norm += val * val; + } + + if (norm > 1e-6) { + norm = std::sqrt(norm); + for (float &val : v) { + val = static_cast(val / norm); + } + } +} + +bool FacePipeline::RunRecognition(const cv::Mat &aligned_face, + std::vector &feature) { + + preprocess_recognition(aligned_face, m_blob_buffer); + + const std::vector hardcoded_shape = {1, 3, 248, 248}; + + auto input_tensor = create_tensor(m_blob_buffer, hardcoded_shape); + + auto output_tensors = m_session_recognizer->Run( + Ort::RunOptions{nullptr}, m_rec_input_names.data(), &input_tensor, 1, + m_rec_output_names.data(), 1); + + long feature_dim = + output_tensors[0].GetTensorTypeAndShapeInfo().GetShape()[1]; + const float *output_data = output_tensors[0].GetTensorData(); + + feature.resize(feature_dim); + 
memcpy(feature.data(), output_data, feature_dim * sizeof(float));
+
+  normalize_sqrt_l2(feature);
+
+  return true;
 }
\ No newline at end of file
diff --git a/src/face_pipeline.h b/src/face_pipeline.h
index 8d413db..4a3390c 100644
--- a/src/face_pipeline.h
+++ b/src/face_pipeline.h
@@ -1,196 +1,181 @@
 #pragma once
 
-#include
-#include
-#include
-#include
-#include
-#include
-#include
 #include
 #include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
 
 #include "onnxruntime_cxx_api.h"
+#include "opencv2/calib3d.hpp"
 #include "opencv2/opencv.hpp"
-#include "opencv2/calib3d.hpp" // for estimateAffinePartial2D
 
-// --- Logging macros ---
 #define LOG_TAG "FacePipeline_CPP"
 #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
 #define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
 
-
-/**
- * @brief Computes the cosine similarity of two L2-normalized feature vectors
- */
-inline float compare_features(const std::vector<float>& v1, const std::vector<float>& v2) {
-    if (v1.empty() || v1.size() != v2.size()) {
-        return 0.0f;
-    }
-    double dot_product = 0.0;
-    for (size_t i = 0; i < v1.size(); ++i) {
-        dot_product += v1[i] * v2[i];
-    }
-    return std::max(-1.0f, std::min(1.0f, static_cast<float>(dot_product)));
+inline float compare_features(const std::vector<float> &v1,
+                              const std::vector<float> &v2) {
+  if (v1.empty() || v1.size() != v2.size()) {
+    return 0.0f;
+  }
+  double dot_product = 0.0;
+  for (size_t i = 0; i < v1.size(); ++i) {
+    dot_product += v1[i] * v2[i];
+  }
+  return std::max(-1.0f, std::min(1.0f, static_cast<float>(dot_product)));
 }
 
-// --- Helper structs (mirroring facedetector.py::Box) ---
 struct FaceBox {
-    float x1, y1, x2, y2, score;
+  float x1, y1, x2, y2, score;
 };
 
 struct FaceLandmark {
-    std::array<cv::Point2f, 5> points;
+  std::array<cv::Point2f, 5> points;
 };
 
 struct FacePose {
-    float yaw, pitch, roll;
+  float yaw, pitch, roll;
 };
 
-// --- NMS helper (mirroring facedetector.py::hard_nms) ---
-inline float iou_of(const FaceBox& a, const FaceBox& b) {
-    float inter_x1 = std::max(a.x1, b.x1);
-    float inter_y1 = std::max(a.y1, b.y1);
-    float inter_x2 = std::min(a.x2, b.x2);
-    float inter_y2 = std::min(a.y2, b.y2);
+inline float iou_of(const FaceBox &a, const FaceBox &b) {
+  float inter_x1 = std::max(a.x1, b.x1);
+  float inter_y1 = std::max(a.y1, b.y1);
+  float inter_x2 = std::min(a.x2, b.x2);
+  float inter_y2 = std::min(a.y2, b.y2);
 
-    if (inter_x1 < inter_x2 && inter_y1 < inter_y2) {
-        float inter_area = (inter_x2 - inter_x1 + 1.0f) * (inter_y2 - inter_y1 + 1.0f);
-        float a_area = (a.x2 - a.x1 + 1.0f) * (a.y2 - a.y1 + 1.0f);
-        float b_area = (b.x2 - b.x1 + 1.0f) * (b.y2 - b.y1 + 1.0f);
-        float union_area = a_area + b_area - inter_area;
-        return inter_area / union_area;
-    }
-    return 0.0f;
+  if (inter_x1 < inter_x2 && inter_y1 < inter_y2) {
+    float inter_area =
+        (inter_x2 - inter_x1 + 1.0f) * (inter_y2 - inter_y1 + 1.0f);
+    float a_area = (a.x2 - a.x1 + 1.0f) * (a.y2 - a.y1 + 1.0f);
+    float b_area = (b.x2 - b.x1 + 1.0f) * (b.y2 - b.y1 + 1.0f);
+    float union_area = a_area + b_area - inter_area;
+    return inter_area / union_area;
+  }
+  return 0.0f;
 }
 
-inline std::vector<FaceBox> hard_nms(std::vector<FaceBox>& boxes, float iou_threshold, int topk) {
-    if (boxes.empty()) return {};
-    std::sort(boxes.begin(), boxes.end(), [](const FaceBox& a, const FaceBox& b) {
-        return a.score > b.score;
-    });
+inline std::vector<FaceBox> hard_nms(std::vector<FaceBox> &boxes,
+                                     float iou_threshold, int topk) {
+  if (boxes.empty())
+    return {};
+  std::sort(boxes.begin(), boxes.end(), [](const FaceBox &a, const FaceBox &b) {
+    return a.score > b.score;
+  });
 
-    std::vector<int> merged(boxes.size(), 0);
-    std::vector<FaceBox> output;
+  std::vector<int> merged(boxes.size(), 0);
+  std::vector<FaceBox> output;
 
-    for (int i = 0; i < boxes.size(); ++i) {
-        if (merged[i]) continue;
-
-        output.push_back(boxes[i]);
-        merged[i] = 1;
+  for (int i = 0; i < boxes.size(); ++i) {
+    if (merged[i])
+      continue;
 
-        for (int j = i + 1; j < boxes.size(); ++j) {
-            if (merged[j]) continue;
-            if (iou_of(boxes[i], boxes[j]) > iou_threshold) {
-                merged[j] = 1;
-            }
-        }
-        if (output.size() >= topk) break;
+    output.push_back(boxes[i]);
+    merged[i] = 1;
+
+    for (int j = i + 1; j < boxes.size(); ++j) {
+      if (merged[j])
+        continue;
+      if (iou_of(boxes[i], boxes[j]) > iou_threshold) {
+        merged[j] = 1;
+      }
     }
-    return output;
+    if (output.size() >= topk)
+      break;
+  }
+  return output;
 }
 
-
-/**
- * @class FacePipeline
- * @brief Manages the 7 ONNX models and runs the complete face recognition pipeline
- */
 class FacePipeline {
 public:
-    FacePipeline(const std::string& model_dir);
-    ~FacePipeline();
-    bool IsInitialized() const { return m_initialized; }
-    bool Extract(const cv::Mat& image, std::vector<float>& feature);
+  FacePipeline(const std::string &model_dir);
+  ~FacePipeline();
+  bool IsInitialized() const { return m_initialized; }
+  bool Extract(const cv::Mat &image, std::vector<float> &feature);
 
 private:
-    // --- Model loading and initialization ---
-    bool LoadModels(const std::string& model_dir);
-    void InitMemoryAllocators();
+  bool LoadModels(const std::string &model_dir);
+  void InitMemoryAllocators();
 
-    // --- Core pipeline steps ---
-    void preprocess_rotation(const cv::Mat &image, std::vector<float> &blob_data);
-    int RunRotation(const cv::Mat& image); // [Model 5]
-    bool RunDetection(const cv::Mat& image, std::vector<FaceBox>& boxes); // [Model 1]
-    bool RunPose(const cv::Mat& face_crop, FacePose& pose); // [Models 6, 7]
-    bool RunLandmark(const cv::Mat& image, const FaceBox& box, FaceLandmark& landmark); // [Models 2, 3]
-    cv::Mat RunAlignment(const cv::Mat& image, const FaceLandmark& landmark); //
-    bool RunRecognition(const cv::Mat& aligned_face, std::vector<float>& feature); // [Model 4]
+  void preprocess_rotation(const cv::Mat &image, std::vector<float> &blob_data);
+  int RunRotation(const cv::Mat &image);
+  bool RunDetection(const cv::Mat &image, std::vector<FaceBox> &boxes);
+  bool RunPose(const cv::Mat &face_crop, FacePose &pose);
+  bool RunLandmark(const cv::Mat &image, const FaceBox &box,
+                   FaceLandmark &landmark);
+  cv::Mat RunAlignment(const cv::Mat &image, const FaceLandmark &landmark);
+  bool RunRecognition(const cv::Mat &aligned_face, std::vector<float> &feature);
 
-    // --- Pre/post-processing helpers ---
-
-    // [Model 1] FaceBoxesV2
-    struct Anchor { float cx, cy, s_kx, s_ky; };
-    std::vector<Anchor> m_anchors;
-    void generate_anchors_faceboxes(int target_height, int target_width);
-    void preprocess_detection(const cv::Mat& img, std::vector<float>& blob_data);
+  struct Anchor {
+    float cx, cy, s_kx, s_ky;
+  };
+  std::vector<Anchor> m_anchors;
+  void generate_anchors_faceboxes(int target_height, int target_width);
+  void preprocess_detection(const cv::Mat &img, std::vector<float> &blob_data);
 
-    // [Models 6, 7] FSANet
-    void preprocess_pose(const cv::Mat& img, std::vector<float>& blob_data);
+  void preprocess_pose(const cv::Mat &img, std::vector<float> &blob_data);
 
-    // [Models 2, 3] Landmark5er
-    void preprocess_landmark_net1(const cv::Mat& img, std::vector<float>& blob_data);
-    std::vector<float> shape_index_process(const Ort::Value& feat_data, const Ort::Value& pos_data);
+  void preprocess_landmark_net1(const cv::Mat &img,
+                                std::vector<float> &blob_data);
+  std::vector<float> shape_index_process(const Ort::Value &feat_data,
+                                         const Ort::Value &pos_data);
 
-    // [Model 4] FaceRecognizer
-    void preprocess_recognition(const cv::Mat& img, std::vector<float>& blob_data);
-    void normalize_sqrt_l2(std::vector<float>& v); //
+  void preprocess_recognition(const cv::Mat &img,
+                              std::vector<float> &blob_data);
+  void normalize_sqrt_l2(std::vector<float> &v);
 
-    // Common helpers
-    void image_to_blob(const cv::Mat& img, std::vector<float>& blob, const float* mean, const float* std);
-    Ort::Value create_tensor(const std::vector<float>& blob_data, const std::vector<int64_t>& input_shape);
+  void image_to_blob(const cv::Mat &img, std::vector<float> &blob,
+                     const float *mean, const float *std);
+  Ort::Value create_tensor(const std::vector<float> &blob_data,
+                           const std::vector<int64_t> &input_shape);
 
-    // --- ONNX Runtime core components ---
-    Ort::Env m_env;
-    Ort::SessionOptions m_session_options;
-    Ort::AllocatorWithDefaultOptions m_allocator;
-    Ort::MemoryInfo m_memory_info;
-    bool m_initialized = false;
+  Ort::Env m_env;
+  Ort::SessionOptions m_session_options;
+  Ort::AllocatorWithDefaultOptions m_allocator;
+  Ort::MemoryInfo m_memory_info;
+  bool m_initialized = false;
 
-    // --- Sessions for the 7 models ---
-    std::unique_ptr<Ort::Session> m_session_detector;
-    std::unique_ptr<Ort::Session> m_session_landmarker1;
-    std::unique_ptr<Ort::Session> m_session_landmarker2;
-    std::unique_ptr<Ort::Session> m_session_recognizer;
-    std::unique_ptr<Ort::Session> m_session_rotator;
-    std::unique_ptr<Ort::Session> m_session_pose_var;
-    std::unique_ptr<Ort::Session> m_session_pose_conv;
+  std::unique_ptr<Ort::Session> m_session_detector;
+  std::unique_ptr<Ort::Session> m_session_landmarker1;
+  std::unique_ptr<Ort::Session> m_session_landmarker2;
+  std::unique_ptr<Ort::Session> m_session_recognizer;
+  std::unique_ptr<Ort::Session> m_session_rotator;
+  std::unique_ptr<Ort::Session> m_session_pose_var;
+  std::unique_ptr<Ort::Session> m_session_pose_conv;
 
-    // --- ONNX model input/output names (C-style strings) ---
-    // These are fetched when the models are loaded
-    std::vector<const char*> m_rot_input_names, m_rot_output_names;
-    std::vector<int64_t> m_rot_input_shape;
+  std::vector<const char *> m_rot_input_names, m_rot_output_names;
+  std::vector<int64_t> m_rot_input_shape;
 
-    std::vector<const char*> m_det_input_names, m_det_output_names;
-    std::vector<int64_t> m_det_input_shape;
-
-    std::vector<const char*> m_pose_var_input_names, m_pose_var_output_names;
-    std::vector<int64_t> m_pose_var_input_shape;
+  std::vector<const char *> m_det_input_names, m_det_output_names;
+  std::vector<int64_t> m_det_input_shape;
 
-    std::vector<const char*> m_pose_conv_input_names, m_pose_conv_output_names;
-    std::vector<int64_t> m_pose_conv_input_shape;
+  std::vector<const char *> m_pose_var_input_names, m_pose_var_output_names;
+  std::vector<int64_t> m_pose_var_input_shape;
 
-    std::vector<const char*> m_lm1_input_names, m_lm1_output_names;
-    std::vector<int64_t> m_lm1_input_shape;
+  std::vector<const char *> m_pose_conv_input_names, m_pose_conv_output_names;
+  std::vector<int64_t> m_pose_conv_input_shape;
 
-    std::vector<const char*> m_lm2_input_names, m_lm2_output_names;
-    std::vector<int64_t> m_lm2_input_shape;
+  std::vector<const char *> m_lm1_input_names, m_lm1_output_names;
+  std::vector<int64_t> m_lm1_input_shape;
 
-    std::vector<const char*> m_rec_input_names, m_rec_output_names;
-    std::vector<int64_t> m_rec_input_shape;
-
-    // --- Scratch buffer ---
-    std::vector<float> m_blob_buffer;
+  std::vector<const char *> m_lm2_input_names, m_lm2_output_names;
+  std::vector<int64_t> m_lm2_input_shape;
 
-    // --- Constants (from the Python source) ---
-    const float m_det_threshold = 0.35f;
-    const float m_det_iou_threshold = 0.45f;
-    const int m_det_topk = 300;
-    const float m_pose_threshold = 30.0f; // (from face_feature_extractor.py)
-    const cv::Mat m_landmark_template = (cv::Mat_<float>(5, 2) <<
-        89.3095f, 72.9025f,   // (from facealign.py)
-        169.3095f, 72.9025f,  //
-        127.8949f, 127.0441f, //
-        96.8796f, 184.8907f,  //
-        159.1065f, 184.7601f); //
-    const cv::Size m_align_output_size = cv::Size(256, 256); //
+  std::vector<const char *> m_rec_input_names, m_rec_output_names;
+  std::vector<int64_t> m_rec_input_shape;
+
+  std::vector<float> m_blob_buffer;
+
+  const float m_det_threshold = 0.35f;
+  const float m_det_iou_threshold = 0.45f;
+  const int m_det_topk = 300;
+  const float m_pose_threshold = 30.0f;
+  const cv::Mat m_landmark_template =
+      (cv::Mat_<float>(5, 2) << 89.3095f, 72.9025f, 169.3095f, 72.9025f,
+       127.8949f, 127.0441f, 96.8796f, 184.8907f, 159.1065f, 184.7601f);
+  const cv::Size m_align_output_size = cv::Size(256, 256);
 };
\ No newline at end of file
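
For reference, the call sequence the Java wrapper in this patch expects is init -> extractFeature -> compare -> release. A minimal usage sketch follows; the demo class, the bitmaps, and the 0.45f acceptance threshold are illustrative assumptions, not values taken from this commit (a long-lived app would keep one wrapper instance and call release() only on exit):

    import android.content.Context;
    import android.graphics.Bitmap;
    import com.facesdk.wrapper.FaceSDKWrapper;

    public final class FaceSdkDemo {
        // Returns true when two face photos appear to show the same person.
        public static boolean samePerson(Context context, Bitmap a, Bitmap b) {
            FaceSDKWrapper sdk = new FaceSDKWrapper();
            if (!sdk.init(context)) {   // copies the 7 .onnx models, then calls nativeInit()
                return false;           // failure details are already logged by the wrapper
            }
            try {
                float[] f1 = sdk.extractFeature(a);  // 512-dim vector, or null on failure
                float[] f2 = sdk.extractFeature(b);
                if (f1 == null || f2 == null) {
                    return false;
                }
                float sim = sdk.compare(f1, f2);     // cosine similarity; -2.0f signals an error
                return sim > 0.45f;                  // assumed threshold; tune per deployment
            } finally {
                sdk.release();                       // demo only; real apps release on exit
            }
        }
    }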
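
The feature post-processing in face_pipeline.cpp is worth spelling out: normalize_sqrt_l2 takes the raw recognizer output, applies an element-wise square root (clamped at zero), and L2-normalizes the result, so compare_features can compute cosine similarity as a plain dot product clamped to [-1, 1]. Below is a Java sketch of the same math, useful for sanity-checking stored features host-side; the class and method names are hypothetical, not part of the SDK:

    final class FeatureMath {
        // Mirrors FacePipeline::normalize_sqrt_l2: element-wise sqrt, then L2 normalization.
        static void normalizeSqrtL2(float[] v) {
            double norm = 0.0;
            for (int i = 0; i < v.length; i++) {
                v[i] = (float) Math.sqrt(Math.max(0.0f, v[i])); // sqrt of the clamped value
                norm += v[i] * v[i];
            }
            if (norm > 1e-6) {
                norm = Math.sqrt(norm);  // L2 norm of the sqrt'ed vector
                for (int i = 0; i < v.length; i++) {
                    v[i] /= norm;
                }
            }
        }

        // Mirrors compare_features: for unit-length vectors the dot product is the cosine.
        static float cosine(float[] a, float[] b) {
            if (a.length == 0 || a.length != b.length) {
                return 0.0f;
            }
            double dot = 0.0;
            for (int i = 0; i < a.length; i++) {
                dot += a[i] * b[i];
            }
            return (float) Math.max(-1.0, Math.min(1.0, dot));
        }
    }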