修改face_pipeline，同步python的更新

2025-11-17 16:03:18 +08:00 · 2025-11-17 16:03:18 +08:00 · aafd81fc5f
parent ed875db3e5
commit aafd81fc5f
4 changed files with 378 additions and 118 deletions
--- a/src/face_pipeline.cpp
+++ b/src/face_pipeline.cpp
@ -2,11 +2,12 @@
 #include <string>
 #include <vector>

+
 FacePipeline::FacePipeline(const std::string &model_dir)
    : m_env(ORT_LOGGING_LEVEL_WARNING, "FaceSDK"),
      m_memory_info(
          Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault)) {
-  m_session_options.SetIntraOpNumThreads(4);
+  m_session_options.SetIntraOpNumThreads(4); 
  m_session_options.SetGraphOptimizationLevel(
      GraphOptimizationLevel::ORT_ENABLE_ALL);

@ -21,6 +22,7 @@ FacePipeline::FacePipeline(const std::string &model_dir)

 FacePipeline::~FacePipeline() {} // Empty body: no explicit teardown code is written here.

+
 bool FacePipeline::LoadModels(const std::string &model_dir) {
  auto load_session = [&](std::unique_ptr<Ort::Session> &session,
                          const std::string &model_name) {
@ -55,8 +57,9 @@ bool FacePipeline::LoadModels(const std::string &model_dir) {
  return true;
 }

-void FacePipeline::InitMemoryAllocators() {

+void FacePipeline::InitMemoryAllocators() {
+  
  auto get_io_names = [&](Ort::Session *session,
                          std::vector<const char *> &input_names,
                          std::vector<const char *> &output_names,
@ -96,6 +99,7 @@ void FacePipeline::InitMemoryAllocators() {
        throw std::runtime_error("Model input shape is empty");
      }

+      
      std::string shape_str = "[";
      for (long long dim : input_shape)
        shape_str += std::to_string(dim) + ", ";
@ -103,12 +107,13 @@ void FacePipeline::InitMemoryAllocators() {
      LOGI("Model %s input shape: %s", model_name, shape_str.c_str());

      if (input_shape[0] < 1)
-        input_shape[0] = 1;
+        input_shape[0] = 1; 
    } else {
      LOGE("Model %s has no inputs!", model_name);
    }
  };

+  
  get_io_names(m_session_rotator.get(), m_rot_input_names, m_rot_output_names,
               m_rot_input_shape, "Rotator");
  get_io_names(m_session_detector.get(), m_det_input_names, m_det_output_names,
@ -124,38 +129,41 @@ void FacePipeline::InitMemoryAllocators() {
  get_io_names(m_session_recognizer.get(), m_rec_input_names,
               m_rec_output_names, m_rec_input_shape, "Recognizer");

+  
  if (m_det_input_shape.size() < 4) {
    LOGE("Detector input shape has < 4 dimensions! Cannot generate anchors.");
    throw std::runtime_error("Detector input shape invalid");
  }
-
+  
  if (m_det_input_shape[2] < 0 || m_det_input_shape[3] < 0) {
    LOGE("Detector input shape is dynamic (H/W is -1). This is not supported "
         "by the Python logic.");
-
+    
    LOGI("Forcing detector H/W to 640x640.");
    m_det_input_shape[2] = 640;
    m_det_input_shape[3] = 640;
  }
  generate_anchors_faceboxes(m_det_input_shape[2], m_det_input_shape[3]);

+  
  size_t max_blob_size = 0;

+  
  auto update_max = [&](const std::vector<int64_t> &shape,
                        const char *model_name) {
    if (shape.size() <= 1) {
-      return;
+      return; 
    }

    size_t s = 1;
-
+    
    for (size_t i = 1; i < shape.size(); ++i) {
      if (shape[i] < 0) {
-
+        
        LOGE("Model %s has dynamic dimension at index %zu. Skipping for "
             "max_blob_size calculation.",
             model_name, i);
-        return;
+        return; 
      }
      s *= static_cast<size_t>(shape[i]);
    }
@ -170,6 +178,7 @@ void FacePipeline::InitMemoryAllocators() {
  update_max(m_pose_var_input_shape, "PoseVar");
  update_max(m_lm1_input_shape, "Landmarker1");
  update_max(m_rec_input_shape, "Recognizer");
+  

  if (max_blob_size == 0) {
    LOGE(
@ -182,6 +191,7 @@ void FacePipeline::InitMemoryAllocators() {
  LOGI("m_blob_buffer resized successfully.");
 }

+
 void FacePipeline::image_to_blob(const cv::Mat &img, std::vector<float> &blob,
                                 const float *mean, const float *std) {
  int channels = img.channels();
@ -211,6 +221,8 @@ FacePipeline::create_tensor(const std::vector<float> &blob_data,
      input_shape.data(), input_shape.size());
 }

+
+
 bool FacePipeline::Extract(const cv::Mat &image, std::vector<float> &feature) {
  if (!m_initialized) {
    LOGE("Extract failed: Pipeline is not initialized.");
@ -221,6 +233,8 @@ bool FacePipeline::Extract(const cv::Mat &image, std::vector<float> &feature) {
    return false;
  }

+  
+  
  int rot_angle_code = RunRotation(image);
  cv::Mat upright_image;
  if (rot_angle_code >= 0) {
@ -229,14 +243,18 @@ bool FacePipeline::Extract(const cv::Mat &image, std::vector<float> &feature) {
    upright_image = image;
  }

+  
+  
  std::vector<FaceBox> boxes;
  if (!RunDetection(upright_image, boxes)) {
    LOGI("Extract failed: No face detected.");
    return false;
  }
-
  FaceBox best_box = boxes[0];

+  
+  
+  
  cv::Rect face_rect_raw(best_box.x1, best_box.y1, best_box.x2 - best_box.x1,
                         best_box.y2 - best_box.y1);
  int pad_top = std::max(0, -face_rect_raw.y);
@ -250,41 +268,99 @@ bool FacePipeline::Extract(const cv::Mat &image, std::vector<float> &feature) {
  cv::copyMakeBorder(upright_image, face_crop_padded, pad_top, pad_bottom,
                     pad_left, pad_right, cv::BORDER_CONSTANT,
                     cv::Scalar(0, 0, 0));
+
  cv::Rect face_rect_padded(face_rect_raw.x + pad_left,
                            face_rect_raw.y + pad_top, face_rect_raw.width,
                            face_rect_raw.height);
-  cv::Mat face_crop = face_crop_padded(face_rect_padded);

-  FacePose pose;
-  if (!RunPose(face_crop, pose)) {
-    LOGI("Extract failed: Pose estimation failed.");
+  
+  if (face_rect_padded.width <= 0 || face_rect_padded.height <= 0 ||
+      face_rect_padded.x < 0 || face_rect_padded.y < 0 ||
+      face_rect_padded.x + face_rect_padded.width > face_crop_padded.cols ||
+      face_rect_padded.y + face_rect_padded.height > face_crop_padded.rows) {
+    LOGE("Extract failed: Invalid face crop rectangle after padding.");
    return false;
  }

-  if (std::abs(pose.yaw) > m_pose_threshold ||
-      std::abs(pose.pitch) > m_pose_threshold) {
-    LOGI("Extract failed: Face pose (Y:%.1f, P:%.1f) exceeds threshold (%.1f)",
-         pose.yaw, pose.pitch, m_pose_threshold);
+  cv::Mat face_region = face_crop_padded(face_rect_padded);
+  if (face_region.empty()) {
+    LOGI("Extract failed: face_region is empty after cropping.");
    return false;
  }

+  
+  
  FaceLandmark landmark;
  if (!RunLandmark(upright_image, best_box, landmark)) {
    LOGI("Extract failed: Landmark detection failed.");
    return false;
  }

+  
+  
  cv::Mat aligned_face = RunAlignment(upright_image, landmark);
+  if (aligned_face.empty()) {
+    LOGI("Extract failed: Alignment produced an empty image.");
+    return false;
+  }

+  
+  
+  FacePose pose;
+  if (!RunPose(aligned_face, pose)) 
+  {
+    LOGI("Extract failed: Pose estimation failed.");
+    return false;
+  }
+
+  
+  
+  if (std::abs(pose.yaw) > m_pose_yaw_threshold ||
+      std::abs(pose.pitch) > m_pose_pitch_threshold) {
+    LOGI("Extract failed: Face pose (Y:%.1f, P:%.1f) exceeds threshold "
+         "(Y:%.1f, P:%.1f)",
+         pose.yaw, pose.pitch, m_pose_yaw_threshold, m_pose_pitch_threshold);
+    return false;
+  }
+
+  
+  
+  if (!CheckResolution(face_region)) {
+    LOGI("Extract failed: Resolution (H:%d, W:%d) below threshold (%d, %d)",
+         face_region.rows, face_region.cols, m_quality_min_resolution.height,
+         m_quality_min_resolution.width);
+    return false;
+  }
+
+  
+  
+  if (!CheckBrightness(face_region)) {
+    LOGI("Extract failed: Brightness check failed (thresholds [%.1f, %.1f]).",
+         m_quality_bright_v1, m_quality_bright_v2);
+    return false;
+  }
+
+  
+  
+  if (!CheckClarity(face_region)) {
+    LOGI("Extract failed: Clarity check failed (threshold [%.2f]).",
+         m_quality_clarity_low_thresh);
+    return false;
+  }
+
+  
+  
  if (!RunRecognition(aligned_face, feature)) {
    LOGI("Extract failed: Feature recognition failed.");
    return false;
  }

+  
  LOGI("Extract success.");
  return true;
 }

+
 void FacePipeline::preprocess_rotation(const cv::Mat &image,
                                       std::vector<float> &blob_data) {
  cv::Mat gray_img, resized, cropped, gray_3d;
@ -293,10 +369,12 @@ void FacePipeline::preprocess_rotation(const cv::Mat &image,
  int start = (256 - 224) / 2;
  cv::Rect crop_rect(start, start, 224, 224);
  cropped = resized(crop_rect);
-  cv::cvtColor(cropped, gray_3d, cv::COLOR_GRAY2BGR);
+  cv::cvtColor(cropped, gray_3d, cv::COLOR_GRAY2BGR); 

+  
  const float mean[3] = {0.0f, 0.0f, 0.0f};
-  const float std[3] = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
+  const float std[3] = {1.0f / 255.0f, 1.0f / 255.0f,
+                        1.0f / 255.0f}; 
  image_to_blob(gray_3d, blob_data, mean, std);
 }

@ -312,6 +390,7 @@ int FacePipeline::RunRotation(const cv::Mat &image) {
  int max_index = std::distance(output_data,
                                std::max_element(output_data, output_data + 4));

+  
  if (max_index == 1)
    return cv::ROTATE_90_CLOCKWISE;
  if (max_index == 2)
@ -321,13 +400,15 @@ int FacePipeline::RunRotation(const cv::Mat &image) {
  return -1;
 }

+// Detector preprocessing: resize img to the detector input size, then fill blob_data via image_to_blob.
 void FacePipeline::preprocess_detection(const cv::Mat &img,
                                        std::vector<float> &blob_data) {
  cv::Mat resized;
  cv::resize(img, resized,
-             cv::Size(m_det_input_shape[3], m_det_input_shape[2]));
+             cv::Size(m_det_input_shape[3], m_det_input_shape[2])); // cv::Size is (width, height): index 3, then index 2

-  const float mean[3] = {104.0f, 117.0f, 123.0f};
+  // Per-channel mean/std constants forwarded to image_to_blob.
+  const float mean[3] = {104.0f, 117.0f, 123.0f}; // presumably B, G, R means - TODO confirm against the Python pipeline
  const float std[3] = {1.0f, 1.0f, 1.0f};
  image_to_blob(resized, blob_data, mean, std);
 }
@ -342,10 +423,12 @@ bool FacePipeline::RunDetection(const cv::Mat &image,

  auto output_tensors = m_session_detector->Run(
      Ort::RunOptions{nullptr}, m_det_input_names.data(), &input_tensor, 1,
-      m_det_output_names.data(), 2);
+      m_det_output_names.data(), 2); 

-  const float *bboxes_data = output_tensors[0].GetTensorData<float>();
-  const float *probs_data = output_tensors[1].GetTensorData<float>();
+  const float *bboxes_data =
+      output_tensors[0].GetTensorData<float>(); 
+  const float *probs_data =
+      output_tensors[1].GetTensorData<float>(); 
  long num_anchors =
      output_tensors[0].GetTensorTypeAndShapeInfo().GetShape()[1];

@ -356,10 +439,10 @@ bool FacePipeline::RunDetection(const cv::Mat &image,
  }

  std::vector<FaceBox> bbox_collection;
-  const float variance[2] = {0.1f, 0.2f};
+  const float variance[2] = {0.1f, 0.2f}; 

  for (long i = 0; i < num_anchors; ++i) {
-    float conf = probs_data[i * 2 + 1];
+    float conf = probs_data[i * 2 + 1]; 
    if (conf < m_det_threshold)
      continue;

@ -369,23 +452,24 @@ bool FacePipeline::RunDetection(const cv::Mat &image,
    float dw = bboxes_data[i * 4 + 2];
    float dh = bboxes_data[i * 4 + 3];

-    float cx = anchor.cx + dx * variance[0] * anchor.s_kx;
-    float cy = anchor.cy + dy * variance[0] * anchor.s_ky;
-    float w = anchor.s_kx * std::exp(dw * variance[1]);
-    float h = anchor.s_ky * std::exp(dh * variance[1]);
+    float cx = anchor.cx + dx * variance[0] * anchor.s_kx; 
+    float cy = anchor.cy + dy * variance[0] * anchor.s_ky; 
+    float w = anchor.s_kx * std::exp(dw * variance[1]);    
+    float h = anchor.s_ky * std::exp(dh * variance[1]);    

    bbox_collection.push_back(
        {(cx - w / 2.0f) * img_width, (cy - h / 2.0f) * img_height,
         (cx + w / 2.0f) * img_width, (cy + h / 2.0f) * img_height, conf});
  }

-  boxes = hard_nms(bbox_collection, m_det_iou_threshold, m_det_topk);
+  boxes = hard_nms(bbox_collection, m_det_iou_threshold,
+                   m_det_topk); 
  return !boxes.empty();
 }

 void FacePipeline::generate_anchors_faceboxes(int target_height,
                                              int target_width) {
-
+  
  m_anchors.clear();
  std::vector<int> steps = {32, 64, 128};
  std::vector<std::vector<int>> min_sizes = {{32, 64, 128}, {256}, {512}};
@ -432,8 +516,13 @@ void FacePipeline::generate_anchors_faceboxes(int target_height,
  }
 }

+
 void FacePipeline::preprocess_pose(const cv::Mat &img,
                                   std::vector<float> &blob_data) {
+  
+  
+  
+  
  float pad = 0.3f;
  int h = img.rows;
  int w = img.cols;
@ -446,22 +535,27 @@ void FacePipeline::preprocess_pose(const cv::Mat &img,
  img.copyTo(canvas(cv::Rect(nx1, ny1, w, h)));

  cv::Mat resized;
-  cv::resize(canvas, resized,
-             cv::Size(m_pose_var_input_shape[3], m_pose_var_input_shape[2]));
+  cv::resize(
+      canvas, resized,
+      cv::Size(m_pose_var_input_shape[3], m_pose_var_input_shape[2])); 

+  
  const float mean[3] = {127.5f, 127.5f, 127.5f};
  const float std[3] = {1.0f / 127.5f, 1.0f / 127.5f, 1.0f / 127.5f};
  image_to_blob(resized, blob_data, mean, std);
 }

-bool FacePipeline::RunPose(const cv::Mat &face_crop, FacePose &pose) {
-  preprocess_pose(face_crop, m_blob_buffer);
+bool FacePipeline::RunPose(const cv::Mat &face_input, FacePose &pose) {
+  
+  preprocess_pose(face_input, m_blob_buffer);

+  
  auto input_tensor_var = create_tensor(m_blob_buffer, m_pose_var_input_shape);
  auto output_var = m_session_pose_var->Run(
      Ort::RunOptions{nullptr}, m_pose_var_input_names.data(),
      &input_tensor_var, 1, m_pose_var_output_names.data(), 1);

+  
  auto input_tensor_conv =
      create_tensor(m_blob_buffer, m_pose_conv_input_shape);
  auto output_conv = m_session_pose_conv->Run(
@ -471,24 +565,28 @@ bool FacePipeline::RunPose(const cv::Mat &face_crop, FacePose &pose) {
  const float *data_var = output_var[0].GetTensorData<float>();
  const float *data_conv = output_conv[0].GetTensorData<float>();

+  
  pose.yaw = (data_var[0] + data_conv[0]) / 2.0f;
  pose.pitch = (data_var[1] + data_conv[1]) / 2.0f;
  pose.roll = (data_var[2] + data_conv[2]) / 2.0f;
  return true;
 }

+// Landmark net 1 preprocessing: resize to the net's input size, convert BGR to gray, fill blob_data.
 void FacePipeline::preprocess_landmark_net1(const cv::Mat &img,
                                            std::vector<float> &blob_data) {
  cv::Mat resized, gray_img;
  cv::resize(img, resized,
-             cv::Size(m_lm1_input_shape[3], m_lm1_input_shape[2]));
-  cv::cvtColor(resized, gray_img, cv::COLOR_BGR2GRAY);
+             cv::Size(m_lm1_input_shape[3], m_lm1_input_shape[2])); // cv::Size is (width, height)
+  cv::cvtColor(resized, gray_img, cv::COLOR_BGR2GRAY);              // the blob is built from the gray image

+  // One-element mean/std arrays to match the single gray channel.
  const float mean[1] = {0.0f};
  const float std[1] = {1.0f};
  image_to_blob(gray_img, blob_data, mean, std);
 }

+
 std::vector<float>
 FacePipeline::shape_index_process(const Ort::Value &feat_val,
                                  const Ort::Value &pos_val) {
@ -497,13 +595,13 @@ FacePipeline::shape_index_process(const Ort::Value &feat_val,
  const float *feat_data = feat_val.GetTensorData<float>();
  const float *pos_data = pos_val.GetTensorData<float>();

-  long feat_n = feat_shape[0];
+  long feat_n = feat_shape[0]; 
  long feat_c = feat_shape[1];
  long feat_h = feat_shape[2];
  long feat_w = feat_shape[3];
-  long pos_n = pos_shape[0];
-  long landmark_x2 = pos_shape[1];
-  int landmark_num = landmark_x2 / 2;
+  long pos_n = pos_shape[0];          
+  long landmark_x2 = pos_shape[1];    
+  int landmark_num = landmark_x2 / 2; 

  float m_origin[] = {112.0f, 112.0f};
  float m_origin_patch[] = {15.0f, 15.0f};
@ -557,7 +655,7 @@ FacePipeline::shape_index_process(const Ort::Value &feat_val,

 bool FacePipeline::RunLandmark(const cv::Mat &image, const FaceBox &box,
                               FaceLandmark &landmark) {
-
+  
  cv::Rect face_rect_raw(box.x1, box.y1, box.x2 - box.x1, box.y2 - box.y1);
  int pad_top = std::max(0, -face_rect_raw.y);
  int pad_bottom =
@ -573,33 +671,41 @@ bool FacePipeline::RunLandmark(const cv::Mat &image, const FaceBox &box,
                            face_rect_raw.height);
  cv::Mat face_crop = face_crop_padded(face_rect_padded);

+  
  preprocess_landmark_net1(face_crop, m_blob_buffer);
  auto input_tensor_net1 = create_tensor(m_blob_buffer, m_lm1_input_shape);

+  
  auto output_net1 = m_session_landmarker1->Run(
      Ort::RunOptions{nullptr}, m_lm1_input_names.data(), &input_tensor_net1, 1,
-      m_lm1_output_names.data(), 2);
+      m_lm1_output_names.data(), 2); 

+  
  std::vector<float> shape_index_blob =
      shape_index_process(output_net1[0], output_net1[1]);

+  
  auto input_tensor_net2 = Ort::Value::CreateTensor<float>(
      m_memory_info, shape_index_blob.data(), shape_index_blob.size(),
      m_lm2_input_shape.data(), m_lm2_input_shape.size());

+  
  auto output_net2 = m_session_landmarker2->Run(
      Ort::RunOptions{nullptr}, m_lm2_input_names.data(), &input_tensor_net2, 1,
      m_lm2_output_names.data(), 1);

+  
  const float *data_net1_pos = output_net1[1].GetTensorData<float>();
  const float *data_net2 = output_net2[0].GetTensorData<float>();
-  auto shape_net1_pos = output_net1[1].GetTensorTypeAndShapeInfo().GetShape();
+  auto shape_net1_pos =
+      output_net1[1].GetTensorTypeAndShapeInfo().GetShape(); 
  int landmark_x2 = shape_net1_pos[1];

  float scale_x = (box.x2 - box.x1) / 112.0f;
  float scale_y = (box.y2 - box.y1) / 112.0f;

  for (int i = 0; i < 5; ++i) {
+    
    float x_norm = (data_net2[i * 2 + 0] + data_net1_pos[i * 2 + 0]) * 112.0f;
    float y_norm = (data_net2[i * 2 + 1] + data_net1_pos[i * 2 + 1]) * 112.0f;

@ -613,9 +719,10 @@ bool FacePipeline::RunLandmark(const cv::Mat &image, const FaceBox &box,
  return true;
 }

+
 cv::Mat FacePipeline::RunAlignment(const cv::Mat &image,
                                   const FaceLandmark &landmark) {
-
+  
  std::vector<cv::Point2f> src_points;
  std::vector<cv::Point2f> dst_points;

@ -625,40 +732,49 @@ cv::Mat FacePipeline::RunAlignment(const cv::Mat &image,
                                     m_landmark_template.at<float>(i, 1)));
  }

+  
+  
+  
  cv::Mat transform_matrix =
      cv::estimateAffinePartial2D(src_points, dst_points);

  cv::Mat aligned_face;
-
+  
+  
  cv::warpAffine(image, aligned_face, transform_matrix, m_align_output_size,
                 cv::INTER_LINEAR);

  return aligned_face;
 }

+// Recognizer preprocessing: resize to 248x248, reorder BGR to RGB, then fill blob_data via image_to_blob.
 void FacePipeline::preprocess_recognition(const cv::Mat &img,
                                          std::vector<float> &blob_data) {
  cv::Mat resized, rgb_img;

  const cv::Size target_size(248, 248);

+  // 248x248 matches the {1, 3, 248, 248} tensor shape that RunRecognition builds.
  cv::resize(img, resized, target_size);

+  // The input is treated as BGR and reordered to RGB.
  cv::cvtColor(resized, rgb_img, cv::COLOR_BGR2RGB);

+  // Mean 0 and std 1 are passed for all three channels.
  const float mean[3] = {0.0f, 0.0f, 0.0f};
  const float std[3] = {1.0f, 1.0f, 1.0f};
  image_to_blob(rgb_img, blob_data, mean, std);
 }

 void FacePipeline::normalize_sqrt_l2(std::vector<float> &v) {
-
+  
  double norm = 0.0;
  for (float &val : v) {
-    val = std::sqrt(std::max(0.0f, val));
+    val = std::sqrt(std::max(0.0f, val)); 
    norm += val * val;
  }

+  
  if (norm > 1e-6) {
    norm = std::sqrt(norm);
    for (float &val : v) {
@ -669,13 +785,19 @@ void FacePipeline::normalize_sqrt_l2(std::vector<float> &v) {

 bool FacePipeline::RunRecognition(const cv::Mat &aligned_face,
                                  std::vector<float> &feature) {
+  

+  
  preprocess_recognition(aligned_face, m_blob_buffer);

-  const std::vector<int64_t> hardcoded_shape = {1, 3, 248, 248};
+  
+  
+  const std::vector<int64_t> hardcoded_shape = {1, 3, 248, 248}; 

+  
  auto input_tensor = create_tensor(m_blob_buffer, hardcoded_shape);

+  
  auto output_tensors = m_session_recognizer->Run(
      Ort::RunOptions{nullptr}, m_rec_input_names.data(), &input_tensor, 1,
      m_rec_output_names.data(), 1);
@ -687,7 +809,147 @@ bool FacePipeline::RunRecognition(const cv::Mat &aligned_face,
  feature.resize(feature_dim);
  memcpy(feature.data(), output_data, feature_dim * sizeof(float));

+  
  normalize_sqrt_l2(feature);

  return true;
+}
+
+// ---- Quality checks that Extract() runs on the cropped face region ----
+// Resolution check: returns false when the region has fewer rows than
+// m_quality_min_resolution.height or fewer columns than its width.
+bool FacePipeline::CheckResolution(const cv::Mat &face_region) {
+  if (face_region.rows < m_quality_min_resolution.height ||
+      face_region.cols < m_quality_min_resolution.width) {
+    return false;
+  }
+  return true;
+}
+// Brightness check: converts a 3-channel region to gray, takes the brightest
+// cell mean of a 3x3 grid, and requires it to lie within the configured range.
+bool FacePipeline::CheckBrightness(const cv::Mat &face_region) {
+  cv::Mat gray;
+  if (face_region.channels() == 3)
+    cv::cvtColor(face_region, gray, cv::COLOR_BGR2GRAY);
+  else
+    gray = face_region; // any other channel count is passed through unconverted
+
+  float bright_value = grid_max_bright(gray, 3, 3);
+
+  // Both bounds are inclusive.
+  return (bright_value >= m_quality_bright_v1 &&
+          bright_value <= m_quality_bright_v2);
+}
+// Divides gray_img into rows x cols equal cells (integer division, so leftover
+// pixels on the right/bottom edge are not visited) and returns the largest cell mean.
+float FacePipeline::grid_max_bright(const cv::Mat &gray_img, int rows,
+                                    int cols) {
+  float max_bright = 0.0f;
+
+  // Avoid dividing by zero, and return 0 when the image cannot fill the grid.
+  if (rows == 0 || cols == 0)
+    return 0.0f;
+  int row_height = gray_img.rows / rows;
+  int col_width = gray_img.cols / cols;
+  if (row_height == 0 || col_width == 0)
+    return 0.0f;
+
+  for (int y = 0; y < rows; ++y) {
+    for (int x = 0; x < cols; ++x) {
+      cv::Rect grid_rect(x * col_width, y * row_height, col_width, row_height);
+      cv::Mat grid = gray_img(grid_rect);
+      cv::Scalar mean_val = cv::mean(grid);
+      if (mean_val[0] > max_bright) { // only channel 0 of the mean is compared
+        max_bright = static_cast<float>(mean_val[0]);
+      }
+    }
+  }
+  return max_bright;
+}
+// Clarity check: passes when clarity_estimate(face_region) reaches
+// m_quality_clarity_low_thresh.
+bool FacePipeline::CheckClarity(const cv::Mat &face_region) {
+  float clarity = clarity_estimate(face_region);
+  // The threshold itself counts as a pass (>=).
+  return (clarity >= m_quality_clarity_low_thresh);
+}
+// Returns a clarity score in [0, 1]: one minus the largest reblur() score over
+// a 2x2 grid of the (gray) image, clamped to that range.
+float FacePipeline::clarity_estimate(const cv::Mat &image) {
+  cv::Mat gray;
+  if (image.channels() == 3)
+    cv::cvtColor(image, gray, cv::COLOR_BGR2GRAY);
+  else
+    gray = image; // any other channel count is passed through unconverted
+
+  float blur_val = grid_max_reblur(gray, 2, 2);
+  float clarity = 1.0f - blur_val;
+
+  // Clamp to [0, 1].
+  return std::max(0.0f, std::min(1.0f, clarity));
+}
+// Divides img into rows x cols equal cells and returns the largest reblur()
+// score among them, floored at 0.
+float FacePipeline::grid_max_reblur(const cv::Mat &img, int rows, int cols) {
+  // NOTE(review): no rows == 0 / cols == 0 guard before these divisions, unlike grid_max_bright.
+  int row_height = img.rows / rows;
+  int col_width = img.cols / cols;
+  if (row_height == 0 || col_width == 0)
+    return 1.0f; // image cannot fill the grid; 1.0 makes clarity_estimate yield 0
+
+  float max_blur_val = -FLT_MAX; // NOTE(review): FLT_MAX needs <cfloat>; confirm it is included
+  cv::Mat data_float;
+  img.convertTo(data_float, CV_32F); // reblur() is given float data
+
+  for (int y = 0; y < rows; ++y) {
+    for (int x = 0; x < cols; ++x) {
+      cv::Rect grid_rect(x * col_width, y * row_height, col_width, row_height);
+      if (grid_rect.width < 1 || grid_rect.height < 1)
+        continue;
+
+      float blur_val = reblur(data_float(grid_rect));
+      if (blur_val > max_blur_val) {
+        max_blur_val = blur_val;
+      }
+    }
+  }
+  return std::max(max_blur_val, 0.0f);
+}
+// Blur score for one patch: the patch is re-blurred with 9-tap averaging filters and the score is the
+// share of adjacent-pixel variation that survives. clarity_estimate uses 1 - score, so higher means blurrier.
+float FacePipeline::reblur(const cv::Mat &data) {
+  // NOTE(review): looks like the re-blur no-reference blur metric (Crete et al.) - confirm against the Python code.
+  if (data.rows <= 1 || data.cols <= 1)
+    return 1.0f; // too small to form adjacent-row/column differences
+
+  cv::Mat kernel_v = cv::Mat::ones(9, 1, CV_32F) / 9.0f; // 9x1 averaging kernel (vertical)
+  cv::Mat kernel_h = cv::Mat::ones(1, 9, CV_32F) / 9.0f; // 1x9 averaging kernel (horizontal)
+  cv::Mat BVer, BHor;
+  // Re-blur the patch along each axis; borders are replicated.
+  cv::filter2D(data, BVer, CV_32F, kernel_v, cv::Point(-1, -1), 0,
+               cv::BORDER_REPLICATE);
+  cv::filter2D(data, BHor, CV_32F, kernel_h, cv::Point(-1, -1), 0,
+               cv::BORDER_REPLICATE);
+  // Absolute differences between adjacent rows / columns, for the input (D_F*) and the re-blurred copy (D_B*).
+  cv::Mat D_Fver, D_BVer, D_FHor, D_BHor;
+  cv::absdiff(data.rowRange(1, data.rows), data.rowRange(0, data.rows - 1),
+              D_Fver);
+  cv::absdiff(BVer.rowRange(1, BVer.rows), BVer.rowRange(0, BVer.rows - 1),
+              D_BVer);
+  cv::absdiff(data.colRange(1, data.cols), data.colRange(0, data.cols - 1),
+              D_FHor);
+  cv::absdiff(BHor.colRange(1, BHor.cols), BHor.colRange(0, BHor.cols - 1),
+              D_BHor);
+  // s_F*: total variation of the input; s_V*: the part of it removed by re-blurring (negatives clipped to 0).
+  double s_FVer = cv::sum(D_Fver)[0];
+  double s_Vver = cv::sum(cv::max(0.0, D_Fver - D_BVer))[0];
+  double s_FHor = cv::sum(D_FHor)[0];
+  double s_VHor = cv::sum(cv::max(0.0, D_FHor - D_BHor))[0];
+  // Fraction of variation that survived re-blurring; 0 when the patch is flat (s_F <= 1e-6).
+  float b_FVer =
+      (s_FVer > 1e-6) ? static_cast<float>((s_FVer - s_Vver) / s_FVer) : 0.0f;
+  float b_FHor =
+      (s_FHor > 1e-6) ? static_cast<float>((s_FHor - s_VHor) / s_FHor) : 0.0f;
+  // Report the larger of the two directional scores.
+  return std::max(b_FVer, b_FHor);
 }
--- a/src/face_pipeline.h
+++ b/src/face_pipeline.h
@ -128,6 +128,15 @@ private:
                              std::vector<float> &blob_data);
  void normalize_sqrt_l2(std::vector<float> &v);

+  bool CheckResolution(const cv::Mat &face_region); // minimum-size check
+  bool CheckBrightness(const cv::Mat &face_region); // brightness-range check
+  bool CheckClarity(const cv::Mat &face_region); // minimum-clarity check
+  // Helpers behind the brightness and clarity checks.
+  float grid_max_bright(const cv::Mat &gray_img, int rows, int cols);
+  float reblur(const cv::Mat &data);
+  float grid_max_reblur(const cv::Mat &img, int rows, int cols);
+  float clarity_estimate(const cv::Mat &image);
+
  void image_to_blob(const cv::Mat &img, std::vector<float> &blob,
                     const float *mean, const float *std);
  Ort::Value create_tensor(const std::vector<float> &blob_data,
@ -173,9 +182,19 @@ private:
  const float m_det_threshold = 0.35f; // RunDetection skips anchors whose confidence is below this
  const float m_det_iou_threshold = 0.45f; // passed to hard_nms
  const int m_det_topk = 300; // passed to hard_nms
-  const float m_pose_threshold = 30.0f;
+  // Extract rejects a face when |yaw| or |pitch| exceeds its own limit (presumably degrees - confirm).
+  const float m_pose_yaw_threshold = 30.0f;
+  const float m_pose_pitch_threshold = 25.0f;
+  // 5x2 float template whose rows RunAlignment reads as (x, y) points.
  const cv::Mat m_landmark_template =
      (cv::Mat_<float>(5, 2) << 89.3095f, 72.9025f, 169.3095f, 72.9025f,
       127.8949f, 127.0441f, 96.8796f, 184.8907f, 159.1065f, 184.7601f);
  const cv::Size m_align_output_size = cv::Size(256, 256); // output size given to cv::warpAffine in RunAlignment
+  // Smallest face region accepted by CheckResolution.
+  const cv::Size m_quality_min_resolution = cv::Size(112, 112);
+  // Inclusive lower (v1) and upper (v2) bounds used by CheckBrightness.
+  const float m_quality_bright_v1 = 70.0f;
+  const float m_quality_bright_v2 = 230.0f;
+  // Minimum clarity_estimate score accepted by CheckClarity.
+  const float m_quality_clarity_low_thresh = 0.10f;
 };
--- a/src/face_sdk_api.cpp
+++ b/src/face_sdk_api.cpp
@ -1,52 +1,52 @@
 #include "face_sdk.h"
-#include "face_pipeline.h" // 在 .cpp 中包含实现
+#include "face_pipeline.h" 
 #include <memory>

-// --- Pimpl implementation ---
-// The private implementation class is defined in the .cpp file
+// Private implementation class behind FaceSDK; it is defined in this .cpp
+// and owns the FacePipeline.
 class FaceSDK::Impl {
 public:
-    // Impl's constructor is what actually creates the FacePipeline
+    // Builds the owned FacePipeline from model_dir.
    Impl(const std::string& model_dir) 
        : pipeline(std::make_unique<FacePipeline>(model_dir)) 
    {
-        // constructor body
+        // Nothing else to do: the pipeline is created in the initializer list.
    }

-    // Check whether the internal pipeline is OK
+    // True when the pipeline pointer is set and the pipeline reports itself initialized.
    bool IsInitialized() const {
        return pipeline && pipeline->IsInitialized();
    }

-    // Smart pointer that owns the core pipeline
+    // Owning pointer to the pipeline.
    std::unique_ptr<FacePipeline> pipeline;
 };

-// --- Implementation of FaceSDK public methods ---

-// Constructor: creates the Impl instance
+// Constructor: creates the Impl, which in turn constructs the FacePipeline
+// from model_dir.
 FaceSDK::FaceSDK(const std::string& model_dir) 
    : m_impl(std::make_unique<Impl>(model_dir))
 {
-    // constructor body
+    // Nothing else to do: m_impl is created in the initializer list.
 }

-// Destructor: must be defined in the .cpp because Impl is an incomplete type
+// Defaulted here, after Impl has been fully defined in this file.
 FaceSDK::~FaceSDK() = default;

-// Implementation of IsInitialized
+// True only when m_impl is set and the Impl reports itself initialized.
 bool FaceSDK::IsInitialized() const {
    if (!m_impl) return false;
    return m_impl->IsInitialized();
 }

-// Implementation of Compare (a simple helper function)
+// Returns compare_features(feat1, feat2); no member state is read.
 float FaceSDK::Compare(const std::vector<float>& feat1, const std::vector<float>& feat2) {
-    // Call the global helper function defined in face_pipeline.h
+    // compare_features is a free function that is not defined in this file.
    return compare_features(feat1, feat2);
 }

-// ExtractFeature 的实现
+
 SDKExtractResult FaceSDK::ExtractFeature(const cv::Mat& image) {
    if (!IsInitialized()) {
        return { SDKStatus::NOT_INITIALIZED, {}, "SDK 未初始化" };
--- a/src/face_sdk_jni.cpp
+++ b/src/face_sdk_jni.cpp
@ -2,22 +2,22 @@
 #include <string>
 #include <vector>
 #include <android/log.h>
-#include <android/bitmap.h> // 用于 Bitmap -> cv::Mat
+#include <android/bitmap.h> 

-#include "face_sdk.h"       // 我们的 C++ API
+#include "face_sdk.h"       
 #include "opencv2/opencv.hpp"

-// --- 日志宏 ---
+
 #define LOG_TAG "FaceSDK_JNI"
 #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
 #define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)

-// --- 全局 SDK 实例 ---
-// 我们将 C++ SDK 实例保存在一个全局智能指针中
-// 指针 (long) 将被返回给 Java 层持有
+
+
+
 std::unique_ptr<FaceSDK> g_sdk_instance;

-// --- 辅助函数：Bitmap 转 cv::Mat ---
+
 bool ConvertBitmapToMat(JNIEnv* env, jobject j_bitmap, cv::Mat& out_mat) {
    AndroidBitmapInfo bmp_info;
    if (AndroidBitmap_getInfo(env, j_bitmap, &bmp_info) < 0) {
@ -25,7 +25,7 @@ bool ConvertBitmapToMat(JNIEnv* env, jobject j_bitmap, cv::Mat& out_mat) {
        return false;
    }

-    // 只支持 RGBA_8888
+    
    if (bmp_info.format != ANDROID_BITMAP_FORMAT_RGBA_8888) {
        LOGE("Unsupported bitmap format. Only RGBA_8888 is supported.");
        return false;
@ -37,12 +37,12 @@ bool ConvertBitmapToMat(JNIEnv* env, jobject j_bitmap, cv::Mat& out_mat) {
        return false;
    }

-    // 创建一个 cv::Mat 来包装 Bitmap 像素
-    // 注意：这是 RGBA 格式
+    
+    
    cv::Mat tmp_mat(bmp_info.height, bmp_info.width, CV_8UC4, bmp_pixels);
    
-    // 我们的人脸管线需要 BGR 格式
-    // TODO: 确认 python 管线是否需要 RGB。cv::cvtColor更安全。
+    
+    
    cv::cvtColor(tmp_mat, out_mat, cv::COLOR_RGBA2BGR);

    AndroidBitmap_unlockPixels(env, j_bitmap);
@ -50,28 +50,22 @@ bool ConvertBitmapToMat(JNIEnv* env, jobject j_bitmap, cv::Mat& out_mat) {
 }


-// --- JNI 接口实现 ---
+

 #ifdef __cplusplus
 extern "C" {
 #endif

-// JNI 函数命名规则: Java_包名_类名_方法名
-// 【【【请将 "com_facesdk_wrapper_FaceSDKWrapper" 替换为您自己的包名和类名】】】

-/**
- * @brief 初始化 SDK
- * @param env JNIEnv
- * @param thiz Java 层的 'this'
- * @param j_model_dir (String) 包含 .onnx 文件的路径
- * @return (long) 指向 C++ FaceSDK 实例的指针。如果为 0，则初始化失败。
- */
+
+
+
 JNIEXPORT jlong JNICALL
 Java_com_facesdk_wrapper_FaceSDKWrapper_nativeInit(JNIEnv *env, jobject thiz, jstring j_model_dir) {
    const char *model_dir_cstr = env->GetStringUTFChars(j_model_dir, nullptr);
    if (model_dir_cstr == nullptr) {
        LOGE("Failed to get model dir string");
-        return 0; // 返回 0 (null)
+        return 0; 
    }
    
    std::string model_dir(model_dir_cstr);
@ -84,11 +78,11 @@ Java_com_facesdk_wrapper_FaceSDKWrapper_nativeInit(JNIEnv *env, jobject thiz, js
        
        if (g_sdk_instance && g_sdk_instance->IsInitialized()) {
            LOGI("SDK Initialized successfully.");
-            // 返回实例的指针地址 (转为 long)
+            
            return (jlong)g_sdk_instance.get();
        } else {
            LOGE("SDK g_sdk_instance->IsInitialized() failed.");
-            g_sdk_instance.reset(); // 释放内存
+            g_sdk_instance.reset(); 
            return 0;
        }
    } catch (const std::exception& e) {
@ -98,22 +92,14 @@ Java_com_facesdk_wrapper_FaceSDKWrapper_nativeInit(JNIEnv *env, jobject thiz, js
    }
 }

-/**
- * @brief Release the SDK
- */
+// JNI entry point: logs, then resets the global g_sdk_instance pointer.
 JNIEXPORT void JNICALL
 Java_com_facesdk_wrapper_FaceSDKWrapper_nativeRelease(JNIEnv *env, jobject thiz) {
    LOGI("Releasing SDK instance.");
-    g_sdk_instance.reset(); // Free the memory managed by the smart pointer
+    g_sdk_instance.reset(); // destroys the FaceSDK held by the unique_ptr, if any
 }

-/**
- * @brief 提取特征
- * @param env JNIEnv
- * @param thiz Java 层的 'this'
- * @param j_bitmap (Bitmap) 待处理的图像
- * @return (float[]) 512维特征向量，如果失败则返回 null
- */
+
 JNIEXPORT jfloatArray JNICALL
 Java_com_facesdk_wrapper_FaceSDKWrapper_nativeExtractFeature(JNIEnv *env, jobject thiz, jobject j_bitmap) {
    if (!g_sdk_instance) {
@ -121,14 +107,14 @@ Java_com_facesdk_wrapper_FaceSDKWrapper_nativeExtractFeature(JNIEnv *env, jobjec
        return nullptr;
    }

-    // 1. Bitmap -> cv::Mat
+    
    cv::Mat image_bgr;
    if (!ConvertBitmapToMat(env, j_bitmap, image_bgr)) {
        LOGE("Failed to convert Bitmap to cv::Mat");
        return nullptr;
    }

-    // 2. 调用 C++ API
+    
    SDKExtractResult result = g_sdk_instance->ExtractFeature(image_bgr);

    if (result.status != SDKStatus::SUCCESS) {
@ -136,7 +122,7 @@ Java_com_facesdk_wrapper_FaceSDKWrapper_nativeExtractFeature(JNIEnv *env, jobjec
        return nullptr;
    }

-    // 3. std::vector<float> -> jfloatArray
+    
    jfloatArray j_feature = env->NewFloatArray(result.feature.size());
    if (j_feature == nullptr) {
        LOGE("Failed to create new jfloatArray");
@ -147,34 +133,27 @@ Java_com_facesdk_wrapper_FaceSDKWrapper_nativeExtractFeature(JNIEnv *env, jobjec
    return j_feature;
 }

-/**
- * @brief Compare features
- * @param env JNIEnv
- * @param thiz 'this' of the Java layer
- * @param j_feat1 (float[]) feature 1
- * @param j_feat2 (float[]) feature 2
- * @return (float) cosine similarity
- */
+// JNI entry point: copies both Java float[] arguments into std::vector<float> and returns FaceSDK::Compare on them.
 JNIEXPORT jfloat JNICALL
 Java_com_facesdk_wrapper_FaceSDKWrapper_nativeCompare(JNIEnv *env, jobject thiz, jfloatArray j_feat1, jfloatArray j_feat2) {
    if (!g_sdk_instance) {
        LOGE("SDK not initialized.");
-        return -2.0f; // return an invalid value
+        return -2.0f; // sentinel returned when there is no SDK instance
    }

-    // 1. jfloatArray -> std::vector<float>
+    // 1. Copy j_feat1 into a std::vector<float>, then release the JNI buffer.
    jsize len1 = env->GetArrayLength(j_feat1);
    jfloat* body1 = env->GetFloatArrayElements(j_feat1, nullptr); // NOTE(review): result is used without a null check
    std::vector<float> feat1(body1, body1 + len1);
    env->ReleaseFloatArrayElements(j_feat1, body1, 0); // NOTE(review): mode 0 copies back; the buffer is only read here, so JNI_ABORT may suffice

-    // 2. jfloatArray -> std::vector<float>
+    // 2. Same copy-and-release sequence for j_feat2.
    jsize len2 = env->GetArrayLength(j_feat2);
    jfloat* body2 = env->GetFloatArrayElements(j_feat2, nullptr);
    std::vector<float> feat2(body2, body2 + len2);
    env->ReleaseFloatArrayElements(j_feat2, body2, 0);

-    // 3. Call the C++ API
+    // 3. Delegate to the C++ API.
    return g_sdk_instance->Compare(feat1, feat2);
 }