增加模糊图片增强功能

2025-11-17 16:39:05 +08:00 · 2025-11-17 16:39:05 +08:00 · 54d5deb832
parent aafd81fc5f
commit 54d5deb832
2 changed files with 99 additions and 153 deletions
--- a/src/face_pipeline.cpp
+++ b/src/face_pipeline.cpp
@ -2,7 +2,6 @@
 #include <string>
 #include <vector>

-
 FacePipeline::FacePipeline(const std::string &model_dir)
    : m_env(ORT_LOGGING_LEVEL_WARNING, "FaceSDK"),
      m_memory_info(
@ -22,7 +21,6 @@ FacePipeline::FacePipeline(const std::string &model_dir)

 FacePipeline::~FacePipeline() {}

-
 bool FacePipeline::LoadModels(const std::string &model_dir) {
  auto load_session = [&](std::unique_ptr<Ort::Session> &session,
                          const std::string &model_name) {
@ -57,7 +55,6 @@ bool FacePipeline::LoadModels(const std::string &model_dir) {
  return true;
 }

-
 void FacePipeline::InitMemoryAllocators() {

  auto get_io_names = [&](Ort::Session *session,
@ -99,7 +96,6 @@ void FacePipeline::InitMemoryAllocators() {
        throw std::runtime_error("Model input shape is empty");
      }

-      
      std::string shape_str = "[";
      for (long long dim : input_shape)
        shape_str += std::to_string(dim) + ", ";
@ -113,7 +109,6 @@ void FacePipeline::InitMemoryAllocators() {
    }
  };

-  
  get_io_names(m_session_rotator.get(), m_rot_input_names, m_rot_output_names,
               m_rot_input_shape, "Rotator");
  get_io_names(m_session_detector.get(), m_det_input_names, m_det_output_names,
@ -129,7 +124,6 @@ void FacePipeline::InitMemoryAllocators() {
  get_io_names(m_session_recognizer.get(), m_rec_input_names,
               m_rec_output_names, m_rec_input_shape, "Recognizer");

-  
  if (m_det_input_shape.size() < 4) {
    LOGE("Detector input shape has < 4 dimensions! Cannot generate anchors.");
    throw std::runtime_error("Detector input shape invalid");
@ -145,10 +139,8 @@ void FacePipeline::InitMemoryAllocators() {
  }
  generate_anchors_faceboxes(m_det_input_shape[2], m_det_input_shape[3]);

-  
  size_t max_blob_size = 0;

-  
  auto update_max = [&](const std::vector<int64_t> &shape,
                        const char *model_name) {
    if (shape.size() <= 1) {
@ -179,7 +171,6 @@ void FacePipeline::InitMemoryAllocators() {
  update_max(m_lm1_input_shape, "Landmarker1");
  update_max(m_rec_input_shape, "Recognizer");

-
  if (max_blob_size == 0) {
    LOGE(
        "Max blob size is 0, something went wrong with model shape detection!");
@ -191,7 +182,6 @@ void FacePipeline::InitMemoryAllocators() {
  LOGI("m_blob_buffer resized successfully.");
 }

-
 void FacePipeline::image_to_blob(const cv::Mat &img, std::vector<float> &blob,
                                 const float *mean, const float *std) {
  int channels = img.channels();
@ -221,8 +211,6 @@ FacePipeline::create_tensor(const std::vector<float> &blob_data,
      input_shape.data(), input_shape.size());
 }

-
-
 bool FacePipeline::Extract(const cv::Mat &image, std::vector<float> &feature) {
  if (!m_initialized) {
    LOGE("Extract failed: Pipeline is not initialized.");
@ -233,8 +221,6 @@ bool FacePipeline::Extract(const cv::Mat &image, std::vector<float> &feature) {
    return false;
  }

-  
-  
  int rot_angle_code = RunRotation(image);
  cv::Mat upright_image;
  if (rot_angle_code >= 0) {
@ -243,8 +229,6 @@ bool FacePipeline::Extract(const cv::Mat &image, std::vector<float> &feature) {
    upright_image = image;
  }

-  
-  
  std::vector<FaceBox> boxes;
  if (!RunDetection(upright_image, boxes)) {
    LOGI("Extract failed: No face detected.");
@ -252,9 +236,6 @@ bool FacePipeline::Extract(const cv::Mat &image, std::vector<float> &feature) {
  }
  FaceBox best_box = boxes[0];

-  
-  
-  
  cv::Rect face_rect_raw(best_box.x1, best_box.y1, best_box.x2 - best_box.x1,
                         best_box.y2 - best_box.y1);
  int pad_top = std::max(0, -face_rect_raw.y);
@ -273,7 +254,6 @@ bool FacePipeline::Extract(const cv::Mat &image, std::vector<float> &feature) {
                            face_rect_raw.y + pad_top, face_rect_raw.width,
                            face_rect_raw.height);

-  
  if (face_rect_padded.width <= 0 || face_rect_padded.height <= 0 ||
      face_rect_padded.x < 0 || face_rect_padded.y < 0 ||
      face_rect_padded.x + face_rect_padded.width > face_crop_padded.cols ||
@ -288,33 +268,24 @@ bool FacePipeline::Extract(const cv::Mat &image, std::vector<float> &feature) {
    return false;
  }

-  
-  
  FaceLandmark landmark;
  if (!RunLandmark(upright_image, best_box, landmark)) {
    LOGI("Extract failed: Landmark detection failed.");
    return false;
  }

-  
-  
  cv::Mat aligned_face = RunAlignment(upright_image, landmark);
  if (aligned_face.empty()) {
    LOGI("Extract failed: Alignment produced an empty image.");
    return false;
  }

-  
-  
  FacePose pose;
-  if (!RunPose(aligned_face, pose)) 
-  {
+  if (!RunPose(aligned_face, pose)) {
    LOGI("Extract failed: Pose estimation failed.");
    return false;
  }

-  
-  
  if (std::abs(pose.yaw) > m_pose_yaw_threshold ||
      std::abs(pose.pitch) > m_pose_pitch_threshold) {
    LOGI("Extract failed: Face pose (Y:%.1f, P:%.1f) exceeds threshold "
@ -322,45 +293,36 @@ bool FacePipeline::Extract(const cv::Mat &image, std::vector<float> &feature) {
         pose.yaw, pose.pitch, m_pose_yaw_threshold, m_pose_pitch_threshold);
    return false;
  }
+  cv::Mat enhanced_face_region = PreprocessSmallFace(face_region);

-  
-  
-  if (!CheckResolution(face_region)) {
+  if (!CheckResolution(enhanced_face_region)) {
    LOGI("Extract failed: Resolution (H:%d, W:%d) below threshold (%d, %d)",
-         face_region.rows, face_region.cols, m_quality_min_resolution.height,
-         m_quality_min_resolution.width);
+         enhanced_face_region.rows, enhanced_face_region.cols,
+         m_quality_min_resolution.height, m_quality_min_resolution.width);
    return false;
  }

-  
-  
-  if (!CheckBrightness(face_region)) {
+  if (!CheckBrightness(enhanced_face_region)) {
    LOGI("Extract failed: Brightness check failed (thresholds [%.1f, %.1f]).",
         m_quality_bright_v1, m_quality_bright_v2);
    return false;
  }

-  
-  
-  if (!CheckClarity(face_region)) {
+  if (!CheckClarity(enhanced_face_region)) {
    LOGI("Extract failed: Clarity check failed (threshold [%.2f]).",
         m_quality_clarity_low_thresh);
    return false;
  }

-  
-  
  if (!RunRecognition(aligned_face, feature)) {
    LOGI("Extract failed: Feature recognition failed.");
    return false;
  }

-  
  LOGI("Extract success.");
  return true;
 }

-
 void FacePipeline::preprocess_rotation(const cv::Mat &image,
                                       std::vector<float> &blob_data) {
  cv::Mat gray_img, resized, cropped, gray_3d;
@ -371,10 +333,8 @@ void FacePipeline::preprocess_rotation(const cv::Mat &image,
  cropped = resized(crop_rect);
  cv::cvtColor(cropped, gray_3d, cv::COLOR_GRAY2BGR);

-  
  const float mean[3] = {0.0f, 0.0f, 0.0f};
-  const float std[3] = {1.0f / 255.0f, 1.0f / 255.0f,
-                        1.0f / 255.0f}; 
+  const float std[3] = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
  image_to_blob(gray_3d, blob_data, mean, std);
 }

@ -390,7 +350,6 @@ int FacePipeline::RunRotation(const cv::Mat &image) {
  int max_index = std::distance(output_data,
                                std::max_element(output_data, output_data + 4));

-  
  if (max_index == 1)
    return cv::ROTATE_90_CLOCKWISE;
  if (max_index == 2)
@ -400,14 +359,12 @@ int FacePipeline::RunRotation(const cv::Mat &image) {
  return -1;
 }

-
 void FacePipeline::preprocess_detection(const cv::Mat &img,
                                        std::vector<float> &blob_data) {
  cv::Mat resized;
  cv::resize(img, resized,
             cv::Size(m_det_input_shape[3], m_det_input_shape[2]));

-  
  const float mean[3] = {104.0f, 117.0f, 123.0f};
  const float std[3] = {1.0f, 1.0f, 1.0f};
  image_to_blob(resized, blob_data, mean, std);
@ -425,10 +382,8 @@ bool FacePipeline::RunDetection(const cv::Mat &image,
      Ort::RunOptions{nullptr}, m_det_input_names.data(), &input_tensor, 1,
      m_det_output_names.data(), 2);

-  const float *bboxes_data =
-      output_tensors[0].GetTensorData<float>(); 
-  const float *probs_data =
-      output_tensors[1].GetTensorData<float>(); 
+  const float *bboxes_data = output_tensors[0].GetTensorData<float>();
+  const float *probs_data = output_tensors[1].GetTensorData<float>();
  long num_anchors =
      output_tensors[0].GetTensorTypeAndShapeInfo().GetShape()[1];

@ -462,8 +417,7 @@ bool FacePipeline::RunDetection(const cv::Mat &image,
         (cx + w / 2.0f) * img_width, (cy + h / 2.0f) * img_height, conf});
  }

-  boxes = hard_nms(bbox_collection, m_det_iou_threshold,
-                   m_det_topk); 
+  boxes = hard_nms(bbox_collection, m_det_iou_threshold, m_det_topk);
  return !boxes.empty();
 }

@ -516,13 +470,9 @@ void FacePipeline::generate_anchors_faceboxes(int target_height,
  }
 }

-
 void FacePipeline::preprocess_pose(const cv::Mat &img,
                                   std::vector<float> &blob_data) {

-  
-  
-  
  float pad = 0.3f;
  int h = img.rows;
  int w = img.cols;
@ -535,11 +485,9 @@ void FacePipeline::preprocess_pose(const cv::Mat &img,
  img.copyTo(canvas(cv::Rect(nx1, ny1, w, h)));

  cv::Mat resized;
-  cv::resize(
-      canvas, resized,
+  cv::resize(canvas, resized,
             cv::Size(m_pose_var_input_shape[3], m_pose_var_input_shape[2]));

-  
  const float mean[3] = {127.5f, 127.5f, 127.5f};
  const float std[3] = {1.0f / 127.5f, 1.0f / 127.5f, 1.0f / 127.5f};
  image_to_blob(resized, blob_data, mean, std);
@ -549,13 +497,11 @@ bool FacePipeline::RunPose(const cv::Mat &face_input, FacePose &pose) {

  preprocess_pose(face_input, m_blob_buffer);

-  
  auto input_tensor_var = create_tensor(m_blob_buffer, m_pose_var_input_shape);
  auto output_var = m_session_pose_var->Run(
      Ort::RunOptions{nullptr}, m_pose_var_input_names.data(),
      &input_tensor_var, 1, m_pose_var_output_names.data(), 1);

-  
  auto input_tensor_conv =
      create_tensor(m_blob_buffer, m_pose_conv_input_shape);
  auto output_conv = m_session_pose_conv->Run(
@ -565,14 +511,12 @@ bool FacePipeline::RunPose(const cv::Mat &face_input, FacePose &pose) {
  const float *data_var = output_var[0].GetTensorData<float>();
  const float *data_conv = output_conv[0].GetTensorData<float>();

-  
  pose.yaw = (data_var[0] + data_conv[0]) / 2.0f;
  pose.pitch = (data_var[1] + data_conv[1]) / 2.0f;
  pose.roll = (data_var[2] + data_conv[2]) / 2.0f;
  return true;
 }

-
 void FacePipeline::preprocess_landmark_net1(const cv::Mat &img,
                                            std::vector<float> &blob_data) {
  cv::Mat resized, gray_img;
@ -580,13 +524,11 @@ void FacePipeline::preprocess_landmark_net1(const cv::Mat &img,
             cv::Size(m_lm1_input_shape[3], m_lm1_input_shape[2]));
  cv::cvtColor(resized, gray_img, cv::COLOR_BGR2GRAY);

-  
  const float mean[1] = {0.0f};
  const float std[1] = {1.0f};
  image_to_blob(gray_img, blob_data, mean, std);
 }

-
 std::vector<float>
 FacePipeline::shape_index_process(const Ort::Value &feat_val,
                                  const Ort::Value &pos_val) {
@ -671,34 +613,27 @@ bool FacePipeline::RunLandmark(const cv::Mat &image, const FaceBox &box,
                            face_rect_raw.height);
  cv::Mat face_crop = face_crop_padded(face_rect_padded);

-  
  preprocess_landmark_net1(face_crop, m_blob_buffer);
  auto input_tensor_net1 = create_tensor(m_blob_buffer, m_lm1_input_shape);

-  
  auto output_net1 = m_session_landmarker1->Run(
      Ort::RunOptions{nullptr}, m_lm1_input_names.data(), &input_tensor_net1, 1,
      m_lm1_output_names.data(), 2);

-  
  std::vector<float> shape_index_blob =
      shape_index_process(output_net1[0], output_net1[1]);

-  
  auto input_tensor_net2 = Ort::Value::CreateTensor<float>(
      m_memory_info, shape_index_blob.data(), shape_index_blob.size(),
      m_lm2_input_shape.data(), m_lm2_input_shape.size());

-  
  auto output_net2 = m_session_landmarker2->Run(
      Ort::RunOptions{nullptr}, m_lm2_input_names.data(), &input_tensor_net2, 1,
      m_lm2_output_names.data(), 1);

-  
  const float *data_net1_pos = output_net1[1].GetTensorData<float>();
  const float *data_net2 = output_net2[0].GetTensorData<float>();
-  auto shape_net1_pos =
-      output_net1[1].GetTensorTypeAndShapeInfo().GetShape(); 
+  auto shape_net1_pos = output_net1[1].GetTensorTypeAndShapeInfo().GetShape();
  int landmark_x2 = shape_net1_pos[1];

  float scale_x = (box.x2 - box.x1) / 112.0f;
@ -719,7 +654,6 @@ bool FacePipeline::RunLandmark(const cv::Mat &image, const FaceBox &box,
  return true;
 }

-
 cv::Mat FacePipeline::RunAlignment(const cv::Mat &image,
                                   const FaceLandmark &landmark) {

@ -732,35 +666,27 @@ cv::Mat FacePipeline::RunAlignment(const cv::Mat &image,
                                     m_landmark_template.at<float>(i, 1)));
  }

-  
-  
-  
  cv::Mat transform_matrix =
      cv::estimateAffinePartial2D(src_points, dst_points);

  cv::Mat aligned_face;

-  
  cv::warpAffine(image, aligned_face, transform_matrix, m_align_output_size,
                 cv::INTER_LINEAR);

  return aligned_face;
 }

-
 void FacePipeline::preprocess_recognition(const cv::Mat &img,
                                          std::vector<float> &blob_data) {
  cv::Mat resized, rgb_img;

  const cv::Size target_size(248, 248);

-  
  cv::resize(img, resized, target_size);

-  
  cv::cvtColor(resized, rgb_img, cv::COLOR_BGR2RGB);

-  
  const float mean[3] = {0.0f, 0.0f, 0.0f};
  const float std[3] = {1.0f, 1.0f, 1.0f};
  image_to_blob(rgb_img, blob_data, mean, std);
@ -774,7 +700,6 @@ void FacePipeline::normalize_sqrt_l2(std::vector<float> &v) {
    norm += val * val;
  }

-  
  if (norm > 1e-6) {
    norm = std::sqrt(norm);
    for (float &val : v) {
@ -786,18 +711,12 @@ void FacePipeline::normalize_sqrt_l2(std::vector<float> &v) {
 bool FacePipeline::RunRecognition(const cv::Mat &aligned_face,
                                  std::vector<float> &feature) {

-
-  
  preprocess_recognition(aligned_face, m_blob_buffer);

-  
-  
  const std::vector<int64_t> hardcoded_shape = {1, 3, 248, 248};

-  
  auto input_tensor = create_tensor(m_blob_buffer, hardcoded_shape);

-  
  auto output_tensors = m_session_recognizer->Run(
      Ort::RunOptions{nullptr}, m_rec_input_names.data(), &input_tensor, 1,
      m_rec_output_names.data(), 1);
@ -809,15 +728,11 @@ bool FacePipeline::RunRecognition(const cv::Mat &aligned_face,
  feature.resize(feature_dim);
  memcpy(feature.data(), output_data, feature_dim * sizeof(float));

-  
  normalize_sqrt_l2(feature);

  return true;
 }

-
-
-
 bool FacePipeline::CheckResolution(const cv::Mat &face_region) {
  if (face_region.rows < m_quality_min_resolution.height ||
      face_region.cols < m_quality_min_resolution.width) {
@ -826,7 +741,6 @@ bool FacePipeline::CheckResolution(const cv::Mat &face_region) {
  return true;
 }

-
 bool FacePipeline::CheckBrightness(const cv::Mat &face_region) {
  cv::Mat gray;
  if (face_region.channels() == 3)
@ -836,17 +750,14 @@ bool FacePipeline::CheckBrightness(const cv::Mat &face_region) {

  float bright_value = grid_max_bright(gray, 3, 3);

-  
  return (bright_value >= m_quality_bright_v1 &&
          bright_value <= m_quality_bright_v2);
 }

-
 float FacePipeline::grid_max_bright(const cv::Mat &gray_img, int rows,
                                    int cols) {
  float max_bright = 0.0f;

-  
  if (rows == 0 || cols == 0)
    return 0.0f;
  int row_height = gray_img.rows / rows;
@ -867,14 +778,12 @@ float FacePipeline::grid_max_bright(const cv::Mat &gray_img, int rows,
  return max_bright;
 }

-
 bool FacePipeline::CheckClarity(const cv::Mat &face_region) {
  float clarity = clarity_estimate(face_region);

  return (clarity >= m_quality_clarity_low_thresh);
 }

-
 float FacePipeline::clarity_estimate(const cv::Mat &image) {
  cv::Mat gray;
  if (image.channels() == 3)
@ -885,11 +794,9 @@ float FacePipeline::clarity_estimate(const cv::Mat &image) {
  float blur_val = grid_max_reblur(gray, 2, 2);
  float clarity = 1.0f - blur_val;

-  
  return std::max(0.0f, std::min(1.0f, clarity));
 }

-
 float FacePipeline::grid_max_reblur(const cv::Mat &img, int rows, int cols) {

  int row_height = img.rows / rows;
@ -916,7 +823,6 @@ float FacePipeline::grid_max_reblur(const cv::Mat &img, int rows, int cols) {
  return std::max(max_blur_val, 0.0f);
 }

-
 float FacePipeline::reblur(const cv::Mat &data) {

  if (data.rows <= 1 || data.cols <= 1)
@ -953,3 +859,41 @@ float FacePipeline::reblur(const cv::Mat &data) {

  return std::max(b_FVer, b_FHor);
 }
+
+/// Enlarges and sharpens a face crop that is smaller than the minimum quality
+/// resolution. Crops that already meet the minimum are returned unchanged.
+///
+/// @param face_region Face crop to inspect.
+/// @return `face_region` itself when it is empty or already at least
+///         m_quality_min_resolution in both dimensions; otherwise a new image
+///         that has been upscaled with a single uniform scale factor and then
+///         sharpened.
+cv::Mat FacePipeline::PreprocessSmallFace(const cv::Mat &face_region) {
+  const int h = face_region.rows;
+  const int w = face_region.cols;
+  const int min_h = m_quality_min_resolution.height;
+  const int min_w = m_quality_min_resolution.width;
+
+  // An empty crop cannot be scaled: the ratios below would divide by zero and
+  // the resize target would be meaningless. Hand it back untouched.
+  if (h <= 0 || w <= 0) {
+    return face_region;
+  }
+
+  if (h >= min_h && w >= min_w) {
+    return face_region;
+  }
+
+  LOGI("PreprocessSmallFace: Input (H:%d, W:%d) is small. Enhancing...", h, w);
+
+  // Use the larger of the two per-axis ratios so that both dimensions reach
+  // the minimum while the aspect ratio is preserved.
+  const float scale_w = (w < min_w) ? static_cast<float>(min_w) / w : 1.0f;
+  const float scale_h = (h < min_h) ? static_cast<float>(min_h) / h : 1.0f;
+  const float scale = std::max(scale_w, scale_h);
+
+  // The float product is truncated when converted to int and can therefore
+  // land one pixel below the minimum it was computed to reach. Clamp to the
+  // minimum so the enlarged crop is never smaller than the target size.
+  const int new_width = std::max<int>(static_cast<int>(w * scale), min_w);
+  const int new_height = std::max<int>(static_cast<int>(h * scale), min_h);
+
+  cv::Mat resized;
+  cv::resize(face_region, resized, cv::Size(new_width, new_height), 0, 0,
+             cv::INTER_CUBIC);
+
+  // Unsharp mask: 1.5 * resized - 0.5 * blurred, i.e. the upscaled image plus
+  // half of the detail removed by the Gaussian blur.
+  cv::Mat blurred;
+  cv::GaussianBlur(resized, blurred, cv::Size(0, 0), 2.0);
+
+  cv::Mat sharpened;
+  cv::addWeighted(resized, 1.5, blurred, -0.5, 0, sharpened);
+
+  return sharpened;
+}
--- a/src/face_pipeline.h
+++ b/src/face_pipeline.h
@ -197,4 +197,6 @@ private:
  const float m_quality_bright_v2 = 230.0f;

  const float m_quality_clarity_low_thresh = 0.10f;
+
+  cv::Mat PreprocessSmallFace(const cv::Mat &face_region); // Upscales and sharpens crops below m_quality_min_resolution.
 };