diff --git a/src/face_pipeline.cpp b/src/face_pipeline.cpp index b1dac43..b927bb0 100644 --- a/src/face_pipeline.cpp +++ b/src/face_pipeline.cpp @@ -2,11 +2,12 @@ #include #include + FacePipeline::FacePipeline(const std::string &model_dir) : m_env(ORT_LOGGING_LEVEL_WARNING, "FaceSDK"), m_memory_info( Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault)) { - m_session_options.SetIntraOpNumThreads(4); + m_session_options.SetIntraOpNumThreads(4); m_session_options.SetGraphOptimizationLevel( GraphOptimizationLevel::ORT_ENABLE_ALL); @@ -21,6 +22,7 @@ FacePipeline::FacePipeline(const std::string &model_dir) FacePipeline::~FacePipeline() {} + bool FacePipeline::LoadModels(const std::string &model_dir) { auto load_session = [&](std::unique_ptr &session, const std::string &model_name) { @@ -55,8 +57,9 @@ bool FacePipeline::LoadModels(const std::string &model_dir) { return true; } -void FacePipeline::InitMemoryAllocators() { +void FacePipeline::InitMemoryAllocators() { + auto get_io_names = [&](Ort::Session *session, std::vector &input_names, std::vector &output_names, @@ -96,6 +99,7 @@ void FacePipeline::InitMemoryAllocators() { throw std::runtime_error("Model input shape is empty"); } + std::string shape_str = "["; for (long long dim : input_shape) shape_str += std::to_string(dim) + ", "; @@ -103,12 +107,13 @@ void FacePipeline::InitMemoryAllocators() { LOGI("Model %s input shape: %s", model_name, shape_str.c_str()); if (input_shape[0] < 1) - input_shape[0] = 1; + input_shape[0] = 1; } else { LOGE("Model %s has no inputs!", model_name); } }; + get_io_names(m_session_rotator.get(), m_rot_input_names, m_rot_output_names, m_rot_input_shape, "Rotator"); get_io_names(m_session_detector.get(), m_det_input_names, m_det_output_names, @@ -124,38 +129,41 @@ void FacePipeline::InitMemoryAllocators() { get_io_names(m_session_recognizer.get(), m_rec_input_names, m_rec_output_names, m_rec_input_shape, "Recognizer"); + if (m_det_input_shape.size() < 4) { LOGE("Detector input shape has < 4 dimensions! Cannot generate anchors."); throw std::runtime_error("Detector input shape invalid"); } - + if (m_det_input_shape[2] < 0 || m_det_input_shape[3] < 0) { LOGE("Detector input shape is dynamic (H/W is -1). This is not supported " "by the Python logic."); - + LOGI("Forcing detector H/W to 640x640."); m_det_input_shape[2] = 640; m_det_input_shape[3] = 640; } generate_anchors_faceboxes(m_det_input_shape[2], m_det_input_shape[3]); + size_t max_blob_size = 0; + auto update_max = [&](const std::vector &shape, const char *model_name) { if (shape.size() <= 1) { - return; + return; } size_t s = 1; - + for (size_t i = 1; i < shape.size(); ++i) { if (shape[i] < 0) { - + LOGE("Model %s has dynamic dimension at index %zu. 
Skipping for " "max_blob_size calculation.", model_name, i); - return; + return; } s *= static_cast(shape[i]); } @@ -170,6 +178,7 @@ void FacePipeline::InitMemoryAllocators() { update_max(m_pose_var_input_shape, "PoseVar"); update_max(m_lm1_input_shape, "Landmarker1"); update_max(m_rec_input_shape, "Recognizer"); + if (max_blob_size == 0) { LOGE( @@ -182,6 +191,7 @@ void FacePipeline::InitMemoryAllocators() { LOGI("m_blob_buffer resized successfully."); } + void FacePipeline::image_to_blob(const cv::Mat &img, std::vector &blob, const float *mean, const float *std) { int channels = img.channels(); @@ -211,6 +221,8 @@ FacePipeline::create_tensor(const std::vector &blob_data, input_shape.data(), input_shape.size()); } + + bool FacePipeline::Extract(const cv::Mat &image, std::vector &feature) { if (!m_initialized) { LOGE("Extract failed: Pipeline is not initialized."); @@ -221,6 +233,8 @@ bool FacePipeline::Extract(const cv::Mat &image, std::vector &feature) { return false; } + + int rot_angle_code = RunRotation(image); cv::Mat upright_image; if (rot_angle_code >= 0) { @@ -229,14 +243,18 @@ bool FacePipeline::Extract(const cv::Mat &image, std::vector &feature) { upright_image = image; } + + std::vector boxes; if (!RunDetection(upright_image, boxes)) { LOGI("Extract failed: No face detected."); return false; } - FaceBox best_box = boxes[0]; + + + cv::Rect face_rect_raw(best_box.x1, best_box.y1, best_box.x2 - best_box.x1, best_box.y2 - best_box.y1); int pad_top = std::max(0, -face_rect_raw.y); @@ -250,41 +268,99 @@ bool FacePipeline::Extract(const cv::Mat &image, std::vector &feature) { cv::copyMakeBorder(upright_image, face_crop_padded, pad_top, pad_bottom, pad_left, pad_right, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0)); + cv::Rect face_rect_padded(face_rect_raw.x + pad_left, face_rect_raw.y + pad_top, face_rect_raw.width, face_rect_raw.height); - cv::Mat face_crop = face_crop_padded(face_rect_padded); - FacePose pose; - if (!RunPose(face_crop, pose)) { - LOGI("Extract failed: Pose estimation failed."); + + if (face_rect_padded.width <= 0 || face_rect_padded.height <= 0 || + face_rect_padded.x < 0 || face_rect_padded.y < 0 || + face_rect_padded.x + face_rect_padded.width > face_crop_padded.cols || + face_rect_padded.y + face_rect_padded.height > face_crop_padded.rows) { + LOGE("Extract failed: Invalid face crop rectangle after padding."); return false; } - if (std::abs(pose.yaw) > m_pose_threshold || - std::abs(pose.pitch) > m_pose_threshold) { - LOGI("Extract failed: Face pose (Y:%.1f, P:%.1f) exceeds threshold (%.1f)", - pose.yaw, pose.pitch, m_pose_threshold); + cv::Mat face_region = face_crop_padded(face_rect_padded); + if (face_region.empty()) { + LOGI("Extract failed: face_region is empty after cropping."); return false; } + + FaceLandmark landmark; if (!RunLandmark(upright_image, best_box, landmark)) { LOGI("Extract failed: Landmark detection failed."); return false; } + + cv::Mat aligned_face = RunAlignment(upright_image, landmark); + if (aligned_face.empty()) { + LOGI("Extract failed: Alignment produced an empty image."); + return false; + } + + + FacePose pose; + if (!RunPose(aligned_face, pose)) + { + LOGI("Extract failed: Pose estimation failed."); + return false; + } + + + + if (std::abs(pose.yaw) > m_pose_yaw_threshold || + std::abs(pose.pitch) > m_pose_pitch_threshold) { + LOGI("Extract failed: Face pose (Y:%.1f, P:%.1f) exceeds threshold " + "(Y:%.1f, P:%.1f)", + pose.yaw, pose.pitch, m_pose_yaw_threshold, m_pose_pitch_threshold); + return false; + } + + + + if 
(!CheckResolution(face_region)) { + LOGI("Extract failed: Resolution (H:%d, W:%d) below threshold (%d, %d)", + face_region.rows, face_region.cols, m_quality_min_resolution.height, + m_quality_min_resolution.width); + return false; + } + + + + if (!CheckBrightness(face_region)) { + LOGI("Extract failed: Brightness check failed (thresholds [%.1f, %.1f]).", + m_quality_bright_v1, m_quality_bright_v2); + return false; + } + + + + if (!CheckClarity(face_region)) { + LOGI("Extract failed: Clarity check failed (threshold [%.2f]).", + m_quality_clarity_low_thresh); + return false; + } + + + if (!RunRecognition(aligned_face, feature)) { LOGI("Extract failed: Feature recognition failed."); return false; } + LOGI("Extract success."); return true; } + void FacePipeline::preprocess_rotation(const cv::Mat &image, std::vector &blob_data) { cv::Mat gray_img, resized, cropped, gray_3d; @@ -293,10 +369,12 @@ void FacePipeline::preprocess_rotation(const cv::Mat &image, int start = (256 - 224) / 2; cv::Rect crop_rect(start, start, 224, 224); cropped = resized(crop_rect); - cv::cvtColor(cropped, gray_3d, cv::COLOR_GRAY2BGR); + cv::cvtColor(cropped, gray_3d, cv::COLOR_GRAY2BGR); + const float mean[3] = {0.0f, 0.0f, 0.0f}; - const float std[3] = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; + const float std[3] = {1.0f / 255.0f, 1.0f / 255.0f, + 1.0f / 255.0f}; image_to_blob(gray_3d, blob_data, mean, std); } @@ -312,6 +390,7 @@ int FacePipeline::RunRotation(const cv::Mat &image) { int max_index = std::distance(output_data, std::max_element(output_data, output_data + 4)); + if (max_index == 1) return cv::ROTATE_90_CLOCKWISE; if (max_index == 2) @@ -321,13 +400,15 @@ int FacePipeline::RunRotation(const cv::Mat &image) { return -1; } + void FacePipeline::preprocess_detection(const cv::Mat &img, std::vector &blob_data) { cv::Mat resized; cv::resize(img, resized, - cv::Size(m_det_input_shape[3], m_det_input_shape[2])); + cv::Size(m_det_input_shape[3], m_det_input_shape[2])); - const float mean[3] = {104.0f, 117.0f, 123.0f}; + + const float mean[3] = {104.0f, 117.0f, 123.0f}; const float std[3] = {1.0f, 1.0f, 1.0f}; image_to_blob(resized, blob_data, mean, std); } @@ -342,10 +423,12 @@ bool FacePipeline::RunDetection(const cv::Mat &image, auto output_tensors = m_session_detector->Run( Ort::RunOptions{nullptr}, m_det_input_names.data(), &input_tensor, 1, - m_det_output_names.data(), 2); + m_det_output_names.data(), 2); - const float *bboxes_data = output_tensors[0].GetTensorData(); - const float *probs_data = output_tensors[1].GetTensorData(); + const float *bboxes_data = + output_tensors[0].GetTensorData(); + const float *probs_data = + output_tensors[1].GetTensorData(); long num_anchors = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape()[1]; @@ -356,10 +439,10 @@ bool FacePipeline::RunDetection(const cv::Mat &image, } std::vector bbox_collection; - const float variance[2] = {0.1f, 0.2f}; + const float variance[2] = {0.1f, 0.2f}; for (long i = 0; i < num_anchors; ++i) { - float conf = probs_data[i * 2 + 1]; + float conf = probs_data[i * 2 + 1]; if (conf < m_det_threshold) continue; @@ -369,23 +452,24 @@ bool FacePipeline::RunDetection(const cv::Mat &image, float dw = bboxes_data[i * 4 + 2]; float dh = bboxes_data[i * 4 + 3]; - float cx = anchor.cx + dx * variance[0] * anchor.s_kx; - float cy = anchor.cy + dy * variance[0] * anchor.s_ky; - float w = anchor.s_kx * std::exp(dw * variance[1]); - float h = anchor.s_ky * std::exp(dh * variance[1]); + float cx = anchor.cx + dx * variance[0] * anchor.s_kx; + float cy = 
anchor.cy + dy * variance[0] * anchor.s_ky; + float w = anchor.s_kx * std::exp(dw * variance[1]); + float h = anchor.s_ky * std::exp(dh * variance[1]); bbox_collection.push_back( {(cx - w / 2.0f) * img_width, (cy - h / 2.0f) * img_height, (cx + w / 2.0f) * img_width, (cy + h / 2.0f) * img_height, conf}); } - boxes = hard_nms(bbox_collection, m_det_iou_threshold, m_det_topk); + boxes = hard_nms(bbox_collection, m_det_iou_threshold, + m_det_topk); return !boxes.empty(); } void FacePipeline::generate_anchors_faceboxes(int target_height, int target_width) { - + m_anchors.clear(); std::vector steps = {32, 64, 128}; std::vector> min_sizes = {{32, 64, 128}, {256}, {512}}; @@ -432,8 +516,13 @@ void FacePipeline::generate_anchors_faceboxes(int target_height, } } + void FacePipeline::preprocess_pose(const cv::Mat &img, std::vector &blob_data) { + + + + float pad = 0.3f; int h = img.rows; int w = img.cols; @@ -446,22 +535,27 @@ void FacePipeline::preprocess_pose(const cv::Mat &img, img.copyTo(canvas(cv::Rect(nx1, ny1, w, h))); cv::Mat resized; - cv::resize(canvas, resized, - cv::Size(m_pose_var_input_shape[3], m_pose_var_input_shape[2])); + cv::resize( + canvas, resized, + cv::Size(m_pose_var_input_shape[3], m_pose_var_input_shape[2])); + const float mean[3] = {127.5f, 127.5f, 127.5f}; const float std[3] = {1.0f / 127.5f, 1.0f / 127.5f, 1.0f / 127.5f}; image_to_blob(resized, blob_data, mean, std); } -bool FacePipeline::RunPose(const cv::Mat &face_crop, FacePose &pose) { - preprocess_pose(face_crop, m_blob_buffer); +bool FacePipeline::RunPose(const cv::Mat &face_input, FacePose &pose) { + + preprocess_pose(face_input, m_blob_buffer); + auto input_tensor_var = create_tensor(m_blob_buffer, m_pose_var_input_shape); auto output_var = m_session_pose_var->Run( Ort::RunOptions{nullptr}, m_pose_var_input_names.data(), &input_tensor_var, 1, m_pose_var_output_names.data(), 1); + auto input_tensor_conv = create_tensor(m_blob_buffer, m_pose_conv_input_shape); auto output_conv = m_session_pose_conv->Run( @@ -471,24 +565,28 @@ bool FacePipeline::RunPose(const cv::Mat &face_crop, FacePose &pose) { const float *data_var = output_var[0].GetTensorData(); const float *data_conv = output_conv[0].GetTensorData(); + pose.yaw = (data_var[0] + data_conv[0]) / 2.0f; pose.pitch = (data_var[1] + data_conv[1]) / 2.0f; pose.roll = (data_var[2] + data_conv[2]) / 2.0f; return true; } + void FacePipeline::preprocess_landmark_net1(const cv::Mat &img, std::vector &blob_data) { cv::Mat resized, gray_img; cv::resize(img, resized, - cv::Size(m_lm1_input_shape[3], m_lm1_input_shape[2])); - cv::cvtColor(resized, gray_img, cv::COLOR_BGR2GRAY); + cv::Size(m_lm1_input_shape[3], m_lm1_input_shape[2])); + cv::cvtColor(resized, gray_img, cv::COLOR_BGR2GRAY); + const float mean[1] = {0.0f}; const float std[1] = {1.0f}; image_to_blob(gray_img, blob_data, mean, std); } + std::vector FacePipeline::shape_index_process(const Ort::Value &feat_val, const Ort::Value &pos_val) { @@ -497,13 +595,13 @@ FacePipeline::shape_index_process(const Ort::Value &feat_val, const float *feat_data = feat_val.GetTensorData(); const float *pos_data = pos_val.GetTensorData(); - long feat_n = feat_shape[0]; + long feat_n = feat_shape[0]; long feat_c = feat_shape[1]; long feat_h = feat_shape[2]; long feat_w = feat_shape[3]; - long pos_n = pos_shape[0]; - long landmark_x2 = pos_shape[1]; - int landmark_num = landmark_x2 / 2; + long pos_n = pos_shape[0]; + long landmark_x2 = pos_shape[1]; + int landmark_num = landmark_x2 / 2; float m_origin[] = {112.0f, 112.0f}; float 
m_origin_patch[] = {15.0f, 15.0f}; @@ -557,7 +655,7 @@ FacePipeline::shape_index_process(const Ort::Value &feat_val, bool FacePipeline::RunLandmark(const cv::Mat &image, const FaceBox &box, FaceLandmark &landmark) { - + cv::Rect face_rect_raw(box.x1, box.y1, box.x2 - box.x1, box.y2 - box.y1); int pad_top = std::max(0, -face_rect_raw.y); int pad_bottom = @@ -573,33 +671,41 @@ bool FacePipeline::RunLandmark(const cv::Mat &image, const FaceBox &box, face_rect_raw.height); cv::Mat face_crop = face_crop_padded(face_rect_padded); + preprocess_landmark_net1(face_crop, m_blob_buffer); auto input_tensor_net1 = create_tensor(m_blob_buffer, m_lm1_input_shape); + auto output_net1 = m_session_landmarker1->Run( Ort::RunOptions{nullptr}, m_lm1_input_names.data(), &input_tensor_net1, 1, - m_lm1_output_names.data(), 2); + m_lm1_output_names.data(), 2); + std::vector shape_index_blob = shape_index_process(output_net1[0], output_net1[1]); + auto input_tensor_net2 = Ort::Value::CreateTensor( m_memory_info, shape_index_blob.data(), shape_index_blob.size(), m_lm2_input_shape.data(), m_lm2_input_shape.size()); + auto output_net2 = m_session_landmarker2->Run( Ort::RunOptions{nullptr}, m_lm2_input_names.data(), &input_tensor_net2, 1, m_lm2_output_names.data(), 1); + const float *data_net1_pos = output_net1[1].GetTensorData(); const float *data_net2 = output_net2[0].GetTensorData(); - auto shape_net1_pos = output_net1[1].GetTensorTypeAndShapeInfo().GetShape(); + auto shape_net1_pos = + output_net1[1].GetTensorTypeAndShapeInfo().GetShape(); int landmark_x2 = shape_net1_pos[1]; float scale_x = (box.x2 - box.x1) / 112.0f; float scale_y = (box.y2 - box.y1) / 112.0f; for (int i = 0; i < 5; ++i) { + float x_norm = (data_net2[i * 2 + 0] + data_net1_pos[i * 2 + 0]) * 112.0f; float y_norm = (data_net2[i * 2 + 1] + data_net1_pos[i * 2 + 1]) * 112.0f; @@ -613,9 +719,10 @@ bool FacePipeline::RunLandmark(const cv::Mat &image, const FaceBox &box, return true; } + cv::Mat FacePipeline::RunAlignment(const cv::Mat &image, const FaceLandmark &landmark) { - + std::vector src_points; std::vector dst_points; @@ -625,40 +732,49 @@ cv::Mat FacePipeline::RunAlignment(const cv::Mat &image, m_landmark_template.at(i, 1))); } + + + cv::Mat transform_matrix = cv::estimateAffinePartial2D(src_points, dst_points); cv::Mat aligned_face; - + + cv::warpAffine(image, aligned_face, transform_matrix, m_align_output_size, cv::INTER_LINEAR); return aligned_face; } + void FacePipeline::preprocess_recognition(const cv::Mat &img, std::vector &blob_data) { cv::Mat resized, rgb_img; const cv::Size target_size(248, 248); + cv::resize(img, resized, target_size); + cv::cvtColor(resized, rgb_img, cv::COLOR_BGR2RGB); + const float mean[3] = {0.0f, 0.0f, 0.0f}; const float std[3] = {1.0f, 1.0f, 1.0f}; image_to_blob(rgb_img, blob_data, mean, std); } void FacePipeline::normalize_sqrt_l2(std::vector &v) { - + double norm = 0.0; for (float &val : v) { - val = std::sqrt(std::max(0.0f, val)); + val = std::sqrt(std::max(0.0f, val)); norm += val * val; } + if (norm > 1e-6) { norm = std::sqrt(norm); for (float &val : v) { @@ -669,13 +785,19 @@ void FacePipeline::normalize_sqrt_l2(std::vector &v) { bool FacePipeline::RunRecognition(const cv::Mat &aligned_face, std::vector &feature) { + + preprocess_recognition(aligned_face, m_blob_buffer); - const std::vector hardcoded_shape = {1, 3, 248, 248}; + + + const std::vector hardcoded_shape = {1, 3, 248, 248}; + auto input_tensor = create_tensor(m_blob_buffer, hardcoded_shape); + auto output_tensors = m_session_recognizer->Run( 
Ort::RunOptions{nullptr}, m_rec_input_names.data(), &input_tensor, 1, m_rec_output_names.data(), 1); @@ -687,7 +809,147 @@ bool FacePipeline::RunRecognition(const cv::Mat &aligned_face, feature.resize(feature_dim); memcpy(feature.data(), output_data, feature_dim * sizeof(float)); + normalize_sqrt_l2(feature); return true; +} + + + + +bool FacePipeline::CheckResolution(const cv::Mat &face_region) { + if (face_region.rows < m_quality_min_resolution.height || + face_region.cols < m_quality_min_resolution.width) { + return false; + } + return true; +} + + +bool FacePipeline::CheckBrightness(const cv::Mat &face_region) { + cv::Mat gray; + if (face_region.channels() == 3) + cv::cvtColor(face_region, gray, cv::COLOR_BGR2GRAY); + else + gray = face_region; + + float bright_value = grid_max_bright(gray, 3, 3); + + + return (bright_value >= m_quality_bright_v1 && + bright_value <= m_quality_bright_v2); +} + + +float FacePipeline::grid_max_bright(const cv::Mat &gray_img, int rows, + int cols) { + float max_bright = 0.0f; + + + if (rows == 0 || cols == 0) + return 0.0f; + int row_height = gray_img.rows / rows; + int col_width = gray_img.cols / cols; + if (row_height == 0 || col_width == 0) + return 0.0f; + + for (int y = 0; y < rows; ++y) { + for (int x = 0; x < cols; ++x) { + cv::Rect grid_rect(x * col_width, y * row_height, col_width, row_height); + cv::Mat grid = gray_img(grid_rect); + cv::Scalar mean_val = cv::mean(grid); + if (mean_val[0] > max_bright) { + max_bright = static_cast(mean_val[0]); + } + } + } + return max_bright; +} + + +bool FacePipeline::CheckClarity(const cv::Mat &face_region) { + float clarity = clarity_estimate(face_region); + + return (clarity >= m_quality_clarity_low_thresh); +} + + +float FacePipeline::clarity_estimate(const cv::Mat &image) { + cv::Mat gray; + if (image.channels() == 3) + cv::cvtColor(image, gray, cv::COLOR_BGR2GRAY); + else + gray = image; + + float blur_val = grid_max_reblur(gray, 2, 2); + float clarity = 1.0f - blur_val; + + + return std::max(0.0f, std::min(1.0f, clarity)); +} + + +float FacePipeline::grid_max_reblur(const cv::Mat &img, int rows, int cols) { + + int row_height = img.rows / rows; + int col_width = img.cols / cols; + if (row_height == 0 || col_width == 0) + return 1.0f; + + float max_blur_val = -FLT_MAX; + cv::Mat data_float; + img.convertTo(data_float, CV_32F); + + for (int y = 0; y < rows; ++y) { + for (int x = 0; x < cols; ++x) { + cv::Rect grid_rect(x * col_width, y * row_height, col_width, row_height); + if (grid_rect.width < 1 || grid_rect.height < 1) + continue; + + float blur_val = reblur(data_float(grid_rect)); + if (blur_val > max_blur_val) { + max_blur_val = blur_val; + } + } + } + return std::max(max_blur_val, 0.0f); +} + + +float FacePipeline::reblur(const cv::Mat &data) { + + if (data.rows <= 1 || data.cols <= 1) + return 1.0f; + + cv::Mat kernel_v = cv::Mat::ones(9, 1, CV_32F) / 9.0f; + cv::Mat kernel_h = cv::Mat::ones(1, 9, CV_32F) / 9.0f; + cv::Mat BVer, BHor; + + cv::filter2D(data, BVer, CV_32F, kernel_v, cv::Point(-1, -1), 0, + cv::BORDER_REPLICATE); + cv::filter2D(data, BHor, CV_32F, kernel_h, cv::Point(-1, -1), 0, + cv::BORDER_REPLICATE); + + cv::Mat D_Fver, D_BVer, D_FHor, D_BHor; + cv::absdiff(data.rowRange(1, data.rows), data.rowRange(0, data.rows - 1), + D_Fver); + cv::absdiff(BVer.rowRange(1, BVer.rows), BVer.rowRange(0, BVer.rows - 1), + D_BVer); + cv::absdiff(data.colRange(1, data.cols), data.colRange(0, data.cols - 1), + D_FHor); + cv::absdiff(BHor.colRange(1, BHor.cols), BHor.colRange(0, BHor.cols - 1), + 
D_BHor); + + double s_FVer = cv::sum(D_Fver)[0]; + double s_Vver = cv::sum(cv::max(0.0, D_Fver - D_BVer))[0]; + double s_FHor = cv::sum(D_FHor)[0]; + double s_VHor = cv::sum(cv::max(0.0, D_FHor - D_BHor))[0]; + + float b_FVer = + (s_FVer > 1e-6) ? static_cast((s_FVer - s_Vver) / s_FVer) : 0.0f; + float b_FHor = + (s_FHor > 1e-6) ? static_cast((s_FHor - s_VHor) / s_FHor) : 0.0f; + + return std::max(b_FVer, b_FHor); } \ No newline at end of file diff --git a/src/face_pipeline.h b/src/face_pipeline.h index 4a3390c..87ef5df 100644 --- a/src/face_pipeline.h +++ b/src/face_pipeline.h @@ -128,6 +128,15 @@ private: std::vector &blob_data); void normalize_sqrt_l2(std::vector &v); + bool CheckResolution(const cv::Mat &face_region); + bool CheckBrightness(const cv::Mat &face_region); + bool CheckClarity(const cv::Mat &face_region); + + float grid_max_bright(const cv::Mat &gray_img, int rows, int cols); + float reblur(const cv::Mat &data); + float grid_max_reblur(const cv::Mat &img, int rows, int cols); + float clarity_estimate(const cv::Mat &image); + void image_to_blob(const cv::Mat &img, std::vector &blob, const float *mean, const float *std); Ort::Value create_tensor(const std::vector &blob_data, @@ -173,9 +182,19 @@ private: const float m_det_threshold = 0.35f; const float m_det_iou_threshold = 0.45f; const int m_det_topk = 300; - const float m_pose_threshold = 30.0f; + + const float m_pose_yaw_threshold = 30.0f; + const float m_pose_pitch_threshold = 25.0f; + const cv::Mat m_landmark_template = (cv::Mat_(5, 2) << 89.3095f, 72.9025f, 169.3095f, 72.9025f, 127.8949f, 127.0441f, 96.8796f, 184.8907f, 159.1065f, 184.7601f); const cv::Size m_align_output_size = cv::Size(256, 256); + + const cv::Size m_quality_min_resolution = cv::Size(112, 112); + + const float m_quality_bright_v1 = 70.0f; + const float m_quality_bright_v2 = 230.0f; + + const float m_quality_clarity_low_thresh = 0.10f; }; \ No newline at end of file diff --git a/src/face_sdk_api.cpp b/src/face_sdk_api.cpp index 991699b..848d8ce 100644 --- a/src/face_sdk_api.cpp +++ b/src/face_sdk_api.cpp @@ -1,52 +1,52 @@ #include "face_sdk.h" -#include "face_pipeline.h" // 在 .cpp 中包含实现 +#include "face_pipeline.h" #include -// --- Pimpl 实现 --- -// 我们在 .cpp 文件中定义私有实现类 + + class FaceSDK::Impl { public: - // Impl 的构造函数真正创建了 FacePipeline + Impl(const std::string& model_dir) : pipeline(std::make_unique(model_dir)) { - // 构造函数体 + } - // 检查内部管线是否OK + bool IsInitialized() const { return pipeline && pipeline->IsInitialized(); } - // 持有核心管线的智能指针 + std::unique_ptr pipeline; }; -// --- FaceSDK 公共方法的实现 --- -// 构造函数:创建 Impl 实例 + + FaceSDK::FaceSDK(const std::string& model_dir) : m_impl(std::make_unique(model_dir)) { - // 构造函数体 + } -// 析构函数:必须在 .cpp 中定义,因为 Impl 是不完整类型 + FaceSDK::~FaceSDK() = default; -// IsInitialized 的实现 + bool FaceSDK::IsInitialized() const { if (!m_impl) return false; return m_impl->IsInitialized(); } -// Compare 的实现 (这是一个简单的辅助函数) + float FaceSDK::Compare(const std::vector& feat1, const std::vector& feat2) { - // 调用我们在 face_pipeline.h 中定义的全局辅助函数 + return compare_features(feat1, feat2); } -// ExtractFeature 的实现 + SDKExtractResult FaceSDK::ExtractFeature(const cv::Mat& image) { if (!IsInitialized()) { return { SDKStatus::NOT_INITIALIZED, {}, "SDK 未初始化" }; diff --git a/src/face_sdk_jni.cpp b/src/face_sdk_jni.cpp index dc7ff56..dc18916 100644 --- a/src/face_sdk_jni.cpp +++ b/src/face_sdk_jni.cpp @@ -2,22 +2,22 @@ #include #include #include -#include // 用于 Bitmap -> cv::Mat +#include -#include "face_sdk.h" // 我们的 C++ API +#include "face_sdk.h" #include 
"opencv2/opencv.hpp" -// --- 日志宏 --- + #define LOG_TAG "FaceSDK_JNI" #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__) #define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__) -// --- 全局 SDK 实例 --- -// 我们将 C++ SDK 实例保存在一个全局智能指针中 -// 指针 (long) 将被返回给 Java 层持有 + + + std::unique_ptr g_sdk_instance; -// --- 辅助函数:Bitmap 转 cv::Mat --- + bool ConvertBitmapToMat(JNIEnv* env, jobject j_bitmap, cv::Mat& out_mat) { AndroidBitmapInfo bmp_info; if (AndroidBitmap_getInfo(env, j_bitmap, &bmp_info) < 0) { @@ -25,7 +25,7 @@ bool ConvertBitmapToMat(JNIEnv* env, jobject j_bitmap, cv::Mat& out_mat) { return false; } - // 只支持 RGBA_8888 + if (bmp_info.format != ANDROID_BITMAP_FORMAT_RGBA_8888) { LOGE("Unsupported bitmap format. Only RGBA_8888 is supported."); return false; @@ -37,12 +37,12 @@ bool ConvertBitmapToMat(JNIEnv* env, jobject j_bitmap, cv::Mat& out_mat) { return false; } - // 创建一个 cv::Mat 来包装 Bitmap 像素 - // 注意:这是 RGBA 格式 + + cv::Mat tmp_mat(bmp_info.height, bmp_info.width, CV_8UC4, bmp_pixels); - // 我们的人脸管线需要 BGR 格式 - // TODO: 确认 python 管线是否需要 RGB。cv::cvtColor更安全。 + + cv::cvtColor(tmp_mat, out_mat, cv::COLOR_RGBA2BGR); AndroidBitmap_unlockPixels(env, j_bitmap); @@ -50,28 +50,22 @@ bool ConvertBitmapToMat(JNIEnv* env, jobject j_bitmap, cv::Mat& out_mat) { } -// --- JNI 接口实现 --- + #ifdef __cplusplus extern "C" { #endif -// JNI 函数命名规则: Java_包名_类名_方法名 -// 【【【请将 "com_facesdk_wrapper_FaceSDKWrapper" 替换为您自己的包名和类名】】】 -/** - * @brief 初始化 SDK - * @param env JNIEnv - * @param thiz Java 层的 'this' - * @param j_model_dir (String) 包含 .onnx 文件的路径 - * @return (long) 指向 C++ FaceSDK 实例的指针。如果为 0,则初始化失败。 - */ + + + JNIEXPORT jlong JNICALL Java_com_facesdk_wrapper_FaceSDKWrapper_nativeInit(JNIEnv *env, jobject thiz, jstring j_model_dir) { const char *model_dir_cstr = env->GetStringUTFChars(j_model_dir, nullptr); if (model_dir_cstr == nullptr) { LOGE("Failed to get model dir string"); - return 0; // 返回 0 (null) + return 0; } std::string model_dir(model_dir_cstr); @@ -84,11 +78,11 @@ Java_com_facesdk_wrapper_FaceSDKWrapper_nativeInit(JNIEnv *env, jobject thiz, js if (g_sdk_instance && g_sdk_instance->IsInitialized()) { LOGI("SDK Initialized successfully."); - // 返回实例的指针地址 (转为 long) + return (jlong)g_sdk_instance.get(); } else { LOGE("SDK g_sdk_instance->IsInitialized() failed."); - g_sdk_instance.reset(); // 释放内存 + g_sdk_instance.reset(); return 0; } } catch (const std::exception& e) { @@ -98,22 +92,14 @@ Java_com_facesdk_wrapper_FaceSDKWrapper_nativeInit(JNIEnv *env, jobject thiz, js } } -/** - * @brief 释放 SDK - */ + JNIEXPORT void JNICALL Java_com_facesdk_wrapper_FaceSDKWrapper_nativeRelease(JNIEnv *env, jobject thiz) { LOGI("Releasing SDK instance."); - g_sdk_instance.reset(); // 释放智能指针管理的内存 + g_sdk_instance.reset(); } -/** - * @brief 提取特征 - * @param env JNIEnv - * @param thiz Java 层的 'this' - * @param j_bitmap (Bitmap) 待处理的图像 - * @return (float[]) 512维特征向量,如果失败则返回 null - */ + JNIEXPORT jfloatArray JNICALL Java_com_facesdk_wrapper_FaceSDKWrapper_nativeExtractFeature(JNIEnv *env, jobject thiz, jobject j_bitmap) { if (!g_sdk_instance) { @@ -121,14 +107,14 @@ Java_com_facesdk_wrapper_FaceSDKWrapper_nativeExtractFeature(JNIEnv *env, jobjec return nullptr; } - // 1. Bitmap -> cv::Mat + cv::Mat image_bgr; if (!ConvertBitmapToMat(env, j_bitmap, image_bgr)) { LOGE("Failed to convert Bitmap to cv::Mat"); return nullptr; } - // 2. 
Call the C++ API
+
     SDKExtractResult result = g_sdk_instance->ExtractFeature(image_bgr);

     if (result.status != SDKStatus::SUCCESS) {
@@ -136,7 +122,7 @@ Java_com_facesdk_wrapper_FaceSDKWrapper_nativeExtractFeature(JNIEnv *env, jobjec
         return nullptr;
     }

-    // 3. std::vector<float> -> jfloatArray
+
     jfloatArray j_feature = env->NewFloatArray(result.feature.size());
     if (j_feature == nullptr) {
         LOGE("Failed to create new jfloatArray");
@@ -147,34 +133,27 @@ Java_com_facesdk_wrapper_FaceSDKWrapper_nativeExtractFeature(JNIEnv *env, jobjec
     return j_feature;
 }

-/**
- * @brief Compare features
- * @param env JNIEnv
- * @param thiz the Java-side 'this'
- * @param j_feat1 (float[]) feature 1
- * @param j_feat2 (float[]) feature 2
- * @return (float) cosine similarity
- */
+
 JNIEXPORT jfloat JNICALL
 Java_com_facesdk_wrapper_FaceSDKWrapper_nativeCompare(JNIEnv *env, jobject thiz, jfloatArray j_feat1, jfloatArray j_feat2) {
     if (!g_sdk_instance) {
         LOGE("SDK not initialized.");
-        return -2.0f; // return an invalid value
+        return -2.0f;
     }

-    // 1. jfloatArray -> std::vector<float>
+
     jsize len1 = env->GetArrayLength(j_feat1);
     jfloat* body1 = env->GetFloatArrayElements(j_feat1, nullptr);
     std::vector<float> feat1(body1, body1 + len1);
     env->ReleaseFloatArrayElements(j_feat1, body1, 0);

-    // 2. jfloatArray -> std::vector<float>
+
     jsize len2 = env->GetArrayLength(j_feat2);
     jfloat* body2 = env->GetFloatArrayElements(j_feat2, nullptr);
     std::vector<float> feat2(body2, body2 + len2);
     env->ReleaseFloatArrayElements(j_feat2, body2, 0);

-    // 3. Call the C++ API
+
     return g_sdk_instance->Compare(feat1, feat2);
 }
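
Usage note (not part of the patch): the public API exercised by the JNI layer above is FaceSDK(model_dir), IsInitialized(), ExtractFeature(cv::Mat) and Compare(). Below is a minimal host-side driver sketch, assuming only what this diff shows; the model directory, image paths and the 0.5f accept threshold are placeholders, not values defined by the SDK.

// Illustrative driver, assuming face_sdk.h declares FaceSDK, SDKStatus and
// SDKExtractResult as used in face_sdk_jni.cpp above.
#include <cstdio>
#include <opencv2/opencv.hpp>
#include "face_sdk.h"

int main() {
  // Directory containing the .onnx models loaded by FacePipeline::LoadModels().
  FaceSDK sdk("/path/to/face_models");
  if (!sdk.IsInitialized()) {
    std::fprintf(stderr, "SDK failed to initialize\n");
    return 1;
  }

  // The pipeline expects BGR input, which is what cv::imread produces.
  cv::Mat img_a = cv::imread("a.jpg");
  cv::Mat img_b = cv::imread("b.jpg");
  if (img_a.empty() || img_b.empty()) {
    std::fprintf(stderr, "could not read input images\n");
    return 1;
  }

  // Each call runs rotation, detection, landmarks, alignment, the pose gate
  // and the new resolution/brightness/clarity checks before recognition.
  SDKExtractResult ra = sdk.ExtractFeature(img_a);
  SDKExtractResult rb = sdk.ExtractFeature(img_b);
  if (ra.status != SDKStatus::SUCCESS || rb.status != SDKStatus::SUCCESS) {
    std::fprintf(stderr, "feature extraction rejected an input image\n");
    return 1;
  }

  float score = sdk.Compare(ra.feature, rb.feature);
  std::printf("similarity = %.4f\n", score);
  return score >= 0.5f ? 0 : 2;  // example decision threshold, not SDK-defined
}

On Android, the same flow is reached through nativeInit / nativeExtractFeature / nativeCompare in face_sdk_jni.cpp.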
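
Note (also not part of the patch): compare_features() itself does not appear in this diff. Since normalize_sqrt_l2() leaves the embedding L2-normalized and the removed JNI doc comment describes Compare() as cosine similarity, a plain dot product is the natural reading. The sketch below assumes exactly that; the function name and its -2.0f error sentinel (borrowed from the JNI layer's error return) are hypothetical.

// Sketch only: cosine similarity of two L2-normalized embeddings reduces to
// their dot product. The real compare_features() may differ.
#include <cstddef>
#include <vector>

static float compare_features_sketch(const std::vector<float> &a,
                                     const std::vector<float> &b) {
  if (a.size() != b.size() || a.empty())
    return -2.0f;  // invalid-input sentinel, modelled on the JNI error return
  float dot = 0.0f;
  for (std::size_t i = 0; i < a.size(); ++i)
    dot += a[i] * b[i];
  return dot;  // equals cosine similarity for unit-length vectors
}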