完成人脸识别安卓SDK开发
This commit is contained in:
parent
9f87f90af9
commit
8ba6a046ff
|
|
@ -11,10 +11,13 @@ FacePipeline::FacePipeline(const std::string& model_dir)
|
|||
m_session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
|
||||
|
||||
m_initialized = LoadModels(model_dir);
|
||||
if (m_initialized) {
|
||||
if (m_initialized)
|
||||
{
|
||||
InitMemoryAllocators();
|
||||
LOGI("FacePipeline initialized successfully.");
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
LOGE("FacePipeline initialization failed.");
|
||||
}
|
||||
}
|
||||
|
|
@ -22,96 +25,203 @@ FacePipeline::FacePipeline(const std::string& model_dir)
|
|||
// Releases the model input/output name strings held by the pipeline.
//
// InitMemoryAllocators() fills each of the std::vector<const char *> members
// listed below with strdup() copies, and nothing else in the visible code
// releases them, so an empty destructor leaks every name each time a pipeline
// is destroyed. The strings are returned to free() here (strdup() memory must
// be released with free(), declared in <cstdlib>).
// NOTE(review): assumes no other member function frees these pointers —
// confirm against the parts of the class not visible in this view.
FacePipeline::~FacePipeline()
{
    std::vector<const char *> *const name_lists[] = {
        &m_rot_input_names,       &m_rot_output_names,
        &m_det_input_names,       &m_det_output_names,
        &m_pose_var_input_names,  &m_pose_var_output_names,
        &m_pose_conv_input_names, &m_pose_conv_output_names,
        &m_lm1_input_names,       &m_lm1_output_names,
        &m_lm2_input_names,       &m_lm2_output_names,
        &m_rec_input_names,       &m_rec_output_names,
    };

    for (std::vector<const char *> *names : name_lists)
    {
        for (const char *name : *names)
        {
            // The vectors store const pointers, but the memory came from
            // strdup(), so dropping const for free() is legitimate.
            free(const_cast<char *>(name));
        }
        // Leave no dangling pointers behind in the (about to die) vectors.
        names->clear();
    }
}
|
||||
|
||||
// (私有) 加载所有模型
|
||||
bool FacePipeline::LoadModels(const std::string& model_dir) {
|
||||
auto load_session = [&](std::unique_ptr<Ort::Session>& session, const std::string& model_name) {
|
||||
bool FacePipeline::LoadModels(const std::string &model_dir)
|
||||
{
|
||||
auto load_session = [&](std::unique_ptr<Ort::Session> &session, const std::string &model_name)
|
||||
{
|
||||
std::string model_path = model_dir + "/" + model_name;
|
||||
try {
|
||||
try
|
||||
{
|
||||
session = std::make_unique<Ort::Session>(m_env, model_path.c_str(), m_session_options);
|
||||
LOGI("Loaded model: %s", model_path.c_str());
|
||||
} catch (const Ort::Exception& e) {
|
||||
}
|
||||
catch (const Ort::Exception &e)
|
||||
{
|
||||
LOGE("Error loading model %s: %s", model_path.c_str(), e.what());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
if (!load_session(m_session_rotator, "model_gray_mobilenetv2_rotcls.onnx")) return false;
|
||||
if (!load_session(m_session_detector, "faceboxesv2-640x640.onnx")) return false;
|
||||
if (!load_session(m_session_pose_var, "fsanet-var.onnx")) return false;
|
||||
if (!load_session(m_session_pose_conv, "fsanet-conv.onnx")) return false;
|
||||
if (!load_session(m_session_landmarker1, "face_landmarker_pts5_net1.onnx")) return false;
|
||||
if (!load_session(m_session_landmarker2, "face_landmarker_pts5_net2.onnx")) return false;
|
||||
if (!load_session(m_session_recognizer, "face_recognizer.onnx")) return false;
|
||||
if (!load_session(m_session_rotator, "model_gray_mobilenetv2_rotcls.onnx"))
|
||||
return false;
|
||||
if (!load_session(m_session_detector, "faceboxesv2-640x640.onnx"))
|
||||
return false;
|
||||
if (!load_session(m_session_pose_var, "fsanet-var.onnx"))
|
||||
return false;
|
||||
if (!load_session(m_session_pose_conv, "fsanet-conv.onnx"))
|
||||
return false;
|
||||
if (!load_session(m_session_landmarker1, "face_landmarker_pts5_net1.onnx"))
|
||||
return false;
|
||||
if (!load_session(m_session_landmarker2, "face_landmarker_pts5_net2.onnx"))
|
||||
return false;
|
||||
if (!load_session(m_session_recognizer, "face_recognizer.onnx"))
|
||||
return false;
|
||||
|
||||
LOGI("All 7 models loaded successfully.");
|
||||
return true;
|
||||
}
|
||||
|
||||
// (私有) 获取模型输入/输出信息
|
||||
void FacePipeline::InitMemoryAllocators() {
|
||||
void FacePipeline::InitMemoryAllocators()
|
||||
{
|
||||
// 【【【 最终修正版 v3 】】】
|
||||
auto get_io_names = [&](Ort::Session *session,
|
||||
std::vector<const char *> &input_names,
|
||||
std::vector<const char *> &output_names,
|
||||
std::vector<int64_t>& input_shape)
|
||||
std::vector<int64_t> &input_shape,
|
||||
const char *model_name)
|
||||
{
|
||||
input_names.clear();
|
||||
output_names.clear();
|
||||
input_shape.clear();
|
||||
|
||||
for (size_t i = 0; i < session->GetInputCount(); ++i) {
|
||||
size_t input_count = session->GetInputCount();
|
||||
for (size_t i = 0; i < input_count; ++i)
|
||||
{
|
||||
auto input_name_ptr = session->GetInputNameAllocated(i, m_allocator);
|
||||
if (input_name_ptr == nullptr || input_name_ptr.get() == nullptr)
|
||||
{
|
||||
LOGE("Model %s input name %zu is null!", model_name, i);
|
||||
throw std::runtime_error("Failed to get model input name");
|
||||
}
|
||||
input_names.push_back(strdup(input_name_ptr.get()));
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < session->GetOutputCount(); ++i) {
|
||||
size_t output_count = session->GetOutputCount();
|
||||
for (size_t i = 0; i < output_count; ++i)
|
||||
{
|
||||
auto output_name_ptr = session->GetOutputNameAllocated(i, m_allocator);
|
||||
if (output_name_ptr == nullptr || output_name_ptr.get() == nullptr)
|
||||
{
|
||||
LOGE("Model %s output name %zu is null!", model_name, i);
|
||||
throw std::runtime_error("Failed to get model output name");
|
||||
}
|
||||
output_names.push_back(strdup(output_name_ptr.get()));
|
||||
}
|
||||
|
||||
if (input_count > 0)
|
||||
{
|
||||
auto input_type_info = session->GetInputTypeInfo(0);
|
||||
auto tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
|
||||
input_shape = tensor_info.GetShape();
|
||||
if (input_shape[0] < 1) input_shape[0] = 1;
|
||||
|
||||
if (input_shape.empty())
|
||||
{
|
||||
LOGE("Model %s input shape is empty!", model_name);
|
||||
throw std::runtime_error("Model input shape is empty");
|
||||
}
|
||||
|
||||
// 【【【 修正:更详细的 shape 日志 】】】
|
||||
std::string shape_str = "[";
|
||||
for (long long dim : input_shape)
|
||||
shape_str += std::to_string(dim) + ", ";
|
||||
shape_str += "]";
|
||||
LOGI("Model %s input shape: %s", model_name, shape_str.c_str());
|
||||
|
||||
if (input_shape[0] < 1)
|
||||
input_shape[0] = 1; // Set batch size to 1
|
||||
}
|
||||
else
|
||||
{
|
||||
LOGE("Model %s has no inputs!", model_name);
|
||||
}
|
||||
};
|
||||
|
||||
get_io_names(m_session_rotator.get(), m_rot_input_names, m_rot_output_names, m_rot_input_shape);
|
||||
get_io_names(m_session_detector.get(), m_det_input_names, m_det_output_names, m_det_input_shape);
|
||||
get_io_names(m_session_pose_var.get(), m_pose_var_input_names, m_pose_var_output_names, m_pose_var_input_shape);
|
||||
get_io_names(m_session_pose_conv.get(), m_pose_conv_input_names, m_pose_conv_output_names, m_pose_conv_input_shape);
|
||||
get_io_names(m_session_landmarker1.get(), m_lm1_input_names, m_lm1_output_names, m_lm1_input_shape);
|
||||
get_io_names(m_session_landmarker2.get(), m_lm2_input_names, m_lm2_output_names, m_lm2_input_shape);
|
||||
get_io_names(m_session_recognizer.get(), m_rec_input_names, m_rec_output_names, m_rec_input_shape);
|
||||
// 为7个模型初始化
|
||||
get_io_names(m_session_rotator.get(), m_rot_input_names, m_rot_output_names, m_rot_input_shape, "Rotator");
|
||||
get_io_names(m_session_detector.get(), m_det_input_names, m_det_output_names, m_det_input_shape, "Detector");
|
||||
get_io_names(m_session_pose_var.get(), m_pose_var_input_names, m_pose_var_output_names, m_pose_var_input_shape, "PoseVar");
|
||||
get_io_names(m_session_pose_conv.get(), m_pose_conv_input_names, m_pose_conv_output_names, m_pose_conv_input_shape, "PoseConv");
|
||||
get_io_names(m_session_landmarker1.get(), m_lm1_input_names, m_lm1_output_names, m_lm1_input_shape, "Landmarker1");
|
||||
get_io_names(m_session_landmarker2.get(), m_lm2_input_names, m_lm2_output_names, m_lm2_input_shape, "Landmarker2");
|
||||
get_io_names(m_session_recognizer.get(), m_rec_input_names, m_rec_output_names, m_rec_input_shape, "Recognizer");
|
||||
|
||||
// 生成 FaceBoxesV2 的锚点
|
||||
generate_anchors_faceboxes(m_det_input_shape[2], m_det_input_shape[3]); // H, W (640, 640)
|
||||
// 检查 Detector 形状
|
||||
if (m_det_input_shape.size() < 4)
|
||||
{
|
||||
LOGE("Detector input shape has < 4 dimensions! Cannot generate anchors.");
|
||||
throw std::runtime_error("Detector input shape invalid");
|
||||
}
|
||||
// 【【【 修正:检查 -1 维度 】】】
|
||||
if (m_det_input_shape[2] < 0 || m_det_input_shape[3] < 0)
|
||||
{
|
||||
LOGE("Detector input shape is dynamic (H/W is -1). This is not supported by the Python logic.");
|
||||
// 我们从 Python 源码知道它是 640x640
|
||||
LOGI("Forcing detector H/W to 640x640.");
|
||||
m_det_input_shape[2] = 640;
|
||||
m_det_input_shape[3] = 640;
|
||||
}
|
||||
generate_anchors_faceboxes(m_det_input_shape[2], m_det_input_shape[3]);
|
||||
|
||||
// 调整Blob缓冲区大小 (查找最大所需size)
|
||||
// 调整Blob缓冲区大小
|
||||
size_t max_blob_size = 0;
|
||||
auto update_max = [&](const std::vector<int64_t>& shape) {
|
||||
size_t s = std::accumulate(shape.begin() + 1, shape.end(), 1, std::multiplies<size_t>());
|
||||
if (s > max_blob_size) max_blob_size = s;
|
||||
|
||||
// 【【【 修正:安全的 update_max 逻辑 】】】
|
||||
auto update_max = [&](const std::vector<int64_t> &shape, const char *model_name)
|
||||
{
|
||||
if (shape.size() <= 1)
|
||||
{
|
||||
return; // 忽略 (e.g., [1]) 或空 shape
|
||||
}
|
||||
|
||||
size_t s = 1;
|
||||
// 从 C (dim 1) 开始循环
|
||||
for (size_t i = 1; i < shape.size(); ++i)
|
||||
{
|
||||
if (shape[i] < 0)
|
||||
{
|
||||
// 如果是动态维度 (e.g., -1),我们不能用它来计算 max_blob_size
|
||||
LOGE("Model %s has dynamic dimension at index %zu. Skipping for max_blob_size calculation.", model_name, i);
|
||||
return; // 跳过这个模型
|
||||
}
|
||||
s *= static_cast<size_t>(shape[i]);
|
||||
}
|
||||
|
||||
if (s > max_blob_size)
|
||||
{
|
||||
max_blob_size = s;
|
||||
}
|
||||
};
|
||||
update_max(m_rot_input_shape);
|
||||
update_max(m_det_input_shape);
|
||||
update_max(m_pose_var_input_shape);
|
||||
update_max(m_lm1_input_shape);
|
||||
update_max(m_rec_input_shape);
|
||||
|
||||
update_max(m_rot_input_shape, "Rotator");
|
||||
update_max(m_det_input_shape, "Detector");
|
||||
update_max(m_pose_var_input_shape, "PoseVar");
|
||||
update_max(m_lm1_input_shape, "Landmarker1");
|
||||
update_max(m_rec_input_shape, "Recognizer");
|
||||
// (我们不调用 lm2,因为它不使用公共 blob)
|
||||
|
||||
if (max_blob_size == 0)
|
||||
{
|
||||
LOGE("Max blob size is 0, something went wrong with model shape detection!");
|
||||
throw std::runtime_error("Max blob size is 0");
|
||||
}
|
||||
|
||||
LOGI("Calculated max blob size: %zu", max_blob_size);
|
||||
m_blob_buffer.resize(max_blob_size);
|
||||
LOGI("m_blob_buffer resized successfully.");
|
||||
}
|
||||
|
||||
// --- 图像预处理辅助函数 ---
|
||||
void FacePipeline::image_to_blob(const cv::Mat& img, std::vector<float>& blob, const float* mean, const float* std) {
|
||||
void FacePipeline::image_to_blob(const cv::Mat &img, std::vector<float> &blob, const float *mean, const float *std)
|
||||
{
|
||||
int channels = img.channels();
|
||||
int height = img.rows;
|
||||
int width = img.cols;
|
||||
|
||||
for (int c = 0; c < channels; c++) {
|
||||
for (int h = 0; h < height; h++) {
|
||||
for (int w = 0; w < width; w++) {
|
||||
for (int c = 0; c < channels; c++)
|
||||
{
|
||||
for (int h = 0; h < height; h++)
|
||||
{
|
||||
for (int w = 0; w < width; w++)
|
||||
{
|
||||
float val;
|
||||
if (channels == 3) {
|
||||
if (channels == 3)
|
||||
{
|
||||
val = static_cast<float>(img.at<cv::Vec3b>(h, w)[c]);
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
val = static_cast<float>(img.at<uchar>(h, w));
|
||||
}
|
||||
blob[c * width * height + h * width + w] = (val - mean[c]) * std[c];
|
||||
|
|
@ -120,7 +230,8 @@ void FacePipeline::image_to_blob(const cv::Mat& img, std::vector<float>& blob, c
|
|||
}
|
||||
}
|
||||
|
||||
Ort::Value FacePipeline::create_tensor(const std::vector<float>& blob_data, const std::vector<int64_t>& input_shape) {
|
||||
Ort::Value FacePipeline::create_tensor(const std::vector<float> &blob_data, const std::vector<int64_t> &input_shape)
|
||||
{
|
||||
return Ort::Value::CreateTensor<float>(m_memory_info,
|
||||
const_cast<float *>(blob_data.data()),
|
||||
blob_data.size(),
|
||||
|
|
@ -128,15 +239,17 @@ Ort::Value FacePipeline::create_tensor(const std::vector<float>& blob_data, cons
|
|||
input_shape.size());
|
||||
}
|
||||
|
||||
|
||||
// --- 核心管线实现 ---
|
||||
|
||||
bool FacePipeline::Extract(const cv::Mat& image, std::vector<float>& feature) {
|
||||
if (!m_initialized) {
|
||||
bool FacePipeline::Extract(const cv::Mat &image, std::vector<float> &feature)
|
||||
{
|
||||
if (!m_initialized)
|
||||
{
|
||||
LOGE("Extract failed: Pipeline is not initialized.");
|
||||
return false;
|
||||
}
|
||||
if (image.empty()) {
|
||||
if (image.empty())
|
||||
{
|
||||
LOGE("Extract failed: Input image is empty.");
|
||||
return false;
|
||||
}
|
||||
|
|
@ -144,15 +257,19 @@ bool FacePipeline::Extract(const cv::Mat& image, std::vector<float>& feature) {
|
|||
// --- 1. 旋转检测 ---
|
||||
int rot_angle_code = RunRotation(image);
|
||||
cv::Mat upright_image;
|
||||
if (rot_angle_code >= 0) {
|
||||
if (rot_angle_code >= 0)
|
||||
{
|
||||
cv::rotate(image, upright_image, rot_angle_code);
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
upright_image = image;
|
||||
}
|
||||
|
||||
// --- 2. 人脸检测 ---
|
||||
std::vector<FaceBox> boxes;
|
||||
if (!RunDetection(upright_image, boxes)) {
|
||||
if (!RunDetection(upright_image, boxes))
|
||||
{
|
||||
LOGI("Extract failed: No face detected.");
|
||||
return false;
|
||||
}
|
||||
|
|
@ -173,7 +290,6 @@ bool FacePipeline::Extract(const cv::Mat& image, std::vector<float>& feature) {
|
|||
cv::Rect face_rect_padded(face_rect_raw.x + pad_left, face_rect_raw.y + pad_top, face_rect_raw.width, face_rect_raw.height);
|
||||
cv::Mat face_crop = face_crop_padded(face_rect_padded);
|
||||
|
||||
|
||||
// --- 5. 人脸对齐 (在姿态检测前,因为姿态检测需要对齐的脸) ---
|
||||
// (assess_quality) 调用 self.pose_checker.check(aligned_face)
|
||||
// QualityOfPose.check()
|
||||
|
|
@ -191,19 +307,22 @@ bool FacePipeline::Extract(const cv::Mat& image, std::vector<float>& feature) {
|
|||
|
||||
// --- 3. 姿态估计 (质量过滤) ---
|
||||
FacePose pose;
|
||||
if (!RunPose(face_crop, pose)) {
|
||||
if (!RunPose(face_crop, pose))
|
||||
{
|
||||
LOGI("Extract failed: Pose estimation failed.");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (std::abs(pose.yaw) > m_pose_threshold || std::abs(pose.pitch) > m_pose_threshold) {
|
||||
if (std::abs(pose.yaw) > m_pose_threshold || std::abs(pose.pitch) > m_pose_threshold)
|
||||
{
|
||||
LOGI("Extract failed: Face pose (Y:%.1f, P:%.1f) exceeds threshold (%.1f)", pose.yaw, pose.pitch, m_pose_threshold);
|
||||
return false;
|
||||
}
|
||||
|
||||
// --- 4. 关键点检测 ---
|
||||
FaceLandmark landmark;
|
||||
if (!RunLandmark(upright_image, best_box, landmark)) {
|
||||
if (!RunLandmark(upright_image, best_box, landmark))
|
||||
{
|
||||
LOGI("Extract failed: Landmark detection failed.");
|
||||
return false;
|
||||
}
|
||||
|
|
@ -212,7 +331,8 @@ bool FacePipeline::Extract(const cv::Mat& image, std::vector<float>& feature) {
|
|||
cv::Mat aligned_face = RunAlignment(upright_image, landmark);
|
||||
|
||||
// --- 6. 特征提取 ---
|
||||
if (!RunRecognition(aligned_face, feature)) {
|
||||
if (!RunRecognition(aligned_face, feature))
|
||||
{
|
||||
LOGI("Extract failed: Feature recognition failed.");
|
||||
return false;
|
||||
}
|
||||
|
|
@ -222,9 +342,9 @@ bool FacePipeline::Extract(const cv::Mat& image, std::vector<float>& feature) {
|
|||
return true;
|
||||
}
|
||||
|
||||
|
||||
// --- 步骤 1: 旋转检测 (来自 face_feature_extractor.py) ---
|
||||
void FacePipeline::preprocess_rotation(const cv::Mat& image, std::vector<float>& blob_data) {
|
||||
void FacePipeline::preprocess_rotation(const cv::Mat &image, std::vector<float> &blob_data)
|
||||
{
|
||||
cv::Mat gray_img, resized, cropped, gray_3d;
|
||||
cv::cvtColor(image, gray_img, cv::COLOR_BGR2GRAY);
|
||||
cv::resize(gray_img, resized, cv::Size(256, 256), 0, 0, cv::INTER_LINEAR);
|
||||
|
|
@ -239,7 +359,8 @@ void FacePipeline::preprocess_rotation(const cv::Mat& image, std::vector<float>&
|
|||
image_to_blob(gray_3d, blob_data, mean, std);
|
||||
}
|
||||
|
||||
int FacePipeline::RunRotation(const cv::Mat& image) {
|
||||
int FacePipeline::RunRotation(const cv::Mat &image)
|
||||
{
|
||||
preprocess_rotation(image, m_blob_buffer);
|
||||
auto input_tensor = create_tensor(m_blob_buffer, m_rot_input_shape);
|
||||
|
||||
|
|
@ -251,14 +372,18 @@ int FacePipeline::RunRotation(const cv::Mat& image) {
|
|||
int max_index = std::distance(output_data, std::max_element(output_data, output_data + 4));
|
||||
|
||||
// (correct_image_rotation)
|
||||
if (max_index == 1) return cv::ROTATE_90_CLOCKWISE;
|
||||
if (max_index == 2) return cv::ROTATE_180;
|
||||
if (max_index == 3) return cv::ROTATE_90_COUNTERCLOCKWISE;
|
||||
if (max_index == 1)
|
||||
return cv::ROTATE_90_CLOCKWISE;
|
||||
if (max_index == 2)
|
||||
return cv::ROTATE_180;
|
||||
if (max_index == 3)
|
||||
return cv::ROTATE_90_COUNTERCLOCKWISE;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// --- 步骤 2: 人脸检测 (来自 facedetector.py) ---
|
||||
void FacePipeline::preprocess_detection(const cv::Mat& img, std::vector<float>& blob_data) {
|
||||
void FacePipeline::preprocess_detection(const cv::Mat &img, std::vector<float> &blob_data)
|
||||
{
|
||||
cv::Mat resized;
|
||||
cv::resize(img, resized, cv::Size(m_det_input_shape[3], m_det_input_shape[2])); // 640x640
|
||||
|
||||
|
|
@ -268,7 +393,8 @@ void FacePipeline::preprocess_detection(const cv::Mat& img, std::vector<float>&
|
|||
image_to_blob(resized, blob_data, mean, std);
|
||||
}
|
||||
|
||||
bool FacePipeline::RunDetection(const cv::Mat& image, std::vector<FaceBox>& boxes) {
|
||||
bool FacePipeline::RunDetection(const cv::Mat &image, std::vector<FaceBox> &boxes)
|
||||
{
|
||||
float img_height = (float)image.rows;
|
||||
float img_width = (float)image.cols;
|
||||
|
||||
|
|
@ -283,7 +409,8 @@ bool FacePipeline::RunDetection(const cv::Mat& image, std::vector<FaceBox>& boxe
|
|||
const float *probs_data = output_tensors[1].GetTensorData<float>(); // [1, N, 2]
|
||||
long num_anchors = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape()[1];
|
||||
|
||||
if (num_anchors != m_anchors.size()) {
|
||||
if (num_anchors != m_anchors.size())
|
||||
{
|
||||
LOGE("Anchor size mismatch! Expected %zu, Got %ld", m_anchors.size(), num_anchors);
|
||||
return false;
|
||||
}
|
||||
|
|
@ -291,9 +418,11 @@ bool FacePipeline::RunDetection(const cv::Mat& image, std::vector<FaceBox>& boxe
|
|||
std::vector<FaceBox> bbox_collection;
|
||||
const float variance[2] = {0.1f, 0.2f}; //
|
||||
|
||||
for (long i = 0; i < num_anchors; ++i) {
|
||||
for (long i = 0; i < num_anchors; ++i)
|
||||
{
|
||||
float conf = probs_data[i * 2 + 1]; // (probs[0, i, 1])
|
||||
if (conf < m_det_threshold) continue;
|
||||
if (conf < m_det_threshold)
|
||||
continue;
|
||||
|
||||
const Anchor &anchor = m_anchors[i];
|
||||
float dx = bboxes_data[i * 4 + 0];
|
||||
|
|
@ -306,50 +435,61 @@ bool FacePipeline::RunDetection(const cv::Mat& image, std::vector<FaceBox>& boxe
|
|||
float w = anchor.s_kx * std::exp(dw * variance[1]); //
|
||||
float h = anchor.s_ky * std::exp(dh * variance[1]); //
|
||||
|
||||
bbox_collection.push_back({
|
||||
(cx - w / 2.0f) * img_width,
|
||||
bbox_collection.push_back({(cx - w / 2.0f) * img_width,
|
||||
(cy - h / 2.0f) * img_height,
|
||||
(cx + w / 2.0f) * img_width,
|
||||
(cy + h / 2.0f) * img_height,
|
||||
conf
|
||||
});
|
||||
conf});
|
||||
}
|
||||
|
||||
boxes = hard_nms(bbox_collection, m_det_iou_threshold, m_det_topk); // (nms_type=0)
|
||||
return !boxes.empty();
|
||||
}
|
||||
|
||||
void FacePipeline::generate_anchors_faceboxes(int target_height, int target_width) {
|
||||
void FacePipeline::generate_anchors_faceboxes(int target_height, int target_width)
|
||||
{
|
||||
// (generate_anchors)
|
||||
m_anchors.clear();
|
||||
std::vector<int> steps = {32, 64, 128};
|
||||
std::vector<std::vector<int>> min_sizes = {{32, 64, 128}, {256}, {512}};
|
||||
std::vector<std::vector<int>> feature_maps;
|
||||
for (int step : steps) {
|
||||
for (int step : steps)
|
||||
{
|
||||
feature_maps.push_back({(int)std::ceil((float)target_height / step), (int)std::ceil((float)target_width / step)});
|
||||
}
|
||||
|
||||
std::vector<float> offset_32 = {0.0f, 0.25f, 0.5f, 0.75f};
|
||||
std::vector<float> offset_64 = {0.0f, 0.5f};
|
||||
|
||||
for (int k = 0; k < feature_maps.size(); ++k) {
|
||||
for (int k = 0; k < feature_maps.size(); ++k)
|
||||
{
|
||||
auto f_map = feature_maps[k];
|
||||
auto tmp_min_sizes = min_sizes[k];
|
||||
int f_h = f_map[0];
|
||||
int f_w = f_map[1];
|
||||
for (int i = 0; i < f_h; ++i) {
|
||||
for (int j = 0; j < f_w; ++j) {
|
||||
for (int min_size : tmp_min_sizes) {
|
||||
for (int i = 0; i < f_h; ++i)
|
||||
{
|
||||
for (int j = 0; j < f_w; ++j)
|
||||
{
|
||||
for (int min_size : tmp_min_sizes)
|
||||
{
|
||||
float s_kx = (float)min_size / target_width;
|
||||
float s_ky = (float)min_size / target_height;
|
||||
|
||||
if (min_size == 32) {
|
||||
for (float offset_y : offset_32) for (float offset_x : offset_32)
|
||||
if (min_size == 32)
|
||||
{
|
||||
for (float offset_y : offset_32)
|
||||
for (float offset_x : offset_32)
|
||||
m_anchors.push_back({(j + offset_x) * steps[k] / target_width, (i + offset_y) * steps[k] / target_height, s_kx, s_ky});
|
||||
} else if (min_size == 64) {
|
||||
for (float offset_y : offset_64) for (float offset_x : offset_64)
|
||||
}
|
||||
else if (min_size == 64)
|
||||
{
|
||||
for (float offset_y : offset_64)
|
||||
for (float offset_x : offset_64)
|
||||
m_anchors.push_back({(j + offset_x) * steps[k] / target_width, (i + offset_y) * steps[k] / target_height, s_kx, s_ky});
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
m_anchors.push_back({(j + 0.5f) * steps[k] / target_width, (i + 0.5f) * steps[k] / target_height, s_kx, s_ky});
|
||||
}
|
||||
}
|
||||
|
|
@ -358,9 +498,9 @@ void FacePipeline::generate_anchors_faceboxes(int target_height, int target_widt
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
// --- 步骤 3: 姿态估计 (来自 imgchecker.py) ---
|
||||
void FacePipeline::preprocess_pose(const cv::Mat& img, std::vector<float>& blob_data) {
|
||||
void FacePipeline::preprocess_pose(const cv::Mat &img, std::vector<float> &blob_data)
|
||||
{
|
||||
float pad = 0.3f; //
|
||||
int h = img.rows;
|
||||
int w = img.cols;
|
||||
|
|
@ -381,7 +521,8 @@ void FacePipeline::preprocess_pose(const cv::Mat& img, std::vector<float>& blob_
|
|||
image_to_blob(resized, blob_data, mean, std);
|
||||
}
|
||||
|
||||
bool FacePipeline::RunPose(const cv::Mat& face_crop, FacePose& pose) {
|
||||
bool FacePipeline::RunPose(const cv::Mat &face_crop, FacePose &pose)
|
||||
{
|
||||
preprocess_pose(face_crop, m_blob_buffer);
|
||||
|
||||
// 运行 VAR
|
||||
|
|
@ -407,7 +548,8 @@ bool FacePipeline::RunPose(const cv::Mat& face_crop, FacePose& pose) {
|
|||
}
|
||||
|
||||
// --- 步骤 4: 关键点检测 (来自 facelandmarks5er.py) ---
|
||||
void FacePipeline::preprocess_landmark_net1(const cv::Mat& img, std::vector<float>& blob_data) {
|
||||
void FacePipeline::preprocess_landmark_net1(const cv::Mat &img, std::vector<float> &blob_data)
|
||||
{
|
||||
cv::Mat resized, gray_img;
|
||||
cv::resize(img, resized, cv::Size(m_lm1_input_shape[3], m_lm1_input_shape[2])); // 112x112
|
||||
cv::cvtColor(resized, gray_img, cv::COLOR_BGR2GRAY); //
|
||||
|
|
@ -419,7 +561,8 @@ void FacePipeline::preprocess_landmark_net1(const cv::Mat& img, std::vector<floa
|
|||
}
|
||||
|
||||
// C++ 转译 facelandmarks5er.py::shape_index_process
|
||||
std::vector<float> FacePipeline::shape_index_process(const Ort::Value& feat_val, const Ort::Value& pos_val) {
|
||||
std::vector<float> FacePipeline::shape_index_process(const Ort::Value &feat_val, const Ort::Value &pos_val)
|
||||
{
|
||||
auto feat_shape = feat_val.GetTensorTypeAndShapeInfo().GetShape();
|
||||
auto pos_shape = pos_val.GetTensorTypeAndShapeInfo().GetShape();
|
||||
const float *feat_data = feat_val.GetTensorData<float>();
|
||||
|
|
@ -447,17 +590,22 @@ std::vector<float> FacePipeline::shape_index_process(const Ort::Value& feat_val,
|
|||
std::vector<long> out_shape = {feat_n, feat_c, x_patch_h, (long)landmark_num, x_patch_w};
|
||||
std::vector<float> buff(feat_n * feat_c * x_patch_h * landmark_num * x_patch_w, 0.0f);
|
||||
|
||||
for (int i = 0; i < landmark_num; ++i) {
|
||||
for (int n = 0; n < feat_n; ++n) {
|
||||
for (int i = 0; i < landmark_num; ++i)
|
||||
{
|
||||
for (int n = 0; n < feat_n; ++n)
|
||||
{
|
||||
float y_pos = pos_data[n * landmark_x2 + 2 * i + 1];
|
||||
float x_pos = pos_data[n * landmark_x2 + 2 * i];
|
||||
|
||||
int y = (int)(y_pos * (feat_h - 1) - r_h + 0.5f);
|
||||
int x = (int)(x_pos * (feat_w - 1) - r_w + 0.5f);
|
||||
|
||||
for (int c = 0; c < feat_c; ++c) {
|
||||
for (int ph = 0; ph < feat_patch_h; ++ph) {
|
||||
for (int pw = 0; pw < feat_patch_w; ++pw) {
|
||||
for (int c = 0; c < feat_c; ++c)
|
||||
{
|
||||
for (int ph = 0; ph < feat_patch_h; ++ph)
|
||||
{
|
||||
for (int pw = 0; pw < feat_patch_w; ++pw)
|
||||
{
|
||||
int y_p = y + ph;
|
||||
int x_p = x + pw;
|
||||
|
||||
|
|
@ -467,9 +615,12 @@ std::vector<float> FacePipeline::shape_index_process(const Ort::Value& feat_val,
|
|||
i * (x_patch_w) +
|
||||
pw;
|
||||
|
||||
if (y_p < 0 || y_p >= feat_h || x_p < 0 || x_p >= feat_w) {
|
||||
if (y_p < 0 || y_p >= feat_h || x_p < 0 || x_p >= feat_w)
|
||||
{
|
||||
buff[out_idx] = 0.0f;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
long feat_idx = n * (feat_c * feat_h * feat_w) +
|
||||
c * (feat_h * feat_w) +
|
||||
y_p * (feat_w) +
|
||||
|
|
@ -484,8 +635,8 @@ std::vector<float> FacePipeline::shape_index_process(const Ort::Value& feat_val,
|
|||
return buff;
|
||||
}
|
||||
|
||||
|
||||
bool FacePipeline::RunLandmark(const cv::Mat& image, const FaceBox& box, FaceLandmark& landmark) {
|
||||
bool FacePipeline::RunLandmark(const cv::Mat &image, const FaceBox &box, FaceLandmark &landmark)
|
||||
{
|
||||
// 1. 裁剪人脸
|
||||
cv::Rect face_rect_raw(box.x1, box.y1, box.x2 - box.x1, box.y2 - box.y1);
|
||||
int pad_top = std::max(0, -face_rect_raw.y);
|
||||
|
|
@ -530,7 +681,8 @@ bool FacePipeline::RunLandmark(const cv::Mat& image, const FaceBox& box, FaceLan
|
|||
float scale_x = (box.x2 - box.x1) / 112.0f;
|
||||
float scale_y = (box.y2 - box.y1) / 112.0f;
|
||||
|
||||
for (int i = 0; i < 5; ++i) {
|
||||
for (int i = 0; i < 5; ++i)
|
||||
{
|
||||
float x_norm = (data_net2[i * 2 + 0] + data_net1_pos[i * 2 + 0]) * 112.0f;
|
||||
float y_norm = (data_net2[i * 2 + 1] + data_net1_pos[i * 2 + 1]) * 112.0f;
|
||||
|
||||
|
|
@ -545,12 +697,14 @@ bool FacePipeline::RunLandmark(const cv::Mat& image, const FaceBox& box, FaceLan
|
|||
}
|
||||
|
||||
// --- 步骤 5: 人脸对齐 (来自 facealign.py) ---
|
||||
cv::Mat FacePipeline::RunAlignment(const cv::Mat& image, const FaceLandmark& landmark) {
|
||||
cv::Mat FacePipeline::RunAlignment(const cv::Mat &image, const FaceLandmark &landmark)
|
||||
{
|
||||
// (align)
|
||||
std::vector<cv::Point2f> src_points;
|
||||
std::vector<cv::Point2f> dst_points;
|
||||
|
||||
for (int i = 0; i < 5; ++i) {
|
||||
for (int i = 0; i < 5; ++i)
|
||||
{
|
||||
src_points.push_back(landmark.points[i]);
|
||||
dst_points.push_back(cv::Point2f(m_landmark_template.at<float>(i, 0),
|
||||
m_landmark_template.at<float>(i, 1)));
|
||||
|
|
@ -568,10 +722,15 @@ cv::Mat FacePipeline::RunAlignment(const cv::Mat& image, const FaceLandmark& lan
|
|||
}
|
||||
|
||||
// --- 步骤 6: 特征提取 (来自 facerecoger.py) ---
|
||||
void FacePipeline::preprocess_recognition(const cv::Mat& img, std::vector<float>& blob_data) {
|
||||
void FacePipeline::preprocess_recognition(const cv::Mat &img, std::vector<float> &blob_data)
|
||||
{
|
||||
cv::Mat resized, rgb_img;
|
||||
|
||||
const cv::Size target_size(248, 248);
|
||||
|
||||
// (resize to 248, 248)
|
||||
cv::resize(img, resized, cv::Size(m_rec_input_shape[3], m_rec_input_shape[2]));
|
||||
cv::resize(img, resized, target_size);
|
||||
|
||||
// (BGR -> RGB)
|
||||
cv::cvtColor(resized, rgb_img, cv::COLOR_BGR2RGB);
|
||||
|
||||
|
|
@ -581,27 +740,42 @@ void FacePipeline::preprocess_recognition(const cv::Mat& img, std::vector<float>
|
|||
image_to_blob(rgb_img, blob_data, mean, std);
|
||||
}
|
||||
|
||||
// Applies the recognizer's "SQRT-L2" post-processing to `v` in place.
//
// Step 1 replaces every element with sqrt(max(0, element)), mirroring
// `np.sqrt(pred_result[0])` from the Python reference while clamping negative
// inputs to zero. Step 2 divides every element by the Euclidean length of the
// resulting vector. When the accumulated sum of squares is not greater than
// 1e-6 the vector is left un-normalised, which avoids dividing by ~0.
//
// @param v Feature vector, modified in place.
void FacePipeline::normalize_sqrt_l2(std::vector<float> &v)
{
    double sum_of_squares = 0.0;
    for (std::size_t idx = 0; idx < v.size(); ++idx)
    {
        const float rooted = std::sqrt(std::max(0.0f, v[idx]));
        v[idx] = rooted;
        sum_of_squares += rooted * rooted;
    }

    // Guard clause: nothing to scale for an (almost) all-zero vector.
    if (!(sum_of_squares > 1e-6))
    {
        return;
    }

    const double length = std::sqrt(sum_of_squares);
    for (std::size_t idx = 0; idx < v.size(); ++idx)
    {
        v[idx] = static_cast<float>(v[idx] / length);
    }
}
|
||||
|
||||
bool FacePipeline::RunRecognition(const cv::Mat& aligned_face, std::vector<float>& feature) {
|
||||
preprocess_recognition(aligned_face, m_blob_buffer);
|
||||
auto input_tensor = create_tensor(m_blob_buffer, m_rec_input_shape);
|
||||
bool FacePipeline::RunRecognition(const cv::Mat &aligned_face, std::vector<float> &feature)
|
||||
{
|
||||
// 【【【 最终修正 v5 】】】
|
||||
|
||||
// 1. 预处理 (这部分是正确的,它生成了 248x248 的 blob)
|
||||
preprocess_recognition(aligned_face, m_blob_buffer);
|
||||
|
||||
// 2. (BUG 在这里) 我们不能使用 m_rec_input_shape (它是 [-1, -1, -1, -1])
|
||||
// 我们必须硬编码 Python 源码 (facerecoger.py) 中使用的 shape。
|
||||
const std::vector<int64_t> hardcoded_shape = {1, 3, 248, 248};
|
||||
|
||||
// 3. (修正) 使用 hardcoded_shape 创建 Tensor
|
||||
auto input_tensor = create_tensor(m_blob_buffer, hardcoded_shape);
|
||||
|
||||
// 4. 运行
|
||||
auto output_tensors = m_session_recognizer->Run(Ort::RunOptions{nullptr},
|
||||
m_rec_input_names.data(), &input_tensor, 1,
|
||||
m_rec_output_names.data(), 1);
|
||||
|
|
@ -612,7 +786,7 @@ bool FacePipeline::RunRecognition(const cv::Mat& aligned_face, std::vector<float
|
|||
feature.resize(feature_dim);
|
||||
memcpy(feature.data(), output_data, feature_dim * sizeof(float));
|
||||
|
||||
// (后处理: SQRT-L2 Norm)
|
||||
// 5. 后处理 (SQRT-L2 Norm)
|
||||
normalize_sqrt_l2(feature);
|
||||
|
||||
return true;
|
||||
|
|
|
|||
Loading…
Reference in New Issue