// Jiale/test2_ort/pipeline/faceReg_realtime.cpp

//
// created by wangjiale on 2024/5/9
//
#include <exception>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include "lite/lite.h"
#include "dirent.h"
#include <sys/stat.h>


static void faceReg(const std::string& detect_onnx, const std::string& reg_onnx, const std::string& test_img_path1, const std::string& test_img_path2){
    lite::cv::face::detect::RetinaFace *retinaface = new lite::cv::face::detect::RetinaFace(detect_onnx);  //default: Pytorch_RetinaFace_resnet50.onnx
    lite::cv::faceid::FocalAsiaArcFace *focal_asia_arcface = new lite::cv::faceid::FocalAsiaArcFace(reg_onnx); //default: focal-arcface-bh-ir50-asia.onnx

    lite::types::FaceContent known_face_content;
    lite::types::FaceContent unknown_face_content;

    std::vector<lite::types::Boxf> detected_boxes1;
    std::vector<cv::Mat> bgr_faces;
    cv::Mat img_bgr1 = cv::imread(test_img_path1);
    retinaface->detect(img_bgr1, detected_boxes1);

    if (detected_boxes1.empty() || detected_boxes1.size() > 1 || !detected_boxes1[0].flag ){
        throw "known_img have no/many face";
    }
    cv::Mat known_bgr_face = img_bgr1(detected_boxes1[0].rect());
    focal_asia_arcface->detect(known_bgr_face, known_face_content);

    cv::Mat img_bgr2 = cv::imread(test_img_path2);
    std::vector<lite::types::Boxf> detected_boxes2;
    std::vector<float> sims;
    retinaface->detect(img_bgr2, detected_boxes2);
    for (const auto &box: detected_boxes2)
    {
        if (box.flag)
        {
            cv::Mat cropped_img_bgr = img_bgr2(box.rect());
            bgr_faces.push_back(cropped_img_bgr);
            focal_asia_arcface->detect(cropped_img_bgr, unknown_face_content);
            float sim = lite::utils::math::cosine_similarity<float>(
                known_face_content.embedding, unknown_face_content.embedding) ;
            cv::rectangle(img_bgr2, box.rect(), cv::Scalar(255,255,0),2);
            cv::putText(img_bgr2, std::to_string(sim).substr(0,5), box.tl(), cv::FONT_HERSHEY_SIMPLEX, 0.6f, cv::Scalar(0,255,0), 2);
        }
    }

    std::string target_img_path = test_img_path2;
    size_t pos = target_img_path.find("sources");
    target_img_path.replace(target_img_path.begin() + pos, target_img_path.begin() + pos + 7, "log");
    cv::imwrite(target_img_path, img_bgr2);

    delete retinaface;
    delete focal_asia_arcface;
}

void get_content_from_folder(
    std::string face_folder, lite::cv::face::detect::RetinaFace *detector,
    lite::cv::faceid::FocalAsiaArcFace *reger,
    std::vector<std::string>& known_names,
    std::vector<lite::types::FaceContent>& known_face_contents
    )
{
    if (face_folder.back() != '/' || face_folder.back() != '\\')
        face_folder.push_back('/');
    DIR* dir = opendir(face_folder.c_str());
    if (dir == nullptr){
        std::cerr << "Cannot open directory: " << face_folder << std::endl;
        return ;
    }
    struct dirent* entry;
    while ((entry = readdir(dir)) != nullptr)
    {
        std::string fileName = entry->d_name;
        if(fileName == "." || fileName ==".." || fileName.find(".jpg") == std::string::npos)
            continue;
        std::string filePath = face_folder + fileName;

        cv::Mat img_bgr = cv::imread(filePath);
        lite::types::FaceContent known_face_content;
        std::vector<lite::types::Boxf> detected_boxes;
        detector->detect(img_bgr, detected_boxes);
        if (detected_boxes.empty() || detected_boxes.size() > 1 || !detected_boxes[0].flag )
            throw "known_img have no/many face";
        reger->detect(img_bgr(detected_boxes[0].rect()), known_face_content);
        if (known_face_content.flag){
            known_face_contents.push_back(known_face_content);
            known_names.push_back(fileName.substr(0,fileName.size()-4));
        }
        std::cout << "File name: " << fileName << std::endl;
        std::cout << "File path: " << filePath << std::endl;
    }
    closedir(dir);
}

static void cameraReg(const std::string& face_folder, const std::string& detect_onnx, const std::string& reg_onnx, int camera = 0)
{
    lite::cv::face::detect::RetinaFace *retinaface = new lite::cv::face::detect::RetinaFace(detect_onnx);  //default: Pytorch_RetinaFace_resnet50.onnx
    lite::cv::faceid::FocalAsiaArcFace *focal_asia_arcface = new lite::cv::faceid::FocalAsiaArcFace(reg_onnx); //default: focal-arcface-bh-ir50-asia.onnx

    std::vector<lite::types::FaceContent> known_face_contents;
    std::vector<std::string> known_names;
    lite::types::FaceContent unknown_face_content;
    float fxy = 0.5;

    // get content of known faces
    get_content_from_folder(face_folder, retinaface, focal_asia_arcface, known_names, known_face_contents);

    // realtime face reg
    cv::VideoCapture capture(camera);
    if (!capture.isOpened()){
        std::cerr << "Error: Could not open camera" <<std::endl;
        return;
    }
    cv::namedWindow("faceReg",cv::WINDOW_NORMAL);
    cv::Mat frame_bgr, res_frame_bgr;
    while(true){
        // capture the next frame froem the webcam
        capture >> frame_bgr;
        cv::resize(frame_bgr, frame_bgr, cv::Size(), fxy, fxy);
        std::cout << "size: " << frame_bgr.size() << std::endl;
        res_frame_bgr = frame_bgr;

        //process frame_bgr
        std::vector<lite::types::Boxf> detected_boxes;
        retinaface->detect(frame_bgr, detected_boxes);

        auto fsize = frame_bgr.size();
        cv::Rect spec_rect(0, 0, fsize.width, fsize.height);
        cv::Point2i spec_point(fsize.width,fsize.height);
        if (spec_point.inside(spec_rect)){
            std::cerr << "spec_point.inside(spec_rect) is True " << std::endl;
            return ;
        }
        for(const auto& box : detected_boxes){
            ::cv::Rect rect = box.rect() & spec_rect;
            // if ( !(rect.tl().inside(spec_rect) && rect.br().inside(spec_rect)) ){
            //     std::cout << rect.tl() << rect.br() <<std::endl;
            //     std::cerr << "!(rect.tl().inside(spec_rect) && rect.br().inside(spec_rect))  is True" << std::endl;
            //     return;
            // }
            focal_asia_arcface->detect(frame_bgr(rect), unknown_face_content);
            if(!unknown_face_content.flag)
                continue;
            int max_idx = -1, idx = 0; //init is invalid
            float max_value = -100;
            for(const auto& known_face_content: known_face_contents)
            {
                float sim = lite::utils::math::cosine_similarity<float>(
                    known_face_content.embedding, unknown_face_content.embedding
                );
                if(sim > max_value)
                {
                    max_idx = idx;
                    max_value = sim;
                }
                idx ++;
            }
            cv::rectangle(res_frame_bgr, rect, cv::Scalar(255,255,0), 4*fxy);
            cv::putText(res_frame_bgr, known_names[max_idx] +": "+ std::to_string(max_value).substr(0,5), rect.tl(), cv::FONT_HERSHEY_SIMPLEX, 0.6f*fxy, cv::Scalar(0,255,0), 4*fxy);
        }

        cv::resize(res_frame_bgr, res_frame_bgr, cv::Size(), 1/fxy, 1/fxy);
        cv::imshow("faceReg", res_frame_bgr);
        if (cv::waitKey(1) == 'q')
            break;
    }
}

// Entry point.
//
// Usage: <program> <face_folder> [detect_onnx] [reg_onnx]
//   face_folder  directory with the known-face images (required)
//   detect_onnx  optional detector model path; defaults to the built-in path
//   reg_onnx     optional embedding model path; defaults to the built-in path
//
// Returns 0 on normal exit and -1 on a missing argument or when recognition
// aborts with an exception.
int main(int argc, char* argv[]){
    if (argc <= 1)
    {
        std::cerr << "face_folder is None" <<std::endl;
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "faceReg_realtime")
                  << " <face_folder> [detect_onnx] [reg_onnx]" << std::endl;
        return -1;
    }
    const std::string face_folder = argv[1];
    const std::string detect_onnx = (argc > 2)
        ? std::string(argv[2])
        : std::string(R"(C:\Users\JIALE\Desktop\bns_proj\test2_ort\pipeline\hub\Pytorch_RetinaFace_resnet50.onnx)");
    const std::string reg_onnx = (argc > 3)
        ? std::string(argv[3])
        : std::string(R"(C:\Users\JIALE\Desktop\bns_proj\test2_ort\pipeline\hub\focal-arcface-bh-ir50-asia.onnx)");

    try {
        cameraReg(face_folder, detect_onnx, reg_onnx, 0);
    } catch (const char* msg) {
        // get_content_from_folder() reports invalid gallery images this way.
        std::cerr << "Error: " << msg << std::endl;
        return -1;
    } catch (const std::exception& e) {
        std::cerr << "Error: " << e.what() << std::endl;
        return -1;
    }
    return 0;
}