bonus-yolo-sdk-sourcecode/Yolo11_ONNX/Yolo_ONNX.cpp

#include "pch.h"
#include "Yolo_ONNX.h"

//
#include <onnxruntime_c_api.h>
#include <onnxruntime_cxx_api.h>
#include <opencv2/opencv.hpp>
#include <vector>
#include <string>
#include <iostream>
#include <memory>
#include <stdexcept> //

namespace {

    cv::Mat preprocess(const cv::Mat& img, int target_width, int target_height, int& pad_w, int& pad_h, float& scale);

    std::vector<Detection> postprocess(Ort::Value& output_tensor, float scale, int pad_w, int pad_h, int img_w, int img_h, float conf_threshold, float iou_threshold);

    class YoloDetector {
    public:
        //
        Ort::Env env;
        std::unique_ptr<Ort::Session> session;

        //
        int input_width = 0;
        int input_height = 0;

        //
        Ort::AllocatorWithDefaultOptions allocator;
        std::string input_name_str;
        std::string output_name_str;
        std::vector<const char*> input_node_names;
        std::vector<const char*> output_node_names;

    public:
        /**
         * @brief
         */
        YoloDetector(const wchar_t* model_path, int in_width, int in_height)
            : env(ORT_LOGGING_LEVEL_WARNING, "YOLOv8-ONNX-GPU"),
            input_width(in_width),
            input_height(in_height)
        {
            //
            Ort::SessionOptions session_options;
            OrtCUDAProviderOptions cuda_options; //
            session_options.AppendExecutionProvider_CUDA(cuda_options);

            //
            session = std::make_unique<Ort::Session>(env, model_path, session_options);

            //
            //
            input_name_str = session->GetInputNameAllocated(0, allocator).get();
            output_name_str = session->GetOutputNameAllocated(0, allocator).get();
            input_node_names.push_back(input_name_str.c_str());
            output_node_names.push_back(output_name_str.c_str());
        }

        /**
         * @brief
         */
        std::vector<Detection> detect(
            unsigned char* image_bytes,
            int image_width,
            int image_height,
            float conf_threshold,
            float iou_threshold)
        {
            //
            cv::Mat image(image_height, image_width, CV_8UC3, image_bytes);
            if (image.empty()) {
                throw std::runtime_error("Input image is empty.");
            }

            //
            int pad_w, pad_h;
            float scale;
            cv::Mat preprocessed_img = preprocess(image, input_width, input_height, pad_w, pad_h, scale); //

            //
            cv::Mat blob;
            cv::dnn::blobFromImage(preprocessed_img, blob, 1 / 255.0, cv::Size(), cv::Scalar(), true, false);
            std::vector<int64_t> input_shape = { 1, 3, (int64_t)input_height, (int64_t)input_width };
            std::cout <<"input shape: " << input_shape[0] << "," << input_shape[1] << "," << input_shape[2] << "," << input_shape[3] << "," << std::endl;

            //
            auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
            Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, blob.ptr<float>(), blob.total(), input_shape.data(), input_shape.size());

            //
            auto output_tensors = session->Run(Ort::RunOptions{ nullptr }, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);

            //
            return postprocess(output_tensors[0], scale, pad_w, pad_h, image_width, image_height, conf_threshold, iou_threshold);
        }
    };


    // ========================================================================
    //
    // ========================================================================

    //
    cv::Mat preprocess(const cv::Mat& img, int target_width, int target_height, int& pad_w, int& pad_h, float& scale) {
        cv::Mat resized_img;
        int w = img.cols;
        int h = img.rows;
        scale = std::min(static_cast<float>(target_width) / w, static_cast<float>(target_height) / h);
        int new_w = static_cast<int>(w * scale);
        int new_h = static_cast<int>(h * scale);
        cv::resize(img, resized_img, cv::Size(new_w, new_h), 0, 0, cv::INTER_AREA);

        pad_w = target_width - new_w; //
        pad_h = target_height - new_h; //

        //
        int top = pad_h / 2;
        int bottom = pad_h - top;
        int left = pad_w / 2;
        int right = pad_w - left;
        //

        cv::Mat padded_img;
        //
        cv::copyMakeBorder(resized_img, padded_img, top, bottom, left, right, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));
        return padded_img;
    }

    //
    std::vector<Detection> postprocess(Ort::Value& output_tensor, float scale, int pad_w, int pad_h, int img_w, int img_h, float conf_threshold, float iou_threshold) {
        const auto output_shape = output_tensor.GetTensorTypeAndShapeInfo().GetShape();
        const float* raw_output = output_tensor.GetTensorData<float>();
        int num_classes = static_cast<int>(output_shape[1]) - 4;
        int num_proposals = static_cast<int>(output_shape[2]);
        std::vector<cv::Rect> boxes;
        std::vector<float> scores;
        std::vector<int> class_ids;
        cv::Mat raw_data_mat(num_classes + 4, num_proposals, CV_32F, (void*)raw_output);
        raw_data_mat = raw_data_mat.t();
        for (int i = 0; i < num_proposals; ++i) {
            const float* proposal = raw_data_mat.ptr<float>(i);
            const float* class_scores = proposal + 4;
            float max_score = 0.0f;
            int class_id = -1;
            for (int j = 0; j < num_classes; ++j) {
                if (class_scores[j] > max_score) {
                    max_score = class_scores[j];
                    class_id = j;
                }
            }
            if (max_score > conf_threshold) {
                float cx = proposal[0];
                float cy = proposal[1];
                float w = proposal[2];
                float h = proposal[3];
                int left = static_cast<int>((cx - w / 2 - (pad_w / 2.0f)) / scale);
                int top = static_cast<int>((cy - h / 2 - (pad_w / 2.0f)) / scale);
                int width = static_cast<int>(w / scale);
                int height = static_cast<int>(h / scale);
                left = std::max(0, std::min(left, img_w - 1));
                top = std::max(0, std::min(top, img_h - 1));
                width = std::min(width, img_w - left);
                height = std::min(height, img_h - top);
                boxes.push_back(cv::Rect(left, top, width, height));
                scores.push_back(max_score);
                class_ids.push_back(class_id);
            }
        }
        std::vector<int> nms_result;
        cv::dnn::NMSBoxes(boxes, scores, conf_threshold, iou_threshold, nms_result);
        std::vector<Detection> detections;
        for (int idx : nms_result) {
            detections.push_back({ class_ids[idx], scores[idx], boxes[idx].x, boxes[idx].y, boxes[idx].width, boxes[idx].height });
        }
        return detections;
    }
} //


extern "C" {

    // ========================================================================
    //
    // ========================================================================

    YOLO_API void* create_detector(
        const wchar_t* model_path,
        int input_width,
        int input_height)
    {
        try {
            //
            YoloDetector* detector = new YoloDetector(model_path, input_width, input_height);
            //
            return static_cast<void*>(detector);
        }
        catch (const Ort::Exception& e) {
            std::cerr << "ONNX Runtime Error creating detector: " << e.what() << std::endl;
            return nullptr;
        }
        catch (const std::exception& e) {
            std::cerr << "Error creating detector: " << e.what() << std::endl;
            return nullptr;
        }
    }

    YOLO_API void free_detector(void* detector_handle)
    {
        if (detector_handle) {
            YoloDetector* detector = static_cast<YoloDetector*>(detector_handle);
            delete detector;
        }
    }

    YOLO_API int perform_detection_on_session(
        void* detector_handle,
        unsigned char* image_bytes,
        int image_width,
        int image_height,
        Detection** out_detections,
        int* out_detections_count,
        float conf_threshold,
        float iou_threshold)
    {
        if (!detector_handle) return -1; //

        //
        YoloDetector* detector = static_cast<YoloDetector*>(detector_handle);

        try {
            //
            std::vector<Detection> detections = detector->detect(
                image_bytes, image_width, image_height,
                conf_threshold, iou_threshold
            );

            //
            *out_detections_count = static_cast<int>(detections.size());
            if (*out_detections_count > 0) {
                *out_detections = new Detection[*out_detections_count];
                std::copy(detections.begin(), detections.end(), *out_detections);
            }
            else {
                *out_detections = nullptr;
            }
            return 0; //
        }
        catch (const Ort::Exception& e) {
            std::cerr << "ONNX Runtime Error during detection: " << e.what() << std::endl;
            return -2;
        }
        catch (const std::exception& e) {
            std::cerr << "Error during detection: " << e.what() << std::endl;
            return -4;
        }
    }


    // ========================================================================
    //
    // ========================================================================

    YOLO_API int perform_detection(
        const wchar_t* model_path,
        unsigned char* image_bytes,
        int image_width,
        int image_height,
        Detection** out_detections,
        int* out_detections_count,
        const char** class_names,
        int class_names_count,
        float conf_threshold,
        float iou_threshold,
        int input_width,
        int input_height
    ) {
        //
        static Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "YOLOv8-ONNX-GPU");
        static std::unique_ptr<Ort::Session> session = nullptr;
        static std::wstring current_model_path = L"";

        try {
            if (!session || current_model_path != model_path) {
                Ort::SessionOptions session_options;
                OrtCUDAProviderOptions cuda_options;
                session_options.AppendExecutionProvider_CUDA(cuda_options);
                session = std::make_unique<Ort::Session>(env, model_path, session_options);
                current_model_path = model_path;
            }

            std::vector<int64_t> input_shape = { 1, 3, input_height, input_width };
            Ort::AllocatorWithDefaultOptions allocator;
            std::string input_name_str = session->GetInputNameAllocated(0, allocator).get();
            std::vector<const char*> input_node_names = { input_name_str.c_str() };
            std::string output_name_str = session->GetOutputNameAllocated(0, allocator).get();
            std::vector<const char*> output_node_names = { output_name_str.c_str() };

            cv::Mat image(image_height, image_width, CV_8UC3, image_bytes);
            if (image.empty()) return -1;

            int pad_w, pad_h;
            float scale;
            cv::Mat preprocessed_img = preprocess(image, input_width, input_height, pad_w, pad_h, scale);

            cv::Mat blob;
            cv::dnn::blobFromImage(preprocessed_img, blob, 1 / 255.0, cv::Size(), cv::Scalar(), true, false);

            auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
            Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, blob.ptr<float>(), blob.total(), input_shape.data(), input_shape.size());
            auto output_tensors = session->Run(Ort::RunOptions{ nullptr }, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);

            std::vector<Detection> detections = postprocess(output_tensors[0], scale, pad_w, pad_h, image_width, image_height, conf_threshold, iou_threshold);

            *out_detections_count = static_cast<int>(detections.size());
            if (*out_detections_count > 0) {
                *out_detections = new Detection[*out_detections_count];
                std::copy(detections.begin(), detections.end(), *out_detections);
            }
            else {
                *out_detections = nullptr;
            }
        }
        catch (const Ort::Exception& e) {
            std::cerr << "ONNX Runtime <20>쳣: " << e.what() << std::endl;
            return -2;
        }
        catch (const cv::Exception& e) {
            std::cerr << "OpenCV <20>쳣: " << e.what() << std::endl;
            return -3;
        }
        catch (const std::exception& e) {
            std::cerr << "<EFBFBD><EFBFBD>׼<EFBFBD><EFBFBD><EFBFBD>쳣: " << e.what() << std::endl;
            return -4;
        }
        return 0;
    }

    //
    YOLO_API void free_memory(Detection* detections) {
        delete[] detections;
    }

    YOLO_API void draw_and_encode_image(
        unsigned char* image_bytes,
        int image_width,
        int image_height,
        const Detection* detections,
        int detections_count,
        const char** class_names,
        int class_names_count,
        unsigned char** out_image_bytes,
        int* out_image_size) {

        cv::Mat image(image_height, image_width, CV_8UC3, image_bytes);
        if (image.empty()) {
            *out_image_bytes = nullptr;
            *out_image_size = 0;
            return;
        }

        for (int i = 0; i < detections_count; ++i) {
            const auto& d = detections[i];
            cv::rectangle(image, cv::Rect(d.x, d.y, d.width, d.height), cv::Scalar(0, 255, 0), 2);
            std::string label = "Unknown";
            if (d.class_id >= 0 && d.class_id < class_names_count) {
                label = class_names[d.class_id];
            }
            label += " " + std::to_string(d.score).substr(0, 4);
            cv::putText(image, label, cv::Point(d.x, d.y - 10), cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(0, 255, 0), 2);
        }

        std::vector<unsigned char> buf;
        cv::imencode(".jpg", image, buf);
        *out_image_size = static_cast<int>(buf.size());
        *out_image_bytes = new unsigned char[*out_image_size];
        std::copy(buf.begin(), buf.end(), *out_image_bytes);
    }

    YOLO_API void free_image_memory(unsigned char* image_bytes) {
        delete[] image_bytes;
    }
}