// YoloV8_ONNX.cpp (optimized version)

#include "pch.h"

#include "Yolo_ONNX.h"

#include <onnxruntime_c_api.h>
#include <onnxruntime_cxx_api.h>
#include <opencv2/opencv.hpp>

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <exception>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

namespace {
|
|||
|
|
|
|||
|
|
// <20><><EFBFBD>Ż<EFBFBD><C5BB><EFBFBD>Ԥ<EFBFBD><D4A4><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
|
cv::Mat preprocess(const cv::Mat& img, int target_width, int target_height, int& pad_w, int& pad_h, float& scale) {
|
|||
|
|
cv::Mat resized_img;
|
|||
|
|
int w = img.cols;
|
|||
|
|
int h = img.rows;
|
|||
|
|
scale = std::min(static_cast<float>(target_width) / w, static_cast<float>(target_height) / h);
|
|||
|
|
int new_w = static_cast<int>(w * scale);
|
|||
|
|
int new_h = static_cast<int>(h * scale);
|
|||
|
|
|
|||
|
|
// <20><><EFBFBD>Ż<EFBFBD><C5BB><EFBFBD>ʹ<EFBFBD><CAB9> INTER_AREA <20><>ֵ<EFBFBD>㷨<EFBFBD><E3B7A8><EFBFBD><EFBFBD><EFBFBD>ʺ<EFBFBD>ͼ<EFBFBD><CDBC><EFBFBD><EFBFBD>С<EFBFBD><D0A1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Python<6F><6E><EFBFBD><EFBFBD>Ϊ<EFBFBD><CEAA><EFBFBD>ӽ<EFBFBD>
|
|||
|
|
cv::resize(img, resized_img, cv::Size(new_w, new_h), 0, 0, cv::INTER_AREA);
|
|||
|
|
|
|||
|
|
pad_w = target_width - new_w;
|
|||
|
|
pad_h = target_height - new_h;
|
|||
|
|
cv::Mat padded_img;
|
|||
|
|
cv::copyMakeBorder(resized_img, padded_img, 0, pad_h, 0, pad_w, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));
|
|||
|
|
return padded_img;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
|
std::vector<Detection> postprocess(Ort::Value& output_tensor, float scale, int pad_w, int pad_h, int img_w, int img_h, float conf_threshold, float iou_threshold) {
|
|||
|
|
const auto output_shape = output_tensor.GetTensorTypeAndShapeInfo().GetShape();
|
|||
|
|
const float* raw_output = output_tensor.GetTensorData<float>();
|
|||
|
|
int num_classes = static_cast<int>(output_shape[1]) - 4;
|
|||
|
|
int num_proposals = static_cast<int>(output_shape[2]);
|
|||
|
|
|
|||
|
|
std::vector<cv::Rect> boxes;
|
|||
|
|
std::vector<float> scores;
|
|||
|
|
std::vector<int> class_ids;
|
|||
|
|
|
|||
|
|
cv::Mat raw_data_mat(num_classes + 4, num_proposals, CV_32F, (void*)raw_output);
|
|||
|
|
raw_data_mat = raw_data_mat.t();
|
|||
|
|
|
|||
|
|
for (int i = 0; i < num_proposals; ++i) {
|
|||
|
|
const float* proposal = raw_data_mat.ptr<float>(i);
|
|||
|
|
const float* class_scores = proposal + 4;
|
|||
|
|
float max_score = 0.0f;
|
|||
|
|
int class_id = -1;
|
|||
|
|
for (int j = 0; j < num_classes; ++j) {
|
|||
|
|
if (class_scores[j] > max_score) {
|
|||
|
|
max_score = class_scores[j];
|
|||
|
|
class_id = j;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
if (max_score > conf_threshold) {
|
|||
|
|
float cx = proposal[0];
|
|||
|
|
float cy = proposal[1];
|
|||
|
|
float w = proposal[2];
|
|||
|
|
float h = proposal[3];
|
|||
|
|
int left = static_cast<int>((cx - w / 2 - (pad_w / 2.0f)) / scale);
|
|||
|
|
int top = static_cast<int>((cy - h / 2 - (pad_h / 2.0f)) / scale);
|
|||
|
|
int width = static_cast<int>(w / scale);
|
|||
|
|
int height = static_cast<int>(h / scale);
|
|||
|
|
left = std::max(0, std::min(left, img_w - 1));
|
|||
|
|
top = std::max(0, std::min(top, img_h - 1));
|
|||
|
|
width = std::min(width, img_w - left);
|
|||
|
|
height = std::min(height, img_h - top);
|
|||
|
|
boxes.push_back(cv::Rect(left, top, width, height));
|
|||
|
|
scores.push_back(max_score);
|
|||
|
|
class_ids.push_back(class_id);
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
std::vector<int> nms_result;
|
|||
|
|
cv::dnn::NMSBoxes(boxes, scores, conf_threshold, iou_threshold, nms_result);
|
|||
|
|
std::vector<Detection> detections;
|
|||
|
|
for (int idx : nms_result) {
|
|||
|
|
detections.push_back({ class_ids[idx], scores[idx], boxes[idx].x, boxes[idx].y, boxes[idx].width, boxes[idx].height });
|
|||
|
|
}
|
|||
|
|
return detections;
|
|||
|
|
}
|
|||
|
|
}
extern "C" {
// [Modified] Function signature updated: four new parameters were added.
/// Runs the detector on a raw 8-bit, 3-channel image.
///
/// The ONNX Runtime environment, the session and the path it was loaded from
/// are kept in function-local statics; the session is (re)created with the
/// CUDA execution provider whenever `model_path` differs from the cached one.
/// NOTE(review): the cached session is not guarded by a lock -- confirm that
/// callers never invoke this function concurrently.
///
/// @param model_path            Path of the ONNX model file.
/// @param image_bytes           Pixel buffer wrapped (not copied) as an
///                              `image_height` x `image_width` CV_8UC3 matrix.
///                              Channel order is passed to the model unchanged
///                              (swapRB is false) -- NOTE(review): confirm it
///                              matches what the model expects.
/// @param image_width           Image width in pixels; must be > 0.
/// @param image_height          Image height in pixels; must be > 0.
/// @param out_detections        Receives an array allocated with `new[]`
///                              (release it with free_memory), or nullptr when
///                              nothing was detected or an error occurred.
/// @param out_detections_count  Receives the number of array elements; 0 on
///                              error.
/// @param class_names           Not used by this function.
/// @param class_names_count     Not used by this function.
/// @param conf_threshold        Confidence threshold supplied by the caller.
/// @param iou_threshold         IoU threshold supplied by the caller.
/// @param input_width           Model input width; must be > 0.
/// @param input_height          Model input height; must be > 0.
/// @return 0 on success, -1 for invalid arguments, -2 for an ONNX Runtime
///         error, -3 for an OpenCV error, -4 for any other exception.
YOLO_API int perform_detection(
    const wchar_t* model_path,
    unsigned char* image_bytes,
    int image_width,
    int image_height,
    Detection** out_detections,
    int* out_detections_count,
    const char** class_names,
    int class_names_count,
    float conf_threshold,  // confidence threshold supplied by the caller
    float iou_threshold,   // IoU threshold supplied by the caller
    int input_width,       // model input width supplied by the caller
    int input_height       // model input height supplied by the caller
) {
    // Put the outputs into a defined state first so that a caller who frees
    // them after a failed call never touches an indeterminate pointer.
    if (out_detections != nullptr) {
        *out_detections = nullptr;
    }
    if (out_detections_count != nullptr) {
        *out_detections_count = 0;
    }

    if (model_path == nullptr || image_bytes == nullptr ||
        out_detections == nullptr || out_detections_count == nullptr ||
        image_width <= 0 || image_height <= 0 ||
        input_width <= 0 || input_height <= 0) {
        return -1;
    }

    // Kept in the signature for interface compatibility only.
    (void)class_names;
    (void)class_names_count;

    // No exception may cross the extern "C" boundary, so everything that can
    // throw -- including construction of the statics -- lives inside the try.
    try {
        static Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "YOLOv8-ONNX-GPU");
        static std::unique_ptr<Ort::Session> session;
        // Path of the model held by `session`; used to detect a model change.
        static std::wstring current_model_path;

        // Re-create the session when the model path changed.
        if (!session || current_model_path != model_path) {
            Ort::SessionOptions session_options;
            OrtCUDAProviderOptions cuda_options{};
            session_options.AppendExecutionProvider_CUDA(cuda_options);

            // Build the replacement first; the cached state is only touched
            // once construction has succeeded.
            auto new_session = std::make_unique<Ort::Session>(env, model_path, session_options);
            current_model_path = model_path;
            session = std::move(new_session);
        }

        // The input size comes from the caller instead of being hard-coded.
        std::vector<int64_t> input_shape = { 1, 3, input_height, input_width };

        Ort::AllocatorWithDefaultOptions allocator;
        const std::string input_name_str = session->GetInputNameAllocated(0, allocator).get();
        const std::vector<const char*> input_node_names = { input_name_str.c_str() };

        const std::string output_name_str = session->GetOutputNameAllocated(0, allocator).get();
        const std::vector<const char*> output_node_names = { output_name_str.c_str() };

        // Wraps the caller's buffer without copying it.
        const cv::Mat image(image_height, image_width, CV_8UC3, image_bytes);

        int pad_w = 0;
        int pad_h = 0;
        float scale = 1.0f;
        const cv::Mat preprocessed_img = preprocess(image, input_width, input_height, pad_w, pad_h, scale);

        // Converts to a float NCHW blob scaled to [0, 1]; no resize, no mean
        // subtraction, no channel swap, no crop.
        cv::Mat blob;
        cv::dnn::blobFromImage(preprocessed_img, blob, 1 / 255.0, cv::Size(), cv::Scalar(), false, false);

        auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
            memory_info, blob.ptr<float>(), blob.total(), input_shape.data(), input_shape.size());

        auto output_tensors = session->Run(
            Ort::RunOptions{ nullptr }, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);

        const std::vector<Detection> detections = postprocess(
            output_tensors[0], scale, pad_w, pad_h, image_width, image_height, conf_threshold, iou_threshold);

        // Publish the count only after the allocation succeeded, so count and
        // pointer can never disagree.
        if (!detections.empty()) {
            Detection* result = new Detection[detections.size()];
            std::copy(detections.begin(), detections.end(), result);
            *out_detections = result;
            *out_detections_count = static_cast<int>(detections.size());
        }
    }
    // Messages are plain ASCII so they stay readable regardless of the source
    // file encoding or the console code page.
    catch (const Ort::Exception& e) {
        std::cerr << "ONNX Runtime exception: " << e.what() << '\n';
        return -2;
    }
    catch (const cv::Exception& e) {
        std::cerr << "OpenCV exception: " << e.what() << '\n';
        return -3;
    }
    catch (const std::exception& e) {
        std::cerr << "Other exception: " << e.what() << '\n';
        return -4;
    }
    catch (...) {
        std::cerr << "Other exception: unknown type\n";
        return -4;
    }
    return 0;
}

// The functions below remain unchanged.
/// Releases a detection array that was allocated with `new[]`, such as the
/// one handed out through perform_detection's `out_detections`.
///
/// @param detections  Array to release; a null pointer is accepted and
///                    ignored.
YOLO_API void free_memory(Detection* detections) {
    if (detections == nullptr) {
        return;
    }
    delete[] detections;
}
/// Draws the detections onto the image and returns it JPEG-encoded.
///
/// The pixel buffer is wrapped without copying, so boxes and labels are drawn
/// directly into `image_bytes`.
///
/// @param image_bytes        Pixel buffer interpreted as an `image_height` x
///                           `image_width` CV_8UC3 matrix; modified in place.
/// @param image_width        Image width in pixels; must be > 0.
/// @param image_height       Image height in pixels; must be > 0.
/// @param detections         Detections to draw; may be null (nothing drawn).
/// @param detections_count   Number of elements in `detections`.
/// @param class_names        Label per class id; ids without a usable name
///                           are labelled "Unknown".
/// @param class_names_count  Number of elements in `class_names`.
/// @param out_image_bytes    Receives a buffer allocated with `new[]` (release
///                           it with free_image_memory), or nullptr on
///                           failure.
/// @param out_image_size     Receives the buffer size in bytes; 0 on failure.
YOLO_API void draw_and_encode_image(
    unsigned char* image_bytes,
    int image_width,
    int image_height,
    const Detection* detections,
    int detections_count,
    const char** class_names,
    int class_names_count,
    unsigned char** out_image_bytes,
    int* out_image_size) {

    // Define the outputs up front so every early exit reports "no image".
    if (out_image_bytes != nullptr) {
        *out_image_bytes = nullptr;
    }
    if (out_image_size != nullptr) {
        *out_image_size = 0;
    }
    if (out_image_bytes == nullptr || out_image_size == nullptr ||
        image_bytes == nullptr || image_width <= 0 || image_height <= 0) {
        return;
    }

    const int box_count = (detections != nullptr) ? detections_count : 0;

    // No exception may cross the extern "C" boundary.
    try {
        cv::Mat image(image_height, image_width, CV_8UC3, image_bytes);

        for (int i = 0; i < box_count; ++i) {
            const auto& d = detections[i];
            cv::rectangle(image, cv::Rect(d.x, d.y, d.width, d.height), cv::Scalar(0, 255, 0), 2);

            // Fall back to "Unknown" when the id has no usable name; building
            // a std::string from a null pointer would be undefined behaviour.
            std::string label = "Unknown";
            const bool has_name = class_names != nullptr &&
                                  d.class_id >= 0 && d.class_id < class_names_count &&
                                  class_names[d.class_id] != nullptr;
            if (has_name) {
                label = class_names[d.class_id];
            }
            // Keep only the first four characters of the formatted score.
            label += " " + std::to_string(d.score).substr(0, 4);

            cv::putText(image, label, cv::Point(d.x, d.y - 10), cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(0, 255, 0), 2);
        }

        std::vector<unsigned char> buf;
        if (!cv::imencode(".jpg", image, buf) || buf.empty()) {
            return;
        }

        // Publish size and pointer only after the allocation succeeded.
        unsigned char* encoded = new unsigned char[buf.size()];
        std::copy(buf.begin(), buf.end(), encoded);
        *out_image_bytes = encoded;
        *out_image_size = static_cast<int>(buf.size());
    }
    catch (const std::exception& e) {
        std::cerr << "draw_and_encode_image failed: " << e.what() << '\n';
    }
    catch (...) {
        std::cerr << "draw_and_encode_image failed: unknown exception\n";
    }
}
/// Releases a byte buffer that was allocated with `new[]`, such as the one
/// handed out through draw_and_encode_image's `out_image_bytes`.
///
/// @param image_bytes  Buffer to release; a null pointer is accepted and
///                     ignored.
YOLO_API void free_image_memory(unsigned char* image_bytes) {
    if (image_bytes == nullptr) {
        return;
    }
    delete[] image_bytes;
}
}