#include "pch.h" #include "Yolo_ONNX.h" // #include #include #include #include #include #include #include #include // namespace { cv::Mat preprocess(const cv::Mat& img, int target_width, int target_height, int& pad_w, int& pad_h, float& scale); std::vector postprocess(Ort::Value& output_tensor, float scale, int pad_w, int pad_h, int img_w, int img_h, float conf_threshold, float iou_threshold); class YoloDetector { public: // Ort::Env env; std::unique_ptr session; // int input_width = 0; int input_height = 0; // Ort::AllocatorWithDefaultOptions allocator; std::string input_name_str; std::string output_name_str; std::vector input_node_names; std::vector output_node_names; public: /** * @brief */ YoloDetector(const wchar_t* model_path, int in_width, int in_height) : env(ORT_LOGGING_LEVEL_WARNING, "YOLOv8-ONNX-GPU"), input_width(in_width), input_height(in_height) { // Ort::SessionOptions session_options; OrtCUDAProviderOptions cuda_options; // session_options.AppendExecutionProvider_CUDA(cuda_options); // session = std::make_unique(env, model_path, session_options); // // input_name_str = session->GetInputNameAllocated(0, allocator).get(); output_name_str = session->GetOutputNameAllocated(0, allocator).get(); input_node_names.push_back(input_name_str.c_str()); output_node_names.push_back(output_name_str.c_str()); } /** * @brief */ std::vector detect( unsigned char* image_bytes, int image_width, int image_height, float conf_threshold, float iou_threshold) { // cv::Mat image(image_height, image_width, CV_8UC3, image_bytes); if (image.empty()) { throw std::runtime_error("Input image is empty."); } // int pad_w, pad_h; float scale; cv::Mat preprocessed_img = preprocess(image, input_width, input_height, pad_w, pad_h, scale); // // cv::Mat blob; cv::dnn::blobFromImage(preprocessed_img, blob, 1 / 255.0, cv::Size(), cv::Scalar(), true, false); std::vector input_shape = { 1, 3, (int64_t)input_height, (int64_t)input_width }; std::cout <<"input shape: " << input_shape[0] << "," << input_shape[1] << "," << input_shape[2] << "," << input_shape[3] << "," << std::endl; // auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); Ort::Value input_tensor = Ort::Value::CreateTensor(memory_info, blob.ptr(), blob.total(), input_shape.data(), input_shape.size()); // auto output_tensors = session->Run(Ort::RunOptions{ nullptr }, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1); // return postprocess(output_tensors[0], scale, pad_w, pad_h, image_width, image_height, conf_threshold, iou_threshold); } }; // ======================================================================== // // ======================================================================== // cv::Mat preprocess(const cv::Mat& img, int target_width, int target_height, int& pad_w, int& pad_h, float& scale) { cv::Mat resized_img; int w = img.cols; int h = img.rows; scale = std::min(static_cast(target_width) / w, static_cast(target_height) / h); int new_w = static_cast(w * scale); int new_h = static_cast(h * scale); cv::resize(img, resized_img, cv::Size(new_w, new_h), 0, 0, cv::INTER_AREA); pad_w = target_width - new_w; // pad_h = target_height - new_h; // // int top = pad_h / 2; int bottom = pad_h - top; int left = pad_w / 2; int right = pad_w - left; // cv::Mat padded_img; // cv::copyMakeBorder(resized_img, padded_img, top, bottom, left, right, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114)); return padded_img; } // std::vector postprocess(Ort::Value& output_tensor, float scale, int pad_w, int pad_h, int img_w, int img_h, float conf_threshold, float iou_threshold) { const auto output_shape = output_tensor.GetTensorTypeAndShapeInfo().GetShape(); const float* raw_output = output_tensor.GetTensorData(); int num_classes = static_cast(output_shape[1]) - 4; int num_proposals = static_cast(output_shape[2]); std::vector boxes; std::vector scores; std::vector class_ids; cv::Mat raw_data_mat(num_classes + 4, num_proposals, CV_32F, (void*)raw_output); raw_data_mat = raw_data_mat.t(); for (int i = 0; i < num_proposals; ++i) { const float* proposal = raw_data_mat.ptr(i); const float* class_scores = proposal + 4; float max_score = 0.0f; int class_id = -1; for (int j = 0; j < num_classes; ++j) { if (class_scores[j] > max_score) { max_score = class_scores[j]; class_id = j; } } if (max_score > conf_threshold) { float cx = proposal[0]; float cy = proposal[1]; float w = proposal[2]; float h = proposal[3]; int left = static_cast((cx - w / 2 - (pad_w / 2.0f)) / scale); int top = static_cast((cy - h / 2 - (pad_w / 2.0f)) / scale); int width = static_cast(w / scale); int height = static_cast(h / scale); left = std::max(0, std::min(left, img_w - 1)); top = std::max(0, std::min(top, img_h - 1)); width = std::min(width, img_w - left); height = std::min(height, img_h - top); boxes.push_back(cv::Rect(left, top, width, height)); scores.push_back(max_score); class_ids.push_back(class_id); } } std::vector nms_result; cv::dnn::NMSBoxes(boxes, scores, conf_threshold, iou_threshold, nms_result); std::vector detections; for (int idx : nms_result) { detections.push_back({ class_ids[idx], scores[idx], boxes[idx].x, boxes[idx].y, boxes[idx].width, boxes[idx].height }); } return detections; } } // extern "C" { // ======================================================================== // // ======================================================================== YOLO_API void* create_detector( const wchar_t* model_path, int input_width, int input_height) { try { // YoloDetector* detector = new YoloDetector(model_path, input_width, input_height); // return static_cast(detector); } catch (const Ort::Exception& e) { std::cerr << "ONNX Runtime Error creating detector: " << e.what() << std::endl; return nullptr; } catch (const std::exception& e) { std::cerr << "Error creating detector: " << e.what() << std::endl; return nullptr; } } YOLO_API void free_detector(void* detector_handle) { if (detector_handle) { YoloDetector* detector = static_cast(detector_handle); delete detector; } } YOLO_API int perform_detection_on_session( void* detector_handle, unsigned char* image_bytes, int image_width, int image_height, Detection** out_detections, int* out_detections_count, float conf_threshold, float iou_threshold) { if (!detector_handle) return -1; // // YoloDetector* detector = static_cast(detector_handle); try { // std::vector detections = detector->detect( image_bytes, image_width, image_height, conf_threshold, iou_threshold ); // *out_detections_count = static_cast(detections.size()); if (*out_detections_count > 0) { *out_detections = new Detection[*out_detections_count]; std::copy(detections.begin(), detections.end(), *out_detections); } else { *out_detections = nullptr; } return 0; // } catch (const Ort::Exception& e) { std::cerr << "ONNX Runtime Error during detection: " << e.what() << std::endl; return -2; } catch (const std::exception& e) { std::cerr << "Error during detection: " << e.what() << std::endl; return -4; } } // ======================================================================== // // ======================================================================== YOLO_API int perform_detection( const wchar_t* model_path, unsigned char* image_bytes, int image_width, int image_height, Detection** out_detections, int* out_detections_count, const char** class_names, int class_names_count, float conf_threshold, float iou_threshold, int input_width, int input_height ) { // static Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "YOLOv8-ONNX-GPU"); static std::unique_ptr session = nullptr; static std::wstring current_model_path = L""; try { if (!session || current_model_path != model_path) { Ort::SessionOptions session_options; OrtCUDAProviderOptions cuda_options; session_options.AppendExecutionProvider_CUDA(cuda_options); session = std::make_unique(env, model_path, session_options); current_model_path = model_path; } std::vector input_shape = { 1, 3, input_height, input_width }; Ort::AllocatorWithDefaultOptions allocator; std::string input_name_str = session->GetInputNameAllocated(0, allocator).get(); std::vector input_node_names = { input_name_str.c_str() }; std::string output_name_str = session->GetOutputNameAllocated(0, allocator).get(); std::vector output_node_names = { output_name_str.c_str() }; cv::Mat image(image_height, image_width, CV_8UC3, image_bytes); if (image.empty()) return -1; int pad_w, pad_h; float scale; cv::Mat preprocessed_img = preprocess(image, input_width, input_height, pad_w, pad_h, scale); cv::Mat blob; cv::dnn::blobFromImage(preprocessed_img, blob, 1 / 255.0, cv::Size(), cv::Scalar(), true, false); auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); Ort::Value input_tensor = Ort::Value::CreateTensor(memory_info, blob.ptr(), blob.total(), input_shape.data(), input_shape.size()); auto output_tensors = session->Run(Ort::RunOptions{ nullptr }, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1); std::vector detections = postprocess(output_tensors[0], scale, pad_w, pad_h, image_width, image_height, conf_threshold, iou_threshold); *out_detections_count = static_cast(detections.size()); if (*out_detections_count > 0) { *out_detections = new Detection[*out_detections_count]; std::copy(detections.begin(), detections.end(), *out_detections); } else { *out_detections = nullptr; } } catch (const Ort::Exception& e) { std::cerr << "ONNX Runtime 异常: " << e.what() << std::endl; return -2; } catch (const cv::Exception& e) { std::cerr << "OpenCV 异常: " << e.what() << std::endl; return -3; } catch (const std::exception& e) { std::cerr << "标准库异常: " << e.what() << std::endl; return -4; } return 0; } // YOLO_API void free_memory(Detection* detections) { delete[] detections; } YOLO_API void draw_and_encode_image( unsigned char* image_bytes, int image_width, int image_height, const Detection* detections, int detections_count, const char** class_names, int class_names_count, unsigned char** out_image_bytes, int* out_image_size) { cv::Mat image(image_height, image_width, CV_8UC3, image_bytes); if (image.empty()) { *out_image_bytes = nullptr; *out_image_size = 0; return; } for (int i = 0; i < detections_count; ++i) { const auto& d = detections[i]; cv::rectangle(image, cv::Rect(d.x, d.y, d.width, d.height), cv::Scalar(0, 255, 0), 2); std::string label = "Unknown"; if (d.class_id >= 0 && d.class_id < class_names_count) { label = class_names[d.class_id]; } label += " " + std::to_string(d.score).substr(0, 4); cv::putText(image, label, cv::Point(d.x, d.y - 10), cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(0, 255, 0), 2); } std::vector buf; cv::imencode(".jpg", image, buf); *out_image_size = static_cast(buf.size()); *out_image_bytes = new unsigned char[*out_image_size]; std::copy(buf.begin(), buf.end(), *out_image_bytes); } YOLO_API void free_image_memory(unsigned char* image_bytes) { delete[] image_bytes; } }