From 9f6472c2197b7e69147d7b9e68df7c22059ae768 Mon Sep 17 00:00:00 2001 From: GuanYuankai Date: Sun, 4 Jan 2026 15:47:02 +0800 Subject: [PATCH] =?UTF-8?q?feat(video=20Service):=20=E5=AE=8C=E6=88=90?= =?UTF-8?q?=E7=AC=AC=E4=B8=80=E7=89=88=E7=AE=97=E6=B3=95=E7=9A=84=E9=83=A8?= =?UTF-8?q?=E7=BD=B2=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 2 + src/videoService/video_pipeline.cpp | 95 ++++---- src/videoService/video_pipeline.hpp | 14 +- src/yoloDetector/postprocess.cc | 324 ++++++++++++++++++++++++++++ src/yoloDetector/postprocess.h | 68 ++++++ src/yoloDetector/yolo_detector.cpp | 289 +++++++++++++++++++++++++ src/yoloDetector/yolo_detector.hpp | 57 +++++ 7 files changed, 808 insertions(+), 41 deletions(-) create mode 100644 src/yoloDetector/postprocess.cc create mode 100644 src/yoloDetector/postprocess.h create mode 100644 src/yoloDetector/yolo_detector.cpp create mode 100644 src/yoloDetector/yolo_detector.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 3fbc908..61a8ed2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,6 +64,8 @@ add_library(vehicle_road_lib STATIC src/alarm/alarm_service.cc #新视频模块 src/videoService/video_pipeline.cpp + src/yoloDetector/postprocess.cc + src/yoloDetector/yolo_detector.cpp ) target_include_directories(vehicle_road_lib PUBLIC diff --git a/src/videoService/video_pipeline.cpp b/src/videoService/video_pipeline.cpp index cf7c305..fc3740e 100644 --- a/src/videoService/video_pipeline.cpp +++ b/src/videoService/video_pipeline.cpp @@ -2,7 +2,19 @@ #include -VideoPipeline::VideoPipeline() : running_(false) {} +VideoPipeline::VideoPipeline() : running_(false) { + // [新增] 实例化检测器并加载模型 + detector_ = std::make_unique(); + + // 模型路径写死为 models/vehicle_model.rknn + int ret = detector_->init("../models/vehicle_model.rknn"); + if (ret != 0) { + spdlog::error("Failed to initialize YoloDetector with model: models/vehicle_model.rknn"); + // 注意:这里如果初始化失败,后续 detect 会直接返回空结果,建议检查日志 + } else { + spdlog::info("YoloDetector initialized successfully."); + } +} VideoPipeline::~VideoPipeline() { Stop(); @@ -37,43 +49,33 @@ void VideoPipeline::Stop() { spdlog::info("VideoPipeline Stopped."); } -std::vector VideoPipeline::mockInference(const cv::Mat& frame) { - std::vector results; - static int dummyX = 100; - static int direction = 5; - - // 简单的移动逻辑,模拟每帧的变化 - dummyX += direction; - if (dummyX > frame.cols - 200 || dummyX < 0) - direction *= -1; - - DetectionResult res; - res.x = dummyX; - res.y = 200; - res.width = 150; - res.height = 300; - res.label = "TEST_CLIP"; - res.confidence = 0.95f; - - results.push_back(res); - return results; -} +// [删除] mockInference 函数已移除 void VideoPipeline::drawOverlay(cv::Mat& frame, const std::vector& results) { for (const auto& res : results) { - cv::rectangle(frame, cv::Rect(res.x, res.y, res.width, res.height), cv::Scalar(0, 255, 0), - 2); + // 根据不同类别使用不同颜色 (示例: 0为红色, 1为绿色) + cv::Scalar color = (res.class_id == 0) ? cv::Scalar(0, 0, 255) : cv::Scalar(0, 255, 0); + + cv::rectangle(frame, cv::Rect(res.x, res.y, res.width, res.height), color, 2); + std::string text = res.label + " " + std::to_string(res.confidence).substr(0, 4); - cv::putText(frame, text, cv::Point(res.x, res.y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.6, - cv::Scalar(0, 255, 0), 2); + + // 简单的防止文字跑出边界的处理 + int y_pos = res.y - 5; + if (y_pos < 10) + y_pos = res.y + 15; + + cv::putText(frame, text, cv::Point(res.x, y_pos), cv::FONT_HERSHEY_SIMPLEX, 0.6, color, 2); } - cv::putText(frame, "RK3588 H.264 @ 20FPS", cv::Point(20, 50), cv::FONT_HERSHEY_SIMPLEX, 1.0, - cv::Scalar(0, 0, 255), 2); + + // 添加水印 + cv::putText(frame, "RK3588 YOLOv8 Live", cv::Point(20, 50), cv::FONT_HERSHEY_SIMPLEX, 1.0, + cv::Scalar(0, 255, 255), 2); } void VideoPipeline::processLoop(std::string inputUrl, std::string outputUrl, bool isFileSource) { cv::VideoCapture cap; - cap.open(inputUrl); // 文件路径也是通过 open 打开 + cap.open(inputUrl); if (!cap.isOpened()) { spdlog::error("Failed to open input: {}", inputUrl); @@ -81,8 +83,8 @@ void VideoPipeline::processLoop(std::string inputUrl, std::string outputUrl, boo return; } - const double TARGET_FPS = 20.0; - // 计算每帧的目标耗时 (毫秒) -> 1000 / 20 = 50ms + // 降低一点目标 FPS,因为推理需要时间。RK3588 NPU 很快,但 25/30 FPS 比较稳妥 + const double TARGET_FPS = 30.0; const double FRAME_DURATION_MS = 1000.0 / TARGET_FPS; int width = cap.get(cv::CAP_PROP_FRAME_WIDTH); @@ -92,10 +94,11 @@ void VideoPipeline::processLoop(std::string inputUrl, std::string outputUrl, boo isFileSource ? "FILE LOOP" : "LIVE STREAM"); std::stringstream pipeline; + // 注意:推流分辨率这里保持和输入一致,或者可以 resize pipeline << "appsrc ! " << "videoconvert ! " << "video/x-raw,format=NV12,width=" << width << ",height=" << height - << ",framerate=20/1 ! " + << ",framerate=" << (int)TARGET_FPS << "/1 ! " << "mpph264enc ! " << "h264parse ! " << "rtspclientsink location=" << outputUrl << " protocols=tcp"; @@ -108,9 +111,8 @@ void VideoPipeline::processLoop(std::string inputUrl, std::string outputUrl, boo } cv::Mat frame; - + long frame_count = 0; while (running_) { - // 记录循环开始时间 auto loop_start = std::chrono::steady_clock::now(); if (!cap.read(frame)) { @@ -130,17 +132,34 @@ void VideoPipeline::processLoop(std::string inputUrl, std::string outputUrl, boo if (frame.empty()) continue; - // 1. 算法处理 - auto results = mockInference(frame); + // === 1. 真实 RKNN 推理 === + // detect 内部已经包含了预处理、NPU推理、后处理和坐标还原 + std::vector results = detector_->detect(frame); - // 2. 绘制叠加 + frame_count++; + // spdlog::info("frame count {}", frame_count); + if (!results.empty()) { + spdlog::info("Frame {}: Detected {} objects", frame_count, results.size()); + for (const auto& res : results) { + spdlog::info(" -> [Class: {} ({})] Conf: {:.2f} Box: [{}, {}, {}x{}]", + res.class_id, res.label, res.confidence, res.x, res.y, res.width, + res.height); + } + } else { + // 如果连续没有检测到,每隔一秒(20帧)提示一次,确认代码还在跑 + if (frame_count % 20 == 0) { + spdlog::debug("Frame {}: No objects detected.", frame_count); + } + } + // === 2. 绘制叠加 === drawOverlay(frame, results); - // 3. 推流 + // === 3. 推流 === if (writer.isOpened()) { writer.write(frame); } + // 文件模式下控制播放速度,直播流则不需要 wait if (isFileSource) { auto loop_end = std::chrono::steady_clock::now(); std::chrono::duration elapsed = loop_end - loop_start; diff --git a/src/videoService/video_pipeline.hpp b/src/videoService/video_pipeline.hpp index f46dc00..7d1bd37 100644 --- a/src/videoService/video_pipeline.hpp +++ b/src/videoService/video_pipeline.hpp @@ -1,14 +1,17 @@ #pragma once #include +#include // [新增] 用于 unique_ptr #include #include #include #include #include "spdlog/spdlog.h" +#include "yoloDetector/yolo_detector.hpp" // [新增] 包含检测器头文件 (同时也包含了 DetectionResult 定义) -// 模拟的检测结果结构体(对应未来的 YOLO 结果) +// [删除] 移除此处定义的 DetectionResult,直接使用 yolo_detector.hpp 中的定义 +/* struct DetectionResult { int x; int y; @@ -17,6 +20,7 @@ struct DetectionResult { std::string label; float confidence; }; +*/ class VideoPipeline { public: @@ -33,12 +37,16 @@ public: private: void processLoop(std::string inputUrl, std::string outputUrl, bool isFileSource); - // 占位算法函数:模拟推理 - std::vector mockInference(const cv::Mat& frame); + // [删除] 移除模拟推理函数 + // std::vector mockInference(const cv::Mat& frame); // 绘图函数 void drawOverlay(cv::Mat& frame, const std::vector& results); +private: std::atomic running_; std::thread processingThread_; + + // [新增] YOLO 检测器实例 + std::unique_ptr detector_; }; diff --git a/src/yoloDetector/postprocess.cc b/src/yoloDetector/postprocess.cc new file mode 100644 index 0000000..8bd5c24 --- /dev/null +++ b/src/yoloDetector/postprocess.cc @@ -0,0 +1,324 @@ +#include "postprocess.h" +#include +#include +#include +#include +#include +#include +#include +#include + +static const char *labels[] = { + "fuel_car", + "new_energy_car" +}; + +inline static int clamp(float val, int min, int max) { return val > min ? (val < max ? val : max) : min; } + +static float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1, float ymax1) +{ + float w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0); + float h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0); + float i = w * h; + float u = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0) + (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0) - i; + return u <= 0.f ? 0.f : (i / u); +} + +static int nms(int validCount, std::vector &outputLocations, std::vector classIds, std::vector &order, int filterId, float threshold) +{ + for (int i = 0; i < validCount; ++i) + { + int n = order[i]; + if (n == -1 || classIds[n] != filterId) continue; + for (int j = i + 1; j < validCount; ++j) + { + int m = order[j]; + if (m == -1 || classIds[m] != filterId) continue; + float xmin0 = outputLocations[n * 4 + 0]; + float ymin0 = outputLocations[n * 4 + 1]; + float xmax0 = outputLocations[n * 4 + 0] + outputLocations[n * 4 + 2]; + float ymax0 = outputLocations[n * 4 + 1] + outputLocations[n * 4 + 3]; + + float xmin1 = outputLocations[m * 4 + 0]; + float ymin1 = outputLocations[m * 4 + 1]; + float xmax1 = outputLocations[m * 4 + 0] + outputLocations[m * 4 + 2]; + float ymax1 = outputLocations[m * 4 + 1] + outputLocations[m * 4 + 3]; + + float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1); + + if (iou > threshold) order[j] = -1; + } + } + return 0; +} + +static int quick_sort_indice_inverse(std::vector &input, int left, int right, std::vector &indices) +{ + float key; + int key_index; + int low = left; + int high = right; + if (left < right) + { + key_index = indices[left]; + key = input[left]; + while (low < high) + { + while (low < high && input[high] <= key) high--; + input[low] = input[high]; + indices[low] = indices[high]; + while (low < high && input[low] >= key) low++; + input[high] = input[low]; + indices[high] = indices[low]; + } + input[low] = key; + indices[low] = key_index; + quick_sort_indice_inverse(input, left, low - 1, indices); + quick_sort_indice_inverse(input, low + 1, right, indices); + } + return low; +} + +static float sigmoid(float x) { return 1.0 / (1.0 + expf(-x)); } +static float unsigmoid(float y) { return -1.0 * logf((1.0 / y) - 1.0); } +inline static int32_t __clip(float val, float min, float max) { float f = val <= min ? min : (val >= max ? max : val); return f; } +static int8_t qnt_f32_to_affine(float f32, int32_t zp, float scale) { float dst_val = (f32 / scale) + zp; int8_t res = (int8_t)__clip(dst_val, -128, 127); return res; } +static uint8_t qnt_f32_to_affine_u8(float f32, int32_t zp, float scale) { float dst_val = (f32 / scale) + zp; uint8_t res = (uint8_t)__clip(dst_val, 0, 255); return res; } +static float deqnt_affine_to_f32(int8_t qnt, int32_t zp, float scale) { return ((float)qnt - (float)zp) * scale; } +static float deqnt_affine_u8_to_f32(uint8_t qnt, int32_t zp, float scale) { return ((float)qnt - (float)zp) * scale; } + +static void compute_dfl(float* tensor, int dfl_len, float* box){ + for (int b=0; b<4; b++){ + float exp_t[dfl_len]; + float exp_sum=0; + float acc_sum=0; + for (int i=0; i< dfl_len; i++){ + exp_t[i] = exp(tensor[i+b*dfl_len]); + exp_sum += exp_t[i]; + } + for (int i=0; i< dfl_len; i++){ + acc_sum += exp_t[i]/exp_sum *i; + } + box[b] = acc_sum; + } +} + +// process_u8, process_i8, process_fp32 保持原样,它们是核心解码逻辑 +static int process_i8(int8_t *box_tensor, int32_t box_zp, float box_scale, + int8_t *score_tensor, int32_t score_zp, float score_scale, + int8_t *score_sum_tensor, int32_t score_sum_zp, float score_sum_scale, + int grid_h, int grid_w, int stride, int dfl_len, + std::vector &boxes, std::vector &objProbs, std::vector &classId, float threshold) +{ + int validCount = 0; + int grid_len = grid_h * grid_w; + int8_t score_thres_i8 = qnt_f32_to_affine(threshold, score_zp, score_scale); + int8_t score_sum_thres_i8 = qnt_f32_to_affine(threshold, score_sum_zp, score_sum_scale); + + for (int i = 0; i < grid_h; i++) + { + for (int j = 0; j < grid_w; j++) + { + int offset = i* grid_w + j; + // score sum 过滤 + if (score_sum_tensor != nullptr){ + if (score_sum_tensor[offset] < score_sum_thres_i8) continue; + } + + int8_t max_score = -score_zp; + int max_class_id = -1; + for (int c= 0; c< OBJ_CLASS_NUM; c++){ + if ((score_tensor[offset] > score_thres_i8) && (score_tensor[offset] > max_score)) + { + max_score = score_tensor[offset]; + max_class_id = c; + } + offset += grid_len; + } + + if (max_score > score_thres_i8){ + offset = i* grid_w + j; + float box[4]; + float before_dfl[dfl_len*4]; + for (int k=0; k< dfl_len*4; k++){ + before_dfl[k] = deqnt_affine_to_f32(box_tensor[offset], box_zp, box_scale); + offset += grid_len; + } + compute_dfl(before_dfl, dfl_len, box); + float x1 = (-box[0] + j + 0.5)*stride; + float y1 = (-box[1] + i + 0.5)*stride; + float w = (box[2] + j + 0.5)*stride - x1 + x1; // simplify w calc + w = (box[2] + j + 0.5)*stride + (box[0] - j - 0.5)*stride; // Correct logic from source: w = x2 - x1 + w = (box[2] + box[0]) * stride; // Actual width logic simplified + float h = (box[3] + box[1]) * stride; // Actual height logic simplified + + // Re-align with source strict logic to avoid errors + float x2 = (box[2] + j + 0.5)*stride; + float y2 = (box[3] + i + 0.5)*stride; + w = x2 - x1; + h = y2 - y1; + + boxes.push_back(x1); boxes.push_back(y1); boxes.push_back(w); boxes.push_back(h); + objProbs.push_back(deqnt_affine_to_f32(max_score, score_zp, score_scale)); + classId.push_back(max_class_id); + validCount ++; + } + } + } + return validCount; +} + +static int process_fp32(float *box_tensor, float *score_tensor, float *score_sum_tensor, + int grid_h, int grid_w, int stride, int dfl_len, + std::vector &boxes, std::vector &objProbs, std::vector &classId, float threshold) +{ + int validCount = 0; + int grid_len = grid_h * grid_w; + for (int i = 0; i < grid_h; i++) + { + for (int j = 0; j < grid_w; j++) + { + int offset = i* grid_w + j; + if (score_sum_tensor != nullptr){ + if (score_sum_tensor[offset] < threshold) continue; + } + float max_score = 0; + int max_class_id = -1; + for (int c= 0; c< OBJ_CLASS_NUM; c++){ + if ((score_tensor[offset] > threshold) && (score_tensor[offset] > max_score)) + { + max_score = score_tensor[offset]; + max_class_id = c; + } + offset += grid_len; + } + if (max_score> threshold){ + offset = i* grid_w + j; + float box[4]; + float before_dfl[dfl_len*4]; + for (int k=0; k< dfl_len*4; k++){ + before_dfl[k] = box_tensor[offset]; + offset += grid_len; + } + compute_dfl(before_dfl, dfl_len, box); + float x1 = (-box[0] + j + 0.5)*stride; + float y1 = (-box[1] + i + 0.5)*stride; + float x2 = (box[2] + j + 0.5)*stride; + float y2 = (box[3] + i + 0.5)*stride; + float w = x2 - x1; + float h = y2 - y1; + boxes.push_back(x1); boxes.push_back(y1); boxes.push_back(w); boxes.push_back(h); + objProbs.push_back(max_score); + classId.push_back(max_class_id); + validCount ++; + } + } + } + return validCount; +} + +// 主函数:post_process +int post_process(rknn_app_context_t *app_ctx, void *outputs, letterbox_t *letter_box, float conf_threshold, float nms_threshold, object_detect_result_list *od_results) +{ + rknn_output *_outputs = (rknn_output *)outputs; + std::vector filterBoxes; + std::vector objProbs; + std::vector classId; + int validCount = 0; + int stride = 0; + int grid_h = 0; + int grid_w = 0; + int model_in_w = app_ctx->model_width; + int model_in_h = app_ctx->model_height; + + memset(od_results, 0, sizeof(object_detect_result_list)); + + // 适配 YOLOv8 的 dfl_len + int dfl_len = app_ctx->output_attrs[0].dims[1] / 4; + int output_per_branch = app_ctx->io_num.n_output / 3; + + for (int i = 0; i < 3; i++) + { + void *score_sum = nullptr; + int32_t score_sum_zp = 0; + float score_sum_scale = 1.0; + if (output_per_branch == 3){ + score_sum = _outputs[i*output_per_branch + 2].buf; + score_sum_zp = app_ctx->output_attrs[i*output_per_branch + 2].zp; + score_sum_scale = app_ctx->output_attrs[i*output_per_branch + 2].scale; + } + int box_idx = i*output_per_branch; + int score_idx = i*output_per_branch + 1; + + grid_h = app_ctx->output_attrs[box_idx].dims[2]; + grid_w = app_ctx->output_attrs[box_idx].dims[3]; + stride = model_in_h / grid_h; + + if (app_ctx->is_quant) + { + // RK3588 通常使用 I8 量化 + validCount += process_i8((int8_t *)_outputs[box_idx].buf, app_ctx->output_attrs[box_idx].zp, app_ctx->output_attrs[box_idx].scale, + (int8_t *)_outputs[score_idx].buf, app_ctx->output_attrs[score_idx].zp, app_ctx->output_attrs[score_idx].scale, + (int8_t *)score_sum, score_sum_zp, score_sum_scale, + grid_h, grid_w, stride, dfl_len, + filterBoxes, objProbs, classId, conf_threshold); + } + else + { + validCount += process_fp32((float *)_outputs[box_idx].buf, (float *)_outputs[score_idx].buf, (float *)score_sum, + grid_h, grid_w, stride, dfl_len, + filterBoxes, objProbs, classId, conf_threshold); + } + } + + if (validCount <= 0) return 0; + + // 排序 + std::vector indexArray; + for (int i = 0; i < validCount; ++i) indexArray.push_back(i); + quick_sort_indice_inverse(objProbs, 0, validCount - 1, indexArray); + + // NMS + std::set class_set(std::begin(classId), std::end(classId)); + for (auto c : class_set) nms(validCount, filterBoxes, classId, indexArray, c, nms_threshold); + + int last_count = 0; + od_results->count = 0; + + // 坐标还原 (使用 letterbox 参数) + for (int i = 0; i < validCount; ++i) + { + if (indexArray[i] == -1 || last_count >= OBJ_NUMB_MAX_SIZE) continue; + int n = indexArray[i]; + + float x1 = filterBoxes[n * 4 + 0] - letter_box->x_pad; + float y1 = filterBoxes[n * 4 + 1] - letter_box->y_pad; + float x2 = x1 + filterBoxes[n * 4 + 2]; + float y2 = y1 + filterBoxes[n * 4 + 3]; + int id = classId[n]; + float obj_conf = objProbs[i]; + + od_results->results[last_count].box.left = (int)(clamp(x1, 0, model_in_w) / letter_box->scale); + od_results->results[last_count].box.top = (int)(clamp(y1, 0, model_in_h) / letter_box->scale); + od_results->results[last_count].box.right = (int)(clamp(x2, 0, model_in_w) / letter_box->scale); + od_results->results[last_count].box.bottom = (int)(clamp(y2, 0, model_in_h) / letter_box->scale); + od_results->results[last_count].prop = obj_conf; + od_results->results[last_count].cls_id = id; + last_count++; + } + od_results->count = last_count; + return 0; +} + +// 简化版 init +int init_post_process() { return 0; } + +// 简化版 deinit +void deinit_post_process() {} + +// 简化版 name getter +const char *coco_cls_to_name(int cls_id) +{ + if (cls_id >= OBJ_CLASS_NUM) return "null"; + return labels[cls_id]; +} diff --git a/src/yoloDetector/postprocess.h b/src/yoloDetector/postprocess.h new file mode 100644 index 0000000..1c12458 --- /dev/null +++ b/src/yoloDetector/postprocess.h @@ -0,0 +1,68 @@ +#ifndef _RKNN_YOLOV8_DEMO_POSTPROCESS_H_ +#define _RKNN_YOLOV8_DEMO_POSTPROCESS_H_ + +#include + +#include + +#include "rknn_api.h" + +// [修改 1] 移除 image_utils.h 和 common.h +// #include "common.h" +// #include "image_utils.h" + +#define OBJ_NAME_MAX_SIZE 64 +#define OBJ_NUMB_MAX_SIZE 128 + +// [修改 2] 按照你的模型,将类别数改为 2 +#define OBJ_CLASS_NUM 2 + +// [修改 3] 默认阈值 (实际运行时会被传入的参数覆盖) +#define NMS_THRESH 0.45 +#define BOX_THRESH 0.25 + +// [新增] 手动定义原本在 image_utils.h 中的结构体 +typedef struct { + int left; + int top; + int right; + int bottom; +} image_rect_t; + +typedef struct { + int x_pad; + int y_pad; + float scale; +} letterbox_t; + +// [新增] 手动定义 postprocess.cc 依赖的上下文结构体 +// 我们不需要 Demo 中那么复杂的结构,只保留 post_process 函数用到的字段 +typedef struct { + rknn_input_output_num io_num; + rknn_tensor_attr* output_attrs; + int model_width; + int model_height; + bool is_quant; // 是否为量化模型 +} rknn_app_context_t; + +// 结果结构体保持不变 +typedef struct { + image_rect_t box; + float prop; + int cls_id; +} object_detect_result; + +typedef struct { + int id; + int count; + object_detect_result results[OBJ_NUMB_MAX_SIZE]; +} object_detect_result_list; + +// 函数声明 +int init_post_process(); +void deinit_post_process(); +const char* coco_cls_to_name(int cls_id); // 修改返回值类型为 const char* +int post_process(rknn_app_context_t* app_ctx, void* outputs, letterbox_t* letter_box, + float conf_threshold, float nms_threshold, object_detect_result_list* od_results); + +#endif //_RKNN_YOLOV8_DEMO_POSTPROCESS_H_ diff --git a/src/yoloDetector/yolo_detector.cpp b/src/yoloDetector/yolo_detector.cpp new file mode 100644 index 0000000..45979e6 --- /dev/null +++ b/src/yoloDetector/yolo_detector.cpp @@ -0,0 +1,289 @@ +#include "yolo_detector.hpp" + +#include +#include // for memset +#include +#include + +#include "spdlog/spdlog.h" + +YoloDetector::YoloDetector() + : ctx_(0), is_initialized_(false), model_data_(nullptr), output_attrs_(nullptr) { + memset(&io_num_, 0, sizeof(io_num_)); +} + +YoloDetector::~YoloDetector() { + if (ctx_ > 0) { + rknn_destroy(ctx_); + } + if (model_data_) { + free(model_data_); + } + if (output_attrs_) { + free(output_attrs_); + } +} + +unsigned char* YoloDetector::load_model(const char* filename, int* model_size) { + FILE* fp = fopen(filename, "rb"); + if (fp == nullptr) { + spdlog::error("Open model file failed: {}", filename); + return nullptr; + } + fseek(fp, 0, SEEK_END); + int size = ftell(fp); + fseek(fp, 0, SEEK_SET); + unsigned char* data = (unsigned char*)malloc(size); + fread(data, 1, size, fp); + fclose(fp); + *model_size = size; + return data; +} + +int YoloDetector::init(const std::string& modelPath) { + spdlog::info("Loading RKNN model: {}", modelPath); + + // 1. 读取模型文件 + model_data_ = load_model(modelPath.c_str(), &model_data_size_); + if (!model_data_) + return -1; + + // 2. 初始化 RKNN + int ret = rknn_init(&ctx_, model_data_, model_data_size_, 0, NULL); + if (ret < 0) { + spdlog::error("rknn_init failed! ret={}", ret); + return -1; + } + + // 3. 查询模型输入输出信息 + ret = rknn_query(ctx_, RKNN_QUERY_IN_OUT_NUM, &io_num_, sizeof(io_num_)); + if (ret < 0) + return -1; + + // 获取输入属性 + rknn_tensor_attr input_attrs[io_num_.n_input]; + memset(input_attrs, 0, sizeof(input_attrs)); + for (int i = 0; i < io_num_.n_input; i++) { + input_attrs[i].index = i; + rknn_query(ctx_, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr)); + } + + // [新增] 获取输出属性 (关键:用于后处理中的量化参数 scale/zp) + output_attrs_ = (rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num_.n_output); + memset(output_attrs_, 0, sizeof(rknn_tensor_attr) * io_num_.n_output); + for (int i = 0; i < io_num_.n_output; i++) { + output_attrs_[i].index = i; + rknn_query(ctx_, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs_[i]), sizeof(rknn_tensor_attr)); + } + + // 假设只有一个输入 (图像) + model_width_ = input_attrs[0].dims[2]; // width + model_height_ = input_attrs[0].dims[1]; // height + model_channel_ = input_attrs[0].dims[0]; // channel + + // 兼容部分 NCHW 格式 + if (model_width_ == 3) { + model_width_ = input_attrs[0].dims[1]; + model_height_ = input_attrs[0].dims[2]; + } + + spdlog::info("Model Input Shape: {}x{}x{}", model_width_, model_height_, model_channel_); + is_initialized_ = true; + return 0; +} + +cv::Mat YoloDetector::letterbox(const cv::Mat& src, int target_width, int target_height, + float& ratio, int& dw, int& dh) { + int w = src.cols; + int h = src.rows; + + float scale_w = (float)target_width / w; + float scale_h = (float)target_height / h; + ratio = std::min(scale_w, scale_h); + + int new_unpad_w = (int)(w * ratio); + int new_unpad_h = (int)(h * ratio); + + dw = (target_width - new_unpad_w) / 2; + dh = (target_height - new_unpad_h) / 2; + + cv::Mat resized; + cv::resize(src, resized, cv::Size(new_unpad_w, new_unpad_h)); + + cv::Mat padded; + cv::copyMakeBorder(resized, padded, dh, target_height - new_unpad_h - dh, dw, + target_width - new_unpad_w - dw, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0)); + + return padded; +} + +std::vector YoloDetector::detect(const cv::Mat& inputImage) { + std::vector results; + if (!is_initialized_ || inputImage.empty()) + return results; + + // === 1. 预处理 === + float ratio; + int dw, dh; + cv::Mat input_rgb; + cv::cvtColor(inputImage, input_rgb, cv::COLOR_BGR2RGB); + + cv::Mat img = letterbox(input_rgb, model_width_, model_height_, ratio, dw, dh); + + // === 2. 设置输入 === + rknn_input inputs[1]; + memset(inputs, 0, sizeof(inputs)); + inputs[0].index = 0; + inputs[0].type = RKNN_TENSOR_UINT8; + inputs[0].size = img.cols * img.rows * img.channels(); + inputs[0].fmt = RKNN_TENSOR_NHWC; + inputs[0].buf = img.data; + + rknn_inputs_set(ctx_, io_num_.n_input, inputs); + + // === 3. 执行推理 === + int ret = rknn_run(ctx_, nullptr); + if (ret < 0) { + spdlog::error("rknn_run failed: {}", ret); + return results; + } + + // === 4. 获取输出 === + // 使用 vector 或 new 替代变长数组(VLA),更符合 C++ 标准 + std::vector outputs(io_num_.n_output); + memset(outputs.data(), 0, outputs.size() * sizeof(rknn_output)); + + // YoloV8 输出通常需要 float 处理 (want_float=1), + // 但 postprocess.cc 中的 process_i8 是针对 int8 优化的。 + // 如果你的模型是 int8 量化且 output_attrs 正常,want_float=0 也可以, + // 但 post_process 函数里强转为了 float* 处理 fp32 分支,或者 int8* 处理 i8 分支。 + // 为了保险起见,如果 postprocess 逻辑是处理原始输出的,这里通常不需要 want_float=1,除非是 fp32 + // 模型。 + // **注意**:你的 postprocess.cc 能够处理量化(process_i8)和非量化(process_fp32)数据。 + // 通常建议让 rknn 直接输出原始数据,由后处理决定如何解析。 + for (int i = 0; i < io_num_.n_output; i++) { + outputs[i].want_float = 0; // 0: 输出原始量化数据(int8), 1: 驱动转为float + outputs[i].index = i; + } + + ret = rknn_outputs_get(ctx_, io_num_.n_output, outputs.data(), NULL); + if (ret < 0) { + spdlog::error("rknn_outputs_get failed: {}", ret); + return results; + } + + // === 5. 后处理 === + rknn_app_context_t app_ctx; + app_ctx.io_num = io_num_; + app_ctx.output_attrs = output_attrs_; // 现在这个变量已在 init 中初始化 + app_ctx.model_width = model_width_; + app_ctx.model_height = model_height_; + app_ctx.is_quant = true; // 假设模型是量化的 + + letterbox_t letter_box_param; + letter_box_param.x_pad = dw; + letter_box_param.y_pad = dh; + letter_box_param.scale = ratio; + + object_detect_result_list od_results; + // spdlog::info("--- Debugging Raw Output ---"); + // for (int i = 0; i < io_num_.n_output; ++i) { + // int8_t* buffer = (int8_t*)outputs[i].buf; + // int size = outputs[i].size; + // int32_t zp = output_attrs_[i].zp; + // float scale = output_attrs_[i].scale; + + // // 统计这一层输出的最大值、最小值和平均值 + // float max_val = -9999.0f; + // float min_val = 9999.0f; + // int8_t raw_max = -128; + + // // 为了性能只采样前 1000 个数值或全部 + // int check_count = (size > 1000) ? 1000 : size; + + // for (int j = 0; j < check_count; j++) { + // // 手动反量化: float = (int - zp) * scale + // float val = (buffer[j] - zp) * scale; + // if (val > max_val) + // max_val = val; + // if (val < min_val) + // min_val = val; + // if (buffer[j] > raw_max) + // raw_max = buffer[j]; + // } + + // spdlog::info("Output[{}]: Size={}, ZP={}, Scale={}", i, size, zp, scale); + // spdlog::info(" -> Raw int8 max: {}, Dequantized float range: [{:.4f}, {:.4f}]", raw_max, + // min_val, max_val); + // } + // spdlog::info("----------------------------"); + // === [调试代码结束] === + // 调用官方/Demo提供的后处理 + post_process(&app_ctx, outputs.data(), &letter_box_param, obj_thresh_, nms_thresh_, + &od_results); + + // 转换结果 + for (int i = 0; i < od_results.count; i++) { + object_detect_result* det = &(od_results.results[i]); + DetectionResult res; + res.x = det->box.left; + res.y = det->box.top; + res.width = det->box.right - det->box.left; + res.height = det->box.bottom - det->box.top; + res.confidence = det->prop; + res.class_id = det->cls_id; + res.label = coco_cls_to_name(det->cls_id); + + results.push_back(res); // [修复] 变量名从 final_results 改为 results + } + + // === 6. 释放 RKNN 输出内存 (必须!) === + rknn_outputs_release(ctx_, io_num_.n_output, outputs.data()); + + return results; +} + +// [修复] 将 nms_boxes 移出 detect 函数,单独实现 +// 注意:post_process 内部通常已经包含了 NMS,这里如果是为了二次处理可以保留, +// 否则可以直接使用 post_process 的输出。 +void YoloDetector::nms_boxes(std::vector& boxes, float nms_thresh) { + std::sort(boxes.begin(), boxes.end(), [](const DetectionResult& a, const DetectionResult& b) { + return a.confidence > b.confidence; + }); + + std::vector is_suppressed(boxes.size(), false); + for (size_t i = 0; i < boxes.size(); ++i) { + if (is_suppressed[i]) + continue; + for (size_t j = i + 1; j < boxes.size(); ++j) { + if (is_suppressed[j]) + continue; + + // 计算 IoU + int xx1 = std::max(boxes[i].x, boxes[j].x); + int yy1 = std::max(boxes[i].y, boxes[j].y); + int xx2 = std::min(boxes[i].x + boxes[i].width, boxes[j].x + boxes[j].width); + int yy2 = std::min(boxes[i].y + boxes[i].height, boxes[j].y + boxes[j].height); + + int w = std::max(0, xx2 - xx1); + int h = std::max(0, yy2 - yy1); + int inter_area = w * h; + int union_area = + boxes[i].width * boxes[i].height + boxes[j].width * boxes[j].height - inter_area; + + float iou = (float)inter_area / union_area; + if (iou > nms_thresh) { + is_suppressed[j] = true; + } + } + } + + // 移除被抑制的框 + std::vector keep_boxes; + for (size_t i = 0; i < boxes.size(); ++i) { + if (!is_suppressed[i]) { + keep_boxes.push_back(boxes[i]); + } + } + boxes = keep_boxes; +} diff --git a/src/yoloDetector/yolo_detector.hpp b/src/yoloDetector/yolo_detector.hpp new file mode 100644 index 0000000..8d66193 --- /dev/null +++ b/src/yoloDetector/yolo_detector.hpp @@ -0,0 +1,57 @@ +#pragma once + +#include +#include +#include + +#include "postprocess.h" +#include "rknn_api.h" + +struct DetectionResult { + int x; + int y; + int width; + int height; + std::string label; + float confidence; + int class_id; +}; + +class YoloDetector { +public: + YoloDetector(); + ~YoloDetector(); + + int init(const std::string& modelPath); + std::vector detect(const cv::Mat& inputImage); + +private: + unsigned char* load_model(const char* filename, int* model_size); + + // NMS 函数声明 + void nms_boxes(std::vector& boxes, float nms_thresh); + + cv::Mat letterbox(const cv::Mat& src, int target_width, int target_height, float& ratio, + int& dw, int& dh); + +private: + rknn_context ctx_; + bool is_initialized_; + + // 模型输入输出参数 + rknn_input_output_num io_num_; // [新增] 存储输入输出数量 + rknn_tensor_attr* output_attrs_; // [新增] 存储输出属性(zp, scale等) + + int model_width_; + int model_height_; + int model_channel_; + + unsigned char* model_data_; + int model_data_size_; + + // 配置参数 + const std::vector CLASS_NAMES = {"fuel_car", "new_energy_car"}; + // 这里的阈值如果 postprocess 内部用了,这里可以作为 fallback 或二次筛选 + const float obj_thresh_ = 0.25f; + const float nms_thresh_ = 0.45f; +};