feat(videoService): 完成第一版算法的部署。

This commit is contained in:
GuanYuankai 2026-01-04 15:47:02 +08:00
parent 5fbade0625
commit 9f6472c219
7 changed files with 808 additions and 41 deletions

View File

@ -64,6 +64,8 @@ add_library(vehicle_road_lib STATIC
src/alarm/alarm_service.cc
#
src/videoService/video_pipeline.cpp
src/yoloDetector/postprocess.cc
src/yoloDetector/yolo_detector.cpp
)
target_include_directories(vehicle_road_lib PUBLIC

View File

@ -2,7 +2,19 @@
#include <chrono>
VideoPipeline::VideoPipeline() : running_(false) {}
VideoPipeline::VideoPipeline() : running_(false) {
// [新增] 实例化检测器并加载模型
detector_ = std::make_unique<YoloDetector>();
// 模型路径写死为 models/vehicle_model.rknn
int ret = detector_->init("../models/vehicle_model.rknn");
if (ret != 0) {
spdlog::error("Failed to initialize YoloDetector with model: models/vehicle_model.rknn");
// 注意:这里如果初始化失败,后续 detect 会直接返回空结果,建议检查日志
} else {
spdlog::info("YoloDetector initialized successfully.");
}
}
VideoPipeline::~VideoPipeline() {
Stop();
@ -37,43 +49,33 @@ void VideoPipeline::Stop() {
spdlog::info("VideoPipeline Stopped.");
}
// Legacy mock inference used before the real RKNN detector was wired in:
// sweeps a single fixed-size dummy box horizontally across the frame so the
// overlay/streaming path could be exercised without an NPU.
// NOTE: uses function-local statics, so the box position persists across calls.
std::vector<DetectionResult> VideoPipeline::mockInference(const cv::Mat& frame) {
    std::vector<DetectionResult> results;
    static int dummyX = 100;
    static int direction = 5;
    // Bounce the box between the left edge and (frame width - 200).
    dummyX += direction;
    if (dummyX > frame.cols - 200 || dummyX < 0)
        direction *= -1;
    DetectionResult res;
    res.x = dummyX;
    res.y = 200;
    res.width = 150;
    res.height = 300;
    res.label = "TEST_CLIP";
    res.confidence = 0.95f;
    results.push_back(res);
    return results;
}
// [删除] mockInference 函数已移除
// Draw detection boxes + labels and a watermark onto `frame` in place.
// NOTE(review): this view appears to contain BOTH the pre- and post-commit
// draw calls (two rectangles, two label putText calls, two watermarks per
// frame) — looks like a diff-rendering artifact; confirm against the real
// file that only the color-coded variants remain.
void VideoPipeline::drawOverlay(cv::Mat& frame, const std::vector<DetectionResult>& results) {
    for (const auto& res : results) {
        // (old) fixed green box
        cv::rectangle(frame, cv::Rect(res.x, res.y, res.width, res.height), cv::Scalar(0, 255, 0),
                      2);
        // Color per class: class 0 red, others green.
        cv::Scalar color = (res.class_id == 0) ? cv::Scalar(0, 0, 255) : cv::Scalar(0, 255, 0);
        cv::rectangle(frame, cv::Rect(res.x, res.y, res.width, res.height), color, 2);
        // Label text: "<name> 0.xx" (confidence truncated to 4 chars).
        std::string text = res.label + " " + std::to_string(res.confidence).substr(0, 4);
        // (old) label drawn at a fixed offset above the box
        cv::putText(frame, text, cv::Point(res.x, res.y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.6,
                    cv::Scalar(0, 255, 0), 2);
        // Keep the label on-screen when the box touches the top edge.
        int y_pos = res.y - 5;
        if (y_pos < 10)
            y_pos = res.y + 15;
        cv::putText(frame, text, cv::Point(res.x, y_pos), cv::FONT_HERSHEY_SIMPLEX, 0.6, color, 2);
    }
    // (old) watermark
    cv::putText(frame, "RK3588 H.264 @ 20FPS", cv::Point(20, 50), cv::FONT_HERSHEY_SIMPLEX, 1.0,
                cv::Scalar(0, 0, 255), 2);
    // Current watermark
    cv::putText(frame, "RK3588 YOLOv8 Live", cv::Point(20, 50), cv::FONT_HERSHEY_SIMPLEX, 1.0,
                cv::Scalar(0, 255, 255), 2);
}
void VideoPipeline::processLoop(std::string inputUrl, std::string outputUrl, bool isFileSource) {
cv::VideoCapture cap;
cap.open(inputUrl); // 文件路径也是通过 open 打开
cap.open(inputUrl);
if (!cap.isOpened()) {
spdlog::error("Failed to open input: {}", inputUrl);
@ -81,8 +83,8 @@ void VideoPipeline::processLoop(std::string inputUrl, std::string outputUrl, boo
return;
}
const double TARGET_FPS = 20.0;
// 计算每帧的目标耗时 (毫秒) -> 1000 / 20 = 50ms
// 降低一点目标 FPS因为推理需要时间。RK3588 NPU 很快,但 25/30 FPS 比较稳妥
const double TARGET_FPS = 30.0;
const double FRAME_DURATION_MS = 1000.0 / TARGET_FPS;
int width = cap.get(cv::CAP_PROP_FRAME_WIDTH);
@ -92,10 +94,11 @@ void VideoPipeline::processLoop(std::string inputUrl, std::string outputUrl, boo
isFileSource ? "FILE LOOP" : "LIVE STREAM");
std::stringstream pipeline;
// 注意:推流分辨率这里保持和输入一致,或者可以 resize
pipeline << "appsrc ! "
<< "videoconvert ! "
<< "video/x-raw,format=NV12,width=" << width << ",height=" << height
<< ",framerate=20/1 ! "
<< ",framerate=" << (int)TARGET_FPS << "/1 ! "
<< "mpph264enc ! "
<< "h264parse ! "
<< "rtspclientsink location=" << outputUrl << " protocols=tcp";
@ -108,9 +111,8 @@ void VideoPipeline::processLoop(std::string inputUrl, std::string outputUrl, boo
}
cv::Mat frame;
long frame_count = 0;
while (running_) {
// 记录循环开始时间
auto loop_start = std::chrono::steady_clock::now();
if (!cap.read(frame)) {
@ -130,17 +132,34 @@ void VideoPipeline::processLoop(std::string inputUrl, std::string outputUrl, boo
if (frame.empty())
continue;
// 1. 算法处理
auto results = mockInference(frame);
// === 1. 真实 RKNN 推理 ===
// detect 内部已经包含了预处理、NPU推理、后处理和坐标还原
std::vector<DetectionResult> results = detector_->detect(frame);
// 2. 绘制叠加
frame_count++;
// spdlog::info("frame count {}", frame_count);
if (!results.empty()) {
spdlog::info("Frame {}: Detected {} objects", frame_count, results.size());
for (const auto& res : results) {
spdlog::info(" -> [Class: {} ({})] Conf: {:.2f} Box: [{}, {}, {}x{}]",
res.class_id, res.label, res.confidence, res.x, res.y, res.width,
res.height);
}
} else {
// 如果连续没有检测到,每隔一秒(20帧)提示一次,确认代码还在跑
if (frame_count % 20 == 0) {
spdlog::debug("Frame {}: No objects detected.", frame_count);
}
}
// === 2. 绘制叠加 ===
drawOverlay(frame, results);
// 3. 推流
// === 3. 推流 ===
if (writer.isOpened()) {
writer.write(frame);
}
// 文件模式下控制播放速度,直播流则不需要 wait
if (isFileSource) {
auto loop_end = std::chrono::steady_clock::now();
std::chrono::duration<double, std::milli> elapsed = loop_end - loop_start;

View File

@ -1,14 +1,17 @@
#pragma once
#include <atomic>
#include <memory> // [新增] 用于 unique_ptr
#include <opencv2/opencv.hpp>
#include <string>
#include <thread>
#include <vector>
#include "spdlog/spdlog.h"
#include "yoloDetector/yolo_detector.hpp" // [新增] 包含检测器头文件 (同时也包含了 DetectionResult 定义)
// 模拟的检测结果结构体(对应未来的 YOLO 结果)
// [删除] 移除此处定义的 DetectionResult直接使用 yolo_detector.hpp 中的定义
/*
struct DetectionResult {
int x;
int y;
@ -17,6 +20,7 @@ struct DetectionResult {
std::string label;
float confidence;
};
*/
class VideoPipeline {
public:
@ -33,12 +37,16 @@ public:
private:
void processLoop(std::string inputUrl, std::string outputUrl, bool isFileSource);
// 占位算法函数:模拟推理
std::vector<DetectionResult> mockInference(const cv::Mat& frame);
// [删除] 移除模拟推理函数
// std::vector<DetectionResult> mockInference(const cv::Mat& frame);
// 绘图函数
void drawOverlay(cv::Mat& frame, const std::vector<DetectionResult>& results);
private:
std::atomic<bool> running_;
std::thread processingThread_;
// [新增] YOLO 检测器实例
std::unique_ptr<YoloDetector> detector_;
};

View File

@ -0,0 +1,324 @@
#include "postprocess.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <set>
#include <vector>
// Class-id -> label lookup. Order must match the model's training classes
// (0 = fuel_car, 1 = new_energy_car) and the entry count must equal
// OBJ_CLASS_NUM (see postprocess.h).
static const char *labels[] = {
    "fuel_car",
    "new_energy_car"
};
// Clamp `val` into [min, max]; the float result is truncated to int on return.
inline static int clamp(float val, int min, int max)
{
    if (val <= min) {
        return min;
    }
    return (val < max) ? val : max;
}
// IoU between two corner-coordinate boxes; the +1.0 terms treat coordinates
// as inclusive pixel indices. Returns 0 for a degenerate (non-positive) union.
static float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1, float ymax1)
{
    const float inter_w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0);
    const float inter_h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0);
    const float inter = inter_w * inter_h;
    const float area0 = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0);
    const float area1 = (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0);
    const float uni = area0 + area1 - inter;
    return (uni <= 0.f) ? 0.f : (inter / uni);
}
// In-place class-wise NMS: for each surviving box of class `filterId`, every
// later (lower-confidence, since `order` is sorted by score) box of the same
// class whose IoU exceeds `threshold` is suppressed by writing -1 into `order`.
static int nms(int validCount, std::vector<float> &outputLocations, std::vector<int> classIds, std::vector<int> &order, int filterId, float threshold)
{
    for (int i = 0; i < validCount; ++i)
    {
        const int keep = order[i];
        if (keep == -1 || classIds[keep] != filterId)
            continue;
        // Boxes are stored as (x, y, w, h); convert once to corner form.
        const float ax0 = outputLocations[keep * 4 + 0];
        const float ay0 = outputLocations[keep * 4 + 1];
        const float ax1 = ax0 + outputLocations[keep * 4 + 2];
        const float ay1 = ay0 + outputLocations[keep * 4 + 3];
        for (int j = i + 1; j < validCount; ++j)
        {
            const int cand = order[j];
            if (cand == -1 || classIds[cand] != filterId)
                continue;
            const float bx0 = outputLocations[cand * 4 + 0];
            const float by0 = outputLocations[cand * 4 + 1];
            const float bx1 = bx0 + outputLocations[cand * 4 + 2];
            const float by1 = by0 + outputLocations[cand * 4 + 3];
            const float iou = CalculateOverlap(ax0, ay0, ax1, ay1, bx0, by0, bx1, by1);
            if (iou > threshold)
                order[j] = -1; // suppress the lower-confidence duplicate
        }
    }
    return 0;
}
// Quicksort `input` into DESCENDING order while keeping `indices` in sync,
// so after the call indices[k] is the original position of the k-th largest
// score. Returns the final pivot position (callers ignore it).
static int quick_sort_indice_inverse(std::vector<float> &input, int left, int right, std::vector<int> &indices)
{
    int low = left;
    int high = right;
    if (left < right)
    {
        const float pivot = input[left];
        const int pivot_index = indices[left];
        while (low < high)
        {
            // From the right, skip values that belong after the pivot.
            while (low < high && input[high] <= pivot)
                high--;
            input[low] = input[high];
            indices[low] = indices[high];
            // From the left, skip values that belong before the pivot.
            while (low < high && input[low] >= pivot)
                low++;
            input[high] = input[low];
            indices[high] = indices[low];
        }
        input[low] = pivot;
        indices[low] = pivot_index;
        quick_sort_indice_inverse(input, left, low - 1, indices);
        quick_sort_indice_inverse(input, low + 1, right, indices);
    }
    return low;
}
// Logistic sigmoid and its inverse (logit).
static float sigmoid(float x) { return 1.0 / (1.0 + expf(-x)); }
static float unsigmoid(float y) { return -1.0 * logf((1.0 / y) - 1.0); }

// Saturate a float into [min, max] before narrowing to an integer type.
inline static int32_t __clip(float val, float min, float max)
{
    float bounded = val <= min ? min : (val >= max ? max : val);
    return bounded;
}

// Affine quantization: f32 -> int8 / uint8 with zero-point `zp` and `scale`.
static int8_t qnt_f32_to_affine(float f32, int32_t zp, float scale)
{
    float q = (f32 / scale) + zp;
    return (int8_t)__clip(q, -128, 127);
}
static uint8_t qnt_f32_to_affine_u8(float f32, int32_t zp, float scale)
{
    float q = (f32 / scale) + zp;
    return (uint8_t)__clip(q, 0, 255);
}

// Affine dequantization back to f32.
static float deqnt_affine_to_f32(int8_t qnt, int32_t zp, float scale)
{
    return ((float)qnt - (float)zp) * scale;
}
static float deqnt_affine_u8_to_f32(uint8_t qnt, int32_t zp, float scale)
{
    return ((float)qnt - (float)zp) * scale;
}
// Distribution Focal Loss decode: for each of the 4 box sides, take the
// softmax-weighted expectation over `dfl_len` discrete bins.
// `tensor` holds 4*dfl_len logits laid out side-major (side b occupies
// tensor[b*dfl_len .. b*dfl_len+dfl_len-1]); `box` receives the 4 expected
// offsets in grid units.
// Fix: replaced the non-standard VLA `float exp_t[dfl_len]` with std::vector
// (VLAs are a GCC extension, not C++).
static void compute_dfl(float* tensor, int dfl_len, float* box){
    std::vector<float> exp_t(dfl_len);
    for (int b = 0; b < 4; b++) {
        float exp_sum = 0.f;
        float acc_sum = 0.f;
        for (int i = 0; i < dfl_len; i++) {
            exp_t[i] = exp(tensor[i + b * dfl_len]);
            exp_sum += exp_t[i];
        }
        // Expectation of the bin index under the softmax distribution.
        for (int i = 0; i < dfl_len; i++) {
            acc_sum += exp_t[i] / exp_sum * i;
        }
        box[b] = acc_sum;
    }
}
// Decode one int8-quantized YOLOv8 output branch (one stride level).
//   box_tensor:       4*dfl_len channels of DFL box logits, [C, grid_h, grid_w]
//   score_tensor:     OBJ_CLASS_NUM channels of class scores, same layout
//   score_sum_tensor: optional per-cell score sum, used as a cheap early filter
// Appends (x, y, w, h) boxes in model-input pixels plus confidence/class id,
// and returns the number of candidates added.
// Fixes vs. the committed version:
//   * removed the three dead/overwritten width-height computations that were
//     left in before the final x2/y2 logic (the last assignment always won);
//   * replaced the non-standard VLA `before_dfl[dfl_len*4]` with std::vector.
static int process_i8(int8_t *box_tensor, int32_t box_zp, float box_scale,
                      int8_t *score_tensor, int32_t score_zp, float score_scale,
                      int8_t *score_sum_tensor, int32_t score_sum_zp, float score_sum_scale,
                      int grid_h, int grid_w, int stride, int dfl_len,
                      std::vector<float> &boxes, std::vector<float> &objProbs, std::vector<int> &classId, float threshold)
{
    int validCount = 0;
    int grid_len = grid_h * grid_w;
    // Quantize the thresholds once so per-cell comparisons stay in int8.
    int8_t score_thres_i8 = qnt_f32_to_affine(threshold, score_zp, score_scale);
    int8_t score_sum_thres_i8 = qnt_f32_to_affine(threshold, score_sum_zp, score_sum_scale);
    for (int i = 0; i < grid_h; i++)
    {
        for (int j = 0; j < grid_w; j++)
        {
            int offset = i * grid_w + j;
            // Cheap rejection: skip cells whose summed class score is low.
            if (score_sum_tensor != nullptr && score_sum_tensor[offset] < score_sum_thres_i8)
                continue;
            int8_t max_score = -score_zp; // quantized 0.0 (assumes zp fits in int8 — TODO confirm)
            int max_class_id = -1;
            for (int c = 0; c < OBJ_CLASS_NUM; c++)
            {
                if ((score_tensor[offset] > score_thres_i8) && (score_tensor[offset] > max_score))
                {
                    max_score = score_tensor[offset];
                    max_class_id = c;
                }
                offset += grid_len; // advance to the next class plane, same cell
            }
            if (max_score > score_thres_i8)
            {
                // Gather and dequantize the 4*dfl_len DFL logits for this cell.
                offset = i * grid_w + j;
                std::vector<float> before_dfl(dfl_len * 4);
                for (int k = 0; k < dfl_len * 4; k++)
                {
                    before_dfl[k] = deqnt_affine_to_f32(box_tensor[offset], box_zp, box_scale);
                    offset += grid_len;
                }
                float box[4];
                compute_dfl(before_dfl.data(), dfl_len, box);
                // box = (l, t, r, b) distances from the cell center (j+0.5, i+0.5).
                float x1 = (-box[0] + j + 0.5) * stride;
                float y1 = (-box[1] + i + 0.5) * stride;
                float x2 = (box[2] + j + 0.5) * stride;
                float y2 = (box[3] + i + 0.5) * stride;
                boxes.push_back(x1);
                boxes.push_back(y1);
                boxes.push_back(x2 - x1);
                boxes.push_back(y2 - y1);
                objProbs.push_back(deqnt_affine_to_f32(max_score, score_zp, score_scale));
                classId.push_back(max_class_id);
                validCount++;
            }
        }
    }
    return validCount;
}
// Decode one float32 (non-quantized) YOLOv8 output branch; same layout and
// output contract as process_i8, but thresholds compare directly in f32.
static int process_fp32(float *box_tensor, float *score_tensor, float *score_sum_tensor,
                        int grid_h, int grid_w, int stride, int dfl_len,
                        std::vector<float> &boxes, std::vector<float> &objProbs, std::vector<int> &classId, float threshold)
{
    int validCount = 0;
    int grid_len = grid_h * grid_w;
    for (int i = 0; i < grid_h; i++)
    {
        for (int j = 0; j < grid_w; j++)
        {
            int offset = i* grid_w + j;
            // Cheap rejection via the optional per-cell score-sum tensor.
            if (score_sum_tensor != nullptr){
                if (score_sum_tensor[offset] < threshold) continue;
            }
            // Find the best-scoring class at this cell (planes are grid_len apart).
            float max_score = 0;
            int max_class_id = -1;
            for (int c= 0; c< OBJ_CLASS_NUM; c++){
                if ((score_tensor[offset] > threshold) && (score_tensor[offset] > max_score))
                {
                    max_score = score_tensor[offset];
                    max_class_id = c;
                }
                offset += grid_len;
            }
            if (max_score> threshold){
                // Gather the 4*dfl_len DFL logits for this cell and decode them.
                offset = i* grid_w + j;
                float box[4];
                float before_dfl[dfl_len*4]; // NOTE(review): VLA — GCC extension, not standard C++
                for (int k=0; k< dfl_len*4; k++){
                    before_dfl[k] = box_tensor[offset];
                    offset += grid_len;
                }
                compute_dfl(before_dfl, dfl_len, box);
                // box = (l, t, r, b) distances from the cell center (j+0.5, i+0.5).
                float x1 = (-box[0] + j + 0.5)*stride;
                float y1 = (-box[1] + i + 0.5)*stride;
                float x2 = (box[2] + j + 0.5)*stride;
                float y2 = (box[3] + i + 0.5)*stride;
                float w = x2 - x1;
                float h = y2 - y1;
                boxes.push_back(x1); boxes.push_back(y1); boxes.push_back(w); boxes.push_back(h);
                objProbs.push_back(max_score);
                classId.push_back(max_class_id);
                validCount ++;
            }
        }
    }
    return validCount;
}
// Full YOLOv8 post-processing entry point: decode all three stride branches,
// sort candidates by confidence, apply per-class NMS, then map the surviving
// boxes from letterboxed model space back to original-image coordinates.
// `outputs` is the rknn_output array from rknn_outputs_get(); results are
// written into `od_results` (capped at OBJ_NUMB_MAX_SIZE). Returns 0.
int post_process(rknn_app_context_t *app_ctx, void *outputs, letterbox_t *letter_box, float conf_threshold, float nms_threshold, object_detect_result_list *od_results)
{
    rknn_output *_outputs = (rknn_output *)outputs;
    std::vector<float> filterBoxes; // candidate boxes, 4 floats (x, y, w, h) each
    std::vector<float> objProbs;    // candidate confidences
    std::vector<int> classId;       // candidate class ids
    int validCount = 0;
    int stride = 0;
    int grid_h = 0;
    int grid_w = 0;
    int model_in_w = app_ctx->model_width;
    int model_in_h = app_ctx->model_height;
    memset(od_results, 0, sizeof(object_detect_result_list));
    // DFL bin count per box side (YOLOv8 box tensor has 4*dfl_len channels).
    int dfl_len = app_ctx->output_attrs[0].dims[1] / 4;
    // Outputs per stride branch: 2 (box, score) or 3 (box, score, score-sum).
    int output_per_branch = app_ctx->io_num.n_output / 3;
    for (int i = 0; i < 3; i++)
    {
        void *score_sum = nullptr;
        int32_t score_sum_zp = 0;
        float score_sum_scale = 1.0;
        if (output_per_branch == 3){
            score_sum = _outputs[i*output_per_branch + 2].buf;
            score_sum_zp = app_ctx->output_attrs[i*output_per_branch + 2].zp;
            score_sum_scale = app_ctx->output_attrs[i*output_per_branch + 2].scale;
        }
        int box_idx = i*output_per_branch;
        int score_idx = i*output_per_branch + 1;
        grid_h = app_ctx->output_attrs[box_idx].dims[2];
        grid_w = app_ctx->output_attrs[box_idx].dims[3];
        stride = model_in_h / grid_h;
        if (app_ctx->is_quant)
        {
            // RK3588 builds typically use int8 quantization.
            validCount += process_i8((int8_t *)_outputs[box_idx].buf, app_ctx->output_attrs[box_idx].zp, app_ctx->output_attrs[box_idx].scale,
                                     (int8_t *)_outputs[score_idx].buf, app_ctx->output_attrs[score_idx].zp, app_ctx->output_attrs[score_idx].scale,
                                     (int8_t *)score_sum, score_sum_zp, score_sum_scale,
                                     grid_h, grid_w, stride, dfl_len,
                                     filterBoxes, objProbs, classId, conf_threshold);
        }
        else
        {
            validCount += process_fp32((float *)_outputs[box_idx].buf, (float *)_outputs[score_idx].buf, (float *)score_sum,
                                       grid_h, grid_w, stride, dfl_len,
                                       filterBoxes, objProbs, classId, conf_threshold);
        }
    }
    if (validCount <= 0) return 0;
    // Sort candidate indices by confidence, descending.
    std::vector<int> indexArray;
    for (int i = 0; i < validCount; ++i) indexArray.push_back(i);
    quick_sort_indice_inverse(objProbs, 0, validCount - 1, indexArray);
    // Per-class NMS: suppressed entries get -1 in indexArray.
    std::set<int> class_set(std::begin(classId), std::end(classId));
    for (auto c : class_set) nms(validCount, filterBoxes, classId, indexArray, c, nms_threshold);
    int last_count = 0;
    od_results->count = 0;
    // Undo the letterbox transform: subtract padding, clamp to the model
    // input, then divide by the resize scale to reach original pixels.
    for (int i = 0; i < validCount; ++i)
    {
        if (indexArray[i] == -1 || last_count >= OBJ_NUMB_MAX_SIZE) continue;
        int n = indexArray[i];
        float x1 = filterBoxes[n * 4 + 0] - letter_box->x_pad;
        float y1 = filterBoxes[n * 4 + 1] - letter_box->y_pad;
        float x2 = x1 + filterBoxes[n * 4 + 2];
        float y2 = y1 + filterBoxes[n * 4 + 3];
        int id = classId[n];
        float obj_conf = objProbs[i];
        od_results->results[last_count].box.left = (int)(clamp(x1, 0, model_in_w) / letter_box->scale);
        od_results->results[last_count].box.top = (int)(clamp(y1, 0, model_in_h) / letter_box->scale);
        od_results->results[last_count].box.right = (int)(clamp(x2, 0, model_in_w) / letter_box->scale);
        od_results->results[last_count].box.bottom = (int)(clamp(y2, 0, model_in_h) / letter_box->scale);
        od_results->results[last_count].prop = obj_conf;
        od_results->results[last_count].cls_id = id;
        last_count++;
    }
    od_results->count = last_count;
    return 0;
}
// No global state to set up for post-processing in this simplified build.
int init_post_process() { return 0; }
// Matching teardown hook; intentionally a no-op.
void deinit_post_process() {}
// Map a class id to its human-readable label.
// Fix: also reject negative ids — the original only checked the upper bound,
// so a negative cls_id (e.g. the -1 "no class" sentinel used during decoding)
// would index labels[] out of bounds.
const char *coco_cls_to_name(int cls_id)
{
    if (cls_id < 0 || cls_id >= OBJ_CLASS_NUM) return "null";
    return labels[cls_id];
}

View File

@ -0,0 +1,68 @@
#ifndef _RKNN_YOLOV8_DEMO_POSTPROCESS_H_
#define _RKNN_YOLOV8_DEMO_POSTPROCESS_H_
#include <stdint.h>
#include <vector>
#include "rknn_api.h"
// Standalone header: the demo's common.h / image_utils.h dependencies were
// removed and the few structs post_process() needs are defined below instead.
// #include "common.h"
// #include "image_utils.h"
#define OBJ_NAME_MAX_SIZE 64
#define OBJ_NUMB_MAX_SIZE 128
// Number of classes this model was trained on (0=fuel_car, 1=new_energy_car).
#define OBJ_CLASS_NUM 2
// Default thresholds (overridden by the values passed to post_process()).
#define NMS_THRESH 0.45
#define BOX_THRESH 0.25
// Axis-aligned box (originally defined in image_utils.h).
typedef struct {
    int left;
    int top;
    int right;
    int bottom;
} image_rect_t;
// Letterbox mapping parameters needed to undo the preprocessing transform.
typedef struct {
    int x_pad;   // horizontal padding (pixels) added on the left
    int y_pad;   // vertical padding (pixels) added on the top
    float scale; // resize ratio applied before padding
} letterbox_t;
// Minimal context for post_process(): only the fields it actually reads,
// not the full demo context struct.
typedef struct {
    rknn_input_output_num io_num;
    rknn_tensor_attr* output_attrs; // per-output zp/scale/dims
    int model_width;
    int model_height;
    bool is_quant; // true for int8-quantized models
} rknn_app_context_t;
// One decoded detection in original-image coordinates.
typedef struct {
    image_rect_t box;
    float prop;  // confidence score
    int cls_id;  // class index
} object_detect_result;
typedef struct {
    int id;
    int count;
    object_detect_result results[OBJ_NUMB_MAX_SIZE];
} object_detect_result_list;
// Lifecycle hooks (no-ops in this build) and label lookup.
int init_post_process();
void deinit_post_process();
const char* coco_cls_to_name(int cls_id);
int post_process(rknn_app_context_t* app_ctx, void* outputs, letterbox_t* letter_box,
                 float conf_threshold, float nms_threshold, object_detect_result_list* od_results);
#endif //_RKNN_YOLOV8_DEMO_POSTPROCESS_H_

View File

@ -0,0 +1,289 @@
#include "yolo_detector.hpp"
#include <algorithm>
#include <cstring> // for memset
#include <fstream>
#include <iostream>
#include "spdlog/spdlog.h"
// Zero-initialize all handles/buffers; real setup happens in init().
YoloDetector::YoloDetector()
    : ctx_(0), is_initialized_(false), model_data_(nullptr), output_attrs_(nullptr) {
    memset(&io_num_, 0, sizeof(io_num_));
}
// Release the NPU context, the raw model blob and the cached output attrs.
YoloDetector::~YoloDetector() {
    if (ctx_ > 0) {
        rknn_destroy(ctx_);
    }
    if (model_data_) {
        free(model_data_); // malloc'd in load_model()
    }
    if (output_attrs_) {
        free(output_attrs_); // malloc'd in init()
    }
}
unsigned char* YoloDetector::load_model(const char* filename, int* model_size) {
FILE* fp = fopen(filename, "rb");
if (fp == nullptr) {
spdlog::error("Open model file failed: {}", filename);
return nullptr;
}
fseek(fp, 0, SEEK_END);
int size = ftell(fp);
fseek(fp, 0, SEEK_SET);
unsigned char* data = (unsigned char*)malloc(size);
fread(data, 1, size, fp);
fclose(fp);
*model_size = size;
return data;
}
// Load the RKNN model, query I/O tensor attributes and cache everything
// detect() needs. Returns 0 on success, -1 on any failure. Partially
// initialized state (ctx_, model_data_) is reclaimed by the destructor.
int YoloDetector::init(const std::string& modelPath) {
    spdlog::info("Loading RKNN model: {}", modelPath);
    // 1. Read the model blob from disk.
    model_data_ = load_model(modelPath.c_str(), &model_data_size_);
    if (!model_data_)
        return -1;
    // 2. Create the RKNN context.
    int ret = rknn_init(&ctx_, model_data_, model_data_size_, 0, NULL);
    if (ret < 0) {
        spdlog::error("rknn_init failed! ret={}", ret);
        return -1;
    }
    // 3. Query input/output tensor counts.
    ret = rknn_query(ctx_, RKNN_QUERY_IN_OUT_NUM, &io_num_, sizeof(io_num_));
    if (ret < 0)
        return -1;
    // Query input attributes.
    // NOTE(review): VLA is a GCC extension; per-attr rknn_query returns are
    // unchecked here — a failed query leaves zeroed dims below.
    rknn_tensor_attr input_attrs[io_num_.n_input];
    memset(input_attrs, 0, sizeof(input_attrs));
    for (int i = 0; i < io_num_.n_input; i++) {
        input_attrs[i].index = i;
        rknn_query(ctx_, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
    }
    // Query output attributes — post-processing needs per-tensor zp/scale.
    output_attrs_ = (rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num_.n_output);
    memset(output_attrs_, 0, sizeof(rknn_tensor_attr) * io_num_.n_output);
    for (int i = 0; i < io_num_.n_output; i++) {
        output_attrs_[i].index = i;
        rknn_query(ctx_, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs_[i]), sizeof(rknn_tensor_attr));
    }
    // Assume a single image input.
    // NOTE(review): the dims indexing below (channel from dims[0]) does not
    // match the usual NHWC [N,H,W,C] layout — verify against the converted
    // model's actual input attr before trusting model_channel_.
    model_width_ = input_attrs[0].dims[2];   // width
    model_height_ = input_attrs[0].dims[1];  // height
    model_channel_ = input_attrs[0].dims[0]; // channel
    // Fallback for layouts where the width slot holds the channel count (3).
    if (model_width_ == 3) {
        model_width_ = input_attrs[0].dims[1];
        model_height_ = input_attrs[0].dims[2];
    }
    spdlog::info("Model Input Shape: {}x{}x{}", model_width_, model_height_, model_channel_);
    is_initialized_ = true;
    return 0;
}
// Aspect-preserving resize onto a target_width x target_height canvas,
// padding the borders with black. Writes back the applied resize `ratio`
// and the left/top padding (dw, dh) so callers can undo the mapping.
cv::Mat YoloDetector::letterbox(const cv::Mat& src, int target_width, int target_height,
                                float& ratio, int& dw, int& dh) {
    const int src_w = src.cols;
    const int src_h = src.rows;
    // Scale by the tighter dimension so the whole image fits.
    ratio = std::min((float)target_width / src_w, (float)target_height / src_h);
    const int scaled_w = (int)(src_w * ratio);
    const int scaled_h = (int)(src_h * ratio);
    // Split the leftover space evenly (integer division biases right/bottom).
    dw = (target_width - scaled_w) / 2;
    dh = (target_height - scaled_h) / 2;

    cv::Mat resized;
    cv::resize(src, resized, cv::Size(scaled_w, scaled_h));

    cv::Mat padded;
    cv::copyMakeBorder(resized, padded, dh, target_height - scaled_h - dh,
                       dw, target_width - scaled_w - dw,
                       cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
    return padded;
}
// Run one BGR frame through the full pipeline: letterbox preprocessing,
// NPU inference, post-processing (decode + NMS + coordinate un-mapping).
// Returns detections in original-image coordinates; empty vector when the
// detector is uninitialized, the frame is empty, or any RKNN call fails.
std::vector<DetectionResult> YoloDetector::detect(const cv::Mat& inputImage) {
    std::vector<DetectionResult> results;
    if (!is_initialized_ || inputImage.empty())
        return results;
    // === 1. Preprocess: BGR -> RGB, then letterbox to the model input size ===
    float ratio;
    int dw, dh;
    cv::Mat input_rgb;
    cv::cvtColor(inputImage, input_rgb, cv::COLOR_BGR2RGB);
    cv::Mat img = letterbox(input_rgb, model_width_, model_height_, ratio, dw, dh);
    // === 2. Stage the input tensor (NHWC uint8) ===
    rknn_input inputs[1];
    memset(inputs, 0, sizeof(inputs));
    inputs[0].index = 0;
    inputs[0].type = RKNN_TENSOR_UINT8;
    inputs[0].size = img.cols * img.rows * img.channels();
    inputs[0].fmt = RKNN_TENSOR_NHWC;
    inputs[0].buf = img.data;
    rknn_inputs_set(ctx_, io_num_.n_input, inputs);
    // === 3. Run inference on the NPU ===
    int ret = rknn_run(ctx_, nullptr);
    if (ret < 0) {
        spdlog::error("rknn_run failed: {}", ret);
        return results;
    }
    // === 4. Fetch raw outputs ===
    // want_float stays 0: post_process() handles both the int8 path
    // (process_i8, using the cached zp/scale) and the fp32 path itself,
    // so the driver should hand back the untouched quantized buffers.
    // std::vector instead of a VLA keeps this standard C++.
    std::vector<rknn_output> outputs(io_num_.n_output);
    memset(outputs.data(), 0, outputs.size() * sizeof(rknn_output));
    for (int i = 0; i < io_num_.n_output; i++) {
        outputs[i].want_float = 0; // 0: raw quantized data; 1: driver converts to float
        outputs[i].index = i;
    }
    ret = rknn_outputs_get(ctx_, io_num_.n_output, outputs.data(), NULL);
    if (ret < 0) {
        spdlog::error("rknn_outputs_get failed: {}", ret);
        return results;
    }
    // === 5. Post-process: decode + NMS + map back to image coordinates ===
    rknn_app_context_t app_ctx;
    app_ctx.io_num = io_num_;
    app_ctx.output_attrs = output_attrs_; // populated in init()
    app_ctx.model_width = model_width_;
    app_ctx.model_height = model_height_;
    app_ctx.is_quant = true; // NOTE(review): assumes an int8-quantized model — confirm for fp32 builds
    letterbox_t letter_box_param;
    letter_box_param.x_pad = dw;
    letter_box_param.y_pad = dh;
    letter_box_param.scale = ratio;
    object_detect_result_list od_results;
    // (A per-layer raw-output zp/scale dump for debugging quantization used
    //  to live here; re-add it if detections ever look systematically wrong.)
    post_process(&app_ctx, outputs.data(), &letter_box_param, obj_thresh_, nms_thresh_,
                 &od_results);
    // Convert the C result list into the public DetectionResult form.
    for (int i = 0; i < od_results.count; i++) {
        object_detect_result* det = &(od_results.results[i]);
        DetectionResult res;
        res.x = det->box.left;
        res.y = det->box.top;
        res.width = det->box.right - det->box.left;
        res.height = det->box.bottom - det->box.top;
        res.confidence = det->prop;
        res.class_id = det->cls_id;
        res.label = coco_cls_to_name(det->cls_id);
        results.push_back(res);
    }
    // === 6. Hand the output buffers back to the runtime (mandatory) ===
    rknn_outputs_release(ctx_, io_num_.n_output, outputs.data());
    return results;
}
// [修复] 将 nms_boxes 移出 detect 函数,单独实现
// 注意post_process 内部通常已经包含了 NMS这里如果是为了二次处理可以保留
// 否则可以直接使用 post_process 的输出。
void YoloDetector::nms_boxes(std::vector<DetectionResult>& boxes, float nms_thresh) {
std::sort(boxes.begin(), boxes.end(), [](const DetectionResult& a, const DetectionResult& b) {
return a.confidence > b.confidence;
});
std::vector<bool> is_suppressed(boxes.size(), false);
for (size_t i = 0; i < boxes.size(); ++i) {
if (is_suppressed[i])
continue;
for (size_t j = i + 1; j < boxes.size(); ++j) {
if (is_suppressed[j])
continue;
// 计算 IoU
int xx1 = std::max(boxes[i].x, boxes[j].x);
int yy1 = std::max(boxes[i].y, boxes[j].y);
int xx2 = std::min(boxes[i].x + boxes[i].width, boxes[j].x + boxes[j].width);
int yy2 = std::min(boxes[i].y + boxes[i].height, boxes[j].y + boxes[j].height);
int w = std::max(0, xx2 - xx1);
int h = std::max(0, yy2 - yy1);
int inter_area = w * h;
int union_area =
boxes[i].width * boxes[i].height + boxes[j].width * boxes[j].height - inter_area;
float iou = (float)inter_area / union_area;
if (iou > nms_thresh) {
is_suppressed[j] = true;
}
}
}
// 移除被抑制的框
std::vector<DetectionResult> keep_boxes;
for (size_t i = 0; i < boxes.size(); ++i) {
if (!is_suppressed[i]) {
keep_boxes.push_back(boxes[i]);
}
}
boxes = keep_boxes;
}

View File

@ -0,0 +1,57 @@
#pragma once
#include <opencv2/opencv.hpp>
#include <string>
#include <vector>
#include "postprocess.h"
#include "rknn_api.h"
// One detected object, expressed in original-image pixel coordinates
// (detect() converts from the post_process() box corners to x/y/w/h).
struct DetectionResult {
    int x;             // top-left x
    int y;             // top-left y
    int width;
    int height;
    std::string label; // human-readable class name (from coco_cls_to_name)
    float confidence;  // detection score
    int class_id;      // index into the model's class list
};
// Thin C++ wrapper around the RKNN runtime for the YOLOv8 vehicle model:
// owns the NPU context, the raw model blob and the cached output attributes
// (all released in the destructor).
class YoloDetector {
public:
    YoloDetector();
    ~YoloDetector();
    // Load the .rknn model and query tensor attributes; returns 0 on success.
    int init(const std::string& modelPath);
    // Run preprocessing + inference + post-processing on one BGR frame;
    // returns an empty vector if uninitialized or on any runtime failure.
    std::vector<DetectionResult> detect(const cv::Mat& inputImage);
private:
    // Read the model file into a malloc'd buffer (freed by the destructor).
    unsigned char* load_model(const char* filename, int* model_size);
    // Optional standalone NMS (post_process already applies per-class NMS).
    void nms_boxes(std::vector<DetectionResult>& boxes, float nms_thresh);
    // Aspect-preserving resize + black padding to the model input size.
    cv::Mat letterbox(const cv::Mat& src, int target_width, int target_height, float& ratio,
                      int& dw, int& dh);
private:
    rknn_context ctx_;
    bool is_initialized_;
    // Model input/output bookkeeping.
    rknn_input_output_num io_num_;   // input/output tensor counts
    rknn_tensor_attr* output_attrs_; // per-output quantization params (zp, scale)
    int model_width_;
    int model_height_;
    int model_channel_;
    unsigned char* model_data_;      // raw model file contents (malloc'd)
    int model_data_size_;
    // Configuration: class names and the thresholds handed to post_process().
    const std::vector<std::string> CLASS_NAMES = {"fuel_car", "new_energy_car"};
    const float obj_thresh_ = 0.25f;
    const float nms_thresh_ = 0.45f;
};