From 9f6472c2197b7e69147d7b9e68df7c22059ae768 Mon Sep 17 00:00:00 2001
From: GuanYuankai <whistle_op@hotmail.com>
Date: Sun, 4 Jan 2026 15:47:02 +0800
Subject: [PATCH] =?UTF-8?q?feat(video=20Service):=20=E5=AE=8C=E6=88=90?=
 =?UTF-8?q?=E7=AC=AC=E4=B8=80=E7=89=88=E7=AE=97=E6=B3=95=E7=9A=84=E9=83=A8?=
 =?UTF-8?q?=E7=BD=B2=E3=80=82?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CMakeLists.txt                      |   2 +
 src/videoService/video_pipeline.cpp |  95 ++++----
 src/videoService/video_pipeline.hpp |  14 +-
 src/yoloDetector/postprocess.cc     | 324 ++++++++++++++++++++++++++++
 src/yoloDetector/postprocess.h      |  68 ++++++
 src/yoloDetector/yolo_detector.cpp  | 289 +++++++++++++++++++++++++
 src/yoloDetector/yolo_detector.hpp  |  57 +++++
 7 files changed, 808 insertions(+), 41 deletions(-)
 create mode 100644 src/yoloDetector/postprocess.cc
 create mode 100644 src/yoloDetector/postprocess.h
 create mode 100644 src/yoloDetector/yolo_detector.cpp
 create mode 100644 src/yoloDetector/yolo_detector.hpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3fbc908..61a8ed2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -64,6 +64,8 @@ add_library(vehicle_road_lib STATIC
     src/alarm/alarm_service.cc
 	#新视频模块
     src/videoService/video_pipeline.cpp
+	src/yoloDetector/postprocess.cc
+	src/yoloDetector/yolo_detector.cpp
 )
 
 target_include_directories(vehicle_road_lib PUBLIC
diff --git a/src/videoService/video_pipeline.cpp b/src/videoService/video_pipeline.cpp
index cf7c305..fc3740e 100644
--- a/src/videoService/video_pipeline.cpp
+++ b/src/videoService/video_pipeline.cpp
@@ -2,7 +2,19 @@
 
 #include <chrono>
 
-VideoPipeline::VideoPipeline() : running_(false) {}
+VideoPipeline::VideoPipeline() : running_(false) {
+	// [新增] 实例化检测器并加载模型
+	detector_ = std::make_unique<YoloDetector>();
+
+	// 模型路径写死为 models/vehicle_model.rknn
+	int ret = detector_->init("../models/vehicle_model.rknn");
+	if (ret != 0) {
+		spdlog::error("Failed to initialize YoloDetector with model: models/vehicle_model.rknn");
+		// 注意：这里如果初始化失败，后续 detect 会直接返回空结果，建议检查日志
+	} else {
+		spdlog::info("YoloDetector initialized successfully.");
+	}
+}
 
 VideoPipeline::~VideoPipeline() {
 	Stop();
@@ -37,43 +49,33 @@ void VideoPipeline::Stop() {
 	spdlog::info("VideoPipeline Stopped.");
 }
 
-std::vector<DetectionResult> VideoPipeline::mockInference(const cv::Mat& frame) {
-	std::vector<DetectionResult> results;
-	static int dummyX = 100;
-	static int direction = 5;
-
-	// 简单的移动逻辑，模拟每帧的变化
-	dummyX += direction;
-	if (dummyX > frame.cols - 200 || dummyX < 0)
-		direction *= -1;
-
-	DetectionResult res;
-	res.x = dummyX;
-	res.y = 200;
-	res.width = 150;
-	res.height = 300;
-	res.label = "TEST_CLIP";
-	res.confidence = 0.95f;
-
-	results.push_back(res);
-	return results;
-}
+// [删除] mockInference 函数已移除
 
 void VideoPipeline::drawOverlay(cv::Mat& frame, const std::vector<DetectionResult>& results) {
 	for (const auto& res : results) {
-		cv::rectangle(frame, cv::Rect(res.x, res.y, res.width, res.height), cv::Scalar(0, 255, 0),
-					  2);
+		// 根据不同类别使用不同颜色 (示例: 0为红色, 1为绿色)
+		cv::Scalar color = (res.class_id == 0) ? cv::Scalar(0, 0, 255) : cv::Scalar(0, 255, 0);
+
+		cv::rectangle(frame, cv::Rect(res.x, res.y, res.width, res.height), color, 2);
+
 		std::string text = res.label + " " + std::to_string(res.confidence).substr(0, 4);
-		cv::putText(frame, text, cv::Point(res.x, res.y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.6,
-					cv::Scalar(0, 255, 0), 2);
+
+		// 简单的防止文字跑出边界的处理
+		int y_pos = res.y - 5;
+		if (y_pos < 10)
+			y_pos = res.y + 15;
+
+		cv::putText(frame, text, cv::Point(res.x, y_pos), cv::FONT_HERSHEY_SIMPLEX, 0.6, color, 2);
 	}
-	cv::putText(frame, "RK3588 H.264 @ 20FPS", cv::Point(20, 50), cv::FONT_HERSHEY_SIMPLEX, 1.0,
-				cv::Scalar(0, 0, 255), 2);
+
+	// 添加水印
+	cv::putText(frame, "RK3588 YOLOv8 Live", cv::Point(20, 50), cv::FONT_HERSHEY_SIMPLEX, 1.0,
+				cv::Scalar(0, 255, 255), 2);
 }
 
 void VideoPipeline::processLoop(std::string inputUrl, std::string outputUrl, bool isFileSource) {
 	cv::VideoCapture cap;
-	cap.open(inputUrl);	 // 文件路径也是通过 open 打开
+	cap.open(inputUrl);
 
 	if (!cap.isOpened()) {
 		spdlog::error("Failed to open input: {}", inputUrl);
@@ -81,8 +83,8 @@ void VideoPipeline::processLoop(std::string inputUrl, std::string outputUrl, boo
 		return;
 	}
 
-	const double TARGET_FPS = 20.0;
-	// 计算每帧的目标耗时 (毫秒) -> 1000 / 20 = 50ms
+	// 降低一点目标 FPS，因为推理需要时间。RK3588 NPU 很快，但 25/30 FPS 比较稳妥
+	const double TARGET_FPS = 30.0;
 	const double FRAME_DURATION_MS = 1000.0 / TARGET_FPS;
 
 	int width = cap.get(cv::CAP_PROP_FRAME_WIDTH);
@@ -92,10 +94,11 @@ void VideoPipeline::processLoop(std::string inputUrl, std::string outputUrl, boo
 				 isFileSource ? "FILE LOOP" : "LIVE STREAM");
 
 	std::stringstream pipeline;
+	// 注意：推流分辨率这里保持和输入一致，或者可以 resize
 	pipeline << "appsrc ! "
 			 << "videoconvert ! "
 			 << "video/x-raw,format=NV12,width=" << width << ",height=" << height
-			 << ",framerate=20/1 ! "
+			 << ",framerate=" << (int)TARGET_FPS << "/1 ! "
 			 << "mpph264enc ! "
 			 << "h264parse ! "
 			 << "rtspclientsink location=" << outputUrl << " protocols=tcp";
@@ -108,9 +111,8 @@ void VideoPipeline::processLoop(std::string inputUrl, std::string outputUrl, boo
 	}
 
 	cv::Mat frame;
-
+	long frame_count = 0;
 	while (running_) {
-		// 记录循环开始时间
 		auto loop_start = std::chrono::steady_clock::now();
 
 		if (!cap.read(frame)) {
@@ -130,17 +132,34 @@ void VideoPipeline::processLoop(std::string inputUrl, std::string outputUrl, boo
 		if (frame.empty())
 			continue;
 
-		// 1. 算法处理
-		auto results = mockInference(frame);
+		// === 1. 真实 RKNN 推理 ===
+		// detect 内部已经包含了预处理、NPU推理、后处理和坐标还原
+		std::vector<DetectionResult> results = detector_->detect(frame);
 
-		// 2. 绘制叠加
+		frame_count++;
+		// spdlog::info("frame count {}", frame_count);
+		if (!results.empty()) {
+			spdlog::info("Frame {}: Detected {} objects", frame_count, results.size());
+			for (const auto& res : results) {
+				spdlog::info("  -> [Class: {} ({})] Conf: {:.2f} Box: [{}, {}, {}x{}]",
+							 res.class_id, res.label, res.confidence, res.x, res.y, res.width,
+							 res.height);
+			}
+		} else {
+			// 如果连续没有检测到，每隔一秒(20帧)提示一次，确认代码还在跑
+			if (frame_count % 20 == 0) {
+				spdlog::debug("Frame {}: No objects detected.", frame_count);
+			}
+		}
+		// === 2. 绘制叠加 ===
 		drawOverlay(frame, results);
 
-		// 3. 推流
+		// === 3. 推流 ===
 		if (writer.isOpened()) {
 			writer.write(frame);
 		}
 
+		// 文件模式下控制播放速度，直播流则不需要 wait
 		if (isFileSource) {
 			auto loop_end = std::chrono::steady_clock::now();
 			std::chrono::duration<double, std::milli> elapsed = loop_end - loop_start;
diff --git a/src/videoService/video_pipeline.hpp b/src/videoService/video_pipeline.hpp
index f46dc00..7d1bd37 100644
--- a/src/videoService/video_pipeline.hpp
+++ b/src/videoService/video_pipeline.hpp
@@ -1,14 +1,17 @@
 #pragma once
 
 #include <atomic>
+#include <memory>  // [新增] 用于 unique_ptr
 #include <opencv2/opencv.hpp>
 #include <string>
 #include <thread>
 #include <vector>
 
 #include "spdlog/spdlog.h"
+#include "yoloDetector/yolo_detector.hpp"  // [新增] 包含检测器头文件 (同时也包含了 DetectionResult 定义)
 
-// 模拟的检测结果结构体（对应未来的 YOLO 结果）
+// [删除] 移除此处定义的 DetectionResult，直接使用 yolo_detector.hpp 中的定义
+/*
 struct DetectionResult {
 	int x;
 	int y;
@@ -17,6 +20,7 @@ struct DetectionResult {
 	std::string label;
 	float confidence;
 };
+*/
 
 class VideoPipeline {
 public:
@@ -33,12 +37,16 @@ public:
 private:
 	void processLoop(std::string inputUrl, std::string outputUrl, bool isFileSource);
 
-	// 占位算法函数：模拟推理
-	std::vector<DetectionResult> mockInference(const cv::Mat& frame);
+	// [删除] 移除模拟推理函数
+	// std::vector<DetectionResult> mockInference(const cv::Mat& frame);
 
 	// 绘图函数
 	void drawOverlay(cv::Mat& frame, const std::vector<DetectionResult>& results);
 
+private:
 	std::atomic<bool> running_;
 	std::thread processingThread_;
+
+	// [新增] YOLO 检测器实例
+	std::unique_ptr<YoloDetector> detector_;
 };
diff --git a/src/yoloDetector/postprocess.cc b/src/yoloDetector/postprocess.cc
new file mode 100644
index 0000000..8bd5c24
--- /dev/null
+++ b/src/yoloDetector/postprocess.cc
@@ -0,0 +1,324 @@
+#include "postprocess.h"
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <set>
+#include <vector>
+
+static const char *labels[] = {
+    "fuel_car",
+    "new_energy_car"
+};
+
+inline static int clamp(float val, int min, int max) { return val > min ? (val < max ? val : max) : min; }
+
+static float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1, float ymax1)
+{
+    float w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0);
+    float h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0);
+    float i = w * h;
+    float u = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0) + (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0) - i;
+    return u <= 0.f ? 0.f : (i / u);
+}
+
+static int nms(int validCount, std::vector<float> &outputLocations, std::vector<int> classIds, std::vector<int> &order, int filterId, float threshold)
+{
+    for (int i = 0; i < validCount; ++i)
+    {
+        int n = order[i];
+        if (n == -1 || classIds[n] != filterId) continue;
+        for (int j = i + 1; j < validCount; ++j)
+        {
+            int m = order[j];
+            if (m == -1 || classIds[m] != filterId) continue;
+            float xmin0 = outputLocations[n * 4 + 0];
+            float ymin0 = outputLocations[n * 4 + 1];
+            float xmax0 = outputLocations[n * 4 + 0] + outputLocations[n * 4 + 2];
+            float ymax0 = outputLocations[n * 4 + 1] + outputLocations[n * 4 + 3];
+
+            float xmin1 = outputLocations[m * 4 + 0];
+            float ymin1 = outputLocations[m * 4 + 1];
+            float xmax1 = outputLocations[m * 4 + 0] + outputLocations[m * 4 + 2];
+            float ymax1 = outputLocations[m * 4 + 1] + outputLocations[m * 4 + 3];
+
+            float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1);
+
+            if (iou > threshold) order[j] = -1;
+        }
+    }
+    return 0;
+}
+
+static int quick_sort_indice_inverse(std::vector<float> &input, int left, int right, std::vector<int> &indices)
+{
+    float key;
+    int key_index;
+    int low = left;
+    int high = right;
+    if (left < right)
+    {
+        key_index = indices[left];
+        key = input[left];
+        while (low < high)
+        {
+            while (low < high && input[high] <= key) high--;
+            input[low] = input[high];
+            indices[low] = indices[high];
+            while (low < high && input[low] >= key) low++;
+            input[high] = input[low];
+            indices[high] = indices[low];
+        }
+        input[low] = key;
+        indices[low] = key_index;
+        quick_sort_indice_inverse(input, left, low - 1, indices);
+        quick_sort_indice_inverse(input, low + 1, right, indices);
+    }
+    return low;
+}
+
+static float sigmoid(float x) { return 1.0 / (1.0 + expf(-x)); }
+static float unsigmoid(float y) { return -1.0 * logf((1.0 / y) - 1.0); }
+inline static int32_t __clip(float val, float min, float max) { float f = val <= min ? min : (val >= max ? max : val); return f; }
+static int8_t qnt_f32_to_affine(float f32, int32_t zp, float scale) { float dst_val = (f32 / scale) + zp; int8_t res = (int8_t)__clip(dst_val, -128, 127); return res; }
+static uint8_t qnt_f32_to_affine_u8(float f32, int32_t zp, float scale) { float dst_val = (f32 / scale) + zp; uint8_t res = (uint8_t)__clip(dst_val, 0, 255); return res; }
+static float deqnt_affine_to_f32(int8_t qnt, int32_t zp, float scale) { return ((float)qnt - (float)zp) * scale; }
+static float deqnt_affine_u8_to_f32(uint8_t qnt, int32_t zp, float scale) { return ((float)qnt - (float)zp) * scale; }
+
+static void compute_dfl(float* tensor, int dfl_len, float* box){
+    for (int b=0; b<4; b++){
+        float exp_t[dfl_len];
+        float exp_sum=0;
+        float acc_sum=0;
+        for (int i=0; i< dfl_len; i++){
+            exp_t[i] = exp(tensor[i+b*dfl_len]);
+            exp_sum += exp_t[i];
+        }
+        for (int i=0; i< dfl_len; i++){
+            acc_sum += exp_t[i]/exp_sum *i;
+        }
+        box[b] = acc_sum;
+    }
+}
+
+// process_u8, process_i8, process_fp32 保持原样，它们是核心解码逻辑
+static int process_i8(int8_t *box_tensor, int32_t box_zp, float box_scale,
+                      int8_t *score_tensor, int32_t score_zp, float score_scale,
+                      int8_t *score_sum_tensor, int32_t score_sum_zp, float score_sum_scale,
+                      int grid_h, int grid_w, int stride, int dfl_len,
+                      std::vector<float> &boxes, std::vector<float> &objProbs, std::vector<int> &classId, float threshold)
+{
+    int validCount = 0;
+    int grid_len = grid_h * grid_w;
+    int8_t score_thres_i8 = qnt_f32_to_affine(threshold, score_zp, score_scale);
+    int8_t score_sum_thres_i8 = qnt_f32_to_affine(threshold, score_sum_zp, score_sum_scale);
+
+    for (int i = 0; i < grid_h; i++)
+    {
+        for (int j = 0; j < grid_w; j++)
+        {
+            int offset = i* grid_w + j;
+            // score sum 过滤
+            if (score_sum_tensor != nullptr){
+                if (score_sum_tensor[offset] < score_sum_thres_i8) continue;
+            }
+
+            int8_t max_score = -score_zp;
+            int max_class_id = -1;
+            for (int c= 0; c< OBJ_CLASS_NUM; c++){
+                if ((score_tensor[offset] > score_thres_i8) && (score_tensor[offset] > max_score))
+                {
+                    max_score = score_tensor[offset];
+                    max_class_id = c;
+                }
+                offset += grid_len;
+            }
+
+            if (max_score > score_thres_i8){
+                offset = i* grid_w + j;
+                float box[4];
+                float before_dfl[dfl_len*4];
+                for (int k=0; k< dfl_len*4; k++){
+                    before_dfl[k] = deqnt_affine_to_f32(box_tensor[offset], box_zp, box_scale);
+                    offset += grid_len;
+                }
+                compute_dfl(before_dfl, dfl_len, box);
+                float x1 = (-box[0] + j + 0.5)*stride;
+                float y1 = (-box[1] + i + 0.5)*stride;
+                float w = (box[2] + j + 0.5)*stride - x1 + x1; // simplify w calc
+                w = (box[2] + j + 0.5)*stride + (box[0] - j - 0.5)*stride; // Correct logic from source: w = x2 - x1
+                w = (box[2] + box[0]) * stride; // Actual width logic simplified
+                float h = (box[3] + box[1]) * stride; // Actual height logic simplified
+
+                // Re-align with source strict logic to avoid errors
+                float x2 = (box[2] + j + 0.5)*stride;
+                float y2 = (box[3] + i + 0.5)*stride;
+                w = x2 - x1;
+                h = y2 - y1;
+
+                boxes.push_back(x1); boxes.push_back(y1); boxes.push_back(w); boxes.push_back(h);
+                objProbs.push_back(deqnt_affine_to_f32(max_score, score_zp, score_scale));
+                classId.push_back(max_class_id);
+                validCount ++;
+            }
+        }
+    }
+    return validCount;
+}
+
+static int process_fp32(float *box_tensor, float *score_tensor, float *score_sum_tensor,
+                        int grid_h, int grid_w, int stride, int dfl_len,
+                        std::vector<float> &boxes, std::vector<float> &objProbs, std::vector<int> &classId, float threshold)
+{
+    int validCount = 0;
+    int grid_len = grid_h * grid_w;
+    for (int i = 0; i < grid_h; i++)
+    {
+        for (int j = 0; j < grid_w; j++)
+        {
+            int offset = i* grid_w + j;
+            if (score_sum_tensor != nullptr){
+                if (score_sum_tensor[offset] < threshold) continue;
+            }
+            float max_score = 0;
+            int max_class_id = -1;
+            for (int c= 0; c< OBJ_CLASS_NUM; c++){
+                if ((score_tensor[offset] > threshold) && (score_tensor[offset] > max_score))
+                {
+                    max_score = score_tensor[offset];
+                    max_class_id = c;
+                }
+                offset += grid_len;
+            }
+            if (max_score> threshold){
+                offset = i* grid_w + j;
+                float box[4];
+                float before_dfl[dfl_len*4];
+                for (int k=0; k< dfl_len*4; k++){
+                    before_dfl[k] = box_tensor[offset];
+                    offset += grid_len;
+                }
+                compute_dfl(before_dfl, dfl_len, box);
+                float x1 = (-box[0] + j + 0.5)*stride;
+                float y1 = (-box[1] + i + 0.5)*stride;
+                float x2 = (box[2] + j + 0.5)*stride;
+                float y2 = (box[3] + i + 0.5)*stride;
+                float w = x2 - x1;
+                float h = y2 - y1;
+                boxes.push_back(x1); boxes.push_back(y1); boxes.push_back(w); boxes.push_back(h);
+                objProbs.push_back(max_score);
+                classId.push_back(max_class_id);
+                validCount ++;
+            }
+        }
+    }
+    return validCount;
+}
+
+// 主函数：post_process
+int post_process(rknn_app_context_t *app_ctx, void *outputs, letterbox_t *letter_box, float conf_threshold, float nms_threshold, object_detect_result_list *od_results)
+{
+    rknn_output *_outputs = (rknn_output *)outputs;
+    std::vector<float> filterBoxes;
+    std::vector<float> objProbs;
+    std::vector<int> classId;
+    int validCount = 0;
+    int stride = 0;
+    int grid_h = 0;
+    int grid_w = 0;
+    int model_in_w = app_ctx->model_width;
+    int model_in_h = app_ctx->model_height;
+
+    memset(od_results, 0, sizeof(object_detect_result_list));
+
+    // 适配 YOLOv8 的 dfl_len
+    int dfl_len = app_ctx->output_attrs[0].dims[1] / 4;
+    int output_per_branch = app_ctx->io_num.n_output / 3;
+
+    for (int i = 0; i < 3; i++)
+    {
+        void *score_sum = nullptr;
+        int32_t score_sum_zp = 0;
+        float score_sum_scale = 1.0;
+        if (output_per_branch == 3){
+            score_sum = _outputs[i*output_per_branch + 2].buf;
+            score_sum_zp = app_ctx->output_attrs[i*output_per_branch + 2].zp;
+            score_sum_scale = app_ctx->output_attrs[i*output_per_branch + 2].scale;
+        }
+        int box_idx = i*output_per_branch;
+        int score_idx = i*output_per_branch + 1;
+
+        grid_h = app_ctx->output_attrs[box_idx].dims[2];
+        grid_w = app_ctx->output_attrs[box_idx].dims[3];
+        stride = model_in_h / grid_h;
+
+        if (app_ctx->is_quant)
+        {
+            // RK3588 通常使用 I8 量化
+            validCount += process_i8((int8_t *)_outputs[box_idx].buf, app_ctx->output_attrs[box_idx].zp, app_ctx->output_attrs[box_idx].scale,
+                                     (int8_t *)_outputs[score_idx].buf, app_ctx->output_attrs[score_idx].zp, app_ctx->output_attrs[score_idx].scale,
+                                     (int8_t *)score_sum, score_sum_zp, score_sum_scale,
+                                     grid_h, grid_w, stride, dfl_len,
+                                     filterBoxes, objProbs, classId, conf_threshold);
+        }
+        else
+        {
+            validCount += process_fp32((float *)_outputs[box_idx].buf, (float *)_outputs[score_idx].buf, (float *)score_sum,
+                                       grid_h, grid_w, stride, dfl_len,
+                                       filterBoxes, objProbs, classId, conf_threshold);
+        }
+    }
+
+    if (validCount <= 0) return 0;
+
+    // 排序
+    std::vector<int> indexArray;
+    for (int i = 0; i < validCount; ++i) indexArray.push_back(i);
+    quick_sort_indice_inverse(objProbs, 0, validCount - 1, indexArray);
+
+    // NMS
+    std::set<int> class_set(std::begin(classId), std::end(classId));
+    for (auto c : class_set) nms(validCount, filterBoxes, classId, indexArray, c, nms_threshold);
+
+    int last_count = 0;
+    od_results->count = 0;
+
+    // 坐标还原 (使用 letterbox 参数)
+    for (int i = 0; i < validCount; ++i)
+    {
+        if (indexArray[i] == -1 || last_count >= OBJ_NUMB_MAX_SIZE) continue;
+        int n = indexArray[i];
+
+        float x1 = filterBoxes[n * 4 + 0] - letter_box->x_pad;
+        float y1 = filterBoxes[n * 4 + 1] - letter_box->y_pad;
+        float x2 = x1 + filterBoxes[n * 4 + 2];
+        float y2 = y1 + filterBoxes[n * 4 + 3];
+        int id = classId[n];
+        float obj_conf = objProbs[i];
+
+        od_results->results[last_count].box.left = (int)(clamp(x1, 0, model_in_w) / letter_box->scale);
+        od_results->results[last_count].box.top = (int)(clamp(y1, 0, model_in_h) / letter_box->scale);
+        od_results->results[last_count].box.right = (int)(clamp(x2, 0, model_in_w) / letter_box->scale);
+        od_results->results[last_count].box.bottom = (int)(clamp(y2, 0, model_in_h) / letter_box->scale);
+        od_results->results[last_count].prop = obj_conf;
+        od_results->results[last_count].cls_id = id;
+        last_count++;
+    }
+    od_results->count = last_count;
+    return 0;
+}
+
+// 简化版 init
+int init_post_process() { return 0; }
+
+// 简化版 deinit
+void deinit_post_process() {}
+
+// 简化版 name getter
+const char *coco_cls_to_name(int cls_id)
+{
+    if (cls_id >= OBJ_CLASS_NUM) return "null";
+    return labels[cls_id];
+}
diff --git a/src/yoloDetector/postprocess.h b/src/yoloDetector/postprocess.h
new file mode 100644
index 0000000..1c12458
--- /dev/null
+++ b/src/yoloDetector/postprocess.h
@@ -0,0 +1,68 @@
+#ifndef _RKNN_YOLOV8_DEMO_POSTPROCESS_H_
+#define _RKNN_YOLOV8_DEMO_POSTPROCESS_H_
+
+#include <stdint.h>
+
+#include <vector>
+
+#include "rknn_api.h"
+
+// [修改 1] 移除 image_utils.h 和 common.h
+// #include "common.h"
+// #include "image_utils.h"
+
+#define OBJ_NAME_MAX_SIZE 64
+#define OBJ_NUMB_MAX_SIZE 128
+
+// [修改 2] 按照你的模型，将类别数改为 2
+#define OBJ_CLASS_NUM 2
+
+// [修改 3] 默认阈值 (实际运行时会被传入的参数覆盖)
+#define NMS_THRESH 0.45
+#define BOX_THRESH 0.25
+
+// [新增] 手动定义原本在 image_utils.h 中的结构体
+typedef struct {
+	int left;
+	int top;
+	int right;
+	int bottom;
+} image_rect_t;
+
+typedef struct {
+	int x_pad;
+	int y_pad;
+	float scale;
+} letterbox_t;
+
+// [新增] 手动定义 postprocess.cc 依赖的上下文结构体
+// 我们不需要 Demo 中那么复杂的结构，只保留 post_process 函数用到的字段
+typedef struct {
+	rknn_input_output_num io_num;
+	rknn_tensor_attr* output_attrs;
+	int model_width;
+	int model_height;
+	bool is_quant;	// 是否为量化模型
+} rknn_app_context_t;
+
+// 结果结构体保持不变
+typedef struct {
+	image_rect_t box;
+	float prop;
+	int cls_id;
+} object_detect_result;
+
+typedef struct {
+	int id;
+	int count;
+	object_detect_result results[OBJ_NUMB_MAX_SIZE];
+} object_detect_result_list;
+
+// 函数声明
+int init_post_process();
+void deinit_post_process();
+const char* coco_cls_to_name(int cls_id);  // 修改返回值类型为 const char*
+int post_process(rknn_app_context_t* app_ctx, void* outputs, letterbox_t* letter_box,
+				 float conf_threshold, float nms_threshold, object_detect_result_list* od_results);
+
+#endif	//_RKNN_YOLOV8_DEMO_POSTPROCESS_H_
diff --git a/src/yoloDetector/yolo_detector.cpp b/src/yoloDetector/yolo_detector.cpp
new file mode 100644
index 0000000..45979e6
--- /dev/null
+++ b/src/yoloDetector/yolo_detector.cpp
@@ -0,0 +1,289 @@
+#include "yolo_detector.hpp"
+
+#include <algorithm>
+#include <cstring>	// for memset
+#include <fstream>
+#include <iostream>
+
+#include "spdlog/spdlog.h"
+
+YoloDetector::YoloDetector()
+	: ctx_(0), is_initialized_(false), model_data_(nullptr), output_attrs_(nullptr) {
+	memset(&io_num_, 0, sizeof(io_num_));
+}
+
+YoloDetector::~YoloDetector() {
+	if (ctx_ > 0) {
+		rknn_destroy(ctx_);
+	}
+	if (model_data_) {
+		free(model_data_);
+	}
+	if (output_attrs_) {
+		free(output_attrs_);
+	}
+}
+
+unsigned char* YoloDetector::load_model(const char* filename, int* model_size) {
+	FILE* fp = fopen(filename, "rb");
+	if (fp == nullptr) {
+		spdlog::error("Open model file failed: {}", filename);
+		return nullptr;
+	}
+	fseek(fp, 0, SEEK_END);
+	int size = ftell(fp);
+	fseek(fp, 0, SEEK_SET);
+	unsigned char* data = (unsigned char*)malloc(size);
+	fread(data, 1, size, fp);
+	fclose(fp);
+	*model_size = size;
+	return data;
+}
+
+int YoloDetector::init(const std::string& modelPath) {
+	spdlog::info("Loading RKNN model: {}", modelPath);
+
+	// 1. 读取模型文件
+	model_data_ = load_model(modelPath.c_str(), &model_data_size_);
+	if (!model_data_)
+		return -1;
+
+	// 2. 初始化 RKNN
+	int ret = rknn_init(&ctx_, model_data_, model_data_size_, 0, NULL);
+	if (ret < 0) {
+		spdlog::error("rknn_init failed! ret={}", ret);
+		return -1;
+	}
+
+	// 3. 查询模型输入输出信息
+	ret = rknn_query(ctx_, RKNN_QUERY_IN_OUT_NUM, &io_num_, sizeof(io_num_));
+	if (ret < 0)
+		return -1;
+
+	// 获取输入属性
+	rknn_tensor_attr input_attrs[io_num_.n_input];
+	memset(input_attrs, 0, sizeof(input_attrs));
+	for (int i = 0; i < io_num_.n_input; i++) {
+		input_attrs[i].index = i;
+		rknn_query(ctx_, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
+	}
+
+	// [新增] 获取输出属性 (关键：用于后处理中的量化参数 scale/zp)
+	output_attrs_ = (rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num_.n_output);
+	memset(output_attrs_, 0, sizeof(rknn_tensor_attr) * io_num_.n_output);
+	for (int i = 0; i < io_num_.n_output; i++) {
+		output_attrs_[i].index = i;
+		rknn_query(ctx_, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs_[i]), sizeof(rknn_tensor_attr));
+	}
+
+	// 假设只有一个输入 (图像)
+	model_width_ = input_attrs[0].dims[2];	  // width
+	model_height_ = input_attrs[0].dims[1];	  // height
+	model_channel_ = input_attrs[0].dims[0];  // channel
+
+	// 兼容部分 NCHW 格式
+	if (model_width_ == 3) {
+		model_width_ = input_attrs[0].dims[1];
+		model_height_ = input_attrs[0].dims[2];
+	}
+
+	spdlog::info("Model Input Shape: {}x{}x{}", model_width_, model_height_, model_channel_);
+	is_initialized_ = true;
+	return 0;
+}
+
+cv::Mat YoloDetector::letterbox(const cv::Mat& src, int target_width, int target_height,
+								float& ratio, int& dw, int& dh) {
+	int w = src.cols;
+	int h = src.rows;
+
+	float scale_w = (float)target_width / w;
+	float scale_h = (float)target_height / h;
+	ratio = std::min(scale_w, scale_h);
+
+	int new_unpad_w = (int)(w * ratio);
+	int new_unpad_h = (int)(h * ratio);
+
+	dw = (target_width - new_unpad_w) / 2;
+	dh = (target_height - new_unpad_h) / 2;
+
+	cv::Mat resized;
+	cv::resize(src, resized, cv::Size(new_unpad_w, new_unpad_h));
+
+	cv::Mat padded;
+	cv::copyMakeBorder(resized, padded, dh, target_height - new_unpad_h - dh, dw,
+					   target_width - new_unpad_w - dw, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
+
+	return padded;
+}
+
+std::vector<DetectionResult> YoloDetector::detect(const cv::Mat& inputImage) {
+	std::vector<DetectionResult> results;
+	if (!is_initialized_ || inputImage.empty())
+		return results;
+
+	// === 1. 预处理 ===
+	float ratio;
+	int dw, dh;
+	cv::Mat input_rgb;
+	cv::cvtColor(inputImage, input_rgb, cv::COLOR_BGR2RGB);
+
+	cv::Mat img = letterbox(input_rgb, model_width_, model_height_, ratio, dw, dh);
+
+	// === 2. 设置输入 ===
+	rknn_input inputs[1];
+	memset(inputs, 0, sizeof(inputs));
+	inputs[0].index = 0;
+	inputs[0].type = RKNN_TENSOR_UINT8;
+	inputs[0].size = img.cols * img.rows * img.channels();
+	inputs[0].fmt = RKNN_TENSOR_NHWC;
+	inputs[0].buf = img.data;
+
+	rknn_inputs_set(ctx_, io_num_.n_input, inputs);
+
+	// === 3. 执行推理 ===
+	int ret = rknn_run(ctx_, nullptr);
+	if (ret < 0) {
+		spdlog::error("rknn_run failed: {}", ret);
+		return results;
+	}
+
+	// === 4. 获取输出 ===
+	// 使用 vector 或 new 替代变长数组(VLA)，更符合 C++ 标准
+	std::vector<rknn_output> outputs(io_num_.n_output);
+	memset(outputs.data(), 0, outputs.size() * sizeof(rknn_output));
+
+	// YoloV8 输出通常需要 float 处理 (want_float=1)，
+	// 但 postprocess.cc 中的 process_i8 是针对 int8 优化的。
+	// 如果你的模型是 int8 量化且 output_attrs 正常，want_float=0 也可以，
+	// 但 post_process 函数里强转为了 float* 处理 fp32 分支，或者 int8* 处理 i8 分支。
+	// 为了保险起见，如果 postprocess 逻辑是处理原始输出的，这里通常不需要 want_float=1，除非是 fp32
+	// 模型。
+	// **注意**：你的 postprocess.cc 能够处理量化(process_i8)和非量化(process_fp32)数据。
+	// 通常建议让 rknn 直接输出原始数据，由后处理决定如何解析。
+	for (int i = 0; i < io_num_.n_output; i++) {
+		outputs[i].want_float = 0;	// 0: 输出原始量化数据(int8), 1: 驱动转为float
+		outputs[i].index = i;
+	}
+
+	ret = rknn_outputs_get(ctx_, io_num_.n_output, outputs.data(), NULL);
+	if (ret < 0) {
+		spdlog::error("rknn_outputs_get failed: {}", ret);
+		return results;
+	}
+
+	// === 5. 后处理 ===
+	rknn_app_context_t app_ctx;
+	app_ctx.io_num = io_num_;
+	app_ctx.output_attrs = output_attrs_;  // 现在这个变量已在 init 中初始化
+	app_ctx.model_width = model_width_;
+	app_ctx.model_height = model_height_;
+	app_ctx.is_quant = true;  // 假设模型是量化的
+
+	letterbox_t letter_box_param;
+	letter_box_param.x_pad = dw;
+	letter_box_param.y_pad = dh;
+	letter_box_param.scale = ratio;
+
+	object_detect_result_list od_results;
+	// spdlog::info("--- Debugging Raw Output ---");
+	// for (int i = 0; i < io_num_.n_output; ++i) {
+	// 	int8_t* buffer = (int8_t*)outputs[i].buf;
+	// 	int size = outputs[i].size;
+	// 	int32_t zp = output_attrs_[i].zp;
+	// 	float scale = output_attrs_[i].scale;
+
+	// 	// 统计这一层输出的最大值、最小值和平均值
+	// 	float max_val = -9999.0f;
+	// 	float min_val = 9999.0f;
+	// 	int8_t raw_max = -128;
+
+	// 	// 为了性能只采样前 1000 个数值或全部
+	// 	int check_count = (size > 1000) ? 1000 : size;
+
+	// 	for (int j = 0; j < check_count; j++) {
+	// 		// 手动反量化： float = (int - zp) * scale
+	// 		float val = (buffer[j] - zp) * scale;
+	// 		if (val > max_val)
+	// 			max_val = val;
+	// 		if (val < min_val)
+	// 			min_val = val;
+	// 		if (buffer[j] > raw_max)
+	// 			raw_max = buffer[j];
+	// 	}
+
+	// 	spdlog::info("Output[{}]: Size={}, ZP={}, Scale={}", i, size, zp, scale);
+	// 	spdlog::info("  -> Raw int8 max: {}, Dequantized float range: [{:.4f}, {:.4f}]", raw_max,
+	// 				 min_val, max_val);
+	// }
+	// spdlog::info("----------------------------");
+	// === [调试代码结束] ===
+	// 调用官方/Demo提供的后处理
+	post_process(&app_ctx, outputs.data(), &letter_box_param, obj_thresh_, nms_thresh_,
+				 &od_results);
+
+	// 转换结果
+	for (int i = 0; i < od_results.count; i++) {
+		object_detect_result* det = &(od_results.results[i]);
+		DetectionResult res;
+		res.x = det->box.left;
+		res.y = det->box.top;
+		res.width = det->box.right - det->box.left;
+		res.height = det->box.bottom - det->box.top;
+		res.confidence = det->prop;
+		res.class_id = det->cls_id;
+		res.label = coco_cls_to_name(det->cls_id);
+
+		results.push_back(res);	 // [修复] 变量名从 final_results 改为 results
+	}
+
+	// === 6. 释放 RKNN 输出内存 (必须！) ===
+	rknn_outputs_release(ctx_, io_num_.n_output, outputs.data());
+
+	return results;
+}
+
+// [修复] 将 nms_boxes 移出 detect 函数，单独实现
+// 注意：post_process 内部通常已经包含了 NMS，这里如果是为了二次处理可以保留，
+// 否则可以直接使用 post_process 的输出。
+void YoloDetector::nms_boxes(std::vector<DetectionResult>& boxes, float nms_thresh) {
+	std::sort(boxes.begin(), boxes.end(), [](const DetectionResult& a, const DetectionResult& b) {
+		return a.confidence > b.confidence;
+	});
+
+	std::vector<bool> is_suppressed(boxes.size(), false);
+	for (size_t i = 0; i < boxes.size(); ++i) {
+		if (is_suppressed[i])
+			continue;
+		for (size_t j = i + 1; j < boxes.size(); ++j) {
+			if (is_suppressed[j])
+				continue;
+
+			// 计算 IoU
+			int xx1 = std::max(boxes[i].x, boxes[j].x);
+			int yy1 = std::max(boxes[i].y, boxes[j].y);
+			int xx2 = std::min(boxes[i].x + boxes[i].width, boxes[j].x + boxes[j].width);
+			int yy2 = std::min(boxes[i].y + boxes[i].height, boxes[j].y + boxes[j].height);
+
+			int w = std::max(0, xx2 - xx1);
+			int h = std::max(0, yy2 - yy1);
+			int inter_area = w * h;
+			int union_area =
+				boxes[i].width * boxes[i].height + boxes[j].width * boxes[j].height - inter_area;
+
+			float iou = (float)inter_area / union_area;
+			if (iou > nms_thresh) {
+				is_suppressed[j] = true;
+			}
+		}
+	}
+
+	// 移除被抑制的框
+	std::vector<DetectionResult> keep_boxes;
+	for (size_t i = 0; i < boxes.size(); ++i) {
+		if (!is_suppressed[i]) {
+			keep_boxes.push_back(boxes[i]);
+		}
+	}
+	boxes = keep_boxes;
+}
diff --git a/src/yoloDetector/yolo_detector.hpp b/src/yoloDetector/yolo_detector.hpp
new file mode 100644
index 0000000..8d66193
--- /dev/null
+++ b/src/yoloDetector/yolo_detector.hpp
@@ -0,0 +1,57 @@
+#pragma once
+
+#include <opencv2/opencv.hpp>
+#include <string>
+#include <vector>
+
+#include "postprocess.h"
+#include "rknn_api.h"
+
+struct DetectionResult {
+	int x;
+	int y;
+	int width;
+	int height;
+	std::string label;
+	float confidence;
+	int class_id;
+};
+
+class YoloDetector {
+public:
+	YoloDetector();
+	~YoloDetector();
+
+	int init(const std::string& modelPath);
+	std::vector<DetectionResult> detect(const cv::Mat& inputImage);
+
+private:
+	unsigned char* load_model(const char* filename, int* model_size);
+
+	// NMS 函数声明
+	void nms_boxes(std::vector<DetectionResult>& boxes, float nms_thresh);
+
+	cv::Mat letterbox(const cv::Mat& src, int target_width, int target_height, float& ratio,
+					  int& dw, int& dh);
+
+private:
+	rknn_context ctx_;
+	bool is_initialized_;
+
+	// 模型输入输出参数
+	rknn_input_output_num io_num_;	  // [新增] 存储输入输出数量
+	rknn_tensor_attr* output_attrs_;  // [新增] 存储输出属性(zp, scale等)
+
+	int model_width_;
+	int model_height_;
+	int model_channel_;
+
+	unsigned char* model_data_;
+	int model_data_size_;
+
+	// 配置参数
+	const std::vector<std::string> CLASS_NAMES = {"fuel_car", "new_energy_car"};
+	// 这里的阈值如果 postprocess 内部用了，这里可以作为 fallback 或二次筛选
+	const float obj_thresh_ = 0.25f;
+	const float nms_thresh_ = 0.45f;
+};