From f9a48f07b7708707d0cb79277e9f83874fe1ae52 Mon Sep 17 00:00:00 2001
From: GuanYuankai <whistle_op@hotmail.com>
Date: Mon, 27 Oct 2025 07:23:35 +0000
Subject: [PATCH] =?UTF-8?q?=E9=99=8D=E4=BD=8E=E8=A7=86=E9=A2=91=E9=87=87?=
 =?UTF-8?q?=E9=9B=86=E7=9A=84CPU=E5=8D=A0=E6=9C=89=E7=8E=87?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/rknn/video_service.cc | 157 ++++++++++++++++++++++++++------------
 1 file changed, 110 insertions(+), 47 deletions(-)
diff --git a/src/rknn/video_service.cc b/src/rknn/video_service.cc
index 53056d0..baf9a2a 100644
--- a/src/rknn/video_service.cc
+++ b/src/rknn/video_service.cc
@@ -53,13 +53,24 @@ bool VideoService::start() {
     }
     printf("rknnPool init success.\n");
 
-    setenv("OPENCV_FFMPEG_CAPTURE_OPTIONS", "rtsp_transport;tcp", 1);
-    printf("Set RTSP transport protocol to TCP\n");
+    // setenv("OPENCV_FFMPEG_CAPTURE_OPTIONS", "rtsp_transport;tcp", 1);
+    // printf("Set RTSP transport protocol to TCP\n");
+    std::string gst_input_pipeline = 
+        "rtspsrc location=" + input_url_ + " latency=0 protocols=tcp ! "
+        "rtph265depay ! "
+        "h265parse ! "
+        "mppvideodec format=16 ! "     // NOTE(review): 16 is presumably GST_VIDEO_FORMAT_BGR - confirm against the mppvideodec plugin
+        "video/x-raw,format=BGR ! " // <-- Key step: ask mppvideodec to output BGR directly
+        "appsink";
 
-    capture_.open(input_url_, cv::CAP_FFMPEG);
+    spdlog::info("Try to Open RTSP Stream");
+    capture_.open(gst_input_pipeline, cv::CAP_GSTREAMER);
+    
     if (!capture_.isOpened()) {
         printf("Error: Could not open RTSP stream: %s\n", input_url_.c_str());
         return false;
+    }else{
+        spdlog::info("RTSP Stream Opened!");
     }
     
     frame_width_ = static_cast<int>(capture_.get(cv::CAP_PROP_FRAME_WIDTH));
@@ -70,13 +81,13 @@ bool VideoService::start() {
     printf("RTSP stream opened successfully! (%dx%d @ %.2f FPS)\n", frame_width_, frame_height_, frame_fps_);
 
 
-    std::string gst_pipeline = 
+    std::string gst_pipeline =
     "appsrc ! "
-    "queue max-size-buffers=2 leaky=downstream ! " 
-    "video/x-raw,format=BGR ! " 
-    "videoconvert ! "
-    "video/x-raw,format=NV12 ! "
-    "mpph265enc gop=25 rc-mode=fixqp qp-init=26 ! " 
+    "queue max-size-buffers=2 leaky=downstream ! "
+    "video/x-raw,format=BGR ! " // frames pushed into appsrc are BGR (presumably by an OpenCV VideoWriter - not shown in this hunk)
+    "videoconvert ! "           // <-- BGR to NV12 conversion is done on the CPU here
+    "video/x-raw,format=NV12 ! " // pin videoconvert's output format to NV12
+    "mpph265enc gop=25 rc-mode=fixqp qp-init=26 ! " // the hardware encoder consumes NV12
     "h265parse ! "
     "rtspclientsink location=" + output_rtsp_url_ + " latency=0 protocols=tcp";
 
@@ -131,18 +142,48 @@ void VideoService::stop() {
 
 void VideoService::update_tracker(detect_result_group_t &detect_result_group, const cv::Size& frame_size)
 {
-    if (intrusion_zone_.width == 0 || intrusion_zone_.height == 0) {
+    // If the intrusion zone is unset or invalid, default it to the centered rectangle spanning half the frame's width and height (a quarter of its area).
+    if (intrusion_zone_.width <= 0 || intrusion_zone_.height <= 0) {
         intrusion_zone_ = cv::Rect(frame_size.width / 4, frame_size.height / 4, frame_size.width / 2, frame_size.height / 2);
     }
 
-    std::vector<cv::Rect> current_detections;
+    // --- Scale factors ---
+    // IMPORTANT: 640.0f must match the target size passed to cv::resize before inference.
+    const float model_input_width = 640.0f;
+    const float model_input_height = 640.0f;
+    float scale_x = (float)frame_size.width / model_input_width;
+    float scale_y = (float)frame_size.height / model_input_height;
+    // --- End of scale factors ---
+
+    std::vector<cv::Rect> current_detections; // this frame's detections, rescaled to original-frame size
     for (int i = 0; i < detect_result_group.count; i++) {
         detect_result_t *det_result = &(detect_result_group.results[i]);
+        // Only detections labelled "person" are tracked.
         if (strcmp(det_result->name, "person") == 0) {
-            current_detections.push_back(cv::Rect(
-                det_result->box.left, det_result->box.top,
-                det_result->box.right - det_result->box.left,
-                det_result->box.bottom - det_result->box.top));
+            // --- Scale the model-space box back to original-frame coordinates ---
+            int original_left = static_cast<int>(det_result->box.left * scale_x);
+            int original_top = static_cast<int>(det_result->box.top * scale_y);
+            int original_right = static_cast<int>(det_result->box.right * scale_x);
+            int original_bottom = static_cast<int>(det_result->box.bottom * scale_y);
+
+            // --- Bounds check and clamping ---
+            // Keep the top-left corner inside the original image.
+            original_left = std::max(0, std::min(original_left, frame_size.width - 1));
+            original_top = std::max(0, std::min(original_top, frame_size.height - 1));
+            // Keep right >= left and bottom >= top, capped at the image size.
+            original_right = std::max(original_left, std::min(original_right, frame_size.width));
+            original_bottom = std::max(original_top, std::min(original_bottom, frame_size.height));
+            // --- End of bounds check ---
+
+            // Keep the box only if it is non-degenerate (width and height both > 0).
+            if (original_right > original_left && original_bottom > original_top) {
+                current_detections.push_back(cv::Rect(
+                    original_left, original_top,
+                    original_right - original_left,  // width
+                    original_bottom - original_top   // height
+                ));
+            }
+            // --- End of coordinate rescaling ---
         }
     }
 
@@ -150,65 +191,86 @@ void VideoService::update_tracker(detect_result_group_t &detect_result_group, co
         it->second.frames_unseen++;
     }
 
-    for (const auto& det_box : current_detections) {
-        bool is_matched = false;
-        int best_match_id = -1;
-        double max_iou = 0.3; // IoU阈值
+    std::vector<int> matched_track_ids; // track ids already matched in this frame, so one track is not claimed by several detections
 
-        for (auto const& [id, person] : this->tracked_persons_) {
+    // Try to associate each detection of this frame with an existing track.
+    for (const auto& det_box : current_detections) {
+        int best_match_id = -1;
+        double max_iou_threshold = 0.3; // minimum overlap ratio required for a match
+        double best_iou = 0.0;          // best overlap ratio seen so far for this detection
+
+        for (auto const& [id, person] : tracked_persons_) {
+            // Has this track id already been matched in the current frame?
+            bool already_matched = false;
+            for(int matched_id : matched_track_ids) {
+                if (id == matched_id) {
+                    already_matched = true;
+                    break;
+                }
+            }
+            if (already_matched) {
+                continue; // skip tracks that are already taken
+            }
+
+            // Overlap ratio; both rects are in original-frame coordinates. NOTE(review): cv::Rect's operator| gives the enclosing rectangle, so this is not a strict IoU - confirm.
             double iou = (double)(det_box & person.box).area() / (double)(det_box | person.box).area();
-            if (iou > max_iou) {
-                max_iou = iou;
+            if (iou > best_iou && iou >= max_iou_threshold) { // a candidate must reach the threshold and beat the current best
+                best_iou = iou;
                 best_match_id = id;
             }
         }
-        
+
         if (best_match_id != -1) {
+            // Match found: refresh the track with this detection.
             tracked_persons_[best_match_id].box = det_box;
             tracked_persons_[best_match_id].frames_unseen = 0;
-            is_matched = true;
+            matched_track_ids.push_back(best_match_id); // mark this track as taken for the rest of the frame
         } else {
+            matched_track_ids.push_back(next_track_id_); // No match: reserve the id the new track takes below, so later detections in this frame cannot claim it.
             TrackedPerson new_person;
-            new_person.id = this->next_track_id_++; // 使用成员变量
+            new_person.id = next_track_id_++; // assign a fresh id
             new_person.box = det_box;
             new_person.entry_time = 0;
             new_person.is_in_zone = false;
             new_person.alarm_triggered = false;
             new_person.frames_unseen = 0;
-            tracked_persons_[new_person.id] = new_person; // 使用成员变量
+            tracked_persons_[new_person.id] = new_person;
         }
     }
 
+    // Update zone membership and alarm state for every track.
     double current_time = get_current_time_seconds();
     for (auto it = tracked_persons_.begin(); it != tracked_persons_.end(); ++it) {
         TrackedPerson& person = it->second;
-        // 使用成员变量
-        bool currently_in_zone = (this->intrusion_zone_ & person.box).area() > 0; 
+        // A track counts as inside the zone when its box intersects the intrusion zone (both in original-frame coordinates).
+        bool currently_in_zone = (intrusion_zone_ & person.box).area() > 0;
 
         if (currently_in_zone) {
-            if (!person.is_in_zone) {
+            if (!person.is_in_zone) { // just entered the zone
                 person.is_in_zone = true;
                 person.entry_time = current_time;
-            } else {
-                // 使用成员变量
-                if (!person.alarm_triggered && (current_time - person.entry_time) > this->intrusion_time_threshold_) {
+                person.alarm_triggered = false; // re-arm the alarm on entry
+            } else { // still inside the zone
+                // Raise the alarm once the dwell time reaches the threshold, unless it was already raised.
+                if (!person.alarm_triggered && (current_time - person.entry_time) >= intrusion_time_threshold_) {
                     person.alarm_triggered = true;
-                    trigger_alarm(person.id, person.box); // 调用成员函数
+                    trigger_alarm(person.id, person.box); // raise the alarm
                 }
             }
-        } else {
-            person.is_in_zone = false;
-            person.entry_time = 0;
-            person.alarm_triggered = false;
+        } else { // currently outside the zone
+            if (person.is_in_zone) { // just left the zone
+                person.is_in_zone = false;
+                person.entry_time = 0; // clear the entry timestamp
+                person.alarm_triggered = false; // re-arm the alarm
+            }
         }
     }
 
-    for (auto it = tracked_persons_.begin(); it != tracked_persons_.end(); ) {
-         // (建议) 增加到50帧 (约2秒) 提高鲁棒性，减少ID切换
-        if (it->second.frames_unseen > 50) {
-            it = tracked_persons_.erase(it);
+    for (auto it = tracked_persons_.begin(); it != tracked_persons_.end(); /* advanced in the body */) {
+        if (it->second.frames_unseen > 50) { // drop tracks unseen for more than 50 frames
+            it = tracked_persons_.erase(it); // erase returns the iterator to the next element
         } else {
-            ++it;
+            ++it; // advance manually
         }
     }
 }
@@ -265,8 +327,6 @@ void VideoService::processing_loop() {
     detect_result_group_t detection_results;
     
     while (running_) {
-        // auto t_start = std::chrono::high_resolution_clock::now();
-
         {
             std::unique_lock<std::mutex> lock(frame_mutex_);
 
@@ -283,14 +343,17 @@ void VideoService::processing_loop() {
             new_frame_available_ = false; 
         }
         
-        // auto t_read = std::chrono::high_resolution_clock::now(); 
 
         if (frame.empty()) {
             continue;
         }
+        cv::Mat model_input_image; // resized copy passed to rknn_pool_->put() below in place of the full frame
+        cv::resize(frame, model_input_image, cv::Size(640, 640)); // fixed 640x640; update_tracker's model_input_width/height must match
+        if (!model_input_image.isContinuous()) { // NOTE(review): resize output is presumably already continuous - confirm this guard is needed
+            model_input_image = model_input_image.clone();
+        }   
 
-        // [保留] 后续的 AI 推理、跟踪、绘图、推流逻辑完全不变
-        if (rknn_pool_->put(frame) != 0) {
+        if (rknn_pool_->put(model_input_image) != 0) {
             spdlog::error("VideoService: Failed to put frame into rknnPool. Stopping.");
             running_ = false;
             break;