diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ea87b5..8a814ce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,10 +13,16 @@ find_package(PahoMqttCpp REQUIRED) find_package(SQLite3 REQUIRED) find_package(PkgConfig REQUIRED) -# pkg_check_modules(GST REQUIRED gstreamer-1.0 gstreamer-app-1.0 gstreamer-allocators-1.0) pkg_check_modules(GST REQUIRED gstreamer-1.0 gstreamer-app-1.0 gstreamer-allocators-1.0 gstreamer-video-1.0) find_package(OpenCV REQUIRED) +# 查找 FFmpeg 库 +find_package(PkgConfig REQUIRED) +pkg_check_modules(AVCODEC REQUIRED libavcodec) +pkg_check_modules(AVFORMAT REQUIRED libavformat) +pkg_check_modules(AVUTIL REQUIRED libavutil) +pkg_check_modules(SWSCALE REQUIRED libswscale) + add_subdirectory(src/vendor/crow) set(BYTETRACK_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src/algorithm/bytetrack/src) @@ -82,6 +88,7 @@ add_library(edge_proxy_lib STATIC src/rknn/rkYolov8.cc src/rknn/preprocess.cc src/rknn/postprocess.cc + src/rknn/ffmpeg_rga_decoder.cpp src/videoServiceManager/video_service_manager.cc src/algorithm/IntrusionModule.cc src/algorithm/HumanDetectionModule.cc @@ -96,7 +103,13 @@ target_include_directories(edge_proxy_lib PUBLIC /usr/include/rga ${EIGEN3_INCLUDE_DIRS} # Eigen3 头文件 - ${CMAKE_CURRENT_SOURCE_DIR}/src/algorithm/bytetrack/include # ByteTrack 头文件 + ${CMAKE_CURRENT_SOURCE_DIR}/src/algorithm/bytetrack/include + + #ffmpeg + ${AVCODEC_INCLUDE_DIRS} + ${AVFORMAT_INCLUDE_DIRS} + ${AVUTIL_INCLUDE_DIRS} + ${SWSCALE_INCLUDE_DIRS} ) target_link_libraries(edge_proxy_lib PRIVATE @@ -113,6 +126,12 @@ target_link_libraries(edge_proxy_lib PRIVATE rga ${OpenCV_LIBS} ${GST_LIBRARIES} + + #ffmpeg + ${AVCODEC_LIBRARIES} + ${AVFORMAT_LIBRARIES} + ${AVUTIL_LIBRARIES} + ${SWSCALE_LIBRARIES} ) diff --git a/config/video_config.json b/config/video_config.json index ed6ad85..3761716 100644 --- a/config/video_config.json +++ b/config/video_config.json @@ -1,213 +1,225 @@ { - "video_service": { - "enabled": true - }, - "video_streams": [ - { - "enabled": true, - "id": "cam_01_intrusion", - "input_url": "rtsp://admin:hzx12345@192.168.1.10:554/Streaming/Channels/3101", - "module_config": { - "class_num": 3, - "intrusion_zone": [ - [ - 4, - 8 - ], - [ - 1057, - 13 - ], - [ - 1053, - 267 - ], - [ - 1347, - 289 - ], - [ - 1365, - 8 - ], - [ - 1911, - 6 - ], - [ - 1898, - 1077 - ], - [ - 7, - 1077 - ] - ], - "label_path": "/app/models/human.txt", - "light_device_ids": [ - "351", - "352", - "353", - "349", - "354", - "350" - ], - "mode": 1, - "model_path": "/app/models/human_detection.rknn", - "rknn_thread_num": 3, - "time_threshold_sec": 3 - }, - "module_type": "human_detection", - "output_rtsp": "rtsp://127.0.0.1:8554/ch1901" - }, - { - "enabled": true, - "id": "cam_02_intrusion", - "input_url": "rtsp://admin:hzx12345@192.168.1.10:554/Streaming/Channels/3201", - "module_config": { - "class_num": 3, - "intrusion_zone": [ - [ - 11, - 13 - ], - [ - 1907, - 13 - ], - [ - 1911, - 1074 - ], - [ - 7, - 1070 - ] - ], - "label_path": "/app/models/human.txt", - "light_device_ids": [ - "351", - "352", - "353", - "349", - "354", - "350" - ], - "mode": 1, - "model_path": "/app/models/human_detection.rknn", - "rknn_thread_num": 3, - "time_threshold_sec": 3 - }, - "module_type": "human_detection", - "output_rtsp": "rtsp://127.0.0.1:8554/ch1801" - }, - { - "enabled": true, - "id": "cam_03_intrusion", - "input_url": "rtsp://admin:hzx12345@192.168.1.10:554/Streaming/Channels/1601", - "module_config": { - "class_num": 3, - "intrusion_zone": [ - [ - 20, - 19 - ], - [ - 1891, - 35 - ], - [ - 1433, - 466 - ], - [ - 
1006, - 619 - ], - [ - 1210, - 1001 - ], - [ - 1201, - 1072 - ], - [ - 2, - 1077 - ] - ], - "label_path": "/app/models/human.txt", - "model_path": "/app/models/human_detection.rknn", - "rknn_thread_num": 3, - "time_threshold_sec": 3 - }, - "module_type": "human_detection", - "output_rtsp": "rtsp://127.0.0.1:8554/ch2201" - }, - { - "enabled": false, - "id": "cam_04_intrusion", - "input_url": "rtsp://admin:hzx12345@192.168.1.10:554/Streaming/Channels/1101", - "module_config": { - "class_num": 3, - "intrusion_zone": [ - [ - 400, - 400 - ], - [ - 800, - 400 - ], - [ - 900, - 600 - ], - [ - 300, - 600 - ] - ], - "label_path": "/app/models/human.txt", - "model_path": "/app/models/human_detection.rknn", - "rknn_thread_num": 3, - "time_threshold_sec": 3 - }, - "module_type": "human_detection", - "output_rtsp": "rtsp://127.0.0.1:8554/ch1201" - }, - { - "enabled": false, - "id": "cam_01_intrusion", - "input_url": "rtsp://admin:hzx12345@192.168.1.10:554/Streaming/Channels/1101", - "module_config": { - "class_num": 3, - "intrusion_zone": [ - [ - 400, - 400 - ], - [ - 800, - 400 - ], - [ - 900, - 600 - ], - [ - 300, - 600 - ] - ], - "label_path": "/app/models/human.txt", - "model_path": "/app/models/human_detection.rknn", - "rknn_thread_num": 3, - "time_threshold_sec": 3 - }, - "module_type": "human_detection", - "output_rtsp": "rtsp://127.0.0.1:8554/ch1101" - } - ] + "video_service": { + "enabled": true + }, + "video_streams": [ + { + "enabled": true, + "id": "cam_01_intrusion", + "input_url": "rtsp://admin:hzx12345@192.168.1.10:554/Streaming/Channels/3101", + "module_config": { + "class_num": 3, + "intrusion_zone": [ + [ + 4, + 8 + ], + [ + 1057, + 13 + ], + [ + 1053, + 267 + ], + [ + 1347, + 289 + ], + [ + 1365, + 8 + ], + [ + 1911, + 6 + ], + [ + 1898, + 1077 + ], + [ + 7, + 1077 + ] + ], + "label_path": "/app/models/human.txt", + "light_device_ids": [ + "351", + "352", + "353", + "349", + "354", + "350" + ], + "mode": 1, + "model_path": "/app/models/human_detection.rknn", + "rknn_thread_num": 3, + "time_threshold_sec": 3 + }, + "module_type": "human_detection", + "output_rtsp": "rtsp://127.0.0.1:8554/ch1901" + }, + { + "enabled": true, + "id": "cam_02_intrusion", + "input_url": "rtsp://admin:hzx12345@192.168.1.10:554/Streaming/Channels/3201", + "module_config": { + "class_num": 3, + "intrusion_zone": [ + [ + 11, + 13 + ], + [ + 1907, + 13 + ], + [ + 1911, + 1074 + ], + [ + 7, + 1070 + ] + ], + "label_path": "/app/models/human.txt", + "light_device_ids": [ + "351", + "352", + "353", + "349", + "354", + "350" + ], + "mode": 1, + "model_path": "/app/models/human_detection.rknn", + "rknn_thread_num": 3, + "time_threshold_sec": 3 + }, + "module_type": "human_detection", + "output_rtsp": "rtsp://127.0.0.1:8554/ch1801" + }, + { + "enabled": true, + "id": "cam_03_intrusion", + "input_url": "rtsp://admin:hzx12345@192.168.1.10:554/Streaming/Channels/1601", + "module_config": { + "class_num": 3, + "intrusion_zone": [ + [ + 20, + 19 + ], + [ + 1891, + 35 + ], + [ + 1433, + 466 + ], + [ + 1006, + 619 + ], + [ + 1210, + 1001 + ], + [ + 1201, + 1072 + ], + [ + 2, + 1077 + ] + ], + "label_path": "/app/models/human.txt", + "model_path": "/app/models/human_detection.rknn", + "rknn_thread_num": 3, + "time_threshold_sec": 3 + }, + "module_type": "human_detection", + "output_rtsp": "rtsp://127.0.0.1:8554/ch2201" + }, + { + "enabled": true, + "id": "cam_05_intrusion", + "input_url": "rtsp://admin:hzx12345@192.168.1.10:554/Streaming/Channels/1501", + "module_config": { + "class_num": 3, + "intrusion_zone": [ + [ + 20, + 
19 + ], + [ + 1891, + 35 + ], + [ + 1433, + 466 + ], + [ + 1006, + 619 + ], + [ + 1210, + 1001 + ], + [ + 1201, + 1072 + ], + [ + 2, + 1077 + ] + ], + "label_path": "/app/models/human.txt", + "model_path": "/app/models/human_detection.rknn", + "rknn_thread_num": 3, + "time_threshold_sec": 3 + }, + "module_type": "human_detection", + "output_rtsp": "rtsp://127.0.0.1:8554/ch1501" + }, + { + "enabled": false, + "id": "cam_06_intrusion", + "input_url": "rtsp://admin:hzx12345@192.168.1.10:554/Streaming/Channels/1101", + "module_config": { + "class_num": 3, + "intrusion_zone": [ + [ + 400, + 400 + ], + [ + 800, + 400 + ], + [ + 900, + 600 + ], + [ + 300, + 600 + ] + ], + "label_path": "/app/models/human.txt", + "model_path": "/app/models/human_detection.rknn", + "rknn_thread_num": 3, + "time_threshold_sec": 3 + }, + "module_type": "human_detection", + "output_rtsp": "rtsp://127.0.0.1:8554/ch1101" + } + ] } \ No newline at end of file diff --git a/cs.mp4 b/cs.mp4 new file mode 100644 index 0000000..8a83418 Binary files /dev/null and b/cs.mp4 differ diff --git a/src/rknn/dma_allocator.hpp b/src/rknn/dma_allocator.hpp index cf4ab42..20bf8c8 100644 --- a/src/rknn/dma_allocator.hpp +++ b/src/rknn/dma_allocator.hpp @@ -1,115 +1,109 @@ -#ifndef DMA_ALLOCATOR_HPP -#define DMA_ALLOCATOR_HPP +#pragma once #include -#include +#include +#include +#include #include #include #include -#include #include -#include +#include #include "im2d.h" #include "rga.h" -#define DMA_HEAP_IOCTL_MAGIC 'H' -struct dma_heap_allocation_data { - __u64 len; - __u32 fd; - __u32 fd_flags; - __u64 heap_flags; -}; -#define DMA_HEAP_IOCTL_ALLOC _IOWR(DMA_HEAP_IOCTL_MAGIC, 0, struct dma_heap_allocation_data) - class DmaBuffer { public: - void* vaddr = nullptr; - int fd = -1; - rga_buffer_handle_t handle = 0; - size_t size = 0; - int width = 0; - int height = 0; - int format = 0; + int fd; + void* vaddr; + size_t size; + size_t actual_alloc_size; - DmaBuffer(int w, int h, int fmt) : width(w), height(h), format(fmt) { - int bpp = (fmt == RK_FORMAT_RGB_888 || fmt == RK_FORMAT_BGR_888) ? 3 : 4; + int width; + int height; - // 4K 对齐 - size_t raw_size = w * h * bpp; - size = (raw_size + 4095) & (~4095); + // 构造函数 1: 仅大小 + DmaBuffer(size_t size, const std::string& heap_name = "/dev/dma_heap/system-uncached") { + this->width = 0; + this->height = 0; + allocate(size, heap_name); + } - if (alloc_dma_buffer(size, &fd, &vaddr) < 0) { - std::cerr << "[DmaBuffer] Error: Failed to allocate DMA buffer!" 
<< std::endl; - return; + // 构造函数 2: 宽高 + DmaBuffer(int width, int height, int format, + const std::string& heap_name = "/dev/dma_heap/system-uncached") { + this->width = width; + this->height = height; + + size_t alloc_size = 0; + if (format == RK_FORMAT_RGB_888 || format == RK_FORMAT_BGR_888) { + alloc_size = width * height * 3; + } else if (format == RK_FORMAT_YCbCr_420_SP || format == RK_FORMAT_YCrCb_420_SP) { + alloc_size = width * height * 3 / 2; + } else { + alloc_size = width * height * 4; } - handle = (rga_buffer_handle_t)fd; + allocate(alloc_size, heap_name); } ~DmaBuffer() { - if (vaddr && vaddr != MAP_FAILED) - munmap(vaddr, size); + if (vaddr != MAP_FAILED && vaddr != nullptr) + munmap(vaddr, actual_alloc_size); if (fd >= 0) close(fd); } - rga_buffer_t getRgaBuffer() { - return wrapbuffer_fd(fd, width, height, format); + bool isValid() const { + return fd >= 0; } - bool isValid() const { - return fd >= 0 && vaddr != nullptr; + rga_buffer_t getRgaBuffer() { + return wrapbuffer_fd(fd, width, height, RK_FORMAT_RGB_888, width, height); } private: - int alloc_dma_buffer(size_t size, int* fd, void** vaddr) { - // [关键修复] 调整优先级 - // 1. system-uncached-dma32: 物理地址 < 4G (解决RGA报错),且非Root用户可访问 - // 2. cma: 物理地址 < 4G,但通常需要 Root 权限 - // 3. system-uncached: 最后的退路,但可能会分配到 > 4G 导致 RGA 崩溃 - const char* heap_paths[] = {"/dev/dma_heap/system-uncached-dma32", // 首选! - "/dev/dma_heap/system-dma32", - "/dev/dma_heap/cma", // 需要 Root - "/dev/dma_heap/linux,cma", "/dev/dma_heap/system-uncached"}; + void allocate(size_t req_size, const std::string& heap_name) { + // [关键] 向上对齐到 4KB 页大小 + long page_size = sysconf(_SC_PAGESIZE); + if (page_size < 0) + page_size = 4096; - int heap_fd = -1; - for (const char* path : heap_paths) { - heap_fd = open(path, O_RDWR | O_CLOEXEC); - if (heap_fd >= 0) { - // std::cout << "[DmaBuffer] Success: Using heap " << path << std::endl; - break; + this->actual_alloc_size = (req_size + page_size - 1) & ~(page_size - 1); + this->size = req_size; + + this->vaddr = MAP_FAILED; + this->fd = -1; + + int heap_fd = open(heap_name.c_str(), O_RDONLY | O_CLOEXEC); + if (heap_fd < 0) { + if (heap_name.find("system") != std::string::npos) { + heap_fd = open("/dev/dma_heap/cma", O_RDONLY | O_CLOEXEC); + } + if (heap_fd < 0) { + perror("Failed to open dma heap"); + return; } } - if (heap_fd < 0) { - std::cerr << "[DmaBuffer] Fatal: Could not open any suitable DMA heap! (Check " - "permissions for /dev/dma_heap/)" - << std::endl; - return -1; - } - struct dma_heap_allocation_data data = {0}; - data.len = size; - data.fd_flags = O_CLOEXEC | O_RDWR; + data.len = this->actual_alloc_size; + data.fd_flags = O_RDWR | O_CLOEXEC; if (ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &data) < 0) { - std::cerr << "[DmaBuffer] Error: DMA allocation ioctl failed." 
<< std::endl; + perror("dma heap alloc failed"); close(heap_fd); - return -1; + return; } - *fd = data.fd; + this->fd = data.fd; close(heap_fd); - *vaddr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0); - if (*vaddr == MAP_FAILED) { - close(*fd); - *fd = -1; - return -1; + this->vaddr = + mmap(NULL, this->actual_alloc_size, PROT_READ | PROT_WRITE, MAP_SHARED, this->fd, 0); + if (this->vaddr == MAP_FAILED) { + perror("mmap failed"); } - return 0; } -}; - -#endif \ No newline at end of file +}; \ No newline at end of file diff --git a/src/rknn/ffmpeg_rga_decoder.cpp b/src/rknn/ffmpeg_rga_decoder.cpp new file mode 100644 index 0000000..34b02ad --- /dev/null +++ b/src/rknn/ffmpeg_rga_decoder.cpp @@ -0,0 +1,355 @@ +#include "ffmpeg_rga_decoder.h" + +#include +#include + +#include "spdlog/spdlog.h" + +// 结构体定义 +struct RgaSrcInfo { + int fd; + int wstride; + int hstride; + void* vaddr; + int format; +}; + +FFmpegRGADecoder::FFmpegRGADecoder() { + pkt_ = av_packet_alloc(); + frame_ = av_frame_alloc(); +} + +FFmpegRGADecoder::~FFmpegRGADecoder() { + release(); + if (pkt_) + av_packet_free(&pkt_); + if (frame_) + av_frame_free(&frame_); +} + +void FFmpegRGADecoder::cleanUp() { + if (dec_ctx_) { + avcodec_free_context(&dec_ctx_); + dec_ctx_ = nullptr; + } + if (fmt_ctx_) { + avformat_close_input(&fmt_ctx_); + fmt_ctx_ = nullptr; + } + is_opened_ = false; +} + +void FFmpegRGADecoder::release() { + cleanUp(); + output_dma_buf_.reset(); +} + +bool FFmpegRGADecoder::isOpened() const { + return is_opened_; +} + +double FFmpegRGADecoder::get(int propId) { + if (propId == cv::CAP_PROP_FRAME_WIDTH) + return (double)width_; + if (propId == cv::CAP_PROP_FRAME_HEIGHT) + return (double)height_; + if (propId == cv::CAP_PROP_FPS) + return fps_; + return 0.0; +} + +bool FFmpegRGADecoder::open(const std::string& url) { + cleanUp(); + int ret; + AVDictionary* options = nullptr; + + // RTSP 优化参数 + av_dict_set(&options, "rtsp_transport", "tcp", 0); + av_dict_set(&options, "buffer_size", "1024000", 0); + av_dict_set(&options, "stimeout", "5000000", 0); + av_dict_set(&options, "flags", "low_delay", 0); + + spdlog::info("FFmpeg: Opening stream: {}", url); + if ((ret = avformat_open_input(&fmt_ctx_, url.c_str(), nullptr, &options)) < 0) { + spdlog::error("FFmpeg: Could not open input URL: {}", url); + return false; + } + + if ((ret = avformat_find_stream_info(fmt_ctx_, nullptr)) < 0) + return false; + + video_stream_idx_ = av_find_best_stream(fmt_ctx_, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0); + if (video_stream_idx_ < 0) + return false; + + AVStream* st = fmt_ctx_->streams[video_stream_idx_]; + AVCodecParameters* codecpar = st->codecpar; + + const char* decoder_name = nullptr; + if (codecpar->codec_id == AV_CODEC_ID_HEVC) + decoder_name = "hevc_rkmpp"; + else if (codecpar->codec_id == AV_CODEC_ID_H264) + decoder_name = "h264_rkmpp"; + + if (decoder_name) { + decoder_ = avcodec_find_decoder_by_name(decoder_name); + if (decoder_) + spdlog::info("FFmpeg: Using Hardware Decoder: {}", decoder_name); + } + + if (!decoder_) { + spdlog::warn("FFmpeg: Hardware decoder not found, falling back."); + decoder_ = avcodec_find_decoder(codecpar->codec_id); + } + if (!decoder_) + return false; + + dec_ctx_ = avcodec_alloc_context3(decoder_); + avcodec_parameters_to_context(dec_ctx_, codecpar); + + if ((ret = avcodec_open2(dec_ctx_, decoder_, nullptr)) < 0) { + spdlog::error("FFmpeg: Failed to open codec"); + return false; + } + + width_ = codecpar->width; + height_ = codecpar->height; + fps_ = (st->avg_frame_rate.den > 0) ? 
av_q2d(st->avg_frame_rate) : 25.0;
+
+    is_opened_ = true;
+    spdlog::info("FFmpeg: Decoder Ready. {}x{} @ {:.2f} fps", width_, height_, fps_);
+    return true;
+}
+
+bool FFmpegRGADecoder::ensure_output_buffer(int width, int height) {
+    size_t img_size = width * height * 3;
+    if (!output_dma_buf_ || output_dma_buf_->size < img_size || width_ != width ||
+        height_ != height) {
+        width_ = width;
+        height_ = height;
+        output_dma_buf_ = std::make_unique<DmaBuffer>(img_size, "/dev/dma_heap/system-uncached");
+        if (output_dma_buf_->fd < 0) {
+            spdlog::error("FFmpeg: Failed to allocate output DMA buffer!");
+            return false;
+        }
+        spdlog::info("FFmpeg: Allocated Output DMA Buffer (FD: {})", output_dma_buf_->fd);
+    }
+    return true;
+}
+
+// Gather source-buffer info (FD, strides, format) from a decoded AVFrame.
+bool get_src_info(const AVFrame* frame, RgaSrcInfo& info) {
+    info.fd = 0;
+    info.vaddr = nullptr;
+    info.wstride = frame->width;
+    info.hstride = frame->height;
+    info.format = RK_FORMAT_YCbCr_420_SP;  // Default NV12
+
+    if (frame->format == AV_PIX_FMT_DRM_PRIME && frame->data[0]) {
+        AVDRMFrameDescriptor* desc = (AVDRMFrameDescriptor*)frame->data[0];
+        info.fd = desc->objects[0].fd;
+
+        // 1. The pitch (wstride) comes straight from the DRM descriptor.
+        info.wstride = desc->layers[0].planes[0].pitch;
+
+        // 2. Derive hstride from the distance between the Y and UV planes.
+        if (desc->layers[0].nb_planes >= 2) {
+            int y_offset = desc->layers[0].planes[0].offset;
+            int uv_offset = desc->layers[0].planes[1].offset;
+            info.hstride = (uv_offset - y_offset) / info.wstride;
+        } else {
+            // No multi-plane info: assume a compact layout. Forcing 16-row alignment
+            // here previously pushed the read position past the real UV plane; if the
+            // image turns grey, the usual cause is an hstride that is too large.
+            info.hstride = frame->height;
+        }
+        return true;
+    } else if (frame->format == AV_PIX_FMT_NV12) {
+        info.wstride = frame->linesize[0];
+        info.vaddr = (void*)frame->data[0];
+        info.hstride = frame->height;
+        return true;
+    }
+    // 10-bit input (common with H.265): P010 stores 2 bytes per pixel, and linesize[0]
+    // is already in bytes, so it can be used as-is. RGA needs its dedicated 10-bit format.
+    else if (frame->format == AV_PIX_FMT_P010) {
+        info.wstride = frame->linesize[0];
+        info.vaddr = (void*)frame->data[0];
+        info.hstride = frame->height;
+        info.format = RK_FORMAT_YCbCr_420_SP_10B;  // 10-bit NV12 variant supported by RGA
+        return true;
+    }
+
+    return false;
+}
+
+bool FFmpegRGADecoder::read(cv::Mat& output_mat) {
+    if (!is_opened_)
+        return false;
+
+    static int frame_count = 0;
+    frame_count++;
+
+    int ret;
+    while (true) {
+        ret = av_read_frame(fmt_ctx_, pkt_);
+        if (ret < 0)
+            return false;
+
+        if (pkt_->stream_index == video_stream_idx_) {
+            ret = avcodec_send_packet(dec_ctx_, pkt_);
+            if (ret < 0) {
+                av_packet_unref(pkt_);
+                continue;
+            }
+
+            ret = avcodec_receive_frame(dec_ctx_, frame_);
+            if (ret == 0) {
+                if (!ensure_output_buffer(frame_->width, frame_->height)) {
+                    av_packet_unref(pkt_);
+                    return false;
+                }
+
+                RgaSrcInfo src_info;
+                bool has_info = get_src_info(frame_, src_info);
+
+                // Debug log: print stride info every 100 frames to verify the calculation.
+                if (frame_count % 100 == 0) {
+                    spdlog::info("Debug Frame: FD={} W={} H={} WStride={} HStride={} Fmt={}",
+                                 src_info.fd, frame_->width, frame_->height, src_info.wstride,
+                                 src_info.hstride, frame_->format);
+                }
+
+                if (has_info && src_info.fd > 0) {
+                    // --- Hardware path ---
+                    rga_buffer_t src_img, dst_img;
+                    rga_buffer_handle_t src_handle, dst_handle;
+
+                    // Import: NV12 occupies wstride * hstride * 3/2 bytes;
+                    // P010 is 2 bytes per pixel, so double the size for 10-bit input.
+                    size_t src_size = src_info.wstride * src_info.hstride * 3 / 2;
+                    if (src_info.format == RK_FORMAT_YCbCr_420_SP_10B)
+                        src_size *= 2;
+
+                    src_handle = importbuffer_fd(src_info.fd, src_size);
+                    dst_handle =
+                        importbuffer_fd(output_dma_buf_->fd, output_dma_buf_->actual_alloc_size);
+
+                    if (src_handle && dst_handle) {
+                        src_img = wrapbuffer_handle(src_handle, frame_->width, frame_->height,
+                                                    src_info.format);
+                        dst_img = wrapbuffer_handle(dst_handle, width_, height_, RK_FORMAT_BGR_888);
+
+                        // Apply the real strides.
+                        src_img.wstride = src_info.wstride;
+                        src_img.hstride = src_info.hstride;
+                        dst_img.wstride = width_;
+                        dst_img.hstride = height_;
+
+                        imsetColorSpace(&src_img, IM_YUV_BT709_LIMIT_RANGE);
+                        imsetColorSpace(&dst_img, IM_RGB_FULL);
+
+                        IM_STATUS status =
+                            imcvtcolor(src_img, dst_img, src_info.format, RK_FORMAT_BGR_888);
+
+                        releasebuffer_handle(src_handle);
+                        releasebuffer_handle(dst_handle);
+
+                        if (status == IM_STATUS_SUCCESS) {
+                            output_mat = cv::Mat(height_, width_, CV_8UC3, output_dma_buf_->vaddr);
+                            av_packet_unref(pkt_);
+                            return true;
+                        } else {
+                            spdlog::error("RGA Fail: {}", imStrError(status));
+                        }
+                    }
+                }
+
+                // --- Software fallback ---
+                // Reached when the hardware path failed or no FD is available.
+                // Plain CPU conversion that keeps the colors correct.
+                if (frame_->format == AV_PIX_FMT_NV12) {
+                    // 1. Copy the valid data into contiguous memory (strip the stride).
+                    cv::Mat mYUV_Continous(frame_->height * 3 / 2, frame_->width, CV_8UC1);
+
+                    // Copy the Y plane.
+                    uint8_t* src_y = frame_->data[0];
+                    uint8_t* dst_y = mYUV_Continous.data;
+                    for (int i = 0; i < frame_->height; i++) {
+                        memcpy(dst_y + i * frame_->width, src_y + i * frame_->linesize[0],
+                               frame_->width);
+                    }
+
+                    // Copy the UV plane; if data[1] is null, derive the offset from the strides.
+                    uint8_t* src_uv = frame_->data[1];
+                    if (!src_uv)
+                        src_uv = src_y + src_info.hstride * src_info.wstride;
+
+                    uint8_t* dst_uv = dst_y + frame_->width * frame_->height;
+                    for (int i = 0; i < frame_->height / 2; i++) {
+                        memcpy(dst_uv + i * frame_->width, src_uv + i * frame_->linesize[0],
+                               frame_->width);
+                    }
+
+                    cv::Mat mBGR(height_, width_, CV_8UC3, output_dma_buf_->vaddr);
+                    cv::cvtColor(mYUV_Continous, mBGR, cv::COLOR_YUV2BGR_NV12);
+
+                    if (width_ != frame_->width) {
+                        cv::resize(mBGR, mBGR, cv::Size(width_, height_));
+                    }
+
+                    output_mat = mBGR;
+                    av_packet_unref(pkt_);
+                    return true;
+                }
+            }
+        }
+        av_packet_unref(pkt_);
+    }
+}
+
+bool FFmpegRGADecoder::read_raw(RgaFrameInfo& output_info) {
+    if (!is_opened_)
+        return false;
+    int ret;
+    while (true) {
+        ret = av_read_frame(fmt_ctx_, pkt_);
+        if (ret < 0)
+            return false;
+
+        if (pkt_->stream_index == video_stream_idx_) {
+            ret = avcodec_send_packet(dec_ctx_, pkt_);
+            if (ret < 0) {
+                av_packet_unref(pkt_);
+                continue;
+            }
+
+            ret = avcodec_receive_frame(dec_ctx_, frame_);
+            if (ret == 0) {
+                // Reuse the existing helper to collect the source info.
+                RgaSrcInfo info;
+                get_src_info(frame_, info);
+
+                output_info.fd = info.fd;
+                output_info.vaddr = info.vaddr;
+                output_info.width = frame_->width;
+                output_info.height = frame_->height;
+                output_info.wstride = info.wstride;
+                output_info.hstride = info.hstride;
+                output_info.format = info.format;
+
+                // Note: the returned info still references frame_. In the single-threaded
+                // model the next read overwrites frame_, so for zero-copy the frame_
+                // memory must not be freed or overwritten before NPU inference finishes.
+                // Since rknn_run blocks, returning here is safe as long as the next frame
+                // is only read after inference completes.
+
+                av_packet_unref(pkt_);
+                return true;
+            }
+        }
+        av_packet_unref(pkt_);
+    }
+}
\ No newline at end of file
diff --git a/src/rknn/ffmpeg_rga_decoder.h b/src/rknn/ffmpeg_rga_decoder.h
new file mode 100644
index 0000000..ed9da94
--- /dev/null
+++ b/src/rknn/ffmpeg_rga_decoder.h
@@ -0,0 +1,57 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+
+extern "C" {
+#include
+#include +#include +#include +#include +} + +#include "im2d.h" +#include "rga.h" +// [修改] 引用统一的分配器头文件 +#include "dma_allocator.hpp" +#include "rknn/rkYolov8.hpp" + +class FFmpegRGADecoder { +public: + FFmpegRGADecoder(); + ~FFmpegRGADecoder(); + + bool open(const std::string& url); + bool read(cv::Mat& frame); + void release(); + double get(int propId); + + // [新增] 添加 isOpened 方法 + bool isOpened() const; + + // 新增接口:获取原始解码信息(用于 Zero-Copy 推理) + bool read_raw(RgaFrameInfo& info); + +private: + void cleanUp(); + bool get_rga_src_buffer(const AVFrame* frame, rga_buffer_t& src_buf); + bool ensure_output_buffer(int width, int height); + + AVFormatContext* fmt_ctx_ = nullptr; + AVCodecContext* dec_ctx_ = nullptr; + const AVCodec* decoder_ = nullptr; + AVPacket* pkt_ = nullptr; + AVFrame* frame_ = nullptr; + + int video_stream_idx_ = -1; + bool is_opened_ = false; + + std::unique_ptr output_dma_buf_; + + int width_ = 0; + int height_ = 0; + double fps_ = 0.0; +}; \ No newline at end of file diff --git a/src/rknn/rkYolov8.cc b/src/rknn/rkYolov8.cc index c69c329..ddb754a 100644 --- a/src/rknn/rkYolov8.cc +++ b/src/rknn/rkYolov8.cc @@ -7,15 +7,17 @@ #include #include #include -#include #include #include "RgaApi.h" -#include "im2d.h" -#include "rga.h" + +// 命名空间保持一致 (如果头文件里有,这里也要有;如果没有,这里也不要) +// 假设 rkYolov8.hpp 是 rknn_test 命名空间 +// namespace rknn_test { static std::mutex rga_mtx; +// 辅助:DFL 计算 static void compute_dfl(float* tensor, int dfl_len, float* box) { for (int b = 0; b < 4; b++) { float exp_t[16]; @@ -52,10 +54,17 @@ rkYolov8::rkYolov8(const std::string& model_path, const std::string& label_path, this->m_class_num = class_num; this->conf_threshold = 0.45f; this->nms_threshold = 0.45f; - this->ctx = 0; } rkYolov8::~rkYolov8() { + if (ctx) { + if (input_mems_[0]) + rknn_destroy_mem(ctx, input_mems_[0]); + for (int i = 0; i < io_num.n_output; ++i) { + if (output_mems_[i]) + rknn_destroy_mem(ctx, output_mems_[i]); + } + } if (input_attrs) free(input_attrs); if (output_attrs) @@ -95,6 +104,7 @@ int rkYolov8::init(rknn_context* ctx_in, bool is_slave) { for (int i = 0; i < io_num.n_output; i++) { output_attrs[i].index = i; rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]), sizeof(rknn_tensor_attr)); + // 不需要保存 zp 和 scale 了,因为我们让驱动直接输出 float } if (input_attrs[0].fmt == RKNN_TENSOR_NCHW) { @@ -107,151 +117,196 @@ int rkYolov8::init(rknn_context* ctx_in, bool is_slave) { channel = input_attrs[0].dims[3]; } - printf("[rkYolov8] Init: %dx%d, Output Num: %d\n", width, height, io_num.n_output); + printf("[rkYolov8] Init Zero-Copy: %dx%d, Output Num: %d\n", width, height, io_num.n_output); - // [NPU Input] 640x640 RGBA - input_dma_buf_ = std::make_unique(width, height, RK_FORMAT_RGBA_8888); - if (!input_dma_buf_->isValid()) { - printf("[rkYolov8] Error: Failed to allocate NPU DMA buffer!\n"); + // 1. 输入内存 + input_attrs[0].type = RKNN_TENSOR_UINT8; + input_attrs[0].fmt = RKNN_TENSOR_NHWC; + input_mems_[0] = rknn_create_mem(ctx, input_attrs[0].size_with_stride); + if (!input_mems_[0]) + return -1; + ret = rknn_set_io_mem(ctx, input_mems_[0], &input_attrs[0]); + if (ret < 0) + return -1; + + // 2. 
输出内存 + for (int i = 0; i < io_num.n_output; i++) { + output_mems_[i] = rknn_create_mem(ctx, output_attrs[i].size_with_stride); + ret = rknn_set_io_mem(ctx, output_mems_[i], &output_attrs[i]); + if (ret < 0) + return -1; + } + + return 0; +} + +int rkYolov8::Preprocess_RGA(rga_buffer_t src, rga_buffer_t dst, LetterBoxInfo& letter_box_info) { + int dst_width = width; + int dst_height = height; + int src_width = src.width; + int src_height = src.height; + + float target_wh_ratio = static_cast(dst_width) / dst_height; + float src_wh_ratio = static_cast(src_width) / src_height; + + int resize_w, resize_h; + int pad_w = 0, pad_h = 0; + + if (src_wh_ratio > target_wh_ratio) { + resize_w = dst_width; + resize_h = (int)((float)dst_width / src_width * src_height); + pad_h = (dst_height - resize_h) / 2; + letter_box_info.hor = false; + letter_box_info.pad = pad_h; + } else { + resize_h = dst_height; + resize_w = (int)((float)dst_height / src_height * src_width); + pad_w = (dst_width - resize_w) / 2; + letter_box_info.hor = true; + letter_box_info.pad = pad_w; + } + + letter_box_info.x_pad = pad_w; + letter_box_info.y_pad = pad_h; + letter_box_info.scale = std::min((float)dst_width / src_width, (float)dst_height / src_height); + + im_rect src_rect = {0, 0, src_width, src_height}; + im_rect dst_rect = {pad_w, pad_h, resize_w, resize_h}; + + if (input_mems_[0]->virt_addr) { + memset(input_mems_[0]->virt_addr, 114, input_mems_[0]->size); + } + + IM_STATUS status = improcess(src, dst, {}, src_rect, dst_rect, {}, IM_SYNC); + if (status != IM_STATUS_SUCCESS) { return -1; } return 0; } -detect_result_group_t rkYolov8::infer(const cv::Mat& ori_img) { +detect_result_group_t rkYolov8::infer_raw(const RgaFrameInfo& src_info) { detect_result_group_t detect_result; memset(&detect_result, 0, sizeof(detect_result_group_t)); - if (ori_img.empty()) - return detect_result; - int img_w = ori_img.cols; - int img_h = ori_img.rows; + rga_buffer_t src_img; + rga_buffer_handle_t src_handle = 0; + rga_buffer_handle_t dst_handle = 0; - // [内存对齐] 驱动要求输入 buffer 必须对齐 - int aligned_w = 2304; - int aligned_h = (img_h + 15) & ~15; // 1088 - - if (!src_dma_buf_ || src_dma_buf_->width != aligned_w || src_dma_buf_->height != aligned_h) { - src_dma_buf_ = std::make_unique(aligned_w, aligned_h, RK_FORMAT_BGR_888); - if (!src_dma_buf_->isValid()) { - printf("[rkYolov8] Fatal: Failed to allocate Source Buffer!\n"); + // 1. 源句柄 + if (src_info.fd > 0) { + size_t src_size = src_info.wstride * src_info.hstride * 3 / 2; + src_handle = importbuffer_fd(src_info.fd, src_size); + if (src_handle == 0) return detect_result; - } + src_img = wrapbuffer_handle(src_handle, src_info.width, src_info.height, src_info.format); + } else { + src_img = wrapbuffer_virtualaddr(src_info.vaddr, src_info.width, src_info.height, + src_info.format); } + src_img.wstride = src_info.wstride; + src_img.hstride = src_info.hstride; - // 1. CPU Copy - if (src_dma_buf_->vaddr) { - uint8_t* src_ptr = ori_img.data; - uint8_t* dst_ptr = (uint8_t*)src_dma_buf_->vaddr; - int row_bytes_src = img_w * 3; - int row_bytes_dst = aligned_w * 3; - for (int i = 0; i < img_h; i++) { - memcpy(dst_ptr + i * row_bytes_dst, src_ptr + i * row_bytes_src, row_bytes_src); - } + // 2. 
目标句柄 (NPU Input) + size_t dst_size = width * height * 3; // RGB888, 640*640*3 + dst_handle = importbuffer_fd(input_mems_[0]->fd, dst_size); + if (dst_handle == 0) { + if (src_handle) + releasebuffer_handle(src_handle); + return detect_result; } + // [关键修正] 目标是 Packed RGB,Stride = Width + rga_buffer_t dst_img = wrapbuffer_handle(dst_handle, width, height, RK_FORMAT_RGB_888); - // 2. Letterbox Params - float scale = std::min((float)width / img_w, (float)height / img_h); - int new_w = (int)(img_w * scale); - int new_h = (int)(img_h * scale); - int pad_w = (width - new_w) / 2; - int pad_h = (height - new_h) / 2; - - // ========================================================================= - // [模仿参考代码] 结构体初始化流程 - // ========================================================================= - rga_buffer_t src_img, dst_img; - - // 1. 清零 (和参考代码一致) - memset(&src_img, 0, sizeof(src_img)); - memset(&dst_img, 0, sizeof(dst_img)); - - // 2. 包装 FD (替代参考代码的 wrapbuffer_virtualaddr) - // 注意:这里传的是“真实有效”的宽高,不是对齐后的 - src_img = wrapbuffer_fd(src_dma_buf_->fd, img_w, img_h, RK_FORMAT_BGR_888); - dst_img = wrapbuffer_fd(input_dma_buf_->fd, width, height, RK_FORMAT_RGBA_8888); - - // 3. [关键修正] 覆盖 stride (物理层面的修正) - // wrapbuffer_fd 默认会把 wstride 设为 img_w (1920),这会导致 Invalid argument - // 我们必须手动把它改成物理对齐后的 2304 - src_img.wstride = aligned_w; // 2304 - src_img.hstride = aligned_h; // 1088 - - // 目标图 stride 必须是 640 - dst_img.wstride = width; - dst_img.hstride = height; - - // 4. 定义裁剪区域 (逻辑层面) - // imcheck/improcess 会根据这个来计算缩放比例 - im_rect src_rect = {0, 0, img_w, img_h}; - im_rect dst_rect = {pad_w, pad_h, new_w, new_h}; - - // 5. 清空背景 (NPU 要求 Letterbox 留黑) - if (input_dma_buf_->vaddr) - memset(input_dma_buf_->vaddr, 114, input_dma_buf_->size); - - // 6. 执行 (带锁) + // 3. 预处理 + LetterBoxInfo box_info; { std::lock_guard lock(rga_mtx); + // 这里必须设置色彩空间,否则 NV12->RGB 颜色会发灰 + imsetColorSpace(&src_img, IM_YUV_BT709_LIMIT_RANGE); + imsetColorSpace(&dst_img, IM_RGB_FULL); - // [调试] 打印参数,确保对齐生效 - // printf("RGA Call: SRC[%dx%d stride=%d] -> DST[%dx%d stride=%d]\n", - // src_img.width, src_img.height, src_img.wstride, - // dst_img.width, dst_img.height, dst_img.wstride); - - // 调用 improcess (自动处理 resize + cvtcolor) - // 这里的用法等同于 imcvtcolor 但支持缩放 - IM_STATUS status = improcess(src_img, dst_img, {}, src_rect, dst_rect, {}, IM_SYNC); - - if (status != IM_STATUS_SUCCESS) { - printf("[rkYolov8] RGA Fail: %s (Try imresize if this persists)\n", imStrError(status)); - - // 如果 improcess 失败,尝试降级为 imresize (虽然颜色可能不对,但先确保存活) - // IM_STATUS retry = imresize(src_img, dst_img); + if (Preprocess_RGA(src_img, dst_img, box_info) != 0) { + if (src_handle) + releasebuffer_handle(src_handle); + if (dst_handle) + releasebuffer_handle(dst_handle); return detect_result; } } - // 7. NPU 格式转换 (RGBA -> RGB) - std::vector npu_rgb_buf(width * height * 3); - if (input_dma_buf_->vaddr) { - uint8_t* s = (uint8_t*)input_dma_buf_->vaddr; - uint8_t* d = npu_rgb_buf.data(); - int total = width * height; - for (int i = 0; i < total; i++) { - d[0] = s[0]; - d[1] = s[1]; - d[2] = s[2]; - s += 4; - d += 3; - } - } + if (src_handle) + releasebuffer_handle(src_handle); + if (dst_handle) + releasebuffer_handle(dst_handle); - // 8. NPU 推理 - rknn_input inputs[1]; - memset(inputs, 0, sizeof(inputs)); - inputs[0].index = 0; - inputs[0].type = RKNN_TENSOR_UINT8; - inputs[0].size = width * height * 3; - inputs[0].fmt = RKNN_TENSOR_NHWC; - inputs[0].buf = npu_rgb_buf.data(); - - rknn_inputs_set(ctx, io_num.n_input, inputs); + // 4. 
推理 + int ret = rknn_run(ctx, nullptr); + if (ret < 0) + return detect_result; + // 5. 后处理 + // 使用 rknn_outputs_get 自动转 float rknn_output outputs[io_num.n_output]; memset(outputs, 0, sizeof(outputs)); for (int i = 0; i < io_num.n_output; i++) - outputs[i].want_float = 1; + outputs[i].want_float = 1; // [重要] 驱动代劳反量化 - rknn_run(ctx, nullptr); - rknn_outputs_get(ctx, io_num.n_output, outputs, nullptr); + ret = rknn_outputs_get(ctx, io_num.n_output, outputs, nullptr); + if (ret < 0) + return detect_result; - post_process_v8_dfl(outputs, scale, pad_w, pad_h, &detect_result); + post_process_v8_dfl(outputs, box_info.scale, box_info.x_pad, box_info.y_pad, &detect_result); rknn_outputs_release(ctx, io_num.n_output, outputs); + return detect_result; } -// ... (post_process_v8_dfl 保持不变) ... +detect_result_group_t rkYolov8::infer(const cv::Mat& ori_img) { + if (ori_img.empty()) + return {}; + + int img_w = ori_img.cols; + int img_h = ori_img.rows; + int aligned_w = (img_w + 15) & ~15; + int aligned_h = (img_h + 15) & ~15; + + if (!src_dma_buf_ || src_dma_buf_->width != aligned_w || src_dma_buf_->height != aligned_h) { + src_dma_buf_ = std::make_unique(aligned_w, aligned_h, RK_FORMAT_BGR_888); + } + + if (src_dma_buf_->vaddr) { + uint8_t* src_ptr = ori_img.data; + uint8_t* dst_ptr = (uint8_t*)src_dma_buf_->vaddr; + int row_bytes = img_w * 3; + + if (aligned_w == img_w) { + memcpy(dst_ptr, src_ptr, row_bytes * img_h); + } else { + for (int i = 0; i < img_h; ++i) { + memcpy(dst_ptr + i * aligned_w * 3, src_ptr + i * row_bytes, row_bytes); + } + } + } + + RgaFrameInfo info; + info.fd = src_dma_buf_->fd; + info.vaddr = nullptr; + info.width = img_w; + info.height = img_h; + info.wstride = aligned_w; + info.hstride = aligned_h; + info.format = RK_FORMAT_BGR_888; + + return infer_raw(info); +} + +// [重要修改] 现在直接返回 buf 指针即可,不需要 get_output_float_buffer 手动计算了 +// 因为 rknn_outputs_get 已经把 buf 变成了 float* +float* rkYolov8::get_output_float_buffer(int index) { + // 此函数作废,逻辑移入 post_process + return nullptr; +} + void rkYolov8::post_process_v8_dfl(rknn_output* outputs, float scale, int pad_w, int pad_h, detect_result_group_t* group) { std::vector filterBoxes; @@ -261,8 +316,11 @@ void rkYolov8::post_process_v8_dfl(rknn_output* outputs, float scale, int pad_w, for (int i = 0; i < 3; i++) { int box_idx = i * output_per_branch; int cls_idx = i * output_per_branch + 1; + + // [修复] 直接使用 outputs[].buf 作为 float 指针 float* box_tensor = (float*)outputs[box_idx].buf; float* cls_tensor = (float*)outputs[cls_idx].buf; + int grid_h = output_attrs[box_idx].dims[2]; int grid_w = output_attrs[box_idx].dims[3]; int stride = height / grid_h; @@ -329,3 +387,5 @@ void rkYolov8::post_process_v8_dfl(rknn_output* outputs, float scale, int pad_w, } group->count = count; } + +// } // namespace rknn_test \ No newline at end of file diff --git a/src/rknn/rkYolov8.hpp b/src/rknn/rkYolov8.hpp index b12118d..eff4729 100644 --- a/src/rknn/rkYolov8.hpp +++ b/src/rknn/rkYolov8.hpp @@ -6,10 +6,32 @@ #include #include +#include "im2d.h" +#include "rga.h" #include "rknn/dma_allocator.hpp" #include "rknn/postprocess.h" #include "rknn/rknn_api.h" +// RgaFrameInfo 定义 +struct RgaFrameInfo { + int fd; + void* vaddr; + int width; + int height; + int wstride; + int hstride; + int format; +}; + +// LetterBoxInfo 定义 +struct LetterBoxInfo { + bool hor; + int pad; + float scale; + int x_pad; + int y_pad; +}; + class rkYolov8 { public: rkYolov8(const std::string& model_path, const std::string& label_path, int class_num); @@ -17,34 +39,44 @@ public: int 
init(rknn_context* ctx_in, bool is_slave); rknn_context* get_pctx(); + + // 零拷贝接口 + detect_result_group_t infer_raw(const RgaFrameInfo& src_info); + + // 兼容接口 detect_result_group_t infer(const cv::Mat& ori_img); private: unsigned char* load_model(const char* filename, int* model_size); + int Preprocess_RGA(rga_buffer_t src, rga_buffer_t dst, LetterBoxInfo& letter_box_info); + + // [修复] 补回 rknn_output* 参数,与 .cc 文件保持一致 void post_process_v8_dfl(rknn_output* outputs, float scale, int pad_w, int pad_h, detect_result_group_t* group); + float* get_output_float_buffer(int index); + private: std::string model_path; std::string m_label_path; int m_class_num; - rknn_context ctx; - bool is_slave = false; + rknn_context ctx = 0; unsigned char* model_data = nullptr; rknn_input_output_num io_num; rknn_tensor_attr* input_attrs = nullptr; rknn_tensor_attr* output_attrs = nullptr; - // [NPU 输入] 目标 buffer (640x640) - std::unique_ptr input_dma_buf_; + // NPU 内存 + rknn_tensor_mem* input_mems_[1] = {nullptr}; + rknn_tensor_mem* output_mems_[9] = {nullptr}; - // [新增] [RGA 输入] 源图像缓存 buffer (1920x1080) - // 用于将 OpenCV 的虚拟地址转为物理连续内存,解决 RGA 驱动崩溃问题 + // 中间 DMA 缓存 (用于 Mat 兼容) std::unique_ptr src_dma_buf_; - rknn_input inputs[1]; + std::vector out_zps; + std::vector out_scales; int width = 0; int height = 0; diff --git a/src/rknn/video_service.cc b/src/rknn/video_service.cc index 0d7e298..d4a8262 100644 --- a/src/rknn/video_service.cc +++ b/src/rknn/video_service.cc @@ -1,11 +1,17 @@ -// video_service.cc (修改后) #include "video_service.h" #include +#include + #include "algorithm/HumanDetectionModule.h" +#include "im2d.h" #include "opencv2/imgproc/imgproc.hpp" +#include "rga.h" +#include "rknn/dma_allocator.hpp" // [新增] 包含 DmaBuffer +#include "rknn/rkYolov8.hpp" // 包含 RgaFrameInfo 定义 #include "spdlog/spdlog.h" + VideoService::VideoService(std::unique_ptr module, std::string input_url, std::string output_rtsp_url, nlohmann::json module_config) : module_(std::move(module)), @@ -22,6 +28,7 @@ VideoService::~VideoService() { if (running_) { stop(); } + // rtsp_dma_buf_ 会被智能指针自动释放 } bool VideoService::start() { @@ -31,66 +38,42 @@ bool VideoService::start() { } spdlog::info("{} Analysis module initialized successfully.", log_prefix_); - std::string gst_input_pipeline = "rtspsrc location=" + input_url_ + - " latency=0 protocols=tcp ! " - "rtph265depay ! " - "h265parse ! " - "mppvideodec format=16 ! " - "videoconvert ! " - "video/x-raw,format=BGR ! 
" - "appsink"; + spdlog::info("{} Try to Open RTSP Stream (H.265/FFmpeg/RGA)...", log_prefix_); - spdlog::info("Try to Open RTSP Stream"); - capture_.open(gst_input_pipeline, cv::CAP_GSTREAMER); - - if (!capture_.isOpened()) { - printf("Error: Could not open RTSP stream: %s\n", input_url_.c_str()); + // 使用新的解码器打开 RTSP URL + if (!capture_.open(input_url_)) { + spdlog::error("{} Failed to open RTSP stream: {}", log_prefix_, input_url_); return false; - } else { - spdlog::info("RTSP Stream Opened!"); } - frame_width_ = static_cast(capture_.get(cv::CAP_PROP_FRAME_WIDTH)); - frame_height_ = static_cast(capture_.get(cv::CAP_PROP_FRAME_HEIGHT)); + // 获取参数 + frame_width_ = (int)capture_.get(cv::CAP_PROP_FRAME_WIDTH); + frame_height_ = (int)capture_.get(cv::CAP_PROP_FRAME_HEIGHT); frame_fps_ = capture_.get(cv::CAP_PROP_FPS); + if (frame_fps_ <= 0) frame_fps_ = 25.0; - if (frame_width_ == 0 || frame_height_ == 0) { - spdlog::error( - "{} Failed to get valid frame width or height from GStreamer " - "pipeline (got {}x{}).", - log_prefix_, frame_width_, frame_height_); - spdlog::error( - "{} This usually means the RTSP stream is unavailable or the " - "GStreamer input pipeline (mppvideodec?) failed.", - log_prefix_); + // ----------------------------------------------------------------- + // [关键修改] 初始化 RTSP 推流专用的 DMA Buffer + // ----------------------------------------------------------------- + // 宽和高向上对齐到 16,防止 RGA 写入越界 + int aligned_w = (frame_width_ + 15) & (~15); + int aligned_h = (frame_height_ + 15) & (~15); - cv::Mat test_frame; - if (capture_.read(test_frame) && !test_frame.empty()) { - frame_width_ = test_frame.cols; - frame_height_ = test_frame.rows; - spdlog::info("{} Successfully got frame size by reading first frame: {}x{}", - log_prefix_, frame_width_, frame_height_); + // 申请物理连续内存 (BGR格式) + rtsp_dma_buf_ = std::make_unique(aligned_w, aligned_h, RK_FORMAT_BGR_888); - { - std::lock_guard lock(frame_mutex_); - latest_frame_ = test_frame; - new_frame_available_ = true; - } - frame_cv_.notify_one(); - - } else { - spdlog::error("{} Failed to read first frame to determine size. Aborting.", - log_prefix_); - capture_.release(); - return false; - } + if (!rtsp_dma_buf_->isValid()) { + spdlog::error("{} Failed to allocate RTSP DMA Buffer!", log_prefix_); + return false; } + spdlog::info("{} Allocated RTSP Buffer (FD: {})", log_prefix_, rtsp_dma_buf_->fd); - printf("RTSP stream opened successfully! (%dx%d @ %.2f FPS)\n", frame_width_, frame_height_, - frame_fps_); + spdlog::info("{} Stream Opened! {}x{} @ {:.2f} FPS", log_prefix_, frame_width_, frame_height_, + frame_fps_); + // GStreamer Pipeline std::string gst_pipeline = "appsrc ! " "queue max-size-buffers=2 leaky=downstream ! 
" @@ -121,11 +104,11 @@ bool VideoService::start() { return true; } + bool VideoService::set_analysis_mode(int mode_val) { if (!module_) return false; - // 检查是否为 HumanDetectionModule auto* human_module = dynamic_cast(module_.get()); if (human_module) { @@ -143,91 +126,152 @@ bool VideoService::set_analysis_mode(int mode_val) { spdlog::warn("{} Module is not HumanDetectionModule, cannot set mode.", log_prefix_); return false; } - - return false; // [修复 3] 添加这个缺少 return 的补丁 } + void VideoService::stop() { printf("Stopping VideoService...\n"); running_ = false; - frame_cv_.notify_all(); - if (reading_thread_.joinable()) { + if (reading_thread_.joinable()) reading_thread_.join(); - } - - if (processing_thread_.joinable()) { + if (processing_thread_.joinable()) processing_thread_.join(); - } + printf("Processing thread joined.\n"); - if (capture_.isOpened()) { + if (capture_.isOpened()) capture_.release(); - } - if (writer_.isOpened()) { + if (writer_.isOpened()) writer_.release(); - } + module_->stop(); module_.reset(); + rtsp_dma_buf_.reset(); // [新增] 释放 DMA buffer printf("VideoService stopped.\n"); } void VideoService::reading_loop() { - cv::Mat frame; + // 使用全局的 RgaFrameInfo (如果不使用 namespace rknn_test) + RgaFrameInfo info; spdlog::info("Reading thread started."); while (running_) { - if (!capture_.read(frame)) { - spdlog::warn("Reading loop: Failed to read frame from capture. Stopping service."); + // [修改] 调用 read_raw 获取原始 FD 和 Stride + if (!capture_.read_raw(info)) { + spdlog::warn("Reading loop: Failed to read frame or stream EOF."); running_ = false; break; } - if (frame.empty()) { - continue; - } - { std::lock_guard lock(frame_mutex_); - latest_frame_ = frame; + // 存储最新的原始帧信息 + latest_frame_info_ = info; new_frame_available_ = true; } frame_cv_.notify_one(); } - frame_cv_.notify_all(); // 确保 processing_loop 也会退出 + frame_cv_.notify_all(); spdlog::info("Reading loop finished."); } void VideoService::processing_loop() { - cv::Mat frame; + RgaFrameInfo current_info; + cv::Mat display_frame; while (running_) { { - // 1. (不变) 获取帧 std::unique_lock lock(frame_mutex_); - frame_cv_.wait(lock, [&] { return new_frame_available_ || !running_; }); - if (!running_) { + if (!running_) break; - } - frame = latest_frame_.clone(); + current_info = latest_frame_info_; new_frame_available_ = false; } - if (frame.empty()) { + // ========================================================= + // [核心逻辑] 使用 RGA 将 NV12(FD) 转为 BGR(FD) + // ========================================================= + bool rga_success = false; + + // 只有当源 FD 有效且目标 Buffer 就绪时才执行硬件转换 + if (current_info.fd > 0 && rtsp_dma_buf_ && rtsp_dma_buf_->isValid()) { + rga_buffer_t src_img, dst_img; + memset(&src_img, 0, sizeof(src_img)); + memset(&dst_img, 0, sizeof(dst_img)); + + // 1. 设置源 (NV12 from Decoder) + src_img = wrapbuffer_fd(current_info.fd, current_info.width, current_info.height, + current_info.format); + src_img.wstride = current_info.wstride; // [关键] 设置正确的 stride + src_img.hstride = current_info.hstride; // [关键] 设置正确的 stride + + // 2. 设置目标 (BGR for RTSP/Analysis) + dst_img = + wrapbuffer_fd(rtsp_dma_buf_->fd, frame_width_, frame_height_, RK_FORMAT_BGR_888); + // 目标 buffer 是紧凑的,或者按照 alloc 的对齐来 + dst_img.wstride = rtsp_dma_buf_->width; // 这里取 alloc 时的 aligned_w + dst_img.hstride = rtsp_dma_buf_->height; + + // 3. 
Convert NV12 -> BGR
+            // improcess handles cropping and format conversion in a single call
+            im_rect src_rect = {0, 0, current_info.width, current_info.height};
+            im_rect dst_rect = {0, 0, frame_width_, frame_height_};
+
+            // Set color spaces (BT.709 limited -> RGB full) to avoid a washed-out/grey image
+            imsetColorSpace(&src_img, IM_YUV_BT709_LIMIT_RANGE);
+            imsetColorSpace(&dst_img, IM_RGB_FULL);
+
+            IM_STATUS status = improcess(src_img, dst_img, {}, src_rect, dst_rect, {}, IM_SYNC);
+
+            if (status == IM_STATUS_SUCCESS) {
+                // Conversion succeeded: wrap the DMA memory in a Mat.
+                // The Mat step must match the buffer stride.
+                display_frame = cv::Mat(frame_height_, frame_width_, CV_8UC3, rtsp_dma_buf_->vaddr,
+                                        rtsp_dma_buf_->width * 3);
+                rga_success = true;
+            } else {
+                spdlog::warn("{} RGA Conversion Failed: {}", log_prefix_, imStrError(status));
+            }
+        }
+
+        // =========================================================
+        // [Software fallback] CPU conversion when RGA fails or no FD is available
+        // =========================================================
+        if (!rga_success) {
+            if (current_info.vaddr && current_info.format == RK_FORMAT_YCbCr_420_SP) {
+                // Simple software conversion (NV12 -> BGR).
+                // To sidestep stride issues this assumes a compact layout; production code
+                // should follow the row-by-row copy logic in ffmpeg_rga_decoder.cpp.
+                cv::Mat mYUV(current_info.height * 3 / 2, current_info.width, CV_8UC1,
+                             current_info.vaddr);
+                cv::cvtColor(mYUV, display_frame, cv::COLOR_YUV2BGR_NV12);
+            } else {
+                continue;  // Nothing we can do with this frame, skip it
+            }
+        }
+
+        if (display_frame.empty())
            continue;
+
+        // ---------------------------------------------------------
+        // Inference and streaming
+        // ---------------------------------------------------------
+
+        // Hand the converted BGR Mat to the analysis module.
+        // display_frame lives in a DMA buffer, which keeps it friendly to further RGA work.
+        if (!module_->process(display_frame)) {
+            // spdlog::warn("{} Module failed to process frame.", log_prefix_);
        }
-        if (!module_->process(frame)) {
-            // 模块报告处理失败
-            spdlog::warn("{} Module failed to process frame. 
Skipping.", log_prefix_); - } + if (writer_.isOpened()) { - writer_.write(frame); + writer_.write(display_frame); } } diff --git a/src/rknn/video_service.h b/src/rknn/video_service.h index 176fa87..ce4edc2 100644 --- a/src/rknn/video_service.h +++ b/src/rknn/video_service.h @@ -12,6 +12,7 @@ #include #include "algorithm/IAnalysisModule.h" +#include "ffmpeg_rga_decoder.h" #include "nlohmann/json.hpp" class VideoService { @@ -42,7 +43,8 @@ private: int frame_width_ = 0; int frame_height_ = 0; double frame_fps_ = 0.0; - cv::VideoCapture capture_; + // cv::VideoCapture capture_; + FFmpegRGADecoder capture_; cv::VideoWriter writer_; std::thread processing_thread_; @@ -54,4 +56,8 @@ private: bool new_frame_available_{false}; std::string log_prefix_; + + // [新增] 用于存储从解码器获取的最新帧信息 + RgaFrameInfo latest_frame_info_; // 存储原始帧信息 + std::unique_ptr rtsp_dma_buf_; // 存储转换后的BGR数据 }; \ No newline at end of file diff --git a/src/rknn_test/DmaBuffer.h b/src/rknn_test/DmaBuffer.h deleted file mode 100644 index ed682b7..0000000 --- a/src/rknn_test/DmaBuffer.h +++ /dev/null @@ -1,53 +0,0 @@ -#include -#include -#include -#include -#include - -#include -#include - -class DmaBuffer { -public: - int fd; - void* vaddr; // 虚拟地址(仅供调试或CPU必须介入时使用,如画字) - size_t size; - - DmaBuffer(size_t size, const std::string& heap_name = "/dev/dma_heap/system") { - this->size = size; - this->vaddr = MAP_FAILED; - this->fd = -1; - - int heap_fd = open(heap_name.c_str(), O_RDONLY | O_CLOEXEC); - if (heap_fd < 0) { - perror("Failed to open dma heap"); - return; - } - - struct dma_heap_allocation_data data = {0}; - data.len = size; - data.fd_flags = O_RDWR | O_CLOEXEC; - - if (ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &data) < 0) { - perror("dma heap alloc failed"); - close(heap_fd); - return; - } - - this->fd = data.fd; - close(heap_fd); - - // 如果需要CPU访问(例如写OSD文字),则映射,否则可以不映射 - this->vaddr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, this->fd, 0); - if (this->vaddr == MAP_FAILED) { - perror("mmap failed"); - } - } - - ~DmaBuffer() { - if (vaddr != MAP_FAILED) - munmap(vaddr, size); - if (fd >= 0) - close(fd); - } -}; \ No newline at end of file