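# COCO-format dataset for multi-object tracking (MOT). Besides the standard
# COCO fields, each image record is expected to carry "frame_id" and
# "video_id", and each box annotation a "track_id" (see load_anno_from_ids).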
import os

import cv2
import numpy as np
from pycocotools.coco import COCO

from ..dataloading import get_yolox_datadir
from .datasets_wrapper import Dataset


class MOTDataset(Dataset):
    """
    MOT dataset class in COCO format.
    """

    def __init__(
        self,
        data_dir=None,
        json_file="train_half.json",
        name="train",
        img_size=(608, 1088),
        preproc=None,
    ):
        """
        COCO dataset initialization. Annotation data are read into memory by the COCO API.
        Args:
            data_dir (str): dataset root directory
            json_file (str): COCO json file name
            name (str): name of the image subdirectory (e.g. 'train' or 'test')
            img_size (tuple): target image size after pre-processing
            preproc: data augmentation strategy
        """
        super().__init__(img_size)
        if data_dir is None:
            data_dir = os.path.join(get_yolox_datadir(), "mot")
        self.data_dir = data_dir
        self.json_file = json_file

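        # Annotations are expected at <data_dir>/annotations/<json_file>;
        # images are resolved later as <data_dir>/<name>/<file_name>.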
        self.coco = COCO(os.path.join(self.data_dir, "annotations", self.json_file))
        self.ids = self.coco.getImgIds()
        self.class_ids = sorted(self.coco.getCatIds())
        cats = self.coco.loadCats(self.coco.getCatIds())
        self._classes = tuple([c["name"] for c in cats])
        self.annotations = self._load_coco_annotations()
        self.name = name
        self.img_size = img_size
        self.preproc = preproc

    def __len__(self):
        return len(self.ids)

    def _load_coco_annotations(self):
        return [self.load_anno_from_ids(_ids) for _ids in self.ids]

    def load_anno_from_ids(self, id_):
        im_ann = self.coco.loadImgs(id_)[0]
        width = im_ann["width"]
        height = im_ann["height"]
        frame_id = im_ann["frame_id"]
        video_id = im_ann["video_id"]
        anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=False)
        annotations = self.coco.loadAnns(anno_ids)
        objs = []
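        # COCO stores boxes as [x, y, w, h]; convert to corner format
        # [x1, y1, x2, y2] and drop annotations with non-positive area
        # or inverted corners.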
        for obj in annotations:
            x1 = obj["bbox"][0]
            y1 = obj["bbox"][1]
            x2 = x1 + obj["bbox"][2]
            y2 = y1 + obj["bbox"][3]
            if obj["area"] > 0 and x2 >= x1 and y2 >= y1:
                obj["clean_bbox"] = [x1, y1, x2, y2]
                objs.append(obj)

        num_objs = len(objs)

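        # One row per object: [x1, y1, x2, y2, class index, track id].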
        res = np.zeros((num_objs, 6))

        for ix, obj in enumerate(objs):
            cls = self.class_ids.index(obj["category_id"])
            res[ix, 0:4] = obj["clean_bbox"]
            res[ix, 4] = cls
            res[ix, 5] = obj["track_id"]

        file_name = im_ann["file_name"] if "file_name" in im_ann else "{:012}".format(id_) + ".jpg"
        img_info = (height, width, frame_id, video_id, file_name)

        del im_ann, annotations

        return (res, img_info, file_name)

    def load_anno(self, index):
        return self.annotations[index][0]

    def pull_item(self, index):
        id_ = self.ids[index]

        res, img_info, file_name = self.annotations[index]

        # load the image; pre-processing is applied later in __getitem__
        img_file = os.path.join(self.data_dir, self.name, file_name)
        img = cv2.imread(img_file)
        assert img is not None, "failed to read image {}".format(img_file)

        return img, res.copy(), img_info, np.array([id_])

    @Dataset.resize_getitem
    def __getitem__(self, index):
        """
        One image / label pair for the given index is picked up and pre-processed.

        Args:
            index (int): data index

        Returns:
            img (numpy.ndarray): pre-processed image
            padded_labels (numpy.ndarray): pre-processed label data.
                The shape is :math:`[max_labels, 6]`.
                Each label consists of [class, xc, yc, w, h, track_id]:
                    class (float): class index.
                    xc, yc (float): center of the bbox.
                    w, h (float): size of the bbox.
                    track_id (float): object track id, used for association across frames.
            img_info : tuple of h, w, frame_id, video_id, file_name.
                h, w (int): original shape of the image
                frame_id, video_id (int): frame index within the video, and video index
                file_name (str): image file name
            img_id (numpy.ndarray): COCO image id of the sample. Used for evaluation.
        """
        img, target, img_info, img_id = self.pull_item(index)

        if self.preproc is not None:
            img, target = self.preproc(img, target, self.input_dim)
        return img, target, img_info, img_id
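
# Usage sketch (illustrative; not part of the original module). Because this
# file uses relative imports it cannot be run directly; import it via its
# package instead. The package path and dataset paths below are assumptions:
#
#     from yolox.data.datasets.mot import MOTDataset   # assumed package path
#
#     dataset = MOTDataset(
#         data_dir="datasets/mot",       # assumed root with annotations/ and train/
#         json_file="train_half.json",
#         name="train",
#         img_size=(608, 1088),
#         preproc=None,
#     )
#     img, target, img_info, img_id = dataset[0]
#     # with preproc=None, target is the raw (num_objs, 6) array of
#     # [x1, y1, x2, y2, class, track_id]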