Source code for gluoncv.model_zoo.rcnn.faster_rcnn.rcnn_target

"""RCNN Target Generator."""
from __future__ import absolute_import

from mxnet import autograd
from mxnet import gluon

from ....nn.coder import MultiClassEncoder, NormalizedPerClassBoxCenterEncoder


class RCNNTargetSampler(gluon.HybridBlock):
    """Draw a fixed-size set of positive/negative RCNN training samples.

    For every image the proposals are mixed with the ground-truth boxes,
    labelled by their best IOU against the ground truth, shuffled, and then
    ``num_sample`` entries are selected in two draws (see ``hybrid_forward``).

    Parameters
    ----------
    num_image: int
        Number of input images; the forward pass loops over this many
        entries of the batch axis.
    num_proposal: int
        Number of input proposals per image.
    num_sample : int
        Number of samples produced per image.
    pos_iou_thresh : float
        Candidates whose best IOU is ``>= pos_iou_thresh`` are labelled
        positive; remaining non-ignored candidates are labelled negative.
    pos_ratio : float
        The first draw selects ``int(round(num_sample * pos_ratio))``
        entries, preferring positives.
    max_num_gt : int
        Upper bound on the number of ground-truth boxes per image. Together
        with ``num_proposal`` it sizes the random vector used for shuffling,
        which is then cut down to the actual candidate count. A very large
        value may slow down training.

    """

    def __init__(self, num_image, num_proposal, num_sample, pos_iou_thresh, pos_ratio,
                 max_num_gt):
        super(RCNNTargetSampler, self).__init__()
        self._num_image = num_image
        self._num_proposal = num_proposal
        self._num_sample = num_sample
        # Size of the first (positive-preferring) draw.
        self._max_pos = int(round(num_sample * pos_ratio))
        self._pos_iou_thresh = pos_iou_thresh
        self._max_num_gt = max_num_gt

    @staticmethod
    def _image_slice(F, batch, i):
        """Return entry ``i`` of ``batch`` along axis 0 with that axis removed."""
        return F.squeeze(F.slice_axis(batch, axis=0, begin=i, end=i + 1), axis=0)

    @staticmethod
    def _to_output_labels(F, codes, neg_code):
        """Map internal codes to output labels: 3 -> +1, ``neg_code`` -> -1, 0 stays 0."""
        codes = F.where(codes == 3, F.ones_like(codes), codes)
        return F.where(codes == neg_code, F.ones_like(codes) * -1, codes)

    def _sample_image(self, F, proposals, proposal_scores, gt_box):
        """Sample ``self._num_sample`` candidates for a single image.

        Returns the sampled boxes, their +1/0/-1 labels and the index of the
        best-matching ground-truth box for each, all ordered by
        ``F.argsort`` of the sampled box area.
        """
        # Ground-truth "score": sign of (coordinate sum + 1). Rows whose
        # coordinates sum below -1 become negative and are ignored below
        # (presumably padding rows filled with -1 -- confirm with the caller).
        gt_validity = F.sign(F.sum(gt_box, axis=-1, keepdims=True) + 1)

        # Candidates = proposals followed by the ground-truth boxes.
        candidates = F.concat(proposals, gt_box, dim=0)
        candidate_scores = F.concat(proposal_scores, gt_validity, dim=0).squeeze(axis=-1)

        # (N, M) IOUs between the N candidates and the M ground-truth boxes.
        # Done per image: a batched call would yield (B, N, B, M).
        iou = F.contrib.box_iou(candidates, gt_box, format='corner')
        best_iou = iou.max(axis=-1)
        best_gt = iou.argmax(axis=-1)

        # Internal codes: 2 = negative (default), 0 = ignore, 3 = positive.
        codes = F.ones_like(best_iou) * 2
        codes = F.where(candidate_scores < 0, F.zeros_like(codes), codes)
        is_positive = best_iou >= self._pos_iou_thresh
        codes = F.where(is_positive, F.ones_like(codes) * 3, codes)

        # Random permutation of the candidates: argsort of uniform noise that
        # is first trimmed to the candidate count.
        noise = F.random.uniform(0, 1, shape=(self._num_proposal + self._max_num_gt,))
        noise = F.slice_like(noise, best_gt)
        perm = F.argsort(noise)
        codes = F.take(codes, perm)
        best_gt = F.take(best_gt, perm)

        # First draw: the `_max_pos` highest codes, i.e. positives first,
        # then negatives, then ignored entries if there are too few.
        ranked = F.argsort(codes, is_ascend=False)
        first_pick = F.slice_axis(ranked, axis=0, begin=0, end=self._max_pos)
        first_indices = F.take(perm, first_pick)
        first_samples = F.take(codes, first_pick)
        first_matches = F.take(best_gt, first_pick)
        first_samples = self._to_output_labels(F, first_samples, 2)

        # Second draw operates on the shuffled entries from position
        # `_max_pos` onward.
        # NOTE(review): this skips the first `_max_pos` shuffled positions,
        # not the entries actually chosen by the first draw, so it looks like
        # an entry can be selected twice -- confirm this is intended.
        perm = F.slice_axis(perm, axis=0, begin=self._max_pos, end=None)
        codes = F.slice_axis(codes, axis=0, begin=self._max_pos, end=None)
        best_gt = F.slice_axis(best_gt, axis=0, begin=self._max_pos, end=None)

        # Recode negatives 2 -> 4 so they outrank positives (3) and ignored (0).
        codes = F.where(codes == 2, F.ones_like(codes) * 4, codes)
        ranked = F.argsort(codes, is_ascend=False)
        num_neg = self._num_sample - self._max_pos
        second_pick = F.slice_axis(ranked, axis=0, begin=0, end=num_neg)
        second_indices = F.take(perm, second_pick)
        second_samples = F.take(codes, second_pick)
        second_matches = F.take(best_gt, second_pick)
        second_samples = self._to_output_labels(F, second_samples, 4)

        # Merge both draws and gather the selected boxes.
        indices = F.concat(first_indices, second_indices, dim=0)
        samples = F.concat(first_samples, second_samples, dim=0)
        matches = F.concat(first_matches, second_matches, dim=0)
        picked = candidates.take(indices)

        # Reorder all three outputs consistently by box area.
        x1, y1, x2, y2 = F.split(picked, axis=-1, num_outputs=4, squeeze_axis=True)
        area = (x2 - x1) * (y2 - y1)
        by_area = F.argsort(area)
        return picked.take(by_area), samples.take(by_area), matches.take(by_area)

    # pylint: disable=arguments-differ
    def hybrid_forward(self, F, rois, scores, gt_boxes):
        """Sample each of the ``self._num_image`` images in a Python loop.

        Parameters
        ----------
        rois: (B, self._num_proposal, 4) encoded in (x1, y1, x2, y2).
        scores: (B, self._num_proposal, 1), value range [0, 1] with ignore value -1.
        gt_boxes: (B, M, 4) encoded in (x1, y1, x2, y2). Rows whose coordinate
            sum is below -1 are treated as ignored.

        Returns
        -------
        rois: (B, self._num_sample, 4), randomly drawn from the proposals
            mixed with the ground-truth boxes.
        samples: (B, self._num_sample), value +1: positive / 0: ignore / -1: negative.
        matches: (B, self._num_sample), value between [0, M)

        """
        with autograd.pause():
            batch_rois = []
            batch_samples = []
            batch_matches = []
            for i in range(self._num_image):
                proposals = self._image_slice(F, rois, i)
                proposal_scores = self._image_slice(F, scores, i)
                gt_box = self._image_slice(F, gt_boxes, i)
                image_rois, image_samples, image_matches = self._sample_image(
                    F, proposals, proposal_scores, gt_box)
                batch_rois.append(image_rois)
                batch_samples.append(image_samples)
                batch_matches.append(image_matches)

            # Re-assemble the per-image results along a new batch axis.
            new_rois = F.stack(*batch_rois, axis=0)
            new_samples = F.stack(*batch_samples, axis=0)
            new_matches = F.stack(*batch_matches, axis=0)
        return new_rois, new_samples, new_matches
class RCNNTargetGenerator(gluon.HybridBlock):
    """Encode sampled RCNN proposals into classification and box-regression targets.

    Wraps a ``MultiClassEncoder`` for the class targets and a
    ``NormalizedPerClassBoxCenterEncoder`` for the box targets.

    Parameters
    ----------
    num_class : int
        Number of total number of positive classes.
    max_pos : int, default is 128
        Upper bound of Number of positive samples.
    per_device_batch_size : int, default is 1
        Per device batch size
    means : iterable of float, default is (0., 0., 0., 0.)
        Mean values to be subtracted from regression targets.
    stds : iterable of float, default is (.1, .1, .2, .2)
        Standard deviations to be divided from regression targets.

    """

    def __init__(self, num_class, max_pos=128, per_device_batch_size=1,
                 means=(0., 0., 0., 0.), stds=(.1, .1, .2, .2)):
        super(RCNNTargetGenerator, self).__init__()
        box_encoder_options = dict(
            num_class=num_class,
            max_pos=max_pos,
            per_device_batch_size=per_device_batch_size,
            means=means,
            stds=stds,
        )
        self._cls_encoder = MultiClassEncoder()
        self._box_encoder = NormalizedPerClassBoxCenterEncoder(**box_encoder_options)

    # pylint: disable=arguments-differ, unused-argument
    def hybrid_forward(self, F, roi, samples, matches, gt_label, gt_box):
        """Generate targets for a whole batch; both encoders accept batched input.

        Parameters
        ----------
        roi: (B, N, 4), input proposals
        samples: (B, N), value +1: positive / -1: negative.
        matches: (B, N), value [0, M), index to gt_label and gt_box.
        gt_label: (B, M), value [0, num_class), excluding background class.
        gt_box: (B, M, 4), input ground truth box corner coordinates.

        Returns
        -------
        cls_target: output of the class encoder; (B, N) with value
            [0, num_class + 1), including background.
        box_target: first output of the box encoder, the regression targets
            (presumably only foreground classes are nonzero; exact shape is
            defined by ``NormalizedPerClassBoxCenterEncoder`` -- TODO confirm).
        box_mask: second output of the box encoder (presumably a weight/mask
            matching ``box_target`` -- TODO confirm).
        indices: third output of the box encoder (presumably the indices of
            the positive samples -- TODO confirm).

        """
        with autograd.pause():
            cls_target = self._cls_encoder(samples, matches, gt_label)
            encoded_boxes = self._box_encoder(samples, matches, roi, gt_label, gt_box)
            box_target, box_mask, indices = encoded_boxes
        return cls_target, box_target, box_mask, indices