Source code for gluoncv.model_zoo.rcnn.faster_rcnn.rcnn_target
"""RCNN Target Generator."""
from __future__ import absolute_import
from mxnet import autograd
from mxnet import gluon
from ....nn.coder import MultiClassEncoder, NormalizedPerClassBoxCenterEncoder
class RCNNTargetSampler(gluon.HybridBlock):
    """A sampler to choose positive/negative samples from RCNN proposals.

    For every image the ground-truth boxes are appended to the proposals, each
    candidate is labelled positive / negative / ignore from its best IOU with
    the ground truth, and a fixed number of candidates is then drawn at random.

    Parameters
    ----------
    num_image : int
        Number of input images. The forward pass loops over exactly this many
        entries of the batch axis.
    num_proposal : int
        Number of input proposals per image.
    num_sample : int
        Number of samples for RCNN targets (per image).
    pos_iou_thresh : float
        A candidate whose best IOU is greater than or equal to
        ``pos_iou_thresh`` is regarded as a positive sample; remaining
        candidates that are not ignored are regarded as negative samples.
    pos_ratio : float
        ``pos_ratio`` defines how many samples (``round(pos_ratio * num_sample)``)
        are taken in the first, positive-preferring sampling pass.
    max_num_gt : int
        Maximum ground-truth number for each example. This is only an upper
        bound, not necessarily very precise. It sizes the random vector used for
        shuffling (``num_proposal + max_num_gt``), so a very big number may
        impact the training speed.

    """

    def __init__(self, num_image, num_proposal, num_sample, pos_iou_thresh, pos_ratio,
                 max_num_gt):
        super(RCNNTargetSampler, self).__init__()
        self._num_image = num_image
        self._num_proposal = num_proposal
        self._num_sample = num_sample
        # size of the first (positive-preferring) sampling pass
        self._max_pos = int(round(num_sample * pos_ratio))
        self._pos_iou_thresh = pos_iou_thresh
        self._max_num_gt = max_num_gt

    # pylint: disable=arguments-differ
    def hybrid_forward(self, F, rois, scores, gt_boxes):
        """Sample RCNN training candidates; B=self._num_image is handled by a for loop.

        Parameters
        ----------
        rois: (B, self._num_proposal, 4) encoded in (x1, y1, x2, y2).
        scores: (B, self._num_proposal, 1), value range [0, 1] with ignore value -1.
        gt_boxes: (B, M, 4) encoded in (x1, y1, x2, y2), invalid box should have area of 0.
            NOTE(review): the code itself flags a ground-truth row as "ignore" when
            the sum of its four coordinates is below -1 (presumably rows padded
            with -1) - confirm the padding convention against the data pipeline.

        Returns
        -------
        rois: (B, self._num_sample, 4), randomly drawn from the proposals concatenated
            with the ground-truth boxes, ordered by box area within each image.
        samples: (B, self._num_sample), value +1: positive / 0: ignore / -1: negative.
        matches: (B, self._num_sample), value between [0, M), index of the ground-truth
            box with the highest IOU for each sampled roi.

        """
        # loop invariants (plain Python values, independent of the image index)
        num_neg = self._num_sample - self._max_pos
        rand_shape = (self._num_proposal + self._max_num_gt,)

        # target sampling is not differentiable; keep it out of the autograd graph
        with autograd.pause():
            # collect results into list
            new_rois = []
            new_samples = []
            new_matches = []
            for i in range(self._num_image):
                # pick image i and drop the batch axis
                roi = F.squeeze(F.slice_axis(rois, axis=0, begin=i, end=i + 1), axis=0)
                score = F.squeeze(F.slice_axis(scores, axis=0, begin=i, end=i + 1), axis=0)
                gt_box = F.squeeze(F.slice_axis(gt_boxes, axis=0, begin=i, end=i + 1), axis=0)
                # pseudo score for gt boxes: sign(sum(coords) + 1), i.e. negative
                # (-> ignored below) only when the coordinate sum is below -1
                gt_score = F.sign(F.sum(gt_box, axis=-1, keepdims=True) + 1)

                # concat rpn roi with ground truth. mix gt with generated boxes.
                all_roi = F.concat(roi, gt_box, dim=0)
                all_score = F.concat(score, gt_score, dim=0).squeeze(axis=-1)
                # calculate (N, M) ious between (N, 4) anchors and (M, 4) bbox ground-truths
                # cannot do batch op, will get (B, N, B, M) ious
                ious = F.contrib.box_iou(all_roi, gt_box, format='corner')
                # match to argmax iou
                ious_max = ious.max(axis=-1)
                ious_argmax = ious.argmax(axis=-1)
                # init with 2, which are neg samples
                mask = F.ones_like(ious_max) * 2
                # mark all ignore to 0
                mask = F.where(all_score < 0, F.zeros_like(mask), mask)
                # mark positive samples with 3; this runs after the ignore step, so
                # an entry with IOU >= threshold is positive even if its score was < 0
                pos_mask = ious_max >= self._pos_iou_thresh
                mask = F.where(pos_mask, F.ones_like(mask) * 3, mask)

                # shuffle mask: draw an over-sized random vector, crop it to the
                # actual number of candidates and use its argsort as a permutation
                rand = F.random.uniform(0, 1, shape=rand_shape)
                rand = F.slice_like(rand, ious_argmax)
                index = F.argsort(rand)
                mask = F.take(mask, index)
                ious_argmax = F.take(ious_argmax, index)

                # sample pos samples: descending sort puts 3 (pos) first, then
                # 2 (neg), then 0 (ignore); keep the first max_pos of them
                order = F.argsort(mask, is_ascend=False)
                topk = F.slice_axis(order, axis=0, begin=0, end=self._max_pos)
                topk_indices = F.take(index, topk)
                topk_samples = F.take(mask, topk)
                topk_matches = F.take(ious_argmax, topk)
                # reset output: 3 pos 2 neg 0 ignore -> 1 pos -1 neg 0 ignore
                topk_samples = F.where(topk_samples == 3,
                                       F.ones_like(topk_samples), topk_samples)
                topk_samples = F.where(topk_samples == 2,
                                       F.ones_like(topk_samples) * -1, topk_samples)

                # sample neg samples
                # NOTE(review): this drops the first max_pos entries of the *shuffled*
                # arrays, not the entries selected through `topk` above, so the two
                # sampling passes are not guaranteed to be disjoint - confirm intended.
                index = F.slice_axis(index, axis=0, begin=self._max_pos, end=None)
                mask = F.slice_axis(mask, axis=0, begin=self._max_pos, end=None)
                ious_argmax = F.slice_axis(ious_argmax, axis=0, begin=self._max_pos, end=None)
                # change mask: 4 neg 3 pos 0 ignore, so negatives now sort first
                mask = F.where(mask == 2, F.ones_like(mask) * 4, mask)
                order = F.argsort(mask, is_ascend=False)
                bottomk = F.slice_axis(order, axis=0, begin=0, end=num_neg)
                bottomk_indices = F.take(index, bottomk)
                bottomk_samples = F.take(mask, bottomk)
                bottomk_matches = F.take(ious_argmax, bottomk)
                # reset output: 4 neg 3 pos 0 ignore -> 1 pos -1 neg 0 ignore
                bottomk_samples = F.where(bottomk_samples == 3,
                                          F.ones_like(bottomk_samples), bottomk_samples)
                bottomk_samples = F.where(bottomk_samples == 4,
                                          F.ones_like(bottomk_samples) * -1, bottomk_samples)

                # output
                indices = F.concat(topk_indices, bottomk_indices, dim=0)
                samples = F.concat(topk_samples, bottomk_samples, dim=0)
                matches = F.concat(topk_matches, bottomk_matches, dim=0)

                sampled_rois = all_roi.take(indices)
                # reorder the sampled rois (and their labels/matches) by box area,
                # using argsort's default ordering
                x1, y1, x2, y2 = F.split(sampled_rois, axis=-1, num_outputs=4, squeeze_axis=True)
                rois_area = (x2 - x1) * (y2 - y1)
                ind = F.argsort(rois_area)
                new_rois.append(sampled_rois.take(ind))
                new_samples.append(samples.take(ind))
                new_matches.append(matches.take(ind))
            # stack all samples together
            new_rois = F.stack(*new_rois, axis=0)
            new_samples = F.stack(*new_samples, axis=0)
            new_matches = F.stack(*new_matches, axis=0)
        return new_rois, new_samples, new_matches
class RCNNTargetGenerator(gluon.HybridBlock):
    """RCNN target encoder to generate matching target and regression target values.

    Wraps a ``MultiClassEncoder`` (classification targets) and a
    ``NormalizedPerClassBoxCenterEncoder`` (box regression targets).

    Parameters
    ----------
    num_class : int
        Number of total number of positive classes.
    max_pos : int, default is 128
        Upper bound of Number of positive samples.
    per_device_batch_size : int, default is 1
        Per device batch size
    means : iterable of float, default is (0., 0., 0., 0.)
        Mean values to be subtracted from regression targets.
    stds : iterable of float, default is (.1, .1, .2, .2)
        Standard deviations to be divided from regression targets.

    """

    def __init__(self, num_class, max_pos=128, per_device_batch_size=1, means=(0., 0., 0., 0.),
                 stds=(.1, .1, .2, .2)):
        super(RCNNTargetGenerator, self).__init__()
        self._cls_encoder = MultiClassEncoder()
        # every box-related option is forwarded unchanged to the box encoder
        self._box_encoder = NormalizedPerClassBoxCenterEncoder(
            num_class=num_class, max_pos=max_pos, per_device_batch_size=per_device_batch_size,
            means=means, stds=stds)

    # pylint: disable=arguments-differ, unused-argument
    def hybrid_forward(self, F, roi, samples, matches, gt_label, gt_box):
        """Encode classification and box regression targets; components can handle batch images.

        Parameters
        ----------
        roi: (B, N, 4), input proposals
        samples: (B, N), value +1: positive / -1: negative.
        matches: (B, N), value [0, M), index to gt_label and gt_box.
        gt_label: (B, M), value [0, num_class), excluding background class.
        gt_box: (B, M, 4), input ground truth box corner coordinates.

        Returns
        -------
        cls_target: (B, N), value [0, num_class + 1), including background.
        box_target: box regression targets, first output of the box encoder;
            only foreground class has nonzero target.
        box_mask: box regression weights/mask, second output of the box encoder;
            only foreground class has nonzero weight.
        indices: third output of the box encoder.
            NOTE(review): presumably the indices of the positive samples the box
            targets refer to - confirm against NormalizedPerClassBoxCenterEncoder.

        NOTE(review): the exact layout of ``box_target`` / ``box_mask`` is decided by
        the box encoder; earlier documentation of this method gave both
        (B, N, C, 4) and (C, B, N, 4) - TODO confirm and state the real shape.

        """
        # targets are constants for the loss; do not record them for autograd
        with autograd.pause():
            # cls_target (B, N)
            cls_target = self._cls_encoder(samples, matches, gt_label)
            # box_target, box_mask, indices come straight from the box encoder
            box_target, box_mask, indices = self._box_encoder(samples, matches, roi, gt_label,
                                                              gt_box)
        return cls_target, box_target, box_mask, indices