Source code for gluoncv.model_zoo.yolo.yolo3
"""You Only Look Once Object Detection v3"""
# pylint: disable=arguments-differ
from __future__ import absolute_import
from __future__ import division
import os
import warnings
import numpy as np
import mxnet as mx
from mxnet import gluon
from mxnet import autograd
from mxnet.gluon import nn
from mxnet.gluon.nn import BatchNorm
from .darknet import _conv2d, darknet53
from ..mobilenet import get_mobilenet
from .yolo_target import YOLOV3TargetMerger
from ...loss import YOLOV3Loss
# Names exported by `from <this module> import *`.
# The list starts with the network class, followed by the two generic builders,
# then the per-backbone / per-dataset factory functions.
# NOTE(review): `yolo3_mobilenet0_25_coco` and `yolo3_mobilenet0_25_custom` are
# not defined in the visible part of the file -- confirm they exist further down.
__all__ = ['YOLOV3',
'get_yolov3',
'custom_yolov3',
'yolo3_darknet53_voc',
'yolo3_darknet53_coco',
'yolo3_darknet53_custom',
'yolo3_mobilenet1_0_coco',
'yolo3_mobilenet1_0_voc',
'yolo3_mobilenet1_0_custom',
'yolo3_mobilenet0_25_coco',
'yolo3_mobilenet0_25_voc',
'yolo3_mobilenet0_25_custom']
def _upsample(x, stride=2):
"""Simple upsampling layer by stack pixel alongside horizontal and vertical directions.
Parameters
----------
x : mxnet.nd.NDArray or mxnet.symbol.Symbol
The input array.
stride : int, default is 2
Upsampling stride
"""
return x.repeat(axis=-1, repeats=stride).repeat(axis=-2, repeats=stride)
class YOLOOutputV3(gluon.HybridBlock):
    """YOLO output layer V3.

    Parameters
    ----------
    index : int
        Index of the yolo output layer, to avoid naming conflicts only.
    num_class : int
        Number of foreground objects.
    anchors : iterable
        The anchor setting. Reference: https://arxiv.org/pdf/1804.02767.pdf.
    stride : int
        Stride of feature map.
    alloc_size : tuple of int, default is (128, 128)
        For advanced users. Define `alloc_size` to generate large enough anchor
        maps, which will later be saved in parameters. During inference, we support
        arbitrary input image by cropping the corresponding area of the anchor map.
        This allows us to export to symbol so we can run it in C++, Scala, etc.

    """
    def __init__(self, index, num_class, anchors, stride,
                 alloc_size=(128, 128), **kwargs):
        super(YOLOOutputV3, self).__init__(**kwargs)
        anchors = np.array(anchors).astype('float32')
        self._classes = num_class
        self._num_pred = 1 + 4 + num_class  # 1 objness + 4 box + num_class
        # anchors are stored as flat (w, h) pairs, hence size // 2
        self._num_anchors = anchors.size // 2
        self._stride = stride
        with self.name_scope():
            all_pred = self._num_pred * self._num_anchors
            self.prediction = nn.Conv2D(all_pred, kernel_size=1, padding=0, strides=1)
            # anchors will be multiplied to predictions
            anchors = anchors.reshape(1, 1, -1, 2)
            self.anchors = self.params.get_constant('anchor_%d'%(index), anchors)
            # offsets will be added to predictions
            grid_x = np.arange(alloc_size[1])
            grid_y = np.arange(alloc_size[0])
            grid_x, grid_y = np.meshgrid(grid_x, grid_y)
            # stack to (n, n, 2)
            offsets = np.concatenate((grid_x[:, :, np.newaxis], grid_y[:, :, np.newaxis]), axis=-1)
            # expand dims to (1, 1, n, n, 2) so it's easier for broadcasting
            offsets = np.expand_dims(np.expand_dims(offsets, axis=0), axis=0)
            self.offsets = self.params.get_constant('offset_%d'%(index), offsets)

    def reset_class(self, classes, reuse_weights=None):
        """Replace the prediction conv so that it predicts `len(classes)` classes.

        A new 1x1 `Conv2D` with the same prefix and input channels as the old one is
        created and initialized on the old layer's contexts. Optionally, rows of the
        old weight/bias are copied into the new layer.

        Parameters
        ----------
        classes : sized iterable
            The new categories. Only `len(classes)` is used by this layer.
        reuse_weights : dict or None
            A {new_integer : old_integer} mapping dict that allows the new predictor to
            reuse the previously trained weights specified by the integer index.
            Entries whose indices are out of range are skipped with a warning.
            When at least one valid entry exists, the objectness and box rows of
            every anchor are copied as well.

        Returns
        -------
        None

        """
        self._clear_cached_op()
        # keep old records
        old_classes = self._classes
        old_pred = self.prediction
        old_num_pred = self._num_pred
        ctx = list(old_pred.params.values())[0].list_ctx()
        self._classes = len(classes)
        self._num_pred = 1 + 4 + len(classes)
        all_pred = self._num_pred * self._num_anchors
        # to avoid deferred init, number of in_channels must be defined
        in_channels = list(old_pred.params.values())[0].shape[1]
        self.prediction = nn.Conv2D(
            all_pred, kernel_size=1, padding=0, strides=1,
            in_channels=in_channels, prefix=old_pred.prefix)
        self.prediction.initialize(ctx=ctx)
        if reuse_weights:
            new_pred = self.prediction
            assert isinstance(reuse_weights, dict)
            for old_params, new_params in zip(old_pred.params.values(), new_pred.params.values()):
                old_data = old_params.data()
                new_data = new_params.data()
                modified = False
                for k, v in reuse_weights.items():
                    if k >= self._classes or v >= old_classes:
                        warnings.warn("reuse mapping {}/{} -> {}/{} out of range".format(
                            k, self._classes, v, old_classes))
                        continue
                    for i in range(self._num_anchors):
                        off_new = i * self._num_pred
                        off_old = i * old_num_pred
                        # copy along the first dimension
                        new_data[1 + 4 + k + off_new] = old_data[1 + 4 + v + off_old]
                        # copy non-class weights as well
                        new_data[off_new : 1 + 4 + off_new] = old_data[off_old : 1 + 4 + off_old]
                    modified = True
                # set data to new conv layers; once per parameter is enough, there is
                # no need to repeat it for every class and anchor
                if modified:
                    new_params.set_data(new_data)

    def hybrid_forward(self, F, x, anchors, offsets):
        """Hybrid Forward of YOLOV3Output layer.

        Parameters
        ----------
        F : mxnet.nd or mxnet.sym
            `F` is mxnet.sym if hybridized or mxnet.nd if not.
        x : mxnet.nd.NDArray
            Input feature map.
        anchors : mxnet.nd.NDArray
            Anchors loaded from self, no need to supply.
        offsets : mxnet.nd.NDArray
            Offsets loaded from self, no need to supply.

        Returns
        -------
        (tuple of) mxnet.nd.NDArray
            During training, return (bbox, raw_box_centers, raw_box_scales, objness,
            class_pred, anchors, offsets), where `offsets` is the cropped and
            reshaped offset map.
            During inference, return detections reshaped to (B, N, 6); the last axis
            is the concatenation of class id, score and the four box coordinates.

        """
        # prediction flat to (batch, pred per pixel, height * width)
        pred = self.prediction(x).reshape((0, self._num_anchors * self._num_pred, -1))
        # transpose to (batch, height * width, num_anchor, num_pred)
        pred = pred.transpose(axes=(0, 2, 1)).reshape((0, -1, self._num_anchors, self._num_pred))
        # components
        raw_box_centers = pred.slice_axis(axis=-1, begin=0, end=2)
        raw_box_scales = pred.slice_axis(axis=-1, begin=2, end=4)
        objness = pred.slice_axis(axis=-1, begin=4, end=5)
        class_pred = pred.slice_axis(axis=-1, begin=5, end=None)

        # valid offsets, (1, 1, height, width, 2)
        offsets = F.slice_like(offsets, x * 0, axes=(2, 3))
        # reshape to (1, height*width, 1, 2)
        offsets = offsets.reshape((1, -1, 1, 2))

        box_centers = F.broadcast_add(F.sigmoid(raw_box_centers), offsets) * self._stride
        box_scales = F.broadcast_mul(F.exp(raw_box_scales), anchors)
        confidence = F.sigmoid(objness)
        class_score = F.broadcast_mul(F.sigmoid(class_pred), confidence)
        # convert (center, size) to corner format: center -/+ half of the size
        wh = box_scales / 2.0
        bbox = F.concat(box_centers - wh, box_centers + wh, dim=-1)

        if autograd.is_training():
            # during training, we don't need to convert whole bunch of info to detection results
            return (bbox.reshape((0, -1, 4)), raw_box_centers, raw_box_scales,
                    objness, class_pred, anchors, offsets)

        # prediction per class
        bboxes = F.tile(bbox, reps=(self._classes, 1, 1, 1, 1))
        scores = F.transpose(class_score, axes=(3, 0, 1, 2)).expand_dims(axis=-1)
        # `scores * 0` only provides a zero array of the right shape to broadcast ids into
        ids = F.broadcast_add(scores * 0, F.arange(0, self._classes).reshape((0, 1, 1, 1, 1)))
        detections = F.concat(ids, scores, bboxes, dim=-1)
        # reshape to (B, xx, 6)
        detections = F.reshape(detections.transpose(axes=(1, 0, 2, 3, 4)), (0, -1, 6))
        return detections
class YOLODetectionBlockV3(gluon.HybridBlock):
    """YOLO V3 detection block.

    The block stacks five convolutions (1x1, 3x3, 1x1, 3x3, 1x1) as its `body` and
    one extra 3x3 convolution as its `tip`. The forward pass returns both the body
    output and the tip output, the latter being the branch used for yolo detection.

    Parameters
    ----------
    channel : int
        Number of channels for 1x1 conv. 3x3 Conv will have 2*channel.
        Must be divisible by 2.
    norm_layer : object
        Normalization layer used (default: :class:`mxnet.gluon.nn.BatchNorm`)
        Can be :class:`mxnet.gluon.nn.BatchNorm` or :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    norm_kwargs : dict
        Additional `norm_layer` arguments, for example `num_devices=4`
        for :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.

    """
    def __init__(self, channel, norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
        super(YOLODetectionBlockV3, self).__init__(**kwargs)
        assert channel % 2 == 0, "channel {} cannot be divided by 2".format(channel)
        norm_args = dict(norm_layer=norm_layer, norm_kwargs=norm_kwargs)
        # Second/third/fourth positional arguments of `_conv2d` for the two flavours.
        # NOTE(review): presumably (kernel, padding, stride) -- confirm in darknet.py.
        reduce_1x1 = (1, 0, 1)
        expand_3x3 = (3, 1, 1)
        # (output channels, conv arguments) of the body layers, in creation order
        body_layout = [(channel, reduce_1x1), (channel * 2, expand_3x3)] * 2
        body_layout.append((channel, reduce_1x1))
        with self.name_scope():
            self.body = nn.HybridSequential(prefix='')
            for out_channels, conv_args in body_layout:
                self.body.add(_conv2d(out_channels, *conv_args, **norm_args))
            self.tip = _conv2d(channel * 2, *expand_3x3, **norm_args)

    # pylint: disable=unused-argument
    def hybrid_forward(self, F, x):
        """Return `(route, tip)`: the body output and the tip conv applied to it."""
        route = self.body(x)
        return route, self.tip(route)
class YOLOV3(gluon.HybridBlock):
    """YOLO V3 detection network.
    Reference: https://arxiv.org/pdf/1804.02767.pdf.

    Parameters
    ----------
    stages : sequence of mxnet.gluon.HybridBlock
        Staged feature extraction blocks.
        For example, 3 stages and 3 YOLO output layers are used in the original paper.
    channels : iterable
        Number of conv channels for each appended stage.
        `len(channels)` should match `len(stages)`.
    anchors : sequence
        The anchor setting. `len(anchors)` should match `len(stages)`.
        Used in reverse order internally.
    strides : sequence
        Strides of feature map. `len(strides)` should match `len(stages)`.
        Used in reverse order internally.
    classes : sequence of str
        Names of the foreground categories. `len(classes)` is the number of
        classes predicted by every output layer.
    alloc_size : tuple of int, default is (128, 128)
        For advanced users. Define `alloc_size` to generate large enough anchor
        maps, which will later be saved in parameters. During inference, we support
        arbitrary input image by cropping the corresponding area of the anchor map.
        This allows us to export to symbol so we can run it in C++, Scala, etc.
    nms_thresh : float, default is 0.45.
        Non-maximum suppression threshold. NMS is only applied when
        `0 < nms_thresh < 1`; any other value disables it.
    nms_topk : int, default is 400
        Apply NMS to top k detection results, use -1 to disable so that every Detection
        result is used in NMS.
    post_nms : int, default is 100
        Only return top `post_nms` detection results, the rest is discarded. The number is
        based on COCO dataset which has maximum 100 objects per image. You can adjust this
        number if expecting more objects. You can use -1 to return all detections.
    pos_iou_thresh : float, default is 1.0
        IOU threshold for true anchors that match real objects.
        'pos_iou_thresh < 1' is not implemented and raises `NotImplementedError`.
    ignore_iou_thresh : float
        Anchors that has IOU in `range(ignore_iou_thresh, pos_iou_thresh)` don't get
        penalized of objectness score.
    norm_layer : object
        Normalization layer used (default: :class:`mxnet.gluon.nn.BatchNorm`)
        Can be :class:`mxnet.gluon.nn.BatchNorm` or :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    norm_kwargs : dict
        Additional `norm_layer` arguments, for example `num_devices=4`
        for :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.

    """
    def __init__(self, stages, channels, anchors, strides, classes, alloc_size=(128, 128),
                 nms_thresh=0.45, nms_topk=400, post_nms=100, pos_iou_thresh=1.0,
                 ignore_iou_thresh=0.7, norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
        super(YOLOV3, self).__init__(**kwargs)
        self._classes = classes
        self.nms_thresh = nms_thresh
        self.nms_topk = nms_topk
        self.post_nms = post_nms
        self._pos_iou_thresh = pos_iou_thresh
        self._ignore_iou_thresh = ignore_iou_thresh
        if pos_iou_thresh >= 1:
            self._target_generator = YOLOV3TargetMerger(len(classes), ignore_iou_thresh)
        else:
            raise NotImplementedError(
                "pos_iou_thresh({}) < 1.0 is not implemented!".format(pos_iou_thresh))
        self._loss = YOLOV3Loss()
        with self.name_scope():
            self.stages = nn.HybridSequential()
            self.transitions = nn.HybridSequential()
            self.yolo_blocks = nn.HybridSequential()
            self.yolo_outputs = nn.HybridSequential()
            # note that anchors and strides should be used in reverse order
            for i, stage, channel, anchor, stride in zip(
                    range(len(stages)), stages, channels, anchors[::-1], strides[::-1]):
                self.stages.add(stage)
                block = YOLODetectionBlockV3(
                    channel, norm_layer=norm_layer, norm_kwargs=norm_kwargs)
                self.yolo_blocks.add(block)
                output = YOLOOutputV3(i, len(classes), anchor, stride, alloc_size=alloc_size)
                self.yolo_outputs.add(output)
                # a transition layer sits between consecutive detection blocks only
                if i > 0:
                    self.transitions.add(_conv2d(channel, 1, 0, 1,
                                                 norm_layer=norm_layer, norm_kwargs=norm_kwargs))

    @property
    def num_class(self):
        """Number of (non-background) categories.

        Returns
        -------
        int
            Number of (non-background) categories.

        """
        # Derived from the class names: no separate counter attribute is kept,
        # so this stays correct after `reset_class`.
        return len(self._classes)

    @property
    def classes(self):
        """Return names of (non-background) categories.

        Returns
        -------
        iterable of str
            Names of (non-background) categories.

        """
        return self._classes

    def hybrid_forward(self, F, x, *args):
        """YOLOV3 network hybrid forward.

        Parameters
        ----------
        F : mxnet.nd or mxnet.sym
            `F` is mxnet.sym if hybridized or mxnet.nd if not.
        x : mxnet.nd.NDArray
            Input data.
        *args : optional, mxnet.nd.NDArray
            During training, extra inputs are required:
            (gt_boxes, obj_t, centers_t, scales_t, weights_t, clas_t)
            These are generated by YOLOV3PrefetchTargetGenerator in dataloader transform function.

        Returns
        -------
        (tuple of) mxnet.nd.NDArray
            During inference, return the tuple (ids, scores, bboxes), which are the
            slices [0:1], [1:2] and [2:] of the last axis of the detection results.
            During training while recording, return the output of the loss block
            (documented upstream as (obj_loss, center_loss, scale_loss, cls_loss)).
            During training without recording, return the raw predictions
            (detections, anchors, offsets, feature maps, box centers, box scales,
            objectness, class predictions).

        Raises
        ------
        TypeError
            If extra inputs are supplied outside of training mode.

        """
        if len(args) != 0 and not autograd.is_training():
            raise TypeError('YOLOV3 inference only need one input data.')
        all_box_centers = []
        all_box_scales = []
        all_objectness = []
        all_class_pred = []
        all_anchors = []
        all_offsets = []
        all_feat_maps = []
        all_detections = []
        routes = []
        # run the backbone stage by stage and remember every intermediate output
        for stage in self.stages:
            x = stage(x)
            routes.append(x)

        # the YOLO output layers are used in reverse order, i.e., from very deep layers to shallow
        for i, block, output in zip(range(len(routes)), self.yolo_blocks, self.yolo_outputs):
            x, tip = block(x)
            if autograd.is_training():
                dets, box_centers, box_scales, objness, class_pred, anchors, offsets = output(tip)
                all_box_centers.append(box_centers.reshape((0, -3, -1)))
                all_box_scales.append(box_scales.reshape((0, -3, -1)))
                all_objectness.append(objness.reshape((0, -3, -1)))
                all_class_pred.append(class_pred.reshape((0, -3, -1)))
                all_anchors.append(anchors)
                all_offsets.append(offsets)
                # here we use fake featmap to reduce memory consumption, only shape[2, 3] is used
                fake_featmap = F.zeros_like(tip.slice_axis(
                    axis=0, begin=0, end=1).slice_axis(axis=1, begin=0, end=1))
                all_feat_maps.append(fake_featmap)
            else:
                dets = output(tip)
            all_detections.append(dets)
            if i >= len(routes) - 1:
                break
            # add transition layers
            x = self.transitions[i](x)
            # upsample feature map reverse to shallow layers
            upsample = _upsample(x, stride=2)
            route_now = routes[::-1][i + 1]
            # crop the upsampled map to the route's spatial size before concatenating
            x = F.concat(F.slice_like(upsample, route_now * 0, axes=(2, 3)), route_now, dim=1)

        if autograd.is_training():
            # during training, the network behaves differently since we don't need detection results
            if autograd.is_recording():
                # generate losses and return them directly
                box_preds = F.concat(*all_detections, dim=1)
                all_preds = [F.concat(*p, dim=1) for p in [
                    all_objectness, all_box_centers, all_box_scales, all_class_pred]]
                all_targets = self._target_generator(box_preds, *args)
                return self._loss(*(all_preds + all_targets))

            # return raw predictions, this is only used in DataLoader transform function.
            return (F.concat(*all_detections, dim=1), all_anchors, all_offsets, all_feat_maps,
                    F.concat(*all_box_centers, dim=1), F.concat(*all_box_scales, dim=1),
                    F.concat(*all_objectness, dim=1), F.concat(*all_class_pred, dim=1))

        # concat all detection results from different stages
        result = F.concat(*all_detections, dim=1)
        # apply nms per class
        if self.nms_thresh > 0 and self.nms_thresh < 1:
            result = F.contrib.box_nms(
                result, overlap_thresh=self.nms_thresh, valid_thresh=0.01,
                topk=self.nms_topk, id_index=0, score_index=1, coord_start=2, force_suppress=False)
            if self.post_nms > 0:
                result = result.slice_axis(axis=1, begin=0, end=self.post_nms)
        ids = result.slice_axis(axis=-1, begin=0, end=1)
        scores = result.slice_axis(axis=-1, begin=1, end=2)
        bboxes = result.slice_axis(axis=-1, begin=2, end=None)
        return ids, scores, bboxes

    def set_nms(self, nms_thresh=0.45, nms_topk=400, post_nms=100):
        """Set non-maximum suppression parameters.

        Parameters
        ----------
        nms_thresh : float, default is 0.45.
            Non-maximum suppression threshold. NMS is only applied when
            `0 < nms_thresh < 1`; any other value disables it.
        nms_topk : int, default is 400
            Apply NMS to top k detection results, use -1 to disable so that every Detection
            result is used in NMS.
        post_nms : int, default is 100
            Only return top `post_nms` detection results, the rest is discarded. The number is
            based on COCO dataset which has maximum 100 objects per image. You can adjust this
            number if expecting more objects. You can use -1 to return all detections.

        Returns
        -------
        None

        """
        # the values are read inside hybrid_forward, so any cached graph must be dropped
        self._clear_cached_op()
        self.nms_thresh = nms_thresh
        self.nms_topk = nms_topk
        self.post_nms = post_nms

    @staticmethod
    def _resolve_class_index(item, names, which):
        """Turn a class name or an integer index into a validated index into `names`.

        `which` ('old' or 'new') is only used to build the error message.
        Raises `ValueError` for unknown names and out-of-range indices.
        """
        if isinstance(item, str):
            try:
                return names.index(item)  # raise ValueError if not found
            except ValueError:
                raise ValueError(
                    "{} not found in {} class names {}".format(item, which, names))
        if item < 0 or item >= len(names):
            raise ValueError(
                "Index {} out of bounds for {} class names".format(item, which))
        return item

    def reset_class(self, classes, reuse_weights=None):
        """Reset class categories and class predictors.

        Parameters
        ----------
        classes : sequence of str
            The new categories. ['apple', 'orange'] for example.
        reuse_weights : dict or list or None
            A {new_integer : old_integer} mapping dict or {new_name : old_name} mapping dict,
            or a list of [name0, name1,...] if class names don't change.
            This allows the new predictor to reuse the
            previously trained weights specified.
            In the list form, names missing from either the old or the new
            categories are skipped with a warning.

        Raises
        ------
        ValueError
            If a dict entry of `reuse_weights` names an unknown class or holds an
            out-of-range index.

        Example
        -------
        >>> net = gluoncv.model_zoo.get_model('yolo3_darknet53_voc', pretrained=True)
        >>> # use direct name to name mapping to reuse weights
        >>> net.reset_class(classes=['person'], reuse_weights={'person':'person'})
        >>> # or use integer mapping, person is the 14th category in VOC
        >>> net.reset_class(classes=['person'], reuse_weights={0:14})
        >>> # you can even mix them
        >>> net.reset_class(classes=['person'], reuse_weights={'person':14})
        >>> # or use a list of string if class name don't change
        >>> net.reset_class(classes=['person'], reuse_weights=['person'])

        """
        self._clear_cached_op()
        old_classes = self._classes
        self._classes = classes
        if self._pos_iou_thresh >= 1:
            self._target_generator = YOLOV3TargetMerger(len(classes), self._ignore_iou_thresh)
        if isinstance(reuse_weights, (dict, list)):
            new_map = {}
            if isinstance(reuse_weights, dict):
                # trying to replace str with indices; the old side is validated first
                for k, v in reuse_weights.items():
                    old_idx = self._resolve_class_index(v, old_classes, 'old')
                    new_idx = self._resolve_class_index(k, self._classes, 'new')
                    new_map[new_idx] = old_idx
            else:
                for x in reuse_weights:
                    try:
                        new_idx = self._classes.index(x)
                        old_idx = old_classes.index(x)
                        new_map[new_idx] = old_idx
                    except ValueError:
                        warnings.warn("{} not found in old: {} or new class names: {}".format(
                            x, old_classes, self._classes))
            reuse_weights = new_map

        # every output layer owns its own predictor, reset all of them
        for outputs in self.yolo_outputs:
            outputs.reset_class(classes, reuse_weights=reuse_weights)
def get_yolov3(name, stages, filters, anchors, strides, classes,
               dataset, pretrained=False, ctx=mx.cpu(),
               root=os.path.join('~', '.mxnet', 'models'), **kwargs):
    """Get YOLOV3 models.

    Parameters
    ----------
    name : str
        Name of the base network; together with `dataset` it forms the name
        ``yolo3_<name>_<dataset>`` of the pretrained weights. Only used when
        `pretrained` is truthy.
    stages : sequence of `HybridBlock`
        Staged feature extraction blocks, passed to :class:`YOLOV3` as `stages`.
    filters : iterable of int
        Passed to :class:`YOLOV3` as `channels`.
    anchors : sequence
        The anchor setting, passed to :class:`YOLOV3`.
    strides : sequence
        Strides of the feature maps, passed to :class:`YOLOV3`.
    classes : iterable of str
        Names of categories.
    dataset : str
        Name of dataset. This is used to identify model name because models trained on
        different datasets are going to be very different.
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    ctx : mxnet.Context
        Context such as mx.cpu(), mx.gpu(0). Only used when loading pretrained weights.
    root : str
        Model weights storing path.
    **kwargs
        Extra keyword arguments forwarded to :class:`YOLOV3`, for example
        `norm_layer` and `norm_kwargs`.

    Returns
    -------
    HybridBlock
        A YOLOV3 detection network.

    """
    net = YOLOV3(stages, filters, anchors, strides, classes=classes, **kwargs)
    if not pretrained:
        return net
    # imported lazily: only needed when weights have to be fetched
    from ..model_store import get_model_file
    weight_name = '_'.join(('yolo3', name, dataset))
    weight_file = get_model_file(weight_name, tag=pretrained, root=root)
    net.load_parameters(weight_file, ctx=ctx)
    return net
def custom_yolov3(base_network_name, filters, anchors, strides, classes, dataset,
                  pretrained_base=True, pretrained=False, norm_layer=BatchNorm,
                  norm_kwargs=None, **kwargs):
    """Custom YOLO models.

    Parameters
    ----------
    base_network_name : str
        Name of the base network: 'darknet53', 'mobilenet1.0' or 'mobilenet0.25'.
    filters : iterable of int
        Forwarded to :func:`get_yolov3` as `filters`.
    anchors : sequence
        The anchor setting, forwarded to :func:`get_yolov3`.
    strides : sequence
        Strides of the feature maps, forwarded to :func:`get_yolov3`.
    classes : iterable of str
        Names of categories.
    dataset : str
        Name of dataset, forwarded to :func:`get_yolov3`.
    pretrained_base : bool or str
        Forwarded to the base network constructor as `pretrained`.
        Forced to `False` when `pretrained` is truthy.
    pretrained : bool or str
        Forwarded to :func:`get_yolov3` as `pretrained`.
    norm_layer : object
        Normalization layer used (default: :class:`mxnet.gluon.nn.BatchNorm`)
        Can be :class:`mxnet.gluon.nn.BatchNorm` or :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
        Used for the base network and for the YOLO layers.
    norm_kwargs : dict
        Additional `norm_layer` arguments, for example `num_devices=4`
        for :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    **kwargs
        Forwarded both to the base network constructor and to :func:`get_yolov3`.

    Returns
    -------
    mxnet.gluon.HybridBlock
        The network returned by :func:`get_yolov3`.

    Raises
    ------
    NotImplementedError
        If `base_network_name` is not one of the supported networks.

    """
    pretrained_base = False if pretrained else pretrained_base
    if base_network_name == 'darknet53':
        base_net = darknet53(
            pretrained=pretrained_base,
            norm_layer=norm_layer,
            norm_kwargs=norm_kwargs,
            **kwargs)
        stages = [base_net.features[:15],
                  base_net.features[15:24],
                  base_net.features[24:]]
    elif base_network_name in ('mobilenet1.0', 'mobilenet0.25'):
        # both mobilenet variants are split at the same feature indices,
        # only the width multiplier differs
        multiplier = 1 if base_network_name == 'mobilenet1.0' else 0.25
        base_net = get_mobilenet(
            multiplier=multiplier,
            pretrained=pretrained_base,
            norm_layer=norm_layer,
            norm_kwargs=norm_kwargs,
            **kwargs)
        stages = [base_net.features[:33],
                  base_net.features[33:69],
                  base_net.features[69:-2]]
    else:
        # build one readable message instead of passing a tuple of arguments
        raise NotImplementedError(
            'Unsupported network: {}'.format(base_network_name))
    # forward the normalization settings as the other factories in this module do,
    # so that the YOLO layers use the same norm layer as the base network
    return get_yolov3(name=base_network_name,
                      stages=stages,
                      filters=filters,
                      anchors=anchors,
                      strides=strides,
                      classes=classes,
                      dataset=dataset,
                      pretrained=pretrained,
                      norm_layer=norm_layer,
                      norm_kwargs=norm_kwargs,
                      **kwargs)
def yolo3_darknet53_voc(pretrained_base=True, pretrained=False,
                        norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    """YOLO3 multi-scale with darknet53 base network on VOC dataset.

    Parameters
    ----------
    pretrained_base : bool or str
        Forwarded to `darknet53` as `pretrained`; forced to `False` when
        `pretrained` is truthy.
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    norm_layer : object
        Normalization layer used (default: :class:`mxnet.gluon.nn.BatchNorm`)
        Can be :class:`mxnet.gluon.nn.BatchNorm` or :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    norm_kwargs : dict
        Additional `norm_layer` arguments, for example `num_devices=4`
        for :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    **kwargs
        Forwarded both to `darknet53` and to `get_yolov3`.

    Returns
    -------
    mxnet.gluon.HybridBlock
        Fully hybrid yolo3 network.

    """
    from ...data import VOCDetection
    # base-network weights are not requested when full-model weights are
    if pretrained:
        pretrained_base = False
    backbone = darknet53(
        pretrained=pretrained_base, norm_layer=norm_layer, norm_kwargs=norm_kwargs, **kwargs)
    feats = backbone.features
    stage_blocks = [feats[:15], feats[15:24], feats[24:]]
    anchor_sets = [
        [10, 13, 16, 30, 33, 23],
        [30, 61, 62, 45, 59, 119],
        [116, 90, 156, 198, 373, 326],
    ]
    return get_yolov3(
        'darknet53', stage_blocks, [512, 256, 128], anchor_sets, [8, 16, 32],
        VOCDetection.CLASSES, 'voc',
        pretrained=pretrained, norm_layer=norm_layer, norm_kwargs=norm_kwargs, **kwargs)
def yolo3_darknet53_coco(pretrained_base=True, pretrained=False,
                         norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    """YOLO3 multi-scale with darknet53 base network on COCO dataset.

    Parameters
    ----------
    pretrained_base : boolean
        Whether fetch and load pretrained weights for base network.
        Forced to `False` when `pretrained` is truthy.
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    norm_layer : object
        Normalization layer used (default: :class:`mxnet.gluon.nn.BatchNorm`)
        Can be :class:`mxnet.gluon.nn.BatchNorm` or :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    norm_kwargs : dict
        Additional `norm_layer` arguments, for example `num_devices=4`
        for :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    **kwargs
        Forwarded both to `darknet53` and to `get_yolov3`.

    Returns
    -------
    mxnet.gluon.HybridBlock
        Fully hybrid yolo3 network.

    """
    from ...data import COCODetection
    # base-network weights are not requested when full-model weights are
    if pretrained:
        pretrained_base = False
    backbone = darknet53(
        pretrained=pretrained_base, norm_layer=norm_layer, norm_kwargs=norm_kwargs, **kwargs)
    feats = backbone.features
    stage_blocks = [feats[:15], feats[15:24], feats[24:]]
    anchor_sets = [
        [10, 13, 16, 30, 33, 23],
        [30, 61, 62, 45, 59, 119],
        [116, 90, 156, 198, 373, 326],
    ]
    return get_yolov3(
        'darknet53', stage_blocks, [512, 256, 128], anchor_sets, [8, 16, 32],
        COCODetection.CLASSES, 'coco',
        pretrained=pretrained, norm_layer=norm_layer, norm_kwargs=norm_kwargs, **kwargs)
def yolo3_darknet53_custom(classes, transfer=None, pretrained_base=True, pretrained=False,
                           norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    """YOLO3 multi-scale with darknet53 base network on custom dataset.

    Parameters
    ----------
    classes : iterable of str
        Names of custom foreground classes. `len(classes)` is the number of foreground classes.
    transfer : str or None
        If not `None`, will try to reuse pre-trained weights from yolo networks trained on other
        datasets: the model ``'yolo3_darknet53_' + str(transfer)`` is loaded and its
        classes are reset to `classes`.
    pretrained_base : boolean
        Whether fetch and load pretrained weights for base network.
        Only used when `transfer` is `None`.
    pretrained : bool or str
        Ignored; a warning is issued when it is truthy.
    norm_layer : object
        Normalization layer used (default: :class:`mxnet.gluon.nn.BatchNorm`)
        Can be :class:`mxnet.gluon.nn.BatchNorm` or :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
        Only used when `transfer` is `None`.
    norm_kwargs : dict
        Additional `norm_layer` arguments, for example `num_devices=4`
        for :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
        Only used when `transfer` is `None`.

    Returns
    -------
    mxnet.gluon.HybridBlock
        Fully hybrid yolo3 network.

    """
    if pretrained:
        warnings.warn("Custom models don't provide `pretrained` weights, ignored.")

    if transfer is not None:
        # start from a network trained on another dataset and keep the weights
        # of the classes it shares with the new class list
        from ...model_zoo import get_model
        net = get_model('yolo3_darknet53_' + str(transfer), pretrained=True, **kwargs)
        shared = [cls_name for cls_name in classes if cls_name in net.classes]
        net.reset_class(classes, reuse_weights=shared)
        return net

    backbone = darknet53(
        pretrained=pretrained_base, norm_layer=norm_layer, norm_kwargs=norm_kwargs, **kwargs)
    feats = backbone.features
    stage_blocks = [feats[:15], feats[15:24], feats[24:]]
    anchor_sets = [
        [10, 13, 16, 30, 33, 23],
        [30, 61, 62, 45, 59, 119],
        [116, 90, 156, 198, 373, 326]]
    return get_yolov3(
        'darknet53', stage_blocks, [512, 256, 128], anchor_sets, [8, 16, 32], classes, '',
        norm_layer=norm_layer, norm_kwargs=norm_kwargs, **kwargs)
def yolo3_mobilenet1_0_voc(pretrained_base=True, pretrained=False,
                           norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    """YOLO3 multi-scale with mobilenet base network on VOC dataset.

    Parameters
    ----------
    pretrained_base : bool or str
        Forwarded to `get_mobilenet` as `pretrained`; forced to `False` when
        `pretrained` is truthy.
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    norm_layer : object
        Normalization layer used (default: :class:`mxnet.gluon.nn.BatchNorm`)
        Can be :class:`mxnet.gluon.nn.BatchNorm` or :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    norm_kwargs : dict
        Additional `norm_layer` arguments, for example `num_devices=4`
        for :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    **kwargs
        Forwarded both to `get_mobilenet` and to `get_yolov3`.

    Returns
    -------
    mxnet.gluon.HybridBlock
        Fully hybrid yolo3 network.

    """
    from ...data import VOCDetection
    # base-network weights are not requested when full-model weights are
    if pretrained:
        pretrained_base = False
    backbone = get_mobilenet(
        multiplier=1,
        pretrained=pretrained_base,
        norm_layer=norm_layer, norm_kwargs=norm_kwargs,
        **kwargs)
    feats = backbone.features
    stage_blocks = [feats[:33], feats[33:69], feats[69:-2]]
    anchor_sets = [
        [10, 13, 16, 30, 33, 23],
        [30, 61, 62, 45, 59, 119],
        [116, 90, 156, 198, 373, 326],
    ]
    return get_yolov3(
        'mobilenet1.0', stage_blocks, [512, 256, 128], anchor_sets, [8, 16, 32],
        VOCDetection.CLASSES, 'voc',
        pretrained=pretrained, norm_layer=norm_layer, norm_kwargs=norm_kwargs, **kwargs)
def yolo3_mobilenet1_0_custom(classes, transfer=None, pretrained_base=True, pretrained=False,
                              norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    """YOLO3 multi-scale with mobilenet base network on custom dataset.

    Parameters
    ----------
    classes : iterable of str
        Names of custom foreground classes. `len(classes)` is the number of foreground classes.
    transfer : str or None
        If not `None`, will try to reuse pre-trained weights from yolo networks trained on other
        datasets: the model ``'yolo3_mobilenet1.0_' + str(transfer)`` is loaded and its
        classes are reset to `classes`.
    pretrained_base : boolean
        Whether fetch and load pretrained weights for base network.
        Only used when `transfer` is `None`.
    pretrained : bool or str
        Ignored; a warning is issued when it is truthy.
    norm_layer : object
        Normalization layer used (default: :class:`mxnet.gluon.nn.BatchNorm`)
        Can be :class:`mxnet.gluon.nn.BatchNorm` or :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
        Only used when `transfer` is `None`.
    norm_kwargs : dict
        Additional `norm_layer` arguments, for example `num_devices=4`
        for :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
        Only used when `transfer` is `None`.

    Returns
    -------
    mxnet.gluon.HybridBlock
        Fully hybrid yolo3 network.

    """
    if pretrained:
        warnings.warn("Custom models don't provide `pretrained` weights, ignored.")

    if transfer is not None:
        # start from a network trained on another dataset and keep the weights
        # of the classes it shares with the new class list
        from ...model_zoo import get_model
        source_model = 'yolo3_mobilenet1.0_' + str(transfer)
        net = get_model(source_model, pretrained=True, **kwargs)
        shared = [cls_name for cls_name in classes if cls_name in net.classes]
        net.reset_class(classes, reuse_weights=shared)
        return net

    backbone = get_mobilenet(multiplier=1,
                             pretrained=pretrained_base,
                             norm_layer=norm_layer, norm_kwargs=norm_kwargs,
                             **kwargs)
    feats = backbone.features
    stage_blocks = [feats[:33], feats[33:69], feats[69:-2]]
    anchor_sets = [
        [10, 13, 16, 30, 33, 23],
        [30, 61, 62, 45, 59, 119],
        [116, 90, 156, 198, 373, 326]]
    return get_yolov3(
        'mobilenet1.0', stage_blocks, [512, 256, 128], anchor_sets, [8, 16, 32], classes, '',
        norm_layer=norm_layer, norm_kwargs=norm_kwargs, **kwargs)
def yolo3_mobilenet1_0_coco(pretrained_base=True, pretrained=False, norm_layer=BatchNorm,
                            norm_kwargs=None, **kwargs):
    """YOLO3 multi-scale with mobilenet base network on COCO dataset.

    Parameters
    ----------
    pretrained_base : bool or str
        Forwarded to `get_mobilenet` as `pretrained`; forced to `False` when
        `pretrained` is truthy.
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    norm_layer : object
        Normalization layer used (default: :class:`mxnet.gluon.nn.BatchNorm`)
        Can be :class:`mxnet.gluon.nn.BatchNorm` or :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    norm_kwargs : dict
        Additional `norm_layer` arguments, for example `num_devices=4`
        for :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    **kwargs
        Forwarded both to `get_mobilenet` and to `get_yolov3`.

    Returns
    -------
    mxnet.gluon.HybridBlock
        Fully hybrid yolo3 network.

    """
    from ...data import COCODetection
    # base-network weights are not requested when full-model weights are
    if pretrained:
        pretrained_base = False
    backbone = get_mobilenet(
        multiplier=1,
        pretrained=pretrained_base,
        norm_layer=norm_layer, norm_kwargs=norm_kwargs,
        **kwargs)
    feats = backbone.features
    stage_blocks = [feats[:33], feats[33:69], feats[69:-2]]
    anchor_sets = [
        [10, 13, 16, 30, 33, 23],
        [30, 61, 62, 45, 59, 119],
        [116, 90, 156, 198, 373, 326],
    ]
    return get_yolov3(
        'mobilenet1.0', stage_blocks, [512, 256, 128], anchor_sets, [8, 16, 32],
        COCODetection.CLASSES, 'coco',
        pretrained=pretrained, norm_layer=norm_layer, norm_kwargs=norm_kwargs, **kwargs)
def yolo3_mobilenet0_25_voc(pretrained_base=True, pretrained=False,
                            norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    """YOLO3 multi-scale detector with a mobilenet0.25 base network for the VOC dataset.

    Parameters
    ----------
    pretrained_base : bool or str
        Passed as `pretrained` to `get_mobilenet` when building the base network.
        Boolean value controls whether to load the default pretrained weights,
        string value represents the hashtag for a certain version of pretrained weights.
        Replaced by `False` whenever `pretrained` is truthy.
    pretrained : bool or str
        Passed as `pretrained` to `get_yolov3`.
        Boolean value controls whether to load the default pretrained weights for model,
        string value represents the hashtag for a certain version of pretrained weights.
    norm_layer : object
        Normalization layer used (default: :class:`mxnet.gluon.nn.BatchNorm`)
        Can be :class:`mxnet.gluon.nn.BatchNorm` or
        :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    norm_kwargs : dict
        Additional `norm_layer` arguments, for example `num_devices=4`
        for :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    **kwargs
        Forwarded unchanged to both `get_mobilenet` and `get_yolov3`.

    Returns
    -------
    mxnet.gluon.HybridBlock
        Fully hybrid yolo3 network.
    """
    from ...data import VOCDetection

    # Asking for full-model weights switches off the separate base-network weights.
    if pretrained:
        pretrained_base = False

    backbone = get_mobilenet(
        multiplier=0.25, pretrained=pretrained_base,
        norm_layer=norm_layer, norm_kwargs=norm_kwargs, **kwargs)

    # Cut the base network's feature layers into three consecutive slices;
    # the trailing two layers are left out.
    layers = backbone.features
    stages = [layers[:33], layers[33:69], layers[69:-2]]

    strides = [8, 16, 32]
    # Six numbers per stage (presumably three width/height anchor pairs --
    # confirm against get_yolov3).
    anchors = [
        [10, 13, 16, 30, 33, 23],
        [30, 61, 62, 45, 59, 119],
        [116, 90, 156, 198, 373, 326],
    ]
    classes = VOCDetection.CLASSES

    return get_yolov3(
        'mobilenet0.25', stages, [256, 128, 128], anchors, strides, classes, 'voc',
        pretrained=pretrained, norm_layer=norm_layer, norm_kwargs=norm_kwargs, **kwargs)
def yolo3_mobilenet0_25_custom(classes, transfer=None, pretrained_base=True, pretrained=False,
                               norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    """YOLO3 multi-scale detector with a mobilenet0.25 base network for a custom dataset.

    Parameters
    ----------
    classes : iterable of str
        Names of custom foreground classes. `len(classes)` is the number of foreground classes.
    transfer : str or None
        If not `None`, the model named ``'yolo3_mobilenet0.25_' + str(transfer)`` is obtained
        through `get_model` with `pretrained=True` and its classes are reset to `classes`;
        the class names also present in that model are handed over as `reuse_weights`.
    pretrained_base : boolean
        Whether fetch and load pretrained weights for base network.
        Only consulted when `transfer` is `None`.
    pretrained : bool or str
        Not used for loading anything: a truthy value only triggers a warning.
    norm_layer : object
        Normalization layer used (default: :class:`mxnet.gluon.nn.BatchNorm`)
        Can be :class:`mxnet.gluon.nn.BatchNorm` or
        :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
        Only consulted when `transfer` is `None`.
    norm_kwargs : dict
        Additional `norm_layer` arguments, for example `num_devices=4`
        for :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
        Only consulted when `transfer` is `None`.
    **kwargs
        Forwarded to `get_model` on the transfer path, otherwise to both
        `get_mobilenet` and `get_yolov3`.

    Returns
    -------
    mxnet.gluon.HybridBlock
        Fully hybrid yolo3 network.
    """
    if pretrained:
        warnings.warn("Custom models don't provide `pretrained` weights, ignored.")

    if transfer is not None:
        # Transfer path: start from an existing pretrained detector and remap its classes.
        from ...model_zoo import get_model
        source_name = 'yolo3_mobilenet0.25_' + str(transfer)
        net = get_model(source_name, pretrained=True, **kwargs)
        shared = [name for name in classes if name in net.classes]
        net.reset_class(classes, reuse_weights=shared)
        return net

    # Fresh path: assemble the detector on top of a newly built base network.
    backbone = get_mobilenet(
        multiplier=0.25, pretrained=pretrained_base,
        norm_layer=norm_layer, norm_kwargs=norm_kwargs, **kwargs)

    # Cut the base network's feature layers into three consecutive slices;
    # the trailing two layers are left out.
    layers = backbone.features
    stages = [layers[:33], layers[33:69], layers[69:-2]]

    strides = [8, 16, 32]
    # Six numbers per stage (presumably three width/height anchor pairs --
    # confirm against get_yolov3).
    anchors = [
        [10, 13, 16, 30, 33, 23],
        [30, 61, 62, 45, 59, 119],
        [116, 90, 156, 198, 373, 326],
    ]

    return get_yolov3(
        'mobilenet0.25', stages, [256, 128, 128], anchors, strides, classes, '',
        norm_layer=norm_layer, norm_kwargs=norm_kwargs, **kwargs)
def yolo3_mobilenet0_25_coco(pretrained_base=True, pretrained=False, norm_layer=BatchNorm,
                             norm_kwargs=None, **kwargs):
    """YOLO3 multi-scale detector with a mobilenet0.25 base network for the COCO dataset.

    Parameters
    ----------
    pretrained_base : bool or str
        Passed as `pretrained` to `get_mobilenet` when building the base network.
        Boolean value controls whether to load the default pretrained weights,
        string value represents the hashtag for a certain version of pretrained weights.
        Replaced by `False` whenever `pretrained` is truthy.
    pretrained : bool or str
        Passed as `pretrained` to `get_yolov3`.
        Boolean value controls whether to load the default pretrained weights for model,
        string value represents the hashtag for a certain version of pretrained weights.
    norm_layer : object
        Normalization layer used (default: :class:`mxnet.gluon.nn.BatchNorm`)
        Can be :class:`mxnet.gluon.nn.BatchNorm` or
        :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    norm_kwargs : dict
        Additional `norm_layer` arguments, for example `num_devices=4`
        for :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    **kwargs
        Forwarded unchanged to both `get_mobilenet` and `get_yolov3`.

    Returns
    -------
    mxnet.gluon.HybridBlock
        Fully hybrid yolo3 network.
    """
    from ...data import COCODetection

    # Asking for full-model weights switches off the separate base-network weights.
    if pretrained:
        pretrained_base = False

    backbone = get_mobilenet(
        multiplier=0.25, pretrained=pretrained_base,
        norm_layer=norm_layer, norm_kwargs=norm_kwargs, **kwargs)

    # Cut the base network's feature layers into three consecutive slices;
    # the trailing two layers are left out.
    layers = backbone.features
    stages = [layers[:33], layers[33:69], layers[69:-2]]

    strides = [8, 16, 32]
    # Six numbers per stage (presumably three width/height anchor pairs --
    # confirm against get_yolov3).
    anchors = [
        [10, 13, 16, 30, 33, 23],
        [30, 61, 62, 45, 59, 119],
        [116, 90, 156, 198, 373, 326],
    ]
    classes = COCODetection.CLASSES

    return get_yolov3(
        'mobilenet0.25', stages, [256, 128, 128], anchors, strides, classes, 'coco',
        pretrained=pretrained, norm_layer=norm_layer, norm_kwargs=norm_kwargs, **kwargs)