Source code for gluoncv.model_zoo.rcnn.faster_rcnn.predefined_models

"""Predefined Faster RCNN Model."""
from __future__ import absolute_import

import warnings

import mxnet as mx
from mxnet.gluon import nn
from mxnet.gluon.contrib.nn import SyncBatchNorm

from ..faster_rcnn import get_faster_rcnn
from .doublehead_rcnn import get_doublehead_rcnn
from ....nn.feature import FPNFeatureExpander

__all__ = ['faster_rcnn_resnet50_v1b_voc',
           'doublehead_rcnn_resnet50_v1b_voc',
           'faster_rcnn_resnet50_v1b_coco',
           'faster_rcnn_fpn_resnet50_v1b_coco',
           'faster_rcnn_fpn_syncbn_resnet50_v1b_coco',
           'faster_rcnn_fpn_syncbn_resnest50_coco',
           'faster_rcnn_resnet50_v1b_custom',
           'faster_rcnn_resnet101_v1d_voc',
           'faster_rcnn_resnet101_v1d_coco',
           'faster_rcnn_fpn_resnet101_v1d_coco',
           'faster_rcnn_fpn_syncbn_resnet101_v1d_coco',
           'faster_rcnn_fpn_syncbn_resnest101_coco',
           'faster_rcnn_resnet101_v1d_custom',
           'faster_rcnn_fpn_syncbn_resnest269_coco']
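
# The builders in this module are usually reached through
# gluoncv.model_zoo.get_model with one of the names listed in __all__ above.
# A minimal end-to-end inference sketch (editor's example, not part of the
# upstream module; 'street.jpg' is a placeholder image path):
#
# >>> from gluoncv import model_zoo, data, utils
# >>> net = model_zoo.get_model('faster_rcnn_resnet50_v1b_voc', pretrained=True)
# >>> x, orig_img = data.transforms.presets.rcnn.load_test('street.jpg')
# >>> box_ids, scores, bboxes = net(x)
# >>> utils.viz.plot_bbox(orig_img, bboxes[0], scores[0], box_ids[0],
# ...                     class_names=net.classes)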


def faster_rcnn_resnet50_v1b_voc(pretrained=False, pretrained_base=True, **kwargs):
    r"""Faster RCNN model from the paper
    "Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards
    real-time object detection with region proposal networks"

    Parameters
    ----------
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True
        Load pretrained base network, the extra layers are randomized. Note that
        if pretrained is `True`, this has no effect.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Examples
    --------
    >>> model = faster_rcnn_resnet50_v1b_voc(pretrained=True)
    >>> print(model)
    """
    from ....model_zoo.resnetv1b import resnet50_v1b
    from ....data import VOCDetection
    classes = VOCDetection.CLASSES
    pretrained_base = False if pretrained else pretrained_base
    base_network = resnet50_v1b(pretrained=pretrained_base, dilated=False,
                                use_global_stats=True, **kwargs)
    features = nn.HybridSequential()
    top_features = nn.HybridSequential()
    for layer in ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3']:
        features.add(getattr(base_network, layer))
    for layer in ['layer4']:
        top_features.add(getattr(base_network, layer))
    train_patterns = '|'.join(['.*dense', '.*rpn', '.*down(2|3|4)_conv', '.*layers(2|3|4)_conv'])
    return get_faster_rcnn(
        name='resnet50_v1b', dataset='voc', pretrained=pretrained, features=features,
        top_features=top_features, classes=classes,
        short=600, max_size=1000, train_patterns=train_patterns,
        nms_thresh=0.3, nms_topk=400, post_nms=100,
        roi_mode='align', roi_size=(14, 14), strides=16, clip=None,
        rpn_channel=1024, base_size=16, scales=(2, 4, 8, 16, 32),
        ratios=(0.5, 1, 2), alloc_size=(128, 128), rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000, rpn_test_post_nms=300, rpn_min_size=16,
        num_sample=128, pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100,
        **kwargs)

def doublehead_rcnn_resnet50_v1b_voc(pretrained=False, pretrained_base=True, **kwargs):
    r"""Double Head Faster RCNN model from the paper
    "Rethinking Classification and Localization for Object Detection" (2019).

    Parameters
    ----------
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True
        Load pretrained base network, the extra layers are randomized. Note that
        if pretrained is `True`, this has no effect.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Examples
    --------
    >>> model = doublehead_rcnn_resnet50_v1b_voc(pretrained=True)
    >>> print(model)
    """
    from ....model_zoo.resnetv1b import resnet50_v1b
    from ....data import VOCDetection
    classes = VOCDetection.CLASSES
    pretrained_base = False if pretrained else pretrained_base
    base_network = resnet50_v1b(pretrained=pretrained_base, dilated=False,
                                use_global_stats=True, **kwargs)
    features = nn.HybridSequential()
    top_features = nn.HybridSequential()
    for layer in ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3']:
        features.add(getattr(base_network, layer))
    for layer in ['layer4']:
        top_features.add(getattr(base_network, layer))
    train_patterns = '|'.join(['.*dense', '.*rpn', '.*down(2|3|4)_conv',
                               '.*layers(2|3|4)_conv', '.*double_fc', '.*double_conv'])
    return get_doublehead_rcnn(
        name='resnet50_v1b', dataset='voc', pretrained=pretrained, features=features,
        top_features=top_features, classes=classes,
        short=600, max_size=1000, train_patterns=train_patterns,
        nms_thresh=0.3, nms_topk=400, post_nms=100,
        roi_mode='align', roi_size=(14, 14), strides=16, clip=None,
        rpn_channel=1024, base_size=16, scales=(2, 4, 8, 16, 32),
        ratios=(0.5, 1, 2), alloc_size=(128, 128), rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000, rpn_test_post_nms=300, rpn_min_size=16,
        num_sample=128, pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100,
        **kwargs)

def faster_rcnn_resnet50_v1b_coco(pretrained=False, pretrained_base=True, **kwargs):
    r"""Faster RCNN model from the paper
    "Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards
    real-time object detection with region proposal networks"

    Parameters
    ----------
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True
        Load pretrained base network, the extra layers are randomized. Note that
        if pretrained is `True`, this has no effect.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Examples
    --------
    >>> model = faster_rcnn_resnet50_v1b_coco(pretrained=True)
    >>> print(model)
    """
    from ....model_zoo.resnetv1b import resnet50_v1b
    from ....data import COCODetection
    classes = COCODetection.CLASSES
    pretrained_base = False if pretrained else pretrained_base
    base_network = resnet50_v1b(pretrained=pretrained_base, dilated=False,
                                use_global_stats=True, **kwargs)
    features = nn.HybridSequential()
    top_features = nn.HybridSequential()
    for layer in ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3']:
        features.add(getattr(base_network, layer))
    for layer in ['layer4']:
        top_features.add(getattr(base_network, layer))
    train_patterns = '|'.join(['.*dense', '.*rpn', '.*down(2|3|4)_conv', '.*layers(2|3|4)_conv'])
    return get_faster_rcnn(
        name='resnet50_v1b', dataset='coco', pretrained=pretrained, features=features,
        top_features=top_features, classes=classes,
        short=800, max_size=1333, train_patterns=train_patterns,
        nms_thresh=0.5, nms_topk=-1, post_nms=-1,
        roi_mode='align', roi_size=(14, 14), strides=16, clip=4.14,
        rpn_channel=1024, base_size=16, scales=(2, 4, 8, 16, 32),
        ratios=(0.5, 1, 2), alloc_size=(128, 128), rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000, rpn_test_post_nms=1000, rpn_min_size=1,
        num_sample=128, pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100,
        **kwargs)

def faster_rcnn_fpn_resnet50_v1b_coco(pretrained=False, pretrained_base=True, **kwargs):
    r"""Faster RCNN model with FPN from the papers
    "Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards
    real-time object detection with region proposal networks"
    "Lin, T., Dollar, P., Girshick, R., He, K., Hariharan, B., Belongie, S. (2016).
    Feature Pyramid Networks for Object Detection"

    Parameters
    ----------
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True
        Load pretrained base network, the extra layers are randomized. Note that
        if pretrained is `True`, this has no effect.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Examples
    --------
    >>> model = faster_rcnn_fpn_resnet50_v1b_coco(pretrained=True)
    >>> print(model)
    """
    from ....model_zoo.resnetv1b import resnet50_v1b
    from ....data import COCODetection
    classes = COCODetection.CLASSES
    pretrained_base = False if pretrained else pretrained_base
    base_network = resnet50_v1b(pretrained=pretrained_base, dilated=False,
                                use_global_stats=True, **kwargs)
    features = FPNFeatureExpander(
        network=base_network,
        outputs=['layers1_relu8_fwd', 'layers2_relu11_fwd', 'layers3_relu17_fwd',
                 'layers4_relu8_fwd'],
        num_filters=[256, 256, 256, 256], use_1x1=True, use_upsample=True,
        use_elewadd=True, use_p6=True, no_bias=False, pretrained=pretrained_base)
    top_features = None
    # 2 FC layers before RCNN cls and reg
    box_features = nn.HybridSequential()
    for _ in range(2):
        box_features.add(nn.Dense(1024, weight_initializer=mx.init.Normal(0.01)))
        box_features.add(nn.Activation('relu'))
    train_patterns = '|'.join(
        ['.*dense', '.*rpn', '.*down(2|3|4)_conv', '.*layers(2|3|4)_conv', 'P'])
    return get_faster_rcnn(
        name='fpn_resnet50_v1b', dataset='coco', pretrained=pretrained, features=features,
        top_features=top_features, classes=classes, box_features=box_features,
        short=800, max_size=1333, min_stage=2, max_stage=6, train_patterns=train_patterns,
        nms_thresh=0.5, nms_topk=-1, post_nms=-1,
        roi_mode='align', roi_size=(7, 7), strides=(4, 8, 16, 32, 64), clip=4.14,
        rpn_channel=1024, base_size=16, scales=(2, 4, 8, 16, 32),
        ratios=(0.5, 1, 2), alloc_size=(384, 384), rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000, rpn_test_post_nms=1000, rpn_min_size=1,
        num_sample=512, pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100,
        **kwargs)
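
# Note on the FPN variants (editor's summary of the construction above, not
# upstream commentary): FPNFeatureExpander replaces the single C4 feature map
# with a P2-P6 pyramid, min_stage=2 / max_stage=6 select those levels, and
# strides=(4, 8, 16, 32, 64) are the corresponding feature strides. Each RoI is
# pooled (7x7, 'align' mode) from the pyramid level matching its scale and then
# passed through the box head defined in box_features.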

def faster_rcnn_fpn_syncbn_resnet50_v1b_coco(pretrained=False, pretrained_base=True,
                                             num_devices=0, **kwargs):
    r"""Faster RCNN model with FPN from the papers
    "Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards
    real-time object detection with region proposal networks"
    "Lin, T., Dollar, P., Girshick, R., He, K., Hariharan, B., Belongie, S. (2016).
    Feature Pyramid Networks for Object Detection"

    Parameters
    ----------
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True
        Load pretrained base network, the extra layers are randomized. Note that
        if pretrained is `True`, this has no effect.
    num_devices : int, default is 0
        Number of devices for the sync batch norm layer. If less than 1, all
        available devices are used.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Examples
    --------
    >>> model = faster_rcnn_fpn_syncbn_resnet50_v1b_coco(pretrained=True)
    >>> print(model)
    """
    from ....model_zoo.resnetv1b import resnet50_v1b
    from ....data import COCODetection
    classes = COCODetection.CLASSES
    pretrained_base = False if pretrained else pretrained_base
    gluon_norm_kwargs = {'num_devices': num_devices} if num_devices >= 1 else {}
    base_network = resnet50_v1b(pretrained=pretrained_base, dilated=False,
                                use_global_stats=False, norm_layer=SyncBatchNorm,
                                norm_kwargs=gluon_norm_kwargs, **kwargs)
    sym_norm_kwargs = {'ndev': num_devices} if num_devices >= 1 else {}
    features = FPNFeatureExpander(
        network=base_network,
        outputs=['layers1_relu8_fwd', 'layers2_relu11_fwd', 'layers3_relu17_fwd',
                 'layers4_relu8_fwd'],
        num_filters=[256, 256, 256, 256], use_1x1=True, use_upsample=True,
        use_elewadd=True, use_p6=True, no_bias=True, pretrained=pretrained_base,
        norm_layer=mx.sym.contrib.SyncBatchNorm, norm_kwargs=sym_norm_kwargs)
    top_features = None
    # 1 Conv + 1 FC layer before RCNN cls and reg
    box_features = nn.HybridSequential()
    box_features.add(nn.Conv2D(256, 3, padding=1, use_bias=False),
                     SyncBatchNorm(**gluon_norm_kwargs),
                     nn.Activation('relu'),
                     nn.Dense(1024, weight_initializer=mx.init.Normal(0.01)),
                     nn.Activation('relu'))
    train_patterns = '(?!.*moving)'  # excluding symbol bn moving mean and var
    return get_faster_rcnn(
        name='fpn_syncbn_resnet50_v1b', dataset='coco', pretrained=pretrained,
        features=features, top_features=top_features, classes=classes,
        box_features=box_features, short=(640, 800), max_size=1333, min_stage=2,
        max_stage=6, train_patterns=train_patterns,
        nms_thresh=0.5, nms_topk=-1, post_nms=-1,
        roi_mode='align', roi_size=(7, 7), strides=(4, 8, 16, 32, 64), clip=4.14,
        rpn_channel=256, base_size=16, scales=(2, 4, 8, 16, 32),
        ratios=(0.5, 1, 2), alloc_size=(384, 384), rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000, rpn_test_post_nms=1000, rpn_min_size=1,
        num_sample=512, pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100,
        **kwargs)
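
# The syncbn variants keep batch-norm layers trainable (use_global_stats=False)
# and synchronize their statistics across devices, so they are built once per
# training job with the number of GPUs that will be used. A construction sketch
# (editor's example with a hypothetical 4-GPU setup, not upstream code):
#
# >>> net = faster_rcnn_fpn_syncbn_resnet50_v1b_coco(pretrained_base=True,
# ...                                                num_devices=4)
# >>> # num_devices < 1 (the default) falls back to all available devices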

def faster_rcnn_fpn_syncbn_resnest50_coco(pretrained=False, pretrained_base=True,
                                          num_devices=0, **kwargs):
    r"""Faster R-CNN model with a ResNeSt backbone, from the paper
    "ResNeSt: Split-Attention Networks".

    Parameters
    ----------
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True
        Load pretrained base network, the extra layers are randomized. Note that
        if pretrained is `True`, this has no effect.
    num_devices : int, default is 0
        Number of devices for the sync batch norm layer. If less than 1, all
        available devices are used.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Examples
    --------
    >>> model = faster_rcnn_fpn_syncbn_resnest50_coco(pretrained=True)
    >>> print(model)
    """
    from ....model_zoo.resnest import resnest50
    from ....data import COCODetection
    classes = COCODetection.CLASSES
    pretrained_base = False if pretrained else pretrained_base
    gluon_norm_kwargs = {'num_devices': num_devices} if num_devices >= 1 else {}
    base_network = resnest50(pretrained=pretrained_base, dilated=False,
                             use_global_stats=False, norm_layer=SyncBatchNorm,
                             norm_kwargs=gluon_norm_kwargs)
    from gluoncv.nn.dropblock import set_drop_prob
    from functools import partial
    apply_drop_prob = partial(set_drop_prob, 0.0)
    base_network.apply(apply_drop_prob)
    sym_norm_kwargs = {'ndev': num_devices} if num_devices >= 1 else {}
    features = FPNFeatureExpander(
        network=base_network,
        outputs=['layers1_relu11_fwd', 'layers2_relu15_fwd', 'layers3_relu23_fwd',
                 'layers4_relu11_fwd'],
        num_filters=[256, 256, 256, 256], use_1x1=True, use_upsample=True,
        use_elewadd=True, use_p6=True, no_bias=True, pretrained=pretrained_base,
        norm_layer=mx.sym.contrib.SyncBatchNorm, norm_kwargs=sym_norm_kwargs)
    top_features = None
    # 4 Conv + 1 FC layers before RCNN cls and reg
    box_features = nn.HybridSequential()
    for _ in range(4):
        box_features.add(nn.Conv2D(256, 3, padding=1, use_bias=False),
                         SyncBatchNorm(**gluon_norm_kwargs),
                         nn.Activation('relu'))
    box_features.add(nn.Dense(1024, weight_initializer=mx.init.Normal(0.01)),
                     nn.Activation('relu'))
    train_patterns = '(?!.*moving)'  # excluding symbol bn moving mean and var
    return get_faster_rcnn(
        name='fpn_syncbn_resnest50', dataset='coco', pretrained=pretrained,
        features=features, top_features=top_features, classes=classes,
        box_features=box_features, short=(640, 800), max_size=1333, min_stage=2,
        max_stage=6, train_patterns=train_patterns,
        nms_thresh=0.5, nms_topk=-1, post_nms=-1,
        roi_mode='align', roi_size=(7, 7), strides=(4, 8, 16, 32, 64), clip=4.14,
        rpn_channel=256, base_size=16, scales=(2, 4, 8, 16, 32),
        ratios=(0.5, 1, 2), alloc_size=(384, 384), rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000, rpn_test_post_nms=1000, rpn_min_size=1,
        num_sample=512, pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100,
        **kwargs)

def faster_rcnn_resnet50_v1b_custom(classes, transfer=None, pretrained_base=True,
                                    pretrained=False, **kwargs):
    r"""Faster RCNN model with resnet50_v1b base network on custom dataset.

    Parameters
    ----------
    classes : iterable of str
        Names of custom foreground classes. `len(classes)` is the number of foreground classes.
    transfer : str or None
        If not `None`, will try to reuse pre-trained weights from faster RCNN networks trained
        on other datasets.
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str
        Boolean value controls whether to load the default pretrained weights for the base
        network. String value represents the hashtag for a certain version of pretrained weights.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Returns
    -------
    mxnet.gluon.HybridBlock
        Hybrid faster RCNN network.
    """
    if pretrained:
        warnings.warn("Custom models don't provide `pretrained` weights, ignored.")
    if transfer is None:
        from ....model_zoo.resnetv1b import resnet50_v1b
        base_network = resnet50_v1b(pretrained=pretrained_base, dilated=False,
                                    use_global_stats=True, **kwargs)
        features = nn.HybridSequential()
        top_features = nn.HybridSequential()
        for layer in ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3']:
            features.add(getattr(base_network, layer))
        for layer in ['layer4']:
            top_features.add(getattr(base_network, layer))
        train_patterns = '|'.join(['.*dense', '.*rpn', '.*down(2|3|4)_conv',
                                   '.*layers(2|3|4)_conv'])
        return get_faster_rcnn(
            name='resnet50_v1b', dataset='custom', pretrained=pretrained,
            features=features, top_features=top_features, classes=classes,
            train_patterns=train_patterns, **kwargs)
    else:
        from ...model_zoo import get_model
        net = get_model('faster_rcnn_resnet50_v1b_' + str(transfer), pretrained=True, **kwargs)
        reuse_classes = [x for x in classes if x in net.classes]
        net.reset_class(classes, reuse_weights=reuse_classes)
    return net
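
# Transfer-learning sketch for the custom builder above (editor's example; the
# class names are placeholders). With transfer='voc' the VOC-pretrained detector
# is loaded and weights are reused for any class name that also exists in VOC;
# without transfer, a fresh head is built on an (optionally pretrained)
# resnet50_v1b base.
#
# >>> net = faster_rcnn_resnet50_v1b_custom(classes=['person', 'dog'], transfer='voc')
# >>> # or train from scratch on the new classes:
# >>> net = faster_rcnn_resnet50_v1b_custom(classes=['person', 'dog'], pretrained_base=True)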

def faster_rcnn_resnet101_v1d_voc(pretrained=False, pretrained_base=True, **kwargs):
    r"""Faster RCNN model from the paper
    "Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards
    real-time object detection with region proposal networks"

    Parameters
    ----------
    pretrained : bool, optional, default is False
        Load pretrained weights.
    pretrained_base : bool or str, optional, default is True
        Load pretrained base network, the extra layers are randomized. Note that
        if pretrained is `True`, this has no effect.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Examples
    --------
    >>> model = faster_rcnn_resnet101_v1d_voc(pretrained=True)
    >>> print(model)
    """
    from ....model_zoo.resnetv1b import resnet101_v1d
    from ....data import VOCDetection
    classes = VOCDetection.CLASSES
    pretrained_base = False if pretrained else pretrained_base
    base_network = resnet101_v1d(pretrained=pretrained_base, dilated=False,
                                 use_global_stats=True, **kwargs)
    features = nn.HybridSequential()
    top_features = nn.HybridSequential()
    for layer in ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3']:
        features.add(getattr(base_network, layer))
    for layer in ['layer4']:
        top_features.add(getattr(base_network, layer))
    train_patterns = '|'.join(['.*dense', '.*rpn', '.*down(2|3|4)_conv', '.*layers(2|3|4)_conv'])
    return get_faster_rcnn(
        name='resnet101_v1d', dataset='voc', pretrained=pretrained, features=features,
        top_features=top_features, classes=classes,
        short=600, max_size=1000, train_patterns=train_patterns,
        nms_thresh=0.3, nms_topk=400, post_nms=100,
        roi_mode='align', roi_size=(14, 14), strides=16, clip=None,
        rpn_channel=1024, base_size=16, scales=(2, 4, 8, 16, 32),
        ratios=(0.5, 1, 2), alloc_size=(128, 128), rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000, rpn_test_post_nms=300, rpn_min_size=16,
        num_sample=128, pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100,
        **kwargs)

def faster_rcnn_resnet101_v1d_coco(pretrained=False, pretrained_base=True, **kwargs):
    r"""Faster RCNN model from the paper
    "Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards
    real-time object detection with region proposal networks"

    Parameters
    ----------
    pretrained : bool, optional, default is False
        Load pretrained weights.
    pretrained_base : bool or str, optional, default is True
        Load pretrained base network, the extra layers are randomized. Note that
        if pretrained is `True`, this has no effect.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Examples
    --------
    >>> model = faster_rcnn_resnet101_v1d_coco(pretrained=True)
    >>> print(model)
    """
    from ....model_zoo.resnetv1b import resnet101_v1d
    from ....data import COCODetection
    classes = COCODetection.CLASSES
    pretrained_base = False if pretrained else pretrained_base
    base_network = resnet101_v1d(pretrained=pretrained_base, dilated=False,
                                 use_global_stats=True, **kwargs)
    features = nn.HybridSequential()
    top_features = nn.HybridSequential()
    for layer in ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3']:
        features.add(getattr(base_network, layer))
    for layer in ['layer4']:
        top_features.add(getattr(base_network, layer))
    train_patterns = '|'.join(['.*dense', '.*rpn', '.*down(2|3|4)_conv', '.*layers(2|3|4)_conv'])
    return get_faster_rcnn(
        name='resnet101_v1d', dataset='coco', pretrained=pretrained, features=features,
        top_features=top_features, classes=classes,
        short=800, max_size=1333, train_patterns=train_patterns,
        nms_thresh=0.5, nms_topk=-1, post_nms=-1,
        roi_mode='align', roi_size=(14, 14), strides=16, clip=4.14,
        rpn_channel=1024, base_size=16, scales=(2, 4, 8, 16, 32),
        ratios=(0.5, 1, 2), alloc_size=(128, 128), rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000, rpn_test_post_nms=1000, rpn_min_size=1,
        num_sample=128, pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100,
        **kwargs)

def faster_rcnn_fpn_resnet101_v1d_coco(pretrained=False, pretrained_base=True, **kwargs):
    r"""Faster RCNN model with FPN from the papers
    "Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards
    real-time object detection with region proposal networks"
    "Lin, T., Dollar, P., Girshick, R., He, K., Hariharan, B., Belongie, S. (2016).
    Feature Pyramid Networks for Object Detection"

    Parameters
    ----------
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True
        Load pretrained base network, the extra layers are randomized. Note that
        if pretrained is `True`, this has no effect.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Examples
    --------
    >>> model = faster_rcnn_fpn_resnet101_v1d_coco(pretrained=True)
    >>> print(model)
    """
    from ....model_zoo.resnetv1b import resnet101_v1d
    from ....data import COCODetection
    classes = COCODetection.CLASSES
    pretrained_base = False if pretrained else pretrained_base
    base_network = resnet101_v1d(pretrained=pretrained_base, dilated=False,
                                 use_global_stats=True, **kwargs)
    features = FPNFeatureExpander(
        network=base_network,
        outputs=['layers1_relu8_fwd', 'layers2_relu11_fwd', 'layers3_relu68_fwd',
                 'layers4_relu8_fwd'],
        num_filters=[256, 256, 256, 256], use_1x1=True, use_upsample=True,
        use_elewadd=True, use_p6=True, no_bias=False, pretrained=pretrained_base)
    top_features = None
    # 2 FC layers before RCNN cls and reg
    box_features = nn.HybridSequential()
    for _ in range(2):
        box_features.add(nn.Dense(1024, weight_initializer=mx.init.Normal(0.01)))
        box_features.add(nn.Activation('relu'))
    train_patterns = '|'.join(
        ['.*dense', '.*rpn', '.*down(2|3|4)_conv', '.*layers(2|3|4)_conv', 'P'])
    return get_faster_rcnn(
        name='fpn_resnet101_v1d', dataset='coco', pretrained=pretrained, features=features,
        top_features=top_features, classes=classes, box_features=box_features,
        short=800, max_size=1333, min_stage=2, max_stage=6, train_patterns=train_patterns,
        nms_thresh=0.5, nms_topk=-1, post_nms=-1,
        roi_mode='align', roi_size=(7, 7), strides=(4, 8, 16, 32, 64), clip=4.14,
        rpn_channel=1024, base_size=16, scales=(2, 4, 8, 16, 32),
        ratios=(0.5, 1, 2), alloc_size=(384, 384), rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000, rpn_test_post_nms=1000, rpn_min_size=1,
        num_sample=512, pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100,
        **kwargs)

def faster_rcnn_fpn_syncbn_resnet101_v1d_coco(pretrained=False, pretrained_base=True,
                                              num_devices=0, **kwargs):
    r"""Faster RCNN model with FPN from the papers
    "Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards
    real-time object detection with region proposal networks"
    "Lin, T., Dollar, P., Girshick, R., He, K., Hariharan, B., Belongie, S. (2016).
    Feature Pyramid Networks for Object Detection"

    Parameters
    ----------
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True
        Load pretrained base network, the extra layers are randomized. Note that
        if pretrained is `True`, this has no effect.
    num_devices : int, default is 0
        Number of devices for the sync batch norm layer. If less than 1, all
        available devices are used.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Examples
    --------
    >>> model = faster_rcnn_fpn_syncbn_resnet101_v1d_coco(pretrained=True)
    >>> print(model)
    """
    from ....model_zoo.resnetv1b import resnet101_v1d
    from ....data import COCODetection
    classes = COCODetection.CLASSES
    pretrained_base = False if pretrained else pretrained_base
    gluon_norm_kwargs = {'num_devices': num_devices} if num_devices >= 1 else {}
    base_network = resnet101_v1d(pretrained=pretrained_base, dilated=False,
                                 use_global_stats=False, norm_layer=SyncBatchNorm,
                                 norm_kwargs=gluon_norm_kwargs, **kwargs)
    sym_norm_kwargs = {'ndev': num_devices} if num_devices >= 1 else {}
    features = FPNFeatureExpander(
        network=base_network,
        outputs=['layers1_relu8_fwd', 'layers2_relu11_fwd', 'layers3_relu68_fwd',
                 'layers4_relu8_fwd'],
        num_filters=[256, 256, 256, 256], use_1x1=True, use_upsample=True,
        use_elewadd=True, use_p6=True, no_bias=True, pretrained=pretrained_base,
        norm_layer=mx.sym.contrib.SyncBatchNorm, norm_kwargs=sym_norm_kwargs)
    top_features = None
    # 4 Conv + 1 FC layers before RCNN cls and reg
    box_features = nn.HybridSequential()
    for _ in range(4):
        box_features.add(nn.Conv2D(256, 3, padding=1, use_bias=False),
                         SyncBatchNorm(**gluon_norm_kwargs),
                         nn.Activation('relu'))
    box_features.add(nn.Dense(1024, weight_initializer=mx.init.Normal(0.01)),
                     nn.Activation('relu'))
    train_patterns = '(?!.*moving)'  # excluding symbol bn moving mean and var
    return get_faster_rcnn(
        name='fpn_syncbn_resnet101_v1d', dataset='coco', pretrained=pretrained,
        features=features, top_features=top_features, classes=classes,
        box_features=box_features, short=(640, 800), max_size=1333, min_stage=2,
        max_stage=6, train_patterns=train_patterns,
        nms_thresh=0.5, nms_topk=-1, post_nms=-1,
        roi_mode='align', roi_size=(7, 7), strides=(4, 8, 16, 32, 64), clip=4.14,
        rpn_channel=256, base_size=16, scales=(2, 4, 8, 16, 32),
        ratios=(0.5, 1, 2), alloc_size=(384, 384), rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000, rpn_test_post_nms=1000, rpn_min_size=1,
        num_sample=512, pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100,
        **kwargs)

def faster_rcnn_fpn_syncbn_resnest101_coco(pretrained=False, pretrained_base=True,
                                           num_devices=0, **kwargs):
    r"""Faster R-CNN model with a ResNeSt backbone, from the paper
    "ResNeSt: Split-Attention Networks".

    Parameters
    ----------
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True
        Load pretrained base network, the extra layers are randomized. Note that
        if pretrained is `True`, this has no effect.
    num_devices : int, default is 0
        Number of devices for the sync batch norm layer. If less than 1, all
        available devices are used.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Examples
    --------
    >>> model = faster_rcnn_fpn_syncbn_resnest101_coco(pretrained=True)
    >>> print(model)
    """
    from ....model_zoo.resnest import resnest101
    from ....data import COCODetection
    classes = COCODetection.CLASSES
    pretrained_base = False if pretrained else pretrained_base
    gluon_norm_kwargs = {'num_devices': num_devices} if num_devices >= 1 else {}
    base_network = resnest101(pretrained=pretrained_base, dilated=False,
                              use_global_stats=False, norm_layer=SyncBatchNorm,
                              norm_kwargs=gluon_norm_kwargs)
    from gluoncv.nn.dropblock import set_drop_prob
    from functools import partial
    apply_drop_prob = partial(set_drop_prob, 0.0)
    base_network.apply(apply_drop_prob)
    sym_norm_kwargs = {'ndev': num_devices} if num_devices >= 1 else {}
    features = FPNFeatureExpander(
        network=base_network,
        outputs=['layers1_relu11_fwd', 'layers2_relu15_fwd', 'layers3_relu91_fwd',
                 'layers4_relu11_fwd'],
        num_filters=[256, 256, 256, 256], use_1x1=True, use_upsample=True,
        use_elewadd=True, use_p6=True, no_bias=True, pretrained=pretrained_base,
        norm_layer=mx.sym.contrib.SyncBatchNorm, norm_kwargs=sym_norm_kwargs)
    top_features = None
    # 4 Conv + 1 FC layers before RCNN cls and reg
    box_features = nn.HybridSequential()
    for _ in range(4):
        box_features.add(nn.Conv2D(256, 3, padding=1, use_bias=False),
                         SyncBatchNorm(**gluon_norm_kwargs),
                         nn.Activation('relu'))
    box_features.add(nn.Dense(1024, weight_initializer=mx.init.Normal(0.01)),
                     nn.Activation('relu'))
    train_patterns = '(?!.*moving)'  # excluding symbol bn moving mean and var
    return get_faster_rcnn(
        name='fpn_syncbn_resnest101', dataset='coco', pretrained=pretrained,
        features=features, top_features=top_features, classes=classes,
        box_features=box_features, short=(640, 800), max_size=1333, min_stage=2,
        max_stage=6, train_patterns=train_patterns,
        nms_thresh=0.5, nms_topk=-1, post_nms=-1,
        roi_mode='align', roi_size=(7, 7), strides=(4, 8, 16, 32, 64), clip=4.14,
        rpn_channel=256, base_size=16, scales=(2, 4, 8, 16, 32),
        ratios=(0.5, 1, 2), alloc_size=(384, 384), rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000, rpn_test_post_nms=1000, rpn_min_size=1,
        num_sample=512, pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100,
        **kwargs)

def faster_rcnn_resnet101_v1d_custom(classes, transfer=None, pretrained_base=True,
                                     pretrained=False, **kwargs):
    r"""Faster RCNN model with resnet101_v1d base network on custom dataset.

    Parameters
    ----------
    classes : iterable of str
        Names of custom foreground classes. `len(classes)` is the number of foreground classes.
    transfer : str or None
        If not `None`, will try to reuse pre-trained weights from faster RCNN networks trained
        on other datasets.
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str
        Boolean value controls whether to load the default pretrained weights for the base
        network. String value represents the hashtag for a certain version of pretrained weights.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Returns
    -------
    mxnet.gluon.HybridBlock
        Hybrid faster RCNN network.
    """
    if pretrained:
        warnings.warn("Custom models don't provide `pretrained` weights, ignored.")
    if transfer is None:
        from ....model_zoo.resnetv1b import resnet101_v1d
        base_network = resnet101_v1d(pretrained=pretrained_base, dilated=False,
                                     use_global_stats=True, **kwargs)
        features = nn.HybridSequential()
        top_features = nn.HybridSequential()
        for layer in ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3']:
            features.add(getattr(base_network, layer))
        for layer in ['layer4']:
            top_features.add(getattr(base_network, layer))
        train_patterns = '|'.join(['.*dense', '.*rpn', '.*down(2|3|4)_conv',
                                   '.*layers(2|3|4)_conv'])
        return get_faster_rcnn(
            name='resnet101_v1d', dataset='custom', pretrained=pretrained,
            features=features, top_features=top_features, classes=classes,
            train_patterns=train_patterns, **kwargs)
    else:
        from ....model_zoo import get_model
        net = get_model('faster_rcnn_resnet101_v1d_' + str(transfer), pretrained=True, **kwargs)
        reuse_classes = [x for x in classes if x in net.classes]
        net.reset_class(classes, reuse_weights=reuse_classes)
    return net

def faster_rcnn_fpn_syncbn_resnest269_coco(pretrained=False, pretrained_base=True,
                                           num_devices=0, **kwargs):
    r"""Faster R-CNN model with a ResNeSt backbone, from the paper
    "ResNeSt: Split-Attention Networks".

    Parameters
    ----------
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True
        Load pretrained base network, the extra layers are randomized. Note that
        if pretrained is `True`, this has no effect.
    num_devices : int, default is 0
        Number of devices for the sync batch norm layer. If less than 1, all
        available devices are used.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Examples
    --------
    >>> model = faster_rcnn_fpn_syncbn_resnest269_coco(pretrained=True)
    >>> print(model)
    """
    from ....model_zoo.resnest import resnest269
    from ....data import COCODetection
    classes = COCODetection.CLASSES
    pretrained_base = False if pretrained else pretrained_base
    gluon_norm_kwargs = {'num_devices': num_devices} if num_devices >= 1 else {}
    base_network = resnest269(pretrained=pretrained_base, dilated=False,
                              use_global_stats=False, norm_layer=SyncBatchNorm,
                              norm_kwargs=gluon_norm_kwargs)
    from gluoncv.nn.dropblock import set_drop_prob
    from functools import partial
    apply_drop_prob = partial(set_drop_prob, 0.0)
    base_network.apply(apply_drop_prob)
    sym_norm_kwargs = {'ndev': num_devices} if num_devices >= 1 else {}
    features = FPNFeatureExpander(
        network=base_network,
        outputs=['layers1_relu11_fwd', 'layers2_relu119_fwd', 'layers3_relu191_fwd',
                 'layers4_relu31_fwd'],
        num_filters=[256, 256, 256, 256], use_1x1=True, use_upsample=True,
        use_elewadd=True, use_p6=True, no_bias=True, pretrained=pretrained_base,
        norm_layer=mx.sym.contrib.SyncBatchNorm, norm_kwargs=sym_norm_kwargs)
    top_features = None
    # 4 Conv + 1 FC layers before RCNN cls and reg
    box_features = nn.HybridSequential()
    for _ in range(4):
        box_features.add(nn.Conv2D(256, 3, padding=1, use_bias=False),
                         SyncBatchNorm(**gluon_norm_kwargs),
                         nn.Activation('relu'))
    box_features.add(nn.Dense(1024, weight_initializer=mx.init.Normal(0.01)),
                     nn.Activation('relu'))
    train_patterns = '(?!.*moving)'  # excluding symbol bn moving mean and var
    return get_faster_rcnn(
        name='fpn_syncbn_resnest269', dataset='coco', pretrained=pretrained,
        features=features, top_features=top_features, classes=classes,
        box_features=box_features, short=(640, 864), max_size=1440, min_stage=2,
        max_stage=6, train_patterns=train_patterns,
        nms_thresh=0.5, nms_topk=-1, post_nms=-1,
        roi_mode='align', roi_size=(7, 7), strides=(4, 8, 16, 32, 64), clip=4.14,
        rpn_channel=256, base_size=16, scales=(2, 4, 8, 16, 32),
        ratios=(0.5, 1, 2), alloc_size=(384, 384), rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000, rpn_test_post_nms=1000, rpn_min_size=1,
        num_sample=512, pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100,
        **kwargs)