Source code for gluoncv.model_zoo.resnest

"""ResNeSt implemented in Gluon."""
# pylint: disable=arguments-differ,unused-argument,missing-docstring,line-too-long
from __future__ import division

import math

from mxnet.context import cpu
from mxnet.gluon import nn
from mxnet.gluon.block import HybridBlock
from mxnet.gluon.nn import BatchNorm

from ..nn.dropblock import DropBlock
from ..nn.splat import SplitAttentionConv

__all__ = ['ResNeSt', 'Bottleneck', 'resnest14', 'resnest26', 'resnest50', 'resnest101',
           'resnest200', 'resnest269']


def _update_input_size(input_size, stride):
    sh, sw = (stride, stride) if isinstance(stride, int) else stride
    ih, iw = (input_size, input_size) if isinstance(input_size, int) else input_size
    oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
    input_size = (oh, ow)
    return input_size
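

# Illustrative sketch (not part of the original module): `_demo_update_input_size`
# is a hypothetical helper that just exercises `_update_input_size` to show the
# ceil-division bookkeeping used throughout this file.
def _demo_update_input_size():
    assert _update_input_size(224, 2) == (112, 112)
    assert _update_input_size((113, 224), (2, 2)) == (57, 112)
    # after two stride-2 stages, a 224x224 input is tracked as 56x56
    assert _update_input_size(_update_input_size(224, 2), 2) == (56, 56)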


class Bottleneck(HybridBlock):
    """ResNeSt Bottleneck"""
    # pylint: disable=unused-argument
    expansion = 4

    def __init__(self, channels, cardinality=1, bottleneck_width=64, strides=1, dilation=1,
                 downsample=None, previous_dilation=1, norm_layer=None,
                 norm_kwargs=None, last_gamma=False, dropblock_prob=0, input_size=None,
                 use_splat=False, radix=2, avd=False, avd_first=False, in_channels=None,
                 split_drop_ratio=0, **kwargs):
        super(Bottleneck, self).__init__()
        group_width = int(channels * (bottleneck_width / 64.)) * cardinality
        norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
        self.dropblock_prob = dropblock_prob
        self.use_splat = use_splat
        self.avd = avd and (strides > 1 or previous_dilation != dilation)
        self.avd_first = avd_first

        if self.dropblock_prob > 0:
            self.dropblock1 = DropBlock(dropblock_prob, 3, group_width, *input_size)
            if self.avd:
                if avd_first:
                    input_size = _update_input_size(input_size, strides)
                self.dropblock2 = DropBlock(dropblock_prob, 3, group_width, *input_size)
                if not avd_first:
                    input_size = _update_input_size(input_size, strides)
            else:
                input_size = _update_input_size(input_size, strides)
                self.dropblock2 = DropBlock(dropblock_prob, 3, group_width, *input_size)
            self.dropblock3 = DropBlock(dropblock_prob, 3, channels * 4, *input_size)

        self.conv1 = nn.Conv2D(channels=group_width, kernel_size=1,
                               use_bias=False, in_channels=in_channels)
        self.bn1 = norm_layer(in_channels=group_width, **norm_kwargs)
        self.relu1 = nn.Activation('relu')
        if self.use_splat:
            self.conv2 = SplitAttentionConv(channels=group_width, kernel_size=3,
                                            strides=1 if self.avd else strides,
                                            padding=dilation, dilation=dilation,
                                            groups=cardinality, use_bias=False,
                                            in_channels=group_width, norm_layer=norm_layer,
                                            norm_kwargs=norm_kwargs, radix=radix,
                                            drop_ratio=split_drop_ratio, **kwargs)
        else:
            self.conv2 = nn.Conv2D(channels=group_width, kernel_size=3,
                                   strides=1 if self.avd else strides,
                                   padding=dilation, dilation=dilation,
                                   groups=cardinality, use_bias=False,
                                   in_channels=group_width, **kwargs)
            self.bn2 = norm_layer(in_channels=group_width, **norm_kwargs)
            self.relu2 = nn.Activation('relu')
        self.conv3 = nn.Conv2D(channels=channels * 4, kernel_size=1,
                               use_bias=False, in_channels=group_width)
        if not last_gamma:
            self.bn3 = norm_layer(in_channels=channels * 4, **norm_kwargs)
        else:
            self.bn3 = norm_layer(in_channels=channels * 4, gamma_initializer='zeros',
                                  **norm_kwargs)
        if self.avd:
            self.avd_layer = nn.AvgPool2D(3, strides, padding=1)
        self.relu3 = nn.Activation('relu')
        self.downsample = downsample
        self.dilation = dilation
        self.strides = strides

    def hybrid_forward(self, F, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        if self.dropblock_prob > 0:
            out = self.dropblock1(out)
        out = self.relu1(out)

        if self.avd and self.avd_first:
            out = self.avd_layer(out)

        if self.use_splat:
            out = self.conv2(out)
            if self.dropblock_prob > 0:
                out = self.dropblock2(out)
        else:
            out = self.conv2(out)
            out = self.bn2(out)
            if self.dropblock_prob > 0:
                out = self.dropblock2(out)
            out = self.relu2(out)

        if self.avd and not self.avd_first:
            out = self.avd_layer(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        if self.dropblock_prob > 0:
            out = self.dropblock3(out)

        out = out + residual
        out = self.relu3(out)

        return out
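

# A minimal usage sketch (not part of the original module). It builds a single
# identity-mapping Bottleneck unit (in_channels must equal channels * expansion so
# the residual addition lines up) and runs a random tensor through it. The function
# name `_demo_bottleneck` and the shapes are illustrative assumptions only.
def _demo_bottleneck():
    import mxnet as mx
    block = Bottleneck(channels=64, in_channels=256, strides=1,
                       norm_layer=BatchNorm, use_splat=True, radix=2)
    block.initialize()
    x = mx.nd.random.uniform(shape=(1, 256, 56, 56))
    out = block(x)
    # stride 1 and matching channels keep the shape: (1, 256, 56, 56)
    return out.shape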


class ResNeSt(HybridBlock):
    """ResNeSt Model

    Parameters
    ----------
    block : Block
        Class for the residual block; this module uses :class:`Bottleneck`.
    layers : list of int
        Numbers of layers in each block.
    classes : int, default 1000
        Number of classification classes.
    dilated : bool, default False
        Applying dilation strategy to pretrained ResNet yielding a stride-8 model,
        typically used in Semantic Segmentation.
    norm_layer : object
        Normalization layer used (default: :class:`mxnet.gluon.nn.BatchNorm`).
        Can be :class:`mxnet.gluon.nn.BatchNorm` or :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    last_gamma : bool, default False
        Whether to initialize the gamma of the last BatchNorm layer in each bottleneck to zero.
    deep_stem : bool, default False
        Whether to replace the 7x7 conv1 with 3 3x3 convolution layers.
    avg_down : bool, default False
        Whether to use average pooling for projection skip connection between stages/downsample.
    final_drop : float, default 0.0
        Dropout ratio before the final classification layer.
    use_global_stats : bool, default False
        Whether forcing BatchNorm to use global statistics instead of minibatch statistics;
        optionally set to True if finetuning using ImageNet classification pretrained models.

    Reference:
        - He, Kaiming, et al. "Deep residual learning for image recognition."
          Proceedings of the IEEE conference on computer vision and pattern recognition. 2016.
        - Yu, Fisher, and Vladlen Koltun. "Multi-scale context aggregation by dilated convolutions."
    """
    # pylint: disable=unused-variable
    def __init__(self, block, layers, cardinality=1, bottleneck_width=64,
                 classes=1000, dilated=False, dilation=1, norm_layer=BatchNorm,
                 norm_kwargs=None, last_gamma=False, deep_stem=False, stem_width=32,
                 avg_down=False, final_drop=0.0, use_global_stats=False,
                 name_prefix='', dropblock_prob=0, input_size=224,
                 use_splat=False, radix=2, avd=False, avd_first=False, split_drop_ratio=0):
        self.cardinality = cardinality
        self.bottleneck_width = bottleneck_width
        self.inplanes = stem_width * 2 if deep_stem else 64
        self.radix = radix
        self.split_drop_ratio = split_drop_ratio
        self.avd_first = avd_first
        super(ResNeSt, self).__init__(prefix=name_prefix)
        norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
        if use_global_stats:
            norm_kwargs['use_global_stats'] = True
        self.norm_kwargs = norm_kwargs
        with self.name_scope():
            if not deep_stem:
                self.conv1 = nn.Conv2D(channels=64, kernel_size=7, strides=2,
                                       padding=3, use_bias=False, in_channels=3)
            else:
                self.conv1 = nn.HybridSequential(prefix='conv1')
                self.conv1.add(nn.Conv2D(channels=stem_width, kernel_size=3, strides=2,
                                         padding=1, use_bias=False, in_channels=3))
                self.conv1.add(norm_layer(in_channels=stem_width, **norm_kwargs))
                self.conv1.add(nn.Activation('relu'))
                self.conv1.add(nn.Conv2D(channels=stem_width, kernel_size=3, strides=1,
                                         padding=1, use_bias=False, in_channels=stem_width))
                self.conv1.add(norm_layer(in_channels=stem_width, **norm_kwargs))
                self.conv1.add(nn.Activation('relu'))
                self.conv1.add(nn.Conv2D(channels=stem_width * 2, kernel_size=3, strides=1,
                                         padding=1, use_bias=False, in_channels=stem_width))
            input_size = _update_input_size(input_size, 2)
            self.bn1 = norm_layer(in_channels=64 if not deep_stem else stem_width * 2,
                                  **norm_kwargs)
            self.relu = nn.Activation('relu')
            self.maxpool = nn.MaxPool2D(pool_size=3, strides=2, padding=1)
            input_size = _update_input_size(input_size, 2)

            self.layer1 = self._make_layer(1, block, 64, layers[0], avg_down=avg_down,
                                           norm_layer=norm_layer, last_gamma=last_gamma,
                                           use_splat=use_splat, avd=avd)
            self.layer2 = self._make_layer(2, block, 128, layers[1], strides=2,
                                           avg_down=avg_down, norm_layer=norm_layer,
                                           last_gamma=last_gamma, use_splat=use_splat, avd=avd)
            input_size = _update_input_size(input_size, 2)

            if dilated or dilation == 4:
                self.layer3 = self._make_layer(3, block, 256, layers[2], strides=1, dilation=2,
                                               avg_down=avg_down, norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size, use_splat=use_splat, avd=avd)
                self.layer4 = self._make_layer(4, block, 512, layers[3], strides=1, dilation=4,
                                               pre_dilation=2, avg_down=avg_down,
                                               norm_layer=norm_layer, last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size, use_splat=use_splat, avd=avd)
            elif dilation == 3:
                # special
                self.layer3 = self._make_layer(3, block, 256, layers[2], strides=1, dilation=2,
                                               avg_down=avg_down, norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size, use_splat=use_splat, avd=avd)
                self.layer4 = self._make_layer(4, block, 512, layers[3], strides=2, dilation=2,
                                               pre_dilation=2, avg_down=avg_down,
                                               norm_layer=norm_layer, last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size, use_splat=use_splat, avd=avd)
            elif dilation == 2:
                self.layer3 = self._make_layer(3, block, 256, layers[2], strides=2,
                                               avg_down=avg_down, norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size, use_splat=use_splat, avd=avd)
                self.layer4 = self._make_layer(4, block, 512, layers[3], strides=1, dilation=2,
                                               avg_down=avg_down, norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size, use_splat=use_splat, avd=avd)
            else:
                self.layer3 = self._make_layer(3, block, 256, layers[2], strides=2,
                                               avg_down=avg_down, norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size, use_splat=use_splat, avd=avd)
                input_size = _update_input_size(input_size, 2)
                self.layer4 = self._make_layer(4, block, 512, layers[3], strides=2,
                                               avg_down=avg_down, norm_layer=norm_layer,
                                               last_gamma=last_gamma,
                                               dropblock_prob=dropblock_prob,
                                               input_size=input_size, use_splat=use_splat, avd=avd)
                input_size = _update_input_size(input_size, 2)

            self.avgpool = nn.GlobalAvgPool2D()
            self.flat = nn.Flatten()
            self.drop = None
            if final_drop > 0.0:
                self.drop = nn.Dropout(final_drop)
            self.fc = nn.Dense(in_units=512 * block.expansion, units=classes)

    def _make_layer(self, stage_index, block, planes, blocks, strides=1, dilation=1,
                    pre_dilation=1, avg_down=False, norm_layer=None, last_gamma=False,
                    dropblock_prob=0, input_size=224, use_splat=False, avd=False):
        downsample = None
        if strides != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.HybridSequential(prefix='down%d_' % stage_index)
            with downsample.name_scope():
                if avg_down:
                    if pre_dilation == 1:
                        downsample.add(nn.AvgPool2D(pool_size=strides, strides=strides,
                                                    ceil_mode=True, count_include_pad=False))
                    elif strides == 1:
                        downsample.add(nn.AvgPool2D(pool_size=1, strides=1,
                                                    ceil_mode=True, count_include_pad=False))
                    else:
                        downsample.add(nn.AvgPool2D(pool_size=pre_dilation * strides,
                                                    strides=strides, padding=1,
                                                    ceil_mode=True, count_include_pad=False))
                    downsample.add(nn.Conv2D(channels=planes * block.expansion, kernel_size=1,
                                             strides=1, use_bias=False,
                                             in_channels=self.inplanes))
                    downsample.add(norm_layer(in_channels=planes * block.expansion,
                                              **self.norm_kwargs))
                else:
                    downsample.add(nn.Conv2D(channels=planes * block.expansion, kernel_size=1,
                                             strides=strides, use_bias=False,
                                             in_channels=self.inplanes))
                    downsample.add(norm_layer(in_channels=planes * block.expansion,
                                              **self.norm_kwargs))

        layers = nn.HybridSequential(prefix='layers%d_' % stage_index)
        with layers.name_scope():
            if dilation in (1, 2):
                layers.add(block(planes, cardinality=self.cardinality,
                                 bottleneck_width=self.bottleneck_width,
                                 strides=strides, dilation=pre_dilation,
                                 downsample=downsample, previous_dilation=dilation,
                                 norm_layer=norm_layer, norm_kwargs=self.norm_kwargs,
                                 last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                                 input_size=input_size, use_splat=use_splat, avd=avd,
                                 avd_first=self.avd_first, radix=self.radix,
                                 in_channels=self.inplanes,
                                 split_drop_ratio=self.split_drop_ratio))
            elif dilation == 4:
                layers.add(block(planes, cardinality=self.cardinality,
                                 bottleneck_width=self.bottleneck_width,
                                 strides=strides, dilation=pre_dilation,
                                 downsample=downsample, previous_dilation=dilation,
                                 norm_layer=norm_layer, norm_kwargs=self.norm_kwargs,
                                 last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                                 input_size=input_size, use_splat=use_splat, avd=avd,
                                 avd_first=self.avd_first, radix=self.radix,
                                 in_channels=self.inplanes,
                                 split_drop_ratio=self.split_drop_ratio))
            else:
                raise RuntimeError("=> unknown dilation size: {}".format(dilation))

            input_size = _update_input_size(input_size, strides)
            self.inplanes = planes * block.expansion
            for i in range(1, blocks):
                layers.add(block(planes, cardinality=self.cardinality,
                                 bottleneck_width=self.bottleneck_width,
                                 dilation=dilation, previous_dilation=dilation,
                                 norm_layer=norm_layer, norm_kwargs=self.norm_kwargs,
                                 last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                                 input_size=input_size, use_splat=use_splat, avd=avd,
                                 avd_first=self.avd_first, radix=self.radix,
                                 in_channels=self.inplanes,
                                 split_drop_ratio=self.split_drop_ratio))

        return layers

    def hybrid_forward(self, F, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = self.flat(x)
        if self.drop is not None:
            x = self.drop(x)
        x = self.fc(x)

        return x
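

# A hedged sketch (not part of the original module) of assembling the network
# directly from the class, mirroring what the factory functions below do. The
# layer configuration [3, 4, 6, 3] is the ResNeSt-50 setting; everything else here
# is just one plausible way to call the constructor, not the library's canonical recipe.
def _demo_resnest_from_class():
    import mxnet as mx
    net = ResNeSt(Bottleneck, [3, 4, 6, 3], radix=2, cardinality=1,
                  bottleneck_width=64, deep_stem=True, avg_down=True,
                  avd=True, avd_first=False, use_splat=True,
                  name_prefix='resnest_')
    net.initialize()
    x = mx.nd.random.uniform(shape=(1, 3, 224, 224))
    logits = net(x)
    return logits.shape   # (1, 1000) with the default `classes=1000`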


def resnest14(pretrained=False, root='~/.mxnet/models', ctx=cpu(0), **kwargs):
    """Constructs a ResNeSt-14 model.

    Parameters
    ----------
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    dilated : bool, default False
        Whether to apply dilation strategy to ResNeSt, yielding a stride-8 model.
    norm_layer : object
        Normalization layer used (default: :class:`mxnet.gluon.nn.BatchNorm`).
        Can be :class:`mxnet.gluon.nn.BatchNorm` or :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    """
    model = ResNeSt(Bottleneck, [1, 1, 1, 1],
                    radix=2, cardinality=1, bottleneck_width=64,
                    deep_stem=True, avg_down=True, avd=True, avd_first=False,
                    use_splat=True, dropblock_prob=0.0,
                    name_prefix='resnest_', **kwargs)
    if pretrained:
        from .model_store import get_model_file
        model.load_parameters(get_model_file('resnest14', root=root), ctx=ctx)
        from ..data import ImageNet1kAttr
        attrib = ImageNet1kAttr()
        model.synset = attrib.synset
        model.classes = attrib.classes
        model.classes_long = attrib.classes_long
    return model


def resnest26(pretrained=False, root='~/.mxnet/models', ctx=cpu(0), **kwargs):
    """Constructs a ResNeSt-26 model.

    Parameters
    ----------
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    dilated : bool, default False
        Whether to apply dilation strategy to ResNeSt, yielding a stride-8 model.
    norm_layer : object
        Normalization layer used (default: :class:`mxnet.gluon.nn.BatchNorm`).
        Can be :class:`mxnet.gluon.nn.BatchNorm` or :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    """
    model = ResNeSt(Bottleneck, [2, 2, 2, 2],
                    radix=2, cardinality=1, bottleneck_width=64,
                    deep_stem=True, avg_down=True, avd=True, avd_first=False,
                    use_splat=True, dropblock_prob=0.1,
                    name_prefix='resnest_', **kwargs)
    if pretrained:
        from .model_store import get_model_file
        model.load_parameters(get_model_file('resnest26', root=root), ctx=ctx)
        from ..data import ImageNet1kAttr
        attrib = ImageNet1kAttr()
        model.synset = attrib.synset
        model.classes = attrib.classes
        model.classes_long = attrib.classes_long
    return model


def resnest50(pretrained=False, root='~/.mxnet/models', ctx=cpu(0), **kwargs):
    """Constructs a ResNeSt-50 model.

    Parameters
    ----------
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    dilated : bool, default False
        Whether to apply dilation strategy to ResNeSt, yielding a stride-8 model.
    norm_layer : object
        Normalization layer used (default: :class:`mxnet.gluon.nn.BatchNorm`).
        Can be :class:`mxnet.gluon.nn.BatchNorm` or :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    """
    model = ResNeSt(Bottleneck, [3, 4, 6, 3],
                    radix=2, cardinality=1, bottleneck_width=64,
                    deep_stem=True, avg_down=True, avd=True, avd_first=False,
                    use_splat=True, dropblock_prob=0.1,
                    name_prefix='resnest_', **kwargs)
    if pretrained:
        from .model_store import get_model_file
        model.load_parameters(get_model_file('resnest50', root=root), ctx=ctx)
        from ..data import ImageNet1kAttr
        attrib = ImageNet1kAttr()
        model.synset = attrib.synset
        model.classes = attrib.classes
        model.classes_long = attrib.classes_long
    return model
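

# Hedged usage sketch (not part of the original module): how a caller might load
# ResNeSt-50 with pretrained ImageNet weights and classify a random image. Setting
# `pretrained=True` downloads parameters on first use; the 1x3x224x224 tensor shape
# is the standard ImageNet crop and is an assumption of this example.
def _demo_resnest50_inference():
    import mxnet as mx
    net = resnest50(pretrained=True)
    x = mx.nd.random.uniform(shape=(1, 3, 224, 224))
    probs = mx.nd.softmax(net(x), axis=-1)
    top1 = int(probs.argmax(axis=-1).asscalar())
    # `net.classes` is populated from ImageNet1kAttr when pretrained weights are loaded
    return net.classes[top1], float(probs[0, top1].asscalar())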


def resnest101(pretrained=False, root='~/.mxnet/models', ctx=cpu(0), **kwargs):
    """Constructs a ResNeSt-101 model.

    Parameters
    ----------
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    dilated : bool, default False
        Whether to apply dilation strategy to ResNeSt, yielding a stride-8 model.
    norm_layer : object
        Normalization layer used (default: :class:`mxnet.gluon.nn.BatchNorm`).
        Can be :class:`mxnet.gluon.nn.BatchNorm` or :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    """
    model = ResNeSt(Bottleneck, [3, 4, 23, 3],
                    radix=2, cardinality=1, bottleneck_width=64,
                    deep_stem=True, avg_down=True, stem_width=64,
                    avd=True, avd_first=False, use_splat=True, dropblock_prob=0.1,
                    name_prefix='resnest_', **kwargs)
    if pretrained:
        from .model_store import get_model_file
        model.load_parameters(get_model_file('resnest101', root=root), ctx=ctx)
        from ..data import ImageNet1kAttr
        attrib = ImageNet1kAttr()
        model.synset = attrib.synset
        model.classes = attrib.classes
        model.classes_long = attrib.classes_long
    return model


def resnest200(pretrained=False, root='~/.mxnet/models', ctx=cpu(0), **kwargs):
    """Constructs a ResNeSt-200 model.

    Parameters
    ----------
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    dilated : bool, default False
        Whether to apply dilation strategy to ResNeSt, yielding a stride-8 model.
    norm_layer : object
        Normalization layer used (default: :class:`mxnet.gluon.nn.BatchNorm`).
        Can be :class:`mxnet.gluon.nn.BatchNorm` or :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    """
    model = ResNeSt(Bottleneck, [3, 24, 36, 3],
                    deep_stem=True, avg_down=True, stem_width=64,
                    avd=True, use_splat=True, dropblock_prob=0.1, final_drop=0.2,
                    name_prefix='resnest_', **kwargs)
    if pretrained:
        from .model_store import get_model_file
        model.load_parameters(get_model_file('resnest200', root=root), ctx=ctx)
        from ..data import ImageNet1kAttr
        attrib = ImageNet1kAttr()
        model.synset = attrib.synset
        model.classes = attrib.classes
        model.classes_long = attrib.classes_long
    return model


def resnest269(pretrained=False, root='~/.mxnet/models', ctx=cpu(0), **kwargs):
    """Constructs a ResNeSt-269 model.

    Parameters
    ----------
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    dilated : bool, default False
        Whether to apply dilation strategy to ResNeSt, yielding a stride-8 model.
    norm_layer : object
        Normalization layer used (default: :class:`mxnet.gluon.nn.BatchNorm`).
        Can be :class:`mxnet.gluon.nn.BatchNorm` or :class:`mxnet.gluon.contrib.nn.SyncBatchNorm`.
    """
    model = ResNeSt(Bottleneck, [3, 30, 48, 8],
                    deep_stem=True, avg_down=True, stem_width=64,
                    avd=True, use_splat=True, dropblock_prob=0.1, final_drop=0.2,
                    name_prefix='resnest_', **kwargs)
    if pretrained:
        from .model_store import get_model_file
        model.load_parameters(get_model_file('resnest269', root=root), ctx=ctx)
        from ..data import ImageNet1kAttr
        attrib = ImageNet1kAttr()
        model.synset = attrib.synset
        model.classes = attrib.classes
        model.classes_long = attrib.classes_long
    return model
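

# Hedged sketch (not part of the original module): the `dilated` flag mentioned in the
# docstrings keeps the last two stages at stride 1 with dilation, so the backbone's
# output stride drops from 32 to 8 -- the usual setup for segmentation backbones.
# The manual layer chaining and the shapes below are illustrative assumptions only.
def _demo_dilated_backbone():
    import mxnet as mx
    net = resnest50(dilated=True)       # `dilated` is forwarded to ResNeSt via **kwargs
    net.initialize()
    x = mx.nd.random.uniform(shape=(1, 3, 224, 224))
    feat = net.layer4(net.layer3(net.layer2(net.layer1(
        net.maxpool(net.relu(net.bn1(net.conv1(x))))))))
    return feat.shape   # (1, 2048, 28, 28): 224 / 8 = 28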