Source code for gluoncv.model_zoo.fastscnn

# pylint: disable=unused-argument, arguments-differ, unused-variable
"""Fast-SCNN, implemented in Gluon. Code adapted from lxtGH/Fast_Seg"""
__all__ = ['FastSCNN', 'get_fastscnn', 'get_fastscnn_citys']

from mxnet.context import cpu
from mxnet.gluon.block import HybridBlock
from mxnet.gluon import nn


class FastSCNN(HybridBlock):
    r"""Fast-SCNN: Fast Semantic Segmentation Network

    Parameters
    ----------
    nclass : int
        Number of categories for the training dataset.
    norm_layer : object
        Normalization layer used in backbone network
        (default: :class:`mxnet.gluon.nn.BatchNorm`).
    aux : bool
        Auxiliary loss.

    Reference:
        Rudra P K Poudel, et al. "Fast-SCNN: Fast Semantic Segmentation Network."
        *BMVC*, 2019.
        https://bmvc2019.org/wp-content/uploads/papers/0959-paper.pdf
    """

    def __init__(self, nclass, aux=True, ctx=cpu(), pretrained_base=False,
                 height=None, width=None, base_size=2048, crop_size=1024, **kwargs):
        super(FastSCNN, self).__init__()

        height = height if height is not None else crop_size
        width = width if width is not None else crop_size
        self._up_kwargs = {'height': height, 'width': width}
        self.base_size = base_size
        self.crop_size = crop_size
        self.aux = aux
        with self.name_scope():
            self.learning_to_downsample = LearningToDownsample(32, 48, 64, **kwargs)
            self.learning_to_downsample.initialize(ctx=ctx)
            self.global_feature_extractor = GlobalFeatureExtractor(
                64, [64, 96, 128], 128, 6, [3, 3, 3],
                height=height // 32, width=width // 32, **kwargs)
            self.global_feature_extractor.initialize(ctx=ctx)
            self.feature_fusion = FeatureFusionModule(64, 128, 128, height=height // 8,
                                                      width=width // 8, **kwargs)
            self.feature_fusion.initialize(ctx=ctx)
            self.classifier = Classifer(128, nclass, **kwargs)
            self.classifier.initialize(ctx=ctx)
            if self.aux:
                self.auxlayer = _auxHead(in_channels=64, channels=64, nclass=nclass, **kwargs)
                self.auxlayer.initialize(ctx=ctx)
                self.auxlayer.collect_params().setattr('lr_mult', 10)

    def hybrid_forward(self, F, x):
        """hybrid forward for Fast SCNN"""
        higher_res_features = self.learning_to_downsample(x)
        x = self.global_feature_extractor(higher_res_features)
        x = self.feature_fusion(higher_res_features, x)
        x = self.classifier(x)
        x = F.contrib.BilinearResize2D(x, **self._up_kwargs)

        outputs = []
        outputs.append(x)
        if self.aux:
            auxout = self.auxlayer(higher_res_features)
            auxout = F.contrib.BilinearResize2D(auxout, **self._up_kwargs)
            outputs.append(auxout)
        return tuple(outputs)

    def demo(self, x):
        """fastscnn demo"""
        h, w = x.shape[2:]
        # Adapt the stored upsampling sizes to the actual input resolution.
        self._up_kwargs['height'] = h
        self._up_kwargs['width'] = w
        self.global_feature_extractor.ppm._up_kwargs = {'height': h // 32, 'width': w // 32}
        self.feature_fusion._up_kwargs = {'height': h // 8, 'width': w // 8}
        higher_res_features = self.learning_to_downsample(x)
        x = self.global_feature_extractor(higher_res_features)
        x = self.feature_fusion(higher_res_features, x)
        x = self.classifier(x)
        # Imperative (ndarray) API, since demo runs outside hybrid_forward.
        import mxnet.ndarray as F
        x = F.contrib.BilinearResize2D(x, **self._up_kwargs)
        return x

    def predict(self, x):
        """fastscnn predict"""
        return self.demo(x)

    def evaluate(self, x):
        """evaluating network with inputs and targets"""
        return self.forward(x)[0]
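
# Minimal usage sketch (not from the original source; it assumes a 19-class,
# Cityscapes-style setup and the default crop_size of 1024):
#
#     import mxnet as mx
#     net = FastSCNN(nclass=19)                           # submodules are initialized in __init__
#     x = mx.nd.random.uniform(shape=(1, 3, 1024, 1024))  # NCHW dummy image
#     out = net.predict(x)                                # (1, 19, 1024, 1024) class scores
#     labels = mx.nd.argmax(out, axis=1)                  # per-pixel label map
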
class FeatureFusionModule(HybridBlock):
    """FastSCNN feature fusion module"""

    def __init__(self, highter_in_channels, lower_in_channels, out_channels,
                 height, width, scale_factor=4,
                 norm_layer=nn.BatchNorm, norm_kwargs=None, **kwargs):
        super(FeatureFusionModule, self).__init__()
        self.scale_factor = scale_factor
        self._up_kwargs = {'height': height, 'width': width}
        with self.name_scope():
            self.dwconv = _DWConv(lower_in_channels, out_channels,
                                  norm_layer=norm_layer, norm_kwargs=norm_kwargs)
            self.conv_lower_res = nn.Conv2D(in_channels=out_channels, channels=out_channels,
                                            kernel_size=1)
            self.conv_higher_res = nn.Conv2D(in_channels=highter_in_channels,
                                             channels=out_channels, kernel_size=1)
            self.bn = norm_layer(in_channels=out_channels)
            self.bn_high = norm_layer(in_channels=out_channels)
            self.relu = nn.Activation('relu')

    def hybrid_forward(self, F, higher_res_feature, lower_res_feature):
        lower_res_feature = F.contrib.BilinearResize2D(lower_res_feature, **self._up_kwargs)
        lower_res_feature = self.dwconv(lower_res_feature)
        lower_res_feature = self.bn(self.conv_lower_res(lower_res_feature))

        higher_res_feature = self.bn_high(self.conv_higher_res(higher_res_feature))
        out = higher_res_feature + lower_res_feature
        return self.relu(out)


class Classifer(HybridBlock):
    """FastSCNN classifier"""

    def __init__(self, dw_channels, num_classes, stride=1,
                 norm_layer=nn.BatchNorm, norm_kwargs=None, **kwargs):
        super(Classifer, self).__init__()
        with self.name_scope():
            self.dsconv1 = _DSConv(dw_channels, dw_channels, stride,
                                   norm_layer=norm_layer, norm_kwargs=norm_kwargs)
            self.dsconv2 = _DSConv(dw_channels, dw_channels, stride,
                                   norm_layer=norm_layer, norm_kwargs=norm_kwargs)
            self.dp = nn.Dropout(0.1)
            self.conv = nn.Conv2D(in_channels=dw_channels, channels=num_classes, kernel_size=1)

    def hybrid_forward(self, F, x):
        x = self.dsconv1(x)
        x = self.dsconv2(x)
        x = self.dp(x)
        x = self.conv(x)
        return x


class _auxHead(HybridBlock):
    # pylint: disable=redefined-outer-name
    def __init__(self, in_channels=64, channels=64, nclass=19,
                 norm_layer=nn.BatchNorm, norm_kwargs=None, **kwargs):
        super(_auxHead, self).__init__()
        with self.name_scope():
            self.block = nn.HybridSequential()
            with self.block.name_scope():
                self.block.add(nn.Conv2D(in_channels=in_channels, channels=channels,
                                         kernel_size=3, padding=1, use_bias=False))
                self.block.add(norm_layer(in_channels=channels,
                                          **({} if norm_kwargs is None else norm_kwargs)))
                self.block.add(nn.Activation('relu'))
                self.block.add(nn.Dropout(0.1))
                self.block.add(nn.Conv2D(in_channels=channels, channels=nclass, kernel_size=1))

    def hybrid_forward(self, F, x):
        return self.block(x)


class _ConvBNReLU(HybridBlock):
    """Conv-BN-ReLU"""

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0,
                 norm_layer=nn.BatchNorm, norm_kwargs=None, **kwargs):
        super(_ConvBNReLU, self).__init__()
        with self.name_scope():
            self.block = nn.HybridSequential()
            self.block.add(nn.Conv2D(in_channels=in_channels, channels=out_channels,
                                     kernel_size=kernel_size, padding=padding,
                                     strides=stride, use_bias=False))
            self.block.add(norm_layer(in_channels=out_channels,
                                      **({} if norm_kwargs is None else norm_kwargs)))
            self.block.add(nn.Activation('relu'))

    def hybrid_forward(self, F, x):
        return self.block(x)


class _DSConv(HybridBlock):
    """Depthwise Separable Convolutions"""

    def __init__(self, dw_channels, out_channels, stride=1,
                 norm_layer=nn.BatchNorm, norm_kwargs=None, **kwargs):
        super(_DSConv, self).__init__()
        with self.name_scope():
            self.conv = nn.HybridSequential()
            self.conv.add(nn.Conv2D(in_channels=dw_channels, channels=dw_channels,
                                    kernel_size=3, strides=stride, padding=1,
                                    groups=dw_channels, use_bias=False))
            self.conv.add(norm_layer(in_channels=dw_channels,
                                     **({} if norm_kwargs is None else norm_kwargs)))
            self.conv.add(nn.Activation('relu'))
            self.conv.add(nn.Conv2D(in_channels=dw_channels, channels=out_channels,
                                    kernel_size=1, use_bias=False))
            self.conv.add(norm_layer(in_channels=out_channels,
                                     **({} if norm_kwargs is None else norm_kwargs)))
            self.conv.add(nn.Activation('relu'))

    def hybrid_forward(self, F, x):
        return self.conv(x)


class LearningToDownsample(HybridBlock):
    """Learning to downsample module"""

    def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64,
                 norm_layer=nn.BatchNorm, norm_kwargs=None, **kwargs):
        super(LearningToDownsample, self).__init__()
        with self.name_scope():
            self.conv = _ConvBNReLU(3, dw_channels1, 3, 2,
                                    norm_layer=norm_layer, norm_kwargs=norm_kwargs, **kwargs)
            self.dsconv1 = _DSConv(dw_channels1, dw_channels2, 2,
                                   norm_layer=norm_layer, norm_kwargs=norm_kwargs, **kwargs)
            self.dsconv2 = _DSConv(dw_channels2, out_channels, 2,
                                   norm_layer=norm_layer, norm_kwargs=norm_kwargs, **kwargs)

    def hybrid_forward(self, F, x):
        x = self.conv(x)
        x = self.dsconv1(x)
        x = self.dsconv2(x)
        return x


class GlobalFeatureExtractor(HybridBlock):
    """Global feature extractor module"""

    def __init__(self, in_channels=64, block_channels=(64, 96, 128),
                 out_channels=128, t=6, num_blocks=(3, 3, 3), height=32, width=64,
                 norm_layer=nn.BatchNorm, norm_kwargs=None, **kwargs):
        super(GlobalFeatureExtractor, self).__init__()
        self.num_blocks = num_blocks
        with self.name_scope():
            self.bottleneck1 = self._make_layer(LinearBottleneck, in_channels, block_channels[0],
                                                num_blocks[0], t, 2, norm_layer, norm_kwargs)
            self.bottleneck2 = self._make_layer(LinearBottleneck, block_channels[0],
                                                block_channels[1], num_blocks[1], t, 2,
                                                norm_layer, norm_kwargs)
            self.bottleneck3 = self._make_layer(LinearBottleneck, block_channels[1],
                                                block_channels[2], num_blocks[2], t, 1,
                                                norm_layer, norm_kwargs)
            self.ppm = _FastPyramidPooling(block_channels[2], out_channels,
                                           height=height, width=width,
                                           norm_layer=norm_layer, norm_kwargs=norm_kwargs)

    def _make_layer(self, block, inplanes, planes, blocks, t=6, stride=1,
                    norm_layer=nn.BatchNorm, norm_kwargs=None):
        layers = nn.HybridSequential()
        with layers.name_scope():
            layers.add(block(inplanes, planes, t, stride, norm_layer, norm_kwargs))
            for i in range(1, blocks):
                layers.add(block(planes, planes, t, 1, norm_layer, norm_kwargs))
        return layers

    def hybrid_forward(self, F, x):
        x = self.bottleneck1(x)
        x = self.bottleneck2(x)
        x = self.bottleneck3(x)
        x = self.ppm(x)
        return x


class LinearBottleneck(HybridBlock):
    """LinearBottleneck used in MobileNetV2"""

    def __init__(self, in_channels, out_channels, t=6, stride=2,
                 norm_layer=nn.BatchNorm, norm_kwargs=None):
        super(LinearBottleneck, self).__init__()
        self.use_shortcut = stride == 1 and in_channels == out_channels
        with self.name_scope():
            self.block = nn.HybridSequential()
            self.block.add(_ConvBNReLU(in_channels, in_channels * t, 1,
                                       norm_layer=norm_layer, norm_kwargs=norm_kwargs))
            self.block.add(_DWConv(in_channels * t, in_channels * t, stride,
                                   norm_layer=norm_layer, norm_kwargs=norm_kwargs))
            self.block.add(nn.Conv2D(in_channels=in_channels * t, channels=out_channels,
                                     kernel_size=1, use_bias=False))
            self.block.add(norm_layer(in_channels=out_channels,
                                      **({} if norm_kwargs is None else norm_kwargs)))

    def hybrid_forward(self, F, x):
        out = self.block(x)
        if self.use_shortcut:
            out = x + out
        return out


class _DWConv(HybridBlock):
    def __init__(self, dw_channels, out_channels, stride=1,
                 norm_layer=nn.BatchNorm, norm_kwargs=None):
        super(_DWConv, self).__init__()
        with self.name_scope():
            self.conv = nn.HybridSequential()
            self.conv.add(nn.Conv2D(in_channels=dw_channels, channels=out_channels,
                                    kernel_size=3, strides=stride, padding=1,
                                    groups=dw_channels, use_bias=False))
            self.conv.add(norm_layer(in_channels=out_channels,
                                     **({} if norm_kwargs is None else norm_kwargs)))
            self.conv.add(nn.Activation('relu'))

    def hybrid_forward(self, F, x):
        return self.conv(x)


def _PSP1x1Conv(in_channels, out_channels, norm_layer, norm_kwargs):
    block = nn.HybridSequential()
    with block.name_scope():
        block.add(nn.Conv2D(in_channels=in_channels, channels=out_channels,
                            kernel_size=1, use_bias=False))
        block.add(norm_layer(in_channels=out_channels,
                             **({} if norm_kwargs is None else norm_kwargs)))
        block.add(nn.Activation('relu'))
    return block


class _FastPyramidPooling(HybridBlock):
    def __init__(self, in_channels, ppm_out_channels, height=32, width=64,
                 norm_layer=nn.BatchNorm, norm_kwargs=None):
        super(_FastPyramidPooling, self).__init__()
        out_channels = int(in_channels / 4)
        self._up_kwargs = {'height': height, 'width': width}
        with self.name_scope():
            self.conv1 = _PSP1x1Conv(in_channels, out_channels, norm_layer, norm_kwargs)
            self.conv2 = _PSP1x1Conv(in_channels, out_channels, norm_layer, norm_kwargs)
            self.conv3 = _PSP1x1Conv(in_channels, out_channels, norm_layer, norm_kwargs)
            self.conv4 = _PSP1x1Conv(in_channels, out_channels, norm_layer, norm_kwargs)
            self.out = _ConvBNReLU(in_channels * 2, ppm_out_channels, 1,
                                   norm_layer=norm_layer, norm_kwargs=norm_kwargs)

    def pool(self, F, x, size):
        return F.contrib.AdaptiveAvgPooling2D(x, output_size=size)

    def upsample(self, F, x):
        return F.contrib.BilinearResize2D(x, **self._up_kwargs)

    def hybrid_forward(self, F, x):
        feat1 = self.upsample(F, self.conv1(self.pool(F, x, 1)))
        feat2 = self.upsample(F, self.conv2(self.pool(F, x, 2)))
        feat3 = self.upsample(F, self.conv3(self.pool(F, x, 3)))
        feat4 = self.upsample(F, self.conv4(self.pool(F, x, 6)))
        x = F.concat(x, feat1, feat2, feat3, feat4, dim=1)
        x = self.out(x)
        return x

    def demo(self, x):
        """PyramidPooling for Fast SCNN"""
        self._up_kwargs['height'] = x.shape[2]
        self._up_kwargs['width'] = x.shape[3]
        import mxnet.ndarray as F
        feat1 = self.upsample(F, self.conv1(self.pool(F, x, 1)))
        feat2 = self.upsample(F, self.conv2(self.pool(F, x, 2)))
        feat3 = self.upsample(F, self.conv3(self.pool(F, x, 3)))
        feat4 = self.upsample(F, self.conv4(self.pool(F, x, 6)))
        x = F.concat(x, feat1, feat2, feat3, feat4, dim=1)
        x = self.out(x)
        return x
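
# Channel bookkeeping for _FastPyramidPooling (a worked check, not part of the
# original source): each of the four pooled branches is reduced to in_channels // 4,
# so concatenating the input with the four upsampled branches yields
# in_channels + 4 * (in_channels // 4) = 2 * in_channels channels, matching the
# `in_channels * 2` fed to `self.out`. For example:
#
#     import mxnet as mx
#     ppm = _FastPyramidPooling(128, 128, height=32, width=64)
#     ppm.initialize()
#     y = ppm(mx.nd.zeros((1, 128, 32, 64)))  # concat is (1, 256, 32, 64); y is (1, 128, 32, 64)
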
def get_fastscnn(dataset='citys', ctx=cpu(0), pretrained=False,
                 root='~/.mxnet/models', **kwargs):
    r"""Fast-SCNN: Fast Semantic Segmentation Network

    Parameters
    ----------
    dataset : str, default 'citys'
        The dataset that provides the class set; only Cityscapes ('citys')
        is currently available.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Examples
    --------
    >>> model = get_fastscnn(dataset='citys')
    >>> print(model)
    """
    acronyms = {
        'citys': 'citys',
    }
    from ..data import datasets
    model = FastSCNN(datasets[dataset].NUM_CLASS, ctx=ctx, **kwargs)
    model.classes = datasets[dataset].classes
    if pretrained:
        from .model_store import get_model_file
        model.load_parameters(get_model_file('fastscnn_%s' % (acronyms[dataset]),
                                             tag=pretrained, root=root),
                              ctx=ctx)
    return model

def get_fastscnn_citys(**kwargs):
    r"""Fast-SCNN: Fast Semantic Segmentation Network, trained on Cityscapes

    Parameters
    ----------
    ctx : Context, default CPU
        The context in which to load the pretrained weights.

    Examples
    --------
    >>> model = get_fastscnn_citys()
    >>> print(model)
    """
    return get_fastscnn('citys', **kwargs)
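
# End-to-end sketch with pretrained weights (a hedged example: the random input
# stands in for a real image at the standard 1024 x 2048 Cityscapes resolution,
# and assumes pretrained Cityscapes weights are published for this model):
#
#     import mxnet as mx
#     from gluoncv.model_zoo import get_fastscnn_citys
#     model = get_fastscnn_citys(pretrained=True)
#     img = mx.nd.random.uniform(shape=(1, 3, 1024, 2048))
#     output = model.predict(img)
#     mask = mx.nd.squeeze(mx.nd.argmax(output, axis=1)).asnumpy()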