Source code for gluoncv.model_zoo.monodepthv2.monodepth2

Digging Into Self-Supervised Monocular Depth Estimation, ICCV 2019
from mxnet.gluon import nn
from mxnet.context import cpu

from .resnet_encoder import ResnetEncoder
from .depth_decoder import DepthDecoder

class MonoDepth2(nn.HybridBlock):
    r"""Monodepth2 network: a ResNet encoder followed by a depth decoder.

    Parameters
    ----------
    backbone : string
        Backbone network type ('resnet18', 'resnet34', 'resnet50',
        'resnet101' or 'resnet152'). Passed through to ``ResnetEncoder``.
    pretrained_base : bool or str
        Whether the backbone is pretrained. If `True`, weights of a model
        trained on ImageNet are loaded. When this value is falsy the encoder
        is initialized here on ``ctx`` instead.
    num_input_images : int
        The number of input sequences. 1 for depth encoder, larger than 1 for
        pose encoder. (Default: 1)
    scales : list
        The scales used in the loss. (Default: range(4))
    num_output_channels : int
        The number of output channels. (Default: 1)
    use_skips : bool
        Use the skip architecture in the network. (Default: True)
    ctx : Context
        Context handed to the encoder and used to initialize the encoder
        (when not pretrained) and the decoder. (Default: cpu())
    **kwargs
        Accepted for call-site compatibility; not used by this constructor.

    Reference:

        Clement Godard, Oisin Mac Aodha, Michael Firman, Gabriel Brostow.
        "Digging Into Self-Supervised Monocular Depth Estimation." ICCV, 2019

    Examples
    --------
    >>> model = MonoDepth2(backbone='resnet18', pretrained_base=True)
    >>> print(model)
    """
    # pylint: disable=unused-argument
    def __init__(self, backbone, pretrained_base, num_input_images=1,
                 scales=range(4), num_output_channels=1, use_skips=True,
                 ctx=cpu(), **kwargs):
        super(MonoDepth2, self).__init__()

        with self.name_scope():
            # Encoder first: the decoder is sized from the encoder's
            # per-stage channel counts (num_ch_enc).
            encoder = ResnetEncoder(
                backbone,
                pretrained_base,
                num_input_images=num_input_images,
                ctx=ctx,
            )
            self.encoder = encoder
            if not pretrained_base:
                # No pretrained backbone weights requested, so the encoder
                # parameters are initialized explicitly.
                encoder.initialize(ctx=ctx)

            decoder = DepthDecoder(
                encoder.num_ch_enc,
                scales,
                num_output_channels,
                use_skips,
            )
            self.decoder = decoder
            # The decoder is always initialized here.
            decoder.initialize(ctx=ctx)

    def hybrid_forward(self, F, x):
        """Run the encoder on ``x`` and return the decoder's output."""
        # pylint: disable=unused-argument
        return self.decoder(self.encoder(x))

    def demo(self, x):
        """Alias for :meth:`predict`."""
        return self.predict(x)

    def predict(self, x):
        """Return the decoder's ``predict`` output for the encoder's ``predict`` features of ``x``."""
        return self.decoder.predict(self.encoder.predict(x))
def get_monodepth2(backbone='resnet18', pretrained_base=True, scales=range(4),
                   num_output_channels=1, use_skips=True, root='~/.mxnet/models',
                   ctx=cpu(0), pretrained=False, pretrained_model='kitti_stereo_640x192',
                   **kwargs):
    r"""Build a MonoDepth2 model and optionally load pretrained weights.

    Parameters
    ----------
    backbone : string, default: 'resnet18'
        Backbone network type ('resnet18', 'resnet34', 'resnet50',
        'resnet101' or 'resnet152').
    pretrained_base : bool or str, default: True
        Load a pretrained backbone network that was trained on ImageNet.
    scales : list, default: range(4)
        The scales used in the loss.
    num_output_channels : int, default: 1
        The number of output channels.
    use_skips : bool, default: True
        Use the skip architecture in the network.
    root : str, default: '~/.mxnet/models'
        Location for keeping the model parameters.
    ctx : Context, default: CPU
        The context in which to build the model and load the pretrained
        weights.
    pretrained : bool or str, default: False
        Boolean value controls whether to load the default pretrained weights
        for the model. String value represents the hashtag for a certain
        version of pretrained weights. The value is forwarded as ``tag`` to
        the model store lookup.
    pretrained_model : string, default: 'kitti_stereo_640x192'
        The dataset that the model was pretrained on; combined with
        ``backbone`` to form the model-store name.
    **kwargs
        Extra keyword arguments forwarded to ``MonoDepth2``.

    Returns
    -------
    MonoDepth2
        The constructed network, with pretrained parameters loaded when
        ``pretrained`` is truthy.
    """
    net = MonoDepth2(
        backbone=backbone,
        pretrained_base=pretrained_base,
        scales=scales,
        num_output_channels=num_output_channels,
        use_skips=use_skips,
        ctx=ctx,
        **kwargs
    )
    if not pretrained:
        return net

    # Imported lazily so the model store is only touched when weights are
    # actually requested.
    from ...model_zoo.model_store import get_model_file

    store_name = 'monodepth2_%s_%s' % (backbone, pretrained_model)
    weight_file = get_model_file(store_name, tag=pretrained, root=root)
    net.load_parameters(weight_file, ctx=ctx)
    return net
def get_monodepth2_resnet18_kitti_stereo_640x192(**kwargs):
    r"""Monodepth2 with a 'resnet18' backbone for 'kitti_stereo_640x192'.

    Thin wrapper around ``get_monodepth2`` that fixes ``backbone`` to
    'resnet18' and ``pretrained_model`` to 'kitti_stereo_640x192'.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to ``get_monodepth2``. ``backbone`` and
        ``pretrained_model`` are already supplied here, so passing either one
        raises ``TypeError`` (duplicate keyword argument).

    Returns
    -------
    The model returned by ``get_monodepth2``.
    """
    return get_monodepth2(
        backbone='resnet18',
        pretrained_model='kitti_stereo_640x192',
        **kwargs
    )
def get_monodepth2_resnet18_kitti_mono_640x192(**kwargs):
    r"""Monodepth2 with a 'resnet18' backbone for 'kitti_mono_640x192'.

    Thin wrapper around ``get_monodepth2`` that fixes ``backbone`` to
    'resnet18' and ``pretrained_model`` to 'kitti_mono_640x192'.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to ``get_monodepth2``. ``backbone`` and
        ``pretrained_model`` are already supplied here, so passing either one
        raises ``TypeError`` (duplicate keyword argument).

    Returns
    -------
    The model returned by ``get_monodepth2``.
    """
    return get_monodepth2(
        backbone='resnet18',
        pretrained_model='kitti_mono_640x192',
        **kwargs
    )
def get_monodepth2_resnet18_kitti_mono_stereo_640x192(**kwargs):
    r"""Monodepth2 with a 'resnet18' backbone for 'kitti_mono_stereo_640x192'.

    Thin wrapper around ``get_monodepth2`` that fixes ``backbone`` to
    'resnet18' and ``pretrained_model`` to 'kitti_mono_stereo_640x192'.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to ``get_monodepth2``. ``backbone`` and
        ``pretrained_model`` are already supplied here, so passing either one
        raises ``TypeError`` (duplicate keyword argument).

    Returns
    -------
    The model returned by ``get_monodepth2``.
    """
    return get_monodepth2(
        backbone='resnet18',
        pretrained_model='kitti_mono_stereo_640x192',
        **kwargs
    )