Source code for gluoncv.model_zoo.monodepthv2.monodepth2_posenet
"""Monodepth
Digging Into Self-Supervised Monocular Depth Estimation, ICCV 2019
https://arxiv.org/abs/1806.01260
"""
from mxnet.gluon import nn
from mxnet.context import cpu
from .resnet_encoder import ResnetEncoder
from .pose_decoder import PoseDecoder
[docs]class MonoDepth2PoseNet(nn.HybridBlock):
r"""Monodepth2
Parameters
----------
backbone : string
Pre-trained dilated backbone network type ('resnet18', 'resnet34', 'resnet50',
'resnet101' or 'resnet152').
pretrained_base : bool or str
Refers to if the backbone is pretrained or not. If `True`,
model weights of a model that was trained on ImageNet is loaded.
num_input_images : int
The number of input sequences. 1 for depth encoder, larger than 1 for pose encoder.
(Default: 2)
num_input_features : int
The number of input feature maps from posenet encoder. (Default: 1)
num_frames_to_predict_for: int
The number of output pose between frames; If None, it equals num_input_features - 1.
(Default: 2)
stride: int
The stride number for Conv in pose decoder. (Default: 1)
Reference:
Clement Godard, Oisin Mac Aodha, Michael Firman, Gabriel Brostow.
"Digging Into Self-Supervised Monocular Depth Estimation." ICCV, 2019
Examples
--------
>>> model = MonoDepth2PoseNet(backbone='resnet18', pretrained_base=True)
>>> print(model)
"""
# pylint: disable=unused-argument
def __init__(self, backbone, pretrained_base, num_input_images=2, num_input_features=1,
num_frames_to_predict_for=2, stride=1, ctx=cpu(), **kwargs):
super(MonoDepth2PoseNet, self).__init__()
with self.name_scope():
self.encoder = ResnetEncoder(backbone, pretrained_base,
num_input_images=num_input_images, ctx=ctx)
if not pretrained_base:
self.encoder.initialize(ctx=ctx)
self.decoder = PoseDecoder(self.encoder.num_ch_enc,
num_input_features=num_input_features,
num_frames_to_predict_for=num_frames_to_predict_for,
stride=stride)
self.decoder.initialize(ctx=ctx)
[docs] def hybrid_forward(self, F, x):
# pylint: disable=unused-argument
features = [self.encoder(x)]
axisangle, translation = self.decoder(features)
return axisangle, translation
def demo(self, x):
return self.predict(x)
def predict(self, x):
features = [self.encoder.predict(x)]
axisangle, translation = self.decoder.predict(features)
return axisangle, translation
[docs]def get_monodepth2posenet(backbone='resnet18', pretrained_base=True, num_input_images=2,
num_input_features=1, num_frames_to_predict_for=2, stride=1,
root='~/.mxnet/models', ctx=cpu(0), pretrained=False,
pretrained_model='kitti_stereo_640x192', **kwargs):
r"""Monodepth2
Parameters
----------
backbone : string
Pre-trained dilated backbone network type ('resnet18', 'resnet34', 'resnet50',
'resnet101' or 'resnet152').
pretrained_base : bool or str
Refers to if the backbone is pretrained or not. If `True`,
model weights of a model that was trained on ImageNet is loaded.
num_input_images : int
The number of input sequences. 1 for depth encoder, larger than 1 for pose encoder.
(Default: 2)
num_input_features : int
The number of input feature maps from posenet encoder. (Default: 1)
num_frames_to_predict_for: int
The number of output pose between frames; If None, it equals num_input_features - 1.
(Default: 2)
stride: int
The stride number for Conv in pose decoder. (Default: 1)
ctx : Context, default: CPU
The context in which to load the pretrained weights.
root : str, default: '~/.mxnet/models'
Location for keeping the model parameters.
pretrained : bool or str, default: False
Boolean value controls whether to load the default pretrained weights for model.
String value represents the hashtag for a certain version of pretrained weights.
pretrained_model : string, default: kitti_stereo_640x192
The dataset that model pretrained on.
"""
model = MonoDepth2PoseNet(
backbone=backbone, pretrained_base=pretrained_base,
num_input_images=num_input_images, num_input_features=num_input_features,
num_frames_to_predict_for=num_frames_to_predict_for, stride=stride,
ctx=ctx, **kwargs)
if pretrained:
from ...model_zoo.model_store import get_model_file
model.load_parameters(
get_model_file('monodepth2_%s_%s' % (backbone, pretrained_model),
tag=pretrained, root=root),
ctx=ctx
)
return model
[docs]def get_monodepth2_resnet18_posenet_kitti_mono_640x192(**kwargs):
r"""Monodepth2 PoseNet
Parameters
----------
backbone : string
Pre-trained dilated backbone network type (default:'resnet18').
"""
return get_monodepth2posenet(backbone='resnet18',
pretrained_model='posenet_kitti_mono_640x192', **kwargs)
[docs]def get_monodepth2_resnet18_posenet_kitti_mono_stereo_640x192(**kwargs):
r"""Monodepth2 PoseNet
Parameters
----------
backbone : string
Pre-trained dilated backbone network type (default:'resnet18').
"""
return get_monodepth2posenet(backbone='resnet18',
pretrained_model='posenet_kitti_mono_stereo_640x192', **kwargs)