Source code for gluoncv.data.transforms.bbox
"""Bounding boxes transformation functions."""
from __future__ import division
import numpy as np
# Names exported by a star-import of this module. The helpers ``get_rot_dir``
# and ``get_3rd_point`` are defined below but are not part of this list.
__all__ = ['crop', 'flip', 'resize', 'translate', 'affine_transform', 'get_affine_transform']
def crop(bbox, crop_box=None, allow_outside_center=True):
    """Crop bounding boxes according to slice area.

    This method is mainly used with image cropping to ensure bounding boxes fit
    within the cropped image.

    Parameters
    ----------
    bbox : numpy.ndarray
        Numpy.ndarray with shape (N, 4+) where N is the number of bounding boxes.
        The second axis represents attributes of the bounding box.
        Specifically, these are :math:`(x_{min}, y_{min}, x_{max}, y_{max})`,
        we allow additional attributes other than coordinates, which stay intact
        during bounding box transformations.
    crop_box : tuple
        Tuple of length 4. :math:`(x_{min}, y_{min}, width, height)`.
        A falsy entry (``None`` or ``0``) means "unspecified": the origin
        defaults to 0 and the width/height default to unbounded.
    allow_outside_center : bool
        If `False`, remove bounding boxes which have centers outside cropping area.

    Returns
    -------
    numpy.ndarray
        Cropped bounding boxes with shape (M, 4+) where M <= N, expressed
        relative to the top-left corner of the crop area. The dtype of `bbox`
        is preserved; the input array is never modified.

    Raises
    ------
    ValueError
        If `crop_box` is given but does not have exactly 4 entries.

    """
    bbox = bbox.copy()
    if crop_box is None:
        return bbox
    if len(crop_box) != 4:
        raise ValueError(
            "Invalid crop_box parameter, requires length 4, given {}".format(str(crop_box)))
    if all(c is None for c in crop_box):
        return bbox

    l, t, w, h = crop_box
    left = l if l else 0
    top = t if t else 0
    right = left + (w if w else np.inf)
    bottom = top + (h if h else np.inf)
    crop_bbox = np.array((left, top, right, bottom))
    origin = crop_bbox[:2]

    if allow_outside_center:
        mask = np.ones(bbox.shape[0], dtype=bool)
    else:
        # Centers are taken from the boxes *before* they are clipped.
        centers = (bbox[:, :2] + bbox[:, 2:4]) / 2
        mask = np.logical_and(origin <= centers, centers < crop_bbox[2:]).all(axis=1)

    # Clip the corners to the crop area.
    bbox[:, :2] = np.maximum(bbox[:, :2], origin)
    bbox[:, 2:4] = np.minimum(bbox[:, 2:4], crop_bbox[2:4])
    # Shift into crop-relative coordinates. Plain assignment (rather than
    # ``-=``) is used so that an integer ``bbox`` combined with a float
    # ``crop_bbox`` (e.g. when an unbounded extent introduced ``np.inf``) is
    # cast back to the bbox dtype instead of raising a casting error.
    bbox[:, :2] = bbox[:, :2] - origin
    bbox[:, 2:4] = bbox[:, 2:4] - origin

    # Drop boxes that collapsed to zero (or negative) width/height.
    mask = np.logical_and(mask, (bbox[:, :2] < bbox[:, 2:4]).all(axis=1))
    return bbox[mask]
def flip(bbox, size, flip_x=False, flip_y=False):
    """Mirror bounding boxes to follow a flipped image.

    Parameters
    ----------
    bbox : numpy.ndarray
        Array of shape (N, 4+), one row per bounding box. The first four
        columns are :math:`(x_{min}, y_{min}, x_{max}, y_{max})`; any further
        columns are carried over unchanged.
    size : tuple
        Tuple of length 2: (width, height) of the image.
    flip_x : bool
        Whether to flip horizontally.
    flip_y : bool
        Whether to flip vertically.

    Returns
    -------
    numpy.ndarray
        A new array with the same shape as `bbox` holding the flipped boxes.

    Raises
    ------
    ValueError
        If `size` does not have exactly 2 entries.

    """
    if len(size) != 2:
        raise ValueError("size requires length 2 tuple, given {}".format(len(size)))
    img_w, img_h = size
    flipped = bbox.copy()
    if flip_y:
        # The new minimum comes from the old maximum and vice versa.
        flipped[:, [1, 3]] = img_h - bbox[:, [3, 1]]
    if flip_x:
        flipped[:, [0, 2]] = img_w - bbox[:, [2, 0]]
    return flipped
def resize(bbox, in_size, out_size):
    """Rescale bounding boxes to follow an image resize operation.

    Parameters
    ----------
    bbox : numpy.ndarray
        Array of shape (N, 4+), one row per bounding box. The first four
        columns are :math:`(x_{min}, y_{min}, x_{max}, y_{max})`; any further
        columns are carried over (converted to float along with the rest).
    in_size : tuple
        Tuple of length 2: (width, height) of the input image.
    out_size : tuple
        Tuple of length 2: (width, height) of the output image.

    Returns
    -------
    numpy.ndarray
        A new float array with the same shape as `bbox` holding the resized
        boxes.

    Raises
    ------
    ValueError
        If `in_size` or `out_size` does not have exactly 2 entries.

    """
    if len(in_size) != 2:
        raise ValueError("in_size requires length 2 tuple, given {}".format(len(in_size)))
    if len(out_size) != 2:
        raise ValueError("out_size requires length 2 tuple, given {}".format(len(out_size)))

    scaled = bbox.copy()
    scaled = scaled.astype(float)
    x_scale = out_size[0] / in_size[0]
    y_scale = out_size[1] / in_size[1]
    # Columns 0/2 are x coordinates, columns 1/3 are y coordinates.
    scaled[:, [1, 3]] *= y_scale
    scaled[:, [0, 2]] *= x_scale
    return scaled
def translate(bbox, x_offset=0, y_offset=0):
    """Translate bounding boxes by offsets.

    Parameters
    ----------
    bbox : numpy.ndarray
        Numpy.ndarray with shape (N, 4+) where N is the number of bounding boxes.
        The second axis represents attributes of the bounding box.
        Specifically, these are :math:`(x_{min}, y_{min}, x_{max}, y_{max})`,
        we allow additional attributes other than coordinates, which stay intact
        during bounding box transformations.
    x_offset : int or float
        Offset along x axis.
    y_offset : int or float
        Offset along y axis.

    Returns
    -------
    numpy.ndarray
        Translated bounding boxes with original shape. The dtype of `bbox` is
        kept whenever the offsets can be added to it in place; otherwise
        (e.g. integer boxes with float offsets) the whole result is promoted
        to the common dtype so the offsets are not rejected or truncated.
        The input array is never modified.

    """
    offset = np.asarray((x_offset, y_offset))
    # dtype NumPy computes ``bbox + offset`` in.
    sum_dtype = np.result_type(bbox.dtype, offset.dtype)
    if np.can_cast(sum_dtype, bbox.dtype, casting='same_kind'):
        bbox = bbox.copy()
    else:
        # An in-place ``+=`` would raise a casting error here (for example
        # integer boxes shifted by a float offset), so work in the wider dtype.
        bbox = bbox.astype(sum_dtype)
    bbox[:, :2] += offset
    bbox[:, 2:4] += offset
    return bbox
def affine_transform(pt, t):
    """Map a 2D point through an affine transformation matrix.

    The point is lifted to homogeneous coordinates ``(x, y, 1)`` and
    multiplied by `t`.

    Parameters
    ----------
    pt : sequence or numpy.ndarray
        The point to transform; ``pt[0]`` is read as x and ``pt[1]`` as y.
    t : numpy.ndarray
        Transformation matrix with shape (2, 3).

    Returns
    -------
    numpy.ndarray
        The first two components of ``t . (x, y, 1)``, i.e. the transformed
        point.

    """
    homogeneous = np.array((pt[0], pt[1], 1.), dtype=np.float32)
    mapped = np.dot(t, homogeneous)
    return mapped[:2]
def get_rot_dir(src_point, rot_rad):
    """Rotate a 2D point about the origin by the given angle.

    Parameters
    ----------
    src_point : tuple of float
        Original point; ``src_point[0]`` is x and ``src_point[1]`` is y.
    rot_rad : float
        Rotation angle in radians.

    Returns
    -------
    list of float
        The rotated point ``[x*cos - y*sin, x*sin + y*cos]``.

    """
    sin_r = np.sin(rot_rad)
    cos_r = np.cos(rot_rad)
    x, y = src_point[0], src_point[1]
    return [x * cos_r - y * sin_r, x * sin_r + y * cos_r]
def get_3rd_point(a, b):
    """Derive a third point from two given points.

    With ``d = a - b``, the result is ``b + (-d[1], d[0])``: the offset from
    `b` to `a` is turned by a quarter turn and re-applied at `b`.

    Parameters
    ----------
    a : numpy.ndarray
        First point. Must support element-wise subtraction with `b`.
    b : numpy.ndarray
        Second point.

    Returns
    -------
    numpy.ndarray
        Third point.

    """
    delta = a - b
    perpendicular = np.array((-delta[1], delta[0]), dtype=np.float32)
    return b + perpendicular
def get_affine_transform(center,
                         scale,
                         rot,
                         output_size,
                         shift=np.array([0, 0], dtype=np.float32),
                         inv=0):
    """Build the affine matrix mapping a source region onto an output image.

    Three point correspondences are constructed (region center, a second
    point offset from the center along the rotated direction, and a third
    point derived from those two) and handed to ``cv2.getAffineTransform``.

    Parameters
    ----------
    center : numpy.ndarray
        Center point of the source region.
    scale : float, list or numpy.ndarray
        Scaling factor. A scalar is expanded to ``[scale, scale]``. Only the
        first entry determines the length of the source direction vector;
        the full value is multiplied with `shift`.
    rot : float
        Rotation in degrees.
    output_size : tuple of int
        (width, height) of the output size.
    shift : numpy.ndarray
        Shift factor, multiplied element-wise with `scale` and added to the
        source points.
    inv : bool
        If truthy, return the transform from output to source instead of
        source to output.

    Returns
    -------
    numpy.ndarray
        Affine matrix as returned by ``cv2.getAffineTransform``.

    """
    from ...utils.filesystem import try_import_cv2
    cv2 = try_import_cv2()

    if not isinstance(scale, (np.ndarray, list)):
        scale = np.array([scale, scale], dtype=np.float32)

    out_w, out_h = output_size[0], output_size[1]
    angle = np.pi * rot / 180  # degrees -> radians

    # Direction vectors pointing "up" by half the source / output width.
    src_dir = get_rot_dir([0, scale[0] * -0.5], angle)
    dst_dir = np.array([0, out_w * -0.5], np.float32)

    src_pts = np.zeros((3, 2), dtype=np.float32)
    dst_pts = np.zeros((3, 2), dtype=np.float32)

    offset = scale * shift
    src_pts[0, :] = center + offset
    src_pts[1, :] = center + src_dir + offset

    dst_center = np.array([out_w * 0.5, out_h * 0.5], np.float32)
    dst_pts[0, :] = dst_center
    dst_pts[1, :] = dst_center + dst_dir

    # The third correspondence is derived from the first two.
    src_pts[2:, :] = get_3rd_point(src_pts[0, :], src_pts[1, :])
    dst_pts[2:, :] = get_3rd_point(dst_pts[0, :], dst_pts[1, :])

    if inv:
        src_pts, dst_pts = dst_pts, src_pts
    return cv2.getAffineTransform(np.float32(src_pts), np.float32(dst_pts))