Source code for gluoncv.data.kinetics400.classification
# pylint: disable=line-too-long,too-many-lines,missing-docstring
"""Kinetics400 video action recognition dataset.
Code adapted from https://github.com/open-mmlab/mmaction and
https://github.com/bryanyzhu/two-stream-pytorch"""
import os
from ..video_custom import VideoClsCustom
__all__ = ['Kinetics400']
[docs]class Kinetics400(VideoClsCustom):
"""Load the Kinetics400 video action recognition dataset.
Refer to :doc:`../build/examples_datasets/kinetics400` for the description of
this dataset and how to prepare it.
Parameters
----------
root : str, required. Default '~/.mxnet/datasets/kinetics400/rawframes_train'.
Path to the root folder storing the dataset.
setting : str, required.
A text file describing the dataset, each line per video sample.
There are three items in each line: (1) video path; (2) video length and (3) video label.
train : bool, default True.
Whether to load the training or validation set.
test_mode : bool, default False.
Whether to perform evaluation on the test set.
Usually there is three-crop or ten-crop evaluation strategy involved.
name_pattern : str, default None.
The naming pattern of the decoded video frames.
For example, img_00012.jpg.
video_ext : str, default 'mp4'.
If video_loader is set to True, please specify the video format accordinly.
is_color : bool, default True.
Whether the loaded image is color or grayscale.
modality : str, default 'rgb'.
Input modalities, we support only rgb video frames for now.
Will add support for rgb difference image and optical flow image later.
num_segments : int, default 1.
Number of segments to evenly divide the video into clips.
A useful technique to obtain global video-level information.
Limin Wang, etal, Temporal Segment Networks: Towards Good Practices for Deep Action Recognition, ECCV 2016.
num_crop : int, default 1.
Number of crops for each image. default is 1.
Common choices are three crops and ten crops during evaluation.
new_length : int, default 1.
The length of input video clip. Default is a single image, but it can be multiple video frames.
For example, new_length=16 means we will extract a video clip of consecutive 16 frames.
new_step : int, default 1.
Temporal sampling rate. For example, new_step=1 means we will extract a video clip of consecutive frames.
new_step=2 means we will extract a video clip of every other frame.
new_width : int, default 340.
Scale the width of loaded image to 'new_width' for later multiscale cropping and resizing.
new_height : int, default 256.
Scale the height of loaded image to 'new_height' for later multiscale cropping and resizing.
target_width : int, default 224.
Scale the width of transformed image to the same 'target_width' for batch forwarding.
target_height : int, default 224.
Scale the height of transformed image to the same 'target_height' for batch forwarding.
temporal_jitter : bool, default False.
Whether to temporally jitter if new_step > 1.
video_loader : bool, default False.
Whether to use video loader to load data.
use_decord : bool, default True.
Whether to use Decord video loader to load data. Otherwise use mmcv video loader.
transform : function, default None.
A function that takes data and label and transforms them.
slowfast : bool, default False.
If set to True, use data loader designed for SlowFast network.
Christoph Feichtenhofer, etal, SlowFast Networks for Video Recognition, ICCV 2019.
slow_temporal_stride : int, default 16.
The temporal stride for sparse sampling of video frames in slow branch of a SlowFast network.
fast_temporal_stride : int, default 2.
The temporal stride for sparse sampling of video frames in fast branch of a SlowFast network.
data_aug : str, default 'v1'.
Different types of data augmentation auto. Supports v1, v2, v3 and v4.
lazy_init : bool, default False.
If set to True, build a dataset instance without loading any dataset.
"""
def __init__(self,
root=os.path.expanduser('~/.mxnet/datasets/kinetics400/rawframes_train'),
setting=os.path.expanduser('~/.mxnet/datasets/kinetics400/kinetics400_train_list_rawframes.txt'),
train=True,
test_mode=False,
name_pattern='img_%05d.jpg',
video_ext='mp4',
is_color=True,
modality='rgb',
num_segments=1,
num_crop=1,
new_length=1,
new_step=1,
new_width=340,
new_height=256,
target_width=224,
target_height=224,
temporal_jitter=False,
video_loader=False,
use_decord=False,
slowfast=False,
slow_temporal_stride=16,
fast_temporal_stride=2,
data_aug='v1',
lazy_init=False,
transform=None):
super(Kinetics400, self).__init__(root, setting, train, test_mode, name_pattern,
video_ext, is_color, modality, num_segments,
num_crop, new_length, new_step, new_width, new_height,
target_width, target_height, temporal_jitter,
video_loader, use_decord, slowfast, slow_temporal_stride,
fast_temporal_stride, data_aug, lazy_init, transform)
class Kinetics400Attr(object):
def __init__(self):
self.num_class = 400
self.classes = ['abseiling', 'air_drumming', 'answering_questions', 'applauding', 'applying_cream', 'archery',
'arm_wrestling', 'arranging_flowers', 'assembling_computer', 'auctioning', 'baby_waking_up', 'baking_cookies',
'balloon_blowing', 'bandaging', 'barbequing', 'bartending', 'beatboxing', 'bee_keeping', 'belly_dancing',
'bench_pressing', 'bending_back', 'bending_metal', 'biking_through_snow', 'blasting_sand', 'blowing_glass',
'blowing_leaves', 'blowing_nose', 'blowing_out_candles', 'bobsledding', 'bookbinding', 'bouncing_on_trampoline',
'bowling', 'braiding_hair', 'breading_or_breadcrumbing', 'breakdancing', 'brush_painting', 'brushing_hair',
'brushing_teeth', 'building_cabinet', 'building_shed', 'bungee_jumping', 'busking', 'canoeing_or_kayaking',
'capoeira', 'carrying_baby', 'cartwheeling', 'carving_pumpkin', 'catching_fish', 'catching_or_throwing_baseball',
'catching_or_throwing_frisbee', 'catching_or_throwing_softball', 'celebrating', 'changing_oil', 'changing_wheel',
'checking_tires', 'cheerleading', 'chopping_wood', 'clapping', 'clay_pottery_making', 'clean_and_jerk',
'cleaning_floor', 'cleaning_gutters', 'cleaning_pool', 'cleaning_shoes', 'cleaning_toilet', 'cleaning_windows',
'climbing_a_rope', 'climbing_ladder', 'climbing_tree', 'contact_juggling', 'cooking_chicken', 'cooking_egg',
'cooking_on_campfire', 'cooking_sausages', 'counting_money', 'country_line_dancing', 'cracking_neck', 'crawling_baby',
'crossing_river', 'crying', 'curling_hair', 'cutting_nails', 'cutting_pineapple', 'cutting_watermelon',
'dancing_ballet', 'dancing_charleston', 'dancing_gangnam_style', 'dancing_macarena', 'deadlifting',
'decorating_the_christmas_tree', 'digging', 'dining', 'disc_golfing', 'diving_cliff', 'dodgeball', 'doing_aerobics',
'doing_laundry', 'doing_nails', 'drawing', 'dribbling_basketball', 'drinking', 'drinking_beer', 'drinking_shots',
'driving_car', 'driving_tractor', 'drop_kicking', 'drumming_fingers', 'dunking_basketball', 'dying_hair',
'eating_burger', 'eating_cake', 'eating_carrots', 'eating_chips', 'eating_doughnuts', 'eating_hotdog',
'eating_ice_cream', 'eating_spaghetti', 'eating_watermelon', 'egg_hunting', 'exercising_arm',
'exercising_with_an_exercise_ball', 'extinguishing_fire', 'faceplanting', 'feeding_birds', 'feeding_fish',
'feeding_goats', 'filling_eyebrows', 'finger_snapping', 'fixing_hair', 'flipping_pancake', 'flying_kite',
'folding_clothes', 'folding_napkins', 'folding_paper', 'front_raises', 'frying_vegetables', 'garbage_collecting',
'gargling', 'getting_a_haircut', 'getting_a_tattoo', 'giving_or_receiving_award', 'golf_chipping', 'golf_driving',
'golf_putting', 'grinding_meat', 'grooming_dog', 'grooming_horse', 'gymnastics_tumbling', 'hammer_throw',
'headbanging', 'headbutting', 'high_jump', 'high_kick', 'hitting_baseball', 'hockey_stop', 'holding_snake',
'hopscotch', 'hoverboarding', 'hugging', 'hula_hooping', 'hurdling', 'hurling_-sport-', 'ice_climbing', 'ice_fishing',
'ice_skating', 'ironing', 'javelin_throw', 'jetskiing', 'jogging', 'juggling_balls', 'juggling_fire',
'juggling_soccer_ball', 'jumping_into_pool', 'jumpstyle_dancing', 'kicking_field_goal', 'kicking_soccer_ball',
'kissing', 'kitesurfing', 'knitting', 'krumping', 'laughing', 'laying_bricks', 'long_jump', 'lunge', 'making_a_cake',
'making_a_sandwich', 'making_bed', 'making_jewelry', 'making_pizza', 'making_snowman', 'making_sushi', 'making_tea',
'marching', 'massaging_back', 'massaging_feet', 'massaging_legs', "massaging_person's_head", 'milking_cow',
'mopping_floor', 'motorcycling', 'moving_furniture', 'mowing_lawn', 'news_anchoring', 'opening_bottle',
'opening_present', 'paragliding', 'parasailing', 'parkour', 'passing_American_football_-in_game-',
'passing_American_football_-not_in_game-', 'peeling_apples', 'peeling_potatoes', 'petting_animal_-not_cat-',
'petting_cat', 'picking_fruit', 'planting_trees', 'plastering', 'playing_accordion', 'playing_badminton',
'playing_bagpipes', 'playing_basketball', 'playing_bass_guitar', 'playing_cards', 'playing_cello', 'playing_chess',
'playing_clarinet', 'playing_controller', 'playing_cricket', 'playing_cymbals', 'playing_didgeridoo', 'playing_drums',
'playing_flute', 'playing_guitar', 'playing_harmonica', 'playing_harp', 'playing_ice_hockey', 'playing_keyboard',
'playing_kickball', 'playing_monopoly', 'playing_organ', 'playing_paintball', 'playing_piano', 'playing_poker',
'playing_recorder', 'playing_saxophone', 'playing_squash_or_racquetball', 'playing_tennis', 'playing_trombone',
'playing_trumpet', 'playing_ukulele', 'playing_violin', 'playing_volleyball', 'playing_xylophone', 'pole_vault',
'presenting_weather_forecast', 'pull_ups', 'pumping_fist', 'pumping_gas', 'punching_bag', 'punching_person_-boxing-',
'push_up', 'pushing_car', 'pushing_cart', 'pushing_wheelchair', 'reading_book', 'reading_newspaper', 'recording_music',
'riding_a_bike', 'riding_camel', 'riding_elephant', 'riding_mechanical_bull', 'riding_mountain_bike', 'riding_mule',
'riding_or_walking_with_horse', 'riding_scooter', 'riding_unicycle', 'ripping_paper', 'robot_dancing', 'rock_climbing',
'rock_scissors_paper', 'roller_skating', 'running_on_treadmill', 'sailing', 'salsa_dancing', 'sanding_floor',
'scrambling_eggs', 'scuba_diving', 'setting_table', 'shaking_hands', 'shaking_head', 'sharpening_knives',
'sharpening_pencil', 'shaving_head', 'shaving_legs', 'shearing_sheep', 'shining_shoes', 'shooting_basketball',
'shooting_goal_-soccer-', 'shot_put', 'shoveling_snow', 'shredding_paper', 'shuffling_cards', 'side_kick',
'sign_language_interpreting', 'singing', 'situp', 'skateboarding', 'ski_jumping', 'skiing_-not_slalom_or_crosscountry-',
'skiing_crosscountry', 'skiing_slalom', 'skipping_rope', 'skydiving', 'slacklining', 'slapping', 'sled_dog_racing',
'smoking', 'smoking_hookah', 'snatch_weight_lifting', 'sneezing', 'sniffing', 'snorkeling', 'snowboarding', 'snowkiting',
'snowmobiling', 'somersaulting', 'spinning_poi', 'spray_painting', 'spraying', 'springboard_diving', 'squat',
'sticking_tongue_out', 'stomping_grapes', 'stretching_arm', 'stretching_leg', 'strumming_guitar', 'surfing_crowd',
'surfing_water', 'sweeping_floor', 'swimming_backstroke', 'swimming_breast_stroke', 'swimming_butterfly_stroke',
'swing_dancing', 'swinging_legs', 'swinging_on_something', 'sword_fighting', 'tai_chi', 'taking_a_shower', 'tango_dancing',
'tap_dancing', 'tapping_guitar', 'tapping_pen', 'tasting_beer', 'tasting_food', 'testifying', 'texting', 'throwing_axe',
'throwing_ball', 'throwing_discus', 'tickling', 'tobogganing', 'tossing_coin', 'tossing_salad', 'training_dog',
'trapezing', 'trimming_or_shaving_beard', 'trimming_trees', 'triple_jump', 'tying_bow_tie', 'tying_knot_-not_on_a_tie-',
'tying_tie', 'unboxing', 'unloading_truck', 'using_computer', 'using_remote_controller_-not_gaming-', 'using_segway',
'vault', 'waiting_in_line', 'walking_the_dog', 'washing_dishes', 'washing_feet', 'washing_hair', 'washing_hands',
'water_skiing', 'water_sliding', 'watering_plants', 'waxing_back', 'waxing_chest', 'waxing_eyebrows', 'waxing_legs',
'weaving_basket', 'welding', 'whistling', 'windsurfing', 'wrapping_present', 'wrestling', 'writing', 'yawning', 'yoga', 'zumba']