Source code for mmaction.datasets.pipelines.augmentations

import random
from collections.abc import Sequence

import mmcv
import numpy as np
from torch.nn.modules.utils import _pair

from ..registry import PIPELINES


def _init_lazy_if_proper(results, lazy):
    """Prepare ``results`` for a (possibly lazy) augmentation step.

    "img_shape" is filled in from the first entry of "imgs" when it is
    missing. In lazy mode a fresh "lazy" record is created unless one already
    exists; in non-lazy mode the presence of a "lazy" record is treated as a
    pipeline error, because pending lazy operations must be applied by
    ``Fuse`` before any eager operation runs.

    Required keys in results are "imgs" if "img_shape" not in results,
    otherwise "img_shape"; added or modified keys are "img_shape", "lazy".
    Keys written into a new "lazy" record are "original_shape", "crop_bbox",
    "flip", "flip_direction", "interpolation".

    Args:
        results (dict): A dict stores data pipeline result.
        lazy (bool): Determine whether to apply lazy operation.
    """
    if 'img_shape' not in results:
        results['img_shape'] = results['imgs'][0].shape[:2]

    if not lazy:
        # An eager transform must never see leftovers of lazy transforms.
        assert 'lazy' not in results, 'Use Fuse after lazy operations'
        return

    if 'lazy' in results:
        # A previous lazy transform already set the record up.
        return

    height, width = results['img_shape']
    results['lazy'] = dict(
        original_shape=results['img_shape'],
        # The initial crop box covers the whole image: (x1, y1, x2, y2).
        crop_bbox=np.array([0, 0, width, height], dtype=np.float32),
        flip=False,
        flip_direction=None,
        interpolation=None)


@PIPELINES.register_module()
class Fuse(object):
    """Apply all pending lazy operations in one pass.

    The operations recorded in ``results['lazy']`` are executed in the fixed
    order: crop -> resize -> flip.

    Required keys are "imgs", "img_shape" and "lazy", added or modified keys
    are "imgs", "lazy" (the "lazy" record is removed once it is applied).
    Required keys in "lazy" are "crop_bbox", "interpolation", "flip",
    "flip_direction".
    """

    def __call__(self, results):
        """Execute the recorded crop, resize and flip on every image.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Raises:
            ValueError: If ``results`` carries no "lazy" record.
        """
        if 'lazy' not in results:
            raise ValueError('No lazy operation detected')
        pending = results['lazy']
        frames = results['imgs']

        # Stage 1: crop. The accumulated box is stored as float32, so it is
        # rounded to integer pixel indices first.
        x1, y1, x2, y2 = pending['crop_bbox'].round().astype(int)
        frames = [frame[y1:y2, x1:x2] for frame in frames]

        # Stage 2: resize to the shape the lazy transforms have tracked.
        out_h, out_w = results['img_shape']
        mode = pending['interpolation']
        if mode is None:
            mode = 'bilinear'
        frames = [
            mmcv.imresize(frame, (out_w, out_h), interpolation=mode)
            for frame in frames
        ]

        # Stage 3: flip in place, only if a lazy Flip requested it.
        if pending['flip']:
            for frame in frames:
                mmcv.imflip_(frame, pending['flip_direction'])

        results['imgs'] = frames
        del results['lazy']

        return results
@PIPELINES.register_module()
class RandomCrop(object):
    """Vanilla square random crop that specifies the output size.

    Required keys in results are "imgs" and "img_shape", added or modified
    keys are "imgs", "crop_bbox", "img_shape" and "lazy"; Required keys in
    "lazy" are "flip", "crop_bbox", added or modified key is "crop_bbox".

    Args:
        size (int): The output size of the images.
        lazy (bool): Determine whether to apply lazy operation. Default: False.
    """

    def __init__(self, size, lazy=False):
        if not isinstance(size, int):
            raise TypeError(f'Size must be an int, but got {type(size)}')
        self.size = size
        self.lazy = lazy

    def __call__(self, results):
        """Performs the RandomCrop augmentation.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: The same ``results`` dict, updated in place.

        Raises:
            NotImplementedError: In lazy mode, if a flip has already been
                recorded in ``results['lazy']``.
        """
        _init_lazy_if_proper(results, self.lazy)

        img_h, img_w = results['img_shape']
        assert self.size <= img_h and self.size <= img_w, (
            f'Crop size {self.size} exceeds image shape {(img_h, img_w)}')

        y_offset = 0
        x_offset = 0
        # ``np.random.randint`` excludes ``high``; the ``+ 1`` makes the last
        # valid offset (crop touching the bottom/right border) reachable.
        if img_h > self.size:
            y_offset = int(np.random.randint(0, img_h - self.size + 1))
        if img_w > self.size:
            x_offset = int(np.random.randint(0, img_w - self.size + 1))

        new_h, new_w = self.size, self.size

        results['crop_bbox'] = np.array(
            [x_offset, y_offset, x_offset + new_w, y_offset + new_h])
        results['img_shape'] = (new_h, new_w)

        if not self.lazy:
            results['imgs'] = [
                img[y_offset:y_offset + new_h, x_offset:x_offset + new_w]
                for img in results['imgs']
            ]
        else:
            lazyop = results['lazy']
            if lazyop['flip']:
                raise NotImplementedError('Put Flip at last for now')

            # record crop_bbox in lazyop dict to ensure only crop once in Fuse
            # The new box is given in the current (possibly lazily resized)
            # frame, so it is rescaled into the coordinates of the box that
            # is already pending.
            lazy_left, lazy_top, lazy_right, lazy_bottom = lazyop['crop_bbox']
            left = x_offset * (lazy_right - lazy_left) / img_w
            right = (x_offset + new_w) * (lazy_right - lazy_left) / img_w
            top = y_offset * (lazy_bottom - lazy_top) / img_h
            bottom = (y_offset + new_h) * (lazy_bottom - lazy_top) / img_h
            lazyop['crop_bbox'] = np.array([(lazy_left + left),
                                            (lazy_top + top),
                                            (lazy_left + right),
                                            (lazy_top + bottom)],
                                           dtype=np.float32)

        return results

    def __repr__(self):
        repr_str = (f'{self.__class__.__name__}(size={self.size}, '
                    f'lazy={self.lazy})')
        return repr_str
@PIPELINES.register_module()
class RandomResizedCrop(object):
    """Random crop that specifies the area and height-width ratio range.

    Required keys in results are "imgs", "img_shape", "crop_bbox" and "lazy",
    added or modified keys are "imgs", "crop_bbox" and "lazy"; Required keys
    in "lazy" are "flip", "crop_bbox", added or modified key is "crop_bbox".

    Args:
        area_range (Tuple[float]): The candidate area scales range of
            output cropped images. Default: (0.08, 1.0).
        aspect_ratio_range (Tuple[float]): The candidate aspect ratio range of
            output cropped images. Default: (3 / 4, 4 / 3).
        lazy (bool): Determine whether to apply lazy operation. Default: False.
    """

    def __init__(self,
                 area_range=(0.08, 1.0),
                 aspect_ratio_range=(3 / 4, 4 / 3),
                 lazy=False):
        if not mmcv.is_tuple_of(area_range, float):
            raise TypeError(f'Area_range must be a tuple of float, '
                            f'but got {type(area_range)}')
        if not mmcv.is_tuple_of(aspect_ratio_range, float):
            raise TypeError(f'Aspect_ratio_range must be a tuple of float, '
                            f'but got {type(aspect_ratio_range)}')
        self.area_range = area_range
        self.aspect_ratio_range = aspect_ratio_range
        self.lazy = lazy

    @staticmethod
    def get_crop_bbox(img_shape,
                      area_range,
                      aspect_ratio_range,
                      max_attempts=10):
        """Get a crop bbox given the area range and aspect ratio range.

        Args:
            img_shape (Tuple[int]): Image shape as (height, width).
            area_range (Tuple[float]): The candidate area scales range of
                output cropped images.
            aspect_ratio_range (Tuple[float]): The candidate aspect
                ratio range of output cropped images.
            max_attempts (int): Number of random candidate boxes to draw.
                If none of them fits inside the image, the centered square
                box with side ``min(height, width)`` is used. Default: 10.

        Returns:
            tuple: (left, top, right, bottom) of a random crop bbox within
            the area range and aspect ratio range.
        """
        assert 0 < area_range[0] <= area_range[1] <= 1
        assert 0 < aspect_ratio_range[0] <= aspect_ratio_range[1]

        img_h, img_w = img_shape
        full_area = img_h * img_w

        # Aspect ratios are drawn uniformly in log space.
        ratio_lo, ratio_hi = aspect_ratio_range
        log_ratios = np.random.uniform(
            np.log(ratio_lo), np.log(ratio_hi), size=max_attempts)
        ratios = np.exp(log_ratios)
        areas = np.random.uniform(*area_range, size=max_attempts) * full_area

        widths = np.round(np.sqrt(areas * ratios)).astype(np.int32)
        heights = np.round(np.sqrt(areas / ratios)).astype(np.int32)

        # Take the first candidate that fits inside the image.
        for crop_w, crop_h in zip(widths, heights):
            if crop_h > img_h or crop_w > img_w:
                continue
            x_offset = random.randint(0, img_w - crop_w)
            y_offset = random.randint(0, img_h - crop_h)
            return x_offset, y_offset, x_offset + crop_w, y_offset + crop_h

        # Fallback: centered square crop.
        side = min(img_h, img_w)
        x_offset = (img_w - side) // 2
        y_offset = (img_h - side) // 2
        return x_offset, y_offset, x_offset + side, y_offset + side

    def __call__(self, results):
        """Performs the RandomResizeCrop augmentation.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.
        """
        _init_lazy_if_proper(results, self.lazy)

        img_h, img_w = results['img_shape']
        x1, y1, x2, y2 = self.get_crop_bbox(
            (img_h, img_w), self.area_range, self.aspect_ratio_range)

        results['crop_bbox'] = np.array([x1, y1, x2, y2])
        results['img_shape'] = (y2 - y1, x2 - x1)

        if self.lazy:
            lazyop = results['lazy']
            if lazyop['flip']:
                raise NotImplementedError('Put Flip at last for now')

            # Fold the new box into the pending one so Fuse crops only once.
            lazy_x1, lazy_y1, lazy_x2, lazy_y2 = lazyop['crop_bbox']
            span_w = lazy_x2 - lazy_x1
            span_h = lazy_y2 - lazy_y1
            lazyop['crop_bbox'] = np.array([
                lazy_x1 + x1 * span_w / img_w,
                lazy_y1 + y1 * span_h / img_h,
                lazy_x1 + x2 * span_w / img_w,
                lazy_y1 + y2 * span_h / img_h,
            ], dtype=np.float32)
        else:
            results['imgs'] = [img[y1:y2, x1:x2] for img in results['imgs']]

        return results

    def __repr__(self):
        name = self.__class__.__name__
        return (f'{name}('
                f'area_range={self.area_range}, '
                f'aspect_ratio_range={self.aspect_ratio_range}, '
                f'lazy={self.lazy})')
@PIPELINES.register_module()
class MultiScaleCrop(object):
    """Crop images with a list of randomly selected scales.

    Randomly select the w and h scales from a list of scales. Scale of 1 means
    the base size, which is the minimal of image width and height. The scale
    level of w and h is controlled to be smaller than a certain value to
    prevent too large or small aspect ratio.

    Required keys are "imgs", "img_shape", added or modified keys are "imgs",
    "crop_bbox", "img_shape", "lazy" and "scales". Required keys in "lazy" are
    "crop_bbox", added or modified key is "crop_bbox".

    Args:
        input_size (int | tuple[int]): (w, h) of network input.
        scales (tuple[float]): Width and height scales to be selected.
        max_wh_scale_gap (int): Maximum gap of w and h scale levels.
            Default: 1.
        random_crop (bool): If set to True, the cropping bbox will be randomly
            sampled, otherwise it will be sampled from fixed regions.
            Default: False.
        num_fixed_crops (int): If set to 5, the cropping bbox will keep 5
            basic fixed regions: "upper left", "upper right", "lower left",
            "lower right", "center". If set to 13, the cropping bbox will
            append another 8 fix regions: "center left", "center right",
            "lower center", "upper center", "upper left quarter",
            "upper right quarter", "lower left quarter", "lower right quarter".
            Default: 5.
        lazy (bool): Determine whether to apply lazy operation. Default: False.
    """

    def __init__(self,
                 input_size,
                 scales=(1, ),
                 max_wh_scale_gap=1,
                 random_crop=False,
                 num_fixed_crops=5,
                 lazy=False):
        self.input_size = _pair(input_size)
        if not mmcv.is_tuple_of(self.input_size, int):
            raise TypeError(f'Input_size must be int or tuple of int, '
                            f'but got {type(input_size)}')

        if not isinstance(scales, tuple):
            raise TypeError(f'Scales must be tuple, but got {type(scales)}')

        if num_fixed_crops not in [5, 13]:
            raise ValueError(f'Num_fix_crops must be in {[5, 13]}, '
                             f'but got {num_fixed_crops}')

        self.scales = scales
        self.max_wh_scale_gap = max_wh_scale_gap
        self.random_crop = random_crop
        self.num_fixed_crops = num_fixed_crops
        self.lazy = lazy

    def __call__(self, results):
        """Performs the MultiScaleCrop augmentation.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.
        """
        _init_lazy_if_proper(results, self.lazy)

        img_h, img_w = results['img_shape']
        shorter_side = min(img_h, img_w)
        side_options = [int(shorter_side * scale) for scale in self.scales]

        # All [w, h] pairs whose scale levels differ by at most the gap.
        size_options = [
            [w, h]
            for h_level, h in enumerate(side_options)
            for w_level, w in enumerate(side_options)
            if abs(h_level - w_level) <= self.max_wh_scale_gap
        ]
        picked = random.choice(size_options)
        # Snap a side to the network input size when it is within 3 pixels.
        crop_w, crop_h = (
            target if abs(side - target) < 3 else side
            for side, target in zip(picked, self.input_size))

        if self.random_crop:
            x_offset = random.randint(0, img_w - crop_w)
            y_offset = random.randint(0, img_h - crop_h)
        else:
            w_step = (img_w - crop_w) // 4
            h_step = (img_h - crop_h) // 4
            # Fixed regions expressed as (column, row) multiples of the step.
            grid = [
                (0, 0),  # upper left
                (4, 0),  # upper right
                (0, 4),  # lower left
                (4, 4),  # lower right
                (2, 2),  # center
            ]
            if self.num_fixed_crops == 13:
                grid += [
                    (0, 2),  # center left
                    (4, 2),  # center right
                    (2, 4),  # lower center
                    (2, 0),  # upper center
                    (1, 1),  # upper left quarter
                    (3, 1),  # upper right quarter
                    (1, 3),  # lower left quarter
                    (3, 3),  # lower right quarter
                ]
            col, row = random.choice(grid)
            x_offset, y_offset = col * w_step, row * h_step

        x_end, y_end = x_offset + crop_w, y_offset + crop_h
        results['crop_bbox'] = np.array([x_offset, y_offset, x_end, y_end])
        results['img_shape'] = (crop_h, crop_w)
        results['scales'] = self.scales

        if self.lazy:
            lazyop = results['lazy']
            if lazyop['flip']:
                raise NotImplementedError('Put Flip at last for now')

            # Fold the new box into the pending one so Fuse crops only once.
            lazy_x1, lazy_y1, lazy_x2, lazy_y2 = lazyop['crop_bbox']
            span_w = lazy_x2 - lazy_x1
            span_h = lazy_y2 - lazy_y1
            lazyop['crop_bbox'] = np.array([
                lazy_x1 + x_offset * span_w / img_w,
                lazy_y1 + y_offset * span_h / img_h,
                lazy_x1 + x_end * span_w / img_w,
                lazy_y1 + y_end * span_h / img_h,
            ], dtype=np.float32)
        else:
            results['imgs'] = [
                img[y_offset:y_end, x_offset:x_end] for img in results['imgs']
            ]

        return results

    def __repr__(self):
        name = self.__class__.__name__
        return (f'{name}('
                f'input_size={self.input_size}, scales={self.scales}, '
                f'max_wh_scale_gap={self.max_wh_scale_gap}, '
                f'random_crop={self.random_crop}, '
                f'num_fixed_crops={self.num_fixed_crops}, '
                f'lazy={self.lazy})')
@PIPELINES.register_module()
class Resize(object):
    """Resize images to a specific size.

    Required keys are "imgs", "img_shape", "modality", added or modified
    keys are "imgs", "img_shape", "keep_ratio", "scale_factor", "lazy",
    "resize_size". Required keys in "lazy" is None, added or modified key is
    "interpolation".

    Args:
        scale (float | Tuple[int]): If keep_ratio is True, it serves as scaling
            factor or maximum size:
            If it is a float number, the image will be rescaled by this
            factor, else if it is a tuple of 2 integers, the image will
            be rescaled as large as possible within the scale.
            Otherwise, it serves as (w, h) of output size.
        keep_ratio (bool): If set to True, Images will be resized without
            changing the aspect ratio. Otherwise, it will resize images to a
            given size. Default: True.
        interpolation (str): Algorithm used for interpolation:
            "nearest" | "bilinear". Default: "bilinear".
        lazy (bool): Determine whether to apply lazy operation. Default: False.
    """

    def __init__(self,
                 scale,
                 keep_ratio=True,
                 interpolation='bilinear',
                 lazy=False):
        if not isinstance(scale, (float, tuple)):
            raise TypeError(
                f'Scale must be float or tuple of int, but got {type(scale)}')

        if isinstance(scale, tuple):
            longest, shortest = max(scale), min(scale)
            if shortest == -1:
                # assign np.inf to long edge for rescaling short edge later.
                scale = (np.inf, longest)
        elif scale <= 0:
            raise ValueError(f'Invalid scale {scale}, must be positive.')

        self.scale = scale
        self.keep_ratio = keep_ratio
        self.interpolation = interpolation
        self.lazy = lazy

    def __call__(self, results):
        """Performs the Resize augmentation.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.
        """
        _init_lazy_if_proper(results, self.lazy)

        # Start from an identity factor so successive resizes accumulate.
        results.setdefault('scale_factor', np.array([1, 1], dtype=np.float32))

        img_h, img_w = results['img_shape']
        if self.keep_ratio:
            new_w, new_h = mmcv.rescale_size((img_w, img_h), self.scale)
        else:
            new_w, new_h = self.scale

        # Factor of this call, stored as (w_ratio, h_ratio) on the instance.
        self.scale_factor = np.array([new_w / img_w, new_h / img_h],
                                     dtype=np.float32)

        results['img_shape'] = (new_h, new_w)
        results['keep_ratio'] = self.keep_ratio
        results['scale_factor'] = results['scale_factor'] * self.scale_factor

        if self.lazy:
            lazyop = results['lazy']
            if lazyop['flip']:
                raise NotImplementedError('Put Flip at last for now')
            # Only the interpolation mode is recorded here.
            lazyop['interpolation'] = self.interpolation
        else:
            target_size = (new_w, new_h)
            results['imgs'] = [
                mmcv.imresize(
                    img, target_size, interpolation=self.interpolation)
                for img in results['imgs']
            ]

        return results

    def __repr__(self):
        name = self.__class__.__name__
        return (f'{name}('
                f'scale={self.scale}, keep_ratio={self.keep_ratio}, '
                f'interpolation={self.interpolation}, '
                f'lazy={self.lazy})')
@PIPELINES.register_module()
class Flip(object):
    """Flip the input images with a probability.

    Reverse the order of elements in the given imgs with a specific direction.
    The shape of the imgs is preserved, but the elements are reordered.

    Required keys are "imgs", "img_shape", "modality", added or modified
    keys are "imgs", "lazy", "flip" and "flip_direction". Required keys in
    "lazy" is None, added or modified key are "flip" and "flip_direction".

    Args:
        flip_ratio (float): Probability of implementing flip. Default: 0.5.
        direction (str): Flip imgs horizontally or vertically. Options are
            "horizontal" | "vertical". Default: "horizontal".
        lazy (bool): Determine whether to apply lazy operation. Default: False.
    """
    _directions = ['horizontal', 'vertical']

    def __init__(self, flip_ratio=0.5, direction='horizontal', lazy=False):
        if direction not in self._directions:
            raise ValueError(f'Direction {direction} is not supported. '
                             f'Currently support ones are {self._directions}')
        self.flip_ratio = flip_ratio
        self.direction = direction
        self.lazy = lazy

    def __call__(self, results):
        """Performs the Flip augmentation.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.
        """
        _init_lazy_if_proper(results, self.lazy)

        is_flow = results['modality'] == 'Flow'
        if is_flow:
            assert self.direction == 'horizontal'

        flip = bool(np.random.rand() < self.flip_ratio)
        results['flip'] = flip
        results['flip_direction'] = self.direction

        if self.lazy:
            lazyop = results['lazy']
            if lazyop['flip']:
                raise NotImplementedError('Use one Flip please')
            lazyop['flip'] = flip
            lazyop['flip_direction'] = self.direction
            return results

        if not flip:
            results['imgs'] = list(results['imgs'])
            return results

        for img in results['imgs']:
            mmcv.imflip_(img, self.direction)
        if is_flow:
            # flow with even indexes are x_flow, which need to be
            # inverted when doing horizontal flip
            for idx in range(0, len(results['imgs']), 2):
                results['imgs'][idx] = mmcv.iminvert(results['imgs'][idx])

        return results

    def __repr__(self):
        name = self.__class__.__name__
        return (f'{name}('
                f'flip_ratio={self.flip_ratio}, direction={self.direction}, '
                f'lazy={self.lazy})')
@PIPELINES.register_module()
class Normalize(object):
    """Normalize images with the given mean and std value.

    Required keys are "imgs", "img_shape", "modality", added or modified
    keys are "imgs" and "img_norm_cfg". If modality is 'Flow', additional
    keys "scale_factor" is required when ``adjust_magnitude`` is True.

    Args:
        mean (Sequence[float] | np.ndarray): Mean values of different
            channels.
        std (Sequence[float] | np.ndarray): Std values of different channels.
        to_bgr (bool): Whether to convert channels from RGB to BGR.
            Default: False.
        adjust_magnitude (bool): Indicate whether to adjust the flow magnitude
            on 'scale_factor' when modality is 'Flow'. Default: False.
    """

    def __init__(self, mean, std, to_bgr=False, adjust_magnitude=False):
        # np.ndarray is not a collections.abc.Sequence, so it has to be
        # listed explicitly to honour what the error messages promise.
        if not isinstance(mean, (Sequence, np.ndarray)):
            raise TypeError(
                f'Mean must be list, tuple or np.ndarray, but got {type(mean)}'
            )

        if not isinstance(std, (Sequence, np.ndarray)):
            raise TypeError(
                f'Std must be list, tuple or np.ndarray, but got {type(std)}')

        self.mean = np.array(mean, dtype=np.float32)
        self.std = np.array(std, dtype=np.float32)
        self.to_bgr = to_bgr
        self.adjust_magnitude = adjust_magnitude

    def __call__(self, results):
        """Normalize ``results['imgs']`` and record the configuration used.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: The same ``results`` dict with "imgs" replaced by a single
            float32 array and "img_norm_cfg" added.

        Raises:
            NotImplementedError: If "modality" is neither 'RGB' nor 'Flow'.
        """
        modality = results['modality']

        if modality == 'RGB':
            n = len(results['imgs'])
            h, w, c = results['imgs'][0].shape
            # Stack all frames into one float32 buffer, then normalize each
            # frame of that buffer in place.
            imgs = np.empty((n, h, w, c), dtype=np.float32)
            for i, img in enumerate(results['imgs']):
                imgs[i] = img

            for img in imgs:
                mmcv.imnormalize_(img, self.mean, self.std, self.to_bgr)

            results['imgs'] = imgs
            results['img_norm_cfg'] = dict(
                mean=self.mean, std=self.std, to_bgr=self.to_bgr)
            return results
        elif modality == 'Flow':
            num_imgs = len(results['imgs'])
            assert num_imgs % 2 == 0
            assert self.mean.shape[0] == 2
            assert self.std.shape[0] == 2
            n = num_imgs // 2
            h, w = results['imgs'][0].shape
            x_flow = np.empty((n, h, w), dtype=np.float32)
            y_flow = np.empty((n, h, w), dtype=np.float32)
            # Even indexes hold x_flow, odd indexes hold y_flow.
            for i in range(n):
                x_flow[i] = results['imgs'][2 * i]
                y_flow[i] = results['imgs'][2 * i + 1]
            x_flow = (x_flow - self.mean[0]) / self.std[0]
            y_flow = (y_flow - self.mean[1]) / self.std[1]
            if self.adjust_magnitude:
                x_flow = x_flow * results['scale_factor'][0]
                y_flow = y_flow * results['scale_factor'][1]
            # The two flow components become the last axis.
            imgs = np.stack([x_flow, y_flow], axis=-1)
            results['imgs'] = imgs
            args = dict(
                mean=self.mean,
                std=self.std,
                to_bgr=self.to_bgr,
                adjust_magnitude=self.adjust_magnitude)
            results['img_norm_cfg'] = args
            return results
        else:
            raise NotImplementedError

    def __repr__(self):
        repr_str = (f'{self.__class__.__name__}('
                    f'mean={self.mean}, '
                    f'std={self.std}, '
                    f'to_bgr={self.to_bgr}, '
                    f'adjust_magnitude={self.adjust_magnitude})')
        return repr_str
@PIPELINES.register_module()
class CenterCrop(object):
    """Crop the center area from images.

    Required keys are "imgs", "img_shape", added or modified keys are "imgs",
    "crop_bbox", "lazy" and "img_shape". Required keys in "lazy" is
    "crop_bbox", added or modified key is "crop_bbox".

    Args:
        crop_size (int | tuple[int]): (w, h) of crop size.
        lazy (bool): Determine whether to apply lazy operation. Default: False.
    """

    def __init__(self, crop_size, lazy=False):
        size = _pair(crop_size)
        self.crop_size = size
        self.lazy = lazy
        if not mmcv.is_tuple_of(size, int):
            raise TypeError(f'Crop_size must be int or tuple of int, '
                            f'but got {type(crop_size)}')

    def __call__(self, results):
        """Performs the CenterCrop augmentation.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.
        """
        _init_lazy_if_proper(results, self.lazy)

        img_h, img_w = results['img_shape']
        crop_w, crop_h = self.crop_size

        x1 = (img_w - crop_w) // 2
        y1 = (img_h - crop_h) // 2
        x2, y2 = x1 + crop_w, y1 + crop_h

        results['crop_bbox'] = np.array([x1, y1, x2, y2])
        results['img_shape'] = (y2 - y1, x2 - x1)

        if self.lazy:
            lazyop = results['lazy']
            if lazyop['flip']:
                raise NotImplementedError('Put Flip at last for now')

            # Fold the new box into the pending one so Fuse crops only once.
            lazy_x1, lazy_y1, lazy_x2, lazy_y2 = lazyop['crop_bbox']
            span_w = lazy_x2 - lazy_x1
            span_h = lazy_y2 - lazy_y1
            lazyop['crop_bbox'] = np.array([
                lazy_x1 + x1 * span_w / img_w,
                lazy_y1 + y1 * span_h / img_h,
                lazy_x1 + x2 * span_w / img_w,
                lazy_y1 + y2 * span_h / img_h,
            ], dtype=np.float32)
        else:
            results['imgs'] = [img[y1:y2, x1:x2] for img in results['imgs']]

        return results

    def __repr__(self):
        name = self.__class__.__name__
        return f'{name}(crop_size={self.crop_size}, lazy={self.lazy})'
@PIPELINES.register_module()
class ThreeCrop(object):
    """Crop images into three crops.

    Crop the images equally into three crops with equal intervals along the
    side on which the crop is smaller than the image; the other side of the
    crop must match the image exactly.

    Required keys are "imgs", "img_shape", added or modified keys are "imgs",
    "crop_bbox" and "img_shape".

    Args:
        crop_size(int | tuple[int]): (w, h) of crop size.
    """

    def __init__(self, crop_size):
        size = _pair(crop_size)
        self.crop_size = size
        if not mmcv.is_tuple_of(size, int):
            raise TypeError(f'Crop_size must be int or tuple of int, '
                            f'but got {type(crop_size)}')

    def __call__(self, results):
        """Performs the ThreeCrop augmentation.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.
        """
        _init_lazy_if_proper(results, False)

        imgs = results['imgs']
        img_h, img_w = results['imgs'][0].shape[:2]
        crop_w, crop_h = self.crop_size
        assert crop_h == img_h or crop_w == img_w

        if crop_h == img_h:
            # Full height: slide horizontally.
            w_step = (img_w - crop_w) // 2
            offsets = [
                (0, 0),  # left
                (2 * w_step, 0),  # right
                (w_step, 0),  # middle
            ]
        elif crop_w == img_w:
            # Full width: slide vertically.
            h_step = (img_h - crop_h) // 2
            offsets = [
                (0, 0),  # top
                (0, 2 * h_step),  # down
                (0, h_step),  # middle
            ]

        num_imgs = len(imgs)
        cropped, crop_bboxes = [], []
        for x0, y0 in offsets:
            x1, y1 = x0 + crop_w, y0 + crop_h
            cropped += [img[y0:y1, x0:x1] for img in imgs]
            # One bbox entry per produced crop.
            crop_bboxes += [[x0, y0, x1, y1]] * num_imgs

        results['imgs'] = cropped
        results['crop_bbox'] = np.array(crop_bboxes)
        results['img_shape'] = results['imgs'][0].shape[:2]

        return results

    def __repr__(self):
        return f'{self.__class__.__name__}(crop_size={self.crop_size})'
@PIPELINES.register_module()
class TenCrop(object):
    """Crop the images into 10 crops (corner + center + flip).

    Crop the four corners and the center part of the image with the same
    given crop_size, and flip it horizontally.

    Required keys are "imgs", "img_shape", added or modified keys are "imgs",
    "crop_bbox" and "img_shape".

    Args:
        crop_size(int | tuple[int]): (w, h) of crop size.
    """

    def __init__(self, crop_size):
        size = _pair(crop_size)
        self.crop_size = size
        if not mmcv.is_tuple_of(size, int):
            raise TypeError(f'Crop_size must be int or tuple of int, '
                            f'but got {type(crop_size)}')

    def __call__(self, results):
        """Performs the TenCrop augmentation.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.
        """
        _init_lazy_if_proper(results, False)

        imgs = results['imgs']
        img_h, img_w = results['imgs'][0].shape[:2]
        crop_w, crop_h = self.crop_size

        w_step = (img_w - crop_w) // 4
        h_step = (img_h - crop_h) // 4

        offsets = [
            (0, 0),  # upper left
            (4 * w_step, 0),  # upper right
            (0, 4 * h_step),  # lower left
            (4 * w_step, 4 * h_step),  # lower right
            (2 * w_step, 2 * h_step),  # center
        ]

        num_imgs = len(imgs)
        img_crops, crop_bboxes = [], []
        for x0, y0 in offsets:
            x1, y1 = x0 + crop_w, y0 + crop_h
            plain = [img[y0:y1, x0:x1] for img in imgs]
            # Flip along axis 1; the flipped view is copied into a new array.
            mirrored = [np.flip(patch, axis=1).copy() for patch in plain]
            img_crops += plain
            img_crops += mirrored
            # Plain and mirrored crops share the same bbox.
            crop_bboxes += [[x0, y0, x1, y1]] * (num_imgs * 2)

        results['imgs'] = img_crops
        results['crop_bbox'] = np.array(crop_bboxes)
        results['img_shape'] = results['imgs'][0].shape[:2]

        return results

    def __repr__(self):
        return f'{self.__class__.__name__}(crop_size={self.crop_size})'
@PIPELINES.register_module()
class MultiGroupCrop(object):
    """Randomly crop the images into several groups.

    For each group one random bounding box of the given crop_size is drawn
    and applied to every image, so the output holds ``groups`` times as many
    images as the input.

    Required keys are "imgs", added or modified keys are "imgs", "crop_bbox"
    and "img_shape".

    Args:
        crop_size(int | tuple[int]): (w, h) of crop size.
        groups(int): Number of groups.
    """

    def __init__(self, crop_size, groups):
        size = _pair(crop_size)
        self.crop_size = size
        self.groups = groups
        if not mmcv.is_tuple_of(size, int):
            raise TypeError(
                f'Crop size must be int or tuple of int, but got '
                f'{type(crop_size)}')

        if not isinstance(groups, int):
            raise TypeError(f'Groups must be int, but got {type(groups)}.')

        if groups <= 0:
            raise ValueError('Groups must be positive.')

    def __call__(self, results):
        """Performs the MultiGroupCrop augmentation.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.
        """
        imgs = results['imgs']
        img_h, img_w = imgs[0].shape[:2]
        crop_w, crop_h = self.crop_size

        num_imgs = len(imgs)
        img_crops, crop_bboxes = [], []
        for _ in range(self.groups):
            # The x offset is drawn before the y offset for every group.
            x0 = random.randint(0, img_w - crop_w)
            y0 = random.randint(0, img_h - crop_h)
            x1, y1 = x0 + crop_w, y0 + crop_h

            img_crops += [img[y0:y1, x0:x1] for img in imgs]
            crop_bboxes += [[x0, y0, x1, y1]] * num_imgs

        results['imgs'] = img_crops
        results['crop_bbox'] = np.array(crop_bboxes)
        results['img_shape'] = results['imgs'][0].shape[:2]

        return results

    def __repr__(self):
        name = self.__class__.__name__
        return f'{name}(crop_size={self.crop_size}, groups={self.groups})'