Source code for mmaction.apis.inference

import os
import os.path as osp
from operator import itemgetter

import mmcv
import torch
from mmcv.parallel import collate, scatter
from mmcv.runner import load_checkpoint

from mmaction.core import OutputHook
from mmaction.datasets.pipelines import Compose
from mmaction.models import build_recognizer


def init_recognizer(config,
                    checkpoint=None,
                    device='cuda:0',
                    use_frames=False):
    """Build a recognizer from a config and put it in evaluation mode.

    Args:
        config (str | :obj:`mmcv.Config`): Path of a config file, or an
            already loaded config object.
        checkpoint (str | None, optional): Checkpoint path/url. When it is
            None no weights are loaded. Default: None.
        device (str | :obj:`torch.device`): Device the recognizer is moved
            to; also used as ``map_location`` when loading the checkpoint.
            Default: 'cuda:0'.
        use_frames (bool): Whether the input will be rawframes instead of a
            video file. Default: False.

    Returns:
        nn.Module: The constructed recognizer, with the config attached as
            its ``cfg`` attribute.

    Raises:
        TypeError: If ``config`` is neither a string nor a
            :obj:`mmcv.Config`.
        RuntimeError: If ``config.dataset_type`` does not match the input
            type selected by ``use_frames``.
    """
    if isinstance(config, str):
        config = mmcv.Config.fromfile(config)
    elif not isinstance(config, mmcv.Config):
        raise TypeError('config must be a filename or Config object, '
                        f'but got {type(config)}')

    # rawframe input requires 'RawframeDataset', video input 'VideoDataset'
    expected_dataset = 'RawframeDataset' if use_frames else 'VideoDataset'
    if config.dataset_type != expected_dataset:
        input_type = 'rawframes' if use_frames else 'video'
        raise RuntimeError('input data type should be consist with the '
                           f'dataset type in config, but got input type '
                           f"'{input_type}' and dataset type "
                           f"'{config.dataset_type}'")

    # the checkpoint (if any) is loaded below, so the backbone does not need
    # its own pretrained weights
    config.model.backbone.pretrained = None
    recognizer = build_recognizer(
        config.model, test_cfg=config.get('test_cfg'))

    if checkpoint is not None:
        load_checkpoint(recognizer, checkpoint, map_location=device)

    # keep the config on the model so later code can read it
    recognizer.cfg = config
    recognizer.to(device)
    recognizer.eval()
    return recognizer
def inference_recognizer(model,
                         video_path,
                         label_path,
                         use_frames=False,
                         outputs=None,
                         as_tensor=True):
    """Run a recognizer on a single video or rawframe directory.

    Args:
        model (nn.Module): The loaded recognizer. Its ``cfg`` attribute
            supplies the test pipeline and the rawframe settings.
        video_path (str): The video file path/url or the rawframes directory
            path. If ``use_frames`` is set to True, it must be an existing
            rawframes directory. Otherwise, it must be a video file path or
            a url starting with 'http'.
        label_path (str): The label file path, one label per line.
        use_frames (bool): Whether to use rawframes as input. Default: False.
        outputs (list(str) | tuple(str) | str | None): Names of layers whose
            outputs need to be returned. Default: None.
        as_tensor (bool): Same as that in ``OutputHook``. Default: True.

    Returns:
        list[tuple(str, score)]: The (at most) five highest-scoring
            ``(label, score)`` pairs, sorted by descending score.
        The ``layer_outputs`` collected by ``OutputHook`` for the layers
            named in ``outputs``. Only returned (as the second element of a
            tuple) when ``outputs`` is non-empty.

    Raises:
        RuntimeError: If ``video_path`` neither exists nor starts with
            'http', or if its kind (file / directory) does not match
            ``use_frames``.
        AssertionError: If ``outputs`` is not None, a str, a tuple or a list.
    """
    if not (osp.exists(video_path) or video_path.startswith('http')):
        raise RuntimeError(f"'{video_path}' is missing")

    if osp.isfile(video_path) and use_frames:
        raise RuntimeError(
            f"'{video_path}' is a video file, not a rawframe directory")
    if osp.isdir(video_path) and not use_frames:
        raise RuntimeError(
            f"'{video_path}' is a rawframe directory, not a video file")
    if use_frames and not osp.isdir(video_path):
        # A url (or any other non-directory) passes the checks above but
        # cannot be listed as a rawframe directory; fail here with a clear
        # message instead of an OSError from ``os.listdir`` further down.
        raise RuntimeError(f"'{video_path}' is not a rawframe directory")

    if isinstance(outputs, str):
        outputs = (outputs, )
    assert outputs is None or isinstance(outputs, (tuple, list))

    cfg = model.cfg
    device = next(model.parameters()).device  # model device

    # construct label map: line i of the file is the name of class i
    with open(label_path, 'r') as f:
        label = [line.strip() for line in f]

    # build the data pipeline
    test_pipeline = Compose(cfg.data.test.pipeline)

    # prepare data
    if use_frames:
        filename_tmpl = cfg.data.test.get('filename_tmpl', 'img_{:05}.jpg')
        modality = cfg.data.test.get('modality', 'RGB')
        start_index = cfg.data.test.get('start_index', 1)
        data = dict(
            frame_dir=video_path,
            # assuming files in ``video_path`` are all named with
            # ``filename_tmpl``
            total_frames=len(os.listdir(video_path)),
            label=-1,
            start_index=start_index,
            filename_tmpl=filename_tmpl,
            modality=modality)
    else:
        start_index = cfg.data.test.get('start_index', 0)
        data = dict(
            filename=video_path,
            label=-1,
            start_index=start_index,
            modality='RGB')
    data = test_pipeline(data)
    data = collate([data], samples_per_gpu=1)

    if device.type == 'cuda':
        # scatter to specified GPU
        data = scatter(data, [device])[0]

    # forward the model; the hook records the requested layer outputs
    with OutputHook(model, outputs=outputs, as_tensor=as_tensor) as h:
        with torch.no_grad():
            scores = model(return_loss=False, **data)[0]
        returned_features = h.layer_outputs if outputs else None

    # pair every label with its score and keep the five best
    score_tuples = tuple(zip(label, scores))
    score_sorted = sorted(score_tuples, key=itemgetter(1), reverse=True)
    top5_label = score_sorted[:5]

    if outputs:
        return top5_label, returned_features
    return top5_label