Source code for datumaro.plugins.coco_format.importer

# Copyright (C) 2019-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT

from glob import glob
import logging as log
import os.path as osp

from datumaro.components.extractor import DEFAULT_SUBSET_NAME, Importer
from datumaro.components.format_detection import (
    FormatDetectionConfidence, FormatDetectionContext,
)
from datumaro.plugins.coco_format.extractor import (
    CocoCaptionsExtractor, CocoImageInfoExtractor, CocoInstancesExtractor,
    CocoLabelsExtractor, CocoPanopticExtractor, CocoPersonKeypointsExtractor,
    CocoStuffExtractor,
)

from .format import CocoTask


[docs]class CocoImporter(Importer): _TASKS = { CocoTask.instances: CocoInstancesExtractor, CocoTask.person_keypoints: CocoPersonKeypointsExtractor, CocoTask.captions: CocoCaptionsExtractor, CocoTask.labels: CocoLabelsExtractor, CocoTask.image_info: CocoImageInfoExtractor, CocoTask.panoptic: CocoPanopticExtractor, CocoTask.stuff: CocoStuffExtractor, }
[docs] @classmethod def build_cmdline_parser(cls, **kwargs): parser = super().build_cmdline_parser(**kwargs) parser.add_argument('--keep-original-category-ids', action='store_true', help="Add dummy label categories so that category indices " "correspond to the category IDs in the original annotation " "file") return parser
[docs] @classmethod def detect( cls, context: FormatDetectionContext, ) -> FormatDetectionConfidence: # The `coco` format is inherently ambiguous with `coco_instances`, # `coco_stuff`, etc. To remove the ambiguity (and thus make it possible # to use autodetection with the COCO dataset), disable autodetection # for the single-task formats. if len(cls._TASKS) == 1: context.fail('this format cannot be autodetected') with context.require_any(): for task in cls._TASKS.keys(): with context.alternative(): context.require_file(f'annotations/{task.name}_*.json')
def __call__(self, path, **extra_params): subsets = self.find_sources(path) if len(subsets) == 0: raise Exception("Failed to find 'coco' dataset at '%s'" % path) # TODO: should be removed when proper label merging is implemented conflicting_types = {CocoTask.instances, CocoTask.person_keypoints, CocoTask.labels, CocoTask.panoptic, CocoTask.stuff} ann_types = set(t for s in subsets.values() for t in s) \ & conflicting_types if 1 <= len(ann_types): selected_ann_type = sorted(ann_types, key=lambda x: x.name)[0] if 1 < len(ann_types): log.warning("Not implemented: " "Found potentially conflicting source types with labels: %s. " "Only one type will be used: %s" \ % (", ".join(t.name for t in ann_types), selected_ann_type.name)) sources = [] for ann_files in subsets.values(): for ann_type, ann_file in ann_files.items(): if ann_type in conflicting_types: if ann_type is not selected_ann_type: log.warning("Not implemented: " "conflicting source '%s' is skipped." % ann_file) continue log.info("Found a dataset at '%s'" % ann_file) sources.append({ 'url': ann_file, 'format': self._TASKS[ann_type].NAME, 'options': dict(extra_params), }) return sources
[docs] @classmethod def find_sources(cls, path): def detect_coco_task(filename): for task in CocoTask: if filename.startswith(task.name + '_'): return task return None if osp.isfile(path): if len(cls._TASKS) == 1: return {'': { next(iter(cls._TASKS)): path }} subset_paths = [path] if path.endswith('.json') else [] else: subset_paths = glob(osp.join(path, '**', '*_*.json'), recursive=True) subsets = {} for subset_path in subset_paths: ann_type = detect_coco_task(osp.basename(subset_path)) if ann_type is None and len(cls._TASKS) == 1: ann_type = list(cls._TASKS)[0] if ann_type not in cls._TASKS: log.warning("File '%s' was skipped, could't match this file " "with any of these tasks: %s" % (subset_path, ','.join(e.NAME for e in cls._TASKS.values())) ) continue parts = osp.splitext(osp.basename(subset_path))[0] \ .split(ann_type.name + '_', maxsplit=1) subset_name = parts[1] if len(parts) == 2 else DEFAULT_SUBSET_NAME subsets.setdefault(subset_name, {})[ann_type] = subset_path return subsets
[docs]class CocoImageInfoImporter(CocoImporter): _TASK = CocoTask.image_info _TASKS = { _TASK: CocoImporter._TASKS[_TASK] }
[docs]class CocoCaptionsImporter(CocoImporter): _TASK = CocoTask.captions _TASKS = { _TASK: CocoImporter._TASKS[_TASK] }
[docs]class CocoInstancesImporter(CocoImporter): _TASK = CocoTask.instances _TASKS = { _TASK: CocoImporter._TASKS[_TASK] }
[docs]class CocoPersonKeypointsImporter(CocoImporter): _TASK = CocoTask.person_keypoints _TASKS = { _TASK: CocoImporter._TASKS[_TASK] }
[docs]class CocoLabelsImporter(CocoImporter): _TASK = CocoTask.labels _TASKS = { _TASK: CocoImporter._TASKS[_TASK] }
[docs]class CocoPanopticImporter(CocoImporter): _TASK = CocoTask.panoptic _TASKS = { _TASK: CocoImporter._TASKS[_TASK] }
[docs]class CocoStuffImporter(CocoImporter): _TASK = CocoTask.stuff _TASKS = { _TASK: CocoImporter._TASKS[_TASK] }