Source code for datumaro.util.annotation_util

# Copyright (C) 2020-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT

from itertools import groupby
from typing import (
    Callable, Dict, Iterable, NewType, Optional, Sequence, Tuple, Union,
)

from typing_extensions import Literal
import numpy as np

from datumaro.components.annotation import (
    AnnotationType, LabelCategories, Mask, RleMask, _Shape,
)
from datumaro.util.mask_tools import mask_to_rle


def find_instances(instance_anns):
    # Groups annotations into instances by their group id.
    # Ungrouped annotations become separate single-annotation instances.
    instance_anns = sorted(instance_anns, key=lambda a: a.group)
    ann_groups = []
    for g_id, group in groupby(instance_anns, lambda a: a.group):
        if not g_id:
            ann_groups.extend(([a] for a in group))
        else:
            ann_groups.append(list(group))

    return ann_groups
def find_group_leader(group):
    # The annotation with the largest area represents its instance
    return max(group, key=lambda x: x.get_area())
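
# Usage sketch (illustrative addition, not part of the original module): group
# annotations into instances and pick the largest shape of each instance.
# `Bbox` comes from datumaro.components.annotation.
from datumaro.components.annotation import Bbox

_example_anns = [
    Bbox(0, 0, 4, 4, group=1),  # two boxes describing the same instance
    Bbox(1, 1, 2, 2, group=1),
    Bbox(5, 5, 3, 3),           # ungrouped, becomes a single-item instance
]
_example_instances = find_instances(_example_anns)  # [[4x4 box, 2x2 box], [3x3 box]]
_example_leaders = [find_group_leader(g) for g in _example_instances]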
BboxCoords = Tuple[float, float, float, float]
Shape = NewType('Shape', _Shape)
SpatialAnnotation = Union[Shape, Mask]
def _get_bbox(ann: Union[Sequence, SpatialAnnotation]) -> BboxCoords:
    if isinstance(ann, (_Shape, Mask)):
        return ann.get_bbox()
    elif hasattr(ann, '__len__') and len(ann) == 4:
        return ann
    else:
        raise ValueError("The value of type '%s' can't be treated as a "
            "bounding box" % type(ann))
def max_bbox(annotations: Iterable[Union[BboxCoords, SpatialAnnotation]]) \
        -> BboxCoords:
    """
    Computes the maximum bbox for the set of spatial annotations and boxes.

    Returns: bbox (tuple): (x, y, w, h)
    """

    boxes = [_get_bbox(ann) for ann in annotations]
    x0 = min((b[0] for b in boxes), default=0)
    y0 = min((b[1] for b in boxes), default=0)
    x1 = max((b[0] + b[2] for b in boxes), default=0)
    y1 = max((b[1] + b[3] for b in boxes), default=0)
    return [x0, y0, x1 - x0, y1 - y0]
def mean_bbox(annotations: Iterable[Union[BboxCoords, SpatialAnnotation]]) \
        -> BboxCoords:
    """
    Computes the mean bbox for the set of spatial annotations and boxes.

    Returns: bbox (tuple): (x, y, w, h)
    """

    le = len(annotations)
    boxes = [_get_bbox(ann) for ann in annotations]
    mlb = sum(b[0] for b in boxes) / le
    mtb = sum(b[1] for b in boxes) / le
    mrb = sum(b[0] + b[2] for b in boxes) / le
    mbb = sum(b[1] + b[3] for b in boxes) / le
    return [mlb, mtb, mrb - mlb, mbb - mtb]
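
# Usage sketch (illustrative addition, not part of the original module):
# both helpers accept raw (x, y, w, h) tuples as well as spatial annotations.
_example_boxes = [(0, 0, 4, 4), (2, 2, 4, 4)]
_example_union = max_bbox(_example_boxes)   # [0, 0, 6, 6] - the enclosing box
_example_mean = mean_bbox(_example_boxes)   # [1.0, 1.0, 4.0, 4.0]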
def softmax(x):
    return np.exp(x) / sum(np.exp(x))
def nms(segments, iou_thresh=0.5):
    """
    Non-maxima suppression algorithm.
    """

    indices = np.argsort([b.attributes['score'] for b in segments])
    ious = np.array([[segment_iou(a, b) for b in segments] for a in segments])

    predictions = []
    while len(indices) != 0:
        i = len(indices) - 1
        pred_idx = indices[i]
        to_remove = [i]
        predictions.append(segments[pred_idx])
        for i, box_idx in enumerate(indices[:i]):
            if iou_thresh < ious[pred_idx, box_idx]:
                to_remove.append(i)
        indices = np.delete(indices, to_remove)
    return predictions
def bbox_iou(a, b) -> Union[Literal[-1], float]:
    """
    IoU computation for the simple case of bounding boxes.
    Returns -1 if the boxes do not intersect, a value in [0; 1] otherwise.
    """

    bbox_a = _get_bbox(a)
    bbox_b = _get_bbox(b)

    aX, aY, aW, aH = bbox_a
    bX, bY, bW, bH = bbox_b
    in_right = min(aX + aW, bX + bW)
    in_left = max(aX, bX)
    in_top = max(aY, bY)
    in_bottom = min(aY + aH, bY + bH)

    in_w = max(0, in_right - in_left)
    in_h = max(0, in_bottom - in_top)
    intersection = in_w * in_h
    if not intersection:
        return -1

    a_area = aW * aH
    b_area = bW * bH
    union = a_area + b_area - intersection
    return intersection / union
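
# Usage sketch (illustrative addition, not part of the original module):
# bbox_iou() also works directly on (x, y, w, h) tuples.
_example_iou = bbox_iou((0, 0, 2, 2), (1, 1, 2, 2))        # 1 / 7 ~ 0.14
_example_disjoint = bbox_iou((0, 0, 1, 1), (5, 5, 1, 1))   # -1, no intersection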
def segment_iou(a, b):
    """
    Generic IoU computation with masks, polygons, and boxes.
    Returns -1 if no intersection, [0; 1] otherwise
    """
    from pycocotools import mask as mask_utils

    a_bbox = list(a.get_bbox())
    b_bbox = list(b.get_bbox())

    is_bbox = AnnotationType.bbox in [a.type, b.type]
    if is_bbox:
        a = [a_bbox]
        b = [b_bbox]
    else:
        w = max(a_bbox[0] + a_bbox[2], b_bbox[0] + b_bbox[2])
        h = max(a_bbox[1] + a_bbox[3], b_bbox[1] + b_bbox[3])

        def _to_rle(ann):
            if ann.type == AnnotationType.polygon:
                return mask_utils.frPyObjects([ann.points], h, w)
            elif isinstance(ann, RleMask):
                return [ann.rle]
            elif ann.type == AnnotationType.mask:
                return mask_utils.frPyObjects([mask_to_rle(ann.image)], h, w)
            else:
                raise TypeError("Unexpected arguments: %s, %s" % (a, b))
        a = _to_rle(a)
        b = _to_rle(b)
    return float(mask_utils.iou(a, b, [not is_bbox]))
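
# Usage sketch (illustrative addition, not part of the original module):
# suppressing overlapping detections with nms() above. Candidates are ranked
# by their 'score' attribute; overlap is measured with segment_iou(), so
# pycocotools must be available. `Bbox` comes from datumaro.components.annotation.
from datumaro.components.annotation import Bbox

_example_detections = [
    Bbox(0, 0, 10, 10, attributes={'score': 0.9}),
    Bbox(1, 1, 10, 10, attributes={'score': 0.5}),    # overlaps the first box
    Bbox(50, 50, 10, 10, attributes={'score': 0.8}),  # far away, kept
]
_example_kept = nms(_example_detections, iou_thresh=0.5)  # the 0.9 and 0.8 boxes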
def PDJ(a, b, eps=None, ratio=0.05, bbox=None):
    """
    Percentage of Detected Joints metric.
    Returns the fraction of point pairs closer than the threshold.
    """

    assert eps is not None or ratio is not None

    p1 = np.array(a.points).reshape((-1, 2))
    p2 = np.array(b.points).reshape((-1, 2))
    if len(p1) != len(p2):
        return 0

    if not eps:
        if bbox is None:
            bbox = mean_bbox([a, b])
        diag = (bbox[2] ** 2 + bbox[3] ** 2) ** 0.5
        eps = ratio * diag

    dists = np.linalg.norm(p1 - p2, axis=1)
    return np.sum(dists < eps) / len(p1)
def OKS(a, b, sigma=None, bbox=None, scale=None):
    """
    Object Keypoint Similarity metric.
    https://cocodataset.org/#keypoints-eval
    """

    p1 = np.array(a.points).reshape((-1, 2))
    p2 = np.array(b.points).reshape((-1, 2))
    if len(p1) != len(p2):
        return 0

    if not sigma:
        sigma = 0.1
    else:
        assert len(sigma) == len(p1)

    if not scale:
        if bbox is None:
            bbox = mean_bbox([a, b])
        scale = bbox[2] * bbox[3]

    dists = np.linalg.norm(p1 - p2, axis=1)
    return np.sum(np.exp(-(dists ** 2) / (2 * scale * (2 * sigma) ** 2)))
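
# Usage sketch (illustrative addition, not part of the original module):
# comparing two keypoint sets with PDJ() and OKS(). `Points` comes from
# datumaro.components.annotation; the reference bbox is passed explicitly
# instead of being derived via mean_bbox().
from datumaro.components.annotation import Points

_example_gt = Points([0, 0, 10, 10])
_example_pred = Points([0.5, 0, 10, 9.5])
_example_pdj = PDJ(_example_gt, _example_pred, bbox=(0, 0, 10, 10))  # 1.0
_example_oks = OKS(_example_gt, _example_pred, bbox=(0, 0, 10, 10))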
def approximate_line(points: Sequence[float], segments: int) -> np.ndarray:
    """
    Approximates a 2d line to the required number of segments.
    The new points are distributed uniformly across the input line.

    Args:
        points (Sequence): an array of line point coordinates. The size is
            [points * 2], the layout is [x0, y0, x1, y1, ...].
        segments (int): the required number of segments in the resulting line.

    Returns:
        new_points (ndarray): an array of new line point coordinates. The size
            is [(segments + 1) * 2], the layout is [x0, y0, x1, y1, ...].
    """

    assert 2 <= len(points) // 2 and len(points) % 2 == 0
    assert 0 < segments

    points = list(points)
    if len(points) == 2:
        points.extend(points)
    points = np.array(points).reshape((-1, 2))

    lengths = np.linalg.norm(points[1:] - points[:-1], axis=1)
    dists = [0]
    for l in lengths:
        dists.append(dists[-1] + l)
    step = dists[-1] / segments

    new_points = np.zeros((segments + 1, 2))
    new_points[0] = points[0]

    old_segment = 0
    for new_segment in range(1, segments + 1):
        pos = new_segment * step
        while dists[old_segment + 1] < pos and old_segment + 2 < len(dists):
            old_segment += 1

        segment_start = dists[old_segment]
        segment_len = lengths[old_segment]
        prev_p = points[old_segment]
        next_p = points[old_segment + 1]
        r = (pos - segment_start) / segment_len

        new_points[new_segment] = prev_p * (1 - r) + next_p * r

    return np.reshape(new_points, (-1, ))
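
# Usage sketch (illustrative addition, not part of the original module):
# resampling a right-angle polyline of two unit segments into 4 equal segments.
_example_line = approximate_line([0, 0, 1, 0, 1, 1], segments=4)
# -> array([0., 0., 0.5, 0., 1., 0., 1., 0.5, 1., 1.])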
def make_label_id_mapping(
        src_labels: LabelCategories, dst_labels: LabelCategories,
        fallback: int = 0) \
        -> Tuple[
            Callable[[int], Optional[int]],
            Dict[int, int],
            Dict[int, str],
            Dict[str, int]
        ]:
    """
    Maps label ids from source to destination. Fallback id is used for
    missing labels.

    Returns:
        map_id (callable): src id -> dst id
        id_mapping (dict): src id -> dst id
        src_labels (dict): src id -> src label name
        dst_labels (dict): dst label name -> dst id
    """

    source_labels = {
        id: label.name
        for id, label in enumerate(src_labels or ())
    }
    target_labels = {
        label.name: id
        for id, label in enumerate(dst_labels or ())
    }
    id_mapping = {
        src_id: target_labels.get(src_label, fallback)
        for src_id, src_label in source_labels.items()
    }

    def map_id(src_id):
        return id_mapping.get(src_id, fallback)
    return map_id, id_mapping, source_labels, target_labels
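
# Usage sketch (illustrative addition, not part of the original module):
# remapping label ids between two label sets; labels missing in the
# destination are mapped to the fallback id (0 by default).
_example_src = LabelCategories.from_iterable(['cat', 'dog', 'person'])
_example_dst = LabelCategories.from_iterable(['person', 'cat'])
_example_map_id, _example_id_mapping, _, _ = \
    make_label_id_mapping(_example_src, _example_dst)
# _example_id_mapping == {0: 1, 1: 0, 2: 0}  ('dog' is missing in dst)
# _example_map_id(1) == 0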