# Source code for datumaro.util.image

# Copyright (C) 2019-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT

from enum import Enum, auto
from io import BytesIO
from typing import (
    Any, Callable, Dict, Iterable, Iterator, Optional, Tuple, Union,
)
import importlib
import os
import os.path as osp
import shlex
import weakref

import numpy as np

try:
    # Introduced in 1.20
    from numpy.typing import DTypeLike
except ImportError:
    DTypeLike = Any


class _IMAGE_BACKENDS(Enum):
    """
    Image I/O libraries this module can use to read and write images.
    """

    # Values are assigned by auto(): cv2 == 1, PIL == 2
    cv2 = auto()
    PIL = auto()
# Backend selection happens once, at import time: OpenCV is used when the
# 'cv2' module can be imported, otherwise Pillow is imported instead.
# Pillow's UnidentifiedImageError is added to _image_loading_errors only
# when Pillow becomes the active backend.
_IMAGE_BACKEND = None
_image_loading_errors = (FileNotFoundError, )
try:
    importlib.import_module('cv2')
except ModuleNotFoundError:
    import PIL
    _IMAGE_BACKEND = _IMAGE_BACKENDS.PIL
    _image_loading_errors = _image_loading_errors + (
        PIL.UnidentifiedImageError,
    )
else:
    _IMAGE_BACKEND = _IMAGE_BACKENDS.cv2

import warnings

from datumaro.util.image_cache import ImageCache
from datumaro.util.os_util import walk


def __getattr__(name: str):
    """
    Module-level attribute hook: resolves the deprecated names 'Image' and
    'ByteImage' to the classes in 'datumaro.components.media', emitting
    a DeprecationWarning. Any other name raises AttributeError.
    """

    deprecated_names = {'Image', 'ByteImage'}
    if name not in deprecated_names:
        raise AttributeError(f"module {__name__} has no attribute {name}")

    message = (f"Using {name} from 'util.image' is deprecated, "
        "the class is moved to 'components.media'")
    # stacklevel=2 attributes the warning to the code accessing the name
    warnings.warn(message, DeprecationWarning, stacklevel=2)

    # The new home of the classes is imported only on demand
    import datumaro.components.media as media_module
    return getattr(media_module, name)

def load_image(path: str, dtype: DTypeLike = np.float32):
    """
    Reads an image in the HWC Grayscale/BGR(A) format and converts it
    to the requested dtype (float [0; 255] by default).

    Parameters:
        path: path to the image file
        dtype: NumPy dtype of the returned array

    Returns: a 2D array, or a 3D array with 3 or 4 channels

    Raises:
        NotImplementedError: no supported image backend is selected
    """

    if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2:
        # cv2.imread does not support paths that are not representable
        # in the locale encoding on Windows, so we read the image bytes
        # ourselves.

        with open(path, 'rb') as f:
            image_bytes = f.read()

        return decode_image(image_bytes, dtype=dtype)
    elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL:
        from PIL import Image

        # The context manager releases the file handle held by Pillow.
        # np.array() always copies, so the result is writable even when
        # no dtype conversion is needed (np.asarray() may return
        # a read-only view, which breaks the in-place swap below).
        with Image.open(path) as pil_image:
            image = np.array(pil_image, dtype=dtype)
        if image.ndim == 3 and image.shape[2] in {3, 4}:
            image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR
    else:
        raise NotImplementedError()

    assert image.ndim in {2, 3}
    if image.ndim == 3:
        assert image.shape[2] in {3, 4}
    return image

def save_image(path: str, image: np.ndarray, create_dir: bool = False,
        dtype: DTypeLike = np.uint8, **kwargs) -> None:
    """
    Writes an image to a file, converting it to the given dtype first.

    Parameters:
        path: destination file path
        image: image data; 3- and 4-channel images are treated as BGR(A)
        create_dir: create the destination directory if it is missing,
            instead of failing
        dtype: NumPy dtype the image is converted to before saving
        kwargs: encoder options. 'jpeg_quality' is the only one read here

    Raises:
        FileNotFoundError: the destination directory does not exist and
            create_dir is False
        NotImplementedError: no supported image backend is selected
    """

    # NOTE: Check destination path for existence
    # OpenCV silently fails if target directory does not exist
    dst_dir = osp.dirname(path)
    if dst_dir:
        if create_dir:
            os.makedirs(dst_dir, exist_ok=True)
        elif not osp.isdir(dst_dir):
            raise FileNotFoundError("Directory does not exist: '%s'" % dst_dir)

    # NOTE: OpenCV documentation says "If the image format is not supported,
    # the image will be converted to 8-bit unsigned and saved that way".
    # Conversion from np.int32 to np.uint8 is not working properly
    backend = _IMAGE_BACKEND
    if dtype == np.int32:
        backend = _IMAGE_BACKENDS.PIL

    if backend == _IMAGE_BACKENDS.cv2:
        # cv2.imwrite does not support paths that are not representable
        # in the locale encoding on Windows, so we write the image bytes
        # ourselves.

        ext = osp.splitext(path)[1]
        image_bytes = encode_image(image, ext, dtype=dtype, **kwargs)

        with open(path, 'wb') as f:
            f.write(image_bytes)
    elif backend == _IMAGE_BACKENDS.PIL:
        from PIL import Image

        # Only forward the quality when the caller set it: Pillow's JPEG
        # writer rejects quality=None instead of falling back to a default
        params = {}
        jpeg_quality = kwargs.get('jpeg_quality')
        if jpeg_quality is not None:
            params['quality'] = jpeg_quality
            if jpeg_quality == 100:
                params['subsampling'] = 0

        # astype() returns a copy, so the caller's array is not modified
        # by the channel swap
        image = image.astype(dtype)
        if image.ndim == 3 and image.shape[2] in {3, 4}:
            image[:, :, :3] = image[:, :, 2::-1] # BGR to RGB
        Image.fromarray(image).save(path, **params)
    else:
        raise NotImplementedError()

def encode_image(image: np.ndarray, ext: str, dtype: DTypeLike = np.uint8,
        **kwargs) -> bytes:
    """
    Encodes an image into the file format denoted by an extension.

    Parameters:
        image: image data; 3- and 4-channel images are treated as BGR(A)
        ext: target file extension, with or without the leading dot
        dtype: NumPy dtype the image is converted to before encoding
        kwargs: encoder options. 'jpeg_quality' is the only one read here

    Returns: the encoded image file contents

    Raises:
        Exception: the cv2 backend failed to encode the image
        NotImplementedError: no supported image backend is selected
    """

    if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2:
        import cv2

        if not ext.startswith('.'):
            ext = '.' + ext

        params = []
        # Apply the quality setting to every JPEG extension spelling,
        # not just '.jpg'
        if ext.lower() in {'.jpg', '.jpeg', '.jpe'}:
            params = [
                int(cv2.IMWRITE_JPEG_QUALITY), kwargs.get('jpeg_quality', 75)
            ]

        image = image.astype(dtype)
        success, result = cv2.imencode(ext, image, params=params)
        if not success:
            raise Exception("Failed to encode image to '%s' format" % (ext))
        return result.tobytes()
    elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL:
        from PIL import Image

        if not ext.startswith('.'):
            ext = '.' + ext

        # Pillow expects a format name ('JPEG'), not a file extension
        # ('jpg'), so translate through its extension registry. Unknown
        # extensions are passed as is, without the dot.
        image_format = Image.registered_extensions().get(
            ext.lower(), ext[1:])

        # Only forward the quality when the caller set it: Pillow's JPEG
        # writer rejects quality=None instead of falling back to a default
        params = {}
        jpeg_quality = kwargs.get('jpeg_quality')
        if jpeg_quality is not None:
            params['quality'] = jpeg_quality
            if jpeg_quality == 100:
                params['subsampling'] = 0

        # astype() returns a copy, so the caller's array is not modified
        # by the channel swap
        image = image.astype(dtype)
        if image.ndim == 3 and image.shape[2] in {3, 4}:
            image[:, :, :3] = image[:, :, 2::-1] # BGR to RGB
        image = Image.fromarray(image)
        with BytesIO() as buffer:
            image.save(buffer, format=image_format, **params)
            return buffer.getvalue()
    else:
        raise NotImplementedError()

def decode_image(image_bytes: bytes,
        dtype: DTypeLike = np.float32) -> np.ndarray:
    """
    Decodes image file contents into an HWC Grayscale/BGR(A) array of
    the requested dtype.

    Parameters:
        image_bytes: encoded image file contents
        dtype: NumPy dtype of the returned array

    Returns: a 2D array, or a 3D array with 3 or 4 channels

    Raises:
        ValueError: the cv2 backend could not decode the data
        NotImplementedError: no supported image backend is selected
    """

    if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2:
        import cv2
        encoded = np.frombuffer(image_bytes, dtype=np.uint8)
        image = cv2.imdecode(encoded, cv2.IMREAD_UNCHANGED)
        # imdecode reports failure by returning None rather than raising
        if image is None:
            raise ValueError("Failed to decode image: "
                "the data is corrupted or has an unsupported format")
        image = image.astype(dtype)
    elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL:
        from PIL import Image

        # np.array() always copies, so the result is writable even when
        # no dtype conversion is needed (np.asarray() may return
        # a read-only view, which breaks the in-place swap below).
        with Image.open(BytesIO(image_bytes)) as pil_image:
            image = np.array(pil_image, dtype=dtype)
        if image.ndim == 3 and image.shape[2] in {3, 4}:
            image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR
    else:
        raise NotImplementedError()

    assert image.ndim in {2, 3}
    if image.ndim == 3:
        assert image.shape[2] in {3, 4}
    return image

# Lower-case file extensions, with the leading dot, that find_images()
# (by default) and is_image() recognize as image files.
IMAGE_EXTENSIONS = {
    '.bmp', '.dib',
    '.exr', '.hdr', '.pic',
    '.jpg', '.jpeg', '.jpe', '.jp2',
    '.pbm', '.pgm', '.ppm', '.pxm', '.pnm', '.pfm',
    '.png',
    '.sr', '.ras',
    '.tga',
    '.tif', '.tiff',
    '.webp',
}

def find_images(dirpath: str,
        exts: Optional[Union[str, Iterable[str]]] = None,
        recursive: bool = False,
        max_depth: Optional[int] = None) -> Iterator[str]:
    """
    Yields paths of the files in a directory that have an image extension.

    Parameters:
        dirpath: the directory to search in
        exts: a single extension or a collection of extensions to look
            for, case-insensitive, with or without the leading dot.
            IMAGE_EXTENSIONS is used when omitted
        recursive: also look into subdirectories
        max_depth: passed to walk() when recursive is set; ignored
            otherwise (a depth of 0 is used)

    Returns: an iterator over the matching file paths (directory joined
        with file name)
    """

    # Normalize the requested extensions to the '.ext' lower-case form
    if isinstance(exts, str):
        exts = {'.' + exts.lower().lstrip('.')}
    elif exts is None:
        exts = IMAGE_EXTENSIONS
    else:
        exts = {'.' + e.lower().lstrip('.') for e in exts}

    def _check_image_ext(filename: str) -> bool:
        dotpos = filename.rfind('.')
        # -1: no extension at all; 0: the '.ext' case, which is excluded too
        return 0 < dotpos and filename[dotpos:].lower() in exts

    for d, _, filenames in walk(dirpath,
            max_depth=max_depth if recursive else 0):
        for filename in filenames:
            if _check_image_ext(filename):
                yield osp.join(d, filename)

def is_image(path: str) -> bool:
    """
    Checks whether a path points to an existing file that has a non-empty
    base name and one of the IMAGE_EXTENSIONS (case-insensitive).
    """

    trunk, ext = osp.splitext(osp.basename(path))
    # bool() keeps the declared return type: a bare 'and' chain would
    # return the empty string itself when the base name is empty
    return bool(trunk) and ext.lower() in IMAGE_EXTENSIONS and \
        osp.isfile(path)

class lazy_image:
    """
    A callable that loads an image from a path on demand and can keep
    the loaded image in an ImageCache.
    """

    def __init__(self, path: str,
            loader: Optional[Callable[[str], np.ndarray]] = None,
            cache: Union[bool, ImageCache] = True) -> None:
        """
        Parameters:
            path: passed to the loader when the image is requested
            loader: a function that loads the image from the path.
                load_image is used when omitted
            cache:
                - False: do not cache
                - True: use the global cache
                - ImageCache instance: an object to be used as cache
        """

        if loader is None:
            loader = load_image
        self._path = path
        self._loader = loader

        assert isinstance(cache, (ImageCache, bool))
        self._cache = cache

    def __call__(self) -> np.ndarray:
        """
        Returns the image, taking it from the cache when it is there and
        loading (and caching) it otherwise.
        """

        image = None
        # Cache entries are keyed by a weak reference to this object
        cache_key = weakref.ref(self)

        cache = self._get_cache()
        if cache is not None:
            image = cache.get(cache_key)

        if image is None:
            image = self._loader(self._path)
            if cache is not None:
                cache.push(cache_key, image)
        return image

    def _get_cache(self) -> Optional[ImageCache]:
        """
        Resolves the 'cache' constructor argument to the cache object
        to use, or None if caching is disabled.
        """

        if self._cache is True:
            cache = ImageCache.get_instance()
        elif self._cache is False:
            cache = None
        else:
            cache = self._cache
        return cache

ImageMeta = Dict[str, Tuple[int, int]] # filename, height, width

DEFAULT_IMAGE_META_FILE_NAME = 'images.meta'

def load_image_meta_file(image_meta_path: str) -> ImageMeta:
    """
    Loads image metadata from a file with the following format:

        <image name 1> <height 1> <width 1>
        <image name 2> <height 2> <width 2>
        ...

    Shell-like comments and quoted fields are allowed.

    This can be useful to support datasets in which image dimensions are
    required to interpret annotations.

    Parameters:
        image_meta_path: path to the image meta file

    Returns: a mapping from the image name to the (height, width) pair

    Raises:
        FileNotFoundError: the file does not exist
        ValueError: a line has fewer than 3 fields or non-integer
            dimensions
    """
    assert isinstance(image_meta_path, str)

    if not osp.isfile(image_meta_path):
        raise FileNotFoundError("Can't read image meta file '%s'" % \
            image_meta_path)

    image_meta = {}

    with open(image_meta_path, encoding='utf-8') as f:
        for line_idx, line in enumerate(f, start=1):
            fields = shlex.split(line, comments=True)
            if not fields:
                # an empty or comment-only line
                continue

            if len(fields) < 3:
                raise ValueError("Image meta file '%s', line %d: "
                    "expected '<image name> <height> <width>', "
                    "got %d field(s)" % \
                    (image_meta_path, line_idx, len(fields)))

            # ignore extra fields, so that the format can be extended later
            image_name, h, w = fields[:3]
            try:
                image_meta[image_name] = (int(h), int(w))
            except ValueError as e:
                raise ValueError("Image meta file '%s', line %d: "
                    "image height and width must be integers" % \
                    (image_meta_path, line_idx)) from e

    return image_meta

def save_image_meta_file(image_meta: ImageMeta, image_meta_path: str) -> None:
    """
    Saves image_meta to the path specified by image_meta_path in the format
    defined in load_image_meta_file's documentation.

    Parameters:
        image_meta: a mapping from the image name to the (height, width)
            pair
        image_meta_path: path of the file to write; an existing file
            is overwritten
    """

    assert isinstance(image_meta_path, str)

    with open(image_meta_path, 'w', encoding='utf-8') as f:
        # Add a comment about file syntax
        print("# <image name> <height> <width>", file=f)
        print("", file=f)

        for image_name, (height, width) in image_meta.items():
            # Quoting keeps names with spaces or special characters
            # readable by shlex.split() on load
            print(shlex.quote(image_name), height, width, file=f)