# Copyright (C) 2020-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
from typing import Any, Tuple
from attrs import define, field
class ImmutableObjectError(Exception):
    """Error whose message says that an immutable object cannot be set."""

    def __str__(self):
        message = "Cannot set value of immutable object"
        return message
class DatumaroError(Exception):
    """Common base class for the Datumaro error types declared in this file."""
class VcsError(DatumaroError):
    """Common base for the Vcs* error family; adds nothing to DatumaroError."""
class ReadonlyDatasetError(VcsError):
    """Error whose message says that a read-only dataset cannot be updated."""

    def __str__(self):
        message = "Can't update a read-only dataset"
        return message
class ReadonlyProjectError(VcsError):
    """Error whose message says that a read-only project cannot be changed."""

    def __str__(self):
        message = "Can't change a read-only project"
        return message
@define(auto_exc=False)
class UnknownRefError(VcsError):
    """Error reporting a ref that could not be parsed."""

    # The ref value quoted in the message.
    ref = field()

    def __str__(self):
        message = f"Can't parse ref '{self.ref}'"
        return message
class MissingObjectError(VcsError):
    """Marker subclass of VcsError; defines no fields or custom message."""
class MismatchingObjectError(VcsError):
    """Marker subclass of VcsError; defines no fields or custom message."""
@define(auto_exc=False)
class UnsavedChangesError(VcsError):
    """Error listing paths that have uncommitted changes."""

    # Iterable of strings; joined with ', ' in the message.
    paths = field()

    def __str__(self):
        joined_paths = ', '.join(self.paths)
        return "There are some uncommitted changes: %s" % joined_paths
class ForeignChangesError(VcsError):
    """Marker subclass of VcsError; defines no fields or custom message."""
class EmptyCommitError(VcsError):
    """Marker subclass of VcsError; defines no fields or custom message."""
class PathOutsideSourceError(VcsError):
    """Marker subclass of VcsError; defines no fields or custom message."""
class SourceUrlInsideProjectError(VcsError):
    """Error whose message says a source URL cannot point inside the project."""

    def __str__(self):
        message = "Source URL cannot point inside the project"
        return message
class UnexpectedUrlError(VcsError):
    """Marker subclass of VcsError; defines no fields or custom message."""
class MissingSourceHashError(VcsError):
    """Marker subclass of VcsError; defines no fields or custom message."""
class PipelineError(DatumaroError):
    """Common base for the pipeline error family; adds nothing to its parent."""
class InvalidPipelineError(PipelineError):
    """Base for the invalid-pipeline errors; defines no fields or message."""
class EmptyPipelineError(InvalidPipelineError):
    """Marker subclass of InvalidPipelineError; defines no fields or message."""
class MultiplePipelineHeadsError(InvalidPipelineError):
    """Marker subclass of InvalidPipelineError; defines no fields or message."""
class InvalidStageError(InvalidPipelineError):
    """Base for the invalid-stage errors; defines no fields or message."""
class UnknownStageError(InvalidStageError):
    """Marker subclass of InvalidStageError; defines no fields or message."""
class MigrationError(DatumaroError):
    """Marker subclass of DatumaroError; defines no fields or custom message."""
class OldProjectError(DatumaroError):
    """Error telling the user that the project must be migrated first."""

    def __str__(self):
        # The literal starts and ends with a newline; keep it untouched.
        message = """
The project you're trying to load was
created by the old Datumaro version. Try to migrate the
project with 'datum project migrate' and then reload.
"""
        return message
@define(auto_exc=False)
class ProjectNotFoundError(DatumaroError):
    """Error reporting that no project was found at a path."""

    # The location quoted in the message.
    path = field()

    def __str__(self):
        message = f"Can't find project at '{self.path}'"
        return message
@define(auto_exc=False)
class ProjectAlreadyExists(DatumaroError):
    """Error reporting that a project already exists at a path."""

    # The location quoted in the message.
    path = field()

    def __str__(self):
        return (
            "Can't create project: a project already exists "
            f"at '{self.path}'"
        )
@define(auto_exc=False)
class UnknownSourceError(DatumaroError):
    """Error reporting a source name that is not known."""

    # The source name quoted in the message.
    name = field()

    def __str__(self):
        message = f"Unknown source '{self.name}'"
        return message
@define(auto_exc=False)
class UnknownTargetError(DatumaroError):
    """Error reporting a target name that is not known."""

    # The target name quoted in the message.
    name = field()

    def __str__(self):
        message = f"Unknown target '{self.name}'"
        return message
@define(auto_exc=False)
class SourceExistsError(DatumaroError):
    """Error reporting that a source with the given name already exists."""

    # The source name quoted in the message.
    name = field()

    def __str__(self):
        message = f"Source '{self.name}' already exists"
        return message
class DatasetExportError(DatumaroError):
    """Common base for the export error family; adds nothing to its parent."""
@define(auto_exc=False)
class ItemExportError(DatasetExportError):
    """
    Carries extra information about the item whose export failed.

    The underlying error is expected to be available through the
    `__cause__` member.
    """

    item_id: Tuple[str, str]

    def __str__(self):
        # Wrap in a 1-tuple so that a tuple-valued id is formatted as one value.
        fmt_args = (self.item_id, )
        return "Failed to export item %s" % fmt_args
class AnnotationExportError(ItemExportError):
    """Marker subclass of ItemExportError; defines no extra fields or message."""
class DatasetImportError(DatumaroError):
    """Common base for the import error family; adds nothing to its parent."""
@define(auto_exc=False)
class ItemImportError(DatasetImportError):
    """
    Carries extra information about the item whose import failed.

    The underlying error is expected to be available through the
    `__cause__` member.
    """

    item_id: Tuple[str, str]

    def __str__(self):
        # Wrap in a 1-tuple so that a tuple-valued id is formatted as one value.
        fmt_args = (self.item_id, )
        return "Failed to import item %s" % fmt_args
class AnnotationImportError(ItemImportError):
    """Marker subclass of ItemImportError; defines no extra fields or message."""
@define(auto_exc=False)
class DatasetNotFoundError(DatasetImportError):
    """Error reporting that no dataset was found at a path."""

    # The location quoted in the message.
    path = field()

    def __str__(self):
        message = f"Failed to find dataset at '{self.path}'"
        return message
class DatasetError(DatumaroError):
    """Common base for the dataset error family; adds nothing to its parent."""
class CategoriesRedefinedError(DatasetError):
    """Error whose message says dataset categories can only be set once."""

    def __str__(self):
        message = "Categories can only be set once for a dataset"
        return message
@define(auto_exc=False)
class RepeatedItemError(DatasetError):
    """Error reporting an item that is repeated in the source sequence."""

    # Identifier of the repeated item, as shown in the message.
    item_id = field()

    def __str__(self):
        message = f"Item {self.item_id} is repeated in the source sequence."
        return message
class DatasetQualityError(DatasetError):
    """Common base for the dataset quality errors; adds nothing to its parent."""
@define(auto_exc=False)
class AnnotationsTooCloseError(DatasetQualityError):
    """Error reporting two annotations of an item that are too close."""

    item_id = field()
    # The two annotations being compared.
    a = field()
    b = field()
    # The distance value reported in the message.
    distance = field()

    def __str__(self):
        details = (self.item_id, self.a, self.b, self.distance)
        return (
            "Item %s: annotations are too close: %s, %s, distance = %s"
            % details
        )
@define(auto_exc=False)
class WrongGroupError(DatasetQualityError):
    """Error reporting an annotation group with unexpected labels."""

    item_id = field()
    # Both label collections are converted to sets on assignment.
    found = field(converter=set)
    expected = field(converter=set)
    # The group is converted to a list on assignment.
    group = field(converter=list)

    def __str__(self):
        details = (self.item_id, self.found, self.expected, self.group)
        template = (
            "Item %s: annotation group has wrong labels: "
            "found %s, expected %s, group %s"
        )
        return template % details
@define(auto_exc=False, init=False)
class DatasetMergeError(DatasetError):
    """
    Base class for merge errors; stores the set of `sources`.

    attrs does not generate `__init__` for this class (`init=False`);
    `_my__init__` is installed as `__init__` right after the class body.
    """

    # Keyword-only; converted to a set, defaults to an empty set.
    sources = field(converter=set, factory=set, kw_only=True)

    def _my__init__(self, msg=None, *, sources=None):
        # Hand the message to the base exception first, then let attrs
        # assign the declared fields.
        super().__init__(msg)
        # A falsy `sources` (None or empty) is replaced by a new empty set.
        self.__attrs_init__(sources=sources or set())

# Pylint will raise false positive warnings for derived classes,
# when __init__ is defined directly
setattr(DatasetMergeError, '__init__', DatasetMergeError._my__init__)
@define(auto_exc=False)
class MismatchingImageInfoError(DatasetMergeError):
    """Merge error reporting two differing image size values for an item."""

    item_id: Tuple[str, str]
    # The two conflicting size values.
    a: Tuple[int, int]
    b: Tuple[int, int]

    def __str__(self):
        details = (self.item_id, self.a, self.b)
        return "Item %s: mismatching image size info: %s vs %s" % details
@define(auto_exc=False)
class MismatchingImagePathError(DatasetMergeError):
    """Merge error reporting two differing image path values for an item."""

    item_id: Tuple[str, str]
    # The two conflicting path values.
    a: str
    b: str

    def __str__(self):
        details = (self.item_id, self.a, self.b)
        return "Item %s: mismatching image path info: %s vs %s" % details
@define(auto_exc=False)
class MismatchingAttributesError(DatasetMergeError):
    """Merge error reporting two differing values of an item attribute."""

    item_id: Tuple[str, str]
    # Name of the attribute whose values differ.
    key: str
    # The two conflicting values.
    a: Any
    b: Any

    def __str__(self):
        details = (self.item_id, self.key, self.a, self.b)
        return (
            "Item %s: mismatching image attribute %s: %s vs %s" % details
        )
class ConflictingCategoriesError(DatasetMergeError):
    """Marker subclass of DatasetMergeError; defines no extra fields or message."""
@define(auto_exc=False)
class NoMatchingAnnError(DatasetMergeError):
    """Merge error reporting an annotation with no match in the sources."""

    item_id = field()
    # The annotation that could not be matched.
    ann = field()

    def __str__(self):
        details = (self.item_id, self.sources, self.ann)
        template = (
            "Item %s: can't find matching annotation "
            "in sources %s, annotation is %s"
        )
        return template % details
@define(auto_exc=False)
class NoMatchingItemError(DatasetMergeError):
    """Merge error reporting an item with no match in the sources."""

    item_id = field()

    def __str__(self):
        details = (self.item_id, self.sources)
        return "Item %s: can't find matching item in sources %s" % details
@define(auto_exc=False)
class FailedLabelVotingError(DatasetMergeError):
    """Merge error reporting that label voting failed for an item.

    The message always includes the item id, the votes and the sources;
    the annotation is mentioned only when `ann` is truthy.
    """

    item_id = field()
    # The votes value reported in the message.
    votes = field()
    # Optional annotation the voting was done for.
    ann = field(default=None)

    def __str__(self):
        # The leading space keeps the optional part from being glued to
        # "failed"; the 1-tuple keeps a tuple-valued `ann` from being
        # unpacked by the % operator.
        ann_part = " for ann %s" % (self.ann, ) if self.ann else ""
        return "Item %s: label voting failed%s, votes %s, sources %s" % (
            self.item_id, ann_part, self.votes, self.sources)
@define(auto_exc=False)
class FailedAttrVotingError(DatasetMergeError):
    """Merge error reporting that attribute voting failed for an annotation.

    The message names the item, the attribute, the annotation, the votes
    and the sources.
    """

    item_id = field()
    # The attribute the voting was done for.
    attr = field()
    # The votes value reported in the message.
    votes = field()
    # The annotation the attribute belongs to.
    ann = field()

    def __str__(self):
        # Include `attr` so the message says which attribute failed.
        details = (
            self.item_id, self.attr, self.ann, self.votes, self.sources)
        template = (
            "Item %s: attribute '%s' voting failed "
            "for ann %s, votes %s, sources %s"
        )
        return template % details
@define(auto_exc=False)
class DatasetValidationError(DatumaroError):
    """Base class for validation reports; carries a `severity` value."""

    # Must expose a `.name` attribute, which `to_dict` reads.
    severity = field()

    def to_dict(self):
        """Return the class name, message and severity name as a dict."""
        report = {
            'anomaly_type': type(self).__name__,
            'description': str(self),
            'severity': self.severity.name,
        }
        return report
@define(auto_exc=False)
class DatasetItemValidationError(DatasetValidationError):
    """Validation report tied to one item, identified by id and subset."""

    item_id = field()
    subset = field()

    def to_dict(self):
        """Return the parent's dict extended with `item_id` and `subset`."""
        report = super().to_dict()
        report.update({
            'item_id': self.item_id,
            'subset': self.subset,
        })
        return report
@define(auto_exc=False)
class MissingLabelCategories(DatasetValidationError):
    """Validation report saying that metadata is required for validation."""

    def __str__(self):
        return (
            "Metadata (ex. LabelCategories) should be defined"
            " to validate a dataset."
        )
@define(auto_exc=False)
class MissingAnnotation(DatasetItemValidationError):
    """Item report saying that annotations of a given type were not found."""

    # The annotation type quoted in the message.
    ann_type = field()

    def __str__(self):
        return (
            f"Item needs '{self.ann_type}' annotation(s), "
            "but not found."
        )
@define(auto_exc=False)
class MultiLabelAnnotations(DatasetItemValidationError):
    """Item report saying that multiple labels were found instead of one."""

    def __str__(self):
        message = 'Item needs a single label but multiple labels are found.'
        return message
@define(auto_exc=False)
class MissingAttribute(DatasetItemValidationError):
    """Item report saying that an attribute required for a label is absent."""

    label_name = field()
    attr_name = field()

    def __str__(self):
        return (
            f"Item needs the attribute '{self.attr_name}' "
            f"for the label '{self.label_name}'."
        )
@define(auto_exc=False)
class UndefinedLabel(DatasetItemValidationError):
    """Item report saying that a label is not defined in metadata."""

    label_name = field()

    def __str__(self):
        return (
            f"Item has the label '{self.label_name}' which "
            "is not defined in metadata."
        )
@define(auto_exc=False)
class UndefinedAttribute(DatasetItemValidationError):
    """Item report saying that a label attribute is not defined in metadata."""

    label_name = field()
    attr_name = field()

    def __str__(self):
        return (
            f"Item has the attribute '{self.attr_name}' for the "
            f"label '{self.label_name}' which is not defined in metadata."
        )
@define(auto_exc=False)
class LabelDefinedButNotFound(DatasetValidationError):
    """Report saying that a label defined in metadata is absent from the dataset."""

    label_name = field()

    def __str__(self):
        return (
            f"The label '{self.label_name}' is defined in "
            "metadata, but not found in the dataset."
        )
@define(auto_exc=False)
class AttributeDefinedButNotFound(DatasetValidationError):
    """Report saying that a defined label attribute is absent from the dataset."""

    label_name = field()
    attr_name = field()

    def __str__(self):
        return (
            f"The attribute '{self.attr_name}' for the label "
            f"'{self.label_name}' is defined in metadata, but not "
            "found in the dataset."
        )
@define(auto_exc=False)
class OnlyOneLabel(DatasetValidationError):
    """Report saying that the dataset contains only a single label."""

    label_name = field()

    def __str__(self):
        message = f"The dataset has only one label '{self.label_name}'."
        return message
@define(auto_exc=False)
class OnlyOneAttributeValue(DatasetValidationError):
    """Report saying that a label attribute has only a single value."""

    label_name = field()
    attr_name = field()
    # The single attribute value quoted in the message.
    value = field()

    def __str__(self):
        return (
            "The dataset has the only attribute value "
            f"'{self.value}' for the attribute '{self.attr_name}' for the "
            f"label '{self.label_name}'."
        )
@define(auto_exc=False)
class FewSamplesInLabel(DatasetValidationError):
    """Report saying that a label might have too few samples."""

    label_name = field()
    # The sample count quoted in the message.
    count = field()

    def __str__(self):
        return (
            f"The number of samples in the label '{self.label_name}'"
            f" might be too low. Found '{self.count}' samples."
        )
@define(auto_exc=False)
class FewSamplesInAttribute(DatasetValidationError):
    """Report saying that an attribute value of a label might have too few samples."""

    label_name = field()
    attr_name = field()
    attr_value = field()
    # The sample count quoted in the message.
    count = field()

    def __str__(self):
        return (
            "The number of samples for attribute = value "
            f"'{self.attr_name} = {self.attr_value}' for the label "
            f"'{self.label_name}' might be too low. "
            f"Found '{self.count}' samples."
        )
@define(auto_exc=False)
class ImbalancedLabels(DatasetValidationError):
    """Report saying that the label distribution is imbalanced."""

    def __str__(self):
        message = 'There is an imbalance in the label distribution.'
        return message
@define(auto_exc=False)
class ImbalancedAttribute(DatasetValidationError):
    """Report saying that an attribute's distribution for a label is imbalanced."""

    label_name = field()
    attr_name = field()

    def __str__(self):
        return (
            "There is an imbalance in the distribution of attribute"
            f" '{self.attr_name}' for the label '{self.label_name}'."
        )
@define(auto_exc=False)
class ImbalancedDistInLabel(DatasetValidationError):
    """Report saying that a property's values are unevenly distributed for a label."""

    label_name = field()
    # Name of the property quoted in the message.
    prop = field()

    def __str__(self):
        return (
            f"Values of '{self.prop}' are not evenly "
            f"distributed for '{self.label_name}' label."
        )
@define(auto_exc=False)
class ImbalancedDistInAttribute(DatasetValidationError):
    """Report saying that a property's values are unevenly distributed for an attribute value."""

    label_name = field()
    attr_name = field()
    attr_value = field()
    # Name of the property quoted in the message.
    prop = field()

    def __str__(self):
        return (
            f"Values of '{self.prop}' are not evenly "
            f"distributed for '{self.attr_name}' = '{self.attr_value}' for "
            f"the '{self.label_name}' label."
        )
@define(auto_exc=False)
class NegativeLength(DatasetItemValidationError):
    """Item report saying that an annotation property should be positive."""

    ann_id = field()
    # Name of the property and the value that was found for it.
    prop = field()
    val = field()

    def __str__(self):
        return (
            f"Annotation '{self.ann_id}' in "
            "the item should have a positive value of "
            f"'{self.prop}' but got '{self.val}'."
        )
@define(auto_exc=False)
class InvalidValue(DatasetItemValidationError):
    """Item report saying that an annotation property is inf or NaN."""

    ann_id = field()
    # Name of the property quoted in the message.
    prop = field()

    def __str__(self):
        return (
            f"Annotation '{self.ann_id}' in "
            'the item has an inf or a NaN value of '
            f"'{self.prop}'."
        )
@define(auto_exc=False)
class FarFromLabelMean(DatasetItemValidationError):
    """Item report saying that a property value is too far from the label mean."""

    label_name = field()
    ann_id = field()
    # Name of the property, the label mean and the value that was found.
    prop = field()
    mean = field()
    val = field()

    def __str__(self):
        return (
            f"Annotation '{self.ann_id}' in "
            f"the item has a value of '{self.prop}' that "
            "is too far from the label average. (mean of "
            f"'{self.label_name}' label: {self.mean}, got '{self.val}')."
        )
@define(auto_exc=False)
class FarFromAttrMean(DatasetItemValidationError):
    """Item report saying that a property value is too far from the attribute mean."""

    label_name = field()
    ann_id = field()
    attr_name = field()
    attr_value = field()
    # Name of the property, the attribute mean and the value that was found.
    prop = field()
    mean = field()
    val = field()

    def __str__(self):
        return (
            f"Annotation '{self.ann_id}' in the "
            f"item has a value of '{self.prop}' that "
            "is too far from the attribute average. (mean of "
            f"'{self.attr_name}' = '{self.attr_value}' for the "
            f"'{self.label_name}' label: {self.mean}, got '{self.val}')."
        )