Source code for datumaro.cli.util.project

# Copyright (C) 2019-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT

import os
import re
from typing import Optional, Tuple

from datumaro.cli.util.errors import WrongRevpathError
from datumaro.components.dataset import Dataset
from datumaro.components.environment import Environment
from datumaro.components.errors import DatumaroError, ProjectNotFoundError
from datumaro.components.project import Project, Revision
from datumaro.util.os_util import generate_next_name
from datumaro.util.scope import on_error_do, scoped


def load_project(project_dir, readonly=False):
    """
    Creates a Project object for the given directory.

    Parameters:
        project_dir: the directory passed to the Project constructor
        readonly: forwarded to the Project constructor as the
            'readonly' keyword argument

    Returns: the created Project instance
    """
    project = Project(project_dir, readonly=readonly)
    return project
def generate_next_file_name(basename, basedir=".", sep=".", ext=""):
    """
    Produces a file name that is not yet used in basedir.

    If basedir does not contain basename, basename is returned as is.
    Otherwise, the name is built from basename, followed by sep and
    the number that comes next after the last number already used in
    basedir by the entries with the basename prefix.
    Optionally, ext is appended.

    The candidates are the entries reported by os.listdir(basedir),
    the name itself is computed by generate_next_name().
    """
    existing_names = os.listdir(basedir)
    return generate_next_name(existing_names, basename, sep, ext)
def parse_dataset_pathspec(s: str, env: Optional[Environment] = None) -> Dataset:
    """
    Parses Dataset paths. The syntax is:
        - <dataset path>[ :<format> ]

    A ':' that is immediately followed by a slash or a backslash is
    treated as a part of the dataset path, not as the format separator
    (so a path like 'C:/data' is kept whole).

    Parameters:
        s: the pathspec string to parse
        env: forwarded to Dataset.import_from() as the 'env' argument

    Returns: a dataset from the parsed path

    Raises:
        ValueError: if the string does not match the pathspec syntax
    """

    parsed = re.fullmatch(
        r"""
        (?P<dataset_path>(?: [^:] | :[/\\] )+)
        (:(?P<format>.+))?
        """,
        s,
        flags=re.VERBOSE,
    )
    if parsed is None:
        raise ValueError("Failed to recognize dataset pathspec in '%s'" % s)

    dataset_path = parsed.group("dataset_path")
    # Not called 'format', to keep the format() builtin visible.
    # The value is None when the optional ':<format>' part is absent.
    dataset_format = parsed.group("format")
    return Dataset.import_from(dataset_path, dataset_format, env=env)
@scoped
def parse_revspec(
    s: str, ctx_project: Optional[Project] = None
) -> Tuple[Dataset, Optional[Project]]:
    """
    Parses Revision paths. The syntax is:
        - <project path> [ @<rev> ] [ :<target> ]
        - <rev> [ :<target> ]
        - <target>
    The second and the third forms assume an existing "current" project.

    Parameters:
        s: the revspec string to parse
        ctx_project: the "current" project, used when the first
            component of the string is not a project path

    Returns: the dataset and the project from the parsed path.
      The project is only returned when specified in the revpath,
      otherwise the second element is None.

    Raises:
        ValueError: if the string does not match the revspec syntax
        ProjectNotFoundError: if the first component is not a project
            path and no ctx_project is given
    """

    parsed = re.fullmatch(
        r"""
        (?P<proj_path>(?: [^@:] | :[/\\] )+)
        (@(?P<rev>[^:]+))?
        (:(?P<source>.+))?
        """,
        s,
        flags=re.VERBOSE,
    )
    if parsed is None:
        raise ValueError("Failed to recognize revspec in '%s'" % s)

    proj_path = parsed.group("proj_path")
    rev = parsed.group("rev")
    source = parsed.group("source")
    target_project = None

    # The first group uses '+', so a successful match is never empty
    assert proj_path

    # An explicit '@<rev>' means the first component must be a project
    # path. Without it, the first component is either a project path,
    # a revision or a source name - checked in this order.
    if rev or Project.find_project_dir(proj_path):
        target_project = load_project(proj_path, readonly=True)
        project = target_project
    elif ctx_project:
        project = ctx_project
        if project.is_ref(proj_path):
            rev = proj_path
        elif not source:
            source = proj_path
        # NOTE(review): when the first component is not a ref and
        # ':<target>' is present, the first component is not used at all
        # - confirm this is intended.
    else:
        raise ProjectNotFoundError(
            "Failed to find project at '%s'. "
            "Specify project path with '-p/--project' or in the "
            "target pathspec." % proj_path
        )

    if target_project:
        # Registered in the enclosing @scoped context; presumably closes
        # the project opened here if the code below fails.
        on_error_do(Project.close, target_project, ignore_errors=True)

    tree = project.get_rev(rev)
    return tree.make_dataset(source), target_project
def parse_full_revpath(
    s: str, ctx_project: Optional[Project] = None
) -> Tuple[Dataset, Optional[Project]]:
    """
    Parses a revpath, which is either a Dataset path or a Revision path.

    The string is first parsed as a Revision path. If that fails,
    it is parsed as a Dataset path, using the environment of ctx_project
    or a new Environment, if no ctx_project is given.

    Only DatumaroError and OSError count as a failed attempt. Other
    exceptions, including the ValueError the parsers raise on a syntax
    mismatch, are not caught here.

    Returns: the dataset and the project from the parsed path
      The project is only returned when specified in the revpath.

    Raises:
        WrongRevpathError: if both attempts fail, the collected errors
            are passed in the 'problems' argument
    """

    env = ctx_project.env if ctx_project else Environment()

    problems = []

    # First attempt: a Revision path
    try:
        return parse_revspec(s, ctx_project=ctx_project)
    except (DatumaroError, OSError) as revspec_error:
        problems.append(revspec_error)

    # Second attempt: a Dataset path, no project to return in this case
    try:
        dataset = parse_dataset_pathspec(s, env=env)
    except (DatumaroError, OSError) as pathspec_error:
        problems.append(pathspec_error)
    else:
        return dataset, None

    raise WrongRevpathError(problems=problems)
def split_local_revpath(revpath: str) -> Tuple[Revision, str]:
    """
    Splits the given string into revpath components.

    A local revpath is a path to a revision within the current project.
    The syntax is:
        - [ <revision> : ] [ <target> ]
    At least one part must be present (this is not checked here).

    The string is split at the first ':'. If there is no ':',
    the whole string is the target and the revision is an empty string.

    Returns: (revision, build target)
    """

    head, colon, tail = revpath.partition(":")
    if not colon:
        # No separator - the whole string is the build target
        return "", revpath
    return head, tail