Skip to content

ValidationMixin

Provides dataset structure validation functionality.

Overview

The ValidationMixin validates dataset structure to catch errors early:

  • Dimension checks - Ensure all referenced dimensions exist
  • Shape validation - Verify data shapes match dimension sizes
  • Coordinate checks - Validate coordinate presence (strict mode)

Key Methods

  • validate(strict_coords=False) - Validate dataset structure
  • _infer_and_register_dims(arr) - Internal dimension inference from an array's data

Validation Checks

Basic Validation

  • Unknown dimensions referenced by variables/coordinates
  • Shape mismatches between data and declared dimensions

Strict Mode

When strict_coords=True:

  • Every dimension used by a variable must have a coordinate of the same name
  • Raises ValueError on any validation failure (as basic validation also does)

Usage

ds = DummyDataset()
ds.add_dim("time", 10)
ds.add_variable("temp", dims=["time", "lat"])  # 'lat' was never added as a dimension

# Basic validation: every problem found is reported in a single ValueError
try:
    ds.validate()
except ValueError as e:
    print(f"Validation error: {e}")

# Strict validation: additionally requires a coordinate for each variable dimension
ds.validate(strict_coords=True)  # Raises ValueError if issues found

API Reference

Mixin providing dataset validation capabilities.

Source code in src/dummyxarray/validation.py
class ValidationMixin:
    """Mixin providing dataset validation capabilities.

    The host class is expected to expose ``dims`` (a mapping of dimension
    name to size) as well as ``coords`` and ``variables`` (mappings of name
    to array objects that carry ``dims`` and ``data`` attributes).
    """

    def validate(self, strict_coords=False):
        """
        Validate the entire dataset structure.

        All problems are collected first and then reported together in a
        single exception, so one call surfaces every issue at once.

        Parameters
        ----------
        strict_coords : bool, default False
            If True, require that all variable dimensions have corresponding coordinates

        Raises
        ------
        ValueError
            If validation fails
        """
        errors = []
        known_dims = set(self.dims)

        # Walk coords and variables as separate entries instead of merging
        # them into one dict: a merge would let a variable silently replace a
        # coordinate of the same name, leaving that coordinate unchecked.
        entries = [*self.coords.items(), *self.variables.items()]

        # 1. Dimensions must be known
        for name, arr in entries:
            if arr.dims is None:
                continue
            for d in arr.dims:
                if d not in known_dims:
                    errors.append(f"{name}: Unknown dimension '{d}'.")

        # 2. Data shapes must match dims
        for name, arr in entries:
            if arr.data is None or arr.dims is None:
                continue
            # An unknown dimension has no size to compare against and was
            # already reported in step 1; looking it up in self.dims would
            # raise KeyError and mask the aggregated ValueError.
            if any(d not in known_dims for d in arr.dims):
                continue
            shape = np.asarray(arr.data).shape
            dim_sizes = [self.dims[d] for d in arr.dims]
            if tuple(dim_sizes) != shape:
                errors.append(f"{name}: Data shape {shape} does not match dims {dim_sizes}.")

        # 3. Variables reference coords?
        if strict_coords:
            coord_names = set(self.coords)
            for name, arr in self.variables.items():
                if not arr.dims:
                    continue
                for d in arr.dims:
                    if d not in coord_names:
                        errors.append(f"{name}: Missing coordinate for dimension '{d}'.")

        if errors:
            raise ValueError("Dataset validation failed:\n" + "\n".join(errors))

    def _infer_and_register_dims(self, arr):
        """
        Infer dimension sizes from data and register them.

        Dimensions not yet present in ``self.dims`` are added; dimensions that
        already exist must agree with the inferred size.

        Parameters
        ----------
        arr : DummyArray
            Array to infer dimensions from

        Raises
        ------
        ValueError
            If dimension sizes conflict
        """
        inferred = arr.infer_dims_from_data()

        for dim, size in inferred.items():
            if dim not in self.dims:
                self.dims[dim] = size
                continue
            existing = self.dims[dim]
            if existing != size:
                # NOTE: dimensions registered earlier in this loop stay
                # registered even though the call fails here.
                raise ValueError(
                    f"Dimension mismatch for '{dim}': existing={existing} new={size}"
                )

validate

validate(strict_coords=False)

Validate the entire dataset structure.

Parameters:

Name Type Description Default
strict_coords bool

If True, require that all variable dimensions have corresponding coordinates

False

Raises:

Type Description
ValueError

If validation fails

Source code in src/dummyxarray/validation.py
def validate(self, strict_coords=False):
    """
    Validate the entire dataset structure.

    All problems are collected first and then reported together in a
    single exception, so one call surfaces every issue at once.

    Parameters
    ----------
    strict_coords : bool, default False
        If True, require that all variable dimensions have corresponding coordinates

    Raises
    ------
    ValueError
        If validation fails
    """
    errors = []
    known_dims = set(self.dims)

    # Walk coords and variables as separate entries instead of merging them
    # into one dict: a merge would let a variable silently replace a
    # coordinate of the same name, leaving that coordinate unchecked.
    entries = [*self.coords.items(), *self.variables.items()]

    # 1. Dimensions must be known
    for name, arr in entries:
        if arr.dims is None:
            continue
        for d in arr.dims:
            if d not in known_dims:
                errors.append(f"{name}: Unknown dimension '{d}'.")

    # 2. Data shapes must match dims
    for name, arr in entries:
        if arr.data is None or arr.dims is None:
            continue
        # An unknown dimension has no size to compare against and was already
        # reported in step 1; looking it up in self.dims would raise KeyError
        # and mask the aggregated ValueError.
        if any(d not in known_dims for d in arr.dims):
            continue
        shape = np.asarray(arr.data).shape
        dim_sizes = [self.dims[d] for d in arr.dims]
        if tuple(dim_sizes) != shape:
            errors.append(f"{name}: Data shape {shape} does not match dims {dim_sizes}.")

    # 3. Variables reference coords?
    if strict_coords:
        coord_names = set(self.coords)
        for name, arr in self.variables.items():
            if not arr.dims:
                continue
            for d in arr.dims:
                if d not in coord_names:
                    errors.append(f"{name}: Missing coordinate for dimension '{d}'.")

    if errors:
        raise ValueError("Dataset validation failed:\n" + "\n".join(errors))