Source code for kwarray.util_slices

"""
Utilities related to slicing

References:
    https://stackoverflow.com/questions/41153803/zero-padding-slice-past-end-of-array-in-numpy

TODO:
    - [ ] Could have a kwarray function to expose this inverse slice
          functionality. Also having a top-level call to apply an embedded slice
          would be good.
"""
import ubelt as ub
import numpy as np



[docs]
def padded_slice(data, slices, pad=None, padkw=None, return_info=False):
    """
    Allows slices with out-of-bound coordinates. Any out of bounds coordinate
    will be sampled via padding.

    Args:
        data (Sliceable): data to slice into. Any channels must be the last
            dimension. Must expose a ``shape`` attribute.
        slices (slice | Tuple[slice, ...]): slice for each leading dimension.
            A single slice is treated as a one-dimensional request.
        pad (None | int | List[int | Tuple[int, int]]): additional padding of
            the slice. See :func:`embed_slice` for the accepted forms.
        padkw (Dict): keyword arguments forwarded to the padding routine
            (ultimately :func:`numpy.pad`). If unspecified defaults to
            ``{'mode': 'constant'}``.
        return_info (bool, default=False): if True, return extra information
            about the transform.

    Note:
        Negative slices have a different meaning here than they usually do.
        Normally, they indicate a wrap-around or a reversed stride, but here
        they index into out-of-bounds space (which depends on the pad mode).
        For example a slice of -2:1 literally samples two pixels to the left of
        the data and one pixel from the data, so you get two padded values and
        one data value.

    SeeAlso:
        embed_slice - finds the embedded slice and padding

    Returns:

        Sliceable:
            data_sliced: subregion of the input data (possibly with padding,
                depending on if the original slice went out of bounds)


        Tuple[Sliceable, Dict] :
            data_sliced : as above

            transform : information on how to return to the original coordinates

                Currently a dict containing:
                    st_dims: a list indicating the low and high space-time
                        coordinate values of the returned data slice.
                    st_offset: the low coordinate of each entry in st_dims.

                The structure of this dictionary may change in the future

    Example:
        >>> import kwarray
        >>> data = np.arange(5)
        >>> slices = [slice(-2, 7)]

        >>> data_sliced = kwarray.padded_slice(data, slices)
        >>> print(ub.urepr(data_sliced, with_dtype=False))
        np.array([0, 0, 0, 1, 2, 3, 4, 0, 0])

        >>> data_sliced = kwarray.padded_slice(data, slices, pad=[(3, 3)])
        >>> print(ub.urepr(data_sliced, with_dtype=False))
        np.array([0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0, 0])

        >>> data_sliced = kwarray.padded_slice(data, slice(3, 4), pad=[(1, 0)])
        >>> print(ub.urepr(data_sliced, with_dtype=False))
        np.array([2, 3])

    Example:
        >>> # The transform reports the requested window, even when that
        >>> # window lies completely outside of the data.
        >>> import kwarray
        >>> data = np.arange(5)
        >>> data_sliced, info = kwarray.padded_slice(
        >>>     data, slice(7, 9), return_info=True)
        >>> print(info['st_dims'])
        [(7, 9)]
    """
    if isinstance(slices, slice):
        slices = [slices]

    if padkw is None:
        padkw = {
            'mode': 'constant',
        }

    ndim = len(slices)
    data_dims = data.shape[:ndim]

    # separate requested slice into an in-bounds part and a padding part
    data_slice, extra_padding = embed_slice(slices, data_dims, pad=pad)

    # Crop to the data slice, and then extend with requested padding
    data_sliced = apply_embedded_slice(data, data_slice, extra_padding, **padkw)

    if not return_info:
        return data_sliced

    # Report the window in unbounded coordinates. This is computed from the
    # request itself rather than from the clipped slice: when the window lies
    # entirely outside the data the clipped slice collapses onto the nearest
    # border, so the true offset cannot be recovered from it.
    pad_slice = _coerce_pad(pad, ndim)
    st_dims = []
    for sl, dim_size, pad_ in zip(slices, data_dims, pad_slice):
        low = 0 if sl.start is None else sl.start
        high = dim_size if sl.stop is None else sl.stop
        st_dims.append((low - pad_[0], high + pad_[1]))

    # TODO: return a better transform back to the original space
    transform = {
        'st_dims': st_dims,
        'st_offset': [d[0] for d in st_dims]
    }
    return data_sliced, transform



# Design notes kept as a module-level string (not executed). The text sketches
# how an "inverse slice" could be built from the padding returned by
# embed_slice so that a processed chip can be written back into the original
# array. It is pseudo-code: names such as ``imdata``, ``tl_y`` and
# ``function`` are not defined anywhere in this module.
__TODO__ = """

    - [ ] Could have a kwarray function to expose this inverse slice
          functionality. Also having a top-level call to apply an embedded slice
          would be good.

    chip_index = tuple([slice(tl_y, br_y), slice(tl_x, br_x)])
    data_slice, padding = kwarray.embed_slice(chip_index, imdata.shape)
    # TODO: could have a kwarray function to expose this inverse slice
    # functionality. Also having a top-level call to apply an embedded
    # slice would be good
    inverse_slice = (
        slice(padding[0][0], imdata.shape[0] - padding[0][1]),
        slice(padding[1][0], imdata.shape[1] - padding[1][1]),
    )
    chip = kwarray.padded_slice(imdata, chip_index)
    chip = imdata[chip_index]

    fgdata = function(chip)

    # Apply just the data part back to the original
    imdata[tl_y:br_y, tl_x:br_x, :] = fgdata[inverse_slice]
"""



[docs]
def apply_embedded_slice(data, data_slice, extra_padding, **padkw):
    """
    Crop ``data`` with a precomputed in-bounds slice and then pad the crop.

    This is the second half of :func:`padded_slice`; the inputs are normally
    the two values returned by :func:`embed_slice`.

    Args:
        data (ndarray): data to slice
        data_slice (Tuple[slice, ...]): in-bounds slice to index ``data``
            with (first output of embed_slice)
        extra_padding (List[Tuple[int, int]]): (low, high) amount of padding
            to add to each leading dimension of the crop (second output of
            embed_slice)
        **padkw: forwarded to the padding routine

    Returns:
        ndarray: the cropped and padded data
    """
    # Index first (in-bounds part only), then grow the result by the padding.
    return _apply_padding(data[data_slice], extra_padding, **padkw)




[docs]
def _apply_padding(array, pad_width, **padkw):
    """
    Alternative to numpy pad with different short-cut semantics for
    the "pad_width" argument.

    Unlike numpy pad, you must specify a (start, stop) tuple for each
    dimension. The shortcut is that you only need to specify this for the
    leading dimensions. Any unspecified trailing dimension will get an implicit
    (0, 0) padding.

    Args:
        array (ndarray): data to pad
        pad_width (Sequence[Tuple[int, int]]): (start, stop) padding for each
            leading dimension. Any sequence type (list, tuple, ...) works.
        **padkw: forwarded to :func:`numpy.pad` (e.g. ``mode``)

    Returns:
        ndarray: the padded array. When the total requested padding is zero
        the input ``array`` object itself is returned (no copy is made).

    TODO: does this get exposed as a public function?
    """
    # Normalize to a list so that tuples (or other sequences) can be extended
    # with the implicit trailing entries below. Previously a tuple input
    # failed with a TypeError when trailing dimensions had to be filled in.
    pad_width = list(pad_width)

    if sum(map(sum, pad_width)) == 0:
        # No padding was requested
        return array

    trailing_dims = len(array.shape) - len(pad_width)
    if trailing_dims > 0:
        pad_width = pad_width + [(0, 0)] * trailing_dims
    return np.pad(array, pad_width, **padkw)




[docs]
def embed_slice(slices, data_dims, pad=None):
    """
    Embeds a "padded-slice" inside known data dimension.

    The requested window (each slice, grown by ``pad``) is split into two
    parts: the portion that overlaps the real data, expressed as ordinary
    in-bounds slices, and the amount of padding that has to be added on each
    side afterwards to reach the requested window size.

    Note:
        Only the ``start`` and ``stop`` of each slice are used; any ``step``
        is ignored. A ``start`` of None means 0 and a ``stop`` of None means
        the size of that dimension.

    TODO:
        - [ ] Add the option to return the inverse slice

    Args:
        slices (Tuple[slice, ...]):
            one slice for each data dimension.

        data_dims (Tuple[int, ...]):
            n-dimension data sizes (e.g. 2d height, width)

        pad (None | int | List[int | Tuple[int, int]]):
            extra pad applied to (start / end) / (both) sides of each slice dim

    Returns:
        Tuple:
            data_slice - Tuple[slice] a slice that can be applied to an array
                with shape `data_dims`. This slice will not correspond to
                the full window size if the requested slice is out of bounds.
            extra_padding - List[Tuple[int, int]] extra padding needed after
                slicing to achieve the requested window size.

    Raises:
        ValueError: if ``slices`` and ``data_dims`` (or ``pad`` and
            ``data_dims``) differ in length, or if a slice starts after it
            stops.

    Example:
        >>> # Case where slice is inside the data dims on left edge
        >>> import kwarray
        >>> slices = (slice(0, 10), slice(0, 10))
        >>> data_dims  = [300, 300]
        >>> pad        = [10, 5]
        >>> a, b = kwarray.embed_slice(slices, data_dims, pad)
        >>> print('data_slice = {!r}'.format(a))
        >>> print('extra_padding = {!r}'.format(b))
        data_slice = (slice(0, 20, None), slice(0, 15, None))
        extra_padding = [(10, 0), (5, 0)]

    Example:
        >>> # Case where slice is bigger than the image
        >>> import kwarray
        >>> slices = (slice(-10, 400), slice(-10, 400))
        >>> data_dims  = [300, 300]
        >>> pad        = [10, 5]
        >>> a, b = kwarray.embed_slice(slices, data_dims, pad)
        >>> print('data_slice = {!r}'.format(a))
        >>> print('extra_padding = {!r}'.format(b))
        data_slice = (slice(0, 300, None), slice(0, 300, None))
        extra_padding = [(20, 110), (15, 105)]

    Example:
        >>> # Case where slice is inside the image
        >>> import kwarray
        >>> slices = (slice(10, 40), slice(10, 40))
        >>> data_dims  = [300, 300]
        >>> pad        = None
        >>> a, b = kwarray.embed_slice(slices, data_dims, pad)
        >>> print('data_slice = {!r}'.format(a))
        >>> print('extra_padding = {!r}'.format(b))
        data_slice = (slice(10, 40, None), slice(10, 40, None))
        extra_padding = [(0, 0), (0, 0)]

    Example:
        >>> # Test error cases
        >>> import kwarray
        >>> import pytest
        >>> slices = (slice(0, 40), slice(10, 40))
        >>> data_dims  = [300, 300]
        >>> with pytest.raises(ValueError):
        >>>     kwarray.embed_slice(slices, data_dims[0:1])
        >>> with pytest.raises(ValueError):
        >>>     kwarray.embed_slice(slices[0:1], data_dims)
        >>> with pytest.raises(ValueError):
        >>>     kwarray.embed_slice(slices, data_dims, pad=[(1, 1)])
        >>> with pytest.raises(ValueError):
        >>>     kwarray.embed_slice(slices, data_dims, pad=[1])
    """
    starts = [sl.start for sl in slices]
    stops = [sl.stop for sl in slices]

    ndims = len(data_dims)
    if len(starts) != ndims:
        raise ValueError('slices and data_dims must have the same length')

    pad_slice = _coerce_pad(pad, ndims)

    in_bounds = []
    extra_padding = []

    for size, start, stop, dim_pad in zip(data_dims, starts, stops, pad_slice):
        # Open-ended slices span to the respective border of the data
        start = 0 if start is None else start
        stop = size if stop is None else stop
        if start > stop:
            raise ValueError('d_low > d_high: {} > {}'.format(start, stop))

        # The requested window, as if the data extended infinitely
        want_low = start - dim_pad[0]
        want_high = stop + dim_pad[1]

        # The part of that window that actually exists in the data
        have_low = min(size, max(0, want_low))
        have_high = min(size, max(0, want_high))
        in_bounds.append(slice(have_low, have_high))

        # How far the window sticks out on each side. One of these is
        # negative when the whole window is out of bounds on a single side.
        missing_low = have_low - want_low
        missing_high = want_high - have_high

        # A negative overshoot on one side is subtracted from the other so
        # the total padding still equals the requested window size.
        extra_padding.append((
            max(0, missing_low + min(0, missing_high)),
            max(0, missing_high + min(0, missing_low)),
        ))

    data_slice = tuple(in_bounds)
    return data_slice, extra_padding




[docs]
def _coerce_pad(pad, ndims):
    """
    Normalize a user supplied ``pad`` into one (low, high) pair per dimension.

    Args:
        pad (None | int | Iterable[int | Tuple[int, int]]):
            * None: no padding in any dimension.
            * an integer (Python or numpy scalar): the same padding on both
              sides of every dimension.
            * an iterable with one entry per dimension, where each entry is
              either a single value (used for both sides) or an iterable
              giving the (low, high) padding.
        ndims (int): expected number of dimensions

    Returns:
        List: ``ndims`` entries, each indexable as ``entry[0]`` (low pad)
        and ``entry[1]`` (high pad).

    Raises:
        ValueError: if an iterable ``pad`` does not have ``ndims`` entries.
    """
    if pad is None:
        pad_slice = [(0, 0)] * ndims
    elif isinstance(pad, (int, np.integer)):
        # numpy integer scalars are not instances of int, but they should be
        # treated exactly like a plain scalar pad.
        pad_slice = [(pad, pad)] * ndims
    else:
        # Normalize to left/right pad value for each dim
        pad_slice = [p if ub.iterable(p) else [p, p] for p in pad]

    if len(pad_slice) != ndims:
        # A length mismatch could be "fixed" by filling in (0, 0) for the
        # missing trailing dimensions, but the user probably made a mistake,
        # so fail loudly instead.
        raise ValueError('pad and data_dims must have the same length')
    return pad_slice