:py:mod:`kwarray.util_numpy`
============================

.. py:module:: kwarray.util_numpy

.. autoapi-nested-parse::

   Numpy specific extensions


Module Contents
---------------


Functions
~~~~~~~~~

.. autoapisummary::

   kwarray.util_numpy.boolmask
   kwarray.util_numpy.iter_reduce_ufunc
   kwarray.util_numpy.isect_flags
   kwarray.util_numpy.atleast_nd
   kwarray.util_numpy.argmaxima
   kwarray.util_numpy.argminima
   kwarray.util_numpy.unique_rows
   kwarray.util_numpy.arglexmax
   kwarray.util_numpy.normalize


.. py:function:: boolmask(indices, shape=None)

   Constructs an array of booleans where an item is True if its position is in
   ``indices`` otherwise it is False. This can be viewed as the inverse of
   :func:`numpy.where`.

   :Parameters: * **indices** (*ndarray*) -- list of integer indices
                * **shape** (*int | tuple*) -- length of the returned list. If not specified
                  the minimal possible shape to incorporate all the indices is used.
                  In general, it is best practice to always specify this argument.

   :returns: mask: mask[idx] is True if idx in indices
   :rtype: ndarray[bool]

   .. rubric:: Example

   >>> indices = [0, 1, 4]
   >>> mask = boolmask(indices, shape=6)
   >>> assert np.all(mask == [True, True, False, False, True, False])
   >>> mask = boolmask(indices)
   >>> assert np.all(mask == [True, True, False, False, True])

   .. rubric:: Example

   >>> indices = np.array([(0, 0), (1, 1), (2, 1)])
   >>> shape = (3, 3)
   >>> mask = boolmask(indices, shape)
   >>> import ubelt as ub  # NOQA
   >>> result = ub.repr2(mask)
   >>> print(result)
   np.array([[ True, False, False],
             [False,  True, False],
             [False,  True, False]], dtype=np.bool)


.. py:function:: iter_reduce_ufunc(ufunc, arrs, out=None, default=None)

   constant memory iteration and reduction

   applies ufunc from left to right over the input arrays

   :Parameters: * **ufunc** (*Callable*) -- called on each pair of consecutive ndarrays
                * **arrs** (*Iterator[ndarray]*) -- iterator of ndarrays
                * **default** (*object*) -- return value when iterator is empty

   :returns: * if ``len(arrs) == 0``, returns ``default``
             * if ``len(arrs) == 1``, returns ``arrs[0]``
             * if ``len(arrs) >= 2``, returns
               ``ufunc(...ufunc(ufunc(arrs[0], arrs[1]), arrs[2]),...arrs[n-1])``
   :rtype: ndarray

   .. rubric:: Example

   >>> arr_list = [
   ...     np.array([0, 1, 2, 3, 8, 9]),
   ...     np.array([4, 1, 2, 3, 4, 5]),
   ...     np.array([0, 5, 2, 3, 4, 5]),
   ...     np.array([1, 1, 6, 3, 4, 5]),
   ...     np.array([0, 1, 2, 7, 4, 5])
   ... ]
   >>> memory = np.array([9, 9, 9, 9, 9, 9])
   >>> gen_memory = memory.copy()
   >>> def arr_gen(arr_list, gen_memory):
   ...     for arr in arr_list:
   ...         gen_memory[:] = arr
   ...         yield gen_memory
   >>> print('memory = %r' % (memory,))
   >>> print('gen_memory = %r' % (gen_memory,))
   >>> ufunc = np.maximum
   >>> res1 = iter_reduce_ufunc(ufunc, iter(arr_list), out=None)
   >>> res2 = iter_reduce_ufunc(ufunc, iter(arr_list), out=memory)
   >>> res3 = iter_reduce_ufunc(ufunc, arr_gen(arr_list, gen_memory), out=memory)
   >>> print('res1       = %r' % (res1,))
   >>> print('res2       = %r' % (res2,))
   >>> print('res3       = %r' % (res3,))
   >>> print('memory     = %r' % (memory,))
   >>> print('gen_memory = %r' % (gen_memory,))
   >>> assert np.all(res1 == res2)
   >>> assert np.all(res2 == res3)


.. py:function:: isect_flags(arr, other)

   Check which items in an array intersect with another set of items

   :Parameters: * **arr** (*ndarray*) -- items to check
                * **other** (*Iterable*) -- items to check if they exist in arr

   :returns: booleans corresponding to arr indicating if that item is
             also contained in other.
   :rtype: ndarray

   .. rubric:: Example

   >>> arr = np.array([
   >>>     [1, 2, 3, 4],
   >>>     [5, 6, 3, 4],
   >>>     [1, 1, 3, 4],
   >>> ])
   >>> other = np.array([1, 4, 6])
   >>> mask = isect_flags(arr, other)
   >>> print(mask)
   [[ True False False  True]
    [False  True False  True]
    [ True  True False  True]]


.. py:function:: atleast_nd(arr, n, front=False)

   View inputs as arrays with at least n dimensions.

   :Parameters: * **arr** (*array_like*) -- An array-like object.  Non-array inputs are converted to arrays.
                  Arrays that already have n or more dimensions are preserved.
                * **n** (*int*) -- number of dimensions to ensure
                * **front** (*bool, default=False*) -- if True, new dimensions are added to the front of the array;
                  otherwise they are added to the back.

   :returns:     An array with ``a.ndim >= n``.  Copies are avoided where possible,
                 and views with ``n`` or more dimensions are returned.  For example,
                 with ``n=3`` and ``front=False``, a 1-D array of shape ``(N,)``
                 becomes a view of shape ``(N, 1, 1)``, and a 2-D array of shape
                 ``(M, N)`` becomes a view of shape ``(M, N, 1)``.
   :rtype: ndarray

   .. seealso:: numpy.atleast_1d, numpy.atleast_2d, numpy.atleast_3d

   .. rubric:: Example

   >>> n = 2
   >>> arr = np.array([1, 1, 1])
   >>> arr_ = atleast_nd(arr, n)
   >>> import ubelt as ub  # NOQA
   >>> result = ub.repr2(arr_.tolist(), nl=0)
   >>> print(result)
   [[1], [1], [1]]

   .. rubric:: Example

   >>> n = 4
   >>> arr1 = [1, 1, 1]
   >>> arr2 = np.array(0)
   >>> arr3 = np.array([[[[[1]]]]])
   >>> arr1_ = atleast_nd(arr1, n)
   >>> arr2_ = atleast_nd(arr2, n)
   >>> arr3_ = atleast_nd(arr3, n)
   >>> import ubelt as ub  # NOQA
   >>> result1 = ub.repr2(arr1_.tolist(), nl=0)
   >>> result2 = ub.repr2(arr2_.tolist(), nl=0)
   >>> result3 = ub.repr2(arr3_.tolist(), nl=0)
   >>> result = '\n'.join([result1, result2, result3])
   >>> print(result)
   [[[[1]]], [[[1]]], [[[1]]]]
   [[[[0]]]]
   [[[[[1]]]]]

   .. rubric:: Notes

   Extensive benchmarks are in
   kwarray/dev/bench_atleast_nd.py

   These demonstrate that this function is statistically faster than the
   numpy variants, although the difference is small.  On average this
   function takes 480ns versus numpy which takes 790ns.


.. py:function:: argmaxima(arr, num, axis=None, ordered=True)

   Returns the top ``num`` maximum indices.

   This can be significantly faster than using argsort.

   :Parameters: * **arr** (*ndarray*) -- input array
                * **num** (*int*) -- number of maximum indices to return
                * **axis** (*int|None*) -- axis to find maxima over. If None this is equivalent
                  to using arr.ravel().
                * **ordered** (*bool*) -- if False, returns the maximum elements in an arbitrary
                  order, otherwise they are in descending order. (Setting this to
                  false is a bit faster).

   .. todo:: - [ ] if num is None, return arg for all values equal to the maximum

   :returns: ndarray

   .. rubric:: Example

   >>> # Test cases with axis=None
   >>> arr = (np.random.rand(100) * 100).astype(int)
   >>> for num in range(0, len(arr) + 1):
   >>>     idxs = argmaxima(arr, num)
   >>>     idxs2 = argmaxima(arr, num, ordered=False)
   >>>     assert np.all(arr[idxs] == np.array(sorted(arr)[::-1][:len(idxs)])), 'ordered=True must return in order'
   >>>     assert sorted(idxs2) == sorted(idxs), 'ordered=False must return the right idxs, but in any order'

   .. rubric:: Example

   >>> # Test cases with axis
   >>> arr = (np.random.rand(3, 5, 7) * 100).astype(int)
   >>> for axis in range(len(arr.shape)):
   >>>     for num in range(0, len(arr) + 1):
   >>>         idxs = argmaxima(arr, num, axis=axis)
   >>>         idxs2 = argmaxima(arr, num, ordered=False, axis=axis)
   >>>         assert idxs.shape[axis] == num
   >>>         assert idxs2.shape[axis] == num


.. py:function:: argminima(arr, num, axis=None, ordered=True)

   Returns the top ``num`` minimum indices.

   This can be significantly faster than using argsort.

   :Parameters: * **arr** (*ndarray*) -- input array
                * **num** (*int*) -- number of minimum indices to return
                * **axis** (*int|None*) -- axis to find minima over.
                  If None this is equivalent to using arr.ravel().
                * **ordered** (*bool*) -- if False, returns the minimum elements in an arbitrary
                  order, otherwise they are in ascending order. (Setting this to
                  false is a bit faster).

   .. rubric:: Example

   >>> arr = (np.random.rand(100) * 100).astype(int)
   >>> for num in range(0, len(arr) + 1):
   >>>     idxs = argminima(arr, num)
   >>>     assert np.all(arr[idxs] == np.array(sorted(arr)[:len(idxs)])), 'ordered=True must return in order'
   >>>     idxs2 = argminima(arr, num, ordered=False)
   >>>     assert sorted(idxs2) == sorted(idxs), 'ordered=False must return the right idxs, but in any order'

   .. rubric:: Example

   >>> # Test cases with axis
   >>> from kwarray.util_numpy import *  # NOQA
   >>> arr = (np.random.rand(3, 5, 7) * 100).astype(int)
   >>> # make a unique array so we can check argmax consistency
   >>> arr = np.arange(3 * 5 * 7)
   >>> np.random.shuffle(arr)
   >>> arr = arr.reshape(3, 5, 7)
   >>> for axis in range(len(arr.shape)):
   >>>     for num in range(0, len(arr) + 1):
   >>>         idxs = argminima(arr, num, axis=axis)
   >>>         idxs2 = argminima(arr, num, ordered=False, axis=axis)
   >>>         print('idxs = {!r}'.format(idxs))
   >>>         print('idxs2 = {!r}'.format(idxs2))
   >>>         assert idxs.shape[axis] == num
   >>>         assert idxs2.shape[axis] == num
   >>>         # Check if argmin agrees with -argmax
   >>>         idxs3 = argmaxima(-arr, num, axis=axis)
   >>>         assert np.all(idxs3 == idxs)

   .. rubric:: Example

   >>> arr = np.arange(20).reshape(4, 5) % 6
   >>> argminima(arr, axis=1, num=2, ordered=False)
   >>> argminima(arr, axis=1, num=2, ordered=True)
   >>> argmaxima(-arr, axis=1, num=2, ordered=True)
   >>> argmaxima(-arr, axis=1, num=2, ordered=False)


.. py:function:: unique_rows(arr, ordered=False)

   Like unique, but works on rows

   :Parameters: * **arr** (*ndarray*) -- must be a contiguous C style array
                * **ordered** (*bool*) -- if true, keeps relative ordering

   .. rubric:: References

   https://stackoverflow.com/questions/16970982/find-unique-rows-in-numpy-array

   .. rubric:: Example

   >>> import kwarray
   >>> from kwarray.util_numpy import *  # NOQA
   >>> rng = kwarray.ensure_rng(0)
   >>> arr = rng.randint(0, 2, size=(12, 3))
   >>> arr_unique = unique_rows(arr)
   >>> print('arr_unique = {!r}'.format(arr_unique))


.. py:function:: arglexmax(keys, multi=False)

   Find the index of the maximum element in a sequence of keys.

   :Parameters: * **keys** (*tuple*) -- a k-tuple of k N-dimensional arrays.
                  Like np.lexsort the last key in the sequence is used for the
                  primary sort order, the second-to-last key for the secondary sort
                  order, and so on.
                * **multi** (*bool*) -- if True, returns all indices that share the max value

   :returns: either the index or list of indices
   :rtype: int | ndarray[int]

   .. rubric:: Example

   >>> k, N = 100, 100
   >>> rng = np.random.RandomState(0)
   >>> keys = [(rng.rand(N) * N).astype(int) for _ in range(k)]
   >>> multi_idx = arglexmax(keys, multi=True)
   >>> idxs = np.lexsort(keys)
   >>> assert sorted(idxs[::-1][:len(multi_idx)]) == sorted(multi_idx)

   Benchmark:
       >>> import ubelt as ub
       >>> k, N = 100, 100
       >>> rng = np.random
       >>> keys = [(rng.rand(N) * N).astype(int) for _ in range(k)]
       >>> for timer in ub.Timerit(100, bestof=10, label='arglexmax'):
       >>>     with timer:
       >>>         arglexmax(keys)
       >>> for timer in ub.Timerit(100, bestof=10, label='lexsort'):
       >>>     with timer:
       >>>         np.lexsort(keys)[-1]


.. py:function:: normalize(arr, mode='linear', alpha=None, beta=None, out=None)

   Rebalance signal values via contrast stretching.

   By default linearly stretches array values to minimum and maximum values.

   :Parameters: * **arr** (*ndarray*) -- array to normalize, usually an image
                * **out** (*ndarray | None*) -- output array. Note that we will create an
                  internal floating point copy for integer computations.
                * **mode** (*str*) -- either linear or sigmoid.
                * **alpha** (*float*) -- Only used if mode=sigmoid.  Division factor
                  (pre-sigmoid). If unspecified computed as:
                  ``max(abs(old_min - beta), abs(old_max - beta)) / 6.212606``.
                  Note this parameter is sensitive to whether the input is a float or
                  uint8 image.
                * **beta** (*float*) -- subtractive factor (pre-sigmoid). This should be the
                  intensity of the most interesting bits of the image, i.e. bring
                  them to the center (0) of the distribution.
                  Defaults to ``(max - min) / 2``.  Note this parameter is sensitive
                  to whether the input is a float or uint8 image.

   .. rubric:: References

   https://en.wikipedia.org/wiki/Normalization_(image_processing)

   .. rubric:: Example

   >>> raw_f = np.random.rand(8, 8)
   >>> norm_f = normalize(raw_f)

   >>> raw_f = np.random.rand(8, 8) * 100
   >>> norm_f = normalize(raw_f)
   >>> assert isclose(norm_f.min(), 0)
   >>> assert isclose(norm_f.max(), 1)

   >>> raw_u = (np.random.rand(8, 8) * 255).astype(np.uint8)
   >>> norm_u = normalize(raw_u)

   .. rubric:: Example

   >>> # xdoctest: +REQUIRES(module:kwimage)
   >>> import kwimage
   >>> arr = kwimage.grab_test_image('lowcontrast')
   >>> arr = kwimage.ensure_float01(arr)
   >>> norms = {}
   >>> norms['arr'] = arr.copy()
   >>> norms['linear'] = normalize(arr, mode='linear')
   >>> norms['sigmoid'] = normalize(arr, mode='sigmoid')
   >>> # xdoctest: +REQUIRES(--show)
   >>> import kwplot
   >>> kwplot.autompl()
   >>> kwplot.figure(fnum=1, doclf=True)
   >>> pnum_ = kwplot.PlotNums(nSubplots=len(norms))
   >>> for key, img in norms.items():
   >>>     kwplot.imshow(img, pnum=pnum_(), title=key)

   Benchmark:
       # Our method is faster than standard in-line implementations.

       import timerit
       ti = timerit.Timerit(100, bestof=10, verbose=2, unit='ms')
       arr = kwimage.grab_test_image('lowcontrast', dsize=(512, 512))

       print('--- float ---')
       arr = ensure_float01(arr)
       out = arr.copy()
       for timer in ti.reset('naive1-float'):
           with timer:
               (arr - arr.min()) / (arr.max() - arr.min())

       import timerit
       for timer in ti.reset('simple-float'):
           with timer:
               max_ = arr.max()
               min_ = arr.min()
               result = (arr - min_) / (max_ - min_)

       for timer in ti.reset('normalize-float'):
           with timer:
               normalize(arr)

       for timer in ti.reset('normalize-float-inplace'):
           with timer:
               normalize(arr, out=out)

       print('--- uint8 ---')
       arr = ensure_uint255(arr)
       out = arr.copy()
       for timer in ti.reset('naive1-uint8'):
           with timer:
               (arr - arr.min()) / (arr.max() - arr.min())

       import timerit
       for timer in ti.reset('simple-uint8'):
           with timer:
               max_ = arr.max()
               min_ = arr.min()
               result = (arr - min_) / (max_ - min_)

       for timer in ti.reset('normalize-uint8'):
           with timer:
               normalize(arr)

       for timer in ti.reset('normalize-uint8-inplace'):
           with timer:
               normalize(arr, out=out)

   Ignore:
       globals().update(xdev.get_func_kwargs(normalize))