:py:mod:`kwarray.algo_setcover`
===============================

.. py:module:: kwarray.algo_setcover


Module Contents
---------------


Functions
~~~~~~~~~

.. autoapisummary::

   kwarray.algo_setcover.setcover
   kwarray.algo_setcover._setcover_greedy_old
   kwarray.algo_setcover._setcover_greedy_new
   kwarray.algo_setcover._setcover_ilp


.. py:function:: setcover(candidate_sets_dict, items=None, set_weights=None, item_values=None, max_weight=None, algo='approx')

   Finds a feasible solution to the minimum weight maximum value set cover.
   The quality and runtime of the solution will depend on the backend
   algorithm selected.

   :Parameters: * **candidate_sets_dict** (*Dict[Hashable, List[Hashable]]*) -- a dictionary where keys are the candidate set ids and each value is
                  a candidate cover set.
                * **items** (*Hashable, optional*) -- the set of all items to be covered,
                  if not specified, it is infered from the candidate cover sets
                * **set_weights** (*Dict, optional*) -- maps candidate set ids to a cost
                  for using this candidate cover in the solution. If not specified
                  the weight of each candiate cover defaults to 1.
                * **item_values** (*Dict, optional*) -- maps each item to a value we get for
                  returning this item in the solution. If not specified the value
                  of each item defaults to 1.
                * **max_weight** (*float*) -- if specified, the total cost of the returned cover
                  is constrained to be less than this number.
                * **algo** (*str*) -- specifies which algorithm to use. Can either be
                  'approx' for the greedy solution or 'exact' for the globally
                  optimal solution. Note the 'exact' algorithm solves an
                  integer-linear-program, which can be very slow and requires
                  the `pulp` package to be installed.

   :returns: a subdict of candidate_sets_dict containing the chosen solution.
   :rtype: Dict

   .. rubric:: Example

   >>> candidate_sets_dict = {
   >>>     'a': [1, 2, 3, 8, 9, 0],
   >>>     'b': [1, 2, 3, 4, 5],
   >>>     'c': [4, 5, 7],
   >>>     'd': [5, 6, 7],
   >>>     'e': [6, 7, 8, 9, 0],
   >>> }
   >>> greedy_soln = setcover(candidate_sets_dict, algo='greedy')
   >>> print('greedy_soln = {}'.format(ub.repr2(greedy_soln, nl=0)))
   greedy_soln = {'a': [1, 2, 3, 8, 9, 0], 'c': [4, 5, 7], 'd': [5, 6, 7]}
   >>> # xdoc: +REQUIRES(module:pulp)
   >>> exact_soln = setcover(candidate_sets_dict, algo='exact')
   >>> print('exact_soln = {}'.format(ub.repr2(exact_soln, nl=0)))
   exact_soln = {'b': [1, 2, 3, 4, 5], 'e': [6, 7, 8, 9, 0]}


.. py:function:: _setcover_greedy_old(candidate_sets_dict, items=None, set_weights=None, item_values=None, max_weight=None)

   Benchmark:
       items = np.arange(10000)
       candidate_sets_dict = {}
       for i in range(1000):
           candidate_sets_dict[i] = np.random.choice(items, 200).tolist()

       _setcover_greedy_new(candidate_sets_dict) == _setcover_greedy_old(candidate_sets_dict)
       _ = nh.util.profile_onthefly(_setcover_greedy_new)(candidate_sets_dict)
       _ = nh.util.profile_onthefly(_setcover_greedy_old)(candidate_sets_dict)

       import ubelt as ub
       for timer in ub.Timerit(3, bestof=1, label='time'):
           with timer:
               len(_setcover_greedy_new(candidate_sets_dict))

       import ubelt as ub
       for timer in ub.Timerit(3, bestof=1, label='time'):
           with timer:
               len(_setcover_greedy_old(candidate_sets_dict))


.. py:function:: _setcover_greedy_new(candidate_sets_dict, items=None, set_weights=None, item_values=None, max_weight=None)

   Implements Johnson's / Chvatal's greedy set-cover approximation algorithms.

   The approximation gaurentees depend on specifications of set weights and
   item values

   Running time:
       N = number of universe items
       C = number of candidate covering sets

       Worst case running time is: O(C^2 * CN)
           (note this is via simple analysis, the big-oh might be better)

   Set Cover: log(len(items) + 1) approximation algorithm
   Weighted Maximum Cover: 1 - 1/e == .632 approximation algorithm
   Generalized maximum coverage is not implemented

   .. rubric:: References

   https://en.wikipedia.org/wiki/Maximum_coverage_problem

   .. rubric:: Notes

   # pip install git+git://github.com/tangentlabs/django-oscar.git#egg=django-oscar.
   # TODO: wrap https://github.com/martin-steinegger/setcover/blob/master/SetCover.cpp
   # pip install SetCoverPy
   # This is actually much slower than my implementation
   from SetCoverPy import setcover
   g = setcover.SetCover(full_overlaps, cost=np.ones(len(full_overlaps)))
   g.greedy()
   keep = np.where(g.s)[0]

   .. rubric:: Example

   >>> candidate_sets_dict = {
   >>>     'a': [1, 2, 3, 8, 9, 0],
   >>>     'b': [1, 2, 3, 4, 5],
   >>>     'c': [4, 5, 7],
   >>>     'd': [5, 6, 7],
   >>>     'e': [6, 7, 8, 9, 0],
   >>> }
   >>> greedy_soln = _setcover_greedy_new(candidate_sets_dict)
   >>> #print(repr(greedy_soln))
   ...
   >>> print('greedy_soln = {}'.format(ub.repr2(greedy_soln, nl=0)))
   greedy_soln = {'a': [1, 2, 3, 8, 9, 0], 'c': [4, 5, 7], 'd': [5, 6, 7]}

   .. rubric:: Example

   >>> candidate_sets_dict = {
   >>>     'a': [1, 2, 3, 8, 9, 0],
   >>>     'b': [1, 2, 3, 4, 5],
   >>>     'c': [4, 5, 7],
   >>>     'd': [5, 6, 7],
   >>>     'e': [6, 7, 8, 9, 0],
   >>> }
   >>> items = list(set(it.chain(*candidate_sets_dict.values())))
   >>> set_weights = {i: 1 for i in candidate_sets_dict.keys()}
   >>> item_values = {e: 1 for e in items}
   >>> greedy_soln = _setcover_greedy_new(candidate_sets_dict,
   >>>                             item_values=item_values,
   >>>                             set_weights=set_weights)
   >>> print('greedy_soln = {}'.format(ub.repr2(greedy_soln, nl=0)))
   greedy_soln = {'a': [1, 2, 3, 8, 9, 0], 'c': [4, 5, 7], 'd': [5, 6, 7]}

   .. rubric:: Example

   >>> candidate_sets_dict = {}
   >>> greedy_soln = _setcover_greedy_new(candidate_sets_dict)
   >>> print('greedy_soln = {}'.format(ub.repr2(greedy_soln, nl=0)))
   greedy_soln = {}


.. py:function:: _setcover_ilp(candidate_sets_dict, items=None, set_weights=None, item_values=None, max_weight=None, verbose=False)

   Set cover / Weighted Maximum Cover exact algorithm using an integer linear
   program.

   .. todo:: - [ ] Use CPLEX solver if available

   https://en.wikipedia.org/wiki/Maximum_coverage_problem

   .. rubric:: Example

   >>> # xdoc: +REQUIRES(module:pulp)
   >>> candidate_sets_dict = {}
   >>> exact_soln = _setcover_ilp(candidate_sets_dict)
   >>> print('exact_soln = {}'.format(ub.repr2(exact_soln, nl=0)))
   exact_soln = {}

   .. rubric:: Example

   >>> # xdoc: +REQUIRES(module:pulp)
   >>> candidate_sets_dict = {
   >>>     'a': [1, 2, 3, 8, 9, 0],
   >>>     'b': [1, 2, 3, 4, 5],
   >>>     'c': [4, 5, 7],
   >>>     'd': [5, 6, 7],
   >>>     'e': [6, 7, 8, 9, 0],
   >>> }
   >>> items = list(set(it.chain(*candidate_sets_dict.values())))
   >>> set_weights = {i: 1 for i in candidate_sets_dict.keys()}
   >>> item_values = {e: 1 for e in items}
   >>> exact_soln1 = _setcover_ilp(candidate_sets_dict,
   >>>                             item_values=item_values,
   >>>                             set_weights=set_weights)
   >>> exact_soln2 = _setcover_ilp(candidate_sets_dict)
   >>> assert exact_soln1 == exact_soln2