chapter 18: set¶

18.1 find keyboard row¶

Given a List of words, return the words that can be typed using letters of alphabet on only one row's of American keyboard.

For example: Input: ["Hello", "Alaska", "Dad", "Peace"] Output: ["Alaska", "Dad"]

Reference: https://leetcode.com/problems/keyboard-row/description/

def find_keyboard_row(words):
    """
    :type words: List[str]
    :rtype: List[str]
    """
    keyboard = [
        set('qwertyuiop'),
        set('asdfghjkl'),
        set('zxcvbnm'),
    ]
    result = []
    for word in words:
        for key in keyboard:
            if set(word.lower()).issubset(key):
                result.append(word)
    return result

18.2 randomized set¶

Design a data structure that supports all following operations in average O(1) time.

insert(val): Inserts an item val to the set if not already present. remove(val): Removes an item val from the set if present. random_element: Returns a random element from current set of elements.

Each element must have the same probability of being returned.

import random


class RandomizedSet():
    """
    idea: shoot
    """

    def __init__(self):
        self.elements = []
        self.index_map = {}  # element -> index

    def insert(self, new_one):
        if new_one in self.index_map:
            return
        self.index_map[new_one] = len(self.elements)
        self.elements.append(new_one)

    def remove(self, old_one):
        if not old_one in self.index_map:
            return
        index = self.index_map[old_one]
        last = self.elements.pop()
        self.index_map.pop(old_one)
        if index == len(self.elements):
            return
        self.elements[index] = last
        self.index_map[last] = index

    def random_element(self):
        return random.choice(self.elements)


def __test():
    rset = RandomizedSet()
    ground_truth = set()
    n = 64

    for i in range(n):
        rset.insert(i)
        ground_truth.add(i)

    # Remove a half
    for i in random.sample(range(n), n // 2):
        rset.remove(i)
        ground_truth.remove(i)

    print(len(ground_truth), len(rset.elements), len(rset.index_map))
    for i in ground_truth:
        assert(i == rset.elements[rset.index_map[i]])

    for i in range(n):
        print(rset.random_element(), end=' ')
    print()


if __name__ == "__main__":
    __test()

18.3 set covering¶

Universe U of n elements Collection of subsets of U:

S = S1,S2...,Sm Where every substet Si has an associated cost.

Find a minimum cost subcollection of S that covers all elements of U

Example:

U = {1,2,3,4,5} S = {S1,S2,S3}

S1 = {4,1,3}, Cost(S1) = 5 S2 = {2,5}, Cost(S2) = 10 S3 = {1,4,3,2}, Cost(S3) = 3

Output:: Set cover = {S2, S3} Min Cost = 13

def powerset(iterable):
    """Calculate the powerset of any iterable.

    For a range of integers up to the length of the given list,
    make all possible combinations and chain them together as one object.
    From https://docs.python.org/3/library/itertools.html#itertools-recipes
    """
    "list(powerset([1,2,3])) --> [(), (1,), (2,), (3,), (1,2), (1,3), (2,3), (1,2,3)]"
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))


def optimal_set_cover(universe, subsets, costs):
    """ Optimal algorithm - DONT USE ON BIG INPUTS - O(2^n) complexity!
    Finds the minimum cost subcollection os S that covers all elements of U

    Args:
        universe (list): Universe of elements
        subsets (dict): Subsets of U {S1:elements,S2:elements}
        costs (dict): Costs of each subset in S - {S1:cost, S2:cost...}
    """
    pset = powerset(subsets.keys())
    best_set = None
    best_cost = float("inf")
    for subset in pset:
        covered = set()
        cost = 0
        for s in subset:
            covered.update(subsets[s])
            cost += costs[s]
        if len(covered) == len(universe) and cost < best_cost:
            best_set = subset
            best_cost = cost
    return best_set


def greedy_set_cover(universe, subsets, costs):
    """Approximate greedy algorithm for set-covering. Can be used on large
    inputs - though not an optimal solution.

    Args:
        universe (list): Universe of elements
        subsets (dict): Subsets of U {S1:elements,S2:elements}
        costs (dict): Costs of each subset in S - {S1:cost, S2:cost...}
    """
    elements = set(e for s in subsets.keys() for e in subsets[s])
    # elements don't cover universe -> invalid input for set cover
    if elements != universe:
        return None

    # track elements of universe covered
    covered = set()
    cover_sets = []

    while covered != universe:
        min_cost_elem_ratio = float("inf")
        min_set = None
        # find set with minimum cost:elements_added ratio
        for s, elements in subsets.items():
            new_elements = len(elements - covered)
            # set may have same elements as already covered -> new_elements = 0
            # check to avoid division by 0 error
            if new_elements != 0:
                cost_elem_ratio = costs[s] / new_elements
                if cost_elem_ratio < min_cost_elem_ratio:
                    min_cost_elem_ratio = cost_elem_ratio
                    min_set = s
        cover_sets.append(min_set)
        # union
        covered |= subsets[min_set]
    return cover_sets


if __name__ == '__main__':
    universe = {1, 2, 3, 4, 5}
    subsets = {'S1': {4, 1, 3}, 'S2': {2, 5}, 'S3': {1, 4, 3, 2}}
    costs = {'S1': 5, 'S2': 10, 'S3': 3}

    optimal_cover = optimal_set_cover(universe, subsets, costs)
    optimal_cost = sum(costs[s] for s in optimal_cover)

    greedy_cover = greedy_set_cover(universe, subsets, costs)
    greedy_cost = sum(costs[s] for s in greedy_cover)

    print('Optimal Set Cover:')
    print(optimal_cover)
    print('Cost = %s' % optimal_cost)

    print('Greedy Set Cover:')
    print(greedy_cover)
    print('Cost = %s' % greedy_cost)