Source code for ways.core.grouping

#!/usr/bin/env python
# -*- coding: utf-8 -*-

'''Various functions for grouping sequences of integers.'''

# IMPORT STANDARD LIBRARIES
import itertools

# IMPORT THIRD-PARTY LIBRARIES
from six import moves


def has_common_elements(*args):
    '''bool: Check that each sequence shares an element with the next one.

    Only neighbouring sequences are compared (first with second, second with
    third, and so on), so the sequences do not need a single element that is
    common to all of them. Zero or one sequence always gives True.

    Args:
        *args (iterable): The sequences to compare, in order.

    Returns:
        bool: False as soon as one neighbouring pair has nothing in common,
              otherwise True.

    '''
    neighbours = zip(args, args[1:])
    return all(
        any(item in following for item in current)
        for current, following in neighbours
    )


def pairwise(iterable):
    '''Pair every item with its successor.

    Example:
        s -> (s0, s1), (s1, s2), (s2, s3), ...

    Args:
        iterable (iterable): The items to pair up.

    Returns:
        iterator[tuple]: Overlapping two-item tuples. Nothing is produced for
                         inputs with fewer than two items.

    '''
    leading, trailing = itertools.tee(iterable)
    # Shift the second copy forward by one so the zip lines up neighbours
    next(trailing, None)
    return moves.zip(leading, trailing)


def grouper(obj):
    '''Group a list together by the step between its neighbouring items.

    Neighbouring pairs are grouped by their difference. Two or more pairs in a
    row with the same difference become a (start, end, step) tuple; a pair
    whose difference is not repeated contributes a lone item.

    Slightly different result than ranges, in cases where outlier items lie
    between two ranges: here the outlier is absorbed into a run of its own
    whenever the steps on both sides of it are equal.

    Example:
        >>> grouper([0, 1, 2, 3, 4, 7, 10, 12, 14, 16])
        [(0, 4, 1), (4, 10, 3), (10, 16, 2)]
        >>> grouper([0, 5, 6, 7])
        [0, (5, 7, 1)]

    Args:
        obj (iterable): The list (or iterable) to process

    Returns:
        list[int or tuple]: The ranges from the given list

    '''
    items = list(obj)
    if len(items) < 2:
        # Nothing to pair up; without this a lone item would be lost
        return items

    result = []
    steps = itertools.groupby(
        zip(items, items[1:]), key=lambda pair: pair[1] - pair[0])
    for step, pairs in steps:
        pairs = list(pairs)
        if len(pairs) > 1:
            # The run starts on the item the previous pair already reported
            if result and pairs[0][0] == result[-1]:
                del result[-1]
            result.append((pairs[0][0], pairs[-1][-1], step))
        else:
            if not result:
                # Only the right-hand item of a pair is recorded below, so
                # the very first item has to be added explicitly
                result.append(pairs[0][0])
            result.append(pairs[0][-1])
    return result


def ranges(obj, return_range=True):
    '''Get the start and end ranges for a list of discontinuous int ranges.

    The items are scanned from left to right. Three or more neighbouring items
    that are separated by the same step are reported as one run; every other
    item is yielded on its own.

    Reference: http://stackoverflow.com/questions/39709606/

    Example:

        >>> list(ranges([0, 1, 2, 3, 4, 7, 10, 12, 14, 16], return_range=False))
        [(0, 4, 1), 7, (10, 16, 2)]

    Note:
        With return_range=True a run is yielded as range(first, last, step),
        where "last" is the final item of the run. Because a range excludes
        its stop value, iterating that range does not produce the final item.

    Args:
        obj (iterable[int]): The integers to get the sequence of
        return_range (bool): If True, yield each run as a range object.
                             If False, yield each run as a tuple with start,
                             end, and step.

    Yields:
        int or range or tuple: The ranges from the given list

    Raises:
        ValueError: If return_range is True and three or more equal items sit
                    next to each other, because a range cannot have step 0.

    '''
    # Materialize once so one-shot iterables work and look-ahead is cheap
    items = list(obj)
    total = len(items)
    start = 0
    while start < total:
        end = start
        step = None
        if start + 1 < total:
            step = items[start + 1] - items[start]
            end = start + 1
            # Stretch the run for as long as the step stays the same
            while end + 1 < total and items[end + 1] - items[end] == step:
                end += 1

        if end - start >= 2:
            run = (items[start], items[end], step)
            if return_range:
                yield moves.range(*run)
            else:
                yield run
            start = end + 1
        else:
            # A lone item, or a pair too short to count as a run. Only the
            # first item is emitted so the second may start the next run.
            yield items[start]
            start += 1

def get_difference(list1, list2):
    '''Get the elements of list1 that are not in list2.

    Note:
        This is NOT a symmetric_difference

    Warning:
        Both inputs are converted to sets, so duplicates are dropped and the
        order of the returned items is not guaranteed.

    Args:
        list1 (list): The items to keep, unless they also appear in list2
        list2 (list): The items to leave out of the result

    Returns:
        list: The unique items of list1 that do not occur in list2

    '''
    kept = set(list1)
    excluded = set(list2)
    return list(kept - excluded)


def get_ordered_intersection(seq1, seq2, memory_efficient=False):
    '''Get the elements that exist in both given sequences.

    This code will preserve the order of the first sequence given. Duplicates
    in seq1 are kept.

    Args:
        seq1 (iterable): The sequence to iterate. Also determines return order.
        seq2 (iterable): The second sequence to compare against the first
        memory_efficient (:obj:`bool`, optional):
            If True, seq2 is copied into a frozenset before the lookups, which
            speeds up the membership tests. Every element of seq2 must be
            hashable for this to work. Default is False.

    Returns:
        list[any]: The common elements of the two sequences

    '''
    if memory_efficient:
        # The lookups below run against seq2, so seq2 is what must be hashed.
        # Hashing seq1 here would make every item of seq1 match itself.
        seq2 = frozenset(seq2)
    return [value for value in seq1 if value in seq2]


def filter_consecutive_items(obj):
    '''Collapse evenly-stepped neighbours into (start, end, step) tuples.

    Neighbouring pairs are grouped by their difference. Two or more pairs in a
    row with the same difference become one (start, end, step) tuple. A pair
    whose difference is not repeated contributes only its right-hand item, so
    the first item of obj is not part of the result unless it starts a run.

    Args:
        obj (iterable): The list (or iterable) to process

    Returns:
        list[int or tuple]: The ranges from the given list

    '''
    collected = []
    by_step = itertools.groupby(
        pairwise(obj), key=lambda pair: pair[1] - pair[0])
    for step, pairs in by_step:
        pairs = list(pairs)
        first = pairs[0][0]
        last = pairs[-1][-1]

        if len(pairs) <= 1:
            collected.append(last)
            continue

        # The run begins on the item that the previous pair already recorded
        if collected and first == collected[-1]:
            collected.pop()
        collected.append((first, last, step))
    return collected


def uniquify_list(seq, idfun=None):
    '''Order preserving way to get unique elements in a list.

    The first item seen for each marker is kept; later items with the same
    marker are skipped.

    Reference: https://www.peterbe.com/plog/uniqifiers-benchmark

    Args:
        seq (list): The list to make unique
        idfun (func): An optional function that turns an item into the
                      (hashable) marker used to detect duplicates. If not
                      given, the item itself is the marker.

    Returns:
        list: The uniquified list

    '''
    markers = set()
    unique = []
    for item in seq:
        marker = idfun(item) if idfun else item
        if marker not in markers:
            markers.add(marker)
            unique.append(item)
    return unique


def group_into(seq, maximum):
    '''Break a sequence up in a specified number of groups.

    Items are dealt out in turn, so group N holds every maximum-th item
    starting from index N.

    Example:
        >>> seq = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        >>> maximum = 3
        >>> group_into(seq=seq, maximum=maximum)
        [[1, 4, 7, 10], [2, 5, 8], [3, 6, 9]]

    Args:
        seq (sequence): The sequence to split up. It must support slicing.
        maximum (int): The number of groups to make

    Returns:
        list[sequence]: Group of the original sequence object

    '''
    groups = []
    for offset in moves.range(maximum):
        groups.append(seq[offset::maximum])
    return groups


def group_nth(seq, by):  # pylint: disable=invalid-name
    '''Split the sequence into consecutive groups of the given size.

    Example:
        >>> seq = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        >>> by = 4
        >>> group_nth(seq=seq, by=by)
        [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10]]

    Note:
        The last group is shorter than the others when len(seq) is not a
        multiple of by.

    Args:
        seq (sequence): The sequence to split up into some groups. It must
                        support len() and slicing.
        by (int):
            The size of the groups that will be made from the original sequence

    Returns:
        list[sequence]: A group of the original sequence object

    '''
    length = len(seq)
    groups = []
    for index in moves.range((length // by) + 1):
        start = index * by
        # The extra index is only needed when a shorter tail group remains
        if start < length:
            groups.append(seq[start:start + by])
    return groups


def chunkwise_iter(seq, size=2):
    '''iterator: Split the given sequence into tuples of `size` items.

    Left-over items that cannot fill a whole tuple are not returned.

    '''
    shared = iter(seq)
    # Every slot pulls from the same iterator, so each tuple takes the next
    # `size` items in order
    slots = (shared,) * size
    return moves.zip(*slots)